mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Conversion pipeline: Fix broken link rewriting for inline CSS embedded in HTML
This commit is contained in:
parent
6abc12cf18
commit
523185f7a9
@ -11,12 +11,11 @@ import os, re, uuid, logging
|
||||
from mimetypes import types_map
|
||||
from collections import defaultdict
|
||||
from itertools import count
|
||||
from urlparse import urldefrag, urlparse, urlunparse
|
||||
from urlparse import urldefrag, urlparse, urlunparse, urljoin
|
||||
from urllib import unquote as urlunquote
|
||||
from urlparse import urljoin
|
||||
|
||||
from lxml import etree, html
|
||||
from cssutils import CSSParser
|
||||
from cssutils import CSSParser, parseString, parseStyle, replaceUrls
|
||||
from cssutils.css import CSSRule
|
||||
|
||||
import calibre
|
||||
@ -88,11 +87,11 @@ def XLINK(name):
|
||||
def CALIBRE(name):
|
||||
return '{%s}%s' % (CALIBRE_NS, name)
|
||||
|
||||
_css_url_re = re.compile(r'url\((.*?)\)', re.I)
|
||||
_css_url_re = re.compile(r'url\s*\((.*?)\)', re.I)
|
||||
_css_import_re = re.compile(r'@import "(.*?)"')
|
||||
_archive_re = re.compile(r'[^ ]+')
|
||||
|
||||
def iterlinks(root):
|
||||
def iterlinks(root, find_links_in_css=True):
|
||||
'''
|
||||
Iterate over all links in a OEB Document.
|
||||
|
||||
@ -134,6 +133,8 @@ def iterlinks(root):
|
||||
yield (el, attr, attribs[attr], 0)
|
||||
|
||||
|
||||
if not find_links_in_css:
|
||||
continue
|
||||
if tag == XHTML('style') and el.text:
|
||||
for match in _css_url_re.finditer(el.text):
|
||||
yield (el, None, match.group(1), match.start(1))
|
||||
@ -180,7 +181,7 @@ def rewrite_links(root, link_repl_func, resolve_base_href=False):
|
||||
'''
|
||||
if resolve_base_href:
|
||||
resolve_base_href(root)
|
||||
for el, attrib, link, pos in iterlinks(root):
|
||||
for el, attrib, link, pos in iterlinks(root, find_links_in_css=False):
|
||||
new_link = link_repl_func(link.strip())
|
||||
if new_link == link:
|
||||
continue
|
||||
@ -203,6 +204,44 @@ def rewrite_links(root, link_repl_func, resolve_base_href=False):
|
||||
new = cur[:pos] + new_link + cur[pos+len(link):]
|
||||
el.attrib[attrib] = new
|
||||
|
||||
def set_property(v):
|
||||
if v.CSS_PRIMITIVE_VALUE == v.cssValueType and \
|
||||
v.CSS_URI == v.primitiveType:
|
||||
v.setStringValue(v.CSS_URI,
|
||||
link_repl_func(v.getStringValue()))
|
||||
|
||||
for el in root.iter():
|
||||
try:
|
||||
tag = el.tag
|
||||
except UnicodeDecodeError:
|
||||
continue
|
||||
|
||||
if tag == XHTML('style') and el.text and \
|
||||
(_css_url_re.search(el.text) is not None or '@import' in
|
||||
el.text):
|
||||
stylesheet = parseString(el.text)
|
||||
replaceUrls(stylesheet, link_repl_func)
|
||||
el.text = '\n'+stylesheet.cssText + '\n'
|
||||
|
||||
if 'style' in el.attrib:
|
||||
text = el.attrib['style']
|
||||
if _css_url_re.search(text) is not None:
|
||||
stext = parseStyle(text)
|
||||
changed = False
|
||||
for p in stext.getProperties(all=True):
|
||||
v = p.cssValue
|
||||
if v.CSS_VALUE_LIST == v.cssValueType:
|
||||
for item in v:
|
||||
changed = True
|
||||
set_property(item)
|
||||
elif v.CSS_PRIMITIVE_VALUE == v.cssValueType:
|
||||
changed = True
|
||||
set_property(v)
|
||||
if changed:
|
||||
el.attrib['style'] = stext.cssText.replace('\n', ' ').replace('\r',
|
||||
' ')
|
||||
|
||||
|
||||
|
||||
EPUB_MIME = types_map['.epub']
|
||||
XHTML_MIME = types_map['.xhtml']
|
||||
|
Loading…
x
Reference in New Issue
Block a user