mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Conversion pipeline: Fix broken link rewriting for inline CSS embedded in HTML
This commit is contained in:
parent
6abc12cf18
commit
523185f7a9
@ -11,12 +11,11 @@ import os, re, uuid, logging
|
|||||||
from mimetypes import types_map
|
from mimetypes import types_map
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
from itertools import count
|
from itertools import count
|
||||||
from urlparse import urldefrag, urlparse, urlunparse
|
from urlparse import urldefrag, urlparse, urlunparse, urljoin
|
||||||
from urllib import unquote as urlunquote
|
from urllib import unquote as urlunquote
|
||||||
from urlparse import urljoin
|
|
||||||
|
|
||||||
from lxml import etree, html
|
from lxml import etree, html
|
||||||
from cssutils import CSSParser
|
from cssutils import CSSParser, parseString, parseStyle, replaceUrls
|
||||||
from cssutils.css import CSSRule
|
from cssutils.css import CSSRule
|
||||||
|
|
||||||
import calibre
|
import calibre
|
||||||
@ -88,11 +87,11 @@ def XLINK(name):
|
|||||||
def CALIBRE(name):
|
def CALIBRE(name):
|
||||||
return '{%s}%s' % (CALIBRE_NS, name)
|
return '{%s}%s' % (CALIBRE_NS, name)
|
||||||
|
|
||||||
_css_url_re = re.compile(r'url\((.*?)\)', re.I)
|
_css_url_re = re.compile(r'url\s*\((.*?)\)', re.I)
|
||||||
_css_import_re = re.compile(r'@import "(.*?)"')
|
_css_import_re = re.compile(r'@import "(.*?)"')
|
||||||
_archive_re = re.compile(r'[^ ]+')
|
_archive_re = re.compile(r'[^ ]+')
|
||||||
|
|
||||||
def iterlinks(root):
|
def iterlinks(root, find_links_in_css=True):
|
||||||
'''
|
'''
|
||||||
Iterate over all links in an OEB Document.
|
Iterate over all links in an OEB Document.
|
||||||
|
|
||||||
@ -134,6 +133,8 @@ def iterlinks(root):
|
|||||||
yield (el, attr, attribs[attr], 0)
|
yield (el, attr, attribs[attr], 0)
|
||||||
|
|
||||||
|
|
||||||
|
if not find_links_in_css:
|
||||||
|
continue
|
||||||
if tag == XHTML('style') and el.text:
|
if tag == XHTML('style') and el.text:
|
||||||
for match in _css_url_re.finditer(el.text):
|
for match in _css_url_re.finditer(el.text):
|
||||||
yield (el, None, match.group(1), match.start(1))
|
yield (el, None, match.group(1), match.start(1))
|
||||||
@ -180,7 +181,7 @@ def rewrite_links(root, link_repl_func, resolve_base_href=False):
|
|||||||
'''
|
'''
|
||||||
if resolve_base_href:
|
if resolve_base_href:
|
||||||
resolve_base_href(root)
|
resolve_base_href(root)
|
||||||
for el, attrib, link, pos in iterlinks(root):
|
for el, attrib, link, pos in iterlinks(root, find_links_in_css=False):
|
||||||
new_link = link_repl_func(link.strip())
|
new_link = link_repl_func(link.strip())
|
||||||
if new_link == link:
|
if new_link == link:
|
||||||
continue
|
continue
|
||||||
@ -203,6 +204,44 @@ def rewrite_links(root, link_repl_func, resolve_base_href=False):
|
|||||||
new = cur[:pos] + new_link + cur[pos+len(link):]
|
new = cur[:pos] + new_link + cur[pos+len(link):]
|
||||||
el.attrib[attrib] = new
|
el.attrib[attrib] = new
|
||||||
|
|
||||||
|
def set_property(v):
|
||||||
|
if v.CSS_PRIMITIVE_VALUE == v.cssValueType and \
|
||||||
|
v.CSS_URI == v.primitiveType:
|
||||||
|
v.setStringValue(v.CSS_URI,
|
||||||
|
link_repl_func(v.getStringValue()))
|
||||||
|
|
||||||
|
for el in root.iter():
|
||||||
|
try:
|
||||||
|
tag = el.tag
|
||||||
|
except UnicodeDecodeError:
|
||||||
|
continue
|
||||||
|
|
||||||
|
if tag == XHTML('style') and el.text and \
|
||||||
|
(_css_url_re.search(el.text) is not None or '@import' in
|
||||||
|
el.text):
|
||||||
|
stylesheet = parseString(el.text)
|
||||||
|
replaceUrls(stylesheet, link_repl_func)
|
||||||
|
el.text = '\n'+stylesheet.cssText + '\n'
|
||||||
|
|
||||||
|
if 'style' in el.attrib:
|
||||||
|
text = el.attrib['style']
|
||||||
|
if _css_url_re.search(text) is not None:
|
||||||
|
stext = parseStyle(text)
|
||||||
|
changed = False
|
||||||
|
for p in stext.getProperties(all=True):
|
||||||
|
v = p.cssValue
|
||||||
|
if v.CSS_VALUE_LIST == v.cssValueType:
|
||||||
|
for item in v:
|
||||||
|
changed = True
|
||||||
|
set_property(item)
|
||||||
|
elif v.CSS_PRIMITIVE_VALUE == v.cssValueType:
|
||||||
|
changed = True
|
||||||
|
set_property(v)
|
||||||
|
if changed:
|
||||||
|
el.attrib['style'] = stext.cssText.replace('\n', ' ').replace('\r',
|
||||||
|
' ')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
EPUB_MIME = types_map['.epub']
|
EPUB_MIME = types_map['.epub']
|
||||||
XHTML_MIME = types_map['.xhtml']
|
XHTML_MIME = types_map['.xhtml']
|
||||||
|
Loading…
x
Reference in New Issue
Block a user