diff --git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py index 0f364b8030..d1bd1cab78 100644 --- a/src/calibre/ebooks/oeb/base.py +++ b/src/calibre/ebooks/oeb/base.py @@ -11,12 +11,11 @@ import os, re, uuid, logging from mimetypes import types_map from collections import defaultdict from itertools import count -from urlparse import urldefrag, urlparse, urlunparse +from urlparse import urldefrag, urlparse, urlunparse, urljoin from urllib import unquote as urlunquote -from urlparse import urljoin from lxml import etree, html -from cssutils import CSSParser +from cssutils import CSSParser, parseString, parseStyle, replaceUrls from cssutils.css import CSSRule import calibre @@ -88,11 +87,11 @@ def XLINK(name): def CALIBRE(name): return '{%s}%s' % (CALIBRE_NS, name) -_css_url_re = re.compile(r'url\((.*?)\)', re.I) +_css_url_re = re.compile(r'url\s*\((.*?)\)', re.I) _css_import_re = re.compile(r'@import "(.*?)"') _archive_re = re.compile(r'[^ ]+') -def iterlinks(root): +def iterlinks(root, find_links_in_css=True): ''' Iterate over all links in a OEB Document. @@ -134,6 +133,8 @@ def iterlinks(root): yield (el, attr, attribs[attr], 0) + if not find_links_in_css: + continue if tag == XHTML('style') and el.text: for match in _css_url_re.finditer(el.text): yield (el, None, match.group(1), match.start(1)) @@ -180,7 +181,7 @@ def rewrite_links(root, link_repl_func, resolve_base_href=False): ''' if resolve_base_href: resolve_base_href(root) - for el, attrib, link, pos in iterlinks(root): + for el, attrib, link, pos in iterlinks(root, find_links_in_css=False): new_link = link_repl_func(link.strip()) if new_link == link: continue @@ -203,6 +204,44 @@ def rewrite_links(root, link_repl_func, resolve_base_href=False): new = cur[:pos] + new_link + cur[pos+len(link):] el.attrib[attrib] = new + def set_property(v): + if v.CSS_PRIMITIVE_VALUE == v.cssValueType and \ + v.CSS_URI == v.primitiveType: + v.setStringValue(v.CSS_URI, + link_repl_func(v.getStringValue())) + + for el in root.iter(): + try: + tag = el.tag + except UnicodeDecodeError: + continue + + if tag == XHTML('style') and el.text and \ + (_css_url_re.search(el.text) is not None or '@import' in + el.text): + stylesheet = parseString(el.text) + replaceUrls(stylesheet, link_repl_func) + el.text = '\n'+stylesheet.cssText + '\n' + + if 'style' in el.attrib: + text = el.attrib['style'] + if _css_url_re.search(text) is not None: + stext = parseStyle(text) + changed = False + for p in stext.getProperties(all=True): + v = p.cssValue + if v.CSS_VALUE_LIST == v.cssValueType: + for item in v: + changed = True + set_property(item) + elif v.CSS_PRIMITIVE_VALUE == v.cssValueType: + changed = True + set_property(v) + if changed: + el.attrib['style'] = stext.cssText.replace('\n', ' ').replace('\r', + ' ') + + EPUB_MIME = types_map['.epub'] XHTML_MIME = types_map['.xhtml']