Separate miscellaneous transforms from virtualization

This commit is contained in:
Kovid Goyal 2019-10-15 18:03:56 +05:30
parent 07bd906363
commit d9284fe6ab
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C

View File

@ -21,7 +21,7 @@ from calibre.customize.ui import plugin_for_input_format
from calibre.ebooks import parse_css_length from calibre.ebooks import parse_css_length
from calibre.ebooks.css_transform_rules import StyleDeclaration from calibre.ebooks.css_transform_rules import StyleDeclaration
from calibre.ebooks.oeb.base import ( from calibre.ebooks.oeb.base import (
EPUB_NS, OEB_DOCS, OEB_STYLES, OPF, XHTML, XHTML_NS, XLINK, XPath, rewrite_links, EPUB_NS, OEB_DOCS, OEB_STYLES, OPF, XHTML, XHTML_NS, XLINK, XPath as _XPath, rewrite_links,
urlunquote urlunquote
) )
from calibre.ebooks.oeb.iterator.book import extract_book from calibre.ebooks.oeb.iterator.book import extract_book
@ -29,7 +29,7 @@ from calibre.ebooks.oeb.polish.container import Container as ContainerBase
from calibre.ebooks.oeb.polish.cover import ( from calibre.ebooks.oeb.polish.cover import (
find_cover_image, has_epub_cover, set_epub_cover find_cover_image, has_epub_cover, set_epub_cover
) )
from calibre.ebooks.oeb.polish.css import transform_css from calibre.ebooks.oeb.polish.css import transform_inline_styles
from calibre.ebooks.oeb.polish.toc import from_xpaths, get_landmarks, get_toc from calibre.ebooks.oeb.polish.toc import from_xpaths, get_landmarks, get_toc
from calibre.ebooks.oeb.polish.utils import extract, guess_type from calibre.ebooks.oeb.polish.utils import extract, guess_type
from calibre.srv.metadata import encode_datetime from calibre.srv.metadata import encode_datetime
@ -50,6 +50,16 @@ RENDER_VERSION = 1
BLANK_JPEG = b'\xff\xd8\xff\xdb\x00C\x00\x03\x02\x02\x02\x02\x02\x03\x02\x02\x02\x03\x03\x03\x03\x04\x06\x04\x04\x04\x04\x04\x08\x06\x06\x05\x06\t\x08\n\n\t\x08\t\t\n\x0c\x0f\x0c\n\x0b\x0e\x0b\t\t\r\x11\r\x0e\x0f\x10\x10\x11\x10\n\x0c\x12\x13\x12\x10\x13\x0f\x10\x10\x10\xff\xc9\x00\x0b\x08\x00\x01\x00\x01\x01\x01\x11\x00\xff\xcc\x00\x06\x00\x10\x10\x05\xff\xda\x00\x08\x01\x01\x00\x00?\x00\xd2\xcf \xff\xd9' # noqa BLANK_JPEG = b'\xff\xd8\xff\xdb\x00C\x00\x03\x02\x02\x02\x02\x02\x03\x02\x02\x02\x03\x03\x03\x03\x04\x06\x04\x04\x04\x04\x04\x08\x06\x06\x05\x06\t\x08\n\n\t\x08\t\t\n\x0c\x0f\x0c\n\x0b\x0e\x0b\t\t\r\x11\r\x0e\x0f\x10\x10\x11\x10\n\x0c\x12\x13\x12\x10\x13\x0f\x10\x10\x10\xff\xc9\x00\x0b\x08\x00\x01\x00\x01\x01\x01\x11\x00\xff\xcc\x00\x06\x00\x10\x10\x05\xff\xda\x00\x08\x01\x01\x00\x00?\x00\xd2\xcf \xff\xd9' # noqa
def XPath(expr):
ans = XPath.cache.get(expr)
if ans is None:
ans = XPath.cache[expr] = _XPath(expr)
return ans
XPath.cache = {}
def encode_url(name, frag=''): def encode_url(name, frag=''):
name = encode_component(name) name = encode_component(name)
if frag: if frag:
@ -75,11 +85,14 @@ def convert_fontsize(length, unit, base_font_size=16.0, dpi=96.0):
return length * length_factors.get(unit, 1) * pt_to_rem return length * length_factors.get(unit, 1) * pt_to_rem
page_break_properties = ('page-break-before', 'page-break-after', 'page-break-inside')
def transform_declaration(decl): def transform_declaration(decl):
decl = StyleDeclaration(decl) decl = StyleDeclaration(decl)
changed = False changed = False
for prop, parent_prop in tuple(decl): for prop, parent_prop in tuple(decl):
if prop.name in {'page-break-before', 'page-break-after', 'page-break-inside'}: if prop.name in page_break_properties:
changed = True changed = True
name = prop.name.partition('-')[2] name = prop.name.partition('-')[2]
for prefix in ('', '-webkit-column-'): for prefix in ('', '-webkit-column-'):
@ -252,7 +265,7 @@ class Container(ContainerBase):
# Mark the spine as dirty since we have to ensure it is normalized # Mark the spine as dirty since we have to ensure it is normalized
for name in data['spine']: for name in data['spine']:
self.parsed(name), self.dirty(name) self.parsed(name), self.dirty(name)
self.transform_css() self.transform_all()
self.virtualized_names = set() self.virtualized_names = set()
self.virtualize_resources() self.virtualize_resources()
@ -350,16 +363,16 @@ class Container(ContainerBase):
self.dirty(self.opf_name) self.dirty(self.opf_name)
return raster_cover_name, titlepage_name return raster_cover_name, titlepage_name
def transform_css(self): def transform_html(self, name):
transform_css(self, transform_sheet=transform_sheet, transform_style=transform_declaration) style_xpath = XPath('//h:style')
img_xpath = XPath('//h:img[@src]')
res_link_xpath = XPath('//h:link[@href]')
head = ensure_head(self.parsed(name))
changed = False
root = self.parsed(name)
for style in style_xpath(root):
# Firefox flakes out sometimes when dynamically creating <style> tags, # Firefox flakes out sometimes when dynamically creating <style> tags,
# so convert them to external stylesheets to ensure they never fail # so convert them to external stylesheets to ensure they never fail
style_xpath = XPath('//h:style')
for name, mt in tuple(iteritems(self.mime_map)):
mt = mt.lower()
if mt in OEB_DOCS:
head = ensure_head(self.parsed(name))
for style in style_xpath(self.parsed(name)):
if style.text and (style.get('type') or 'text/css').lower() == 'text/css': if style.text and (style.get('type') or 'text/css').lower() == 'text/css':
in_head = has_ancestor(style, head) in_head = has_ancestor(style, head)
if not in_head: if not in_head:
@ -372,6 +385,43 @@ class Container(ContainerBase):
style.set('rel', 'stylesheet') style.set('rel', 'stylesheet')
sname = self.add_file(name + '.css', css.encode('utf-8'), modify_name_if_needed=True) sname = self.add_file(name + '.css', css.encode('utf-8'), modify_name_if_needed=True)
style.set('href', self.name_to_href(sname, name)) style.set('href', self.name_to_href(sname, name))
changed = True
# Used for viewing images
for img in img_xpath(root):
img_name = self.href_to_name(img.get('src'), name)
if img_name:
img.set('data-calibre-src', img_name)
changed = True
# Disable non stylsheet link tags. This link will not be loaded by the
# browser anyway and will causes the resource load check to hang
for link in res_link_xpath(root):
ltype = (link.get('type') or 'text/css').lower()
rel = (link.get('rel') or 'stylesheet').lower()
if ltype != 'text/css' or rel != 'stylesheet':
link.attrib.clear()
changed = True
# Transform <style> and style=""
if transform_inline_styles(self, name, transform_sheet=transform_sheet, transform_style=transform_declaration):
changed = True
if changed:
self.dirty(name)
def transform_css(self, name):
sheet = self.parsed(name)
if transform_sheet(sheet):
self.dirty(name)
def transform_all(self):
for name, mt in tuple(iteritems(self.mime_map)):
mt = mt.lower()
if mt in OEB_DOCS:
self.transform_html(name)
elif mt in OEB_STYLES:
self.transform_css(name)
def virtualize_resources(self): def virtualize_resources(self):
@ -380,8 +430,6 @@ class Container(ContainerBase):
resource_template = link_uid + '|{}|' resource_template = link_uid + '|{}|'
xlink_xpath = XPath('//*[@xl:href]') xlink_xpath = XPath('//*[@xl:href]')
link_xpath = XPath('//h:a[@href]') link_xpath = XPath('//h:a[@href]')
img_xpath = XPath('//h:img[@src]')
res_link_xpath = XPath('//h:link[@href]')
def link_replacer(base, url): def link_replacer(base, url):
if url.startswith('#'): if url.startswith('#'):
@ -420,19 +468,6 @@ class Container(ContainerBase):
elif mt in OEB_DOCS: elif mt in OEB_DOCS:
self.virtualized_names.add(name) self.virtualized_names.add(name)
root = self.parsed(name) root = self.parsed(name)
for img in img_xpath(root):
img_name = self.href_to_name(img.get('src'), name)
if img_name:
img.set('data-calibre-src', img_name)
changed.add(name)
for link in res_link_xpath(root):
ltype = (link.get('type') or 'text/css').lower()
rel = (link.get('rel') or 'stylesheet').lower()
if ltype != 'text/css' or rel != 'stylesheet':
# This link will not be loaded by the browser anyway
# and will causes the resource load check to hang
link.attrib.clear()
changed.add(name)
rewrite_links(root, partial(link_replacer, name)) rewrite_links(root, partial(link_replacer, name))
for a in link_xpath(root): for a in link_xpath(root):
href = a.get('href') href = a.get('href')
@ -445,15 +480,17 @@ class Container(ContainerBase):
else: else:
a.set('target', '_blank') a.set('target', '_blank')
a.set('rel', 'noopener noreferrer') a.set('rel', 'noopener noreferrer')
changed.add(name)
elif mt == 'image/svg+xml': elif mt == 'image/svg+xml':
self.virtualized_names.add(name) self.virtualized_names.add(name)
changed.add(name)
xlink = XLINK('href') xlink = XLINK('href')
altered = False
for elem in xlink_xpath(self.parsed(name)): for elem in xlink_xpath(self.parsed(name)):
href = elem.get(xlink) href = elem.get(xlink)
if not href.startswith('#'): if not href.startswith('#'):
elem.set(xlink, link_replacer(name, href)) elem.set(xlink, link_replacer(name, href))
altered = True
if altered:
changed.add(name)
for name, amap in iteritems(ltm): for name, amap in iteritems(ltm):
for k, v in tuple(iteritems(amap)): for k, v in tuple(iteritems(amap)):