From ac919b48caa806abbcabb955152c007ea45f3072 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Wed, 9 Jul 2014 14:19:10 +0530
Subject: [PATCH] Edit Book/Book polishing: Do not auto-resolve @import rules in AZW3 files. Also do not add a @namespace rule to all stylesheets that contain tag selectors.

---
 src/calibre/ebooks/conversion/plumber.py   |  4 +-
 src/calibre/ebooks/mobi/writer8/main.py    | 33 +++++++++++++++--
 src/calibre/ebooks/oeb/polish/container.py | 43 +++++++++++-----------
 src/calibre/ebooks/oeb/polish/create.py    |  5 ++-
 src/calibre/ebooks/oeb/polish/utils.py     | 21 +++++++++++
 5 files changed, 78 insertions(+), 28 deletions(-)

diff --git a/src/calibre/ebooks/conversion/plumber.py b/src/calibre/ebooks/conversion/plumber.py
index c9f2151647..0142409f0c 100644
--- a/src/calibre/ebooks/conversion/plumber.py
+++ b/src/calibre/ebooks/conversion/plumber.py
@@ -1207,7 +1207,7 @@ def set_regex_wizard_callback(f):
     regex_wizard_callback = f
 
 def create_oebbook(log, path_or_stream, opts, reader=None,
-        encoding='utf-8', populate=True, for_regex_wizard=False):
+        encoding='utf-8', populate=True, for_regex_wizard=False, specialize=None):
     '''
     Create an OEBBook.
     '''
@@ -1219,6 +1219,8 @@ def create_oebbook(log, path_or_stream, opts, reader=None,
             pretty_print=opts.pretty_print, input_encoding=encoding)
     if not populate:
         return oeb
+    if specialize is not None:
+        oeb = specialize(oeb) or oeb
     # Read OEB Book into OEBBook
     log('Parsing all content...')
     if reader is None:
diff --git a/src/calibre/ebooks/mobi/writer8/main.py b/src/calibre/ebooks/mobi/writer8/main.py
index b291691d62..723010f784 100644
--- a/src/calibre/ebooks/mobi/writer8/main.py
+++ b/src/calibre/ebooks/mobi/writer8/main.py
@@ -14,6 +14,7 @@ from io import BytesIO
 from struct import pack
 
 import cssutils
+from cssutils.css import CSSRule
 from lxml import etree
 
 from calibre import isbytestring, force_unicode
@@ -151,11 +152,23 @@ class KF8Writer(object):
 
         for item in self.oeb.manifest:
             if item.media_type in OEB_STYLES:
+                sheet = self.data(item)
                 if not self.opts.expand_css and hasattr(item.data, 'cssText'):
-                    condense_sheet(self.data(item))
-                data = self.data(item).cssText
+                    condense_sheet(sheet)
                 sheets[item.href] = len(self.flows)
-                self.flows.append(force_unicode(data, 'utf-8'))
+                self.flows.append(sheet)
+
+        def fix_import_rules(sheet):
+            changed = False
+            for rule in sheet.cssRules.rulesOfType(CSSRule.IMPORT_RULE):
+                if rule.href:
+                    href = item.abshref(rule.href)
+                    idx = sheets.get(href, None)
+                    if idx is not None:
+                        idx = to_ref(idx)
+                        rule.href = 'kindle:flow:%s?mime=text/css'%idx
+                        changed = True
+            return changed
 
         for item in self.oeb.spine:
             root = self.data(item)
@@ -174,6 +187,10 @@ class KF8Writer(object):
                 if not raw or not raw.strip():
                     extract(tag)
                     continue
+                sheet = cssutils.parseString(raw, validate=False)
+                if fix_import_rules(sheet):
+                    raw = force_unicode(sheet.cssText, 'utf-8')
+
                 repl = etree.Element(XHTML('link'), type='text/css',
                         rel='stylesheet')
                 repl.tail='\n'
@@ -187,6 +204,16 @@ class KF8Writer(object):
             for link in elems:
                 link.set('href', 'kindle:flow:%s?mime=text/css'%idx)
 
+        for item in self.oeb.manifest:
+            if item.media_type in OEB_STYLES:
+                sheet = self.data(item)
+                if hasattr(sheet, 'cssRules'):
+                    fix_import_rules(sheet)
+
+        for i, sheet in enumerate(tuple(self.flows)):
+            if hasattr(sheet, 'cssText'):
+                self.flows[i] = force_unicode(sheet.cssText, 'utf-8')
+
     def extract_svg_into_flows(self):
         images = {}
 
diff --git a/src/calibre/ebooks/oeb/polish/container.py b/src/calibre/ebooks/oeb/polish/container.py
index 44a876a18b..3e091d87e7 100644
--- a/src/calibre/ebooks/oeb/polish/container.py
+++ b/src/calibre/ebooks/oeb/polish/container.py
@@ -26,11 +26,11 @@ from calibre.ebooks.mobi import MobiError
 from calibre.ebooks.mobi.reader.headers import MetadataHeader
 from calibre.ebooks.mobi.tweak import set_cover
 from calibre.ebooks.oeb.base import (
-    serialize, OEB_DOCS, _css_logger, OEB_STYLES, OPF2_NS, DC11_NS, OPF,
+    serialize, OEB_DOCS, OEB_STYLES, OPF2_NS, DC11_NS, OPF, Manifest,
     rewrite_links, iterlinks, itercsslinks, urlquote, urlunquote)
 from calibre.ebooks.oeb.polish.errors import InvalidBook, DRMError
 from calibre.ebooks.oeb.polish.parsing import parse as parse_html_tweak
-from calibre.ebooks.oeb.polish.utils import PositionFinder, CommentFinder, guess_type
+from calibre.ebooks.oeb.polish.utils import PositionFinder, CommentFinder, guess_type, parse_css
 from calibre.ebooks.oeb.parse_utils import NotHTML, parse_html, RECOVER_PARSER
 from calibre.ptempfile import PersistentTemporaryDirectory, PersistentTemporaryFile
 from calibre.utils.filenames import nlinks_file, hardlink_file
@@ -490,21 +490,8 @@ class Container(object): # {{{
         return ans
 
     def parse_css(self, data, fname='', is_declaration=False):
-        from cssutils import CSSParser, log
-        log.setLevel(logging.WARN)
-        log.raiseExceptions = False
-        if isinstance(data, bytes):
-            data = self.decode(data)
-        if not self.tweak_mode:
-            data = self.css_preprocessor(data)
-        parser = CSSParser(loglevel=logging.WARNING,
-                           # We dont care about @import rules
-                           fetcher=lambda x: (None, None), log=_css_logger)
-        if is_declaration:
-            data = parser.parseStyle(data, validate=False)
-        else:
-            data = parser.parseString(data, href=fname, validate=False)
-        return data
+        return parse_css(data, fname=fname, is_declaration=is_declaration, decode=self.decode, log_level=logging.WARNING,
+                         css_preprocessor=(None if self.tweak_mode else self.css_preprocessor))
 
     def parsed(self, name):
         ''' Return a parsed representation of the file specified by name. For
@@ -1129,16 +1116,28 @@ def do_explode(path, dest):
 
     return opf, obfuscated_fonts
 
-def opf_to_azw3(opf, outpath, log):
+
+def opf_to_azw3(opf, outpath, container):
     from calibre.ebooks.conversion.plumber import Plumber, create_oebbook
-    plumber = Plumber(opf, outpath, log)
+
+    class Item(Manifest.Item):
+
+        def _parse_css(self, data):
+            # The default CSS parser used by oeb.base inserts the h namespace
+            # and resolves all @import rules. We dont want that.
+            return container.parse_css(data)
+
+    def specialize(oeb):
+        oeb.manifest.Item = Item
+
+    plumber = Plumber(opf, outpath, container.log)
     plumber.setup_options()
     inp = plugin_for_input_format('azw3')
     outp = plugin_for_output_format('azw3')
     plumber.opts.mobi_passthrough = True
-    oeb = create_oebbook(log, opf, plumber.opts)
+    oeb = create_oebbook(container.log, opf, plumber.opts, specialize=specialize)
     set_cover(oeb)
-    outp.convert(oeb, outpath, inp, plumber.opts, log)
+    outp.convert(oeb, outpath, inp, plumber.opts, container.log)
 
 
 class AZW3Container(Container):
@@ -1205,7 +1204,7 @@ class AZW3Container(Container):
         super(AZW3Container, self).commit(keep_parsed=keep_parsed)
         if outpath is None:
             outpath = self.pathtoazw3
-        opf_to_azw3(self.name_path_map[self.opf_name], outpath, self.log)
+        opf_to_azw3(self.name_path_map[self.opf_name], outpath, self)
 
     @dynamic_property
     def path_to_ebook(self):
diff --git a/src/calibre/ebooks/oeb/polish/create.py b/src/calibre/ebooks/oeb/polish/create.py
index 73e5b3fd5b..81993c7940 100644
--- a/src/calibre/ebooks/oeb/polish/create.py
+++ b/src/calibre/ebooks/oeb/polish/create.py
@@ -15,7 +15,7 @@ from calibre.ptempfile import TemporaryDirectory
 from calibre.ebooks.oeb.base import serialize
 from calibre.ebooks.metadata.opf2 import metadata_to_opf
 from calibre.ebooks.oeb.polish.parsing import parse
-from calibre.ebooks.oeb.polish.container import OPF_NAMESPACES, opf_to_azw3
+from calibre.ebooks.oeb.polish.container import OPF_NAMESPACES, opf_to_azw3, Container
 from calibre.ebooks.oeb.polish.utils import guess_type
 from calibre.ebooks.oeb.polish.pretty import pretty_xml_tree, pretty_html_tree
 from calibre.ebooks.oeb.polish.toc import TOC, create_ncx
@@ -87,7 +87,8 @@ def create_book(mi, path, fmt='epub', opf_name='metadata.opf', html_name='start.
             for name, data in ((opf_name, opf), (html_name, HTML), (toc_name, ncx)):
                 with open(name, 'wb') as f:
                     f.write(data)
-            opf_to_azw3(opf_name, path, DevNull())
+            c = Container(os.path.dirname(os.path.abspath(opf_name)), opf_name, DevNull())
+            opf_to_azw3(opf_name, path, c)
     else:
         with ZipFile(path, 'w', compression=ZIP_STORED) as zf:
             zf.writestr('mimetype', b'application/epub+zip', compression=ZIP_STORED)
diff --git a/src/calibre/ebooks/oeb/polish/utils.py b/src/calibre/ebooks/oeb/polish/utils.py
index 3f3a190c43..e5b16c7569 100644
--- a/src/calibre/ebooks/oeb/polish/utils.py
+++ b/src/calibre/ebooks/oeb/polish/utils.py
@@ -150,3 +150,24 @@ def lead_text(top_elem, num_words=10):
             stack.extend(reversed(list((c, 'text') for c in elem.iterchildren('*'))))
     return ' '.join(words[:num_words])
 
+def parse_css(data, fname='', is_declaration=False, decode=None, log_level=None, css_preprocessor=None):
+    if log_level is None:
+        import logging
+        log_level = logging.WARNING
+    from cssutils import CSSParser, log
+    from calibre.ebooks.oeb.base import _css_logger
+    log.setLevel(log_level)
+    log.raiseExceptions = False
+    if isinstance(data, bytes):
+        data = data.decode('utf-8') if decode is None else decode(data)
+    if css_preprocessor is not None:
+        data = css_preprocessor(data)
+    parser = CSSParser(loglevel=log_level,
+                       # We dont care about @import rules
+                       fetcher=lambda x: (None, None), log=_css_logger)
+    if is_declaration:
+        data = parser.parseStyle(data, validate=False)
+    else:
+        data = parser.parseString(data, href=fname, validate=False)
+    return data
+