Edit Book/Book polishing: Do not auto-resolve @import rules in AZW3 files. Also do not add an @namespace rule to all stylesheets that contain tag selectors.

This commit is contained in:
Kovid Goyal 2014-07-09 14:19:10 +05:30
parent a629e4b5a7
commit ac919b48ca
5 changed files with 78 additions and 28 deletions

View File

@ -1207,7 +1207,7 @@ def set_regex_wizard_callback(f):
regex_wizard_callback = f regex_wizard_callback = f
def create_oebbook(log, path_or_stream, opts, reader=None, def create_oebbook(log, path_or_stream, opts, reader=None,
encoding='utf-8', populate=True, for_regex_wizard=False): encoding='utf-8', populate=True, for_regex_wizard=False, specialize=None):
''' '''
Create an OEBBook. Create an OEBBook.
''' '''
@ -1219,6 +1219,8 @@ def create_oebbook(log, path_or_stream, opts, reader=None,
pretty_print=opts.pretty_print, input_encoding=encoding) pretty_print=opts.pretty_print, input_encoding=encoding)
if not populate: if not populate:
return oeb return oeb
if specialize is not None:
oeb = specialize(oeb) or oeb
# Read OEB Book into OEBBook # Read OEB Book into OEBBook
log('Parsing all content...') log('Parsing all content...')
if reader is None: if reader is None:

View File

@ -14,6 +14,7 @@ from io import BytesIO
from struct import pack from struct import pack
import cssutils import cssutils
from cssutils.css import CSSRule
from lxml import etree from lxml import etree
from calibre import isbytestring, force_unicode from calibre import isbytestring, force_unicode
@ -151,11 +152,23 @@ class KF8Writer(object):
for item in self.oeb.manifest: for item in self.oeb.manifest:
if item.media_type in OEB_STYLES: if item.media_type in OEB_STYLES:
sheet = self.data(item)
if not self.opts.expand_css and hasattr(item.data, 'cssText'): if not self.opts.expand_css and hasattr(item.data, 'cssText'):
condense_sheet(self.data(item)) condense_sheet(sheet)
data = self.data(item).cssText
sheets[item.href] = len(self.flows) sheets[item.href] = len(self.flows)
self.flows.append(force_unicode(data, 'utf-8')) self.flows.append(sheet)
def fix_import_rules(sheet):
    '''
    Point the @import rules of ``sheet`` at KF8 flows.

    Every @import rule with a non-empty href is resolved with
    ``item.abshref()`` and looked up in ``sheets`` (stylesheet href ->
    index into the flows list). When the target is a known stylesheet the
    rule's href is replaced by the matching ``kindle:flow:`` URL; imports
    of anything else are left untouched.

    NOTE(review): ``item``, ``sheets`` and ``to_ref`` come from the
    enclosing scope, so ``item`` is whatever the caller's loop variable is
    bound to at call time -- confirm when adding new call sites.

    Returns True if at least one rule was rewritten, False otherwise.
    '''
    rewritten = False
    for import_rule in sheet.cssRules.rulesOfType(CSSRule.IMPORT_RULE):
        if not import_rule.href:
            # Nothing to resolve for an empty href
            continue
        flow_index = sheets.get(item.abshref(import_rule.href), None)
        if flow_index is None:
            # The import does not refer to a stylesheet stored as a flow
            continue
        import_rule.href = 'kindle:flow:%s?mime=text/css'%to_ref(flow_index)
        rewritten = True
    return rewritten
for item in self.oeb.spine: for item in self.oeb.spine:
root = self.data(item) root = self.data(item)
@ -174,6 +187,10 @@ class KF8Writer(object):
if not raw or not raw.strip(): if not raw or not raw.strip():
extract(tag) extract(tag)
continue continue
sheet = cssutils.parseString(raw, validate=False)
if fix_import_rules(sheet):
raw = force_unicode(sheet.cssText, 'utf-8')
repl = etree.Element(XHTML('link'), type='text/css', repl = etree.Element(XHTML('link'), type='text/css',
rel='stylesheet') rel='stylesheet')
repl.tail='\n' repl.tail='\n'
@ -187,6 +204,16 @@ class KF8Writer(object):
for link in elems: for link in elems:
link.set('href', 'kindle:flow:%s?mime=text/css'%idx) link.set('href', 'kindle:flow:%s?mime=text/css'%idx)
for item in self.oeb.manifest:
if item.media_type in OEB_STYLES:
sheet = self.data(item)
if hasattr(sheet, 'cssRules'):
fix_import_rules(sheet)
for i, sheet in enumerate(tuple(self.flows)):
if hasattr(sheet, 'cssText'):
self.flows[i] = force_unicode(sheet.cssText, 'utf-8')
def extract_svg_into_flows(self): def extract_svg_into_flows(self):
images = {} images = {}

View File

@ -26,11 +26,11 @@ from calibre.ebooks.mobi import MobiError
from calibre.ebooks.mobi.reader.headers import MetadataHeader from calibre.ebooks.mobi.reader.headers import MetadataHeader
from calibre.ebooks.mobi.tweak import set_cover from calibre.ebooks.mobi.tweak import set_cover
from calibre.ebooks.oeb.base import ( from calibre.ebooks.oeb.base import (
serialize, OEB_DOCS, _css_logger, OEB_STYLES, OPF2_NS, DC11_NS, OPF, serialize, OEB_DOCS, OEB_STYLES, OPF2_NS, DC11_NS, OPF, Manifest,
rewrite_links, iterlinks, itercsslinks, urlquote, urlunquote) rewrite_links, iterlinks, itercsslinks, urlquote, urlunquote)
from calibre.ebooks.oeb.polish.errors import InvalidBook, DRMError from calibre.ebooks.oeb.polish.errors import InvalidBook, DRMError
from calibre.ebooks.oeb.polish.parsing import parse as parse_html_tweak from calibre.ebooks.oeb.polish.parsing import parse as parse_html_tweak
from calibre.ebooks.oeb.polish.utils import PositionFinder, CommentFinder, guess_type from calibre.ebooks.oeb.polish.utils import PositionFinder, CommentFinder, guess_type, parse_css
from calibre.ebooks.oeb.parse_utils import NotHTML, parse_html, RECOVER_PARSER from calibre.ebooks.oeb.parse_utils import NotHTML, parse_html, RECOVER_PARSER
from calibre.ptempfile import PersistentTemporaryDirectory, PersistentTemporaryFile from calibre.ptempfile import PersistentTemporaryDirectory, PersistentTemporaryFile
from calibre.utils.filenames import nlinks_file, hardlink_file from calibre.utils.filenames import nlinks_file, hardlink_file
@ -490,21 +490,8 @@ class Container(object): # {{{
return ans return ans
def parse_css(self, data, fname='<string>', is_declaration=False): def parse_css(self, data, fname='<string>', is_declaration=False):
from cssutils import CSSParser, log return parse_css(data, fname=fname, is_declaration=is_declaration, decode=self.decode, log_level=logging.WARNING,
log.setLevel(logging.WARN) css_preprocessor=(None if self.tweak_mode else self.css_preprocessor))
log.raiseExceptions = False
if isinstance(data, bytes):
data = self.decode(data)
if not self.tweak_mode:
data = self.css_preprocessor(data)
parser = CSSParser(loglevel=logging.WARNING,
# We dont care about @import rules
fetcher=lambda x: (None, None), log=_css_logger)
if is_declaration:
data = parser.parseStyle(data, validate=False)
else:
data = parser.parseString(data, href=fname, validate=False)
return data
def parsed(self, name): def parsed(self, name):
''' Return a parsed representation of the file specified by name. For ''' Return a parsed representation of the file specified by name. For
@ -1129,16 +1116,28 @@ def do_explode(path, dest):
return opf, obfuscated_fonts return opf, obfuscated_fonts
def opf_to_azw3(opf, outpath, log):
def opf_to_azw3(opf, outpath, container):
from calibre.ebooks.conversion.plumber import Plumber, create_oebbook from calibre.ebooks.conversion.plumber import Plumber, create_oebbook
plumber = Plumber(opf, outpath, log)
class Item(Manifest.Item):
    '''
    Manifest item that parses its stylesheets with the container's CSS
    parser instead of the default parser from oeb.base.
    '''

    def _parse_css(self, data):
        # The stock oeb.base parser would insert the h namespace and
        # resolve every @import rule. Neither is wanted here, so delegate
        # to the container (captured from the enclosing scope).
        parsed = container.parse_css(data)
        return parsed
def specialize(oeb):
    '''
    Hook handed to create_oebbook(): install the Item subclass on the
    manifest of ``oeb``.

    Implicitly returns None; the caller falls back to the same ``oeb``
    object in that case (``specialize(oeb) or oeb``).

    NOTE(review): presumably ``manifest.Item`` is the class the manifest
    uses to create its entries -- confirm against oeb.base.
    '''
    setattr(oeb.manifest, 'Item', Item)
plumber = Plumber(opf, outpath, container.log)
plumber.setup_options() plumber.setup_options()
inp = plugin_for_input_format('azw3') inp = plugin_for_input_format('azw3')
outp = plugin_for_output_format('azw3') outp = plugin_for_output_format('azw3')
plumber.opts.mobi_passthrough = True plumber.opts.mobi_passthrough = True
oeb = create_oebbook(log, opf, plumber.opts) oeb = create_oebbook(container.log, opf, plumber.opts, specialize=specialize)
set_cover(oeb) set_cover(oeb)
outp.convert(oeb, outpath, inp, plumber.opts, log) outp.convert(oeb, outpath, inp, plumber.opts, container.log)
class AZW3Container(Container): class AZW3Container(Container):
@ -1205,7 +1204,7 @@ class AZW3Container(Container):
super(AZW3Container, self).commit(keep_parsed=keep_parsed) super(AZW3Container, self).commit(keep_parsed=keep_parsed)
if outpath is None: if outpath is None:
outpath = self.pathtoazw3 outpath = self.pathtoazw3
opf_to_azw3(self.name_path_map[self.opf_name], outpath, self.log) opf_to_azw3(self.name_path_map[self.opf_name], outpath, self)
@dynamic_property @dynamic_property
def path_to_ebook(self): def path_to_ebook(self):

View File

@ -15,7 +15,7 @@ from calibre.ptempfile import TemporaryDirectory
from calibre.ebooks.oeb.base import serialize from calibre.ebooks.oeb.base import serialize
from calibre.ebooks.metadata.opf2 import metadata_to_opf from calibre.ebooks.metadata.opf2 import metadata_to_opf
from calibre.ebooks.oeb.polish.parsing import parse from calibre.ebooks.oeb.polish.parsing import parse
from calibre.ebooks.oeb.polish.container import OPF_NAMESPACES, opf_to_azw3 from calibre.ebooks.oeb.polish.container import OPF_NAMESPACES, opf_to_azw3, Container
from calibre.ebooks.oeb.polish.utils import guess_type from calibre.ebooks.oeb.polish.utils import guess_type
from calibre.ebooks.oeb.polish.pretty import pretty_xml_tree, pretty_html_tree from calibre.ebooks.oeb.polish.pretty import pretty_xml_tree, pretty_html_tree
from calibre.ebooks.oeb.polish.toc import TOC, create_ncx from calibre.ebooks.oeb.polish.toc import TOC, create_ncx
@ -87,7 +87,8 @@ def create_book(mi, path, fmt='epub', opf_name='metadata.opf', html_name='start.
for name, data in ((opf_name, opf), (html_name, HTML), (toc_name, ncx)): for name, data in ((opf_name, opf), (html_name, HTML), (toc_name, ncx)):
with open(name, 'wb') as f: with open(name, 'wb') as f:
f.write(data) f.write(data)
opf_to_azw3(opf_name, path, DevNull()) c = Container(os.path.dirname(os.path.abspath(opf_name)), opf_name, DevNull())
opf_to_azw3(opf_name, path, c)
else: else:
with ZipFile(path, 'w', compression=ZIP_STORED) as zf: with ZipFile(path, 'w', compression=ZIP_STORED) as zf:
zf.writestr('mimetype', b'application/epub+zip', compression=ZIP_STORED) zf.writestr('mimetype', b'application/epub+zip', compression=ZIP_STORED)

View File

@ -150,3 +150,24 @@ def lead_text(top_elem, num_words=10):
stack.extend(reversed(list((c, 'text') for c in elem.iterchildren('*')))) stack.extend(reversed(list((c, 'text') for c in elem.iterchildren('*'))))
return ' '.join(words[:num_words]) return ' '.join(words[:num_words])
def parse_css(data, fname='<string>', is_declaration=False, decode=None, log_level=None, css_preprocessor=None):
    '''
    Parse CSS with cssutils, without fetching anything for @import rules.

    :param data: The CSS source. Bytes are decoded first, using ``decode``
        when it is given and UTF-8 otherwise.
    :param fname: Passed to the parser as the href of the stylesheet. Not
        used when ``is_declaration`` is True.
    :param is_declaration: If True, ``data`` is parsed with ``parseStyle()``
        (a style declaration); otherwise it is parsed with
        ``parseString()`` (a full stylesheet).
    :param decode: Optional callable that converts bytes to text.
    :param log_level: Level applied to both the cssutils log and the
        parser. Defaults to ``logging.WARNING``.
    :param css_preprocessor: Optional callable applied to the decoded text
        before it is parsed.
    :return: Whatever the cssutils parser returns for the chosen mode.

    Note that every call reconfigures the module level cssutils log: its
    level is set to ``log_level`` and ``raiseExceptions`` is switched off.
    '''
    if log_level is None:
        import logging
        log_level = logging.WARNING
    import cssutils
    from calibre.ebooks.oeb.base import _css_logger

    cssutils.log.setLevel(log_level)
    cssutils.log.raiseExceptions = False

    text = data
    if isinstance(text, bytes):
        text = decode(text) if decode is not None else text.decode('utf-8')
    if css_preprocessor is not None:
        text = css_preprocessor(text)

    def ignore_imports(url):
        # @import rules are of no interest here, so never fetch anything
        return None, None

    parser = cssutils.CSSParser(
        loglevel=log_level, fetcher=ignore_imports, log=_css_logger)
    if is_declaration:
        return parser.parseStyle(text, validate=False)
    return parser.parseString(text, href=fname, validate=False)