Edit Book/Book polishing: Do not auto-resolve @import rules in AZW3 files. Also do not add a @namespace rule to all stylesheets that contain tag selectors.

This commit is contained in:
Kovid Goyal 2014-07-09 14:19:10 +05:30
parent a629e4b5a7
commit ac919b48ca
5 changed files with 78 additions and 28 deletions

View File

@ -1207,7 +1207,7 @@ def set_regex_wizard_callback(f):
regex_wizard_callback = f
def create_oebbook(log, path_or_stream, opts, reader=None,
encoding='utf-8', populate=True, for_regex_wizard=False):
encoding='utf-8', populate=True, for_regex_wizard=False, specialize=None):
'''
Create an OEBBook.
'''
@ -1219,6 +1219,8 @@ def create_oebbook(log, path_or_stream, opts, reader=None,
pretty_print=opts.pretty_print, input_encoding=encoding)
if not populate:
return oeb
if specialize is not None:
oeb = specialize(oeb) or oeb
# Read OEB Book into OEBBook
log('Parsing all content...')
if reader is None:

View File

@ -14,6 +14,7 @@ from io import BytesIO
from struct import pack
import cssutils
from cssutils.css import CSSRule
from lxml import etree
from calibre import isbytestring, force_unicode
@ -151,11 +152,23 @@ class KF8Writer(object):
for item in self.oeb.manifest:
if item.media_type in OEB_STYLES:
sheet = self.data(item)
if not self.opts.expand_css and hasattr(item.data, 'cssText'):
condense_sheet(self.data(item))
data = self.data(item).cssText
condense_sheet(sheet)
sheets[item.href] = len(self.flows)
self.flows.append(force_unicode(data, 'utf-8'))
self.flows.append(sheet)
def fix_import_rules(sheet):
changed = False
for rule in sheet.cssRules.rulesOfType(CSSRule.IMPORT_RULE):
if rule.href:
href = item.abshref(rule.href)
idx = sheets.get(href, None)
if idx is not None:
idx = to_ref(idx)
rule.href = 'kindle:flow:%s?mime=text/css'%idx
changed = True
return changed
for item in self.oeb.spine:
root = self.data(item)
@ -174,6 +187,10 @@ class KF8Writer(object):
if not raw or not raw.strip():
extract(tag)
continue
sheet = cssutils.parseString(raw, validate=False)
if fix_import_rules(sheet):
raw = force_unicode(sheet.cssText, 'utf-8')
repl = etree.Element(XHTML('link'), type='text/css',
rel='stylesheet')
repl.tail='\n'
@ -187,6 +204,16 @@ class KF8Writer(object):
for link in elems:
link.set('href', 'kindle:flow:%s?mime=text/css'%idx)
for item in self.oeb.manifest:
if item.media_type in OEB_STYLES:
sheet = self.data(item)
if hasattr(sheet, 'cssRules'):
fix_import_rules(sheet)
for i, sheet in enumerate(tuple(self.flows)):
if hasattr(sheet, 'cssText'):
self.flows[i] = force_unicode(sheet.cssText, 'utf-8')
def extract_svg_into_flows(self):
images = {}

View File

@ -26,11 +26,11 @@ from calibre.ebooks.mobi import MobiError
from calibre.ebooks.mobi.reader.headers import MetadataHeader
from calibre.ebooks.mobi.tweak import set_cover
from calibre.ebooks.oeb.base import (
serialize, OEB_DOCS, _css_logger, OEB_STYLES, OPF2_NS, DC11_NS, OPF,
serialize, OEB_DOCS, OEB_STYLES, OPF2_NS, DC11_NS, OPF, Manifest,
rewrite_links, iterlinks, itercsslinks, urlquote, urlunquote)
from calibre.ebooks.oeb.polish.errors import InvalidBook, DRMError
from calibre.ebooks.oeb.polish.parsing import parse as parse_html_tweak
from calibre.ebooks.oeb.polish.utils import PositionFinder, CommentFinder, guess_type
from calibre.ebooks.oeb.polish.utils import PositionFinder, CommentFinder, guess_type, parse_css
from calibre.ebooks.oeb.parse_utils import NotHTML, parse_html, RECOVER_PARSER
from calibre.ptempfile import PersistentTemporaryDirectory, PersistentTemporaryFile
from calibre.utils.filenames import nlinks_file, hardlink_file
@ -490,21 +490,8 @@ class Container(object): # {{{
return ans
def parse_css(self, data, fname='<string>', is_declaration=False):
from cssutils import CSSParser, log
log.setLevel(logging.WARN)
log.raiseExceptions = False
if isinstance(data, bytes):
data = self.decode(data)
if not self.tweak_mode:
data = self.css_preprocessor(data)
parser = CSSParser(loglevel=logging.WARNING,
# We don't care about @import rules
fetcher=lambda x: (None, None), log=_css_logger)
if is_declaration:
data = parser.parseStyle(data, validate=False)
else:
data = parser.parseString(data, href=fname, validate=False)
return data
return parse_css(data, fname=fname, is_declaration=is_declaration, decode=self.decode, log_level=logging.WARNING,
css_preprocessor=(None if self.tweak_mode else self.css_preprocessor))
def parsed(self, name):
''' Return a parsed representation of the file specified by name. For
@ -1129,16 +1116,28 @@ def do_explode(path, dest):
return opf, obfuscated_fonts
def opf_to_azw3(opf, outpath, log):
def opf_to_azw3(opf, outpath, container):
from calibre.ebooks.conversion.plumber import Plumber, create_oebbook
plumber = Plumber(opf, outpath, log)
class Item(Manifest.Item):
def _parse_css(self, data):
# The default CSS parser used by oeb.base inserts the h namespace
# and resolves all @import rules. We don't want that.
return container.parse_css(data)
def specialize(oeb):
oeb.manifest.Item = Item
plumber = Plumber(opf, outpath, container.log)
plumber.setup_options()
inp = plugin_for_input_format('azw3')
outp = plugin_for_output_format('azw3')
plumber.opts.mobi_passthrough = True
oeb = create_oebbook(log, opf, plumber.opts)
oeb = create_oebbook(container.log, opf, plumber.opts, specialize=specialize)
set_cover(oeb)
outp.convert(oeb, outpath, inp, plumber.opts, log)
outp.convert(oeb, outpath, inp, plumber.opts, container.log)
class AZW3Container(Container):
@ -1205,7 +1204,7 @@ class AZW3Container(Container):
super(AZW3Container, self).commit(keep_parsed=keep_parsed)
if outpath is None:
outpath = self.pathtoazw3
opf_to_azw3(self.name_path_map[self.opf_name], outpath, self.log)
opf_to_azw3(self.name_path_map[self.opf_name], outpath, self)
@dynamic_property
def path_to_ebook(self):

View File

@ -15,7 +15,7 @@ from calibre.ptempfile import TemporaryDirectory
from calibre.ebooks.oeb.base import serialize
from calibre.ebooks.metadata.opf2 import metadata_to_opf
from calibre.ebooks.oeb.polish.parsing import parse
from calibre.ebooks.oeb.polish.container import OPF_NAMESPACES, opf_to_azw3
from calibre.ebooks.oeb.polish.container import OPF_NAMESPACES, opf_to_azw3, Container
from calibre.ebooks.oeb.polish.utils import guess_type
from calibre.ebooks.oeb.polish.pretty import pretty_xml_tree, pretty_html_tree
from calibre.ebooks.oeb.polish.toc import TOC, create_ncx
@ -87,7 +87,8 @@ def create_book(mi, path, fmt='epub', opf_name='metadata.opf', html_name='start.
for name, data in ((opf_name, opf), (html_name, HTML), (toc_name, ncx)):
with open(name, 'wb') as f:
f.write(data)
opf_to_azw3(opf_name, path, DevNull())
c = Container(os.path.dirname(os.path.abspath(opf_name)), opf_name, DevNull())
opf_to_azw3(opf_name, path, c)
else:
with ZipFile(path, 'w', compression=ZIP_STORED) as zf:
zf.writestr('mimetype', b'application/epub+zip', compression=ZIP_STORED)

View File

@ -150,3 +150,24 @@ def lead_text(top_elem, num_words=10):
stack.extend(reversed(list((c, 'text') for c in elem.iterchildren('*'))))
return ' '.join(words[:num_words])
def parse_css(data, fname='<string>', is_declaration=False, decode=None, log_level=None, css_preprocessor=None):
    '''
    Parse CSS source with cssutils and return the parsed object.

    :param data: The CSS to parse. Bytes are decoded first, using ``decode``
        when it is given and UTF-8 otherwise.
    :param fname: Passed to the parser as the ``href`` of the stylesheet.
        Only used when ``is_declaration`` is False.
    :param is_declaration: If True, ``data`` is parsed with ``parseStyle``
        instead of ``parseString``.
    :param decode: Optional callable that converts bytes to text.
    :param log_level: Level applied to the cssutils log and to the parser.
        Defaults to ``logging.WARNING``.
    :param css_preprocessor: Optional callable applied to the (decoded) text
        before it is parsed.
    :return: Whatever ``parseStyle`` or ``parseString`` returns.
    '''
    if log_level is None:
        import logging
        log_level = logging.WARNING
    from cssutils import CSSParser, log as cssutils_log
    from calibre.ebooks.oeb.base import _css_logger

    # Configure the cssutils log object before any parsing happens.
    cssutils_log.setLevel(log_level)
    cssutils_log.raiseExceptions = False

    if isinstance(data, bytes):
        if decode is None:
            data = data.decode('utf-8')
        else:
            data = decode(data)
    if css_preprocessor is not None:
        data = css_preprocessor(data)

    def ignore_imports(url):
        # We do not care about @import rules, so never supply any content
        # for them.
        return None, None

    parser = CSSParser(loglevel=log_level, fetcher=ignore_imports, log=_css_logger)
    if is_declaration:
        return parser.parseStyle(data, validate=False)
    return parser.parseString(data, href=fname, validate=False)