mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Edit Book/Book polishing: Do not auto-resolve @import rules in AZW3 files. Also do not add a @namespace rule to all stylesheets that contain tag selectors.
This commit is contained in:
parent
a629e4b5a7
commit
ac919b48ca
@ -1207,7 +1207,7 @@ def set_regex_wizard_callback(f):
|
||||
regex_wizard_callback = f
|
||||
|
||||
def create_oebbook(log, path_or_stream, opts, reader=None,
|
||||
encoding='utf-8', populate=True, for_regex_wizard=False):
|
||||
encoding='utf-8', populate=True, for_regex_wizard=False, specialize=None):
|
||||
'''
|
||||
Create an OEBBook.
|
||||
'''
|
||||
@ -1219,6 +1219,8 @@ def create_oebbook(log, path_or_stream, opts, reader=None,
|
||||
pretty_print=opts.pretty_print, input_encoding=encoding)
|
||||
if not populate:
|
||||
return oeb
|
||||
if specialize is not None:
|
||||
oeb = specialize(oeb) or oeb
|
||||
# Read OEB Book into OEBBook
|
||||
log('Parsing all content...')
|
||||
if reader is None:
|
||||
|
@ -14,6 +14,7 @@ from io import BytesIO
|
||||
from struct import pack
|
||||
|
||||
import cssutils
|
||||
from cssutils.css import CSSRule
|
||||
from lxml import etree
|
||||
|
||||
from calibre import isbytestring, force_unicode
|
||||
@ -151,11 +152,23 @@ class KF8Writer(object):
|
||||
|
||||
for item in self.oeb.manifest:
|
||||
if item.media_type in OEB_STYLES:
|
||||
sheet = self.data(item)
|
||||
if not self.opts.expand_css and hasattr(item.data, 'cssText'):
|
||||
condense_sheet(self.data(item))
|
||||
data = self.data(item).cssText
|
||||
condense_sheet(sheet)
|
||||
sheets[item.href] = len(self.flows)
|
||||
self.flows.append(force_unicode(data, 'utf-8'))
|
||||
self.flows.append(sheet)
|
||||
|
||||
def fix_import_rules(sheet):
    '''
    Rewrite the @import rules of ``sheet`` that point at a known stylesheet
    so that they use the corresponding ``kindle:flow:`` URL instead.

    ``item``, ``sheets`` and ``to_ref`` are not defined here; they are
    looked up in the enclosing scope when this function is called.
    @import targets are resolved relative to ``item`` via ``item.abshref()``.

    :param sheet: A parsed stylesheet exposing ``cssRules.rulesOfType()``.
    :return: True if at least one @import rule was rewritten, else False.
    '''
    modified = False
    for import_rule in sheet.cssRules.rulesOfType(CSSRule.IMPORT_RULE):
        if not import_rule.href:
            # Nothing to resolve for an @import without a target
            continue
        flow_index = sheets.get(item.abshref(import_rule.href))
        if flow_index is None:
            # Target has no entry in ``sheets``; leave the rule untouched
            continue
        import_rule.href = 'kindle:flow:%s?mime=text/css'%to_ref(flow_index)
        modified = True
    return modified
|
||||
|
||||
for item in self.oeb.spine:
|
||||
root = self.data(item)
|
||||
@ -174,6 +187,10 @@ class KF8Writer(object):
|
||||
if not raw or not raw.strip():
|
||||
extract(tag)
|
||||
continue
|
||||
sheet = cssutils.parseString(raw, validate=False)
|
||||
if fix_import_rules(sheet):
|
||||
raw = force_unicode(sheet.cssText, 'utf-8')
|
||||
|
||||
repl = etree.Element(XHTML('link'), type='text/css',
|
||||
rel='stylesheet')
|
||||
repl.tail='\n'
|
||||
@ -187,6 +204,16 @@ class KF8Writer(object):
|
||||
for link in elems:
|
||||
link.set('href', 'kindle:flow:%s?mime=text/css'%idx)
|
||||
|
||||
for item in self.oeb.manifest:
|
||||
if item.media_type in OEB_STYLES:
|
||||
sheet = self.data(item)
|
||||
if hasattr(sheet, 'cssRules'):
|
||||
fix_import_rules(sheet)
|
||||
|
||||
for i, sheet in enumerate(tuple(self.flows)):
|
||||
if hasattr(sheet, 'cssText'):
|
||||
self.flows[i] = force_unicode(sheet.cssText, 'utf-8')
|
||||
|
||||
def extract_svg_into_flows(self):
|
||||
images = {}
|
||||
|
||||
|
@ -26,11 +26,11 @@ from calibre.ebooks.mobi import MobiError
|
||||
from calibre.ebooks.mobi.reader.headers import MetadataHeader
|
||||
from calibre.ebooks.mobi.tweak import set_cover
|
||||
from calibre.ebooks.oeb.base import (
|
||||
serialize, OEB_DOCS, _css_logger, OEB_STYLES, OPF2_NS, DC11_NS, OPF,
|
||||
serialize, OEB_DOCS, OEB_STYLES, OPF2_NS, DC11_NS, OPF, Manifest,
|
||||
rewrite_links, iterlinks, itercsslinks, urlquote, urlunquote)
|
||||
from calibre.ebooks.oeb.polish.errors import InvalidBook, DRMError
|
||||
from calibre.ebooks.oeb.polish.parsing import parse as parse_html_tweak
|
||||
from calibre.ebooks.oeb.polish.utils import PositionFinder, CommentFinder, guess_type
|
||||
from calibre.ebooks.oeb.polish.utils import PositionFinder, CommentFinder, guess_type, parse_css
|
||||
from calibre.ebooks.oeb.parse_utils import NotHTML, parse_html, RECOVER_PARSER
|
||||
from calibre.ptempfile import PersistentTemporaryDirectory, PersistentTemporaryFile
|
||||
from calibre.utils.filenames import nlinks_file, hardlink_file
|
||||
@ -490,21 +490,8 @@ class Container(object): # {{{
|
||||
return ans
|
||||
|
||||
def parse_css(self, data, fname='<string>', is_declaration=False):
|
||||
from cssutils import CSSParser, log
|
||||
log.setLevel(logging.WARN)
|
||||
log.raiseExceptions = False
|
||||
if isinstance(data, bytes):
|
||||
data = self.decode(data)
|
||||
if not self.tweak_mode:
|
||||
data = self.css_preprocessor(data)
|
||||
parser = CSSParser(loglevel=logging.WARNING,
|
||||
# We dont care about @import rules
|
||||
fetcher=lambda x: (None, None), log=_css_logger)
|
||||
if is_declaration:
|
||||
data = parser.parseStyle(data, validate=False)
|
||||
else:
|
||||
data = parser.parseString(data, href=fname, validate=False)
|
||||
return data
|
||||
return parse_css(data, fname=fname, is_declaration=is_declaration, decode=self.decode, log_level=logging.WARNING,
|
||||
css_preprocessor=(None if self.tweak_mode else self.css_preprocessor))
|
||||
|
||||
def parsed(self, name):
|
||||
''' Return a parsed representation of the file specified by name. For
|
||||
@ -1129,16 +1116,28 @@ def do_explode(path, dest):
|
||||
|
||||
return opf, obfuscated_fonts
|
||||
|
||||
def opf_to_azw3(opf, outpath, log):
|
||||
|
||||
def opf_to_azw3(opf, outpath, container):
|
||||
from calibre.ebooks.conversion.plumber import Plumber, create_oebbook
|
||||
plumber = Plumber(opf, outpath, log)
|
||||
|
||||
class Item(Manifest.Item):
|
||||
|
||||
def _parse_css(self, data):
|
||||
# The default CSS parser used by oeb.base inserts the h namespace
|
||||
# and resolves all @import rules. We dont want that.
|
||||
return container.parse_css(data)
|
||||
|
||||
def specialize(oeb):
|
||||
oeb.manifest.Item = Item
|
||||
|
||||
plumber = Plumber(opf, outpath, container.log)
|
||||
plumber.setup_options()
|
||||
inp = plugin_for_input_format('azw3')
|
||||
outp = plugin_for_output_format('azw3')
|
||||
plumber.opts.mobi_passthrough = True
|
||||
oeb = create_oebbook(log, opf, plumber.opts)
|
||||
oeb = create_oebbook(container.log, opf, plumber.opts, specialize=specialize)
|
||||
set_cover(oeb)
|
||||
outp.convert(oeb, outpath, inp, plumber.opts, log)
|
||||
outp.convert(oeb, outpath, inp, plumber.opts, container.log)
|
||||
|
||||
|
||||
class AZW3Container(Container):
|
||||
@ -1205,7 +1204,7 @@ class AZW3Container(Container):
|
||||
super(AZW3Container, self).commit(keep_parsed=keep_parsed)
|
||||
if outpath is None:
|
||||
outpath = self.pathtoazw3
|
||||
opf_to_azw3(self.name_path_map[self.opf_name], outpath, self.log)
|
||||
opf_to_azw3(self.name_path_map[self.opf_name], outpath, self)
|
||||
|
||||
@dynamic_property
|
||||
def path_to_ebook(self):
|
||||
|
@ -15,7 +15,7 @@ from calibre.ptempfile import TemporaryDirectory
|
||||
from calibre.ebooks.oeb.base import serialize
|
||||
from calibre.ebooks.metadata.opf2 import metadata_to_opf
|
||||
from calibre.ebooks.oeb.polish.parsing import parse
|
||||
from calibre.ebooks.oeb.polish.container import OPF_NAMESPACES, opf_to_azw3
|
||||
from calibre.ebooks.oeb.polish.container import OPF_NAMESPACES, opf_to_azw3, Container
|
||||
from calibre.ebooks.oeb.polish.utils import guess_type
|
||||
from calibre.ebooks.oeb.polish.pretty import pretty_xml_tree, pretty_html_tree
|
||||
from calibre.ebooks.oeb.polish.toc import TOC, create_ncx
|
||||
@ -87,7 +87,8 @@ def create_book(mi, path, fmt='epub', opf_name='metadata.opf', html_name='start.
|
||||
for name, data in ((opf_name, opf), (html_name, HTML), (toc_name, ncx)):
|
||||
with open(name, 'wb') as f:
|
||||
f.write(data)
|
||||
opf_to_azw3(opf_name, path, DevNull())
|
||||
c = Container(os.path.dirname(os.path.abspath(opf_name)), opf_name, DevNull())
|
||||
opf_to_azw3(opf_name, path, c)
|
||||
else:
|
||||
with ZipFile(path, 'w', compression=ZIP_STORED) as zf:
|
||||
zf.writestr('mimetype', b'application/epub+zip', compression=ZIP_STORED)
|
||||
|
@ -150,3 +150,24 @@ def lead_text(top_elem, num_words=10):
|
||||
stack.extend(reversed(list((c, 'text') for c in elem.iterchildren('*'))))
|
||||
return ' '.join(words[:num_words])
|
||||
|
||||
def parse_css(data, fname='<string>', is_declaration=False, decode=None, log_level=None, css_preprocessor=None):
    '''
    Parse CSS with cssutils using a fetcher that never retrieves the
    stylesheets referenced by @import rules.

    :param data: The CSS source, as bytes or text. Bytes are converted to
        text with ``decode`` when it is given, otherwise decoded as UTF-8.
    :param fname: Passed to the parser as the href of the stylesheet. Not
        used when ``is_declaration`` is True.
    :param is_declaration: If True, ``data`` is parsed with ``parseStyle()``
        instead of ``parseString()``.
    :param decode: Optional callable that converts bytes to text.
    :param log_level: Level set on the cssutils log and passed to the parser.
        Defaults to ``logging.WARNING``.
    :param css_preprocessor: Optional callable applied to the text before it
        is parsed.
    :return: The object returned by the cssutils parser for the chosen mode.
    '''
    from cssutils import CSSParser, log as cssutils_log
    from calibre.ebooks.oeb.base import _css_logger

    if log_level is None:
        import logging
        log_level = logging.WARNING

    # These settings are applied to the shared cssutils log object
    cssutils_log.setLevel(log_level)
    cssutils_log.raiseExceptions = False

    if isinstance(data, bytes):
        if decode is None:
            data = data.decode('utf-8')
        else:
            data = decode(data)
    if css_preprocessor is not None:
        data = css_preprocessor(data)

    def no_fetch(url):
        # We don't care about @import rules, so never fetch anything
        return None, None

    parser = CSSParser(loglevel=log_level, fetcher=no_fetch, log=_css_logger)
    if is_declaration:
        return parser.parseStyle(data, validate=False)
    return parser.parseString(data, href=fname, validate=False)
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user