HTML Input: Fix handling of @import rules in stylesheets nested more than one level deep. Fixes #1930922 [Incorrect interpretation of CSS @import](https://bugs.launchpad.net/calibre/+bug/1930922)

This commit is contained in:
Kovid Goyal 2021-06-08 11:54:00 +05:30
parent 0818d3a110
commit 9040501684
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
2 changed files with 24 additions and 29 deletions

View File

@ -177,6 +177,7 @@ class HTMLInput(InputFormatPlugin):
self.urlnormalize, self.DirContainer = urlnormalize, DirContainer
self.urldefrag = urldefrag
self.guess_type, self.BINARY_MIME = guess_type, BINARY_MIME
self.stylesheets_to_process = []
self.log('Rewriting HTML links')
for f in filelist:
@ -190,15 +191,14 @@ class HTMLInput(InputFormatPlugin):
item = oeb.manifest.hrefs[urlnormalize(href)]
rewrite_links(item.data, partial(self.resource_adder, base=dpath))
for item in oeb.manifest.values():
while self.stylesheets_to_process:
sheet = self.stylesheets_to_process.pop()
css_parser.replaceUrls(sheet.data, partial(self.resource_adder, base=sheet.html_input_dirpath))
for item in oeb.manifest:
if item.media_type in self.OEB_STYLES:
dpath = None
for path, href in self.added_resources.items():
if href == item.href:
dpath = os.path.dirname(path)
break
css_parser.replaceUrls(item.data,
partial(self.resource_adder, base=dpath))
item.resolve_css_imports = True
item.override_css_fetch = None
item.reparse_css()
toc = self.oeb.toc
self.oeb.auto_generated_toc = True
@ -272,10 +272,11 @@ class HTMLInput(InputFormatPlugin):
if not self.is_case_sensitive(tempfile.gettempdir()):
link = link.lower()
if link not in self.added_resources:
guessed = self.guess_type(os.path.basename(link))[0]
media_type = guessed or self.BINARY_MIME
is_stylesheet = media_type in self.OEB_STYLES
bhref = os.path.basename(link)
id, href = self.oeb.manifest.generate(id='added', href=sanitize_file_name(bhref))
guessed = self.guess_type(href)[0]
media_type = guessed or self.BINARY_MIME
if media_type == 'text/plain':
self.log.warn('Ignoring link to text file %r'%link_)
return None
@ -289,7 +290,7 @@ class HTMLInput(InputFormatPlugin):
if img:
media_type = self.guess_type('dummy.'+img)[0] or self.BINARY_MIME
self.oeb.log.debug('Added', link)
self.oeb.log.debug('Added', link, 'with href:', href)
self.oeb.container = self.DirContainer(os.path.dirname(link),
self.oeb.log, ignore_opf=True)
# Load into memory
@ -300,9 +301,11 @@ class HTMLInput(InputFormatPlugin):
if isinstance(bhref, unicode_type):
bhref = bhref.encode('utf-8')
item.html_input_href = as_unicode(quote(bhref))
if guessed in self.OEB_STYLES:
item.override_css_fetch = partial(
self.css_import_handler, os.path.dirname(link))
if is_stylesheet:
item.html_input_dirpath = os.path.dirname(link)
item.resolve_css_imports = False
item.override_css_fetch = lambda url: (None, '')
self.stylesheets_to_process.append(item)
item.data
self.added_resources[link] = href
@ -310,16 +313,3 @@ class HTMLInput(InputFormatPlugin):
if frag:
nlink = '#'.join((nlink, frag))
return nlink
def css_import_handler(self, base, href):
    """Load the text of a local stylesheet referenced from another stylesheet.

    Resolves ``href`` against ``base`` through ``self.link_to_local_path``,
    reads the file as UTF-8 (undecodable bytes are replaced) and passes the
    text through ``self.oeb.css_preprocessor``.

    :param base: Base location handed to ``link_to_local_path`` for
        resolving ``href``.
    :param href: The reference to resolve.
    :return: ``(None, text)`` on success and ``(None, None)`` when the
        reference cannot be resolved, is unreadable, is a directory, or
        reading/preprocessing fails. The first element is always ``None``
        (presumably the encoding slot of the CSS parser's fetcher
        protocol -- TODO confirm against the parser documentation).
    """
    link, _frag = self.link_to_local_path(href, base=base)
    # Nothing to import unless the target is a readable non-directory path.
    if link is None or not os.access(link, os.R_OK) or os.path.isdir(link):
        return None, None
    try:
        with open(link, 'rb') as f:
            raw = f.read().decode('utf-8', 'replace')
        raw = self.oeb.css_preprocessor(raw, add_namespace=False)
    except Exception:
        # Best effort: log the failure and report "no content". Catching
        # Exception rather than using a bare except lets KeyboardInterrupt
        # and SystemExit propagate.
        self.log.exception('Failed to read CSS file: %r'%link)
        return None, None
    return None, raw

View File

@ -279,7 +279,7 @@ def rewrite_links(root, link_repl_func, resolve_base_href=False):
el.attrib[attrib] = new
parser = CSSParser(raiseExceptions=False, log=_css_logger,
fetcher=lambda x:(None, None))
fetcher=lambda x:(None, ''))
for el in root.iter(etree.Element):
try:
tag = el.tag
@ -922,6 +922,7 @@ class Manifest(object):
self.media_type = media_type
self.fallback = fallback
self.override_css_fetch = None
self.resolve_css_imports = True
self.spine_position = None
self.linear = True
if loader is None and data is None:
@ -986,6 +987,7 @@ class Manifest(object):
fetcher=self.override_css_fetch or self._fetch_css,
log=_css_logger)
data = parser.parseString(data, href=self.href, validate=False)
if self.resolve_css_imports:
data = resolveImports(data)
for rule in tuple(data.cssRules.rulesOfType(CSSRule.PAGE_RULE)):
data.cssRules.remove(rule)
@ -1054,6 +1056,9 @@ class Manifest(object):
def data(self):
self._data = None
def reparse_css(self):
    """Re-run CSS parsing on this item's current string form.

    Converts the item to text with ``str(self)``, parses that text with
    ``self._parse_css`` and stores the result in ``self._data``.
    """
    css_text = str(self)
    self._data = self._parse_css(css_text)
def unload_data_from_memory(self, memory=None):
if isinstance(self._data, bytes):
if memory is None: