mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-08 18:54:09 -04:00
Fix #1695 (Cover Pages in Penguin samples)
This commit is contained in:
parent
4ce35e4fd7
commit
61e22407f8
@ -194,6 +194,8 @@ class HTMLProcessor(Processor, Rationalizer):
|
||||
if not tag.text and not tag.get('src', False):
|
||||
tag.getparent().remove(tag)
|
||||
|
||||
|
||||
|
||||
def save(self):
|
||||
for meta in list(self.root.xpath('//meta')):
|
||||
meta.getparent().remove(meta)
|
||||
|
@ -417,39 +417,44 @@ class Parser(PreProcessor, LoggingInterface):
|
||||
self.level = self.htmlfile.level
|
||||
for f in self.htmlfiles:
|
||||
name = os.path.basename(f.path)
|
||||
name = os.path.splitext(name)[0] + '.xhtml'
|
||||
if name in self.htmlfile_map.values():
|
||||
name = os.path.splitext(name)[0] + '_cr_%d'%save_counter + os.path.splitext(name)[1]
|
||||
save_counter += 1
|
||||
self.htmlfile_map[f.path] = name
|
||||
|
||||
self.parse_html()
|
||||
# Handle <image> tags inside embedded <svg>
|
||||
# At least one source of EPUB files (Penguin) uses xlink:href
|
||||
# without declaring the xlink namespace
|
||||
for image in self.root.xpath('//image'):
|
||||
for attr in image.attrib.keys():
|
||||
if attr.endswith(':href'):
|
||||
nhref = self.rewrite_links(image.get(attr))
|
||||
image.set(attr, nhref)
|
||||
|
||||
self.root.rewrite_links(self.rewrite_links, resolve_base_href=False)
|
||||
for bad in ('xmlns', 'lang', 'xml:lang'): # lxml also adds these attributes for XHTML documents, leading to duplicates
|
||||
if self.root.get(bad, None) is not None:
|
||||
self.root.attrib.pop(bad)
|
||||
|
||||
|
||||
|
||||
def save_path(self):
    '''
    Return the path this HTML file is saved to: ``self.tdir`` joined with
    the name that ``self.htmlfile_map`` records for ``self.htmlfile.path``.
    '''
    target_name = self.htmlfile_map[self.htmlfile.path]
    return os.path.join(self.tdir, target_name)
|
||||
|
||||
def declare_xhtml_namespace(self, match):
    '''
    Build the replacement text for an opening ``<html>`` tag so that its
    default namespace is the XHTML namespace.

    Written to be used as the replacement callable of ``re.sub``.

    :param match: Match object covering the opening ``<html>`` tag. It must
                  have a group named ``raw`` holding the attribute text of
                  the tag (``None``/empty when the tag has no attributes).
    :return: The rewritten opening tag.
    '''
    xhtml_ns = 'http://www.w3.org/1999/xhtml'
    raw = match.group('raw')
    if not raw:
        return '<html xmlns="%s">' % xhtml_ns
    m = re.search(r'(?i)xmlns\s*=\s*[\'"](?P<uri>[^"\']*)[\'"]', raw)
    if not m:
        # No default namespace declared: add one and keep the attributes.
        return '<html xmlns="%s" %s>' % (xhtml_ns, raw)
    # Splice the new URI in by position rather than with str.replace():
    # replace() would also rewrite any other attribute that happens to
    # contain the same URI, and with an empty xmlns="" it would insert the
    # URI between every character of the tag.
    tag = match.group()
    shift = match.start('raw') - match.start()
    begin = shift + m.start('uri')
    end = shift + m.end('uri')
    return tag[:begin] + xhtml_ns + tag[end:]
|
||||
|
||||
def save(self):
    '''
    Save processed HTML into the content directory.
    Should be called after all HTML processing is finished.

    :return: The name of the file that was written (``f.name``).
    '''
    # Set the namespace attributes on the tree before serializing it.
    # The tree is serialized exactly once, after these attributes are set;
    # an earlier serialization pass whose result was always overwritten has
    # been dropped.
    self.root.set('xmlns', 'http://www.w3.org/1999/xhtml')
    self.root.set('xmlns:xlink', 'http://www.w3.org/1999/xlink')
    for svg in self.root.xpath('//svg'):
        svg.set('xmlns', 'http://www.w3.org/2000/svg')

    ans = tostring(self.root, pretty_print=self.opts.pretty_print)
    # Only the first 1000 characters are searched for <head>, where the
    # Content-Type meta declaration is inserted.
    ans = re.compile(r'<head>', re.IGNORECASE).sub('<head>\n\t<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />\n', ans[:1000])+ans[1000:]
    with open(self.save_path(), 'wb') as f:
        f.write(ans)
    return f.name
|
||||
|
@ -46,9 +46,10 @@ class SVGRasterizer(object):
|
||||
data = QByteArray(xml2str(elem))
|
||||
svg = QSvgRenderer(data)
|
||||
size = svg.defaultSize()
|
||||
view_box = elem.get('viewBox', elem.get('viewbox', None))
|
||||
if size.width() == 100 and size.height() == 100 \
|
||||
and 'viewBox' in elem.attrib:
|
||||
box = [float(x) for x in elem.attrib['viewBox'].split()]
|
||||
and view_box is not None:
|
||||
box = [float(x) for x in view_box.split()]
|
||||
size.setWidth(box[2] - box[0])
|
||||
size.setHeight(box[3] - box[1])
|
||||
if width or height:
|
||||
|
Loading…
x
Reference in New Issue
Block a user