Fix #1695 (Cover Pages in Penguin samples)

This commit is contained in:
Kovid Goyal 2009-01-26 00:40:17 -08:00
parent 4ce35e4fd7
commit 61e22407f8
3 changed files with 22 additions and 14 deletions

View File

@ -193,6 +193,8 @@ class HTMLProcessor(Processor, Rationalizer):
for tag in self.root.xpath('//script'): for tag in self.root.xpath('//script'):
if not tag.text and not tag.get('src', False): if not tag.text and not tag.get('src', False):
tag.getparent().remove(tag) tag.getparent().remove(tag)
def save(self): def save(self):
for meta in list(self.root.xpath('//meta')): for meta in list(self.root.xpath('//meta')):

View File

@ -417,39 +417,44 @@ class Parser(PreProcessor, LoggingInterface):
self.level = self.htmlfile.level self.level = self.htmlfile.level
for f in self.htmlfiles: for f in self.htmlfiles:
name = os.path.basename(f.path) name = os.path.basename(f.path)
name = os.path.splitext(name)[0] + '.xhtml'
if name in self.htmlfile_map.values(): if name in self.htmlfile_map.values():
name = os.path.splitext(name)[0] + '_cr_%d'%save_counter + os.path.splitext(name)[1] name = os.path.splitext(name)[0] + '_cr_%d'%save_counter + os.path.splitext(name)[1]
save_counter += 1 save_counter += 1
self.htmlfile_map[f.path] = name self.htmlfile_map[f.path] = name
self.parse_html() self.parse_html()
# Handle <image> tags inside embedded <svg>
# At least one source of EPUB files (Penguin) uses xlink:href
# without declaring the xlink namespace
for image in self.root.xpath('//image'):
for attr in image.attrib.keys():
if attr.endswith(':href'):
nhref = self.rewrite_links(image.get(attr))
image.set(attr, nhref)
self.root.rewrite_links(self.rewrite_links, resolve_base_href=False) self.root.rewrite_links(self.rewrite_links, resolve_base_href=False)
for bad in ('xmlns', 'lang', 'xml:lang'): # lxml also adds these attributes for XHTML documents, leading to duplicates for bad in ('xmlns', 'lang', 'xml:lang'): # lxml also adds these attributes for XHTML documents, leading to duplicates
if self.root.get(bad, None) is not None: if self.root.get(bad, None) is not None:
self.root.attrib.pop(bad) self.root.attrib.pop(bad)
def save_path(self): def save_path(self):
return os.path.join(self.tdir, self.htmlfile_map[self.htmlfile.path]) return os.path.join(self.tdir, self.htmlfile_map[self.htmlfile.path])
def declare_xhtml_namespace(self, match):
    '''
    Regex substitution callback that makes an opening ``<html>`` tag
    declare the XHTML namespace.

    :param match: A match object for an opening ``<html ...>`` tag. It must
                  define a group named ``raw`` holding the attribute text
                  (including its leading whitespace), or nothing when the
                  tag has no attributes.
    :return: The replacement text for the opening tag.
    '''
    xhtml_ns = 'http://www.w3.org/1999/xhtml'
    raw = match.group('raw')
    if not raw:
        # Bare <html>: emit a fresh tag carrying only the namespace.
        return '<html xmlns="%s">' % xhtml_ns
    m = re.search(r'(?i)xmlns\s*=\s*[\'"](?P<uri>[^"\']*)[\'"]', raw)
    if not m:
        # No default namespace declared: prepend one, keep other attributes.
        return '<html xmlns="%s" %s>' % (xhtml_ns, raw)
    # Splice the new URI over the exact span of the old one. A plain
    # str.replace() on the URI text would also rewrite identical text in
    # other attributes, and with an empty URI (xmlns="") it would insert
    # the namespace between every character of the tag.
    tag = match.group()
    offset = match.start('raw') - match.start()
    start = offset + m.start('uri')
    end = offset + m.end('uri')
    return tag[:start] + xhtml_ns + tag[end:]
def save(self): def save(self):
''' '''
Save processed HTML into the content directory. Save processed HTML into the content directory.
Should be called after all HTML processing is finished. Should be called after all HTML processing is finished.
''' '''
self.root.set('xmlns', 'http://www.w3.org/1999/xhtml')
self.root.set('xmlns:xlink', 'http://www.w3.org/1999/xlink')
for svg in self.root.xpath('//svg'):
svg.set('xmlns', 'http://www.w3.org/2000/svg')
ans = tostring(self.root, pretty_print=self.opts.pretty_print) ans = tostring(self.root, pretty_print=self.opts.pretty_print)
ans = re.sub(r'(?i)<\s*html(?P<raw>\s+[^>]*){0,1}>', self.declare_xhtml_namespace, ans[:1000]) + ans[1000:]
ans = re.compile(r'<head>', re.IGNORECASE).sub('<head>\n\t<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />\n', ans[:1000])+ans[1000:] ans = re.compile(r'<head>', re.IGNORECASE).sub('<head>\n\t<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />\n', ans[:1000])+ans[1000:]
with open(self.save_path(), 'wb') as f: with open(self.save_path(), 'wb') as f:
f.write(ans) f.write(ans)
return f.name return f.name

View File

@ -46,9 +46,10 @@ class SVGRasterizer(object):
data = QByteArray(xml2str(elem)) data = QByteArray(xml2str(elem))
svg = QSvgRenderer(data) svg = QSvgRenderer(data)
size = svg.defaultSize() size = svg.defaultSize()
view_box = elem.get('viewBox', elem.get('viewbox', None))
if size.width() == 100 and size.height() == 100 \ if size.width() == 100 and size.height() == 100 \
and 'viewBox' in elem.attrib: and view_box is not None:
box = [float(x) for x in elem.attrib['viewBox'].split()] box = [float(x) for x in view_box.split()]
size.setWidth(box[2] - box[0]) size.setWidth(box[2] - box[0])
size.setHeight(box[3] - box[1]) size.setHeight(box[3] - box[1])
if width or height: if width or height: