Misc. minor fixes

This commit is contained in:
Kovid Goyal 2009-04-25 08:26:58 -07:00
parent 316e55244a
commit 91bb71ed84
3 changed files with 17 additions and 25 deletions

View File

@@ -91,7 +91,7 @@ class EPUBOutput(OutputFormatPlugin):
self.condense_ncx([os.path.join(tdir, x) for x in os.listdir(tdir)\ self.condense_ncx([os.path.join(tdir, x) for x in os.listdir(tdir)\
if x.endswith('.ncx')][0]) if x.endswith('.ncx')][0])
from calibre.epub import initialize_container from calibre.ebooks.epub import initialize_container
epub = initialize_container(output_path, os.path.basename(opf)) epub = initialize_container(output_path, os.path.basename(opf))
epub.add_dir(tdir) epub.add_dir(tdir)
epub.close() epub.close()
@@ -136,7 +136,7 @@ class EPUBOutput(OutputFormatPlugin):
if 'cover' in g: if 'cover' in g:
tp = self.TITLEPAGE_COVER%unquote(g['cover'].href) tp = self.TITLEPAGE_COVER%unquote(g['cover'].href)
id, href = m.generate('titlepage', 'titlepage.xhtml') id, href = m.generate('titlepage', 'titlepage.xhtml')
item = m.add(id, href, guess_type('t.xhtml'), item = m.add(id, href, guess_type('t.xhtml')[0],
data=etree.fromstring(tp)) data=etree.fromstring(tp))
else: else:
item = self.default_cover() item = self.default_cover()
@@ -146,6 +146,7 @@ class EPUBOutput(OutputFormatPlugin):
if item is not None: if item is not None:
self.oeb.spine.insert(0, item, True) self.oeb.spine.insert(0, item, True)
self.oeb.guide.refs['cover'].href = item.href self.oeb.guide.refs['cover'].href = item.href
if 'titlepage' in self.oeb.guide.refs:
self.oeb.guide.refs['titlepage'].href = item.href self.oeb.guide.refs['titlepage'].href = item.href
@@ -180,7 +181,7 @@ class EPUBOutput(OutputFormatPlugin):
body = body[0] body = body[0]
# Replace <br> that are children of <body> as ADE doesn't handle them # Replace <br> that are children of <body> as ADE doesn't handle them
if hasattr(body, 'xpath'): if hasattr(body, 'xpath'):
for br in body.xpath('./h:br'): for br in XPath('./h:br')(body):
if br.getparent() is None: if br.getparent() is None:
continue continue
try: try:
@@ -204,29 +205,29 @@ class EPUBOutput(OutputFormatPlugin):
if self.opts.output_profile.remove_object_tags: if self.opts.output_profile.remove_object_tags:
for tag in root.xpath('//h:embed'): for tag in XPath('//h:embed')(root):
tag.getparent().remove(tag) tag.getparent().remove(tag)
for tag in root.xpath('//h:object'): for tag in XPath('//h:object')(root):
if tag.get('type', '').lower().strip() in ('image/svg+xml',): if tag.get('type', '').lower().strip() in ('image/svg+xml',):
continue continue
tag.getparent().remove(tag) tag.getparent().remove(tag)
for tag in root.xpath('//h:title|//h:style'): for tag in XPath('//h:title|//h:style')(root):
if not tag.text: if not tag.text:
tag.getparent().remove(tag) tag.getparent().remove(tag)
for tag in root.xpath('//h:script'): for tag in XPath('//h:script')(root):
if not tag.text and not tag.get('src', False): if not tag.text and not tag.get('src', False):
tag.getparent().remove(tag) tag.getparent().remove(tag)
for tag in root.xpath('//h:form'): for tag in XPath('//h:form')(root):
tag.getparent().remove(tag) tag.getparent().remove(tag)
for tag in root.xpath('//h:center'): for tag in XPath('//h:center')(root):
tag.tag = XHTML('div') tag.tag = XHTML('div')
tag.set('style', 'text-align:center') tag.set('style', 'text-align:center')
# ADE can't handle &amp; in an img url # ADE can't handle &amp; in an img url
for tag in self.root.xpath('//h:img[@src]'): for tag in XPath('//h:img[@src]')(root):
tag.set('src', tag.get('src', '').replace('&', '')) tag.set('src', tag.get('src', '').replace('&', ''))
stylesheet = self.oeb.manifest.hrefs['stylesheet.css'] stylesheet = self.oeb.manifest.hrefs['stylesheet.css']

View File

@@ -298,6 +298,11 @@ class MobiReader(object):
self.log.debug('Parsing HTML...') self.log.debug('Parsing HTML...')
root = html.fromstring(self.processed_html) root = html.fromstring(self.processed_html)
if root.xpath('descendant::p/descendant::p'):
from lxml.html import soupparser
self.log.warning('Markup contains unclosed <p> tags, parsing using',
'BeatifulSoup')
root = soupparser.fromstring(self.processed_html)
self.upshift_markup(root) self.upshift_markup(root)
guides = root.xpath('//guide') guides = root.xpath('//guide')
guide = guides[0] if guides else None guide = guides[0] if guides else None

View File

@@ -14,10 +14,6 @@ class Clean(object):
from calibre.ebooks.oeb.base import urldefrag from calibre.ebooks.oeb.base import urldefrag
self.oeb, self.log, self.opts = oeb, oeb.log, opts self.oeb, self.log, self.opts = oeb, oeb.log, opts
protected_hrefs = set([])
if 'titlepage' in self.oeb.guide:
protected_hrefs.add(urldefrag(
self.oeb.guide['titlepage'].href)[0])
if 'cover' not in self.oeb.guide: if 'cover' not in self.oeb.guide:
covers = [] covers = []
for x in ('other.ms-coverimage-standard', for x in ('other.ms-coverimage-standard',
@@ -35,20 +31,10 @@ class Clean(object):
self.log('Choosing %s:%s as the cover'%(ref.type, ref.href)) self.log('Choosing %s:%s as the cover'%(ref.type, ref.href))
ref.type = 'cover' ref.type = 'cover'
self.oeb.guide.refs['cover'] = ref self.oeb.guide.refs['cover'] = ref
protected_hrefs.add(urldefrag(ref.href)[0])
else:
protected_hrefs.add(urldefrag(self.oeb.guide.refs['cover'].href)[0])
for x in list(self.oeb.guide): for x in list(self.oeb.guide):
href = urldefrag(self.oeb.guide[x].href)[0] href = urldefrag(self.oeb.guide[x].href)[0]
if x.lower() not in ('cover', 'titlepage'): if x.lower() not in ('cover', 'titlepage'):
try:
if href not in protected_hrefs:
item = self.oeb.manifest.hrefs[href]
if item not in self.oeb.spine:
self.oeb.manifest.remove(self.oeb.manifest.hrefs[href])
except KeyError:
pass
self.oeb.guide.remove(x) self.oeb.guide.remove(x)