Misc. minor fixes

This commit is contained in:
Kovid Goyal 2009-04-25 08:26:58 -07:00
parent 316e55244a
commit 91bb71ed84
3 changed files with 17 additions and 25 deletions

View File

@ -91,7 +91,7 @@ class EPUBOutput(OutputFormatPlugin):
self.condense_ncx([os.path.join(tdir, x) for x in os.listdir(tdir)\
if x.endswith('.ncx')][0])
from calibre.epub import initialize_container
from calibre.ebooks.epub import initialize_container
epub = initialize_container(output_path, os.path.basename(opf))
epub.add_dir(tdir)
epub.close()
@ -136,7 +136,7 @@ class EPUBOutput(OutputFormatPlugin):
if 'cover' in g:
tp = self.TITLEPAGE_COVER%unquote(g['cover'].href)
id, href = m.generate('titlepage', 'titlepage.xhtml')
item = m.add(id, href, guess_type('t.xhtml'),
item = m.add(id, href, guess_type('t.xhtml')[0],
data=etree.fromstring(tp))
else:
item = self.default_cover()
@ -146,7 +146,8 @@ class EPUBOutput(OutputFormatPlugin):
if item is not None:
self.oeb.spine.insert(0, item, True)
self.oeb.guide.refs['cover'].href = item.href
self.oeb.guide.refs['titlepage'].href = item.href
if 'titlepage' in self.oeb.guide.refs:
self.oeb.guide.refs['titlepage'].href = item.href
@ -180,7 +181,7 @@ class EPUBOutput(OutputFormatPlugin):
body = body[0]
# Replace <br> that are children of <body> as ADE doesn't handle them
if hasattr(body, 'xpath'):
for br in body.xpath('./h:br'):
for br in XPath('./h:br')(body):
if br.getparent() is None:
continue
try:
@ -204,29 +205,29 @@ class EPUBOutput(OutputFormatPlugin):
if self.opts.output_profile.remove_object_tags:
for tag in root.xpath('//h:embed'):
for tag in XPath('//h:embed')(root):
tag.getparent().remove(tag)
for tag in root.xpath('//h:object'):
for tag in XPath('//h:object')(root):
if tag.get('type', '').lower().strip() in ('image/svg+xml',):
continue
tag.getparent().remove(tag)
for tag in root.xpath('//h:title|//h:style'):
for tag in XPath('//h:title|//h:style')(root):
if not tag.text:
tag.getparent().remove(tag)
for tag in root.xpath('//h:script'):
for tag in XPath('//h:script')(root):
if not tag.text and not tag.get('src', False):
tag.getparent().remove(tag)
for tag in root.xpath('//h:form'):
for tag in XPath('//h:form')(root):
tag.getparent().remove(tag)
for tag in root.xpath('//h:center'):
for tag in XPath('//h:center')(root):
tag.tag = XHTML('div')
tag.set('style', 'text-align:center')
# ADE can't handle &amp; in an img url
for tag in self.root.xpath('//h:img[@src]'):
for tag in XPath('//h:img[@src]')(root):
tag.set('src', tag.get('src', '').replace('&', ''))
stylesheet = self.oeb.manifest.hrefs['stylesheet.css']

View File

@ -298,6 +298,11 @@ class MobiReader(object):
self.log.debug('Parsing HTML...')
root = html.fromstring(self.processed_html)
if root.xpath('descendant::p/descendant::p'):
from lxml.html import soupparser
self.log.warning('Markup contains unclosed <p> tags, parsing using',
'BeatifulSoup')
root = soupparser.fromstring(self.processed_html)
self.upshift_markup(root)
guides = root.xpath('//guide')
guide = guides[0] if guides else None

View File

@ -14,10 +14,6 @@ class Clean(object):
from calibre.ebooks.oeb.base import urldefrag
self.oeb, self.log, self.opts = oeb, oeb.log, opts
protected_hrefs = set([])
if 'titlepage' in self.oeb.guide:
protected_hrefs.add(urldefrag(
self.oeb.guide['titlepage'].href)[0])
if 'cover' not in self.oeb.guide:
covers = []
for x in ('other.ms-coverimage-standard',
@ -35,20 +31,10 @@ class Clean(object):
self.log('Choosing %s:%s as the cover'%(ref.type, ref.href))
ref.type = 'cover'
self.oeb.guide.refs['cover'] = ref
protected_hrefs.add(urldefrag(ref.href)[0])
else:
protected_hrefs.add(urldefrag(self.oeb.guide.refs['cover'].href)[0])
for x in list(self.oeb.guide):
href = urldefrag(self.oeb.guide[x].href)[0]
if x.lower() not in ('cover', 'titlepage'):
try:
if href not in protected_hrefs:
item = self.oeb.manifest.hrefs[href]
if item not in self.oeb.spine:
self.oeb.manifest.remove(self.oeb.manifest.hrefs[href])
except KeyError:
pass
self.oeb.guide.remove(x)