From 91bb71ed8467cf9a5608b27c4d505141caa87a21 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 25 Apr 2009 08:26:58 -0700 Subject: [PATCH] Misc. minor fixes --- src/calibre/ebooks/epub/output.py | 23 +++++++++++----------- src/calibre/ebooks/mobi/reader.py | 5 +++++ src/calibre/ebooks/oeb/transforms/guide.py | 14 ------------- 3 files changed, 17 insertions(+), 25 deletions(-) diff --git a/src/calibre/ebooks/epub/output.py b/src/calibre/ebooks/epub/output.py index a43ca4e5e3..1b37f054b0 100644 --- a/src/calibre/ebooks/epub/output.py +++ b/src/calibre/ebooks/epub/output.py @@ -91,7 +91,7 @@ class EPUBOutput(OutputFormatPlugin): self.condense_ncx([os.path.join(tdir, x) for x in os.listdir(tdir)\ if x.endswith('.ncx')][0]) - from calibre.epub import initialize_container + from calibre.ebooks.epub import initialize_container epub = initialize_container(output_path, os.path.basename(opf)) epub.add_dir(tdir) epub.close() @@ -136,7 +136,7 @@ class EPUBOutput(OutputFormatPlugin): if 'cover' in g: tp = self.TITLEPAGE_COVER%unquote(g['cover'].href) id, href = m.generate('titlepage', 'titlepage.xhtml') - item = m.add(id, href, guess_type('t.xhtml'), + item = m.add(id, href, guess_type('t.xhtml')[0], data=etree.fromstring(tp)) else: item = self.default_cover() @@ -146,7 +146,8 @@ class EPUBOutput(OutputFormatPlugin): if item is not None: self.oeb.spine.insert(0, item, True) self.oeb.guide.refs['cover'].href = item.href - self.oeb.guide.refs['titlepage'].href = item.href + if 'titlepage' in self.oeb.guide.refs: + self.oeb.guide.refs['titlepage'].href = item.href @@ -180,7 +181,7 @@ class EPUBOutput(OutputFormatPlugin): body = body[0] # Replace
<br> that are children of <body> as ADE doesn't handle them if hasattr(body, 'xpath'): - for br in body.xpath('./h:br'): + for br in XPath('./h:br')(body): if br.getparent() is None: continue try: @@ -204,29 +205,29 @@ class EPUBOutput(OutputFormatPlugin): if self.opts.output_profile.remove_object_tags: - for tag in root.xpath('//h:embed'): + for tag in XPath('//h:embed')(root): tag.getparent().remove(tag) - for tag in root.xpath('//h:object'): + for tag in XPath('//h:object')(root): if tag.get('type', '').lower().strip() in ('image/svg+xml',): continue tag.getparent().remove(tag) - for tag in root.xpath('//h:title|//h:style'): + for tag in XPath('//h:title|//h:style')(root): if not tag.text: tag.getparent().remove(tag) - for tag in root.xpath('//h:script'): + for tag in XPath('//h:script')(root): if not tag.text and not tag.get('src', False): tag.getparent().remove(tag) - for tag in root.xpath('//h:form'): + for tag in XPath('//h:form')(root): tag.getparent().remove(tag) - for tag in root.xpath('//h:center'): + for tag in XPath('//h:center')(root): tag.tag = XHTML('div') tag.set('style', 'text-align:center') # ADE can't handle & in an img url - for tag in self.root.xpath('//h:img[@src]'): + for tag in XPath('//h:img[@src]')(root): tag.set('src', tag.get('src', '').replace('&', '')) stylesheet = self.oeb.manifest.hrefs['stylesheet.css'] diff --git a/src/calibre/ebooks/mobi/reader.py b/src/calibre/ebooks/mobi/reader.py index 38de3476d1..25b4114cc2 100644 --- a/src/calibre/ebooks/mobi/reader.py +++ b/src/calibre/ebooks/mobi/reader.py @@ -298,6 +298,11 @@ class MobiReader(object): self.log.debug('Parsing HTML...') root = html.fromstring(self.processed_html) + if root.xpath('descendant::p/descendant::p'): + from lxml.html import soupparser + self.log.warning('Markup contains unclosed <p> tags, parsing using', + 'BeatifulSoup') + root = soupparser.fromstring(self.processed_html) self.upshift_markup(root) guides = root.xpath('//guide') guide = guides[0] if guides else None diff --git a/src/calibre/ebooks/oeb/transforms/guide.py b/src/calibre/ebooks/oeb/transforms/guide.py index dc7123446b..aaeba67d80 100644 --- a/src/calibre/ebooks/oeb/transforms/guide.py +++ b/src/calibre/ebooks/oeb/transforms/guide.py @@ -14,10 +14,6 @@ class Clean(object): from calibre.ebooks.oeb.base import urldefrag self.oeb, self.log, self.opts = oeb, oeb.log, opts - protected_hrefs = set([]) - if 'titlepage' in self.oeb.guide: - protected_hrefs.add(urldefrag( - self.oeb.guide['titlepage'].href)[0]) if 'cover' not in self.oeb.guide: covers = [] for x in ('other.ms-coverimage-standard', @@ -35,20 +31,10 @@ class Clean(object): self.log('Choosing %s:%s as the cover'%(ref.type, ref.href)) ref.type = 'cover' self.oeb.guide.refs['cover'] = ref - protected_hrefs.add(urldefrag(ref.href)[0]) - else: - protected_hrefs.add(urldefrag(self.oeb.guide.refs['cover'].href)[0]) for x in list(self.oeb.guide): href = urldefrag(self.oeb.guide[x].href)[0] if x.lower() not in ('cover', 'titlepage'): - try: - if href not in protected_hrefs: - item = self.oeb.manifest.hrefs[href] - if item not in self.oeb.spine: - self.oeb.manifest.remove(self.oeb.manifest.hrefs[href]) - except KeyError: - pass self.oeb.guide.remove(x)