From 17957bab849cbe5b35077afd9735852e39adbc90 Mon Sep 17 00:00:00 2001 From: "Marshall T. Vandegrift" Date: Tue, 27 Jan 2009 23:13:19 -0500 Subject: [PATCH 1/3] Fix #1716 (LIT: Chapter mark switch ignored and rule (<hr>) is displayed) --- src/calibre/ebooks/html.py | 32 ++++++++++---------------------- 1 file changed, 10 insertions(+), 22 deletions(-) diff --git a/src/calibre/ebooks/html.py b/src/calibre/ebooks/html.py index 32601320d4..2e2a27e172 100644 --- a/src/calibre/ebooks/html.py +++ b/src/calibre/ebooks/html.py @@ -561,28 +561,16 @@ class Processor(Parser): for elem in self.detected_chapters: text = u' '.join([t.strip() for t in elem.xpath('descendant::text()')]) self.log_info('\tDetected chapter: %s', text[:50]) - if self.opts.chapter_mark != 'none': - hr = etree.Element('hr') - if elem.getprevious() is None: - elem.getparent()[:0] = [hr] - elif elem.getparent() is not None: - insert = None - for i, c in enumerate(elem.getparent()): - if c is elem: - insert = i - break - elem.getparent()[insert:insert] = [hr] - if self.opts.chapter_mark != 'rule': - hr.set('style', 'width:0pt;page-break-before:always') - if self.opts.chapter_mark == 'both': - hr2 = etree.Element('hr') - hr2.tail = u'\u00a0' - p = hr.getparent() - i = p.index(hr) - p[i:i] = [hr2] - - - + chapter_mark = self.opts.chapter_mark + if chapter_mark != 'none': + tag = 'hr' if chapter_mark != 'pagebreak' else 'br' + mark = etree.Element(tag) + elem.addprevious(mark) + if chapter_mark == 'both': + mark.set('style', 'page-break-before: always') + elif chapter_mark == 'pagebreak': + mark.set('style', 'page-break-after: always') + def save(self): style_path = os.path.splitext(os.path.basename(self.save_path()))[0] for i, sheet in enumerate([self.stylesheet, self.font_css, self.override_css]): From 65ca22dfe7f9278a631ae4a063a2527a226514c3 Mon Sep 17 00:00:00 2001 From: "Marshall T. Vandegrift" Date: Wed, 28 Jan 2009 07:35:10 -0500 Subject: [PATCH 2/3] Fix fix to #1716 to produce more valid XHTML. 
--- src/calibre/ebooks/html.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/calibre/ebooks/html.py b/src/calibre/ebooks/html.py index 2e2a27e172..4a5cdbb514 100644 --- a/src/calibre/ebooks/html.py +++ b/src/calibre/ebooks/html.py @@ -562,14 +562,16 @@ class Processor(Parser): text = u' '.join([t.strip() for t in elem.xpath('descendant::text()')]) self.log_info('\tDetected chapter: %s', text[:50]) chapter_mark = self.opts.chapter_mark - if chapter_mark != 'none': - tag = 'hr' if chapter_mark != 'pagebreak' else 'br' - mark = etree.Element(tag) + if chapter_mark == 'pagebreak': + style = elem.get('style', '').split(';') + style = filter(None, map(lambda x: x.strip(), style)) + style.append('page-break-before: always') + elem.set('style', '; '.join(style)) + elif chapter_mark in ('rule', 'both'): + mark = etree.Element('hr') elem.addprevious(mark) if chapter_mark == 'both': mark.set('style', 'page-break-before: always') - elif chapter_mark == 'pagebreak': - mark.set('style', 'page-break-after: always') def save(self): style_path = os.path.splitext(os.path.basename(self.save_path()))[0] From 0f5d6073599a856fcc06e90638ddbd27b0698668 Mon Sep 17 00:00:00 2001 From: "Marshall T. Vandegrift" Date: Wed, 28 Jan 2009 08:33:23 -0500 Subject: [PATCH 3/3] Fix #1694. Render HTML cover as last-resort to locate a cover image. 
--- src/calibre/ebooks/metadata/epub.py | 47 +++++++++++++++-------------- src/calibre/ebooks/oeb/base.py | 24 +++++++++------ 2 files changed, 39 insertions(+), 32 deletions(-) diff --git a/src/calibre/ebooks/metadata/epub.py b/src/calibre/ebooks/metadata/epub.py index a8c2105c02..360869cc9c 100644 --- a/src/calibre/ebooks/metadata/epub.py +++ b/src/calibre/ebooks/metadata/epub.py @@ -106,9 +106,11 @@ class CoverRenderer(QObject): WIDTH = 600 HEIGHT = 800 - def __init__(self, url, size, loop): + def __init__(self, path): + if QApplication.instance() is None: + QApplication([]) QObject.__init__(self) - self.loop = loop + self.loop = QEventLoop() self.page = QWebPage() pal = self.page.palette() pal.setBrush(QPalette.Background, Qt.white) @@ -117,33 +119,43 @@ class CoverRenderer(QObject): self.page.mainFrame().setScrollBarPolicy(Qt.Vertical, Qt.ScrollBarAlwaysOff) self.page.mainFrame().setScrollBarPolicy(Qt.Horizontal, Qt.ScrollBarAlwaysOff) QObject.connect(self.page, SIGNAL('loadFinished(bool)'), self.render_html) - self.image_data = None + self._image_data = None self.rendered = False + url = QUrl.fromLocalFile(os.path.normpath(path)) self.page.mainFrame().load(url) def render_html(self, ok): - self.rendered = True try: if not ok: + self.rendered = True return - #size = self.page.mainFrame().contentsSize() - #width, height = fit_image(size.width(), size.height(), self.WIDTH, self.HEIGHT)[1:] - #self.page.setViewportSize(QSize(width, height)) image = QImage(self.page.viewportSize(), QImage.Format_ARGB32) image.setDotsPerMeterX(96*(100/2.54)) image.setDotsPerMeterY(96*(100/2.54)) painter = QPainter(image) self.page.mainFrame().render(painter) painter.end() - ba = QByteArray() buf = QBuffer(ba) buf.open(QBuffer.WriteOnly) image.save(buf, 'JPEG') - self.image_data = str(ba.data()) + self._image_data = str(ba.data()) finally: self.loop.exit(0) - + self.rendered = True + + def image_data(): + def fget(self): + if not self.rendered: + self.loop.exec_() + count = 0 + 
while count < 50 and not self.rendered: + time.sleep(0.1) + count += 1 + return self._image_data + return property(fget=fget) + image_data = image_data() + def get_cover(opf, opf_path, stream): spine = list(opf.spine_items()) @@ -155,20 +167,11 @@ def get_cover(opf, opf_path, stream): stream.seek(0) ZipFile(stream).extractall() opf_path = opf_path.replace('/', os.sep) - cpage = os.path.join(tdir, os.path.dirname(opf_path), *cpage.split('/')) + cpage = os.path.join(tdir, os.path.dirname(opf_path), cpage) if not os.path.exists(cpage): return - if QApplication.instance() is None: - QApplication([]) - url = QUrl.fromLocalFile(cpage) - loop = QEventLoop() - cr = CoverRenderer(url, os.stat(cpage).st_size, loop) - loop.exec_() - count = 0 - while count < 50 and not cr.rendered: - time.sleep(0.1) - count += 1 - return cr.image_data + cr = CoverRenderer(cpage) + return cr.image_data def get_metadata(stream, extract_cover=True): """ Return metadata as a :class:`MetaInformation` object """ diff --git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py index 927592ac0e..8b505ae4aa 100644 --- a/src/calibre/ebooks/oeb/base.py +++ b/src/calibre/ebooks/oeb/base.py @@ -23,6 +23,8 @@ from calibre import LoggingInterface from calibre.translations.dynamic import translate from calibre.startup import get_lang from calibre.ebooks.oeb.entitydefs import ENTITYDEFS +from calibre.ebooks.metadata.epub import CoverRenderer +from calibre.ptempfile import TemporaryDirectory XML_NS = 'http://www.w3.org/XML/1998/namespace' XHTML_NS = 'http://www.w3.org/1999/xhtml' @@ -798,7 +800,6 @@ class TOC(object): class OEBBook(object): COVER_SVG_XP = XPath('h:body//svg:svg[position() = 1]') COVER_OBJECT_XP = XPath('h:body//h:object[@data][position() = 1]') - COVER_IMG_XP = XPath('h:body//h:img[@src][position() = 1]') def __init__(self, opfpath=None, container=None, encoding=None, logger=FauxLogger()): @@ -1055,6 +1056,17 @@ class OEBBook(object): if self._toc_from_html(opf): return 
self._toc_from_spine(opf) + def _cover_from_html(self, hcover): + with TemporaryDirectory('_html_cover') as tdir: + writer = DirWriter() + writer.dump(self, tdir) + path = os.path.join(tdir, hcover.href) + renderer = CoverRenderer(path) + data = renderer.image_data + id, href = self.manifest.generate('cover', 'cover.jpeg') + item = self.manifest.add(id, href, JPEG_MIME, data=data) + return item + def _locate_cover_image(self): if self.metadata.cover: id = str(self.metadata.cover[0]) @@ -1088,18 +1100,10 @@ class OEBBook(object): item = self.manifest.hrefs.get(href, None) if item is not None and item.media_type in OEB_IMAGES: return item - if self.COVER_IMG_XP(html): - img = self.COVER_IMG_XP(html)[0] - href = hcover.abshref(img.get('src')) - item = self.manifest.hrefs.get(href, None) - if item is not None and item.media_type in OEB_IMAGES: - return item - return None + return self._cover_from_html(hcover) def _ensure_cover_image(self): cover = self._locate_cover_image() - if not cover: - return if self.metadata.cover: self.metadata.cover[0].value = cover.id return