From 763ceacafb819a0647a85409f1ea50255d41493d Mon Sep 17 00:00:00 2001 From: "Marshall T. Vandegrift" Date: Wed, 28 Jan 2009 12:04:22 -0500 Subject: [PATCH 1/3] Fix #1720. Fix HTML comments which aren't valid XML comments. --- src/calibre/ebooks/oeb/base.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py index 8b505ae4aa..0cfdec6355 100644 --- a/src/calibre/ebooks/oeb/base.py +++ b/src/calibre/ebooks/oeb/base.py @@ -353,9 +353,13 @@ class Manifest(object): try: data = etree.fromstring(data) except etree.XMLSyntaxError: + # TODO: Factor out HTML->XML coercion self.oeb.logger.warn('Parsing file %r as HTML' % self.href) data = html.fromstring(data) data.attrib.pop('xmlns', None) + for elem in data.iter(tag=etree.Comment): + if elem.text: + elem.text = elem.text.strip('-') data = etree.tostring(data, encoding=unicode) data = etree.fromstring(data) # Force into the XHTML namespace From 0c57045cf85de33a653bcda584373b2e4d81bdc3 Mon Sep 17 00:00:00 2001 From: "Marshall T. Vandegrift" Date: Wed, 28 Jan 2009 18:27:09 -0500 Subject: [PATCH 2/3] Fix #1716. For real this time? --- src/calibre/ebooks/html.py | 33 +++++++++++---------------------- 1 file changed, 11 insertions(+), 22 deletions(-) diff --git a/src/calibre/ebooks/html.py b/src/calibre/ebooks/html.py index 32601320d4..0199bedf19 100644 --- a/src/calibre/ebooks/html.py +++ b/src/calibre/ebooks/html.py @@ -561,28 +561,17 @@ class Processor(Parser): for elem in self.detected_chapters: text = u' '.join([t.strip() for t in elem.xpath('descendant::text()')]) self.log_info('\tDetected chapter: %s', text[:50]) - if self.opts.chapter_mark != 'none': - hr = etree.Element('hr') - if elem.getprevious() is None: - elem.getparent()[:0] = [hr] - elif elem.getparent() is not None: - insert = None - for i, c in enumerate(elem.getparent()): - if c is elem: - insert = i - break - elem.getparent()[insert:insert] = [hr] - if self.opts.chapter_mark != 'rule': - hr.set('style', 'width:0pt;page-break-before:always') - if self.opts.chapter_mark == 'both': - hr2 = etree.Element('hr') - hr2.tail = u'\u00a0' - p = hr.getparent() - i = p.index(hr) - p[i:i] = [hr2] - - - + chapter_mark = self.opts.chapter_mark + if chapter_mark == 'none': + continue + elif chapter_mark == 'rule': + mark = etree.Element('hr') + elif chapter_mark == 'pagebreak': + mark = etree.Element('div', style='page-break-after: always') + else: # chapter_mark == 'both': + mark = etree.Element('hr', style='page-break-before: always') + elem.addprevious(mark) + def save(self): style_path = os.path.splitext(os.path.basename(self.save_path()))[0] for i, sheet in enumerate([self.stylesheet, self.font_css, self.override_css]): From 2fac1ba61c0c199b479b6a84cbb43d85d2138720 Mon Sep 17 00:00:00 2001 From: "Marshall T. Vandegrift" Date: Wed, 28 Jan 2009 18:51:34 -0500 Subject: [PATCH 3/3] Fix #1716. Ah, allow breaks to always always work under ADE. --- src/calibre/ebooks/html.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/calibre/ebooks/html.py b/src/calibre/ebooks/html.py index 0199bedf19..2de1b093ba 100644 --- a/src/calibre/ebooks/html.py +++ b/src/calibre/ebooks/html.py @@ -558,18 +558,20 @@ class Processor(Parser): def detect_chapters(self): self.detected_chapters = self.opts.chapter(self.root) + chapter_mark = self.opts.chapter_mark + page_break_before = 'display: block; page-break-before: always' + page_break_after = 'display: block; page-break-after: always' for elem in self.detected_chapters: text = u' '.join([t.strip() for t in elem.xpath('descendant::text()')]) self.log_info('\tDetected chapter: %s', text[:50]) - chapter_mark = self.opts.chapter_mark if chapter_mark == 'none': continue elif chapter_mark == 'rule': mark = etree.Element('hr') elif chapter_mark == 'pagebreak': - mark = etree.Element('div', style='page-break-after: always') + mark = etree.Element('div', style=page_break_after) else: # chapter_mark == 'both': - mark = etree.Element('hr', style='page-break-before: always') + mark = etree.Element('hr', style=page_break_before) elem.addprevious(mark) def save(self):