diff --git a/src/calibre/ebooks/html.py b/src/calibre/ebooks/html.py
index 47001a95fc..bb7081658e 100644
--- a/src/calibre/ebooks/html.py
+++ b/src/calibre/ebooks/html.py
@@ -558,31 +558,22 @@ class Processor(Parser):
def detect_chapters(self):
self.detected_chapters = self.opts.chapter(self.root)
+ chapter_mark = self.opts.chapter_mark # read once; the value does not change inside the loop
+ page_break_before = 'display: block; page-break-before: always'
+ page_break_after = 'display: block; page-break-after: always'
for elem in self.detected_chapters:
text = u' '.join([t.strip() for t in elem.xpath('descendant::text()')])
self.log_info('\tDetected chapter: %s', text[:50])
- if self.opts.chapter_mark != 'none':
- hr = etree.Element('hr')
- if elem.getprevious() is None:
- elem.getparent()[:0] = [hr]
- elif elem.getparent() is not None:
- insert = None
- for i, c in enumerate(elem.getparent()):
- if c is elem:
- insert = i
- break
- elem.getparent()[insert:insert] = [hr]
- if self.opts.chapter_mark != 'rule':
- hr.set('style', 'width:0pt;page-break-before:always')
- if self.opts.chapter_mark == 'both':
- hr2 = etree.Element('hr')
- hr2.tail = u'\u00a0'
- p = hr.getparent()
- i = p.index(hr)
- p[i:i] = [hr2]
-
-
-
+ if chapter_mark == 'none':
+ continue # the chapter is still logged above, but no marker element is inserted
+ elif chapter_mark == 'rule':
+ mark = etree.Element('hr') # plain <hr> with no page-break styling
+ elif chapter_mark == 'pagebreak':
+ mark = etree.Element('div', style=page_break_after) # empty <div> carrying only the page-break-after style
+ else: # chapter_mark == 'both' (any other unrecognised value also lands here): <hr> carrying the page-break-before style
+ mark = etree.Element('hr', style=page_break_before)
+ elem.addprevious(mark) # the marker becomes the sibling immediately preceding the chapter element
+
def save(self):
style_path = os.path.splitext(os.path.basename(self.save_path()))[0]
for i, sheet in enumerate([self.stylesheet, self.font_css, self.override_css]):
diff --git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py
index 8b505ae4aa..0cfdec6355 100644
--- a/src/calibre/ebooks/oeb/base.py
+++ b/src/calibre/ebooks/oeb/base.py
@@ -353,9 +353,13 @@ class Manifest(object):
try:
data = etree.fromstring(data)
except etree.XMLSyntaxError:
+ # TODO: Factor out HTML->XML coercion
self.oeb.logger.warn('Parsing file %r as HTML' % self.href)
data = html.fromstring(data)
data.attrib.pop('xmlns', None)
+ for elem in data.iter(tag=etree.Comment): # sanitise comments so the XML re-parse below does not reject them
+ if elem.text:
+ elem.text = elem.text.strip('-').replace('--', '__') # XML forbids '--' anywhere inside a comment and '-' at its end
data = etree.tostring(data, encoding=unicode)
data = etree.fromstring(data)
# Force into the XHTML namespace