From acac5a479b925bc8b981b42e395b8fa9279517b0 Mon Sep 17 00:00:00 2001
From: John Schember
Date: Tue, 30 Nov 2010 18:27:09 -0500
Subject: [PATCH 1/3] Implement bug #7738: FB2 Output option to create section
per HTML file. Implement FB2 options to wrap h1-3 tags with title elements.
---
src/calibre/ebooks/fb2/fb2ml.py | 14 +++++++++++-
src/calibre/ebooks/fb2/output.py | 14 ++++++++++++
src/calibre/gui2/convert/fb2_output.py | 4 +++-
src/calibre/gui2/convert/fb2_output.ui | 30 +++++++++++++++++++++++++-
4 files changed, 59 insertions(+), 3 deletions(-)
diff --git a/src/calibre/ebooks/fb2/fb2ml.py b/src/calibre/ebooks/fb2/fb2ml.py
index 3987ffa1b8..2a9a92612e 100644
--- a/src/calibre/ebooks/fb2/fb2ml.py
+++ b/src/calibre/ebooks/fb2/fb2ml.py
@@ -91,6 +91,10 @@ class FB2MLizer(object):
return u'\n%s' % etree.tostring(etree.fromstring(output), encoding=unicode, pretty_print=True)
def clean_text(self, text):
+ text = re.sub(r'(?miu)', '', text)
+ text = re.sub(r'(?miu)\s+', '', text)
+ text = re.sub(r'(?miu)\n\n', text)
+
text = re.sub(r'(?miu)\s*
', '', text)
text = re.sub(r'(?miu)\s+
', '', text)
text = re.sub(r'(?miu)', '
\n\n', text)
@@ -166,11 +170,15 @@ class FB2MLizer(object):
def get_text(self):
text = []
- for item in self.oeb_book.spine:
+ for i, item in enumerate(self.oeb_book.spine):
+ if self.opts.sectionize_chapters_using_file_structure and i is not 0:
+ text.append('')
self.log.debug('Converting %s to FictionBook2 XML' % item.href)
stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts, self.opts.output_profile)
text.append(self.add_page_anchor(item))
text += self.dump_text(item.data.find(XHTML('body')), stylizer, item)
+ if self.opts.sectionize_chapters_using_file_structure and i is not len(self.oeb_book.spine) - 1:
+ text.append('')
return ''.join(text)
def fb2_body_footer(self):
@@ -258,6 +266,10 @@ class FB2MLizer(object):
if id_name:
fb2_text.append(self.get_anchor(page, id_name))
+ if tag == 'h1' and self.opts.h1_to_title or tag == 'h2' and self.opts.h2_to_title or tag == 'h3' and self.opts.h3_to_title:
+ fb2_text.append('
')
+ tags.append('title')
+
fb2_tag = TAG_MAP.get(tag, None)
if fb2_tag == 'p':
if 'p' in tag_stack+tags:
diff --git a/src/calibre/ebooks/fb2/output.py b/src/calibre/ebooks/fb2/output.py
index d6c7a25a90..bacaf0da91 100644
--- a/src/calibre/ebooks/fb2/output.py
+++ b/src/calibre/ebooks/fb2/output.py
@@ -25,6 +25,20 @@ class FB2Output(OutputFormatPlugin):
'WARNING: ' \
'This option is experimental. It can cause conversion ' \
'to fail. It can also produce unexpected output.')),
+ OptionRecommendation(name='sectionize_chapters_using_file_structure',
+ recommended_value=False, level=OptionRecommendation.LOW,
+ help=_('Try to turn chapters into individual sections using the ' \
+ 'internal structure of the ebook. This works well for EPUB ' \
+ 'books that have been internally split by chapter.')),
+ OptionRecommendation(name='h1_to_title',
+ recommended_value=False, level=OptionRecommendation.LOW,
+ help=_('Wrap all h1 tags with fb2 title elements.')),
+ OptionRecommendation(name='h2_to_title',
+ recommended_value=False, level=OptionRecommendation.LOW,
+ help=_('Wrap all h2 tags with fb2 title elements.')),
+ OptionRecommendation(name='h3_to_title',
+ recommended_value=False, level=OptionRecommendation.LOW,
+ help=_('Wrap all h3 tags with fb2 title elements.')),
])
def convert(self, oeb_book, output_path, input_plugin, opts, log):
diff --git a/src/calibre/gui2/convert/fb2_output.py b/src/calibre/gui2/convert/fb2_output.py
index a3cbe0e647..5d927146a5 100644
--- a/src/calibre/gui2/convert/fb2_output.py
+++ b/src/calibre/gui2/convert/fb2_output.py
@@ -17,6 +17,8 @@ class PluginWidget(Widget, Ui_Form):
ICON = I('mimetypes/fb2.png')
def __init__(self, parent, get_option, get_help, db=None, book_id=None):
- Widget.__init__(self, parent, ['inline_toc', 'sectionize_chapters'])
+ Widget.__init__(self, parent, ['inline_toc', 'sectionize_chapters',
+ 'sectionize_chapters_using_file_structure', 'h1_to_title',
+ 'h2_to_title', 'h3_to_title'])
self.db, self.book_id = db, book_id
self.initialize_options(get_option, get_help, db, book_id)
diff --git a/src/calibre/gui2/convert/fb2_output.ui b/src/calibre/gui2/convert/fb2_output.ui
index a43a8b72ea..a90ecd615e 100644
--- a/src/calibre/gui2/convert/fb2_output.ui
+++ b/src/calibre/gui2/convert/fb2_output.ui
@@ -14,7 +14,7 @@
Form
- -
+
-
Qt::Vertical
@@ -41,6 +41,34 @@
+ -
+
+
+ Sectionize Chapters using file structure
+
+
+
+ -
+
+
+ Wrap h1 tags with <title> elements
+
+
+
+ -
+
+
+ Wrap h2 tags with <title> elements
+
+
+
+ -
+
+
+ Wrap h3 tags with <title> elements
+
+
+
From 37cde21c6d96b9b385f85d543227b0b3806db879 Mon Sep 17 00:00:00 2001
From: John Schember
Date: Tue, 30 Nov 2010 18:48:21 -0500
Subject: [PATCH 2/3] Fix bug #7745: PML output should ignore external links as
they are not supported by the spec. Fix part of bug #7742: PML output extra %
sign.
---
src/calibre/ebooks/pml/pmlml.py | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/src/calibre/ebooks/pml/pmlml.py b/src/calibre/ebooks/pml/pmlml.py
index 560a132ce1..f97f74f4a0 100644
--- a/src/calibre/ebooks/pml/pmlml.py
+++ b/src/calibre/ebooks/pml/pmlml.py
@@ -216,7 +216,7 @@ class PMLMLizer(object):
w = '\\w'
width = elem.get('width')
if width:
- w += '="%s%%"' % width
+ w += '="%s"' % width
else:
w += '="50%"'
text.append(w)
@@ -252,8 +252,8 @@ class PMLMLizer(object):
if href not in self.link_hrefs.keys():
self.link_hrefs[href] = 'calibre_link-%s' % len(self.link_hrefs.keys())
href = '#%s' % self.link_hrefs[href]
- text.append('\\q="%s"' % href)
- tags.append('q')
+ text.append('\\q="%s"' % href)
+ tags.append('q')
# Anchor ids
id_name = elem.get('id')
From c38eb08018ca9eb404247de0ccc84bf73196ed20 Mon Sep 17 00:00:00 2001
From: John Schember
Date: Tue, 30 Nov 2010 19:04:26 -0500
Subject: [PATCH 3/3] PML Output: ensure \w always ends with a %.
---
src/calibre/ebooks/pml/pmlml.py | 2 ++
1 file changed, 2 insertions(+)
diff --git a/src/calibre/ebooks/pml/pmlml.py b/src/calibre/ebooks/pml/pmlml.py
index f97f74f4a0..ceb7f36124 100644
--- a/src/calibre/ebooks/pml/pmlml.py
+++ b/src/calibre/ebooks/pml/pmlml.py
@@ -216,6 +216,8 @@ class PMLMLizer(object):
w = '\\w'
width = elem.get('width')
if width:
+ if not width.endswith('%'):
+ width += '%'
w += '="%s"' % width
else:
w += '="50%"'