diff --git a/src/calibre/ebooks/fb2/fb2ml.py b/src/calibre/ebooks/fb2/fb2ml.py
index 5efc360f1f..90c88c3cd0 100644
--- a/src/calibre/ebooks/fb2/fb2ml.py
+++ b/src/calibre/ebooks/fb2/fb2ml.py
@@ -27,13 +27,10 @@ class FB2MLizer(object):
'''
Todo: * Include more FB2 specific tags in the conversion.
* Handle a tags.
- * Figure out some way to turn oeb_book.toc items into
- to allow for readers to generate toc from the document.
'''
def __init__(self, log):
self.log = log
- self.image_hrefs = {}
self.reset_state()
def reset_state(self):
@@ -43,17 +40,25 @@ class FB2MLizer(object):
# in different directories. FB2 images are all in a flat layout so we rename all images
# into a sequential numbering system to ensure there are no collisions between image names.
self.image_hrefs = {}
+ # Mapping of toc item hrefs to either 'page' or a dict of {element id: <section> level}
+ self.toc = {}
+ # Used to see whether a new <section> needs to be opened
+ self.section_level = 0
def extract_content(self, oeb_book, opts):
self.log.info('Converting XHTML to FB2 markup...')
self.oeb_book = oeb_book
self.opts = opts
+ self.reset_state()
+
+ # Used for adding <section>s and <title>s to allow readers
+ # to generate toc from the document.
+ if self.opts.sectionize == 'toc':
+ self.create_flat_toc(self.oeb_book.toc, 1)
return self.fb2mlize_spine()
def fb2mlize_spine(self):
- self.reset_state()
-
output = [self.fb2_header()]
output.append(self.get_text())
output.append(self.fb2mlize_images())
@@ -66,13 +71,19 @@ class FB2MLizer(object):
return u'' + output
def clean_text(self, text):
- text = re.sub(r'(?miu)', '', text)
- text = re.sub(r'(?miu)\s+', '', text)
- text = re.sub(r'(?miu)
\n\n', text)
-
text = re.sub(r'(?miu)\s*
', '', text)
- text = re.sub(r'(?miu)\s+
', '', text)
- text = re.sub(r'(?miu)', '
\n\n', text)
+ text = re.sub(r'(?miu)\s*
', '', text)
+ text = re.sub(r'(?miu)\s*', '
\n\n', text)
+
+ text = re.sub(r'(?miu)
\s*', '', text)
+ text = re.sub(r'(?miu)\s+', '', text)
+
+ text = re.sub(r'(?miu)', '', text)
+ text = re.sub(r'(?miu)\s*', '\n', text)
+ text = re.sub(r'(?miu)\s*', '\n\n', text)
+ text = re.sub(r'(?miu)\s*', '\n', text)
+ text = re.sub(r'(?miu)\s*', '\n', text)
+ text = re.sub(r'(?miu)\n\n', text)
if self.opts.insert_blank_line:
text = re.sub(r'(?miu)', '', text)
@@ -144,12 +155,34 @@ class FB2MLizer(object):
def get_text(self):
text = ['']
+
+ # Create main section if there are no others to create
+ if self.opts.sectionize == 'nothing':
+ text.append('')
+ self.section_level += 1
+
for item in self.oeb_book.spine:
self.log.debug('Converting %s to FictionBook2 XML' % item.href)
stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts, self.opts.output_profile)
- text.append('')
+
+ # Start a <section> if we must sectionize each file or if the TOC references this page
+ page_section_open = False
+ if self.opts.sectionize == 'files' or self.toc.get(item.href) == 'page':
+ text.append('')
+ page_section_open = True
+ self.section_level += 1
+
text += self.dump_text(item.data.find(XHTML('body')), stylizer, item)
+
+ if page_section_open:
+ text.append('')
+ self.section_level -= 1
+
+ # Close any open sections
+ while self.section_level > 0:
text.append('')
+ self.section_level -= 1
+
return ''.join(text) + ''
def fb2mlize_images(self):
@@ -184,6 +217,17 @@ class FB2MLizer(object):
'%s.' % (item.href, e))
return ''.join(images)
+ def create_flat_toc(self, nodes, level):
+     '''
+     Flatten the nested TOC ``nodes`` into ``self.toc``, keyed by file href.
+
+     An entry without a fragment marks the whole file with the string 'page'.
+     An entry with a fragment is stored as ``self.toc[href][fragment] = level``.
+     Child nodes are recorded recursively with ``level + 1``.
+     '''
+     for item in nodes:
+         href, _sep, anchor = item.href.partition('#')
+         if not anchor:
+             self.toc[href] = 'page'
+         else:
+             entry = self.toc.get(href)
+             # A file already marked 'page' holds a string, not a dict; assigning
+             # into it would raise TypeError, so the page-level entry wins here
+             # (just as a later page-level entry overwrites an existing dict).
+             if entry != 'page':
+                 if not entry:
+                     entry = self.toc[href] = {}
+                 entry[anchor] = level
+         self.create_flat_toc(item.nodes, level + 1)
+
def ensure_p(self):
if self.in_p:
return [], []
@@ -254,10 +298,38 @@ class FB2MLizer(object):
# First tag in tree
tag = barename(elem_tree.tag)
+ # Convert TOC entries to <title>s and add <section>s
+ if self.opts.sectionize == 'toc':
+ # A section cannot be a child of any other element than another section,
+ # so leave the tag alone if there are parents
+ if not tag_stack:
+ # There are two reasons to start a new section here: the TOC pointed to
+ # this page (then we use the first non-<body> element on the page as a <title>), or
+ # the TOC pointed to a specific element
+ newlevel = 0
+ toc_entry = self.toc.get(page.href, None)
+ if toc_entry == 'page':
+ if tag != 'body' and hasattr(elem_tree, 'text') and elem_tree.text:
+ newlevel = 1
+ self.toc[page.href] = None
+ elif toc_entry and elem_tree.attrib.get('id', None):
+ newlevel = toc_entry.get(elem_tree.attrib.get('id', None), None)
+
+ # Start a new section if necessary
+ if newlevel:
+ if not (newlevel > self.section_level):
+ fb2_out.append('')
+ self.section_level -= 1
+ fb2_out.append('')
+ self.section_level += 1
+ fb2_out.append('')
+ tags.append('title')
+ if self.section_level == 0:
+ # If none of the prior processing made a section, make one now to be FB2 spec compliant
+ fb2_out.append('')
+ self.section_level += 1
+
# Process the XHTML tag if it needs to be converted to an FB2 tag.
- if tag == 'h1' and self.opts.h1_to_title or tag == 'h2' and self.opts.h2_to_title or tag == 'h3' and self.opts.h3_to_title:
- fb2_out.append('')
- tags.append('title')
if tag == 'img':
if elem_tree.attrib.get('src', None):
# Only write the image tag if it is in the manifest.
diff --git a/src/calibre/ebooks/fb2/output.py b/src/calibre/ebooks/fb2/output.py
index 33714c6e6e..e8b50d6f77 100644
--- a/src/calibre/ebooks/fb2/output.py
+++ b/src/calibre/ebooks/fb2/output.py
@@ -16,15 +16,15 @@ class FB2Output(OutputFormatPlugin):
file_type = 'fb2'
options = set([
- OptionRecommendation(name='h1_to_title',
- recommended_value=False, level=OptionRecommendation.LOW,
- help=_('Wrap all h1 tags with fb2 title elements.')),
- OptionRecommendation(name='h2_to_title',
- recommended_value=False, level=OptionRecommendation.LOW,
- help=_('Wrap all h2 tags with fb2 title elements.')),
- OptionRecommendation(name='h3_to_title',
- recommended_value=False, level=OptionRecommendation.LOW,
- help=_('Wrap all h3 tags with fb2 title elements.')),
+ OptionRecommendation(name='sectionize',
+ recommended_value='files', level=OptionRecommendation.LOW,
+ choices=['toc', 'files', 'nothing'],
+ help=_('Specify the sectionization of elements. '
+ 'A value of "nothing" turns the book into a single section. '
+ 'A value of "files" turns each file into a separate section; use this if your device is having trouble. '
+ 'A value of "Table of Contents" turns the entries in the Table of Contents into titles and creates sections; '
+ 'if it fails, adjust the "Structure Detection" and/or "Table of Contents" settings '
+ '(turn on "Force use of auto-generated Table of Contents).')),
])
def convert(self, oeb_book, output_path, input_plugin, opts, log):
diff --git a/src/calibre/gui2/convert/fb2_output.py b/src/calibre/gui2/convert/fb2_output.py
index 6b1497a9db..66296ee666 100644
--- a/src/calibre/gui2/convert/fb2_output.py
+++ b/src/calibre/gui2/convert/fb2_output.py
@@ -17,6 +17,8 @@ class PluginWidget(Widget, Ui_Form):
ICON = I('mimetypes/fb2.png')
def __init__(self, parent, get_option, get_help, db=None, book_id=None):
- Widget.__init__(self, parent, ['h1_to_title', 'h2_to_title', 'h3_to_title'])
+ Widget.__init__(self, parent, ['sectionize'])
self.db, self.book_id = db, book_id
+ for x in ('toc', 'files', 'nothing'):
+ self.opt_sectionize.addItem(x)
self.initialize_options(get_option, get_help, db, book_id)
diff --git a/src/calibre/gui2/convert/fb2_output.ui b/src/calibre/gui2/convert/fb2_output.ui
index 436719aed4..891aa29857 100644
--- a/src/calibre/gui2/convert/fb2_output.ui
+++ b/src/calibre/gui2/convert/fb2_output.ui
@@ -14,7 +14,7 @@
Form
- -
+
-
Qt::Vertical
@@ -28,23 +28,19 @@
-
-
-
- Wrap h1 tags with <title> elements
-
-
-
- -
-
-
- Wrap h2 tags with <title> elements
-
-
-
- -
-
-
- Wrap h3 tags with <title> elements
+
+
+ Sectionize:
+
+
+ opt_sectionize
+
+
+
+ -
+
+
+ 20