From e9cdd3d2402eed396df918a18d0d3014ce137c88 Mon Sep 17 00:00:00 2001 From: John Schember Date: Sat, 18 Dec 2010 09:22:16 -0500 Subject: [PATCH 1/3] Remove 2.1 tags. --- src/calibre/ebooks/fb2/fb2ml.py | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/src/calibre/ebooks/fb2/fb2ml.py b/src/calibre/ebooks/fb2/fb2ml.py index 5efc360f1f..89c12db103 100644 --- a/src/calibre/ebooks/fb2/fb2ml.py +++ b/src/calibre/ebooks/fb2/fb2ml.py @@ -297,18 +297,6 @@ class FB2MLizer(object): s_out, s_tags = self.handle_simple_tag('emphasis', tag_stack+tags) fb2_out += s_out tags += s_tags - elif tag in ('del', 'strike'): - s_out, s_tags = self.handle_simple_tag('strikethrough', tag_stack+tags) - fb2_out += s_out - tags += s_tags - elif tag == 'sub': - s_out, s_tags = self.handle_simple_tag('sub', tag_stack+tags) - fb2_out += s_out - tags += s_tags - elif tag == 'sup': - s_out, s_tags = self.handle_simple_tag('sup', tag_stack+tags) - fb2_out += s_out - tags += s_tags # Processes style information. if style['font-style'] == 'italic': @@ -319,10 +307,6 @@ class FB2MLizer(object): s_out, s_tags = self.handle_simple_tag('strong', tag_stack+tags) fb2_out += s_out tags += s_tags - elif style['text-decoration'] == 'line-through': - s_out, s_tags = self.handle_simple_tag('strikethrough', tag_stack+tags) - fb2_out += s_out - tags += s_tags # Process element text. if hasattr(elem_tree, 'text') and elem_tree.text: From a202b5bc0e58b03ac4be370301ea4550542a77b9 Mon Sep 17 00:00:00 2001 From: John Schember Date: Sat, 18 Dec 2010 20:17:32 -0500 Subject: [PATCH 2/3] Restore 2.1 features. 
--- src/calibre/ebooks/fb2/fb2ml.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/calibre/ebooks/fb2/fb2ml.py b/src/calibre/ebooks/fb2/fb2ml.py index 89c12db103..5efc360f1f 100644 --- a/src/calibre/ebooks/fb2/fb2ml.py +++ b/src/calibre/ebooks/fb2/fb2ml.py @@ -297,6 +297,18 @@ class FB2MLizer(object): s_out, s_tags = self.handle_simple_tag('emphasis', tag_stack+tags) fb2_out += s_out tags += s_tags + elif tag in ('del', 'strike'): + s_out, s_tags = self.handle_simple_tag('strikethrough', tag_stack+tags) + fb2_out += s_out + tags += s_tags + elif tag == 'sub': + s_out, s_tags = self.handle_simple_tag('sub', tag_stack+tags) + fb2_out += s_out + tags += s_tags + elif tag == 'sup': + s_out, s_tags = self.handle_simple_tag('sup', tag_stack+tags) + fb2_out += s_out + tags += s_tags # Processes style information. if style['font-style'] == 'italic': @@ -307,6 +319,10 @@ class FB2MLizer(object): s_out, s_tags = self.handle_simple_tag('strong', tag_stack+tags) fb2_out += s_out tags += s_tags + elif style['text-decoration'] == 'line-through': + s_out, s_tags = self.handle_simple_tag('strikethrough', tag_stack+tags) + fb2_out += s_out + tags += s_tags # Process element text. 
if hasattr(elem_tree, 'text') and elem_tree.text: From 0bc4a7a7f4e1dd72e52ff9adb518ee9eee6091ac Mon Sep 17 00:00:00 2001 From: John Schember Date: Sat, 18 Dec 2010 20:47:12 -0500 Subject: [PATCH 3/3] FB2 Output: Implement bug #7936: Sectionize TOC entries --- src/calibre/ebooks/fb2/fb2ml.py | 102 +++++++++++++++++++++---- src/calibre/ebooks/fb2/output.py | 18 ++--- src/calibre/gui2/convert/fb2_output.py | 4 +- src/calibre/gui2/convert/fb2_output.ui | 32 ++++---- 4 files changed, 113 insertions(+), 43 deletions(-) diff --git a/src/calibre/ebooks/fb2/fb2ml.py b/src/calibre/ebooks/fb2/fb2ml.py index 5efc360f1f..90c88c3cd0 100644 --- a/src/calibre/ebooks/fb2/fb2ml.py +++ b/src/calibre/ebooks/fb2/fb2ml.py @@ -27,13 +27,10 @@ class FB2MLizer(object): ''' Todo: * Include more FB2 specific tags in the conversion. * Handle a tags. - * Figure out some way to turn oeb_book.toc items into
- <p> to allow for readers to generate toc from the document. ''' def __init__(self, log): self.log = log - self.image_hrefs = {} self.reset_state() def reset_state(self): @@ -43,17 +40,25 @@ class FB2MLizer(object): # in different directories. FB2 images are all in a flat layout so we rename all images # into a sequential numbering system to ensure there are no collisions between image names. self.image_hrefs = {} + # Mapping of toc items and their + self.toc = {} + # Used to see whether a new <section> needs to be opened + self.section_level = 0 def extract_content(self, oeb_book, opts): self.log.info('Converting XHTML to FB2 markup...') self.oeb_book = oeb_book self.opts = opts + self.reset_state() + + # Used for adding <section>s and <title>s to allow readers + # to generate toc from the document. + if self.opts.sectionize == 'toc': + self.create_flat_toc(self.oeb_book.toc, 1) return self.fb2mlize_spine() def fb2mlize_spine(self): - self.reset_state() - output = [self.fb2_header()] output.append(self.get_text()) output.append(self.fb2mlize_images()) @@ -66,13 +71,19 @@ class FB2MLizer(object): return u'<?xml version="1.0" encoding="UTF-8"?>' + output def clean_text(self, text): - text = re.sub(r'(?miu)<section>\s*</section>', '', text) - text = re.sub(r'(?miu)\s+</section>', '</section>', text) - text = re.sub(r'(?miu)</section><section>', '</section>\n\n<section>', text) - text = re.sub(r'(?miu)<p>\s*</p>', '', text) - text = re.sub(r'(?miu)\s+</p>', '</p>', text) - text = re.sub(r'(?miu)</p><p>', '</p>\n\n<p>', text) + text = re.sub(r'(?miu)\s*</p>', '</p>', text) + text = re.sub(r'(?miu)</p>\s*<p>', '</p>\n\n<p>', text) + + text = re.sub(r'(?miu)<title>\s*', '', text) + text = re.sub(r'(?miu)\s+', '', text) + + text = re.sub(r'(?miu)
<section>\s*</section>', '', text) + text = re.sub(r'(?miu)\s*</section>', '\n</section>', text) + text = re.sub(r'(?miu)</section>\s*', '</section>\n\n', text) + text = re.sub(r'(?miu)\s*<section>', '\n<section>', text) + text = re.sub(r'(?miu)<section>\s*', '<section>\n', text) + text = re.sub(r'(?miu)</section><section>', '</section>\n\n<section>', text) if self.opts.insert_blank_line: text = re.sub(r'(?miu)</p>', '</p><empty-line />
', text) @@ -144,12 +155,34 @@ class FB2MLizer(object): def get_text(self): text = [''] + + # Create main section if there are no others to create + if self.opts.sectionize == 'nothing': + text.append('
<section>') + self.section_level += 1 + for item in self.oeb_book.spine: self.log.debug('Converting %s to FictionBook2 XML' % item.href) stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts, self.opts.output_profile) - text.append('<section>') + + # Start a <section> if we must sectionize each file or if the TOC references this page + page_section_open = False + if self.opts.sectionize == 'files' or self.toc.get(item.href) == 'page': + text.append('<section>') + page_section_open = True + self.section_level += 1 + text += self.dump_text(item.data.find(XHTML('body')), stylizer, item) + + if page_section_open: + text.append('</section>') + self.section_level -= 1 + + # Close any open sections + while self.section_level > 0: text.append('</section>
') + self.section_level -= 1 + return ''.join(text) + '' def fb2mlize_images(self): @@ -184,6 +217,17 @@ class FB2MLizer(object): '%s.' % (item.href, e)) return ''.join(images) + def create_flat_toc(self, nodes, level): + for item in nodes: + href, mid, id = item.href.partition('#') + if not id: + self.toc[href] = 'page' + else: + if not self.toc.get(href, None): + self.toc[href] = {} + self.toc[href][id] = level + self.create_flat_toc(item.nodes, level + 1) + def ensure_p(self): if self.in_p: return [], [] @@ -254,10 +298,38 @@ class FB2MLizer(object): # First tag in tree tag = barename(elem_tree.tag) + # Convert TOC entries to s and add <section>s + if self.opts.sectionize == 'toc': + # A section cannot be a child of any other element than another section, + # so leave the tag alone if there are parents + if not tag_stack: + # There are two reasons to start a new section here: the TOC pointed to + # this page (then we use the first non-<body> on the page as a <title>), or + # the TOC pointed to a specific element + newlevel = 0 + toc_entry = self.toc.get(page.href, None) + if toc_entry == 'page': + if tag != 'body' and hasattr(elem_tree, 'text') and elem_tree.text: + newlevel = 1 + self.toc[page.href] = None + elif toc_entry and elem_tree.attrib.get('id', None): + newlevel = toc_entry.get(elem_tree.attrib.get('id', None), None) + + # Start a new section if necessary + if newlevel: + if not (newlevel > self.section_level): + fb2_out.append('</section>') + self.section_level -= 1 + fb2_out.append('<section>') + self.section_level += 1 + fb2_out.append('<title>') + tags.append('title') + if self.section_level == 0: + # If none of the prior processing made a section, make one now to be FB2 spec compliant + fb2_out.append('<section>') + self.section_level += 1 + # Process the XHTML tag if it needs to be converted to an FB2 tag. 
- if tag == 'h1' and self.opts.h1_to_title or tag == 'h2' and self.opts.h2_to_title or tag == 'h3' and self.opts.h3_to_title: - fb2_out.append('<title>') - tags.append('title') if tag == 'img': if elem_tree.attrib.get('src', None): # Only write the image tag if it is in the manifest. diff --git a/src/calibre/ebooks/fb2/output.py b/src/calibre/ebooks/fb2/output.py index 33714c6e6e..e8b50d6f77 100644 --- a/src/calibre/ebooks/fb2/output.py +++ b/src/calibre/ebooks/fb2/output.py @@ -16,15 +16,15 @@ class FB2Output(OutputFormatPlugin): file_type = 'fb2' options = set([ - OptionRecommendation(name='h1_to_title', - recommended_value=False, level=OptionRecommendation.LOW, - help=_('Wrap all h1 tags with fb2 title elements.')), - OptionRecommendation(name='h2_to_title', - recommended_value=False, level=OptionRecommendation.LOW, - help=_('Wrap all h2 tags with fb2 title elements.')), - OptionRecommendation(name='h3_to_title', - recommended_value=False, level=OptionRecommendation.LOW, - help=_('Wrap all h3 tags with fb2 title elements.')), + OptionRecommendation(name='sectionize', + recommended_value='files', level=OptionRecommendation.LOW, + choices=['toc', 'files', 'nothing'], + help=_('Specify the sectionization of elements. ' + 'A value of "nothing" turns the book into a single section. ' + 'A value of "files" turns each file into a separate section; use this if your device is having trouble. 
' + 'A value of "Table of Contents" turns the entries in the Table of Contents into titles and creates sections; ' + 'if it fails, adjust the "Structure Detection" and/or "Table of Contents" settings ' + '(turn on "Force use of auto-generated Table of Contents).')), ]) def convert(self, oeb_book, output_path, input_plugin, opts, log): diff --git a/src/calibre/gui2/convert/fb2_output.py b/src/calibre/gui2/convert/fb2_output.py index 6b1497a9db..66296ee666 100644 --- a/src/calibre/gui2/convert/fb2_output.py +++ b/src/calibre/gui2/convert/fb2_output.py @@ -17,6 +17,8 @@ class PluginWidget(Widget, Ui_Form): ICON = I('mimetypes/fb2.png') def __init__(self, parent, get_option, get_help, db=None, book_id=None): - Widget.__init__(self, parent, ['h1_to_title', 'h2_to_title', 'h3_to_title']) + Widget.__init__(self, parent, ['sectionize']) self.db, self.book_id = db, book_id + for x in ('toc', 'files', 'nothing'): + self.opt_sectionize.addItem(x) self.initialize_options(get_option, get_help, db, book_id) diff --git a/src/calibre/gui2/convert/fb2_output.ui b/src/calibre/gui2/convert/fb2_output.ui index 436719aed4..891aa29857 100644 --- a/src/calibre/gui2/convert/fb2_output.ui +++ b/src/calibre/gui2/convert/fb2_output.ui @@ -14,7 +14,7 @@ <string>Form</string> </property> <layout class="QGridLayout" name="gridLayout"> - <item row="3" column="0"> + <item row="1" column="0"> <spacer name="verticalSpacer"> <property name="orientation"> <enum>Qt::Vertical</enum> @@ -28,23 +28,19 @@ </spacer> </item> <item row="0" column="0"> - <widget class="QCheckBox" name="opt_h1_to_title"> - <property name="text"> - <string>Wrap h1 tags with <title> elements</string> - </property> - </widget> - </item> - <item row="1" column="0"> - <widget class="QCheckBox" name="opt_h2_to_title"> - <property name="text"> - <string>Wrap h2 tags with <title> elements</string> - </property> - </widget> - </item> - <item row="2" column="0"> - <widget class="QCheckBox" name="opt_h3_to_title"> - <property 
name="text"> - <string>Wrap h3 tags with <title> elements</string> + <widget class="QLabel" name="label"> + <property name="text"> + <string>Sectionize:</string> + </property> + <property name="buddy"> + <cstring>opt_sectionize</cstring> + </property> + </widget> + </item> + <item row="0" column="1"> + <widget class="QComboBox" name="opt_sectionize"> + <property name="minimumContentsLength"> + <number>20</number> </property> </widget> </item>