FB2 Output: Experimental option to break book into sections based upon chapters.

This commit is contained in:
John Schember 2010-08-25 08:24:43 -04:00
parent 803d580a93
commit e8a0d60620
5 changed files with 24 additions and 3 deletions

View File

@ -28,3 +28,4 @@ nbproject/
*.userprefs
.project
.pydevproject
.settings/

View File

@ -86,10 +86,12 @@ class FB2MLizer(object):
output.append(self.fb2_footer())
output = ''.join(output).replace(u'ghji87yhjko0Caliblre-toc-placeholder-for-insertion-later8ujko0987yjk', self.get_toc())
output = self.clean_text(output)
if self.opts.sectionize_chapters:
output = self.sectionize_chapters(output)
return u'<?xml version="1.0" encoding="UTF-8"?>\n%s' % etree.tostring(etree.fromstring(output), encoding=unicode, pretty_print=True)
def clean_text(self, text):
    """Remove empty paragraph elements from the generated markup.

    Two substitutions run in order: the first deletes ``<p>`` elements
    that contain nothing but spaces, the second deletes ``<p>`` elements
    that contain nothing but whitespace of any kind. Running them in
    sequence means a paragraph left empty by the first pass is also
    removed by the second.

    :param text: markup string to clean.
    :return: ``text`` with the empty paragraphs removed.
    """
    # Raw strings keep ``\s`` a regex escape; in a plain literal it is an
    # invalid string escape that newer Python versions warn about.
    text = re.sub(r'<p>[ ]*</p>', '', text)
    text = re.sub(r'<p>\s*</p>', '', text)
    return text
@ -149,6 +151,11 @@ class FB2MLizer(object):
self.oeb.warn('Ignoring toc item: %s not found in document.' % item)
return ''.join(toc)
def sectionize_chapters(self, text):
    """Insert section breaks in front of anchored bold headings.

    A heading is an ``<a id="calibre_link-N"/>`` anchor followed by
    ``<strong>`` text that may itself be wrapped in ``<p>`` tags. Two
    layouts are handled, in this order:

    1. the anchor directly precedes the heading;
    2. the anchor sits alone inside its own ``<p>...</p>`` which
       precedes the heading.

    Each match is replaced by ``</section><section>``, the anchor, and
    the heading wrapped in ``<title>``. All patterns are matched
    case-insensitively, with ``.`` spanning newlines.

    :param text: markup string to process.
    :return: ``text`` with the section/title markup inserted.
    """
    def start_section(match):
        # Close the running section, open a new one, and keep the anchor
        # so links pointing at it still have a target.
        return '</section><section>%s<title>%s</title>' % (
            match.group('anchor'), match.group('strong'))

    bare_anchor = re.compile(
        r'(?imsu)(?P<anchor><a\s+id="calibre_link-\d+"\s*/>)\s*(?P<strong>(<p>)*\s*<strong>.+?</strong>\s*(</p>)*)')
    wrapped_anchor = re.compile(
        r'(?imsu)<p>\s*(?P<anchor><a\s+id="calibre_link-\d+"\s*/>)\s*</p>\s*(?P<strong>(<p>)*\s*<strong>.+?</strong>\s*(</p>)*)')

    # Order matters: the bare-anchor pass runs over the whole text before
    # the paragraph-wrapped pass sees it.
    for pattern in (bare_anchor, wrapped_anchor):
        text = pattern.sub(start_section, text)
    return text
def get_text(self):
text = []
for item in self.oeb_book.spine:

View File

@ -19,6 +19,12 @@ class FB2Output(OutputFormatPlugin):
OptionRecommendation(name='inline_toc',
recommended_value=False, level=OptionRecommendation.LOW,
help=_('Add Table of Contents to beginning of the book.')),
OptionRecommendation(name='sectionize_chapters',
recommended_value=False, level=OptionRecommendation.LOW,
help=_('Try to turn chapters into individual sections. ' \
'WARNING: ' \
'This option is experimental. It can cause conversion ' \
'to fail. It can also produce unexpected output.')),
])
def convert(self, oeb_book, output_path, input_plugin, opts, log):

View File

@ -16,6 +16,6 @@ class PluginWidget(Widget, Ui_Form):
COMMIT_NAME = 'fb2_output'
def __init__(self, parent, get_option, get_help, db=None, book_id=None):
Widget.__init__(self, parent, ['inline_toc'])
Widget.__init__(self, parent, ['inline_toc', 'sectionize_chapters'])
self.db, self.book_id = db, book_id
self.initialize_options(get_option, get_help, db, book_id)

View File

@ -14,7 +14,7 @@
<string>Form</string>
</property>
<layout class="QGridLayout" name="gridLayout">
<item row="1" column="0">
<item row="2" column="0">
<spacer name="verticalSpacer">
<property name="orientation">
<enum>Qt::Vertical</enum>
@ -34,6 +34,13 @@
</property>
</widget>
</item>
<item row="1" column="0">
<widget class="QCheckBox" name="opt_sectionize_chapters">
<property name="text">
<string>Sectionize Chapters (Use with care!)</string>
</property>
</widget>
</item>
</layout>
</widget>
<resources/>