Add option to detect chapters based on tagname and attributes

This commit is contained in:
Kovid Goyal 2008-06-18 10:42:19 -07:00
parent 2265152c77
commit b5b6f10c48
4 changed files with 40 additions and 10 deletions

View File

@ -158,12 +158,13 @@ def option_parser(usage, gui_mode=False):
chapter = parser.add_option_group('CHAPTER OPTIONS') chapter = parser.add_option_group('CHAPTER OPTIONS')
chapter.add_option('--disable-chapter-detection', action='store_true', chapter.add_option('--disable-chapter-detection', action='store_true',
default=False, dest='disable_chapter_detection', default=False, dest='disable_chapter_detection',
help=_('''Prevent the automatic insertion of page breaks''' help=_('''Prevent the automatic detection of chapters.'''))
''' before detected chapters.'''))
chapter.add_option('--chapter-regex', dest='chapter_regex', chapter.add_option('--chapter-regex', dest='chapter_regex',
default='chapter|book|appendix', default='chapter|book|appendix',
help=_('''The regular expression used to detect chapter titles.''' help=_('''The regular expression used to detect chapter titles.'''
''' It is searched for in heading tags (h1-h6). Defaults to %default''')) ''' It is searched for in heading tags (h1-h6). Defaults to %default'''))
chapter.add_option('--chapter-attr', default='$,,$',
help=_('Detect a chapter beginning at an element having the specified attribute. The format for this option is tagname regexp,attribute name,attribute value regexp. For example to match all heading tags that have the attribute class="chapter" you would use "h\d,class,chapter". Default is %default'''))
chapter.add_option('--page-break-before-tag', dest='page_break', default='h[12]', chapter.add_option('--page-break-before-tag', dest='page_break', default='h[12]',
help=_('''If html2lrf does not find any page breaks in the ''' help=_('''If html2lrf does not find any page breaks in the '''
'''html file and cannot detect chapter headings, it will ''' '''html file and cannot detect chapter headings, it will '''

View File

@ -1427,6 +1427,18 @@ class HTMLConverter(object, LoggingInterface):
return return
except KeyError: except KeyError:
pass pass
if not self.disable_chapter_detection and \
(self.chapter_attr[0].match(tagname) and \
tag.has_key(self.chapter_attr[1]) and \
self.chapter_attr[2].match(tag[self.chapter_attr[1]])):
self.log_debug('Detected chapter %s', tagname)
self.end_page()
self.page_break_found = True
if self.options.add_chapters_to_toc:
self.extra_toc_entries.append((self.get_text(tag,
limit=1000), self.current_block))
end_page = self.process_page_breaks(tag, tagname, tag_css) end_page = self.process_page_breaks(tag, tagname, tag_css)
try: try:
if tagname in ["title", "script", "meta", 'del', 'frameset']: if tagname in ["title", "script", "meta", 'del', 'frameset']:
@ -1850,6 +1862,9 @@ def process_file(path, options, logger=None):
re.compile('$') re.compile('$')
fpb = re.compile(options.force_page_break, re.IGNORECASE) if options.force_page_break else \ fpb = re.compile(options.force_page_break, re.IGNORECASE) if options.force_page_break else \
re.compile('$') re.compile('$')
cq = options.chapter_attr.split(',')
options.chapter_attr = [re.compile(cq[0], re.IGNORECASE), cq[1],
re.compile(cq[2], re.IGNORECASE)]
options.force_page_break = fpb options.force_page_break = fpb
options.link_exclude = le options.link_exclude = le
options.page_break = pb options.page_break = pb

View File

@ -1,8 +1,8 @@
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>' __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
import os, cPickle, codecs import os, codecs
from PyQt4.QtCore import QObject, SIGNAL, Qt, QVariant, QByteArray from PyQt4.QtCore import QObject, SIGNAL, Qt
from PyQt4.QtGui import QAbstractSpinBox, QLineEdit, QCheckBox, QDialog, \ from PyQt4.QtGui import QAbstractSpinBox, QLineEdit, QCheckBox, QDialog, \
QPixmap, QTextEdit, QListWidgetItem, QIcon QPixmap, QTextEdit, QListWidgetItem, QIcon
@ -65,7 +65,7 @@ class LRFSingleDialog(QDialog, Ui_LRFSingleDialog):
QObject.connect(self.categoryList, SIGNAL('itemEntered(QListWidgetItem *)'), QObject.connect(self.categoryList, SIGNAL('itemEntered(QListWidgetItem *)'),
self.show_category_help) self.show_category_help)
QObject.connect(self.cover_button, SIGNAL("clicked(bool)"), self.select_cover) QObject.connect(self.cover_button, SIGNAL("clicked(bool)"), self.select_cover)
self.categoryList.leaveEvent = self.reset_help #self.categoryList.leaveEvent = self.reset_help
self.reset_help() self.reset_help()
self.selected_format = None self.selected_format = None
self.initialize_common() self.initialize_common()
@ -277,9 +277,9 @@ class LRFSingleDialog(QDialog, Ui_LRFSingleDialog):
obj.setWhatsThis(help) obj.setWhatsThis(help)
self.option_map[guiname] = opt self.option_map[guiname] = opt
obj.__class__.enterEvent = show_item_help obj.__class__.enterEvent = show_item_help
obj.leaveEvent = self.reset_help #obj.leaveEvent = self.reset_help
self.preprocess.__class__.enterEvent = show_item_help self.preprocess.__class__.enterEvent = show_item_help
self.preprocess.leaveEvent = self.reset_help #self.preprocess.leaveEvent = self.reset_help
def show_category_help(self, item): def show_category_help(self, item):
@ -293,6 +293,7 @@ class LRFSingleDialog(QDialog, Ui_LRFSingleDialog):
self.set_help(help[text]) self.set_help(help[text])
def set_help(self, msg): def set_help(self, msg):
if msg and getattr(msg, 'strip', lambda:True)():
self.help_view.setHtml('<html><body>%s</body></html>'%(msg,)) self.help_view.setHtml('<html><body>%s</body></html>'%(msg,))
def reset_help(self, *args): def reset_help(self, *args):

View File

@ -115,7 +115,7 @@
<item row="0" column="0" > <item row="0" column="0" >
<widget class="QStackedWidget" name="stack" > <widget class="QStackedWidget" name="stack" >
<property name="currentIndex" > <property name="currentIndex" >
<number>2</number> <number>0</number>
</property> </property>
<widget class="QWidget" name="metadata_page" > <widget class="QWidget" name="metadata_page" >
<property name="geometry" > <property name="geometry" >
@ -951,6 +951,19 @@
<item row="2" column="1" > <item row="2" column="1" >
<widget class="QLineEdit" name="gui_force_page_break_before_attr" /> <widget class="QLineEdit" name="gui_force_page_break_before_attr" />
</item> </item>
<item row="3" column="0" >
<widget class="QLabel" name="label_28" >
<property name="text" >
<string>Detect chapter &amp;at tag:</string>
</property>
<property name="buddy" >
<cstring>gui_chapter_attr</cstring>
</property>
</widget>
</item>
<item row="3" column="1" >
<widget class="QLineEdit" name="gui_chapter_attr" />
</item>
</layout> </layout>
</widget> </widget>
</item> </item>