From b5b6f10c4861330652f546beb78f30173257cdf0 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 18 Jun 2008 10:42:19 -0700 Subject: [PATCH] Add option to detect chapters based on tagname and attributes --- src/calibre/ebooks/lrf/__init__.py | 7 ++++--- src/calibre/ebooks/lrf/html/convert_from.py | 15 +++++++++++++++ src/calibre/gui2/dialogs/lrf_single.py | 13 +++++++------ src/calibre/gui2/dialogs/lrf_single.ui | 15 ++++++++++++++- 4 files changed, 40 insertions(+), 10 deletions(-) diff --git a/src/calibre/ebooks/lrf/__init__.py b/src/calibre/ebooks/lrf/__init__.py index af93bca6cb..1cdba123d8 100644 --- a/src/calibre/ebooks/lrf/__init__.py +++ b/src/calibre/ebooks/lrf/__init__.py @@ -158,12 +158,13 @@ def option_parser(usage, gui_mode=False): chapter = parser.add_option_group('CHAPTER OPTIONS') chapter.add_option('--disable-chapter-detection', action='store_true', default=False, dest='disable_chapter_detection', - help=_('''Prevent the automatic insertion of page breaks''' - ''' before detected chapters.''')) + help=_('''Prevent the automatic detection chapters.''')) chapter.add_option('--chapter-regex', dest='chapter_regex', default='chapter|book|appendix', help=_('''The regular expression used to detect chapter titles.''' - ''' It is searched for in heading tags (h1-h6). Defaults to %default''')) + ''' It is searched for in heading tags (h1-h6). Defaults to %default''')) + chapter.add_option('--chapter-attr', default='$,,$', + help=_('Detect a chapter beginning at an element having the specified attribute. The format for this option is tagname regexp,attribute name,attribute value regexp. For example to match all heading tags that have the attribute class="chapter" you would use "h\d,class,chapter". Default is %default''')) chapter.add_option('--page-break-before-tag', dest='page_break', default='h[12]', help=_('''If html2lrf does not find any page breaks in the ''' '''html file and cannot detect chapter headings, it will ''' diff --git a/src/calibre/ebooks/lrf/html/convert_from.py b/src/calibre/ebooks/lrf/html/convert_from.py index aca8cf96b3..218ace2bf7 100644 --- a/src/calibre/ebooks/lrf/html/convert_from.py +++ b/src/calibre/ebooks/lrf/html/convert_from.py @@ -1427,6 +1427,18 @@ class HTMLConverter(object, LoggingInterface): return except KeyError: pass + if not self.disable_chapter_detection and \ + (self.chapter_attr[0].match(tagname) and \ + tag.has_key(self.chapter_attr[1]) and \ + self.chapter_attr[2].match(tag[self.chapter_attr[1]])): + self.log_debug('Detected chapter %s', tagname) + self.end_page() + self.page_break_found = True + + if self.options.add_chapters_to_toc: + self.extra_toc_entries.append((self.get_text(tag, + limit=1000), self.current_block)) + end_page = self.process_page_breaks(tag, tagname, tag_css) try: if tagname in ["title", "script", "meta", 'del', 'frameset']: @@ -1850,6 +1862,9 @@ def process_file(path, options, logger=None): re.compile('$') fpb = re.compile(options.force_page_break, re.IGNORECASE) if options.force_page_break else \ re.compile('$') + cq = options.chapter_attr.split(',') + options.chapter_attr = [re.compile(cq[0], re.IGNORECASE), cq[1], + re.compile(cq[2], re.IGNORECASE)] options.force_page_break = fpb options.link_exclude = le options.page_break = pb diff --git a/src/calibre/gui2/dialogs/lrf_single.py b/src/calibre/gui2/dialogs/lrf_single.py index bc5bdcf06b..c8895a4243 100644 --- a/src/calibre/gui2/dialogs/lrf_single.py +++ b/src/calibre/gui2/dialogs/lrf_single.py @@ -1,8 +1,8 @@ __license__ = 'GPL v3' __copyright__ = '2008, Kovid Goyal ' -import os, cPickle, codecs +import os, codecs -from PyQt4.QtCore import QObject, SIGNAL, Qt, QVariant, QByteArray +from PyQt4.QtCore import QObject, SIGNAL, Qt from PyQt4.QtGui import QAbstractSpinBox, QLineEdit, QCheckBox, QDialog, \ QPixmap, QTextEdit, QListWidgetItem, QIcon @@ -65,7 +65,7 @@ class LRFSingleDialog(QDialog, Ui_LRFSingleDialog): QObject.connect(self.categoryList, SIGNAL('itemEntered(QListWidgetItem *)'), self.show_category_help) QObject.connect(self.cover_button, SIGNAL("clicked(bool)"), self.select_cover) - self.categoryList.leaveEvent = self.reset_help + #self.categoryList.leaveEvent = self.reset_help self.reset_help() self.selected_format = None self.initialize_common() @@ -277,9 +277,9 @@ class LRFSingleDialog(QDialog, Ui_LRFSingleDialog): obj.setWhatsThis(help) self.option_map[guiname] = opt obj.__class__.enterEvent = show_item_help - obj.leaveEvent = self.reset_help + #obj.leaveEvent = self.reset_help self.preprocess.__class__.enterEvent = show_item_help - self.preprocess.leaveEvent = self.reset_help + #self.preprocess.leaveEvent = self.reset_help def show_category_help(self, item): @@ -293,7 +293,8 @@ class LRFSingleDialog(QDialog, Ui_LRFSingleDialog): self.set_help(help[text]) def set_help(self, msg): - self.help_view.setHtml('%s'%(msg,)) + if msg and getattr(msg, 'strip', lambda:True)(): + self.help_view.setHtml('%s'%(msg,)) def reset_help(self, *args): self.set_help(_('No help available')) diff --git a/src/calibre/gui2/dialogs/lrf_single.ui b/src/calibre/gui2/dialogs/lrf_single.ui index 080970b96f..d0d304ac6e 100644 --- a/src/calibre/gui2/dialogs/lrf_single.ui +++ b/src/calibre/gui2/dialogs/lrf_single.ui @@ -115,7 +115,7 @@ - 2 + 0 @@ -951,6 +951,19 @@ + + + + Detect chapter &at tag: + + + gui_chapter_attr + + + + + +