From a0d9e40869bc5c1cd4572c850c5b1000e5a3d125 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Sun, 19 Apr 2009 15:06:19 -0700
Subject: [PATCH] Add --filter-toc option

Add a toc_filter conversion option. When it is set, Table of Contents
entries whose titles are empty or match the given regular expression
are removed, together with all of their children.

TOC.remove() deletes entries from the same child lists that TOC.iter()
walks, so the filter loop iterates over a snapshot of the nodes.
Filtering the live generator skips the sibling that follows every
removed entry, leaving adjacent matching entries in the TOC.
---
 src/calibre/ebooks/conversion/plumber.py       |  9 +++++++++
 src/calibre/ebooks/oeb/base.py                 | 11 +++++++++++
 src/calibre/ebooks/oeb/transforms/structure.py | 10 ++++++++++
 3 files changed, 30 insertions(+)

diff --git a/src/calibre/ebooks/conversion/plumber.py b/src/calibre/ebooks/conversion/plumber.py
index 453591e433..3a2d39c314 100644
--- a/src/calibre/ebooks/conversion/plumber.py
+++ b/src/calibre/ebooks/conversion/plumber.py
@@ -176,6 +176,15 @@ OptionRecommendation(name='max_toc_links',
                 )
         ),
 
+OptionRecommendation(name='toc_filter',
+            recommended_value=None, level=OptionRecommendation.LOW,
+            help=_('Remove entries from the Table of Contents whose titles '
+            'match the specified regular expression. Matching entries and all '
+            'their children are removed.'
+                )
+        ),
+
+
 OptionRecommendation(name='chapter',
         recommended_value="//*[((name()='h1' or name()='h2') and "
               "re:test(., 'chapter|book|section|part', 'i')) or @class "
diff --git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py
index 85510e2127..70303470d7 100644
--- a/src/calibre/ebooks/oeb/base.py
+++ b/src/calibre/ebooks/oeb/base.py
@@ -1276,6 +1276,17 @@ class TOC(object):
         self.nodes.append(node)
         return node
 
+    def remove(self, node):
+        """Detach node from the tree below this node; return True if found."""
+        for child in self.nodes:
+            if child is node:
+                self.nodes.remove(child)
+                return True
+            else:
+                if child.remove(node):
+                    return True
+        return False
+
     def iter(self):
         """Iterate over this node and all descendants in depth-first order."""
         yield self
diff --git a/src/calibre/ebooks/oeb/transforms/structure.py b/src/calibre/ebooks/oeb/transforms/structure.py
index 0f1502ef03..6499a5e9c4 100644
--- a/src/calibre/ebooks/oeb/transforms/structure.py
+++ b/src/calibre/ebooks/oeb/transforms/structure.py
@@ -6,6 +6,8 @@ __license__ = 'GPL v3'
 __copyright__ = '2009, Kovid Goyal '
 __docformat__ = 'restructuredtext en'
 
+import re
+
 from lxml import etree
 from urlparse import urlparse
 
@@ -37,6 +39,14 @@ class DetectStructure(object):
                 self.log('Auto generated TOC with %d entries.' %
                         self.oeb.toc.count())
 
+        if opts.toc_filter is not None:
+            regexp = re.compile(opts.toc_filter)
+            # Iterate over a snapshot: remove() mutates the child lists that
+            # iter() is walking, which would skip the sibling after a removal.
+            for node in list(self.oeb.toc.iter()):
+                if not node.title or regexp.search(node.title) is not None:
+                    self.oeb.toc.remove(node)
+
     def detect_chapters(self):
         self.detected_chapters = []