Add --toc-filter option

This commit is contained in:
Kovid Goyal 2009-04-19 15:06:19 -07:00
parent 10a95db60a
commit a0d9e40869
3 changed files with 27 additions and 0 deletions

View File

@ -176,6 +176,15 @@ OptionRecommendation(name='max_toc_links',
)
),
# Conversion option holding a regular expression used to prune Table of
# Contents entries. The recommended value is None.
# NOTE(review): presumably None means "no filtering" -- confirm against the
# code that consumes opts.toc_filter.
OptionRecommendation(name='toc_filter',
recommended_value=None, level=OptionRecommendation.LOW,
help=_('Remove entries from the Table of Contents whose titles '
'match the specified regular expression. Matching entries and all '
'their children are removed.'
)
),
OptionRecommendation(name='chapter',
recommended_value="//*[((name()='h1' or name()='h2') and "
"re:test(., 'chapter|book|section|part', 'i')) or @class "

View File

@ -1276,6 +1276,16 @@ class TOC(object):
self.nodes.append(node)
return node
def remove(self, node):
    """Detach ``node`` from the tree rooted at this node.

    The direct children are checked in order; each child that is not
    ``node`` itself is asked to search its own subtree before the next
    sibling is examined. Removing a node drops its whole subtree with it.

    ``node`` is located by identity (``is``), never by equality.

    Returns True if ``node`` was found and removed, False otherwise
    (including when ``node`` is this node itself, which has no parent
    list to be removed from here).
    """
    for index, child in enumerate(self.nodes):
        if child is node:
            # Delete by position: list.remove() would scan the list again
            # and compare with ==, which could drop an earlier sibling
            # that merely compares equal to ``node``.
            del self.nodes[index]
            return True
        if child.remove(node):
            return True
    return False
def iter(self):
"""Iterate over this node and all descendants in depth-first order."""
yield self

View File

@ -6,6 +6,8 @@ __license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import re
from lxml import etree
from urlparse import urlparse
@ -37,6 +39,12 @@ class DetectStructure(object):
self.log('Auto generated TOC with %d entries.' %
self.oeb.toc.count())
if opts.toc_filter is not None:
    # Drop every TOC entry that has no title or whose title is matched
    # anywhere (re.search) by the user supplied regular expression.
    regexp = re.compile(opts.toc_filter)
    # Walk a snapshot of the tree: toc.iter() is a generator, so removing
    # nodes while it is still running can change the child lists it is
    # traversing and cause entries to be skipped.
    for node in list(self.oeb.toc.iter()):
        if not node.title or regexp.search(node.title) is not None:
            # Descendants of an entry that was already removed are still
            # visited; remove() simply finds nothing and returns False.
            self.oeb.toc.remove(node)
def detect_chapters(self):
self.detected_chapters = []