MOBI Input: Fix extraction of Table of Contents for MOBI files that have their TOC at the start of the book instead of the end.

This commit is contained in:
Kovid Goyal 2009-03-03 19:18:30 -08:00
parent 73af726c71
commit 7e849659cd
3 changed files with 9 additions and 5 deletions

View File

@ -640,7 +640,7 @@ class Processor(Parser):
name = self.htmlfile_map[self.htmlfile.path]
href = 'content/'+name
# Add level 1 and level 2 TOC items
# Add level* TOC items
counter = 0
def elem_to_link(elem, href, counter):
@ -711,6 +711,8 @@ class Processor(Parser):
if len(toc) > 0:
# Detected TOC entries using --level* options
# so aborting all other toc processing
return
# Add chapters to TOC
if not self.opts.no_chapters_in_toc:

View File

@ -361,7 +361,7 @@ class MobiReader(object):
continue
if reached and x.tag == 'a':
href = x.get('href', '')
if href:
if href and re.match('\w+://', href) is None:
try:
text = u' '.join([t.strip() for t in \
x.xpath('descendant::text()')])
@ -370,6 +370,8 @@ class MobiReader(object):
text = ent_pat.sub(entity_to_unicode, text)
tocobj.add_item(toc.partition('#')[0], href[1:],
text)
if reached and x.get('class', None) == 'mbp_pagebreak':
break
if tocobj is not None:
opf.set_toc(tocobj)
@ -435,7 +437,7 @@ class MobiReader(object):
def replace_page_breaks(self):
self.processed_html = self.PAGE_BREAK_PAT.sub(
'<div style="page-break-after: always; margin: 0; display: block" />',
'<div class="mbp_pagebreak" style="page-break-after: always; margin: 0; display: block" />',
self.processed_html)
def add_anchors(self):
@ -521,7 +523,7 @@ def option_parser():
parser = OptionParser(usage=_('%prog [options] myebook.mobi'))
parser.add_option('-o', '--output-dir', default='.',
help=_('Output directory. Defaults to current directory.'))
parser.add_option('--verbose', default=False, action='store_true',
parser.add_option('-v', '--verbose', default=False, action='store_true',
help='Useful for debugging.')
return parser