MOBI Input: Fix extraction of the Table of Contents for MOBI files that have their TOC at the start of the book instead of the end

This commit is contained in:
Kovid Goyal 2009-03-03 19:18:30 -08:00
parent 73af726c71
commit 7e849659cd
3 changed files with 9 additions and 5 deletions

View File

@ -640,7 +640,7 @@ class Processor(Parser):
name = self.htmlfile_map[self.htmlfile.path] name = self.htmlfile_map[self.htmlfile.path]
href = 'content/'+name href = 'content/'+name
# Add level 1 and level 2 TOC items # Add level* TOC items
counter = 0 counter = 0
def elem_to_link(elem, href, counter): def elem_to_link(elem, href, counter):
@ -711,6 +711,8 @@ class Processor(Parser):
if len(toc) > 0: if len(toc) > 0:
# Detected TOC entries using --level* options
# so aborting all other toc processing
return return
# Add chapters to TOC # Add chapters to TOC
if not self.opts.no_chapters_in_toc: if not self.opts.no_chapters_in_toc:

View File

@ -479,7 +479,7 @@ class HTMLConverter(object, LoggingInterface):
pprop.update(self.pseudo_css[tagname]) pprop.update(self.pseudo_css[tagname])
if tag.has_key("class"): if tag.has_key("class"):
cls = tag["class"].lower() cls = tag["class"].lower()
for cls in cls.split(): for cls in cls.split():
for classname in ["."+cls, tagname+"."+cls]: for classname in ["."+cls, tagname+"."+cls]:
if self.css.has_key(classname): if self.css.has_key(classname):
prop.update(self.css[classname]) prop.update(self.css[classname])

View File

@ -361,7 +361,7 @@ class MobiReader(object):
continue continue
if reached and x.tag == 'a': if reached and x.tag == 'a':
href = x.get('href', '') href = x.get('href', '')
if href: if href and re.match('\w+://', href) is None:
try: try:
text = u' '.join([t.strip() for t in \ text = u' '.join([t.strip() for t in \
x.xpath('descendant::text()')]) x.xpath('descendant::text()')])
@ -370,6 +370,8 @@ class MobiReader(object):
text = ent_pat.sub(entity_to_unicode, text) text = ent_pat.sub(entity_to_unicode, text)
tocobj.add_item(toc.partition('#')[0], href[1:], tocobj.add_item(toc.partition('#')[0], href[1:],
text) text)
if reached and x.get('class', None) == 'mbp_pagebreak':
break
if tocobj is not None: if tocobj is not None:
opf.set_toc(tocobj) opf.set_toc(tocobj)
@ -435,7 +437,7 @@ class MobiReader(object):
def replace_page_breaks(self): def replace_page_breaks(self):
self.processed_html = self.PAGE_BREAK_PAT.sub( self.processed_html = self.PAGE_BREAK_PAT.sub(
'<div style="page-break-after: always; margin: 0; display: block" />', '<div class="mbp_pagebreak" style="page-break-after: always; margin: 0; display: block" />',
self.processed_html) self.processed_html)
def add_anchors(self): def add_anchors(self):
@ -521,7 +523,7 @@ def option_parser():
parser = OptionParser(usage=_('%prog [options] myebook.mobi')) parser = OptionParser(usage=_('%prog [options] myebook.mobi'))
parser.add_option('-o', '--output-dir', default='.', parser.add_option('-o', '--output-dir', default='.',
help=_('Output directory. Defaults to current directory.')) help=_('Output directory. Defaults to current directory.'))
parser.add_option('--verbose', default=False, action='store_true', parser.add_option('-v', '--verbose', default=False, action='store_true',
help='Useful for debugging.') help='Useful for debugging.')
return parser return parser