From 7e849659cd2e248e951afc4bcb8627e883a8716e Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 3 Mar 2009 19:18:30 -0800 Subject: [PATCH] MOBI Input:Fix extraction of Table of Contents for MOBI files that have their TOC at the start of the book instead of the end --- src/calibre/ebooks/html.py | 4 +++- src/calibre/ebooks/lrf/html/convert_from.py | 2 +- src/calibre/ebooks/mobi/reader.py | 8 +++++--- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/src/calibre/ebooks/html.py b/src/calibre/ebooks/html.py index 40e882c2af..5329e8ed86 100644 --- a/src/calibre/ebooks/html.py +++ b/src/calibre/ebooks/html.py @@ -640,7 +640,7 @@ class Processor(Parser): name = self.htmlfile_map[self.htmlfile.path] href = 'content/'+name - # Add level 1 and level 2 TOC items + # Add level* TOC items counter = 0 def elem_to_link(elem, href, counter): @@ -711,6 +711,8 @@ class Processor(Parser): if len(toc) > 0: + # Detected TOC entries using --level* options + # so aborting all other toc processing return # Add chapters to TOC if not self.opts.no_chapters_in_toc: diff --git a/src/calibre/ebooks/lrf/html/convert_from.py b/src/calibre/ebooks/lrf/html/convert_from.py index 48c2ffe993..2bd63d1d8f 100644 --- a/src/calibre/ebooks/lrf/html/convert_from.py +++ b/src/calibre/ebooks/lrf/html/convert_from.py @@ -479,7 +479,7 @@ class HTMLConverter(object, LoggingInterface): pprop.update(self.pseudo_css[tagname]) if tag.has_key("class"): cls = tag["class"].lower() - for cls in cls.split(): + for cls in cls.split(): for classname in ["."+cls, tagname+"."+cls]: if self.css.has_key(classname): prop.update(self.css[classname]) diff --git a/src/calibre/ebooks/mobi/reader.py b/src/calibre/ebooks/mobi/reader.py index df728e400e..81bd510694 100644 --- a/src/calibre/ebooks/mobi/reader.py +++ b/src/calibre/ebooks/mobi/reader.py @@ -361,7 +361,7 @@ class MobiReader(object): continue if reached and x.tag == 'a': href = x.get('href', '') - if href: + if href and re.match('\w+://', href) is None: try: text = u' '.join([t.strip() for t in \ x.xpath('descendant::text()')]) @@ -370,6 +370,8 @@ class MobiReader(object): text = ent_pat.sub(entity_to_unicode, text) tocobj.add_item(toc.partition('#')[0], href[1:], text) + if reached and x.get('class', None) == 'mbp_pagebreak': + break if tocobj is not None: opf.set_toc(tocobj) @@ -435,7 +437,7 @@ class MobiReader(object): def replace_page_breaks(self): self.processed_html = self.PAGE_BREAK_PAT.sub( - '
', + '
', self.processed_html) def add_anchors(self): @@ -521,7 +523,7 @@ def option_parser(): parser = OptionParser(usage=_('%prog [options] myebook.mobi')) parser.add_option('-o', '--output-dir', default='.', help=_('Output directory. Defaults to current directory.')) - parser.add_option('--verbose', default=False, action='store_true', + parser.add_option('-v', '--verbose', default=False, action='store_true', help='Useful for debugging.') return parser