MOBI Input: Fix extraction of Table of Contents for MOBI files that have their TOC at the start of the book instead of the end

2025-08-05 08:40:13 -04:00 · 2009-03-03 19:18:30 -08:00 · 2009-03-03 19:18:30 -08:00 · 7e849659cd
commit 7e849659cd
parent 73af726c71
3 changed files with 9 additions and 5 deletions
--- a/src/calibre/ebooks/html.py
+++ b/src/calibre/ebooks/html.py
@@ -640,7 +640,7 @@ class Processor(Parser):
        name = self.htmlfile_map[self.htmlfile.path]
        href = 'content/'+name
-        # Add level 1 and level 2 TOC items
+        # Add level* TOC items
        counter = 0
        def elem_to_link(elem, href, counter):
@@ -711,6 +711,8 @@ class Processor(Parser):
            if len(toc) > 0:
                # Detected TOC entries using --level* options
                # so aborting all other toc processing
                return
        # Add chapters to TOC
        if not self.opts.no_chapters_in_toc:
--- a/src/calibre/ebooks/lrf/html/convert_from.py
+++ b/src/calibre/ebooks/lrf/html/convert_from.py
@@ -479,7 +479,7 @@ class HTMLConverter(object, LoggingInterface):
            pprop.update(self.pseudo_css[tagname])
        if tag.has_key("class"):
            cls = tag["class"].lower()
-            for cls in cls.split():            
+            for cls in cls.split():
                for classname in ["."+cls, tagname+"."+cls]:
                    if self.css.has_key(classname):
                        prop.update(self.css[classname])
--- a/src/calibre/ebooks/mobi/reader.py
+++ b/src/calibre/ebooks/mobi/reader.py
@@ -361,7 +361,7 @@ class MobiReader(object):
                        continue
                    if reached and x.tag == 'a':
                        href = x.get('href', '')
-                        if href:
+                        if href and re.match('\w+://', href) is None:
                            try:
                                text = u' '.join([t.strip() for t in \
                                                x.xpath('descendant::text()')])
@@ -370,6 +370,8 @@ class MobiReader(object):
                            text = ent_pat.sub(entity_to_unicode, text)
                            tocobj.add_item(toc.partition('#')[0], href[1:], 
                                            text)
                    if reached and x.get('class', None) == 'mbp_pagebreak':
                        break
            if tocobj is not None:
                opf.set_toc(tocobj)
@@ -435,7 +437,7 @@ class MobiReader(object):
    def replace_page_breaks(self):
        self.processed_html = self.PAGE_BREAK_PAT.sub(
-            '<div style="page-break-after: always; margin: 0; display: block" />',
+            '<div class="mbp_pagebreak" style="page-break-after: always; margin: 0; display: block" />',
            self.processed_html)
    def add_anchors(self):
@@ -521,7 +523,7 @@ def option_parser():
    parser = OptionParser(usage=_('%prog [options] myebook.mobi'))
    parser.add_option('-o', '--output-dir', default='.', 
                      help=_('Output directory. Defaults to current directory.'))
-    parser.add_option('--verbose', default=False, action='store_true',
+    parser.add_option('-v', '--verbose', default=False, action='store_true',
                      help='Useful for debugging.')
    return parser