Mirror of https://github.com/kovidgoyal/calibre.git (synced 2025-07-09 03:04:10 -04:00)
Ported structure detection code and added plugin for FB2 input.
parent 02cfaac014
commit 1770f7bf74
@@ -281,6 +281,7 @@ from calibre.ebooks.mobi.input import MOBIInput
from calibre.ebooks.pdf.input import PDFInput
from calibre.ebooks.txt.input import TXTInput
from calibre.ebooks.lit.input import LITInput
from calibre.ebooks.fb2.input import FB2Input
from calibre.ebooks.html.input import HTMLInput
from calibre.ebooks.oeb.output import OEBOutput
from calibre.ebooks.txt.output import TXTOutput
@@ -288,7 +289,8 @@ from calibre.ebooks.pdf.output import PDFOutput
from calibre.customize.profiles import input_profiles, output_profiles

plugins = [HTML2ZIP, EPUBInput, MOBIInput, PDFInput, HTMLInput,
           TXTInput, OEBOutput, TXTOutput, PDFOutput, LITInput]
           TXTInput, OEBOutput, TXTOutput, PDFOutput, LITInput,
           FB2Input]
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
        x.__name__.endswith('MetadataReader')]
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
@@ -119,6 +119,24 @@ def add_pipeline_options(parser, plumber):
                  ]
                  ),

              'STRUCTURE DETECTION' : (
                  _('Control auto-detection of document structure.'),
                  [
                      'dont_split_on_page_breaks', 'chapter', 'chapter_mark',
                  ]
                  ),

              'TABLE OF CONTENTS' : (
                  _('Control the automatic generation of a Table of Contents. By '
                    'default, if the source file has a Table of Contents, it will '
                    'be used in preference to the automatically generated one.'),
                  [
                      'level1_toc', 'level2_toc', 'level3_toc',
                      'toc_threshold', 'max_toc_links', 'no_chapters_in_toc',
                      'use_auto_toc',
                  ]
                  ),

              'METADATA' : (_('Options to set metadata in the output'),
                  plumber.metadata_option_names,
                  ),
@@ -130,7 +148,8 @@ def add_pipeline_options(parser, plumber):

              }

    group_order = ['', 'LOOK AND FEEL', 'METADATA', 'DEBUG']
    group_order = ['', 'LOOK AND FEEL', 'STRUCTURE DETECTION',
                   'TABLE OF CONTENTS', 'METADATA', 'DEBUG']

    for group in group_order:
        desc, options = groups[group]
@@ -163,6 +182,10 @@ def main(args=sys.argv):
    add_pipeline_options(parser, plumber)

    opts = parser.parse_args(args)[0]
    y = lambda q : os.path.abspath(os.path.expanduser(q))
    for x in ('read_metadata_from_opf', 'cover'):
        if getattr(opts, x, None) is not None:
            setattr(opts, x, y(getattr(opts, x)))
    recommendations = [(n.dest, getattr(opts, n.dest),
                        OptionRecommendation.HIGH) \
                            for n in parser.options_iter()
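Note on the grouping above: the groups dict maps a group title to a description and a list of option names, and group_order controls the order in which the groups are attached to the parser. A minimal standalone sketch of the same pattern using only the standard-library optparse module (the group title, description and option names mirror this hunk; the wiring itself is illustrative, not calibre's add_pipeline_options):

# Illustrative only: a named option group in plain optparse.
from optparse import OptionParser, OptionGroup

parser = OptionParser()
group = OptionGroup(parser, 'STRUCTURE DETECTION',
        'Control auto-detection of document structure.')
group.add_option('--chapter', dest='chapter')
group.add_option('--chapter-mark', dest='chapter_mark', default='pagebreak',
        choices=['pagebreak', 'rule', 'both', 'none'])
parser.add_option_group(group)

opts, args = parser.parse_args(['--chapter-mark', 'rule'])
print opts.chapter_mark   # -> rule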
@@ -121,6 +121,88 @@ OptionRecommendation(name='dont_split_on_page_breaks',
        )
    ),

OptionRecommendation(name='level1_toc',
        recommended_value=None, level=OptionRecommendation.LOW,
        help=_('XPath expression that specifies all tags that '
            'should be added to the Table of Contents at level one. If '
            'this is specified, it takes precedence over other forms '
            'of auto-detection.'
        )
    ),

OptionRecommendation(name='level2_toc',
        recommended_value=None, level=OptionRecommendation.LOW,
        help=_('XPath expression that specifies all tags that should be '
            'added to the Table of Contents at level two. Each entry is added '
            'under the previous level one entry.'
        )
    ),

OptionRecommendation(name='level3_toc',
        recommended_value=None, level=OptionRecommendation.LOW,
        help=_('XPath expression that specifies all tags that should be '
            'added to the Table of Contents at level three. Each entry '
            'is added under the previous level two entry.'
        )
    ),

OptionRecommendation(name='use_auto_toc',
        recommended_value=False, level=OptionRecommendation.LOW,
        help=_('Normally, if the source file already has a Table of '
            'Contents, it is used in preference to the auto-generated one. '
            'With this option, the auto-generated one is always used.'
        )
    ),

OptionRecommendation(name='no_chapters_in_toc',
        recommended_value=False, level=OptionRecommendation.LOW,
        help=_("Don't add auto-detected chapters to the Table of "
            'Contents.'
        )
    ),

OptionRecommendation(name='toc_threshold',
        recommended_value=6, level=OptionRecommendation.LOW,
        help=_(
            'If fewer than this number of chapters is detected, then links '
            'are added to the Table of Contents. Default: %default')
    ),

OptionRecommendation(name='max_toc_links',
        recommended_value=50, level=OptionRecommendation.LOW,
        help=_('Maximum number of links to insert into the TOC. Set to 0 '
            'to disable. Default is: %default. Links are only added to the '
            'TOC if less than the threshold number of chapters were detected.'
        )
    ),

OptionRecommendation(name='chapter',
        recommended_value="//*[((name()='h1' or name()='h2') and "
            "re:test(., 'chapter|book|section|part', 'i')) or @class "
            "= 'chapter']", level=OptionRecommendation.LOW,
        help=_('An XPath expression to detect chapter titles. The default '
            'is to consider <h1> or <h2> tags that contain the words '
            '"chapter","book","section" or "part" as chapter titles as '
            'well as any tags that have class="chapter". The expression '
            'used must evaluate to a list of elements. To disable chapter '
            'detection, use the expression "/". See the XPath Tutorial '
            'in the calibre User Manual for further help on using this '
            'feature.'
        )
    ),

OptionRecommendation(name='chapter_mark',
        recommended_value='pagebreak', level=OptionRecommendation.LOW,
        choices=['pagebreak', 'rule', 'both', 'none'],
        help=_('Specify how to mark detected chapters. A value of '
            '"pagebreak" will insert page breaks before chapters. '
            'A value of "rule" will insert a line before chapters. '
            'A value of "none" will disable chapter marking and a '
            'value of "both" will use both page breaks and lines '
            'to mark chapters.')
    ),


OptionRecommendation(name='read_metadata_from_opf',
        recommended_value=None, level=OptionRecommendation.LOW,
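Note on the default value of the 'chapter' recommendation above: re:test() comes from the EXSLT regular-expressions extension, which lxml supports once the corresponding namespace is registered (the same namespace this commit adds to XPNSMAP as RE_NS further down). A self-contained sketch of evaluating that default expression with plain lxml:

# Standalone illustration: run the default 'chapter' XPath against a small
# HTML fragment; only the EXSLT regular-expressions namespace is needed.
from lxml import etree

EXPR = ("//*[((name()='h1' or name()='h2') and "
        "re:test(., 'chapter|book|section|part', 'i')) or @class "
        "= 'chapter']")
RE_NS = 'http://exslt.org/regular-expressions'

root = etree.HTML('''
    <body>
      <h1>Chapter One</h1>
      <h2>Epilogue</h2>
      <div class="chapter">Afterword</div>
    </body>''')
detect = etree.XPath(EXPR, namespaces={'re': RE_NS})
for elem in detect(root):
    print elem.tag   # h1, div -- the h2 does not match the word list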
@@ -130,6 +212,7 @@ OptionRecommendation(name='read_metadata_from_opf',
        'file.')
    ),

OptionRecommendation(name='title',
        recommended_value=None, level=OptionRecommendation.LOW,
        help=_('Set the title.')),
@@ -237,6 +320,7 @@ OptionRecommendation(name='language',
            rec = self.get_option_by_name(name)
            if rec is not None and rec.level <= level:
                rec.recommended_value = val
                rec.level = level

    def merge_ui_recommendations(self, recommendations):
        '''
@@ -248,6 +332,7 @@ OptionRecommendation(name='language',
            rec = self.get_option_by_name(name)
            if rec is not None and rec.level <= level and rec.level < rec.HIGH:
                rec.recommended_value = val
                rec.level = level

    def read_user_metadata(self):
        '''
@@ -332,6 +417,9 @@ OptionRecommendation(name='language',
        self.opts.source = self.opts.input_profile
        self.opts.dest = self.opts.output_profile

        from calibre.ebooks.oeb.transforms.structure import DetectStructure
        DetectStructure()(self.oeb, self.opts)

        from calibre.ebooks.oeb.transforms.flatcss import CSSFlattener
        fbase = self.opts.base_font_size
        if fbase == 0:
@@ -364,6 +452,8 @@ OptionRecommendation(name='language',
        trimmer = ManifestTrimmer()
        trimmer(self.oeb, self.opts)

        self.oeb.toc.rationalize_play_orders()

        self.log.info('Creating %s...'%self.output_plugin.name)
        self.output_plugin.convert(self.oeb, self.output, self.input_plugin,
                self.opts, self.log)
@@ -384,4 +474,3 @@ def create_oebbook(log, path_or_stream, opts, reader=None):

    reader()(oeb, path_or_stream)
    return oeb
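Note: as the hunk above shows, pipeline transforms such as DetectStructure, CSSFlattener and ManifestTrimmer are invoked as plain callables taking the OEB book and the merged options. A minimal sketch of that convention (the class below is illustrative only, not calibre code):

# Illustrative transform skeleton: __call__ receives the OEB book and the
# merged options and mutates the book in place.
class ExampleTransform(object):

    def __call__(self, oeb, opts):
        oeb.log('Running example transform...')
        for item in oeb.spine:
            # inspect or rewrite item.data (an lxml tree) here
            pass

# invoked the same way as DetectStructure in the hunk above:
#   ExampleTransform()(self.oeb, self.opts)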
@@ -15,88 +15,15 @@ from calibre.ebooks import DRMError
from calibre.ebooks.epub import config as common_config
from calibre.ebooks.epub.from_html import convert as html2epub, find_html_index
from calibre.ptempfile import TemporaryDirectory
from calibre.ebooks.metadata import MetaInformation
from calibre.ebooks.metadata.opf2 import OPFCreator, OPF
from calibre.utils.zipfile import ZipFile
from calibre.customize.ui import run_plugins_on_preprocess

def lit2opf(path, tdir, opts):
    from calibre.ebooks.lit.reader import LitReader
    print 'Exploding LIT file:', path
    reader = LitReader(path)
    reader.extract_content(tdir, False)
    opf = None
    for opf in walk(tdir):
        if opf.lower().endswith('.opf'):
            break
    if not opf.endswith('.opf'):
        opf = None
    if opf is not None: # Check for url-quoted filenames
        _opf = OPF(opf, os.path.dirname(opf))
        replacements = []
        for item in _opf.itermanifest():
            href = item.get('href', '')
            path = os.path.join(os.path.dirname(opf), *(href.split('/')))
            if not os.path.exists(path) and os.path.exists(path.replace('&', '%26')):
                npath = path
                path = path.replace('&', '%26')
                replacements.append((path, npath))
        if replacements:
            print 'Fixing quoted filenames...'
            for path, npath in replacements:
                if os.path.exists(path):
                    os.rename(path, npath)
            for f in walk(tdir):
                with open(f, 'r+b') as f:
                    raw = f.read()
                    for path, npath in replacements:
                        raw = raw.replace(os.path.basename(path), os.path.basename(npath))
                    f.seek(0)
                    f.truncate()
                    f.write(raw)
    return opf

def mobi2opf(path, tdir, opts):
    from calibre.ebooks.mobi.reader import MobiReader
    print 'Exploding MOBI file:', path.encode('utf-8') if isinstance(path, unicode) else path
    reader = MobiReader(path)
    reader.extract_content(tdir)
    files = list(walk(tdir))
    opts.encoding = 'utf-8'
    for f in files:
        if f.lower().endswith('.opf'):
            return f
    html_pat = re.compile(r'\.(x){0,1}htm(l){0,1}', re.IGNORECASE)
    hf = [f for f in files if html_pat.match(os.path.splitext(f)[1]) is not None]
    mi = MetaInformation(os.path.splitext(os.path.basename(path))[0], [_('Unknown')])
    opf = OPFCreator(tdir, mi)
    opf.create_manifest([(hf[0], None)])
    opf.create_spine([hf[0]])
    ans = os.path.join(tdir, 'metadata.opf')
    opf.render(open(ans, 'wb'))
    return ans

def fb22opf(path, tdir, opts):
    from calibre.ebooks.lrf.fb2.convert_from import to_html
    print 'Converting FB2 to HTML...'
    return to_html(path, tdir)

def rtf2opf(path, tdir, opts):
    from calibre.ebooks.lrf.rtf.convert_from import generate_html
    generate_html(path, tdir)
    return os.path.join(tdir, 'metadata.opf')

def txt2opf(path, tdir, opts):
    from calibre.ebooks.lrf.txt.convert_from import generate_html
    generate_html(path, opts.encoding, tdir)
    return os.path.join(tdir, 'metadata.opf')

def pdf2opf(path, tdir, opts):
    from calibre.ebooks.lrf.pdf.convert_from import generate_html
    generate_html(path, tdir)
    opts.dont_split_on_page_breaks = True
    return os.path.join(tdir, 'metadata.opf')

def epub2opf(path, tdir, opts):
    zf = ZipFile(path)
    zf.extractall(tdir)
@@ -110,35 +37,23 @@ def epub2opf(path, tdir, opts):
    if opf and os.path.exists(encfile):
        if not process_encryption(encfile, opf):
            raise DRMError(os.path.basename(path))

    if opf is None:
        raise ValueError('%s is not a valid EPUB file'%path)
    return opf

def odt2epub(path, tdir, opts):
    from calibre.ebooks.odt.to_oeb import Extract
    opts.encoding = 'utf-8'
    return Extract()(path, tdir)

MAP = {
    'lit' : lit2opf,
    'mobi' : mobi2opf,
    'prc' : mobi2opf,
    'azw' : mobi2opf,
    'fb2' : fb22opf,
    'rtf' : rtf2opf,
    'txt' : txt2opf,
    'pdf' : pdf2opf,
    'epub' : epub2opf,
    'odt' : odt2epub,
    }
SOURCE_FORMATS = ['lit', 'mobi', 'prc', 'azw', 'fb2', 'odt', 'rtf',
SOURCE_FORMATS = ['lit', 'mobi', 'prc', 'azw', 'fb2', 'odt', 'rtf',
                  'txt', 'pdf', 'rar', 'zip', 'oebzip', 'htm', 'html', 'epub']

def unarchive(path, tdir):
    extract(path, tdir)
    files = list(walk(tdir))

    for ext in ['opf'] + list(MAP.keys()):
        for f in files:
            if f.lower().endswith('.'+ext):
@@ -147,32 +62,32 @@ def unarchive(path, tdir):
                return f, ext
    return find_html_index(files)

def any2epub(opts, path, notification=None, create_epub=True,
def any2epub(opts, path, notification=None, create_epub=True,
             oeb_cover=False, extract_to=None):
    path = run_plugins_on_preprocess(path)
    ext = os.path.splitext(path)[1]
    if not ext:
        raise ValueError('Unknown file type: '+path)
    ext = ext.lower()[1:]

    if opts.output is None:
        opts.output = os.path.splitext(os.path.basename(path))[0]+'.epub'

    with nested(TemporaryDirectory('_any2epub1'), TemporaryDirectory('_any2epub2')) as (tdir1, tdir2):
        if ext in ['rar', 'zip', 'oebzip']:
            path, ext = unarchive(path, tdir1)
            print 'Found %s file in archive'%(ext.upper())

        if ext in MAP.keys():
            path = MAP[ext](path, tdir2, opts)
            ext = 'opf'

        if re.match(r'((x){0,1}htm(l){0,1})|opf', ext) is None:
            raise ValueError('Conversion from %s is not supported'%ext.upper())

        print 'Creating EPUB file...'
        html2epub(path, opts, notification=notification,
        html2epub(path, opts, notification=notification,
                  create_epub=create_epub, oeb_cover=oeb_cover,
                  extract_to=extract_to)
src/calibre/ebooks/fb2/input.py (new file, 74 lines)
@@ -0,0 +1,74 @@
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2008, Anatoly Shipitsin <norguhtar at gmail.com>'
"""
Convert .fb2 files to .lrf
"""
import os
from base64 import b64decode
from lxml import etree

from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
from calibre import guess_type

FB2NS = 'http://www.gribuser.ru/xml/fictionbook/2.0'

class FB2Input(InputFormatPlugin):

    name = 'FB2 Input'
    author = 'Anatoly Shipitsin'
    description = 'Convert FB2 files to HTML'
    file_types = set(['fb2'])

    recommendations = set([
        ('level1_toc', '//h:h1', OptionRecommendation.MED),
        ('level2_toc', '//h:h2', OptionRecommendation.MED),
        ('level3_toc', '//h:h3', OptionRecommendation.MED),
        ])

    def convert(self, stream, options, file_ext, log,
                accelerators):
        from calibre.resources import fb2_xsl
        from calibre.ebooks.metadata.opf2 import OPFCreator
        from calibre.ebooks.metadata.meta import get_metadata
        from calibre.ebooks.oeb.base import XLINK_NS
        NAMESPACES = {'f':FB2NS, 'l':XLINK_NS}

        log.debug('Parsing XML...')
        parser = etree.XMLParser(recover=True, no_network=True)
        doc = etree.parse(stream, parser)
        self.extract_embedded_content(doc)
        log.debug('Converting XML to HTML...')
        styledoc = etree.fromstring(fb2_xsl)

        transform = etree.XSLT(styledoc)
        result = transform(doc)
        open('index.xhtml', 'wb').write(transform.tostring(result))
        stream.seek(0)
        mi = get_metadata(stream, 'fb2')
        if not mi.title:
            mi.title = _('Unknown')
        if not mi.authors:
            mi.authors = [_('Unknown')]
        opf = OPFCreator(os.getcwdu(), mi)
        entries = [(f, guess_type(f)[0]) for f in os.listdir('.')]
        opf.create_manifest(entries)
        opf.create_spine(['index.xhtml'])

        for img in doc.xpath('//f:coverpage/f:image', namespaces=NAMESPACES):
            href = img.get('{%s}href'%XLINK_NS, img.get('href', None))
            if href is not None:
                if href.startswith('#'):
                    href = href[1:]
                opf.guide.set_cover(os.path.abspath(href))

        opf.render(open('metadata.opf', 'wb'))
        return os.path.join(os.getcwd(), 'metadata.opf')

    def extract_embedded_content(self, doc):
        for elem in doc.xpath('./*'):
            if 'binary' in elem.tag and elem.attrib.has_key('id'):
                fname = elem.attrib['id']
                data = b64decode(elem.text.strip())
                open(fname, 'wb').write(data)
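Note on FB2Input.convert() above: the heavy lifting is a single XSLT transform of the FB2 document into HTML using calibre's fb2_xsl resource. A self-contained sketch of the same parse/transform/serialize pattern, with a toy stylesheet standing in for fb2_xsl (assumption: the real stylesheet is far more complete):

# Illustration only: convert a minimal FB2 document to HTML with lxml,
# mirroring the parser/XSLT steps used by FB2Input.convert().
from lxml import etree

XSL = '''<xsl:stylesheet version="1.0"
    xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
    xmlns:fb="http://www.gribuser.ru/xml/fictionbook/2.0">
  <xsl:template match="/">
    <html><body>
      <h1><xsl:value-of select="//fb:book-title"/></h1>
    </body></html>
  </xsl:template>
</xsl:stylesheet>'''

FB2 = '''<FictionBook xmlns="http://www.gribuser.ru/xml/fictionbook/2.0">
  <description><title-info><book-title>Example</book-title></title-info></description>
</FictionBook>'''

parser = etree.XMLParser(recover=True, no_network=True)
doc = etree.fromstring(FB2, parser)
transform = etree.XSLT(etree.fromstring(XSL))
result = transform(doc)
print etree.tostring(result)   # <html><body><h1>Example</h1></body></html>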
@@ -1,125 +0,0 @@
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2008, Anatoly Shipitsin <norguhtar at gmail.com>'
"""
Convert .fb2 files to .lrf
"""
import os, sys, shutil, logging
from base64 import b64decode
from lxml import etree

from calibre.ebooks.lrf import option_parser as lrf_option_parser
from calibre.ebooks.metadata.meta import get_metadata
from calibre.ebooks.lrf.html.convert_from import process_file as html_process_file
from calibre import setup_cli_handlers
from calibre.resources import fb2_xsl
from calibre.ptempfile import PersistentTemporaryDirectory
from calibre.ebooks.metadata.opf import OPFCreator
from calibre.ebooks.metadata import MetaInformation


def option_parser():
    parser = lrf_option_parser(
        _('''%prog [options] mybook.fb2


%prog converts mybook.fb2 to mybook.lrf'''))
    parser.add_option('--debug-html-generation', action='store_true', default=False,
                      dest='debug_html_generation', help=_('Print generated HTML to stdout and quit.'))
    parser.add_option('--keep-intermediate-files', action='store_true', default=False,
                      help=_('Keep generated HTML files after completing conversion to LRF.'))
    return parser

def extract_embedded_content(doc):
    for elem in doc.xpath('./*'):
        if 'binary' in elem.tag and elem.attrib.has_key('id'):
            fname = elem.attrib['id']
            data = b64decode(elem.text.strip())
            open(fname, 'wb').write(data)

def to_html(fb2file, tdir):
    fb2file = os.path.abspath(fb2file)
    cwd = os.getcwd()
    try:
        os.chdir(tdir)
        print 'Parsing XML...'
        parser = etree.XMLParser(recover=True, no_network=True)
        doc = etree.parse(fb2file, parser)
        extract_embedded_content(doc)
        print 'Converting XML to HTML...'
        styledoc = etree.fromstring(fb2_xsl)

        transform = etree.XSLT(styledoc)
        result = transform(doc)
        open('index.html', 'wb').write(transform.tostring(result))
        try:
            mi = get_metadata(open(fb2file, 'rb'), 'fb2')
        except:
            mi = MetaInformation(None, None)
        if not mi.title:
            mi.title = os.path.splitext(os.path.basename(fb2file))[0]
        if not mi.authors:
            mi.authors = [_('Unknown')]
        opf = OPFCreator(tdir, mi)
        opf.create_manifest([('index.html', None)])
        opf.create_spine(['index.html'])
        opf.render(open('metadata.opf', 'wb'))
        return os.path.join(tdir, 'metadata.opf')
    finally:
        os.chdir(cwd)


def generate_html(fb2file, encoding, logger):
    tdir = PersistentTemporaryDirectory('_fb22lrf')
    to_html(fb2file, tdir)
    return os.path.join(tdir, 'index.html')

def process_file(path, options, logger=None):
    if logger is None:
        level = logging.DEBUG if options.verbose else logging.INFO
        logger = logging.getLogger('fb22lrf')
        setup_cli_handlers(logger, level)
    fb2 = os.path.abspath(os.path.expanduser(path))
    f = open(fb2, 'rb')
    mi = get_metadata(f, 'fb2')
    f.close()
    htmlfile = generate_html(fb2, options.encoding, logger)
    tdir = os.path.dirname(htmlfile)
    cwd = os.getcwdu()
    try:
        if not options.output:
            ext = '.lrs' if options.lrs else '.lrf'
            options.output = os.path.abspath(os.path.basename(os.path.splitext(path)[0]) + ext)
        options.output = os.path.abspath(os.path.expanduser(options.output))
        if not mi.title:
            mi.title = os.path.splitext(os.path.basename(fb2))[0]
        if (not options.title or options.title == _('Unknown')):
            options.title = mi.title
        if (not options.author or options.author == _('Unknown')) and mi.authors:
            options.author = mi.authors.pop()
        if (not options.category or options.category == _('Unknown')) and mi.category:
            options.category = mi.category
        if (not options.freetext or options.freetext == _('Unknown')) and mi.comments:
            options.freetext = mi.comments
        os.chdir(tdir)
        html_process_file(htmlfile, options, logger)
    finally:
        os.chdir(cwd)
        if getattr(options, 'keep_intermediate_files', False):
            logger.debug('Intermediate files in '+ tdir)
        else:
            shutil.rmtree(tdir)

def main(args=sys.argv, logger=None):
    parser = option_parser()
    options, args = parser.parse_args(args)
    if len(args) != 2:
        parser.print_help()
        print
        print 'No fb2 file specified'
        return 1
    process_file(args[1], options, logger)
    return 0

if __name__ == '__main__':
    sys.exit(main())
@@ -41,10 +41,12 @@ NCX_NS = 'http://www.daisy.org/z3986/2005/ncx/'
SVG_NS = 'http://www.w3.org/2000/svg'
XLINK_NS = 'http://www.w3.org/1999/xlink'
CALIBRE_NS = 'http://calibre.kovidgoyal.net/2009/metadata'
RE_NS = 'http://exslt.org/regular-expressions'

XPNSMAP = {'h' : XHTML_NS, 'o1' : OPF1_NS, 'o2' : OPF2_NS,
           'd09': DC09_NS, 'd10': DC10_NS, 'd11': DC11_NS,
           'xsi': XSI_NS, 'dt' : DCTERMS_NS, 'ncx': NCX_NS,
           'svg': SVG_NS, 'xl' : XLINK_NS}
           'svg': SVG_NS, 'xl' : XLINK_NS, 're': RE_NS}
OPF1_NSMAP = {'dc': DC11_NS, 'oebpackage': OPF1_NS}
OPF2_NSMAP = {'opf': OPF2_NS, 'dc': DC11_NS, 'dcterms': DCTERMS_NS,
              'xsi': XSI_NS, 'calibre': CALIBRE_NS}
@@ -1256,16 +1258,21 @@ class TOC(object):
    :attr:`klass`: Optional semantic class referenced by this node.
    :attr:`id`: Option unique identifier for this node.
    """
    def __init__(self, title=None, href=None, klass=None, id=None):
    def __init__(self, title=None, href=None, klass=None, id=None,
            play_order=None):
        self.title = title
        self.href = urlnormalize(href) if href else href
        self.klass = klass
        self.id = id
        self.nodes = []
        self.play_order = 0
        if play_order is None:
            play_order = self.next_play_order()
        self.play_order = play_order

    def add(self, title, href, klass=None, id=None):
    def add(self, title, href, klass=None, id=None, play_order=0):
        """Create and return a new sub-node of this node."""
        node = TOC(title, href, klass, id)
        node = TOC(title, href, klass, id, play_order)
        self.nodes.append(node)
        return node

@@ -1276,6 +1283,18 @@ class TOC(object):
            for node in child.iter():
                yield node

    def count(self):
        return len(list(self.iter())) - 1

    def next_play_order(self):
        return max([x.play_order for x in self.iter()])+1

    def has_href(self, href):
        for x in self.iter():
            if x.href == href:
                return True
        return False

    def iterdescendants(self):
        """Iterate over all descendant nodes in depth-first order."""
        for child in self.nodes:
@@ -1309,6 +1328,10 @@ class TOC(object):
        except ValueError:
            return 1

    def __str__(self):
        return 'TOC: %s --> %s'%(self.title, self.href)


    def to_opf1(self, tour):
        for node in self.nodes:
            element(tour, 'site', attrib={
@@ -1319,7 +1342,7 @@ class TOC(object):
    def to_ncx(self, parent):
        for node in self.nodes:
            id = node.id or unicode(uuid.uuid4())
            attrib = {'id': id, 'playOrder': '0'}
            attrib = {'id': id, 'playOrder': str(node.play_order)}
            if node.klass:
                attrib['class'] = node.klass
            point = element(parent, NCX('navPoint'), attrib=attrib)
@@ -1329,6 +1352,34 @@ class TOC(object):
            node.to_ncx(point)
        return parent

    def rationalize_play_orders(self):
        '''
        Ensure that all nodes with the same play_order have the same href and
        with different play_orders have different hrefs.
        '''
        def po_node(n):
            for x in self.iter():
                if x is n:
                    return
                if x.play_order == n.play_order:
                    return x

        def href_node(n):
            for x in self.iter():
                if x is n:
                    return
                if x.href == n.href:
                    return x

        for x in self.iter():
            y = po_node(x)
            if y is not None:
                if x.href != y.href:
                    x.play_order = getattr(href_node(x), 'play_order',
                            self.next_play_order())
            y = href_node(x)
            if y is not None:
                x.play_order = y.play_order

class PageList(object):
    """Collection of named "pages" to mapped positions within an OEB data model
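Note on the new play_order handling above: next_play_order() hands out the next unused number and rationalize_play_orders() enforces that nodes sharing a play_order also share an href. A short sketch of how the new TOC API reads (based only on the signatures in this hunk; untested against this exact revision):

# Sketch only: exercising the TOC additions from this hunk.
from calibre.ebooks.oeb.base import TOC

root = TOC()                                   # play_order assigned automatically
one = root.add('Chapter One', 'ch1.xhtml',
        play_order=root.next_play_order())
two = root.add('Chapter Two', 'ch2.xhtml',
        play_order=root.next_play_order())
two.play_order = one.play_order                # create a conflicting play_order
root.rationalize_play_orders()                 # equal play_order now implies equal href
print one.play_order, two.play_order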
@@ -118,6 +118,7 @@ class EbookIterator(object):
            print 'Loaded embedded font:', repr(family)

    def __enter__(self):
        self.delete_on_exit = []
        self._tdir = TemporaryDirectory('_ebook_iter')
        self.base = self._tdir.__enter__()
        from calibre.ebooks.conversion.plumber import Plumber
@@ -137,9 +138,11 @@ class EbookIterator(object):

        cover = self.opf.cover
        if self.ebook_ext in ('lit', 'mobi', 'prc', 'opf') and cover:
            cfile = os.path.join(os.path.dirname(self.spine[0]), 'calibre_ei_cover.html')
            cfile = os.path.join(os.path.dirname(self.spine[0]),
                    'calibre_iterator_cover.html')
            open(cfile, 'wb').write(TITLEPAGE%cover)
            self.spine[0:0] = [SpineItem(cfile)]
            self.delete_on_exit.append(cfile)

        if self.opf.path_to_html_toc is not None and \
                self.opf.path_to_html_toc not in self.spine:
@@ -221,3 +224,6 @@ class EbookIterator(object):

    def __exit__(self, *args):
        self._tdir.__exit__(*args)
        for x in self.delete_on_exit:
            if os.path.exists(x):
                os.remove(x)
@@ -343,7 +343,8 @@ class OEBReader(object):
                continue
            id = child.get('id')
            klass = child.get('class')
            node = toc.add(title, href, id=id, klass=klass)
            po = int(child.get('playOrder', self.oeb.toc.next_play_order()))
            node = toc.add(title, href, id=id, klass=klass, play_order=po)
            self._toc_from_navpoint(item, node, child)

    def _toc_from_ncx(self, item):
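Note: the playOrder read here is the same attribute that TOC.to_ncx() now writes from each node's play_order. Purely for illustration, this is roughly the navPoint shape involved, built with lxml (the id and src values below are made up):

# Illustration only: an NCX navPoint carrying a playOrder attribute.
from lxml import etree

NCX_NS = 'http://www.daisy.org/z3986/2005/ncx/'
point = etree.Element('{%s}navPoint' % NCX_NS, nsmap={None: NCX_NS},
        attrib={'id': 'toc-1', 'playOrder': '1'})
label = etree.SubElement(point, '{%s}navLabel' % NCX_NS)
etree.SubElement(label, '{%s}text' % NCX_NS).text = 'Chapter One'
etree.SubElement(point, '{%s}content' % NCX_NS,
        attrib={'src': 'index.xhtml#calibre_toc_1'})
print etree.tostring(point, pretty_print=True)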
@@ -15,12 +15,10 @@ from lxml.etree import XPath as _XPath
from lxml import etree
from lxml.cssselect import CSSSelector

from calibre.ebooks.oeb.base import OEB_STYLES, XPNSMAP, urldefrag, \
    rewrite_links
from calibre.ebooks.oeb.base import OEB_STYLES, XPNSMAP as NAMESPACES, \
    urldefrag, rewrite_links
from calibre.ebooks.epub import tostring, rules

NAMESPACES = dict(XPNSMAP)
NAMESPACES['re'] = 'http://exslt.org/regular-expressions'

XPath = functools.partial(_XPath, namespaces=NAMESPACES)
src/calibre/ebooks/oeb/transforms/structure.py (new file, 151 lines)
@@ -0,0 +1,151 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import with_statement

__license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

from lxml import etree
from urlparse import urlparse

from calibre.ebooks.oeb.base import XPNSMAP, TOC
XPath = lambda x: etree.XPath(x, namespaces=XPNSMAP)

class DetectStructure(object):

    def __call__(self, oeb, opts):
        self.log = oeb.log
        self.oeb = oeb
        self.opts = opts
        self.log('Detecting structure...')

        self.detect_chapters()
        if self.oeb.auto_generated_toc or opts.use_auto_toc:
            orig_toc = self.oeb.toc
            self.oeb.toc = TOC()
            self.create_level_based_toc()
            if self.oeb.toc.count() < 1:
                if not opts.no_chapters_in_toc and self.detected_chapters:
                    self.create_toc_from_chapters()
                if self.oeb.toc.count() < opts.toc_threshold:
                    self.create_toc_from_links()
            if self.oeb.toc.count() < 2 and orig_toc.count() > 2:
                self.oeb.toc = orig_toc
            else:
                self.oeb.auto_generated_toc = True
                self.log('Auto generated TOC with %d entries.' %
                        self.oeb.toc.count())


    def detect_chapters(self):
        self.detected_chapters = []
        if self.opts.chapter:
            chapter_xpath = XPath(self.opts.chapter)
            for item in self.oeb.spine:
                for x in chapter_xpath(item.data):
                    self.detected_chapters.append((item, x))

            chapter_mark = self.opts.chapter_mark
            page_break_before = 'display: block; page-break-before: always'
            page_break_after = 'display: block; page-break-after: always'
            for item, elem in self.detected_chapters:
                text = u' '.join([t.strip() for t in elem.xpath('descendant::text()')])
                self.log('\tDetected chapter:', text[:50])
                if chapter_mark == 'none':
                    continue
                elif chapter_mark == 'rule':
                    mark = etree.Element('hr')
                elif chapter_mark == 'pagebreak':
                    mark = etree.Element('div', style=page_break_after)
                else: # chapter_mark == 'both':
                    mark = etree.Element('hr', style=page_break_before)
                elem.addprevious(mark)

    def create_level_based_toc(self):
        if self.opts.level1_toc is None:
            return
        for item in self.oeb.spine:
            self.add_leveled_toc_items(item)

    def create_toc_from_chapters(self):
        counter = self.oeb.toc.next_play_order()
        for item, elem in self.detected_chapters:
            text, href = self.elem_to_link(item, elem, counter)
            self.oeb.toc.add(text, href, play_order=counter)
            counter += 1

    def create_toc_from_links(self):
        for item in self.oeb.spine:
            for a in item.data.xpath('//h:a[@href]'):
                href = a.get('href')
                purl = urlparse(href)
                if not purl[0] or purl[0] == 'file':
                    href, frag = purl.path, purl.fragment
                    href = item.abshref(href)
                    if frag:
                        href = '#'.join((href, frag))
                    if not self.oeb.toc.has_href(href):
                        text = u' '.join([t.strip() for t in \
                                a.xpath('descendant::text()')])
                        text = text[:100].strip()
                        if not self.oeb.toc.has_text(text):
                            self.oeb.toc.add(text, href,
                                    play_order=self.oeb.toc.next_play_order())


    def elem_to_link(self, item, elem, counter):
        text = u' '.join([t.strip() for t in elem.xpath('descendant::text()')])
        text = text[:100].strip()
        id = elem.get('id', 'calibre_toc_%d'%counter)
        elem.set('id', id)
        href = '#'.join((item.href, id))
        return text, href


    def add_leveled_toc_items(self, item):
        level1 = XPath(self.opts.level1_toc)(item.data)
        level1_order = []

        counter = 1
        if level1:
            added = {}
            for elem in level1:
                text, _href = self.elem_to_link(item, elem, counter)
                counter += 1
                if text:
                    node = self.oeb.toc.add(text, _href,
                            play_order=self.oeb.toc.next_play_order())
                    level1_order.append(node)
                    added[elem] = node
                    #node.add(_('Top'), _href)
            if self.opts.level2_toc is not None:
                added2 = {}
                level2 = list(XPath(self.opts.level2_toc)(item.data))
                for elem in level2:
                    level1 = None
                    for item in item.data.iterdescendants():
                        if item in added.keys():
                            level1 = added[item]
                        elif item == elem and level1 is not None:
                            text, _href = self.elem_to_link(item, elem, counter)
                            counter += 1
                            if text:
                                added2[elem] = level1.add(text, _href,
                                        play_order=self.oeb.toc.next_play_order())
                if self.opts.level3_toc is not None:
                    level3 = list(XPath(self.opts.level3_toc)(item.data))
                    for elem in level3:
                        level2 = None
                        for item in item.data.iterdescendants():
                            if item in added2.keys():
                                level2 = added2[item]
                            elif item == elem and level2 is not None:
                                text, _href = \
                                    self.elem_to_link(item, elem, counter)
                                counter += 1
                                if text:
                                    level2.add(text, _href,
                                            play_order=self.oeb.toc.next_play_order())
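Note on add_leveled_toc_items() above: the level1_toc/level2_toc/level3_toc expressions are evaluated with the 'h' prefix bound to the XHTML namespace via XPNSMAP, so a recommendation like FB2Input's '//h:h1' selects XHTML headings. A standalone sketch with plain lxml (only the standard XHTML namespace URI is assumed):

# Standalone illustration: evaluate a level1_toc expression such as '//h:h1'.
from lxml import etree

XHTML_NS = 'http://www.w3.org/1999/xhtml'
doc = etree.fromstring(
    '<html xmlns="%s"><body>'
    '<h1>Part I</h1><h2>Chapter 1</h2><h1>Part II</h1>'
    '</body></html>' % XHTML_NS)
level1 = etree.XPath('//h:h1', namespaces={'h': XHTML_NS})
print [e.text for e in level1(doc)]   # ['Part I', 'Part II']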
@@ -27,10 +27,6 @@ entry_points = {
        'lrs2lrf = calibre.ebooks.lrf.lrs.convert_from:main',
        'isbndb = calibre.ebooks.metadata.isbndb:main',
        'librarything = calibre.ebooks.metadata.library_thing:main',
        'comic2lrf = calibre.ebooks.lrf.comic.convert_from:main',
        'comic2epub = calibre.ebooks.epub.from_comic:main',
        'comic2mobi = calibre.ebooks.mobi.from_comic:main',
        'comic2pdf = calibre.ebooks.pdf.from_comic:main',
        'calibre-debug = calibre.debug:main',
        'calibredb = calibre.library.cli:main',
        'calibre-fontconfig = calibre.utils.fontconfig:main',
@@ -151,8 +147,6 @@ def setup_completion(fatal_errors):
    from calibre.ebooks.lrf.pdf.reflow import option_parser as pdfhtmlop
    from calibre.web.feeds.main import option_parser as feeds2disk
    from calibre.web.feeds.recipes import titles as feed_titles
    from calibre.ebooks.lrf.comic.convert_from import option_parser as comicop
    from calibre.ebooks.epub.from_comic import option_parser as comic2epub
    from calibre.ebooks.metadata.fetch import option_parser as fem_op
    from calibre.gui2.main import option_parser as guiop
    from calibre.utils.smtp import option_parser as smtp_op
@@ -181,10 +175,6 @@ def setup_completion(fatal_errors):
        f.write(opts_and_exts('ebook-meta', metaop, list(meta_filetypes())))
        f.write(opts_and_exts('lrfviewer', lrfviewerop, ['lrf']))
        f.write(opts_and_exts('pdfrelow', pdfhtmlop, ['pdf']))
        f.write(opts_and_exts('comic2lrf', comicop, ['cbz', 'cbr']))
        f.write(opts_and_exts('comic2epub', comic2epub, ['cbz', 'cbr']))
        f.write(opts_and_exts('comic2mobi', comic2epub, ['cbz', 'cbr']))
        f.write(opts_and_exts('comic2pdf', comic2epub, ['cbz', 'cbr']))
        f.write(opts_and_words('feeds2disk', feeds2disk, feed_titles))
        f.write(opts_and_words('fetch-ebook-metadata', fem_op, []))
        f.write(opts_and_words('calibre-smtp', smtp_op, []))
@@ -139,7 +139,7 @@ class resources(OptionlessCommand):
    RESOURCES = dict(
        opf_template = 'ebooks/metadata/opf.xml',
        ncx_template = 'ebooks/metadata/ncx.xml',
        fb2_xsl = 'ebooks/lrf/fb2/fb2.xsl',
        fb2_xsl = 'ebooks/fb2/fb2.xsl',
        metadata_sqlite = 'library/metadata_sqlite.sql',
        jquery = 'gui2/viewer/jquery.js',
        jquery_scrollTo = 'gui2/viewer/jquery_scrollTo.js',