Mirror of https://github.com/kovidgoyal/calibre.git (synced 2025-07-09 03:04:10 -04:00)
IGN:Fix more minor regressions
commit 829a344fe9 (parent f7bf112ae2)
@@ -6,7 +6,7 @@ __docformat__ = 'restructuredtext en'
 '''
 Conversion to EPUB.
 '''
-import sys
+import sys, textwrap
 from calibre.utils.config import Config, StringConfig
 from calibre.utils.zipfile import ZipFile, ZIP_DEFLATED
 from calibre.ebooks.html import config as common_config
@@ -53,9 +53,21 @@ The expression used must evaluate to a list of elements. To disable chapter dete
 use the expression "/". See the XPath Tutorial in the calibre User Manual for further
 help on using this feature.
 ''').replace('\n', ' '))
-    structure('no_chapters_in_toc', ['--no-chapters-in-toc'], default=False,
-              help=_('Don\'t add detected chapters to the Table of Contents'))
-    structure('no_links_in_toc', ['--no-links-in-toc'], default=False,
-              help=_('Don\'t add links in the root HTML file to the Table of Contents'))
+    toc = c.add_group('toc',
+        _('''\
+Control the automatic generation of a Table of Contents. If an OPF file is detected
+and it specifies a Table of Contents, then that will be used rather than trying
+to auto-generate a Table of Contents.
+''').replace('\n', ' '))
+    toc('max_toc_recursion', ['--max-toc-recursion'], default=1,
+        help=_('Number of levels of HTML files to try to autodetect TOC entries from. Set to 0 to disable all TOC autodetection. Default is %default.'))
+    toc('max_toc_links', ['--max-toc-links'], default=40,
+        help=_('Maximum number of links from each HTML file to insert into the TOC. Set to 0 to disable. Default is: %default.'))
+    toc('no_chapters_in_toc', ['--no-chapters-in-toc'], default=False,
+        help=_("Don't add auto-detected chapters to the Table of Contents."))
+    toc('add_files_to_toc', ['--add-files-to-toc'], default=False,
+        help=_('If more than one HTML file is found, create a TOC entry for each file.'))
 
 
     return c
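
A minimal sketch of how these grouped options surface once parsed. Only config() and the option names come from the hunk above; the parse_args call, the sample values and the positional argument are illustrative assumptions.

    from calibre.ebooks.epub import config

    c = config()
    parser = c.option_parser()
    opts, args = parser.parse_args(['--max-toc-links', '25',
                                    '--no-chapters-in-toc', 'book.html'])
    print opts.max_toc_links, opts.no_chapters_in_toc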
@@ -1,13 +1,16 @@
 from __future__ import with_statement
+from calibre.ebooks.metadata.opf import OPFReader
 __license__ = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
-import os, sys, re, shutil
+import os, sys, re, shutil, cStringIO
 from lxml.etree import XPath
 
-from calibre.ebooks.html import Parser, get_text, merge_metadata, get_filelist
+from calibre.ebooks.html import Parser, get_text, merge_metadata, get_filelist,\
+    opf_traverse, create_metadata, rebase_toc
 from calibre.ebooks.epub import config as common_config
-from calibre.ptempfile import PersistentTemporaryDirectory
+from calibre.ptempfile import TemporaryDirectory
+from calibre.ebooks.metadata import MetaInformation
 
 
 class HTMLProcessor(Parser):
@@ -17,7 +20,7 @@ class HTMLProcessor(Parser):
                         name='html2epub')
         if opts.verbose > 2:
             self.debug_tree('parsed')
-        self.detected_chapters = self.opts.chapter(self.root)
+        self.detect_chapters()
         self.extract_css()
 
         if opts.verbose > 2:
@@ -27,6 +30,13 @@ class HTMLProcessor(Parser):
 
         self.split()
 
+    def detect_chapters(self):
+        self.detected_chapters = self.opts.chapter(self.root)
+        for elem in self.detected_chapters:
+            style = elem.get('style', '')
+            style += ';page-break-before: always'
+            elem.set(style, style)
+
     def collect_font_statistics(self):
         '''
         Collect font statistics to figure out the base font size used in this
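
detect_chapters() marks every element matched by the chapter XPath so that it starts on a new page. A self-contained sketch of that marking step, using plain lxml and a hard-coded //h1 expression in place of the configurable opts.chapter; note that the attribute name passed to set() is the literal string 'style'.

    from lxml import html

    root = html.fromstring(
        '<html><body><h1>One</h1><p>text</p><h1>Two</h1></body></html>')
    for elem in root.xpath('//h1'):          # stand-in for the opts.chapter XPath
        style = elem.get('style', '')
        style += ';page-break-before: always'
        elem.set('style', style)             # force a page break before each chapter
    print html.tostring(root)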
@@ -46,37 +56,44 @@ class HTMLProcessor(Parser):
 
 
 def config(defaults=None):
-    c = common_config(defaults=defaults)
-    return c
+    return common_config(defaults=defaults)
 
 def option_parser():
     c = config()
     return c.option_parser(usage=_('''\
-%prog [options] file.html
+%prog [options] file.html|opf
 
-Convert a HTML file to an EPUB ebook. Follows links in the HTML file.
+Convert a HTML file to an EPUB ebook. Recursively follows links in the HTML file.
+If you specify an OPF file instead of an HTML file, the list of links is takes from
+the <spine> element of the OPF file.
 '''))
 
-def parse_content(filelist, opts):
-    tdir = PersistentTemporaryDirectory('_html2epub')
+def parse_content(filelist, opts, tdir):
     os.makedirs(os.path.join(tdir, 'content', 'resources'))
     resource_map = {}
     for htmlfile in filelist:
         hp = HTMLProcessor(htmlfile, opts, os.path.join(tdir, 'content'),
                            resource_map, filelist)
+        hp.save()
+    return resource_map, hp.htmlfile_map
 
 def convert(htmlfile, opts, notification=None):
     htmlfile = os.path.abspath(htmlfile)
     if opts.output is None:
         opts.output = os.path.splitext(os.path.basename(htmlfile))[0] + '.epub'
     opts.output = os.path.abspath(opts.output)
+    if htmlfile.lower().endswith('.opf'):
+        opf = OPFReader(htmlfile, os.path.dirname(os.path.abspath(htmlfile)))
+        filelist = opf_traverse(opf, verbose=opts.verbose, encoding=opts.encoding)
+        mi = MetaInformation(opf)
+    else:
         opf, filelist = get_filelist(htmlfile, opts)
         mi = merge_metadata(htmlfile, opf, opts)
     opts.chapter = XPath(opts.chapter,
                          namespaces={'re':'http://exslt.org/regular-expressions'})
 
-    resource_map = parse_content(filelist, opts)
+    with TemporaryDirectory('_html2epub') as tdir:
+        resource_map, htmlfile_map = parse_content(filelist, opts, tdir)
     resources = [os.path.join(opts.output, 'content', f) for f in resource_map.values()]
 
     if opf.cover and os.access(opf.cover, os.R_OK):
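
parse_content() no longer creates its own PersistentTemporaryDirectory; the caller owns the working directory and passes it in, wrapped in a context manager so it is cleaned up even if the conversion fails. A standard-library sketch of the same pattern (calibre's TemporaryDirectory from calibre.ptempfile plays this role above; the helper below is illustrative, not calibre code):

    import os, shutil, tempfile
    from contextlib import contextmanager

    @contextmanager
    def temporary_directory(suffix=''):
        tdir = tempfile.mkdtemp(suffix=suffix)
        try:
            yield tdir
        finally:
            shutil.rmtree(tdir, ignore_errors=True)   # removed on success or error

    with temporary_directory('_html2epub') as tdir:
        os.makedirs(os.path.join(tdir, 'content', 'resources'))
        # parse_content(filelist, opts, tdir) would populate this tree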
@@ -86,6 +103,18 @@ def convert(htmlfile, opts, notification=None):
         resources.append(cpath)
         mi.cover = cpath
 
+        spine = [htmlfile_map[f.path] for f in filelist]
+        mi = create_metadata(tdir, mi, spine, resources)
+        buf = cStringIO.StringIO()
+        if mi.toc:
+            rebase_toc(mi.toc, htmlfile_map, opts.output)
+        with open(os.path.join(tdir, 'metadata.opf'), 'wb') as f:
+            mi.render(f, buf)
+        toc = buf.getvalue()
+        if toc:
+            with open(os.path.join(tdir, 'toc.ncx'), 'wb') as f:
+                f.write(toc)
+
 def main(args=sys.argv):
     parser = option_parser()
     opts, args = parser.parse_args(args)
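
Taken together, these hunks let the converter start from either an HTML file or an OPF file whose <spine> lists the content documents. A hedged usage sketch; the module path is assumed and not shown in the diff:

    from calibre.ebooks.epub.from_html import option_parser, convert  # assumed path

    parser = option_parser()
    opts, args = parser.parse_args(['mybook.opf'])
    convert('mybook.opf', opts)   # opts.output defaults to mybook.epub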
@@ -1,10 +1,14 @@
 from __future__ import with_statement
-import cStringIO
 __license__ = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 
-import sys, re, os, shutil, logging, tempfile
+'''
+Code to recursively parse HTML files and create an open ebook in a specified
+directory or zip file. All the action starts in :function:`create_dir`.
+'''
+
+import sys, re, os, shutil, logging, tempfile, cStringIO
 from urlparse import urlparse
 from urllib import unquote
 
@@ -445,10 +449,10 @@ class Parser(PreProcessor, LoggingInterface):
         self.raw_css = '\n\n'.join(css)
         # TODO: Figure out what to do about CSS imports from linked stylesheets
 
-def config(defaults=None):
-    desc = _('Options to control the traversal of HTML')
+def config(defaults=None, config_name='html',
+           desc=_('Options to control the traversal of HTML')):
     if defaults is None:
-        c = Config('html', desc)
+        c = Config(config_name, desc)
     else:
         c = StringConfig(defaults, desc)
 
@@ -482,10 +486,12 @@ def config(defaults=None):
 def option_parser():
     c = config()
     return c.option_parser(usage=_('''\
-%prog [options] file.html
+%prog [options] file.html|opf
 
 Follow all links in an HTML file and collect them into the specified directory.
 Also collects any references resources like images, stylesheets, scripts, etc.
+If an OPF file is specified instead, the list of files in its <spine> element
+is used.
 '''))
 
 def search_for_opf(dir):
@@ -566,7 +572,8 @@ def create_metadata(basepath, mi, filelist, resources):
 
 def rebase_toc(toc, htmlfile_map, basepath, root=True):
     '''
-    Rebase a :class:`calibre.ebooks.metadata.toc.TOC` object.
+    Rebase a :class:`calibre.ebooks.metadata.toc.TOC` object. Maps all entries
+    in the TOC to point to their new locations relative to the new OPF file.
     '''
     def fix_entry(entry):
         if entry.abspath in htmlfile_map.keys():
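
The expanded docstring describes the per-entry remapping rebase_toc() performs. A simplified sketch of that fix-up (a hypothetical helper, not the calibre implementation), where htmlfile_map maps source HTML paths to their processed copies and basepath is the directory holding the new OPF file:

    import os

    def fix_entry_path(abspath, htmlfile_map, basepath):
        # Processed files point at their new copy, expressed relative to the OPF.
        if abspath in htmlfile_map:
            return os.path.relpath(htmlfile_map[abspath], basepath)
        return abspath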
@@ -582,15 +589,23 @@ def create_dir(htmlfile, opts):
     '''
     Create a directory that contains the open ebook
     '''
+    if htmlfile.lower().endswith('.opf'):
+        opf = OPFReader(open(htmlfile, 'rb'), os.path.dirname(os.path.abspath(htmlfile)))
+        filelist = opf_traverse(opf, verbose=opts.verbose, encoding=opts.encoding)
+        mi = MetaInformation(opf)
+    else:
         opf, filelist = get_filelist(htmlfile, opts)
         mi = merge_metadata(htmlfile, opf, opts)
 
     resource_map, htmlfile_map = parse_content(filelist, opts)
     resources = [os.path.join(opts.output, 'content', f) for f in resource_map.values()]
 
     if opf and opf.cover and os.access(opf.cover, os.R_OK):
         cpath = os.path.join(opts.output, 'content', 'resources', '_cover_'+os.path.splitext(opf.cover)[-1])
         shutil.copyfile(opf.cover, cpath)
         resources.append(cpath)
         mi.cover = cpath
 
     spine = [htmlfile_map[f.path] for f in filelist]
     mi = create_metadata(opts.output, mi, spine, resources)
     buf = cStringIO.StringIO()
@@ -105,7 +105,6 @@ def set_metadata(stream, mi):
     reader.opf.smart_update(mi)
     newopf = StringIO(reader.opf.render())
     safe_replace(stream, reader.container[OPF.MIMETYPE], newopf)
-    print newopf.getvalue()
 
 def option_parser():
     parser = get_parser('epub')
@@ -150,7 +150,7 @@ class OPF(object):
         def fset(self, val):
             matches = self.isbn_path(self.tree)
             if not matches:
-                matches = [self.create_metadata_element('dc:identifier',
+                matches = [self.create_metadata_element('identifier', ns='dc',
                     attrib={'{%s}scheme'%self.NAMESPACES['opf']:'ISBN'})]
             matches[0].text = unicode(val)
         return property(fget=fget, fset=fset)
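
The setter now asks create_metadata_element() for an 'identifier' element in the dc namespace instead of passing the prefixed tag name. A standalone lxml sketch of the element that ends up in the OPF metadata; the namespace URIs are the standard Dublin Core and OPF 2.0 ones and the ISBN value is a placeholder:

    from lxml import etree

    DC = 'http://purl.org/dc/elements/1.1/'
    OPF = 'http://www.idpf.org/2007/opf'

    metadata = etree.Element('{%s}metadata' % OPF, nsmap={'dc': DC, 'opf': OPF})
    ident = etree.SubElement(metadata, '{%s}identifier' % DC,
                             attrib={'{%s}scheme' % OPF: 'ISBN'})
    ident.text = '9780000000000'               # placeholder ISBN
    print etree.tostring(metadata, pretty_print=True)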