Merge from trunk

2025-07-09 03:04:10 -04:00 · 2010-07-09 17:22:25 +01:00 · 2010-07-09 17:22:25 +01:00 · 200547fe0e
commit 200547fe0e
parent 0b4e09180d ec607dc596
16 changed files with 469 additions and 16 deletions
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@ -9,6 +9,7 @@ from calibre.customize import FileTypePlugin, MetadataReaderPlugin, MetadataWrit
 from calibre.constants import numeric_version
 from calibre.ebooks.metadata.archive import ArchiveExtract, get_cbz_metadata
 # To archive plugins {{{
 class HTML2ZIP(FileTypePlugin):
    name = 'HTML to ZIP'
    author = 'Kovid Goyal'
@ -82,6 +83,8 @@ class PML2PMLZ(FileTypePlugin):
        return of.name
 # }}}
 # Metadata reader plugins {{{
 class ComicMetadataReader(MetadataReaderPlugin):
@ -465,8 +468,11 @@ from calibre.ebooks.metadata.fetch import GoogleBooks, ISBNDB, Amazon, \
    LibraryThing
 from calibre.ebooks.metadata.douban import DoubanBooks
 from calibre.library.catalog import CSV_XML, EPUB_MOBI
 from calibre.ebooks.epub.fix.unmanifested import Unmanifested
 from calibre.ebooks.epub.fix.epubcheck import Epubcheck
 plugins = [HTML2ZIP, PML2PMLZ, ArchiveExtract, GoogleBooks, ISBNDB, Amazon,
-        LibraryThing, DoubanBooks, CSV_XML, EPUB_MOBI]
+        LibraryThing, DoubanBooks, CSV_XML, EPUB_MOBI, Unmanifested, Epubcheck]
 plugins += [
    ComicInput,
    EPUBInput,
--- a/src/calibre/customize/ui.py
+++ b/src/calibre/customize/ui.py
@ -16,6 +16,7 @@ from calibre.ebooks.metadata import MetaInformation
 from calibre.ebooks.metadata.fetch import MetadataSource
 from calibre.utils.config import make_config_dir, Config, ConfigProxy, \
                                 plugin_dir, OptionParser, prefs
 from calibre.ebooks.epub.fix import ePubFixer
 platform = 'linux'
@ -194,7 +195,6 @@ def plugin_customization(plugin):
 # }}}
 # Input/Output profiles {{{
 def input_profiles():
    for plugin in _initialized_plugins:
@ -444,6 +444,14 @@ def device_plugins(): # {{{
                    yield plugin
 # }}}
 # epub fixers {{{
 def epub_fixers():
    '''
    Yield every initialized plugin that is an ePubFixer, has not been
    disabled and lists the current platform as supported.
    '''
    for candidate in _initialized_plugins:
        if not isinstance(candidate, ePubFixer):
            continue
        if is_disabled(candidate):
            continue
        if platform not in candidate.supported_platforms:
            continue
        yield candidate
 # }}}
 # Initialize plugins {{{
--- a/src/calibre/devices/kobo/books.py
+++ b/src/calibre/devices/kobo/books.py
@ -54,7 +54,8 @@ class Book(MetaInformation):
        except:
             self.datetime = time.gmtime()
-        self.thumbnail = ImageWrapper(thumbnail_name)
+	if thumbnail_name is not None:
 	    self.thumbnail = ImageWrapper(thumbnail_name)
        self.tags = []
        if other:
            self.smart_update(other)
--- a/src/calibre/devices/kobo/driver.py
+++ b/src/calibre/devices/kobo/driver.py
@ -85,9 +85,11 @@ class KOBO(USBMS):
                idx = bl_cache.get(lpath, None)
                if idx is not None:
-                    imagename = self.normalize_path(prefix + '.kobo/images/' + ImageID + ' - NickelBookCover.parsed')
+                    if ImageID is not None:
-                    #print "Image name Normalized: " + imagename
+                        imagename = self.normalize_path(self._main_prefix + '.kobo/images/' + ImageID + ' - NickelBookCover.parsed')
-                    bl[idx].thumbnail = ImageWrapper(imagename)
+                        #print "Image name Normalized: " + imagename
                        if imagename is not None:
                            bl[idx].thumbnail = ImageWrapper(imagename)
                    bl_cache[lpath] = None
                    if ContentType != '6':
                        if self.update_metadata_item(bl[idx]):
@ -341,6 +343,7 @@ class KOBO(USBMS):
            else:
                # if path.startswith("file:///mnt/onboard/"):
                path = path.replace("file:///mnt/onboard/", self._main_prefix)
                path = path.replace("/mnt/onboard/", self._main_prefix)
                    # print "Internal: " + filename
        return path
--- a/src/calibre/ebooks/conversion/preprocess.py
+++ b/src/calibre/ebooks/conversion/preprocess.py
@ -25,13 +25,13 @@ convert_entities = functools.partial(entity_to_unicode,
 _span_pat = re.compile('<span.*?</span>', re.DOTALL|re.IGNORECASE)
 LIGATURES = {
-        u'\u00c6': u'AE',
+#        u'\u00c6': u'AE',
-        u'\u00e6': u'ae',
+#        u'\u00e6': u'ae',
-        u'\u0152': u'OE',
+#        u'\u0152': u'OE',
-        u'\u0153': u'oe',
+#        u'\u0153': u'oe',
-        u'\u0132': u'IJ',
+#        u'\u0132': u'IJ',
-        u'\u0133': u'ij',
+#        u'\u0133': u'ij',
-        u'\u1D6B': u'ue',
+#        u'\u1D6B': u'ue',
        u'\uFB00': u'ff',
        u'\uFB01': u'fi',
        u'\uFB02': u'fl',
--- a/src/calibre/ebooks/epub/fix/init.py
+++ b/src/calibre/ebooks/epub/fix/init.py
@ -0,0 +1,58 @@
 #!/usr/bin/env python
 # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
 __license__   = 'GPL v3'
 __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 from calibre.customize import Plugin
 class InvalidEpub(ValueError):
    '''Raised when an EPUB is found to be malformed.'''
 class ePubFixer(Plugin):
    '''
    Base class for plugins that check an EPUB container for one kind of
    problem and, when asked to, fix it. Subclasses override the properties
    and the :meth:`run` method below.
    '''

    supported_platforms = ['windows', 'osx', 'linux']
    author = 'Kovid Goyal'
    type = _('ePub Fixer')
    can_be_disabled = True

    # API that subclasses must implement {{{
    @property
    def short_description(self):
        '''Short description of the fix. Must be overridden.'''
        raise NotImplementedError

    @property
    def long_description(self):
        '''
        Long description of the fix, used as the help text of the command
        line switch that enables it. Must be overridden.
        '''
        raise NotImplementedError

    @property
    def fix_name(self):
        '''
        Identifier of the fix. The command line switch that enables the fix
        is derived from it. Must be overridden.
        '''
        raise NotImplementedError

    @property
    def options(self):
        '''
        Return a list of 4-tuples
        (option_name, type, default, help_text)
        type is one of 'bool', 'int', 'string'
        '''
        return []

    def run(self, container, opts, log, fix=False):
        '''
        Check `container` and, if `fix` is True, repair what was found.
        Must be overridden.
        '''
        raise NotImplementedError
    # }}}

    def add_options_to_parser(self, parser):
        '''
        Register the command line switches of this fixer on `parser`.

        A boolean switch named after :attr:`fix_name` is always added,
        followed by one switch per entry in :attr:`options`. Underscores in
        names are turned into hyphens.
        '''
        switch = '--' + self.fix_name.replace('_', '-')
        parser.add_option(switch, help=self.long_description,
                action='store_true', default=False)
        for option in self.options:
            opt_type = option[1]
            kwargs = {'default': option[2], 'help': option[3]}
            if opt_type == 'bool':
                # Flags take no argument, so no type is passed for them
                kwargs['action'] = 'store_true'
            else:
                kwargs['action'] = 'store'
                kwargs['type'] = opt_type
            parser.add_option('--' + option[0].replace('_', '-'), **kwargs)
--- a/src/calibre/ebooks/epub/fix/container.py
+++ b/src/calibre/ebooks/epub/fix/container.py
@ -0,0 +1,182 @@
 #!/usr/bin/env python
 # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
 __license__   = 'GPL v3'
 __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 import os, posixpath, urllib, sys
 from lxml import etree
 from calibre.ebooks.epub.fix import InvalidEpub
 from calibre import guess_type, prepare_string_for_xml
 from calibre.ebooks.chardet import xml_to_unicode
 from calibre.constants import iswindows
 from calibre.utils.zipfile import ZipFile, ZIP_STORED
 exists, join = os.path.exists, os.path.join
 OCF_NS = 'urn:oasis:names:tc:opendocument:xmlns:container'
 OPF_NS = 'http://www.idpf.org/2007/opf'
 class Container(object):
    '''
    An EPUB whose contents have been extracted into a directory.

    Files are addressed by *name*: their path relative to the root
    directory, with / as the separator. Parsed or modified file contents are
    kept in a cache and only written back to disk by :meth:`write`.
    '''

    # File names that are recognized inside META-INF. Only the keys are used
    # by this class (see manifest_worthy_names()).
    # NOTE(review): the boolean presumably marks whether the file is
    # mandatory -- confirm.
    META_INF = {
            'container.xml' : True,
            'manifest.xml' : False,
            'encryption.xml' : False,
            'metadata.xml' : False,
            'signatures.xml' : False,
            'rights.xml' : False,
    }

    def __init__(self, path, log):
        '''
        :param path: Directory holding the extracted contents of the epub
        :param log: Log object, stored as ``self.log``
        :raises InvalidEpub: If META-INF/container.xml is missing, contains
                             no link to an OPF file or links to an OPF file
                             that does not exist
        '''
        self.root = os.path.abspath(path)
        self.log = log
        self.dirtied = set()
        self.cache = {}
        self.mime_map = {}

        # The mimetype entry is written afresh by write()
        if exists(join(self.root, 'mimetype')):
            os.remove(join(self.root, 'mimetype'))

        container_path = join(self.root, 'META-INF', 'container.xml')
        if not exists(container_path):
            raise InvalidEpub('No META-INF/container.xml in epub')
        # Use a context manager so the file handle is closed promptly
        with open(container_path, 'rb') as f:
            self.container = etree.fromstring(f.read())
        opf_files = self.container.xpath((
            r'child::ocf:rootfiles/ocf:rootfile'
            '[@media-type="%s" and @full-path]'%guess_type('a.opf')[0]
            ), namespaces={'ocf':OCF_NS}
        )
        if not opf_files:
            raise InvalidEpub('META-INF/container.xml contains no link to OPF file')
        opf_path = os.path.join(self.root,
                *opf_files[0].get('full-path').split('/'))
        if not exists(opf_path):
            raise InvalidEpub('OPF file does not exist at location pointed to'
                    ' by META-INF/container.xml')

        # Map of relative paths with / separators to absolute
        # paths on filesystem with os separators
        self.name_map = {}
        for dirpath, dirnames, filenames in os.walk(self.root):
            for f in filenames:
                path = join(dirpath, f)
                name = os.path.relpath(path, self.root).replace(os.sep, '/')
                self.name_map[name] = path
                if path == opf_path:
                    self.opf_name = name
                    self.mime_map[name] = guess_type('a.opf')[0]

        # Record the media type declared in the manifest for every item
        for item in self.opf.xpath(
                '//opf:manifest/opf:item[@href and @media-type]',
                namespaces={'opf':OPF_NS}):
            href = item.get('href')
            self.mime_map[self.href_to_name(href,
                posixpath.dirname(self.opf_name))] = item.get('media-type')

    def manifest_worthy_names(self):
        '''
        Yield the names of all files except .opf files and the recognized
        files in META-INF.
        '''
        for name in self.name_map:
            if name.endswith('.opf'): continue
            if name.startswith('META-INF') and \
                    posixpath.basename(name) in self.META_INF: continue
            yield name

    def delete_name(self, name):
        '''
        Delete the file `name` from disk and forget everything known about
        it.

        :raises KeyError: If `name` is not a file in this container
        '''
        self.mime_map.pop(name, None)
        path = self.name_map[name]
        os.remove(path)
        self.name_map.pop(name)
        # Drop pending modifications as well, otherwise write() would fail
        # looking up the path of a file that no longer exists
        self.cache.pop(name, None)
        self.dirtied.discard(name)

    def manifest_item_for_name(self, name):
        '''
        Return the manifest <item> whose href attribute equals the href of
        `name` relative to the OPF file, or None if there is no such item.
        '''
        href = self.name_to_href(name,
            posixpath.dirname(self.opf_name))
        # Pass the href as an XPath variable. XPath string literals are not
        # XML-unescaped, so embedding an escaped href in the expression
        # never matched names containing &, <, > or quotes.
        existing = self.opf.xpath('//opf:manifest/opf:item[@href=$href]',
                namespaces={'opf':OPF_NS}, href=href)
        if not existing:
            return None
        return existing[0]

    def add_name_to_manifest(self, name):
        '''
        Append an <item> for `name` to the OPF manifest, unless one is
        already present. The media type is guessed from the file name.
        '''
        item = self.manifest_item_for_name(name)
        if item is not None:
            return
        manifest = self.opf.xpath('//opf:manifest', namespaces={'opf':OPF_NS})[0]
        item = manifest.makeelement('{%s}item'%OPF_NS, nsmap={'opf':OPF_NS},
                href=self.name_to_href(name, posixpath.dirname(self.opf_name)),
                id=self.generate_manifest_id())
        mt = guess_type(posixpath.basename(name))[0]
        if not mt:
            # Generic fallback for files of unknown type
            mt = 'application/octet-stream'
        item.set('media-type', mt)
        manifest.append(item)

    def generate_manifest_id(self):
        '''
        Return the first id of the form idN (N = 0, 1, 2, ...) that is not
        used by any item in the manifest.
        '''
        items = self.opf.xpath('//opf:manifest/opf:item[@id]',
                namespaces={'opf':OPF_NS})
        ids = set([x.get('id') for x in items])
        for x in xrange(sys.maxint):
            c = 'id%d'%x
            if c not in ids:
                return c

    @property
    def opf(self):
        '''The contents of the OPF file, as returned by :meth:`get`.'''
        return self.get(self.opf_name)

    def href_to_name(self, href, base=''):
        '''
        Convert `href` to a name: the fragment is removed, the rest is URL
        unquoted and, if `base` is not empty, joined onto `base`.
        '''
        href = urllib.unquote(href.partition('#')[0])
        name = href
        if base:
            name = posixpath.join(base, href)
        return name

    def name_to_href(self, name, base):
        '''
        Return `name` relative to the directory `base`, or `name` itself if
        `base` is empty.
        '''
        if not base:
            return name
        return posixpath.relpath(name, base)

    def get_raw(self, name):
        '''
        Return the bytes of the file `name` as currently stored on disk.

        :raises KeyError: If `name` is not a file in this container
        '''
        path = self.name_map[name]
        with open(path, 'rb') as f:
            return f.read()

    def get(self, name):
        '''
        Return the (cached) contents of the file `name`. Files with a known
        media type are passed through :meth:`_parse` first.
        '''
        if name in self.cache:
            return self.cache[name]
        raw = self.get_raw(name)
        if name in self.mime_map:
            raw = self._parse(raw, self.mime_map[name])
        self.cache[name] = raw
        return raw

    def set(self, name, val):
        '''
        Replace the cached contents of `name` with `val` and mark the file
        as needing to be written to disk.
        '''
        self.cache[name] = val
        self.dirtied.add(name)

    def _parse(self, raw, mimetype):
        '''
        Parse `raw` into an lxml tree if `mimetype` ends with +xml,
        otherwise return it unchanged.
        '''
        mt = mimetype.lower()
        if mt.endswith('+xml'):
            parser = etree.XMLParser(no_network=True, huge_tree=not iswindows)
            return etree.fromstring(xml_to_unicode(raw,
                strip_encoding_pats=True, assume_utf8=True)[0], parser=parser)
        return raw

    def write(self, path):
        '''
        Write all modified files back to disk, then zip the whole container
        directory into the file at `path`.
        '''
        for name in self.dirtied:
            data = self.cache[name]
            raw = data
            if hasattr(data, 'xpath'):
                # Parsed XML, serialize it again
                raw = etree.tostring(data, encoding='utf-8',
                        xml_declaration=True)
            with open(self.name_map[name], 'wb') as f:
                f.write(raw)
        self.dirtied.clear()
        zf = ZipFile(path, 'w')
        try:
            # The mimetype entry is written first and uncompressed
            zf.writestr('mimetype', bytes(guess_type('a.epub')[0]),
                    compression=ZIP_STORED)
            zf.add_dir(self.root)
        finally:
            # Always release the archive, even if adding files failed
            zf.close()
--- a/src/calibre/ebooks/epub/fix/epubcheck.py
+++ b/src/calibre/ebooks/epub/fix/epubcheck.py
@ -0,0 +1,82 @@
 #!/usr/bin/env python
 # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
 __license__   = 'GPL v3'
 __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 from calibre.ebooks.epub.fix import ePubFixer, InvalidEpub
 from calibre.utils.date import parse_date, strptime
 class Epubcheck(ePubFixer):
    '''
    Fixer that reports constructs epubcheck complains about and, when
    fixing is enabled, replaces them.
    '''

    name = 'Workaround epubcheck bugs'

    @property
    def short_description(self):
        return _('Workaround epubcheck bugs')

    @property
    def long_description(self):
        return _('Workarounds for bugs in the latest release of epubcheck. '
                'epubcheck reports many things as errors that are not '
                'actually errors. %prog will try to detect these and replace '
                'them with constructs that epubcheck likes. This may cause '
                'significant changes to your epub, complain to the epubcheck '
                'project.')

    @property
    def fix_name(self):
        return 'epubcheck'

    def fix_pubdates(self):
        '''
        Log every <dc:date> in the OPF whose text is not identical to its
        date formatted as YYYY-MM-DD and, if fixing is enabled, replace the
        text with that formatted date.

        :raises InvalidEpub: If the text of a date cannot be parsed
        '''
        dirtied = False
        opf = self.container.opf
        # Loop invariant, so computed only once
        default = strptime('2000-1-1', '%Y-%m-%d', as_utc=True)
        for dcdate in opf.xpath('//dc:date',
                namespaces={'dc':'http://purl.org/dc/elements/1.1/'}):
            raw = dcdate.text or ''
            try:
                ts = parse_date(raw, assume_utc=False, as_utc=True,
                        default=default)
            except Exception:
                # A bare except would also turn KeyboardInterrupt and
                # SystemExit into InvalidEpub
                raise InvalidEpub('Invalid date set in OPF', raw)
            sval = ts.strftime('%Y-%m-%d')
            if sval != raw:
                self.log.error(
                    'OPF contains date', raw, 'that epubcheck does not like')
                if self.fix:
                    dcdate.text = sval
                    self.log('\tReplaced', raw, 'with', sval)
                    dirtied = True
        if dirtied:
            self.container.set(self.container.opf_name, opf)

    def fix_preserve_aspect_ratio(self):
        '''
        Log every <svg> element with preserveAspectRatio="none" found in
        the XHTML files of the container and, if fixing is enabled, set the
        attribute to "xMidYMid meet".
        '''
        for name in self.container.name_map:
            mt = self.container.mime_map.get(name, '')
            if mt.lower() == 'application/xhtml+xml':
                root = self.container.get(name)
                dirtied = False
                for svg in root.xpath('//svg:svg[@preserveAspectRatio="none"]',
                        namespaces={'svg':'http://www.w3.org/2000/svg'}):
                    self.log.error('Found <svg> element with'
                            ' preserveAspectRatio="none" which epubcheck '
                            'cannot handle')
                    if self.fix:
                        svg.set('preserveAspectRatio', 'xMidYMid meet')
                        dirtied = True
                        self.log('\tReplaced none with xMidYMid meet')
                if dirtied:
                    self.container.set(name, root)

    def run(self, container, opts, log, fix=False):
        '''
        Run all checks of this fixer on `container`.

        :param container: The container to check
        :param opts: Parsed command line options, stored as ``self.opts``
        :param log: Log object that problems are reported to
        :param fix: If True problems are fixed, otherwise only reported
        '''
        self.container = container
        self.opts = opts
        self.log = log
        self.fix = fix
        self.fix_pubdates()
        self.fix_preserve_aspect_ratio()
--- a/src/calibre/ebooks/epub/fix/main.py
+++ b/src/calibre/ebooks/epub/fix/main.py
@ -0,0 +1,56 @@
 #!/usr/bin/env python
 # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
 __license__   = 'GPL v3'
 __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 import sys, os
 from calibre.utils.config import OptionParser
 from calibre.ptempfile import TemporaryDirectory
 from calibre import CurrentDir
 from calibre.utils.zipfile import ZipFile
 from calibre.utils.logging import default_log
 from calibre.customize.ui import epub_fixers
 from calibre.ebooks.epub.fix.container import Container
 def option_parser():
    '''
    Create the command line option parser, including the options of every
    fixer returned by epub_fixers().
    '''
    usage = _(
        '%prog [options] file.epub\n\n'
        'Fix common problems in EPUB files that can cause them '
        'to be rejected by poorly designed publishing services.\n\n'
        'By default, no fixing is done and messages are printed out '
        'for each error detected. Use the options to control which errors '
        'are automatically fixed.')
    parser = OptionParser(usage=usage)
    for plugin in epub_fixers():
        plugin.add_options_to_parser(parser)
    return parser
 def run(epub, opts, log):
    '''
    Extract `epub` into a temporary directory, run every fixer on it and
    write the result back to `epub`.

    :param epub: Path to the epub file. It is overwritten.
    :param opts: Parsed command line options. A fixer is only allowed to
                 make changes if the attribute named after its fix_name is
                 true.
    :param log: Log object passed on to the container and the fixers
    '''
    with TemporaryDirectory('_epub-fix') as tdir:
        with CurrentDir(tdir):
            zf = ZipFile(epub)
            try:
                # Extracts into the current directory, which is tdir
                zf.extractall()
            finally:
                # Do not leak the open archive if extraction fails
                zf.close()
            container = Container(tdir, log)
            for fixer in epub_fixers():
                fix = getattr(opts, fixer.fix_name, False)
                fixer.run(container, opts, log, fix=fix)
            container.write(epub)
 def main(args=sys.argv):
    '''
    Command line entry point. Expects exactly one positional argument, the
    path to the epub file, after the program name.
    '''
    parser = option_parser()
    opts, positional = parser.parse_args(args)
    if len(positional) == 2:
        run(os.path.abspath(positional[1]), opts, default_log)
        return
    # Wrong number of arguments: show the usage and complain
    parser.print_help()
    print
    default_log.error(_('You must specify an epub file'))
 if __name__ == '__main__':
    main()
--- a/src/calibre/ebooks/epub/fix/unmanifested.py
+++ b/src/calibre/ebooks/epub/fix/unmanifested.py
@ -0,0 +1,49 @@
 #!/usr/bin/env python
 # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
 __license__   = 'GPL v3'
 __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 from calibre.ebooks.epub.fix import ePubFixer
 class Unmanifested(ePubFixer):
    '''
    Fixer for files that are present in the container but have no entry in
    the OPF manifest.
    '''

    name = 'Fix unmanifested files'

    @property
    def short_description(self):
        return _('Fix unmanifested files')

    @property
    def long_description(self):
        return _('Fix unmanifested files. %prog can either add them to '
        'the manifest or delete them as specified by the '
        'delete unmanifested option.')

    @property
    def fix_name(self):
        return 'unmanifested'

    @property
    def options(self):
        return [('delete_unmanifested', 'bool', False,
            _('Delete unmanifested files instead of adding them to the manifest'))]

    def run(self, container, opts, log, fix=False):
        '''
        Log every manifest worthy file without a manifest entry. If `fix`
        is True such files are deleted when opts.delete_unmanifested is
        set and added to the manifest otherwise.
        '''
        manifest_changed = False
        # Iterate over a snapshot, as deleting changes the container's names
        for name in list(container.manifest_worthy_names()):
            if container.manifest_item_for_name(name) is not None:
                continue
            log.error(name, 'not in manifest')
            if not fix:
                continue
            if opts.delete_unmanifested:
                container.delete_name(name)
                log('\tDeleted')
            else:
                container.add_name_to_manifest(name)
                log('\tAdded to manifest')
                manifest_changed = True
        if manifest_changed:
            # Mark the OPF as modified so that it is written out
            container.set(container.opf_name, container.opf)
--- a/src/calibre/ebooks/lrf/input.py
+++ b/src/calibre/ebooks/lrf/input.py
@ -367,7 +367,7 @@ class LRFInput(InputFormatPlugin):
        xml = d.to_xml(write_files=True)
        if options.verbose > 2:
            open('lrs.xml', 'wb').write(xml.encode('utf-8'))
-        parser = etree.XMLParser(recover=True, no_network=True, huge_tree=True)
+        parser = etree.XMLParser(no_network=True, huge_tree=True)
        doc = etree.fromstring(xml, parser=parser)
        char_button_map = {}
        for x in doc.xpath('//CharButton[@refobj]'):
--- a/src/calibre/ebooks/lrf/objects.py
+++ b/src/calibre/ebooks/lrf/objects.py
@ -870,7 +870,7 @@ class Text(LRFStream):
        open_containers = collections.deque()
        for c in self.content:
            if isinstance(c, basestring):
-                s += prepare_string_for_xml(c)
+                s += prepare_string_for_xml(c).replace('\0', '')
            elif c is None:
                if open_containers:
                    p = open_containers.pop()
--- a/src/calibre/ebooks/oeb/base.py
+++ b/src/calibre/ebooks/oeb/base.py
@ -26,7 +26,7 @@ from calibre.ebooks.chardet import xml_to_unicode
 from calibre.ebooks.oeb.entitydefs import ENTITYDEFS
 from calibre.ebooks.conversion.preprocess import CSSPreProcessor
-RECOVER_PARSER = etree.XMLParser(recover=True, no_network=True, huge_tree=True)
+RECOVER_PARSER = etree.XMLParser(recover=True, no_network=True)
 XML_NS       = 'http://www.w3.org/XML/1998/namespace'
 XHTML_NS     = 'http://www.w3.org/1999/xhtml'
--- a/src/calibre/ebooks/txt/input.py
+++ b/src/calibre/ebooks/txt/input.py
@ -10,6 +10,7 @@ from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
 from calibre.ebooks.txt.processor import convert_basic, convert_markdown, \
    separate_paragraphs_single_line, separate_paragraphs_print_formatted, \
    preserve_spaces
 from calibre import _ent_pat, xml_entity_to_unicode
 class TXTInput(InputFormatPlugin):
@ -55,6 +56,8 @@ class TXTInput(InputFormatPlugin):
        if options.preserve_spaces:
            txt = preserve_spaces(txt)
        txt = _ent_pat.sub(xml_entity_to_unicode, txt)
        if options.markdown:
            log.debug('Running text though markdown conversion...')
            try:
--- a/src/calibre/gui2/tools.py
+++ b/src/calibre/gui2/tools.py
@ -221,6 +221,8 @@ def fetch_scheduled_recipe(arg):
    if lf.get('base_font_size', 0.0) != 0.0:
        recs.append(('base_font_size', lf['base_font_size'],
            OptionRecommendation.HIGH))
        recs.append(('keep_ligatures', lf['keep_ligatures'],
            OptionRecommendation.HIGH))
    lr = load_defaults('lrf_output')
    if lr.get('header', False):
--- a/src/calibre/linux.py
+++ b/src/calibre/linux.py
@ -29,6 +29,7 @@ entry_points = {
             'calibre-complete   = calibre.utils.complete:main',
             'pdfmanipulate      = calibre.ebooks.pdf.manipulate.cli:main',
             'fetch-ebook-metadata = calibre.ebooks.metadata.fetch:main',
             'epub-fix           = calibre.ebooks.epub.fix.main:main',
             'calibre-smtp = calibre.utils.smtp:main',
        ],
        'gui_scripts'    : [
@ -180,6 +181,7 @@ class PostInstall:
            from calibre.ebooks.metadata.fetch import option_parser as fem_op
            from calibre.gui2.main import option_parser as guiop
            from calibre.utils.smtp import option_parser as smtp_op
            from calibre.ebooks.epub.fix.main import option_parser as fix_op
            any_formats = ['epub', 'htm', 'html', 'xhtml', 'xhtm', 'rar', 'zip',
                'txt', 'lit', 'rtf', 'pdf', 'prc', 'mobi', 'fb2', 'odt', 'lrf']
            bc = os.path.join(os.path.dirname(self.opts.staging_sharedir),
@ -201,6 +203,7 @@ class PostInstall:
                f.write(opts_and_exts('ebook-viewer', viewer_op, any_formats))
                f.write(opts_and_words('fetch-ebook-metadata', fem_op, []))
                f.write(opts_and_words('calibre-smtp', smtp_op, []))
                f.write(opts_and_exts('epub-fix', fix_op, ['epub']))
                f.write(textwrap.dedent('''
                _ebook_device_ls()
                {