Remove the unmaintained epub fix code

2025-07-09 03:04:10 -04:00 · 2013-02-12 17:35:55 +05:30 · 2013-02-12 17:35:55 +05:30 · 227b13186e
commit 227b13186e
parent ad307cf23d
6 changed files with 0 additions and 503 deletions
--- a/src/calibre/customize/ui.py
+++ b/src/calibre/customize/ui.py
@@ -17,7 +17,6 @@ from calibre.devices.interface import DevicePlugin
 from calibre.ebooks.metadata import MetaInformation
 from calibre.utils.config import (make_config_dir, Config, ConfigProxy,
                                 plugin_dir, OptionParser)
-from calibre.ebooks.epub.fix import ePubFixer
 from calibre.ebooks.metadata.sources.base import Source
 from calibre.constants import DEBUG

@@ -489,15 +488,6 @@ def disabled_device_plugins():
                    yield plugin
 # }}}

-# epub fixers {{{
-def epub_fixers():
-    for plugin in _initialized_plugins:
-        if isinstance(plugin, ePubFixer):
-            if not is_disabled(plugin):
-                if platform in plugin.supported_platforms:
-                    yield plugin
-# }}}
-
 # Metadata sources2 {{{
 def metadata_plugins(capabilities):
    capabilities = frozenset(capabilities)
--- a/src/calibre/ebooks/epub/fix/__init__.py
+++ b/src/calibre/ebooks/epub/fix/__init__.py
@@ -1,67 +0,0 @@
-#!/usr/bin/env python
-# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
-
-__license__   = 'GPL v3'
-__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
-__docformat__ = 'restructuredtext en'
-
-
-from calibre.customize import Plugin
-
-class InvalidEpub(ValueError):
-    pass
-
-class ParseError(ValueError):
-
-    def __init__(self, name, desc):
-        self.name = name
-        self.desc = desc
-        ValueError.__init__(self,
-            _('Failed to parse: %(name)s with error: %(err)s')%dict(
-                name=name, err=desc))
-
-class ePubFixer(Plugin):
-
-    supported_platforms = ['windows', 'osx', 'linux']
-    author = 'Kovid Goyal'
-    type = _('ePub Fixer')
-    can_be_disabled = True
-
-    # API that subclasses must implement {{{
-    @property
-    def short_description(self):
-        raise NotImplementedError
-
-    @property
-    def long_description(self):
-        raise NotImplementedError
-
-    @property
-    def fix_name(self):
-        raise NotImplementedError
-
-    @property
-    def options(self):
-        '''
-        Return a list of 4-tuples
-        (option_name, type, default, help_text)
-        type is one of 'bool', 'int', 'string'
-        '''
-        return []
-
-    def run(self, container, opts, log, fix=False):
-        raise NotImplementedError
-    # }}}
-
-    def add_options_to_parser(self, parser):
-        parser.add_option('--' + self.fix_name.replace('_', '-'),
-                help=self.long_description, action='store_true', default=False)
-        for option in self.options:
-            action = 'store'
-            if option[1] == 'bool':
-                action = 'store_true'
-            kwargs = {'action': action, 'default':option[2], 'help':option[3]}
-            if option[1] != 'bool':
-                kwargs['type'] = option[1]
-            parser.add_option('--'+option[0].replace('_', '-'), **kwargs)
-
--- a/src/calibre/ebooks/epub/fix/container.py
+++ b/src/calibre/ebooks/epub/fix/container.py
@@ -1,220 +0,0 @@
-#!/usr/bin/env python
-# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
-
-__license__   = 'GPL v3'
-__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
-__docformat__ = 'restructuredtext en'
-
-import os, posixpath, urllib, sys, re
-
-from lxml import etree
-from lxml.etree import XMLSyntaxError
-
-from calibre.ebooks.epub.fix import InvalidEpub, ParseError
-from calibre import guess_type, prepare_string_for_xml
-from calibre.ebooks.chardet import xml_to_unicode
-from calibre.constants import iswindows
-from calibre.utils.zipfile import ZipFile, ZIP_STORED
-
-exists, join = os.path.exists, os.path.join
-
-OCF_NS = 'urn:oasis:names:tc:opendocument:xmlns:container'
-OPF_NS = 'http://www.idpf.org/2007/opf'
-
-class Container(object):
-
-    META_INF = {
-            'container.xml' : True,
-            'manifest.xml' : False,
-            'encryption.xml' : False,
-            'metadata.xml' : False,
-            'signatures.xml' : False,
-            'rights.xml' : False,
-    }
-
-    def __init__(self, path, log):
-        self.root = os.path.abspath(path)
-        self.log = log
-        self.dirtied = set([])
-        self.cache = {}
-        self.mime_map = {}
-
-        if exists(join(self.root, 'mimetype')):
-            os.remove(join(self.root, 'mimetype'))
-
-        container_path = join(self.root, 'META-INF', 'container.xml')
-        if not exists(container_path):
-            raise InvalidEpub('No META-INF/container.xml in epub')
-        self.container = etree.fromstring(open(container_path, 'rb').read())
-        opf_files = self.container.xpath((
-            r'child::ocf:rootfiles/ocf:rootfile'
-            '[@media-type="%s" and @full-path]'%guess_type('a.opf')[0]
-            ), namespaces={'ocf':OCF_NS}
-        )
-        if not opf_files:
-            raise InvalidEpub('META-INF/container.xml contains no link to OPF file')
-        opf_path = os.path.join(self.root,
-                *opf_files[0].get('full-path').split('/'))
-        if not exists(opf_path):
-            raise InvalidEpub('OPF file does not exist at location pointed to'
-                    ' by META-INF/container.xml')
-
-        # Map of relative paths with / separators to absolute
-        # paths on filesystem with os separators
-        self.name_map = {}
-        for dirpath, dirnames, filenames in os.walk(self.root):
-            for f in filenames:
-                path = join(dirpath, f)
-                name = os.path.relpath(path, self.root).replace(os.sep, '/')
-                self.name_map[name] = path
-                if path == opf_path:
-                    self.opf_name = name
-                    self.mime_map[name] = guess_type('a.opf')[0]
-
-        for item in self.opf.xpath(
-                '//opf:manifest/opf:item[@href and @media-type]',
-                namespaces={'opf':OPF_NS}):
-            href = item.get('href')
-            self.mime_map[self.href_to_name(href,
-                posixpath.dirname(self.opf_name))] = item.get('media-type')
-
-    def manifest_worthy_names(self):
-        for name in self.name_map:
-            if name.endswith('.opf'): continue
-            if name.startswith('META-INF') and \
-                    posixpath.basename(name) in self.META_INF: continue
-            yield name
-
-    def delete_name(self, name):
-        self.mime_map.pop(name, None)
-        path = self.name_map[name]
-        os.remove(path)
-        self.name_map.pop(name)
-
-    def manifest_item_for_name(self, name):
-        href = self.name_to_href(name,
-            posixpath.dirname(self.opf_name))
-        q = prepare_string_for_xml(href, attribute=True)
-        existing = self.opf.xpath('//opf:manifest/opf:item[@href="%s"]'%q,
-                namespaces={'opf':OPF_NS})
-        if not existing:
-            return None
-        return existing[0]
-
-    def add_name_to_manifest(self, name, mt=None):
-        item = self.manifest_item_for_name(name)
-        if item is not None:
-            return
-        manifest = self.opf.xpath('//opf:manifest', namespaces={'opf':OPF_NS})[0]
-        item = manifest.makeelement('{%s}item'%OPF_NS, nsmap={'opf':OPF_NS},
-                href=self.name_to_href(name, posixpath.dirname(self.opf_name)),
-                id=self.generate_manifest_id())
-        if not mt:
-            mt = guess_type(posixpath.basename(name))[0]
-        if not mt:
-            mt = 'application/octest-stream'
-        item.set('media-type', mt)
-        manifest.append(item)
-        self.fix_tail(item)
-
-    def fix_tail(self, item):
-        '''
-        Designed only to work with self closing elements after item has
-        just been inserted/appended
-        '''
-        parent = item.getparent()
-        idx = parent.index(item)
-        if idx == 0:
-            item.tail = parent.text
-        else:
-            item.tail = parent[idx-1].tail
-            if idx == len(parent)-1:
-                parent[idx-1].tail = parent.text
-
-    def generate_manifest_id(self):
-        items = self.opf.xpath('//opf:manifest/opf:item[@id]',
-                namespaces={'opf':OPF_NS})
-        ids = set([x.get('id') for x in items])
-        for x in xrange(sys.maxint):
-            c = 'id%d'%x
-            if c not in ids:
-                return c
-
-    @property
-    def opf(self):
-        return self.get(self.opf_name)
-
-    def href_to_name(self, href, base=''):
-        href = urllib.unquote(href.partition('#')[0])
-        name = href
-        if base:
-            name = posixpath.join(base, href)
-        return name
-
-    def name_to_href(self, name, base):
-        if not base:
-            return name
-        return posixpath.relpath(name, base)
-
-    def get_raw(self, name):
-        path = self.name_map[name]
-        return open(path, 'rb').read()
-
-    def get(self, name):
-        if name in self.cache:
-            return self.cache[name]
-        raw = self.get_raw(name)
-        if name in self.mime_map:
-            try:
-                raw = self._parse(raw, self.mime_map[name])
-            except XMLSyntaxError as err:
-                raise ParseError(name, unicode(err))
-        self.cache[name] = raw
-        return raw
-
-    def set(self, name, val):
-        self.cache[name] = val
-        self.dirtied.add(name)
-
-    def _parse(self, raw, mimetype):
-        mt = mimetype.lower()
-        if mt.endswith('+xml'):
-            parser = etree.XMLParser(no_network=True, huge_tree=not iswindows)
-            raw = xml_to_unicode(raw,
-                strip_encoding_pats=True, assume_utf8=True,
-                resolve_entities=True)[0].strip()
-            idx = raw.find('<html')
-            if idx == -1:
-                idx = raw.find('<HTML')
-            if idx > -1:
-                pre = raw[:idx]
-                raw = raw[idx:]
-                if '<!DOCTYPE' in pre:
-                    user_entities = {}
-                    for match in re.finditer(r'<!ENTITY\s+(\S+)\s+([^>]+)', pre):
-                        val = match.group(2)
-                        if val.startswith('"') and val.endswith('"'):
-                            val = val[1:-1]
-                        user_entities[match.group(1)] = val
-                    if user_entities:
-                        pat = re.compile(r'&(%s);'%('|'.join(user_entities.keys())))
-                        raw = pat.sub(lambda m:user_entities[m.group(1)], raw)
-            return etree.fromstring(raw, parser=parser)
-        return raw
-
-    def write(self, path):
-        for name in self.dirtied:
-            data = self.cache[name]
-            raw = data
-            if hasattr(data, 'xpath'):
-                raw = etree.tostring(data, encoding='utf-8',
-                        xml_declaration=True)
-            with open(self.name_map[name], 'wb') as f:
-                f.write(raw)
-        self.dirtied.clear()
-        zf = ZipFile(path, 'w')
-        zf.writestr('mimetype', bytes(guess_type('a.epub')[0]),
-                compression=ZIP_STORED)
-        zf.add_dir(self.root)
-        zf.close()
-
--- a/src/calibre/ebooks/epub/fix/epubcheck.py
+++ b/src/calibre/ebooks/epub/fix/epubcheck.py
@@ -1,91 +0,0 @@
-#!/usr/bin/env python
-# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
-
-__license__   = 'GPL v3'
-__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
-__docformat__ = 'restructuredtext en'
-
-from calibre.ebooks.epub.fix import ePubFixer, InvalidEpub
-
-
-class Epubcheck(ePubFixer):
-
-    name = 'Workaround epubcheck bugs'
-
-    @property
-    def short_description(self):
-        return _('Workaround epubcheck bugs')
-
-    @property
-    def long_description(self):
-        return _('Workarounds for bugs in the latest release of epubcheck. '
-                'epubcheck reports many things as errors that are not '
-                'actually errors. epub-fix will try to detect these and replace '
-                'them with constructs that epubcheck likes. This may cause '
-                'significant changes to your epub, complain to the epubcheck '
-                'project.')
-
-    @property
-    def description(self):
-        return self.long_description
-
-    @property
-    def fix_name(self):
-        return 'epubcheck'
-
-    def fix_pubdates(self):
-        from calibre.utils.date import parse_date, strptime
-
-        dirtied = False
-        opf = self.container.opf
-        for dcdate in opf.xpath('//dc:date',
-                namespaces={'dc':'http://purl.org/dc/elements/1.1/'}):
-            raw = dcdate.text
-            if not raw: raw = ''
-            default = strptime('2000-1-1', '%Y-%m-%d', as_utc=True)
-            try:
-                ts = parse_date(raw, assume_utc=False, as_utc=True,
-                        default=default)
-            except:
-                raise InvalidEpub('Invalid date set in OPF', raw)
-            try:
-                sval = ts.strftime('%Y-%m-%d')
-            except:
-                from calibre import strftime
-                sval = strftime('%Y-%m-%d', ts.timetuple())
-            if sval != raw:
-                self.log.error(
-                    'OPF contains date', raw, 'that epubcheck does not like')
-                if self.fix:
-                    dcdate.text = sval
-                    self.log('\tReplaced', raw, 'with', sval)
-                    dirtied = True
-        if dirtied:
-            self.container.set(self.container.opf_name, opf)
-
-    def fix_preserve_aspect_ratio(self):
-        for name in self.container.name_map:
-            mt = self.container.mime_map.get(name, '')
-            if mt.lower() == 'application/xhtml+xml':
-                root = self.container.get(name)
-                dirtied = False
-                for svg in root.xpath('//svg:svg[@preserveAspectRatio="none"]',
-                        namespaces={'svg':'http://www.w3.org/2000/svg'}):
-                    self.log.error('Found <svg> element with'
-                            ' preserveAspectRatio="none" which epubcheck '
-                            'cannot handle')
-                    if self.fix:
-                        svg.set('preserveAspectRatio', 'xMidYMid meet')
-                        dirtied = True
-                        self.log('\tReplaced none with xMidYMid meet')
-                if dirtied:
-                    self.container.set(name, root)
-
-
-    def run(self, container, opts, log, fix=False):
-        self.container = container
-        self.opts = opts
-        self.log = log
-        self.fix = fix
-        self.fix_pubdates()
-        self.fix_preserve_aspect_ratio()
--- a/src/calibre/ebooks/epub/fix/main.py
+++ b/src/calibre/ebooks/epub/fix/main.py
@@ -1,62 +0,0 @@
-#!/usr/bin/env python
-# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
-
-__license__   = 'GPL v3'
-__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
-__docformat__ = 'restructuredtext en'
-
-import sys, os
-
-from calibre.utils.config import OptionParser
-from calibre.ptempfile import TemporaryDirectory
-from calibre import CurrentDir
-from calibre.utils.zipfile import ZipFile
-from calibre.utils.logging import default_log
-from calibre.customize.ui import epub_fixers
-from calibre.ebooks.epub.fix.container import Container
-from calibre.ebooks.epub.fix import ParseError
-
-
-def option_parser():
-    parser = OptionParser(usage=_(
-        '%prog [options] file.epub\n\n'
-        'Fix common problems in EPUB files that can cause them '
-        'to be rejected by poorly designed publishing services.\n\n'
-        'By default, no fixing is done and messages are printed out '
-        'for each error detected. Use the options to control which errors '
-        'are automatically fixed.'))
-    for fixer in epub_fixers():
-        fixer.add_options_to_parser(parser)
-
-    return parser
-
-
-def run(epub, opts, log):
-    with TemporaryDirectory('_epub-fix') as tdir:
-        with CurrentDir(tdir):
-            zf = ZipFile(epub)
-            zf.extractall()
-            zf.close()
-            container = Container(tdir, log)
-            for fixer in epub_fixers():
-                fix = getattr(opts, fixer.fix_name, False)
-                fixer.run(container, opts, log, fix=fix)
-            container.write(epub)
-
-def main(args=sys.argv):
-    parser = option_parser()
-    opts, args = parser.parse_args(args)
-    if len(args) != 2:
-        parser.print_help()
-        print
-        default_log.error(_('You must specify an epub file'))
-        return
-    epub = os.path.abspath(args[1])
-    try:
-        run(epub, opts, default_log)
-    except ParseError as err:
-        default_log.error(unicode(err))
-        raise SystemExit(1)
-
-if __name__ == '__main__':
-    main()
--- a/src/calibre/ebooks/epub/fix/unmanifested.py
+++ b/src/calibre/ebooks/epub/fix/unmanifested.py
@@ -1,53 +0,0 @@
-#!/usr/bin/env python
-# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
-
-__license__   = 'GPL v3'
-__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
-__docformat__ = 'restructuredtext en'
-
-
-from calibre.ebooks.epub.fix import ePubFixer
-
-class Unmanifested(ePubFixer):
-
-    name = 'Fix unmanifested files'
-
-    @property
-    def short_description(self):
-        return _('Fix unmanifested files')
-
-    @property
-    def long_description(self):
-        return _('Fix unmanifested files. epub-fix can either add them to '
-        'the manifest or delete them as specified by the '
-        'delete unmanifested option.')
-
-    @property
-    def description(self):
-        return self.long_description
-
-    @property
-    def fix_name(self):
-        return 'unmanifested'
-
-    @property
-    def options(self):
-        return [('delete_unmanifested', 'bool', False,
-            _('Delete unmanifested files instead of adding them to the manifest'))]
-
-    def run(self, container, opts, log, fix=False):
-        dirtied = False
-        for name in list(container.manifest_worthy_names()):
-            item = container.manifest_item_for_name(name)
-            if item is None:
-                log.error(name, 'not in manifest')
-                if fix:
-                    if opts.delete_unmanifested:
-                        container.delete_name(name)
-                        log('\tDeleted')
-                    else:
-                        container.add_name_to_manifest(name)
-                        log('\tAdded to manifest')
-                        dirtied = True
-        if dirtied:
-            container.set(container.opf_name, container.opf)