From 227b13186e34f094236ccfd9775b1027b3158458 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 12 Feb 2013 17:35:55 +0530 Subject: [PATCH] Remove the unmaintained epub fix code --- src/calibre/customize/ui.py | 10 - src/calibre/ebooks/epub/fix/__init__.py | 67 ------ src/calibre/ebooks/epub/fix/container.py | 220 -------------------- src/calibre/ebooks/epub/fix/epubcheck.py | 91 -------- src/calibre/ebooks/epub/fix/main.py | 62 ------ src/calibre/ebooks/epub/fix/unmanifested.py | 53 ----- 6 files changed, 503 deletions(-) delete mode 100644 src/calibre/ebooks/epub/fix/__init__.py delete mode 100644 src/calibre/ebooks/epub/fix/container.py delete mode 100644 src/calibre/ebooks/epub/fix/epubcheck.py delete mode 100644 src/calibre/ebooks/epub/fix/main.py delete mode 100644 src/calibre/ebooks/epub/fix/unmanifested.py diff --git a/src/calibre/customize/ui.py b/src/calibre/customize/ui.py index f08859b6e7..849d1a21f4 100644 --- a/src/calibre/customize/ui.py +++ b/src/calibre/customize/ui.py @@ -17,7 +17,6 @@ from calibre.devices.interface import DevicePlugin from calibre.ebooks.metadata import MetaInformation from calibre.utils.config import (make_config_dir, Config, ConfigProxy, plugin_dir, OptionParser) -from calibre.ebooks.epub.fix import ePubFixer from calibre.ebooks.metadata.sources.base import Source from calibre.constants import DEBUG @@ -489,15 +488,6 @@ def disabled_device_plugins(): yield plugin # }}} -# epub fixers {{{ -def epub_fixers(): - for plugin in _initialized_plugins: - if isinstance(plugin, ePubFixer): - if not is_disabled(plugin): - if platform in plugin.supported_platforms: - yield plugin -# }}} - # Metadata sources2 {{{ def metadata_plugins(capabilities): capabilities = frozenset(capabilities) diff --git a/src/calibre/ebooks/epub/fix/__init__.py b/src/calibre/ebooks/epub/fix/__init__.py deleted file mode 100644 index f1d396148c..0000000000 --- a/src/calibre/ebooks/epub/fix/__init__.py +++ /dev/null @@ -1,67 +0,0 @@ -#!/usr/bin/env python -# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai - -__license__ = 'GPL v3' -__copyright__ = '2010, Kovid Goyal ' -__docformat__ = 'restructuredtext en' - - -from calibre.customize import Plugin - -class InvalidEpub(ValueError): - pass - -class ParseError(ValueError): - - def __init__(self, name, desc): - self.name = name - self.desc = desc - ValueError.__init__(self, - _('Failed to parse: %(name)s with error: %(err)s')%dict( - name=name, err=desc)) - -class ePubFixer(Plugin): - - supported_platforms = ['windows', 'osx', 'linux'] - author = 'Kovid Goyal' - type = _('ePub Fixer') - can_be_disabled = True - - # API that subclasses must implement {{{ - @property - def short_description(self): - raise NotImplementedError - - @property - def long_description(self): - raise NotImplementedError - - @property - def fix_name(self): - raise NotImplementedError - - @property - def options(self): - ''' - Return a list of 4-tuples - (option_name, type, default, help_text) - type is one of 'bool', 'int', 'string' - ''' - return [] - - def run(self, container, opts, log, fix=False): - raise NotImplementedError - # }}} - - def add_options_to_parser(self, parser): - parser.add_option('--' + self.fix_name.replace('_', '-'), - help=self.long_description, action='store_true', default=False) - for option in self.options: - action = 'store' - if option[1] == 'bool': - action = 'store_true' - kwargs = {'action': action, 'default':option[2], 'help':option[3]} - if option[1] != 'bool': - kwargs['type'] = option[1] - parser.add_option('--'+option[0].replace('_', '-'), **kwargs) - diff --git a/src/calibre/ebooks/epub/fix/container.py b/src/calibre/ebooks/epub/fix/container.py deleted file mode 100644 index 691bf7132a..0000000000 --- a/src/calibre/ebooks/epub/fix/container.py +++ /dev/null @@ -1,220 +0,0 @@ -#!/usr/bin/env python -# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai - -__license__ = 'GPL v3' -__copyright__ = '2010, Kovid Goyal ' -__docformat__ = 'restructuredtext en' - -import os, posixpath, urllib, sys, re - -from lxml import etree -from lxml.etree import XMLSyntaxError - -from calibre.ebooks.epub.fix import InvalidEpub, ParseError -from calibre import guess_type, prepare_string_for_xml -from calibre.ebooks.chardet import xml_to_unicode -from calibre.constants import iswindows -from calibre.utils.zipfile import ZipFile, ZIP_STORED - -exists, join = os.path.exists, os.path.join - -OCF_NS = 'urn:oasis:names:tc:opendocument:xmlns:container' -OPF_NS = 'http://www.idpf.org/2007/opf' - -class Container(object): - - META_INF = { - 'container.xml' : True, - 'manifest.xml' : False, - 'encryption.xml' : False, - 'metadata.xml' : False, - 'signatures.xml' : False, - 'rights.xml' : False, - } - - def __init__(self, path, log): - self.root = os.path.abspath(path) - self.log = log - self.dirtied = set([]) - self.cache = {} - self.mime_map = {} - - if exists(join(self.root, 'mimetype')): - os.remove(join(self.root, 'mimetype')) - - container_path = join(self.root, 'META-INF', 'container.xml') - if not exists(container_path): - raise InvalidEpub('No META-INF/container.xml in epub') - self.container = etree.fromstring(open(container_path, 'rb').read()) - opf_files = self.container.xpath(( - r'child::ocf:rootfiles/ocf:rootfile' - '[@media-type="%s" and @full-path]'%guess_type('a.opf')[0] - ), namespaces={'ocf':OCF_NS} - ) - if not opf_files: - raise InvalidEpub('META-INF/container.xml contains no link to OPF file') - opf_path = os.path.join(self.root, - *opf_files[0].get('full-path').split('/')) - if not exists(opf_path): - raise InvalidEpub('OPF file does not exist at location pointed to' - ' by META-INF/container.xml') - - # Map of relative paths with / separators to absolute - # paths on filesystem with os separators - self.name_map = {} - for dirpath, dirnames, filenames in os.walk(self.root): - for f in filenames: - path = join(dirpath, f) - name = os.path.relpath(path, self.root).replace(os.sep, '/') - self.name_map[name] = path - if path == opf_path: - self.opf_name = name - self.mime_map[name] = guess_type('a.opf')[0] - - for item in self.opf.xpath( - '//opf:manifest/opf:item[@href and @media-type]', - namespaces={'opf':OPF_NS}): - href = item.get('href') - self.mime_map[self.href_to_name(href, - posixpath.dirname(self.opf_name))] = item.get('media-type') - - def manifest_worthy_names(self): - for name in self.name_map: - if name.endswith('.opf'): continue - if name.startswith('META-INF') and \ - posixpath.basename(name) in self.META_INF: continue - yield name - - def delete_name(self, name): - self.mime_map.pop(name, None) - path = self.name_map[name] - os.remove(path) - self.name_map.pop(name) - - def manifest_item_for_name(self, name): - href = self.name_to_href(name, - posixpath.dirname(self.opf_name)) - q = prepare_string_for_xml(href, attribute=True) - existing = self.opf.xpath('//opf:manifest/opf:item[@href="%s"]'%q, - namespaces={'opf':OPF_NS}) - if not existing: - return None - return existing[0] - - def add_name_to_manifest(self, name, mt=None): - item = self.manifest_item_for_name(name) - if item is not None: - return - manifest = self.opf.xpath('//opf:manifest', namespaces={'opf':OPF_NS})[0] - item = manifest.makeelement('{%s}item'%OPF_NS, nsmap={'opf':OPF_NS}, - href=self.name_to_href(name, posixpath.dirname(self.opf_name)), - id=self.generate_manifest_id()) - if not mt: - mt = guess_type(posixpath.basename(name))[0] - if not mt: - mt = 'application/octest-stream' - item.set('media-type', mt) - manifest.append(item) - self.fix_tail(item) - - def fix_tail(self, item): - ''' - Designed only to work with self closing elements after item has - just been inserted/appended - ''' - parent = item.getparent() - idx = parent.index(item) - if idx == 0: - item.tail = parent.text - else: - item.tail = parent[idx-1].tail - if idx == len(parent)-1: - parent[idx-1].tail = parent.text - - def generate_manifest_id(self): - items = self.opf.xpath('//opf:manifest/opf:item[@id]', - namespaces={'opf':OPF_NS}) - ids = set([x.get('id') for x in items]) - for x in xrange(sys.maxint): - c = 'id%d'%x - if c not in ids: - return c - - @property - def opf(self): - return self.get(self.opf_name) - - def href_to_name(self, href, base=''): - href = urllib.unquote(href.partition('#')[0]) - name = href - if base: - name = posixpath.join(base, href) - return name - - def name_to_href(self, name, base): - if not base: - return name - return posixpath.relpath(name, base) - - def get_raw(self, name): - path = self.name_map[name] - return open(path, 'rb').read() - - def get(self, name): - if name in self.cache: - return self.cache[name] - raw = self.get_raw(name) - if name in self.mime_map: - try: - raw = self._parse(raw, self.mime_map[name]) - except XMLSyntaxError as err: - raise ParseError(name, unicode(err)) - self.cache[name] = raw - return raw - - def set(self, name, val): - self.cache[name] = val - self.dirtied.add(name) - - def _parse(self, raw, mimetype): - mt = mimetype.lower() - if mt.endswith('+xml'): - parser = etree.XMLParser(no_network=True, huge_tree=not iswindows) - raw = xml_to_unicode(raw, - strip_encoding_pats=True, assume_utf8=True, - resolve_entities=True)[0].strip() - idx = raw.find(' -1: - pre = raw[:idx] - raw = raw[idx:] - if ']+)', pre): - val = match.group(2) - if val.startswith('"') and val.endswith('"'): - val = val[1:-1] - user_entities[match.group(1)] = val - if user_entities: - pat = re.compile(r'&(%s);'%('|'.join(user_entities.keys()))) - raw = pat.sub(lambda m:user_entities[m.group(1)], raw) - return etree.fromstring(raw, parser=parser) - return raw - - def write(self, path): - for name in self.dirtied: - data = self.cache[name] - raw = data - if hasattr(data, 'xpath'): - raw = etree.tostring(data, encoding='utf-8', - xml_declaration=True) - with open(self.name_map[name], 'wb') as f: - f.write(raw) - self.dirtied.clear() - zf = ZipFile(path, 'w') - zf.writestr('mimetype', bytes(guess_type('a.epub')[0]), - compression=ZIP_STORED) - zf.add_dir(self.root) - zf.close() - diff --git a/src/calibre/ebooks/epub/fix/epubcheck.py b/src/calibre/ebooks/epub/fix/epubcheck.py deleted file mode 100644 index 0029868c23..0000000000 --- a/src/calibre/ebooks/epub/fix/epubcheck.py +++ /dev/null @@ -1,91 +0,0 @@ -#!/usr/bin/env python -# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai - -__license__ = 'GPL v3' -__copyright__ = '2010, Kovid Goyal ' -__docformat__ = 'restructuredtext en' - -from calibre.ebooks.epub.fix import ePubFixer, InvalidEpub - - -class Epubcheck(ePubFixer): - - name = 'Workaround epubcheck bugs' - - @property - def short_description(self): - return _('Workaround epubcheck bugs') - - @property - def long_description(self): - return _('Workarounds for bugs in the latest release of epubcheck. ' - 'epubcheck reports many things as errors that are not ' - 'actually errors. epub-fix will try to detect these and replace ' - 'them with constructs that epubcheck likes. This may cause ' - 'significant changes to your epub, complain to the epubcheck ' - 'project.') - - @property - def description(self): - return self.long_description - - @property - def fix_name(self): - return 'epubcheck' - - def fix_pubdates(self): - from calibre.utils.date import parse_date, strptime - - dirtied = False - opf = self.container.opf - for dcdate in opf.xpath('//dc:date', - namespaces={'dc':'http://purl.org/dc/elements/1.1/'}): - raw = dcdate.text - if not raw: raw = '' - default = strptime('2000-1-1', '%Y-%m-%d', as_utc=True) - try: - ts = parse_date(raw, assume_utc=False, as_utc=True, - default=default) - except: - raise InvalidEpub('Invalid date set in OPF', raw) - try: - sval = ts.strftime('%Y-%m-%d') - except: - from calibre import strftime - sval = strftime('%Y-%m-%d', ts.timetuple()) - if sval != raw: - self.log.error( - 'OPF contains date', raw, 'that epubcheck does not like') - if self.fix: - dcdate.text = sval - self.log('\tReplaced', raw, 'with', sval) - dirtied = True - if dirtied: - self.container.set(self.container.opf_name, opf) - - def fix_preserve_aspect_ratio(self): - for name in self.container.name_map: - mt = self.container.mime_map.get(name, '') - if mt.lower() == 'application/xhtml+xml': - root = self.container.get(name) - dirtied = False - for svg in root.xpath('//svg:svg[@preserveAspectRatio="none"]', - namespaces={'svg':'http://www.w3.org/2000/svg'}): - self.log.error('Found element with' - ' preserveAspectRatio="none" which epubcheck ' - 'cannot handle') - if self.fix: - svg.set('preserveAspectRatio', 'xMidYMid meet') - dirtied = True - self.log('\tReplaced none with xMidYMid meet') - if dirtied: - self.container.set(name, root) - - - def run(self, container, opts, log, fix=False): - self.container = container - self.opts = opts - self.log = log - self.fix = fix - self.fix_pubdates() - self.fix_preserve_aspect_ratio() diff --git a/src/calibre/ebooks/epub/fix/main.py b/src/calibre/ebooks/epub/fix/main.py deleted file mode 100644 index e4c1a60a77..0000000000 --- a/src/calibre/ebooks/epub/fix/main.py +++ /dev/null @@ -1,62 +0,0 @@ -#!/usr/bin/env python -# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai - -__license__ = 'GPL v3' -__copyright__ = '2010, Kovid Goyal ' -__docformat__ = 'restructuredtext en' - -import sys, os - -from calibre.utils.config import OptionParser -from calibre.ptempfile import TemporaryDirectory -from calibre import CurrentDir -from calibre.utils.zipfile import ZipFile -from calibre.utils.logging import default_log -from calibre.customize.ui import epub_fixers -from calibre.ebooks.epub.fix.container import Container -from calibre.ebooks.epub.fix import ParseError - - -def option_parser(): - parser = OptionParser(usage=_( - '%prog [options] file.epub\n\n' - 'Fix common problems in EPUB files that can cause them ' - 'to be rejected by poorly designed publishing services.\n\n' - 'By default, no fixing is done and messages are printed out ' - 'for each error detected. Use the options to control which errors ' - 'are automatically fixed.')) - for fixer in epub_fixers(): - fixer.add_options_to_parser(parser) - - return parser - - -def run(epub, opts, log): - with TemporaryDirectory('_epub-fix') as tdir: - with CurrentDir(tdir): - zf = ZipFile(epub) - zf.extractall() - zf.close() - container = Container(tdir, log) - for fixer in epub_fixers(): - fix = getattr(opts, fixer.fix_name, False) - fixer.run(container, opts, log, fix=fix) - container.write(epub) - -def main(args=sys.argv): - parser = option_parser() - opts, args = parser.parse_args(args) - if len(args) != 2: - parser.print_help() - print - default_log.error(_('You must specify an epub file')) - return - epub = os.path.abspath(args[1]) - try: - run(epub, opts, default_log) - except ParseError as err: - default_log.error(unicode(err)) - raise SystemExit(1) - -if __name__ == '__main__': - main() diff --git a/src/calibre/ebooks/epub/fix/unmanifested.py b/src/calibre/ebooks/epub/fix/unmanifested.py deleted file mode 100644 index 98fdd4615f..0000000000 --- a/src/calibre/ebooks/epub/fix/unmanifested.py +++ /dev/null @@ -1,53 +0,0 @@ -#!/usr/bin/env python -# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai - -__license__ = 'GPL v3' -__copyright__ = '2010, Kovid Goyal ' -__docformat__ = 'restructuredtext en' - - -from calibre.ebooks.epub.fix import ePubFixer - -class Unmanifested(ePubFixer): - - name = 'Fix unmanifested files' - - @property - def short_description(self): - return _('Fix unmanifested files') - - @property - def long_description(self): - return _('Fix unmanifested files. epub-fix can either add them to ' - 'the manifest or delete them as specified by the ' - 'delete unmanifested option.') - - @property - def description(self): - return self.long_description - - @property - def fix_name(self): - return 'unmanifested' - - @property - def options(self): - return [('delete_unmanifested', 'bool', False, - _('Delete unmanifested files instead of adding them to the manifest'))] - - def run(self, container, opts, log, fix=False): - dirtied = False - for name in list(container.manifest_worthy_names()): - item = container.manifest_item_for_name(name) - if item is None: - log.error(name, 'not in manifest') - if fix: - if opts.delete_unmanifested: - container.delete_name(name) - log('\tDeleted') - else: - container.add_name_to_manifest(name) - log('\tAdded to manifest') - dirtied = True - if dirtied: - container.set(container.opf_name, container.opf)