mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Remove the unmaintained epub fix code
This commit is contained in:
parent
ad307cf23d
commit
227b13186e
@ -17,7 +17,6 @@ from calibre.devices.interface import DevicePlugin
|
||||
from calibre.ebooks.metadata import MetaInformation
|
||||
from calibre.utils.config import (make_config_dir, Config, ConfigProxy,
|
||||
plugin_dir, OptionParser)
|
||||
from calibre.ebooks.epub.fix import ePubFixer
|
||||
from calibre.ebooks.metadata.sources.base import Source
|
||||
from calibre.constants import DEBUG
|
||||
|
||||
@ -489,15 +488,6 @@ def disabled_device_plugins():
|
||||
yield plugin
|
||||
# }}}
|
||||
|
||||
# epub fixers {{{
|
||||
def epub_fixers():
    '''
    Yield every initialized plugin that is an ePubFixer, has not been
    disabled and lists the current platform in its supported_platforms.
    '''
    for candidate in _initialized_plugins:
        if not isinstance(candidate, ePubFixer):
            continue
        if is_disabled(candidate):
            continue
        if platform not in candidate.supported_platforms:
            continue
        yield candidate
|
||||
# }}}
|
||||
|
||||
# Metadata sources2 {{{
|
||||
def metadata_plugins(capabilities):
|
||||
capabilities = frozenset(capabilities)
|
||||
|
@ -1,67 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
|
||||
from calibre.customize import Plugin
|
||||
|
||||
class InvalidEpub(ValueError):
    '''Error signalling that an EPUB is structurally invalid.'''
|
||||
|
||||
class ParseError(ValueError):
    '''
    Error signalling that a named file could not be parsed.

    The file name and the error description are kept on the instance as
    ``name`` and ``desc``; the exception message is a translated string
    built from both.
    '''

    def __init__(self, name, desc):
        self.name, self.desc = name, desc
        message = _('Failed to parse: %(name)s with error: %(err)s') % {
                'name': name, 'err': desc}
        ValueError.__init__(self, message)
|
||||
|
||||
class ePubFixer(Plugin):
    '''
    Base class for plugins that check an EPUB for one kind of problem and,
    on request, fix it. Subclasses supply the descriptions, the fix name,
    any extra options and the run() implementation.
    '''

    supported_platforms = ['windows', 'osx', 'linux']
    author = 'Kovid Goyal'
    type = _('ePub Fixer')
    can_be_disabled = True

    # API that subclasses must implement {{{
    @property
    def short_description(self):
        raise NotImplementedError

    @property
    def long_description(self):
        # Used as the help text of the command line switch for this fixer
        raise NotImplementedError

    @property
    def fix_name(self):
        # Underscores are turned into dashes to form the command line switch
        raise NotImplementedError

    @property
    def options(self):
        '''
        Return a list of 4-tuples
        (option_name, type, default, help_text)
        type is one of 'bool', 'int', 'string'
        '''
        return []

    def run(self, container, opts, log, fix=False):
        raise NotImplementedError
    # }}}

    def add_options_to_parser(self, parser):
        '''
        Register this fixer's command line options on ``parser``: one
        boolean switch named after fix_name, plus one option for every
        entry returned by the options property.
        '''
        switch = '--' + self.fix_name.replace('_', '-')
        parser.add_option(switch, help=self.long_description,
                action='store_true', default=False)
        for option in self.options:
            is_bool = option[1] == 'bool'
            spec = {
                'action': 'store_true' if is_bool else 'store',
                'default': option[2],
                'help': option[3],
            }
            if not is_bool:
                # Non boolean options carry their type through to the parser
                spec['type'] = option[1]
            parser.add_option('--' + option[0].replace('_', '-'), **spec)
|
||||
|
@ -1,220 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import os, posixpath, urllib, sys, re
|
||||
|
||||
from lxml import etree
|
||||
from lxml.etree import XMLSyntaxError
|
||||
|
||||
from calibre.ebooks.epub.fix import InvalidEpub, ParseError
|
||||
from calibre import guess_type, prepare_string_for_xml
|
||||
from calibre.ebooks.chardet import xml_to_unicode
|
||||
from calibre.constants import iswindows
|
||||
from calibre.utils.zipfile import ZipFile, ZIP_STORED
|
||||
|
||||
# Short aliases for the os.path helpers used in this module
exists = os.path.exists
join = os.path.join

# XML namespaces used when querying META-INF/container.xml (OCF) and the
# OPF file (OPF)
OCF_NS = 'urn:oasis:names:tc:opendocument:xmlns:container'
OPF_NS = 'http://www.idpf.org/2007/opf'
|
||||
|
||||
class Container(object):
    '''
    Access to the files of an EPUB that has been extracted into a directory.

    Files are addressed by *name*: their path relative to the root of the
    extracted tree, using / as separator. Files whose mime type is known and
    ends with +xml are parsed on first access and cached; changed files are
    recorded with set() and flushed to disk and zipped up again by write().
    '''

    # File names recognised inside META-INF. Only membership is used in this
    # class (see manifest_worthy_names).
    # NOTE(review): the boolean presumably marks the file as mandatory --
    # confirm before relying on it.
    META_INF = {
            'container.xml' : True,
            'manifest.xml' : False,
            'encryption.xml' : False,
            'metadata.xml' : False,
            'signatures.xml' : False,
            'rights.xml' : False,
    }

    def __init__(self, path, log):
        '''
        :param path: Directory containing the extracted EPUB
        :param log: Log object, stored as self.log

        :raises InvalidEpub: if META-INF/container.xml is missing, has no
            link to an OPF file, or the OPF file it points to cannot be
            found among the extracted files.
        '''
        self.root = os.path.abspath(path)
        self.log = log
        self.dirtied = set()
        self.cache = {}
        self.mime_map = {}
        self.opf_name = None

        # write() adds a fresh mimetype entry itself, so drop the extracted one
        if exists(join(self.root, 'mimetype')):
            os.remove(join(self.root, 'mimetype'))

        container_path = join(self.root, 'META-INF', 'container.xml')
        if not exists(container_path):
            raise InvalidEpub('No META-INF/container.xml in epub')
        with open(container_path, 'rb') as f:
            self.container = etree.fromstring(f.read())
        opf_files = self.container.xpath((
            r'child::ocf:rootfiles/ocf:rootfile'
            '[@media-type="%s" and @full-path]'%guess_type('a.opf')[0]
            ), namespaces={'ocf':OCF_NS}
        )
        if not opf_files:
            raise InvalidEpub('META-INF/container.xml contains no link to OPF file')
        # Normalize, so that a full-path such as ./content.opf still compares
        # equal to the path produced by os.walk below
        opf_path = os.path.abspath(os.path.join(self.root,
            *opf_files[0].get('full-path').split('/')))
        if not exists(opf_path):
            raise InvalidEpub('OPF file does not exist at location pointed to'
                    ' by META-INF/container.xml')

        # Map of relative paths with / separators to absolute
        # paths on filesystem with os separators
        self.name_map = {}
        for dirpath, dirnames, filenames in os.walk(self.root):
            for f in filenames:
                path = join(dirpath, f)
                name = os.path.relpath(path, self.root).replace(os.sep, '/')
                self.name_map[name] = path
                if path == opf_path:
                    self.opf_name = name
                    self.mime_map[name] = guess_type('a.opf')[0]

        if self.opf_name is None:
            # Without this check the failure would surface later as an
            # AttributeError/KeyError when the OPF is first accessed
            raise InvalidEpub('OPF file pointed to by META-INF/container.xml'
                    ' was not found among the files of the epub')

        # Record the mime types declared in the OPF manifest
        for item in self.opf.xpath(
                '//opf:manifest/opf:item[@href and @media-type]',
                namespaces={'opf':OPF_NS}):
            href = item.get('href')
            self.mime_map[self.href_to_name(href,
                posixpath.dirname(self.opf_name))] = item.get('media-type')

    def manifest_worthy_names(self):
        '''
        Yield the names of all files except .opf files and the files in
        META-INF whose base name is listed in META_INF.
        '''
        for name in self.name_map:
            if name.endswith('.opf'):
                continue
            if name.startswith('META-INF') and \
                    posixpath.basename(name) in self.META_INF:
                continue
            yield name

    def delete_name(self, name):
        '''
        Delete the file for ``name`` from disk and forget its mime type and
        path. Raises KeyError if ``name`` is unknown.
        '''
        self.mime_map.pop(name, None)
        path = self.name_map[name]
        os.remove(path)
        self.name_map.pop(name)

    def manifest_item_for_name(self, name):
        '''
        Return the first <item> in the OPF manifest whose href equals the
        href of ``name`` relative to the OPF file, or None if there is none.
        '''
        href = self.name_to_href(name,
            posixpath.dirname(self.opf_name))
        # Pass the href as an XPath variable. Interpolating an XML escaped
        # copy into the expression fails to match hrefs containing
        # characters such as & or ", since XPath does not decode entities.
        existing = self.opf.xpath('//opf:manifest/opf:item[@href=$href]',
                namespaces={'opf':OPF_NS}, href=href)
        if not existing:
            return None
        return existing[0]

    def add_name_to_manifest(self, name, mt=None):
        '''
        Append an <item> for ``name`` to the OPF manifest, unless one with
        the same href already exists.

        :param mt: Media type for the item. If not specified it is guessed
            from the file name, falling back to application/octet-stream.
        '''
        item = self.manifest_item_for_name(name)
        if item is not None:
            return
        manifest = self.opf.xpath('//opf:manifest', namespaces={'opf':OPF_NS})[0]
        item = manifest.makeelement('{%s}item'%OPF_NS, nsmap={'opf':OPF_NS},
                href=self.name_to_href(name, posixpath.dirname(self.opf_name)),
                id=self.generate_manifest_id())
        if not mt:
            mt = guess_type(posixpath.basename(name))[0]
        if not mt:
            mt = 'application/octet-stream'
        item.set('media-type', mt)
        manifest.append(item)
        self.fix_tail(item)

    def fix_tail(self, item):
        '''
        Designed only to work with self closing elements after item has
        just been inserted/appended
        '''
        parent = item.getparent()
        idx = parent.index(item)
        if idx == 0:
            item.tail = parent.text
        else:
            # Take over the tail of the previous sibling
            item.tail = parent[idx-1].tail
            if idx == len(parent)-1:
                # item is now last, so the previous sibling gets the text
                # that precedes the first child instead
                parent[idx-1].tail = parent.text

    def generate_manifest_id(self):
        '''
        Return the first id of the form idN (N = 0, 1, 2, ...) that is not
        used by any <item> in the OPF manifest.
        '''
        items = self.opf.xpath('//opf:manifest/opf:item[@id]',
                namespaces={'opf':OPF_NS})
        ids = set([x.get('id') for x in items])
        x = 0
        while True:
            c = 'id%d'%x
            if c not in ids:
                return c
            x += 1

    @property
    def opf(self):
        '''The (cached) contents of the OPF file, as returned by get()'''
        return self.get(self.opf_name)

    def href_to_name(self, href, base=''):
        '''
        Convert an href into a name: the fragment is dropped, the rest is
        unquoted and, if ``base`` is not empty, joined onto ``base``.
        '''
        href = urllib.unquote(href.partition('#')[0])
        name = href
        if base:
            name = posixpath.join(base, href)
        return name

    def name_to_href(self, name, base):
        '''
        Return ``name`` relative to ``base``, or ``name`` unchanged if
        ``base`` is empty.
        '''
        if not base:
            return name
        return posixpath.relpath(name, base)

    def get_raw(self, name):
        '''Return the bytes of the file for ``name``, read from disk.'''
        path = self.name_map[name]
        with open(path, 'rb') as f:
            return f.read()

    def get(self, name):
        '''
        Return the contents of ``name``, using the cache if possible. Files
        with a known mime type are passed through _parse() first.

        :raises ParseError: if the file is not well formed XML
        '''
        if name in self.cache:
            return self.cache[name]
        raw = self.get_raw(name)
        if name in self.mime_map:
            try:
                raw = self._parse(raw, self.mime_map[name])
            except XMLSyntaxError as err:
                raise ParseError(name, unicode(err))
        self.cache[name] = raw
        return raw

    def set(self, name, val):
        '''
        Replace the cached contents of ``name`` with ``val`` and mark the
        name as needing to be written to disk by write().
        '''
        self.cache[name] = val
        self.dirtied.add(name)

    def _parse(self, raw, mimetype):
        '''
        Parse ``raw`` into an lxml tree if ``mimetype`` ends with +xml,
        otherwise return ``raw`` unchanged.

        For HTML documents everything before the opening <html tag is
        discarded; entities declared in a DOCTYPE in that discarded part
        are substituted into the remaining text before parsing.
        '''
        mt = mimetype.lower()
        if mt.endswith('+xml'):
            parser = etree.XMLParser(no_network=True, huge_tree=not iswindows)
            raw = xml_to_unicode(raw,
                strip_encoding_pats=True, assume_utf8=True,
                resolve_entities=True)[0].strip()
            idx = raw.find('<html')
            if idx == -1:
                idx = raw.find('<HTML')
            if idx > -1:
                pre = raw[:idx]
                raw = raw[idx:]
                if '<!DOCTYPE' in pre:
                    user_entities = {}
                    for match in re.finditer(r'<!ENTITY\s+(\S+)\s+([^>]+)', pre):
                        val = match.group(2)
                        if val.startswith('"') and val.endswith('"'):
                            val = val[1:-1]
                        user_entities[match.group(1)] = val
                    if user_entities:
                        # Entity names may contain regex metacharacters
                        # such as '.', so they have to be escaped
                        pat = re.compile(r'&(%s);'%('|'.join(
                            map(re.escape, user_entities))))
                        raw = pat.sub(lambda m:user_entities[m.group(1)], raw)
            return etree.fromstring(raw, parser=parser)
        return raw

    def write(self, path):
        '''
        Write all changed files back to disk, then create a zip file at
        ``path`` with an uncompressed mimetype entry written first, followed
        by the contents of the root directory.
        '''
        for name in self.dirtied:
            data = self.cache[name]
            raw = data
            if hasattr(data, 'xpath'):
                # Parsed trees have to be serialized again
                raw = etree.tostring(data, encoding='utf-8',
                        xml_declaration=True)
            with open(self.name_map[name], 'wb') as f:
                f.write(raw)
        self.dirtied.clear()
        zf = ZipFile(path, 'w')
        try:
            zf.writestr('mimetype', bytes(guess_type('a.epub')[0]),
                    compression=ZIP_STORED)
            zf.add_dir(self.root)
        finally:
            # Close the archive even if adding a file fails
            zf.close()
|
||||
|
@ -1,91 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
from calibre.ebooks.epub.fix import ePubFixer, InvalidEpub
|
||||
|
||||
|
||||
class Epubcheck(ePubFixer):
    '''
    Fixer that reports, and optionally rewrites, two constructs that
    epubcheck complains about: dc:date values in the OPF that are not of
    the form YYYY-MM-DD and <svg> elements with preserveAspectRatio="none".
    '''

    name = 'Workaround epubcheck bugs'

    @property
    def short_description(self):
        return _('Workaround epubcheck bugs')

    @property
    def long_description(self):
        return _('Workarounds for bugs in the latest release of epubcheck. '
                'epubcheck reports many things as errors that are not '
                'actually errors. epub-fix will try to detect these and replace '
                'them with constructs that epubcheck likes. This may cause '
                'significant changes to your epub, complain to the epubcheck '
                'project.')

    @property
    def description(self):
        return self.long_description

    @property
    def fix_name(self):
        return 'epubcheck'

    def fix_pubdates(self):
        '''
        Log an error for every dc:date in the OPF whose text differs from
        its YYYY-MM-DD rendering and, if fixing is enabled, replace the text.

        :raises InvalidEpub: if a date cannot be parsed at all
        '''
        from calibre.utils.date import parse_date, strptime

        dirtied = False
        opf = self.container.opf
        # The same default is used for every date, so build it only once
        default = strptime('2000-1-1', '%Y-%m-%d', as_utc=True)
        for dcdate in opf.xpath('//dc:date',
                namespaces={'dc':'http://purl.org/dc/elements/1.1/'}):
            raw = dcdate.text or ''
            try:
                ts = parse_date(raw, assume_utc=False, as_utc=True,
                        default=default)
            except Exception:
                # Not a bare except, so that KeyboardInterrupt and
                # SystemExit are not turned into InvalidEpub
                raise InvalidEpub('Invalid date set in OPF', raw)
            try:
                sval = ts.strftime('%Y-%m-%d')
            except Exception:
                # Fall back to calibre's own strftime for dates that
                # datetime.strftime refuses to format
                from calibre import strftime
                sval = strftime('%Y-%m-%d', ts.timetuple())
            if sval != raw:
                self.log.error(
                    'OPF contains date', raw, 'that epubcheck does not like')
                if self.fix:
                    dcdate.text = sval
                    self.log('\tReplaced', raw, 'with', sval)
                    dirtied = True
        if dirtied:
            self.container.set(self.container.opf_name, opf)

    def fix_preserve_aspect_ratio(self):
        '''
        Log an error for every <svg> element with preserveAspectRatio="none"
        found in an application/xhtml+xml file and, if fixing is enabled,
        change the value to "xMidYMid meet".
        '''
        for name in self.container.name_map:
            mt = self.container.mime_map.get(name, '')
            if mt.lower() != 'application/xhtml+xml':
                continue
            root = self.container.get(name)
            dirtied = False
            for svg in root.xpath('//svg:svg[@preserveAspectRatio="none"]',
                    namespaces={'svg':'http://www.w3.org/2000/svg'}):
                self.log.error('Found <svg> element with'
                        ' preserveAspectRatio="none" which epubcheck '
                        'cannot handle')
                if self.fix:
                    svg.set('preserveAspectRatio', 'xMidYMid meet')
                    dirtied = True
                    self.log('\tReplaced none with xMidYMid meet')
            if dirtied:
                self.container.set(name, root)

    def run(self, container, opts, log, fix=False):
        '''
        Store the arguments on the instance and run both checks. Problems
        are only reported unless ``fix`` is True.
        '''
        self.container = container
        self.opts = opts
        self.log = log
        self.fix = fix
        self.fix_pubdates()
        self.fix_preserve_aspect_ratio()
|
@ -1,62 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import sys, os
|
||||
|
||||
from calibre.utils.config import OptionParser
|
||||
from calibre.ptempfile import TemporaryDirectory
|
||||
from calibre import CurrentDir
|
||||
from calibre.utils.zipfile import ZipFile
|
||||
from calibre.utils.logging import default_log
|
||||
from calibre.customize.ui import epub_fixers
|
||||
from calibre.ebooks.epub.fix.container import Container
|
||||
from calibre.ebooks.epub.fix import ParseError
|
||||
|
||||
|
||||
def option_parser():
    '''
    Build the command line parser: the usage text plus the options
    contributed by every fixer returned by epub_fixers().
    '''
    usage = _(
        '%prog [options] file.epub\n\n'
        'Fix common problems in EPUB files that can cause them '
        'to be rejected by poorly designed publishing services.\n\n'
        'By default, no fixing is done and messages are printed out '
        'for each error detected. Use the options to control which errors '
        'are automatically fixed.')
    parser = OptionParser(usage=usage)
    for plugin in epub_fixers():
        plugin.add_options_to_parser(parser)
    return parser
|
||||
|
||||
|
||||
def run(epub, opts, log):
    '''
    Extract ``epub`` into a temporary directory, run every fixer returned
    by epub_fixers() on it and write the result back to ``epub``.

    :param epub: Path to the EPUB file; it is overwritten with the result
    :param opts: Parsed command line options. A fixer only fixes (rather
        than just reports) when the attribute named after its fix_name
        is true.
    :param log: Log object passed to the container and to every fixer
    '''
    with TemporaryDirectory('_epub-fix') as tdir:
        # extractall() is called without a target while tdir is the active
        # CurrentDir -- presumably that makes it extract into tdir
        with CurrentDir(tdir):
            zf = ZipFile(epub)
            try:
                zf.extractall()
            finally:
                # Do not leak the open archive if extraction fails
                zf.close()
        container = Container(tdir, log)
        for fixer in epub_fixers():
            fix = getattr(opts, fixer.fix_name, False)
            fixer.run(container, opts, log, fix=fix)
        container.write(epub)
|
||||
|
||||
def main(args=sys.argv):
    '''
    Command line entry point. Expects exactly one positional argument (the
    epub file) after the program name; otherwise the help is printed and an
    error is logged. A ParseError while fixing is logged and converted into
    SystemExit(1).
    '''
    parser = option_parser()
    opts, positional = parser.parse_args(args)
    if len(positional) == 2:
        epub = os.path.abspath(positional[1])
        try:
            run(epub, opts, default_log)
        except ParseError as err:
            default_log.error(unicode(err))
            raise SystemExit(1)
        return
    parser.print_help()
    # Blank line between the help text and the error message
    print('')
    default_log.error(_('You must specify an epub file'))
|
||||
|
||||
# Run the command line interface when this module is executed as a script
if __name__ == '__main__':
    main()
|
@ -1,53 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
|
||||
from calibre.ebooks.epub.fix import ePubFixer
|
||||
|
||||
class Unmanifested(ePubFixer):
    '''
    Fixer that reports files which have no entry in the OPF manifest and,
    when fixing, either adds them to the manifest or deletes them.
    '''

    name = 'Fix unmanifested files'

    @property
    def short_description(self):
        return _('Fix unmanifested files')

    @property
    def long_description(self):
        return _('Fix unmanifested files. epub-fix can either add them to '
                'the manifest or delete them as specified by the '
                'delete unmanifested option.')

    @property
    def description(self):
        return self.long_description

    @property
    def fix_name(self):
        return 'unmanifested'

    @property
    def options(self):
        return [('delete_unmanifested', 'bool', False,
            _('Delete unmanifested files instead of adding them to the manifest'))]

    def run(self, container, opts, log, fix=False):
        '''
        Log an error for every manifest worthy file that has no manifest
        item. If ``fix`` is True the file is deleted when
        opts.delete_unmanifested is set and added to the manifest
        otherwise; afterwards the OPF is marked as changed.
        '''
        changed = False
        # Iterate over a copy, as deleting a name changes the container
        for name in list(container.manifest_worthy_names()):
            if container.manifest_item_for_name(name) is not None:
                continue
            log.error(name, 'not in manifest')
            if not fix:
                continue
            if opts.delete_unmanifested:
                container.delete_name(name)
                log('\tDeleted')
            else:
                container.add_name_to_manifest(name)
                log('\tAdded to manifest')
            changed = True
        if changed:
            container.set(container.opf_name, container.opf)
|
Loading…
x
Reference in New Issue
Block a user