mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Merge from trunk
This commit is contained in:
commit
200547fe0e
@ -9,6 +9,7 @@ from calibre.customize import FileTypePlugin, MetadataReaderPlugin, MetadataWrit
|
|||||||
from calibre.constants import numeric_version
|
from calibre.constants import numeric_version
|
||||||
from calibre.ebooks.metadata.archive import ArchiveExtract, get_cbz_metadata
|
from calibre.ebooks.metadata.archive import ArchiveExtract, get_cbz_metadata
|
||||||
|
|
||||||
|
# To archive plugins {{{
|
||||||
class HTML2ZIP(FileTypePlugin):
|
class HTML2ZIP(FileTypePlugin):
|
||||||
name = 'HTML to ZIP'
|
name = 'HTML to ZIP'
|
||||||
author = 'Kovid Goyal'
|
author = 'Kovid Goyal'
|
||||||
@ -82,6 +83,8 @@ class PML2PMLZ(FileTypePlugin):
|
|||||||
|
|
||||||
return of.name
|
return of.name
|
||||||
|
|
||||||
|
# }}}
|
||||||
|
|
||||||
# Metadata reader plugins {{{
|
# Metadata reader plugins {{{
|
||||||
class ComicMetadataReader(MetadataReaderPlugin):
|
class ComicMetadataReader(MetadataReaderPlugin):
|
||||||
|
|
||||||
@ -465,8 +468,11 @@ from calibre.ebooks.metadata.fetch import GoogleBooks, ISBNDB, Amazon, \
|
|||||||
LibraryThing
|
LibraryThing
|
||||||
from calibre.ebooks.metadata.douban import DoubanBooks
|
from calibre.ebooks.metadata.douban import DoubanBooks
|
||||||
from calibre.library.catalog import CSV_XML, EPUB_MOBI
|
from calibre.library.catalog import CSV_XML, EPUB_MOBI
|
||||||
|
from calibre.ebooks.epub.fix.unmanifested import Unmanifested
|
||||||
|
from calibre.ebooks.epub.fix.epubcheck import Epubcheck
|
||||||
|
|
||||||
plugins = [HTML2ZIP, PML2PMLZ, ArchiveExtract, GoogleBooks, ISBNDB, Amazon,
|
plugins = [HTML2ZIP, PML2PMLZ, ArchiveExtract, GoogleBooks, ISBNDB, Amazon,
|
||||||
LibraryThing, DoubanBooks, CSV_XML, EPUB_MOBI]
|
LibraryThing, DoubanBooks, CSV_XML, EPUB_MOBI, Unmanifested, Epubcheck]
|
||||||
plugins += [
|
plugins += [
|
||||||
ComicInput,
|
ComicInput,
|
||||||
EPUBInput,
|
EPUBInput,
|
||||||
|
@ -16,6 +16,7 @@ from calibre.ebooks.metadata import MetaInformation
|
|||||||
from calibre.ebooks.metadata.fetch import MetadataSource
|
from calibre.ebooks.metadata.fetch import MetadataSource
|
||||||
from calibre.utils.config import make_config_dir, Config, ConfigProxy, \
|
from calibre.utils.config import make_config_dir, Config, ConfigProxy, \
|
||||||
plugin_dir, OptionParser, prefs
|
plugin_dir, OptionParser, prefs
|
||||||
|
from calibre.ebooks.epub.fix import ePubFixer
|
||||||
|
|
||||||
|
|
||||||
platform = 'linux'
|
platform = 'linux'
|
||||||
@ -194,7 +195,6 @@ def plugin_customization(plugin):
|
|||||||
|
|
||||||
# }}}
|
# }}}
|
||||||
|
|
||||||
|
|
||||||
# Input/Output profiles {{{
|
# Input/Output profiles {{{
|
||||||
def input_profiles():
|
def input_profiles():
|
||||||
for plugin in _initialized_plugins:
|
for plugin in _initialized_plugins:
|
||||||
@ -444,6 +444,14 @@ def device_plugins(): # {{{
|
|||||||
yield plugin
|
yield plugin
|
||||||
# }}}
|
# }}}
|
||||||
|
|
||||||
|
# epub fixers {{{
|
||||||
|
def epub_fixers():
    '''
    Generator over the initialized plugins that are ePubFixer instances,
    are not disabled, and list the module-level ``platform`` among their
    supported platforms.
    '''
    for candidate in _initialized_plugins:
        # The checks short-circuit in the same order as the original
        # nested conditionals.
        usable = (isinstance(candidate, ePubFixer)
                  and not is_disabled(candidate)
                  and platform in candidate.supported_platforms)
        if usable:
            yield candidate
|
||||||
|
# }}}
|
||||||
|
|
||||||
# Initialize plugins {{{
|
# Initialize plugins {{{
|
||||||
|
|
||||||
|
@ -54,7 +54,8 @@ class Book(MetaInformation):
|
|||||||
except:
|
except:
|
||||||
self.datetime = time.gmtime()
|
self.datetime = time.gmtime()
|
||||||
|
|
||||||
self.thumbnail = ImageWrapper(thumbnail_name)
|
if thumbnail_name is not None:
|
||||||
|
self.thumbnail = ImageWrapper(thumbnail_name)
|
||||||
self.tags = []
|
self.tags = []
|
||||||
if other:
|
if other:
|
||||||
self.smart_update(other)
|
self.smart_update(other)
|
||||||
|
@ -85,9 +85,11 @@ class KOBO(USBMS):
|
|||||||
|
|
||||||
idx = bl_cache.get(lpath, None)
|
idx = bl_cache.get(lpath, None)
|
||||||
if idx is not None:
|
if idx is not None:
|
||||||
imagename = self.normalize_path(prefix + '.kobo/images/' + ImageID + ' - NickelBookCover.parsed')
|
if ImageID is not None:
|
||||||
#print "Image name Normalized: " + imagename
|
imagename = self.normalize_path(self._main_prefix + '.kobo/images/' + ImageID + ' - NickelBookCover.parsed')
|
||||||
bl[idx].thumbnail = ImageWrapper(imagename)
|
#print "Image name Normalized: " + imagename
|
||||||
|
if imagename is not None:
|
||||||
|
bl[idx].thumbnail = ImageWrapper(imagename)
|
||||||
bl_cache[lpath] = None
|
bl_cache[lpath] = None
|
||||||
if ContentType != '6':
|
if ContentType != '6':
|
||||||
if self.update_metadata_item(bl[idx]):
|
if self.update_metadata_item(bl[idx]):
|
||||||
@ -341,6 +343,7 @@ class KOBO(USBMS):
|
|||||||
else:
|
else:
|
||||||
# if path.startswith("file:///mnt/onboard/"):
|
# if path.startswith("file:///mnt/onboard/"):
|
||||||
path = path.replace("file:///mnt/onboard/", self._main_prefix)
|
path = path.replace("file:///mnt/onboard/", self._main_prefix)
|
||||||
|
path = path.replace("/mnt/onboard/", self._main_prefix)
|
||||||
# print "Internal: " + filename
|
# print "Internal: " + filename
|
||||||
|
|
||||||
return path
|
return path
|
||||||
|
@ -25,13 +25,13 @@ convert_entities = functools.partial(entity_to_unicode,
|
|||||||
_span_pat = re.compile('<span.*?</span>', re.DOTALL|re.IGNORECASE)
|
_span_pat = re.compile('<span.*?</span>', re.DOTALL|re.IGNORECASE)
|
||||||
|
|
||||||
LIGATURES = {
|
LIGATURES = {
|
||||||
u'\u00c6': u'AE',
|
# u'\u00c6': u'AE',
|
||||||
u'\u00e6': u'ae',
|
# u'\u00e6': u'ae',
|
||||||
u'\u0152': u'OE',
|
# u'\u0152': u'OE',
|
||||||
u'\u0153': u'oe',
|
# u'\u0153': u'oe',
|
||||||
u'\u0132': u'IJ',
|
# u'\u0132': u'IJ',
|
||||||
u'\u0133': u'ij',
|
# u'\u0133': u'ij',
|
||||||
u'\u1D6B': u'ue',
|
# u'\u1D6B': u'ue',
|
||||||
u'\uFB00': u'ff',
|
u'\uFB00': u'ff',
|
||||||
u'\uFB01': u'fi',
|
u'\uFB01': u'fi',
|
||||||
u'\uFB02': u'fl',
|
u'\uFB02': u'fl',
|
||||||
|
58
src/calibre/ebooks/epub/fix/__init__.py
Normal file
58
src/calibre/ebooks/epub/fix/__init__.py
Normal file
@ -0,0 +1,58 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
|
||||||
|
from calibre.customize import Plugin
|
||||||
|
|
||||||
|
class InvalidEpub(ValueError):
    '''Error type (a ValueError subclass) signalling a malformed EPUB.'''
|
||||||
|
|
||||||
|
class ePubFixer(Plugin):
    '''
    Base class for plugins that check an EPUB for one kind of problem.

    Subclasses supply the descriptive properties and :meth:`run`;
    :meth:`add_options_to_parser` turns those into command line options.
    '''

    supported_platforms = ['windows', 'osx', 'linux']
    author = 'Kovid Goyal'
    type = _('ePub Fixer')
    can_be_disabled = True

    # API that subclasses must implement {{{
    @property
    def short_description(self):
        '''Short description of the fixer. Must be overridden.'''
        raise NotImplementedError

    @property
    def long_description(self):
        '''Text used as the help of the fixer's command line switch.'''
        raise NotImplementedError

    @property
    def fix_name(self):
        '''
        Identifier of the fixer. Underscores are turned into dashes to
        build the name of its command line switch.
        '''
        raise NotImplementedError

    @property
    def options(self):
        '''
        Return a list of 4-tuples
        (option_name, type, default, help_text)
        type is one of 'bool', 'int', 'string'
        '''
        return []

    def run(self, container, opts, log, fix=False):
        '''Perform the check on ``container``. Must be overridden.'''
        raise NotImplementedError
    # }}}

    def add_options_to_parser(self, parser):
        '''
        Register this fixer's options on ``parser``: one boolean switch
        named after :attr:`fix_name`, then one option per entry of
        :attr:`options`.
        '''
        def as_switch(identifier):
            return '--' + identifier.replace('_', '-')

        parser.add_option(as_switch(self.fix_name),
                help=self.long_description, action='store_true', default=False)

        for spec in self.options:
            kind = spec[1]
            is_flag = kind == 'bool'
            kwargs = {
                'action': 'store_true' if is_flag else 'store',
                'default': spec[2],
                'help': spec[3],
            }
            if not is_flag:
                # Only non-boolean options are given an explicit type
                kwargs['type'] = kind
            parser.add_option(as_switch(spec[0]), **kwargs)
|
||||||
|
|
182
src/calibre/ebooks/epub/fix/container.py
Normal file
182
src/calibre/ebooks/epub/fix/container.py
Normal file
@ -0,0 +1,182 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
import os, posixpath, urllib, sys
|
||||||
|
|
||||||
|
from lxml import etree
|
||||||
|
|
||||||
|
from calibre.ebooks.epub.fix import InvalidEpub
|
||||||
|
from calibre import guess_type, prepare_string_for_xml
|
||||||
|
from calibre.ebooks.chardet import xml_to_unicode
|
||||||
|
from calibre.constants import iswindows
|
||||||
|
from calibre.utils.zipfile import ZipFile, ZIP_STORED
|
||||||
|
|
||||||
|
exists, join = os.path.exists, os.path.join
|
||||||
|
|
||||||
|
OCF_NS = 'urn:oasis:names:tc:opendocument:xmlns:container'
|
||||||
|
OPF_NS = 'http://www.idpf.org/2007/opf'
|
||||||
|
|
||||||
|
class Container(object):
    '''
    View of an EPUB that has been extracted into a directory.

    Files are addressed by *name*: their path relative to the root of the
    extracted tree, always using ``/`` as separator. Parsed or modified
    content is kept in :attr:`cache` and flushed to disk by :meth:`write`,
    which also zips the tree back into an EPUB file.
    '''

    # File names inside META-INF that are never candidates for the OPF
    # manifest. Only membership is used in this class; the boolean
    # presumably marks whether the file is mandatory -- TODO confirm.
    META_INF = {
            'container.xml' : True,
            'manifest.xml' : False,
            'encryption.xml' : False,
            'metadata.xml' : False,
            'signatures.xml' : False,
            'rights.xml' : False,
    }

    def __init__(self, path, log):
        '''
        :param path: Directory the EPUB was extracted into
        :param log: Log object, stored as ``self.log``

        Raises :class:`InvalidEpub` if META-INF/container.xml is missing,
        does not reference an OPF file, or references one that cannot be
        found in the extracted tree.
        '''
        self.root = os.path.abspath(path)
        self.log = log
        self.dirtied = set([])
        self.cache = {}
        self.mime_map = {}

        # write() always generates a fresh mimetype entry, so the
        # extracted copy is removed to keep it out of the directory tree
        mimetype_path = join(self.root, 'mimetype')
        if exists(mimetype_path):
            os.remove(mimetype_path)

        container_path = join(self.root, 'META-INF', 'container.xml')
        if not exists(container_path):
            raise InvalidEpub('No META-INF/container.xml in epub')
        with open(container_path, 'rb') as f:
            self.container = etree.fromstring(f.read())
        opf_mime = guess_type('a.opf')[0]
        opf_files = self.container.xpath((
            r'child::ocf:rootfiles/ocf:rootfile'
            '[@media-type="%s" and @full-path]'%opf_mime
            ), namespaces={'ocf':OCF_NS}
        )
        if not opf_files:
            raise InvalidEpub('META-INF/container.xml contains no link to OPF file')
        # Normalized so that it compares equal to the paths produced by
        # os.walk() below even if full-path contains '.' or '..' segments
        opf_path = os.path.normpath(os.path.join(self.root,
                *opf_files[0].get('full-path').split('/')))
        if not exists(opf_path):
            raise InvalidEpub('OPF file does not exist at location pointed to'
                    ' by META-INF/container.xml')

        # Map of relative paths with / separators to absolute
        # paths on filesystem with os separators
        self.name_map = {}
        self.opf_name = None
        for dirpath, dirnames, filenames in os.walk(self.root):
            for f in filenames:
                path = join(dirpath, f)
                name = os.path.relpath(path, self.root).replace(os.sep, '/')
                self.name_map[name] = path
                if path == opf_path:
                    self.opf_name = name
                    self.mime_map[name] = opf_mime

        if self.opf_name is None:
            # Without this check the failure would surface later as an
            # obscure error when the opf property is first used
            raise InvalidEpub('OPF file pointed to by META-INF/container.xml'
                    ' could not be located in the extracted epub')

        # Record the declared media type of every manifest item
        opf_dir = posixpath.dirname(self.opf_name)
        for item in self.opf.xpath(
                '//opf:manifest/opf:item[@href and @media-type]',
                namespaces={'opf':OPF_NS}):
            href = item.get('href')
            self.mime_map[self.href_to_name(href, opf_dir)] = \
                    item.get('media-type')

    def manifest_worthy_names(self):
        '''
        Yield the names of all files that should be listed in the OPF
        manifest: everything except ``.opf`` files and the reserved
        META-INF files listed in :attr:`META_INF`.
        '''
        for name in self.name_map:
            if name.endswith('.opf'): continue
            if name.startswith('META-INF') and \
                    posixpath.basename(name) in self.META_INF: continue
            yield name

    def delete_name(self, name):
        '''
        Delete the file called ``name`` from disk and forget everything
        recorded about it. Raises KeyError if ``name`` is unknown.
        '''
        self.mime_map.pop(name, None)
        path = self.name_map[name]
        os.remove(path)
        self.name_map.pop(name)
        # Drop pending state as well, otherwise write() would fail with a
        # KeyError when it tries to flush a dirtied but deleted name
        self.cache.pop(name, None)
        self.dirtied.discard(name)

    def manifest_item_for_name(self, name):
        '''
        Return the ``<item>`` element of the OPF manifest whose href points
        at ``name``, or None if there is no such item.
        '''
        href = self.name_to_href(name,
            posixpath.dirname(self.opf_name))
        # The href is passed as an XPath variable rather than spliced into
        # the expression: attribute values are compared unescaped, so an
        # XML-escaped literal would never match hrefs containing & < > or "
        existing = self.opf.xpath('//opf:manifest/opf:item[@href=$href]',
                namespaces={'opf':OPF_NS}, href=href)
        if not existing:
            return None
        return existing[0]

    def add_name_to_manifest(self, name):
        '''
        Append an ``<item>`` for ``name`` to the OPF manifest unless one is
        already present. The media type is guessed from the file name.
        The caller is responsible for marking the OPF as dirty via set().
        '''
        item = self.manifest_item_for_name(name)
        if item is not None:
            return
        manifest = self.opf.xpath('//opf:manifest', namespaces={'opf':OPF_NS})[0]
        item = manifest.makeelement('{%s}item'%OPF_NS, nsmap={'opf':OPF_NS},
                href=self.name_to_href(name, posixpath.dirname(self.opf_name)),
                id=self.generate_manifest_id())
        mt = guess_type(posixpath.basename(name))[0]
        if not mt:
            # Generic binary type for files of unknown kind
            mt = 'application/octet-stream'
        item.set('media-type', mt)
        manifest.append(item)

    def generate_manifest_id(self):
        '''Return the first id of the form ``idN`` not used in the manifest.'''
        items = self.opf.xpath('//opf:manifest/opf:item[@id]',
                namespaces={'opf':OPF_NS})
        ids = set([x.get('id') for x in items])
        for x in xrange(sys.maxint):
            c = 'id%d'%x
            if c not in ids:
                return c

    @property
    def opf(self):
        '''The OPF file as returned by :meth:`get` (cached after first use).'''
        return self.get(self.opf_name)

    def href_to_name(self, href, base=''):
        '''
        Convert ``href`` into a name: the fragment is dropped, the rest is
        URL-unquoted and, if ``base`` is given, joined onto it.
        '''
        href = urllib.unquote(href.partition('#')[0])
        name = href
        if base:
            name = posixpath.join(base, href)
        return name

    def name_to_href(self, name, base):
        '''Return ``name`` expressed relative to the directory ``base``.'''
        if not base:
            return name
        return posixpath.relpath(name, base)

    def get_raw(self, name):
        '''Return the bytes of the file called ``name`` as stored on disk.'''
        path = self.name_map[name]
        with open(path, 'rb') as f:
            return f.read()

    def get(self, name):
        '''
        Return the content of ``name``, from the cache if available. Files
        with a known media type go through :meth:`_parse`; all others are
        returned as raw bytes.
        '''
        if name in self.cache:
            return self.cache[name]
        raw = self.get_raw(name)
        if name in self.mime_map:
            raw = self._parse(raw, self.mime_map[name])
        self.cache[name] = raw
        return raw

    def set(self, name, val):
        '''Store ``val`` as the content of ``name`` and mark it for writing.'''
        self.cache[name] = val
        self.dirtied.add(name)

    def _parse(self, raw, mimetype):
        '''
        Parse ``raw`` into an lxml tree if ``mimetype`` ends with ``+xml``,
        otherwise return it unchanged.
        '''
        mt = mimetype.lower()
        if mt.endswith('+xml'):
            parser = etree.XMLParser(no_network=True, huge_tree=not iswindows)
            return etree.fromstring(xml_to_unicode(raw,
                strip_encoding_pats=True, assume_utf8=True)[0], parser=parser)
        return raw

    def write(self, path):
        '''
        Flush all dirtied names to disk, then zip the extracted tree into
        an EPUB at ``path`` with an uncompressed ``mimetype`` entry first.
        '''
        for name in self.dirtied:
            data = self.cache[name]
            raw = data
            if hasattr(data, 'xpath'):
                # Parsed XML trees are serialized, other content is
                # written out as is
                raw = etree.tostring(data, encoding='utf-8',
                        xml_declaration=True)
            with open(self.name_map[name], 'wb') as f:
                f.write(raw)
        self.dirtied.clear()
        zf = ZipFile(path, 'w')
        try:
            zf.writestr('mimetype', bytes(guess_type('a.epub')[0]),
                    compression=ZIP_STORED)
            zf.add_dir(self.root)
        finally:
            # Release the archive handle even if adding files fails
            zf.close()
|
||||||
|
|
82
src/calibre/ebooks/epub/fix/epubcheck.py
Normal file
82
src/calibre/ebooks/epub/fix/epubcheck.py
Normal file
@ -0,0 +1,82 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
from calibre.ebooks.epub.fix import ePubFixer, InvalidEpub
|
||||||
|
from calibre.utils.date import parse_date, strptime
|
||||||
|
|
||||||
|
|
||||||
|
class Epubcheck(ePubFixer):
    '''
    Fixer for constructs that the epubcheck tool reports as errors:
    ``dc:date`` values that are not plain ``YYYY-MM-DD`` dates and
    ``<svg>`` elements with ``preserveAspectRatio="none"``.
    '''

    name = 'Workaround epubcheck bugs'

    @property
    def short_description(self):
        return _('Workaround epubcheck bugs')

    @property
    def long_description(self):
        return _('Workarounds for bugs in the latest release of epubcheck. '
                'epubcheck reports many things as errors that are not '
                'actually errors. %prog will try to detect these and replace '
                'them with constructs that epubcheck likes. This may cause '
                'significant changes to your epub, complain to the epubcheck '
                'project.')

    @property
    def fix_name(self):
        return 'epubcheck'

    def fix_pubdates(self):
        '''
        Report every ``dc:date`` in the OPF whose text is not of the form
        ``YYYY-MM-DD`` and, when fixing is enabled, rewrite it.

        Raises :class:`InvalidEpub` if a date cannot be parsed at all.
        '''
        dirtied = False
        opf = self.container.opf
        # Passed to parse_date as its ``default`` argument; identical for
        # every element, so it is built once
        default = strptime('2000-1-1', '%Y-%m-%d', as_utc=True)
        for dcdate in opf.xpath('//dc:date',
                namespaces={'dc':'http://purl.org/dc/elements/1.1/'}):
            raw = dcdate.text
            if not raw: raw = ''
            try:
                ts = parse_date(raw, assume_utc=False, as_utc=True,
                        default=default)
            except Exception:
                # Not a bare except: KeyboardInterrupt and SystemExit must
                # not be reported as a problem with the book
                raise InvalidEpub('Invalid date set in OPF', raw)
            # Formatted by hand because strftime() on Python 2 refuses
            # years before 1900, which are ordinary publication dates.
            # Assumes parse_date returns a datetime-like object with
            # year/month/day attributes -- TODO confirm
            sval = '%04d-%02d-%02d' % (ts.year, ts.month, ts.day)
            if sval != raw:
                self.log.error(
                    'OPF contains date', raw, 'that epubcheck does not like')
                if self.fix:
                    dcdate.text = sval
                    self.log('\tReplaced', raw, 'with', sval)
                    dirtied = True
        if dirtied:
            self.container.set(self.container.opf_name, opf)

    def fix_preserve_aspect_ratio(self):
        '''
        Report every ``<svg preserveAspectRatio="none">`` found in files
        whose media type is application/xhtml+xml and, when fixing is
        enabled, change the attribute to ``xMidYMid meet``.
        '''
        for name in self.container.name_map:
            mt = self.container.mime_map.get(name, '')
            if mt.lower() == 'application/xhtml+xml':
                root = self.container.get(name)
                dirtied = False
                for svg in root.xpath('//svg:svg[@preserveAspectRatio="none"]',
                        namespaces={'svg':'http://www.w3.org/2000/svg'}):
                    self.log.error('Found <svg> element with'
                            ' preserveAspectRatio="none" which epubcheck '
                            'cannot handle')
                    if self.fix:
                        svg.set('preserveAspectRatio', 'xMidYMid meet')
                        dirtied = True
                        self.log('\tReplaced none with xMidYMid meet')
                if dirtied:
                    self.container.set(name, root)

    def run(self, container, opts, log, fix=False):
        '''
        Run both checks against ``container``; problems are only repaired
        when ``fix`` is true.
        '''
        self.container = container
        self.opts = opts
        self.log = log
        self.fix = fix
        self.fix_pubdates()
        self.fix_preserve_aspect_ratio()
|
56
src/calibre/ebooks/epub/fix/main.py
Normal file
56
src/calibre/ebooks/epub/fix/main.py
Normal file
@ -0,0 +1,56 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
import sys, os
|
||||||
|
|
||||||
|
from calibre.utils.config import OptionParser
|
||||||
|
from calibre.ptempfile import TemporaryDirectory
|
||||||
|
from calibre import CurrentDir
|
||||||
|
from calibre.utils.zipfile import ZipFile
|
||||||
|
from calibre.utils.logging import default_log
|
||||||
|
from calibre.customize.ui import epub_fixers
|
||||||
|
from calibre.ebooks.epub.fix.container import Container
|
||||||
|
|
||||||
|
def option_parser():
    '''
    Build the command line parser: the usage text plus whatever options
    each available epub fixer registers.
    '''
    usage = _(
        '%prog [options] file.epub\n\n'
        'Fix common problems in EPUB files that can cause them '
        'to be rejected by poorly designed publishing services.\n\n'
        'By default, no fixing is done and messages are printed out '
        'for each error detected. Use the options to control which errors '
        'are automatically fixed.')
    parser = OptionParser(usage=usage)
    for fixer in epub_fixers():
        fixer.add_options_to_parser(parser)
    return parser
|
||||||
|
|
||||||
|
|
||||||
|
def run(epub, opts, log):
    '''
    Extract ``epub`` into a temporary directory, run every available
    fixer over it and write the result back to ``epub``.

    :param epub: Path to the EPUB file; it is overwritten in place
    :param opts: Parsed options; the attribute named after a fixer's
                 ``fix_name`` decides whether that fixer repairs or only
                 reports (absent attributes count as report-only)
    :param log: Log object handed to the container and to each fixer
    '''
    with TemporaryDirectory('_epub-fix') as tdir:
        with CurrentDir(tdir):
            zf = ZipFile(epub)
            try:
                zf.extractall()
            finally:
                # Do not leak the archive handle if extraction fails
                zf.close()
        container = Container(tdir, log)
        for fixer in epub_fixers():
            fix = getattr(opts, fixer.fix_name, False)
            fixer.run(container, opts, log, fix=fix)
        container.write(epub)
|
||||||
|
|
||||||
|
def main(args=sys.argv):
    '''
    Command line entry point.

    :param args: Argument list including the program name
    :return: 0 on success, 1 if the command line does not consist of
             exactly one epub file besides the options
    '''
    parser = option_parser()
    opts, args = parser.parse_args(args)
    if len(args) != 2:
        parser.print_help()
        # Python 2 print statement: emits a blank line before the error
        print
        default_log.error(_('You must specify an epub file'))
        # Non-zero so that callers and shell scripts can detect the failure
        return 1
    epub = os.path.abspath(args[1])
    run(epub, opts, default_log)
    return 0

if __name__ == '__main__':
    sys.exit(main())
|
49
src/calibre/ebooks/epub/fix/unmanifested.py
Normal file
49
src/calibre/ebooks/epub/fix/unmanifested.py
Normal file
@ -0,0 +1,49 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
|
||||||
|
from calibre.ebooks.epub.fix import ePubFixer
|
||||||
|
|
||||||
|
class Unmanifested(ePubFixer):
    '''
    Fixer for files that are present in the EPUB but missing from the OPF
    manifest. They are either added to the manifest or deleted.
    '''

    name = 'Fix unmanifested files'

    @property
    def short_description(self):
        return _('Fix unmanifested files')

    @property
    def long_description(self):
        return _('Fix unmanifested files. %prog can either add them to '
                'the manifest or delete them as specified by the '
                'delete unmanifested option.')

    @property
    def fix_name(self):
        return 'unmanifested'

    @property
    def options(self):
        return [('delete_unmanifested', 'bool', False,
            _('Delete unmanifested files instead of adding them to the manifest'))]

    def run(self, container, opts, log, fix=False):
        '''
        Log every manifest-worthy file without a manifest entry. When
        ``fix`` is true the file is deleted if ``opts.delete_unmanifested``
        is set and added to the manifest otherwise.
        '''
        manifest_changed = False
        # Iterate over a snapshot, as deleting a name changes the
        # container while the loop is running
        for name in list(container.manifest_worthy_names()):
            if container.manifest_item_for_name(name) is not None:
                continue
            log.error(name, 'not in manifest')
            if not fix:
                continue
            if opts.delete_unmanifested:
                container.delete_name(name)
                log('\tDeleted')
            else:
                container.add_name_to_manifest(name)
                log('\tAdded to manifest')
                manifest_changed = True
        if manifest_changed:
            # Mark the OPF as modified so that it gets written out
            container.set(container.opf_name, container.opf)
|
@ -367,7 +367,7 @@ class LRFInput(InputFormatPlugin):
|
|||||||
xml = d.to_xml(write_files=True)
|
xml = d.to_xml(write_files=True)
|
||||||
if options.verbose > 2:
|
if options.verbose > 2:
|
||||||
open('lrs.xml', 'wb').write(xml.encode('utf-8'))
|
open('lrs.xml', 'wb').write(xml.encode('utf-8'))
|
||||||
parser = etree.XMLParser(recover=True, no_network=True, huge_tree=True)
|
parser = etree.XMLParser(no_network=True, huge_tree=True)
|
||||||
doc = etree.fromstring(xml, parser=parser)
|
doc = etree.fromstring(xml, parser=parser)
|
||||||
char_button_map = {}
|
char_button_map = {}
|
||||||
for x in doc.xpath('//CharButton[@refobj]'):
|
for x in doc.xpath('//CharButton[@refobj]'):
|
||||||
|
@ -870,7 +870,7 @@ class Text(LRFStream):
|
|||||||
open_containers = collections.deque()
|
open_containers = collections.deque()
|
||||||
for c in self.content:
|
for c in self.content:
|
||||||
if isinstance(c, basestring):
|
if isinstance(c, basestring):
|
||||||
s += prepare_string_for_xml(c)
|
s += prepare_string_for_xml(c).replace('\0', '')
|
||||||
elif c is None:
|
elif c is None:
|
||||||
if open_containers:
|
if open_containers:
|
||||||
p = open_containers.pop()
|
p = open_containers.pop()
|
||||||
|
@ -26,7 +26,7 @@ from calibre.ebooks.chardet import xml_to_unicode
|
|||||||
from calibre.ebooks.oeb.entitydefs import ENTITYDEFS
|
from calibre.ebooks.oeb.entitydefs import ENTITYDEFS
|
||||||
from calibre.ebooks.conversion.preprocess import CSSPreProcessor
|
from calibre.ebooks.conversion.preprocess import CSSPreProcessor
|
||||||
|
|
||||||
RECOVER_PARSER = etree.XMLParser(recover=True, no_network=True, huge_tree=True)
|
RECOVER_PARSER = etree.XMLParser(recover=True, no_network=True)
|
||||||
|
|
||||||
XML_NS = 'http://www.w3.org/XML/1998/namespace'
|
XML_NS = 'http://www.w3.org/XML/1998/namespace'
|
||||||
XHTML_NS = 'http://www.w3.org/1999/xhtml'
|
XHTML_NS = 'http://www.w3.org/1999/xhtml'
|
||||||
|
@ -10,6 +10,7 @@ from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
|
|||||||
from calibre.ebooks.txt.processor import convert_basic, convert_markdown, \
|
from calibre.ebooks.txt.processor import convert_basic, convert_markdown, \
|
||||||
separate_paragraphs_single_line, separate_paragraphs_print_formatted, \
|
separate_paragraphs_single_line, separate_paragraphs_print_formatted, \
|
||||||
preserve_spaces
|
preserve_spaces
|
||||||
|
from calibre import _ent_pat, xml_entity_to_unicode
|
||||||
|
|
||||||
class TXTInput(InputFormatPlugin):
|
class TXTInput(InputFormatPlugin):
|
||||||
|
|
||||||
@ -55,6 +56,8 @@ class TXTInput(InputFormatPlugin):
|
|||||||
if options.preserve_spaces:
|
if options.preserve_spaces:
|
||||||
txt = preserve_spaces(txt)
|
txt = preserve_spaces(txt)
|
||||||
|
|
||||||
|
txt = _ent_pat.sub(xml_entity_to_unicode, txt)
|
||||||
|
|
||||||
if options.markdown:
|
if options.markdown:
|
||||||
log.debug('Running text though markdown conversion...')
|
log.debug('Running text though markdown conversion...')
|
||||||
try:
|
try:
|
||||||
|
@ -221,6 +221,8 @@ def fetch_scheduled_recipe(arg):
|
|||||||
if lf.get('base_font_size', 0.0) != 0.0:
|
if lf.get('base_font_size', 0.0) != 0.0:
|
||||||
recs.append(('base_font_size', lf['base_font_size'],
|
recs.append(('base_font_size', lf['base_font_size'],
|
||||||
OptionRecommendation.HIGH))
|
OptionRecommendation.HIGH))
|
||||||
|
recs.append(('keep_ligatures', lf['keep_ligatures'],
|
||||||
|
OptionRecommendation.HIGH))
|
||||||
|
|
||||||
lr = load_defaults('lrf_output')
|
lr = load_defaults('lrf_output')
|
||||||
if lr.get('header', False):
|
if lr.get('header', False):
|
||||||
|
@ -29,6 +29,7 @@ entry_points = {
|
|||||||
'calibre-complete = calibre.utils.complete:main',
|
'calibre-complete = calibre.utils.complete:main',
|
||||||
'pdfmanipulate = calibre.ebooks.pdf.manipulate.cli:main',
|
'pdfmanipulate = calibre.ebooks.pdf.manipulate.cli:main',
|
||||||
'fetch-ebook-metadata = calibre.ebooks.metadata.fetch:main',
|
'fetch-ebook-metadata = calibre.ebooks.metadata.fetch:main',
|
||||||
|
'epub-fix = calibre.ebooks.epub.fix.main:main',
|
||||||
'calibre-smtp = calibre.utils.smtp:main',
|
'calibre-smtp = calibre.utils.smtp:main',
|
||||||
],
|
],
|
||||||
'gui_scripts' : [
|
'gui_scripts' : [
|
||||||
@ -180,6 +181,7 @@ class PostInstall:
|
|||||||
from calibre.ebooks.metadata.fetch import option_parser as fem_op
|
from calibre.ebooks.metadata.fetch import option_parser as fem_op
|
||||||
from calibre.gui2.main import option_parser as guiop
|
from calibre.gui2.main import option_parser as guiop
|
||||||
from calibre.utils.smtp import option_parser as smtp_op
|
from calibre.utils.smtp import option_parser as smtp_op
|
||||||
|
from calibre.ebooks.epub.fix.main import option_parser as fix_op
|
||||||
any_formats = ['epub', 'htm', 'html', 'xhtml', 'xhtm', 'rar', 'zip',
|
any_formats = ['epub', 'htm', 'html', 'xhtml', 'xhtm', 'rar', 'zip',
|
||||||
'txt', 'lit', 'rtf', 'pdf', 'prc', 'mobi', 'fb2', 'odt', 'lrf']
|
'txt', 'lit', 'rtf', 'pdf', 'prc', 'mobi', 'fb2', 'odt', 'lrf']
|
||||||
bc = os.path.join(os.path.dirname(self.opts.staging_sharedir),
|
bc = os.path.join(os.path.dirname(self.opts.staging_sharedir),
|
||||||
@ -201,6 +203,7 @@ class PostInstall:
|
|||||||
f.write(opts_and_exts('ebook-viewer', viewer_op, any_formats))
|
f.write(opts_and_exts('ebook-viewer', viewer_op, any_formats))
|
||||||
f.write(opts_and_words('fetch-ebook-metadata', fem_op, []))
|
f.write(opts_and_words('fetch-ebook-metadata', fem_op, []))
|
||||||
f.write(opts_and_words('calibre-smtp', smtp_op, []))
|
f.write(opts_and_words('calibre-smtp', smtp_op, []))
|
||||||
|
f.write(opts_and_exts('epub-fix', fix_op, ['epub']))
|
||||||
f.write(textwrap.dedent('''
|
f.write(textwrap.dedent('''
|
||||||
_ebook_device_ls()
|
_ebook_device_ls()
|
||||||
{
|
{
|
||||||
|
Loading…
x
Reference in New Issue
Block a user