From 710a4e232e62981802d5a3dec9a251d889bbf202 Mon Sep 17 00:00:00 2001 From: Timothy Legge Date: Sat, 26 Jun 2010 18:32:08 -0300 Subject: [PATCH 1/5] Fix issue with images for files on SD card not being displayed --- src/calibre/devices/kobo/driver.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/devices/kobo/driver.py b/src/calibre/devices/kobo/driver.py index 7a37cb19c9..d367cc251d 100644 --- a/src/calibre/devices/kobo/driver.py +++ b/src/calibre/devices/kobo/driver.py @@ -85,7 +85,7 @@ class KOBO(USBMS): idx = bl_cache.get(lpath, None) if idx is not None: - imagename = self.normalize_path(prefix + '.kobo/images/' + ImageID + ' - NickelBookCover.parsed') + imagename = self.normalize_path(self._main_prefix + '.kobo/images/' + ImageID + ' - NickelBookCover.parsed') #print "Image name Normalized: " + imagename bl[idx].thumbnail = ImageWrapper(imagename) bl_cache[lpath] = None From 9985de5745138e8e427e545020656d8ddbe95832 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 8 Jul 2010 15:46:19 -0600 Subject: [PATCH 2/5] Initial implementation of epub-fix --- src/calibre/customize/builtins.py | 8 +- src/calibre/customize/ui.py | 10 +- src/calibre/ebooks/conversion/preprocess.py | 14 +- src/calibre/ebooks/epub/fix/__init__.py | 58 +++++++ src/calibre/ebooks/epub/fix/container.py | 182 ++++++++++++++++++++ src/calibre/ebooks/epub/fix/epubcheck.py | 82 +++++++++ src/calibre/ebooks/epub/fix/main.py | 56 ++++++ src/calibre/ebooks/epub/fix/unmanifested.py | 49 ++++++ src/calibre/linux.py | 3 + 9 files changed, 453 insertions(+), 9 deletions(-) create mode 100644 src/calibre/ebooks/epub/fix/__init__.py create mode 100644 src/calibre/ebooks/epub/fix/container.py create mode 100644 src/calibre/ebooks/epub/fix/epubcheck.py create mode 100644 src/calibre/ebooks/epub/fix/main.py create mode 100644 src/calibre/ebooks/epub/fix/unmanifested.py diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py index 
07006aad40..3207c52cbd 100644 --- a/src/calibre/customize/builtins.py +++ b/src/calibre/customize/builtins.py @@ -9,6 +9,7 @@ from calibre.customize import FileTypePlugin, MetadataReaderPlugin, MetadataWrit from calibre.constants import numeric_version from calibre.ebooks.metadata.archive import ArchiveExtract, get_cbz_metadata +# To archive plugins {{{ class HTML2ZIP(FileTypePlugin): name = 'HTML to ZIP' author = 'Kovid Goyal' @@ -82,6 +83,8 @@ class PML2PMLZ(FileTypePlugin): return of.name +# }}} + # Metadata reader plugins {{{ class ComicMetadataReader(MetadataReaderPlugin): @@ -465,8 +468,11 @@ from calibre.ebooks.metadata.fetch import GoogleBooks, ISBNDB, Amazon, \ LibraryThing from calibre.ebooks.metadata.douban import DoubanBooks from calibre.library.catalog import CSV_XML, EPUB_MOBI +from calibre.ebooks.epub.fix.unmanifested import Unmanifested +from calibre.ebooks.epub.fix.epubcheck import Epubcheck + plugins = [HTML2ZIP, PML2PMLZ, ArchiveExtract, GoogleBooks, ISBNDB, Amazon, - LibraryThing, DoubanBooks, CSV_XML, EPUB_MOBI] + LibraryThing, DoubanBooks, CSV_XML, EPUB_MOBI, Unmanifested, Epubcheck] plugins += [ ComicInput, EPUBInput, diff --git a/src/calibre/customize/ui.py b/src/calibre/customize/ui.py index 14d22d5017..31f4c69c0f 100644 --- a/src/calibre/customize/ui.py +++ b/src/calibre/customize/ui.py @@ -16,6 +16,7 @@ from calibre.ebooks.metadata import MetaInformation from calibre.ebooks.metadata.fetch import MetadataSource from calibre.utils.config import make_config_dir, Config, ConfigProxy, \ plugin_dir, OptionParser, prefs +from calibre.ebooks.epub.fix import ePubFixer platform = 'linux' @@ -194,7 +195,6 @@ def plugin_customization(plugin): # }}} - # Input/Output profiles {{{ def input_profiles(): for plugin in _initialized_plugins: @@ -444,6 +444,14 @@ def device_plugins(): # {{{ yield plugin # }}} +# epub fixers {{{ +def epub_fixers(): + for plugin in _initialized_plugins: + if isinstance(plugin, ePubFixer): + if not is_disabled(plugin): + if 
platform in plugin.supported_platforms: + yield plugin +# }}} # Initialize plugins {{{ diff --git a/src/calibre/ebooks/conversion/preprocess.py b/src/calibre/ebooks/conversion/preprocess.py index 4ce94188fa..3a18d38b22 100644 --- a/src/calibre/ebooks/conversion/preprocess.py +++ b/src/calibre/ebooks/conversion/preprocess.py @@ -25,13 +25,13 @@ convert_entities = functools.partial(entity_to_unicode, _span_pat = re.compile('', re.DOTALL|re.IGNORECASE) LIGATURES = { - u'\u00c6': u'AE', - u'\u00e6': u'ae', - u'\u0152': u'OE', - u'\u0153': u'oe', - u'\u0132': u'IJ', - u'\u0133': u'ij', - u'\u1D6B': u'ue', +# u'\u00c6': u'AE', +# u'\u00e6': u'ae', +# u'\u0152': u'OE', +# u'\u0153': u'oe', +# u'\u0132': u'IJ', +# u'\u0133': u'ij', +# u'\u1D6B': u'ue', u'\uFB00': u'ff', u'\uFB01': u'fi', u'\uFB02': u'fl', diff --git a/src/calibre/ebooks/epub/fix/__init__.py b/src/calibre/ebooks/epub/fix/__init__.py new file mode 100644 index 0000000000..5bdbd7c7f6 --- /dev/null +++ b/src/calibre/ebooks/epub/fix/__init__.py @@ -0,0 +1,58 @@ +#!/usr/bin/env python +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai + +__license__ = 'GPL v3' +__copyright__ = '2010, Kovid Goyal ' +__docformat__ = 'restructuredtext en' + + +from calibre.customize import Plugin + +class InvalidEpub(ValueError): + pass + +class ePubFixer(Plugin): + + supported_platforms = ['windows', 'osx', 'linux'] + author = 'Kovid Goyal' + type = _('ePub Fixer') + can_be_disabled = True + + # API that subclasses must implement {{{ + @property + def short_description(self): + raise NotImplementedError + + @property + def long_description(self): + raise NotImplementedError + + @property + def fix_name(self): + raise NotImplementedError + + @property + def options(self): + ''' + Return a list of 4-tuples + (option_name, type, default, help_text) + type is one of 'bool', 'int', 'string' + ''' + return [] + + def run(self, container, opts, log, fix=False): + raise NotImplementedError + # }}} + + def add_options_to_parser(self, 
parser): + parser.add_option('--' + self.fix_name.replace('_', '-'), + help=self.long_description, action='store_true', default=False) + for option in self.options: + action = 'store' + if option[1] == 'bool': + action = 'store_true' + kwargs = {'action': action, 'default':option[2], 'help':option[3]} + if option[1] != 'bool': + kwargs['type'] = option[1] + parser.add_option('--'+option[0].replace('_', '-'), **kwargs) + diff --git a/src/calibre/ebooks/epub/fix/container.py b/src/calibre/ebooks/epub/fix/container.py new file mode 100644 index 0000000000..7a7c17427a --- /dev/null +++ b/src/calibre/ebooks/epub/fix/container.py @@ -0,0 +1,182 @@ +#!/usr/bin/env python +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai + +__license__ = 'GPL v3' +__copyright__ = '2010, Kovid Goyal ' +__docformat__ = 'restructuredtext en' + +import os, posixpath, urllib, sys + +from lxml import etree + +from calibre.ebooks.epub.fix import InvalidEpub +from calibre import guess_type, prepare_string_for_xml +from calibre.ebooks.chardet import xml_to_unicode +from calibre.constants import iswindows +from calibre.utils.zipfile import ZipFile, ZIP_STORED + +exists, join = os.path.exists, os.path.join + +OCF_NS = 'urn:oasis:names:tc:opendocument:xmlns:container' +OPF_NS = 'http://www.idpf.org/2007/opf' + +class Container(object): + + META_INF = { + 'container.xml' : True, + 'manifest.xml' : False, + 'encryption.xml' : False, + 'metadata.xml' : False, + 'signatures.xml' : False, + 'rights.xml' : False, + } + + def __init__(self, path, log): + self.root = os.path.abspath(path) + self.log = log + self.dirtied = set([]) + self.cache = {} + self.mime_map = {} + + if exists(join(self.root, 'mimetype')): + os.remove(join(self.root, 'mimetype')) + + container_path = join(self.root, 'META-INF', 'container.xml') + if not exists(container_path): + raise InvalidEpub('No META-INF/container.xml in epub') + self.container = etree.fromstring(open(container_path, 'rb').read()) + opf_files = 
self.container.xpath(( + r'child::ocf:rootfiles/ocf:rootfile' + '[@media-type="%s" and @full-path]'%guess_type('a.opf')[0] + ), namespaces={'ocf':OCF_NS} + ) + if not opf_files: + raise InvalidEpub('META-INF/container.xml contains no link to OPF file') + opf_path = os.path.join(self.root, + *opf_files[0].get('full-path').split('/')) + if not exists(opf_path): + raise InvalidEpub('OPF file does not exist at location pointed to' + ' by META-INF/container.xml') + + # Map of relative paths with / separators to absolute + # paths on filesystem with os separators + self.name_map = {} + for dirpath, dirnames, filenames in os.walk(self.root): + for f in filenames: + path = join(dirpath, f) + name = os.path.relpath(path, self.root).replace(os.sep, '/') + self.name_map[name] = path + if path == opf_path: + self.opf_name = name + self.mime_map[name] = guess_type('a.opf')[0] + + for item in self.opf.xpath( + '//opf:manifest/opf:item[@href and @media-type]', + namespaces={'opf':OPF_NS}): + href = item.get('href') + self.mime_map[self.href_to_name(href, + posixpath.dirname(self.opf_name))] = item.get('media-type') + + def manifest_worthy_names(self): + for name in self.name_map: + if name.endswith('.opf'): continue + if name.startswith('META-INF') and \ + posixpath.basename(name) in self.META_INF: continue + yield name + + def delete_name(self, name): + self.mime_map.pop(name, None) + path = self.name_map[name] + os.remove(path) + self.name_map.pop(name) + + def manifest_item_for_name(self, name): + href = self.name_to_href(name, + posixpath.dirname(self.opf_name)) + q = prepare_string_for_xml(href, attribute=True) + existing = self.opf.xpath('//opf:manifest/opf:item[@href="%s"]'%q, + namespaces={'opf':OPF_NS}) + if not existing: + return None + return existing[0] + + def add_name_to_manifest(self, name): + item = self.manifest_item_for_name(name) + if item is not None: + return + manifest = self.opf.xpath('//opf:manifest', namespaces={'opf':OPF_NS})[0] + item = 
manifest.makeelement('{%s}item'%OPF_NS, nsmap={'opf':OPF_NS}, + href=self.name_to_href(name, posixpath.dirname(self.opf_name)), + id=self.generate_manifest_id()) + mt = guess_type(posixpath.basename(name))[0] + if not mt: + mt = 'application/octest-stream' + item.set('media-type', mt) + manifest.append(item) + + def generate_manifest_id(self): + items = self.opf.xpath('//opf:manifest/opf:item[@id]', + namespaces={'opf':OPF_NS}) + ids = set([x.get('id') for x in items]) + for x in xrange(sys.maxint): + c = 'id%d'%x + if c not in ids: + return c + + @property + def opf(self): + return self.get(self.opf_name) + + def href_to_name(self, href, base=''): + href = urllib.unquote(href.partition('#')[0]) + name = href + if base: + name = posixpath.join(base, href) + return name + + def name_to_href(self, name, base): + if not base: + return name + return posixpath.relpath(name, base) + + def get_raw(self, name): + path = self.name_map[name] + return open(path, 'rb').read() + + def get(self, name): + if name in self.cache: + return self.cache[name] + raw = self.get_raw(name) + if name in self.mime_map: + raw = self._parse(raw, self.mime_map[name]) + self.cache[name] = raw + return raw + + def set(self, name, val): + self.cache[name] = val + self.dirtied.add(name) + + def _parse(self, raw, mimetype): + mt = mimetype.lower() + if mt.endswith('+xml'): + parser = etree.XMLParser(no_network=True, huge_tree=not iswindows) + return etree.fromstring(xml_to_unicode(raw, + strip_encoding_pats=True, assume_utf8=True)[0], parser=parser) + return raw + + def write(self, path): + for name in self.dirtied: + data = self.cache[name] + raw = data + if hasattr(data, 'xpath'): + raw = etree.tostring(data, encoding='utf-8', + xml_declaration=True) + with open(self.name_map[name], 'wb') as f: + f.write(raw) + self.dirtied.clear() + zf = ZipFile(path, 'w') + zf.writestr('mimetype', bytes(guess_type('a.epub')[0]), + compression=ZIP_STORED) + zf.add_dir(self.root) + zf.close() + diff --git 
a/src/calibre/ebooks/epub/fix/epubcheck.py b/src/calibre/ebooks/epub/fix/epubcheck.py new file mode 100644 index 0000000000..f5c8086e7c --- /dev/null +++ b/src/calibre/ebooks/epub/fix/epubcheck.py @@ -0,0 +1,82 @@ +#!/usr/bin/env python +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai + +__license__ = 'GPL v3' +__copyright__ = '2010, Kovid Goyal ' +__docformat__ = 'restructuredtext en' + +from calibre.ebooks.epub.fix import ePubFixer, InvalidEpub +from calibre.utils.date import parse_date, strptime + + +class Epubcheck(ePubFixer): + + name = 'Workaround epubcheck bugs' + + @property + def short_description(self): + return _('Workaround epubcheck bugs') + + @property + def long_description(self): + return _('Workarounds for bugs in the latest release of epubcheck. ' + 'epubcheck reports many things as errors that are not ' + 'actually errors. %prog will try to detect these and replace ' + 'them with constructs that epubcheck likes. This may cause ' + 'significant changes to your epub, complain to the epubcheck ' + 'project.') + + @property + def fix_name(self): + return 'epubcheck' + + def fix_pubdates(self): + dirtied = False + opf = self.container.opf + for dcdate in opf.xpath('//dc:date', + namespaces={'dc':'http://purl.org/dc/elements/1.1/'}): + raw = dcdate.text + if not raw: raw = '' + default = strptime('2000-1-1', '%Y-%m-%d', as_utc=True) + try: + ts = parse_date(raw, assume_utc=False, as_utc=True, + default=default) + except: + raise InvalidEpub('Invalid date set in OPF', raw) + sval = ts.strftime('%Y-%m-%d') + if sval != raw: + self.log.error( + 'OPF contains date', raw, 'that epubcheck does not like') + if self.fix: + dcdate.text = sval + self.log('\tReplaced', raw, 'with', sval) + dirtied = True + if dirtied: + self.container.set(self.container.opf_name, opf) + + def fix_preserve_aspect_ratio(self): + for name in self.container.name_map: + mt = self.container.mime_map.get(name, '') + if mt.lower() == 'application/xhtml+xml': + root = 
self.container.get(name) + dirtied = False + for svg in root.xpath('//svg:svg[@preserveAspectRatio="none"]', + namespaces={'svg':'http://www.w3.org/2000/svg'}): + self.log.error('Found element with' + ' preserveAspectRatio="none" which epubcheck ' + 'cannot handle') + if self.fix: + svg.set('preserveAspectRatio', 'xMidYMid meet') + dirtied = True + self.log('\tReplaced none with xMidYMid meet') + if dirtied: + self.container.set(name, root) + + + def run(self, container, opts, log, fix=False): + self.container = container + self.opts = opts + self.log = log + self.fix = fix + self.fix_pubdates() + self.fix_preserve_aspect_ratio() diff --git a/src/calibre/ebooks/epub/fix/main.py b/src/calibre/ebooks/epub/fix/main.py new file mode 100644 index 0000000000..3f9ca260b3 --- /dev/null +++ b/src/calibre/ebooks/epub/fix/main.py @@ -0,0 +1,56 @@ +#!/usr/bin/env python +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai + +__license__ = 'GPL v3' +__copyright__ = '2010, Kovid Goyal ' +__docformat__ = 'restructuredtext en' + +import sys, os + +from calibre.utils.config import OptionParser +from calibre.ptempfile import TemporaryDirectory +from calibre import CurrentDir +from calibre.utils.zipfile import ZipFile +from calibre.utils.logging import default_log +from calibre.customize.ui import epub_fixers +from calibre.ebooks.epub.fix.container import Container + +def option_parser(): + parser = OptionParser(usage=_( + '%prog [options] file.epub\n\n' + 'Fix common problems in EPUB files that can cause them ' + 'to be rejected by poorly designed publishing services.\n\n' + 'By default, no fixing is done and messages are printed out ' + 'for each error detected. 
Use the options to control which errors ' + 'are automatically fixed.')) + for fixer in epub_fixers(): + fixer.add_options_to_parser(parser) + + return parser + + +def run(epub, opts, log): + with TemporaryDirectory('_epub-fix') as tdir: + with CurrentDir(tdir): + zf = ZipFile(epub) + zf.extractall() + zf.close() + container = Container(tdir, log) + for fixer in epub_fixers(): + fix = getattr(opts, fixer.fix_name, False) + fixer.run(container, opts, log, fix=fix) + container.write(epub) + +def main(args=sys.argv): + parser = option_parser() + opts, args = parser.parse_args(args) + if len(args) != 2: + parser.print_help() + print + default_log.error(_('You must specify an epub file')) + return + epub = os.path.abspath(args[1]) + run(epub, opts, default_log) + +if __name__ == '__main__': + main() diff --git a/src/calibre/ebooks/epub/fix/unmanifested.py b/src/calibre/ebooks/epub/fix/unmanifested.py new file mode 100644 index 0000000000..71913e9d50 --- /dev/null +++ b/src/calibre/ebooks/epub/fix/unmanifested.py @@ -0,0 +1,49 @@ +#!/usr/bin/env python +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai + +__license__ = 'GPL v3' +__copyright__ = '2010, Kovid Goyal ' +__docformat__ = 'restructuredtext en' + + +from calibre.ebooks.epub.fix import ePubFixer + +class Unmanifested(ePubFixer): + + name = 'Fix unmanifested files' + + @property + def short_description(self): + return _('Fix unmanifested files') + + @property + def long_description(self): + return _('Fix unmanifested files. 
%prog can either add them to ' + 'the manifest or delete them as specified by the ' + 'delete unmanifested option.') + + @property + def fix_name(self): + return 'unmanifested' + + @property + def options(self): + return [('delete_unmanifested', 'bool', False, + _('Delete unmanifested files instead of adding them to the manifest'))] + + def run(self, container, opts, log, fix=False): + dirtied = False + for name in list(container.manifest_worthy_names()): + item = container.manifest_item_for_name(name) + if item is None: + log.error(name, 'not in manifest') + if fix: + if opts.delete_unmanifested: + container.delete_name(name) + log('\tDeleted') + else: + container.add_name_to_manifest(name) + log('\tAdded to manifest') + dirtied = True + if dirtied: + container.set(container.opf_name, container.opf) diff --git a/src/calibre/linux.py b/src/calibre/linux.py index e19df02258..33e7c004f0 100644 --- a/src/calibre/linux.py +++ b/src/calibre/linux.py @@ -29,6 +29,7 @@ entry_points = { 'calibre-complete = calibre.utils.complete:main', 'pdfmanipulate = calibre.ebooks.pdf.manipulate.cli:main', 'fetch-ebook-metadata = calibre.ebooks.metadata.fetch:main', + 'epub-fix = calibre.ebooks.epub.fix.main:main', 'calibre-smtp = calibre.utils.smtp:main', ], 'gui_scripts' : [ @@ -180,6 +181,7 @@ class PostInstall: from calibre.ebooks.metadata.fetch import option_parser as fem_op from calibre.gui2.main import option_parser as guiop from calibre.utils.smtp import option_parser as smtp_op + from calibre.ebooks.epub.fix.main import option_parser as fix_op any_formats = ['epub', 'htm', 'html', 'xhtml', 'xhtm', 'rar', 'zip', 'txt', 'lit', 'rtf', 'pdf', 'prc', 'mobi', 'fb2', 'odt', 'lrf'] bc = os.path.join(os.path.dirname(self.opts.staging_sharedir), @@ -201,6 +203,7 @@ class PostInstall: f.write(opts_and_exts('ebook-viewer', viewer_op, any_formats)) f.write(opts_and_words('fetch-ebook-metadata', fem_op, [])) f.write(opts_and_words('calibre-smtp', smtp_op, [])) + 
f.write(opts_and_exts('epub-fix', fix_op, ['epub'])) f.write(textwrap.dedent(''' _ebook_device_ls() { From 3c59d2da7f1992e528e671c468ad805eef624d85 Mon Sep 17 00:00:00 2001 From: Timothy Legge Date: Thu, 8 Jul 2010 22:14:34 -0300 Subject: [PATCH 3/5] Fix bug with html files not being displayed - Kobo does not generate an ImageID and uses a different ContentID format --- src/calibre/devices/kobo/books.py | 3 ++- src/calibre/devices/kobo/driver.py | 9 ++++++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/calibre/devices/kobo/books.py b/src/calibre/devices/kobo/books.py index 781562d091..7ee5beaec0 100644 --- a/src/calibre/devices/kobo/books.py +++ b/src/calibre/devices/kobo/books.py @@ -54,7 +54,8 @@ class Book(MetaInformation): except: self.datetime = time.gmtime() - self.thumbnail = ImageWrapper(thumbnail_name) + if thumbnail_name is not None: + self.thumbnail = ImageWrapper(thumbnail_name) self.tags = [] if other: self.smart_update(other) diff --git a/src/calibre/devices/kobo/driver.py b/src/calibre/devices/kobo/driver.py index d367cc251d..ac782f2363 100644 --- a/src/calibre/devices/kobo/driver.py +++ b/src/calibre/devices/kobo/driver.py @@ -85,9 +85,11 @@ class KOBO(USBMS): idx = bl_cache.get(lpath, None) if idx is not None: - imagename = self.normalize_path(self._main_prefix + '.kobo/images/' + ImageID + ' - NickelBookCover.parsed') - #print "Image name Normalized: " + imagename - bl[idx].thumbnail = ImageWrapper(imagename) + if ImageID is not None: + imagename = self.normalize_path(self._main_prefix + '.kobo/images/' + ImageID + ' - NickelBookCover.parsed') + #print "Image name Normalized: " + imagename + if imagename is not None: + bl[idx].thumbnail = ImageWrapper(imagename) bl_cache[lpath] = None if ContentType != '6': if self.update_metadata_item(bl[idx]): @@ -341,6 +343,7 @@ class KOBO(USBMS): else: # if path.startswith("file:///mnt/onboard/"): path = path.replace("file:///mnt/onboard/", self._main_prefix) + path = 
path.replace("/mnt/onboard/", self._main_prefix) # print "Internal: " + filename return path From f2c6baf630ad989bd4a3f686879a5d94037b9ee0 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Fri, 9 Jul 2010 09:45:14 -0600 Subject: [PATCH 4/5] Fix #6097 (Changing from lrf file to mobi) --- src/calibre/ebooks/lrf/input.py | 2 +- src/calibre/ebooks/lrf/objects.py | 2 +- src/calibre/ebooks/oeb/base.py | 2 +- src/calibre/gui2/tools.py | 2 ++ 4 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/calibre/ebooks/lrf/input.py b/src/calibre/ebooks/lrf/input.py index e9e6c502ec..256ab6fdf2 100644 --- a/src/calibre/ebooks/lrf/input.py +++ b/src/calibre/ebooks/lrf/input.py @@ -367,7 +367,7 @@ class LRFInput(InputFormatPlugin): xml = d.to_xml(write_files=True) if options.verbose > 2: open('lrs.xml', 'wb').write(xml.encode('utf-8')) - parser = etree.XMLParser(recover=True, no_network=True, huge_tree=True) + parser = etree.XMLParser(no_network=True, huge_tree=True) doc = etree.fromstring(xml, parser=parser) char_button_map = {} for x in doc.xpath('//CharButton[@refobj]'): diff --git a/src/calibre/ebooks/lrf/objects.py b/src/calibre/ebooks/lrf/objects.py index 8f69e94013..9f0dd4211c 100644 --- a/src/calibre/ebooks/lrf/objects.py +++ b/src/calibre/ebooks/lrf/objects.py @@ -870,7 +870,7 @@ class Text(LRFStream): open_containers = collections.deque() for c in self.content: if isinstance(c, basestring): - s += prepare_string_for_xml(c) + s += prepare_string_for_xml(c).replace('\0', '') elif c is None: if open_containers: p = open_containers.pop() diff --git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py index f48b6f8f51..b5f61db3ac 100644 --- a/src/calibre/ebooks/oeb/base.py +++ b/src/calibre/ebooks/oeb/base.py @@ -26,7 +26,7 @@ from calibre.ebooks.chardet import xml_to_unicode from calibre.ebooks.oeb.entitydefs import ENTITYDEFS from calibre.ebooks.conversion.preprocess import CSSPreProcessor -RECOVER_PARSER = etree.XMLParser(recover=True, no_network=True, 
huge_tree=True) +RECOVER_PARSER = etree.XMLParser(recover=True, no_network=True) XML_NS = 'http://www.w3.org/XML/1998/namespace' XHTML_NS = 'http://www.w3.org/1999/xhtml' diff --git a/src/calibre/gui2/tools.py b/src/calibre/gui2/tools.py index 1f58f85383..9680b616e3 100644 --- a/src/calibre/gui2/tools.py +++ b/src/calibre/gui2/tools.py @@ -221,6 +221,8 @@ def fetch_scheduled_recipe(arg): if lf.get('base_font_size', 0.0) != 0.0: recs.append(('base_font_size', lf['base_font_size'], OptionRecommendation.HIGH)) + recs.append(('keep_ligatures', lf['keep_ligatures'], + OptionRecommendation.HIGH)) lr = load_defaults('lrf_output') if lr.get('header', False): From ec607dc596c03cf00901bda3f702b8a069a4b13a Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Fri, 9 Jul 2010 10:08:00 -0600 Subject: [PATCH 5/5] TXT Input: Convert HTML entities to characters. Fixes #6114 (markdown lines with HTML UNICODE character sequences not converting correctly.) --- src/calibre/ebooks/txt/input.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/calibre/ebooks/txt/input.py b/src/calibre/ebooks/txt/input.py index cce7bea519..b444bf1cf4 100644 --- a/src/calibre/ebooks/txt/input.py +++ b/src/calibre/ebooks/txt/input.py @@ -10,6 +10,7 @@ from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation from calibre.ebooks.txt.processor import convert_basic, convert_markdown, \ separate_paragraphs_single_line, separate_paragraphs_print_formatted, \ preserve_spaces +from calibre import _ent_pat, xml_entity_to_unicode class TXTInput(InputFormatPlugin): @@ -55,6 +56,8 @@ class TXTInput(InputFormatPlugin): if options.preserve_spaces: txt = preserve_spaces(txt) + txt = _ent_pat.sub(xml_entity_to_unicode, txt) + if options.markdown: log.debug('Running text though markdown conversion...') try: