From 710a4e232e62981802d5a3dec9a251d889bbf202 Mon Sep 17 00:00:00 2001
From: Timothy Legge <timlegge@gmail.com>
Date: Sat, 26 Jun 2010 18:32:08 -0300
Subject: [PATCH 1/5] Fix issue with images for files on SD card not being
 displayed

---
 src/calibre/devices/kobo/driver.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/calibre/devices/kobo/driver.py b/src/calibre/devices/kobo/driver.py
index 7a37cb19c9..d367cc251d 100644
--- a/src/calibre/devices/kobo/driver.py
+++ b/src/calibre/devices/kobo/driver.py
@@ -85,7 +85,7 @@ class KOBO(USBMS):
 
                 idx = bl_cache.get(lpath, None)
                 if idx is not None:
-                    imagename = self.normalize_path(prefix + '.kobo/images/' + ImageID + ' - NickelBookCover.parsed')
+                    imagename = self.normalize_path(self._main_prefix + '.kobo/images/' + ImageID + ' - NickelBookCover.parsed')
                     #print "Image name Normalized: " + imagename
                     bl[idx].thumbnail = ImageWrapper(imagename)
                     bl_cache[lpath] = None

From 9985de5745138e8e427e545020656d8ddbe95832 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Thu, 8 Jul 2010 15:46:19 -0600
Subject: [PATCH 2/5] Initial implementation of epub-fix

---
 src/calibre/customize/builtins.py           |   8 +-
 src/calibre/customize/ui.py                 |  10 +-
 src/calibre/ebooks/conversion/preprocess.py |  14 +-
 src/calibre/ebooks/epub/fix/__init__.py     |  58 +++++++
 src/calibre/ebooks/epub/fix/container.py    | 182 ++++++++++++++++++++
 src/calibre/ebooks/epub/fix/epubcheck.py    |  82 +++++++++
 src/calibre/ebooks/epub/fix/main.py         |  56 ++++++
 src/calibre/ebooks/epub/fix/unmanifested.py |  49 ++++++
 src/calibre/linux.py                        |   3 +
 9 files changed, 453 insertions(+), 9 deletions(-)
 create mode 100644 src/calibre/ebooks/epub/fix/__init__.py
 create mode 100644 src/calibre/ebooks/epub/fix/container.py
 create mode 100644 src/calibre/ebooks/epub/fix/epubcheck.py
 create mode 100644 src/calibre/ebooks/epub/fix/main.py
 create mode 100644 src/calibre/ebooks/epub/fix/unmanifested.py

diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py
index 07006aad40..3207c52cbd 100644
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@@ -9,6 +9,7 @@ from calibre.customize import FileTypePlugin, MetadataReaderPlugin, MetadataWrit
 from calibre.constants import numeric_version
 from calibre.ebooks.metadata.archive import ArchiveExtract, get_cbz_metadata
 
+# To archive plugins {{{
 class HTML2ZIP(FileTypePlugin):
     name = 'HTML to ZIP'
     author = 'Kovid Goyal'
@@ -82,6 +83,8 @@ class PML2PMLZ(FileTypePlugin):
 
         return of.name
 
+# }}}
+
 # Metadata reader plugins {{{
 class ComicMetadataReader(MetadataReaderPlugin):
 
@@ -465,8 +468,11 @@ from calibre.ebooks.metadata.fetch import GoogleBooks, ISBNDB, Amazon, \
     LibraryThing
 from calibre.ebooks.metadata.douban import DoubanBooks
 from calibre.library.catalog import CSV_XML, EPUB_MOBI
+from calibre.ebooks.epub.fix.unmanifested import Unmanifested
+from calibre.ebooks.epub.fix.epubcheck import Epubcheck
+
 plugins = [HTML2ZIP, PML2PMLZ, ArchiveExtract, GoogleBooks, ISBNDB, Amazon,
-        LibraryThing, DoubanBooks, CSV_XML, EPUB_MOBI]
+        LibraryThing, DoubanBooks, CSV_XML, EPUB_MOBI, Unmanifested, Epubcheck]
 plugins += [
     ComicInput,
     EPUBInput,
diff --git a/src/calibre/customize/ui.py b/src/calibre/customize/ui.py
index 14d22d5017..31f4c69c0f 100644
--- a/src/calibre/customize/ui.py
+++ b/src/calibre/customize/ui.py
@@ -16,6 +16,7 @@ from calibre.ebooks.metadata import MetaInformation
 from calibre.ebooks.metadata.fetch import MetadataSource
 from calibre.utils.config import make_config_dir, Config, ConfigProxy, \
                                  plugin_dir, OptionParser, prefs
+from calibre.ebooks.epub.fix import ePubFixer
 
 
 platform = 'linux'
@@ -194,7 +195,6 @@ def plugin_customization(plugin):
 
 # }}}
 
-
 # Input/Output profiles {{{
 def input_profiles():
     for plugin in _initialized_plugins:
@@ -444,6 +444,14 @@ def device_plugins(): # {{{
                     yield plugin
 # }}}
 
+# epub fixers {{{
+def epub_fixers():
+    for plugin in _initialized_plugins:
+        if isinstance(plugin, ePubFixer):
+            if not is_disabled(plugin):
+                if platform in plugin.supported_platforms:
+                    yield plugin
+# }}}
 
 # Initialize plugins {{{
 
diff --git a/src/calibre/ebooks/conversion/preprocess.py b/src/calibre/ebooks/conversion/preprocess.py
index 4ce94188fa..3a18d38b22 100644
--- a/src/calibre/ebooks/conversion/preprocess.py
+++ b/src/calibre/ebooks/conversion/preprocess.py
@@ -25,13 +25,13 @@ convert_entities = functools.partial(entity_to_unicode,
 _span_pat = re.compile('<span.*?</span>', re.DOTALL|re.IGNORECASE)
 
 LIGATURES = {
-        u'\u00c6': u'AE',
-        u'\u00e6': u'ae',
-        u'\u0152': u'OE',
-        u'\u0153': u'oe',
-        u'\u0132': u'IJ',
-        u'\u0133': u'ij',
-        u'\u1D6B': u'ue',
+#        u'\u00c6': u'AE',
+#        u'\u00e6': u'ae',
+#        u'\u0152': u'OE',
+#        u'\u0153': u'oe',
+#        u'\u0132': u'IJ',
+#        u'\u0133': u'ij',
+#        u'\u1D6B': u'ue',
         u'\uFB00': u'ff',
         u'\uFB01': u'fi',
         u'\uFB02': u'fl',
diff --git a/src/calibre/ebooks/epub/fix/__init__.py b/src/calibre/ebooks/epub/fix/__init__.py
new file mode 100644
index 0000000000..5bdbd7c7f6
--- /dev/null
+++ b/src/calibre/ebooks/epub/fix/__init__.py
@@ -0,0 +1,58 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+
+__license__   = 'GPL v3'
+__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+
+from calibre.customize import Plugin
+
+class InvalidEpub(ValueError):
+    pass
+
+class ePubFixer(Plugin):
+
+    supported_platforms = ['windows', 'osx', 'linux']
+    author = 'Kovid Goyal'
+    type = _('ePub Fixer')
+    can_be_disabled = True
+
+    # API that subclasses must implement {{{
+    @property
+    def short_description(self):
+        raise NotImplementedError
+
+    @property
+    def long_description(self):
+        raise NotImplementedError
+
+    @property
+    def fix_name(self):
+        raise NotImplementedError
+
+    @property
+    def options(self):
+        '''
+        Return a list of 4-tuples
+        (option_name, type, default, help_text)
+        type is one of 'bool', 'int', 'string'
+        '''
+        return []
+
+    def run(self, container, opts, log, fix=False):
+        raise NotImplementedError
+    # }}}
+
+    def add_options_to_parser(self, parser):
+        parser.add_option('--' + self.fix_name.replace('_', '-'),
+                help=self.long_description, action='store_true', default=False)
+        for option in self.options:
+            action = 'store'
+            if option[1] == 'bool':
+                action = 'store_true'
+            kwargs = {'action': action, 'default':option[2], 'help':option[3]}
+            if option[1] != 'bool':
+                kwargs['type'] = option[1]
+            parser.add_option('--'+option[0].replace('_', '-'), **kwargs)
+
diff --git a/src/calibre/ebooks/epub/fix/container.py b/src/calibre/ebooks/epub/fix/container.py
new file mode 100644
index 0000000000..7a7c17427a
--- /dev/null
+++ b/src/calibre/ebooks/epub/fix/container.py
@@ -0,0 +1,182 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+
+__license__   = 'GPL v3'
+__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+import os, posixpath, urllib, sys
+
+from lxml import etree
+
+from calibre.ebooks.epub.fix import InvalidEpub
+from calibre import guess_type, prepare_string_for_xml
+from calibre.ebooks.chardet import xml_to_unicode
+from calibre.constants import iswindows
+from calibre.utils.zipfile import ZipFile, ZIP_STORED
+
+exists, join = os.path.exists, os.path.join
+
+OCF_NS = 'urn:oasis:names:tc:opendocument:xmlns:container'
+OPF_NS = 'http://www.idpf.org/2007/opf'
+
+class Container(object):
+
+    META_INF = {
+            'container.xml' : True,
+            'manifest.xml' : False,
+            'encryption.xml' : False,
+            'metadata.xml' : False,
+            'signatures.xml' : False,
+            'rights.xml' : False,
+    }
+
+    def __init__(self, path, log):
+        self.root = os.path.abspath(path)
+        self.log = log
+        self.dirtied = set([])
+        self.cache = {}
+        self.mime_map = {}
+
+        if exists(join(self.root, 'mimetype')):
+            os.remove(join(self.root, 'mimetype'))
+
+        container_path = join(self.root, 'META-INF', 'container.xml')
+        if not exists(container_path):
+            raise InvalidEpub('No META-INF/container.xml in epub')
+        self.container = etree.fromstring(open(container_path, 'rb').read())
+        opf_files = self.container.xpath((
+            r'child::ocf:rootfiles/ocf:rootfile'
+            '[@media-type="%s" and @full-path]'%guess_type('a.opf')[0]
+            ), namespaces={'ocf':OCF_NS}
+        )
+        if not opf_files:
+            raise InvalidEpub('META-INF/container.xml contains no link to OPF file')
+        opf_path = os.path.join(self.root,
+                *opf_files[0].get('full-path').split('/'))
+        if not exists(opf_path):
+            raise InvalidEpub('OPF file does not exist at location pointed to'
+                    ' by META-INF/container.xml')
+
+        # Map of relative paths with / separators to absolute
+        # paths on filesystem with os separators
+        self.name_map = {}
+        for dirpath, dirnames, filenames in os.walk(self.root):
+            for f in filenames:
+                path = join(dirpath, f)
+                name = os.path.relpath(path, self.root).replace(os.sep, '/')
+                self.name_map[name] = path
+                if path == opf_path:
+                    self.opf_name = name
+                    self.mime_map[name] = guess_type('a.opf')[0]
+
+        for item in self.opf.xpath(
+                '//opf:manifest/opf:item[@href and @media-type]',
+                namespaces={'opf':OPF_NS}):
+            href = item.get('href')
+            self.mime_map[self.href_to_name(href,
+                posixpath.dirname(self.opf_name))] = item.get('media-type')
+
+    def manifest_worthy_names(self):
+        for name in self.name_map:
+            if name.endswith('.opf'): continue
+            if name.startswith('META-INF') and \
+                    posixpath.basename(name) in self.META_INF: continue
+            yield name
+
+    def delete_name(self, name):
+        self.mime_map.pop(name, None)
+        path = self.name_map[name]
+        os.remove(path)
+        self.name_map.pop(name)
+
+    def manifest_item_for_name(self, name):
+        href = self.name_to_href(name,
+            posixpath.dirname(self.opf_name))
+        q = prepare_string_for_xml(href, attribute=True)
+        existing = self.opf.xpath('//opf:manifest/opf:item[@href="%s"]'%q,
+                namespaces={'opf':OPF_NS})
+        if not existing:
+            return None
+        return existing[0]
+
+    def add_name_to_manifest(self, name):  # no-op when name already has a manifest item
+        item = self.manifest_item_for_name(name)
+        if item is not None:
+            return
+        manifest = self.opf.xpath('//opf:manifest', namespaces={'opf':OPF_NS})[0]
+        item = manifest.makeelement('{%s}item'%OPF_NS, nsmap={'opf':OPF_NS},
+                href=self.name_to_href(name, posixpath.dirname(self.opf_name)),
+                id=self.generate_manifest_id())
+        mt = guess_type(posixpath.basename(name))[0]
+        if not mt:  # type could not be guessed: fall back to the generic binary type
+            mt = 'application/octet-stream'
+        item.set('media-type', mt)
+        manifest.append(item)
+
+    def generate_manifest_id(self):
+        items = self.opf.xpath('//opf:manifest/opf:item[@id]',
+                namespaces={'opf':OPF_NS})
+        ids = set([x.get('id') for x in items])
+        for x in xrange(sys.maxint):
+            c = 'id%d'%x
+            if c not in ids:
+                return c
+
+    @property
+    def opf(self):
+        return self.get(self.opf_name)
+
+    def href_to_name(self, href, base=''):
+        href = urllib.unquote(href.partition('#')[0])
+        name = href
+        if base:
+            name = posixpath.join(base, href)
+        return name
+
+    def name_to_href(self, name, base):
+        if not base:
+            return name
+        return posixpath.relpath(name, base)
+
+    def get_raw(self, name):
+        path = self.name_map[name]
+        return open(path, 'rb').read()
+
+    def get(self, name):
+        if name in self.cache:
+            return self.cache[name]
+        raw = self.get_raw(name)
+        if name in self.mime_map:
+            raw = self._parse(raw, self.mime_map[name])
+        self.cache[name] = raw
+        return raw
+
+    def set(self, name, val):
+        self.cache[name] = val
+        self.dirtied.add(name)
+
+    def _parse(self, raw, mimetype):
+        mt = mimetype.lower()
+        if mt.endswith('+xml'):
+            parser = etree.XMLParser(no_network=True, huge_tree=not iswindows)
+            return etree.fromstring(xml_to_unicode(raw,
+                strip_encoding_pats=True, assume_utf8=True)[0], parser=parser)
+        return raw
+
+    def write(self, path):
+        for name in self.dirtied:
+            data = self.cache[name]
+            raw = data
+            if hasattr(data, 'xpath'):
+                raw = etree.tostring(data, encoding='utf-8',
+                        xml_declaration=True)
+            with open(self.name_map[name], 'wb') as f:
+                f.write(raw)
+        self.dirtied.clear()
+        zf = ZipFile(path, 'w')
+        zf.writestr('mimetype', bytes(guess_type('a.epub')[0]),
+                compression=ZIP_STORED)
+        zf.add_dir(self.root)
+        zf.close()
+
diff --git a/src/calibre/ebooks/epub/fix/epubcheck.py b/src/calibre/ebooks/epub/fix/epubcheck.py
new file mode 100644
index 0000000000..f5c8086e7c
--- /dev/null
+++ b/src/calibre/ebooks/epub/fix/epubcheck.py
@@ -0,0 +1,82 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+
+__license__   = 'GPL v3'
+__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+from calibre.ebooks.epub.fix import ePubFixer, InvalidEpub
+from calibre.utils.date import parse_date, strptime
+
+
+class Epubcheck(ePubFixer):
+
+    name = 'Workaround epubcheck bugs'
+
+    @property
+    def short_description(self):
+        return _('Workaround epubcheck bugs')
+
+    @property
+    def long_description(self):
+        return _('Workarounds for bugs in the latest release of epubcheck. '
+                'epubcheck reports many things as errors that are not '
+                'actually errors. %prog will try to detect these and replace '
+                'them with constructs that epubcheck likes. This may cause '
+                'significant changes to your epub, complain to the epubcheck '
+                'project.')
+
+    @property
+    def fix_name(self):
+        return 'epubcheck'
+
+    def fix_pubdates(self):  # report (and, if self.fix, rewrite) dc:date values not in YYYY-MM-DD form
+        dirtied = False
+        opf = self.container.opf
+        for dcdate in opf.xpath('//dc:date',
+                namespaces={'dc':'http://purl.org/dc/elements/1.1/'}):
+            raw = dcdate.text
+            if not raw: raw = ''
+            default = strptime('2000-1-1', '%Y-%m-%d', as_utc=True)
+            try:
+                ts = parse_date(raw, assume_utc=False, as_utc=True,
+                        default=default)
+            except Exception:  # do not convert KeyboardInterrupt/SystemExit into InvalidEpub
+                raise InvalidEpub('Invalid date set in OPF', raw)
+            sval = ts.strftime('%Y-%m-%d')
+            if sval != raw:
+                self.log.error(
+                    'OPF contains date', raw, 'that epubcheck does not like')
+                if self.fix:
+                    dcdate.text = sval
+                    self.log('\tReplaced', raw, 'with', sval)
+                    dirtied = True
+        if dirtied:  # mark the OPF dirty only when at least one date was rewritten
+            self.container.set(self.container.opf_name, opf)
+
+    def fix_preserve_aspect_ratio(self):
+        for name in self.container.name_map:
+            mt = self.container.mime_map.get(name, '')
+            if mt.lower() == 'application/xhtml+xml':
+                root = self.container.get(name)
+                dirtied = False
+                for svg in root.xpath('//svg:svg[@preserveAspectRatio="none"]',
+                        namespaces={'svg':'http://www.w3.org/2000/svg'}):
+                    self.log.error('Found <svg> element with'
+                            ' preserveAspectRatio="none" which epubcheck '
+                            'cannot handle')
+                    if self.fix:
+                        svg.set('preserveAspectRatio', 'xMidYMid meet')
+                        dirtied = True
+                        self.log('\tReplaced none with xMidYMid meet')
+                if dirtied:
+                    self.container.set(name, root)
+
+
+    def run(self, container, opts, log, fix=False):
+        self.container = container
+        self.opts = opts
+        self.log = log
+        self.fix = fix
+        self.fix_pubdates()
+        self.fix_preserve_aspect_ratio()
diff --git a/src/calibre/ebooks/epub/fix/main.py b/src/calibre/ebooks/epub/fix/main.py
new file mode 100644
index 0000000000..3f9ca260b3
--- /dev/null
+++ b/src/calibre/ebooks/epub/fix/main.py
@@ -0,0 +1,56 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+
+__license__   = 'GPL v3'
+__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+import sys, os
+
+from calibre.utils.config import OptionParser
+from calibre.ptempfile import TemporaryDirectory
+from calibre import CurrentDir
+from calibre.utils.zipfile import ZipFile
+from calibre.utils.logging import default_log
+from calibre.customize.ui import epub_fixers
+from calibre.ebooks.epub.fix.container import Container
+
+def option_parser():
+    parser = OptionParser(usage=_(
+        '%prog [options] file.epub\n\n'
+        'Fix common problems in EPUB files that can cause them '
+        'to be rejected by poorly designed publishing services.\n\n'
+        'By default, no fixing is done and messages are printed out '
+        'for each error detected. Use the options to control which errors '
+        'are automatically fixed.'))
+    for fixer in epub_fixers():
+        fixer.add_options_to_parser(parser)
+
+    return parser
+
+
+def run(epub, opts, log):
+    with TemporaryDirectory('_epub-fix') as tdir:
+        with CurrentDir(tdir):
+            zf = ZipFile(epub)
+            zf.extractall()
+            zf.close()
+            container = Container(tdir, log)
+            for fixer in epub_fixers():
+                fix = getattr(opts, fixer.fix_name, False)
+                fixer.run(container, opts, log, fix=fix)
+            container.write(epub)
+
+def main(args=sys.argv):
+    parser = option_parser()
+    opts, args = parser.parse_args(args)
+    if len(args) != 2:
+        parser.print_help()
+        print
+        default_log.error(_('You must specify an epub file'))
+        return
+    epub = os.path.abspath(args[1])
+    run(epub, opts, default_log)
+
+if __name__ == '__main__':
+    main()
diff --git a/src/calibre/ebooks/epub/fix/unmanifested.py b/src/calibre/ebooks/epub/fix/unmanifested.py
new file mode 100644
index 0000000000..71913e9d50
--- /dev/null
+++ b/src/calibre/ebooks/epub/fix/unmanifested.py
@@ -0,0 +1,49 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+
+__license__   = 'GPL v3'
+__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+
+from calibre.ebooks.epub.fix import ePubFixer
+
+class Unmanifested(ePubFixer):
+
+    name = 'Fix unmanifested files'
+
+    @property
+    def short_description(self):
+        return _('Fix unmanifested files')
+
+    @property
+    def long_description(self):
+        return _('Fix unmanifested files. %prog can either add them to '
+        'the manifest or delete them as specified by the '
+        'delete unmanifested option.')
+
+    @property
+    def fix_name(self):
+        return 'unmanifested'
+
+    @property
+    def options(self):
+        return [('delete_unmanifested', 'bool', False,
+            _('Delete unmanifested files instead of adding them to the manifest'))]
+
+    def run(self, container, opts, log, fix=False):
+        dirtied = False
+        for name in list(container.manifest_worthy_names()):
+            item = container.manifest_item_for_name(name)
+            if item is None:
+                log.error(name, 'not in manifest')
+                if fix:
+                    if opts.delete_unmanifested:
+                        container.delete_name(name)
+                        log('\tDeleted')
+                    else:
+                        container.add_name_to_manifest(name)
+                        log('\tAdded to manifest')
+                        dirtied = True
+        if dirtied:
+            container.set(container.opf_name, container.opf)
diff --git a/src/calibre/linux.py b/src/calibre/linux.py
index e19df02258..33e7c004f0 100644
--- a/src/calibre/linux.py
+++ b/src/calibre/linux.py
@@ -29,6 +29,7 @@ entry_points = {
              'calibre-complete   = calibre.utils.complete:main',
              'pdfmanipulate      = calibre.ebooks.pdf.manipulate.cli:main',
              'fetch-ebook-metadata = calibre.ebooks.metadata.fetch:main',
+             'epub-fix           = calibre.ebooks.epub.fix.main:main',
              'calibre-smtp = calibre.utils.smtp:main',
         ],
         'gui_scripts'    : [
@@ -180,6 +181,7 @@ class PostInstall:
             from calibre.ebooks.metadata.fetch import option_parser as fem_op
             from calibre.gui2.main import option_parser as guiop
             from calibre.utils.smtp import option_parser as smtp_op
+            from calibre.ebooks.epub.fix.main import option_parser as fix_op
             any_formats = ['epub', 'htm', 'html', 'xhtml', 'xhtm', 'rar', 'zip',
                 'txt', 'lit', 'rtf', 'pdf', 'prc', 'mobi', 'fb2', 'odt', 'lrf']
             bc = os.path.join(os.path.dirname(self.opts.staging_sharedir),
@@ -201,6 +203,7 @@ class PostInstall:
                 f.write(opts_and_exts('ebook-viewer', viewer_op, any_formats))
                 f.write(opts_and_words('fetch-ebook-metadata', fem_op, []))
                 f.write(opts_and_words('calibre-smtp', smtp_op, []))
+                f.write(opts_and_exts('epub-fix', fix_op, ['epub']))
                 f.write(textwrap.dedent('''
                 _ebook_device_ls()
                 {

From 3c59d2da7f1992e528e671c468ad805eef624d85 Mon Sep 17 00:00:00 2001
From: Timothy Legge <timlegge@gmail.com>
Date: Thu, 8 Jul 2010 22:14:34 -0300
Subject: [PATCH 3/5] Fix bug with html files not being displayed - Kobo does
 not generate an ImageID and uses a different ContentID format

---
 src/calibre/devices/kobo/books.py  | 3 ++-
 src/calibre/devices/kobo/driver.py | 9 ++++++---
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/src/calibre/devices/kobo/books.py b/src/calibre/devices/kobo/books.py
index 781562d091..7ee5beaec0 100644
--- a/src/calibre/devices/kobo/books.py
+++ b/src/calibre/devices/kobo/books.py
@@ -54,7 +54,8 @@ class Book(MetaInformation):
         except:
              self.datetime = time.gmtime()
 
-        self.thumbnail = ImageWrapper(thumbnail_name)
+        if thumbnail_name is not None:
+            self.thumbnail = ImageWrapper(thumbnail_name)
         self.tags = []
         if other:
             self.smart_update(other)
diff --git a/src/calibre/devices/kobo/driver.py b/src/calibre/devices/kobo/driver.py
index d367cc251d..ac782f2363 100644
--- a/src/calibre/devices/kobo/driver.py
+++ b/src/calibre/devices/kobo/driver.py
@@ -85,9 +85,11 @@ class KOBO(USBMS):
 
                 idx = bl_cache.get(lpath, None)
                 if idx is not None:
-                    imagename = self.normalize_path(self._main_prefix + '.kobo/images/' + ImageID + ' - NickelBookCover.parsed')
-                    #print "Image name Normalized: " + imagename
-                    bl[idx].thumbnail = ImageWrapper(imagename)
+                    if ImageID is not None:
+                        imagename = self.normalize_path(self._main_prefix + '.kobo/images/' + ImageID + ' - NickelBookCover.parsed')
+                        #print "Image name Normalized: " + imagename
+                        if imagename is not None:
+                            bl[idx].thumbnail = ImageWrapper(imagename)
                     bl_cache[lpath] = None
                     if ContentType != '6':
                         if self.update_metadata_item(bl[idx]):
@@ -341,6 +343,7 @@ class KOBO(USBMS):
             else:
                 # if path.startswith("file:///mnt/onboard/"):
                 path = path.replace("file:///mnt/onboard/", self._main_prefix)
+                path = path.replace("/mnt/onboard/", self._main_prefix)
                     # print "Internal: " + filename
 
         return path

From f2c6baf630ad989bd4a3f686879a5d94037b9ee0 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Fri, 9 Jul 2010 09:45:14 -0600
Subject: [PATCH 4/5] Fix #6097 (Changing from lrf file to mobi)

---
 src/calibre/ebooks/lrf/input.py   | 2 +-
 src/calibre/ebooks/lrf/objects.py | 2 +-
 src/calibre/ebooks/oeb/base.py    | 2 +-
 src/calibre/gui2/tools.py         | 2 ++
 4 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/calibre/ebooks/lrf/input.py b/src/calibre/ebooks/lrf/input.py
index e9e6c502ec..256ab6fdf2 100644
--- a/src/calibre/ebooks/lrf/input.py
+++ b/src/calibre/ebooks/lrf/input.py
@@ -367,7 +367,7 @@ class LRFInput(InputFormatPlugin):
         xml = d.to_xml(write_files=True)
         if options.verbose > 2:
             open('lrs.xml', 'wb').write(xml.encode('utf-8'))
-        parser = etree.XMLParser(recover=True, no_network=True, huge_tree=True)
+        parser = etree.XMLParser(no_network=True, huge_tree=True)
         doc = etree.fromstring(xml, parser=parser)
         char_button_map = {}
         for x in doc.xpath('//CharButton[@refobj]'):
diff --git a/src/calibre/ebooks/lrf/objects.py b/src/calibre/ebooks/lrf/objects.py
index 8f69e94013..9f0dd4211c 100644
--- a/src/calibre/ebooks/lrf/objects.py
+++ b/src/calibre/ebooks/lrf/objects.py
@@ -870,7 +870,7 @@ class Text(LRFStream):
         open_containers = collections.deque()
         for c in self.content:
             if isinstance(c, basestring):
-                s += prepare_string_for_xml(c)
+                s += prepare_string_for_xml(c).replace('\0', '')
             elif c is None:
                 if open_containers:
                     p = open_containers.pop()
diff --git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py
index f48b6f8f51..b5f61db3ac 100644
--- a/src/calibre/ebooks/oeb/base.py
+++ b/src/calibre/ebooks/oeb/base.py
@@ -26,7 +26,7 @@ from calibre.ebooks.chardet import xml_to_unicode
 from calibre.ebooks.oeb.entitydefs import ENTITYDEFS
 from calibre.ebooks.conversion.preprocess import CSSPreProcessor
 
-RECOVER_PARSER = etree.XMLParser(recover=True, no_network=True, huge_tree=True)
+RECOVER_PARSER = etree.XMLParser(recover=True, no_network=True)
 
 XML_NS       = 'http://www.w3.org/XML/1998/namespace'
 XHTML_NS     = 'http://www.w3.org/1999/xhtml'
diff --git a/src/calibre/gui2/tools.py b/src/calibre/gui2/tools.py
index 1f58f85383..9680b616e3 100644
--- a/src/calibre/gui2/tools.py
+++ b/src/calibre/gui2/tools.py
@@ -221,6 +221,8 @@ def fetch_scheduled_recipe(arg):
     if lf.get('base_font_size', 0.0) != 0.0:
         recs.append(('base_font_size', lf['base_font_size'],
             OptionRecommendation.HIGH))
+        recs.append(('keep_ligatures', lf.get('keep_ligatures', False),
+            OptionRecommendation.HIGH))
 
     lr = load_defaults('lrf_output')
     if lr.get('header', False):

From ec607dc596c03cf00901bda3f702b8a069a4b13a Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Fri, 9 Jul 2010 10:08:00 -0600
Subject: [PATCH 5/5] TXT Input: Convert HTML entities to characters. Fixes
 #6114 (markdown lines with HTML UNICODE character sequences not converting
 correctly.)

---
 src/calibre/ebooks/txt/input.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/calibre/ebooks/txt/input.py b/src/calibre/ebooks/txt/input.py
index cce7bea519..b444bf1cf4 100644
--- a/src/calibre/ebooks/txt/input.py
+++ b/src/calibre/ebooks/txt/input.py
@@ -10,6 +10,7 @@ from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
 from calibre.ebooks.txt.processor import convert_basic, convert_markdown, \
     separate_paragraphs_single_line, separate_paragraphs_print_formatted, \
     preserve_spaces
+from calibre import _ent_pat, xml_entity_to_unicode
 
 class TXTInput(InputFormatPlugin):
 
@@ -55,6 +56,8 @@ class TXTInput(InputFormatPlugin):
         if options.preserve_spaces:
             txt = preserve_spaces(txt)
 
+        txt = _ent_pat.sub(xml_entity_to_unicode, txt)
+
         if options.markdown:
             log.debug('Running text though markdown conversion...')
             try: