From 4b4ae70290796246a469218fd199b4a22b116e58 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sun, 25 Mar 2012 01:33:58 +0530
Subject: [PATCH 01/19] ...

---
 src/calibre/gui2/update.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/calibre/gui2/update.py b/src/calibre/gui2/update.py
index caa1d3f3dc..526a0bc56e 100644
--- a/src/calibre/gui2/update.py
+++ b/src/calibre/gui2/update.py
@@ -151,7 +151,7 @@ class UpdateMixin(object):
             plt = u''
             if has_plugin_updates:
                 plt = _(' (%d plugin updates)')%plugin_updates
-            msg = (u'<span style="color:red; font-weight: bold">%s: '
+            msg = (u'<span style="color:green; font-weight: bold">%s: '
                     u'<a href="update:%s">%s%s</a></span>') % (
                         _('Update found'), version, calibre_version, plt)
         else:

From 5a1307ca6fd60737b494f2243482036f91050eac Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sun, 25 Mar 2012 01:43:45 +0530
Subject: [PATCH 02/19] E-book viewer: Fix regression that caused the ebook
 viewer to stop functioning if it is launched from the main calibre program
 and then the main calibre program is closed. Fixes #963960 (Reader crashes at
 end of chapter)

---
 src/calibre/gui2/viewer/main.py | 2 ++
 src/calibre/ptempfile.py        | 5 +++++
 2 files changed, 7 insertions(+)

diff --git a/src/calibre/gui2/viewer/main.py b/src/calibre/gui2/viewer/main.py
index df4d146581..0122b42012 100644
--- a/src/calibre/gui2/viewer/main.py
+++ b/src/calibre/gui2/viewer/main.py
@@ -27,6 +27,7 @@ from calibre.ebooks.metadata import MetaInformation
 from calibre.customize.ui import available_input_formats
 from calibre.gui2.viewer.dictionary import Lookup
 from calibre import as_unicode, force_unicode, isbytestring
+from calibre.ptempfile import reset_base_dir
 
 vprefs = JSONConfig('viewer')
 
@@ -947,6 +948,7 @@ View an ebook.
 def main(args=sys.argv):
     # Ensure viewer can continue to function if GUI is closed
     os.environ.pop('CALIBRE_WORKER_TEMP_DIR', None)
+    reset_base_dir()
 
     parser = option_parser()
     opts, args = parser.parse_args(args)
diff --git a/src/calibre/ptempfile.py b/src/calibre/ptempfile.py
index 48974b0c6c..706a96b4b6 100644
--- a/src/calibre/ptempfile.py
+++ b/src/calibre/ptempfile.py
@@ -74,6 +74,11 @@ def base_dir():
 
     return _base_dir
 
+def reset_base_dir():
+    global _base_dir
+    _base_dir = None
+    base_dir()
+
 def force_unicode(x):
     # Cannot use the implementation in calibre.__init__ as it causes a circular
     # dependency

From 7a9e2050f907b22da738cfe636d5869ea9508494 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sun, 25 Mar 2012 08:29:54 +0530
Subject: [PATCH 03/19] ...

---
 src/calibre/library/cli.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/calibre/library/cli.py b/src/calibre/library/cli.py
index da1b1e27c6..d5def1a364 100644
--- a/src/calibre/library/cli.py
+++ b/src/calibre/library/cli.py
@@ -233,7 +233,7 @@ def do_add(db, paths, one_book_per_directory, recurse, add_duplicates, otitle,
             if not mi.authors:
                 mi.authors = [_('Unknown')]
             for x in ('title', 'authors', 'isbn', 'tags', 'series'):
-                val = locals()[x]
+                val = locals()['o'+x]
                 if val: setattr(mi, x[1:], val)
             if oseries:
                 mi.series_index = oseries_index
@@ -356,7 +356,7 @@ def command_add(args, dbpath):
         print >>sys.stderr, _('You must specify at least one file to add')
         return 1
     do_add(get_db(dbpath, opts), args[1:], opts.one_book_per_directory,
-            opts.recurse, opts.duplicates, opts.title, opts.author, opts.isbn,
+            opts.recurse, opts.duplicates, opts.title, opts.authors, opts.isbn,
             tags, opts.series, opts.series_index)
     return 0
 

From bd4ee5c3e92f937225523bf2991b2cbb38da0a81 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sun, 25 Mar 2012 08:46:34 +0530
Subject: [PATCH 04/19] ...

---
 src/calibre/ebooks/mobi/debug/headers.py  | 15 ++++++++++++---
 src/calibre/ebooks/mobi/reader/headers.py |  4 ++++
 2 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/src/calibre/ebooks/mobi/debug/headers.py b/src/calibre/ebooks/mobi/debug/headers.py
index b367be2e4a..034c714d31 100644
--- a/src/calibre/ebooks/mobi/debug/headers.py
+++ b/src/calibre/ebooks/mobi/debug/headers.py
@@ -205,7 +205,10 @@ class EXTHHeader(object):
 
     @property
     def kf8_header_index(self):
-        return self.get(121, None)
+        ans = self.get(121, None)
+        if ans == NULL_INDEX:
+            ans = None
+        return ans
 
     def __str__(self):
         ans = ['*'*20 + ' EXTH Header '+ '*'*20]
@@ -467,9 +470,15 @@ class MOBIFile(object):
         if mh.file_version >= 8:
             self.kf8_type = 'standalone'
         elif mh.has_exth and mh.exth.kf8_header_index is not None:
-            self.kf8_type = 'joint'
             kf8i = mh.exth.kf8_header_index
-            mh8 = MOBIHeader(self.records[kf8i], kf8i)
+            try:
+                rec = self.records[kf8i-1]
+            except IndexError:
+                pass
+            else:
+                if rec.raw == b'BOUNDARY':
+                    self.kf8_type = 'joint'
+                    mh8 = MOBIHeader(self.records[kf8i], kf8i)
         self.mobi8_header = mh8
 
         if 'huff' in self.mobi_header.compression.lower():
diff --git a/src/calibre/ebooks/mobi/reader/headers.py b/src/calibre/ebooks/mobi/reader/headers.py
index 06d349d5de..571817f230 100644
--- a/src/calibre/ebooks/mobi/reader/headers.py
+++ b/src/calibre/ebooks/mobi/reader/headers.py
@@ -75,6 +75,8 @@ class EXTHHeader(object): # {{{
                 self.mi.author_sort = au.strip()
         elif idx == 101:
             self.mi.publisher = content.decode(codec, 'ignore').strip()
+            if self.mi.publisher in {'Unknown', _('Unknown')}:
+                self.mi.publisher = None
         elif idx == 103:
             self.mi.comments  = content.decode(codec, 'ignore')
         elif idx == 104:
@@ -98,6 +100,8 @@ class EXTHHeader(object): # {{{
             self.start_offset, = struct.unpack(b'>L', content)
         elif idx == 121:
             self.kf8_header, = struct.unpack(b'>L', content)
+            if self.kf8_header == NULL_INDEX:
+                self.kf8_header = None
         #else:
         #    print 'unhandled metadata record', idx, repr(content)
 # }}}

From 785c357da2e77ffab55bb20e176129fe295e74c3 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sun, 25 Mar 2012 10:28:06 +0530
Subject: [PATCH 05/19] ...

---
 src/calibre/ebooks/mobi/reader/headers.py | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/src/calibre/ebooks/mobi/reader/headers.py b/src/calibre/ebooks/mobi/reader/headers.py
index 571817f230..3ff5d19be7 100644
--- a/src/calibre/ebooks/mobi/reader/headers.py
+++ b/src/calibre/ebooks/mobi/reader/headers.py
@@ -11,7 +11,7 @@ import struct, re, os
 from calibre import replace_entities
 from calibre.utils.date import parse_date
 from calibre.ebooks.mobi import MobiError
-from calibre.ebooks.metadata import MetaInformation
+from calibre.ebooks.metadata import MetaInformation, check_isbn
 from calibre.ebooks.mobi.langcodes import main_language, sub_language, mobi2iana
 
 NULL_INDEX = 0xffffffff
@@ -80,7 +80,9 @@ class EXTHHeader(object): # {{{
         elif idx == 103:
             self.mi.comments  = content.decode(codec, 'ignore')
         elif idx == 104:
-            self.mi.isbn      = content.decode(codec, 'ignore').strip().replace('-', '')
+            raw = check_isbn(content.decode(codec, 'ignore').strip().replace('-', ''))
+            if raw:
+                self.mi.isbn = raw
         elif idx == 105:
             if not self.mi.tags:
                 self.mi.tags = []
@@ -94,6 +96,16 @@ class EXTHHeader(object): # {{{
                 pass
         elif idx == 108:
             self.mi.book_producer = content.decode(codec, 'ignore').strip()
+        elif idx == 112: # dc:source set in some EBSP amazon samples
+            try:
+                content = content.decode(codec).strip()
+                isig = 'urn:isbn:'
+                if content.lower().startswith(isig):
+                    raw = check_isbn(content[len(isig):])
+                    if raw and not self.mi.isbn:
+                        self.mi.isbn = raw
+            except:
+                pass
         elif idx == 113:
             pass # ASIN or UUID
         elif idx == 116:

From 63c0f96795ea0bc6094a7ecf02bdd8e61cfa96a2 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sun, 25 Mar 2012 18:21:16 +0530
Subject: [PATCH 06/19] When a new plugin is added to calibre, add its icon (if
 any) to the second toolbar instead of the main one, by default (the user can
 change this during the plugin add process and afterwards)

---
 src/calibre/gui2/dialogs/choose_plugin_toolbars.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/calibre/gui2/dialogs/choose_plugin_toolbars.py b/src/calibre/gui2/dialogs/choose_plugin_toolbars.py
index ddf8e162e8..fdfa9f4576 100644
--- a/src/calibre/gui2/dialogs/choose_plugin_toolbars.py
+++ b/src/calibre/gui2/dialogs/choose_plugin_toolbars.py
@@ -39,6 +39,9 @@ class ChoosePluginToolbarsDialog(QDialog):
         self._locations_list.setSizePolicy(sizePolicy)
         for key, text in locations:
             self._locations_list.addItem(text)
+            if key in {'toolbar-child',}:
+                self._locations_list.item(self._locations_list.count()-1
+                        ).setSelected(True)
         self._layout.addWidget(self._locations_list)
 
         self._footer_label = QLabel(

From 7085d048370e6391223f00c40cf3bb03cc5d9a39 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sun, 25 Mar 2012 18:23:25 +0530
Subject: [PATCH 07/19] ...

---
 src/calibre/gui2/dialogs/choose_plugin_toolbars.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/calibre/gui2/dialogs/choose_plugin_toolbars.py b/src/calibre/gui2/dialogs/choose_plugin_toolbars.py
index fdfa9f4576..605603ec29 100644
--- a/src/calibre/gui2/dialogs/choose_plugin_toolbars.py
+++ b/src/calibre/gui2/dialogs/choose_plugin_toolbars.py
@@ -9,8 +9,8 @@ __docformat__ = 'restructuredtext en'
 __license__   = 'GPL v3'
 
 
-from PyQt4.Qt import QDialog, QVBoxLayout, QLabel, QDialogButtonBox,  \
-            QListWidget, QAbstractItemView
+from PyQt4.Qt import (QDialog, QVBoxLayout, QLabel, QDialogButtonBox,
+            QListWidget, QAbstractItemView)
 from PyQt4 import QtGui
 
 class ChoosePluginToolbarsDialog(QDialog):

From 91643b20a38da877dcfda865299a71050382e3ee Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sun, 25 Mar 2012 19:14:44 +0530
Subject: [PATCH 08/19] Use main toolbar and main toolbar when device connected
 as defaults rather than second toolbar as I don't want to deal with 'I added a
 plugin and now I have all this wasted space' complaints

---
 src/calibre/gui2/dialogs/choose_plugin_toolbars.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/calibre/gui2/dialogs/choose_plugin_toolbars.py b/src/calibre/gui2/dialogs/choose_plugin_toolbars.py
index 605603ec29..818eb5b2bc 100644
--- a/src/calibre/gui2/dialogs/choose_plugin_toolbars.py
+++ b/src/calibre/gui2/dialogs/choose_plugin_toolbars.py
@@ -39,7 +39,7 @@ class ChoosePluginToolbarsDialog(QDialog):
         self._locations_list.setSizePolicy(sizePolicy)
         for key, text in locations:
             self._locations_list.addItem(text)
-            if key in {'toolbar-child',}:
+            if key in {'toolbar', 'toolbar-device'}:
                 self._locations_list.item(self._locations_list.count()-1
                         ).setSelected(True)
         self._layout.addWidget(self._locations_list)

From aba3f4686ffe6fe7c285c26edecba9c43e4b5a11 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sun, 25 Mar 2012 21:12:20 +0530
Subject: [PATCH 09/19] Linux installer: Manually preserve the defaults.list
 mimetype association file to work around buggy xdg-desktop-menu
 implementations in some distros. Fixes #926559 (File associations)

---
 src/calibre/linux.py | 130 ++++++++++++++++++++++++++++---------------
 1 file changed, 84 insertions(+), 46 deletions(-)

diff --git a/src/calibre/linux.py b/src/calibre/linux.py
index 1686f66b22..00f636a30f 100644
--- a/src/calibre/linux.py
+++ b/src/calibre/linux.py
@@ -40,6 +40,46 @@ entry_points = {
                             ],
       }
 
+class PreserveMIMEDefaults(object):
+
+    def __init__(self):
+        self.initial_values = {}
+
+    def __enter__(self):
+        def_data_dirs = '/usr/local/share:/usr/share'
+        paths = os.environ.get('XDG_DATA_DIRS', def_data_dirs)
+        paths = paths.split(':')
+        paths.append(os.environ.get('XDG_DATA_HOME', os.path.expanduser(
+            '~/.local/share')))
+        paths = list(filter(os.path.isdir, paths))
+        if not paths:
+            # Env var had garbage in it, ignore it
+            paths = def_data_dirs.split(':')
+        paths = list(filter(os.path.isdir, paths))
+        self.paths = {os.path.join(x, 'applications/defaults.list') for x in
+                paths}
+        self.initial_values = {}
+        for x in self.paths:
+            try:
+                with open(x, 'rb') as f:
+                    self.initial_values[x] = f.read()
+            except:
+                self.initial_values[x] = None
+
+    def __exit__(self, *args):
+        for path, val in self.initial_values.iteritems():
+            if val is None:
+                try:
+                    os.remove(path)
+                except:
+                    pass
+            elif os.path.exists(path):
+                with open(path, 'r+b') as f:
+                    if f.read() != val:
+                        f.seek(0)
+                        f.truncate()
+                        f.write(val)
+
 # Uninstall script {{{
 UNINSTALL = '''\
 #!{python}
@@ -333,57 +373,55 @@ class PostInstall:
 
     def setup_desktop_integration(self): # {{{
         try:
-
             self.info('Setting up desktop integration...')
 
+            with TemporaryDirectory() as tdir, CurrentDir(tdir), \
+                                PreserveMIMEDefaults():
+                render_img('mimetypes/lrf.png', 'calibre-lrf.png')
+                check_call('xdg-icon-resource install --noupdate --context mimetypes --size 128 calibre-lrf.png application-lrf', shell=True)
+                self.icon_resources.append(('mimetypes', 'application-lrf', '128'))
+                check_call('xdg-icon-resource install --noupdate --context mimetypes --size 128 calibre-lrf.png text-lrs', shell=True)
+                self.icon_resources.append(('mimetypes', 'application-lrs',
+                '128'))
+                render_img('lt.png', 'calibre-gui.png')
+                check_call('xdg-icon-resource install --noupdate --size 128 calibre-gui.png calibre-gui', shell=True)
+                self.icon_resources.append(('apps', 'calibre-gui', '128'))
+                render_img('viewer.png', 'calibre-viewer.png')
+                check_call('xdg-icon-resource install --size 128 calibre-viewer.png calibre-viewer', shell=True)
+                self.icon_resources.append(('apps', 'calibre-viewer', '128'))
 
-            with TemporaryDirectory() as tdir:
-                with CurrentDir(tdir):
-                    render_img('mimetypes/lrf.png', 'calibre-lrf.png')
-                    check_call('xdg-icon-resource install --noupdate --context mimetypes --size 128 calibre-lrf.png application-lrf', shell=True)
-                    self.icon_resources.append(('mimetypes', 'application-lrf', '128'))
-                    check_call('xdg-icon-resource install --noupdate --context mimetypes --size 128 calibre-lrf.png text-lrs', shell=True)
-                    self.icon_resources.append(('mimetypes', 'application-lrs',
-                    '128'))
-                    render_img('lt.png', 'calibre-gui.png')
-                    check_call('xdg-icon-resource install --noupdate --size 128 calibre-gui.png calibre-gui', shell=True)
-                    self.icon_resources.append(('apps', 'calibre-gui', '128'))
-                    render_img('viewer.png', 'calibre-viewer.png')
-                    check_call('xdg-icon-resource install --size 128 calibre-viewer.png calibre-viewer', shell=True)
-                    self.icon_resources.append(('apps', 'calibre-viewer', '128'))
+                mimetypes = set([])
+                for x in all_input_formats():
+                    mt = guess_type('dummy.'+x)[0]
+                    if mt and 'chemical' not in mt and 'ctc-posml' not in mt:
+                        mimetypes.add(mt)
 
-                    mimetypes = set([])
-                    for x in all_input_formats():
-                        mt = guess_type('dummy.'+x)[0]
-                        if mt and 'chemical' not in mt and 'ctc-posml' not in mt:
-                            mimetypes.add(mt)
+                def write_mimetypes(f):
+                    f.write('MimeType=%s;\n'%';'.join(mimetypes))
 
-                    def write_mimetypes(f):
-                        f.write('MimeType=%s;\n'%';'.join(mimetypes))
-
-                    f = open('calibre-lrfviewer.desktop', 'wb')
-                    f.write(VIEWER)
-                    f.close()
-                    f = open('calibre-ebook-viewer.desktop', 'wb')
-                    f.write(EVIEWER)
-                    write_mimetypes(f)
-                    f.close()
-                    f = open('calibre-gui.desktop', 'wb')
-                    f.write(GUI)
-                    write_mimetypes(f)
-                    f.close()
-                    des = ('calibre-gui.desktop', 'calibre-lrfviewer.desktop',
-                            'calibre-ebook-viewer.desktop')
-                    for x in des:
-                        cmd = ['xdg-desktop-menu', 'install', '--noupdate', './'+x]
-                        check_call(' '.join(cmd), shell=True)
-                        self.menu_resources.append(x)
-                    check_call(['xdg-desktop-menu', 'forceupdate'])
-                    f = open('calibre-mimetypes', 'wb')
-                    f.write(MIME)
-                    f.close()
-                    self.mime_resources.append('calibre-mimetypes')
-                    check_call('xdg-mime install ./calibre-mimetypes', shell=True)
+                f = open('calibre-lrfviewer.desktop', 'wb')
+                f.write(VIEWER)
+                f.close()
+                f = open('calibre-ebook-viewer.desktop', 'wb')
+                f.write(EVIEWER)
+                write_mimetypes(f)
+                f.close()
+                f = open('calibre-gui.desktop', 'wb')
+                f.write(GUI)
+                write_mimetypes(f)
+                f.close()
+                des = ('calibre-gui.desktop', 'calibre-lrfviewer.desktop',
+                        'calibre-ebook-viewer.desktop')
+                for x in des:
+                    cmd = ['xdg-desktop-menu', 'install', '--noupdate', './'+x]
+                    check_call(' '.join(cmd), shell=True)
+                    self.menu_resources.append(x)
+                check_call(['xdg-desktop-menu', 'forceupdate'])
+                f = open('calibre-mimetypes', 'wb')
+                f.write(MIME)
+                f.close()
+                self.mime_resources.append('calibre-mimetypes')
+                check_call('xdg-mime install ./calibre-mimetypes', shell=True)
         except Exception:
             if self.opts.fatal_errors:
                 raise

From 23f9bdc7c90133bd7e6c02aaaa577354c92ac9a5 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Mon, 26 Mar 2012 13:56:12 +0530
Subject: [PATCH 10/19] KF8 Input: Support KF8 format Amazon samples. Fixes
 #963418 (UnicodeDecodeError invalid start byte when displaying KF8 ebook)

---
 src/calibre/ebooks/mobi/reader/index.py | 47 +++++++++++++++++++++----
 src/calibre/ebooks/mobi/reader/mobi8.py |  6 +++-
 src/calibre/ebooks/mobi/utils.py        |  4 ++-
 3 files changed, 48 insertions(+), 9 deletions(-)

diff --git a/src/calibre/ebooks/mobi/reader/index.py b/src/calibre/ebooks/mobi/reader/index.py
index dd85b5a5cb..1979458b2a 100644
--- a/src/calibre/ebooks/mobi/reader/index.py
+++ b/src/calibre/ebooks/mobi/reader/index.py
@@ -39,10 +39,43 @@ def parse_indx_header(data):
     words = (
             'len', 'nul1', 'type', 'gen', 'start', 'count', 'code',
             'lng', 'total', 'ordt', 'ligt', 'nligt', 'ncncx'
-    )
+    ) + tuple('unknown%d'%i for i in xrange(27)) + ('ocnt', 'oentries',
+            'ordt1', 'ordt2', 'tagx')
     num = len(words)
     values = struct.unpack(bytes('>%dL' % num), data[4:4*(num+1)])
-    return dict(zip(words, values))
+    ans = dict(zip(words, values))
+    ordt1, ordt2 = ans['ordt1'], ans['ordt2']
+    ans['ordt1_raw'], ans['ordt2_raw'] = [], []
+    ans['ordt_map'] = ''
+
+    if ordt1 > 0 and data[ordt1:ordt1+4] == b'ORDT':
+        # I don't know what this is, but using it seems to be unnecessary, so
+        # just leave it as the raw bytestring
+        ans['ordt1_raw'] = data[ordt1+4:ordt1+4+ans['oentries']]
+    if ordt2 > 0 and data[ordt2:ordt2+4] == b'ORDT':
+        ans['ordt2_raw'] = raw = bytearray(data[ordt2+4:ordt2+4+2*ans['oentries']])
+        if ans['code'] == 65002:
+            # This appears to be EBCDIC-UTF (65002) encoded. I can't be
+            # bothered to write a decoder for this (see
+            # http://www.unicode.org/reports/tr16/) Just how stupid is Amazon?
+            # Instead I use a half assed decoder that decodes only the ascii
+            # valid values correctly.  Hopefully these ORDT sections will only
+            # ever be used in SKEL and ELEM indices where the text is pure
+            # ASCII. EBCDIC-UTF and ASCII have the same. Any non ASCII valid
+            # values are mapped to the ? character
+
+            parsed = bytearray(ans['oentries'])
+            for i in xrange(0, 2*ans['oentries'], 2):
+                if 0x20 < raw[i+1] < 0x7f:
+                    parsed[i//2] = raw[i+1]
+                else:
+                    parsed[i//2] = ord(b'?')
+            ans['ordt_map'] = bytes(parsed).decode('ascii')
+        else:
+            ans['ordt_map'] = '?'*ans['oentries']
+
+    return ans
+
 
 class CNCX(object): # {{{
 
@@ -163,7 +196,7 @@ def get_tag_map(control_byte_count, tagx, data, strict=False):
     return ans
 
 def parse_index_record(table, data, control_byte_count, tags, codec,
-        strict=False):
+        ordt_map, strict=False):
     header = parse_indx_header(data)
     idxt_pos = header['start']
     if data[idxt_pos:idxt_pos+4] != b'IDXT':
@@ -184,12 +217,11 @@ def parse_index_record(table, data, control_byte_count, tags, codec,
     for j in xrange(entry_count):
         start, end = idx_positions[j:j+2]
         rec = data[start:end]
-        ident, consumed = decode_string(rec, codec=codec)
+        ident, consumed = decode_string(rec, codec=codec, ordt_map=ordt_map)
         rec = rec[consumed:]
         tag_map = get_tag_map(control_byte_count, tags, rec, strict=strict)
         table[ident] = tag_map
 
-
 def read_index(sections, idx, codec):
     table, cncx = OrderedDict(), CNCX([], codec)
 
@@ -203,12 +235,13 @@ def read_index(sections, idx, codec):
         cncx_records = [x[0] for x in sections[off:off+indx_header['ncncx']]]
         cncx = CNCX(cncx_records, codec)
 
-    tag_section_start = indx_header['len']
+    tag_section_start = indx_header['tagx']
     control_byte_count, tags = parse_tagx_section(data[tag_section_start:])
 
     for i in xrange(idx + 1, idx + 1 + indx_count):
         # Index record
         data = sections[i][0]
-        parse_index_record(table, data, control_byte_count, tags, codec)
+        parse_index_record(table, data, control_byte_count, tags, codec,
+                indx_header['ordt_map'])
     return table, cncx
 
diff --git a/src/calibre/ebooks/mobi/reader/mobi8.py b/src/calibre/ebooks/mobi/reader/mobi8.py
index ec7166ebb0..d2254e00d8 100644
--- a/src/calibre/ebooks/mobi/reader/mobi8.py
+++ b/src/calibre/ebooks/mobi/reader/mobi8.py
@@ -285,7 +285,11 @@ class Mobi8Reader(object):
     def create_guide(self):
         guide = Guide()
         for ref_type, ref_title, fileno in self.guide:
-            elem = self.elems[fileno]
+            try:
+                elem = self.elems[fileno]
+            except IndexError:
+                # Happens for thumbnailstandard in Amazon book samples
+                continue
             fi = self.get_file_info(elem.insert_pos)
             idtext = self.get_id_tag(elem.insert_pos).decode(self.header.codec)
             linktgt = fi.filename
diff --git a/src/calibre/ebooks/mobi/utils.py b/src/calibre/ebooks/mobi/utils.py
index 4c1e52e119..3530736ba0 100644
--- a/src/calibre/ebooks/mobi/utils.py
+++ b/src/calibre/ebooks/mobi/utils.py
@@ -15,10 +15,12 @@ from calibre.ebooks import normalize
 
 IMAGE_MAX_SIZE = 10 * 1024 * 1024
 
-def decode_string(raw, codec='utf-8'):
+def decode_string(raw, codec='utf-8', ordt_map=''):
     length, = struct.unpack(b'>B', raw[0])
     raw = raw[1:1+length]
     consumed = length+1
+    if ordt_map:
+        return ''.join(ordt_map[ord(x)] for x in raw), consumed
     return raw.decode(codec), consumed
 
 def decode_hex_number(raw, codec='utf-8'):

From 3235ca7356e98b969e4c190a49d66e7f651d4bc9 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Mon, 26 Mar 2012 14:11:37 +0530
Subject: [PATCH 11/19] ...

---
 src/calibre/ebooks/mobi/reader/index.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/calibre/ebooks/mobi/reader/index.py b/src/calibre/ebooks/mobi/reader/index.py
index 1979458b2a..036b7df073 100644
--- a/src/calibre/ebooks/mobi/reader/index.py
+++ b/src/calibre/ebooks/mobi/reader/index.py
@@ -61,8 +61,7 @@ def parse_indx_header(data):
             # Instead I use a half assed decoder that decodes only the ascii
             # valid values correctly.  Hopefully these ORDT sections will only
             # ever be used in SKEL and ELEM indices where the text is pure
-            # ASCII. EBCDIC-UTF and ASCII have the same. Any non ASCII valid
-            # values are mapped to the ? character
+            # ASCII. Any non ASCII valid values are mapped to the ? character.
 
             parsed = bytearray(ans['oentries'])
             for i in xrange(0, 2*ans['oentries'], 2):

From 163f82a046d33e4db10339d00f349ad7f2dd3d85 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Mon, 26 Mar 2012 14:17:45 +0530
Subject: [PATCH 12/19] ...

---
 src/calibre/ebooks/mobi/reader/index.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/calibre/ebooks/mobi/reader/index.py b/src/calibre/ebooks/mobi/reader/index.py
index 036b7df073..ab575cc6c9 100644
--- a/src/calibre/ebooks/mobi/reader/index.py
+++ b/src/calibre/ebooks/mobi/reader/index.py
@@ -58,10 +58,10 @@ def parse_indx_header(data):
             # This appears to be EBCDIC-UTF (65002) encoded. I can't be
             # bothered to write a decoder for this (see
             # http://www.unicode.org/reports/tr16/) Just how stupid is Amazon?
-            # Instead I use a half assed decoder that decodes only the ascii
-            # valid values correctly.  Hopefully these ORDT sections will only
-            # ever be used in SKEL and ELEM indices where the text is pure
-            # ASCII. Any non ASCII valid values are mapped to the ? character.
+            # Instead, we use a weird hack that seems to do the trick for all
+            # the books with this type of ORDT record that I have come across.
+            # Basically we try to interpret every second byte as a printable
+            # ascii character. If we cannot, we map to the ? char.
 
             parsed = bytearray(ans['oentries'])
             for i in xrange(0, 2*ans['oentries'], 2):

From 6c31f4f67fd72c8589e75deda04674acc4641bc4 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Mon, 26 Mar 2012 14:19:15 +0530
Subject: [PATCH 13/19] ...

---
 src/calibre/ebooks/mobi/reader/index.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/src/calibre/ebooks/mobi/reader/index.py b/src/calibre/ebooks/mobi/reader/index.py
index ab575cc6c9..19984fdc00 100644
--- a/src/calibre/ebooks/mobi/reader/index.py
+++ b/src/calibre/ebooks/mobi/reader/index.py
@@ -65,10 +65,7 @@ def parse_indx_header(data):
 
             parsed = bytearray(ans['oentries'])
             for i in xrange(0, 2*ans['oentries'], 2):
-                if 0x20 < raw[i+1] < 0x7f:
-                    parsed[i//2] = raw[i+1]
-                else:
-                    parsed[i//2] = ord(b'?')
+                parsed[i//2] = raw[i+1] if 0x20 < raw[i+1] < 0x7f else ord(b'?')
             ans['ordt_map'] = bytes(parsed).decode('ascii')
         else:
             ans['ordt_map'] = '?'*ans['oentries']

From 5822d6b21b53a5280598d05006b6cbaf778b0a47 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Mon, 26 Mar 2012 14:22:09 +0530
Subject: [PATCH 14/19] ...

---
 src/calibre/ebooks/mobi/reader/index.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/calibre/ebooks/mobi/reader/index.py b/src/calibre/ebooks/mobi/reader/index.py
index 19984fdc00..d8a88227c8 100644
--- a/src/calibre/ebooks/mobi/reader/index.py
+++ b/src/calibre/ebooks/mobi/reader/index.py
@@ -60,6 +60,8 @@ def parse_indx_header(data):
             # http://www.unicode.org/reports/tr16/) Just how stupid is Amazon?
             # Instead, we use a weird hack that seems to do the trick for all
             # the books with this type of ORDT record that I have come across.
+            # Some EBSP book samples in KF8 format from Amazon have this type
+            # of encoding.
             # Basically we try to interpret every second byte as a printable
             # ascii character. If we cannot, we map to the ? char.
 

From fc15737d4f1ab66f3a7fb9f46e7a11a82f73a189 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Mon, 26 Mar 2012 23:29:15 +0530
Subject: [PATCH 15/19] inspect-mobi: Extract resources from KF8 files

---
 src/calibre/ebooks/mobi/debug/mobi8.py | 56 ++++++++++++++++++++++----
 1 file changed, 48 insertions(+), 8 deletions(-)

diff --git a/src/calibre/ebooks/mobi/debug/mobi8.py b/src/calibre/ebooks/mobi/debug/mobi8.py
index e4a92ee95c..20fd419e29 100644
--- a/src/calibre/ebooks/mobi/debug/mobi8.py
+++ b/src/calibre/ebooks/mobi/debug/mobi8.py
@@ -7,9 +7,10 @@ __license__   = 'GPL v3'
 __copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 
-import sys, os
+import sys, os, imghdr
 
 from calibre.ebooks.mobi.debug.headers import TextRecord
+from calibre.ebooks.mobi.utils import read_font_record
 
 class MOBIFile(object):
 
@@ -30,6 +31,7 @@ class MOBIFile(object):
                 first_text_record+offset+h8.number_of_text_records])]
 
         self.raw_text = b''.join(r.raw for r in self.text_records)
+        self.extract_resources()
 
     def print_header(self, f=sys.stdout):
         print (str(self.mf.palmdb).encode('utf-8'), file=f)
@@ -41,6 +43,42 @@ class MOBIFile(object):
         print (file=f)
         print (str(self.mf.mobi8_header).encode('utf-8'), file=f)
 
+    def extract_resources(self):
+        ''' Collect (relative path, data) pairs for resources in self.resource_map '''
+        self.resource_map = []
+        known_types = {b'FLIS', b'FCIS', b'SRCS',
+                    b'\xe9\x8e\r\n', b'RESC', b'BOUN', b'FDST', b'DATP',
+                    b'AUDI', b'VIDE'}
+        for i, rec in enumerate(self.resource_records):
+            sig = rec.raw[:4]
+            payload = rec.raw
+            ext = 'dat'
+            prefix = 'binary'
+            suffix = ''
+            if sig in {b'HUFF', b'CDIC', b'INDX'}: continue
+            # TODO: Ignore CNCX records as well
+            if sig == b'FONT':
+                font = read_font_record(rec.raw)
+                if font['err']:
+                    raise ValueError('Failed to read font record: %s Headers: %s'%(
+                        font['err'], font['headers']))
+                payload = (font['font_data'] if font['font_data'] else
+                        font['raw_data'])
+                prefix, ext = 'fonts', font['ext']
+            elif sig not in known_types:
+                q = imghdr.what(None, rec.raw)
+                if q:
+                    prefix, ext = 'images', q
+            # Records that are neither font nor image stay in binary/ and are
+            if prefix == 'binary':  # tagged with their signature when known
+                if sig == b'\xe9\x8e\r\n':
+                    suffix = '-EOF'
+                elif sig in known_types:
+                    suffix = '-' + sig.decode('ascii')
+            # Name is <prefix>/<six digit record index><suffix>.<ext>
+            self.resource_map.append(('%s/%06d%s.%s'%(prefix, i, suffix, ext),
+                payload))
+
 
 def inspect_mobi(mobi_file, ddir):
     f = MOBIFile(mobi_file)
@@ -51,12 +89,14 @@ def inspect_mobi(mobi_file, ddir):
     with open(alltext, 'wb') as of:
         of.write(f.raw_text)
 
-    for tdir, attr in [('text_records', 'text_records'), ('images',
-        'image_records'), ('binary', 'binary_records'), ('font',
-            'font_records')]:
-        tdir = os.path.join(ddir, tdir)
-        os.mkdir(tdir)
-        for rec in getattr(f, attr, []):
-            rec.dump(tdir)
+    for x in ('text_records', 'images', 'fonts', 'binary'):
+        os.mkdir(os.path.join(ddir, x))
+
+    for rec in f.text_records:
+        rec.dump(os.path.join(ddir, 'text_records'))
+    # hrefs are relative to ddir; use of for the handle so f stays the MOBIFile
+    for href, payload in f.resource_map:
+        with open(os.path.join(ddir, href), 'wb') as of:
+            of.write(payload)
 
 

From a4e1bd8ab33dfa7247ba936b1d774218bf1f1480 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 27 Mar 2012 09:46:29 +0530
Subject: [PATCH 16/19] PDF Output: Fix margin specifications not being applied

---
 setup/installer/__init__.py                |  2 +-
 src/calibre/ebooks/oeb/display/__init__.py | 11 ++++
 src/calibre/ebooks/oeb/display/webview.py  | 59 ++++++++++++++++++++++
 src/calibre/ebooks/pdf/writer.py           | 14 +++--
 src/calibre/gui2/viewer/documentview.py    | 51 ++++---------------
 5 files changed, 91 insertions(+), 46 deletions(-)
 create mode 100644 src/calibre/ebooks/oeb/display/__init__.py
 create mode 100644 src/calibre/ebooks/oeb/display/webview.py

diff --git a/setup/installer/__init__.py b/setup/installer/__init__.py
index d0a6cd6fa3..8374f93e38 100644
--- a/setup/installer/__init__.py
+++ b/setup/installer/__init__.py
@@ -48,7 +48,7 @@ class Push(Command):
         threads = []
         for host in (
             r'Owner@winxp:/cygdrive/c/Documents\ and\ Settings/Owner/calibre',
-            'kovid@leopard_test:calibre',
+            'kovid@ox:calibre',
             r'kovid@win7:/cygdrive/c/Users/kovid/calibre',
             ):
             rcmd = BASE_RSYNC + EXCLUDES + ['.', host]
diff --git a/src/calibre/ebooks/oeb/display/__init__.py b/src/calibre/ebooks/oeb/display/__init__.py
new file mode 100644
index 0000000000..dd9615356c
--- /dev/null
+++ b/src/calibre/ebooks/oeb/display/__init__.py
@@ -0,0 +1,11 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import (unicode_literals, division, absolute_import,
+                        print_function)
+
+__license__   = 'GPL v3'
+__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+
+
diff --git a/src/calibre/ebooks/oeb/display/webview.py b/src/calibre/ebooks/oeb/display/webview.py
new file mode 100644
index 0000000000..efcfe0346c
--- /dev/null
+++ b/src/calibre/ebooks/oeb/display/webview.py
@@ -0,0 +1,59 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import (unicode_literals, division, absolute_import,
+                        print_function)
+
+__license__   = 'GPL v3'
+__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+import re
+
+from calibre import guess_type
+
+class EntityDeclarationProcessor(object): # {{{
+    ''' Replace references to entities declared via <!ENTITY ...> in html '''
+    def __init__(self, html):
+        self.declared_entities = {}
+        for match in re.finditer(r'<!\s*ENTITY\s+([^>]+)>', html):
+            tokens = match.group(1).split()  # [name, value, ...]
+            if len(tokens) > 1:
+                self.declared_entities[tokens[0].strip()] = tokens[1].strip().replace('"', '')
+        self.processed_html = html  # html with every &name; replaced by its value
+        for key, val in self.declared_entities.iteritems():
+            self.processed_html = self.processed_html.replace('&%s;'%key, val)
+# }}}
+
+def self_closing_sub(match):
+    name, attrs = match.group(1), match.group(2)
+    if name.strip().lower() == 'br':  # <br .../> is left exactly as it was
+        return match.group()
+    return '<%s %s></%s>'%(name, attrs, name)
+
+def load_html(path, view, codec='utf-8', mime_type=None,
+        pre_load_callback=lambda x:None):
+    ''' Load the file at path into view (which must provide setHtml() and
+    setContent()), decoding it with codec. pre_load_callback is called with
+    the QUrl of path just before loading. Unknown mime types mean text/html. '''
+    from PyQt4.Qt import QUrl, QByteArray
+    if mime_type is None:
+        mime_type = guess_type(path)[0] or 'text/html'
+    with open(path, 'rb') as f:
+        html = f.read().decode(codec, 'replace')
+    # Inline declared entities; in XHTML also expand <x .../> to <x ...></x>
+    html = EntityDeclarationProcessor(html).processed_html
+    has_svg = re.search(r'<[:a-zA-Z]*svg', html) is not None
+    if 'xhtml' in mime_type:
+        self_closing_pat = re.compile(r'<([a-z1-6]+)\s+([^>]+)/>',
+                re.IGNORECASE)
+        html = self_closing_pat.sub(self_closing_sub, html)
+    # Remove an empty, self closed <title/> element
+    html = re.sub(ur'<\s*title\s*/\s*>', u'', html, flags=re.IGNORECASE)
+    loading_url = QUrl.fromLocalFile(path)
+    pre_load_callback(loading_url)
+    # Markup containing svg is loaded as encoded bytes with its mime type
+    if has_svg:
+        view.setContent(QByteArray(html.encode(codec)), mime_type,
+                loading_url)
+    else:
+        view.setHtml(html, loading_url)
diff --git a/src/calibre/ebooks/pdf/writer.py b/src/calibre/ebooks/pdf/writer.py
index 2c2e6a2f0e..fa2f5ebcfa 100644
--- a/src/calibre/ebooks/pdf/writer.py
+++ b/src/calibre/ebooks/pdf/writer.py
@@ -18,10 +18,11 @@ from calibre.ebooks.pdf.pageoptions import unit, paper_size, \
 from calibre.ebooks.metadata import authors_to_string
 from calibre.ptempfile import PersistentTemporaryFile
 from calibre import __appname__, __version__, fit_image
+from calibre.ebooks.oeb.display.webview import load_html
 
 from PyQt4 import QtCore
-from PyQt4.Qt import QUrl, QEventLoop, QObject, \
-    QPrinter, QMetaObject, QSizeF, Qt, QPainter, QPixmap
+from PyQt4.Qt import (QEventLoop, QObject,
+    QPrinter, QMetaObject, QSizeF, Qt, QPainter, QPixmap)
 from PyQt4.QtWebKit import QWebView
 
 from pyPdf import PdfFileWriter, PdfFileReader
@@ -70,7 +71,7 @@ def get_pdf_printer(opts, for_comic=False):
                 opts.margin_right, opts.margin_bottom, QPrinter.Point)
     printer.setOrientation(orientation(opts.orientation))
     printer.setOutputFormat(QPrinter.PdfFormat)
-    printer.setFullPage(True)
+    printer.setFullPage(for_comic)
     return printer
 
 def get_printer_page_size(opts, for_comic=False):
@@ -156,8 +157,7 @@ class PDFWriter(QObject): # {{{
         self.combine_queue.append(os.path.join(self.tmp_path, '%i.pdf' % (len(self.combine_queue) + 1)))
 
         self.logger.debug('Processing %s...' % item)
-
-        self.view.load(QUrl.fromLocalFile(item))
+        load_html(item, self.view)
 
     def _render_html(self, ok):
         if ok:
@@ -171,6 +171,10 @@ class PDFWriter(QObject): # {{{
             # previously set on the printer.
             if isosx:
                 printer.setOutputFormat(QPrinter.NativeFormat)
+            self.view.page().mainFrame().evaluateJavaScript('''
+                document.body.style.backgroundColor = "white";
+
+                ''')
             self.view.print_(printer)
             printer.abort()
         else:
diff --git a/src/calibre/gui2/viewer/documentview.py b/src/calibre/gui2/viewer/documentview.py
index b03de237c1..4992510dc4 100644
--- a/src/calibre/gui2/viewer/documentview.py
+++ b/src/calibre/gui2/viewer/documentview.py
@@ -4,14 +4,14 @@ __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
 __docformat__ = 'restructuredtext en'
 
 # Imports {{{
-import os, math, re, glob, sys, zipfile
+import os, math, glob, sys, zipfile
 from base64 import b64encode
 from functools import partial
 
 from PyQt4.Qt import (QSize, QSizePolicy, QUrl, SIGNAL, Qt,
                      QPainter, QPalette, QBrush, QFontDatabase, QDialog,
                      QColor, QPoint, QImage, QRegion, QVariant, QIcon,
-                     QFont, pyqtSignature, QAction, QByteArray, QMenu,
+                     QFont, pyqtSignature, QAction, QMenu,
                      pyqtSignal, QSwipeGesture, QApplication)
 from PyQt4.QtWebKit import QWebPage, QWebView, QWebSettings
 
@@ -21,10 +21,11 @@ from calibre.gui2.viewer.config_ui import Ui_Dialog
 from calibre.gui2.viewer.flip import SlideFlip
 from calibre.gui2.shortcuts import Shortcuts, ShortcutConfig
 from calibre.constants import iswindows
-from calibre import prints, guess_type
+from calibre import prints
 from calibre.gui2.viewer.keys import SHORTCUTS
 from calibre.gui2.viewer.javascript import JavaScriptLoader
 from calibre.gui2.viewer.position import PagePosition
+from calibre.ebooks.oeb.display.webview import load_html
 
 # }}}
 
@@ -474,19 +475,6 @@ class Document(QWebPage): # {{{
 
 # }}}
 
-class EntityDeclarationProcessor(object): # {{{
-
-    def __init__(self, html):
-        self.declared_entities = {}
-        for match in re.finditer(r'<!\s*ENTITY\s+([^>]+)>', html):
-            tokens = match.group(1).split()
-            if len(tokens) > 1:
-                self.declared_entities[tokens[0].strip()] = tokens[1].strip().replace('"', '')
-        self.processed_html = html
-        for key, val in self.declared_entities.iteritems():
-            self.processed_html = self.processed_html.replace('&%s;'%key, val)
-# }}}
-
 class DocumentView(QWebView): # {{{
 
     magnification_changed = pyqtSignal(object)
@@ -497,8 +485,6 @@ class DocumentView(QWebView): # {{{
         self.is_auto_repeat_event = False
         self.debug_javascript = debug_javascript
         self.shortcuts =  Shortcuts(SHORTCUTS, 'shortcuts/viewer')
-        self.self_closing_pat = re.compile(r'<([a-z1-6]+)\s+([^>]+)/>',
-                re.IGNORECASE)
         self.setSizePolicy(QSizePolicy(QSizePolicy.Expanding, QSizePolicy.Expanding))
         self._size_hint = QSize(510, 680)
         self.initial_pos = 0.0
@@ -689,31 +675,16 @@ class DocumentView(QWebView): # {{{
     def path(self):
         return os.path.abspath(unicode(self.url().toLocalFile()))
 
-    def self_closing_sub(self, match):
-        tag = match.group(1)
-        if tag.lower().strip() == 'br':
-            return match.group()
-        return '<%s %s></%s>'%(match.group(1), match.group(2), match.group(1))
-
     def load_path(self, path, pos=0.0):
         self.initial_pos = pos
-        mt = getattr(path, 'mime_type', None)
-        if mt is None:
-            mt = guess_type(path)[0]
-        html = open(path, 'rb').read().decode(path.encoding, 'replace')
-        html = EntityDeclarationProcessor(html).processed_html
-        has_svg = re.search(r'<[:a-zA-Z]*svg', html) is not None
 
-        if 'xhtml' in mt:
-            html = self.self_closing_pat.sub(self.self_closing_sub, html)
-        if self.manager is not None:
-            self.manager.load_started()
-        self.loading_url = QUrl.fromLocalFile(path)
-        html = re.sub(ur'<\s*title\s*/\s*>', u'', html, flags=re.IGNORECASE)
-        if has_svg:
-            self.setContent(QByteArray(html.encode(path.encoding)), mt, QUrl.fromLocalFile(path))
-        else:
-            self.setHtml(html, self.loading_url)
+        def callback(lu):
+            self.loading_url = lu
+            if self.manager is not None:
+                self.manager.load_started()
+
+        load_html(path, self, codec=path.encoding, mime_type=getattr(path,
+            'mime_type', None), pre_load_callback=callback)
         self.turn_off_internal_scrollbars()
 
     def initialize_scrollbar(self):

From 85cf39ba16043e07de20b4ace6ca561244049ee9 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 27 Mar 2012 10:07:24 +0530
Subject: [PATCH 17/19] Fix unable to clear username/password in Fetch news
 dialog

---
 src/calibre/gui2/dialogs/scheduler.py       | 8 +++++---
 src/calibre/web/feeds/recipes/collection.py | 8 ++++++++
 src/calibre/web/feeds/recipes/model.py      | 3 +++
 3 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/src/calibre/gui2/dialogs/scheduler.py b/src/calibre/gui2/dialogs/scheduler.py
index d57d514d54..64e3c2e0a3 100644
--- a/src/calibre/gui2/dialogs/scheduler.py
+++ b/src/calibre/gui2/dialogs/scheduler.py
@@ -11,9 +11,9 @@ from datetime import timedelta
 import calendar, textwrap
 from collections import OrderedDict
 
-from PyQt4.Qt import QDialog, Qt, QTime, QObject, QMenu, QHBoxLayout, \
-        QAction, QIcon, QMutex, QTimer, pyqtSignal, QWidget, QGridLayout, \
-        QCheckBox, QTimeEdit, QLabel, QLineEdit, QDoubleSpinBox
+from PyQt4.Qt import (QDialog, Qt, QTime, QObject, QMenu, QHBoxLayout,
+        QAction, QIcon, QMutex, QTimer, pyqtSignal, QWidget, QGridLayout,
+        QCheckBox, QTimeEdit, QLabel, QLineEdit, QDoubleSpinBox)
 
 from calibre.gui2.dialogs.scheduler_ui import Ui_Dialog
 from calibre.gui2 import config as gconf, error_dialog
@@ -317,6 +317,8 @@ class SchedulerDialog(QDialog, Ui_Dialog):
                     return False
             if un or pw:
                 self.recipe_model.set_account_info(urn, un, pw)
+            else:
+                self.recipe_model.clear_account_info(urn)
 
         if self.schedule.isChecked():
             schedule_type, schedule = \
diff --git a/src/calibre/web/feeds/recipes/collection.py b/src/calibre/web/feeds/recipes/collection.py
index 3a25485955..6ab5764302 100644
--- a/src/calibre/web/feeds/recipes/collection.py
+++ b/src/calibre/web/feeds/recipes/collection.py
@@ -437,6 +437,14 @@ class SchedulerConfig(object):
                 if x.get('id', False) == urn:
                     return x.get('username', ''), x.get('password', '')
 
+    def clear_account_info(self, urn):
+        with self.lock:
+            for x in self.iter_accounts():
+                if x.get('id', False) == urn:
+                    x.getparent().remove(x)  # drop the matching account element
+                    self.write_scheduler_file()
+                    break  # only the first match is removed
+
     def get_customize_info(self, urn):
         keep_issues = 0
         add_title_tag = True
diff --git a/src/calibre/web/feeds/recipes/model.py b/src/calibre/web/feeds/recipes/model.py
index 40d246b450..60b74585af 100644
--- a/src/calibre/web/feeds/recipes/model.py
+++ b/src/calibre/web/feeds/recipes/model.py
@@ -354,6 +354,9 @@ class RecipeModel(QAbstractItemModel, SearchQueryParser):
     def set_account_info(self, urn, un, pw):
         self.scheduler_config.set_account_info(urn, un, pw)
 
+    def clear_account_info(self, urn):
+        self.scheduler_config.clear_account_info(urn)
+
     def get_account_info(self, urn):
         return self.scheduler_config.get_account_info(urn)
 

From 5ff6fc8ffe82a8b93001d58c3cbcd56500d6b445 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 27 Mar 2012 21:32:57 +0530
Subject: [PATCH 18/19] Driver for Samsung Galaxy Plus GT-I9001

---
 src/calibre/devices/android/driver.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/calibre/devices/android/driver.py b/src/calibre/devices/android/driver.py
index 6ef1e528fe..ce5a076fdf 100644
--- a/src/calibre/devices/android/driver.py
+++ b/src/calibre/devices/android/driver.py
@@ -187,7 +187,7 @@ class ANDROID(USBMS):
             'UMS', '.K080', 'P990', 'LTE', 'MB853', 'GT-S5660_CARD', 'A107',
             'GT-I9003_CARD', 'XT912', 'FILE-CD_GADGET', 'RK29_SDK', 'MB855',
             'XT910', 'BOOK_A10', 'USB_2.0_DRIVER', 'I9100T', 'P999DW',
-            'KTABLET_PC', 'INGENIC']
+            'KTABLET_PC', 'INGENIC', 'GT-I9001_CARD']
     WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
             'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
             'A70S', 'A101IT', '7', 'INCREDIBLE', 'A7EB', 'SGH-T849_CARD',
@@ -195,7 +195,7 @@ class ANDROID(USBMS):
             'ANDROID_MID', 'P990_SD_CARD', '.K080', 'LTE_CARD', 'MB853',
             'A1-07___C0541A4F', 'XT912', 'MB855', 'XT910', 'BOOK_A10_CARD',
             'USB_2.0_DRIVER', 'I9100T', 'P999DW_SD_CARD', 'KTABLET_PC',
-            'FILE-CD_GADGET']
+            'FILE-CD_GADGET', 'GT-I9001_CARD']
 
     OSX_MAIN_MEM = 'Android Device Main Memory'
 

From 889c3baefb4ff9d21ce35c06c0bef1443bd024af Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 28 Mar 2012 01:16:28 +0530
Subject: [PATCH 19/19] ...

---
 recipes/le_monde.recipe | 95 ++++++++++++++++++++++++++++++++++++-----
 1 file changed, 84 insertions(+), 11 deletions(-)

diff --git a/recipes/le_monde.recipe b/recipes/le_monde.recipe
index 6c7f15cca7..afc19e4d86 100644
--- a/recipes/le_monde.recipe
+++ b/recipes/le_monde.recipe
@@ -1,8 +1,9 @@
 __license__   = 'GPL v3'
-__copyright__ = '2011'
+__copyright__ = '2012'
 '''
 lemonde.fr
 '''
+import re
 from calibre.web.feeds.recipes import BasicNewsRecipe
 
 class LeMonde(BasicNewsRecipe):
@@ -24,7 +25,7 @@ class LeMonde(BasicNewsRecipe):
                     .ariane{font-size:xx-small;}
                     .source{font-size:xx-small;}
                     #.href{font-size:xx-small;}
-                    .LM_caption{color:#666666; font-size:x-small;}
+                    #.figcaption style{color:#666666; font-size:x-small;}
                     #.main-article-info{font-family:Arial,Helvetica,sans-serif;}
                     #full-contents{font-size:small; font-family:Arial,Helvetica,sans-serif;font-weight:normal;}
                     #match-stats-summary{font-size:small; font-family:Arial,Helvetica,sans-serif;font-weight:normal;}
@@ -40,8 +41,88 @@ class LeMonde(BasicNewsRecipe):
 
     remove_empty_feeds = True
 
-    auto_cleanup = True
+    filterDuplicates = True
 
+    def preprocess_html(self, soup):
+        # Unlink anchors: a link whose .string is set is replaced by that text
+        for alink in soup.findAll('a'):
+            if alink.string is not None:
+                alink.replaceWith(alink.string)
+        return soup
+
+    preprocess_regexps = [
+        (re.compile(r'([0-9])%'), lambda m: m.group(1) + '&nbsp;%'),
+        (re.compile(r'([0-9])([0-9])([0-9]) ([0-9])([0-9])([0-9])'), lambda m: m.group(1) + m.group(2) + m.group(3) + '&nbsp;' + m.group(4) + m.group(5) + m.group(6)),
+        (re.compile(r'([0-9]) ([0-9])([0-9])([0-9])'), lambda m: m.group(1) + '&nbsp;' + m.group(2) + m.group(3) + m.group(4)),
+        (re.compile(r'<span>'), lambda match: ' <span>'),
+        (re.compile(r'\("'), lambda match: '(&laquo;&nbsp;'),
+        (re.compile(r'"\)'), lambda match: '&nbsp;&raquo;)'),
+        (re.compile(r'&ldquo;'), lambda match: '(&laquo;&nbsp;'),
+        (re.compile(r'&rdquo;'), lambda match: '&nbsp;&raquo;)'),
+        (re.compile(r'>\''), lambda match: '>&lsquo;'),
+        (re.compile(r' \''), lambda match: ' &lsquo;'),
+        (re.compile(r' &quot;'), lambda match: ' &laquo;&nbsp;'),
+        (re.compile(r'>&quot;'), lambda match: '>&laquo;&nbsp;'),
+        (re.compile(r'&quot;<'), lambda match: '&nbsp;&raquo;<'),
+        (re.compile(r'&quot; '), lambda match: '&nbsp;&raquo; '),
+        (re.compile(r'&quot;,'), lambda match: '&nbsp;&raquo;,'),
+        (re.compile(r'\''), lambda match: '&rsquo;'),
+        (re.compile(r'"<em>'), lambda match: '<em>&laquo;&nbsp;'),
+        (re.compile(r'"<em>"</em><em>'), lambda match: '<em>&laquo;&nbsp;'),
+        (re.compile(r'"<a href='), lambda match: '&laquo;&nbsp;<a href='),
+        (re.compile(r'</em>"'), lambda match: '&nbsp;&raquo;</em>'),
+        (re.compile(r'</a>"'), lambda match: '&nbsp;&raquo;</a>'),
+        (re.compile(r'"</'), lambda match: '&nbsp;&raquo;</'),
+        (re.compile(r'>"'), lambda match: '>&laquo;&nbsp;'),
+        (re.compile(r'"<'), lambda match: '&nbsp;&raquo;<'),
+        (re.compile(r'&rsquo;"'), lambda match: '&rsquo;«&nbsp;'),
+        (re.compile(r' "'), lambda match: ' &laquo;&nbsp;'),
+        (re.compile(r'" '), lambda match: '&nbsp;&raquo; '),
+        (re.compile(r'"\.'), lambda match: '&nbsp;&raquo;.'),
+        (re.compile(r'",'), lambda match: '&nbsp;&raquo;,'),
+        (re.compile(r'"\?'), lambda match: '&nbsp;&raquo;?'),
+        (re.compile(r'":'), lambda match: '&nbsp;&raquo;:'),
+        (re.compile(r'";'), lambda match: '&nbsp;&raquo;;'),
+        (re.compile(r'"\!'), lambda match: '&nbsp;&raquo;!'),
+        (re.compile(r' :'), lambda match: '&nbsp;:'),
+        (re.compile(r' ;'), lambda match: '&nbsp;;'),
+        (re.compile(r' \?'), lambda match: '&nbsp;?'),
+        (re.compile(r' \!'), lambda match: '&nbsp;!'),
+        (re.compile(r'\s»'), lambda match: '&nbsp;»'),
+        (re.compile(r'«\s'), lambda match: '«&nbsp;'),
+        (re.compile(r' %'), lambda match: '&nbsp;%'),
+        (re.compile(r'\.jpg&nbsp;&raquo; width='), lambda match: '.jpg'),
+        (re.compile(r'\.png&nbsp;&raquo; width='), lambda match: '.png'),
+        (re.compile(r' &ndash; '), lambda match: '&nbsp;&ndash; '),
+        (re.compile(r'figcaption style="display:none"'), lambda match: 'figcaption'),
+        (re.compile(r' – '), lambda match: '&nbsp;&ndash; '),
+        (re.compile(r' - '), lambda match: '&nbsp;&ndash; '),
+        (re.compile(r' -,'), lambda match: '&nbsp;&ndash;,'),
+        (re.compile(r'&raquo;:'), lambda match: '&raquo;&nbsp;:'),
+        ]
+
+
+    keep_only_tags    = [
+                       dict(name='div', attrs={'class':['global']})
+                        ]
+
+    remove_tags = [
+                       dict(name='div', attrs={'class':['bloc_base meme_sujet']}),
+                       dict(name='p', attrs={'class':['lire']})
+                        ]
+
+    remove_tags_after = [dict(id='fb-like')]
+
+    def get_article_url(self, article):
+        '''
+        Return the guid of article as its URL. Return None instead when there
+        is no guid or it is a chat, blog, video, sport, portfolio or visuel URL.
+        '''
+        url = article.get('guid', None)
+        if url and any(x in url for x in ('/chat/', '.blog', '/video/',
+                '/sport/', '/portfolio/', '/visuel/')):
+            url = None
+        return url
 
     feeds          = [
                       ('A la une', 'http://www.lemonde.fr/rss/une.xml'),
@@ -66,11 +147,3 @@ class LeMonde(BasicNewsRecipe):
            cover_url = link_item.img['src']
 
         return cover_url
-
-    def get_article_url(self, article):
-        url = article.get('guid', None)
-        if '/chat/' in url or '.blog' in url or '/video/' in url or '/sport/' in url or '/portfolio/' in url or '/visuel/' in url :
-            url = None
-        return url
-
-