From ff74cd460d30d9ccf97bc1abff3aa519a681fca1 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Thu, 20 Aug 2009 21:05:47 -0400
Subject: [PATCH 001/120] Device drivers use file path template.

---
 src/calibre/devices/cybookg3/driver.py | 15 ++++---
 src/calibre/devices/jetbook/driver.py  | 23 +++++-----
 src/calibre/devices/prs500/books.py    | 14 +++---
 src/calibre/devices/prs500/driver.py   |  6 +--
 src/calibre/devices/prs505/books.py    | 15 +++----
 src/calibre/devices/prs505/driver.py   | 20 +++++----
 src/calibre/devices/usbms/device.py    | 50 +++++----------------
 src/calibre/devices/usbms/driver.py    | 14 +++---
 src/calibre/gui2/device.py             | 60 ++++++++++----------------
 9 files changed, 86 insertions(+), 131 deletions(-)
diff --git a/src/calibre/devices/cybookg3/driver.py b/src/calibre/devices/cybookg3/driver.py
index 670438f94d..de5e96d053 100644
--- a/src/calibre/devices/cybookg3/driver.py
+++ b/src/calibre/devices/cybookg3/driver.py
@@ -45,24 +45,25 @@ class CYBOOKG3(USBMS):
     DELETE_EXTS = ['.mbp', '.dat', '_6090.t2b']
     SUPPORTS_SUB_DIRS = True
 
-    def upload_books(self, files, names, on_card=None, end_session=True,
-                     metadata=None):
+    def upload_books(self, files, metadatas, ids, on_card=None,
+                     end_session=True):
 
         path = self._sanity_check(on_card, files)
 
         paths = []
-        names = iter(names)
-        metadata = iter(metadata)
+        metadatas = iter(metadatas)
+        ids = iter(ids)
 
         for i, infile in enumerate(files):
-            mdata, fname = metadata.next(), names.next()
-            filepath = self.create_upload_path(path, mdata, fname)
+            mdata, id = metadatas.next(), ids.next()
+            ext = os.path.splitext(infile)[1]
+            filepath = self.create_upload_path(path, mdata, ext, id)
             paths.append(filepath)
 
             self.put_file(infile, filepath, replace_file=True)
 
             coverdata = None
-            cover = mdata.get('cover', None)
+            cover = mdata.cover
             if cover:
                 coverdata = cover[2]
 
diff --git a/src/calibre/devices/jetbook/driver.py b/src/calibre/devices/jetbook/driver.py
index 8fcbe306a2..949438ae6f 100644
--- a/src/calibre/devices/jetbook/driver.py
+++ b/src/calibre/devices/jetbook/driver.py
@@ -15,7 +15,7 @@ from itertools import cycle
 
 from calibre.devices.usbms.driver import USBMS
 from calibre.utils.filenames import ascii_filename as sanitize
-from calibre.ebooks.metadata import string_to_authors
+from calibre.ebooks.metadata import authors_to_string, string_to_authors
 
 class JETBOOK(USBMS):
     name           = 'Ectaco JetBook Device Interface'
@@ -50,23 +50,22 @@ class JETBOOK(USBMS):
             r'(?P<authors>.+)#(?P<title>.+)'
             )
 
-    def upload_books(self, files, names, on_card=False, end_session=True,
-                    metadata=None):
-
+    def upload_books(self, files, metadatas, ids, on_card=None,
+                     end_session=True):
         path = self._sanity_check(on_card, files)
 
         paths = []
-        names = iter(names)
-        metadata = iter(metadata)
+        metadatas = iter(metadatas)
+        ids = iter(ids)
 
         for i, infile in enumerate(files):
-            mdata, fname = metadata.next(), names.next()
-            path = os.path.dirname(self.create_upload_path(path, mdata, fname))
+            mdata, id = metadatas.next(), ids.next()
+            ext = os.path.splitext(infile)[1]
+            path = os.path.dirname(self.create_upload_path(path, mdata, ext, id))
 
-            author = sanitize(mdata.get('authors','Unknown')).replace(' ', '_')
-            title = sanitize(mdata.get('title', 'Unknown')).replace(' ', '_')
-            fileext = os.path.splitext(os.path.basename(fname))[1]
-            fname = '%s#%s%s' % (author, title, fileext)
+            author = sanitize(authors_to_string(mdata.authors)).replace(' ', '_')
+            title = sanitize(mdata.title).replace(' ', '_')
+            fname = '%s#%s%s' % (author, title, ext)
 
             filepath = os.path.join(path, fname)
             paths.append(filepath)
diff --git a/src/calibre/devices/prs500/books.py b/src/calibre/devices/prs500/books.py
index 5eb8d7f011..770c48caf9 100644
--- a/src/calibre/devices/prs500/books.py
+++ b/src/calibre/devices/prs500/books.py
@@ -9,6 +9,7 @@ from base64 import b64decode as decode
 from base64 import b64encode as encode
 import re
 
+from calibre.ebooks.metadata import authors_to_string
 from calibre.devices.interface import BookList as _BookList
 from calibre.devices import strftime, strptime
 
@@ -262,9 +263,9 @@ class BookList(_BookList):
         cid = self.max_id()+1
         sourceid = str(self[0].sourceid) if len(self) else "1"
         attrs = {
-                 "title"  : info["title"],
-                 'titleSorter' : sortable_title(info['title']),
-                 "author" : info["authors"] if info['authors'] else 'Unknown', \
+                 "title"  : info.title,
+                 'titleSorter' : sortable_title(info.title),
+                 "author" : authors_to_string(info.authors), \
                  "page":"0", "part":"0", "scale":"0", \
                  "sourceid":sourceid,  "id":str(cid), "date":"", \
                  "mime":mime, "path":name, "size":str(size)
@@ -273,7 +274,7 @@ class BookList(_BookList):
             node.setAttributeNode(self.document.createAttribute(attr))
             node.setAttribute(attr, attrs[attr])
         try:
-            w, h, data = info["cover"]
+            w, h, data = info.cover
         except TypeError:
             w, h, data = None, None, None
 
@@ -290,10 +291,7 @@ class BookList(_BookList):
         book.datetime = ctime
         self.append(book)
         self.set_next_id(cid+1)
-        if self.prefix and info.has_key('tags'): # Playlists only supportted in main memory
-            if info.has_key('tag order'):
-                self.tag_order.update(info['tag order'])
-            self.set_playlists(book.id, info['tags'])
+        self.set_playlists(book.id, info.tags
 
 
     def playlist_by_title(self, title):
diff --git a/src/calibre/devices/prs500/driver.py b/src/calibre/devices/prs500/driver.py
index 4273101273..b6209f75af 100644
--- a/src/calibre/devices/prs500/driver.py
+++ b/src/calibre/devices/prs500/driver.py
@@ -863,14 +863,14 @@ class PRS500(DeviceConfig, DevicePlugin):
             self.upload_book_list(booklists[1], end_session=False)
 
     @safe
-    def upload_books(self, files, names, on_card=False, end_session=True,
-                     metadata=None):
+    def upload_books(self, files, metadatas, ids, on_card=None,
+                     end_session=True):
         card = self.card(end_session=False)
         prefix = card + '/' + self.CARD_PATH_PREFIX +'/' if on_card else '/Data/media/books/'
         if on_card and not self._exists(prefix)[0]:
             self.mkdir(prefix[:-1], False)
         paths, ctimes = [], []
-        names = iter(names)
+        names = iter([m.title for m in metadatas])
         infiles = [file if hasattr(file, 'read') else open(file, 'rb') for file in files]
         for f in infiles: f.seek(0, 2)
         sizes = [f.tell() for f in infiles]
diff --git a/src/calibre/devices/prs505/books.py b/src/calibre/devices/prs505/books.py
index 6e268e734a..4b8a952816 100644
--- a/src/calibre/devices/prs505/books.py
+++ b/src/calibre/devices/prs505/books.py
@@ -8,7 +8,7 @@ import xml.dom.minidom as dom
 from base64 import b64decode as decode
 from base64 import b64encode as encode
 
-
+from calibre.ebooks.metadata import authors_to_string
 from calibre.devices.interface import BookList as _BookList
 from calibre.devices import strftime as _strftime
 from calibre.devices import strptime
@@ -194,9 +194,9 @@ class BookList(_BookList):
         except:
             sourceid = '1'
         attrs = {
-                 "title"  : info["title"],
-                 'titleSorter' : sortable_title(info['title']),
-                 "author" : info["authors"] if info['authors'] else _('Unknown'),
+                 "title"  : info.title,
+                 'titleSorter' : sortable_title(info.title),
+                 "author" : authors_to_string(info.authors),
                  "page":"0", "part":"0", "scale":"0", \
                  "sourceid":sourceid,  "id":str(cid), "date":"", \
                  "mime":mime, "path":name, "size":str(size)
@@ -205,7 +205,7 @@ class BookList(_BookList):
             node.setAttributeNode(self.document.createAttribute(attr))
             node.setAttribute(attr, attrs[attr])
         try:
-            w, h, data = info["cover"]
+            w, h, data = info.cover
         except TypeError:
             w, h, data = None, None, None
 
@@ -221,10 +221,7 @@ class BookList(_BookList):
         book = Book(node, self.mountpath, [], prefix=self.prefix)
         book.datetime = ctime
         self.append(book)
-        if info.has_key('tags'):
-            if info.has_key('tag order'):
-                self.tag_order.update(info['tag order'])
-            self.set_tags(book, info['tags'])
+        self.set_tags(book, info.tags)
 
     def _delete_book(self, node):
         nid = node.getAttribute('id')
diff --git a/src/calibre/devices/prs505/driver.py b/src/calibre/devices/prs505/driver.py
index d1e1535e36..b4fccd2548 100644
--- a/src/calibre/devices/prs505/driver.py
+++ b/src/calibre/devices/prs505/driver.py
@@ -109,20 +109,22 @@ class PRS505(CLI, Device):
         self.report_progress(1.0, _('Getting list of books on device...'))
         return bl
 
-    def upload_books(self, files, names, on_card=None, end_session=True,
-                     metadata=None):
+    def upload_books(self, files, metadatas, ids, on_card=None,
+                     end_session=True):
 
         path = self._sanity_check(on_card, files)
 
-        paths, ctimes, sizes = [], [], []
-        names = iter(names)
-        metadata = iter(metadata)
-        for i, infile in enumerate(files):
-            mdata, fname = metadata.next(), names.next()
-            filepath = self.create_upload_path(path, mdata, fname)
+        paths, ctimes, sizes = [], [], []
+        metadatas = iter(metadatas)
+        ids = iter(ids)
 
+        for i, infile in enumerate(files):
+            mdata, id = metadatas.next(), ids.next()
+            ext = os.path.splitext(infile)[1]
+            filepath = self.create_upload_path(path, mdata, ext, id)
             paths.append(filepath)
-            self.put_file(infile, paths[-1], replace_file=True)
+
+            self.put_file(infile, filepath, replace_file=True)
             ctimes.append(os.path.getctime(paths[-1]))
             sizes.append(os.stat(paths[-1]).st_size)
 
diff --git a/src/calibre/devices/usbms/device.py b/src/calibre/devices/usbms/device.py
index 007d058941..f54e09e92f 100644
--- a/src/calibre/devices/usbms/device.py
+++ b/src/calibre/devices/usbms/device.py
@@ -23,7 +23,7 @@ from calibre.devices.interface import DevicePlugin
 from calibre.devices.errors import DeviceError, FreeSpaceError
 from calibre.devices.usbms.deviceconfig import DeviceConfig
 from calibre import iswindows, islinux, isosx, __appname__
-from calibre.utils.filenames import ascii_filename as sanitize, shorten_components_to
+from calibre.utils.filenames import shorten_components_to
 
 class Device(DeviceConfig, DevicePlugin):
 
@@ -667,46 +667,18 @@ class Device(DeviceConfig, DevicePlugin):
             raise FreeSpaceError(_("There is insufficient free space on the storage card"))
         return path
 
-    def create_upload_path(self, path, mdata, fname):
-        path = os.path.abspath(path)
-        newpath = path
-        extra_components = []
-
-        if self.SUPPORTS_SUB_DIRS and self.settings().use_subdirs:
-            if 'tags' in mdata.keys():
-                for tag in mdata['tags']:
-                    if tag.startswith(_('News')):
-                        extra_components.append('news')
-                        c = sanitize(mdata.get('title', ''))
-                        if c:
-                            extra_components.append(c)
-                        c = sanitize(mdata.get('timestamp', ''))
-                        if c:
-                            extra_components.append(c)
-                        break
-                    elif tag.startswith('/'):
-                        for c in tag.split('/'):
-                            c = sanitize(c)
-                            if not c: continue
-                            extra_components.append(c)
-                        break
-
-            if not extra_components:
-                c = sanitize(mdata.get('authors', _('Unknown')))
-                if c:
-                    extra_components.append(c)
-                c = sanitize(mdata.get('title', _('Unknown')))
-                if c:
-                    extra_components.append(c)
-                    newpath = os.path.join(newpath, c)
-
-        fname = sanitize(fname)
-        extra_components.append(fname)
-        extra_components = [str(x) for x in extra_components]
-        components = shorten_components_to(250 - len(path), extra_components)
-        filepath = os.path.join(path, *components)
+    def create_upload_path(self, root, mdata, ext, id):
+        from calibre.library.save_to_disk import config, get_components
+        opts = config().parse()
+        components = get_components(opts.template, mdata, id, opts.timefmt, 250)
+        components = [str(x) for x in components]
+        components = shorten_components_to(250 - len(root), components)
+        filepath = '%s%s' % (os.path.join(root, *components), ext)
         filedir = os.path.dirname(filepath)
 
+        if not self.SUPPORTS_SUB_DIRS or not self.settings().use_subdirs:
+            filedir = root
+            filepath = os.path.join(root, os.path.basename(filepath))
 
         if not os.path.exists(filedir):
             os.makedirs(filedir)
diff --git a/src/calibre/devices/usbms/driver.py b/src/calibre/devices/usbms/driver.py
index 5650a2f10e..6cfe0ed132 100644
--- a/src/calibre/devices/usbms/driver.py
+++ b/src/calibre/devices/usbms/driver.py
@@ -79,19 +79,19 @@ class USBMS(CLI, Device):
 
         return bl
 
-    def upload_books(self, files, names, on_card=None, end_session=True,
-                     metadata=None):
+    def upload_books(self, files, metadatas, ids, on_card=None,
+                     end_session=True):
 
         path = self._sanity_check(on_card, files)
 
         paths = []
-        names = iter(names)
-        metadata = iter(metadata)
+        metadatas = iter(metadatas)
+        ids = iter(ids)
 
         for i, infile in enumerate(files):
-            mdata, fname = metadata.next(), names.next()
-            filepath = self.create_upload_path(path, mdata, fname)
-
+            mdata, id = metadatas.next(), ids.next()
+            ext = os.path.splitext(infile)[1]
+            filepath = self.create_upload_path(path, mdata, ext, id)
             paths.append(filepath)
 
             self.put_file(infile, filepath, replace_file=True)
diff --git a/src/calibre/gui2/device.py b/src/calibre/gui2/device.py
index 3a46352a70..75385008f0 100644
--- a/src/calibre/gui2/device.py
+++ b/src/calibre/gui2/device.py
@@ -214,18 +214,17 @@ class DeviceManager(Thread):
         return self.create_job(self._sync_booklists, done, args=[booklists],
                         description=_('Send metadata to device'))
 
-    def _upload_books(self, files, names, on_card=None, metadata=None):
+    def _upload_books(self, files, metadata, ids, on_card=None):
         '''Upload books to device: '''
-        return self.device.upload_books(files, names, on_card,
-                                        metadata=metadata, end_session=False)
+        return self.device.upload_books(files, metadata, ids, on_card,
+                                        end_session=False)
 
-    def upload_books(self, done, files, names, on_card=None, titles=None,
-                     metadata=None):
-        desc = _('Upload %d books to device')%len(names)
+    def upload_books(self, done, files, metadata, ids, on_card=None, titles=None):
+        desc = _('Upload %d books to device')%len(files)
         if titles:
             desc += u':' + u', '.join(titles)
-        return self.create_job(self._upload_books, done, args=[files, names],
-                kwargs={'on_card':on_card,'metadata':metadata}, description=desc)
+        return self.create_job(self._upload_books, done, args=[files, metadata, ids],
+                kwargs={'on_card':on_card}, description=desc)
 
     def add_books_to_metadata(self, locations, metadata, booklists):
         self.device.add_books_to_metadata(locations, metadata, booklists)
@@ -698,18 +697,18 @@ class DeviceGUI(object):
                 dynamic.set('news_to_be_synced', set([]))
                 return
             metadata = self.library_view.model().get_metadata(ids,
-                    rows_are_ids=True)
+                    rows_are_ids=True, full_metadata=True)[1]
             names = []
             for mi in metadata:
-                prefix = ascii_filename(mi['title'])
+                prefix = ascii_filename(mi.title)
                 if not isinstance(prefix, unicode):
                     prefix = prefix.decode(preferred_encoding, 'replace')
                 prefix = ascii_filename(prefix)
                 names.append('%s_%d%s'%(prefix, id,
                     os.path.splitext(f.name)[1]))
-                cdata = mi['cover']
+                cdata = mi.cover
                 if cdata:
-                    mi['cover'] = self.cover_to_thumbnail(cdata)
+                    mi.cover = self.cover_to_thumbnail(cdata)
             dynamic.set('news_to_be_synced', set([]))
             if config['upload_news_to_device'] and files:
                 remove = ids if \
@@ -718,8 +717,7 @@ class DeviceGUI(object):
                     self.location_view.model().free[1] : 'carda',
                     self.location_view.model().free[2] : 'cardb' }
                 on_card = space.get(sorted(space.keys(), reverse=True)[0], None)
-                self.upload_books(files, names, metadata,
-                        on_card=on_card,
+                self.upload_books(files, metadata, ids, on_card=on_card,
                         memory=[[f.name for f in files], remove])
                 self.status_bar.showMessage(_('Sending news to device.'), 5000)
 
@@ -741,38 +739,28 @@ class DeviceGUI(object):
         else:
             _auto_ids = []
 
-        metadata = self.library_view.model().get_metadata(ids, True)
+        metadata = self.library_view.model().get_metadata(ids, True, full_metadata=True)[1]
         ids = iter(ids)
         for mi in metadata:
-            cdata = mi['cover']
+            cdata = mi.cover
             if cdata:
-                mi['cover'] = self.cover_to_thumbnail(cdata)
+                mi.cover = self.cover_to_thumbnail(cdata)
         metadata = iter(metadata)
 
         files = [getattr(f, 'name', None) for f in _files]
-        bad, good, gf, names, remove_ids = [], [], [], [], []
+        bad, mdata, gf, fids, remove_ids = [], [], [], [], []
         for f in files:
             mi = metadata.next()
             id = ids.next()
             if f is None:
-                bad.append(mi['title'])
+                bad.append(mi.title)
             else:
                 remove_ids.append(id)
-                good.append(mi)
                 gf.append(f)
-                t = mi['title']
-                if not t:
-                    t = _('Unknown')
-                a = mi['authors']
-                if not a:
-                    a = _('Unknown')
-                prefix = ascii_filename(t+' - '+a)
-                if not isinstance(prefix, unicode):
-                    prefix = prefix.decode(preferred_encoding, 'replace')
-                prefix = ascii_filename(prefix)
-                names.append('%s_%d%s'%(prefix, id, os.path.splitext(f)[1]))
+                mdata.append(mi)
+                fids.append(id)
         remove = remove_ids if delete_from_library else []
-        self.upload_books(gf, names, good, on_card, memory=(_files, remove))
+        self.upload_books(gf, mdata, fids, on_card, memory=(_files, remove))
         self.status_bar.showMessage(_('Sending books to device.'), 5000)
 
         auto = []
@@ -834,17 +822,15 @@ class DeviceGUI(object):
         cp, fs = job.result
         self.location_view.model().update_devices(cp, fs)
 
-    def upload_books(self, files, names, metadata, on_card=None, memory=None):
+    def upload_books(self, files, metadata, ids, on_card=None, memory=None):
         '''
         Upload books to device.
         :param files: List of either paths to files or file like objects
         '''
-        titles = [i['title'] for i in metadata]
+        titles = [i.title for i in metadata]
         job = self.device_manager.upload_books(
                 Dispatcher(self.books_uploaded),
-                files, names, on_card=on_card,
-                metadata=metadata, titles=titles
-              )
+                files, metadata, ids, on_card=on_card, titles=titles)
         self.upload_memory[job] = (metadata, on_card, memory, files)
 
     def books_uploaded(self, job):
@@ -857,7 +843,7 @@ class DeviceGUI(object):
             if isinstance(job.exception, FreeSpaceError):
                 where = 'in main memory.' if 'memory' in str(job.exception) \
                         else 'on the storage card.'
-                titles = '\n'.join(['<li>'+mi['title']+'</li>' \
+                titles = '\n'.join(['<li>'+mi.title+'</li>' \
                                     for mi in metadata])
                 d = error_dialog(self, _('No space on device'),
                                  _('<p>Cannot upload books to device there '

From 819706e616b22a50bb36639a864cb622f31578fd Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Sat, 22 Aug 2009 18:29:42 -0400
Subject: [PATCH 002/120] Fix typo.

---
 src/calibre/devices/prs500/books.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/calibre/devices/prs500/books.py b/src/calibre/devices/prs500/books.py
index 770c48caf9..382dcf135d 100644
--- a/src/calibre/devices/prs500/books.py
+++ b/src/calibre/devices/prs500/books.py
@@ -291,7 +291,7 @@ class BookList(_BookList):
         book.datetime = ctime
         self.append(book)
         self.set_next_id(cid+1)
-        self.set_playlists(book.id, info.tags
+        self.set_playlists(book.id, info.tags)
 
 
     def playlist_by_title(self, title):

From 857f55d2b199230a37895aac9936cf087b02bab0 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Sat, 22 Aug 2009 18:55:29 -0400
Subject: [PATCH 003/120] Fix bug #3251: Handle single line paragraphs in PDB
 files.

---
 src/calibre/ebooks/pdb/ereader/reader.py    |  6 +--
 src/calibre/ebooks/pdb/ereader/reader132.py |  4 +-
 src/calibre/ebooks/pdb/ereader/reader202.py |  4 +-
 src/calibre/ebooks/pdb/formatreader.py      |  2 +-
 src/calibre/ebooks/pdb/input.py             | 11 +++++-
 src/calibre/ebooks/pdb/palmdoc/reader.py    |  7 ++--
 src/calibre/ebooks/pdb/ztxt/reader.py       |  7 ++--
 src/calibre/ebooks/txt/input.py             |  7 +---
 src/calibre/ebooks/txt/processor.py         |  6 ++-
 src/calibre/gui2/convert/pdb_input.py       | 19 ++++++++++
 src/calibre/gui2/convert/pdb_input.ui       | 41 +++++++++++++++++++++
 11 files changed, 91 insertions(+), 23 deletions(-)
 create mode 100644 src/calibre/gui2/convert/pdb_input.py
 create mode 100644 src/calibre/gui2/convert/pdb_input.ui

diff --git a/src/calibre/ebooks/pdb/ereader/reader.py b/src/calibre/ebooks/pdb/ereader/reader.py
index 7a3298122f..77ca8d6933 100644
--- a/src/calibre/ebooks/pdb/ereader/reader.py
+++ b/src/calibre/ebooks/pdb/ereader/reader.py
@@ -15,13 +15,13 @@ from calibre.ebooks.pdb.ereader.reader202 import Reader202
 
 class Reader(FormatReader):
 
-    def __init__(self, header, stream, log, encoding=None):
+    def __init__(self, header, stream, log, options):
         record0_size = len(header.section_data(0))
 
         if record0_size == 132:
-            self.reader = Reader132(header, stream, log, encoding)
+            self.reader = Reader132(header, stream, log, options)
         elif record0_size == 202:
-            self.reader = Reader202(header, stream, log, encoding)
+            self.reader = Reader202(header, stream, log, options)
         else:
             raise EreaderError('Size mismatch. eReader header record size %s KB is not supported.' % record0_size)
 
diff --git a/src/calibre/ebooks/pdb/ereader/reader132.py b/src/calibre/ebooks/pdb/ereader/reader132.py
index a1d1f4294d..d44eb2c561 100644
--- a/src/calibre/ebooks/pdb/ereader/reader132.py
+++ b/src/calibre/ebooks/pdb/ereader/reader132.py
@@ -47,9 +47,9 @@ class HeaderRecord(object):
 
 class Reader132(FormatReader):
 
-    def __init__(self, header, stream, log, encoding=None):
+    def __init__(self, header, stream, log, options):
         self.log = log
-        self.encoding = encoding
+        self.encoding = options.input_encoding
         
         self.log.debug('132 byte header version found.')
 
diff --git a/src/calibre/ebooks/pdb/ereader/reader202.py b/src/calibre/ebooks/pdb/ereader/reader202.py
index 5057df363e..18281a208e 100644
--- a/src/calibre/ebooks/pdb/ereader/reader202.py
+++ b/src/calibre/ebooks/pdb/ereader/reader202.py
@@ -33,9 +33,9 @@ class HeaderRecord(object):
 
 class Reader202(FormatReader):
 
-    def __init__(self, header, stream, log, encoding=None):
+    def __init__(self, header, stream, log, options):
         self.log = log
-        self.encoding = encoding
+        self.encoding = options.input_encoding
 
         self.log.debug('202 byte header version found.')
 
diff --git a/src/calibre/ebooks/pdb/formatreader.py b/src/calibre/ebooks/pdb/formatreader.py
index bde6c9ae35..2251eaae04 100644
--- a/src/calibre/ebooks/pdb/formatreader.py
+++ b/src/calibre/ebooks/pdb/formatreader.py
@@ -11,7 +11,7 @@ __docformat__ = 'restructuredtext en'
 
 class FormatReader(object):
 
-    def __init__(self, header, stream, log, encoding=None):
+    def __init__(self, header, stream, log, options):
         raise NotImplementedError()
         
     def extract_content(self, output_dir):
diff --git a/src/calibre/ebooks/pdb/input.py b/src/calibre/ebooks/pdb/input.py
index 62ae24c7f0..3ad1a6121c 100644
--- a/src/calibre/ebooks/pdb/input.py
+++ b/src/calibre/ebooks/pdb/input.py
@@ -6,7 +6,7 @@ __docformat__ = 'restructuredtext en'
 
 import os
 
-from calibre.customize.conversion import InputFormatPlugin
+from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
 from calibre.ebooks.pdb.header import PdbHeaderReader
 from calibre.ebooks.pdb import PDBError, IDENTITY_TO_NAME, get_reader
 
@@ -17,6 +17,13 @@ class PDBInput(InputFormatPlugin):
     description = 'Convert PDB to HTML'
     file_types  = set(['pdb'])
 
+    options = set([
+        OptionRecommendation(name='single_line_paras', recommended_value=False,
+            help=_('Normally calibre treats blank lines as paragraph markers. '
+                'With this option it will assume that every line represents '
+                'a paragraph instead.')),
+    ])
+
     def convert(self, stream, options, file_ext, log,
                 accelerators):
         header = PdbHeaderReader(stream)
@@ -27,7 +34,7 @@ class PDBInput(InputFormatPlugin):
 
         log.debug('Detected ebook format as: %s with identity: %s' % (IDENTITY_TO_NAME[header.ident], header.ident))
 
-        reader = Reader(header, stream, log, options.input_encoding)
+        reader = Reader(header, stream, log, options)
         opf = reader.extract_content(os.getcwd())
 
         return opf
diff --git a/src/calibre/ebooks/pdb/palmdoc/reader.py b/src/calibre/ebooks/pdb/palmdoc/reader.py
index 7e8f3b241c..e1935db566 100644
--- a/src/calibre/ebooks/pdb/palmdoc/reader.py
+++ b/src/calibre/ebooks/pdb/palmdoc/reader.py
@@ -31,10 +31,11 @@ class HeaderRecord(object):
 
 class Reader(FormatReader):
 
-    def __init__(self, header, stream, log, encoding=None):
+    def __init__(self, header, stream, log, options):
         self.stream = stream
         self.log = log
-        self.encoding = encoding
+        self.encoding = options.input_encoding
+        self.single_line_paras = options.single_line_paras
 
         self.sections = []
         for i in range(header.num_sections):
@@ -61,7 +62,7 @@ class Reader(FormatReader):
             txt += self.decompress_text(i)
 
         self.log.info('Converting text to OEB...')
-        html = txt_to_markdown(txt)
+        html = txt_to_markdown(txt, single_line_paras=self.single_line_paras)
         with open(os.path.join(output_dir, 'index.html'), 'wb') as index:
             index.write(html.encode('utf-8'))
 
diff --git a/src/calibre/ebooks/pdb/ztxt/reader.py b/src/calibre/ebooks/pdb/ztxt/reader.py
index 0c334556e8..86c5abfe82 100644
--- a/src/calibre/ebooks/pdb/ztxt/reader.py
+++ b/src/calibre/ebooks/pdb/ztxt/reader.py
@@ -34,10 +34,11 @@ class HeaderRecord(object):
     
 class Reader(FormatReader):
     
-    def __init__(self, header, stream, log, encoding=None):
+    def __init__(self, header, stream, log, options):
         self.stream = stream
         self.log = log
-        self.encoding = encoding
+        self.encoding = options.input_encoding
+        self.single_line_paras = options.single_line_paras
     
         self.sections = []
         for i in range(header.num_sections):
@@ -76,7 +77,7 @@ class Reader(FormatReader):
             txt += self.decompress_text(i)
 
         self.log.info('Converting text to OEB...')
-        html = txt_to_markdown(txt)
+        html = txt_to_markdown(txt, single_line_paras=self.single_line_paras)
         with open(os.path.join(output_dir, 'index.html'), 'wb') as index:
             index.write(html.encode('utf-8'))
                         
diff --git a/src/calibre/ebooks/txt/input.py b/src/calibre/ebooks/txt/input.py
index 493fdf3967..75dd516360 100644
--- a/src/calibre/ebooks/txt/input.py
+++ b/src/calibre/ebooks/txt/input.py
@@ -31,14 +31,9 @@ class TXTInput(InputFormatPlugin):
         log.debug('Reading text from file...')
         txt = stream.read().decode(ienc, 'replace')
 
-        if options.single_line_paras:
-            txt = txt.replace('\r\n', '\n')
-            txt = txt.replace('\r', '\n')
-            txt = txt.replace('\n', '\n\n')
-
         log.debug('Running text though markdown conversion...')
         try:
-            html = txt_to_markdown(txt)
+            html = txt_to_markdown(txt, single_line_paras=options.single_line_paras)
         except RuntimeError:
             raise ValueError('This txt file has malformed markup, it cannot be'
                 'converted by calibre. See http://daringfireball.net/projects/markdown/syntax')
diff --git a/src/calibre/ebooks/txt/processor.py b/src/calibre/ebooks/txt/processor.py
index ddb9b6a121..3005d633b8 100644
--- a/src/calibre/ebooks/txt/processor.py
+++ b/src/calibre/ebooks/txt/processor.py
@@ -13,7 +13,11 @@ __license__   = 'GPL v3'
 __copyright__ = '2009, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
 
-def txt_to_markdown(txt, title=''):
+def txt_to_markdown(txt, title='', single_line_paras=False):
+    if single_line_paras:
+        txt = txt.replace('\r\n', '\n')
+        txt = txt.replace('\r', '\n')
+        txt = txt.replace('\n', '\n\n')
     md = markdown.Markdown(
         extensions=['footnotes', 'tables', 'toc'],
         safe_mode=False,)
diff --git a/src/calibre/gui2/convert/pdb_input.py b/src/calibre/gui2/convert/pdb_input.py
new file mode 100644
index 0000000000..cc7582c7f6
--- /dev/null
+++ b/src/calibre/gui2/convert/pdb_input.py
@@ -0,0 +1,19 @@
+# -*- coding: utf-8 -*-
+
+__license__ = 'GPL 3'
+__copyright__ = '2009, John Schember <john@nachtimwald.com>'
+__docformat__ = 'restructuredtext en'
+
+from calibre.gui2.convert.pdb_input_ui import Ui_Form
+from calibre.gui2.convert import Widget
+
+class PluginWidget(Widget, Ui_Form):
+
+    TITLE = _('PDB Input')
+    HELP = _('Options specific to')+' PDB '+_('input')
+
+    def __init__(self, parent, get_option, get_help, db=None, book_id=None):
+        Widget.__init__(self, parent, 'txt_input',
+            ['single_line_paras'])
+        self.db, self.book_id = db, book_id
+        self.initialize_options(get_option, get_help, db, book_id)
diff --git a/src/calibre/gui2/convert/pdb_input.ui b/src/calibre/gui2/convert/pdb_input.ui
new file mode 100644
index 0000000000..191e749833
--- /dev/null
+++ b/src/calibre/gui2/convert/pdb_input.ui
@@ -0,0 +1,41 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<ui version="4.0">
+ <class>Form</class>
+ <widget class="QWidget" name="Form">
+  <property name="geometry">
+   <rect>
+    <x>0</x>
+    <y>0</y>
+    <width>400</width>
+    <height>300</height>
+   </rect>
+  </property>
+  <property name="windowTitle">
+   <string>Form</string>
+  </property>
+  <layout class="QGridLayout" name="gridLayout">
+   <item row="1" column="0">
+    <spacer name="verticalSpacer">
+     <property name="orientation">
+      <enum>Qt::Vertical</enum>
+     </property>
+     <property name="sizeHint" stdset="0">
+      <size>
+       <width>20</width>
+       <height>213</height>
+      </size>
+     </property>
+    </spacer>
+   </item>
+   <item row="0" column="0">
+    <widget class="QCheckBox" name="opt_single_line_paras">
+     <property name="text">
+      <string>Treat each &amp;line as a paragraph</string>
+     </property>
+    </widget>
+   </item>
+  </layout>
+ </widget>
+ <resources/>
+ <connections/>
+</ui>

From 1f014caf4454c5831ffb450b05569018a247f71d Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Sat, 22 Aug 2009 23:15:23 -0400
Subject: [PATCH 004/120] Remove excessive newlines from TXT output.

---
 src/calibre/ebooks/txt/txtml.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/calibre/ebooks/txt/txtml.py b/src/calibre/ebooks/txt/txtml.py
index 24a9fb0878..09f9d5d50c 100644
--- a/src/calibre/ebooks/txt/txtml.py
+++ b/src/calibre/ebooks/txt/txtml.py
@@ -90,8 +90,8 @@ class TXTMLizer(object):
         text = re.sub('[  ]+', ' ', text)
 
         # Remove excessive newlines.
-        #text = re.sub('\n[ ]+\n', '\n\n', text)
-        #text = re.sub('\n{3,}', '\n\n', text)
+        text = re.sub('\n[ ]+\n', '\n\n', text)
+        text = re.sub('\n{5,}', '\n\n\n\n', text)
 
         # Replace spaces at the beginning and end of lines
         text = re.sub('(?imu)^[ ]+', '', text)

From 5589a0644740387a3a024f10933cf060fad2f413 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Fri, 28 Aug 2009 22:36:40 -0400
Subject: [PATCH 005/120] fix #3322: PDB input parameter not saved.

---
 src/calibre/gui2/convert/pdb_input.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/calibre/gui2/convert/pdb_input.py b/src/calibre/gui2/convert/pdb_input.py
index cc7582c7f6..4b0ba73fda 100644
--- a/src/calibre/gui2/convert/pdb_input.py
+++ b/src/calibre/gui2/convert/pdb_input.py
@@ -13,7 +13,7 @@ class PluginWidget(Widget, Ui_Form):
     HELP = _('Options specific to')+' PDB '+_('input')
 
     def __init__(self, parent, get_option, get_help, db=None, book_id=None):
-        Widget.__init__(self, parent, 'txt_input',
+        Widget.__init__(self, parent, 'pdb_input',
             ['single_line_paras'])
         self.db, self.book_id = db, book_id
         self.initialize_options(get_option, get_help, db, book_id)

From 813d9eb14f921a5926f29eda38ee9af438ec251f Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Sat, 29 Aug 2009 11:09:49 -0400
Subject: [PATCH 006/120] Fix Bug #3338: Handle strange characters in eReader
 metadata title.

---
 src/calibre/ebooks/pdb/ereader/reader132.py | 2 +-
 src/calibre/ebooks/pdb/ereader/reader202.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/calibre/ebooks/pdb/ereader/reader132.py b/src/calibre/ebooks/pdb/ereader/reader132.py
index d44eb2c561..806d62c977 100644
--- a/src/calibre/ebooks/pdb/ereader/reader132.py
+++ b/src/calibre/ebooks/pdb/ereader/reader132.py
@@ -105,7 +105,7 @@ class Reader132(FormatReader):
         if not os.path.exists(output_dir):
             os.makedirs(output_dir)
 
-        html = u'<html><head><title>%s</title></head><body>' % self.mi.title
+        html = u'<html><head><title>%s</title></head><body>' % self.mi.title.decode('utf-8', 'replace')
 
         pml = u''
         for i in range(1, self.header_record.num_text_pages + 1):
diff --git a/src/calibre/ebooks/pdb/ereader/reader202.py b/src/calibre/ebooks/pdb/ereader/reader202.py
index 18281a208e..da8fadd5d9 100644
--- a/src/calibre/ebooks/pdb/ereader/reader202.py
+++ b/src/calibre/ebooks/pdb/ereader/reader202.py
@@ -93,7 +93,7 @@ class Reader202(FormatReader):
             pml += self.get_text_page(i)
 
         html = u'<html><head><title>%s</title></head><body>%s</body></html>' % \
-            (self.mi.title, pml_to_html(pml))
+            (self.mi.title.decode('utf-8', 'replace'), pml_to_html(pml))
 
         with CurrentDir(output_dir):
             with open('index.html', 'wb') as index:

From 6a55edf9cd3c0516e849ce4712de3da240f4f35b Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Mon, 31 Aug 2009 07:35:06 -0400
Subject: [PATCH 007/120] Update description.

---
 src/calibre/gui2/dialogs/config/add_save.ui | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/calibre/gui2/dialogs/config/add_save.ui b/src/calibre/gui2/dialogs/config/add_save.ui
index 513be73e54..ef1a867cd2 100644
--- a/src/calibre/gui2/dialogs/config/add_save.ui
+++ b/src/calibre/gui2/dialogs/config/add_save.ui
@@ -70,7 +70,7 @@
     <item row="0" column="0" colspan="2">
      <widget class="QLabel" name="label">
       <property name="text">
-       <string>Here you can control how calibre will save your books when you click the Save to Disk button:</string>
+       <string>Here you can control how calibre will save your books when you click the Save to Disk or Send to Device buttons:</string>
       </property>
       <property name="wordWrap">
        <bool>true</bool>

From 13a4379063735c589c561a73a9493415d385960e Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Mon, 31 Aug 2009 21:03:00 -0400
Subject: [PATCH 008/120] Implement bug #3359: Make markdown processing of text
 files optional.

---
 src/calibre/ebooks/oeb/base.py           |  4 ++--
 src/calibre/ebooks/pdb/palmdoc/reader.py |  8 +++++---
 src/calibre/ebooks/pdb/ztxt/reader.py    |  7 +++++--
 src/calibre/ebooks/txt/input.py          | 23 ++++++++++++++++-------
 src/calibre/ebooks/txt/processor.py      | 24 ++++++++++++++++--------
 5 files changed, 44 insertions(+), 22 deletions(-)

diff --git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py
index 3f9e6a4d4a..2e06fffe4e 100644
--- a/src/calibre/ebooks/oeb/base.py
+++ b/src/calibre/ebooks/oeb/base.py
@@ -934,7 +934,7 @@ class Manifest(object):
 
             self.oeb.log.debug('Converting', self.href, '...')
 
-            from calibre.ebooks.txt.processor import txt_to_markdown
+            from calibre.ebooks.txt.processor import convert_markdown
 
             title = self.oeb.metadata.title
             if title:
@@ -942,7 +942,7 @@ class Manifest(object):
             else:
                 title = _('Unknown')
 
-            return self._parse_xhtml(txt_to_markdown(data, title))
+            return self._parse_xhtml(convert_markdown(data, title))
 
 
         def _parse_css(self, data):
diff --git a/src/calibre/ebooks/pdb/palmdoc/reader.py b/src/calibre/ebooks/pdb/palmdoc/reader.py
index e1935db566..8992382597 100644
--- a/src/calibre/ebooks/pdb/palmdoc/reader.py
+++ b/src/calibre/ebooks/pdb/palmdoc/reader.py
@@ -13,8 +13,8 @@ import struct
 
 from calibre.ebooks.compression.palmdoc import decompress_doc
 from calibre.ebooks.pdb.formatreader import FormatReader
-from calibre.ebooks.txt.processor import opf_writer
-from calibre.ebooks.txt.processor import txt_to_markdown
+from calibre.ebooks.txt.processor import convert_basic, separate_paragraphs, \
+    opf_writer
 
 class HeaderRecord(object):
     '''
@@ -62,7 +62,9 @@ class Reader(FormatReader):
             txt += self.decompress_text(i)
 
         self.log.info('Converting text to OEB...')
-        html = txt_to_markdown(txt, single_line_paras=self.single_line_paras)
+        if self.single_line_paras:
+            txt = separate_paragraphs(txt)
+        html = convert_basic(txt)
         with open(os.path.join(output_dir, 'index.html'), 'wb') as index:
             index.write(html.encode('utf-8'))
 
diff --git a/src/calibre/ebooks/pdb/ztxt/reader.py b/src/calibre/ebooks/pdb/ztxt/reader.py
index 86c5abfe82..664f498bee 100644
--- a/src/calibre/ebooks/pdb/ztxt/reader.py
+++ b/src/calibre/ebooks/pdb/ztxt/reader.py
@@ -12,7 +12,8 @@ import os, struct, zlib
 
 from calibre.ebooks.pdb.formatreader import FormatReader
 from calibre.ebooks.pdb.ztxt import zTXTError
-from calibre.ebooks.txt.processor import txt_to_markdown, opf_writer
+from calibre.ebooks.txt.processor import convert_basic, separate_paragraphs, \
+    opf_writer
 
 SUPPORTED_VERSION = (1, 40)
 
@@ -77,7 +78,9 @@ class Reader(FormatReader):
             txt += self.decompress_text(i)
 
         self.log.info('Converting text to OEB...')
-        html = txt_to_markdown(txt, single_line_paras=self.single_line_paras)
+        if self.single_line_paras:
+            txt = separate_paragraphs(txt)
+        html = convert_basic(txt)
         with open(os.path.join(output_dir, 'index.html'), 'wb') as index:
             index.write(html.encode('utf-8'))
                         
diff --git a/src/calibre/ebooks/txt/input.py b/src/calibre/ebooks/txt/input.py
index 5d84a1bde1..2b0245c98b 100644
--- a/src/calibre/ebooks/txt/input.py
+++ b/src/calibre/ebooks/txt/input.py
@@ -7,7 +7,8 @@ __docformat__ = 'restructuredtext en'
 import os
 
 from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
-from calibre.ebooks.txt.processor import txt_to_markdown
+from calibre.ebooks.txt.processor import convert_basic, convert_markdown, \
+    separate_paragraphs
 
 class TXTInput(InputFormatPlugin):
 
@@ -21,6 +22,8 @@ class TXTInput(InputFormatPlugin):
             help=_('Normally calibre treats blank lines as paragraph markers. '
                 'With this option it will assume that every line represents '
                 'a paragraph instead.')),
+        OptionRecommendation(name='markdown', recommended_value=False,
+            help=_('Run the text input though the markdown processor.')),
     ])
 
     def convert(self, stream, options, file_ext, log,
@@ -31,12 +34,18 @@ class TXTInput(InputFormatPlugin):
         log.debug('Reading text from file...')
         txt = stream.read().decode(ienc, 'replace')
 
-        log.debug('Running text though markdown conversion...')
-        try:
-            html = txt_to_markdown(txt, single_line_paras=options.single_line_paras)
-        except RuntimeError:
-            raise ValueError('This txt file has malformed markup, it cannot be'
-                'converted by calibre. See http://daringfireball.net/projects/markdown/syntax')
+        if options.single_line_paras:
+            txt = separate_paragraphs(txt)
+
+        if options.markdown:
+            log.debug('Running text though markdown conversion...')
+            try:
+                html = convert_markdown(txt)
+            except RuntimeError:
+                raise ValueError('This txt file has malformed markup, it cannot be'
+                    'converted by calibre. See http://daringfireball.net/projects/markdown/syntax')
+        else:
+            html = convert_basic(txt)
 
         from calibre.customize.ui import plugin_for_input_format
         html_input = plugin_for_input_format('html')
diff --git a/src/calibre/ebooks/txt/processor.py b/src/calibre/ebooks/txt/processor.py
index 3005d633b8..94df216616 100644
--- a/src/calibre/ebooks/txt/processor.py
+++ b/src/calibre/ebooks/txt/processor.py
@@ -5,6 +5,7 @@ Read content from txt file.
 '''
 
 import os
+import re
 
 from calibre.ebooks.markdown import markdown
 from calibre.ebooks.metadata.opf2 import OPFCreator
@@ -13,18 +14,25 @@ __license__   = 'GPL v3'
 __copyright__ = '2009, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
 
-def txt_to_markdown(txt, title='', single_line_paras=False):
-    if single_line_paras:
-        txt = txt.replace('\r\n', '\n')
-        txt = txt.replace('\r', '\n')
-        txt = txt.replace('\n', '\n\n')
+HTML_TEMPLATE = u'<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"/><title>%s</title></head><body>%s</body></html>'
+
+def convert_basic(txt, title=''):
+    lines = []
+    for line in txt.splitlines():
+        lines.append('<p>%s</p>' % line)
+    return HTML_TEMPLATE % (title, '\n'.join(lines))
+
+def convert_markdown(txt, title=''):
     md = markdown.Markdown(
         extensions=['footnotes', 'tables', 'toc'],
         safe_mode=False,)
-    html = u'<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"/><title>%s</title></head><body>%s</body></html>' % (title,
-        md.convert(txt))
+    return HTML_TEMPLATE % (title, md.convert(txt))
 
-    return html
+def separate_paragraphs(txt):
+    txt = txt.replace('\r\n', '\n')
+    txt = txt.replace('\r', '\n')
+    txt = re.sub(u'(?<=.)\n(?=.)', u'\n\n', txt)
+    return txt
 
 def opf_writer(path, opf_name, manifest, spine, mi):
     opf = OPFCreator(path, mi)

From 9a30868d8372d3b07f34938827734b3700f9b4ed Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Mon, 31 Aug 2009 21:05:47 -0400
Subject: [PATCH 009/120] Add markdown option to TXT input in GUI.

---
 src/calibre/gui2/convert/txt_input.py | 2 +-
 src/calibre/gui2/convert/txt_input.ui | 9 ++++++++-
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/src/calibre/gui2/convert/txt_input.py b/src/calibre/gui2/convert/txt_input.py
index 71dbbe1fe2..3d17eefe0d 100644
--- a/src/calibre/gui2/convert/txt_input.py
+++ b/src/calibre/gui2/convert/txt_input.py
@@ -14,6 +14,6 @@ class PluginWidget(Widget, Ui_Form):
 
     def __init__(self, parent, get_option, get_help, db=None, book_id=None):
         Widget.__init__(self, parent, 'txt_input',
-            ['single_line_paras'])
+            ['single_line_paras', 'markdown'])
         self.db, self.book_id = db, book_id
         self.initialize_options(get_option, get_help, db, book_id)
diff --git a/src/calibre/gui2/convert/txt_input.ui b/src/calibre/gui2/convert/txt_input.ui
index 191e749833..353144b46b 100644
--- a/src/calibre/gui2/convert/txt_input.ui
+++ b/src/calibre/gui2/convert/txt_input.ui
@@ -14,7 +14,7 @@
    <string>Form</string>
   </property>
   <layout class="QGridLayout" name="gridLayout">
-   <item row="1" column="0">
+   <item row="2" column="0">
     <spacer name="verticalSpacer">
      <property name="orientation">
       <enum>Qt::Vertical</enum>
@@ -34,6 +34,13 @@
      </property>
     </widget>
    </item>
+   <item row="1" column="0">
+    <widget class="QCheckBox" name="opt_markdown">
+     <property name="text">
+      <string>Process using markdown</string>
+     </property>
+    </widget>
+   </item>
   </layout>
  </widget>
  <resources/>

From 26a0327943421c677a9a395a6395f8d3082080e3 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Tue, 1 Sep 2009 06:21:40 -0400
Subject: [PATCH 010/120] TXT input convert_basic fixes: Make it handle html
 reserved characters in the text and detect paragraphs correctly.

---
 src/calibre/ebooks/txt/processor.py | 21 +++++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/src/calibre/ebooks/txt/processor.py b/src/calibre/ebooks/txt/processor.py
index 94df216616..f6503c0bc5 100644
--- a/src/calibre/ebooks/txt/processor.py
+++ b/src/calibre/ebooks/txt/processor.py
@@ -7,6 +7,7 @@ Read content from txt file.
 import os
 import re
 
+from calibre import prepare_string_for_xml
 from calibre.ebooks.markdown import markdown
 from calibre.ebooks.metadata.opf2 import OPFCreator
 
@@ -14,12 +15,28 @@ __license__   = 'GPL v3'
 __copyright__ = '2009, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
 
-HTML_TEMPLATE = u'<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"/><title>%s</title></head><body>%s</body></html>'
+HTML_TEMPLATE = u'<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"/><title>%s</title></head><body>\n%s\n</body></html>'
 
 def convert_basic(txt, title=''):
     lines = []
+    # Strip whitespace from the beginning and end of the line. Also replace
+    # all line breaks with \n.
     for line in txt.splitlines():
-        lines.append('<p>%s</p>' % line)
+        lines.append(line.strip())
+    txt = '\n'.join(lines)
+
+    # Remove blank lines from the beginning and end of the document.
+    txt = re.sub('^\s+(?=.)', '', txt)
+    txt = re.sub('(?<=.)\s+$', '', txt)
+    # Remove excessive line breaks.
+    txt = re.sub('\n{3,}', '\n\n', txt)
+
+    lines = []
+    # Split into paragraphs based on having a blank line between text.
+    for line in txt.split('\n\n'):
+        if line.strip():
+            lines.append('<p>%s</p>' % prepare_string_for_xml(line.replace('\n', ' ')))
+
     return HTML_TEMPLATE % (title, '\n'.join(lines))
 
 def convert_markdown(txt, title=''):

From f08775dda72c26589f8c85f74102764ac6030b99 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Tue, 1 Sep 2009 17:27:24 -0400
Subject: [PATCH 011/120] HTML input: Use correct name for pdf line unwrapping
 option. Conversion preprocessor: Allow non pdf to html input to use the line
 unwrapping code.

---
 src/calibre/ebooks/conversion/preprocess.py | 22 ++++++++++-----------
 src/calibre/ebooks/html/input.py            |  2 +-
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/src/calibre/ebooks/conversion/preprocess.py b/src/calibre/ebooks/conversion/preprocess.py
index cb2564ec0a..029b9752e1 100644
--- a/src/calibre/ebooks/conversion/preprocess.py
+++ b/src/calibre/ebooks/conversion/preprocess.py
@@ -223,16 +223,7 @@ class HTMLPreProcessor(object):
         elif self.is_book_designer(html):
             rules = self.BOOK_DESIGNER
         elif self.is_pdftohtml(html):
-            end_rules = []
-            if getattr(self.extra_opts, 'unwrap_factor', None):
-                length = line_length(html, getattr(self.extra_opts, 'unwrap_factor'))
-                if length:
-                    end_rules.append(
-                        # Un wrap using punctuation
-                        (re.compile(r'(?<=.{%i}[a-z\.,;:)-IA])\s*(?P<ital></(i|b|u)>)?\s*(<p.*?>)\s*(?=(<(i|b|u)>)?\s*[\w\d(])' % length, re.UNICODE), wrap_lines),
-                    )
-
-            rules = self.PDFTOHTML + end_rules
+            rules = self.PDFTOHTML
         else:
             rules = []
 
@@ -246,7 +237,16 @@ class HTMLPreProcessor(object):
                 (re.compile(getattr(self.extra_opts, 'footer_regex')), lambda match : '')
             )
 
-        for rule in self.PREPROCESS + pre_rules + rules:
+            end_rules = []
+            if getattr(self.extra_opts, 'unwrap_factor', None):
+                length = line_length(html, getattr(self.extra_opts, 'unwrap_factor'))
+                if length:
+                    end_rules.append(
+                        # Un wrap using punctuation
+                        (re.compile(r'(?<=.{%i}[a-z\.,;:)-IA])\s*(?P<ital></(i|b|u)>)?\s*(<p.*?>)\s*(?=(<(i|b|u)>)?\s*[\w\d(])' % length, re.UNICODE), wrap_lines),
+                    )
+
+        for rule in self.PREPROCESS + pre_rules + rules + end_rules:
             html = rule[0].sub(rule[1], html)
 
         # Handle broken XHTML w/ SVG (ugh)
diff --git a/src/calibre/ebooks/html/input.py b/src/calibre/ebooks/html/input.py
index 92c2df9690..7b7bfdf3aa 100644
--- a/src/calibre/ebooks/html/input.py
+++ b/src/calibre/ebooks/html/input.py
@@ -262,7 +262,7 @@ class HTMLInput(InputFormatPlugin):
                 )
         ),
 
-        OptionRecommendation(name='pdf_line_length', recommended_value=0.5,
+        OptionRecommendation(name='unwrap_factor', recommended_value=0.5,
             help=_('Average line length for line breaking if the HTML is from a '
                 'previous partial conversion of a PDF file.')),
 

From fe267db425c06a60d2afc92ce11c7e1cdd7b1ca5 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Tue, 1 Sep 2009 19:45:22 -0400
Subject: [PATCH 012/120] Fix indent.

---
 src/calibre/ebooks/conversion/preprocess.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/src/calibre/ebooks/conversion/preprocess.py b/src/calibre/ebooks/conversion/preprocess.py
index 029b9752e1..1aae939a06 100644
--- a/src/calibre/ebooks/conversion/preprocess.py
+++ b/src/calibre/ebooks/conversion/preprocess.py
@@ -237,14 +237,14 @@ class HTMLPreProcessor(object):
                 (re.compile(getattr(self.extra_opts, 'footer_regex')), lambda match : '')
             )
 
-            end_rules = []
-            if getattr(self.extra_opts, 'unwrap_factor', None):
-                length = line_length(html, getattr(self.extra_opts, 'unwrap_factor'))
-                if length:
-                    end_rules.append(
-                        # Un wrap using punctuation
-                        (re.compile(r'(?<=.{%i}[a-z\.,;:)-IA])\s*(?P<ital></(i|b|u)>)?\s*(<p.*?>)\s*(?=(<(i|b|u)>)?\s*[\w\d(])' % length, re.UNICODE), wrap_lines),
-                    )
+        end_rules = []
+        if getattr(self.extra_opts, 'unwrap_factor', None):
+            length = line_length(html, getattr(self.extra_opts, 'unwrap_factor'))
+            if length:
+                end_rules.append(
+                    # Un wrap using punctuation
+                    (re.compile(r'(?<=.{%i}[a-z\.,;:)-IA])\s*(?P<ital></(i|b|u)>)?\s*(<p.*?>)\s*(?=(<(i|b|u)>)?\s*[\w\d(])' % length, re.UNICODE), wrap_lines),
+                )
 
         for rule in self.PREPROCESS + pre_rules + rules + end_rules:
             html = rule[0].sub(rule[1], html)

From 8c8b419e05b74ad4f5579b15ece7298cb8bf649f Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Tue, 1 Sep 2009 20:04:16 -0400
Subject: [PATCH 013/120] Fix bug #3380: TXT input encoding ignored.

---
 src/calibre/ebooks/txt/input.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/calibre/ebooks/txt/input.py b/src/calibre/ebooks/txt/input.py
index 2b0245c98b..47b03181f0 100644
--- a/src/calibre/ebooks/txt/input.py
+++ b/src/calibre/ebooks/txt/input.py
@@ -51,6 +51,7 @@ class TXTInput(InputFormatPlugin):
         html_input = plugin_for_input_format('html')
         for opt in html_input.options:
             setattr(options, opt.option.name, opt.recommended_value)
+        options.input_encoding = 'utf-8'
         base = os.getcwdu()
         if hasattr(stream, 'name'):
             base = os.path.dirname(stream.name)

From 34e9857ab0d36acb5f9cd091e0200210f0bdf4c4 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Wed, 2 Sep 2009 17:05:11 -0400
Subject: [PATCH 014/120] TXT output: Optimize string manipulation.

---
 src/calibre/ebooks/txt/newlines.py |  2 +-
 src/calibre/ebooks/txt/txtml.py    | 51 +++++++++++++++++++-----------
 2 files changed, 34 insertions(+), 19 deletions(-)

diff --git a/src/calibre/ebooks/txt/newlines.py b/src/calibre/ebooks/txt/newlines.py
index 983d356206..ae766a216f 100644
--- a/src/calibre/ebooks/txt/newlines.py
+++ b/src/calibre/ebooks/txt/newlines.py
@@ -19,7 +19,7 @@ class TxtNewlines(object):
         self.newline = self.NEWLINE_TYPES.get(newline_type.lower(), os.linesep)
 
 def specified_newlines(newline, text):
-    if newline == os.linesep:
+    if newline == '\n':
         return text
 
     return text.replace(os.linesep, newline)
diff --git a/src/calibre/ebooks/txt/txtml.py b/src/calibre/ebooks/txt/txtml.py
index 206dff50ed..284cc22896 100644
--- a/src/calibre/ebooks/txt/txtml.py
+++ b/src/calibre/ebooks/txt/txtml.py
@@ -8,7 +8,8 @@ __docformat__ = 'restructuredtext en'
 Transform OEB content into plain text
 '''
 
-import os, re
+import os
+import re
 
 from lxml import etree
 
@@ -43,15 +44,15 @@ class TXTMLizer(object):
         return self.mlize_spine()
 
     def mlize_spine(self):
-        output = u''
-        output += self.get_toc()
+        output = [u'']
+        output.append(self.get_toc())
         for item in self.oeb_book.spine:
             self.log.debug('Converting %s to TXT...' % item.href)
             stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
             content = unicode(etree.tostring(item.data.find(XHTML('body')), encoding=unicode))
             content = self.remove_newlines(content)
-            output += self.dump_text(etree.fromstring(content), stylizer)
-        output = self.cleanup_text(output)
+            output.append(self.get_text(etree.fromstring(content), stylizer))
+        output = self.cleanup_text(u''.join(output))
 
         return output
 
@@ -64,13 +65,13 @@ class TXTMLizer(object):
         return text
 
     def get_toc(self):
-        toc = u''
+        toc = [u'']
         if getattr(self.opts, 'inline_toc', None):
             self.log.debug('Generating table of contents...')
-            toc += u'%s\n\n' % _(u'Table of Contents:')
+            toc.append(u'%s\n\n' % _(u'Table of Contents:'))
             for item in self.oeb_book.toc:
-                toc += u'* %s\n\n' % item.title
-        return toc
+                toc.append(u'* %s\n\n' % item.title)
+        return ''.join(toc)
 
     def cleanup_text(self, text):
         self.log.debug('\tClean up text...')
@@ -99,6 +100,17 @@ class TXTMLizer(object):
 
         return text
 
+    def get_text(self, elem, stylizer):
+        '''
+        @elem: The element in the etree that we are working on.
+        @stylizer: The style information attached to the element.
+        @end: The last two characters of the text from the previous element.
+              This is used to determine if a blank line is needed when starting
+              a new block element.
+        '''
+        
+        return u''.join(self.dump_text(elem, stylizer))
+
     def dump_text(self, elem, stylizer, end=''):
         '''
         @elem: The element in the etree that we are working on.
@@ -110,14 +122,14 @@ class TXTMLizer(object):
 
         if not isinstance(elem.tag, basestring) \
            or namespace(elem.tag) != XHTML_NS:
-            return u''
+            return ['']
 
-        text = u''
+        text = ['']
         style = stylizer.style(elem)
 
         if style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') \
            or style['visibility'] == 'hidden':
-            return u''
+            return ['']
 
         tag = barename(elem.tag)
         in_block = False
@@ -125,20 +137,23 @@ class TXTMLizer(object):
         # Are we in a paragraph block?
         if tag in BLOCK_TAGS or style['display'] in BLOCK_STYLES:
             in_block = True
-            if not end.endswith(os.linesep + os.linesep) and hasattr(elem, 'text') and elem.text != None and elem.text.strip() != '':
-                text += os.linesep + os.linesep
+            if not end.endswith('\n\n') and hasattr(elem, 'text') and elem.text != None and elem.text.strip() != '':
+                text.append('\n\n')
 
         # Proccess tags that contain text.
         if hasattr(elem, 'text') and elem.text != None and elem.text.strip() != '':
-            text += elem.text
+            text.append(elem.text)
 
         for item in elem:
-            text += self.dump_text(item, stylizer, text[-2:])
+            en = u''
+            if len(text) >= 2:
+                en = text[-1][-2:]
+            text += self.dump_text(item, stylizer, en)
 
         if in_block:
-            text += os.linesep + os.linesep
+            text.append('\n\n')
 
         if hasattr(elem, 'tail') and elem.tail != None and elem.tail.strip() != '':
-            text += elem.tail
+            text.append(elem.tail)
 
         return text

From 8245d7d7e84ce7ec74ba00b94a34bad67ad44ed1 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Wed, 2 Sep 2009 17:56:54 -0400
Subject: [PATCH 015/120] TXT Output: Option to not add a blank line
 between paragraphs. Option to indent the beginning of each paragraph by a
 tab.

---
 src/calibre/ebooks/txt/output.py       |  6 ++++++
 src/calibre/ebooks/txt/txtml.py        | 15 +++++++++++----
 src/calibre/gui2/convert/txt_output.ui | 18 ++++++++++++++++--
 3 files changed, 33 insertions(+), 6 deletions(-)

diff --git a/src/calibre/ebooks/txt/output.py b/src/calibre/ebooks/txt/output.py
index 6f0a768b8f..b3bda7fa9d 100644
--- a/src/calibre/ebooks/txt/output.py
+++ b/src/calibre/ebooks/txt/output.py
@@ -33,6 +33,12 @@ class TXTOutput(OutputFormatPlugin):
         OptionRecommendation(name='inline_toc',
             recommended_value=False, level=OptionRecommendation.LOW,
             help=_('Add Table of Contents to beginning of the book.')),
+        OptionRecommendation(name='flush_paras',
+            recommended_value=False, level=OptionRecommendation.LOW,
+            help=_('Do not add a blank line between paragraphs.')),
+        OptionRecommendation(name='indent_paras',
+            recommended_value=False, level=OptionRecommendation.LOW,
+            help=_('Add a tab at the beginning of each paragraph.')),
      ])
 
     def convert(self, oeb_book, output_path, input_plugin, opts, log):
diff --git a/src/calibre/ebooks/txt/txtml.py b/src/calibre/ebooks/txt/txtml.py
index 284cc22896..10c132e1e1 100644
--- a/src/calibre/ebooks/txt/txtml.py
+++ b/src/calibre/ebooks/txt/txtml.py
@@ -41,6 +41,7 @@ class TXTMLizer(object):
         self.log.info('Converting XHTML to TXT...')
         self.oeb_book = oeb_book
         self.opts = opts
+        
         return self.mlize_spine()
 
     def mlize_spine(self):
@@ -92,12 +93,18 @@ class TXTMLizer(object):
 
         # Remove excessive newlines.
         text = re.sub('\n[ ]+\n', '\n\n', text)
-        text = re.sub('\n{3,}', '\n\n', text)
+        if self.opts.flush_paras:
+            text = re.sub('\n{2,}', '\n', text)
+        else:
+            text = re.sub('\n{3,}', '\n\n', text)
 
         # Replace spaces at the beginning and end of lines
         text = re.sub('(?imu)^[ ]+', '', text)
         text = re.sub('(?imu)[ ]+$', '', text)
 
+        if self.opts.indent_paras:
+            text = re.sub('(?imu)^(?=.)', '\t', text)
+
         return text
 
     def get_text(self, elem, stylizer):
@@ -137,8 +144,8 @@ class TXTMLizer(object):
         # Are we in a paragraph block?
         if tag in BLOCK_TAGS or style['display'] in BLOCK_STYLES:
             in_block = True
-            if not end.endswith('\n\n') and hasattr(elem, 'text') and elem.text != None and elem.text.strip() != '':
-                text.append('\n\n')
+            if not end.endswith(u'\n\n') and hasattr(elem, 'text') and elem.text != None and elem.text.strip() != '':
+                text.append(u'\n\n')
 
         # Proccess tags that contain text.
         if hasattr(elem, 'text') and elem.text != None and elem.text.strip() != '':
@@ -151,7 +158,7 @@ class TXTMLizer(object):
             text += self.dump_text(item, stylizer, en)
 
         if in_block:
-            text.append('\n\n')
+            text.append(u'\n\n')
 
         if hasattr(elem, 'tail') and elem.tail != None and elem.tail.strip() != '':
             text.append(elem.tail)
diff --git a/src/calibre/gui2/convert/txt_output.ui b/src/calibre/gui2/convert/txt_output.ui
index 6e62040533..900198aca9 100644
--- a/src/calibre/gui2/convert/txt_output.ui
+++ b/src/calibre/gui2/convert/txt_output.ui
@@ -27,7 +27,7 @@
    <item row="0" column="1">
     <widget class="QComboBox" name="opt_newline"/>
    </item>
-   <item row="2" column="0">
+   <item row="4" column="0">
     <spacer name="verticalSpacer">
      <property name="orientation">
       <enum>Qt::Vertical</enum>
@@ -40,13 +40,27 @@
      </property>
     </spacer>
    </item>
-   <item row="1" column="0">
+   <item row="1" column="0" colspan="2">
     <widget class="QCheckBox" name="opt_inline_toc">
      <property name="text">
       <string>&amp;Inline TOC</string>
      </property>
     </widget>
    </item>
+   <item row="2" column="0" colspan="2">
+    <widget class="QCheckBox" name="opt_flush_paras">
+     <property name="text">
+      <string>Do not add a blank line between paragraphs.</string>
+     </property>
+    </widget>
+   </item>
+   <item row="3" column="0" colspan="2">
+    <widget class="QCheckBox" name="opt_indent_paras">
+     <property name="text">
+      <string>Add a tab at the beginning of each paragraph</string>
+     </property>
+    </widget>
+   </item>
   </layout>
  </widget>
  <resources/>

From 7fc881735f7f59e80276a332432f658cce91b917 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Wed, 2 Sep 2009 18:16:27 -0400
Subject: [PATCH 016/120] PML output: Optimize string manipulation.

---
 src/calibre/ebooks/pml/pmlml.py | 65 +++++++++++++++++----------------
 1 file changed, 33 insertions(+), 32 deletions(-)

diff --git a/src/calibre/ebooks/pml/pmlml.py b/src/calibre/ebooks/pml/pmlml.py
index 2438fd9bef..72b55d00b1 100644
--- a/src/calibre/ebooks/pml/pmlml.py
+++ b/src/calibre/ebooks/pml/pmlml.py
@@ -85,11 +85,11 @@ class PMLMLizer(object):
     def pmlmlize_spine(self):
         self.image_hrefs = {}
         self.link_hrefs = {}
-        output = u''
-        output += self.get_cover_page()
-        output += u'ghji87yhjko0Caliblre-toc-placeholder-for-insertion-later8ujko0987yjk'
-        output += self.get_text()
-        output = output.replace(u'ghji87yhjko0Caliblre-toc-placeholder-for-insertion-later8ujko0987yjk', self.get_toc())
+        output = [u'']
+        output.append(self.get_cover_page())
+        output.append(u'ghji87yhjko0Caliblre-toc-placeholder-for-insertion-later8ujko0987yjk')
+        output.append(self.get_text())
+        output = ''.join(output).replace(u'ghji87yhjko0Caliblre-toc-placeholder-for-insertion-later8ujko0987yjk', self.get_toc())
         output = self.clean_text(output)
         return output
 
@@ -101,29 +101,29 @@ class PMLMLizer(object):
             item = self.oeb_book.manifest.hrefs[href]
             if item.spine_position is None:
                 stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
-                output += self.dump_text(item.data.find(XHTML('body')), stylizer, item)
+                output += ''.join(self.dump_text(item.data.find(XHTML('body')), stylizer, item))
         return output
 
     def get_toc(self):
-        toc = u''
+        toc = [u'']
         if self.opts.inline_toc:
             self.log.debug('Generating table of contents...')
-            toc += u'\\X0%s\\X0\n\n' % _('Table of Contents:')
+            toc.append(u'\\X0%s\\X0\n\n' % _('Table of Contents:'))
             for item in self.oeb_book.toc:
                 if item.href in self.link_hrefs.keys():
-                    toc += '* \\q="#%s"%s\\q\n' % (self.link_hrefs[item.href], item.title)
+                    toc.append('* \\q="#%s"%s\\q\n' % (self.link_hrefs[item.href], item.title))
                 else:
                     self.oeb.warn('Ignoring toc item: %s not found in document.' % item)
-        return toc
+        return ''.join(toc)
 
     def get_text(self):
-        text = u''
+        text = [u'']
         for item in self.oeb_book.spine:
             self.log.debug('Converting %s to PML markup...' % item.href)
             stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
-            text += self.add_page_anchor(item)
+            text.append(self.add_page_anchor(item))
             text += self.dump_text(item.data.find(XHTML('body')), stylizer, item)
-        return text
+        return ''.join(text)
 
     def add_page_anchor(self, page):
         return self.get_anchor(page, '')
@@ -163,14 +163,14 @@ class PMLMLizer(object):
     def dump_text(self, elem, stylizer, page, tag_stack=[]):
         if not isinstance(elem.tag, basestring) \
            or namespace(elem.tag) != XHTML_NS:
-            return u''
+            return [u'']
 
-        text = u''
+        text = [u'']
         style = stylizer.style(elem)
 
         if style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') \
            or style['visibility'] == 'hidden':
-            return u''
+            return [u'']
 
         tag = barename(elem.tag)
         tag_count = 0
@@ -187,24 +187,25 @@ class PMLMLizer(object):
             if elem.attrib.get('src', None):
                 if page.abshref(elem.attrib['src']) not in self.image_hrefs.keys():
                     self.image_hrefs[page.abshref(elem.attrib['src'])] = image_name('%s' % len(self.image_hrefs.keys()), self.image_hrefs.keys()).strip('\x00')
-                text += '\\m="%s"' % self.image_hrefs[page.abshref(elem.attrib['src'])]
+                text.append('\\m="%s"' % self.image_hrefs[page.abshref(elem.attrib['src'])])
         if tag == 'hr':
-            text += '\\w'
+            w = '\\w'
             width = elem.get('width')
             if width:
-                text += '="%s%%"' % width
+                w += '="%s%%"' % width
             else:
-                text += '="50%"'
+                w += '="50%"'
+            text.append(w)
 
         # Process style information that needs holds a single tag
         # Commented out because every page in an OEB book starts with this style
         #if style['page-break-before'] == 'always':
-        #    text += '\\p'
+        #    text.append('\\p')
 
         pml_tag = TAG_MAP.get(tag, None)
         if pml_tag and pml_tag not in tag_stack:
             tag_count += 1
-            text += '\\%s' % pml_tag
+            text.append('\\%s' % pml_tag)
             tag_stack.append(pml_tag)
 
         # Special processing of tags that require an argument.
@@ -219,27 +220,27 @@ class PMLMLizer(object):
                     if href not in self.link_hrefs.keys():
                         self.link_hrefs[href] = 'calibre_link-%s' % len(self.link_hrefs.keys())
                     href = self.link_hrefs[href]
-                    text += '\\q="#%s"' % href
+                    text.append('\\q="#%s"' % href)
                 tag_count += 1
                 tag_stack.append('q')
 
         # Anchor ids
         id_name = elem.get('id')
         if id_name:
-            text += self.get_anchor(page, id_name)
+            text.append(self.get_anchor(page, id_name))
 
         # Processes style information
         for s in STYLES:
             style_tag = s[1].get(style[s[0]], None)
             if style_tag and style_tag not in tag_stack:
                 tag_count += 1
-                text += '\\%s' % style_tag
+                text.append('\\%s' % style_tag)
                 tag_stack.append(style_tag)
         # margin
 
         # Proccess tags that contain text.
         if hasattr(elem, 'text') and elem.text != None and elem.text.strip() != '':
-            text += self.elem_text(elem, tag_stack)
+            text.append(self.elem_text(elem, tag_stack))
 
         for item in elem:
             text += self.dump_text(item, stylizer, page, tag_stack)
@@ -249,16 +250,16 @@ class PMLMLizer(object):
             close_tag_list.insert(0, tag_stack.pop())
         text += self.close_tags(close_tag_list)
         if tag in SEPARATE_TAGS:
-            text += os.linesep + os.linesep
+            text.append(os.linesep + os.linesep)
 
         if 'block' not in tag_stack:
-            text += os.linesep + os.linesep
+            text.append(os.linesep + os.linesep)
 
         #if style['page-break-after'] == 'always':
-        #    text += '\\p'
+        #    text.append('\\p')
 
         if hasattr(elem, 'tail') and elem.tail != None and elem.tail.strip() != '':
-            text += self.elem_tail(elem, tag_stack)
+            text.append(self.elem_tail(elem, tag_stack))
 
         return text
 
@@ -276,10 +277,10 @@ class PMLMLizer(object):
         return text
 
     def close_tags(self, tags):
-        text = u''
+        text = [u'']
         for i in range(0, len(tags)):
             tag = tags.pop()
             if tag != 'block':
-                text += '\\%s' % tag
+                text.append('\\%s' % tag)
         return text
 

From 7c9b132a7e079e83e69562b955a8748f82e31e01 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Wed, 2 Sep 2009 18:34:41 -0400
Subject: [PATCH 017/120] RB output: Optimize string manipulation.

---
 src/calibre/ebooks/rb/rbml.py | 52 +++++++++++++++++------------------
 1 file changed, 26 insertions(+), 26 deletions(-)

diff --git a/src/calibre/ebooks/rb/rbml.py b/src/calibre/ebooks/rb/rbml.py
index 679ccaa39b..e9c8d98c10 100644
--- a/src/calibre/ebooks/rb/rbml.py
+++ b/src/calibre/ebooks/rb/rbml.py
@@ -71,12 +71,12 @@ class RBMLizer(object):
 
     def mlize_spine(self):
         self.link_hrefs = {}
-        output = u'<HTML><HEAD><TITLE></TITLE></HEAD><BODY>'
-        output += self.get_cover_page()
-        output += u'ghji87yhjko0Caliblre-toc-placeholder-for-insertion-later8ujko0987yjk'
-        output += self.get_text()
-        output += u'</BODY></HTML>'
-        output = output.replace(u'ghji87yhjko0Caliblre-toc-placeholder-for-insertion-later8ujko0987yjk', self.get_toc())
+        output = [u'<HTML><HEAD><TITLE></TITLE></HEAD><BODY>']
+        output.append(self.get_cover_page())
+        output.append(u'ghji87yhjko0Caliblre-toc-placeholder-for-insertion-later8ujko0987yjk')
+        output.append(self.get_text())
+        output.append(u'</BODY></HTML>')
+        output = ''.join(output).replace(u'ghji87yhjko0Caliblre-toc-placeholder-for-insertion-later8ujko0987yjk', self.get_toc())
         output = self.clean_text(output)
         return output
 
@@ -92,26 +92,26 @@ class RBMLizer(object):
         return output
 
     def get_toc(self):
-        toc = u''
+        toc = [u'']
         if self.opts.inline_toc:
             self.log.debug('Generating table of contents...')
-            toc += u'<H1>%s</H1><UL>\n' % _('Table of Contents:')
+            toc.append(u'<H1>%s</H1><UL>\n' % _('Table of Contents:'))
             for item in self.oeb_book.toc:
                 if item.href in self.link_hrefs.keys():
-                    toc += '<LI><A HREF="#%s">%s</A></LI>\n' % (self.link_hrefs[item.href], item.title)
+                    toc.append('<LI><A HREF="#%s">%s</A></LI>\n' % (self.link_hrefs[item.href], item.title))
                 else:
                     self.oeb.warn('Ignoring toc item: %s not found in document.' % item)
-            toc += '</UL>'
-        return toc
+            toc.append('</UL>')
+        return ''.join(toc)
 
     def get_text(self):
-        output = u''
+        output = [u'']
         for item in self.oeb_book.spine:
             self.log.debug('Converting %s to RocketBook HTML...' % item.href)
             stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
-            output += self.add_page_anchor(item)
+            output.append(self.add_page_anchor(item))
             output += self.dump_text(item.data.find(XHTML('body')), stylizer, item)
-        return output
+        return ''.join(output)
 
     def add_page_anchor(self, page):
         return self.get_anchor(page, '')
@@ -135,14 +135,14 @@ class RBMLizer(object):
     def dump_text(self, elem, stylizer, page, tag_stack=[]):
         if not isinstance(elem.tag, basestring) \
            or namespace(elem.tag) != XHTML_NS:
-            return u''
+            return [u'']
 
-        text = u''
+        text = [u'']
         style = stylizer.style(elem)
 
         if style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') \
            or style['visibility'] == 'hidden':
-            return u''
+            return [u'']
 
         tag = barename(elem.tag)
         tag_count = 0
@@ -153,12 +153,12 @@ class RBMLizer(object):
             if elem.attrib.get('src', None):
                 if page.abshref(elem.attrib['src']) not in self.name_map.keys():
                     self.name_map[page.abshref(elem.attrib['src'])] = unique_name('%s' % len(self.image_hrefs.keys()), self.image_hrefs.keys(), self.name_map.keys())
-                text += '<IMG SRC="%s">' % self.name_map[page.abshref(elem.attrib['src'])]
+                text.append('<IMG SRC="%s">' % self.name_map[page.abshref(elem.attrib['src'])])
 
         rb_tag = tag.upper() if tag in TAGS else None
         if rb_tag:
             tag_count += 1
-            text += '<%s>' % rb_tag
+            text.append('<%s>' % rb_tag)
             tag_stack.append(rb_tag)
 
         # Anchors links
@@ -172,14 +172,14 @@ class RBMLizer(object):
                     if href not in self.link_hrefs.keys():
                         self.link_hrefs[href] = 'calibre_link-%s' % len(self.link_hrefs.keys())
                     href = self.link_hrefs[href]
-                    text += '<A HREF="#%s">' % href
+                    text.append('<A HREF="#%s">' % href)
                 tag_count += 1
                 tag_stack.append('A')
 
         # Anchor ids
         id_name = elem.get('id')
         if id_name:
-            text += self.get_anchor(page, id_name)
+            text.append(self.get_anchor(page, id_name))
 
         # Processes style information
         for s in STYLES:
@@ -187,12 +187,12 @@ class RBMLizer(object):
             if style_tag:
                 style_tag = style_tag.upper()
                 tag_count += 1
-                text += '<%s>' % style_tag
+                text.append('<%s>' % style_tag)
                 tag_stack.append(style_tag)
 
         # Proccess tags that contain text.
         if hasattr(elem, 'text') and elem.text != None and elem.text.strip() != '':
-            text += prepare_string_for_xml(elem.text)
+            text.append(prepare_string_for_xml(elem.text))
 
         for item in elem:
             text += self.dump_text(item, stylizer, page, tag_stack)
@@ -204,14 +204,14 @@ class RBMLizer(object):
         text += self.close_tags(close_tag_list)
 
         if hasattr(elem, 'tail') and elem.tail != None and elem.tail.strip() != '':
-                text += prepare_string_for_xml(elem.tail)
+                text.append(prepare_string_for_xml(elem.tail))
 
         return text
 
     def close_tags(self, tags):
-        text = u''
+        text = [u'']
         for i in range(0, len(tags)):
             tag = tags.pop()
-            text += '</%s>' % tag
+            text.append('</%s>' % tag)
 
         return text

From 017688ee5deefa3198c9349d2a98fa239b392281 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Wed, 2 Sep 2009 18:46:47 -0400
Subject: [PATCH 018/120] FB2 output: Optimize string manipulation. TXT Output:
 Fix merge issue.

---
 src/calibre/ebooks/fb2/fb2ml.py | 66 ++++++++++++++++-----------------
 src/calibre/ebooks/txt/txtml.py |  2 +-
 2 files changed, 34 insertions(+), 34 deletions(-)

diff --git a/src/calibre/ebooks/fb2/fb2ml.py b/src/calibre/ebooks/fb2/fb2ml.py
index 3bc3424c39..36e65f6f0e 100644
--- a/src/calibre/ebooks/fb2/fb2ml.py
+++ b/src/calibre/ebooks/fb2/fb2ml.py
@@ -67,14 +67,14 @@ class FB2MLizer(object):
     def fb2mlize_spine(self):
         self.image_hrefs = {}
         self.link_hrefs = {}
-        output = self.fb2_header()
-        output += self.get_cover_page()
-        output += u'ghji87yhjko0Caliblre-toc-placeholder-for-insertion-later8ujko0987yjk'
-        output += self.get_text()
-        output += self.fb2_body_footer()
-        output += self.fb2mlize_images()
-        output += self.fb2_footer()
-        output = output.replace(u'ghji87yhjko0Caliblre-toc-placeholder-for-insertion-later8ujko0987yjk', self.get_toc())
+        output = [self.fb2_header()]
+        output.append(self.get_cover_page())
+        output.append(u'ghji87yhjko0Caliblre-toc-placeholder-for-insertion-later8ujko0987yjk')
+        output.append(self.get_text())
+        output.append(self.fb2_body_footer())
+        output.append(self.fb2mlize_images())
+        output.append(self.fb2_footer())
+        output = ''.join(output).replace(u'ghji87yhjko0Caliblre-toc-placeholder-for-insertion-later8ujko0987yjk', self.get_toc())
         return u'<?xml version="1.0" encoding="UTF-8"?>\n%s' % etree.tostring(etree.fromstring(output), encoding=unicode, pretty_print=True)
 
     def fb2_header(self):
@@ -117,25 +117,25 @@ class FB2MLizer(object):
         return output
 
     def get_toc(self):
-        toc = u''
+        toc = [u'']
         if self.opts.inline_toc:
             self.log.debug('Generating table of contents...')
-            toc += u'<p>%s</p>' % _('Table of Contents:')
+            toc.append(u'<p>%s</p>' % _('Table of Contents:'))
             for item in self.oeb_book.toc:
                 if item.href in self.link_hrefs.keys():
-                    toc += '<p><a xlink:href="#%s">%s</a></p>\n' % (self.link_hrefs[item.href], item.title)
+                    toc.append('<p><a xlink:href="#%s">%s</a></p>\n' % (self.link_hrefs[item.href], item.title))
                 else:
                     self.oeb.warn('Ignoring toc item: %s not found in document.' % item)
-        return toc
+        return ''.join(toc)
 
     def get_text(self):
-        text = u''
+        text = [u'']
         for item in self.oeb_book.spine:
             self.log.debug('Converting %s to FictionBook2 XML' % item.href)
             stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
-            text += self.add_page_anchor(item)
+            text.append(self.add_page_anchor(item))
             text += self.dump_text(item.data.find(XHTML('body')), stylizer, item)
-        return text
+        return ''.join(text)
 
     def fb2_body_footer(self):
         return u'\n</section>\n</body>'
@@ -155,7 +155,7 @@ class FB2MLizer(object):
         return '<v id="%s"></v>' % aid
 
     def fb2mlize_images(self):
-        images = u''
+        images = [u'']
         for item in self.oeb_book.manifest:
             if item.media_type in OEB_RASTER_IMAGES:
                 try:
@@ -174,23 +174,23 @@ class FB2MLizer(object):
                             col = 1
                         col += 1
                         data += char
-                    images += '<binary id="%s" content-type="%s">%s\n</binary>' % (self.image_hrefs.get(item.href, '0000.JPEG'), item.media_type, data)
+                    images.append('<binary id="%s" content-type="%s">%s\n</binary>' % (self.image_hrefs.get(item.href, '0000.JPEG'), item.media_type, data))
                 except Exception as e:
                     self.log.error('Error: Could not include file %s becuase ' \
                         '%s.' % (item.href, e))
-        return images
+        return ''.join(images)
 
     def dump_text(self, elem, stylizer, page, tag_stack=[]):
         if not isinstance(elem.tag, basestring) \
            or namespace(elem.tag) != XHTML_NS:
-            return u''
+            return [u'']
             
-        fb2_text = u''
+        fb2_text = [u'']
         style = stylizer.style(elem)
 
         if style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') \
            or style['visibility'] == 'hidden':
-            return u''
+            return [u'']
         
         tag = barename(elem.tag)
         tag_count = 0
@@ -199,33 +199,33 @@ class FB2MLizer(object):
             if elem.attrib.get('src', None):
                 if page.abshref(elem.attrib['src']) not in self.image_hrefs.keys():
                     self.image_hrefs[page.abshref(elem.attrib['src'])] = '%s.jpg' % len(self.image_hrefs.keys())
-                fb2_text += '<image xlink:href="#%s" />' % self.image_hrefs[page.abshref(elem.attrib['src'])]
+                fb2_text.append('<image xlink:href="#%s" />' % self.image_hrefs[page.abshref(elem.attrib['src'])])
 
         if tag in TAG_LINKS:
             href = elem.get('href')
             if href:
                 href = prepare_string_for_xml(page.abshref(href))
                 if '://' in href:
-                    fb2_text += '<a xlink:href="%s">' % href
+                    fb2_text.append('<a xlink:href="%s">' % href)
                 else:
                     if '#' not in href:
                         href += '#'
                     if href not in self.link_hrefs.keys():
                         self.link_hrefs[href] = 'calibre_link-%s' % len(self.link_hrefs.keys())
                     href = self.link_hrefs[href]
-                    fb2_text += '<a xlink:href="#%s">' % href
+                    fb2_text.append('<a xlink:href="#%s">' % href)
                 tag_count += 1
                 tag_stack.append('a')
 
         # Anchor ids
         id_name = elem.get('id')
         if id_name:
-            fb2_text += self.get_anchor(page, id_name)
+            fb2_text.append(self.get_anchor(page, id_name))
 
         fb2_tag = TAG_MAP.get(tag, None)
         if fb2_tag and fb2_tag not in tag_stack:
             tag_count += 1
-            fb2_text += '<%s>' % fb2_tag
+            fb2_text.append('<%s>' % fb2_tag)
             tag_stack.append(fb2_tag)
 
         # Processes style information
@@ -233,15 +233,15 @@ class FB2MLizer(object):
             style_tag = s[1].get(style[s[0]], None)
             if style_tag:
                 tag_count += 1
-                fb2_text += '<%s>' % style_tag
+                fb2_text.append('<%s>' % style_tag)
                 tag_stack.append(style_tag)
 
         if tag in TAG_SPACE:
             if not fb2_text or fb2_text[-1] != ' ':
-                fb2_text += ' '
+                fb2_text.append(' ')
 
         if hasattr(elem, 'text') and elem.text != None:
-            fb2_text += prepare_string_for_xml(elem.text)
+            fb2_text.append(prepare_string_for_xml(elem.text))
         
         for item in elem:
             fb2_text += self.dump_text(item, stylizer, page, tag_stack)
@@ -253,16 +253,16 @@ class FB2MLizer(object):
 
         if hasattr(elem, 'tail') and elem.tail != None:
             if 'p' not in tag_stack:
-                fb2_text += '<p>%s</p>' % prepare_string_for_xml(elem.tail)
+                fb2_text.append('<p>%s</p>' % prepare_string_for_xml(elem.tail))
             else:
-                fb2_text += prepare_string_for_xml(elem.tail)
+                fb2_text.append(prepare_string_for_xml(elem.tail))
 
         return fb2_text
 
     def close_tags(self, tags):
-        fb2_text = u''
+        fb2_text = [u'']
         for i in range(0, len(tags)):
             fb2_tag = tags.pop()
-            fb2_text += '</%s>' % fb2_tag
+            fb2_text.append('</%s>' % fb2_tag)
 
         return fb2_text
diff --git a/src/calibre/ebooks/txt/txtml.py b/src/calibre/ebooks/txt/txtml.py
index eff239668d..c705bcf221 100644
--- a/src/calibre/ebooks/txt/txtml.py
+++ b/src/calibre/ebooks/txt/txtml.py
@@ -52,7 +52,7 @@ class TXTMLizer(object):
             stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
             content = unicode(etree.tostring(item.data.find(XHTML('body')), encoding=unicode))
             content = self.remove_newlines(content)
-            output += self.dump_text(etree.fromstring(content), stylizer))
+            output += self.dump_text(etree.fromstring(content), stylizer)
         output = self.cleanup_text(u''.join(output))
 
         return output

From 1214b476a7a798dd46bdf13b2c6a1b76e0e507ff Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Wed, 2 Sep 2009 20:22:18 -0400
Subject: [PATCH 019/120] GUI TXT Output: Correct import.

---
 src/calibre/gui2/convert/txt_output.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/calibre/gui2/convert/txt_output.py b/src/calibre/gui2/convert/txt_output.py
index c2474ac4b8..eca85f1292 100644
--- a/src/calibre/gui2/convert/txt_output.py
+++ b/src/calibre/gui2/convert/txt_output.py
@@ -6,7 +6,7 @@ __docformat__ = 'restructuredtext en'
 
 from calibre.gui2.convert.txt_output_ui import Ui_Form
 from calibre.gui2.convert import Widget
-from calibre.ebooks.txt.writer import TxtNewlines
+from calibre.ebooks.txt.newlines import TxtNewlines
 from calibre.gui2.widgets import BasicComboModel
 
 newline_model = None

From 392d9033b5689df1a002c7f2ce1c340300985a8f Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Wed, 2 Sep 2009 21:04:17 -0400
Subject: [PATCH 020/120] Implement bug #3232: Ask the user if they want to
 reconvert already converted files.

---
 src/calibre/gui2/tools.py | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/src/calibre/gui2/tools.py b/src/calibre/gui2/tools.py
index 91a25ca426..a6dec46ab2 100644
--- a/src/calibre/gui2/tools.py
+++ b/src/calibre/gui2/tools.py
@@ -12,7 +12,7 @@ import cPickle
 from PyQt4.Qt import QDialog
 
 from calibre.ptempfile import PersistentTemporaryFile
-from calibre.gui2 import warning_dialog
+from calibre.gui2 import warning_dialog, question_dialog
 from calibre.gui2.convert.single import NoSupportedInputFormats
 from calibre.gui2.convert.single import Config as SingleConfig
 from calibre.gui2.convert.bulk import BulkConfig
@@ -42,6 +42,9 @@ def convert_single_ebook(parent, db, book_ids, auto_conversion=False, out_format
                 result = d.exec_()
 
             if result == QDialog.Accepted:
+                if not convert_existing(parent, db, [book_id], d.output_format):
+                    continue
+
                 mi = db.get_metadata(book_id, True)
                 in_file = db.format_abspath(book_id, d.input_format, True)
 
@@ -100,6 +103,7 @@ def convert_bulk_ebook(parent, db, book_ids, out_format=None):
     output_format = d.output_format
     recs = cPickle.loads(d.recommendations)
 
+    book_ids = convert_existing(parent, db, book_ids, output_format)
     for i, book_id in enumerate(book_ids):
         temp_files = []
 
@@ -186,4 +190,15 @@ def fetch_scheduled_recipe(recipe, script):
 
     return 'gui_convert', args, _('Fetch news from ')+recipe.title, fmt.upper(), [pt]
 
+def convert_existing(parent, db, book_ids, output_format):
+    already_converted_ids = []
+    already_converted_titles = []
+    for book_id in book_ids:
+        if db.has_format(book_id, output_format, index_is_id=True):
+            already_converted_ids.append(book_id)
+            already_converted_titles.append(db.get_metadata(book_id, True).title)
 
+    if not question_dialog(parent, _('Convert existing'), _('The following books have already been converted to %s format. Do you wish to reconvert them?' % output_format), '\n'.join(already_converted_titles)):
+        book_ids = [x for x in book_ids if x not in already_converted_ids]
+
+    return book_ids

From ebb2cad20aefbd3602327a14b6f63194b293db72 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Thu, 3 Sep 2009 07:22:25 -0400
Subject: [PATCH 021/120] Fix bug #2913: crop PDF works.

---
 src/calibre/ebooks/pdf/manipulate/crop.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/src/calibre/ebooks/pdf/manipulate/crop.py b/src/calibre/ebooks/pdf/manipulate/crop.py
index de18dc66dc..8957320280 100644
--- a/src/calibre/ebooks/pdf/manipulate/crop.py
+++ b/src/calibre/ebooks/pdf/manipulate/crop.py
@@ -1,5 +1,4 @@
 # -*- coding: utf-8 -*-
-from __future__ import with_statement
 
 __license__   = 'GPL v3'
 __copyright__ = '2009, James Beal <james_@catbus.co.uk>, ' \
@@ -10,7 +9,9 @@ __docformat__ = 'restructuredtext en'
 Crop a pdf file
 '''
 
-import sys, re
+import sys
+import re
+from decimal import Decimal
 from optparse import OptionGroup, Option
 
 from calibre.ebooks.metadata.meta import metadata_from_formats
@@ -108,11 +109,11 @@ def crop_pdf(pdf_path, opts, metadata=None):
             mo = bounding_regex.search(blines.next())
             if mo == None:
                 raise Exception('Error in bounding file %s' % opts.bounding)
-            page.mediaBox.upperRight = (float(mo.group('top_x')), float(mo.group('top_y')))
-            page.mediaBox.lowerLeft  = (float(mo.group('bottom_x')), float(mo.group('bottom_y')))
+            page.mediaBox.upperRight = (float(mo.group('top_x')), Decimal(mo.group('top_y')))
+            page.mediaBox.lowerLeft  = (float(mo.group('bottom_x')), Decimal(mo.group('bottom_y')))
         else:
-            page.mediaBox.upperRight = (page.bleedBox.getUpperRight_x() - float(opts.top_right_x), page.bleedBox.getUpperRight_y() - float(opts.top_right_y))
-            page.mediaBox.lowerLeft  = (page.bleedBox.getLowerLeft_x() + float(opts.bottom_left_x), page.bleedBox.getLowerLeft_y() + float(opts.bottom_left_y))
+            page.mediaBox.upperRight = (page.bleedBox.getUpperRight_x() - Decimal(opts.top_right_x), page.bleedBox.getUpperRight_y() - Decimal(opts.top_right_y))
+            page.mediaBox.lowerLeft  = (page.bleedBox.getLowerLeft_x() + Decimal(opts.bottom_left_x), page.bleedBox.getLowerLeft_y() + Decimal(opts.bottom_left_y))
         output_pdf.addPage(page)
 
     with open(opts.output, 'wb') as output_file:

From ca124fba0354d92c1caf2402fad11d923e5dc3bc Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Fri, 4 Sep 2009 21:54:30 -0400
Subject: [PATCH 022/120] Fix bug #3405: PML small caps makes characters
 capital.

---
 src/calibre/ebooks/pml/pmlconverter.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/calibre/ebooks/pml/pmlconverter.py b/src/calibre/ebooks/pml/pmlconverter.py
index 1c3c749f76..2c97f509fa 100644
--- a/src/calibre/ebooks/pml/pmlconverter.py
+++ b/src/calibre/ebooks/pml/pmlconverter.py
@@ -34,7 +34,7 @@ PML_HTML_RULES = [
     (re.compile(r'\\B(?P<text>.*?)\\B', re.DOTALL), lambda match: '<b>%s</b>' % match.group('text') if match.group('text') else ''),
     (re.compile(r'\\Sp(?P<text>.*?)\\Sp', re.DOTALL), lambda match: '<sup>%s</sup>' % match.group('text') if match.group('text') else ''),
     (re.compile(r'\\Sb(?P<text>.*?)\\Sb', re.DOTALL), lambda match: '<sub>%s</sub>' % match.group('text') if match.group('text') else ''),
-    (re.compile(r'\\k(?P<text>.*?)\\k', re.DOTALL), lambda match: '<span style="font-size: 50%%">%s</span>' % match.group('text') if match.group('text') else ''),
+    (re.compile(r'\\k(?P<text>.*?)\\k', re.DOTALL), lambda match: '<span style="font-size: 50%%">%s</span>' % match.group('text').upper() if match.group('text') else ''),
     (re.compile(r'\\a(?P<num>\d\d\d)'), lambda match: '&#%s;' % match.group('num')),
     (re.compile(r'\\U(?P<num>\d\d\d\d)'), lambda match: '%s' % my_unichr(int(match.group('num'), 16))),
     (re.compile(r'\\m="(?P<name>.+?)"'), lambda match: '<img src="images/%s" />' % image_name(match.group('name')).strip('\x00')),

From 5196f6d006d4c8a3c52aa8f7e7f8f353ab1456a9 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Sat, 5 Sep 2009 08:12:27 -0400
Subject: [PATCH 023/120] Only ask to convert existing when there are already
 converted books.

---
 src/calibre/gui2/tools.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/src/calibre/gui2/tools.py b/src/calibre/gui2/tools.py
index 7e1655b425..5670031e68 100644
--- a/src/calibre/gui2/tools.py
+++ b/src/calibre/gui2/tools.py
@@ -198,10 +198,11 @@ def convert_existing(parent, db, book_ids, output_format):
             already_converted_ids.append(book_id)
             already_converted_titles.append(db.get_metadata(book_id, True).title)
 
-    if not question_dialog(parent, _('Convert existing'),
-            _('The following books have already been converted to %s format. '
-               'Do you wish to reconvert them?') % output_format,
-            '\n'.join(already_converted_titles)):
-        book_ids = [x for x in book_ids if x not in already_converted_ids]
+    if already_converted_ids:
+        if not question_dialog(parent, _('Convert existing'),
+                _('The following books have already been converted to %s format. '
+                   'Do you wish to reconvert them?') % output_format,
+                '\n'.join(already_converted_titles)):
+            book_ids = [x for x in book_ids if x not in already_converted_ids]
 
     return book_ids

From 0bba611fe0d763fa6323962c304416448527ee9a Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Sat, 5 Sep 2009 08:44:32 -0400
Subject: [PATCH 024/120] Fine tune scrolling in the viewer.

---
 src/calibre/gui2/viewer/documentview.py | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/src/calibre/gui2/viewer/documentview.py b/src/calibre/gui2/viewer/documentview.py
index 3cda941be6..b8dcbb8a7f 100644
--- a/src/calibre/gui2/viewer/documentview.py
+++ b/src/calibre/gui2/viewer/documentview.py
@@ -541,11 +541,6 @@ class DocumentView(QWebView):
             if self.manager is not None:
                 self.manager.scrolled(self.scroll_fraction)
 
-    def wheel_event(self, down=True):
-        QWebView.wheelEvent(self,
-            QWheelEvent(QPoint(100, 100), (-120 if down else 120),
-                        Qt.NoButton, Qt.NoModifier))
-
     def next_page(self):
         delta_y = self.document.window_height - 25
         if self.document.at_bottom:
@@ -654,9 +649,9 @@ class DocumentView(QWebView):
             else:
                 self.scroll_to(1)
         elif key in [Qt.Key_J]:
-            self.wheel_event()
+            self.scroll_by(y=15)
         elif key in [Qt.Key_K]:
-            self.wheel_event(down=False)
+            self.scroll_by(y=-15)
         elif key in [Qt.Key_H]:
             self.scroll_by(x=-15)
         elif key in [Qt.Key_L]:

From ec48f4029ba346bc10be45644bfbe7feefe0629a Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Sat, 5 Sep 2009 16:48:32 -0400
Subject: [PATCH 025/120] GUI: Regex builder for removing header and footer.

---
 src/calibre/gui2/convert/__init__.py          |   6 +-
 src/calibre/gui2/convert/regex_builder.py     | 138 ++++++++++++++++++
 src/calibre/gui2/convert/regex_builder.ui     |  80 ++++++++++
 .../gui2/convert/structure_detection.py       |   9 +-
 .../gui2/convert/structure_detection.ui       |  45 ++----
 .../convert/{xpath_edit.ui => xexp_edit.ui}   |   0
 src/calibre/gui2/convert/xpath_wizard.py      |   2 +-
 7 files changed, 247 insertions(+), 33 deletions(-)
 create mode 100644 src/calibre/gui2/convert/regex_builder.py
 create mode 100644 src/calibre/gui2/convert/regex_builder.ui
 rename src/calibre/gui2/convert/{xpath_edit.ui => xexp_edit.ui} (100%)

diff --git a/src/calibre/gui2/convert/__init__.py b/src/calibre/gui2/convert/__init__.py
index bbab3a1edd..a96008b1c3 100644
--- a/src/calibre/gui2/convert/__init__.py
+++ b/src/calibre/gui2/convert/__init__.py
@@ -77,6 +77,7 @@ class Widget(QWidget):
 
     def get_value(self, g):
         from calibre.gui2.convert.xpath_wizard import XPathEdit
+        from calibre.gui2.convert.regex_builder import RegexEdit
         ret = self.get_value_handler(g)
         if ret != 'this is a dummy return value, xcswx1avcx4x':
             return ret
@@ -94,12 +95,15 @@ class Widget(QWidget):
             return bool(g.isChecked())
         elif isinstance(g, XPathEdit):
             return g.xpath if g.xpath else None
+        elif isinstance(g, RegexEdit):
+            return g.regex if g.regex else None
         else:
             raise Exception('Can\'t get value from %s'%type(g))
 
 
     def set_value(self, g, val):
         from calibre.gui2.convert.xpath_wizard import XPathEdit
+        from calibre.gui2.convert.regex_builder import RegexEdit
         if self.set_value_handler(g, val):
             return
         if isinstance(g, (QSpinBox, QDoubleSpinBox)):
@@ -116,7 +120,7 @@ class Widget(QWidget):
             g.setCurrentIndex(idx)
         elif isinstance(g, QCheckBox):
             g.setCheckState(Qt.Checked if bool(val) else Qt.Unchecked)
-        elif isinstance(g, XPathEdit):
+        elif isinstance(g, (XPathEdit, RegexEdit)):
             g.edit.setText(val if val else '')
         else:
             raise Exception('Can\'t set value %s in %s'%(repr(val),
diff --git a/src/calibre/gui2/convert/regex_builder.py b/src/calibre/gui2/convert/regex_builder.py
new file mode 100644
index 0000000000..07c63de93a
--- /dev/null
+++ b/src/calibre/gui2/convert/regex_builder.py
@@ -0,0 +1,138 @@
+# -*- coding: utf-8 -*-
+
+__license__ = 'GPL 3'
+__copyright__ = '2009, John Schember <john@nachtimwald.com>'
+__docformat__ = 'restructuredtext en'
+
+import re
+
+from PyQt4.QtCore import SIGNAL, Qt
+from PyQt4.QtGui import QDialog, QWidget, QDialogButtonBox, QFileDialog, \
+    QBrush, QSyntaxHighlighter, QTextCharFormat
+
+from calibre.gui2.convert.regex_builder_ui import Ui_RegexBuilder
+from calibre.gui2.convert.xexp_edit_ui import Ui_Form as Ui_Edit
+from calibre.gui2 import qstring_to_unicode
+from calibre.gui2 import error_dialog
+from calibre.ebooks.oeb.iterator import EbookIterator
+from calibre.gui2.dialogs.choose_format import ChooseFormatDialog
+
+class RegexHighlighter(QSyntaxHighlighter):
+
+    def __init__(self, *args):
+        QSyntaxHighlighter.__init__(self, *args)
+
+        self.regex = u''
+
+    def update_regex(self, regex):
+        self.regex = qstring_to_unicode(regex)
+        self.rehighlight()
+
+    def highlightBlock(self, text):
+        valid_regex = True
+        text = qstring_to_unicode(text)
+        format = QTextCharFormat()
+        format.setBackground(QBrush(Qt.yellow))
+
+        if self.regex:
+            try:
+                for mo in re.finditer(self.regex, text):
+                    self.setFormat(mo.start(), mo.end() - mo.start(), format)
+            except:
+                valid_regex = False
+        self.emit(SIGNAL('regex_valid(PyQt_PyObject)'), valid_regex)
+
+class RegexBuilder(QDialog, Ui_RegexBuilder):
+
+    def __init__(self, db, book_id, regex, *args):
+        QDialog.__init__(self, *args)
+        self.setupUi(self)
+
+        self.regex.setText(regex)
+        self.regex_valid(True)
+        self.highlighter = RegexHighlighter(self.preview.document())
+
+        if not db or not book_id:
+            self.button_box.addButton(QDialogButtonBox.Open)
+        else:
+            self.select_format(db, book_id)
+
+        self.connect(self.button_box, SIGNAL('clicked(QAbstractButton*)'), self.button_clicked)
+        self.connect(self.regex, SIGNAL('textChanged(QString)'), self.highlighter.update_regex)
+        self.connect(self.highlighter, SIGNAL('regex_valid(PyQt_PyObject)'), self.regex_valid)
+
+    def regex_valid(self, valid):
+        if valid:
+            self.regex.setStyleSheet('QLineEdit { color: black; background-color: white; }')
+        else:
+            self.regex.setStyleSheet('QLineEdit { color: black; background-color: rgb(255,0,0,20%); }')
+
+    def select_format(self, db, book_id):
+        format = None
+        formats = db.formats(book_id, index_is_id=True).upper().split(',')
+        if len(formats) == 1:
+            format = formats[0]
+        elif len(formats) > 1:
+            d = ChooseFormatDialog(self, _('Choose the format to view'), formats)
+            d.exec_()
+            if d.result() == QDialog.Accepted:
+                format = d.format()
+
+        if not format:
+            error_dialog(self, _('No formats available'), _('Cannot build regex using the GUI builder without a book.'))
+            QDialog.reject()
+        else:
+            self.open_book(db.format_abspath(book_id, format, index_is_id=True))
+
+    def open_book(self, pathtoebook):
+        self.iterator = EbookIterator(pathtoebook)
+        self.iterator.__enter__()
+        text = [u'']
+        for path in self.iterator.spine:
+            html = open(path, 'rb').read().decode(path.encoding, 'replace')
+            text.append(html)
+        self.preview.setPlainText('\n\n'.join(text))
+
+    def button_clicked(self, button):
+        if button == self.button_box.button(QDialogButtonBox.Open):
+            name = QFileDialog.getOpenFileName(self, _('Open book'), _('~'))
+            if name:
+                self.open_book(qstring_to_unicode(name))
+        if button == self.button_box.button(QDialogButtonBox.Ok):
+            self.accept()
+
+class RegexEdit(QWidget, Ui_Edit):
+
+    def __init__(self, parent=None):
+        QWidget.__init__(self, parent)
+        self.setupUi(self)
+
+        self.book_id = None
+        self.db = None
+
+        self.connect(self.button, SIGNAL('clicked()'), self.builder)
+
+    def builder(self):
+        bld = RegexBuilder(self.db, self.book_id, self.edit.text(), self)
+        if bld.exec_() == bld.Accepted:
+            self.edit.setText(bld.regex.text())
+
+    def set_msg(self, msg):
+        self.msg.setText(msg)
+
+    def set_book_id(self, book_id):
+        self.book_id = book_id
+
+    def set_db(self, db):
+        self.db = db
+
+    @property
+    def text(self):
+        return unicode(self.edit.text())
+
+    @property
+    def regex(self):
+        return self.text
+
+    def check(self):
+        return True
diff --git a/src/calibre/gui2/convert/regex_builder.ui b/src/calibre/gui2/convert/regex_builder.ui
new file mode 100644
index 0000000000..3448c4dded
--- /dev/null
+++ b/src/calibre/gui2/convert/regex_builder.ui
@@ -0,0 +1,80 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<ui version="4.0">
+ <class>RegexBuilder</class>
+ <widget class="QDialog" name="RegexBuilder">
+  <property name="geometry">
+   <rect>
+    <x>0</x>
+    <y>0</y>
+    <width>662</width>
+    <height>505</height>
+   </rect>
+  </property>
+  <property name="windowTitle">
+   <string>Regex Builder</string>
+  </property>
+  <layout class="QGridLayout" name="gridLayout">
+   <item row="1" column="0" colspan="2">
+    <widget class="QGroupBox" name="groupBox">
+     <property name="title">
+      <string>Preview</string>
+     </property>
+     <layout class="QVBoxLayout" name="verticalLayout">
+      <item>
+       <widget class="QPlainTextEdit" name="preview">
+        <property name="undoRedoEnabled">
+         <bool>false</bool>
+        </property>
+        <property name="readOnly">
+         <bool>true</bool>
+        </property>
+        <property name="textInteractionFlags">
+         <set>Qt::TextSelectableByMouse</set>
+        </property>
+       </widget>
+      </item>
+     </layout>
+    </widget>
+   </item>
+   <item row="2" column="1">
+    <widget class="QDialogButtonBox" name="button_box">
+     <property name="orientation">
+      <enum>Qt::Horizontal</enum>
+     </property>
+     <property name="standardButtons">
+      <set>QDialogButtonBox::Cancel|QDialogButtonBox::Ok</set>
+     </property>
+    </widget>
+   </item>
+   <item row="0" column="0">
+    <widget class="QLabel" name="label">
+     <property name="text">
+      <string>Regex:</string>
+     </property>
+    </widget>
+   </item>
+   <item row="0" column="1">
+    <widget class="QLineEdit" name="regex"/>
+   </item>
+  </layout>
+ </widget>
+ <resources/>
+ <connections>
+  <connection>
+   <sender>button_box</sender>
+   <signal>rejected()</signal>
+   <receiver>RegexBuilder</receiver>
+   <slot>reject()</slot>
+   <hints>
+    <hint type="sourcelabel">
+     <x>316</x>
+     <y>260</y>
+    </hint>
+    <hint type="destinationlabel">
+     <x>286</x>
+     <y>274</y>
+    </hint>
+   </hints>
+  </connection>
+ </connections>
+</ui>
diff --git a/src/calibre/gui2/convert/structure_detection.py b/src/calibre/gui2/convert/structure_detection.py
index 00c5e30d6b..65e6a1d62a 100644
--- a/src/calibre/gui2/convert/structure_detection.py
+++ b/src/calibre/gui2/convert/structure_detection.py
@@ -34,11 +34,18 @@ class StructureDetectionWidget(Widget, Ui_Form):
         self.opt_chapter.set_msg(_('Detect chapters at (XPath expression):'))
         self.opt_page_breaks_before.set_msg(_('Insert page breaks before '
             '(XPath expression):'))
+        self.opt_header_regex.set_msg(_('Header regular expression:'))
+        self.opt_header_regex.set_book_id(book_id)
+        self.opt_header_regex.set_db(db)
+        self.opt_footer_regex.set_msg(_('Footer regular expression:'))
+        self.opt_footer_regex.set_book_id(book_id)
+        self.opt_footer_regex.set_db(db)
+
     def pre_commit_check(self):
         for x in ('header_regex', 'footer_regex'):
             x = getattr(self, 'opt_'+x)
             try:
-                pat = unicode(x.text())
+                pat = unicode(x.regex)
                 re.compile(pat)
             except Exception, err:
                 error_dialog(self, _('Invalid regular expression'),
diff --git a/src/calibre/gui2/convert/structure_detection.ui b/src/calibre/gui2/convert/structure_detection.ui
index 6952abce96..e4414473f5 100644
--- a/src/calibre/gui2/convert/structure_detection.ui
+++ b/src/calibre/gui2/convert/structure_detection.ui
@@ -28,8 +28,7 @@
     </widget>
    </item>
    <item row="1" column="1">
-    <widget class="QComboBox" name="opt_chapter_mark">
-    </widget>
+    <widget class="QComboBox" name="opt_chapter_mark"/>
    </item>
    <item row="2" column="0">
     <widget class="QCheckBox" name="opt_remove_first_image">
@@ -45,27 +44,17 @@
      </property>
     </widget>
    </item>
-   <item row="8" column="0">
-    <widget class="QLabel" name="label_3">
-     <property name="text">
-      <string>&amp;Footer regular expression:</string>
-     </property>
-     <property name="buddy">
-      <cstring>opt_footer_regex</cstring>
-     </property>
-    </widget>
-   </item>
-   <item row="10" column="0" colspan="2">
+   <item row="8" column="0" colspan="2">
     <widget class="QCheckBox" name="opt_preprocess_html">
      <property name="text">
       <string>&amp;Preprocess input file to possibly improve structure detection</string>
      </property>
     </widget>
    </item>
-   <item row="11" column="0" colspan="2">
+   <item row="9" column="0" colspan="2">
     <widget class="XPathEdit" name="opt_page_breaks_before" native="true"/>
    </item>
-   <item row="12" column="0">
+   <item row="10" column="0">
     <spacer name="verticalSpacer">
      <property name="orientation">
       <enum>Qt::Vertical</enum>
@@ -78,17 +67,7 @@
      </property>
     </spacer>
    </item>
-   <item row="5" column="0">
-    <widget class="QLabel" name="label_2">
-     <property name="text">
-      <string>&amp;Header regular expression:</string>
-     </property>
-     <property name="buddy">
-      <cstring>opt_header_regex</cstring>
-     </property>
-    </widget>
-   </item>
-   <item row="7" column="0">
+   <item row="6" column="0">
     <widget class="QCheckBox" name="opt_remove_footer">
      <property name="text">
       <string>Remove F&amp;ooter</string>
@@ -102,11 +81,11 @@
      </property>
     </widget>
    </item>
-   <item row="9" column="0" colspan="2">
-    <widget class="QLineEdit" name="opt_footer_regex"/>
+   <item row="5" column="0" colspan="2">
+    <widget class="RegexEdit" name="opt_header_regex" native="true"/>
    </item>
-   <item row="6" column="0" colspan="2">
-    <widget class="QLineEdit" name="opt_header_regex"/>
+   <item row="7" column="0" colspan="2">
+    <widget class="RegexEdit" name="opt_footer_regex" native="true"/>
    </item>
   </layout>
  </widget>
@@ -117,6 +96,12 @@
    <header>convert/xpath_wizard.h</header>
    <container>1</container>
   </customwidget>
+  <customwidget>
+   <class>RegexEdit</class>
+   <extends>QWidget</extends>
+   <header>regex_builder.h</header>
+   <container>1</container>
+  </customwidget>
  </customwidgets>
  <resources/>
  <connections/>
diff --git a/src/calibre/gui2/convert/xpath_edit.ui b/src/calibre/gui2/convert/xexp_edit.ui
similarity index 100%
rename from src/calibre/gui2/convert/xpath_edit.ui
rename to src/calibre/gui2/convert/xexp_edit.ui
diff --git a/src/calibre/gui2/convert/xpath_wizard.py b/src/calibre/gui2/convert/xpath_wizard.py
index 9b8e44ddaa..ef42a876d3 100644
--- a/src/calibre/gui2/convert/xpath_wizard.py
+++ b/src/calibre/gui2/convert/xpath_wizard.py
@@ -9,7 +9,7 @@ __docformat__ = 'restructuredtext en'
 from PyQt4.Qt import QDialog, QWidget, SIGNAL, Qt, QDialogButtonBox, QVBoxLayout
 
 from calibre.gui2.convert.xpath_wizard_ui import Ui_Form
-from calibre.gui2.convert.xpath_edit_ui import Ui_Form as Ui_Edit
+from calibre.gui2.convert.xexp_edit_ui import Ui_Form as Ui_Edit
 
 
 class WizardWidget(QWidget, Ui_Form):

From 2de625b3e1355737a54185deb3bb31ee6a56ddd8 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Sun, 6 Sep 2009 12:59:21 -0400
Subject: [PATCH 026/120] Implement #3418: Handle print style formatting for
 TXT input.

---
 src/calibre/ebooks/pdb/input.py          |  6 ++++++
 src/calibre/ebooks/pdb/palmdoc/reader.py |  9 ++++++---
 src/calibre/ebooks/pdb/ztxt/reader.py    |  9 ++++++---
 src/calibre/ebooks/txt/input.py          | 13 +++++++++++--
 src/calibre/ebooks/txt/processor.py      |  6 +++++-
 src/calibre/gui2/convert/pdb_input.py    |  2 +-
 src/calibre/gui2/convert/pdb_input.ui    |  9 ++++++++-
 src/calibre/gui2/convert/txt_input.py    |  2 +-
 src/calibre/gui2/convert/txt_input.ui    | 13 ++++++++++---
 9 files changed, 54 insertions(+), 15 deletions(-)

diff --git a/src/calibre/ebooks/pdb/input.py b/src/calibre/ebooks/pdb/input.py
index 3ad1a6121c..8f4751b42b 100644
--- a/src/calibre/ebooks/pdb/input.py
+++ b/src/calibre/ebooks/pdb/input.py
@@ -22,6 +22,12 @@ class PDBInput(InputFormatPlugin):
             help=_('Normally calibre treats blank lines as paragraph markers. '
                 'With this option it will assume that every line represents '
                 'a paragraph instead.')),
+        OptionRecommendation(name='print_formatted_paras', recommended_value=False,
+            help=_('Normally calibre treats blank lines as paragraph markers. '
+                'With this option it will assume that every line starting with '
+                'an indent (either a tab or 2+ spaces) represents a paragraph.'
+                'Paragraphs end when the next line that starts with an indent '
+                'is reached.')),
     ])
 
     def convert(self, stream, options, file_ext, log,
diff --git a/src/calibre/ebooks/pdb/palmdoc/reader.py b/src/calibre/ebooks/pdb/palmdoc/reader.py
index 8992382597..aaa121f1ed 100644
--- a/src/calibre/ebooks/pdb/palmdoc/reader.py
+++ b/src/calibre/ebooks/pdb/palmdoc/reader.py
@@ -13,8 +13,8 @@ import struct
 
 from calibre.ebooks.compression.palmdoc import decompress_doc
 from calibre.ebooks.pdb.formatreader import FormatReader
-from calibre.ebooks.txt.processor import convert_basic, separate_paragraphs, \
-    opf_writer
+from calibre.ebooks.txt.processor import convert_basic, opf_writer, \
+    separate_paragraphs_single_line
 
 class HeaderRecord(object):
     '''
@@ -36,6 +36,7 @@ class Reader(FormatReader):
         self.log = log
         self.encoding = options.input_encoding
         self.single_line_paras = options.single_line_paras
+        self.print_formatted_paras = options.print_formatted_paras
 
         self.sections = []
         for i in range(header.num_sections):
@@ -63,7 +64,9 @@ class Reader(FormatReader):
 
         self.log.info('Converting text to OEB...')
         if self.single_line_paras:
-            txt = separate_paragraphs(txt)
+            txt = separate_paragraphs_single_line(txt)
+        if self.print_formatted_paras:
+            txt = separate_paragraphs_print_formatted(txt)
         html = convert_basic(txt)
         with open(os.path.join(output_dir, 'index.html'), 'wb') as index:
             index.write(html.encode('utf-8'))
diff --git a/src/calibre/ebooks/pdb/ztxt/reader.py b/src/calibre/ebooks/pdb/ztxt/reader.py
index 664f498bee..4379159d81 100644
--- a/src/calibre/ebooks/pdb/ztxt/reader.py
+++ b/src/calibre/ebooks/pdb/ztxt/reader.py
@@ -12,8 +12,8 @@ import os, struct, zlib
 
 from calibre.ebooks.pdb.formatreader import FormatReader
 from calibre.ebooks.pdb.ztxt import zTXTError
-from calibre.ebooks.txt.processor import convert_basic, separate_paragraphs, \
-    opf_writer
+from calibre.ebooks.txt.processor import convert_basic, opf_writer, \
+    separate_paragraphs_single_line
 
 SUPPORTED_VERSION = (1, 40)
 
@@ -40,6 +40,7 @@ class Reader(FormatReader):
         self.log = log
         self.encoding = options.input_encoding
         self.single_line_paras = options.single_line_paras
+        self.print_formatted_paras = options.print_formatted_paras
     
         self.sections = []
         for i in range(header.num_sections):
@@ -79,7 +80,9 @@ class Reader(FormatReader):
 
         self.log.info('Converting text to OEB...')
         if self.single_line_paras:
-            txt = separate_paragraphs(txt)
+            txt = separate_paragraphs_single_line(txt)
+        if self.print_formatted_paras:
+            txt = separate_paragraphs_print_formatted(txt)
         html = convert_basic(txt)
         with open(os.path.join(output_dir, 'index.html'), 'wb') as index:
             index.write(html.encode('utf-8'))
diff --git a/src/calibre/ebooks/txt/input.py b/src/calibre/ebooks/txt/input.py
index f52bfa6fb5..eb86113f7a 100644
--- a/src/calibre/ebooks/txt/input.py
+++ b/src/calibre/ebooks/txt/input.py
@@ -8,7 +8,7 @@ import os
 
 from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
 from calibre.ebooks.txt.processor import convert_basic, convert_markdown, \
-    separate_paragraphs
+    separate_paragraphs_single_line, separate_paragraphs_print_formatted
 
 class TXTInput(InputFormatPlugin):
 
@@ -22,6 +22,12 @@ class TXTInput(InputFormatPlugin):
             help=_('Normally calibre treats blank lines as paragraph markers. '
                 'With this option it will assume that every line represents '
                 'a paragraph instead.')),
+        OptionRecommendation(name='print_formatted_paras', recommended_value=False,
+            help=_('Normally calibre treats blank lines as paragraph markers. '
+                'With this option it will assume that every line starting with '
+                'an indent (either a tab or 2+ spaces) represents a paragraph.'
+                'Paragraphs end when the next line that starts with an indent '
+                'is reached.')),
         OptionRecommendation(name='markdown', recommended_value=False,
             help=_('Run the text input through the markdown pre-processor. To '
                 'learn more about markdown see')+' http://daringfireball.net/projects/markdown/'),
@@ -35,8 +41,11 @@ class TXTInput(InputFormatPlugin):
         log.debug('Reading text from file...')
         txt = stream.read().decode(ienc, 'replace')
 
+        # Adjust paragraph formatting as requested
         if options.single_line_paras:
-            txt = separate_paragraphs(txt)
+            txt = separate_paragraphs_single_line(txt)
+        if options.print_formatted_paras:
+            txt = separate_paragraphs_print_formatted(txt)
 
         if options.markdown:
             log.debug('Running text though markdown conversion...')
diff --git a/src/calibre/ebooks/txt/processor.py b/src/calibre/ebooks/txt/processor.py
index f6503c0bc5..9d0e1283c1 100644
--- a/src/calibre/ebooks/txt/processor.py
+++ b/src/calibre/ebooks/txt/processor.py
@@ -45,12 +45,16 @@ def convert_markdown(txt, title=''):
         safe_mode=False,)
     return HTML_TEMPLATE % (title, md.convert(txt))
 
-def separate_paragraphs(txt):
+def separate_paragraphs_single_line(txt):
     txt = txt.replace('\r\n', '\n')
     txt = txt.replace('\r', '\n')
     txt = re.sub(u'(?<=.)\n(?=.)', u'\n\n', txt)
     return txt
 
+def separate_paragraphs_print_formatted(txt):
+    txt = re.sub('(?miu)^(\t+|[ ]{2,})(?=.)', '\n\t', txt)
+    return txt
+
 def opf_writer(path, opf_name, manifest, spine, mi):
     opf = OPFCreator(path, mi)
     opf.create_manifest(manifest)
diff --git a/src/calibre/gui2/convert/pdb_input.py b/src/calibre/gui2/convert/pdb_input.py
index 4b0ba73fda..058f589856 100644
--- a/src/calibre/gui2/convert/pdb_input.py
+++ b/src/calibre/gui2/convert/pdb_input.py
@@ -14,6 +14,6 @@ class PluginWidget(Widget, Ui_Form):
 
     def __init__(self, parent, get_option, get_help, db=None, book_id=None):
         Widget.__init__(self, parent, 'pdb_input',
-            ['single_line_paras'])
+            ['single_line_paras', 'print_formatted_paras'])
         self.db, self.book_id = db, book_id
         self.initialize_options(get_option, get_help, db, book_id)
diff --git a/src/calibre/gui2/convert/pdb_input.ui b/src/calibre/gui2/convert/pdb_input.ui
index 191e749833..2b632b1a33 100644
--- a/src/calibre/gui2/convert/pdb_input.ui
+++ b/src/calibre/gui2/convert/pdb_input.ui
@@ -14,7 +14,7 @@
    <string>Form</string>
   </property>
   <layout class="QGridLayout" name="gridLayout">
-   <item row="1" column="0">
+   <item row="2" column="0">
     <spacer name="verticalSpacer">
      <property name="orientation">
       <enum>Qt::Vertical</enum>
@@ -34,6 +34,13 @@
      </property>
     </widget>
    </item>
+   <item row="1" column="0">
+    <widget class="QCheckBox" name="opt_print_formatted_paras">
+     <property name="text">
+      <string>Assume print formatting</string>
+     </property>
+    </widget>
+   </item>
   </layout>
  </widget>
  <resources/>
diff --git a/src/calibre/gui2/convert/txt_input.py b/src/calibre/gui2/convert/txt_input.py
index 3d17eefe0d..505a916f81 100644
--- a/src/calibre/gui2/convert/txt_input.py
+++ b/src/calibre/gui2/convert/txt_input.py
@@ -14,6 +14,6 @@ class PluginWidget(Widget, Ui_Form):
 
     def __init__(self, parent, get_option, get_help, db=None, book_id=None):
         Widget.__init__(self, parent, 'txt_input',
-            ['single_line_paras', 'markdown'])
+            ['single_line_paras', 'print_formatted_paras', 'markdown'])
         self.db, self.book_id = db, book_id
         self.initialize_options(get_option, get_help, db, book_id)
diff --git a/src/calibre/gui2/convert/txt_input.ui b/src/calibre/gui2/convert/txt_input.ui
index 8c22ff721e..94cc1ad25f 100644
--- a/src/calibre/gui2/convert/txt_input.ui
+++ b/src/calibre/gui2/convert/txt_input.ui
@@ -14,7 +14,7 @@
    <string>Form</string>
   </property>
   <layout class="QGridLayout" name="gridLayout">
-   <item row="3" column="0">
+   <item row="4" column="0">
     <spacer name="verticalSpacer">
      <property name="orientation">
       <enum>Qt::Vertical</enum>
@@ -34,14 +34,14 @@
      </property>
     </widget>
    </item>
-   <item row="1" column="0">
+   <item row="2" column="0">
     <widget class="QCheckBox" name="opt_markdown">
      <property name="text">
       <string>Process using markdown</string>
      </property>
     </widget>
    </item>
-   <item row="2" column="0">
+   <item row="3" column="0">
     <widget class="QLabel" name="label">
      <property name="text">
       <string>&lt;p&gt;Markdown is a simple markup language for text files, that allows for advanced formatting. To learn more visit &lt;a href=&quot;http://daringfireball.net/projects/markdown&quot;&gt;markdown&lt;/a&gt;.</string>
@@ -51,6 +51,13 @@
      </property>
     </widget>
    </item>
+   <item row="1" column="0">
+    <widget class="QCheckBox" name="opt_print_formatted_paras">
+     <property name="text">
+      <string>Assume print formatting</string>
+     </property>
+    </widget>
+   </item>
   </layout>
  </widget>
  <resources/>

From bd1b37f3a96bbd4a24bc760ca543ec0e5d7278a9 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Sun, 6 Sep 2009 14:09:43 -0400
Subject: [PATCH 027/120] Add missing import. Allow TXT output to split lines
 based on a maximum line length value.

---
 src/calibre/ebooks/pdb/palmdoc/reader.py |  2 +-
 src/calibre/ebooks/pdb/ztxt/reader.py    |  2 +-
 src/calibre/ebooks/txt/output.py         | 11 ++++++++
 src/calibre/ebooks/txt/txtml.py          | 34 ++++++++++++++++++++++++
 src/calibre/gui2/convert/txt_output.py   |  5 ++--
 src/calibre/gui2/convert/txt_output.ui   | 28 ++++++++++++++++---
 6 files changed, 74 insertions(+), 8 deletions(-)

diff --git a/src/calibre/ebooks/pdb/palmdoc/reader.py b/src/calibre/ebooks/pdb/palmdoc/reader.py
index aaa121f1ed..0a57e3f51a 100644
--- a/src/calibre/ebooks/pdb/palmdoc/reader.py
+++ b/src/calibre/ebooks/pdb/palmdoc/reader.py
@@ -14,7 +14,7 @@ import struct
 from calibre.ebooks.compression.palmdoc import decompress_doc
 from calibre.ebooks.pdb.formatreader import FormatReader
 from calibre.ebooks.txt.processor import convert_basic, opf_writer, \
-    separate_paragraphs_single_line
+    separate_paragraphs_single_line, separate_paragraphs_print_formatted
 
 class HeaderRecord(object):
     '''
diff --git a/src/calibre/ebooks/pdb/ztxt/reader.py b/src/calibre/ebooks/pdb/ztxt/reader.py
index 4379159d81..86fb9d868c 100644
--- a/src/calibre/ebooks/pdb/ztxt/reader.py
+++ b/src/calibre/ebooks/pdb/ztxt/reader.py
@@ -13,7 +13,7 @@ import os, struct, zlib
 from calibre.ebooks.pdb.formatreader import FormatReader
 from calibre.ebooks.pdb.ztxt import zTXTError
 from calibre.ebooks.txt.processor import convert_basic, opf_writer, \
-    separate_paragraphs_single_line
+    separate_paragraphs_single_line, separate_paragraphs_print_formatted
 
 SUPPORTED_VERSION = (1, 40)
 
diff --git a/src/calibre/ebooks/txt/output.py b/src/calibre/ebooks/txt/output.py
index b3bda7fa9d..32bde90fe8 100644
--- a/src/calibre/ebooks/txt/output.py
+++ b/src/calibre/ebooks/txt/output.py
@@ -36,6 +36,17 @@ class TXTOutput(OutputFormatPlugin):
         OptionRecommendation(name='flush_paras',
             recommended_value=False, level=OptionRecommendation.LOW,
             help=_('Do not add a blank line between paragraphs.')),
+        OptionRecommendation(name='max_line_length',
+            recommended_value=0, level=OptionRecommendation.LOW,
+            help=_('The maximum number of characters per line. This splits on '
+            'the first space before the specified value. If no space is found '
+            'the line will be broken at the space after and will exceed the '
+            'specified value. Also, there is a minimum of 25 characters. '
+            'Use 0 to disable line splitting.')),
+        OptionRecommendation(name='force_max_line_length',
+            recommended_value=False, level=OptionRecommendation.LOW,
+            help=_('Force splitting on the max-line-length value when no space '
+            'is present. Also allows max-line-length to be below the minimum')),
         OptionRecommendation(name='indent_paras',
             recommended_value=False, level=OptionRecommendation.LOW,
             help=_('Add a tab at the beginning of each paragraph.')),
diff --git a/src/calibre/ebooks/txt/txtml.py b/src/calibre/ebooks/txt/txtml.py
index c705bcf221..63a5cdc8af 100644
--- a/src/calibre/ebooks/txt/txtml.py
+++ b/src/calibre/ebooks/txt/txtml.py
@@ -105,6 +105,40 @@ class TXTMLizer(object):
         if self.opts.indent_paras:
             text = re.sub('(?imu)^(?=.)', '\t', text)
 
+        if self.opts.max_line_length:
+            max_length = self.opts.max_line_length
+            if self.opts.max_line_length < 25 and not self.opts.force_max_line_length:
+                max_length = 25
+            short_lines = []
+            lines = text.splitlines()
+            for line in lines:
+                while len(line) > max_length:
+                    space = line.rfind(' ', 0, max_length)
+                    if space != -1:
+                        # Space was found.
+                        short_lines.append(line[:space])
+                        line = line[space + 1:]
+                    else:
+                        # Space was not found.
+                        if self.opts.force_max_line_length:
+                            # Force breaking at max_length.
+                            short_lines.append(line[:max_length])
+                            line = line[max_length:]
+                        else:
+                            # Look for the first space after max_length.
+                            space = line.find(' ', max_length, len(line))
+                            if space != -1:
+                                # Space was found.
+                                short_lines.append(line[:space])
+                                line = line[space + 1:]
+                            else:
+                                # No space was found; cannot break line.
+                                short_lines.append(line)
+                                line = ''
+                # Add the text that was less than max_length to the list
+                short_lines.append(line)
+            text = '\n'.join(short_lines)
+
         return text
 
     def dump_text(self, elem, stylizer, end=''):
diff --git a/src/calibre/gui2/convert/txt_output.py b/src/calibre/gui2/convert/txt_output.py
index eca85f1292..2fc7f19908 100644
--- a/src/calibre/gui2/convert/txt_output.py
+++ b/src/calibre/gui2/convert/txt_output.py
@@ -17,8 +17,9 @@ class PluginWidget(Widget, Ui_Form):
     HELP = _('Options specific to')+' TXT '+_('output')
 
     def __init__(self, parent, get_option, get_help, db=None, book_id=None):
-        Widget.__init__(self, parent, 'txt_output', ['newline', 'inline_toc',
-        'flush_paras', 'indent_paras'])
+        Widget.__init__(self, parent, 'txt_output',
+        ['newline', 'max_line_length', 'force_max_line_length',
+        'inline_toc', 'flush_paras', 'indent_paras'])
         self.db, self.book_id = db, book_id
         self.initialize_options(get_option, get_help, db, book_id)
 
diff --git a/src/calibre/gui2/convert/txt_output.ui b/src/calibre/gui2/convert/txt_output.ui
index 900198aca9..8e5429b0ce 100644
--- a/src/calibre/gui2/convert/txt_output.ui
+++ b/src/calibre/gui2/convert/txt_output.ui
@@ -27,7 +27,7 @@
    <item row="0" column="1">
     <widget class="QComboBox" name="opt_newline"/>
    </item>
-   <item row="4" column="0">
+   <item row="6" column="0">
     <spacer name="verticalSpacer">
      <property name="orientation">
       <enum>Qt::Vertical</enum>
@@ -40,27 +40,47 @@
      </property>
     </spacer>
    </item>
-   <item row="1" column="0" colspan="2">
+   <item row="3" column="0" colspan="2">
     <widget class="QCheckBox" name="opt_inline_toc">
      <property name="text">
       <string>&amp;Inline TOC</string>
      </property>
     </widget>
    </item>
-   <item row="2" column="0" colspan="2">
+   <item row="4" column="0" colspan="2">
     <widget class="QCheckBox" name="opt_flush_paras">
      <property name="text">
       <string>Do not add a blank line between paragraphs.</string>
      </property>
     </widget>
    </item>
-   <item row="3" column="0" colspan="2">
+   <item row="5" column="0" colspan="2">
     <widget class="QCheckBox" name="opt_indent_paras">
      <property name="text">
       <string>Add a tab at the beginning of each paragraph</string>
      </property>
     </widget>
    </item>
+   <item row="1" column="1">
+    <widget class="QSpinBox" name="opt_max_line_length"/>
+   </item>
+   <item row="1" column="0">
+    <widget class="QLabel" name="label_2">
+     <property name="text">
+      <string>&amp;Maximum line length:</string>
+     </property>
+     <property name="buddy">
+      <cstring>opt_max_line_length</cstring>
+     </property>
+    </widget>
+   </item>
+   <item row="2" column="0" colspan="2">
+    <widget class="QCheckBox" name="opt_force_max_line_length">
+     <property name="text">
+      <string>Force maximum line lenght</string>
+     </property>
+    </widget>
+   </item>
   </layout>
  </widget>
  <resources/>

From 6d585f30904cb6e9d31100366f4b9edd712dae9d Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Mon, 7 Sep 2009 10:34:36 -0400
Subject: [PATCH 028/120] Fix bug #3424: Load defaults and book specific
 options in GUI bulk convert.

---
 src/calibre/gui2/tools.py | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/src/calibre/gui2/tools.py b/src/calibre/gui2/tools.py
index 5670031e68..52273adbd3 100644
--- a/src/calibre/gui2/tools.py
+++ b/src/calibre/gui2/tools.py
@@ -18,6 +18,8 @@ from calibre.gui2.convert.single import Config as SingleConfig
 from calibre.gui2.convert.bulk import BulkConfig
 from calibre.customize.conversion import OptionRecommendation
 from calibre.utils.config import prefs
+from calibre.ebooks.conversion.config import GuiRecommendations, \
+    load_defaults, load_specifics, save_specifics
 
 def convert_single_ebook(parent, db, book_ids, auto_conversion=False, out_format=None):
     changed = False
@@ -56,7 +58,6 @@ def convert_single_ebook(parent, db, book_ids, auto_conversion=False, out_format
                 desc = _('Convert book %d of %d (%s)') % (i + 1, total, repr(mi.title))
 
                 recs = cPickle.loads(d.recommendations)
-                args = [in_file, out_file.name, recs]
                 if d.opf_file is not None:
                     recs.append(('read_metadata_from_opf', d.opf_file.name,
                         OptionRecommendation.HIGH))
@@ -65,6 +66,7 @@ def convert_single_ebook(parent, db, book_ids, auto_conversion=False, out_format
                     recs.append(('cover', d.cover_file.name,
                         OptionRecommendation.HIGH))
                     temp_files.append(d.cover_file)
+                args = [in_file, out_file.name, recs]
                 temp_files.append(out_file)
                 jobs.append(('gui_convert', args, desc, d.output_format.upper(), book_id, temp_files))
 
@@ -101,7 +103,7 @@ def convert_bulk_ebook(parent, db, book_ids, out_format=None):
         return jobs, changed, bad
 
     output_format = d.output_format
-    recs = cPickle.loads(d.recommendations)
+    user_recs = cPickle.loads(d.recommendations)
 
     book_ids = convert_existing(parent, db, book_ids, output_format)
     for i, book_id in enumerate(book_ids):
@@ -119,7 +121,17 @@ def convert_bulk_ebook(parent, db, book_ids, out_format=None):
             out_file.close()
             temp_files = []
 
-            lrecs = list(recs)
+            combined_recs = GuiRecommendations()
+            default_recs = load_defaults('%s_input' % d.input_format)
+            specific_recs = load_specifics(db, book_id)
+            for key in default_recs:
+                combined_recs[key] = default_recs[key]
+            for key in specific_recs:
+                combined_recs[key] = specific_recs[key]
+            for item in user_recs:
+                combined_recs[item[0]] = item[1]
+            save_specifics(db, book_id, combined_recs)
+            lrecs = list(combined_recs.to_recommendations())
 
             if d.opf_file is not None:
                 lrecs.append(('read_metadata_from_opf', d.opf_file.name,

From 9cf5d0a5f8c053eb9ab41779f980c1ce9cc0891b Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Wed, 9 Sep 2009 06:52:55 -0400
Subject: [PATCH 029/120] Update prs descriptions.

---
 src/calibre/devices/prs505/driver.py | 6 +++---
 src/calibre/devices/prs700/driver.py | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/calibre/devices/prs505/driver.py b/src/calibre/devices/prs505/driver.py
index 07260ca31f..58ad12c078 100644
--- a/src/calibre/devices/prs505/driver.py
+++ b/src/calibre/devices/prs505/driver.py
@@ -21,16 +21,16 @@ from calibre import __appname__
 
 class PRS505(CLI, Device):
 
-    name           = 'PRS-505 Device Interface'
+    name           = 'PRS-300/505 Device Interface'
     gui_name       = 'SONY Pocket Edition'
-    description    = _('Communicate with the Sony PRS-505 eBook reader.')
+    description    = _('Communicate with the Sony PRS-300/505 eBook reader.')
     author         = _('Kovid Goyal and John Schember')
     supported_platforms = ['windows', 'osx', 'linux']
 
     FORMATS      = ['epub', 'lrf', 'lrx', 'rtf', 'pdf', 'txt']
 
     VENDOR_ID    = [0x054c]   #: SONY Vendor Id
-    PRODUCT_ID   = [0x031e]   #: Product Id for the PRS-505
+    PRODUCT_ID   = [0x031e]   #: Product Id for the PRS 300 and 505
     BCD          = [0x229, 0x1000]
 
     VENDOR_NAME        = 'SONY'
diff --git a/src/calibre/devices/prs700/driver.py b/src/calibre/devices/prs700/driver.py
index 804f400c8a..807e04dc26 100644
--- a/src/calibre/devices/prs700/driver.py
+++ b/src/calibre/devices/prs700/driver.py
@@ -13,8 +13,8 @@ import re
 
 class PRS700(PRS505):
 
-    name           = 'PRS-700 Device Interface'
-    description    = _('Communicate with the Sony PRS-700 eBook reader.')
+    name           = 'PRS-600/700 Device Interface'
+    description    = _('Communicate with the Sony PRS-600/700 eBook reader.')
     author         = _('Kovid Goyal and John Schember')
     gui_name       = 'SONY Touch edition'
     supported_platforms = ['windows', 'osx', 'linux']

From a1f05532cea92b6498ff471e9ede03e807673189 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Sat, 12 Sep 2009 20:04:29 -0400
Subject: [PATCH 030/120] Untested fix for bug #3472

---
 src/calibre/ebooks/pdb/output.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/calibre/ebooks/pdb/output.py b/src/calibre/ebooks/pdb/output.py
index 06f2f20d10..3b4065213e 100644
--- a/src/calibre/ebooks/pdb/output.py
+++ b/src/calibre/ebooks/pdb/output.py
@@ -47,6 +47,11 @@ class PDBOutput(OutputFormatPlugin):
         if Writer is None:
             raise PDBError('No writer available for format %s.' % format)
 
+        setattr(opts, 'flush_paras', False)
+        setattr(opts, 'max_line_length', 0)
+        setattr(opts, 'force_max_line_length', False)
+        setattr(opts, 'indent_paras', False)
+
         writer = Writer(opts, log)
 
         out_stream.seek(0)

From 9991c41fcbc30e5643e60dcf5733c349c7987ba6 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Mon, 21 Sep 2009 17:26:37 -0400
Subject: [PATCH 031/120] TXT Output: Fix specified_newlines to change the line
 ending type correctly.

---
 src/calibre/ebooks/txt/newlines.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/calibre/ebooks/txt/newlines.py b/src/calibre/ebooks/txt/newlines.py
index ae766a216f..d7e97654b4 100644
--- a/src/calibre/ebooks/txt/newlines.py
+++ b/src/calibre/ebooks/txt/newlines.py
@@ -19,7 +19,11 @@ class TxtNewlines(object):
         self.newline = self.NEWLINE_TYPES.get(newline_type.lower(), os.linesep)
 
 def specified_newlines(newline, text):
+    # Convert all newlines to \n
+    text = text.replace('\r\n', '\n')
+    text = text.replace('\r', '\n')
+
     if newline == '\n':
         return text
 
-    return text.replace(os.linesep, newline)
+    return text.replace('\n', newline)

From 6e600b0e56d7153bc13eaa277fa5cd529ebafbcb Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Tue, 22 Sep 2009 17:22:18 -0400
Subject: [PATCH 032/120] Don't read invalid data from title in PDB header.

---
 src/calibre/ebooks/pdb/header.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/calibre/ebooks/pdb/header.py b/src/calibre/ebooks/pdb/header.py
index 3436c262d8..753c5e29b9 100644
--- a/src/calibre/ebooks/pdb/header.py
+++ b/src/calibre/ebooks/pdb/header.py
@@ -30,7 +30,7 @@ class PdbHeaderReader(object):
 
     def name(self):
         self.stream.seek(0)
-        return self.stream.read(32).replace('\x00', '')
+        return re.sub('[^-A-Za-z0-9 ]+', '_', self.stream.read(32).replace('\x00', ''))
 
     def full_section_info(self, number):
         if number not in range(0, self.num_sections):

From 0956c683f031ea7e61435db6fd1fdbe312fb9e75 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Tue, 22 Sep 2009 20:28:04 -0400
Subject: [PATCH 033/120] TXT Output: Table support.

---
 src/calibre/ebooks/txt/txtml.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/src/calibre/ebooks/txt/txtml.py b/src/calibre/ebooks/txt/txtml.py
index 63a5cdc8af..6957e53d43 100644
--- a/src/calibre/ebooks/txt/txtml.py
+++ b/src/calibre/ebooks/txt/txtml.py
@@ -26,12 +26,18 @@ BLOCK_TAGS = [
     'h5',
     'h6',
     'li',
+    'tr',
 ]
 
 BLOCK_STYLES = [
     'block',
 ]
 
+SPACE_TAGS = [
+    'span',
+    'td',
+]
+
 class TXTMLizer(object):
 
     def __init__(self, log):
@@ -170,6 +176,10 @@ class TXTMLizer(object):
             if not end.endswith(u'\n\n') and hasattr(elem, 'text') and elem.text != None and elem.text.strip() != '':
                 text.append(u'\n\n')
 
+        if tag in SPACE_TAGS:
+            if not end.endswith('u ') and hasattr(elem, 'text') and elem.text != None and elem.text.strip() != '':
+                text.append(u' ')
+
         # Process tags that contain text.
         if hasattr(elem, 'text') and elem.text != None and elem.text.strip() != '':
             text.append(elem.text)

From 72fd1a1d590dfd59193039da0c039f0234d33cff Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Wed, 23 Sep 2009 19:51:28 -0400
Subject: [PATCH 034/120] Fix bug #3567: Limit text length used for TXT
 metadata.

---
 src/calibre/ebooks/metadata/txt.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/calibre/ebooks/metadata/txt.py b/src/calibre/ebooks/metadata/txt.py
index 8dbc0c1453..79713774e3 100644
--- a/src/calibre/ebooks/metadata/txt.py
+++ b/src/calibre/ebooks/metadata/txt.py
@@ -22,6 +22,8 @@ def get_metadata(stream, extract_cover=True):
         else:
             mdata += line
 
+    mdata = mdata[:100]
+
     mo = re.search('(?u)^[ ]*(?P<title>.+)[ ]*(\n{3}|(\r\n){3}|\r{3})[ ]*(?P<author>.+)[ ]*(\n|\r\n|\r)$', mdata)
     if mo != None:
         mi.title = mo.group('title')

From fd65635a014095cb418c7d8eaba851b716edb6c3 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Wed, 30 Sep 2009 08:24:41 -0400
Subject: [PATCH 035/120] Fix bug #3620: Don't put a space before span tags.

---
 src/calibre/ebooks/txt/txtml.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/calibre/ebooks/txt/txtml.py b/src/calibre/ebooks/txt/txtml.py
index 6957e53d43..59c3ea671a 100644
--- a/src/calibre/ebooks/txt/txtml.py
+++ b/src/calibre/ebooks/txt/txtml.py
@@ -34,7 +34,6 @@ BLOCK_STYLES = [
 ]
 
 SPACE_TAGS = [
-    'span',
     'td',
 ]
 

From 7eba89285e33d729c0c26da4d454b4233a1a6346 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Sat, 3 Oct 2009 13:33:35 -0400
Subject: [PATCH 036/120] Possible fix for style and images not included in
 produced pdf files on Windows.

---
 src/calibre/ebooks/pdf/writer.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/calibre/ebooks/pdf/writer.py b/src/calibre/ebooks/pdf/writer.py
index b87aba7bc0..bf11394c02 100644
--- a/src/calibre/ebooks/pdf/writer.py
+++ b/src/calibre/ebooks/pdf/writer.py
@@ -111,7 +111,8 @@ class PDFWriter(QObject):
 
         self.logger.debug('Processing %s...' % item)
 
-        self.view.load(QUrl(item))
+        #self.view.load(QUrl(item))
+        self.view.setHtml(open(item, 'r+b').read().decode('utf-8'), QUrl(item))
 
     def _render_html(self, ok):
         if ok:

From d6984b83c3b4240ecece296bfc8d0fac751c1efd Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Sat, 3 Oct 2009 13:46:22 -0400
Subject: [PATCH 037/120] Use fromLocalFile with QUrl.

---
 src/calibre/ebooks/pdf/writer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/calibre/ebooks/pdf/writer.py b/src/calibre/ebooks/pdf/writer.py
index bf11394c02..e10c13d7d3 100644
--- a/src/calibre/ebooks/pdf/writer.py
+++ b/src/calibre/ebooks/pdf/writer.py
@@ -112,7 +112,7 @@ class PDFWriter(QObject):
         self.logger.debug('Processing %s...' % item)
 
         #self.view.load(QUrl(item))
-        self.view.setHtml(open(item, 'r+b').read().decode('utf-8'), QUrl(item))
+        self.view.setHtml(open(item, 'r+b').read().decode('utf-8'), QUrl.fromLocalFile(item))
 
     def _render_html(self, ok):
         if ok:

From 62317ca8cfa72d773c092cd5bbedd9b76ee09520 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Thu, 8 Oct 2009 08:38:40 -0400
Subject: [PATCH 038/120] Fix FB2 output.

---
 src/calibre/ebooks/fb2/fb2ml.py | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/src/calibre/ebooks/fb2/fb2ml.py b/src/calibre/ebooks/fb2/fb2ml.py
index d2e90b6d44..ff914568d2 100644
--- a/src/calibre/ebooks/fb2/fb2ml.py
+++ b/src/calibre/ebooks/fb2/fb2ml.py
@@ -30,6 +30,7 @@ TAG_MAP = {
     'i' : 'emphasis',
     'p' : 'p',
     'li' : 'p',
+    'br' : 'empty-line',
 }
 
 TAG_SPACE = [
@@ -74,6 +75,7 @@ class FB2MLizer(object):
         output.append(self.fb2mlize_images())
         output.append(self.fb2_footer())
         output = ''.join(output).replace(u'ghji87yhjko0Caliblre-toc-placeholder-for-insertion-later8ujko0987yjk', self.get_toc())
+        return output
         return u'<?xml version="1.0" encoding="UTF-8"?>\n%s' % etree.tostring(etree.fromstring(output), encoding=unicode, pretty_print=True)
 
     def fb2_header(self):
@@ -112,7 +114,7 @@ class FB2MLizer(object):
             item = self.oeb_book.manifest.hrefs[href]
             if item.spine_position is None:
                 stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
-                output += self.dump_text(item.data.find(XHTML('body')), stylizer, item)
+                output += ''.join(self.dump_text(item.data.find(XHTML('body')), stylizer, item))
         return output
 
     def get_toc(self):
@@ -151,7 +153,7 @@ class FB2MLizer(object):
         if aid not in self.link_hrefs.keys():
             self.link_hrefs[aid] = 'calibre_link-%s' % len(self.link_hrefs.keys())
         aid = self.link_hrefs[aid]
-        return '<v id="%s"></v>' % aid
+        return '<a id="%s" />' % aid
 
     def fb2mlize_images(self):
         images = [u'']
@@ -204,6 +206,7 @@ class FB2MLizer(object):
             href = elem.get('href')
             if href:
                 href = prepare_string_for_xml(page.abshref(href))
+                href = href.replace('"', '&quot;')
                 if '://' in href:
                     fb2_text.append('<a xlink:href="%s">' % href)
                 else:
@@ -240,7 +243,10 @@ class FB2MLizer(object):
                 fb2_text.append(' ')
 
         if hasattr(elem, 'text') and elem.text != None:
-            fb2_text.append(prepare_string_for_xml(elem.text))
+            if 'p' not in tag_stack:
+                fb2_text.append('<p>%s</p>' % prepare_string_for_xml(elem.text))
+            else:
+                fb2_text.append(prepare_string_for_xml(elem.text))
 
         for item in elem:
             fb2_text += self.dump_text(item, stylizer, page, tag_stack)

From 61e8c4222a6c7d8628ddfb156b7fb995b2179eea Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Sat, 10 Oct 2009 16:35:31 -0400
Subject: [PATCH 039/120] Add XML header to fb2 output.

---
 src/calibre/ebooks/fb2/fb2ml.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/calibre/ebooks/fb2/fb2ml.py b/src/calibre/ebooks/fb2/fb2ml.py
index ff914568d2..aaf8361b99 100644
--- a/src/calibre/ebooks/fb2/fb2ml.py
+++ b/src/calibre/ebooks/fb2/fb2ml.py
@@ -75,7 +75,6 @@ class FB2MLizer(object):
         output.append(self.fb2mlize_images())
         output.append(self.fb2_footer())
         output = ''.join(output).replace(u'ghji87yhjko0Caliblre-toc-placeholder-for-insertion-later8ujko0987yjk', self.get_toc())
-        return output
         return u'<?xml version="1.0" encoding="UTF-8"?>\n%s' % etree.tostring(etree.fromstring(output), encoding=unicode, pretty_print=True)
 
     def fb2_header(self):

From 552735c41eb6c8974025c2e47b4eea96b21c93fa Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Sat, 10 Oct 2009 23:38:00 -0400
Subject: [PATCH 040/120] Update eReader PDB code to produce files that are
 closer to what DropBook produces: Set the text size record, Write image size,
 Handle 1.5.2 sidebar and footnote changes. Fix PML output stripping backslash
 character.

---
 src/calibre/ebooks/pdb/ereader/reader132.py |   1 -
 src/calibre/ebooks/pdb/ereader/writer.py    | 111 ++++++++++++++------
 src/calibre/ebooks/pml/pmlconverter.py      |   5 +-
 3 files changed, 80 insertions(+), 37 deletions(-)

diff --git a/src/calibre/ebooks/pdb/ereader/reader132.py b/src/calibre/ebooks/pdb/ereader/reader132.py
index 98dbe13790..49fdfb8980 100644
--- a/src/calibre/ebooks/pdb/ereader/reader132.py
+++ b/src/calibre/ebooks/pdb/ereader/reader132.py
@@ -34,7 +34,6 @@ class HeaderRecord(object):
         self.has_metadata, = struct.unpack('>H', raw[24:26])
         self.footnote_rec, = struct.unpack('>H', raw[28:30])
         self.sidebar_rec, = struct.unpack('>H', raw[30:32])
-        self.bookmark_offset, = struct.unpack('>H', raw[32:34])
         self.image_data_offset, = struct.unpack('>H', raw[40:42])
         self.metadata_offset, = struct.unpack('>H', raw[44:46])
         self.footnote_offset, = struct.unpack('>H', raw[48:50])
diff --git a/src/calibre/ebooks/pdb/ereader/writer.py b/src/calibre/ebooks/pdb/ereader/writer.py
index 2f4e3bf16f..8a88c6a689 100644
--- a/src/calibre/ebooks/pdb/ereader/writer.py
+++ b/src/calibre/ebooks/pdb/ereader/writer.py
@@ -28,7 +28,7 @@ IDENTITY = 'PNRdPPrs'
 
 # This is an arbitrary number that is small enough to work. The actual maximum
 # record size is unknown.
-MAX_RECORD_SIZE = 3560
+MAX_RECORD_SIZE = 8192
 
 class Writer(FormatWriter):
 
@@ -37,13 +37,28 @@ class Writer(FormatWriter):
         self.log = log
 
     def write_content(self, oeb_book, out_stream, metadata=None):
-        text, image_hrefs = self._text(oeb_book)
+        text, image_hrefs, text_sizes = self._text(oeb_book)
         images = self._images(oeb_book.manifest, image_hrefs)
         metadata = [self._metadata(metadata)]
-
         hr = [self._header_record(len(text), len(images))]
 
-        sections = hr+text+images+metadata+['MeTaInFo\x00']
+        '''
+        Record order as generated by Dropbook.
+            1. eReader Header
+            2. Compressed text
+            3. Small font page index
+            4. Large font page index
+            5. Chapter index
+            6. Links index
+            7. Images
+            8. (Extrapolation: there should be one more record type here though yet uncovered what it might be).
+            9. Metadata
+           10. Sidebar records
+           11. Footnote records
+           12. Text block size record
+           13. "MeTaInFo\x00" word record
+        '''
+        sections = hr+text+images+metadata+[text_sizes]+['MeTaInFo\x00']
 
         lengths = [len(i) if i not in images else len(i[0]) + len(i[1]) for i in sections]
 
@@ -62,12 +77,38 @@ class Writer(FormatWriter):
         pml = unicode(pmlmlizer.extract_content(oeb_book, self.opts)).encode('cp1252', 'replace')
 
         pml_pages = []
-        for i in range(0, (len(pml) / MAX_RECORD_SIZE) + 1):
-            pml_pages.append(zlib.compress(pml[i * MAX_RECORD_SIZE : (i * MAX_RECORD_SIZE) + MAX_RECORD_SIZE]))
+        text_sizes = ''
+        index = 0
+        while index < len(pml):
+            '''
+            Split on the space character closest to MAX_RECORD_SIZE when possible.
+            '''
+            split = pml.rfind(' ', index, MAX_RECORD_SIZE)
+            if split == -1:
+                len_end = len(pml[index:])
+                if len_end > MAX_RECORD_SIZE:
+                    split = MAX_RECORD_SIZE
+                else:
+                    split = len_end
+            if split == 0:
+                split = 1
+            pml_pages.append(zlib.compress(pml[index:index+split]))
+            text_sizes += struct.pack('>H', split)
+            index += split
 
-        return pml_pages, pmlmlizer.image_hrefs
+        return pml_pages, pmlmlizer.image_hrefs, text_sizes
 
     def _images(self, manifest, image_hrefs):
+        '''
+        Image format.
+
+        0-4   : 'PNG '. There must be a space after PNG.
+        4-36  : Image name. Must be exactly 32 bytes long. Pad with \x00 for names shorter than 32 bytes
+        36-58 : Unknown.
+        58-60 : Width.
+        60-62 : Height.
+        62-...: Raw image data in 8 bit PNG format.
+        '''
         images = []
 
         for item in manifest:
@@ -82,6 +123,8 @@ class Writer(FormatWriter):
 
                     header = 'PNG '
                     header += image_hrefs[item.href].ljust(32, '\x00')[:32]
+                    header = header.ljust(58, '\x00')
+                    header += struct.pack('>HH', im.size[0], im.size[1])
                     header = header.ljust(62, '\x00')
 
                     if len(data) + len(header) < 65505:
@@ -126,7 +169,7 @@ class Writer(FormatWriter):
         text_items = the number of text pages
         image_items = the number of images
         '''
-        version = 10 # Zlib compression
+        compression = 10 # zlib compression.
         non_text_offset = text_items + 1
 
         if image_items > 0:
@@ -140,33 +183,33 @@ class Writer(FormatWriter):
 
         record = ''
 
-        record += struct.pack('>H', version)                # [0:2]    # Version. Specifies compression and drm. 2 = palmdoc, 10 = zlib. 260 and 272 = DRM
-        record += struct.pack('>H', 0)                      # [2:4]
-        record += struct.pack('>H', 0)                      # [4:6]
+        record += struct.pack('>H', compression)            # [0:2]    # Compression. Specifies compression and drm. 2 = palmdoc, 10 = zlib. 260 and 272 = DRM
+        record += struct.pack('>H', 0)                      # [2:4]    # Unknown.
+        record += struct.pack('>H', 0)                      # [4:6]    # Unknown.
         record += struct.pack('>H', 25152)                  # [6:8]    # 25152 is MAGIC. Somehow represents the cp1252 encoding of the text
-        record += struct.pack('>H', 0)                      # [8:10]
-        record += struct.pack('>H', 0)                      # [10:12]
-        record += struct.pack('>H', non_text_offset)        # [12:14]  # non_text_offset
-        record += struct.pack('>H', 0)                      # [14:16]
-        record += struct.pack('>H', 0)                      # [16:18]
-        record += struct.pack('>H', 0)                      # [18:20]
-        record += struct.pack('>H', image_items)            # [20:22]  # Number of images
-        record += struct.pack('>H', 0)                      # [22:24]
-        record += struct.pack('>H', 1)                      # [24:26]  # 1 if has metadata, 0 if not
-        record += struct.pack('>H', 0)                      # [26:28]
-        record += struct.pack('>H', 0)                      # [28:30]  # footnote_rec
-        record += struct.pack('>H', 0)                      # [30:32]  # sidebar_rec
-        record += struct.pack('>H', last_data_offset)       # [32:34]  # bookmark_offset
-        record += struct.pack('>H', 2560)                   # [34:36]  # 2560 is MAGIC
-        record += struct.pack('>H', 0)                      # [36:38]
-        record += struct.pack('>H', 0)                      # [38:40]
-        record += struct.pack('>H', image_data_offset)      # [40:42]  # image_data_offset. This will be the last data offset if there are no images
-        record += struct.pack('>H', 0)                      # [42:44]
-        record += struct.pack('>H', meta_data_offset)       # [44:46]  # meta_data_offset. This will be the last data offset if there are no images
-        record += struct.pack('>H', 0)                      # [46:48]
-        record += struct.pack('>H', last_data_offset)       # [48:50]  # footnote_offset. This will be the last data offset if there are no images
-        record += struct.pack('>H', last_data_offset)       # [50:52]  # sidebar_offset. This will be the last data offset if there are no images
-        record += struct.pack('>H', last_data_offset)       # [52:54]  # last_data_offset
+        record += struct.pack('>H', 0)                      # [8:10]   # Number of small font pages. 0 if page index is not built.
+        record += struct.pack('>H', 0)                      # [10:12]  # Number of large font pages. 0 if page index is not built.
+        record += struct.pack('>H', non_text_offset)        # [12:14]  # Non-Text record start.
+        record += struct.pack('>H', 0)                      # [14:16]  # Number of chapter index records.
+        record += struct.pack('>H', 0)                      # [16:18]  # Number of small font page index records.
+        record += struct.pack('>H', 0)                      # [18:20]  # Number of large font page index records.
+        record += struct.pack('>H', image_items)            # [20:22]  # Number of images.
+        record += struct.pack('>H', 0)                      # [22:24]  # Number of links.
+        record += struct.pack('>H', 1)                      # [24:26]  # 1 if has metadata, 0 if not.
+        record += struct.pack('>H', 0)                      # [26:28]  # Unknown.
+        record += struct.pack('>H', 0)                      # [28:30]  # Number of Footnotes.
+        record += struct.pack('>H', 0)                      # [30:32]  # Number of Sidebars.
+        record += struct.pack('>H', last_data_offset)       # [32:34]  # Chapter index offset.
+        record += struct.pack('>H', 2560)                   # [34:36]  # 2560 is MAGIC.
+        record += struct.pack('>H', last_data_offset)       # [36:38]  # Small font page offset. This will be the last data offset if there are none.
+        record += struct.pack('>H', last_data_offset)       # [38:40]  # Large font page offset. This will be the last data offset if there are none.
+        record += struct.pack('>H', image_data_offset)      # [40:42]  # Image offset. This will be the last data offset if there are none.
+        record += struct.pack('>H', image_data_offset)      # [42:44]  # Links offset. This will be the last data offset if there are none.
+        record += struct.pack('>H', meta_data_offset)       # [44:46]  # Metadata offset. This will be the last data offset if there are none.
+        record += struct.pack('>H', 0)                      # [46:48]  # Unknown.
+        record += struct.pack('>H', last_data_offset)       # [48:50]  # Footnote offset. This will be the last data offset if there are none.
+        record += struct.pack('>H', last_data_offset)       # [50:52]  # Sidebar offset. This will be the last data offset if there are none.
+        record += struct.pack('>H', last_data_offset)       # [52:54]  # Last data offset.
 
         for i in range(54, 132, 2):
             record += struct.pack('>H', 0)                  # [54:132]
diff --git a/src/calibre/ebooks/pml/pmlconverter.py b/src/calibre/ebooks/pml/pmlconverter.py
index b4ab238da9..ca7721350c 100644
--- a/src/calibre/ebooks/pml/pmlconverter.py
+++ b/src/calibre/ebooks/pml/pmlconverter.py
@@ -64,7 +64,7 @@ PML_HTML_RULES = [
     (re.compile(r'(?<=[^\\])\\Sp'), lambda match: ''),
     (re.compile(r'(?<=[^\\])\\Sb'), lambda match: ''),
     # Remove invalid single item pml codes.
-    (re.compile(r'(?<=[^\\])\\.'), lambda match: ''),
+    (re.compile(r'(?<=[^\\])\\[^\\]'), lambda match: ''),
 
     # Replace \\ with \.
     (re.compile(r'\\\\'), lambda match: '\\'),
@@ -78,6 +78,7 @@ def pml_to_html(pml):
     return html
 
 def footnote_sidebar_to_html(id, pml):
+    if id.startswith('\x01'):
+        id = id[2:]
     html = '<div id="sidebar-%s"><dt>%s</dt></div><dd>%s</dd>' % (id, id, pml_to_html(pml))
     return html
-

From 4b2f26f123b5d0ee0172c4960e3442adedeecb07 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Sun, 11 Oct 2009 08:48:07 -0400
Subject: [PATCH 041/120] PML input cleanup. Generate chapter and link index
 with eReader PDB output.

---
 src/calibre/ebooks/pdb/ereader/writer.py | 93 ++++++++++++++++++------
 src/calibre/ebooks/pml/pmlconverter.py   |  6 +-
 2 files changed, 74 insertions(+), 25 deletions(-)

diff --git a/src/calibre/ebooks/pdb/ereader/writer.py b/src/calibre/ebooks/pdb/ereader/writer.py
index 8a88c6a689..1a172ea07d 100644
--- a/src/calibre/ebooks/pdb/ereader/writer.py
+++ b/src/calibre/ebooks/pdb/ereader/writer.py
@@ -8,6 +8,7 @@ __license__   = 'GPL v3'
 __copyright__ = '2009, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
 
+import re
 import struct
 import zlib
 
@@ -37,10 +38,15 @@ class Writer(FormatWriter):
         self.log = log
 
     def write_content(self, oeb_book, out_stream, metadata=None):
-        text, image_hrefs, text_sizes = self._text(oeb_book)
-        images = self._images(oeb_book.manifest, image_hrefs)
+        pmlmlizer = PMLMLizer(self.log)
+        pml = unicode(pmlmlizer.extract_content(oeb_book, self.opts)).encode('cp1252', 'replace')
+
+        text, text_sizes = self._text(pml)
+        chapter_index = self._chapter_index(pml)
+        link_index = self._link_index(pml)
+        images = self._images(oeb_book.manifest, pmlmlizer.image_hrefs)
         metadata = [self._metadata(metadata)]
-        hr = [self._header_record(len(text), len(images))]
+        hr = [self._header_record(len(text), len(chapter_index.split('\x00')), len(link_index.split('\x00')), len(images))]
 
         '''
         Record order as generated by Dropbook.
@@ -58,7 +64,7 @@ class Writer(FormatWriter):
            12. Text block size record
            13. "MeTaInFo\x00" word record
         '''
-        sections = hr+text+images+metadata+[text_sizes]+['MeTaInFo\x00']
+        sections = hr+text+[chapter_index]+[link_index]+images+metadata+[text_sizes]+['MeTaInFo\x00']
 
         lengths = [len(i) if i not in images else len(i[0]) + len(i[1]) for i in sections]
 
@@ -72,10 +78,7 @@ class Writer(FormatWriter):
             else:
                 out_stream.write(item)
 
-    def _text(self, oeb_book):
-        pmlmlizer = PMLMLizer(self.log)
-        pml = unicode(pmlmlizer.extract_content(oeb_book, self.opts)).encode('cp1252', 'replace')
-
+    def _text(self, pml):
         pml_pages = []
         text_sizes = ''
         index = 0
@@ -96,7 +99,38 @@ class Writer(FormatWriter):
             text_sizes += struct.pack('>H', split)
             index += split
 
-        return pml_pages, pmlmlizer.image_hrefs, text_sizes
+        return pml_pages, text_sizes
+
+    def _index_item(self, mo):
+        index = ''
+        if 'text' in mo.groupdict().keys():
+            index += struct.pack('>L', mo.start('text'))
+            # Strip all PML tags from text
+            text = re.sub(r'[^\\]\\[^\\]', '', mo.group('text'))
+            text = re.sub(r'\\\\', r'\\', mo.group('text'))
+            if 'val' in mo.groupdict().keys():
+                text = '%s%s' % ('\x20' * 4 * int(mo.group('val')), text)
+            index += text
+            index += '\x00'
+        return index
+
+    def _chapter_index(self, pml):
+        chapter_marks = [
+            r'(?s)\\x(?P<text>.+?)\\x',
+            r'(?s)\\X(?P<val>[0-4])(?P<text>.*?)\\X[0-4]',
+            r'(?s)\\C(?P<val>\d)="(?P<text>.+?)"',
+        ]
+        index = ''
+        for chapter_mark in chapter_marks:
+            for mo in re.finditer(chapter_mark, pml):
+                index += self._index_item(mo)
+        return index
+
+    def _link_index(self, pml):
+        index = ''
+        for mo in re.finditer(r'(?s)\\Q="(?P<text>.+?)"', pml):
+            index += self._index_item(mo)
+        return index
 
     def _images(self, manifest, image_hrefs):
         '''
@@ -164,23 +198,38 @@ class Writer(FormatWriter):
 
         return '%s\x00%s\x00%s\x00%s\x00%s\x00' % (title, author, copyright, publisher, isbn)
 
-    def _header_record(self, text_items, image_items):
+    def _header_record(self, text_count, chapter_count, link_count, image_count):
         '''
-        text_items = the number of text pages
-        image_items = the number of images
+        text_count = the number of text pages
+        image_count = the number of images
         '''
         compression = 10 # zlib compression.
-        non_text_offset = text_items + 1
+        non_text_offset = text_count + 1
 
-        if image_items > 0:
-            image_data_offset = text_items + 1
-            meta_data_offset = image_data_offset + image_items
+        if chapter_count > 0:
+            chapter_offset = text_count + 1
+        else:
+            chapter_offset = text_count
+
+        if link_count > 0:
+            link_offset = chapter_offset + 1
+        else:
+            link_offset = chapter_offset
+
+        if image_count > 0:
+            image_data_offset = link_offset + 1
+            meta_data_offset = image_data_offset + image_count
             last_data_offset = meta_data_offset + 1
         else:
-            meta_data_offset = text_items + 1
+            meta_data_offset = link_offset + 1
             last_data_offset = meta_data_offset + 1
             image_data_offset = last_data_offset
 
+        if chapter_count <= 0:
+            chapter_offset = last_data_offset
+        if link_count <= 0:
+            link_offset = last_data_offset
+
         record = ''
 
         record += struct.pack('>H', compression)            # [0:2]    # Compression. Specifies compression and drm. 2 = palmdoc, 10 = zlib. 260 and 272 = DRM
@@ -190,21 +239,21 @@ class Writer(FormatWriter):
         record += struct.pack('>H', 0)                      # [8:10]   # Number of small font pages. 0 if page index is not built.
         record += struct.pack('>H', 0)                      # [10:12]  # Number of large font pages. 0 if page index is not built.
         record += struct.pack('>H', non_text_offset)        # [12:14]  # Non-Text record start.
-        record += struct.pack('>H', 0)                      # [14:16]  # Number of chapter index records.
+        record += struct.pack('>H', chapter_count)          # [14:16]  # Number of chapter index records.
         record += struct.pack('>H', 0)                      # [16:18]  # Number of small font page index records.
         record += struct.pack('>H', 0)                      # [18:20]  # Number of large font page index records.
-        record += struct.pack('>H', image_items)            # [20:22]  # Number of images.
-        record += struct.pack('>H', 0)                      # [22:24]  # Number of links.
+        record += struct.pack('>H', image_count)            # [20:22]  # Number of images.
+        record += struct.pack('>H', link_count)             # [22:24]  # Number of links.
         record += struct.pack('>H', 1)                      # [24:26]  # 1 if has metadata, 0 if not.
         record += struct.pack('>H', 0)                      # [26:28]  # Unknown.
         record += struct.pack('>H', 0)                      # [28:30]  # Number of Footnotes.
         record += struct.pack('>H', 0)                      # [30:32]  # Number of Sidebars.
-        record += struct.pack('>H', last_data_offset)       # [32:34]  # Chapter index offset.
+        record += struct.pack('>H', chapter_offset)         # [32:34]  # Chapter index offset.
         record += struct.pack('>H', 2560)                   # [34:36]  # 2560 is MAGIC.
         record += struct.pack('>H', last_data_offset)       # [36:38]  # Small font page offset. This will be the last data offset if there are none.
         record += struct.pack('>H', last_data_offset)       # [38:40]  # Large font page offset. This will be the last data offset if there are none.
         record += struct.pack('>H', image_data_offset)      # [40:42]  # Image offset. This will be the last data offset if there are none.
-        record += struct.pack('>H', image_data_offset)      # [42:44]  # Links offset. This will be the last data offset if there are none.
+        record += struct.pack('>H', link_offset)            # [42:44]  # Links offset. This will be the last data offset if there are none.
         record += struct.pack('>H', meta_data_offset)       # [44:46]  # Metadata offset. This will be the last data offset if there are none.
         record += struct.pack('>H', 0)                      # [46:48]  # Unknown.
         record += struct.pack('>H', last_data_offset)       # [48:50]  # Footnote offset. This will be the last data offset if there are none.
diff --git a/src/calibre/ebooks/pml/pmlconverter.py b/src/calibre/ebooks/pml/pmlconverter.py
index ca7721350c..3e1b3b4828 100644
--- a/src/calibre/ebooks/pml/pmlconverter.py
+++ b/src/calibre/ebooks/pml/pmlconverter.py
@@ -18,10 +18,10 @@ PML_HTML_RULES = [
     (re.compile(r'\\x(?P<text>.*?)\\x', re.DOTALL), lambda match: '<h1 style="page-break-before: always;">%s</h1>' % match.group('text') if match.group('text') else ''),
     (re.compile(r'\\X(?P<val>[0-4])(?P<text>.*?)\\X[0-4]', re.DOTALL), lambda match: '<h%s style="page-break-before: always;">%s</h%s>' % (int(match.group('val')) + 1, match.group('text'), int(match.group('val')) + 1) if match.group('text') else ''),
     (re.compile(r'\\C\d=".+?"'), lambda match: ''), # This should be made to create a TOC entry
-    (re.compile(r'\\c(?P<text>.*?)\\c', re.DOTALL), lambda match: '<div style="text-align: center; display: block; margin: auto;">%s</div>' % match.group('text') if match.group('text') else ''),
-    (re.compile(r'\\r(?P<text>.*?)\\r', re.DOTALL), lambda match: '<div style="text-align: right; display: block;">%s</div>' % match.group('text') if match.group('text') else ''),
+    (re.compile(r'\\c(?P<text>.*?)\\c', re.DOTALL), lambda match: '<span style="text-align: center; display: block; margin: auto;">%s</span>' % match.group('text') if match.group('text') else ''),
+    (re.compile(r'\\r(?P<text>.*?)\\r', re.DOTALL), lambda match: '<span style="text-align: right; display: block;">%s</span>' % match.group('text') if match.group('text') else ''),
     (re.compile(r'\\i(?P<text>.*?)\\i', re.DOTALL), lambda match: '<i>%s</i>' % match.group('text') if match.group('text') else ''),
-    (re.compile(r'\\u(?P<text>.*?)\\u', re.DOTALL), lambda match: '<div style="text-decoration: underline;">%s</div>' % match.group('text') if match.group('text') else ''),
+    (re.compile(r'\\u(?P<text>.*?)\\u', re.DOTALL), lambda match: '<span style="text-decoration: underline;">%s</span>' % match.group('text') if match.group('text') else ''),
     (re.compile(r'\\o(?P<text>.*?)\\o', re.DOTALL), lambda match: '<del>%s</del>' % match.group('text') if match.group('text') else ''),
     (re.compile(r'\\v(?P<text>.*?)\\v', re.DOTALL), lambda match: '<!-- %s -->' % match.group('text') if match.group('text') else ''),
     (re.compile(r'\\t(?P<text>.*?)\\t', re.DOTALL), lambda match: '<div style="margin-left: 5%%;">%s</div>' % match.group('text') if match.group('text') else ''),

From 599de056d01ba2e87b4fdda280a304fc2c055ad7 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Sun, 11 Oct 2009 10:32:38 -0400
Subject: [PATCH 042/120] eReader PDB output: proper length of indexes and do
 not try to add them if they are not available. PML Output: cleanup. PML Input:
 read unicode and entity PML tags correctly.

---
 src/calibre/ebooks/pdb/ereader/writer.py | 12 ++++++++----
 src/calibre/ebooks/pml/pmlconverter.py   |  4 ++--
 src/calibre/ebooks/pml/pmlml.py          |  5 +++++
 3 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/src/calibre/ebooks/pdb/ereader/writer.py b/src/calibre/ebooks/pdb/ereader/writer.py
index 1a172ea07d..1e108d113b 100644
--- a/src/calibre/ebooks/pdb/ereader/writer.py
+++ b/src/calibre/ebooks/pdb/ereader/writer.py
@@ -43,10 +43,14 @@ class Writer(FormatWriter):
 
         text, text_sizes = self._text(pml)
         chapter_index = self._chapter_index(pml)
+        chapter_index = [chapter_index] if chapter_index != '' else []
         link_index = self._link_index(pml)
+        link_index = [link_index] if link_index != '' else []
         images = self._images(oeb_book.manifest, pmlmlizer.image_hrefs)
         metadata = [self._metadata(metadata)]
-        hr = [self._header_record(len(text), len(chapter_index.split('\x00')), len(link_index.split('\x00')), len(images))]
+        chapter_index_count = len(chapter_index[0].split('\x00')) - 1 if len(chapter_index) >= 1 else 0
+        link_index_count = len(link_index[0].split('\x00')) - 1 if len(link_index) >= 1 else 0
+        hr = [self._header_record(len(text), chapter_index_count, link_index_count, len(images))]
 
         '''
         Record order as generated by Dropbook.
@@ -64,7 +68,7 @@ class Writer(FormatWriter):
            12. Text block size record
            13. "MeTaInFo\x00" word record
         '''
-        sections = hr+text+[chapter_index]+[link_index]+images+metadata+[text_sizes]+['MeTaInFo\x00']
+        sections = hr+text+chapter_index+link_index+images+metadata+[text_sizes]+['MeTaInFo\x00']
 
         lengths = [len(i) if i not in images else len(i[0]) + len(i[1]) for i in sections]
 
@@ -106,8 +110,8 @@ class Writer(FormatWriter):
         if 'text' in mo.groupdict().keys():
             index += struct.pack('>L', mo.start('text'))
             # Strip all PML tags from text
-            text = re.sub(r'[^\\]\\[^\\]', '', mo.group('text'))
-            text = re.sub(r'\\\\', r'\\', mo.group('text'))
+            text = re.sub(r'\\.', '', mo.group('text'))
+            # Add appropriate spacing to denote the various levels of headings
             if 'val' in mo.groupdict().keys():
                 text = '%s%s' % ('\x20' * 4 * int(mo.group('val')), text)
             index += text
diff --git a/src/calibre/ebooks/pml/pmlconverter.py b/src/calibre/ebooks/pml/pmlconverter.py
index 3e1b3b4828..c72a21a5f9 100644
--- a/src/calibre/ebooks/pml/pmlconverter.py
+++ b/src/calibre/ebooks/pml/pmlconverter.py
@@ -35,8 +35,8 @@ PML_HTML_RULES = [
     (re.compile(r'\\Sp(?P<text>.*?)\\Sp', re.DOTALL), lambda match: '<sup>%s</sup>' % match.group('text') if match.group('text') else ''),
     (re.compile(r'\\Sb(?P<text>.*?)\\Sb', re.DOTALL), lambda match: '<sub>%s</sub>' % match.group('text') if match.group('text') else ''),
     (re.compile(r'\\k(?P<text>.*?)\\k', re.DOTALL), lambda match: '<span style="font-size: 50%%">%s</span>' % match.group('text').upper() if match.group('text') else ''),
-    (re.compile(r'\\a(?P<num>\d\d\d)'), lambda match: '&#%s;' % match.group('num')),
-    (re.compile(r'\\U(?P<num>\d\d\d\d)'), lambda match: '%s' % my_unichr(int(match.group('num'), 16))),
+    (re.compile(r'\\a(?P<num>\d{3})'), lambda match: '&#%s;' % match.group('num')),
+    (re.compile(r'\\U(?P<num>[0-9a-f]{4})'), lambda match: '%s' % my_unichr(int(match.group('num'), 16))),
     (re.compile(r'\\m="(?P<name>.+?)"'), lambda match: '<img src="images/%s" />' % image_name(match.group('name')).strip('\x00')),
     (re.compile(r'\\q="(?P<target>#.+?)"(?P<text>.*?)\\q', re.DOTALL), lambda match: '<a href="%s">%s</a>' % (match.group('target'), match.group('text')) if match.group('text') else ''),
     (re.compile(r'\\Q="(?P<target>.+?)"'), lambda match: '<span id="%s"></span>' % match.group('target')),
diff --git a/src/calibre/ebooks/pml/pmlml.py b/src/calibre/ebooks/pml/pmlml.py
index 72b55d00b1..b6a62e7c1f 100644
--- a/src/calibre/ebooks/pml/pmlml.py
+++ b/src/calibre/ebooks/pml/pmlml.py
@@ -154,10 +154,15 @@ class PMLMLizer(object):
         for unused in anchors.difference(links):
             text = text.replace('\\Q="%s"' % unused, '')
 
+        # Turn all html entities into unicode. This should not be necessary as
+        # lxml should have already done this but we want to be sure it happens.
         for entity in set(re.findall('&.+?;', text)):
             mo = re.search('(%s)' % entity[1:-1], text)
             text = text.replace(entity, entity_to_unicode(mo))
 
+        # Turn all unicode characters into their PML hex equivelent
+        text = re.sub('[^\x00-\x7f]', lambda x: '\\U%04x' % ord(x.group()), text)
+
         return text
 
     def dump_text(self, elem, stylizer, page, tag_stack=[]):

From 86a7524b1175bae9807a45aa87f4b6737d020224 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Sun, 11 Oct 2009 16:28:04 -0400
Subject: [PATCH 043/120] eReader PDB Output: Disable index generation.

---
 src/calibre/ebooks/pdb/ereader/writer.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/src/calibre/ebooks/pdb/ereader/writer.py b/src/calibre/ebooks/pdb/ereader/writer.py
index 1e108d113b..b8f2cddd0b 100644
--- a/src/calibre/ebooks/pdb/ereader/writer.py
+++ b/src/calibre/ebooks/pdb/ereader/writer.py
@@ -42,10 +42,12 @@ class Writer(FormatWriter):
         pml = unicode(pmlmlizer.extract_content(oeb_book, self.opts)).encode('cp1252', 'replace')
 
         text, text_sizes = self._text(pml)
-        chapter_index = self._chapter_index(pml)
-        chapter_index = [chapter_index] if chapter_index != '' else []
-        link_index = self._link_index(pml)
-        link_index = [link_index] if link_index != '' else []
+        #chapter_index = self._chapter_index(pml)
+        #chapter_index = [chapter_index] if chapter_index != '' else []
+        chapter_index = []
+        #link_index = self._link_index(pml)
+        #link_index = [link_index] if link_index != '' else []
+        link_index = []
         images = self._images(oeb_book.manifest, pmlmlizer.image_hrefs)
         metadata = [self._metadata(metadata)]
         chapter_index_count = len(chapter_index[0].split('\x00')) - 1 if len(chapter_index) >= 1 else 0

From 1424435bff6674ecc52e8d5f13b0d0801b595573 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Sun, 11 Oct 2009 20:34:33 -0400
Subject: [PATCH 044/120] eReader PDB Output: Generate chapter and link indexes
 properly.

---
 src/calibre/ebooks/pdb/ereader/writer.py | 42 +++++++++++-------------
 1 file changed, 19 insertions(+), 23 deletions(-)

diff --git a/src/calibre/ebooks/pdb/ereader/writer.py b/src/calibre/ebooks/pdb/ereader/writer.py
index b8f2cddd0b..a1203aa9f2 100644
--- a/src/calibre/ebooks/pdb/ereader/writer.py
+++ b/src/calibre/ebooks/pdb/ereader/writer.py
@@ -42,17 +42,11 @@ class Writer(FormatWriter):
         pml = unicode(pmlmlizer.extract_content(oeb_book, self.opts)).encode('cp1252', 'replace')
 
         text, text_sizes = self._text(pml)
-        #chapter_index = self._chapter_index(pml)
-        #chapter_index = [chapter_index] if chapter_index != '' else []
-        chapter_index = []
-        #link_index = self._link_index(pml)
-        #link_index = [link_index] if link_index != '' else []
-        link_index = []
+        chapter_index = self._chapter_index(pml)
+        link_index = self._link_index(pml)
         images = self._images(oeb_book.manifest, pmlmlizer.image_hrefs)
         metadata = [self._metadata(metadata)]
-        chapter_index_count = len(chapter_index[0].split('\x00')) - 1 if len(chapter_index) >= 1 else 0
-        link_index_count = len(link_index[0].split('\x00')) - 1 if len(link_index) >= 1 else 0
-        hr = [self._header_record(len(text), chapter_index_count, link_index_count, len(images))]
+        hr = [self._header_record(len(text), len(chapter_index), len(link_index), len(images))]
 
         '''
         Record order as generated by Dropbook.
@@ -110,12 +104,15 @@ class Writer(FormatWriter):
     def _index_item(self, mo):
         index = ''
         if 'text' in mo.groupdict().keys():
-            index += struct.pack('>L', mo.start('text'))
+            index += struct.pack('>L', mo.start())
+            text = mo.group('text')
             # Strip all PML tags from text
-            text = re.sub(r'\\.', '', mo.group('text'))
+            text = re.sub(r'\\U[0-9a-z]{4}', '', text)
+            text = re.sub(r'\\a\d{3}', '', text)
+            text = re.sub(r'\\.', '', text)
             # Add appropriate spacing to denote the various levels of headings
             if 'val' in mo.groupdict().keys():
-                text = '%s%s' % ('\x20' * 4 * int(mo.group('val')), text)
+                text = '%s%s' % (' ' * 4 * int(mo.group('val')), text)
             index += text
             index += '\x00'
         return index
@@ -126,16 +123,16 @@ class Writer(FormatWriter):
             r'(?s)\\X(?P<val>[0-4])(?P<text>.*?)\\X[0-4]',
             r'(?s)\\C(?P<val>\d)="(?P<text>.+?)"',
         ]
-        index = ''
+        index = []
         for chapter_mark in chapter_marks:
             for mo in re.finditer(chapter_mark, pml):
-                index += self._index_item(mo)
+                index.append(self._index_item(mo))
         return index
 
     def _link_index(self, pml):
-        index = ''
+        index = []
         for mo in re.finditer(r'(?s)\\Q="(?P<text>.+?)"', pml):
-            index += self._index_item(mo)
+            index.append(self._index_item(mo))
         return index
 
     def _images(self, manifest, image_hrefs):
@@ -213,27 +210,26 @@ class Writer(FormatWriter):
         non_text_offset = text_count + 1
 
         if chapter_count > 0:
-            chapter_offset = text_count + 1
+            chapter_offset = non_text_offset
         else:
             chapter_offset = text_count
-
         if link_count > 0:
-            link_offset = chapter_offset + 1
+            link_offset = chapter_offset + chapter_count
         else:
             link_offset = chapter_offset
 
         if image_count > 0:
-            image_data_offset = link_offset + 1
+            image_data_offset = link_offset + link_count
             meta_data_offset = image_data_offset + image_count
             last_data_offset = meta_data_offset + 1
         else:
-            meta_data_offset = link_offset + 1
+            meta_data_offset = link_offset + link_count
             last_data_offset = meta_data_offset + 1
             image_data_offset = last_data_offset
 
-        if chapter_count <= 0:
+        if chapter_count == 0:
             chapter_offset = last_data_offset
-        if link_count <= 0:
+        if link_count == 0:
             link_offset = last_data_offset
 
         record = ''

From d373b5d71e0a01a9dae226573f35e4f9a84ecd21 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Sun, 11 Oct 2009 20:39:50 -0400
Subject: [PATCH 045/120] eReader PDB Output: Generate eReader header correctly
 when no chapter and link indexes are present.

---
 src/calibre/ebooks/pdb/ereader/writer.py | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/src/calibre/ebooks/pdb/ereader/writer.py b/src/calibre/ebooks/pdb/ereader/writer.py
index a1203aa9f2..263f6964bf 100644
--- a/src/calibre/ebooks/pdb/ereader/writer.py
+++ b/src/calibre/ebooks/pdb/ereader/writer.py
@@ -209,14 +209,8 @@ class Writer(FormatWriter):
         compression = 10 # zlib compression.
         non_text_offset = text_count + 1
 
-        if chapter_count > 0:
-            chapter_offset = non_text_offset
-        else:
-            chapter_offset = text_count
-        if link_count > 0:
-            link_offset = chapter_offset + chapter_count
-        else:
-            link_offset = chapter_offset
+        chapter_offset = non_text_offset
+        link_offset = chapter_offset + chapter_count
 
         if image_count > 0:
             image_data_offset = link_offset + link_count

From 1b2efaaf6f81a05dffbbbf41e34de907f1922716 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Mon, 12 Oct 2009 20:12:38 -0400
Subject: [PATCH 046/120] TCR input.

---
 src/calibre/customize/builtins.py  |  2 +
 src/calibre/ebooks/tcr/__init__.py |  5 +++
 src/calibre/ebooks/tcr/input.py    | 72 ++++++++++++++++++++++++++++++
 3 files changed, 79 insertions(+)
 create mode 100644 src/calibre/ebooks/tcr/__init__.py
 create mode 100644 src/calibre/ebooks/tcr/input.py

diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py
index 11317bc312..1660e890fc 100644
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@@ -337,6 +337,7 @@ from calibre.ebooks.pml.input import PMLInput
 from calibre.ebooks.rb.input import RBInput
 from calibre.web.feeds.input import RecipeInput
 from calibre.ebooks.rtf.input import RTFInput
+from calibre.ebooks.tcr.input import TCRInput
 from calibre.ebooks.txt.input import TXTInput
 from calibre.ebooks.lrf.input import LRFInput
 
@@ -385,6 +386,7 @@ plugins += [
     RBInput,
     RecipeInput,
     RTFInput,
+    TCRInput,
     TXTInput,
     LRFInput,
 ]
diff --git a/src/calibre/ebooks/tcr/__init__.py b/src/calibre/ebooks/tcr/__init__.py
new file mode 100644
index 0000000000..9e2aad729c
--- /dev/null
+++ b/src/calibre/ebooks/tcr/__init__.py
@@ -0,0 +1,5 @@
+# -*- coding: utf-8 -*-
+
+__license__ = 'GPL 3'
+__copyright__ = '2009, John Schember <john@nachtimwald.com>'
+__docformat__ = 'restructuredtext en'
diff --git a/src/calibre/ebooks/tcr/input.py b/src/calibre/ebooks/tcr/input.py
new file mode 100644
index 0000000000..066d97a421
--- /dev/null
+++ b/src/calibre/ebooks/tcr/input.py
@@ -0,0 +1,72 @@
+# -*- coding: utf-8 -*-
+
+__license__ = 'GPL 3'
+__copyright__ = '2009, John Schember <john@nachtimwald.com>'
+__docformat__ = 'restructuredtext en'
+
+import os
+
+from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
+from calibre.ebooks.txt.processor import convert_basic, opf_writer, \
+    separate_paragraphs_single_line, separate_paragraphs_print_formatted
+
+class TCRInput(InputFormatPlugin):
+
+    name        = 'TCR Input'
+    author      = 'John Schember'
+    description = 'Convert TCR files to HTML'
+    file_types  = set(['tcr'])
+
+    options = set([
+        OptionRecommendation(name='single_line_paras', recommended_value=False,
+            help=_('Normally calibre treats blank lines as paragraph markers. '
+                'With this option it will assume that every line represents '
+                'a paragraph instead.')),
+        OptionRecommendation(name='print_formatted_paras', recommended_value=False,
+            help=_('Normally calibre treats blank lines as paragraph markers. '
+                'With this option it will assume that every line starting with '
+                'an indent (either a tab or 2+ spaces) represents a paragraph. '
+                'Paragraphs end when the next line that starts with an indent '
+                'is reached.')),
+    ])
+
+    def convert(self, stream, options, file_ext, log, accelerators):
+        txt = []
+
+        log.debug('Checking TCR header...')
+        if stream.read(9) != '!!8-Bit!!':
+            raise ValueError('File %s contaions an invalid TCR header.' % stream.name)
+
+        log.debug('Building string dictionary...')
+        # Dictionary codes that the file contents are broken down into.
+        entries = []
+        for i in xrange(256):
+            entry_len = ord(stream.read(1))
+            entries.append(stream.read(entry_len))
+
+        log.info('Decompressing text...')
+        # Map the values in the file to locations in the string list.
+        entry_loc = stream.read(1)
+        while entry_loc != '': # EOF
+            txt.append(entries[ord(entry_loc)])
+            entry_loc = stream.read(1)
+
+        ienc = options.input_encoding if options.input_encoding else 'utf-8'
+        txt = ''.join(txt).decode(ienc, 'replace')
+
+        log.info('Converting text to OEB...')
+        if options.single_line_paras:
+            txt = separate_paragraphs_single_line(txt)
+        if options.print_formatted_paras:
+            txt = separate_paragraphs_print_formatted(txt)
+        html = convert_basic(txt)
+        with open(os.path.join(os.getcwd(), 'index.html'), 'wb') as index:
+            index.write(html.encode('utf-8'))
+
+        from calibre.ebooks.metadata.meta import get_metadata
+        mi = get_metadata(stream, 'tcr')
+        manifest = [('index.html', None)]
+        spine = ['index.html']
+        opf_writer(os.getcwd(), 'metadata.opf', manifest, spine, mi)
+
+        return os.path.join(os.getcwd(), 'metadata.opf')

From 2782f49dca43a8c57de2240db44d16d1c5938672 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Wed, 14 Oct 2009 06:51:26 -0400
Subject: [PATCH 047/120] Regex Builder: Initially highlight the already set
 regex.

---
 src/calibre/gui2/convert/regex_builder.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/calibre/gui2/convert/regex_builder.py b/src/calibre/gui2/convert/regex_builder.py
index 58c4efbe1d..0ef4f29202 100644
--- a/src/calibre/gui2/convert/regex_builder.py
+++ b/src/calibre/gui2/convert/regex_builder.py
@@ -51,6 +51,7 @@ class RegexBuilder(QDialog, Ui_RegexBuilder):
         self.regex.setText(regex)
         self.regex_valid(True)
         self.highlighter = RegexHighlighter(self.preview.document())
+        self.highlighter.update_regex(regex)
 
         if not db or not book_id:
             self.button_box.addButton(QDialogButtonBox.Open)

From da700ca9330ca603cf7b987a63638497271df056 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Wed, 14 Oct 2009 17:43:43 -0400
Subject: [PATCH 048/120] Regex Builder: Show correct input step.

---
 src/calibre/ebooks/oeb/iterator.py        | 4 ++--
 src/calibre/gui2/convert/regex_builder.py | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/calibre/ebooks/oeb/iterator.py b/src/calibre/ebooks/oeb/iterator.py
index 33cc96f08b..05bbe7410d 100644
--- a/src/calibre/ebooks/oeb/iterator.py
+++ b/src/calibre/ebooks/oeb/iterator.py
@@ -122,7 +122,7 @@ class EbookIterator(object):
                                 else:
                                     print 'Loaded embedded font:', repr(family)
 
-    def __enter__(self):
+    def __enter__(self, raw_only=False):
         self.delete_on_exit = []
         self._tdir = TemporaryDirectory('_ebook_iter')
         self.base  = self._tdir.__enter__()
@@ -139,7 +139,7 @@ class EbookIterator(object):
                 plumber.opts, plumber.input_fmt, self.log,
                 {}, self.base)
 
-        if plumber.input_fmt.lower() in ('pdf', 'rb'):
+        if not raw_only and plumber.input_fmt.lower() in ('pdf', 'rb'):
             self.pathtoopf = create_oebbook(self.log, self.pathtoopf, plumber.opts,
                     plumber.input_plugin)
         if hasattr(self.pathtoopf, 'manifest'):
diff --git a/src/calibre/gui2/convert/regex_builder.py b/src/calibre/gui2/convert/regex_builder.py
index 0ef4f29202..20da8d7aaf 100644
--- a/src/calibre/gui2/convert/regex_builder.py
+++ b/src/calibre/gui2/convert/regex_builder.py
@@ -87,7 +87,7 @@ class RegexBuilder(QDialog, Ui_RegexBuilder):
 
     def open_book(self, pathtoebook):
         self.iterator = EbookIterator(pathtoebook)
-        self.iterator.__enter__()
+        self.iterator.__enter__(raw_only=True)
         text = [u'']
         for path in self.iterator.spine:
             html = open(path, 'rb').read().decode(path.encoding, 'replace')

From c26fd05fce100316da46e91a8de03eeed41ad557 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Sat, 17 Oct 2009 14:17:26 -0400
Subject: [PATCH 049/120] Opus driver generate t2b files and delete helper
 files when removing a book from the device.

---
 src/calibre/devices/cybookg3/driver.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/calibre/devices/cybookg3/driver.py b/src/calibre/devices/cybookg3/driver.py
index 20a7b259ee..6b5e5ff4ed 100644
--- a/src/calibre/devices/cybookg3/driver.py
+++ b/src/calibre/devices/cybookg3/driver.py
@@ -79,7 +79,7 @@ class CYBOOKG3(USBMS):
         return zip(paths, cycle([on_card]))
 
 
-class CYBOOK_OPUS(USBMS):
+class CYBOOK_OPUS(CYBOOKG3):
 
     name           = 'Cybook Opus Device Interface'
     gui_name       = 'Cybook Opus'

From df6d759b3852e77c066ea007815177db079dfa01 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Mon, 19 Oct 2009 07:15:27 -0400
Subject: [PATCH 050/120] TCR Output. Move TCR decompression to TCR compression
 file.

---
 src/calibre/customize/builtins.py     |   2 +
 src/calibre/ebooks/compression/tcr.py | 126 ++++++++++++++++++++++++++
 src/calibre/ebooks/tcr/input.py       |  22 +----
 src/calibre/ebooks/tcr/output.py      |  58 ++++++++++++
 4 files changed, 188 insertions(+), 20 deletions(-)
 create mode 100644 src/calibre/ebooks/compression/tcr.py
 create mode 100644 src/calibre/ebooks/tcr/output.py

diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py
index 1660e890fc..e52d693bb5 100644
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@@ -352,6 +352,7 @@ from calibre.ebooks.pdf.output import PDFOutput
 from calibre.ebooks.pml.output import PMLOutput
 from calibre.ebooks.rb.output import RBOutput
 from calibre.ebooks.rtf.output import RTFOutput
+from calibre.ebooks.tcr.output import TCROutput
 from calibre.ebooks.txt.output import TXTOutput
 
 from calibre.customize.profiles import input_profiles, output_profiles
@@ -402,6 +403,7 @@ plugins += [
     PMLOutput,
     RBOutput,
     RTFOutput,
+    TCROutput,
     TXTOutput,
 ]
 plugins += [
diff --git a/src/calibre/ebooks/compression/tcr.py b/src/calibre/ebooks/compression/tcr.py
new file mode 100644
index 0000000000..40bed613ec
--- /dev/null
+++ b/src/calibre/ebooks/compression/tcr.py
@@ -0,0 +1,138 @@
+# -*- coding: utf-8 -*-
+
+__license__ = 'GPL 3'
+__copyright__ = '2009, John Schember <john@nachtimwald.com>'
+__docformat__ = 'restructuredtext en'
+
+import re
+
+def decompress(stream):
+    '''
+    Decompress TCR data and return the text it contains.
+
+    :param stream: Seekable file like object holding the TCR data. It is
+        rewound to the beginning before it is read.
+    :return: The decompressed text as a string.
+    :raises ValueError: If the header is wrong or the code list is cut short.
+    '''
+    name = getattr(stream, 'name', '<stream>')
+    stream.seek(0)
+    if stream.read(9) != '!!8-Bit!!':
+        raise ValueError('File %s contains an invalid TCR header.' % name)
+
+    # Codes that the file contents are broken down into. Each of the 256
+    # entries is a 1 byte length followed by that many characters.
+    entries = []
+    for i in range(256):
+        entry_len = stream.read(1)
+        entry = stream.read(ord(entry_len)) if entry_len else ''
+        if not entry_len or len(entry) != ord(entry_len):
+            raise ValueError('File %s contains a truncated TCR code list.' % name)
+        entries.append(entry)
+
+    # Map the values in the file to locations in the string list. All that
+    # is left in the stream is coded text so it is read in one call.
+    return ''.join([entries[ord(c)] for c in stream.read()])
+
+
+def compress(txt, level=5):
+    '''
+    TCR compression takes the form header+code_list+coded_text.
+    The header is always "!!8-Bit!!". The code list is a list of 256 strings.
+    The list takes the form 1 byte length and then a string. Each position in
+    the list corresponds to a code found in the file. The coded text is a
+    string of character values. For instance the character Q represents the
+    value 81 which corresponds to the string in the code list at position 81.
+
+    :param txt: The text to compress. It may hold at most 256 different
+        characters because every character needs a code of its own.
+    :param level: Compression level. 1 or lower is the fastest, 10 or higher
+        compresses the most. It sets how many new codes are created before
+        merging is tried again.
+    :return: The complete TCR data as a string.
+    :raises ValueError: If txt holds more than 256 different characters.
+    '''
+    # Turn each unique character into a coded value.
+    # The code of the string at a given position are represented by the position
+    # they occupy in the list.
+    codes = list(set(txt))
+    if len(codes) > 256:
+        raise ValueError('TCR supports at most 256 unique characters, found %i.' % len(codes))
+    codes.extend([''] * (256 - len(codes)))
+    # Set the compression level.
+    if level <= 1:
+        new_length = 256
+    elif level >= 10:
+        new_length = 1
+    else:
+        new_length = int(256 * (10 - level) * .1)
+    new_length = max(1, new_length)
+    # Replace txt with codes.
+    lookup = dict((c, chr(i)) for i, c in enumerate(codes) if c != '')
+    txt = ''.join([lookup[c] for c in txt])
+    # Start compressing the text.
+    new = True
+    merged = True
+    while new or merged:
+        # Merge codes that always follow another code.
+        merge = []
+        for i in range(256):
+            if codes[i] != '':
+                # Find all codes that are next to i.
+                fall = set(re.findall('(?msu)%s.' % re.escape(chr(i)), txt))
+                # 1 if only one code comes after i.
+                if len(fall) == 1:
+                    # We are searching codes and each code is always 1 character.
+                    merge.append((i, ord(fall.pop()[1:2])))
+        merged = False
+        for i, j in merge:
+            pair = chr(i) + chr(j)
+            pairs = txt.count(pair)
+            # Merging changes what every i in the text stands for, so each i
+            # must be part of a pair right now. The search above misses an i
+            # at the very end of the text and earlier merges in this pass may
+            # have changed the text since the candidates were collected.
+            wanted = pairs * 2 if i == j else pairs
+            if pairs == 0 or txt.count(chr(i)) != wanted:
+                continue
+            # Only merge if the total length of the string represented by
+            # code is less than 256 as the length is stored in one byte.
+            if len(codes[i]) + len(codes[j]) >= 256:
+                continue
+            # Merge the string for j into the string for i.
+            codes[i] = codes[i] + codes[j]
+            txt = txt.replace(pair, chr(i))
+            if chr(j) not in txt:
+                codes[j] = ''
+            merged = True
+        new = False
+        if '' in codes:
+            # Create a list of codes based on combinations of codes that are next
+            # to each other. The amount of savings for the new code is calculated.
+            new_codes = []
+            for c in set(re.findall('(?msu)..', txt)):
+                i = ord(c[0:1])
+                j = ord(c[1:2])
+                combined = codes[i] + codes[j]
+                if combined in codes:
+                    continue
+                savings = txt.count(c) - len(combined)
+                if savings > 2 and len(combined) < 256:
+                    new_codes.append((savings, i, j, combined))
+            if new_codes:
+                new = True
+                # Sort the codes from highest savings to lowest.
+                new_codes.sort(key=lambda x: x[0], reverse=True)
+                # The shorter new_length the more times merging will happen
+                # giving more chances for better codes to be created. However,
+                # the shorter new_length the longer it will take to compress.
+                for savings, i, j, combined in new_codes[:new_length]:
+                    if '' not in codes:
+                        break
+                    c = codes.index('')
+                    codes[c] = combined
+                    txt = txt.replace(chr(i) + chr(j), chr(c))
+    # Generate the code dictionary.
+    header = [chr(len(code)) + code for code in codes]
+    # Join the identifier with the dictionary and coded text.
+    return '!!8-Bit!!' + ''.join(header) + txt
diff --git a/src/calibre/ebooks/tcr/input.py b/src/calibre/ebooks/tcr/input.py
index 066d97a421..67fa6ac66e 100644
--- a/src/calibre/ebooks/tcr/input.py
+++ b/src/calibre/ebooks/tcr/input.py
@@ -9,6 +9,7 @@ import os
 from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
 from calibre.ebooks.txt.processor import convert_basic, opf_writer, \
     separate_paragraphs_single_line, separate_paragraphs_print_formatted
+from calibre.ebooks.compression.tcr import decompress
 
 class TCRInput(InputFormatPlugin):
 
@@ -31,28 +32,9 @@ class TCRInput(InputFormatPlugin):
     ])
 
     def convert(self, stream, options, file_ext, log, accelerators):
-        txt = []
-
-        log.debug('Checking TCR header...')
-        if stream.read(9) != '!!8-Bit!!':
-            raise ValueError('File %s contaions an invalid TCR header.' % stream.name)
-
-        log.debug('Building string dictionary...')
-        # Dictionary codes that the file contents are broken down into.
-        entries = []
-        for i in xrange(256):
-            entry_len = ord(stream.read(1))
-            entries.append(stream.read(entry_len))
-
         log.info('Decompressing text...')
-        # Map the values in the file to locations in the string list.
-        entry_loc = stream.read(1)
-        while entry_loc != '': # EOF
-            txt.append(entries[ord(entry_loc)])
-            entry_loc = stream.read(1)
-
         ienc = options.input_encoding if options.input_encoding else 'utf-8'
-        txt = ''.join(txt).decode(ienc, 'replace')
+        txt = decompress(stream).decode(ienc, 'replace')
 
         log.info('Converting text to OEB...')
         if options.single_line_paras:
diff --git a/src/calibre/ebooks/tcr/output.py b/src/calibre/ebooks/tcr/output.py
new file mode 100644
index 0000000000..8aed995c44
--- /dev/null
+++ b/src/calibre/ebooks/tcr/output.py
@@ -0,0 +1,77 @@
+# -*- coding: utf-8 -*-
+
+__license__ = 'GPL 3'
+__copyright__ = '2009, John Schember <john@nachtimwald.com>'
+__docformat__ = 'restructuredtext en'
+
+import os
+
+from calibre.customize.conversion import OutputFormatPlugin, \
+    OptionRecommendation
+from calibre.ebooks.txt.txtml import TXTMLizer
+from calibre.ebooks.compression.tcr import compress
+
+class TCROutput(OutputFormatPlugin):
+    '''
+    Output plugin that writes the text of a book as a TCR compressed file.
+    '''
+
+    name = 'TCR Output'
+    author = 'John Schember'
+    file_type = 'tcr'
+
+    options = set([
+        OptionRecommendation(name='output_encoding', recommended_value='utf-8',
+            level=OptionRecommendation.LOW,
+            help=_('Specify the character encoding of the output document. ' \
+            'The default is utf-8.')),
+        OptionRecommendation(name='compression_level', recommended_value=5,
+            level=OptionRecommendation.LOW,
+            help=_('Speciy the compression level to use. Scale 1 - 10. 1 ' \
+            'being the lowest compression but the fastest and 10 being the ' \
+            'highest compression but the slowest.')),
+    ])
+
+    def convert(self, oeb_book, output_path, input_plugin, opts, log):
+        '''
+        Convert oeb_book to text, compress it and write it to output_path.
+
+        :param oeb_book: The book handed to TXTMLizer.extract_content.
+        :param output_path: A path, or an object with a write method which
+            is then written to directly and left open.
+        :param input_plugin: Not used by this plugin.
+        :param opts: Conversion options. output_encoding and
+            compression_level are read. The TXT layout options set below
+            are overwritten on it.
+        :param log: Log used for progress messages.
+        '''
+        opts.flush_paras = False
+        opts.max_line_length = 0
+        opts.force_max_line_length = False
+        opts.indent_paras = False
+
+        writer = TXTMLizer(log)
+        txt = writer.extract_content(oeb_book, opts).encode(opts.output_encoding, 'replace')
+
+        log.info('Compressing text...')
+        txt = compress(txt, opts.compression_level)
+
+        # The output is only opened once the data is ready so a failed
+        # conversion does not leave an empty file behind.
+        close = not hasattr(output_path, 'write')
+        if close:
+            out_dir = os.path.dirname(output_path)
+            if out_dir != '' and not os.path.exists(out_dir):
+                os.makedirs(out_dir)
+            out_stream = open(output_path, 'wb')
+        else:
+            out_stream = output_path
+
+        try:
+            out_stream.seek(0)
+            out_stream.truncate()
+            out_stream.write(txt)
+        finally:
+            # Only close streams opened here, even when writing fails.
+            if close:
+                out_stream.close()

From d58f7a92d673ba8f5612263a7e76afc719faeffc Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Tue, 20 Oct 2009 18:37:22 -0400
Subject: [PATCH 051/120] Fix bug #3806: eReader PDB footnote and sidebar
 anchors set properly during conversion.

---
 src/calibre/ebooks/pml/pmlconverter.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/calibre/ebooks/pml/pmlconverter.py b/src/calibre/ebooks/pml/pmlconverter.py
index c72a21a5f9..7133e3f251 100644
--- a/src/calibre/ebooks/pml/pmlconverter.py
+++ b/src/calibre/ebooks/pml/pmlconverter.py
@@ -41,14 +41,14 @@ PML_HTML_RULES = [
     (re.compile(r'\\q="(?P<target>#.+?)"(?P<text>.*?)\\q', re.DOTALL), lambda match: '<a href="%s">%s</a>' % (match.group('target'), match.group('text')) if match.group('text') else ''),
     (re.compile(r'\\Q="(?P<target>.+?)"'), lambda match: '<span id="%s"></span>' % match.group('target')),
     (re.compile(r'\\-'), lambda match: ''),
-    (re.compile(r'\\Fn="(?P<target>.+?)"(?P<text>.*?)\\Fn'), lambda match: '<a href="#footnote-%s">%s</a>' % (match.group('target'), match.group('text')) if match.group('text') else ''),
-    (re.compile(r'\\Sd="(?P<target>.+?)"(?P<text>.*?)\\Sd'), lambda match: '<a href="#sidebar-%s">%s</a>' % (match.group('target'), match.group('text')) if match.group('text') else ''),
+    (re.compile(r'\\Fn="(?P<target>.+?)"(?P<text>.*?)\\Fn'), lambda match: '<a href="#fns-%s">%s</a>' % (match.group('target'), match.group('text')) if match.group('text') else ''),
+    (re.compile(r'\\Sd="(?P<target>.+?)"(?P<text>.*?)\\Sd'), lambda match: '<a href="#fns-%s">%s</a>' % (match.group('target'), match.group('text')) if match.group('text') else ''),
     # Just italicize index items as that is how the eReader software renders them.
     (re.compile(r'\\I(?P<text>.*?)\\I', re.DOTALL), lambda match: '<i>%s</i>' % match.group('text') if match.group('text') else ''),
 
     # Sidebar and Footnotes
-    (re.compile(r'<sidebar\s+id="(?P<target>.+?)">\s*(?P<text>.*?)\s*</sidebar>', re.DOTALL), lambda match: '<div id="sidebar-%s">%s</div>' % (match.group('target'), match.group('text')) if match.group('text') else ''),
-    (re.compile(r'<footnote\s+id="(?P<target>.+?)">\s*(?P<text>.*?)\s*</footnote>', re.DOTALL), lambda match: '<div id="footnote-%s">%s</div>' % (match.group('target'), match.group('text')) if match.group('text') else ''),
+    (re.compile(r'<sidebar\s+id="(?P<target>.+?)">\s*(?P<text>.*?)\s*</sidebar>', re.DOTALL), lambda match: '<div id="fns-%s">%s</div>' % (match.group('target'), match.group('text')) if match.group('text') else ''),
+    (re.compile(r'<footnote\s+id="(?P<target>.+?)">\s*(?P<text>.*?)\s*</footnote>', re.DOTALL), lambda match: '<div id="fns-%s">%s</div>' % (match.group('target'), match.group('text')) if match.group('text') else ''),
 
     # eReader files are one paragraph per line.
     # This forces the lines to wrap properly.
@@ -80,5 +80,5 @@ def pml_to_html(pml):
 def footnote_sidebar_to_html(id, pml):
     if id.startswith('\x01'):
         id = id[2:]
-    html = '<div id="sidebar-%s"><dt>%s</dt></div><dd>%s</dd>' % (id, id, pml_to_html(pml))
+    html = '<div id="fns-%s"><dt>%s</dt></div><dd>%s</dd>' % (id, id, pml_to_html(pml))
     return html

From 8a63b67c7aae0bba9cc40455858438785bb7e17d Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Wed, 21 Oct 2009 19:49:59 -0400
Subject: [PATCH 052/120] PML Input: Fix \w and \s tags.

---
 src/calibre/ebooks/pml/pmlconverter.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/calibre/ebooks/pml/pmlconverter.py b/src/calibre/ebooks/pml/pmlconverter.py
index 7133e3f251..dafe1e4f6a 100644
--- a/src/calibre/ebooks/pml/pmlconverter.py
+++ b/src/calibre/ebooks/pml/pmlconverter.py
@@ -26,9 +26,9 @@ PML_HTML_RULES = [
     (re.compile(r'\\v(?P<text>.*?)\\v', re.DOTALL), lambda match: '<!-- %s -->' % match.group('text') if match.group('text') else ''),
     (re.compile(r'\\t(?P<text>.*?)\\t', re.DOTALL), lambda match: '<div style="margin-left: 5%%;">%s</div>' % match.group('text') if match.group('text') else ''),
     (re.compile(r'\\T="(?P<val>\d+)%*"(?P<text>.*?)$', re.MULTILINE), lambda match: r'<div style="margin-left: %s%%;">%s</div>' % (match.group('val'), match.group('text')) if match.group('text') else ''),
-    (re.compile(r'\\w="(?P<val>\d+)%"'), lambda match: '<hr width="%s%%" />' % match.group('val')),
+    (re.compile(r'\\w="(?P<val>\d+)%"'), lambda match: '<hr width="%s" />' % match.group('val')),
     (re.compile(r'\\n'), lambda match: ''),
-    (re.compile(r'\\s'), lambda match: ''),
+    (re.compile(r'\\s(?P<text>.*?)\\s', re.DOTALL), lambda match: '<span style="font-size: 50%%">%s</span>' % match.group('text') if match.group('text') else ''),
     (re.compile(r'\\b(?P<text>.*?)\\b', re.DOTALL), lambda match: '<b>%s</b>' % match.group('text') if match.group('text') else ''), # \b is deprecated; \B should be used instead.
     (re.compile(r'\\l(?P<text>.*?)\\l', re.DOTALL), lambda match: '<span style="font-size: 175%%">%s</span>' % match.group('text') if match.group('text') else ''),
     (re.compile(r'\\B(?P<text>.*?)\\B', re.DOTALL), lambda match: '<b>%s</b>' % match.group('text') if match.group('text') else ''),

From feb4feaae781e0ba42fc7bb39ee5c9f310faf449 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Wed, 21 Oct 2009 21:11:17 -0400
Subject: [PATCH 053/120] PML Output: Use set cover as cover or use first image
 as cover if no cover is specified.

---
 src/calibre/ebooks/pml/pmlml.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/calibre/ebooks/pml/pmlml.py b/src/calibre/ebooks/pml/pmlml.py
index b6a62e7c1f..7b1813256e 100644
--- a/src/calibre/ebooks/pml/pmlml.py
+++ b/src/calibre/ebooks/pml/pmlml.py
@@ -95,6 +95,9 @@ class PMLMLizer(object):
 
     def get_cover_page(self):
         output = u''
+        if 'cover' in self.oeb_book.guide:
+            output += '\\m="cover.png"\n'
+            self.image_hrefs[self.oeb_book.guide['cover'].href] = 'cover.png'
         if 'titlepage' in self.oeb_book.guide:
             self.log.debug('Generating title page...')
             href = self.oeb_book.guide['titlepage'].href
@@ -191,7 +194,10 @@ class PMLMLizer(object):
         if tag in IMAGE_TAGS:
             if elem.attrib.get('src', None):
                 if page.abshref(elem.attrib['src']) not in self.image_hrefs.keys():
-                    self.image_hrefs[page.abshref(elem.attrib['src'])] = image_name('%s' % len(self.image_hrefs.keys()), self.image_hrefs.keys()).strip('\x00')
+                    if len(self.image_hrefs.keys()) == 0:
+                        self.image_hrefs[page.abshref(elem.attrib['src'])] = 'cover.png'
+                    else:
+                        self.image_hrefs[page.abshref(elem.attrib['src'])] = image_name('%s' % len(self.image_hrefs.keys()), self.image_hrefs.keys()).strip('\x00')
                 text.append('\\m="%s"' % self.image_hrefs[page.abshref(elem.attrib['src'])])
         if tag == 'hr':
             w = '\\w'

From 53a97fc98c633f1f8941831939586ee383aa6461 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Fri, 23 Oct 2009 21:11:16 -0400
Subject: [PATCH 054/120] PML Output: Add .png to image names. Fix removing
 excessive newlines from PML output. PMLZ Output: Name images correctly.

---
 src/calibre/ebooks/pdb/ereader/__init__.py |  6 +--
 src/calibre/ebooks/pml/output.py           | 16 +++----
 src/calibre/ebooks/pml/pmlml.py            | 50 ++++++++++------------
 3 files changed, 32 insertions(+), 40 deletions(-)

diff --git a/src/calibre/ebooks/pdb/ereader/__init__.py b/src/calibre/ebooks/pdb/ereader/__init__.py
index 3f08b068cb..89560c9448 100644
--- a/src/calibre/ebooks/pdb/ereader/__init__.py
+++ b/src/calibre/ebooks/pdb/ereader/__init__.py
@@ -16,11 +16,11 @@ def image_name(name, taken_names=[]):
         cut = len(name) - 32
         names = name[:10]
         namee = name[10+cut:]
-        name = names + namee
+        name = '%s%s.png' % (names, namee)
     
     while name in taken_names:
-        for i in xrange(9999999999999999999999999999999):
-            name = '%s%s' % (name[:-len('%s' % i)], i)
+        for i in xrange(999999999999999999999999999):
+            name = '%s%s.png' % (name[:-len('%s' % i)], i)
 
     name = name.ljust(32, '\x00')[:32]
     
diff --git a/src/calibre/ebooks/pml/output.py b/src/calibre/ebooks/pml/output.py
index 360e63c98e..774fc4c8d1 100644
--- a/src/calibre/ebooks/pml/output.py
+++ b/src/calibre/ebooks/pml/output.py
@@ -18,7 +18,7 @@ from calibre.customize.conversion import OutputFormatPlugin
 from calibre.customize.conversion import OptionRecommendation
 from calibre.ptempfile import TemporaryDirectory
 from calibre.utils.zipfile import ZipFile
-from calibre.ebooks.oeb.base import OEB_IMAGES
+from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES
 from calibre.ebooks.pml.pmlml import PMLMLizer
 
 class PMLOutput(OutputFormatPlugin):
@@ -40,28 +40,26 @@ class PMLOutput(OutputFormatPlugin):
     def convert(self, oeb_book, output_path, input_plugin, opts, log):
         with TemporaryDirectory('_pmlz_output') as tdir:
             pmlmlizer = PMLMLizer(log)
-            content = pmlmlizer.extract_content(oeb_book, opts)
+            pml = unicode(pmlmlizer.extract_content(oeb_book, opts))
             with open(os.path.join(tdir, 'index.pml'), 'wb') as out:
-                out.write(content.encode(opts.output_encoding, 'replace'))
+                out.write(pml.encode(opts.output_encoding, 'replace'))
 
-            self.write_images(oeb_book.manifest, tdir)
+            self.write_images(oeb_book.manifest, pmlmlizer.image_hrefs, tdir)
 
             log.debug('Compressing output...')
             pmlz = ZipFile(output_path, 'w')
             pmlz.add_dir(tdir)
 
-    def write_images(self, manifest, out_dir):
+    def write_images(self, manifest, image_hrefs, out_dir):
         for item in manifest:
-            if item.media_type in OEB_IMAGES:
+            if item.media_type in OEB_RASTER_IMAGES and item.href in image_hrefs.keys():
                 im = Image.open(cStringIO.StringIO(item.data))
 
                 data = cStringIO.StringIO()
                 im.save(data, 'PNG')
                 data = data.getvalue()
 
-                name = os.path.splitext(os.path.basename(item.href))[0] + '.png'
-                path = os.path.join(out_dir, name)
+                path = os.path.join(out_dir, image_hrefs[item.href])
 
                 with open(path, 'wb') as out:
                     out.write(data)
-
diff --git a/src/calibre/ebooks/pml/pmlml.py b/src/calibre/ebooks/pml/pmlml.py
index 7b1813256e..862f0ea0ae 100644
--- a/src/calibre/ebooks/pml/pmlml.py
+++ b/src/calibre/ebooks/pml/pmlml.py
@@ -8,7 +8,6 @@ __docformat__ = 'restructuredtext en'
 Transform OEB content into PML markup
 '''
 
-import os
 import re
 
 from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace
@@ -138,16 +137,13 @@ class PMLMLizer(object):
         aid = self.link_hrefs[aid]
         return u'\\Q="%s"' % aid
 
+    def remove_newlines(self, text):
+        text = text.replace('\r\n', ' ')
+        text = text.replace('\n', ' ')
+        text = text.replace('\r', ' ')
+        return text
+
     def clean_text(self, text):
-        # Remove excess spaces at beginning and end of lines
-        text = re.sub('(?m)^[ ]+', '', text)
-        text = re.sub('(?m)[ ]+$', '', text)
-
-        # Remove excessive newlines
-        text = re.sub('%s{1,1}' % os.linesep, '%s%s' % (os.linesep, os.linesep), text)
-        text = re.sub('%s{3,}' % os.linesep, '%s%s' % (os.linesep, os.linesep), text)
-        text = re.sub('[ ]{2,}', ' ', text)
-
         # Remove excessive \p tags
         text = re.sub(r'\\p\s*\\p', '', text)
 
@@ -166,6 +162,17 @@ class PMLMLizer(object):
         # Turn all unicode characters into their PML hex equivelent
         text = re.sub('[^\x00-\x7f]', lambda x: '\\U%04x' % ord(x.group()), text)
 
+        # Remove excess spaces at beginning and end of lines
+        text = re.sub('(?m)^[ ]+', '', text)
+        text = re.sub('(?m)[ ]+$', '', text)
+
+        # Remove excessive spaces
+        text = re.sub('[ ]{2,}', ' ', text)
+
+        # Remove excessive newlines
+        text = re.sub('\n[ ]+\n', '\n\n', text)
+        text = re.sub('\n\n\n+', '\n\n', text)
+
         return text
 
     def dump_text(self, elem, stylizer, page, tag_stack=[]):
@@ -197,7 +204,7 @@ class PMLMLizer(object):
                     if len(self.image_hrefs.keys()) == 0:
                         self.image_hrefs[page.abshref(elem.attrib['src'])] = 'cover.png'
                     else:
-                        self.image_hrefs[page.abshref(elem.attrib['src'])] = image_name('%s' % len(self.image_hrefs.keys()), self.image_hrefs.keys()).strip('\x00')
+                        self.image_hrefs[page.abshref(elem.attrib['src'])] = image_name('%s.png' % len(self.image_hrefs.keys()), self.image_hrefs.keys()).strip('\x00')
                 text.append('\\m="%s"' % self.image_hrefs[page.abshref(elem.attrib['src'])])
         if tag == 'hr':
             w = '\\w'
@@ -251,7 +258,7 @@ class PMLMLizer(object):
 
         # Proccess tags that contain text.
         if hasattr(elem, 'text') and elem.text != None and elem.text.strip() != '':
-            text.append(self.elem_text(elem, tag_stack))
+            text.append(self.remove_newlines(elem.text))
 
         for item in elem:
             text += self.dump_text(item, stylizer, page, tag_stack)
@@ -261,32 +268,19 @@ class PMLMLizer(object):
             close_tag_list.insert(0, tag_stack.pop())
         text += self.close_tags(close_tag_list)
         if tag in SEPARATE_TAGS:
-            text.append(os.linesep + os.linesep)
+            text.append('\n\n')
 
         if 'block' not in tag_stack:
-            text.append(os.linesep + os.linesep)
+            text.append('\n\n')
 
         #if style['page-break-after'] == 'always':
         #    text.append('\\p')
 
         if hasattr(elem, 'tail') and elem.tail != None and elem.tail.strip() != '':
-            text.append(self.elem_tail(elem, tag_stack))
+            text.append(self.remove_newlines(elem.tail))
 
         return text
 
-    def elem_text(self, elem, tag_stack):
-        return self.block_text(elem.text, 'block' in tag_stack)
-
-    def elem_tail(self, elem, tag_stack):
-        return self.block_text(elem.tail, 'block' in tag_stack)
-
-    def block_text(self, text, in_block):
-        if in_block:
-            text = text.replace('\n\r', ' ')
-            text = text.replace('\n', ' ')
-            text = text.replace('\r', ' ')
-        return text
-
     def close_tags(self, tags):
         text = [u'']
         for i in range(0, len(tags)):

From 4d3af2132d481c0994476dc1c25943ac0bea5428 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Sun, 25 Oct 2009 10:16:44 -0400
Subject: [PATCH 055/120] Fix bug with removing spaces too aggressively.

---
 src/calibre/ebooks/fb2/fb2ml.py |  4 ++--
 src/calibre/ebooks/pml/pmlml.py |  4 ++--
 src/calibre/ebooks/rb/rbml.py   |  4 ++--
 src/calibre/ebooks/txt/txtml.py | 10 +++++-----
 4 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/src/calibre/ebooks/fb2/fb2ml.py b/src/calibre/ebooks/fb2/fb2ml.py
index aaf8361b99..16c822d263 100644
--- a/src/calibre/ebooks/fb2/fb2ml.py
+++ b/src/calibre/ebooks/fb2/fb2ml.py
@@ -241,7 +241,7 @@ class FB2MLizer(object):
             if not fb2_text or fb2_text[-1] != ' ':
                 fb2_text.append(' ')
 
-        if hasattr(elem, 'text') and elem.text != None:
+        if hasattr(elem, 'text') and elem.text:
             if 'p' not in tag_stack:
                 fb2_text.append('<p>%s</p>' % prepare_string_for_xml(elem.text))
             else:
@@ -255,7 +255,7 @@ class FB2MLizer(object):
             close_tag_list.insert(0, tag_stack.pop())
         fb2_text += self.close_tags(close_tag_list)
 
-        if hasattr(elem, 'tail') and elem.tail != None:
+        if hasattr(elem, 'tail') and elem.tail:
             if 'p' not in tag_stack:
                 fb2_text.append('<p>%s</p>' % prepare_string_for_xml(elem.tail))
             else:
diff --git a/src/calibre/ebooks/pml/pmlml.py b/src/calibre/ebooks/pml/pmlml.py
index 862f0ea0ae..27e88eb48b 100644
--- a/src/calibre/ebooks/pml/pmlml.py
+++ b/src/calibre/ebooks/pml/pmlml.py
@@ -257,7 +257,7 @@ class PMLMLizer(object):
         # margin
 
         # Proccess tags that contain text.
-        if hasattr(elem, 'text') and elem.text != None and elem.text.strip() != '':
+        if hasattr(elem, 'text') and elem.text:
             text.append(self.remove_newlines(elem.text))
 
         for item in elem:
@@ -276,7 +276,7 @@ class PMLMLizer(object):
         #if style['page-break-after'] == 'always':
         #    text.append('\\p')
 
-        if hasattr(elem, 'tail') and elem.tail != None and elem.tail.strip() != '':
+        if hasattr(elem, 'tail') and elem.tail:
             text.append(self.remove_newlines(elem.tail))
 
         return text
diff --git a/src/calibre/ebooks/rb/rbml.py b/src/calibre/ebooks/rb/rbml.py
index 945e21c994..c293880343 100644
--- a/src/calibre/ebooks/rb/rbml.py
+++ b/src/calibre/ebooks/rb/rbml.py
@@ -191,7 +191,7 @@ class RBMLizer(object):
                 tag_stack.append(style_tag)
 
         # Proccess tags that contain text.
-        if hasattr(elem, 'text') and elem.text != None and elem.text.strip() != '':
+        if hasattr(elem, 'text') and elem.text:
             text.append(prepare_string_for_xml(elem.text))
 
         for item in elem:
@@ -203,7 +203,7 @@ class RBMLizer(object):
 
         text += self.close_tags(close_tag_list)
 
-        if hasattr(elem, 'tail') and elem.tail != None and elem.tail.strip() != '':
+        if hasattr(elem, 'tail') and elem.tail:
                 text.append(prepare_string_for_xml(elem.tail))
 
         return text
diff --git a/src/calibre/ebooks/txt/txtml.py b/src/calibre/ebooks/txt/txtml.py
index 59c3ea671a..45383675b4 100644
--- a/src/calibre/ebooks/txt/txtml.py
+++ b/src/calibre/ebooks/txt/txtml.py
@@ -94,7 +94,7 @@ class TXTMLizer(object):
         text = re.sub('(?<=.)%s(?=.)' % os.linesep, ' ', text)
 
         # Remove multiple spaces.
-        text = re.sub('[  ]+', ' ', text)
+        text = re.sub('[ ]{2,}', ' ', text)
 
         # Remove excessive newlines.
         text = re.sub('\n[ ]+\n', '\n\n', text)
@@ -172,15 +172,15 @@ class TXTMLizer(object):
         # Are we in a paragraph block?
         if tag in BLOCK_TAGS or style['display'] in BLOCK_STYLES:
             in_block = True
-            if not end.endswith(u'\n\n') and hasattr(elem, 'text') and elem.text != None and elem.text.strip() != '':
+            if not end.endswith(u'\n\n') and hasattr(elem, 'text') and elem.text:
                 text.append(u'\n\n')
 
         if tag in SPACE_TAGS:
-            if not end.endswith('u ') and hasattr(elem, 'text') and elem.text != None and elem.text.strip() != '':
+            if not end.endswith('u ') and hasattr(elem, 'text') and elem.text:
                 text.append(u' ')
 
         # Process tags that contain text.
-        if hasattr(elem, 'text') and elem.text != None and elem.text.strip() != '':
+        if hasattr(elem, 'text') and elem.text:
             text.append(elem.text)
 
         for item in elem:
@@ -192,7 +192,7 @@ class TXTMLizer(object):
         if in_block:
             text.append(u'\n\n')
 
-        if hasattr(elem, 'tail') and elem.tail != None and elem.tail.strip() != '':
+        if hasattr(elem, 'tail') and elem.tail:
             text.append(elem.tail)
 
         return text

From abe52807cb074c5ed10622ca303146b49c3ce630 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Mon, 26 Oct 2009 19:16:52 -0400
Subject: [PATCH 056/120] Add to conversion documentation.

---
 src/calibre/manual/conversion.rst | 59 +++++++++++++++++++++++++++++++
 1 file changed, 59 insertions(+)

diff --git a/src/calibre/manual/conversion.rst b/src/calibre/manual/conversion.rst
index 1f23d43419..ac25dc8121 100644
--- a/src/calibre/manual/conversion.rst
+++ b/src/calibre/manual/conversion.rst
@@ -306,6 +306,11 @@ the headers and footers and remove them. Remember that they operate on the inter
 by the conversion pipeline. There is also a wizard to help you customize the regular expressions for
 your document.
 
+The header and footer regular expressions are used in conjunction with the remove header and footer options.
+If the remove option is not enabled the regular expression will not be applied to remove the matched text.
+As stated, the removal works by using a Python regular expression. All matched text is simply removed from
+the document. You can learn more about regular expressions and the syntax at http://docs.python.org/library/re.html
+
 Miscellaneous
 ~~~~~~~~~~~~~~
 
@@ -419,7 +424,61 @@ generating the Table of Contents much simpler. It is called BookCreator and is a
 Convert TXT documents
 ~~~~~~~~~~~~~~~~~~~~~~
 
+TXT documents can contain any imaginable layout. Since TXT documents provide no way to explicitly mark parts of
+the text, by default |app| only groups parts of the document into paragraphs. The default is to assume one or
+more blank lines are a paragraph boundary.
+
+.. code-block:: txt
+
+    This is the first.
+    
+    This is the
+    second paragraph.
+
+TXT input supports a number of options to differentiate how paragraphs are detected.
+
+:guilabel:`Treat each line as a paragraph`
+    Assumes that every line is a paragraph.
+    
+    .. code-block:: txt
+    
+        This is the first.
+        This is the second.
+        This is the third.
+    
+:guilabel:`Assume print formatting`
+    Assumes that every paragraph starts with an indent (either a tab or 2+ spaces). Paragraphs end when
+    the next line that starts with an indent is reached.
+    
+    .. code-block:: txt
+    
+          This is the
+        first.
+          This is the second.
+        
+          This is the
+        third.
+
+:guilabel:`Process using markdown`
+    |app| also supports running TXT input through a transformation preprocessor known as markdown. Markdown
+    allows for basic formatting to be added to the document and is an easy way to differentiate non-paragraph
+    elements such as chapter headings. Marking chapter headings with # and setting the chapter XPath detection
+    expression to "//h:h1" is the easiest way to have a proper table of contents generated from a TXT document.
+    You can learn more about the markdown syntax at http://daringfireball.net/projects/markdown/syntax
+
 Convert PDF documents
 ~~~~~~~~~~~~~~~~~~~~~~
 
+PDF documents are one of the worst formats to convert from. They are a fixed page size and text placement format,
+meaning it is very difficult to determine where one paragraph ends and another begins. |app| will try to unwrap
+paragraphs using a configurable :guilabel:`Line Un-Wrapping Factor`. This is a scale used to determine the length at which a line should be unwrapped. Valid values are decimals
+between 0 and 1. The default is 0.5, which is the median line length. Lower this value to include more
+text in the unwrapping. Increase to include less.
+
+Also, they often have headers and footers as part of the document that will become included with the text.
+Use the options to remove headers and footers to mitigate this issue. If the headers and footers are not
+removed from the text it can throw off the paragraph unwrapping.
+
+Some limitations of PDF input are that complex, multi-column, and image-based documents are not supported.
+Extraction of SVG images from within the document is also not supported.
 

From 68e3acd43ab05ae8ba417b35840e067b0115ca10 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Mon, 26 Oct 2009 19:55:10 -0400
Subject: [PATCH 057/120] Get header and footer regex matching working better.

---
 src/calibre/ebooks/conversion/preprocess.py | 11 +++++------
 src/calibre/ebooks/oeb/iterator.py          |  4 ++--
 src/calibre/gui2/convert/regex_builder.py   |  6 +++---
 3 files changed, 10 insertions(+), 11 deletions(-)

diff --git a/src/calibre/ebooks/conversion/preprocess.py b/src/calibre/ebooks/conversion/preprocess.py
index 39ca28e87f..b63c7ca861 100644
--- a/src/calibre/ebooks/conversion/preprocess.py
+++ b/src/calibre/ebooks/conversion/preprocess.py
@@ -228,17 +228,16 @@ class HTMLPreProcessor(object):
         else:
             rules = []
 
-        pre_rules = []
+        end_rules = []
         if getattr(self.extra_opts, 'remove_header', None):
-            pre_rules.append(
+            end_rules.append(
                 (re.compile(getattr(self.extra_opts, 'header_regex')), lambda match : '')
             )
         if getattr(self.extra_opts, 'remove_footer', None):
-            pre_rules.append(
+            end_rules.append(
                 (re.compile(getattr(self.extra_opts, 'footer_regex')), lambda match : '')
             )
-
-        end_rules = []
+        
         if getattr(self.extra_opts, 'unwrap_factor', 0.0) > 0.01:
             length = line_length(html, getattr(self.extra_opts, 'unwrap_factor'))
             if length:
@@ -247,7 +246,7 @@ class HTMLPreProcessor(object):
                     (re.compile(r'(?<=.{%i}[a-z\.,;:)-IA])\s*(?P<ital></(i|b|u)>)?\s*(<p.*?>)\s*(?=(<(i|b|u)>)?\s*[\w\d(])' % length, re.UNICODE), wrap_lines),
                 )
 
-        for rule in self.PREPROCESS + pre_rules + rules + end_rules:
+        for rule in self.PREPROCESS + rules + end_rules:
             html = rule[0].sub(rule[1], html)
 
         # Handle broken XHTML w/ SVG (ugh)
diff --git a/src/calibre/ebooks/oeb/iterator.py b/src/calibre/ebooks/oeb/iterator.py
index 762b14c3e5..565ceed519 100644
--- a/src/calibre/ebooks/oeb/iterator.py
+++ b/src/calibre/ebooks/oeb/iterator.py
@@ -123,7 +123,7 @@ class EbookIterator(object):
                                 else:
                                     print 'Loaded embedded font:', repr(family)
 
-    def __enter__(self, raw_only=False):
+    def __enter__(self, processed=False):
         self.delete_on_exit = []
         self._tdir = TemporaryDirectory('_ebook_iter')
         self.base  = self._tdir.__enter__()
@@ -140,7 +140,7 @@ class EbookIterator(object):
                 plumber.opts, plumber.input_fmt, self.log,
                 {}, self.base)
 
-        if not raw_only and plumber.input_fmt.lower() in ('pdf', 'rb'):
+        if processed or plumber.input_fmt.lower() in ('pdf', 'rb'):
             self.pathtoopf = create_oebbook(self.log, self.pathtoopf, plumber.opts,
                     plumber.input_plugin)
         if hasattr(self.pathtoopf, 'manifest'):
diff --git a/src/calibre/gui2/convert/regex_builder.py b/src/calibre/gui2/convert/regex_builder.py
index 20da8d7aaf..b1d8fbcbd5 100644
--- a/src/calibre/gui2/convert/regex_builder.py
+++ b/src/calibre/gui2/convert/regex_builder.py
@@ -87,12 +87,12 @@ class RegexBuilder(QDialog, Ui_RegexBuilder):
 
     def open_book(self, pathtoebook):
         self.iterator = EbookIterator(pathtoebook)
-        self.iterator.__enter__(raw_only=True)
+        self.iterator.__enter__(processed=True)
         text = [u'']
         for path in self.iterator.spine:
-            html = open(path, 'rb').read().decode(path.encoding, 'replace')
+            html = open(path, 'rb').read().decode('utf-8', 'replace')
             text.append(html)
-        self.preview.setPlainText('\n\n'.join(text))
+        self.preview.setPlainText('\n---\n'.join(text))
 
     def button_clicked(self, button):
         if button == self.button_box.button(QDialogButtonBox.Open):

From 66f7802f9ebe3464f9de2ce4013d98637329261e Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Tue, 27 Oct 2009 07:44:55 -0400
Subject: [PATCH 058/120] Replace non-breaking spaces with spaces.

---
 src/calibre/ebooks/pml/pmlml.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/calibre/ebooks/pml/pmlml.py b/src/calibre/ebooks/pml/pmlml.py
index 27e88eb48b..aa608496c7 100644
--- a/src/calibre/ebooks/pml/pmlml.py
+++ b/src/calibre/ebooks/pml/pmlml.py
@@ -153,6 +153,10 @@ class PMLMLizer(object):
         for unused in anchors.difference(links):
             text = text.replace('\\Q="%s"' % unused, '')
 
+        # Replace bad characters.
+        text = text.replace(u'\xc2', '')
+        text = text.replace(u'\xa0', ' ')
+
         # Turn all html entities into unicode. This should not be necessary as
         # lxml should have already done this but we want to be sure it happens.
         for entity in set(re.findall('&.+?;', text)):

From 4cc5e18606af68984d61894f43d6af82e789ca3e Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Sat, 31 Oct 2009 11:33:30 -0400
Subject: [PATCH 059/120] Remove stray setup. Revert create_upload_path to work
 with the custom path when sending to device.

---
 setup/installer/osx/freeze.py       |  1 -
 src/calibre/devices/usbms/device.py | 53 ++++-------------------------
 2 files changed, 7 insertions(+), 47 deletions(-)

diff --git a/setup/installer/osx/freeze.py b/setup/installer/osx/freeze.py
index f30a037703..bc764d25d2 100644
--- a/setup/installer/osx/freeze.py
+++ b/setup/installer/osx/freeze.py
@@ -10,7 +10,6 @@ from setup import __version__ as VERSION, __appname__ as APPNAME, SRC, Command,
 try:
     from setuptools import setup
 except:
-    setup
     class setup:
         pass
 
diff --git a/src/calibre/devices/usbms/device.py b/src/calibre/devices/usbms/device.py
index 0799f6779b..86f8de8e39 100644
--- a/src/calibre/devices/usbms/device.py
+++ b/src/calibre/devices/usbms/device.py
@@ -702,52 +702,13 @@ class Device(DeviceConfig, DevicePlugin):
             raise FreeSpaceError(_("There is insufficient free space on the storage card"))
         return path
 
-    def create_upload_path(self, path, mdata, fname):
-        path = os.path.abspath(path)
-        newpath = path
-        extra_components = []
-
-        if self.SUPPORTS_SUB_DIRS and self.settings().use_subdirs:
-            if 'tags' in mdata.keys():
-                for tag in mdata['tags']:
-                    if tag.startswith(_('News')):
-                        extra_components.append('news')
-                        c = sanitize(mdata.get('title', ''))
-                        if c:
-                            extra_components.append(c)
-                        c = sanitize(mdata.get('timestamp', ''))
-                        if c:
-                            extra_components.append(c)
-                        break
-                    elif tag.startswith('/'):
-                        for c in tag.split('/'):
-                            c = sanitize(c)
-                            if not c: continue
-                            extra_components.append(c)
-                        break
-
-            if not extra_components:
-                c = sanitize(mdata.get('authors', _('Unknown')))
-                if c:
-                    extra_components.append(c)
-                c = sanitize(mdata.get('title', _('Unknown')))
-                if c:
-                    extra_components.append(c)
-                    newpath = os.path.join(newpath, c)
-
-        fname = sanitize(fname)
-        extra_components.append(fname)
-        extra_components = [str(x) for x in extra_components]
-        def remove_trailing_periods(x):
-            ans = x
-            while ans.endswith('.'):
-                ans = ans[:-1]
-            if not ans:
-                ans = 'x'
-            return ans
-        extra_components = list(map(remove_trailing_periods, extra_components))
-        components = shorten_components_to(250 - len(path), extra_components)
-        filepath = os.path.join(path, *components)
+    def create_upload_path(self, root, mdata, ext, id):
+        from calibre.library.save_to_disk import config, get_components
+        opts = config().parse()
+        components = get_components(opts.template, mdata, id, opts.timefmt, 250)
+        components = [str(x) for x in components]
+        components = shorten_components_to(250 - len(root), components)
+        filepath = '%s%s' % (os.path.join(root, *components), ext)
         filedir = os.path.dirname(filepath)
 
         if not self.SUPPORTS_SUB_DIRS or not self.settings().use_subdirs:

From c6eec96b7ea7e70cd52a82009c761e758743c50e Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Sat, 31 Oct 2009 12:11:23 -0400
Subject: [PATCH 060/120] Add user specified cover page support to FB2 and RB
 Output. Fix bug when adding images to RB Output.

---
 src/calibre/ebooks/fb2/fb2ml.py | 3 +++
 src/calibre/ebooks/rb/rbml.py   | 7 +++++--
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/src/calibre/ebooks/fb2/fb2ml.py b/src/calibre/ebooks/fb2/fb2ml.py
index 16c822d263..78ecc94681 100644
--- a/src/calibre/ebooks/fb2/fb2ml.py
+++ b/src/calibre/ebooks/fb2/fb2ml.py
@@ -107,6 +107,9 @@ class FB2MLizer(object):
 
     def get_cover_page(self):
         output = u''
+        if 'cover' in self.oeb_book.guide:
+            output += '<image xlink:href="#cover.jpg" />'
+            self.image_hrefs[self.oeb_book.guide['cover'].href] = 'cover.jpg'
         if 'titlepage' in self.oeb_book.guide:
             self.log.debug('Generating cover page...')
             href = self.oeb_book.guide['titlepage'].href
diff --git a/src/calibre/ebooks/rb/rbml.py b/src/calibre/ebooks/rb/rbml.py
index c293880343..5574aa94b6 100644
--- a/src/calibre/ebooks/rb/rbml.py
+++ b/src/calibre/ebooks/rb/rbml.py
@@ -82,13 +82,16 @@ class RBMLizer(object):
 
     def get_cover_page(self):
         output = u''
+        if 'cover' in self.oeb_book.guide:
+            if self.name_map.get(self.oeb_book.guide['cover'].href, None):
+                output += '<IMG SRC="%s">' % self.name_map[self.oeb_book.guide['cover'].href]
         if 'titlepage' in self.oeb_book.guide:
             self.log.debug('Generating cover page...')
             href = self.oeb_book.guide['titlepage'].href
             item = self.oeb_book.manifest.hrefs[href]
             if item.spine_position is None:
                 stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
-                output += self.dump_text(item.data.find(XHTML('body')), stylizer, item)
+                output += ''.join(self.dump_text(item.data.find(XHTML('body')), stylizer, item))
         return output
 
     def get_toc(self):
@@ -152,7 +155,7 @@ class RBMLizer(object):
         if tag in IMAGE_TAGS:
             if elem.attrib.get('src', None):
                 if page.abshref(elem.attrib['src']) not in self.name_map.keys():
-                    self.name_map[page.abshref(elem.attrib['src'])] = unique_name('%s' % len(self.image_hrefs.keys()), self.image_hrefs.keys(), self.name_map.keys())
+                    self.name_map[page.abshref(elem.attrib['src'])] = unique_name('%s' % len(self.name_map.keys()), self.name_map.keys())
                 text.append('<IMG SRC="%s">' % self.name_map[page.abshref(elem.attrib['src'])])
 
         rb_tag = tag.upper() if tag in TAGS else None

From f11d876a9d99b3ee90d780a02e116d7c36d98496 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Wed, 11 Nov 2009 12:07:56 -0500
Subject: [PATCH 061/120] Integrate WayneD's PML input image fix.

---
 src/calibre/ebooks/pml/input.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/calibre/ebooks/pml/input.py b/src/calibre/ebooks/pml/input.py
index 270c8a7b0f..4128e4aa38 100644
--- a/src/calibre/ebooks/pml/input.py
+++ b/src/calibre/ebooks/pml/input.py
@@ -79,7 +79,7 @@ class PMLInput(InputFormatPlugin):
                     pimg_name = os.path.basename(img)
                     pimg_path = os.path.join(os.getcwd(), 'images', pimg_name)
                     
-                    images.append(pimg_name)
+                    images.append('images/' + pimg_name)
                     
                     shutil.move(img, pimg_path)
         else:

From 3fba659ba3421cc92f229ef2495f32621be7dd98 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Wed, 11 Nov 2009 12:19:57 -0500
Subject: [PATCH 062/120] Integrate WayneD's PML input parsing fixes.

---
 src/calibre/ebooks/pml/input.py        | 16 ++++++-
 src/calibre/ebooks/pml/pmlconverter.py | 58 ++++++++++++++++----------
 2 files changed, 50 insertions(+), 24 deletions(-)

diff --git a/src/calibre/ebooks/pml/input.py b/src/calibre/ebooks/pml/input.py
index 4128e4aa38..f2d00742ba 100644
--- a/src/calibre/ebooks/pml/input.py
+++ b/src/calibre/ebooks/pml/input.py
@@ -42,9 +42,23 @@ class PMLInput(InputFormatPlugin):
         if self.options.input_encoding:
             ienc = self.options.input_encoding
 
+        style = '''
+<style>
+.s {font-size: 1em}
+.l {font-size: 1.5em}
+.k {font-size: 0.75em}
+.c {text-align: center; margin: auto}
+.r {text-align: right}
+.t {margin-left: 5%}
+.p {page-break-after: always}
+.x {page-break-before: always}
+</style>
+'''
         self.log.debug('Converting PML to HTML...')
         html = pml_to_html(pml_stream.read().decode(ienc)) 
-        html_stream.write('<html><head><title /></head><body>' + html.encode('utf-8', 'replace') + '</body></html>')
+        html_stream.write('<html><head><title />%s</head><body>' % style)
+        html_stream.write(html.encode('utf-8', 'replace'))
+        html_stream.write('</body></html>') 
 
         if pclose:
             pml_stream.close()
diff --git a/src/calibre/ebooks/pml/pmlconverter.py b/src/calibre/ebooks/pml/pmlconverter.py
index dafe1e4f6a..1b42f99cc1 100644
--- a/src/calibre/ebooks/pml/pmlconverter.py
+++ b/src/calibre/ebooks/pml/pmlconverter.py
@@ -14,27 +14,38 @@ from calibre import my_unichr
 from calibre.ebooks.pdb.ereader import image_name
 
 PML_HTML_RULES = [
-    (re.compile(r'\\p'), lambda match: '<br /><br style="page-break-after: always;" />'),
-    (re.compile(r'\\x(?P<text>.*?)\\x', re.DOTALL), lambda match: '<h1 style="page-break-before: always;">%s</h1>' % match.group('text') if match.group('text') else ''),
-    (re.compile(r'\\X(?P<val>[0-4])(?P<text>.*?)\\X[0-4]', re.DOTALL), lambda match: '<h%s style="page-break-before: always;">%s</h%s>' % (int(match.group('val')) + 1, match.group('text'), int(match.group('val')) + 1) if match.group('text') else ''),
+    # Any literal <, &, and > chars must be escaped to avoid HTML issues (though
+    # <footnote> and <sidebar> tags are handled specially later).
+    (re.compile(r'&'), lambda match: '&amp;'),
+    (re.compile(r'<'), lambda match: '&lt;'),
+    (re.compile(r'>'), lambda match: '&gt;'),
+
+    # NOP-process all \x escapes, turning \\ into &#92;  This keeps the regex
+    # parsing simple while making sure that we don't try to honor \\x as \x
+    # (and also makes sure we DO honor \\\x as &#92; followed by \x).
+    (re.compile(r'\\(.)'), lambda match: '&#92;' if match.group(1) == '\\' else '\\' + match.group(1)),
+
+    (re.compile(r'\\p'), lambda match: '<br /><br class="p" />'),
+    (re.compile(r'\\x(?P<text>.*?)\\x', re.DOTALL), lambda match: '<h1 class="x">%s</h1>' % match.group('text') if match.group('text') else ''),
+    (re.compile(r'\\X(?P<val>[0-4])(?P<text>.*?)\\X[0-4]', re.DOTALL), lambda match: '<h%s>%s</h%s>' % (int(match.group('val')) + 1, match.group('text'), int(match.group('val')) + 1) if match.group('text') else ''),
     (re.compile(r'\\C\d=".+?"'), lambda match: ''), # This should be made to create a TOC entry
-    (re.compile(r'\\c(?P<text>.*?)\\c', re.DOTALL), lambda match: '<span style="text-align: center; display: block; margin: auto;">%s</span>' % match.group('text') if match.group('text') else ''),
-    (re.compile(r'\\r(?P<text>.*?)\\r', re.DOTALL), lambda match: '<span style="text-align: right; display: block;">%s</span>' % match.group('text') if match.group('text') else ''),
+    (re.compile(r'\\c(?P<text>.*?)\\c', re.DOTALL), lambda match: '<div class="c">%s</div>' % match.group('text') if match.group('text') else ''),
+    (re.compile(r'\\r(?P<text>.*?)\\r', re.DOTALL), lambda match: '<div class="r">%s</div>' % match.group('text') if match.group('text') else ''),
     (re.compile(r'\\i(?P<text>.*?)\\i', re.DOTALL), lambda match: '<i>%s</i>' % match.group('text') if match.group('text') else ''),
-    (re.compile(r'\\u(?P<text>.*?)\\u', re.DOTALL), lambda match: '<span style="text-decoration: underline;">%s</span>' % match.group('text') if match.group('text') else ''),
+    (re.compile(r'\\u(?P<text>.*?)\\u', re.DOTALL), lambda match: '<u>%s</u>' % match.group('text') if match.group('text') else ''),
     (re.compile(r'\\o(?P<text>.*?)\\o', re.DOTALL), lambda match: '<del>%s</del>' % match.group('text') if match.group('text') else ''),
     (re.compile(r'\\v(?P<text>.*?)\\v', re.DOTALL), lambda match: '<!-- %s -->' % match.group('text') if match.group('text') else ''),
-    (re.compile(r'\\t(?P<text>.*?)\\t', re.DOTALL), lambda match: '<div style="margin-left: 5%%;">%s</div>' % match.group('text') if match.group('text') else ''),
-    (re.compile(r'\\T="(?P<val>\d+)%*"(?P<text>.*?)$', re.MULTILINE), lambda match: r'<div style="margin-left: %s%%;">%s</div>' % (match.group('val'), match.group('text')) if match.group('text') else ''),
-    (re.compile(r'\\w="(?P<val>\d+)%"'), lambda match: '<hr width="%s" />' % match.group('val')),
+    (re.compile(r'\\t(?P<text>.*?)\\t', re.DOTALL), lambda match: '<div class="t">%s</div>' % match.group('text') if match.group('text') else ''),
+    (re.compile(r'\\T="(?P<val>\d+)%*"(?P<text>.*?)$', re.MULTILINE), lambda match: r'<div style="margin-left: %s%%">%s</div>' % (match.group('val'), match.group('text')) if match.group('text') else ''),
+    (re.compile(r'\\w="(?P<val>\d+)%"'), lambda match: '<hr width="%s%%" />' % match.group('val')),
     (re.compile(r'\\n'), lambda match: ''),
-    (re.compile(r'\\s(?P<text>.*?)\\s', re.DOTALL), lambda match: '<span style="font-size: 50%%">%s</span>' % match.group('text') if match.group('text') else ''),
+    (re.compile(r'\\s(?P<text>.*?)\\s', re.DOTALL), lambda match: '<span class="s">%s</span>' % match.group('text') if match.group('text') else ''),
     (re.compile(r'\\b(?P<text>.*?)\\b', re.DOTALL), lambda match: '<b>%s</b>' % match.group('text') if match.group('text') else ''), # \b is deprecated; \B should be used instead.
-    (re.compile(r'\\l(?P<text>.*?)\\l', re.DOTALL), lambda match: '<span style="font-size: 175%%">%s</span>' % match.group('text') if match.group('text') else ''),
+    (re.compile(r'\\l(?P<text>.*?)\\l', re.DOTALL), lambda match: '<span class="l">%s</span>' % match.group('text') if match.group('text') else ''),
     (re.compile(r'\\B(?P<text>.*?)\\B', re.DOTALL), lambda match: '<b>%s</b>' % match.group('text') if match.group('text') else ''),
     (re.compile(r'\\Sp(?P<text>.*?)\\Sp', re.DOTALL), lambda match: '<sup>%s</sup>' % match.group('text') if match.group('text') else ''),
     (re.compile(r'\\Sb(?P<text>.*?)\\Sb', re.DOTALL), lambda match: '<sub>%s</sub>' % match.group('text') if match.group('text') else ''),
-    (re.compile(r'\\k(?P<text>.*?)\\k', re.DOTALL), lambda match: '<span style="font-size: 50%%">%s</span>' % match.group('text').upper() if match.group('text') else ''),
+    (re.compile(r'\\k(?P<text>.*?)\\k', re.DOTALL), lambda match: '<span class="k">%s</span>' % match.group('text').upper() if match.group('text') else ''),
     (re.compile(r'\\a(?P<num>\d{3})'), lambda match: '&#%s;' % match.group('num')),
     (re.compile(r'\\U(?P<num>[0-9a-f]{4})'), lambda match: '%s' % my_unichr(int(match.group('num'), 16))),
     (re.compile(r'\\m="(?P<name>.+?)"'), lambda match: '<img src="images/%s" />' % image_name(match.group('name')).strip('\x00')),
@@ -47,8 +58,8 @@ PML_HTML_RULES = [
     (re.compile(r'\\I(?P<text>.*?)\\I', re.DOTALL), lambda match: '<i>%s</i>' % match.group('text') if match.group('text') else ''),
 
     # Sidebar and Footnotes
-    (re.compile(r'<sidebar\s+id="(?P<target>.+?)">\s*(?P<text>.*?)\s*</sidebar>', re.DOTALL), lambda match: '<div id="fns-%s">%s</div>' % (match.group('target'), match.group('text')) if match.group('text') else ''),
-    (re.compile(r'<footnote\s+id="(?P<target>.+?)">\s*(?P<text>.*?)\s*</footnote>', re.DOTALL), lambda match: '<div id="fns-%s">%s</div>' % (match.group('target'), match.group('text')) if match.group('text') else ''),
+    (re.compile(r'&lt;sidebar\s+id="(?P<target>.+?)"&gt;\s*(?P<text>.*?)\s*&lt;/sidebar&gt;', re.DOTALL), lambda match: '<div id="fns-%s">%s</div>' % (match.group('target'), match.group('text')) if match.group('text') else ''),
+    (re.compile(r'&lt;footnote\s+id="(?P<target>.+?)"&gt;\s*(?P<text>.*?)\s*&lt;/footnote&gt;', re.DOTALL), lambda match: '<div id="fns-%s">%s</div>' % (match.group('target'), match.group('text')) if match.group('text') else ''),
 
     # eReader files are one paragraph per line.
     # This forces the lines to wrap properly.
@@ -58,16 +69,17 @@ PML_HTML_RULES = [
     # Ensure empty lines carry over.
     (re.compile('(\r\n|\n|\r){3}'), lambda match: '<br />'),
 
-    # Remove unmatched plm codes.
-    (re.compile(r'(?<=[^\\])\\[pxcriouvtblBk]'), lambda match: ''),
-    (re.compile(r'(?<=[^\\])\\X[0-4]'), lambda match: ''),
-    (re.compile(r'(?<=[^\\])\\Sp'), lambda match: ''),
-    (re.compile(r'(?<=[^\\])\\Sb'), lambda match: ''),
-    # Remove invalid single item pml codes.
-    (re.compile(r'(?<=[^\\])\\[^\\]'), lambda match: ''),
+    # Try to fix some of the misordering of character-attribute tags.
+    (re.compile(r'(?P<ch>(<(i|u|b|del|sup|sub)( [^>]+)?>)+)(?P<close>(</(div|span)>)+)'), lambda match: match.group('close') + match.group('ch')),
+    (re.compile(r'(?P<ch>(<(i|u|b|del|sup|sub|span)( [^>]+)?>)+)(?P<blk>(<(div|h\d)( [^>]+)?>)+)'), lambda match: match.group('blk') + match.group('ch')),
 
-    # Replace \\ with \.
-    (re.compile(r'\\\\'), lambda match: '\\'),
+    # Remove unmatched plm codes.
+    (re.compile(r'\\X[0-4]'), lambda match: ''),
+    (re.compile(r'\\T="\d+%*"'), lambda match: ''),
+    (re.compile(r'\\Sp'), lambda match: ''),
+    (re.compile(r'\\Sb'), lambda match: ''),
+    # Remove invalid single item pml codes.
+    (re.compile(r'\\.'), lambda match: ''),
 ]
 
 def pml_to_html(pml):

From 155fd8a9e62a8d41f3b5d88cb03ddc787aac2125 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Thu, 12 Nov 2009 13:23:10 -0500
Subject: [PATCH 063/120] PML metadata reader.

---
 src/calibre/customize/builtins.py  | 11 +++++++
 src/calibre/ebooks/metadata/pml.py | 53 ++++++++++++++++++++++++++++++
 src/calibre/ebooks/pml/input.py    |  3 +-
 3 files changed, 66 insertions(+), 1 deletion(-)
 create mode 100644 src/calibre/ebooks/metadata/pml.py

diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py
index 22ae0d4b04..061a4409a6 100644
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@@ -197,6 +197,17 @@ class PDFMetadataReader(MetadataReaderPlugin):
             return get_quick_metadata(stream)
         return get_metadata(stream)
 
+class PMLMetadataReader(MetadataReaderPlugin):
+
+    name        = 'Read PML metadata'
+    file_types  = set(['pml', 'pmlz'])
+    description = _('Read metadata from %s files') % 'PML'
+    author      = 'John Schember'
+
+    def get_metadata(self, stream, ftype):
+        from calibre.ebooks.metadata.pml import get_metadata
+        return get_metadata(stream)
+
 class RARMetadataReader(MetadataReaderPlugin):
 
     name = 'Read RAR metadata'
diff --git a/src/calibre/ebooks/metadata/pml.py b/src/calibre/ebooks/metadata/pml.py
new file mode 100644
index 0000000000..57ca29172a
--- /dev/null
+++ b/src/calibre/ebooks/metadata/pml.py
@@ -0,0 +1,53 @@
+'''Read meta information from PML files'''
+
+from __future__ import with_statement
+
+__license__   = 'GPL v3'
+__copyright__ = '2009, John Schember <john@nachtimwald.com>'
+
+import os
+import glob
+import re
+
+from calibre.ebooks.metadata import MetaInformation
+from calibre.ptempfile import TemporaryDirectory
+from calibre.utils.zipfile import ZipFile
+
+def get_metadata(stream, extract_cover=True):
+    """ Return metadata as a L{MetaInformation} object """
+    mi = MetaInformation(_('Unknown'), [_('Unknown')])
+    stream.seek(0)
+
+    pml = ''
+    if stream.name.endswith('.pmlz'):
+        with TemporaryDirectory('_unpmlz') as tdir:
+            zf = ZipFile(stream)
+            zf.extractall(tdir)
+
+            pmls = glob.glob(os.path.join(tdir, '*.pml'))
+            for p in pmls:
+                with open(p, 'r+b') as p_stream:
+                    pml += p_stream.read()
+    else:
+        pml = stream.read()
+
+    for comment in re.findall(r'(?mus)\\v.*?\\v', pml):
+        m = re.search(r'TITLE="(.*?)"', comment)
+        if m:
+            mi.title = m.group(1).strip().decode('cp1252', 'replace')
+        m = re.search(r'AUTHOR="(.*?)"', comment)
+        if m:
+            if mi.authors == [_('Unknown')]:
+                mi.authors = []
+            mi.authors.append(m.group(1).strip().decode('cp1252', 'replace'))
+        m = re.search(r'PUBLISHER="(.*?)"', comment)
+        if m:
+            mi.publisher = m.group(1).strip().decode('cp1252', 'replace')
+        m = re.search(r'COPYRIGHT="(.*?)"', comment)
+        if m:
+            mi.rights = m.group(1).strip().decode('cp1252', 'replace')
+        m = re.search(r'ISBN="(.*?)"', comment)
+        if m:
+            mi.isbn = m.group(1).strip().decode('cp1252', 'replace')
+
+    return mi
diff --git a/src/calibre/ebooks/pml/input.py b/src/calibre/ebooks/pml/input.py
index f2d00742ba..2475e40c34 100644
--- a/src/calibre/ebooks/pml/input.py
+++ b/src/calibre/ebooks/pml/input.py
@@ -31,6 +31,7 @@ class PMLInput(InputFormatPlugin):
             pclose = True
         else:
             pml_stream = pml_path
+            pml_stream.seek(0)
             
         if not hasattr(html_path, 'write'):
             html_stream = open(html_path, 'wb')
@@ -38,7 +39,7 @@ class PMLInput(InputFormatPlugin):
         else:
             html_stream = html_path
         
-        ienc = pml_stream.encoding if pml_stream.encoding else 'utf-8'
+        ienc = pml_stream.encoding if pml_stream.encoding else 'cp1252'
         if self.options.input_encoding:
             ienc = self.options.input_encoding
 

From c2d9b2e6ad47409cf135d7b619bf5aea96b6cfa5 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Fri, 13 Nov 2009 11:41:09 -0500
Subject: [PATCH 064/120] Fix bug #3941: Handle input with <br> separated
 paragraphs better.

---
 src/calibre/ebooks/fb2/fb2ml.py | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/src/calibre/ebooks/fb2/fb2ml.py b/src/calibre/ebooks/fb2/fb2ml.py
index 78ecc94681..41b93f6d6b 100644
--- a/src/calibre/ebooks/fb2/fb2ml.py
+++ b/src/calibre/ebooks/fb2/fb2ml.py
@@ -30,7 +30,7 @@ TAG_MAP = {
     'i' : 'emphasis',
     'p' : 'p',
     'li' : 'p',
-    'br' : 'empty-line',
+    'br' : 'p',
 }
 
 TAG_SPACE = [
@@ -227,8 +227,14 @@ class FB2MLizer(object):
             fb2_text.append(self.get_anchor(page, id_name))
 
         fb2_tag = TAG_MAP.get(tag, None)
-        if fb2_tag and fb2_tag not in tag_stack:
-            tag_count += 1
+        if fb2_tag:
+            if fb2_tag not in tag_stack:
+                tag_count += 1
+            else:
+                tag_stack.reverse()
+                tag_stack.remove(fb2_tag)
+                tag_stack.reverse()
+                fb2_text.append('</%s>' % fb2_tag)
             fb2_text.append('<%s>' % fb2_tag)
             tag_stack.append(fb2_tag)
 

From 4d5f1894b158802b6c48911918e1308058361f84 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Sat, 14 Nov 2009 16:54:21 -0500
Subject: [PATCH 065/120] Fix pml parsing changes as they break PDB eReader
 input badly.

---
 src/calibre/ebooks/pml/input.py        | 16 +---------------
 src/calibre/ebooks/pml/pmlconverter.py | 16 ++++++++--------
 2 files changed, 9 insertions(+), 23 deletions(-)

diff --git a/src/calibre/ebooks/pml/input.py b/src/calibre/ebooks/pml/input.py
index 2475e40c34..ead6c988f4 100644
--- a/src/calibre/ebooks/pml/input.py
+++ b/src/calibre/ebooks/pml/input.py
@@ -43,23 +43,9 @@ class PMLInput(InputFormatPlugin):
         if self.options.input_encoding:
             ienc = self.options.input_encoding
 
-        style = '''
-<style>
-.s {font-size: 1em}
-.l {font-size: 1.5em}
-.k {font-size: 0.75em}
-.c {text-align: center; margin: auto}
-.r {text-align: right}
-.t {margin-left: 5%}
-.p {page-break-after: always}
-.x {page-break-before: always}
-</style>
-'''
         self.log.debug('Converting PML to HTML...')
         html = pml_to_html(pml_stream.read().decode(ienc)) 
-        html_stream.write('<html><head><title />%s</head><body>' % style)
-        html_stream.write(html.encode('utf-8', 'replace'))
-        html_stream.write('</body></html>') 
+        html_stream.write('<html><head><title /></head><body>%s</body></html>' % html.encode('utf-8', 'replace'))
 
         if pclose:
             pml_stream.close()
diff --git a/src/calibre/ebooks/pml/pmlconverter.py b/src/calibre/ebooks/pml/pmlconverter.py
index 1b42f99cc1..140317c9df 100644
--- a/src/calibre/ebooks/pml/pmlconverter.py
+++ b/src/calibre/ebooks/pml/pmlconverter.py
@@ -25,27 +25,27 @@ PML_HTML_RULES = [
     # (and also makes sure we DO honor \\\x as &#92; followed by \x).
     (re.compile(r'\\(.)'), lambda match: '&#92;' if match.group(1) == '\\' else '\\' + match.group(1)),
 
-    (re.compile(r'\\p'), lambda match: '<br /><br class="p" />'),
-    (re.compile(r'\\x(?P<text>.*?)\\x', re.DOTALL), lambda match: '<h1 class="x">%s</h1>' % match.group('text') if match.group('text') else ''),
+    (re.compile(r'\\p'), lambda match: '<br /><br style="page-break-after: always;" />'),
+    (re.compile(r'\\x(?P<text>.*?)\\x', re.DOTALL), lambda match: '<h1 style="page-break-before: always;">%s</h1>' % match.group('text') if match.group('text') else ''),
     (re.compile(r'\\X(?P<val>[0-4])(?P<text>.*?)\\X[0-4]', re.DOTALL), lambda match: '<h%s>%s</h%s>' % (int(match.group('val')) + 1, match.group('text'), int(match.group('val')) + 1) if match.group('text') else ''),
     (re.compile(r'\\C\d=".+?"'), lambda match: ''), # This should be made to create a TOC entry
-    (re.compile(r'\\c(?P<text>.*?)\\c', re.DOTALL), lambda match: '<div class="c">%s</div>' % match.group('text') if match.group('text') else ''),
-    (re.compile(r'\\r(?P<text>.*?)\\r', re.DOTALL), lambda match: '<div class="r">%s</div>' % match.group('text') if match.group('text') else ''),
+    (re.compile(r'\\c(?P<text>.*?)\\c', re.DOTALL), lambda match: '<div style="text-align: center; margin: auto;">%s</div>' % match.group('text') if match.group('text') else ''),
+    (re.compile(r'\\r(?P<text>.*?)\\r', re.DOTALL), lambda match: '<div style="text-align: right;">%s</div>' % match.group('text') if match.group('text') else ''),
     (re.compile(r'\\i(?P<text>.*?)\\i', re.DOTALL), lambda match: '<i>%s</i>' % match.group('text') if match.group('text') else ''),
     (re.compile(r'\\u(?P<text>.*?)\\u', re.DOTALL), lambda match: '<u>%s</u>' % match.group('text') if match.group('text') else ''),
     (re.compile(r'\\o(?P<text>.*?)\\o', re.DOTALL), lambda match: '<del>%s</del>' % match.group('text') if match.group('text') else ''),
     (re.compile(r'\\v(?P<text>.*?)\\v', re.DOTALL), lambda match: '<!-- %s -->' % match.group('text') if match.group('text') else ''),
-    (re.compile(r'\\t(?P<text>.*?)\\t', re.DOTALL), lambda match: '<div class="t">%s</div>' % match.group('text') if match.group('text') else ''),
+    (re.compile(r'\\t(?P<text>.*?)\\t', re.DOTALL), lambda match: '<div style="margin-left: 5%;">%s</div>' % match.group('text') if match.group('text') else ''),
     (re.compile(r'\\T="(?P<val>\d+)%*"(?P<text>.*?)$', re.MULTILINE), lambda match: r'<div style="margin-left: %s%%">%s</div>' % (match.group('val'), match.group('text')) if match.group('text') else ''),
     (re.compile(r'\\w="(?P<val>\d+)%"'), lambda match: '<hr width="%s%%" />' % match.group('val')),
     (re.compile(r'\\n'), lambda match: ''),
-    (re.compile(r'\\s(?P<text>.*?)\\s', re.DOTALL), lambda match: '<span class="s">%s</span>' % match.group('text') if match.group('text') else ''),
+    (re.compile(r'\\s(?P<text>.*?)\\s', re.DOTALL), lambda match: '<span style="font-size: 1em;">%s</span>' % match.group('text') if match.group('text') else ''),
     (re.compile(r'\\b(?P<text>.*?)\\b', re.DOTALL), lambda match: '<b>%s</b>' % match.group('text') if match.group('text') else ''), # \b is deprecated; \B should be used instead.
-    (re.compile(r'\\l(?P<text>.*?)\\l', re.DOTALL), lambda match: '<span class="l">%s</span>' % match.group('text') if match.group('text') else ''),
+    (re.compile(r'\\l(?P<text>.*?)\\l', re.DOTALL), lambda match: '<span style="font-size: 1.5em;">%s</span>' % match.group('text') if match.group('text') else ''),
     (re.compile(r'\\B(?P<text>.*?)\\B', re.DOTALL), lambda match: '<b>%s</b>' % match.group('text') if match.group('text') else ''),
     (re.compile(r'\\Sp(?P<text>.*?)\\Sp', re.DOTALL), lambda match: '<sup>%s</sup>' % match.group('text') if match.group('text') else ''),
     (re.compile(r'\\Sb(?P<text>.*?)\\Sb', re.DOTALL), lambda match: '<sub>%s</sub>' % match.group('text') if match.group('text') else ''),
-    (re.compile(r'\\k(?P<text>.*?)\\k', re.DOTALL), lambda match: '<span class="k">%s</span>' % match.group('text').upper() if match.group('text') else ''),
+    (re.compile(r'\\k(?P<text>.*?)\\k', re.DOTALL), lambda match: '<span style="font-size: 0.75em;">%s</span>' % match.group('text').upper() if match.group('text') else ''),
     (re.compile(r'\\a(?P<num>\d{3})'), lambda match: '&#%s;' % match.group('num')),
     (re.compile(r'\\U(?P<num>[0-9a-f]{4})'), lambda match: '%s' % my_unichr(int(match.group('num'), 16))),
     (re.compile(r'\\m="(?P<name>.+?)"'), lambda match: '<img src="images/%s" />' % image_name(match.group('name')).strip('\x00')),

From fb7ecb5cf21738ebc37cb89ac25c64144dfcae24 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Sun, 15 Nov 2009 19:38:12 -0500
Subject: [PATCH 066/120] Fix typos.

---
 src/calibre/ebooks/conversion/preprocess.py | 2 +-
 src/calibre/ebooks/pml/pmlconverter.py      | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/calibre/ebooks/conversion/preprocess.py b/src/calibre/ebooks/conversion/preprocess.py
index b63c7ca861..29ce0e4296 100644
--- a/src/calibre/ebooks/conversion/preprocess.py
+++ b/src/calibre/ebooks/conversion/preprocess.py
@@ -243,7 +243,7 @@ class HTMLPreProcessor(object):
             if length:
                 end_rules.append(
                     # Un wrap using punctuation
-                    (re.compile(r'(?<=.{%i}[a-z\.,;:)-IA])\s*(?P<ital></(i|b|u)>)?\s*(<p.*?>)\s*(?=(<(i|b|u)>)?\s*[\w\d(])' % length, re.UNICODE), wrap_lines),
+                    (re.compile(r'(?<=.{%i}[a-z\.,;:)\-IA])\s*(?P<ital></(i|b|u)>)?\s*(<p.*?>)\s*(?=(<(i|b|u)>)?\s*[\w\d(])' % length, re.UNICODE), wrap_lines),
                 )
 
         for rule in self.PREPROCESS + rules + end_rules:
diff --git a/src/calibre/ebooks/pml/pmlconverter.py b/src/calibre/ebooks/pml/pmlconverter.py
index 140317c9df..1505e5fc4b 100644
--- a/src/calibre/ebooks/pml/pmlconverter.py
+++ b/src/calibre/ebooks/pml/pmlconverter.py
@@ -35,7 +35,7 @@ PML_HTML_RULES = [
     (re.compile(r'\\u(?P<text>.*?)\\u', re.DOTALL), lambda match: '<u>%s</u>' % match.group('text') if match.group('text') else ''),
     (re.compile(r'\\o(?P<text>.*?)\\o', re.DOTALL), lambda match: '<del>%s</del>' % match.group('text') if match.group('text') else ''),
     (re.compile(r'\\v(?P<text>.*?)\\v', re.DOTALL), lambda match: '<!-- %s -->' % match.group('text') if match.group('text') else ''),
-    (re.compile(r'\\t(?P<text>.*?)\\t', re.DOTALL), lambda match: '<div style="margin-left: 5%;">%s</div>' % match.group('text') if match.group('text') else ''),
+    (re.compile(r'\\t(?P<text>.*?)\\t', re.DOTALL), lambda match: '<div style="margin-left: 5%%;">%s</div>' % match.group('text') if match.group('text') else ''),
     (re.compile(r'\\T="(?P<val>\d+)%*"(?P<text>.*?)$', re.MULTILINE), lambda match: r'<div style="margin-left: %s%%">%s</div>' % (match.group('val'), match.group('text')) if match.group('text') else ''),
     (re.compile(r'\\w="(?P<val>\d+)%"'), lambda match: '<hr width="%s%%" />' % match.group('val')),
     (re.compile(r'\\n'), lambda match: ''),

From 2bdc1afa259044ab94e583d3831445bb31e1c6b8 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Sat, 21 Nov 2009 20:48:35 -0500
Subject: [PATCH 067/120] tweaks.

---
 src/calibre/ebooks/pdb/ereader/reader.py |  4 ++--
 src/calibre/ebooks/pml/pmlconverter.py   | 10 ++--------
 2 files changed, 4 insertions(+), 10 deletions(-)

diff --git a/src/calibre/ebooks/pdb/ereader/reader.py b/src/calibre/ebooks/pdb/ereader/reader.py
index 77ca8d6933..ad1df98793 100644
--- a/src/calibre/ebooks/pdb/ereader/reader.py
+++ b/src/calibre/ebooks/pdb/ereader/reader.py
@@ -31,5 +31,5 @@ class Reader(FormatReader):
     def dump_pml(self):
         return self.reader.dump_pml()
 
-    def dump_images(self):
-        return self.reader.dump_images()
+    def dump_images(self, out_dir):
+        return self.reader.dump_images(out_dir)
diff --git a/src/calibre/ebooks/pml/pmlconverter.py b/src/calibre/ebooks/pml/pmlconverter.py
index 1505e5fc4b..b0d9ce1ec7 100644
--- a/src/calibre/ebooks/pml/pmlconverter.py
+++ b/src/calibre/ebooks/pml/pmlconverter.py
@@ -10,16 +10,10 @@ __docformat__ = 'restructuredtext en'
 
 import re
 
-from calibre import my_unichr
+from calibre import my_unichr, prepare_string_for_xml
 from calibre.ebooks.pdb.ereader import image_name
 
 PML_HTML_RULES = [
-    # Any literal <, &, and > chars be escaped to avoid HTML issues (though
-    # <footnote> and <sidebar> tags are handled specially later).
-    (re.compile(r'&'), lambda match: '&amp;'),
-    (re.compile(r'<'), lambda match: '&lt;'),
-    (re.compile(r'>'), lambda match: '&gt;'),
-
     # NOP-process all \x escapes, turning \\ into &#92;  This keeps the regex
     # parsing simple while making sure that we don't try to honor \\x as \x
     # (and also makes sure we DO honor \\\x as &#92; followed by \x).
@@ -83,7 +77,7 @@ PML_HTML_RULES = [
 ]
 
 def pml_to_html(pml):
-    html = pml
+    html = prepare_string_for_xml(pml)
     for rule in PML_HTML_RULES:
         html = rule[0].sub(rule[1], html)
 

From c569ba843fbfc01013ddd0c10683c7bcd2294169 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Sat, 21 Nov 2009 21:20:15 -0500
Subject: [PATCH 068/120] Line oriented simple state machine for PML parsing.

---
 src/calibre/ebooks/pml/pmlconverter.py | 435 +++++++++++++++++++++----
 1 file changed, 373 insertions(+), 62 deletions(-)

diff --git a/src/calibre/ebooks/pml/pmlconverter.py b/src/calibre/ebooks/pml/pmlconverter.py
index b0d9ce1ec7..05cf488617 100644
--- a/src/calibre/ebooks/pml/pmlconverter.py
+++ b/src/calibre/ebooks/pml/pmlconverter.py
@@ -9,79 +9,390 @@ __copyright__ = '2009, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
 
 import re
+import StringIO
 
 from calibre import my_unichr, prepare_string_for_xml
 from calibre.ebooks.pdb.ereader import image_name
 
-PML_HTML_RULES = [
-    # NOP-process all \x escapes, turning \\ into &#92;  This keeps the regex
-    # parsing simple while making sure that we don't try to honor \\x as \x
-    # (and also makes sure we DO honor \\\x as &#92; followed by \x).
-    (re.compile(r'\\(.)'), lambda match: '&#92;' if match.group(1) == '\\' else '\\' + match.group(1)),
+class PML_HTMLizer(object):
 
-    (re.compile(r'\\p'), lambda match: '<br /><br style="page-break-after: always;" />'),
-    (re.compile(r'\\x(?P<text>.*?)\\x', re.DOTALL), lambda match: '<h1 style="page-break-before: always;">%s</h1>' % match.group('text') if match.group('text') else ''),
-    (re.compile(r'\\X(?P<val>[0-4])(?P<text>.*?)\\X[0-4]', re.DOTALL), lambda match: '<h%s>%s</h%s>' % (int(match.group('val')) + 1, match.group('text'), int(match.group('val')) + 1) if match.group('text') else ''),
-    (re.compile(r'\\C\d=".+?"'), lambda match: ''), # This should be made to create a TOC entry
-    (re.compile(r'\\c(?P<text>.*?)\\c', re.DOTALL), lambda match: '<div style="text-align: center; margin: auto;">%s</div>' % match.group('text') if match.group('text') else ''),
-    (re.compile(r'\\r(?P<text>.*?)\\r', re.DOTALL), lambda match: '<div style="text-align: right;">%s</div>' % match.group('text') if match.group('text') else ''),
-    (re.compile(r'\\i(?P<text>.*?)\\i', re.DOTALL), lambda match: '<i>%s</i>' % match.group('text') if match.group('text') else ''),
-    (re.compile(r'\\u(?P<text>.*?)\\u', re.DOTALL), lambda match: '<u>%s</u>' % match.group('text') if match.group('text') else ''),
-    (re.compile(r'\\o(?P<text>.*?)\\o', re.DOTALL), lambda match: '<del>%s</del>' % match.group('text') if match.group('text') else ''),
-    (re.compile(r'\\v(?P<text>.*?)\\v', re.DOTALL), lambda match: '<!-- %s -->' % match.group('text') if match.group('text') else ''),
-    (re.compile(r'\\t(?P<text>.*?)\\t', re.DOTALL), lambda match: '<div style="margin-left: 5%%;">%s</div>' % match.group('text') if match.group('text') else ''),
-    (re.compile(r'\\T="(?P<val>\d+)%*"(?P<text>.*?)$', re.MULTILINE), lambda match: r'<div style="margin-left: %s%%">%s</div>' % (match.group('val'), match.group('text')) if match.group('text') else ''),
-    (re.compile(r'\\w="(?P<val>\d+)%"'), lambda match: '<hr width="%s%%" />' % match.group('val')),
-    (re.compile(r'\\n'), lambda match: ''),
-    (re.compile(r'\\s(?P<text>.*?)\\s', re.DOTALL), lambda match: '<span style="font-size: 1em;">%s</span>' % match.group('text') if match.group('text') else ''),
-    (re.compile(r'\\b(?P<text>.*?)\\b', re.DOTALL), lambda match: '<b>%s</b>' % match.group('text') if match.group('text') else ''), # \b is deprecated; \B should be used instead.
-    (re.compile(r'\\l(?P<text>.*?)\\l', re.DOTALL), lambda match: '<span style="font-size: 1.5em;">%s</span>' % match.group('text') if match.group('text') else ''),
-    (re.compile(r'\\B(?P<text>.*?)\\B', re.DOTALL), lambda match: '<b>%s</b>' % match.group('text') if match.group('text') else ''),
-    (re.compile(r'\\Sp(?P<text>.*?)\\Sp', re.DOTALL), lambda match: '<sup>%s</sup>' % match.group('text') if match.group('text') else ''),
-    (re.compile(r'\\Sb(?P<text>.*?)\\Sb', re.DOTALL), lambda match: '<sub>%s</sub>' % match.group('text') if match.group('text') else ''),
-    (re.compile(r'\\k(?P<text>.*?)\\k', re.DOTALL), lambda match: '<span style="font-size: 0.75em;">%s</span>' % match.group('text').upper() if match.group('text') else ''),
-    (re.compile(r'\\a(?P<num>\d{3})'), lambda match: '&#%s;' % match.group('num')),
-    (re.compile(r'\\U(?P<num>[0-9a-f]{4})'), lambda match: '%s' % my_unichr(int(match.group('num'), 16))),
-    (re.compile(r'\\m="(?P<name>.+?)"'), lambda match: '<img src="images/%s" />' % image_name(match.group('name')).strip('\x00')),
-    (re.compile(r'\\q="(?P<target>#.+?)"(?P<text>.*?)\\q', re.DOTALL), lambda match: '<a href="%s">%s</a>' % (match.group('target'), match.group('text')) if match.group('text') else ''),
-    (re.compile(r'\\Q="(?P<target>.+?)"'), lambda match: '<span id="%s"></span>' % match.group('target')),
-    (re.compile(r'\\-'), lambda match: ''),
-    (re.compile(r'\\Fn="(?P<target>.+?)"(?P<text>.*?)\\Fn'), lambda match: '<a href="#fns-%s">%s</a>' % (match.group('target'), match.group('text')) if match.group('text') else ''),
-    (re.compile(r'\\Sd="(?P<target>.+?)"(?P<text>.*?)\\Sd'), lambda match: '<a href="#fns-%s">%s</a>' % (match.group('target'), match.group('text')) if match.group('text') else ''),
-    # Just italicize index items as that is how the eReader software renders them.
-    (re.compile(r'\\I(?P<text>.*?)\\I', re.DOTALL), lambda match: '<i>%s</i>' % match.group('text') if match.group('text') else ''),
+    STATES = [
+        'i',
+        'u',
+        'd',
+        'b',
+        'sp',
+        'sb',
+        'h1',
+        'h2',
+        'h3',
+        'h4',
+        'h5',
+        'h6',
+        'a',
+        'c',
+        'r',
+        't',
+        's',
+        'l',
+        'T',
+        'Fn',
+        'Sd',
+        'FS'
+    ]
 
-    # Sidebar and Footnotes
-    (re.compile(r'&lt;sidebar\s+id="(?P<target>.+?)"&gt;\s*(?P<text>.*?)\s*&lt;/sidebar&gt;', re.DOTALL), lambda match: '<div id="fns-%s">%s</div>' % (match.group('target'), match.group('text')) if match.group('text') else ''),
-    (re.compile(r'&lt;footnote\s+id="(?P<target>.+?)"&gt;\s*(?P<text>.*?)\s*&lt;/footnote&gt;', re.DOTALL), lambda match: '<div id="fns-%s">%s</div>' % (match.group('target'), match.group('text')) if match.group('text') else ''),
+    STATES_VALUE_REQ = [
+        'a',
+        'T',
+        'FS'
+    ]
 
-    # eReader files are one paragraph per line.
-    # This forces the lines to wrap properly.
-    (re.compile('^(?P<text>.+)$', re.MULTILINE), lambda match: '<p>%s</p>' % match.group('text')),
-    # Remove empty <p>'s.
-    (re.compile('<p>[ ]*</p>'), lambda match: ''),
-    # Ensure empty lines carry over.
-    (re.compile('(\r\n|\n|\r){3}'), lambda match: '<br />'),
+    STATES_TAGS = {
+        'h1': ('<h1 style="page-break-after: always;">', '</h1>'),
+        'h2': ('<h2>', '</h2>'),
+        'h3': ('<h3>', '</h3>'),
+        'h4': ('<h4>', '</h4>'),
+        'h5': ('<h5>', '</h5>'),
+        'h6': ('<h6>', '</h6>'),
+        'sp': ('<sup>', '</sup>'),
+        'sb': ('<sub>', '</sub>'),
+        'a': ('<a href="%s">', '</a>'),
+        'c': ('<div style="text-align: center; margin: auto;">', '</div>'),
+        'r': ('<div style="text-align: right;">', '</div>'),
+        't': ('<div style="margin-left: 5%;">', '</div>'),
+        'T': ('<div style="margin-left: %s;">', '</div>'),
+        'i': ('<span style="font-style : italic;">', '</span>'),
+        'u': ('<span style="text-decoration : underline;">', '</span>'),
+        'd': ('<span style="text-decoration: line-through;">', '</span>'),
+        'b': ('<span style="font-weight: bold;">', '</span>'),
+        's': ('<span style="font-size: 75%">', '</span>'),
+        'l': ('<span style="font-size: 125%">', '</span>'),
+        'FS': ('<div id="%s">', '</div>'),
+    }
 
-    # Try to fix some of the misordering of character-attribute tags.
-    (re.compile(r'(?P<ch>(<(i|u|b|del|sup|sub)( [^>]+)?>)+)(?P<close>(</(div|span)>)+)'), lambda match: match.group('close') + match.group('ch')),
-    (re.compile(r'(?P<ch>(<(i|u|b|del|sup|sub|span)( [^>]+)?>)+)(?P<blk>(<(div|h\d)( [^>]+)?>)+)'), lambda match: match.group('blk') + match.group('ch')),
+    CODE_STATES = {
+        'q': 'a',
+        'x': 'h1',
+        'X0': 'h2',
+        'X1': 'h3',
+        'X2': 'h4',
+        'X3': 'h5',
+        'X4': 'h6',
+        'Sp': 'sp',
+        'Sb': 'sb',
+        'c': 'c',
+        'r': 'r',
+        't': 't',
+        'T': 'T',
+        'i': 'i',
+        'I': 'i',
+        'u': 'u',
+        'o': 'd',
+        'b': 'b',
+        'B': 'b',
+        's': 's',
+        'l': 'l',
+        'Fn': 'a',
+        'Sd': 'a',
+        'FN': 'FS',
+        'SB': 'FS',
+    }
 
-    # Remove unmatched plm codes.
-    (re.compile(r'\\X[0-4]'), lambda match: ''),
-    (re.compile(r'\\T="\d+%*"'), lambda match: ''),
-    (re.compile(r'\\Sp'), lambda match: ''),
-    (re.compile(r'\\Sb'), lambda match: ''),
-    # Remove invalid single item pml codes.
-    (re.compile(r'\\.'), lambda match: ''),
-]
+    DIV_STATES = [
+        'c',
+        'r',
+        't',
+        'T',
+        'FS',
+    ]
 
-def pml_to_html(pml):
-    html = prepare_string_for_xml(pml)
-    for rule in PML_HTML_RULES:
-        html = rule[0].sub(rule[1], html)
+    SPAN_STATES = [
+        's',
+        'l',
+        'i',
+        'u',
+        'd',
+        'b',
+    ]
 
-    return html
+    def __init__(self, close_all):
+        self.close_all = close_all
+        self.state = {}
+
+    def prepare_pml(self, pml):
+        # Remove comments
+        pml = re.sub(r'(?mus)\\v(?P<text>.*?)\\v', '', pml)
+        # Footnotes and Sidebars
+        pml = re.sub(r'(?mus)<footnote\s+id="(?P<target>.+?)">\s*(?P<text>.*?)\s*</footnote>', lambda match: '\\FN="fns-%s"%s\\FN' % (match.group('target'), match.group('text')) if match.group('text') else '', pml)
+        pml = re.sub(r'(?mus)<sidebar\s+id="(?P<target>.+?)">\s*(?P<text>.*?)\s*</sidebar>', lambda match: '\\SB="fns-%s"%s\\SB' % (match.group('target'), match.group('text')) if match.group('text') else '', pml)
+
+        pml = prepare_string_for_xml(pml)
+
+        pml = re.sub(r'\\a(?P<num>\d{3})', lambda match: '&#%s;' % match.group('num'), pml)
+        pml = re.sub(r'\\U(?P<num>[0-9a-f]{4})', lambda match: '%s' % my_unichr(int(match.group('num'), 16)), pml)
+
+        return pml
+
+    def prepare_line(self, line):
+        line = re.sub(r'[ ]{2,}', ' ', line)
+        line = re.sub(r'^[ ]*(?=.)', '', line)
+        line = re.sub(r'(?<=.)[ ]*$', '', line)
+        line = re.sub(r'^[ ]*$', '', line)
+
+        return line
+
+    def start_line(self):
+        start = u''
+
+        for key, val in self.state.items():
+            if val[0]:
+                if key not in self.STATES_VALUE_REQ:
+                    start += self.STATES_TAGS[key][0]
+                else:
+                    start += self.STATES_TAGS[key][0] % val[1]
+
+        return u'<p>%s' % start
+
+    def end_line(self):
+        end = u''
+
+        for key, val in self.state.items():
+            if val[0]:
+                if key == 'T':
+                    self.state['T'][0] = False
+                end += self.STATES_TAGS[key][1]
+
+        return u'%s</p>' % end
+
+    def process_code_simple(self, code):
+        if code not in self.CODE_STATES.keys():
+            return u''
+
+        text = u''
+
+        if self.state[self.CODE_STATES[code]][0]:
+            text = self.STATES_TAGS[self.CODE_STATES[code]][1]
+        else:
+            text = self.STATES_TAGS[self.CODE_STATES[code]][0]
+
+        self.state[self.CODE_STATES[code]][0] = not self.state[self.CODE_STATES[code]][0]
+
+        return text
+
+    def process_code_link(self, stream, pre=''):
+        '''Toggle the link state; the href value is only read when opening.'''
+        text = u''
+
+        if self.state['a'][0]:
+            text = self.STATES_TAGS['a'][1]
+        else:
+            # Only an opening code carries ="value"; reading it on close
+            # would swallow the text that follows the link.
+            href = self.code_value(stream)
+            if pre:
+                href = '#%s-%s' % (pre, href)
+            text = self.STATES_TAGS['a'][0] % href
+            self.state['a'][1] = href
+        self.state['a'][0] = not self.state['a'][0]
+        return text
+
+    def process_code_div_span(self, code, stream):
+        if self.close_all:
+            return self.process_code_div_span_call(code, stream)
+        else:
+            return self.process_code_div_span_ind(code, stream)
+
+    def process_code_div_span_ind(self, code, stream):
+        '''Toggle a div/span code, closing and reopening only tags of the same family.'''
+        text = u''
+        ds = []
+        code = self.CODE_STATES[code]
+
+        if code in self.DIV_STATES:
+            ds = self.DIV_STATES[:]
+        elif code in self.SPAN_STATES:
+            ds = self.SPAN_STATES[:]
+
+        if self.state[code][0]:
+            # Close every open tag of this family (including this one).
+            for c in ds:
+                if self.state[c][0]:
+                    text += self.STATES_TAGS[c][1]
+            # Reopen the others that are still active; c is already a state name.
+            del ds[ds.index(code)]
+            for c in ds:
+                if self.state[c][0]:
+                    if c in self.STATES_VALUE_REQ:
+                        text += self.STATES_TAGS[c][0] % self.state[c][1]
+                    else:
+                        text += self.STATES_TAGS[c][0]
+        else:
+            if code in self.STATES_VALUE_REQ:
+                val = self.code_value(stream)
+                text = self.STATES_TAGS[code][0] % val
+                self.state[code][1] = val
+            else:
+                text = self.STATES_TAGS[code][0]
+
+        self.state[code][0] = not self.state[code][0]
+
+        return text
+
+    def process_code_div_span_call(self, code, stream):
+        '''Toggle a div/span code; a close shuts every open div/span, then reopens the rest.'''
+        text = u''
+        divs = self.DIV_STATES[:]
+        spans = self.SPAN_STATES[:]
+        code = self.CODE_STATES[code]
+
+        if self.state[code][0]:
+            # Close all spans then divs.
+            for c in spans+divs:
+                if self.state[c][0]:
+                    text += self.STATES_TAGS[c][1]
+            # Reopen the others based on state. Open divs then spans.
+            if code in self.DIV_STATES:
+                del divs[divs.index(code)]
+            if code in self.SPAN_STATES:
+                del spans[spans.index(code)]
+            for c in divs+spans:
+                if self.state[c][0]:
+                    if c in self.STATES_VALUE_REQ:
+                        text += self.STATES_TAGS[c][0] % self.state[c][1]
+                    else:
+                        text += self.STATES_TAGS[c][0]
+        else:
+            if code in self.STATES_VALUE_REQ:
+                val = self.code_value(stream)
+                text = self.STATES_TAGS[code][0] % val
+                self.state[code][1] = val
+            else:
+                text = self.STATES_TAGS[code][0]
+
+        self.state[code][0] = not self.state[code][0]
+
+        return text
+
+    def code_value(self, stream):
+        value = u''
+        open = False
+
+        c = stream.read(1)
+        while c != '':
+            if open and c != '"':
+                value += c
+            if c == '"':
+                if not open:
+                    open = True
+                else:
+                    break
+            c = stream.read(1)
+
+        return value.strip()
+
+    def parse_pml(self, pml):
+        pml = self.prepare_pml(pml)
+        output = []
+
+        self.state = {}
+        for s in self.STATES:
+            self.state[s] = [False, ''];
+
+        for line in pml.splitlines():
+            if not line:
+                continue
+            parsed = []
+            empty = True
+
+            # Must use StringIO, cStringIO does not support unicode
+            line = StringIO.StringIO(self.prepare_line(line))
+            parsed.append(self.start_line())
+
+            c = line.read(1)
+            while c != '':
+                text = u''
+
+                if c == '\\':
+                    c = line.read(1)
+
+                    if c == 'x':
+                        text = self.process_code_simple(c)
+                    elif c in 'XS':
+                        l = line.read(1)
+                        if '%s%s' % (c, l) == 'Sd':
+                            text = self.process_code_link(line, 'fns')
+                        elif '%s%s' % (c, l) == 'SB':
+                            text = self.process_code_div_span('SB', line)
+                        else:
+                            text = self.process_code_simple('%s%s' % (c, l))
+                    elif c == 'q':
+                        text = self.process_code_link(line)
+                    elif c in 'crtTiIuobB':
+                        text = self.process_code_div_span(c, line)
+                    elif c in 'sl':
+                        close = u''  # state entries are [active, value]; test the flag, not the list
+                        if c == 's' and self.state['l'][0]:
+                            close = self.process_code_div_span('l', line)
+                        if c == 'l' and self.state['s'][0]:
+                            close = self.process_code_div_span('s', line)
+                        text = self.process_code_div_span(c, line)
+                        text = close+text
+                    elif c == 'm':
+                        empty = False
+                        src = self.code_value(line)
+                        text = '<img src="images/%s" />' % image_name(src).strip('\x00')
+                    elif c == 'Q':
+                        empty = False
+                        id = self.code_value(line)
+                        text = '<span id="%s"></span>' % id
+                    elif c == 'p':
+                        empty = False
+                        text = '<br /><br style="page-break-after: always;" />'
+                    elif c == 'C':
+                        # This should be made to create a TOC entry
+                        line.read(1)
+                        self.code_value(line)
+                    elif c == 'n':
+                        pass
+                    elif c == 'F':
+                        l = line.read(1)
+                        if '%s%s' % (c, l) == 'Fn':
+                            text = self.process_code_link(line, 'fns')
+                        elif '%s%s' % (c, l) == 'FN':
+                            text = self.process_code_div_span('FN', line)
+                    elif c == 'w':
+                        empty = False
+                        text = '<hr width="%s" />' % self.code_value(line)
+                    elif c == '-':
+                        empty = False
+                        text = '&shy;'
+                    elif c == '\\':
+                        empty = False
+                        text = '\\'
+                else:
+                    if c != ' ':
+                        empty = False
+                    text = c
+                parsed.append(text)
+                c = line.read(1)
+
+            if not empty:
+                text = self.end_line()
+                parsed.append(text)
+                output.append(u''.join(parsed))
+            line.close()
+
+        return u'\n'.join(output)
+
+
+def pml_to_html(pml, close_all=False):
+    '''
+    Convert PML markup to HTML. If close_all is True, closing any div or span
+    closes every open div and span tag and then re-opens the ones still active.
+    '''
+
+    hizer = PML_HTMLizer(close_all)
+    return hizer.parse_pml(pml)
 
 def footnote_sidebar_to_html(id, pml):
     if id.startswith('\x01'):

From 07f9db1b2008410b6518dc7bb3800ecef030e42d Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Sun, 22 Nov 2009 12:00:00 -0500
Subject: [PATCH 069/120] PML Input: Create Toc from CX tags.

---
 src/calibre/ebooks/pml/input.py        | 20 ++++++++++++++------
 src/calibre/ebooks/pml/pmlconverter.py | 17 ++++++++++++++---
 2 files changed, 28 insertions(+), 9 deletions(-)

diff --git a/src/calibre/ebooks/pml/input.py b/src/calibre/ebooks/pml/input.py
index ead6c988f4..eac2e99e05 100644
--- a/src/calibre/ebooks/pml/input.py
+++ b/src/calibre/ebooks/pml/input.py
@@ -11,7 +11,8 @@ import shutil
 from calibre.customize.conversion import InputFormatPlugin
 from calibre.ptempfile import TemporaryDirectory
 from calibre.utils.zipfile import ZipFile
-from calibre.ebooks.pml.pmlconverter import pml_to_html
+from calibre.ebooks.pml.pmlconverter import PML_HTMLizer
+from calibre.ebooks.metadata.toc import TOC
 from calibre.ebooks.metadata.opf2 import OPFCreator
 
 class PMLInput(InputFormatPlugin):
@@ -22,7 +23,7 @@ class PMLInput(InputFormatPlugin):
     # pmlz is a zip file containing pml files and png images.
     file_types  = set(['pml', 'pmlz'])
 
-    def process_pml(self, pml_path, html_path):
+    def process_pml(self, pml_path, html_path, close_all=False):
         pclose = False
         hclose = False
     
@@ -44,7 +45,8 @@ class PMLInput(InputFormatPlugin):
             ienc = self.options.input_encoding
 
         self.log.debug('Converting PML to HTML...')
-        html = pml_to_html(pml_stream.read().decode(ienc)) 
+        hizer = PML_HTMLizer(close_all)
+        html = hizer.parse_pml(pml_stream.read().decode(ienc), html_path)
         html_stream.write('<html><head><title /></head><body>%s</body></html>' % html.encode('utf-8', 'replace'))
 
         if pclose:
@@ -52,11 +54,14 @@ class PMLInput(InputFormatPlugin):
         if hclose:
             html_stream.close()
 
+        return hizer.get_toc()
+
     def convert(self, stream, options, file_ext, log,
                 accelerators):
         self.options = options
         self.log = log
         pages, images = [], []
+        toc = TOC()
 
         if file_ext == 'pmlz':
             log.debug('De-compressing content to temporary directory...')
@@ -71,7 +76,8 @@ class PMLInput(InputFormatPlugin):
                     
                     pages.append(html_name)
                     log.debug('Processing PML item %s...' % pml)
-                    self.process_pml(pml, html_path)
+                    ttoc = self.process_pml(pml, html_path)
+                    toc += ttoc
                     
                 imgs = glob.glob(os.path.join(tdir, '*.png'))
                 if len(imgs) > 0:
@@ -84,7 +90,7 @@ class PMLInput(InputFormatPlugin):
                     
                     shutil.move(img, pimg_path)
         else:
-            self.process_pml(stream, 'index.html')
+            toc = self.process_pml(stream, 'index.html')
 
             pages.append('index.html')
             images = []
@@ -103,7 +109,9 @@ class PMLInput(InputFormatPlugin):
         log.debug('Generating manifest...')
         opf.create_manifest(manifest_items)
         opf.create_spine(pages)
+        opf.set_toc(toc)
         with open('metadata.opf', 'wb') as opffile:
-            opf.render(opffile)
+            with open('toc.ncx', 'wb') as tocfile:
+                opf.render(opffile, tocfile, 'toc.ncx')
         
         return os.path.join(os.getcwd(), 'metadata.opf')
diff --git a/src/calibre/ebooks/pml/pmlconverter.py b/src/calibre/ebooks/pml/pmlconverter.py
index cb8ae15298..62227c94ea 100644
--- a/src/calibre/ebooks/pml/pmlconverter.py
+++ b/src/calibre/ebooks/pml/pmlconverter.py
@@ -12,6 +12,7 @@ import re
 import StringIO
 
 from calibre import my_unichr, prepare_string_for_xml
+from calibre.ebooks.metadata.toc import TOC
 from calibre.ebooks.pdb.ereader import image_name
 
 class PML_HTMLizer(object):
@@ -118,6 +119,8 @@ class PML_HTMLizer(object):
     def __init__(self, close_all):
         self.close_all = close_all
         self.state = {}
+        self.toc = TOC()
+        self.file_name = ''
 
     def prepare_pml(self, pml):
         # Remove comments
@@ -290,11 +293,14 @@ class PML_HTMLizer(object):
 
         return value.strip()
 
-    def parse_pml(self, pml):
+    def parse_pml(self, pml, file_name=''):
         pml = self.prepare_pml(pml)
         output = []
 
         self.state = {}
+        self.toc = TOC()
+        self.file_name = file_name
+
         for s in self.STATES:
             self.state[s] = [False, ''];
 
@@ -350,8 +356,10 @@ class PML_HTMLizer(object):
                         text = '<br /><br style="page-break-after: always;" />'
                     elif c == 'C':
                         # This should be made to create a TOC entry
-                        line.read(1)
-                        self.code_value(line)
+                        l = line.read(1)
+                        id = 'pml_toc-%s' % len(self.toc)
+                        self.toc.add_item(self.file_name, id, self.code_value(line))
+                        text = '<span id="%s"></span>' % id
                     elif c == 'n':
                         pass
                     elif c == 'F':
@@ -384,6 +392,9 @@ class PML_HTMLizer(object):
 
         return u'\n'.join(output)
 
+    def get_toc(self):
+        return self.toc
+
 
 def pml_to_html(pml, close_all=False):
     '''

From bee0c326e1af6186343878845e39ab20f44e9230 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Sun, 22 Nov 2009 12:04:29 -0500
Subject: [PATCH 070/120] PML input: remove stale TOC TODO comment and unused variable.

---
 src/calibre/ebooks/pml/pmlconverter.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/calibre/ebooks/pml/pmlconverter.py b/src/calibre/ebooks/pml/pmlconverter.py
index 62227c94ea..a8a7e9922b 100644
--- a/src/calibre/ebooks/pml/pmlconverter.py
+++ b/src/calibre/ebooks/pml/pmlconverter.py
@@ -355,8 +355,7 @@ class PML_HTMLizer(object):
                         empty = False
                         text = '<br /><br style="page-break-after: always;" />'
                     elif c == 'C':
-                        # This should be made to create a TOC entry
-                        l = line.read(1)
+                        line.read(1)
                         id = 'pml_toc-%s' % len(self.toc)
                         self.toc.add_item(self.file_name, id, self.code_value(line))
                         text = '<span id="%s"></span>' % id

From 8d364272ffed49f0a068f39aa9382f12b2e429a4 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Wed, 25 Nov 2009 07:49:06 -0500
Subject: [PATCH 071/120] Improve PML conversion.

---
 src/calibre/ebooks/pml/pmlconverter.py | 42 ++++++++++++++++----------
 1 file changed, 26 insertions(+), 16 deletions(-)

diff --git a/src/calibre/ebooks/pml/pmlconverter.py b/src/calibre/ebooks/pml/pmlconverter.py
index a8a7e9922b..5ef218e962 100644
--- a/src/calibre/ebooks/pml/pmlconverter.py
+++ b/src/calibre/ebooks/pml/pmlconverter.py
@@ -66,8 +66,7 @@ class PML_HTMLizer(object):
         'u': ('<span style="text-decoration : underline;">', '</span>'),
         'd': ('<span style="text-decoration: line-through;">', '</span>'),
         'b': ('<span style="font-weight: bold;">', '</span>'),
-        's': ('<span style="font-size: 75%">', '</span>'),
-        'l': ('<span style="font-size: 125%">', '</span>'),
+        'l': ('<span style="font-size: 150%">', '</span>'),
         'FS': ('<div id="%s">', '</div>'),
     }
 
@@ -91,7 +90,6 @@ class PML_HTMLizer(object):
         'o': 'd',
         'b': 'b',
         'B': 'b',
-        's': 's',
         'l': 'l',
         'Fn': 'a',
         'Sd': 'a',
@@ -108,7 +106,6 @@ class PML_HTMLizer(object):
     ]
 
     SPAN_STATES = [
-        's',
         'l',
         'i',
         'u',
@@ -144,6 +141,23 @@ class PML_HTMLizer(object):
 
         return line
 
+    def cleanup_html(self, html):
+        old = html
+        html = self.cleanup_html_remove_redundant(html)
+        while html != old:
+            old = html
+            html = self.cleanup_html_remove_redundant(html)
+        return html
+
+    def cleanup_html_remove_redundant(self, html):
+        for key in self.STATES_TAGS.keys():
+            open, close = self.STATES_TAGS[key]
+            if key in self.STATES_VALUE_REQ:
+                html = re.sub(r'(?u)%s\s*%s' % (open % '.*?', close), '', html)
+            else:
+                html = re.sub(r'(?u)%s\s*%s' % (open, close), '', html)
+        return html
+
     def start_line(self):
         start = u''
 
@@ -213,17 +227,19 @@ class PML_HTMLizer(object):
 
         if code in self.DIV_STATES:
             ds = self.DIV_STATES[:]
+            ss = self.SPAN_STATES[:]
         elif code in self.SPAN_STATES:
             ds = self.SPAN_STATES[:]
+            ss = []
 
         if self.state[code][0]:
             # Close all.
-            for c in ds:
+            for c in ss+ds:
                 if self.state[c][0]:
                     text += self.STATES_TAGS[c][1]
             # Reopen the based on state.
             del ds[ds.index(code)]
-            for c in ds:
+            for c in ds+ss:
                 if self.state[c][0]:
                     if c in self.STATES_VALUE_REQ:
                         text += self.STATES_TAGS[self.CODE_STATES[c]][0] % self.state[c][1]
@@ -333,16 +349,8 @@ class PML_HTMLizer(object):
                             text = self.process_code_simple('%s%s' % (c, l))
                     elif c == 'q':
                         text = self.process_code_link(line)
-                    elif c in 'crtTiIuobB':
+                    elif c in 'crtTiIuobBl':
                         text = self.process_code_div_span(c, line)
-                    elif c in 'sl':
-                        close = u''
-                        if c == 's' and self.state['l']:
-                            close = self.process_code_div_span('l', line)
-                        if c == 'l' and self.state['s']:
-                            close = self.process_code_div_span('s', line)
-                        text = self.process_code_div_span(c, line)
-                        text = close+text
                     elif c == 'm':
                         empty = False
                         src = self.code_value(line)
@@ -389,7 +397,9 @@ class PML_HTMLizer(object):
                 output.append(u''.join(parsed))
             line.close()
 
-        return u'\n'.join(output)
+        output = self.cleanup_html(u'\n'.join(output))
+
+        return output
 
     def get_toc(self):
         return self.toc

From c254f63a0728d90d3aad80d0d7efa16d1c4912c2 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Fri, 27 Nov 2009 20:25:57 -0500
Subject: [PATCH 072/120] PML parser fixes.

---
 src/calibre/ebooks/pml/input.py        |   2 +-
 src/calibre/ebooks/pml/pmlconverter.py | 118 ++++++++++++-------------
 2 files changed, 56 insertions(+), 64 deletions(-)

diff --git a/src/calibre/ebooks/pml/input.py b/src/calibre/ebooks/pml/input.py
index eac2e99e05..ad37494ff7 100644
--- a/src/calibre/ebooks/pml/input.py
+++ b/src/calibre/ebooks/pml/input.py
@@ -45,7 +45,7 @@ class PMLInput(InputFormatPlugin):
             ienc = self.options.input_encoding
 
         self.log.debug('Converting PML to HTML...')
-        hizer = PML_HTMLizer(close_all)
+        hizer = PML_HTMLizer()
         html = hizer.parse_pml(pml_stream.read().decode(ienc), html_path)
         html_stream.write('<html><head><title /></head><body>%s</body></html>' % html.encode('utf-8', 'replace'))
 
diff --git a/src/calibre/ebooks/pml/pmlconverter.py b/src/calibre/ebooks/pml/pmlconverter.py
index 5ef218e962..f4a959b3fc 100644
--- a/src/calibre/ebooks/pml/pmlconverter.py
+++ b/src/calibre/ebooks/pml/pmlconverter.py
@@ -62,11 +62,11 @@ class PML_HTMLizer(object):
         'r': ('<div style="text-align: right;">', '</div>'),
         't': ('<div style="margin-left: 5%;">', '</div>'),
         'T': ('<div style="margin-left: %s;">', '</div>'),
-        'i': ('<span style="font-style : italic;">', '</span>'),
-        'u': ('<span style="text-decoration : underline;">', '</span>'),
+        'i': ('<span style="font-style: italic;">', '</span>'),
+        'u': ('<span style="text-decoration: underline;">', '</span>'),
         'd': ('<span style="text-decoration: line-through;">', '</span>'),
         'b': ('<span style="font-weight: bold;">', '</span>'),
-        'l': ('<span style="font-size: 150%">', '</span>'),
+        'l': ('<span style="font-size: 150%;">', '</span>'),
         'FS': ('<div id="%s">', '</div>'),
     }
 
@@ -113,8 +113,7 @@ class PML_HTMLizer(object):
         'b',
     ]
 
-    def __init__(self, close_all):
-        self.close_all = close_all
+    def __init__(self):
         self.state = {}
         self.toc = TOC()
         self.file_name = ''
@@ -156,6 +155,7 @@ class PML_HTMLizer(object):
                 html = re.sub(r'(?u)%s\s*%s' % (open % '.*?', close), '', html)
             else:
                 html = re.sub(r'(?u)%s\s*%s' % (open, close), '', html)
+        html = re.sub(r'<p>\s*</p>', '', html)
         return html
 
     def start_line(self):
@@ -173,11 +173,22 @@ class PML_HTMLizer(object):
     def end_line(self):
         end = u''
 
+        div = []
+        span = []
+        other = []
+
         for key, val in self.state.items():
             if val[0]:
                 if key == 'T':
                     self.state['T'][0] = False
-                end += self.STATES_TAGS[key][1]
+                elif key in self.DIV_STATES:
+                    div.append(key)
+                elif key in self.SPAN_STATES:
+                    span.append(key)
+                else:
+                    other.append(key)
+        for key in span+div+other:
+            end += self.STATES_TAGS[key][1]
 
         return u'%s</p>' % end
 
@@ -214,12 +225,6 @@ class PML_HTMLizer(object):
         return text
 
     def process_code_div_span(self, code, stream):
-        if self.close_all:
-            return self.process_code_div_span_call(code, stream)
-        else:
-            return self.process_code_div_span_ind(code, stream)
-
-    def process_code_div_span_ind(self, code, stream):
         text = u''
         ds = []
 
@@ -246,47 +251,24 @@ class PML_HTMLizer(object):
                     else:
                         text += self.STATES_TAGS[c][0]
         else:
-            if code in self.STATES_VALUE_REQ:
-                val = self.code_value(stream)
-                text = self.STATES_TAGS[code][0] % val
-                self.state[code][1] = val
-            else:
-                text = self.STATES_TAGS[code][0]
-
-        self.state[code][0] = not self.state[code][0]
-
-        return text
-
-    def process_code_div_span_call(self, code, stream):
-        text = u''
-        divs = self.DIV_STATES[:]
-        spans = self.SPAN_STATES[:]
-
-        code = self.CODE_STATES[code]
-
-        if self.state[code][0]:
-            # Close all divs then spans.
-            for c in spans+divs:
+            # Close all spans if code is a div
+            for c in ss:
                 if self.state[c][0]:
                     text += self.STATES_TAGS[c][1]
-            # Reopen the based on state. Open divs then spans
-            if code in self.DIV_STATES:
-                del divs[divs.index(code)]
-            if code in self.SPAN_STATES:
-                del spans[spans.index(code)]
-            for c in divs+spans:
+            # Process the code
+            if code in self.STATES_VALUE_REQ:
+                val = self.code_value(stream)
+                text += self.STATES_TAGS[code][0] % val
+                self.state[code][1] = val
+            else:
+                text += self.STATES_TAGS[code][0]
+            # Re-open all spans if code was a div based on state
+            for c in ss:
                 if self.state[c][0]:
                     if c in self.STATES_VALUE_REQ:
                         text += self.STATES_TAGS[self.CODE_STATES[c]][0] % self.state[c][1]
                     else:
                         text += self.STATES_TAGS[c][0]
-        else:
-            if code in self.STATES_VALUE_REQ:
-                val = self.code_value(stream)
-                text = self.STATES_TAGS[code][0] % val
-                self.state[code][1] = val
-            else:
-                text = self.STATES_TAGS[code][0]
 
         self.state[code][0] = not self.state[code][0]
 
@@ -294,19 +276,32 @@ class PML_HTMLizer(object):
 
     def code_value(self, stream):
         value = u''
-        open = False
+        # state 0 is before =
+        # state 1 is before the first "
+        # state 2 is before the second "
+        state = 0
+        loc = stream.tell()
 
         c = stream.read(1)
         while c != '':
-            if open and c != '"':
-                value += c
-            if c == '"':
-                if not open:
-                    open = True
-                else:
+            if state == 0:
+                if c == '=':
+                    state = 1
+            elif state == 1:
+                if c == '"':
+                    state = 2
+            elif state == 2:
+                if c == '"':
+                    state = 3
                     break
+                else:
+                    value += c
             c = stream.read(1)
 
+        if state != 3:
+            stream.seek(loc)
+            value = u''
+
         return value.strip()
 
     def parse_pml(self, pml, file_name=''):
@@ -321,13 +316,15 @@ class PML_HTMLizer(object):
             self.state[s] = [False, ''];
 
         for line in pml.splitlines():
-            if not line:
-                continue
             parsed = []
             empty = True
 
+            line = self.prepare_line(line)
+            if not line:
+                continue
+
             # Must use StringIO, cStringIO does not support unicode
-            line = StringIO.StringIO(self.prepare_line(line))
+            line = StringIO.StringIO(line)
             parsed.append(self.start_line())
 
             c = line.read(1)
@@ -405,13 +402,8 @@ class PML_HTMLizer(object):
         return self.toc
 
 
-def pml_to_html(pml, close_all=False):
-    '''
-    close_all will close div all div and span tags when one is closed and then
-    re-open the appropriate ones.
-    '''
-
-    hizer = PML_HTMLizer(close_all)
+def pml_to_html(pml):
+    hizer = PML_HTMLizer()
     return hizer.parse_pml(pml)
 
 def footnote_sidebar_to_html(id, pml):

From fe9f40e4a63b77d654f2742b32b7443445d0d42f Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Sat, 28 Nov 2009 07:56:35 -0500
Subject: [PATCH 073/120] k and T code patches from WayneD.

---
 src/calibre/ebooks/pml/pmlconverter.py | 22 +++++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

diff --git a/src/calibre/ebooks/pml/pmlconverter.py b/src/calibre/ebooks/pml/pmlconverter.py
index f4a959b3fc..ebb451a14b 100644
--- a/src/calibre/ebooks/pml/pmlconverter.py
+++ b/src/calibre/ebooks/pml/pmlconverter.py
@@ -36,6 +36,7 @@ class PML_HTMLizer(object):
         't',
         's',
         'l',
+        'k',
         'T',
         'Fn',
         'Sd',
@@ -67,6 +68,7 @@ class PML_HTMLizer(object):
         'd': ('<span style="text-decoration: line-through;">', '</span>'),
         'b': ('<span style="font-weight: bold;">', '</span>'),
         'l': ('<span style="font-size: 150%;">', '</span>'),
+        'k': ('<span style="font-size: 75%;">', '</span>'),
         'FS': ('<div id="%s">', '</div>'),
     }
 
@@ -91,6 +93,7 @@ class PML_HTMLizer(object):
         'b': 'b',
         'B': 'b',
         'l': 'l',
+        'k': 'k',
         'Fn': 'a',
         'Sd': 'a',
         'FN': 'FS',
@@ -107,6 +110,7 @@ class PML_HTMLizer(object):
 
     SPAN_STATES = [
         'l',
+        'k',
         'i',
         'u',
         'd',
@@ -125,11 +129,11 @@ class PML_HTMLizer(object):
         pml = re.sub(r'(?mus)<footnote\s+id="(?P<target>.+?)">\s*(?P<text>.*?)\s*</footnote>', lambda match: '\\FN="fns-%s"%s\\FN' % (match.group('target'), match.group('text')) if match.group('text') else '', pml)
         pml = re.sub(r'(?mus)<sidebar\s+id="(?P<target>.+?)">\s*(?P<text>.*?)\s*</sidebar>', lambda match: '\\SB="fns-%s"%s\\SB' % (match.group('target'), match.group('text')) if match.group('text') else '', pml)
 
-        pml = prepare_string_for_xml(pml)
-
         pml = re.sub(r'\\a(?P<num>\d{3})', lambda match: '&#%s;' % match.group('num'), pml)
         pml = re.sub(r'\\U(?P<num>[0-9a-f]{4})', lambda match: '%s' % my_unichr(int(match.group('num'), 16)), pml)
 
+        pml = prepare_string_for_xml(pml)
+
         return pml
 
     def prepare_line(self, line):
@@ -181,7 +185,7 @@ class PML_HTMLizer(object):
             if val[0]:
                 if key == 'T':
                     self.state['T'][0] = False
-                elif key in self.DIV_STATES:
+                if key in self.DIV_STATES:
                     div.append(key)
                 elif key in self.SPAN_STATES:
                     span.append(key)
@@ -238,6 +242,11 @@ class PML_HTMLizer(object):
             ss = []
 
         if self.state[code][0]:
+            # Ignore multilple T's on the same line. They do not have a closing
+            # code. They get closed at the end of the line.
+            if code == 'T':
+                self.code_value(stream)
+                return text
             # Close all.
             for c in ss+ds:
                 if self.state[c][0]:
@@ -346,7 +355,7 @@ class PML_HTMLizer(object):
                             text = self.process_code_simple('%s%s' % (c, l))
                     elif c == 'q':
                         text = self.process_code_link(line)
-                    elif c in 'crtTiIuobBl':
+                    elif c in 'crtTiIuobBlk':
                         text = self.process_code_div_span(c, line)
                     elif c == 'm':
                         empty = False
@@ -384,7 +393,10 @@ class PML_HTMLizer(object):
                 else:
                     if c != ' ':
                         empty = False
-                    text = c
+                    if self.state['k'][0]:
+                        text = c.upper()
+                    else:
+                        text = c
                 parsed.append(text)
                 c = line.read(1)
 

From 94a11a8c56fd02fa065c3e0800df20d37aaa6cfa Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Sat, 28 Nov 2009 08:08:06 -0500
Subject: [PATCH 074/120] Fix bug when handling links.

---
 src/calibre/ebooks/pml/pmlconverter.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/src/calibre/ebooks/pml/pmlconverter.py b/src/calibre/ebooks/pml/pmlconverter.py
index ebb451a14b..d085b0dc2d 100644
--- a/src/calibre/ebooks/pml/pmlconverter.py
+++ b/src/calibre/ebooks/pml/pmlconverter.py
@@ -214,13 +214,12 @@ class PML_HTMLizer(object):
     def process_code_link(self, stream, pre=''):
         text = u''
 
-        href = self.code_value(stream)
-        if pre:
-            href = '#%s-%s' % (pre, href)
-
         if self.state['a'][0]:
             text = self.STATES_TAGS['a'][1]
         else:
+            href = self.code_value(stream)
+            if pre:
+                href = '#%s-%s' % (pre, href)
             text = self.STATES_TAGS['a'][0] % href
             self.state['a'][1] = href
 

From a4847d88d9ffb8562b6f8cf601caab73968e6f99 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Sat, 28 Nov 2009 08:32:08 -0500
Subject: [PATCH 075/120] PML input: Mark <a> as block level element.

---
 src/calibre/ebooks/pml/pmlconverter.py | 27 +++++++-------------------
 1 file changed, 7 insertions(+), 20 deletions(-)

diff --git a/src/calibre/ebooks/pml/pmlconverter.py b/src/calibre/ebooks/pml/pmlconverter.py
index d085b0dc2d..2416be596a 100644
--- a/src/calibre/ebooks/pml/pmlconverter.py
+++ b/src/calibre/ebooks/pml/pmlconverter.py
@@ -101,6 +101,7 @@ class PML_HTMLizer(object):
     }
 
     DIV_STATES = [
+        'a',
         'c',
         'r',
         't',
@@ -211,23 +212,7 @@ class PML_HTMLizer(object):
 
         return text
 
-    def process_code_link(self, stream, pre=''):
-        text = u''
-
-        if self.state['a'][0]:
-            text = self.STATES_TAGS['a'][1]
-        else:
-            href = self.code_value(stream)
-            if pre:
-                href = '#%s-%s' % (pre, href)
-            text = self.STATES_TAGS['a'][0] % href
-            self.state['a'][1] = href
-
-        self.state['a'][0] = not self.state['a'][0]
-
-        return text
-
-    def process_code_div_span(self, code, stream):
+    def process_code_div_span(self, code, stream, pre=''):
         text = u''
         ds = []
 
@@ -266,6 +251,8 @@ class PML_HTMLizer(object):
             # Process the code
             if code in self.STATES_VALUE_REQ:
                 val = self.code_value(stream)
+                if pre:
+                    val = '#%s-%s' % (pre, val)
                 text += self.STATES_TAGS[code][0] % val
                 self.state[code][1] = val
             else:
@@ -347,13 +334,13 @@ class PML_HTMLizer(object):
                     elif c in 'XS':
                         l = line.read(1)
                         if '%s%s' % (c, l) == 'Sd':
-                            text = self.process_code_link(line, 'fns')
+                            text = self.process_code_div_span('Sd', line, 'fns')
                         elif '%s%s' % (c, l) == 'SB':
                             text = self.process_code_div_span('SB', line)
                         else:
                             text = self.process_code_simple('%s%s' % (c, l))
                     elif c == 'q':
-                        text = self.process_code_link(line)
+                        text = self.process_code_div_span(c, line)
                     elif c in 'crtTiIuobBlk':
                         text = self.process_code_div_span(c, line)
                     elif c == 'm':
@@ -377,7 +364,7 @@ class PML_HTMLizer(object):
                     elif c == 'F':
                         l = line.read(1)
                         if '%s%s' % (c, l) == 'Fn':
-                            text = self.process_code_link(line, 'fns')
+                            text = self.process_code_div_span('Fn', line, 'fns')
                         elif '%s%s' % (c, l) == 'FN':
                             text = self.process_code_div_span('FN', line)
                     elif c == 'w':

From fc93d954e5e6bbd8d01401a7ef22d4eef238f56a Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Sat, 28 Nov 2009 09:41:23 -0500
Subject: [PATCH 076/120] PML input: cleanup and refactor a bit. Also fix an
 error when handling a tags.

---
 src/calibre/ebooks/pml/pmlconverter.py | 107 +++++++++++++++++++------
 1 file changed, 81 insertions(+), 26 deletions(-)

diff --git a/src/calibre/ebooks/pml/pmlconverter.py b/src/calibre/ebooks/pml/pmlconverter.py
index 2416be596a..8c34cc8da9 100644
--- a/src/calibre/ebooks/pml/pmlconverter.py
+++ b/src/calibre/ebooks/pml/pmlconverter.py
@@ -100,8 +100,17 @@ class PML_HTMLizer(object):
         'SB': 'FS',
     }
 
-    DIV_STATES = [
+    BLOCK_STATES = [
         'a',
+        'h1',
+        'h2',
+        'h3',
+        'h4',
+        'h5',
+        'h6',
+    ]
+
+    DIV_STATES = [
         'c',
         'r',
         't',
@@ -197,22 +206,10 @@ class PML_HTMLizer(object):
 
         return u'%s</p>' % end
 
-    def process_code_simple(self, code):
-        if code not in self.CODE_STATES.keys():
-            return u''
-
-        text = u''
-
-        if self.state[self.CODE_STATES[code]][0]:
-            text = self.STATES_TAGS[self.CODE_STATES[code]][1]
-        else:
-            text = self.STATES_TAGS[self.CODE_STATES[code]][0]
-
-        self.state[self.CODE_STATES[code]][0] = not self.state[self.CODE_STATES[code]][0]
-
-        return text
-
-    def process_code_div_span(self, code, stream, pre=''):
+    def process_code(self, code, stream):
+        '''
+        Used for processing div and span elements.
+        '''
         text = u''
         ds = []
 
@@ -225,6 +222,10 @@ class PML_HTMLizer(object):
             ds = self.SPAN_STATES[:]
             ss = []
 
+        # Close code.
+        # Close all tags starting with the inline then close block. Remove the
+        # Tag that is closed from the list and reopen them all starting with
+        # block followed by inline.
         if self.state[code][0]:
             # Ignore multilple T's on the same line. They do not have a closing
             # code. They get closed at the end of the line.
@@ -243,16 +244,17 @@ class PML_HTMLizer(object):
                         text += self.STATES_TAGS[self.CODE_STATES[c]][0] % self.state[c][1]
                     else:
                         text += self.STATES_TAGS[c][0]
+        # Open code.
+        # If the tag to open is a block we close all inline tags, open the block
+        # then re-open the inline tags.
         else:
-            # Close all spans if code is a div
+            # Close all spans if code is a div.
             for c in ss:
                 if self.state[c][0]:
                     text += self.STATES_TAGS[c][1]
             # Process the code
             if code in self.STATES_VALUE_REQ:
                 val = self.code_value(stream)
-                if pre:
-                    val = '#%s-%s' % (pre, val)
                 text += self.STATES_TAGS[code][0] % val
                 self.state[code][1] = val
             else:
@@ -269,6 +271,59 @@ class PML_HTMLizer(object):
 
         return text
 
+    def process_code_block(self, code, stream, pre=''):
+        text = u''
+
+        code = self.CODE_STATES[code]
+
+        # Close all spans
+        for c in self.SPAN_STATES:
+            if self.state[c][0]:
+                text += self.STATES_TAGS[c][1]
+
+        # Process the code
+        if self.state[code][0]:
+            # Close tag
+            text += self.STATES_TAGS[code][1]
+        else:
+            # Open tag
+            if code in self.STATES_VALUE_REQ:
+                val = self.code_value(stream)
+                if pre:
+                    val = '#%s-%s' % (pre, val)
+                text += self.STATES_TAGS[code][0] % val
+                self.state[code][1] = val
+            else:
+                text += self.STATES_TAGS[code][0]
+
+        # Re-open all spans if code was a div based on state
+        for c in self.SPAN_STATES:
+            if self.state[c][0]:
+                if c in self.STATES_VALUE_REQ:
+                    text += self.STATES_TAGS[code][0] % self.state[c][1]
+                else:
+                    text += self.STATES_TAGS[c][0]
+
+        self.state[code][0] = not self.state[code][0]
+
+        return text
+
+
+    def process_code_simple(self, code):
+        if code not in self.CODE_STATES.keys():
+            return u''
+
+        text = u''
+
+        if self.state[self.CODE_STATES[code]][0]:
+            text = self.STATES_TAGS[self.CODE_STATES[code]][1]
+        else:
+            text = self.STATES_TAGS[self.CODE_STATES[code]][0]
+
+        self.state[self.CODE_STATES[code]][0] = not self.state[self.CODE_STATES[code]][0]
+
+        return text
+
     def code_value(self, stream):
         value = u''
         # state 0 is before =
@@ -334,15 +389,15 @@ class PML_HTMLizer(object):
                     elif c in 'XS':
                         l = line.read(1)
                         if '%s%s' % (c, l) == 'Sd':
-                            text = self.process_code_div_span('Sd', line, 'fns')
+                            text = self.process_code_block('Sd', line, 'fns')
                         elif '%s%s' % (c, l) == 'SB':
-                            text = self.process_code_div_span('SB', line)
+                            text = self.process_code('SB', line)
                         else:
                             text = self.process_code_simple('%s%s' % (c, l))
                     elif c == 'q':
-                        text = self.process_code_div_span(c, line)
+                        text = self.process_code_block(c, line)
                     elif c in 'crtTiIuobBlk':
-                        text = self.process_code_div_span(c, line)
+                        text = self.process_code(c, line)
                     elif c == 'm':
                         empty = False
                         src = self.code_value(line)
@@ -364,9 +419,9 @@ class PML_HTMLizer(object):
                     elif c == 'F':
                         l = line.read(1)
                         if '%s%s' % (c, l) == 'Fn':
-                            text = self.process_code_div_span('Fn', line, 'fns')
+                            text = self.process_code_block('Fn', line, 'fns')
                         elif '%s%s' % (c, l) == 'FN':
-                            text = self.process_code_div_span('FN', line)
+                            text = self.process_code('FN', line)
                     elif c == 'w':
                         empty = False
                         text = '<hr width="%s" />' % self.code_value(line)

From 32895741531b6350213cf3dcb1c1112aaf4e9952 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Sat, 28 Nov 2009 14:13:54 -0500
Subject: [PATCH 077/120] PML input: don't turn HTML entities in the PML into
 the character.

---
 src/calibre/ebooks/pml/pmlconverter.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/calibre/ebooks/pml/pmlconverter.py b/src/calibre/ebooks/pml/pmlconverter.py
index 8c34cc8da9..7707325131 100644
--- a/src/calibre/ebooks/pml/pmlconverter.py
+++ b/src/calibre/ebooks/pml/pmlconverter.py
@@ -139,6 +139,10 @@ class PML_HTMLizer(object):
         pml = re.sub(r'(?mus)<footnote\s+id="(?P<target>.+?)">\s*(?P<text>.*?)\s*</footnote>', lambda match: '\\FN="fns-%s"%s\\FN' % (match.group('target'), match.group('text')) if match.group('text') else '', pml)
         pml = re.sub(r'(?mus)<sidebar\s+id="(?P<target>.+?)">\s*(?P<text>.*?)\s*</sidebar>', lambda match: '\\SB="fns-%s"%s\\SB' % (match.group('target'), match.group('text')) if match.group('text') else '', pml)
 
+        # Convert &'s into entities so &amp; in the text doesn't get turned into
+        # &. It will display as &amp;
+        pml = pml.replace('&', '&amp;')
+
         pml = re.sub(r'\\a(?P<num>\d{3})', lambda match: '&#%s;' % match.group('num'), pml)
         pml = re.sub(r'\\U(?P<num>[0-9a-f]{4})', lambda match: '%s' % my_unichr(int(match.group('num'), 16)), pml)
 

From 27935c4a71636393b0a4d454b462bd0fdf8a7508 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Sat, 28 Nov 2009 14:57:47 -0500
Subject: [PATCH 078/120] PML Input refactoring.

---
 src/calibre/ebooks/pml/pmlconverter.py | 70 +++++++++++++-------------
 1 file changed, 34 insertions(+), 36 deletions(-)

diff --git a/src/calibre/ebooks/pml/pmlconverter.py b/src/calibre/ebooks/pml/pmlconverter.py
index 7707325131..874b39223a 100644
--- a/src/calibre/ebooks/pml/pmlconverter.py
+++ b/src/calibre/ebooks/pml/pmlconverter.py
@@ -50,7 +50,7 @@ class PML_HTMLizer(object):
     ]
 
     STATES_TAGS = {
-        'h1': ('<h1 style="page-break-after: always;">', '</h1>'),
+        'h1': ('<h1 style="page-break-before: always;">', '</h1>'),
         'h2': ('<h2>', '</h2>'),
         'h3': ('<h3>', '</h3>'),
         'h4': ('<h4>', '</h4>'),
@@ -108,6 +108,8 @@ class PML_HTMLizer(object):
         'h4',
         'h5',
         'h6',
+        'sb',
+        'sp',
     ]
 
     DIV_STATES = [
@@ -135,6 +137,13 @@ class PML_HTMLizer(object):
     def prepare_pml(self, pml):
         # Remove comments
         pml = re.sub(r'(?mus)\\v(?P<text>.*?)\\v', '', pml)
+
+        # Remove extra white spaces.
+        pml = re.sub(r'(?mus)[ ]{2,}', ' ', pml)
+        pml = re.sub(r'(?mus)^[ ]*(?=.)', '', pml)
+        pml = re.sub(r'(?mus)(?<=.)[ ]*$', '', pml)
+        pml = re.sub(r'(?mus)^[ ]*$', '', pml)
+        
         # Footnotes and Sidebars
         pml = re.sub(r'(?mus)<footnote\s+id="(?P<target>.+?)">\s*(?P<text>.*?)\s*</footnote>', lambda match: '\\FN="fns-%s"%s\\FN' % (match.group('target'), match.group('text')) if match.group('text') else '', pml)
         pml = re.sub(r'(?mus)<sidebar\s+id="(?P<target>.+?)">\s*(?P<text>.*?)\s*</sidebar>', lambda match: '\\SB="fns-%s"%s\\SB' % (match.group('target'), match.group('text')) if match.group('text') else '', pml)
@@ -150,14 +159,6 @@ class PML_HTMLizer(object):
 
         return pml
 
-    def prepare_line(self, line):
-        line = re.sub(r'[ ]{2,}', ' ', line)
-        line = re.sub(r'^[ ]*(?=.)', '', line)
-        line = re.sub(r'(?<=.)[ ]*$', '', line)
-        line = re.sub(r'^[ ]*$', '', line)
-
-        return line
-
     def cleanup_html(self, html):
         old = html
         html = self.cleanup_html_remove_redundant(html)
@@ -217,7 +218,9 @@ class PML_HTMLizer(object):
         text = u''
         ds = []
 
-        code = self.CODE_STATES[code]
+        code = self.CODE_STATES.get(code, None)
+        if not code:
+            return text
 
         if code in self.DIV_STATES:
             ds = self.DIV_STATES[:]
@@ -278,7 +281,9 @@ class PML_HTMLizer(object):
     def process_code_block(self, code, stream, pre=''):
         text = u''
 
-        code = self.CODE_STATES[code]
+        code = self.CODE_STATES.get(code, None)
+        if not code:
+            return text
 
         # Close all spans
         for c in self.SPAN_STATES:
@@ -312,27 +317,12 @@ class PML_HTMLizer(object):
 
         return text
 
-
-    def process_code_simple(self, code):
-        if code not in self.CODE_STATES.keys():
-            return u''
-
-        text = u''
-
-        if self.state[self.CODE_STATES[code]][0]:
-            text = self.STATES_TAGS[self.CODE_STATES[code]][1]
-        else:
-            text = self.STATES_TAGS[self.CODE_STATES[code]][0]
-
-        self.state[self.CODE_STATES[code]][0] = not self.state[self.CODE_STATES[code]][0]
-
-        return text
-
     def code_value(self, stream):
         value = u''
         # state 0 is before =
         # state 1 is before the first "
         # state 2 is before the second "
+        # state 3 is after the second "
         state = 0
         loc = stream.tell()
 
@@ -341,6 +331,13 @@ class PML_HTMLizer(object):
             if state == 0:
                 if c == '=':
                     state = 1
+                elif c != ' ':
+                    # A code that requires an argument should have = after the
+                    # code but sometimes has spaces. If it has anything other
+                    # than a space or = after the code then we can assume the
+                    # markup is invalid. We will stop looking for the value
+                    # and continue to hopefully not lose any data.
+                    break;
             elif state == 1:
                 if c == '"':
                     state = 2
@@ -353,6 +350,8 @@ class PML_HTMLizer(object):
             c = stream.read(1)
 
         if state != 3:
+            # Unable to complete the sequence to reterieve the value. Reset
+            # the stream to the location it started.
             stream.seek(loc)
             value = u''
 
@@ -370,13 +369,12 @@ class PML_HTMLizer(object):
             self.state[s] = [False, ''];
 
         for line in pml.splitlines():
-            parsed = []
-            empty = True
-
-            line = self.prepare_line(line)
             if not line:
                 continue
 
+            parsed = []
+            empty = True
+
             # Must use StringIO, cStringIO does not support unicode
             line = StringIO.StringIO(line)
             parsed.append(self.start_line())
@@ -389,15 +387,15 @@ class PML_HTMLizer(object):
                     c = line.read(1)
 
                     if c == 'x':
-                        text = self.process_code_simple(c)
+                        text = self.process_code_block(c, line)
                     elif c in 'XS':
                         l = line.read(1)
-                        if '%s%s' % (c, l) == 'Sd':
-                            text = self.process_code_block('Sd', line, 'fns')
-                        elif '%s%s' % (c, l) == 'SB':
+                        if '%s%s' % (c, l) == 'SB':
                             text = self.process_code('SB', line)
+                        elif '%s%s' % (c, l) == 'Sd':
+                            text = self.process_code_block('Sd', line, 'fns')
                         else:
-                            text = self.process_code_simple('%s%s' % (c, l))
+                            text = self.process_code_block('%s%s' % (c, l), line)
                     elif c == 'q':
                         text = self.process_code_block(c, line)
                     elif c in 'crtTiIuobBlk':

From 71c4beccfc230292341c291f57497c1482622db5 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Sat, 28 Nov 2009 15:41:24 -0500
Subject: [PATCH 079/120] PML Input: more refactoring.

---
 src/calibre/ebooks/pml/pmlconverter.py | 130 +++++++++++++++----------
 1 file changed, 81 insertions(+), 49 deletions(-)

diff --git a/src/calibre/ebooks/pml/pmlconverter.py b/src/calibre/ebooks/pml/pmlconverter.py
index 874b39223a..e6c352e2b5 100644
--- a/src/calibre/ebooks/pml/pmlconverter.py
+++ b/src/calibre/ebooks/pml/pmlconverter.py
@@ -211,52 +211,68 @@ class PML_HTMLizer(object):
 
         return u'%s</p>' % end
 
-    def process_code(self, code, stream):
-        '''
-        Used for processing div and span elements.
-        '''
+    def process_code(self, code, stream, pre=''):
         text = u''
-        ds = []
 
         code = self.CODE_STATES.get(code, None)
         if not code:
             return text
 
         if code in self.DIV_STATES:
-            ds = self.DIV_STATES[:]
-            ss = self.SPAN_STATES[:]
-        elif code in self.SPAN_STATES:
-            ds = self.SPAN_STATES[:]
-            ss = []
-
-        # Close code.
-        # Close all tags starting with the inline then close block. Remove the
-        # Tag that is closed from the list and reopen them all starting with
-        # block followed by inline.
-        if self.state[code][0]:
             # Ignore multilple T's on the same line. They do not have a closing
             # code. They get closed at the end of the line.
-            if code == 'T':
+            if code == 'T' and self.state['T'][0]:
                 self.code_value(stream)
                 return text
+            text = self.process_code_div(code, stream)
+        elif code in self.SPAN_STATES:
+            text = self.process_code_span(code, stream)
+        elif code in self.BLOCK_STATES:
+            text = self.process_code_block(code, stream, pre)
+        else:
+            text = self.process_code_simple(code)
+
+        self.state[code][0] = not self.state[code][0]
+
+        return text
+
+    def process_code_simple(self, code):
+        text = u''
+
+        if self.state[code][0]:
+            text = self.STATES_TAGS[code][1]
+        else:
+            if code in self.STATES_VALUE_REQ:
+                val = self.code_value(stream)
+                text += self.STATES_TAGS[code][0] % val
+                self.state[code][1] = val
+            else:
+                text = self.STATES_TAGS[code][0]
+
+        return text
+
+    def process_code_div(self, code, stream):
+        text = u''
+
+        # Close code.
+        if self.state[code][0]:
             # Close all.
-            for c in ss+ds:
+            for c in self.SPAN_STATES+self.DIV_STATES:
                 if self.state[c][0]:
                     text += self.STATES_TAGS[c][1]
             # Reopen the based on state.
-            del ds[ds.index(code)]
-            for c in ds+ss:
+            for c in self.DIV_STATES+self.SPAN_STATES:
+                if code == c:
+                    continue
                 if self.state[c][0]:
                     if c in self.STATES_VALUE_REQ:
                         text += self.STATES_TAGS[self.CODE_STATES[c]][0] % self.state[c][1]
                     else:
                         text += self.STATES_TAGS[c][0]
         # Open code.
-        # If the tag to open is a block we close all inline tags, open the block
-        # then re-open the inline tags.
         else:
-            # Close all spans if code is a div.
-            for c in ss:
+            # Close all spans.
+            for c in self.SPAN_STATES:
                 if self.state[c][0]:
                     text += self.STATES_TAGS[c][1]
             # Process the code
@@ -266,25 +282,49 @@ class PML_HTMLizer(object):
                 self.state[code][1] = val
             else:
                 text += self.STATES_TAGS[code][0]
-            # Re-open all spans if code was a div based on state
-            for c in ss:
+            # Re-open all spans based on state
+            for c in self.SPAN_STATES:
                 if self.state[c][0]:
                     if c in self.STATES_VALUE_REQ:
                         text += self.STATES_TAGS[self.CODE_STATES[c]][0] % self.state[c][1]
                     else:
                         text += self.STATES_TAGS[c][0]
 
-        self.state[code][0] = not self.state[code][0]
+        return text
+
+    def process_code_span(self, code, stream):
+        text = u''
+
+        # Close code.
+        if self.state[code][0]:
+            # Close all spans
+            for c in self.SPAN_STATES:
+                if self.state[c][0]:
+                    text += self.STATES_TAGS[c][1]
+            # Re-open the spans based on state except for code which will be
+            # left closed.
+            for c in self.SPAN_STATES:
+                if code == c:
+                    continue
+                if self.state[c][0]:
+                    if c in self.STATES_VALUE_REQ:
+                        text += self.STATES_TAGS[code][0] % self.state[c][1]
+                    else:
+                        text += self.STATES_TAGS[c][0]
+        # Open code.
+        else:
+            if code in self.STATES_VALUE_REQ:
+                val = self.code_value(stream)
+                text += self.STATES_TAGS[code][0] % val
+                self.state[code][1] = val
+            else:
+                text += self.STATES_TAGS[code][0]
 
         return text
 
     def process_code_block(self, code, stream, pre=''):
         text = u''
 
-        code = self.CODE_STATES.get(code, None)
-        if not code:
-            return text
-
         # Close all spans
         for c in self.SPAN_STATES:
             if self.state[c][0]:
@@ -313,8 +353,6 @@ class PML_HTMLizer(object):
                 else:
                     text += self.STATES_TAGS[c][0]
 
-        self.state[code][0] = not self.state[code][0]
-
         return text
 
     def code_value(self, stream):
@@ -386,20 +424,20 @@ class PML_HTMLizer(object):
                 if c == '\\':
                     c = line.read(1)
 
-                    if c == 'x':
-                        text = self.process_code_block(c, line)
-                    elif c in 'XS':
+                    if c in 'xqcrtTiIuobBlk':
+                        text = self.process_code(c, line)
+                    elif c in 'FSX':
                         l = line.read(1)
-                        if '%s%s' % (c, l) == 'SB':
+                        if '%s%s' % (c, l) == 'Fn':
+                            text = self.process_code('Fn', line, 'fns')
+                        elif '%s%s' % (c, l) == 'FN':
+                            text = self.process_code('FN', line)
+                        elif '%s%s' % (c, l) == 'SB':
                             text = self.process_code('SB', line)
                         elif '%s%s' % (c, l) == 'Sd':
-                            text = self.process_code_block('Sd', line, 'fns')
+                            text = self.process_code('Sd', line, 'fns')
                         else:
-                            text = self.process_code_block('%s%s' % (c, l), line)
-                    elif c == 'q':
-                        text = self.process_code_block(c, line)
-                    elif c in 'crtTiIuobBlk':
-                        text = self.process_code(c, line)
+                            text = self.process_code('%s%s' % (c, l), line)
                     elif c == 'm':
                         empty = False
                         src = self.code_value(line)
@@ -418,12 +456,6 @@ class PML_HTMLizer(object):
                         text = '<span id="%s"></span>' % id
                     elif c == 'n':
                         pass
-                    elif c == 'F':
-                        l = line.read(1)
-                        if '%s%s' % (c, l) == 'Fn':
-                            text = self.process_code_block('Fn', line, 'fns')
-                        elif '%s%s' % (c, l) == 'FN':
-                            text = self.process_code('FN', line)
                     elif c == 'w':
                         empty = False
                         text = '<hr width="%s" />' % self.code_value(line)

From 63e2876f76ddfd5ee7f55c4d4d6c45873d0ae556 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Sat, 28 Nov 2009 17:34:30 -0500
Subject: [PATCH 080/120] PML Input: break if opening quote is not found when
 getting tag value.

---
 src/calibre/ebooks/pml/pmlconverter.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/calibre/ebooks/pml/pmlconverter.py b/src/calibre/ebooks/pml/pmlconverter.py
index e6c352e2b5..cd5bafa260 100644
--- a/src/calibre/ebooks/pml/pmlconverter.py
+++ b/src/calibre/ebooks/pml/pmlconverter.py
@@ -375,10 +375,14 @@ class PML_HTMLizer(object):
                     # than a space or = after the code then we can assume the
                     # markup is invalid. We will stop looking for the value
                     # and continue to hopefully not lose any data.
-                    break;
+                    break
             elif state == 1:
                 if c == '"':
                     state = 2
+                elif c != ' ':
+                    # " should always follow = but we will allow for blank
+                    # space after the =.
+                    break
             elif state == 2:
                 if c == '"':
                     state = 3

From 65d59ebc935f3aa25112e5e214c674ed4d7ebe46 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Tue, 1 Dec 2009 06:50:03 -0500
Subject: [PATCH 081/120] Correct import.

---
 src/calibre/gui2/convert/gui_conversion.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/calibre/gui2/convert/gui_conversion.py b/src/calibre/gui2/convert/gui_conversion.py
index 06ade752d1..c740fb5c1c 100644
--- a/src/calibre/gui2/convert/gui_conversion.py
+++ b/src/calibre/gui2/convert/gui_conversion.py
@@ -4,9 +4,9 @@ __license__ = 'GPL 3'
 __copyright__ = '2009, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
 
-from calibre.ebooks.conversion.plumber import Plumber, DummyReporter
+from calibre.ebooks.conversion.plumber import Plumber
 from calibre.utils.logging import Log
-from calibre.customize.conversion import OptionRecommendation
+from calibre.customize.conversion import OptionRecommendation, DummyReporter
 
 def gui_convert(input, output, recommendations, notification=DummyReporter(),
         abort_after_input_dump=False):

From 990f4f3bc46fe457a75a65a2646fa08a3b4efe70 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Wed, 2 Dec 2009 19:18:14 -0500
Subject: [PATCH 082/120] PML Input: image_name is unnecessary.

---
 src/calibre/ebooks/pml/pmlconverter.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/calibre/ebooks/pml/pmlconverter.py b/src/calibre/ebooks/pml/pmlconverter.py
index e495c24042..c120f2faf9 100644
--- a/src/calibre/ebooks/pml/pmlconverter.py
+++ b/src/calibre/ebooks/pml/pmlconverter.py
@@ -13,7 +13,6 @@ import StringIO
 
 from calibre import my_unichr, prepare_string_for_xml
 from calibre.ebooks.metadata.toc import TOC
-from calibre.ebooks.pdb.ereader import image_name
 
 class PML_HTMLizer(object):
 
@@ -445,7 +444,7 @@ class PML_HTMLizer(object):
                     elif c == 'm':
                         empty = False
                         src = self.code_value(line)
-                        text = '<img src="images/%s" />' % image_name(src).strip('\x00')
+                        text = '<img src="images/%s" />' % src
                     elif c == 'Q':
                         empty = False
                         id = self.code_value(line)

From 44ba14e77be9a9d17e80336bd0d6bf1e7f810d77 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Wed, 2 Dec 2009 20:05:54 -0500
Subject: [PATCH 083/120] PML Input: Allow for images to be in top level,
 bookname_img, or images directory for both PML and PMLZ.

---
 src/calibre/ebooks/pml/input.py | 44 +++++++++++++++++++++++----------
 1 file changed, 31 insertions(+), 13 deletions(-)

diff --git a/src/calibre/ebooks/pml/input.py b/src/calibre/ebooks/pml/input.py
index ad37494ff7..b18630c044 100644
--- a/src/calibre/ebooks/pml/input.py
+++ b/src/calibre/ebooks/pml/input.py
@@ -1,3 +1,4 @@
+import os.path
 # -*- coding: utf-8 -*-
 
 __license__   = 'GPL v3'
@@ -56,6 +57,32 @@ class PMLInput(InputFormatPlugin):
 
         return hizer.get_toc()
 
+    def get_images(self, stream, tdir, top_level=False):
+        images = []
+        imgs = []
+
+        if top_level:
+            imgs = glob.glob(os.path.join(tdir, '*.png'))
+        # Images not in top level try bookname_img directory because
+        # that's where Dropbook likes to see them.
+        if not imgs:
+            if hasattr(stream, 'name'):
+                imgs = glob.glob(os.path.join(os.path.join(tdir, os.path.splitext(os.path.basename(stream.name))[0] + '_img'), '*.png'))
+        # No images in Dropbook location try generic images directory
+        if not imgs:
+            imgs = glob.glob(os.path.join(os.path.join(tdir, 'images'), '*.png'))
+        if imgs:
+            os.makedirs(os.path.join(os.getcwd(), 'images'))
+        for img in imgs:
+            pimg_name = os.path.basename(img)
+            pimg_path = os.path.join(os.getcwd(), 'images', pimg_name)
+
+            images.append('images/' + pimg_name)
+
+            shutil.copy(img, pimg_path)
+
+        return images
+
     def convert(self, stream, options, file_ext, log,
                 accelerators):
         self.options = options
@@ -78,22 +105,13 @@ class PMLInput(InputFormatPlugin):
                     log.debug('Processing PML item %s...' % pml)
                     ttoc = self.process_pml(pml, html_path)
                     toc += ttoc
-                    
-                imgs = glob.glob(os.path.join(tdir, '*.png'))
-                if len(imgs) > 0:
-                    os.makedirs(os.path.join(os.getcwd(), 'images'))
-                for img in imgs:
-                    pimg_name = os.path.basename(img)
-                    pimg_path = os.path.join(os.getcwd(), 'images', pimg_name)
-                    
-                    images.append('images/' + pimg_name)
-                    
-                    shutil.move(img, pimg_path)
+                images = self.get_images(stream, tdir, True)
         else:
             toc = self.process_pml(stream, 'index.html')
-
             pages.append('index.html')
-            images = []
+
+            if hasattr(stream, 'name'):
+                images = self.get_images(stream, os.path.abspath(os.path.dirname(stream.name)))
 
         # We want pages to be orded alphabetically.
         pages.sort()

From d19848184dccffcb1d3bff6a43a6743fad30fbe5 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Thu, 3 Dec 2009 07:18:37 -0500
Subject: [PATCH 084/120] PML Output: Only create \a and \U tags for supported
 characters.

---
 src/calibre/ebooks/pml/__init__.py | 60 ++++++++++++++++++++++++++++++
 src/calibre/ebooks/pml/pmlml.py    |  6 ++-
 2 files changed, 64 insertions(+), 2 deletions(-)

diff --git a/src/calibre/ebooks/pml/__init__.py b/src/calibre/ebooks/pml/__init__.py
index e69de29bb2..9bda82bafb 100644
--- a/src/calibre/ebooks/pml/__init__.py
+++ b/src/calibre/ebooks/pml/__init__.py
@@ -0,0 +1,60 @@
+# -*- coding: utf-8 -*-
+
+__license__   = 'GPL v3'
+__copyright__ = '2009, John Schember <john@nachtimwald.com>'
+__docformat__ = 'restructuredtext en'
+
+# Uncommon Characters supported by PML. \\a tag codes
+A_CHARS = range(160, 256) + range(130, 136) + range(138, 141) + \
+    range(145, 152) + range(153, 157) + [159]
+
+# Extended Unicode characters supported by PML
+Latin_ExtendedA = range(0x0100, 0x0104) + [0x0105, 0x0107, 0x010C, 0x010D,
+    0x0112, 0x0113, 0x0115, 0x0117, 0x0119, 0x011B, 0x011D, 0x011F, 0x012A,
+    0x012B, 0x012D, 0x012F, 0x0131, 0x0141, 0x0142, 0x0144, 0x0148] + \
+    range(0x014B, 0x014E) + [0x014F, 0x0151, 0x0155] + range(0x0159, 0x015C) + \
+    [0x015F, 0x0163, 0x0169, 0x016B, 0x016D, 0x0177, 0x017A, 0x017D, 0x017E]
+Latin_ExtendedB = [0x01BF, 0x01CE, 0x01D0, 0x01D2, 0x01D4, 0x01E1, 0x01E3,
+    0x01E7, 0x01EB, 0x01F0, 0x0207, 0x021D, 0x0227, 0x022F, 0x0233]
+IPA_Extensions = [0x0251, 0x0251, 0x0254, 0x0259, 0x025C, 0x0265, 0x026A,
+    0x0272, 0x0283, 0x0289, 0x028A, 0x028C, 0x028F, 0x0292, 0x0294, 0x029C]
+Spacing_Modifier_Letters = [0x02BE, 0x02BF, 0x02C7, 0x02C8, 0x02CC, 0x02D0,
+    0x02D8, 0x02D9]
+Greek_and_Coptic = range(0x0391, 0x03A2) + range(0x03A3, 0x03AA) + \
+    range(0x03B1, 0x03CA) + [0x03D1, 0x03DD]
+Hebrew = range(0x05D0, 0x05EB)
+Latin_Extended_Additional = [0x1E0B, 0x1E0D, 0x1E17, 0x1E22, 0x1E24, 0x1E25,
+    0x1E2B, 0x1E33, 0x1E37, 0x1E41, 0x1E43, 0x1E45, 0x1E47, 0x1E53] + \
+    range(0x1E59, 0x1E5C) + [0x1E61, 0x1E63, 0x1E6B, 0x1E6D, 0x1E6F, 0x1E91,
+    0x1E93, 0x1E96, 0x1EA1, 0x1ECD, 0x1EF9]
+General_Punctuation = [0x2011, 0x2038, 0x203D, 0x2042]
+Arrows = [0x2190, 0x2192]
+Mathematical_Operators = [0x2202, 0x221A, 0x221E, 0x2225, 0x222B, 0x2260,
+    0x2294, 0x2295, 0x22EE]
+Enclosed_Alphanumerics = [0x24CA]
+Miscellaneous_Symbols = range(0x261C, 0x2641) + range(0x2642, 0x2648) + \
+    range(0x2660, 0x2664) + range(0x266D, 0x2670)
+Dingbats = [0x2713, 0x2720]
+Private_Use_Area = range(0xE000, 0xE01D) + range(0xE01E, 0xE029) + \
+    range(0xE02A, 0xE052)
+Alphabetic_Presentation_Forms = [0xFB02, 0xFB2A, 0xFB2B]
+
+# \\U tag codes.
+U_CHARS = Latin_ExtendedA + Latin_ExtendedB + IPA_Extensions + \
+    Spacing_Modifier_Letters + Greek_and_Coptic + Hebrew + \
+    Latin_Extended_Additional + General_Punctuation + Arrows + \
+    Mathematical_Operators + Enclosed_Alphanumerics + Miscellaneous_Symbols + \
+    Dingbats + Private_Use_Area + Alphabetic_Presentation_Forms
+
+def unipmlcode(char):
+    try:
+        val = ord(char.encode('cp1252'))
+        if val in A_CHARS:
+            return '\\a%i' % val
+    except:
+        pass
+    val = ord(char)
+    if val in U_CHARS:
+        return '\\U%04x'.upper() % val
+    else:
+        return '?'
diff --git a/src/calibre/ebooks/pml/pmlml.py b/src/calibre/ebooks/pml/pmlml.py
index aa608496c7..b40870c0b5 100644
--- a/src/calibre/ebooks/pml/pmlml.py
+++ b/src/calibre/ebooks/pml/pmlml.py
@@ -13,6 +13,7 @@ import re
 from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace
 from calibre.ebooks.oeb.stylizer import Stylizer
 from calibre.ebooks.pdb.ereader import image_name
+from calibre.ebooks.pml import unipmlcode
 from calibre import entity_to_unicode
 
 TAG_MAP = {
@@ -163,8 +164,9 @@ class PMLMLizer(object):
             mo = re.search('(%s)' % entity[1:-1], text)
             text = text.replace(entity, entity_to_unicode(mo))
 
-        # Turn all unicode characters into their PML hex equivelent
-        text = re.sub('[^\x00-\x7f]', lambda x: '\\U%04x' % ord(x.group()), text)
+        # Turn all characters that cannot be represented by themself into their
+        # PML code equivelent
+        text = re.sub('[^\x00-\x7f]', lambda x: unipmlcode(x.group()), text)
 
         # Remove excess spaces at beginning and end of lines
         text = re.sub('(?m)^[ ]+', '', text)

From 113b4c8d090db091938ea08d0cabe30d07beac36 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Thu, 3 Dec 2009 07:48:19 -0500
Subject: [PATCH 085/120] PML Metadata: Read cover.

---
 src/calibre/ebooks/metadata/pml.py | 23 +++++++++++++++++++++++
 src/calibre/ebooks/pml/input.py    |  2 +-
 2 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/src/calibre/ebooks/metadata/pml.py b/src/calibre/ebooks/metadata/pml.py
index 57ca29172a..dff791cb0b 100644
--- a/src/calibre/ebooks/metadata/pml.py
+++ b/src/calibre/ebooks/metadata/pml.py
@@ -28,8 +28,12 @@ def get_metadata(stream, extract_cover=True):
             for p in pmls:
                 with open(p, 'r+b') as p_stream:
                     pml += p_stream.read()
+            if extract_cover:
+                mi.cover_data = get_cover(os.path.splitext(os.path.basename(stream.name))[0], tdir, True)
     else:
         pml = stream.read()
+        if extract_cover:
+            mi.cover_data = get_cover(os.path.splitext(os.path.basename(stream.name))[0], os.path.abspath(os.path.dirname(stream.name)))
 
     for comment in re.findall(r'(?mus)\\v.*?\\v', pml):
         m = re.search(r'TITLE="(.*?)"', comment)
@@ -51,3 +55,22 @@ def get_metadata(stream, extract_cover=True):
             mi.isbn = m.group(1).strip().decode('cp1252', 'replace')
 
     return mi
+
+def get_cover(name, tdir, top_level=False):
+    cover_path = []
+    cover_data = None
+
+    if top_level:
+        cover_path = glob.glob(os.path.join(tdir, 'cover.png'))
+    # Images not in top level try bookname_img directory because
+    # that's where Dropbook likes to see them.
+    if not cover_path:
+        cover_path = glob.glob(os.path.join(tdir, name + '_img', 'cover.png'))
+    # No images in Dropbook location try generic images directory
+    if not cover_path:
+        cover_path = glob.glob(os.path.join(os.path.join(tdir, 'images'), 'cover.png'))
+    if cover_path:
+        with open(cover_path[0], 'r+b') as cstream:
+            cover_data = cstream.read()
+
+    return ('png', cover_data)
diff --git a/src/calibre/ebooks/pml/input.py b/src/calibre/ebooks/pml/input.py
index b18630c044..c88a4f947d 100644
--- a/src/calibre/ebooks/pml/input.py
+++ b/src/calibre/ebooks/pml/input.py
@@ -67,7 +67,7 @@ class PMLInput(InputFormatPlugin):
         # that's where Dropbook likes to see them.
         if not imgs:
             if hasattr(stream, 'name'):
-                imgs = glob.glob(os.path.join(os.path.join(tdir, os.path.splitext(os.path.basename(stream.name))[0] + '_img'), '*.png'))
+                imgs = glob.glob(os.path.join(tdir, os.path.splitext(os.path.basename(stream.name))[0] + '_img', '*.png'))
         # No images in Dropbook location try generic images directory
         if not imgs:
             imgs = glob.glob(os.path.join(os.path.join(tdir, 'images'), '*.png'))

From 0a384ba93bdadec5de9e491b0361750c02b19728 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Thu, 3 Dec 2009 18:11:05 -0500
Subject: [PATCH 086/120] PML Metadata: Improve cover extraction.

---
 src/calibre/ebooks/metadata/pml.py | 23 +++++++++++------------
 1 file changed, 11 insertions(+), 12 deletions(-)

diff --git a/src/calibre/ebooks/metadata/pml.py b/src/calibre/ebooks/metadata/pml.py
index dff791cb0b..499cf75d2c 100644
--- a/src/calibre/ebooks/metadata/pml.py
+++ b/src/calibre/ebooks/metadata/pml.py
@@ -1,9 +1,13 @@
-'''Read meta information from TXT files'''
-
-from __future__ import with_statement
+# -*- coding: utf-8 -*-
 
 __license__   = 'GPL v3'
 __copyright__ = '2009, John Schember <john@nachtimwald.com>'
+__docformat__ = 'restructuredtext en'
+
+'''
+Read meta information from TXT files
+'''
+
 
 import os
 import glob
@@ -57,20 +61,15 @@ def get_metadata(stream, extract_cover=True):
     return mi
 
 def get_cover(name, tdir, top_level=False):
-    cover_path = []
+    cover_path = ''
     cover_data = None
 
     if top_level:
-        cover_path = glob.glob(os.path.join(tdir, 'cover.png'))
-    # Images not in top level try bookname_img directory because
-    # that's where Dropbook likes to see them.
+        cover_path = os.path.join(tdir, 'cover.png') if os.path.exists(os.path.join(tdir, 'cover.png')) else ''
     if not cover_path:
-        cover_path = glob.glob(os.path.join(tdir, name + '_img', 'cover.png'))
-    # No images in Dropbook location try generic images directory
-    if not cover_path:
-        cover_path = glob.glob(os.path.join(os.path.join(tdir, 'images'), 'cover.png'))
+        cover_path = os.path.join(tdir, name + '_img', 'cover.png') if os.path.exists(os.path.join(tdir, name + '_img', 'cover.png')) else os.path.join(os.path.join(tdir, 'images'), 'cover.png') if os.path.exists(os.path.join(os.path.join(tdir, 'images'), 'cover.png')) else ''
     if cover_path:
-        with open(cover_path[0], 'r+b') as cstream:
+        with open(cover_path, 'r+b') as cstream:
             cover_data = cstream.read()
 
     return ('png', cover_data)

From 5574f36c75dab3ef40e5501682b3a9b7bdfe934f Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Thu, 3 Dec 2009 18:48:19 -0500
Subject: [PATCH 087/120] PML Input: Set cover properly if it is available.

---
 src/calibre/ebooks/pml/input.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/calibre/ebooks/pml/input.py b/src/calibre/ebooks/pml/input.py
index c88a4f947d..45f54f192f 100644
--- a/src/calibre/ebooks/pml/input.py
+++ b/src/calibre/ebooks/pml/input.py
@@ -123,6 +123,8 @@ class PMLInput(InputFormatPlugin):
         from calibre.ebooks.metadata.meta import get_metadata
         log.debug('Reading metadata from input file...')
         mi = get_metadata(stream, 'pml')
+        if 'images/cover.png' in images:
+            mi.cover = 'images/cover.png'
         opf = OPFCreator(os.getcwd(), mi)
         log.debug('Generating manifest...')
         opf.create_manifest(manifest_items)

From 56fd57605540edfa2208faa413037ccc74b29baa Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Thu, 3 Dec 2009 20:06:46 -0500
Subject: [PATCH 088/120] eReader Metadata: Extract cover.

---
 src/calibre/ebooks/metadata/ereader.py      | 19 +++++++++++--
 src/calibre/ebooks/pdb/ereader/reader132.py | 31 +++++++++++++--------
 src/calibre/ebooks/pml/input.py             | 14 +++++-----
 3 files changed, 43 insertions(+), 21 deletions(-)

diff --git a/src/calibre/ebooks/metadata/ereader.py b/src/calibre/ebooks/metadata/ereader.py
index 42f575188c..036baff2aa 100644
--- a/src/calibre/ebooks/metadata/ereader.py
+++ b/src/calibre/ebooks/metadata/ereader.py
@@ -16,6 +16,18 @@ from calibre.ebooks.pdb.ereader.reader132 import HeaderRecord
 from calibre.ebooks.pdb.header import PdbHeaderBuilder
 from calibre.ebooks.pdb.header import PdbHeaderReader
 
+def get_cover(pheader, eheader):
+    cover_data = None
+
+    for i in range(eheader.image_count):
+        raw = pheader.section_data(eheader.image_data_offset + i)
+
+        if raw[4:4 + 32].strip('\x00') == 'cover.png':
+            cover_data = raw[62:]
+            break
+
+    return ('png', cover_data)
+
 def get_metadata(stream, extract_cover=True):
     """
     Return metadata as a L{MetaInfo} object
@@ -29,7 +41,7 @@ def get_metadata(stream, extract_cover=True):
     if len(pheader.section_data(0)) == 132:
         hr = HeaderRecord(pheader.section_data(0))
 
-        if hr.version in (2, 10) and hr.has_metadata == 1:
+        if hr.compression in (2, 10) and hr.has_metadata == 1:
             try:
                 mdata = pheader.section_data(hr.metadata_offset)
 
@@ -41,6 +53,9 @@ def get_metadata(stream, extract_cover=True):
             except:
                 pass
 
+            if extract_cover:
+                mi.cover_data = get_cover(pheader, hr)
+
     if not mi.title:
         mi.title = pheader.title if pheader.title else _('Unknown')
 
@@ -56,7 +71,7 @@ def set_metadata(stream, mi):
     sections = [pheader.section_data(x) for x in range(0, pheader.section_count())]
     hr = HeaderRecord(sections[0])
 
-    if hr.version not in (2, 10):
+    if hr.compression not in (2, 10):
         return
 
     # Create a metadata record for the file if one does not alreay exist
diff --git a/src/calibre/ebooks/pdb/ereader/reader132.py b/src/calibre/ebooks/pdb/ereader/reader132.py
index 49fdfb8980..adb77d478f 100644
--- a/src/calibre/ebooks/pdb/ereader/reader132.py
+++ b/src/calibre/ebooks/pdb/ereader/reader132.py
@@ -29,12 +29,19 @@ class HeaderRecord(object):
     '''
 
     def __init__(self, raw):
-        self.version, = struct.unpack('>H', raw[0:2])
+        self.compression, = struct.unpack('>H', raw[0:2])
         self.non_text_offset, = struct.unpack('>H', raw[12:14])
+        self.chapter_count, = struct.unpack('>H', raw[14:16])
+        self.image_count, = struct.unpack('>H', raw[20:22])
+        self.link_count, = struct.unpack('>H', raw[22:24])
         self.has_metadata, = struct.unpack('>H', raw[24:26])
-        self.footnote_rec, = struct.unpack('>H', raw[28:30])
-        self.sidebar_rec, = struct.unpack('>H', raw[30:32])
+        self.footnote_count, = struct.unpack('>H', raw[28:30])
+        self.sidebar_count, = struct.unpack('>H', raw[30:32])
+        self.chapter_offset, = struct.unpack('>H', raw[32:34])
+        self.small_font_page_offset, = struct.unpack('>H', raw[36:38])
+        self.large_font_page_offset, = struct.unpack('>H', raw[38:40])
         self.image_data_offset, = struct.unpack('>H', raw[40:42])
+        self.link_offset, = struct.unpack('>H', raw[42:44])
         self.metadata_offset, = struct.unpack('>H', raw[44:46])
         self.footnote_offset, = struct.unpack('>H', raw[48:50])
         self.sidebar_offset, = struct.unpack('>H', raw[50:52])
@@ -58,11 +65,11 @@ class Reader132(FormatReader):
 
         self.header_record = HeaderRecord(self.section_data(0))
 
-        if self.header_record.version not in (2, 10):
-            if self.header_record.version in (260, 272):
+        if self.header_record.compression not in (2, 10):
+            if self.header_record.compression in (260, 272):
                 raise DRMError('eReader DRM is not supported.')
             else:
-                raise EreaderError('Unknown book version %i.' % self.header_record.version)
+                raise EreaderError('Unknown book compression %i.' % self.header_record.compression)
 
         from calibre.ebooks.metadata.pdb import get_metadata
         self.mi = get_metadata(stream, False)
@@ -71,9 +78,9 @@ class Reader132(FormatReader):
         return self.sections[number]
 
     def decompress_text(self, number):
-        if self.header_record.version == 2:
+        if self.header_record.compression == 2:
             return decompress_doc(self.section_data(number)).decode('cp1252' if self.encoding is None else self.encoding, 'replace')
-        if self.header_record.version == 10:
+        if self.header_record.compression == 10:
             return zlib.decompress(self.section_data(number)).decode('cp1252' if self.encoding is None else self.encoding, 'replace')
 
     def get_image(self, number):
@@ -115,19 +122,19 @@ class Reader132(FormatReader):
             pml += self.get_text_page(i)
         html += pml_to_html(pml)
 
-        if self.header_record.footnote_rec > 0:
+        if self.header_record.footnote_count > 0:
             html += '<br /><h1>%s</h1>' % _('Footnotes')
             footnoteids = re.findall('\w+(?=\x00)', self.section_data(self.header_record.footnote_offset).decode('cp1252' if self.encoding is None else self.encoding))
-            for fid, i in enumerate(range(self.header_record.footnote_offset + 1, self.header_record.footnote_offset + self.header_record.footnote_rec)):
+            for fid, i in enumerate(range(self.header_record.footnote_offset + 1, self.header_record.footnote_offset + self.header_record.footnote_count)):
                 self.log.debug('Extracting footnote page %i' % i)
                 html += '<dl>'
                 html += footnote_sidebar_to_html(footnoteids[fid], self.decompress_text(i))
                 html += '</dl>'
 
-        if self.header_record.sidebar_rec > 0:
+        if self.header_record.sidebar_count > 0:
             html += '<br /><h1>%s</h1>' % _('Sidebar')
             sidebarids = re.findall('\w+(?=\x00)', self.section_data(self.header_record.sidebar_offset).decode('cp1252' if self.encoding is None else self.encoding))
-            for sid, i in enumerate(range(self.header_record.sidebar_offset + 1, self.header_record.sidebar_offset + self.header_record.sidebar_rec)):
+            for sid, i in enumerate(range(self.header_record.sidebar_offset + 1, self.header_record.sidebar_offset + self.header_record.sidebar_count)):
                 self.log.debug('Extracting sidebar page %i' % i)
                 html += '<dl>'
                 html += footnote_sidebar_to_html(sidebarids[sid], self.decompress_text(i))
diff --git a/src/calibre/ebooks/pml/input.py b/src/calibre/ebooks/pml/input.py
index 45f54f192f..5453665a55 100644
--- a/src/calibre/ebooks/pml/input.py
+++ b/src/calibre/ebooks/pml/input.py
@@ -27,20 +27,20 @@ class PMLInput(InputFormatPlugin):
     def process_pml(self, pml_path, html_path, close_all=False):
         pclose = False
         hclose = False
-    
+
         if not hasattr(pml_path, 'read'):
             pml_stream = open(pml_path, 'rb')
             pclose = True
         else:
             pml_stream = pml_path
             pml_stream.seek(0)
-            
+
         if not hasattr(html_path, 'write'):
             html_stream = open(html_path, 'wb')
             hclose = True
         else:
             html_stream = html_path
-        
+
         ienc = pml_stream.encoding if pml_stream.encoding else 'cp1252'
         if self.options.input_encoding:
             ienc = self.options.input_encoding
@@ -95,12 +95,12 @@ class PMLInput(InputFormatPlugin):
             with TemporaryDirectory('_unpmlz') as tdir:
                 zf = ZipFile(stream)
                 zf.extractall(tdir)
-            
+
                 pmls = glob.glob(os.path.join(tdir, '*.pml'))
                 for pml in pmls:
                     html_name = os.path.splitext(os.path.basename(pml))[0]+'.html'
                     html_path = os.path.join(os.getcwd(), html_name)
-                    
+
                     pages.append(html_name)
                     log.debug('Processing PML item %s...' % pml)
                     ttoc = self.process_pml(pml, html_path)
@@ -119,7 +119,7 @@ class PMLInput(InputFormatPlugin):
         manifest_items = []
         for item in pages+images:
             manifest_items.append((item, None))
-        
+
         from calibre.ebooks.metadata.meta import get_metadata
         log.debug('Reading metadata from input file...')
         mi = get_metadata(stream, 'pml')
@@ -133,5 +133,5 @@ class PMLInput(InputFormatPlugin):
         with open('metadata.opf', 'wb') as opffile:
             with open('toc.ncx', 'wb') as tocfile:
                 opf.render(opffile, tocfile, 'toc.ncx')
-        
+
         return os.path.join(os.getcwd(), 'metadata.opf')

From 1e6651393e9216b2eb9faea28e0f87d2e0ec60bf Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Thu, 3 Dec 2009 20:10:07 -0500
Subject: [PATCH 089/120] eReader Input: Use included cover when available.

---
 src/calibre/ebooks/pdb/ereader/reader132.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/calibre/ebooks/pdb/ereader/reader132.py b/src/calibre/ebooks/pdb/ereader/reader132.py
index adb77d478f..ffe414dd47 100644
--- a/src/calibre/ebooks/pdb/ereader/reader132.py
+++ b/src/calibre/ebooks/pdb/ereader/reader132.py
@@ -164,12 +164,15 @@ class Reader132(FormatReader):
 
     def create_opf(self, output_dir, images):
         with CurrentDir(output_dir):
+            if 'cover.png' in images:
+                self.mi.cover = os.path.join('images', 'cover.png')
+
             opf = OPFCreator(output_dir, self.mi)
 
             manifest = [('index.html', None)]
 
             for i in images:
-                manifest.append((os.path.join('images/', i), None))
+                manifest.append((os.path.join('images', i), None))
 
             opf.create_manifest(manifest)
             opf.create_spine(['index.html'])

From d5d8202c2a7a56a64afb582192c372b038a1dcb8 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Thu, 3 Dec 2009 20:38:33 -0500
Subject: [PATCH 090/120] PML2PMLZ input plugin.

---
 src/calibre/customize/builtins.py | 38 ++++++++++++++++++++++++++++++-
 1 file changed, 37 insertions(+), 1 deletion(-)

diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py
index 02bc0b621f..7dfba3a899 100644
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@@ -1,3 +1,4 @@
+import os.path
 __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
 
@@ -49,6 +50,41 @@ every time you add an HTML file to the library.\
         'include: cp1252, latin1, iso-8859-1 and utf-8.')
 
 
+class PML2PMLZ(FileTypePlugin):
+    name = 'PML to ZIP'
+    author = 'John Schember'
+    description = textwrap.dedent(_('''\
+Create a PMLZ archive containging the PML file \
+and all images in the directory pmlname_img or images \
+file containing all linked files. This plugin is run \
+every time you add an PML file to the library.\
+'''))
+    version = numeric_version
+    file_types = set(['pml'])
+    supported_platforms = ['windows', 'osx', 'linux']
+    on_import = True
+
+    def run(self, pmlfile):
+        import zipfile
+        from calibre.ptempfile import PersistentTemporaryFile
+
+        name = os.path.join(tdir, '_plugin_pml2pmlz.pmlz')
+        pmlz = zipfile.ZipFile(name, 'w')
+        pmlz.write(pmlfile)
+
+        pml_img = os.path.basename(pmlfile)[0] + '_img'
+        img_dir = pml_img if os.path.exists(pml_img) else 'images' if os.path.exists(images) else ''
+        if img_dir:
+            for image in glob.glob(os.path.join(img_dir, '*.png')):
+                pmlz.write(image)
+        pmlz.close()
+
+        return name
+
+    def customization_help(self, gui=False):
+        return _('Character encoding for the input PML files. Should ways be: cp1252.')
+
+
 class ComicMetadataReader(MetadataReaderPlugin):
 
     name = 'Read comic metadata'
@@ -387,7 +423,7 @@ from calibre.devices.nuut2.driver import NUUT2
 from calibre.devices.iriver.driver import IRIVER_STORY
 
 from calibre.ebooks.metadata.fetch import GoogleBooks, ISBNDB, Amazon
-plugins = [HTML2ZIP, GoogleBooks, ISBNDB, Amazon]
+plugins = [HTML2ZIP, PML2PMLZ, GoogleBooks, ISBNDB, Amazon]
 plugins += [
     ComicInput,
     EPUBInput,

From 9cfd0b9a4fbba99f1fcf2c2706bc9482bf01f662 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Fri, 4 Dec 2009 06:13:17 -0500
Subject: [PATCH 091/120] PMLTOPMLZ: Fix name.

---
 src/calibre/customize/builtins.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py
index 7dfba3a899..948d4c9b7d 100644
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@@ -51,7 +51,7 @@ every time you add an HTML file to the library.\
 
 
 class PML2PMLZ(FileTypePlugin):
-    name = 'PML to ZIP'
+    name = 'PML to PMLZ'
     author = 'John Schember'
     description = textwrap.dedent(_('''\
 Create a PMLZ archive containging the PML file \

From 74d613eb375d0d0051f361562e3ddfc5b2ab88dd Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Fri, 4 Dec 2009 18:38:20 -0500
Subject: [PATCH 092/120] Fix PML2PMLZ FileTypePlugin.

---
 src/calibre/customize/builtins.py | 33 +++++++++++++------------------
 1 file changed, 14 insertions(+), 19 deletions(-)

diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py
index a8ad352b9b..e2f1055610 100644
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@@ -54,10 +54,10 @@ class PML2PMLZ(FileTypePlugin):
     name = 'PML to PMLZ'
     author = 'John Schember'
     description = textwrap.dedent(_('''\
-        Create a PMLZ archive containing the PML file \
-        and all images in the directory pmlname_img or images \
-        file containing all linked files. This plugin is run \
-        every time you add an PML file to the library.\
+Create a PMLZ archive containing the PML file \
+and all images in the directory pmlname_img or images \
+file containing all linked files. This plugin is run \
+every time you add an PML file to the library.\
     '''))
     version = numeric_version
     file_types = set(['pml'])
@@ -66,25 +66,20 @@ class PML2PMLZ(FileTypePlugin):
 
     def run(self, pmlfile):
         import zipfile
-        from calibre.ptempfile import TemporaryDirectory
 
-        with TemporaryDirectory('_plugin_pml2pmlz') as tdir:
-            name = os.path.join(tdir, '_plugin_pml2pmlz.pmlz')
-            pmlz = zipfile.ZipFile(name, 'w')
-            pmlz.write(pmlfile)
+        of = self.temporary_file('_plugin_pml2pmlz.pmlz')
+        pmlz = zipfile.ZipFile(of.name, 'w')
+        pmlz.write(pmlfile, os.path.basename(pmlfile))
 
-            pml_img = os.path.basename(pmlfile)[0] + '_img'
-            img_dir = pml_img if os.path.exists(pml_img) else 'images' if \
+        pml_img = os.path.basename(pmlfile)[0] + '_img'
+        img_dir = pml_img if os.path.exists(pml_img) else 'images' if \
             os.path.exists('images') else ''
-            if img_dir:
-                for image in glob.glob(os.path.join(img_dir, '*.png')):
-                    pmlz.write(image)
-            pmlz.close()
+        if img_dir:
+            for image in glob.glob(os.path.join(img_dir, '*.png')):
+                pmlz.write(image, os.path.join('images', (os.path.basename(image))))
+        pmlz.close()
 
-        return name
-
-    def customization_help(self, gui=False):
-        return _('Character encoding for the input PML files. Should ways be: cp1252.')
+        return of.name
 
 
 class ComicMetadataReader(MetadataReaderPlugin):

From 6634f3f13f80495c2884c257270bc81f501b95d6 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Fri, 4 Dec 2009 18:40:02 -0500
Subject: [PATCH 093/120] Fix PML2PMLZ FileTypePlugin description.

---
 src/calibre/customize/builtins.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py
index e2f1055610..bd9e52ae93 100644
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@@ -55,9 +55,9 @@ class PML2PMLZ(FileTypePlugin):
     author = 'John Schember'
     description = textwrap.dedent(_('''\
 Create a PMLZ archive containing the PML file \
-and all images in the directory pmlname_img or images \
-file containing all linked files. This plugin is run \
-every time you add an PML file to the library.\
+and all images in the directory pmlname_img or \
+images. This plugin is run every time you add \
+a PML file to the library. \
     '''))
     version = numeric_version
     file_types = set(['pml'])

From d03dc39fcba71ced4d4fb36c84c16be16865327f Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Sat, 5 Dec 2009 08:09:48 -0500
Subject: [PATCH 094/120] FB2 Output: Use h1 tags to create section titles used
 for TOC.

---
 src/calibre/ebooks/fb2/fb2ml.py | 24 +++++++++++++++++-------
 1 file changed, 17 insertions(+), 7 deletions(-)

diff --git a/src/calibre/ebooks/fb2/fb2ml.py b/src/calibre/ebooks/fb2/fb2ml.py
index 41b93f6d6b..31b0d8f0a2 100644
--- a/src/calibre/ebooks/fb2/fb2ml.py
+++ b/src/calibre/ebooks/fb2/fb2ml.py
@@ -46,6 +46,10 @@ TAG_LINKS = [
     'a',
 ]
 
+TAG_TITLE = [
+    'h1',
+]
+
 STYLES = [
     ('font-weight', {'bold'   : 'strong', 'bolder' : 'strong'}),
     ('font-style', {'italic' : 'emphasis'}),
@@ -196,7 +200,6 @@ class FB2MLizer(object):
             return [u'']
 
         tag = barename(elem.tag)
-        tag_count = 0
 
         if tag in TAG_IMAGES:
             if elem.attrib.get('src', None):
@@ -218,7 +221,6 @@ class FB2MLizer(object):
                         self.link_hrefs[href] = 'calibre_link-%s' % len(self.link_hrefs.keys())
                     href = self.link_hrefs[href]
                     fb2_text.append('<a xlink:href="#%s">' % href)
-                tag_count += 1
                 tag_stack.append('a')
 
         # Anchor ids
@@ -226,11 +228,20 @@ class FB2MLizer(object):
         if id_name:
             fb2_text.append(self.get_anchor(page, id_name))
 
+        if tag in TAG_TITLE:
+            if 'p' in tag_stack:
+                ctag = []
+                ctag.append(tag_stack.pop())
+                while ctag[-1] != 'p':
+                    ctag.append(tag_stack.pop())
+                fb2_text += self.close_tags(ctag)
+            fb2_text.append('</section><section><title><p>')
+            tag_stack.append('title')
+            tag_stack.append('p')
+
         fb2_tag = TAG_MAP.get(tag, None)
         if fb2_tag:
-            if fb2_tag not in tag_stack:
-                tag_count += 1
-            else:
+            if fb2_tag in tag_stack:
                 tag_stack.reverse()
                 tag_stack.remove(fb2_tag)
                 tag_stack.reverse()
@@ -242,7 +253,6 @@ class FB2MLizer(object):
         for s in STYLES:
             style_tag = s[1].get(style[s[0]], None)
             if style_tag:
-                tag_count += 1
                 fb2_text.append('<%s>' % style_tag)
                 tag_stack.append(style_tag)
 
@@ -260,7 +270,7 @@ class FB2MLizer(object):
             fb2_text += self.dump_text(item, stylizer, page, tag_stack)
 
         close_tag_list = []
-        for i in range(0, tag_count):
+        for i in range(0, len(tag_stack)):
             close_tag_list.insert(0, tag_stack.pop())
         fb2_text += self.close_tags(close_tag_list)
 

From 3263a8c3bacf757db2e74ca46b687cdf89267b78 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Sat, 5 Dec 2009 15:41:50 -0500
Subject: [PATCH 095/120] PML Input: Make footnotes and sidebars display better
 and add return link.

---
 src/calibre/ebooks/pdb/ereader/reader132.py |  10 +-
 src/calibre/ebooks/pml/pmlconverter.py      | 146 ++++++++++++++------
 2 files changed, 110 insertions(+), 46 deletions(-)

diff --git a/src/calibre/ebooks/pdb/ereader/reader132.py b/src/calibre/ebooks/pdb/ereader/reader132.py
index ffe414dd47..d2a1c006e3 100644
--- a/src/calibre/ebooks/pdb/ereader/reader132.py
+++ b/src/calibre/ebooks/pdb/ereader/reader132.py
@@ -103,7 +103,7 @@ class Reader132(FormatReader):
         return self.decompress_text(number)
 
     def extract_content(self, output_dir):
-        from calibre.ebooks.pml.pmlconverter import footnote_sidebar_to_html
+        from calibre.ebooks.pml.pmlconverter import footnote_to_html, sidebar_to_html
         from calibre.ebooks.pml.pmlconverter import pml_to_html
 
         output_dir = os.path.abspath(output_dir)
@@ -127,18 +127,14 @@ class Reader132(FormatReader):
             footnoteids = re.findall('\w+(?=\x00)', self.section_data(self.header_record.footnote_offset).decode('cp1252' if self.encoding is None else self.encoding))
             for fid, i in enumerate(range(self.header_record.footnote_offset + 1, self.header_record.footnote_offset + self.header_record.footnote_count)):
                 self.log.debug('Extracting footnote page %i' % i)
-                html += '<dl>'
-                html += footnote_sidebar_to_html(footnoteids[fid], self.decompress_text(i))
-                html += '</dl>'
+                html += footnote_to_html(footnoteids[fid], self.decompress_text(i))
 
         if self.header_record.sidebar_count > 0:
             html += '<br /><h1>%s</h1>' % _('Sidebar')
             sidebarids = re.findall('\w+(?=\x00)', self.section_data(self.header_record.sidebar_offset).decode('cp1252' if self.encoding is None else self.encoding))
             for sid, i in enumerate(range(self.header_record.sidebar_offset + 1, self.header_record.sidebar_offset + self.header_record.sidebar_count)):
                 self.log.debug('Extracting sidebar page %i' % i)
-                html += '<dl>'
-                html += footnote_sidebar_to_html(sidebarids[sid], self.decompress_text(i))
-                html += '</dl>'
+                html += sidebar_to_html(sidebarids[sid], self.decompress_text(i))
 
         html += '</body></html>'
 
diff --git a/src/calibre/ebooks/pml/pmlconverter.py b/src/calibre/ebooks/pml/pmlconverter.py
index c120f2faf9..2d85a6b251 100644
--- a/src/calibre/ebooks/pml/pmlconverter.py
+++ b/src/calibre/ebooks/pml/pmlconverter.py
@@ -30,6 +30,7 @@ class PML_HTMLizer(object):
         'h5',
         'h6',
         'a',
+        'ra',
         'c',
         'r',
         't',
@@ -37,15 +38,24 @@ class PML_HTMLizer(object):
         'l',
         'k',
         'T',
-        'Fn',
-        'Sd',
-        'FS'
+        'FN',
+        'SB',
     ]
 
     STATES_VALUE_REQ = [
         'a',
         'T',
-        'FS'
+    ]
+
+    STATES_VALUE_REQ_2 = [
+        'ra',
+        'FN',
+        'SB',
+    ]
+
+    STATES_CLOSE_VALUE_REQ = [
+        'FN',
+        'SB',
     ]
 
     STATES_TAGS = {
@@ -57,7 +67,8 @@ class PML_HTMLizer(object):
         'h6': ('<h6>', '</h6>'),
         'sp': ('<sup>', '</sup>'),
         'sb': ('<sub>', '</sub>'),
-        'a': ('<a href="%s">', '</a>'),
+        'a': ('<a href="#%s">', '</a>'),
+        'ra': ('<span id="r%s"></span><a href="#%s">', '</a>'),
         'c': ('<div style="text-align: center; margin: auto;">', '</div>'),
         'r': ('<div style="text-align: right;">', '</div>'),
         't': ('<div style="margin-left: 5%;">', '</div>'),
@@ -68,7 +79,8 @@ class PML_HTMLizer(object):
         'b': ('<span style="font-weight: bold;">', '</span>'),
         'l': ('<span style="font-size: 150%;">', '</span>'),
         'k': ('<span style="font-size: 75%;">', '</span>'),
-        'FS': ('<div id="%s">', '</div>'),
+        'FN': ('<br /><br style="page-break-after: always;" /><div id="fn-%s"><dl><dt>%s</dt><dd><p>', '</p></dd></dl><small><a href="#rfn-%s">return</a></small></div>'),
+        'SB': ('<br /><br style="page-break-after: always;" /><div id="sb-%s"><dl><dt><dt>%s</dt><dd><p>', '</p></dd></dl><small><a href="#rsb-%s">return</a></small></div>'),
     }
 
     CODE_STATES = {
@@ -93,14 +105,20 @@ class PML_HTMLizer(object):
         'B': 'b',
         'l': 'l',
         'k': 'k',
-        'Fn': 'a',
-        'Sd': 'a',
-        'FN': 'FS',
-        'SB': 'FS',
+        'Fn': 'ra',
+        'Sd': 'ra',
+        'FN': 'FN',
+        'SB': 'SB',
     }
 
+    LINK_STATES = [
+        'a',
+        'ra',
+    ]
+
     BLOCK_STATES = [
         'a',
+        'ra',
         'h1',
         'h2',
         'h3',
@@ -116,7 +134,8 @@ class PML_HTMLizer(object):
         'r',
         't',
         'T',
-        'FS',
+        'FN',
+        'SB',
     ]
 
     SPAN_STATES = [
@@ -144,8 +163,8 @@ class PML_HTMLizer(object):
         pml = re.sub(r'(?mus)^[ ]*$', '', pml)
 
         # Footnotes and Sidebars
-        pml = re.sub(r'(?mus)<footnote\s+id="(?P<target>.+?)">\s*(?P<text>.*?)\s*</footnote>', lambda match: '\\FN="fns-%s"%s\\FN' % (match.group('target'), match.group('text')) if match.group('text') else '', pml)
-        pml = re.sub(r'(?mus)<sidebar\s+id="(?P<target>.+?)">\s*(?P<text>.*?)\s*</sidebar>', lambda match: '\\SB="fns-%s"%s\\SB' % (match.group('target'), match.group('text')) if match.group('text') else '', pml)
+        pml = re.sub(r'(?mus)<footnote\s+id="(?P<target>.+?)">\s*(?P<text>.*?)\s*</footnote>', lambda match: '\\FN="%s"%s\\FN' % (match.group('target'), match.group('text')) if match.group('text') else '', pml)
+        pml = re.sub(r'(?mus)<sidebar\s+id="(?P<target>.+?)">\s*(?P<text>.*?)\s*</sidebar>', lambda match: '\\SB="%s"%s\\SB' % (match.group('target'), match.group('text')) if match.group('text') else '', pml)
 
         # Convert &'s into entities so &amp; in the text doesn't get turned into
         # &. It will display as &amp;
@@ -181,10 +200,12 @@ class PML_HTMLizer(object):
 
         for key, val in self.state.items():
             if val[0]:
-                if key not in self.STATES_VALUE_REQ:
-                    start += self.STATES_TAGS[key][0]
-                else:
+                if key in self.STATES_VALUE_REQ:
                     start += self.STATES_TAGS[key][0] % val[1]
+                elif key in self.STATES_VALUE_REQ_2:
+                    start += self.STATES_TAGS[key][0] % (val[1], val[1])
+                else:
+                    start += self.STATES_TAGS[key][0]
 
         return u'<p>%s' % start
 
@@ -206,7 +227,10 @@ class PML_HTMLizer(object):
                 else:
                     other.append(key)
         for key in span+div+other:
-            end += self.STATES_TAGS[key][1]
+            if key in self.STATES_CLOSE_VALUE_REQ:
+                end += self.STATES_TAGS[key][1] % self.state[key][1]
+            else:
+                end += self.STATES_TAGS[key][1]
 
         return u'%s</p>' % end
 
@@ -239,11 +263,17 @@ class PML_HTMLizer(object):
         text = u''
 
         if self.state[code][0]:
-            text = self.STATES_TAGS[code][1]
+            if code in self.STATES_CLOSE_VALUE_REQ:
+                text = self.STATES_TAGS[code][1] % self.state[code][1]
+            else:
+                text = self.STATES_TAGS[code][1]
         else:
-            if code in self.STATES_VALUE_REQ:
+            if code in self.STATES_VALUE_REQ or code in self.STATES_VALUE_REQ_2:
                 val = self.code_value(stream)
-                text += self.STATES_TAGS[code][0] % val
+                if code in self.STATES_VALUE_REQ:
+                    text = self.STATES_TAGS[code][0] % val
+                else:
+                    text = self.STATES_TAGS[code][0] % (val, val)
                 self.state[code][1] = val
             else:
                 text = self.STATES_TAGS[code][0]
@@ -258,7 +288,10 @@ class PML_HTMLizer(object):
             # Close all.
             for c in self.SPAN_STATES+self.DIV_STATES:
                 if self.state[c][0]:
-                    text += self.STATES_TAGS[c][1]
+                    if c in self.STATES_CLOSE_VALUE_REQ:
+                        text += self.STATES_TAGS[c][1] % self.state[c][1]
+                    else:
+                        text += self.STATES_TAGS[c][1]
             # Reopen the based on state.
             for c in self.DIV_STATES+self.SPAN_STATES:
                 if code == c:
@@ -266,6 +299,8 @@ class PML_HTMLizer(object):
                 if self.state[c][0]:
                     if c in self.STATES_VALUE_REQ:
                         text += self.STATES_TAGS[self.CODE_STATES[c]][0] % self.state[c][1]
+                    elif c in self.STATES_VALUE_REQ_2:
+                        text += self.STATES_TAGS[self.CODE_STATES[c]][0] % (self.state[c][1], self.state[c][1])
                     else:
                         text += self.STATES_TAGS[c][0]
         # Open code.
@@ -273,11 +308,17 @@ class PML_HTMLizer(object):
             # Close all spans.
             for c in self.SPAN_STATES:
                 if self.state[c][0]:
-                    text += self.STATES_TAGS[c][1]
+                    if c in self.STATES_CLOSE_VALUE_REQ:
+                        text += self.STATES_TAGS[c][1] % self.state[c][1]
+                    else:
+                        text += self.STATES_TAGS[c][1]
             # Process the code
-            if code in self.STATES_VALUE_REQ:
+            if code in self.STATES_VALUE_REQ or code in self.STATES_VALUE_REQ_2:
                 val = self.code_value(stream)
-                text += self.STATES_TAGS[code][0] % val
+                if code in self.STATES_VALUE_REQ:
+                    text += self.STATES_TAGS[code][0] % val
+                else:
+                    text += self.STATES_TAGS[code][0] % (val, val)
                 self.state[code][1] = val
             else:
                 text += self.STATES_TAGS[code][0]
@@ -286,6 +327,8 @@ class PML_HTMLizer(object):
                 if self.state[c][0]:
                     if c in self.STATES_VALUE_REQ:
                         text += self.STATES_TAGS[self.CODE_STATES[c]][0] % self.state[c][1]
+                    elif c in self.STATES_VALUE_REQ_2:
+                        text += self.STATES_TAGS[self.CODE_STATES[c]][0] % (self.state[c][1], self.state[c][1])
                     else:
                         text += self.STATES_TAGS[c][0]
 
@@ -299,7 +342,10 @@ class PML_HTMLizer(object):
             # Close all spans
             for c in self.SPAN_STATES:
                 if self.state[c][0]:
-                    text += self.STATES_TAGS[c][1]
+                    if c in self.STATES_CLOSE_VALUE_REQ:
+                        text += self.STATES_TAGS[c][1] % self.state[c][1]
+                    else:
+                        text += self.STATES_TAGS[c][1]
             # Re-open the spans based on state except for code which will be
             # left closed.
             for c in self.SPAN_STATES:
@@ -308,13 +354,18 @@ class PML_HTMLizer(object):
                 if self.state[c][0]:
                     if c in self.STATES_VALUE_REQ:
                         text += self.STATES_TAGS[code][0] % self.state[c][1]
+                    elif c in self.STATES_VALUE_REQ_2:
+                        text += self.STATES_TAGS[code][0] % (self.state[c][1], self.state[c][1])
                     else:
                         text += self.STATES_TAGS[c][0]
         # Open code.
         else:
-            if code in self.STATES_VALUE_REQ:
+            if code in self.STATES_VALUE_REQ or code in self.STATES_VALUE_REQ_2:
                 val = self.code_value(stream)
-                text += self.STATES_TAGS[code][0] % val
+                if code in self.STATES_VALUE_REQ:
+                    text += self.STATES_TAGS[code][0] % val
+                else:
+                    text += self.STATES_TAGS[code][0] % (val, val)
                 self.state[code][1] = val
             else:
                 text += self.STATES_TAGS[code][0]
@@ -327,19 +378,29 @@ class PML_HTMLizer(object):
         # Close all spans
         for c in self.SPAN_STATES:
             if self.state[c][0]:
-                text += self.STATES_TAGS[c][1]
-
+                if c in self.STATES_CLOSE_VALUE_REQ:
+                    text += self.STATES_TAGS[c][1] % self.state[c][1]
+                else:
+                    text += self.STATES_TAGS[c][1]
         # Process the code
         if self.state[code][0]:
             # Close tag
-            text += self.STATES_TAGS[code][1]
+            if code in self.STATES_CLOSE_VALUE_REQ:
+                text += self.STATES_TAGS[code][1] % self.state[code][1]
+            else:
+                text += self.STATES_TAGS[code][1]
         else:
             # Open tag
-            if code in self.STATES_VALUE_REQ:
+            if code in self.STATES_VALUE_REQ or code in self.STATES_VALUE_REQ_2:
                 val = self.code_value(stream)
+                if code in self.LINK_STATES:
+                    val = val.lstrip('#')
                 if pre:
-                    val = '#%s-%s' % (pre, val)
-                text += self.STATES_TAGS[code][0] % val
+                    val = '%s-%s' % (pre, val)
+                if code in self.STATES_VALUE_REQ:
+                    text += self.STATES_TAGS[code][0] % val
+                else:
+                    text += self.STATES_TAGS[code][0] % (val, val)
                 self.state[code][1] = val
             else:
                 text += self.STATES_TAGS[code][0]
@@ -349,6 +410,8 @@ class PML_HTMLizer(object):
             if self.state[c][0]:
                 if c in self.STATES_VALUE_REQ:
                     text += self.STATES_TAGS[code][0] % self.state[c][1]
+                elif c in self.STATES_VALUE_REQ_2:
+                    text += self.STATES_TAGS[code][0] % (self.state[c][1], self.state[c][1])
                 else:
                     text += self.STATES_TAGS[c][0]
 
@@ -432,13 +495,13 @@ class PML_HTMLizer(object):
                     elif c in 'FSX':
                         l = line.read(1)
                         if '%s%s' % (c, l) == 'Fn':
-                            text = self.process_code('Fn', line, 'fns')
+                            text = self.process_code('Fn', line, 'fn')
                         elif '%s%s' % (c, l) == 'FN':
                             text = self.process_code('FN', line)
                         elif '%s%s' % (c, l) == 'SB':
                             text = self.process_code('SB', line)
                         elif '%s%s' % (c, l) == 'Sd':
-                            text = self.process_code('Sd', line, 'fns')
+                            text = self.process_code('Sd', line, 'sb')
                         else:
                             text = self.process_code('%s%s' % (c, l), line)
                     elif c == 'm':
@@ -496,8 +559,13 @@ def pml_to_html(pml):
     hizer = PML_HTMLizer()
     return hizer.parse_pml(pml)
 
-def footnote_sidebar_to_html(id, pml):
-    if id.startswith('\x01'):
-        id = id[2:]
-    html = '<div id="fns-%s"><dt>%s</dt></div><dd>%s</dd>' % (id, id, pml_to_html(pml))
+def footnote_sidebar_to_html(pre_id, id, pml):
+    id = id.strip('\x01')
+    html = '<br /><br style="page-break-after: always;" /><div id="%s-%s"><dl><dt>%s</dt><dd><p>%s</p></dd></dl><small><a href="#r%s-%s">return</a></small></div>' % (pre_id, id, id, pml_to_html(pml), pre_id, id)
     return html
+
+def footnote_to_html(id, pml):
+    return footnote_sidebar_to_html('fn', id, pml)
+
+def sidebar_to_html(id, pml):
+    return footnote_sidebar_to_html('sb', id, pml)

From 216c27bb3c8d45d9d72ff3f5d0d72eb0557a3abe Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Sat, 5 Dec 2009 15:45:23 -0500
Subject: [PATCH 096/120] PML Input: Don't use id as title for footnotes and
 sidebars.

---
 src/calibre/ebooks/pml/pmlconverter.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/calibre/ebooks/pml/pmlconverter.py b/src/calibre/ebooks/pml/pmlconverter.py
index 2d85a6b251..3484be5927 100644
--- a/src/calibre/ebooks/pml/pmlconverter.py
+++ b/src/calibre/ebooks/pml/pmlconverter.py
@@ -45,12 +45,12 @@ class PML_HTMLizer(object):
     STATES_VALUE_REQ = [
         'a',
         'T',
+        'FN',
+        'SB',
     ]
 
     STATES_VALUE_REQ_2 = [
         'ra',
-        'FN',
-        'SB',
     ]
 
     STATES_CLOSE_VALUE_REQ = [
@@ -79,8 +79,8 @@ class PML_HTMLizer(object):
         'b': ('<span style="font-weight: bold;">', '</span>'),
         'l': ('<span style="font-size: 150%;">', '</span>'),
         'k': ('<span style="font-size: 75%;">', '</span>'),
-        'FN': ('<br /><br style="page-break-after: always;" /><div id="fn-%s"><dl><dt>%s</dt><dd><p>', '</p></dd></dl><small><a href="#rfn-%s">return</a></small></div>'),
-        'SB': ('<br /><br style="page-break-after: always;" /><div id="sb-%s"><dl><dt><dt>%s</dt><dd><p>', '</p></dd></dl><small><a href="#rsb-%s">return</a></small></div>'),
+        'FN': ('<br /><br style="page-break-after: always;" /><div id="fn-%s"><p>', '</p><<small><a href="#rfn-%s">return</a></small></div>'),
+        'SB': ('<br /><br style="page-break-after: always;" /><div id="sb-%s"><p>', '</p><small><a href="#rsb-%s">return</a></small></div>'),
     }
 
     CODE_STATES = {
@@ -561,7 +561,7 @@ def pml_to_html(pml):
 
 def footnote_sidebar_to_html(pre_id, id, pml):
     id = id.strip('\x01')
-    html = '<br /><br style="page-break-after: always;" /><div id="%s-%s"><dl><dt>%s</dt><dd><p>%s</p></dd></dl><small><a href="#r%s-%s">return</a></small></div>' % (pre_id, id, id, pml_to_html(pml), pre_id, id)
+    html = '<br /><br style="page-break-after: always;" /><div id="%s-%s"><p>%s</p><small><a href="#r%s-%s">return</a></small></div>' % (pre_id, id, pml_to_html(pml), pre_id, id)
     return html
 
 def footnote_to_html(id, pml):

From 4a20c9a5829d4ec82655ff801edc8e37f853d11f Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Sat, 5 Dec 2009 21:39:33 -0500
Subject: [PATCH 097/120] PML Output: Remove unnecessary entity to unicode
 call.

---
 src/calibre/ebooks/pml/pmlml.py | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/src/calibre/ebooks/pml/pmlml.py b/src/calibre/ebooks/pml/pmlml.py
index b40870c0b5..6c217f524c 100644
--- a/src/calibre/ebooks/pml/pmlml.py
+++ b/src/calibre/ebooks/pml/pmlml.py
@@ -158,12 +158,6 @@ class PMLMLizer(object):
         text = text.replace(u'\xc2', '')
         text = text.replace(u'\xa0', ' ')
 
-        # Turn all html entities into unicode. This should not be necessary as
-        # lxml should have already done this but we want to be sure it happens.
-        for entity in set(re.findall('&.+?;', text)):
-            mo = re.search('(%s)' % entity[1:-1], text)
-            text = text.replace(entity, entity_to_unicode(mo))
-
         # Turn all characters that cannot be represented by themself into their
         # PML code equivelent
         text = re.sub('[^\x00-\x7f]', lambda x: unipmlcode(x.group()), text)

From 0fff29bfb2551592e65ee994a65b99ad7f3a5179 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Sat, 5 Dec 2009 21:40:27 -0500
Subject: [PATCH 098/120] PML Output: Remove unnecessary import.

---
 src/calibre/ebooks/pml/pmlml.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/calibre/ebooks/pml/pmlml.py b/src/calibre/ebooks/pml/pmlml.py
index 6c217f524c..b23cd40813 100644
--- a/src/calibre/ebooks/pml/pmlml.py
+++ b/src/calibre/ebooks/pml/pmlml.py
@@ -14,7 +14,6 @@ from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace
 from calibre.ebooks.oeb.stylizer import Stylizer
 from calibre.ebooks.pdb.ereader import image_name
 from calibre.ebooks.pml import unipmlcode
-from calibre import entity_to_unicode
 
 TAG_MAP = {
     'b'       : 'B',

From 44a78a9f0ff7c675341b5b5f0fd74882edabe288 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Mon, 7 Dec 2009 07:21:27 -0500
Subject: [PATCH 099/120] Preliminary Nook driver.

---
 src/calibre/customize/builtins.py    |  2 ++
 src/calibre/devices/nook/__init__.py |  0
 src/calibre/devices/nook/driver.py   | 36 ++++++++++++++++++++++++++++
 3 files changed, 38 insertions(+)
 create mode 100644 src/calibre/devices/nook/__init__.py
 create mode 100644 src/calibre/devices/nook/driver.py

diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py
index c317decd76..25a5fd0910 100644
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@@ -409,6 +409,7 @@ from calibre.devices.iliad.driver import ILIAD
 from calibre.devices.irexdr.driver import IREXDR1000
 from calibre.devices.jetbook.driver import JETBOOK
 from calibre.devices.kindle.driver import KINDLE, KINDLE2, KINDLE_DX
+from calibre.devices.nook.driver import NOOK
 from calibre.devices.prs500.driver import PRS500
 from calibre.devices.prs505.driver import PRS505
 from calibre.devices.prs700.driver import PRS700
@@ -464,6 +465,7 @@ plugins += [
     KINDLE,
     KINDLE2,
     KINDLE_DX,
+    NOOK,
     PRS505,
     PRS700,
     PRS500,
diff --git a/src/calibre/devices/nook/__init__.py b/src/calibre/devices/nook/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/src/calibre/devices/nook/driver.py b/src/calibre/devices/nook/driver.py
new file mode 100644
index 0000000000..45031082e1
--- /dev/null
+++ b/src/calibre/devices/nook/driver.py
@@ -0,0 +1,36 @@
+# -*- coding: utf-8 -*-
+
+__license__   = 'GPL v3'
+__copyright__ = '2009, John Schember <john at nachtimwald.com>'
+__docformat__ = 'restructuredtext en'
+
+'''
+Device driver for Barns and Nobel's Nook
+'''
+
+from calibre.devices.usbms.driver import USBMS
+
+class NOOK(USBMS):
+
+    name           = 'Nook Iliad Device Interface'
+    description    = _('Communicate with the Barns and Noble Nook eBook reader.')
+    author         = _('John Schember')
+    supported_platforms = ['windows', 'linux']
+
+    # Ordered list of supported formats
+    # Be sure these have an entry in calibre.devices.mime
+    FORMATS     = ['epub', 'pdb', 'pdf']
+
+    VENDOR_ID   = [0x2080]
+    PRODUCT_ID  = [0x001]
+    BCD         = [0x322]
+
+    VENDOR_NAME = 'B&N'
+    WINDOWS_MAIN_MEM = 'NOOK'
+
+    #OSX_MAIN_MEM = ''
+
+    MAIN_MEMORY_VOLUME_LABEL  = 'BN Nook Main Memory'
+
+    EBOOK_DIR_MAIN = 'my documents'
+    SUPPORTS_SUB_DIRS = True

From 25d19004587cd651c524f0469251530b24ce9670 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Mon, 7 Dec 2009 07:35:57 -0500
Subject: [PATCH 100/120] NOOK Driver: Support storage card.

---
 src/calibre/devices/nook/driver.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/src/calibre/devices/nook/driver.py b/src/calibre/devices/nook/driver.py
index 45031082e1..fe73edf84a 100644
--- a/src/calibre/devices/nook/driver.py
+++ b/src/calibre/devices/nook/driver.py
@@ -27,6 +27,7 @@ class NOOK(USBMS):
 
     VENDOR_NAME = 'B&N'
     WINDOWS_MAIN_MEM = 'NOOK'
+    WINDOWS_CARD_A_MEM = 'NOOK'
 
     #OSX_MAIN_MEM = ''
 
@@ -34,3 +35,12 @@ class NOOK(USBMS):
 
     EBOOK_DIR_MAIN = 'my documents'
     SUPPORTS_SUB_DIRS = True
+
+    def windows_sort_drives(self, drives):
+        main = drives.get('main', None)
+        card = drives.get('carda', None)
+        if card and main and card < main:
+            drives['main'] = card
+            drives['carda'] = main
+
+        return drives

From af4e0b0155c9000212a0e2ead02dd18a6d5ea8e5 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Sat, 12 Dec 2009 10:48:48 -0500
Subject: [PATCH 101/120] Fix errors preventing develop --clean-all from
 working properly.

---
 setup/extensions.py                         | 6 +++---
 setup/installer/__init__.py                 | 2 +-
 src/calibre/ebooks/pdb/ereader/reader132.py | 2 +-
 src/calibre/ebooks/pdb/ereader/reader202.py | 4 ++--
 src/calibre/ebooks/pdb/palmdoc/reader.py    | 2 +-
 src/calibre/ebooks/pdb/palmdoc/writer.py    | 3 ++-
 6 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/setup/extensions.py b/setup/extensions.py
index faa1a3d88a..0d465f4a0a 100644
--- a/setup/extensions.py
+++ b/setup/extensions.py
@@ -375,9 +375,9 @@ class Build(Command):
             for x in (dest, dest+'.manifest'):
                 if os.path.exists(x):
                     os.remove(x)
-        shutil.rmtree(self.j(self.d(self.SRC), 'build'))
-
-
+        build_dir = self.j(self.d(self.SRC), 'build')
+        if os.path.exists(build_dir):
+            shutil.rmtree(build_dir)
 
 
 class BuildPDF2XML(Command):
diff --git a/setup/installer/__init__.py b/setup/installer/__init__.py
index 99e7586aa3..2acc5d4649 100644
--- a/setup/installer/__init__.py
+++ b/setup/installer/__init__.py
@@ -126,5 +126,5 @@ class VMInstaller(Command):
 
     def clean(self):
         installer = self.installer()
-        if os.patyh.exists(installer):
+        if os.path.exists(installer):
             os.remove(installer)
diff --git a/src/calibre/ebooks/pdb/ereader/reader132.py b/src/calibre/ebooks/pdb/ereader/reader132.py
index d2a1c006e3..cce1d40f8c 100644
--- a/src/calibre/ebooks/pdb/ereader/reader132.py
+++ b/src/calibre/ebooks/pdb/ereader/reader132.py
@@ -15,7 +15,6 @@ import zlib
 
 from calibre import CurrentDir
 from calibre.ebooks import DRMError
-from calibre.ebooks.compression.palmdoc import decompress_doc
 from calibre.ebooks.metadata.opf2 import OPFCreator
 from calibre.ebooks.pdb.ereader import EreaderError
 from calibre.ebooks.pdb.formatreader import FormatReader
@@ -79,6 +78,7 @@ class Reader132(FormatReader):
 
     def decompress_text(self, number):
         if self.header_record.compression == 2:
+            from calibre.ebooks.compression.palmdoc import decompress_doc
             return decompress_doc(self.section_data(number)).decode('cp1252' if self.encoding is None else self.encoding, 'replace')
         if self.header_record.compression == 10:
             return zlib.decompress(self.section_data(number)).decode('cp1252' if self.encoding is None else self.encoding, 'replace')
diff --git a/src/calibre/ebooks/pdb/ereader/reader202.py b/src/calibre/ebooks/pdb/ereader/reader202.py
index ce7ad1263a..a674c5bf60 100644
--- a/src/calibre/ebooks/pdb/ereader/reader202.py
+++ b/src/calibre/ebooks/pdb/ereader/reader202.py
@@ -12,7 +12,6 @@ import struct
 
 from calibre import CurrentDir
 from calibre.ebooks.metadata.opf2 import OPFCreator
-from calibre.ebooks.compression.palmdoc import decompress_doc
 from calibre.ebooks.pdb.formatreader import FormatReader
 from calibre.ebooks.pdb.ereader import EreaderError
 
@@ -55,7 +54,8 @@ class Reader202(FormatReader):
         return self.sections[number]
 
     def decompress_text(self, number):
-            return decompress_doc(''.join([chr(ord(x) ^ 0xA5) for x in self.section_data(number)])).decode('cp1252' if self.encoding is None else self.encoding, 'replace')
+        from calibre.ebooks.compression.palmdoc import decompress_doc
+        return decompress_doc(''.join([chr(ord(x) ^ 0xA5) for x in self.section_data(number)])).decode('cp1252' if self.encoding is None else self.encoding, 'replace')
 
     def get_image(self, number):
         name = None
diff --git a/src/calibre/ebooks/pdb/palmdoc/reader.py b/src/calibre/ebooks/pdb/palmdoc/reader.py
index 0a57e3f51a..ea7e6bbc2b 100644
--- a/src/calibre/ebooks/pdb/palmdoc/reader.py
+++ b/src/calibre/ebooks/pdb/palmdoc/reader.py
@@ -11,7 +11,6 @@ __docformat__ = 'restructuredtext en'
 import os
 import struct
 
-from calibre.ebooks.compression.palmdoc import decompress_doc
 from calibre.ebooks.pdb.formatreader import FormatReader
 from calibre.ebooks.txt.processor import convert_basic, opf_writer, \
     separate_paragraphs_single_line, separate_paragraphs_print_formatted
@@ -51,6 +50,7 @@ class Reader(FormatReader):
         if self.header_record.compression == 1:
             return self.section_data(number).decode('cp1252' if self.encoding is None else self.encoding)
         if self.header_record.compression == 2:
+            from calibre.ebooks.compression.palmdoc import decompress_doc
             return decompress_doc(self.section_data(number)).decode('cp1252' if self.encoding is None else self.encoding, 'replace')
         return ''
 
diff --git a/src/calibre/ebooks/pdb/palmdoc/writer.py b/src/calibre/ebooks/pdb/palmdoc/writer.py
index 91a5eb3d97..3f4a92fbed 100644
--- a/src/calibre/ebooks/pdb/palmdoc/writer.py
+++ b/src/calibre/ebooks/pdb/palmdoc/writer.py
@@ -10,7 +10,6 @@ __docformat__ = 'restructuredtext en'
 
 import struct
 
-from calibre.ebooks.compression.palmdoc import compress_doc
 from calibre.ebooks.pdb.formatwriter import FormatWriter
 from calibre.ebooks.pdb.header import PdbHeaderBuilder
 from calibre.ebooks.txt.txtml import TXTMLizer
@@ -25,6 +24,8 @@ class Writer(FormatWriter):
         self.log = log
 
     def write_content(self, oeb_book, out_stream, metadata=None):
+        from calibre.ebooks.compression.palmdoc import compress_doc
+
         title = self.opts.title if self.opts.title else oeb_book.metadata.title[0].value if oeb_book.metadata.title != [] else _('Unknown')
 
         txt_records, txt_length = self._generate_text(oeb_book)

From a8cb44249df81a516c03aaf8a8d1da374a49b515 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Sat, 12 Dec 2009 19:55:30 -0500
Subject: [PATCH 102/120] Work on FB2ML

---
 src/calibre/ebooks/fb2/fb2ml.py | 88 ++++++++++++++++++++-------------
 1 file changed, 54 insertions(+), 34 deletions(-)

diff --git a/src/calibre/ebooks/fb2/fb2ml.py b/src/calibre/ebooks/fb2/fb2ml.py
index 31b0d8f0a2..d991c0072b 100644
--- a/src/calibre/ebooks/fb2/fb2ml.py
+++ b/src/calibre/ebooks/fb2/fb2ml.py
@@ -10,6 +10,7 @@ Transform OEB content into FB2 markup
 
 import cStringIO
 from base64 import b64encode
+import re
 
 try:
     from PIL import Image
@@ -30,14 +31,15 @@ TAG_MAP = {
     'i' : 'emphasis',
     'p' : 'p',
     'li' : 'p',
-    'br' : 'p',
+    'div': 'p',
 }
 
-TAG_SPACE = [
-    'div',
+TAG_FORCE_P = [
     'br',
 ]
 
+TAG_SPACE = []
+
 TAG_IMAGES = [
     'img',
 ]
@@ -79,8 +81,14 @@ class FB2MLizer(object):
         output.append(self.fb2mlize_images())
         output.append(self.fb2_footer())
         output = ''.join(output).replace(u'ghji87yhjko0Caliblre-toc-placeholder-for-insertion-later8ujko0987yjk', self.get_toc())
+        output = self.clean_text(output)
         return u'<?xml version="1.0" encoding="UTF-8"?>\n%s' % etree.tostring(etree.fromstring(output), encoding=unicode, pretty_print=True)
 
+    def clean_text(self, text):
+        text = re.sub('<p>[ ]*</p>', '', text)
+
+        return text
+
     def fb2_header(self):
         author_first = u''
         author_middle = u''
@@ -124,7 +132,7 @@ class FB2MLizer(object):
         return output
 
     def get_toc(self):
-        toc = [u'']
+        toc = []
         if self.opts.inline_toc:
             self.log.debug('Generating table of contents...')
             toc.append(u'<p>%s</p>' % _('Table of Contents:'))
@@ -136,7 +144,7 @@ class FB2MLizer(object):
         return ''.join(toc)
 
     def get_text(self):
-        text = [u'']
+        text = []
         for item in self.oeb_book.spine:
             self.log.debug('Converting %s to FictionBook2 XML' % item.href)
             stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
@@ -162,7 +170,7 @@ class FB2MLizer(object):
         return '<a id="%s" />' % aid
 
     def fb2mlize_images(self):
-        images = [u'']
+        images = []
         for item in self.oeb_book.manifest:
             if item.media_type in OEB_RASTER_IMAGES:
                 try:
@@ -190,14 +198,15 @@ class FB2MLizer(object):
     def dump_text(self, elem, stylizer, page, tag_stack=[]):
         if not isinstance(elem.tag, basestring) \
            or namespace(elem.tag) != XHTML_NS:
-            return [u'']
+            return []
 
-        fb2_text = [u'']
         style = stylizer.style(elem)
-
         if style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') \
            or style['visibility'] == 'hidden':
-            return [u'']
+            return []
+
+        fb2_text = []
+        tags = []
 
         tag = barename(elem.tag)
 
@@ -221,14 +230,32 @@ class FB2MLizer(object):
                         self.link_hrefs[href] = 'calibre_link-%s' % len(self.link_hrefs.keys())
                     href = self.link_hrefs[href]
                     fb2_text.append('<a xlink:href="#%s">' % href)
-                tag_stack.append('a')
+                tags.append('a')
 
         # Anchor ids
         id_name = elem.get('id')
         if id_name:
             fb2_text.append(self.get_anchor(page, id_name))
 
-        if tag in TAG_TITLE:
+        if tag in TAG_FORCE_P:
+            if 'p' in tag_stack+tags:
+                # Close all up to p. Close p. Reopen all closed tags including p.
+                all_tags = tag_stack+tags
+                closed_tags = []
+                all_tags.reverse()
+                for t in all_tags:
+                    fb2_text.append('</%s>' % t)
+                    closed_tags.append(t)
+                    if t == 'p':
+                        break
+                closed_tags.reverse()
+                for t in closed_tags:
+                    fb2_text.append('<%s>' % t)
+            else:
+                fb2_text.append('<p>')
+                tags.append('p')
+
+        '''if tag in TAG_TITLE:
             if 'p' in tag_stack:
                 ctag = []
                 ctag.append(tag_stack.pop())
@@ -237,42 +264,35 @@ class FB2MLizer(object):
                 fb2_text += self.close_tags(ctag)
             fb2_text.append('</section><section><title><p>')
             tag_stack.append('title')
-            tag_stack.append('p')
+            tag_stack.append('p')'''
 
         fb2_tag = TAG_MAP.get(tag, None)
-        if fb2_tag:
-            if fb2_tag in tag_stack:
-                tag_stack.reverse()
-                tag_stack.remove(fb2_tag)
-                tag_stack.reverse()
-                fb2_text.append('</%s>' % fb2_tag)
+        if fb2_tag and fb2_tag not in tag_stack+tags:
             fb2_text.append('<%s>' % fb2_tag)
-            tag_stack.append(fb2_tag)
+            tags.append(fb2_tag)
 
         # Processes style information
         for s in STYLES:
             style_tag = s[1].get(style[s[0]], None)
-            if style_tag:
+            if style_tag and style_tag not in tag_stack+tags:
                 fb2_text.append('<%s>' % style_tag)
-                tag_stack.append(style_tag)
+                tags.append(style_tag)
 
         if tag in TAG_SPACE:
-            if not fb2_text or fb2_text[-1] != ' ':
+            if not fb2_text or fb2_text[-1] != ' ' or not fb2_text[-1].endswith(' '):
                 fb2_text.append(' ')
 
         if hasattr(elem, 'text') and elem.text:
-            if 'p' not in tag_stack:
+            if 'p' not in tag_stack+tags:
                 fb2_text.append('<p>%s</p>' % prepare_string_for_xml(elem.text))
             else:
                 fb2_text.append(prepare_string_for_xml(elem.text))
 
         for item in elem:
-            fb2_text += self.dump_text(item, stylizer, page, tag_stack)
+            fb2_text += self.dump_text(item, stylizer, page, tag_stack+tags)
 
-        close_tag_list = []
-        for i in range(0, len(tag_stack)):
-            close_tag_list.insert(0, tag_stack.pop())
-        fb2_text += self.close_tags(close_tag_list)
+        tags.reverse()
+        fb2_text += self.close_tags(tags)
 
         if hasattr(elem, 'tail') and elem.tail:
             if 'p' not in tag_stack:
@@ -280,12 +300,12 @@ class FB2MLizer(object):
             else:
                 fb2_text.append(prepare_string_for_xml(elem.tail))
 
+        #print fb2_text
         return fb2_text
 
     def close_tags(self, tags):
-        fb2_text = [u'']
-        for i in range(0, len(tags)):
-            fb2_tag = tags.pop()
-            fb2_text.append('</%s>' % fb2_tag)
+        text = []
+        for tag in tags:
+            text.append('</%s>' % tag)
 
-        return fb2_text
+        return text

From ca89801730d9b736ef8b4baa6aa5e9094385739b Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Sat, 12 Dec 2009 20:21:43 -0500
Subject: [PATCH 103/120] More FB2 work.

---
 src/calibre/ebooks/fb2/fb2ml.py | 15 ---------------
 1 file changed, 15 deletions(-)

diff --git a/src/calibre/ebooks/fb2/fb2ml.py b/src/calibre/ebooks/fb2/fb2ml.py
index d991c0072b..b454d7c7ab 100644
--- a/src/calibre/ebooks/fb2/fb2ml.py
+++ b/src/calibre/ebooks/fb2/fb2ml.py
@@ -48,10 +48,6 @@ TAG_LINKS = [
     'a',
 ]
 
-TAG_TITLE = [
-    'h1',
-]
-
 STYLES = [
     ('font-weight', {'bold'   : 'strong', 'bolder' : 'strong'}),
     ('font-style', {'italic' : 'emphasis'}),
@@ -255,17 +251,6 @@ class FB2MLizer(object):
                 fb2_text.append('<p>')
                 tags.append('p')
 
-        '''if tag in TAG_TITLE:
-            if 'p' in tag_stack:
-                ctag = []
-                ctag.append(tag_stack.pop())
-                while ctag[-1] != 'p':
-                    ctag.append(tag_stack.pop())
-                fb2_text += self.close_tags(ctag)
-            fb2_text.append('</section><section><title><p>')
-            tag_stack.append('title')
-            tag_stack.append('p')'''
-
         fb2_tag = TAG_MAP.get(tag, None)
         if fb2_tag and fb2_tag not in tag_stack+tags:
             fb2_text.append('<%s>' % fb2_tag)

From 3216eccce77d792da3ab980cd44f43b23fb2dde5 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Sat, 12 Dec 2009 21:03:07 -0500
Subject: [PATCH 104/120] Add Todo to FB2MLizer.

---
 src/calibre/ebooks/fb2/fb2ml.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/calibre/ebooks/fb2/fb2ml.py b/src/calibre/ebooks/fb2/fb2ml.py
index b454d7c7ab..1c0e5c10be 100644
--- a/src/calibre/ebooks/fb2/fb2ml.py
+++ b/src/calibre/ebooks/fb2/fb2ml.py
@@ -54,6 +54,13 @@ STYLES = [
 ]
 
 class FB2MLizer(object):
+    '''
+    Todo: * Ensure all style tags are inside of the p tags.
+          * Include more FB2 specific tags in the conversion.
+          * Handle reopening of a tag properly.
+          * Figure out some way to turn oeb_book.toc items into <section><title>
+            <p> to allow for readers to generate toc from the document.
+    '''
 
     def __init__(self, log):
         self.log = log
@@ -285,7 +292,6 @@ class FB2MLizer(object):
             else:
                 fb2_text.append(prepare_string_for_xml(elem.tail))
 
-        #print fb2_text
         return fb2_text
 
     def close_tags(self, tags):

From 2d2ec5fb51feded1e81e8dcf9214c1e075068387 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Sat, 12 Dec 2009 22:30:16 -0500
Subject: [PATCH 105/120] Support Ganaxa Ger2 ereader.

---
 src/calibre/customize/builtins.py      |  5 +++--
 src/calibre/devices/cybookg3/driver.py | 16 ++++++++++++++++
 src/calibre/devices/eb600/driver.py    | 17 ++++++++++++++++-
 3 files changed, 35 insertions(+), 3 deletions(-)

diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py
index 25a5fd0910..1067d72357 100644
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@@ -404,7 +404,7 @@ from calibre.devices.bebook.driver import BEBOOK, BEBOOK_MINI
 from calibre.devices.blackberry.driver import BLACKBERRY
 from calibre.devices.cybookg3.driver import CYBOOKG3, CYBOOK_OPUS
 from calibre.devices.eb600.driver import EB600, COOL_ER, SHINEBOOK, \
-                POCKETBOOK360
+                POCKETBOOK360, GER2
 from calibre.devices.iliad.driver import ILIAD
 from calibre.devices.irexdr.driver import IREXDR1000
 from calibre.devices.jetbook.driver import JETBOOK
@@ -476,7 +476,8 @@ plugins += [
     ESLICK,
     NUUT2,
     IRIVER_STORY,
-    POCKETBOOK360
+    POCKETBOOK360,
+    GER2,
 ]
 plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
                                         x.__name__.endswith('MetadataReader')]
diff --git a/src/calibre/devices/cybookg3/driver.py b/src/calibre/devices/cybookg3/driver.py
index 6b5e5ff4ed..54f7f93579 100644
--- a/src/calibre/devices/cybookg3/driver.py
+++ b/src/calibre/devices/cybookg3/driver.py
@@ -78,6 +78,14 @@ class CYBOOKG3(USBMS):
 
         return zip(paths, cycle([on_card]))
 
+    @classmethod
+    def can_handle(cls, device_info, debug=False):
+        USBMS.can_handle(device_info, debug)
+        if islinux:
+            if device_info[3] == 'Bookeen' and device_info[4] == 'Cybook Gen3':
+                return True
+        return False
+
 
 class CYBOOK_OPUS(CYBOOKG3):
 
@@ -103,3 +111,11 @@ class CYBOOK_OPUS(CYBOOKG3):
     EBOOK_DIR_MAIN = 'eBooks'
     EBOOK_DIR_CARD_A = 'eBooks'
     SUPPORTS_SUB_DIRS = True
+
+    @classmethod
+    def can_handle(cls, device_info, debug=False):
+        USBMS.can_handle(device_info, debug)
+        if islinux:
+            if device_info[3] == 'Bookeen':
+                return True
+        return False
diff --git a/src/calibre/devices/eb600/driver.py b/src/calibre/devices/eb600/driver.py
index 1e36775bb2..e0c031f30c 100644
--- a/src/calibre/devices/eb600/driver.py
+++ b/src/calibre/devices/eb600/driver.py
@@ -21,7 +21,7 @@ class EB600(USBMS):
 
     name           = 'Netronix EB600 Device Interface'
     description    = _('Communicate with the EB600 eBook reader.')
-    author         = _('Kovid Goyal')
+    author         = 'Kovid Goyal'
     supported_platforms = ['windows', 'osx', 'linux']
 
     # Ordered list of supported formats
@@ -97,3 +97,18 @@ class POCKETBOOK360(EB600):
     OSX_MAIN_MEM   = 'Philips Mass Storge Media'
     OSX_CARD_A_MEM = 'Philips Mass Storge Media'
 
+class GER2(EB600):
+
+    name = 'Ganaxa GeR2 Device Interface'
+    gui_name = 'Ganaxa GeR2'
+    supported_platforms = ['windows']
+
+    FORMATS = ['pdf']
+
+    VENDOR_ID   = [0xbda]
+    PRODUCT_ID  = [0x703]
+    BCD         = [0x132]
+
+    VENDOR_NAME = 'GANAXA'
+    WINDOWS_MAIN_MEN = 'GER2_________-FD'
+    WINDOWS_CARD_A_MEM = 'GER2_________-SD'

From eb6fd6a3b1e3edc5a3e75847ef1a5fd7e73eefe2 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Sun, 13 Dec 2009 09:25:14 -0500
Subject: [PATCH 106/120] Add missing import and supported OS.

---
 src/calibre/devices/cybookg3/driver.py | 1 +
 src/calibre/devices/eb600/driver.py    | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/calibre/devices/cybookg3/driver.py b/src/calibre/devices/cybookg3/driver.py
index 54f7f93579..82429cdffa 100644
--- a/src/calibre/devices/cybookg3/driver.py
+++ b/src/calibre/devices/cybookg3/driver.py
@@ -11,6 +11,7 @@ Device driver for Bookeen's Cybook Gen 3
 import os
 from itertools import cycle
 
+from calibre import islinux
 from calibre.devices.usbms.driver import USBMS
 import calibre.devices.cybookg3.t2b as t2b
 
diff --git a/src/calibre/devices/eb600/driver.py b/src/calibre/devices/eb600/driver.py
index e0c031f30c..0963292b2d 100644
--- a/src/calibre/devices/eb600/driver.py
+++ b/src/calibre/devices/eb600/driver.py
@@ -101,7 +101,7 @@ class GER2(EB600):
 
     name = 'Ganaxa GeR2 Device Interface'
     gui_name = 'Ganaxa GeR2'
-    supported_platforms = ['windows']
+    supported_platforms = ['windows', 'linux']
 
     FORMATS = ['pdf']
 

From 65e6e04d65e9ab014d7649aa40c954f8fdf94651 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Sun, 13 Dec 2009 09:38:29 -0500
Subject: [PATCH 107/120] Add support for the Nokia 770 internet tablet.

---
 src/calibre/customize/builtins.py      |  2 ++
 src/calibre/devices/cybookg3/driver.py |  2 +-
 src/calibre/devices/nokia/__init__.py  |  0
 src/calibre/devices/nokia/driver.py    | 35 ++++++++++++++++++++++++++
 4 files changed, 38 insertions(+), 1 deletion(-)
 create mode 100644 src/calibre/devices/nokia/__init__.py
 create mode 100644 src/calibre/devices/nokia/driver.py

diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py
index 1067d72357..8f21d86b74 100644
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@@ -414,6 +414,7 @@ from calibre.devices.prs500.driver import PRS500
 from calibre.devices.prs505.driver import PRS505
 from calibre.devices.prs700.driver import PRS700
 from calibre.devices.android.driver import ANDROID
+from calibre.devices.nokia.driver import N770
 from calibre.devices.eslick.driver import ESLICK
 from calibre.devices.nuut2.driver import NUUT2
 from calibre.devices.iriver.driver import IRIVER_STORY
@@ -470,6 +471,7 @@ plugins += [
     PRS700,
     PRS500,
     ANDROID,
+    N770,
     CYBOOK_OPUS,
     COOL_ER,
     SHINEBOOK,
diff --git a/src/calibre/devices/cybookg3/driver.py b/src/calibre/devices/cybookg3/driver.py
index 82429cdffa..f299fc30d6 100644
--- a/src/calibre/devices/cybookg3/driver.py
+++ b/src/calibre/devices/cybookg3/driver.py
@@ -20,7 +20,7 @@ class CYBOOKG3(USBMS):
     name           = 'Cybook Gen 3 Device Interface'
     gui_name       = 'Cybook Gen 3'
     description    = _('Communicate with the Cybook Gen 3 eBook reader.')
-    author         = _('John Schember')
+    author         = 'John Schember'
     supported_platforms = ['windows', 'osx', 'linux']
 
     # Ordered list of supported formats
diff --git a/src/calibre/devices/nokia/__init__.py b/src/calibre/devices/nokia/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/src/calibre/devices/nokia/driver.py b/src/calibre/devices/nokia/driver.py
new file mode 100644
index 0000000000..5f6191f751
--- /dev/null
+++ b/src/calibre/devices/nokia/driver.py
@@ -0,0 +1,35 @@
+# -*- coding: utf-8 -*-
+
+__license__   = 'GPL v3'
+__copyright__ = '2009, John Schember <john at nachtimwald.com>'
+__docformat__ = 'restructuredtext en'
+
+'''
+Device driver for Nokia's internet tablet devices
+'''
+
+from calibre.devices.usbms.driver import USBMS
+
+class N770(USBMS):
+
+    name           = 'Nokia 770 Device Interface'
+    gui_name       = 'Nokia 770'
+    description    = _('Communicate with the Nokia Nokia 770 internet tablet.')
+    author         = 'John Schember'
+    supported_platforms = ['windows', 'linux']
+
+    # Ordered list of supported formats
+    FORMATS     = ['mobi', 'prc', 'epub', 'html', 'zip', 'fb2', 'chm', 'pdb',
+        'tcr', 'txt', 'rtf']
+
+    VENDOR_ID   = [0x111]
+    PRODUCT_ID  = [0x1af]
+    BCD         = [0x134]
+
+    VENDOR_NAME      = 'NOKIA'
+    WINDOWS_MAIN_MEM = '770'
+
+    MAIN_MEMORY_VOLUME_LABEL  = 'N770 Main Memory'
+
+    EBOOK_DIR_MAIN = 'My Ebooks'
+    SUPPORTS_SUB_DIRS = True

From 288b64529c4caf5472d532fedf3abaf04c86ec35 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Tue, 15 Dec 2009 05:55:36 -0500
Subject: [PATCH 108/120] Fix can_handle for Cybook Gen 3 and Opus.

---
 src/calibre/devices/cybookg3/driver.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/calibre/devices/cybookg3/driver.py b/src/calibre/devices/cybookg3/driver.py
index f299fc30d6..77deb6efa5 100644
--- a/src/calibre/devices/cybookg3/driver.py
+++ b/src/calibre/devices/cybookg3/driver.py
@@ -85,7 +85,8 @@ class CYBOOKG3(USBMS):
         if islinux:
             if device_info[3] == 'Bookeen' and device_info[4] == 'Cybook Gen3':
                 return True
-        return False
+            return False
+        return True
 
 
 class CYBOOK_OPUS(CYBOOKG3):
@@ -119,4 +120,5 @@ class CYBOOK_OPUS(CYBOOKG3):
         if islinux:
             if device_info[3] == 'Bookeen':
                 return True
-        return False
+            return False
+        return True

From 35fc570d2481347d27337557e40a6bd7d268e6e7 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Tue, 15 Dec 2009 18:11:36 -0500
Subject: [PATCH 109/120] PML Output: Generate \CX Tags as chapter anchors. PDB
 eReader Output: Use \CX tags to generate chapter index.

---
 src/calibre/ebooks/pdb/ereader/writer.py | 52 +++++++++---------------
 src/calibre/ebooks/pml/pmlml.py          | 29 ++++++++++---
 2 files changed, 43 insertions(+), 38 deletions(-)

diff --git a/src/calibre/ebooks/pdb/ereader/writer.py b/src/calibre/ebooks/pdb/ereader/writer.py
index 263f6964bf..a379899af5 100644
--- a/src/calibre/ebooks/pdb/ereader/writer.py
+++ b/src/calibre/ebooks/pdb/ereader/writer.py
@@ -42,8 +42,8 @@ class Writer(FormatWriter):
         pml = unicode(pmlmlizer.extract_content(oeb_book, self.opts)).encode('cp1252', 'replace')
 
         text, text_sizes = self._text(pml)
-        chapter_index = self._chapter_index(pml)
-        link_index = self._link_index(pml)
+        chapter_index = self._index_item(r'(?s)\\C(?P<val>\d)="(?P<text>.+?)"', pml)
+        link_index = self._index_item(r'(?s)\\Q="(?P<text>.+?)"', pml)
         images = self._images(oeb_book.manifest, pmlmlizer.image_hrefs)
         metadata = [self._metadata(metadata)]
         hr = [self._header_record(len(text), len(chapter_index), len(link_index), len(images))]
@@ -101,38 +101,24 @@ class Writer(FormatWriter):
 
         return pml_pages, text_sizes
 
-    def _index_item(self, mo):
-        index = ''
-        if 'text' in mo.groupdict().keys():
-            index += struct.pack('>L', mo.start())
-            text = mo.group('text')
-            # Strip all PML tags from text
-            text = re.sub(r'\\U[0-9a-z]{4}', '', text)
-            text = re.sub(r'\\a\d{3}', '', text)
-            text = re.sub(r'\\.', '', text)
-            # Add appropriate spacing to denote the various levels of headings
-            if 'val' in mo.groupdict().keys():
-                text = '%s%s' % (' ' * 4 * int(mo.group('val')), text)
-            index += text
-            index += '\x00'
-        return index
-
-    def _chapter_index(self, pml):
-        chapter_marks = [
-            r'(?s)\\x(?P<text>.+?)\\x',
-            r'(?s)\\X(?P<val>[0-4])(?P<text>.*?)\\X[0-4]',
-            r'(?s)\\C(?P<val>\d)="(?P<text>.+?)"',
-        ]
+    def _index_item(self, regex, pml):
         index = []
-        for chapter_mark in chapter_marks:
-            for mo in re.finditer(chapter_mark, pml):
-                index.append(self._index_item(mo))
-        return index
-
-    def _link_index(self, pml):
-        index = []
-        for mo in re.finditer(r'(?s)\\Q="(?P<text>.+?)"', pml):
-            index.append(self._index_item(mo))
+        for mo in re.finditer(regex, pml):
+            item = ''
+            if 'text' in mo.groupdict().keys():
+                item += struct.pack('>L', mo.start())
+                text = mo.group('text')
+                # Strip all PML tags from text
+                text = re.sub(r'\\U[0-9a-z]{4}', '', text)
+                text = re.sub(r'\\a\d{3}', '', text)
+                text = re.sub(r'\\.', '', text)
+                # Add appropriate spacing to denote the various levels of headings
+                if 'val' in mo.groupdict().keys():
+                    text = '%s%s' % (' ' * 4 * int(mo.group('val')), text)
+                item += text
+                item += '\x00'
+            if item:
+                index.append(item)
         return index
 
     def _images(self, manifest, image_hrefs):
diff --git a/src/calibre/ebooks/pml/pmlml.py b/src/calibre/ebooks/pml/pmlml.py
index b23cd40813..ccce95fce6 100644
--- a/src/calibre/ebooks/pml/pmlml.py
+++ b/src/calibre/ebooks/pml/pmlml.py
@@ -79,6 +79,16 @@ class PMLMLizer(object):
         self.log.info('Converting XHTML to PML markup...')
         self.oeb_book = oeb_book
         self.opts = opts
+
+        # Map each page href to {anchor id: title}; used to emit \CX chapter
+        # marker tags. This is separate from the optional inline TOC.
+        self.toc = {}
+        for item in oeb_book.toc:
+            page, mid, id = item.href.partition('#')
+            if not self.toc.get(page, None):
+                self.toc[page] = {}
+            self.toc[page][id] = item.title
+
         return self.pmlmlize_spine()
 
     def pmlmlize_spine(self):
@@ -107,7 +117,11 @@ class PMLMLizer(object):
         return output
 
     def get_toc(self):
-        toc = [u'']
+        '''
+        Generation of inline TOC
+        '''
+
+        toc = []
         if self.opts.inline_toc:
             self.log.debug('Generating table of contents...')
             toc.append(u'\\X0%s\\X0\n\n' % _('Table of Contents:'))
@@ -177,14 +191,14 @@ class PMLMLizer(object):
     def dump_text(self, elem, stylizer, page, tag_stack=[]):
         if not isinstance(elem.tag, basestring) \
            or namespace(elem.tag) != XHTML_NS:
-            return [u'']
+            return []
 
-        text = [u'']
+        text = []
         style = stylizer.style(elem)
 
         if style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') \
            or style['visibility'] == 'hidden':
-            return [u'']
+            return []
 
         tag = barename(elem.tag)
         tag_count = 0
@@ -213,6 +227,12 @@ class PMLMLizer(object):
             else:
                 w += '="50%"'
             text.append(w)
+        toc_id = elem.attrib.get('id', None)
+        if toc_id:
+            if self.toc.get(page.href, None):
+                toc_title = self.toc[page.href].get(toc_id, None)
+                if toc_title:
+                    text.append('\\C1="%s"' % toc_title)
 
         # Process style information that needs holds a single tag
         # Commented out because every page in an OEB book starts with this style
@@ -287,4 +307,3 @@ class PMLMLizer(object):
             if tag != 'block':
                 text.append('\\%s' % tag)
         return text
-

From 549d2f00d429aff132d35d2e80d64fbcce74abb2 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Tue, 15 Dec 2009 18:28:32 -0500
Subject: [PATCH 110/120] Add N810 driver. Fix device ids for GeR2 and N770.

---
 src/calibre/customize/builtins.py   |  3 ++-
 src/calibre/devices/eb600/driver.py |  4 ++--
 src/calibre/devices/nokia/driver.py | 18 +++++++++++++++---
 3 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py
index 1ea76a2189..ed942b5a9a 100644
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@@ -413,7 +413,7 @@ from calibre.devices.nook.driver import NOOK
 from calibre.devices.prs500.driver import PRS500
 from calibre.devices.prs505.driver import PRS505, PRS700
 from calibre.devices.android.driver import ANDROID
-from calibre.devices.nokia.driver import N770
+from calibre.devices.nokia.driver import N770, N810
 from calibre.devices.eslick.driver import ESLICK
 from calibre.devices.nuut2.driver import NUUT2
 from calibre.devices.iriver.driver import IRIVER_STORY
@@ -470,6 +470,7 @@ plugins += [
     PRS500,
     ANDROID,
     N770,
+    N810,
     CYBOOK_OPUS,
     COOL_ER,
     ESLICK,
diff --git a/src/calibre/devices/eb600/driver.py b/src/calibre/devices/eb600/driver.py
index 18e86fb238..07217ac78d 100644
--- a/src/calibre/devices/eb600/driver.py
+++ b/src/calibre/devices/eb600/driver.py
@@ -104,8 +104,8 @@ class GER2(EB600):
 
     FORMATS = ['pdf']
 
-    VENDOR_ID   = [0xbda]
-    PRODUCT_ID  = [0x703]
+    VENDOR_ID   = [0x3034]
+    PRODUCT_ID  = [0x1795]
     BCD         = [0x132]
 
     VENDOR_NAME = 'GANAXA'
diff --git a/src/calibre/devices/nokia/driver.py b/src/calibre/devices/nokia/driver.py
index e6944de4d9..7bd1dbb28d 100644
--- a/src/calibre/devices/nokia/driver.py
+++ b/src/calibre/devices/nokia/driver.py
@@ -22,9 +22,9 @@ class N770(USBMS):
     FORMATS     = ['mobi', 'prc', 'epub', 'html', 'zip', 'fb2', 'chm', 'pdb',
         'tcr', 'txt', 'rtf']
 
-    VENDOR_ID   = [0x111]
-    PRODUCT_ID  = [0x1af]
-    BCD         = [0x134]
+    VENDOR_ID   = [0x421]
+    PRODUCT_ID  = [0x431]
+    BCD         = [0x308]
 
     VENDOR_NAME      = 'NOKIA'
     WINDOWS_MAIN_MEM = '770'
@@ -33,3 +33,15 @@ class N770(USBMS):
 
     EBOOK_DIR_MAIN = 'My Ebooks'
     SUPPORTS_SUB_DIRS = True
+
+class N810(N770):
+    name           = 'Nokia 810 Device Interface'
+    gui_name       = 'Nokia 810'
+    description    = _('Communicate with the Nokia Nokia 810 internet tablet.')
+
+    PRODUCT_ID = [0x96]
+    BCD        = [0x316]
+
+    WINDOWS_MAIN_MEM = 'N810'
+
+    MAIN_MEMORY_VOLUME_LABEL = 'N810 Main Memory'

From b62e3b03b0fb5d2f75d0eb50251c154743b7fecf Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Wed, 16 Dec 2009 07:02:12 -0500
Subject: [PATCH 111/120] PML Output: Change \C1 to \C0.

---
 src/calibre/ebooks/pml/pmlml.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/calibre/ebooks/pml/pmlml.py b/src/calibre/ebooks/pml/pmlml.py
index ccce95fce6..c8acf2487e 100644
--- a/src/calibre/ebooks/pml/pmlml.py
+++ b/src/calibre/ebooks/pml/pmlml.py
@@ -232,7 +232,7 @@ class PMLMLizer(object):
             if self.toc.get(page.href, None):
                 toc_title = self.toc[page.href].get(toc_id, None)
                 if toc_title:
-                    text.append('\\C1="%s"' % toc_title)
+                    text.append('\\C0="%s"' % toc_title)
 
         # Process style information that needs holds a single tag
         # Commented out because every page in an OEB book starts with this style

From 81c8e661ffbf47fbd5e7a3ff59d0ebc1722c2314 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Fri, 18 Dec 2009 06:01:45 -0500
Subject: [PATCH 112/120] Update eSlick formats for 2.0 firmware.

---
 src/calibre/devices/eslick/driver.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/calibre/devices/eslick/driver.py b/src/calibre/devices/eslick/driver.py
index e891baa50b..5bdb1c04d2 100644
--- a/src/calibre/devices/eslick/driver.py
+++ b/src/calibre/devices/eslick/driver.py
@@ -18,7 +18,7 @@ class ESLICK(USBMS):
     supported_platforms = ['windows', 'osx', 'linux']
 
     # Ordered list of supported formats
-    FORMATS     = ['pdf', 'txt']
+    FORMATS     = ['epub', 'pdb', 'pdf', 'txt']
 
     VENDOR_ID   = [0x04cc]
     PRODUCT_ID  = [0x1a64]

From d877cd91a7457a66e88f7a9b6b39b743432f8d92 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Fri, 18 Dec 2009 06:06:10 -0500
Subject: [PATCH 113/120] Don't translate author name.

---
 src/calibre/devices/eslick/driver.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/calibre/devices/eslick/driver.py b/src/calibre/devices/eslick/driver.py
index 5bdb1c04d2..4854c5c730 100644
--- a/src/calibre/devices/eslick/driver.py
+++ b/src/calibre/devices/eslick/driver.py
@@ -14,7 +14,7 @@ class ESLICK(USBMS):
     name           = 'ESlick Device Interface'
     gui_name       = 'Foxit ESlick'
     description    = _('Communicate with the ESlick eBook reader.')
-    author         = _('Kovid Goyal')
+    author         = 'Kovid Goyal'
     supported_platforms = ['windows', 'osx', 'linux']
 
     # Ordered list of supported formats

From 11427001a3cca9aacbc0fb5950a707d9667ab290 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Fri, 18 Dec 2009 19:05:59 -0500
Subject: [PATCH 114/120] USBMS: Move windows sort drives before checks for
 main. Should fix a bug detecting iriver story.

---
 src/calibre/devices/iriver/driver.py | 2 +-
 src/calibre/devices/usbms/device.py  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/calibre/devices/iriver/driver.py b/src/calibre/devices/iriver/driver.py
index 030fe8f6bb..f8e7d41600 100644
--- a/src/calibre/devices/iriver/driver.py
+++ b/src/calibre/devices/iriver/driver.py
@@ -13,7 +13,7 @@ class IRIVER_STORY(USBMS):
     name           = 'Iriver Story Device Interface'
     gui_name       = 'Iriver Story'
     description    = _('Communicate with the Iriver Story reader.')
-    author         = _('Kovid Goyal')
+    author         = 'Kovid Goyal'
     supported_platforms = ['windows', 'osx', 'linux']
 
     # Ordered list of supported formats
diff --git a/src/calibre/devices/usbms/device.py b/src/calibre/devices/usbms/device.py
index 7cd702dd96..5effa0a8c6 100644
--- a/src/calibre/devices/usbms/device.py
+++ b/src/calibre/devices/usbms/device.py
@@ -302,13 +302,13 @@ class Device(DeviceConfig, DevicePlugin):
             drives['main'] = drives.pop('carda')
 
         drives = self.windows_open_callback(drives)
+        drives = self.windows_sort_drives(drives)
 
         if drives.get('main', None) is None:
             raise DeviceError(
                 _('Unable to detect the %s disk drive. Try rebooting.') %
                 self.__class__.__name__)
 
-        drives = self.windows_sort_drives(drives)
         self._main_prefix = drives.get('main')
         self._card_a_prefix = drives.get('carda', None)
         self._card_b_prefix = drives.get('cardb', None)

From 9ee3e926c0cf29189194fd4c3edb74c91f5c2309 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Fri, 18 Dec 2009 23:35:19 -0500
Subject: [PATCH 115/120] Jetbook driver: add FB2 format.

---
 src/calibre/devices/jetbook/driver.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/calibre/devices/jetbook/driver.py b/src/calibre/devices/jetbook/driver.py
index b1e0b17fb9..a55e76155f 100644
--- a/src/calibre/devices/jetbook/driver.py
+++ b/src/calibre/devices/jetbook/driver.py
@@ -20,13 +20,13 @@ from calibre.ebooks.metadata import authors_to_string, string_to_authors
 class JETBOOK(USBMS):
     name           = 'Ectaco JetBook Device Interface'
     description    = _('Communicate with the JetBook eBook reader.')
-    author         = _('James Ralston')
+    author         = 'James Ralston'
     supported_platforms = ['windows', 'osx', 'linux']
 
 
     # Ordered list of supported formats
     # Be sure these have an entry in calibre.devices.mime
-    FORMATS     = ['epub', 'mobi', 'prc', 'txt', 'rtf', 'pdf']
+    FORMATS     = ['epub', 'mobi', 'prc', 'fb2', 'txt', 'rtf', 'pdf']
 
     VENDOR_ID   = [0x0525]
     PRODUCT_ID  = [0xa4a5]

From 86afb92057ee61953c1ea9cbb5e4cf7c5668962b Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Sat, 19 Dec 2009 15:58:51 -0500
Subject: [PATCH 116/120] Realign to trunk.

---
 session.vim                                 |  7 +++
 src/calibre/devices/cybookg3/driver.py      | 15 +++--
 src/calibre/devices/iriver/driver.py        |  2 +-
 src/calibre/devices/jetbook/driver.py       | 25 +++++----
 src/calibre/devices/prs500/books.py         | 14 +++--
 src/calibre/devices/prs500/driver.py        |  6 +-
 src/calibre/devices/prs505/books.py         | 15 +++--
 src/calibre/devices/prs505/driver.py        | 20 +++----
 src/calibre/devices/usbms/device.py         | 62 ++++++++++++++++-----
 src/calibre/devices/usbms/driver.py         | 14 ++---
 src/calibre/gui2/device.py                  | 60 ++++++++++++--------
 src/calibre/gui2/dialogs/config/add_save.ui |  2 +-
 src/calibre/manual/conversion.rst           |  1 +
 13 files changed, 152 insertions(+), 91 deletions(-)

diff --git a/session.vim b/session.vim
index 56705f9528..6b965cff2f 100644
--- a/session.vim
+++ b/session.vim
@@ -13,4 +13,11 @@ base_dir = os.path.join(src_dir, 'calibre')
 
 vipy.session.initialize(project_name='calibre', src_dir=src_dir,
             project_dir=project_dir, base_dir=base_dir)
+
+def recipe_title_callback(raw):
+    return eval(raw.decode('utf-8'))
+
+vipy.session.add_content_browser('.r', ',r', 'Recipe',
+    vipy.session.glob_based_iterator(os.path.join(project_dir, 'resources', 'recipes', '*.recipe')),
+    vipy.session.regexp_based_matcher(r'title\s*=\s*(?P<title>.+)', 'title', recipe_title_callback))
 EOFPY
diff --git a/src/calibre/devices/cybookg3/driver.py b/src/calibre/devices/cybookg3/driver.py
index e1d8aaa0c7..04e5e7012c 100644
--- a/src/calibre/devices/cybookg3/driver.py
+++ b/src/calibre/devices/cybookg3/driver.py
@@ -47,25 +47,24 @@ class CYBOOKG3(USBMS):
     DELETE_EXTS = ['.mbp', '.dat', '_6090.t2b']
     SUPPORTS_SUB_DIRS = True
 
-    def upload_books(self, files, metadatas, ids, on_card=None,
-                     end_session=True):
+    def upload_books(self, files, names, on_card=None, end_session=True,
+                     metadata=None):
 
         path = self._sanity_check(on_card, files)
 
         paths = []
-        metadatas = iter(metadatas)
-        ids = iter(ids)
+        names = iter(names)
+        metadata = iter(metadata)
 
         for i, infile in enumerate(files):
-            mdata, id = metadatas.next(), ids.next()
-            ext = os.path.splitext(infile)[1]
-            filepath = self.create_upload_path(path, mdata, ext, id)
+            mdata, fname = metadata.next(), names.next()
+            filepath = self.create_upload_path(path, mdata, fname)
             paths.append(filepath)
 
             self.put_file(infile, filepath, replace_file=True)
 
             coverdata = None
-            cover = mdata.cover
+            cover = mdata.get('cover', None)
             if cover:
                 coverdata = cover[2]
 
diff --git a/src/calibre/devices/iriver/driver.py b/src/calibre/devices/iriver/driver.py
index f8e7d41600..7373996213 100644
--- a/src/calibre/devices/iriver/driver.py
+++ b/src/calibre/devices/iriver/driver.py
@@ -35,7 +35,7 @@ class IRIVER_STORY(USBMS):
 
     SUPPORTS_SUB_DIRS = True
 
-    def windows_sort_drives(self, drives):
+    def windows_open_callback(self, drives):
         main = drives.get('main', None)
         card = drives.get('carda', None)
         if card and main and card < main:
diff --git a/src/calibre/devices/jetbook/driver.py b/src/calibre/devices/jetbook/driver.py
index a55e76155f..6a09c7c345 100644
--- a/src/calibre/devices/jetbook/driver.py
+++ b/src/calibre/devices/jetbook/driver.py
@@ -15,7 +15,7 @@ from itertools import cycle
 
 from calibre.devices.usbms.driver import USBMS
 from calibre.utils.filenames import ascii_filename as sanitize
-from calibre.ebooks.metadata import authors_to_string, string_to_authors
+from calibre.ebooks.metadata import string_to_authors
 
 class JETBOOK(USBMS):
     name           = 'Ectaco JetBook Device Interface'
@@ -50,22 +50,23 @@ class JETBOOK(USBMS):
             r'(?P<authors>.+)#(?P<title>.+)'
             )
 
-    def upload_books(self, files, metadatas, ids, on_card=None,
-                     end_session=True):
-        path = self._sanity_check(on_card, files)
+    def upload_books(self, files, names, on_card=False, end_session=True,
+                    metadata=None):
+
+        base_path = self._sanity_check(on_card, files)
 
         paths = []
-        metadatas = iter(metadatas)
-        ids = iter(ids)
+        names = iter(names)
+        metadata = iter(metadata)
 
         for i, infile in enumerate(files):
-            mdata, id = metadatas.next(), ids.next()
-            ext = os.path.splitext(infile)[1]
-            path = self.create_upload_path(path, mdata, ext, id)
+            mdata, fname = metadata.next(), names.next()
+            path = os.path.dirname(self.create_upload_path(base_path, mdata, fname))
 
-            author = sanitize(authors_to_string(mdata.authors)).replace(' ', '_')
-            title = sanitize(mdata.title).replace(' ', '_')
-            fname = '%s#%s%s' % (author, title, ext)
+            author = sanitize(mdata.get('authors','Unknown')).replace(' ', '_')
+            title = sanitize(mdata.get('title', 'Unknown')).replace(' ', '_')
+            fileext = os.path.splitext(os.path.basename(fname))[1]
+            fname = '%s#%s%s' % (author, title, fileext)
 
             filepath = os.path.join(path, fname)
             paths.append(filepath)
diff --git a/src/calibre/devices/prs500/books.py b/src/calibre/devices/prs500/books.py
index 382dcf135d..5eb8d7f011 100644
--- a/src/calibre/devices/prs500/books.py
+++ b/src/calibre/devices/prs500/books.py
@@ -9,7 +9,6 @@ from base64 import b64decode as decode
 from base64 import b64encode as encode
 import re
 
-from calibre.ebooks.metadata import authors_to_string
 from calibre.devices.interface import BookList as _BookList
 from calibre.devices import strftime, strptime
 
@@ -263,9 +262,9 @@ class BookList(_BookList):
         cid = self.max_id()+1
         sourceid = str(self[0].sourceid) if len(self) else "1"
         attrs = {
-                 "title"  : info.title,
-                 'titleSorter' : sortable_title(info.title),
-                 "author" : authors_to_string(info.authors), \
+                 "title"  : info["title"],
+                 'titleSorter' : sortable_title(info['title']),
+                 "author" : info["authors"] if info['authors'] else 'Unknown', \
                  "page":"0", "part":"0", "scale":"0", \
                  "sourceid":sourceid,  "id":str(cid), "date":"", \
                  "mime":mime, "path":name, "size":str(size)
@@ -274,7 +273,7 @@ class BookList(_BookList):
             node.setAttributeNode(self.document.createAttribute(attr))
             node.setAttribute(attr, attrs[attr])
         try:
-            w, h, data = info.cover
+            w, h, data = info["cover"]
         except TypeError:
             w, h, data = None, None, None
 
@@ -291,7 +290,10 @@ class BookList(_BookList):
         book.datetime = ctime
         self.append(book)
         self.set_next_id(cid+1)
-        self.set_playlists(book.id, info.tags)
+        if self.prefix and info.has_key('tags'): # Playlists only supportted in main memory
+            if info.has_key('tag order'):
+                self.tag_order.update(info['tag order'])
+            self.set_playlists(book.id, info['tags'])
 
 
     def playlist_by_title(self, title):
diff --git a/src/calibre/devices/prs500/driver.py b/src/calibre/devices/prs500/driver.py
index 616a1c387d..8d2c4cc9d4 100644
--- a/src/calibre/devices/prs500/driver.py
+++ b/src/calibre/devices/prs500/driver.py
@@ -867,14 +867,14 @@ class PRS500(DeviceConfig, DevicePlugin):
             self.upload_book_list(booklists[1], end_session=False)
 
     @safe
-    def upload_books(self, files, metadatas, ids, on_card=None,
-                     end_session=True):
+    def upload_books(self, files, names, on_card=False, end_session=True,
+                     metadata=None):
         card = self.card(end_session=False)
         prefix = card + '/' + self.CARD_PATH_PREFIX +'/' if on_card else '/Data/media/books/'
         if on_card and not self._exists(prefix)[0]:
             self.mkdir(prefix[:-1], False)
         paths, ctimes = [], []
-        names = iter([m.title for m in metatdatas])
+        names = iter(names)
         infiles = [file if hasattr(file, 'read') else open(file, 'rb') for file in files]
         for f in infiles: f.seek(0, 2)
         sizes = [f.tell() for f in infiles]
diff --git a/src/calibre/devices/prs505/books.py b/src/calibre/devices/prs505/books.py
index 4b8a952816..6e268e734a 100644
--- a/src/calibre/devices/prs505/books.py
+++ b/src/calibre/devices/prs505/books.py
@@ -8,7 +8,7 @@ import xml.dom.minidom as dom
 from base64 import b64decode as decode
 from base64 import b64encode as encode
 
-from calibre.ebooks.metadata import authors_to_string
+
 from calibre.devices.interface import BookList as _BookList
 from calibre.devices import strftime as _strftime
 from calibre.devices import strptime
@@ -194,9 +194,9 @@ class BookList(_BookList):
         except:
             sourceid = '1'
         attrs = {
-                 "title"  : info.title,
-                 'titleSorter' : sortable_title(info.title),
-                 "author" : authors_to_string(info.authors),
+                 "title"  : info["title"],
+                 'titleSorter' : sortable_title(info['title']),
+                 "author" : info["authors"] if info['authors'] else _('Unknown'),
                  "page":"0", "part":"0", "scale":"0", \
                  "sourceid":sourceid,  "id":str(cid), "date":"", \
                  "mime":mime, "path":name, "size":str(size)
@@ -205,7 +205,7 @@ class BookList(_BookList):
             node.setAttributeNode(self.document.createAttribute(attr))
             node.setAttribute(attr, attrs[attr])
         try:
-            w, h, data = info.cover
+            w, h, data = info["cover"]
         except TypeError:
             w, h, data = None, None, None
 
@@ -221,7 +221,10 @@ class BookList(_BookList):
         book = Book(node, self.mountpath, [], prefix=self.prefix)
         book.datetime = ctime
         self.append(book)
-        self.set_tags(book, info.tags)
+        if info.has_key('tags'):
+            if info.has_key('tag order'):
+                self.tag_order.update(info['tag order'])
+            self.set_tags(book, info['tags'])
 
     def _delete_book(self, node):
         nid = node.getAttribute('id')
diff --git a/src/calibre/devices/prs505/driver.py b/src/calibre/devices/prs505/driver.py
index 17de805756..ab61f76b61 100644
--- a/src/calibre/devices/prs505/driver.py
+++ b/src/calibre/devices/prs505/driver.py
@@ -114,22 +114,20 @@ class PRS505(CLI, Device):
         self.report_progress(1.0, _('Getting list of books on device...'))
         return bl
 
-    def upload_books(self, files, metadatas, ids, on_card=None,
-                     end_session=True):
+    def upload_books(self, files, names, on_card=None, end_session=True,
+                     metadata=None):
 
         path = self._sanity_check(on_card, files)
 
-        paths = []
-        metadatas = iter(metadatas)
-        ids = iter(ids)
-
+        paths, ctimes, sizes = [], [], []
+        names = iter(names)
+        metadata = iter(metadata)
         for i, infile in enumerate(files):
-            mdata, id = metadatas.next(), ids.next()
-            ext = os.path.splitext(infile)[1]
-            filepath = self.create_upload_path(path, mdata, ext, id)
-            paths.append(filepath)
+            mdata, fname = metadata.next(), names.next()
+            filepath = self.create_upload_path(path, mdata, fname)
 
-            self.put_file(infile, filepath, replace_file=True)
+            paths.append(filepath)
+            self.put_file(infile, paths[-1], replace_file=True)
             ctimes.append(os.path.getctime(paths[-1]))
             sizes.append(os.stat(paths[-1]).st_size)
 
diff --git a/src/calibre/devices/usbms/device.py b/src/calibre/devices/usbms/device.py
index 5effa0a8c6..33ba104e38 100644
--- a/src/calibre/devices/usbms/device.py
+++ b/src/calibre/devices/usbms/device.py
@@ -23,7 +23,7 @@ from calibre.devices.interface import DevicePlugin
 from calibre.devices.errors import DeviceError, FreeSpaceError
 from calibre.devices.usbms.deviceconfig import DeviceConfig
 from calibre import iswindows, islinux, isosx, __appname__
-from calibre.utils.filenames import shorten_components_to
+from calibre.utils.filenames import ascii_filename as sanitize, shorten_components_to
 
 class Device(DeviceConfig, DevicePlugin):
 
@@ -295,20 +295,20 @@ class Device(DeviceConfig, DevicePlugin):
 
         # This is typically needed when the device has the same
         # WINDOWS_MAIN_MEM and WINDOWS_CARD_A_MEM in which case
-        # if the devices is connected without a crad, the above
+        # if the devices is connected without a card, the above
         # will incorrectly identify the main mem as carda
         # See for example the driver for the Nook
         if 'main' not in drives and 'carda' in drives:
             drives['main'] = drives.pop('carda')
 
         drives = self.windows_open_callback(drives)
-        drives = self.windows_sort_drives(drives)
 
         if drives.get('main', None) is None:
             raise DeviceError(
                 _('Unable to detect the %s disk drive. Try rebooting.') %
                 self.__class__.__name__)
 
+        drives = self.windows_sort_drives(drives)
         self._main_prefix = drives.get('main')
         self._card_a_prefix = drives.get('carda', None)
         self._card_b_prefix = drives.get('cardb', None)
@@ -739,18 +739,54 @@ class Device(DeviceConfig, DevicePlugin):
             raise FreeSpaceError(_("There is insufficient free space on the storage card"))
         return path
 
-    def create_upload_path(self, root, mdata, ext, id):
-        from calibre.library.save_to_disk import config, get_components
-        opts = config().parse()
-        components = get_components(opts.template, mdata, id, opts.timefmt, 250)
-        components = [str(x) for x in components]
-        components = shorten_components_to(250 - len(root), components)
-        filepath = '%s%s' % (os.path.join(root, *components), ext)
+    def create_upload_path(self, path, mdata, fname):
+        path = os.path.abspath(path)
+        newpath = path
+        extra_components = []
+
+        if self.SUPPORTS_SUB_DIRS and self.settings().use_subdirs:
+            if 'tags' in mdata.keys():
+                for tag in mdata['tags']:
+                    if tag.startswith(_('News')):
+                        extra_components.append('news')
+                        c = sanitize(mdata.get('title', ''))
+                        if c:
+                            extra_components.append(c)
+                        c = sanitize(mdata.get('timestamp', ''))
+                        if c:
+                            extra_components.append(c)
+                        break
+                    elif tag.startswith('/'):
+                        for c in tag.split('/'):
+                            c = sanitize(c)
+                            if not c: continue
+                            extra_components.append(c)
+                        break
+
+            if not extra_components:
+                c = sanitize(mdata.get('authors', _('Unknown')))
+                if c:
+                    extra_components.append(c)
+                c = sanitize(mdata.get('title', _('Unknown')))
+                if c:
+                    extra_components.append(c)
+                    newpath = os.path.join(newpath, c)
+
+        fname = sanitize(fname)
+        extra_components.append(fname)
+        extra_components = [str(x) for x in extra_components]
+        def remove_trailing_periods(x):
+            ans = x
+            while ans.endswith('.'):
+                ans = ans[:-1]
+            if not ans:
+                ans = 'x'
+            return ans
+        extra_components = list(map(remove_trailing_periods, extra_components))
+        components = shorten_components_to(250 - len(path), extra_components)
+        filepath = os.path.join(path, *components)
         filedir = os.path.dirname(filepath)
 
-        if not self.SUPPORTS_SUB_DIRS or not self.settings().use_subdirs:
-            filedir = root
-            filepath = os.path.join(root, os.path.basename(filepath))
 
         if not os.path.exists(filedir):
             os.makedirs(filedir)
diff --git a/src/calibre/devices/usbms/driver.py b/src/calibre/devices/usbms/driver.py
index 1228781579..8d2416511c 100644
--- a/src/calibre/devices/usbms/driver.py
+++ b/src/calibre/devices/usbms/driver.py
@@ -95,19 +95,19 @@ class USBMS(CLI, Device):
 
         return bl
 
-    def upload_books(self, files, metadatas, ids, on_card=None,
-                     end_session=True):
+    def upload_books(self, files, names, on_card=None, end_session=True,
+                     metadata=None):
 
         path = self._sanity_check(on_card, files)
 
         paths = []
-        metadatas = iter(metadatas)
-        ids = iter(ids)
+        names = iter(names)
+        metadata = iter(metadata)
 
         for i, infile in enumerate(files):
-            mdata, id = metadatas.next(), ids.next()
-            ext = os.path.splitext(infile)[1]
-            filepath = self.create_upload_path(path, mdata, ext, id)
+            mdata, fname = metadata.next(), names.next()
+            filepath = self.create_upload_path(path, mdata, fname)
+
             paths.append(filepath)
 
             self.put_file(infile, filepath, replace_file=True)
diff --git a/src/calibre/gui2/device.py b/src/calibre/gui2/device.py
index c33e279912..4471f285dc 100644
--- a/src/calibre/gui2/device.py
+++ b/src/calibre/gui2/device.py
@@ -223,17 +223,18 @@ class DeviceManager(Thread):
         return self.create_job(self._sync_booklists, done, args=[booklists],
                         description=_('Send metadata to device'))
 
-    def _upload_books(self, files, metadata, ids, on_card=None):
+    def _upload_books(self, files, names, on_card=None, metadata=None):
         '''Upload books to device: '''
-        return self.device.upload_books(files, metadata, ids, on_card,
-                                        end_session=False)
+        return self.device.upload_books(files, names, on_card,
+                                        metadata=metadata, end_session=False)
 
-    def upload_books(self, done, files, metadata, ids, on_card=None, titles=None):
-        desc = _('Upload %d books to device')%len(files)
+    def upload_books(self, done, files, names, on_card=None, titles=None,
+                     metadata=None):
+        desc = _('Upload %d books to device')%len(names)
         if titles:
             desc += u':' + u', '.join(titles)
-        return self.create_job(self._upload_books, done, args=[files, metadata, ids],
-                kwargs={'on_card':on_card}, description=desc)
+        return self.create_job(self._upload_books, done, args=[files, names],
+                kwargs={'on_card':on_card,'metadata':metadata}, description=desc)
 
     def add_books_to_metadata(self, locations, metadata, booklists):
         self.device.add_books_to_metadata(locations, metadata, booklists)
@@ -707,18 +708,18 @@ class DeviceGUI(object):
                 dynamic.set('news_to_be_synced', set([]))
                 return
             metadata = self.library_view.model().get_metadata(ids,
-                    rows_are_ids=True, full_metadata=True)[1]
+                    rows_are_ids=True)
             names = []
             for mi in metadata:
-                prefix = ascii_filename(mi.title)
+                prefix = ascii_filename(mi['title'])
                 if not isinstance(prefix, unicode):
                     prefix = prefix.decode(preferred_encoding, 'replace')
                 prefix = ascii_filename(prefix)
                 names.append('%s_%d%s'%(prefix, id,
                     os.path.splitext(f.name)[1]))
-                cdata = mi.cover
+                cdata = mi['cover']
                 if cdata:
-                    mi.cover = self.cover_to_thumbnail(cdata)
+                    mi['cover'] = self.cover_to_thumbnail(cdata)
             dynamic.set('news_to_be_synced', set([]))
             if config['upload_news_to_device'] and files:
                 remove = ids if \
@@ -727,7 +728,8 @@ class DeviceGUI(object):
                     self.location_view.model().free[1] : 'carda',
                     self.location_view.model().free[2] : 'cardb' }
                 on_card = space.get(sorted(space.keys(), reverse=True)[0], None)
-                self.upload_books(files, metadata, ids, on_card=on_card,
+                self.upload_books(files, names, metadata,
+                        on_card=on_card,
                         memory=[[f.name for f in files], remove])
                 self.status_bar.showMessage(_('Sending news to device.'), 5000)
 
@@ -749,28 +751,38 @@ class DeviceGUI(object):
         else:
             _auto_ids = []
 
-        metadata = self.library_view.model().get_metadata(ids, True, full_metadata=True)[1]
+        metadata = self.library_view.model().get_metadata(ids, True)
         ids = iter(ids)
         for mi in metadata:
-            cdata = mi.cover
+            cdata = mi['cover']
             if cdata:
                 mi['cover'] = self.cover_to_thumbnail(cdata)
         metadata = iter(metadata)
 
         files = [getattr(f, 'name', None) for f in _files]
-        bad, mdata, gf, fids, remove_ids = [], [], [], [], []
+        bad, good, gf, names, remove_ids = [], [], [], [], []
         for f in files:
             mi = metadata.next()
             id = ids.next()
             if f is None:
-                bad.append(mi.title)
+                bad.append(mi['title'])
             else:
                 remove_ids.append(id)
+                good.append(mi)
                 gf.append(f)
-                mdata.append(mi)
-                fids.append(id)
+                t = mi['title']
+                if not t:
+                    t = _('Unknown')
+                a = mi['authors']
+                if not a:
+                    a = _('Unknown')
+                prefix = ascii_filename(t+' - '+a)
+                if not isinstance(prefix, unicode):
+                    prefix = prefix.decode(preferred_encoding, 'replace')
+                prefix = ascii_filename(prefix)
+                names.append('%s_%d%s'%(prefix, id, os.path.splitext(f)[1]))
         remove = remove_ids if delete_from_library else []
-        self.upload_books(gf, mdata, fids, on_card, memory=(_files, remove))
+        self.upload_books(gf, names, good, on_card, memory=(_files, remove))
         self.status_bar.showMessage(_('Sending books to device.'), 5000)
 
         auto = []
@@ -833,15 +845,17 @@ class DeviceGUI(object):
         cp, fs = job.result
         self.location_view.model().update_devices(cp, fs)
 
-    def upload_books(self, files, metadata, ids, on_card=None, memory=None):
+    def upload_books(self, files, names, metadata, on_card=None, memory=None):
         '''
         Upload books to device.
         :param files: List of either paths to files or file like objects
         '''
-        titles = [i.title for i in metadata]
+        titles = [i['title'] for i in metadata]
         job = self.device_manager.upload_books(
                 Dispatcher(self.books_uploaded),
-                files, metadata, ids, on_card=on_card, titles=titles)
+                files, names, on_card=on_card,
+                metadata=metadata, titles=titles
+              )
         self.upload_memory[job] = (metadata, on_card, memory, files)
 
     def books_uploaded(self, job):
@@ -854,7 +868,7 @@ class DeviceGUI(object):
             if isinstance(job.exception, FreeSpaceError):
                 where = 'in main memory.' if 'memory' in str(job.exception) \
                         else 'on the storage card.'
-                titles = '\n'.join(['<li>'+mi.title+'</li>' \
+                titles = '\n'.join(['<li>'+mi['title']+'</li>' \
                                     for mi in metadata])
                 d = error_dialog(self, _('No space on device'),
                                  _('<p>Cannot upload books to device there '
diff --git a/src/calibre/gui2/dialogs/config/add_save.ui b/src/calibre/gui2/dialogs/config/add_save.ui
index ef1a867cd2..513be73e54 100644
--- a/src/calibre/gui2/dialogs/config/add_save.ui
+++ b/src/calibre/gui2/dialogs/config/add_save.ui
@@ -70,7 +70,7 @@
     <item row="0" column="0" colspan="2">
      <widget class="QLabel" name="label">
       <property name="text">
-       <string>Here you can control how calibre will save your books when you click the Save to Disk or Send to Device buttons:</string>
+       <string>Here you can control how calibre will save your books when you click the Save to Disk button:</string>
       </property>
       <property name="wordWrap">
        <bool>true</bool>
diff --git a/src/calibre/manual/conversion.rst b/src/calibre/manual/conversion.rst
index 72e30eedbb..ccdb8d6cdd 100644
--- a/src/calibre/manual/conversion.rst
+++ b/src/calibre/manual/conversion.rst
@@ -497,6 +497,7 @@ TXT input supports a number of options to differentiate how paragraphs are detec
 
 
 Convert PDF documents
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 PDF documents are one of the worst formats to convert from. They are a fixed page size and text placement format.
 Meaning, it is very difficult to determine where one paragraph ends and another begins. |app| will try to unwrap

From 2debc774d935956b06a164466fee527511e7516e Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Sat, 19 Dec 2009 19:42:42 -0500
Subject: [PATCH 117/120] Refactor sending cover to device. Nook: Send cover
 with book to device.

---
 src/calibre/devices/cybookg3/driver.py | 31 ++------------------------
 src/calibre/devices/nook/driver.py     |  5 +++++
 src/calibre/devices/usbms/driver.py    | 13 +++++++++++
 3 files changed, 20 insertions(+), 29 deletions(-)

diff --git a/src/calibre/devices/cybookg3/driver.py b/src/calibre/devices/cybookg3/driver.py
index 04e5e7012c..e0caff36f8 100644
--- a/src/calibre/devices/cybookg3/driver.py
+++ b/src/calibre/devices/cybookg3/driver.py
@@ -47,36 +47,9 @@ class CYBOOKG3(USBMS):
     DELETE_EXTS = ['.mbp', '.dat', '_6090.t2b']
     SUPPORTS_SUB_DIRS = True
 
-    def upload_books(self, files, names, on_card=None, end_session=True,
-                     metadata=None):
-
-        path = self._sanity_check(on_card, files)
-
-        paths = []
-        names = iter(names)
-        metadata = iter(metadata)
-
-        for i, infile in enumerate(files):
-            mdata, fname = metadata.next(), names.next()
-            filepath = self.create_upload_path(path, mdata, fname)
-            paths.append(filepath)
-
-            self.put_file(infile, filepath, replace_file=True)
-
-            coverdata = None
-            cover = mdata.get('cover', None)
-            if cover:
-                coverdata = cover[2]
-
-            t2bfile = open('%s_6090.t2b' % (os.path.splitext(filepath)[0]), 'wb')
+    def upload_cover(self, path, name, coverdata):
+        with open('%s_6090.t2b' % os.path.join(path, name), 'wb') as t2bfile:
             t2b.write_t2b(t2bfile, coverdata)
-            t2bfile.close()
-
-            self.report_progress(i / float(len(files)), _('Transferring books to device...'))
-
-        self.report_progress(1.0, _('Transferring books to device...'))
-
-        return zip(paths, cycle([on_card]))
 
     @classmethod
     def can_handle(cls, device_info, debug=False):
diff --git a/src/calibre/devices/nook/driver.py b/src/calibre/devices/nook/driver.py
index 001cc06b8e..c3f3267401 100644
--- a/src/calibre/devices/nook/driver.py
+++ b/src/calibre/devices/nook/driver.py
@@ -38,6 +38,11 @@ class NOOK(USBMS):
     EBOOK_DIR_MAIN = 'my documents'
     SUPPORTS_SUB_DIRS = True
 
+    def upload_cover(self, path, name, coverdata):
+        if coverdata:
+            with open('%s.jpg' % os.path.join(path, name), 'wb') as coverfile:
+                coverfile.write(coverdata)
+
     def windows_sort_drives(self, drives):
         main = drives.get('main', None)
         card = drives.get('carda', None)
diff --git a/src/calibre/devices/usbms/driver.py b/src/calibre/devices/usbms/driver.py
index 8d2416511c..ee746de9cc 100644
--- a/src/calibre/devices/usbms/driver.py
+++ b/src/calibre/devices/usbms/driver.py
@@ -112,12 +112,25 @@ class USBMS(CLI, Device):
 
             self.put_file(infile, filepath, replace_file=True)
 
+            coverdata = mdata.get('cover', None)
+            if coverdata:
+                coverdata = coverdata[2]
+            self.upload_cover(os.path.dirname(filepath), os.path.splitext(os.path.basename(filepath))[0], coverdata)
+
             self.report_progress((i+1) / float(len(files)), _('Transferring books to device...'))
 
         self.report_progress(1.0, _('Transferring books to device...'))
 
         return zip(paths, cycle([on_card]))
 
+    def upload_cover(self, path, name, coverdata):
+        '''
+        :path: the full path were the associated book is located.
+        :name: the name of the book file without the extension.
+        :coverdata: cover data in jpeg format.
+        '''
+        pass
+
     def add_books_to_metadata(self, locations, metadata, booklists):
         for i, location in enumerate(locations):
             self.report_progress((i+1) / float(len(locations)), _('Adding books to device metadata listing...'))

From 32b02e627beae6e236ca7df0eb8e934c001895e7 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Sat, 19 Dec 2009 20:32:40 -0500
Subject: [PATCH 118/120] Fix bug #4252: Nook driver writes cover image and
 uses default image when no cover is associated with book.

---
 src/calibre/devices/cybookg3/driver.py |  2 +-
 src/calibre/devices/nook/driver.py     | 30 ++++++++++++++++++++++----
 src/calibre/devices/usbms/driver.py    |  2 +-
 3 files changed, 28 insertions(+), 6 deletions(-)

diff --git a/src/calibre/devices/cybookg3/driver.py b/src/calibre/devices/cybookg3/driver.py
index e0caff36f8..cd9545f231 100644
--- a/src/calibre/devices/cybookg3/driver.py
+++ b/src/calibre/devices/cybookg3/driver.py
@@ -47,7 +47,7 @@ class CYBOOKG3(USBMS):
     DELETE_EXTS = ['.mbp', '.dat', '_6090.t2b']
     SUPPORTS_SUB_DIRS = True
 
-    def upload_cover(self, path, name, coverdata):
+    def upload_cover(self, path, name, coverdata, metadata):
         with open('%s_6090.t2b' % os.path.join(path, name), 'wb') as t2bfile:
             t2b.write_t2b(t2bfile, coverdata)
 
diff --git a/src/calibre/devices/nook/driver.py b/src/calibre/devices/nook/driver.py
index c3f3267401..4cf65c866e 100644
--- a/src/calibre/devices/nook/driver.py
+++ b/src/calibre/devices/nook/driver.py
@@ -8,6 +8,14 @@ __docformat__ = 'restructuredtext en'
 Device driver for Barns and Nobel's Nook
 '''
 
+try:
+    from PIL import Image, ImageDraw
+    Image
+except ImportError:
+    import Image
+
+import cStringIO
+
 from calibre.devices.usbms.driver import USBMS
 
 class NOOK(USBMS):
@@ -38,10 +46,24 @@ class NOOK(USBMS):
     EBOOK_DIR_MAIN = 'my documents'
     SUPPORTS_SUB_DIRS = True
 
-    def upload_cover(self, path, name, coverdata):
-        if coverdata:
-            with open('%s.jpg' % os.path.join(path, name), 'wb') as coverfile:
-                coverfile.write(coverdata)
+    def upload_cover(self, path, name, coverdata, metadata):
+        if not coverdata:
+            coverdata = open(I('library.png'), 'rb').read()
+
+        im = Image.open(cStringIO.StringIO(coverdata))
+        im.thumbnail((96, 144), Image.ANTIALIAS)
+
+        if not coverdata:
+            draw = ImageDraw.Draw(im)
+            draw.text((0, 29), metadata.title)
+            draw.text((0, 115), ', '.join(metadata.authors))
+
+        data = cStringIO.StringIO()
+        im.save(data, 'JPG')
+        coverdata = data.getvalue()
+
+        with open('%s.jpg' % os.path.join(path, name), 'wb') as coverfile:
+            coverfile.write(coverdata)
 
     def windows_sort_drives(self, drives):
         main = drives.get('main', None)
diff --git a/src/calibre/devices/usbms/driver.py b/src/calibre/devices/usbms/driver.py
index ee746de9cc..e37ea62525 100644
--- a/src/calibre/devices/usbms/driver.py
+++ b/src/calibre/devices/usbms/driver.py
@@ -115,7 +115,7 @@ class USBMS(CLI, Device):
             coverdata = mdata.get('cover', None)
             if coverdata:
                 coverdata = coverdata[2]
-            self.upload_cover(os.path.dirname(filepath), os.path.splitext(os.path.basename(filepath))[0], coverdata)
+            self.upload_cover(os.path.dirname(filepath), os.path.splitext(os.path.basename(filepath))[0], coverdata, mdata)
 
             self.report_progress((i+1) / float(len(files)), _('Transferring books to device...'))
 

From 80ce4c5b4d7b95466fe872cca95baaee428fa47b Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Sat, 19 Dec 2009 20:44:58 -0500
Subject: [PATCH 119/120] Nook input/output profile. Make name for Hanlin
 profile clearer.

---
 src/calibre/customize/profiles.py | 37 ++++++++++++++++++++++++++-----
 1 file changed, 32 insertions(+), 5 deletions(-)

diff --git a/src/calibre/customize/profiles.py b/src/calibre/customize/profiles.py
index 456cc21d3c..752a908c1c 100644
--- a/src/calibre/customize/profiles.py
+++ b/src/calibre/customize/profiles.py
@@ -88,9 +88,9 @@ class MobipocketInput(InputProfile):
 
 class HanlinV3Input(InputProfile):
 
-    name        = 'Hanlin V3'
+    name        = 'Hanlin V3/V5'
     short_name  = 'hanlinv3'
-    description = _('This profile is intended for the Hanlin V3 and its clones.')
+    description = _('This profile is intended for the Hanlin V3/V5 and its clones.')
 
     # Screen size is a best guess
     screen_size               = (584, 754)
@@ -159,9 +159,23 @@ class IRexDR1000Input(InputProfile):
     fbase                     = 16
     fsizes                    = [12, 14, 16, 18, 20, 22, 24]
 
+
+class NookInput(InputProfile):
+
+    author      = 'John Schember'
+    name        = 'Nook'
+    short_name  = 'nook'
+    description = _('This profile is intended for the B&N Nook.')
+
+    # Screen size is a best guess
+    screen_size               = (600, 800)
+    dpi                       = 167
+    fbase                     = 16
+    fsizes                    = [12, 12, 14, 16, 18, 20, 22, 24]
+
 input_profiles = [InputProfile, SonyReaderInput, MSReaderInput,
         MobipocketInput, HanlinV3Input, CybookG3Input, CybookOpusInput, KindleInput,
-        IlliadInput, IRexDR1000Input]
+        IlliadInput, IRexDR1000Input, NookInput]
 
 
 class OutputProfile(Plugin):
@@ -248,7 +262,7 @@ class MobipocketOutput(OutputProfile):
 
 class HanlinV3Output(OutputProfile):
 
-    name        = 'Hanlin V3'
+    name        = 'Hanlin V3/V5'
     short_name  = 'hanlinv3'
     description = _('This profile is intended for the Hanlin V3/V5 and its clones.')
 
@@ -341,7 +355,20 @@ class IRexDR1000Output(OutputProfile):
     fbase                     = 16
     fsizes                    = [12, 14, 16, 18, 20, 22, 24]
 
+class NookOutput(OutputProfile):
+
+    author      = 'John Schember'
+    name        = 'Nook'
+    short_name  = 'nook'
+    description = _('This profile is intended for the B&N Nook.')
+
+    # Screen size is a best guess
+    screen_size               = (600, 800)
+    dpi                       = 167
+    fbase                     = 16
+    fsizes                    = [12, 12, 14, 16, 18, 20, 22, 24]
+
 output_profiles = [OutputProfile, SonyReaderOutput, MSReaderOutput,
         MobipocketOutput, HanlinV3Output, CybookG3Output, CybookOpusOutput,
         KindleOutput, SonyReaderLandscapeOutput, KindleDXOutput, IlliadOutput,
-        IRexDR1000Output, JetBook5Output]
+        IRexDR1000Output, JetBook5Output, NookOutput]

From 170e57967390a70b234c9da7ae7e0d397a6cb725 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Sun, 20 Dec 2009 08:52:40 -0500
Subject: [PATCH 120/120] Working cover upload for the Nook.

---
 src/calibre/devices/cybookg3/driver.py |  7 ++++--
 src/calibre/devices/nook/driver.py     | 30 +++++++++++++++-----------
 src/calibre/devices/usbms/driver.py    | 12 ++++-------
 3 files changed, 27 insertions(+), 22 deletions(-)

diff --git a/src/calibre/devices/cybookg3/driver.py b/src/calibre/devices/cybookg3/driver.py
index cd9545f231..00cf99a8c4 100644
--- a/src/calibre/devices/cybookg3/driver.py
+++ b/src/calibre/devices/cybookg3/driver.py
@@ -47,8 +47,11 @@ class CYBOOKG3(USBMS):
     DELETE_EXTS = ['.mbp', '.dat', '_6090.t2b']
     SUPPORTS_SUB_DIRS = True
 
-    def upload_cover(self, path, name, coverdata, metadata):
-        with open('%s_6090.t2b' % os.path.join(path, name), 'wb') as t2bfile:
+    def upload_cover(self, path, filename, metadata):
+        coverdata = metadata.get('cover', None)
+        if coverdata:
+            coverdata = coverdata[2]
+        with open('%s_6090.t2b' % os.path.join(path, filename), 'wb') as t2bfile:
             t2b.write_t2b(t2bfile, coverdata)
 
     @classmethod
diff --git a/src/calibre/devices/nook/driver.py b/src/calibre/devices/nook/driver.py
index 4cf65c866e..cc3f26d730 100644
--- a/src/calibre/devices/nook/driver.py
+++ b/src/calibre/devices/nook/driver.py
@@ -10,9 +10,8 @@ Device driver for Barns and Nobel's Nook
 
 try:
     from PIL import Image, ImageDraw
-    Image
 except ImportError:
-    import Image
+    import Image, ImageDraw
 
 import cStringIO
 
@@ -46,23 +45,30 @@ class NOOK(USBMS):
     EBOOK_DIR_MAIN = 'my documents'
     SUPPORTS_SUB_DIRS = True
 
-    def upload_cover(self, path, name, coverdata, metadata):
-        if not coverdata:
+    def upload_cover(self, path, filename, metadata):
+        coverdata = metadata.get('cover', None)
+        if coverdata:
+            cover = Image.open(cStringIO.StringIO(coverdata[2]))
+            cover.thumbnail((96, 144), Image.ANTIALIAS)
+        else:
             coverdata = open(I('library.png'), 'rb').read()
 
-        im = Image.open(cStringIO.StringIO(coverdata))
-        im.thumbnail((96, 144), Image.ANTIALIAS)
+            cover = Image.new('RGB', (96, 144), 'black')
+            im = Image.open(cStringIO.StringIO(coverdata))
+            im.thumbnail((96, 144), Image.ANTIALIAS)
 
-        if not coverdata:
-            draw = ImageDraw.Draw(im)
-            draw.text((0, 29), metadata.title)
-            draw.text((0, 115), ', '.join(metadata.authors))
+            x, y = im.size
+            cover.paste(im, ((96-x)/2, (144-y)/2))
+
+            draw = ImageDraw.Draw(cover)
+            draw.text((1, 15), metadata.title)
+            draw.text((1, 115), ', '.join(metadata.authors))
 
         data = cStringIO.StringIO()
-        im.save(data, 'JPG')
+        cover.save(data, 'JPEG')
         coverdata = data.getvalue()
 
-        with open('%s.jpg' % os.path.join(path, name), 'wb') as coverfile:
+        with open('%s.jpg' % os.path.join(path, filename), 'wb') as coverfile:
             coverfile.write(coverdata)
 
     def windows_sort_drives(self, drives):
diff --git a/src/calibre/devices/usbms/driver.py b/src/calibre/devices/usbms/driver.py
index e37ea62525..f637ee2fa8 100644
--- a/src/calibre/devices/usbms/driver.py
+++ b/src/calibre/devices/usbms/driver.py
@@ -111,11 +111,7 @@ class USBMS(CLI, Device):
             paths.append(filepath)
 
             self.put_file(infile, filepath, replace_file=True)
-
-            coverdata = mdata.get('cover', None)
-            if coverdata:
-                coverdata = coverdata[2]
-            self.upload_cover(os.path.dirname(filepath), os.path.splitext(os.path.basename(filepath))[0], coverdata, mdata)
+            self.upload_cover(os.path.dirname(filepath), os.path.splitext(os.path.basename(filepath))[0], mdata)
 
             self.report_progress((i+1) / float(len(files)), _('Transferring books to device...'))
 
@@ -123,11 +119,11 @@ class USBMS(CLI, Device):
 
         return zip(paths, cycle([on_card]))
 
-    def upload_cover(self, path, name, coverdata):
+    def upload_cover(self, path, filename, metadata):
         '''
         :path: the full path were the associated book is located.
-        :name: the name of the book file without the extension.
-        :coverdata: cover data in jpeg format.
+        :filename: the name of the book file without the extension.
+        :metadata: metadata belonging to the book. metadata.cover[2] for coverdata.
         '''
         pass