From 86e68579f32972a2424771a7f3e84d046d630283 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Sat, 11 Sep 2010 08:39:40 -0400
Subject: [PATCH 1/5] PDF Input: Fix bug #6734, add additional matching for
 unicode characters.

---
 src/calibre/ebooks/conversion/preprocess.py | 11 +++++++++++
 1 file changed, 11 insertions(+)
diff --git a/src/calibre/ebooks/conversion/preprocess.py b/src/calibre/ebooks/conversion/preprocess.py
index f7b803974f..256bcce6fc 100644
--- a/src/calibre/ebooks/conversion/preprocess.py
+++ b/src/calibre/ebooks/conversion/preprocess.py
@@ -166,6 +166,17 @@ class HTMLPreProcessor(object):
                   (re.compile(u'`\s*(<br.*?>)*\s*O', re.UNICODE), lambda match: u'Ò'),
                   (re.compile(u'`\s*(<br.*?>)*\s*u', re.UNICODE), lambda match: u'ù'),
                   (re.compile(u'`\s*(<br.*?>)*\s*U', re.UNICODE), lambda match: u'Ù'),
+                  # ` with letter before
+                  (re.compile(u'a\s*(<br.*?>)*\s*`', re.UNICODE), lambda match: u'à'),
+                  (re.compile(u'A\s*(<br.*?>)*\s*`', re.UNICODE), lambda match: u'À'),
+                  (re.compile(u'e\s*(<br.*?>)*\s*`', re.UNICODE), lambda match: u'è'),
+                  (re.compile(u'E\s*(<br.*?>)*\s*`', re.UNICODE), lambda match: u'È'),
+                  (re.compile(u'i\s*(<br.*?>)*\s*`', re.UNICODE), lambda match: u'ì'),
+                  (re.compile(u'I\s*(<br.*?>)*\s*`', re.UNICODE), lambda match: u'Ì'),
+                  (re.compile(u'o\s*(<br.*?>)*\s*`', re.UNICODE), lambda match: u'ò'),
+                  (re.compile(u'O\s*(<br.*?>)*\s*`', re.UNICODE), lambda match: u'Ò'),
+                  (re.compile(u'u\s*(<br.*?>)*\s*`', re.UNICODE), lambda match: u'ù'),
+                  (re.compile(u'U\s*(<br.*?>)*\s*`', re.UNICODE), lambda match: u'Ù'),
 
                   # ´
                   (re.compile(u'´\s*(<br.*?>)*\s*a', re.UNICODE), lambda match: u'á'),

From 96478da323e642febb94c2c1a2c9826a6b3dddb7 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Sat, 11 Sep 2010 08:48:47 -0400
Subject: [PATCH 2/5] PML Input: Fix cleanup code.

---
 src/calibre/ebooks/pml/pmlconverter.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/calibre/ebooks/pml/pmlconverter.py b/src/calibre/ebooks/pml/pmlconverter.py
index 166695ff5c..3a4454725a 100644
--- a/src/calibre/ebooks/pml/pmlconverter.py
+++ b/src/calibre/ebooks/pml/pmlconverter.py
@@ -216,7 +216,7 @@ class PML_HTMLizer(object):
                 html = re.sub(r'(?u)%s\s*%s' % (open % '.*?', close), '', html)
             else:
                 html = re.sub(r'(?u)%s\s*%s' % (open, close), '', html)
-        html = re.sub(r'<p>\s*</p>', '', html)
+        html = re.sub(r'(?imu)<p>\s*</p>', '', html)
         return html
 
     def start_line(self):

From dc7bc5dd5d890278d7f43377e9df944675888fc6 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Sat, 11 Sep 2010 09:01:34 -0400
Subject: [PATCH 3/5] PML Input: Fix bug #6770, put toc link after header so
 toc link goes to correct page.

---
 src/calibre/ebooks/pml/pmlconverter.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/calibre/ebooks/pml/pmlconverter.py b/src/calibre/ebooks/pml/pmlconverter.py
index 3a4454725a..6e479a71ef 100644
--- a/src/calibre/ebooks/pml/pmlconverter.py
+++ b/src/calibre/ebooks/pml/pmlconverter.py
@@ -556,7 +556,7 @@ class PML_HTMLizer(object):
                             text = t
                         else:
                             self.toc.add_item(os.path.basename(self.file_name), id, value)
-                            text = '<span id="%s"></span>%s' % (id, t)
+                            text = '%s<span id="%s"></span>' % (t, id)
                     elif c == 'm':
                         empty = False
                         src = self.code_value(line)

From c2b3c445e17a38b5599393c943036c6c448886da Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Sat, 11 Sep 2010 09:09:08 -0400
Subject: [PATCH 4/5] PML Input: Remove empty lines.

---
 src/calibre/ebooks/pml/pmlconverter.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/calibre/ebooks/pml/pmlconverter.py b/src/calibre/ebooks/pml/pmlconverter.py
index 6e479a71ef..b0fc15197a 100644
--- a/src/calibre/ebooks/pml/pmlconverter.py
+++ b/src/calibre/ebooks/pml/pmlconverter.py
@@ -207,6 +207,7 @@ class PML_HTMLizer(object):
         while html != old:
             old = html
             html = self.cleanup_html_remove_redundant(html)
+        html = re.sub(r'(?imu)^\s*', '', html)
         return html
 
     def cleanup_html_remove_redundant(self, html):

From ef8408869cebac380474deb971c4b6910680c895 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Sat, 11 Sep 2010 09:13:23 -0400
Subject: [PATCH 5/5] TXT Output: preserve spaces, handle tab character
 correctly. &#09; is reduced to a single space by many renderers.

---
 src/calibre/ebooks/txt/processor.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/calibre/ebooks/txt/processor.py b/src/calibre/ebooks/txt/processor.py
index a12e8a0761..dac1e34df7 100644
--- a/src/calibre/ebooks/txt/processor.py
+++ b/src/calibre/ebooks/txt/processor.py
@@ -77,7 +77,7 @@ def separate_paragraphs_print_formatted(txt):
 
 def preserve_spaces(txt):
     txt = txt.replace(' ', '&nbsp;')
-    txt = txt.replace('\t', '&#09;')
+    txt = txt.replace('\t', '&nbsp;&nbsp;&nbsp;&nbsp;')
     return txt
 
 def opf_writer(path, opf_name, manifest, spine, mi):