From a380a7a284bc12dbde704a74dfb476dc9aa76759 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Thu, 28 Jan 2010 19:08:39 -0500
Subject: [PATCH 1/5] Fix bug #4411: Include chapter headings when generating
 toc from pml files.

---
 src/calibre/ebooks/pdb/ereader/writer.py |  4 ++-
 src/calibre/ebooks/pml/pmlconverter.py   | 43 +++++++++++++++++++-----
 src/calibre/ebooks/pml/pmlml.py          |  2 +-
 3 files changed, 39 insertions(+), 10 deletions(-)
diff --git a/src/calibre/ebooks/pdb/ereader/writer.py b/src/calibre/ebooks/pdb/ereader/writer.py
index a379899af5..a6ee16db15 100644
--- a/src/calibre/ebooks/pdb/ereader/writer.py
+++ b/src/calibre/ebooks/pdb/ereader/writer.py
@@ -42,7 +42,9 @@ class Writer(FormatWriter):
         pml = unicode(pmlmlizer.extract_content(oeb_book, self.opts)).encode('cp1252', 'replace')
 
         text, text_sizes = self._text(pml)
-        chapter_index = self._index_item(r'(?s)\\C(?P<val>\d)="(?P<text>.+?)"', pml)
+        chapter_index = self._index_item(r'(?s)\\C(?P<val>[0-4])="(?P<text>.+?)"', pml)  # \Cn="title"
+        chapter_index += self._index_item(r'(?s)\\X(?P<val>[0-4])(?P<text>.+?)\\X[0-4]', pml)  # \Xn...\Xn
+        chapter_index += self._index_item(r'(?s)\\x(?P<text>.+?)\\x', pml)  # \x...\x
         link_index = self._index_item(r'(?s)\\Q="(?P<text>.+?)"', pml)
         images = self._images(oeb_book.manifest, pmlmlizer.image_hrefs)
         metadata = [self._metadata(metadata)]
diff --git a/src/calibre/ebooks/pml/pmlconverter.py b/src/calibre/ebooks/pml/pmlconverter.py
index aa2ff117a4..356e2679ee 100644
--- a/src/calibre/ebooks/pml/pmlconverter.py
+++ b/src/calibre/ebooks/pml/pmlconverter.py
@@ -171,6 +171,9 @@ class PML_HTMLizer(object):
         # &. It will display as &amp;
         pml = pml.replace('&', '&amp;')
 
+        pml = re.sub(r'(?<=\\x)(?P<text>.*?)(?=\\x)', lambda match: '="%s"%s' % (self.strip_pml(match.group('text')), match.group('text')), pml)
+        pml = re.sub(r'(?<=\\X[0-4])(?P<text>.*?)(?=\\X[0-4])', lambda match: '="%s"%s' % (self.strip_pml(match.group('text')), match.group('text')), pml)
+
         pml = re.sub(r'\\a(?P<num>\d{3})', lambda match: '&#%s;' % match.group('num'), pml)
         pml = re.sub(r'\\U(?P<num>[0-9a-f]{4})', lambda match: '%s' % my_unichr(int(match.group('num'), 16)), pml)
 
@@ -178,6 +181,19 @@ class PML_HTMLizer(object):
 
         return pml
 
+    def strip_pml(self, pml):
+        '''Return pml with PML markup codes removed and line breaks turned into spaces.'''
+        # Multi-character codes go first; the generic \<char> rules below would
+        # otherwise eat their prefix and leave stray digits behind.
+        pml = re.sub(r'\\a\d\d\d', '', pml)
+        pml = re.sub(r'\\U[0-9a-f]{4}', '', pml)
+        pml = re.sub(r'\\.\d=""', '', pml)
+        pml = re.sub(r'\\.=""', '', pml)
+        pml = re.sub(r'\\.\d', '', pml)
+        pml = re.sub(r'\\.', '', pml)
+        # str.replace returns a new string, so the result must be kept.
+        return pml.replace('\r\n', ' ').replace('\n', ' ').replace('\r', ' ')
+
     def cleanup_html(self, html):
         old = html
         html = self.cleanup_html_remove_redundant(html)
@@ -503,9 +519,9 @@ class PML_HTMLizer(object):
                 if c == '\\':
                     c = line.read(1)
 
-                    if c in 'xqcrtTiIuobBlk':
+                    if c in 'qcrtTiIuobBlk':
                         text = self.process_code(c, line)
-                    elif c in 'FSX':
+                    elif c in 'FS':
                         l = line.read(1)
                         if '%s%s' % (c, l) == 'Fn':
                             text = self.process_code('Fn', line, 'fn')
@@ -515,8 +531,24 @@ class PML_HTMLizer(object):
                             text = self.process_code('SB', line)
                         elif '%s%s' % (c, l) == 'Sd':
                             text = self.process_code('Sd', line, 'sb')
+                    elif c in 'xXC':
+                        # The PML was modified earlier so x and X put the text
+                        # inside of ="" so we don't have to do special processing
+                        # for C.
+                        t = ''
+                        if c in 'XC':
+                            level = line.read(1)
+                        id = 'pml_toc-%s' % len(self.toc)
+                        value = self.code_value(line)
+                        if c == 'x':
+                            t = self.process_code(c, line)
+                        elif c == 'X':
+                            t = self.process_code('%s%s' % (c, level), line)
+                        if not value or value == '':
+                            text = t
                         else:
-                            text = self.process_code('%s%s' % (c, l), line)
+                            self.toc.add_item(os.path.basename(self.file_name), id, value)
+                            text = '<span id="%s"></span>%s' % (id, t)
                     elif c == 'm':
                         empty = False
                         src = self.code_value(line)
@@ -528,11 +560,6 @@ class PML_HTMLizer(object):
                     elif c == 'p':
                         empty = False
                         text = '<br /><br style="page-break-after: always;" />'
-                    elif c == 'C':
-                        line.read(1)
-                        id = 'pml_toc-%s' % len(self.toc)
-                        self.toc.add_item(os.path.basename(self.file_name), id, self.code_value(line))
-                        text = '<span id="%s"></span>' % id
                     elif c == 'n':
                         pass
                     elif c == 'w':
diff --git a/src/calibre/ebooks/pml/pmlml.py b/src/calibre/ebooks/pml/pmlml.py
index 7427a77c2f..d57ed136f6 100644
--- a/src/calibre/ebooks/pml/pmlml.py
+++ b/src/calibre/ebooks/pml/pmlml.py
@@ -233,7 +233,7 @@ class PMLMLizer(object):
                 w += '="50%"'
             text.append(w)
         toc_id = elem.attrib.get('id', None)
-        if toc_id:
+        if toc_id and tag  not in ('h1', 'h2','h3','h4','h5','h6',):
             if self.toc.get(page.href, None):
                 toc_title = self.toc[page.href].get(toc_id, None)
                 if toc_title:

From ebdcae06cda073c3d5a902173819825157eab1ab Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Thu, 28 Jan 2010 17:34:04 -0700
Subject: [PATCH 2/5] ...

---
 src/calibre/ebooks/pdf/reflow.py | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/src/calibre/ebooks/pdf/reflow.py b/src/calibre/ebooks/pdf/reflow.py
index bf2d921a10..42c16225d2 100644
--- a/src/calibre/ebooks/pdf/reflow.py
+++ b/src/calibre/ebooks/pdf/reflow.py
@@ -256,11 +256,16 @@ class Region(object):
         return len(self.columns) == 0
 
     @property
-    def is_small(self):
+    def line_count(self):
         max_lines = 0
         for c in self.columns:
             max_lines = max(max_lines, len(c))
-        return max_lines > 2
+        return max_lines
+
+
+    @property
+    def is_small(self):
+        return self.line_count < 3
 
     def absorb(self, singleton):
 
@@ -431,7 +436,7 @@ class Page(object):
     def coalesce_regions(self):
         # find contiguous sets of small regions
         # absorb into a neighboring region (prefer the one with number of cols
-        # closer to the avg number of cols in the set, if equal use large
+        # closer to the avg number of cols in the set, if equal use larger
         # region)
         # merge contiguous regions that can contain each other
         absorbed = set([])

From d54c1a867e790e455d96cae6f32787eac74c17d0 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Thu, 28 Jan 2010 19:18:00 -0700
Subject: [PATCH 3/5] E-book viewer: Workaround to display images that have
 been embedded in svg containers. Fixes #4716 (Unable to view ePUB cover
 images in calibre viewer)

---
 resources/viewer/images.js | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/resources/viewer/images.js b/resources/viewer/images.js
index ea68009254..46cb968d4c 100644
--- a/resources/viewer/images.js
+++ b/resources/viewer/images.js
@@ -20,4 +20,20 @@ function setup_image_scaling_handlers() {
    });
 }
 
+// Replace each <svg> that merely wraps a single image with a plain, centered <img>.
+function extract_svged_images() {
+    $("svg").each(function() {
+        var children = $(this).children("img");
+        var href = (children.length == 1) ? children.attr('xlink:href') : undefined;
+        if (href != undefined) {
+            // Set src through attr() so the href is never parsed as markup.
+            var img = $('<img style="height: 98%" alt="SVG Image"></img>').attr('src', href);
+            $(this).replaceWith($('<div style="text-align:center; margin: 0; padding: 0"></div>').append(img));
+        }
+    });
+}
+
+$(document).ready(function() {
+   extract_svged_images();
+});
 

From de2e424c88e22f876a9f071bfad32c8c236517e9 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Thu, 28 Jan 2010 19:37:25 -0700
Subject: [PATCH 4/5] E-book viewer: Add support for SVG images

---
 resources/viewer/images.js              |  2 +-
 src/calibre/gui2/viewer/documentview.py | 11 +++++++----
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/resources/viewer/images.js b/resources/viewer/images.js
index 46cb968d4c..7b10f6169a 100644
--- a/resources/viewer/images.js
+++ b/resources/viewer/images.js
@@ -34,6 +34,6 @@ function extract_svged_images() {
 }
 
 $(document).ready(function() {
-   extract_svged_images();
+   //extract_svged_images();
 });
 
diff --git a/src/calibre/gui2/viewer/documentview.py b/src/calibre/gui2/viewer/documentview.py
index f6fce62eac..2c2a7fc135 100644
--- a/src/calibre/gui2/viewer/documentview.py
+++ b/src/calibre/gui2/viewer/documentview.py
@@ -10,7 +10,7 @@ from base64 import b64encode
 from PyQt4.Qt import QSize, QSizePolicy, QUrl, SIGNAL, Qt, QTimer, \
                      QPainter, QPalette, QBrush, QFontDatabase, QDialog, \
                      QColor, QPoint, QImage, QRegion, QVariant, QIcon, \
-                     QFont, pyqtSignature, QAction
+                     QFont, pyqtSignature, QAction, QByteArray
 from PyQt4.QtWebKit import QWebPage, QWebView, QWebSettings
 
 from calibre.utils.config import Config, StringConfig
@@ -514,14 +514,17 @@ class DocumentView(QWebView):
             mt = guess_type(path)[0]
         html = open(path, 'rb').read().decode(path.encoding, 'replace')
         html = EntityDeclarationProcessor(html).processed_html
+        has_svg = re.search(r'<\S*svg', html) is not None
+
         if 'xhtml' in mt:
             html = self.self_closing_pat.sub(self.self_closing_sub, html)
         if self.manager is not None:
             self.manager.load_started()
         self.loading_url = QUrl.fromLocalFile(path)
-        #self.setContent(QByteArray(html.encode(path.encoding)), mt, QUrl.fromLocalFile(path))
-        #open('/tmp/t.html', 'wb').write(html.encode(path.encoding))
-        self.setHtml(html, self.loading_url)
+        if has_svg:
+            self.setContent(QByteArray(html.encode(path.encoding)), mt, QUrl.fromLocalFile(path))
+        else:
+            self.setHtml(html, self.loading_url)
         self.turn_off_internal_scrollbars()
 
     def initialize_scrollbar(self):

From 813e52bbf8cb0b07651a3e2141bb91c019fc99f7 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Thu, 28 Jan 2010 19:52:04 -0700
Subject: [PATCH 5/5] ...

---
 src/calibre/__init__.py                 | 9 ++++++---
 src/calibre/gui2/viewer/documentview.py | 3 ++-
 src/calibre/utils/ipc/job.py            | 7 +++++--
 3 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/src/calibre/__init__.py b/src/calibre/__init__.py
index e32c03fe13..e5e284fb5b 100644
--- a/src/calibre/__init__.py
+++ b/src/calibre/__init__.py
@@ -132,9 +132,12 @@ def prints(*args, **kwargs):
                 try:
                     arg = arg.encode(enc)
                 except UnicodeEncodeError:
-                    if not safe_encode:
-                        raise
-                    arg = repr(arg)
+                    try:
+                        arg = arg.encode('utf-8')
+                    except Exception:  # not bare: let KeyboardInterrupt/SystemExit through
+                        if not safe_encode:
+                            raise
+                        arg = repr(arg)
 
         file.write(arg)
         if i != len(args)-1:
diff --git a/src/calibre/gui2/viewer/documentview.py b/src/calibre/gui2/viewer/documentview.py
index 2c2a7fc135..6b95a4dcaa 100644
--- a/src/calibre/gui2/viewer/documentview.py
+++ b/src/calibre/gui2/viewer/documentview.py
@@ -514,7 +514,7 @@ class DocumentView(QWebView):
             mt = guess_type(path)[0]
         html = open(path, 'rb').read().decode(path.encoding, 'replace')
         html = EntityDeclarationProcessor(html).processed_html
-        has_svg = re.search(r'<\S*svg', html) is not None
+        has_svg = re.search(r'<[:a-z]*svg', html) is not None
 
         if 'xhtml' in mt:
             html = self.self_closing_pat.sub(self.self_closing_sub, html)
@@ -522,6 +522,7 @@ class DocumentView(QWebView):
             self.manager.load_started()
         self.loading_url = QUrl.fromLocalFile(path)
         if has_svg:
+            prints('Rendering as XHTML...')
             self.setContent(QByteArray(html.encode(path.encoding)), mt, QUrl.fromLocalFile(path))
         else:
             self.setHtml(html, self.loading_url)
diff --git a/src/calibre/utils/ipc/job.py b/src/calibre/utils/ipc/job.py
index 458d5adb8a..a6c39ffc6b 100644
--- a/src/calibre/utils/ipc/job.py
+++ b/src/calibre/utils/ipc/job.py
@@ -52,10 +52,13 @@ class BaseJob(object):
             else:
                 self._status_text = _('Error') if self.failed else _('Finished')
             if DEBUG:
-                prints('Job:', self.id, self.description, 'finished',
+                try:
+                    prints('Job:', self.id, self.description, 'finished',
                         safe_encode=True)
-                prints('\t'.join(self.details.splitlines(True)),
+                    prints('\t'.join(self.details.splitlines(True)),
                         safe_encode=True)
+                except Exception:  # debug output is best-effort, but don't swallow KeyboardInterrupt/SystemExit
+                    pass
             if not self._done_called:
                 self._done_called = True
                 try: