From a380a7a284bc12dbde704a74dfb476dc9aa76759 Mon Sep 17 00:00:00 2001 From: John Schember Date: Thu, 28 Jan 2010 19:08:39 -0500 Subject: [PATCH 1/5] Fix bug #4411: Include chapter headings when generating toc from pml files. --- src/calibre/ebooks/pdb/ereader/writer.py | 4 ++- src/calibre/ebooks/pml/pmlconverter.py | 43 +++++++++++++++++++----- src/calibre/ebooks/pml/pmlml.py | 2 +- 3 files changed, 39 insertions(+), 10 deletions(-) diff --git a/src/calibre/ebooks/pdb/ereader/writer.py b/src/calibre/ebooks/pdb/ereader/writer.py index a379899af5..a6ee16db15 100644 --- a/src/calibre/ebooks/pdb/ereader/writer.py +++ b/src/calibre/ebooks/pdb/ereader/writer.py @@ -42,7 +42,9 @@ class Writer(FormatWriter): pml = unicode(pmlmlizer.extract_content(oeb_book, self.opts)).encode('cp1252', 'replace') text, text_sizes = self._text(pml) - chapter_index = self._index_item(r'(?s)\\C(?P\d)="(?P.+?)"', pml) + chapter_index = self._index_item(r'(?s)\\C(?P[0-4)="(?P.+?)"', pml) + chapter_index += self.index_item(r'(?s)\\X(?P[0-4])(?P.+?)\\X[0-4]', pml) + chapter_index += self.index_item(r'(?s)\\x(?P.+?)\\x', pml) link_index = self._index_item(r'(?s)\\Q="(?P.+?)"', pml) images = self._images(oeb_book.manifest, pmlmlizer.image_hrefs) metadata = [self._metadata(metadata)] diff --git a/src/calibre/ebooks/pml/pmlconverter.py b/src/calibre/ebooks/pml/pmlconverter.py index aa2ff117a4..356e2679ee 100644 --- a/src/calibre/ebooks/pml/pmlconverter.py +++ b/src/calibre/ebooks/pml/pmlconverter.py @@ -171,6 +171,9 @@ class PML_HTMLizer(object): # &. 
It will display as & pml = pml.replace('&', '&') + pml = re.sub(r'(?<=\\x)(?P.*?)(?=\\x)', lambda match: '="%s"%s' % (self.strip_pml(match.group('text')), match.group('text')), pml) + pml = re.sub(r'(?<=\\X[0-4])(?P.*?)(?=\\X[0-4])', lambda match: '="%s"%s' % (self.strip_pml(match.group('text')), match.group('text')), pml) + pml = re.sub(r'\\a(?P\d{3})', lambda match: '&#%s;' % match.group('num'), pml) pml = re.sub(r'\\U(?P[0-9a-f]{4})', lambda match: '%s' % my_unichr(int(match.group('num'), 16)), pml) @@ -178,6 +181,19 @@ class PML_HTMLizer(object): return pml + def strip_pml(self, pml): + pml = re.sub(r'\\.\d=""', '', pml) + pml = re.sub(r'\\.=""', '', pml) + pml = re.sub(r'\\.\d', '', pml) + pml = re.sub(r'\\.', '', pml) + pml = re.sub(r'\\a\d\d\d', '', pml) + pml = re.sub(r'\\U\d\d\d\d', '', pml) + pml.replace('\r\n', ' ') + pml.replace('\n', ' ') + pml.replace('\r', ' ') + + return pml + def cleanup_html(self, html): old = html html = self.cleanup_html_remove_redundant(html) @@ -503,9 +519,9 @@ class PML_HTMLizer(object): if c == '\\': c = line.read(1) - if c in 'xqcrtTiIuobBlk': + if c in 'qcrtTiIuobBlk': text = self.process_code(c, line) - elif c in 'FSX': + elif c in 'FS': l = line.read(1) if '%s%s' % (c, l) == 'Fn': text = self.process_code('Fn', line, 'fn') @@ -515,8 +531,24 @@ class PML_HTMLizer(object): text = self.process_code('SB', line) elif '%s%s' % (c, l) == 'Sd': text = self.process_code('Sd', line, 'sb') + elif c in 'xXC': + # The PML was modified earlier so x and X put the text + # inside of ="" so we don't have to do special processing + # for C. 
+ t = '' + if c in 'XC': + level = line.read(1) + id = 'pml_toc-%s' % len(self.toc) + value = self.code_value(line) + if c == 'x': + t = self.process_code(c, line) + elif c == 'X': + t = self.process_code('%s%s' % (c, level), line) + if not value or value == '': + text = t else: - text = self.process_code('%s%s' % (c, l), line) + self.toc.add_item(os.path.basename(self.file_name), id, value) + text = '%s' % (id, t) elif c == 'm': empty = False src = self.code_value(line) @@ -528,11 +560,6 @@ class PML_HTMLizer(object): elif c == 'p': empty = False text = '

' - elif c == 'C': - line.read(1) - id = 'pml_toc-%s' % len(self.toc) - self.toc.add_item(os.path.basename(self.file_name), id, self.code_value(line)) - text = '' % id elif c == 'n': pass elif c == 'w': diff --git a/src/calibre/ebooks/pml/pmlml.py b/src/calibre/ebooks/pml/pmlml.py index 7427a77c2f..d57ed136f6 100644 --- a/src/calibre/ebooks/pml/pmlml.py +++ b/src/calibre/ebooks/pml/pmlml.py @@ -233,7 +233,7 @@ class PMLMLizer(object): w += '="50%"' text.append(w) toc_id = elem.attrib.get('id', None) - if toc_id: + if toc_id and tag not in ('h1', 'h2','h3','h4','h5','h6',): if self.toc.get(page.href, None): toc_title = self.toc[page.href].get(toc_id, None) if toc_title: From ebdcae06cda073c3d5a902173819825157eab1ab Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 28 Jan 2010 17:34:04 -0700 Subject: [PATCH 2/5] ... --- src/calibre/ebooks/pdf/reflow.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/calibre/ebooks/pdf/reflow.py b/src/calibre/ebooks/pdf/reflow.py index bf2d921a10..42c16225d2 100644 --- a/src/calibre/ebooks/pdf/reflow.py +++ b/src/calibre/ebooks/pdf/reflow.py @@ -256,11 +256,16 @@ class Region(object): return len(self.columns) == 0 @property - def is_small(self): + def line_count(self): max_lines = 0 for c in self.columns: max_lines = max(max_lines, len(c)) - return max_lines > 2 + return max_lines + + + @property + def is_small(self): + return self.line_count < 3 def absorb(self, singleton): @@ -431,7 +436,7 @@ class Page(object): def coalesce_regions(self): # find contiguous sets of small regions # absorb into a neighboring region (prefer the one with number of cols - # closer to the avg number of cols in the set, if equal use large + # closer to the avg number of cols in the set, if equal use larger # region) # merge contiguous regions that can contain each other absorbed = set([]) From d54c1a867e790e455d96cae6f32787eac74c17d0 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 28 Jan 2010 19:18:00 -0700 
Subject: [PATCH 3/5] E-book viewer: Workaround to display images that have been embedded in svg containers. Fixes #4716 (Unable to view ePUB cover images in calibre viewer) --- resources/viewer/images.js | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/resources/viewer/images.js b/resources/viewer/images.js index ea68009254..46cb968d4c 100644 --- a/resources/viewer/images.js +++ b/resources/viewer/images.js @@ -20,4 +20,20 @@ function setup_image_scaling_handlers() { }); } +function extract_svged_images() { + $("svg").each(function() { + var children = $(this).children("img"); + if (children.length == 1) { + var img = $(children[0]); + var href = img.attr('xlink:href'); + if (href != undefined) { + $(this).replaceWith('
SVG Image
'); + } + } + }); +} + +$(document).ready(function() { + extract_svged_images(); +}); From de2e424c88e22f876a9f071bfad32c8c236517e9 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 28 Jan 2010 19:37:25 -0700 Subject: [PATCH 4/5] E-book viewer: Add support for SVG images --- resources/viewer/images.js | 2 +- src/calibre/gui2/viewer/documentview.py | 11 +++++++---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/resources/viewer/images.js b/resources/viewer/images.js index 46cb968d4c..7b10f6169a 100644 --- a/resources/viewer/images.js +++ b/resources/viewer/images.js @@ -34,6 +34,6 @@ function extract_svged_images() { } $(document).ready(function() { - extract_svged_images(); + //extract_svged_images(); }); diff --git a/src/calibre/gui2/viewer/documentview.py b/src/calibre/gui2/viewer/documentview.py index f6fce62eac..2c2a7fc135 100644 --- a/src/calibre/gui2/viewer/documentview.py +++ b/src/calibre/gui2/viewer/documentview.py @@ -10,7 +10,7 @@ from base64 import b64encode from PyQt4.Qt import QSize, QSizePolicy, QUrl, SIGNAL, Qt, QTimer, \ QPainter, QPalette, QBrush, QFontDatabase, QDialog, \ QColor, QPoint, QImage, QRegion, QVariant, QIcon, \ - QFont, pyqtSignature, QAction + QFont, pyqtSignature, QAction, QByteArray from PyQt4.QtWebKit import QWebPage, QWebView, QWebSettings from calibre.utils.config import Config, StringConfig @@ -514,14 +514,17 @@ class DocumentView(QWebView): mt = guess_type(path)[0] html = open(path, 'rb').read().decode(path.encoding, 'replace') html = EntityDeclarationProcessor(html).processed_html + has_svg = re.search(r'<\S*svg', html) is not None + if 'xhtml' in mt: html = self.self_closing_pat.sub(self.self_closing_sub, html) if self.manager is not None: self.manager.load_started() self.loading_url = QUrl.fromLocalFile(path) - #self.setContent(QByteArray(html.encode(path.encoding)), mt, QUrl.fromLocalFile(path)) - #open('/tmp/t.html', 'wb').write(html.encode(path.encoding)) - self.setHtml(html, self.loading_url) + if 
has_svg: + self.setContent(QByteArray(html.encode(path.encoding)), mt, QUrl.fromLocalFile(path)) + else: + self.setHtml(html, self.loading_url) self.turn_off_internal_scrollbars() def initialize_scrollbar(self): From 813e52bbf8cb0b07651a3e2141bb91c019fc99f7 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 28 Jan 2010 19:52:04 -0700 Subject: [PATCH 5/5] ... --- src/calibre/__init__.py | 9 ++++++--- src/calibre/gui2/viewer/documentview.py | 3 ++- src/calibre/utils/ipc/job.py | 7 +++++-- 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/src/calibre/__init__.py b/src/calibre/__init__.py index e32c03fe13..e5e284fb5b 100644 --- a/src/calibre/__init__.py +++ b/src/calibre/__init__.py @@ -132,9 +132,12 @@ def prints(*args, **kwargs): try: arg = arg.encode(enc) except UnicodeEncodeError: - if not safe_encode: - raise - arg = repr(arg) + try: + arg = arg.encode('utf-8') + except: + if not safe_encode: + raise + arg = repr(arg) file.write(arg) if i != len(args)-1: diff --git a/src/calibre/gui2/viewer/documentview.py b/src/calibre/gui2/viewer/documentview.py index 2c2a7fc135..6b95a4dcaa 100644 --- a/src/calibre/gui2/viewer/documentview.py +++ b/src/calibre/gui2/viewer/documentview.py @@ -514,7 +514,7 @@ class DocumentView(QWebView): mt = guess_type(path)[0] html = open(path, 'rb').read().decode(path.encoding, 'replace') html = EntityDeclarationProcessor(html).processed_html - has_svg = re.search(r'<\S*svg', html) is not None + has_svg = re.search(r'<[:a-z]*svg', html) is not None if 'xhtml' in mt: html = self.self_closing_pat.sub(self.self_closing_sub, html) @@ -522,6 +522,7 @@ class DocumentView(QWebView): self.manager.load_started() self.loading_url = QUrl.fromLocalFile(path) if has_svg: + prints('Rendering as XHTML...') self.setContent(QByteArray(html.encode(path.encoding)), mt, QUrl.fromLocalFile(path)) else: self.setHtml(html, self.loading_url) diff --git a/src/calibre/utils/ipc/job.py b/src/calibre/utils/ipc/job.py index 458d5adb8a..a6c39ffc6b 100644 
--- a/src/calibre/utils/ipc/job.py +++ b/src/calibre/utils/ipc/job.py @@ -52,10 +52,13 @@ class BaseJob(object): else: self._status_text = _('Error') if self.failed else _('Finished') if DEBUG: - prints('Job:', self.id, self.description, 'finished', + try: + prints('Job:', self.id, self.description, 'finished', safe_encode=True) - prints('\t'.join(self.details.splitlines(True)), + prints('\t'.join(self.details.splitlines(True)), safe_encode=True) + except: + pass if not self._done_called: self._done_called = True try: