From c55fdcb654d5891fb0b1eb5ca3924bc5fd6bdbe4 Mon Sep 17 00:00:00 2001 From: John Schember Date: Thu, 23 Jul 2009 17:51:38 -0400 Subject: [PATCH] Fix bug #2914: Ignore invalid unicode code points in eReader. --- src/calibre/ebooks/pdb/ereader/reader132.py | 2 ++ src/calibre/ebooks/pdb/ereader/reader202.py | 2 ++ src/calibre/ebooks/pml/pmlconverter.py | 3 ++- 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/calibre/ebooks/pdb/ereader/reader132.py b/src/calibre/ebooks/pdb/ereader/reader132.py index 7821a9e509..52d4778561 100644 --- a/src/calibre/ebooks/pdb/ereader/reader132.py +++ b/src/calibre/ebooks/pdb/ereader/reader132.py @@ -52,6 +52,8 @@ class Reader132(FormatReader): def __init__(self, header, stream, log, encoding=None): self.log = log self.encoding = encoding + + self.log.debug('132 byte header version found.') self.sections = [] for i in range(header.num_sections): diff --git a/src/calibre/ebooks/pdb/ereader/reader202.py b/src/calibre/ebooks/pdb/ereader/reader202.py index ec8380dfe5..0b394fb765 100644 --- a/src/calibre/ebooks/pdb/ereader/reader202.py +++ b/src/calibre/ebooks/pdb/ereader/reader202.py @@ -38,6 +38,8 @@ class Reader202(FormatReader): self.log = log self.encoding = encoding + self.log.debug('202 byte header version found.') + self.sections = [] for i in range(header.num_sections): self.sections.append(header.section_data(i)) diff --git a/src/calibre/ebooks/pml/pmlconverter.py b/src/calibre/ebooks/pml/pmlconverter.py index a96adc5772..2147ec0b38 100644 --- a/src/calibre/ebooks/pml/pmlconverter.py +++ b/src/calibre/ebooks/pml/pmlconverter.py @@ -10,6 +10,7 @@ __docformat__ = 'restructuredtext en' import re +from calibre import my_unichr from calibre.ebooks.pdb.ereader import image_name PML_HTML_RULES = [ @@ -35,7 +36,7 @@ PML_HTML_RULES = [ (re.compile(r'\\Sb(?P<text>.+?)\\Sb', re.DOTALL), lambda match: '%s' % match.group('text')), (re.compile(r'\\k(?P<text>.+?)\\k', re.DOTALL), lambda match: '%s' % match.group('text')), 
(re.compile(r'\\a(?P<num>\d\d\d)'), lambda match: '&#%s;' % match.group('num')), - (re.compile(r'\\U(?P<num>\d+)'), lambda match: '%s' % unichr(int(match.group('num'), 16))), + (re.compile(r'\\U(?P<num>\d+)'), lambda match: '%s' % my_unichr(int(match.group('num'), 16))), (re.compile(r'\\m="(?P<name>.+?)"'), lambda match: '' % image_name(match.group('name')).strip('\x00')), (re.compile(r'\\q="(?P<target>#.+?)"(?P<text>.+?)\\q', re.DOTALL), lambda match: '%s' % (match.group('target'), match.group('text'))), (re.compile(r'\\Q="(?P<target>.+?)"'), lambda match: '' % match.group('target')),