Fix bug #2914: Ignore invalid Unicode code points in eReader.

This commit is contained in:
John Schember 2009-07-23 17:51:38 -04:00
parent 50b71bd449
commit c55fdcb654
3 changed files with 6 additions and 1 deletions

View File

@@ -52,6 +52,8 @@ class Reader132(FormatReader):
def __init__(self, header, stream, log, encoding=None):
self.log = log
self.encoding = encoding
self.log.debug('132 byte header version found.')
self.sections = []
for i in range(header.num_sections):

View File

@@ -38,6 +38,8 @@ class Reader202(FormatReader):
self.log = log
self.encoding = encoding
self.log.debug('202 byte header version found.')
self.sections = []
for i in range(header.num_sections):
self.sections.append(header.section_data(i))

View File

@@ -10,6 +10,7 @@ __docformat__ = 'restructuredtext en'
import re
from calibre import my_unichr
from calibre.ebooks.pdb.ereader import image_name
PML_HTML_RULES = [
@@ -35,7 +36,7 @@ PML_HTML_RULES = [
(re.compile(r'\\Sb(?P<text>.+?)\\Sb', re.DOTALL), lambda match: '<sub>%s</sub>' % match.group('text')),
(re.compile(r'\\k(?P<text>.+?)\\k', re.DOTALL), lambda match: '<small>%s</small>' % match.group('text')),
(re.compile(r'\\a(?P<num>\d\d\d)'), lambda match: '&#%s;' % match.group('num')),
(re.compile(r'\\U(?P<num>\d+)'), lambda match: '%s' % unichr(int(match.group('num'), 16))),
(re.compile(r'\\U(?P<num>\d+)'), lambda match: '%s' % my_unichr(int(match.group('num'), 16))),
(re.compile(r'\\m="(?P<name>.+?)"'), lambda match: '<img src="images/%s" />' % image_name(match.group('name')).strip('\x00')),
(re.compile(r'\\q="(?P<target>#.+?)"(?P<text>.+?)\\q', re.DOTALL), lambda match: '<a href="%s">%s</a>' % (match.group('target'), match.group('text'))),
(re.compile(r'\\Q="(?P<target>.+?)"'), lambda match: '<span id="%s"></span>' % match.group('target')),