From b4d599d118860c796307aa12eea1f03bc61239bf Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 9 Oct 2007 17:26:20 +0000 Subject: [PATCH] Fix #235 --- src/libprs500/ebooks/lrf/__init__.py | 12 +++++++++++- src/libprs500/ebooks/lrf/objects.py | 12 ++---------- src/libprs500/ebooks/metadata/opf.py | 17 ++++++++++------- 3 files changed, 23 insertions(+), 18 deletions(-) diff --git a/src/libprs500/ebooks/lrf/__init__.py b/src/libprs500/ebooks/lrf/__init__.py index d0d001afd3..0f8ebe86e1 100644 --- a/src/libprs500/ebooks/lrf/__init__.py +++ b/src/libprs500/ebooks/lrf/__init__.py @@ -18,6 +18,8 @@ The LRF file format is documented at U{http://www.sven.de/librie/Librie/LrfForma """ import sys, os from optparse import OptionParser, OptionValueError +from htmlentitydefs import name2codepoint + from ttfquery import describe, findsystem from fontTools.ttLib import TTLibError @@ -288,4 +290,12 @@ def Book(options, logger, font_delta=0, header=None, fonts[family] = { 'normal' : (None, profile.default_fonts[family]) } elif not fonts[family].has_key('normal'): raise ConversionError, 'Could not find the normal version of the ' + family + ' font' - return book, fonts \ No newline at end of file + return book, fonts + +def entity_to_unicode(match): + ent = match.group(1) + if ent.startswith(u'#x'): + return unichr(int(ent[2:], 16)) + if ent.startswith(u'#'): + return unichr(int(ent[1:])) + return unichr(name2codepoint[ent]) diff --git a/src/libprs500/ebooks/lrf/objects.py b/src/libprs500/ebooks/lrf/objects.py index efcfdae457..d192010ede 100644 --- a/src/libprs500/ebooks/lrf/objects.py +++ b/src/libprs500/ebooks/lrf/objects.py @@ -12,6 +12,7 @@ ## You should have received a copy of the GNU General Public License along ## with this program; if not, write to the Free Software Foundation, Inc., ## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+from libprs500.ebooks.lrf import entity_to_unicode import struct, array, zlib, cStringIO, collections, re from htmlentitydefs import name2codepoint @@ -606,20 +607,11 @@ class Text(LRFStream): adjustment_map = {1: 'top', 2: 'center', 3: 'baseline', 4: 'bottom'} lineposition_map = {1:'before', 2:'after'} - - def handle_entity(self, match): - ent = match.group(1) - if ent.startswith(u'#x'): - return unichr(int(ent[2:], 16)) - if ent.startswith(u'#'): - return unichr(int(ent[1:])) - return unichr(name2codepoint[ent]) - def add_text(self, text): s = unicode(text, "utf-16-le") if s: s = s.translate(self.text_map) - self.content.append(self.entity_pattern.sub(self.handle_entity, s)) + self.content.append(self.entity_pattern.sub(entity_to_unicode, s)) def end_container(self, tag, stream): self.content.append(None) diff --git a/src/libprs500/ebooks/metadata/opf.py b/src/libprs500/ebooks/metadata/opf.py index 883e0745fe..4d77ef6bb3 100644 --- a/src/libprs500/ebooks/metadata/opf.py +++ b/src/libprs500/ebooks/metadata/opf.py @@ -14,13 +14,16 @@ ## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
'''Read/Write metadata from Open Packaging Format (.opf) files.''' -import sys +import sys, re from libprs500.ebooks.metadata import MetaInformation from libprs500.ebooks.BeautifulSoup import BeautifulStoneSoup +from libprs500.ebooks.lrf import entity_to_unicode class OPFReader(MetaInformation): + ENTITY_PATTERN = re.compile(r'&(\S+?);') + def __init__(self, stream): self.default_title = stream.name if hasattr(stream, 'name') else 'Unknown' if hasattr(stream, 'seek'): @@ -34,7 +37,7 @@ class OPFReader(MetaInformation): def fget(self): title = self.soup.package.metadata.find('dc:title') if title: - return title.string + return self.ENTITY_PATTERN.sub(entity_to_unicode, title.string) return self.default_title return property(doc=doc, fget=fget) @@ -52,7 +55,7 @@ class OPFReader(MetaInformation): ans = [] for i in au: ans.extend(i.split('&')) - return ans + return [self.ENTITY_PATTERN.sub(entity_to_unicode, a) for a in ans] return None return property(doc=doc, fget=fget) @@ -67,7 +70,7 @@ class OPFReader(MetaInformation): role = elem.get('opf:role') if role == 'aut': fa = elem.get('file-as') - return fa if fa else None + return self.ENTITY_PATTERN.sub(entity_to_unicode, fa) if fa else None return property(doc=doc, fget=fget) @apply @@ -83,7 +86,7 @@ class OPFReader(MetaInformation): def fget(self): comments = self.soup.find('dc:description') if comments: - return comments.string + return self.ENTITY_PATTERN.sub(entity_to_unicode, comments.string) return None return property(doc=doc, fget=fget) @@ -93,7 +96,7 @@ class OPFReader(MetaInformation): def fget(self): category = self.soup.find('dc:type') if category: - return category.string + return self.ENTITY_PATTERN.sub(entity_to_unicode, category.string) return None return property(doc=doc, fget=fget) @@ -103,7 +106,7 @@ class OPFReader(MetaInformation): def fget(self): publisher = self.soup.find('dc:publisher') if publisher: - return publisher.string + return self.ENTITY_PATTERN.sub(entity_to_unicode, publisher.string) return None 
return property(doc=doc, fget=fget)