Fix #235

2025-07-09 03:04:10 -04:00 · 2007-10-09 17:26:20 +00:00 · 2007-10-09 17:26:20 +00:00 · b4d599d118
commit b4d599d118
parent 9cbe71c384
3 changed files with 23 additions and 18 deletions
--- a/src/libprs500/ebooks/lrf/init.py
+++ b/src/libprs500/ebooks/lrf/init.py
@ -18,6 +18,8 @@ The LRF file format is documented at U{http://www.sven.de/librie/Librie/LrfForma
 """
 import sys, os
 from optparse import OptionParser, OptionValueError
 from htmlentitydefs import name2codepoint
 from ttfquery import describe, findsystem
 from fontTools.ttLib import TTLibError
@ -289,3 +291,11 @@ def Book(options, logger, font_delta=0, header=None,
        elif not fonts[family].has_key('normal'):
            raise ConversionError, 'Could not find the normal version of the ' + family + ' font'
    return book, fonts
 def entity_to_unicode(self, match=None):
    """
    Convert a matched XML/HTML entity into the unicode character it denotes.

    Meant to be used as the replacement callback of a compiled pattern's
    C{sub()} method, where group 1 of the pattern captures the entity body,
    i.e. the text between C{&} and C{;}. Three forms are understood:
    hexadecimal (C{#x41}), decimal (C{#65}) and named (C{amp}).

    @param self: When called as a substitution callback (one argument) this
                 is the match object itself. When called with two arguments
                 it is ignored, which keeps the original two-argument
                 calling convention working.
    @param match: The regular expression match object, or C{None} when the
                  match was passed as the first argument.
    @return: The decoded character as a unicode string. Entities that cannot
             be decoded (unknown name, malformed number, code point out of
             range) are returned unchanged.
    """
    # A substitution callback receives exactly one argument, the match
    # object, so with the (self, match) signature it lands in ``self``.
    if match is None:
        match = self
    ent = match.group(1)
    try:
        if ent.startswith(u'#x'):
            return unichr(int(ent[2:], 16))
        if ent.startswith(u'#'):
            return unichr(int(ent[1:]))
        return unichr(name2codepoint[ent])
    except (KeyError, ValueError, OverflowError):
        # Leave text that merely looks like an entity untouched rather than
        # aborting the whole substitution.
        return match.group(0)
--- a/src/libprs500/ebooks/lrf/objects.py
+++ b/src/libprs500/ebooks/lrf/objects.py
@ -12,6 +12,7 @@
 ##    You should have received a copy of the GNU General Public License along
 ##    with this program; if not, write to the Free Software Foundation, Inc.,
 ##    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 from libprs500.ebooks.lrf import entity_to_unicode
 import struct, array, zlib, cStringIO, collections, re
 from htmlentitydefs import name2codepoint
@ -606,20 +607,11 @@ class Text(LRFStream):
    adjustment_map = {1: 'top', 2: 'center', 3: 'baseline', 4: 'bottom'}
    lineposition_map = {1:'before', 2:'after'}
    def handle_entity(self, match):
        """
        Return the unicode character for the entity captured in group 1 of
        C{match}.

        The captured text is either a named entity (looked up in
        C{name2codepoint}), a hexadecimal reference starting with C{#x}, or
        a decimal reference starting with C{#}.

        @param match: Regular expression match whose first group holds the
                      entity body without the surrounding C{&} and C{;}.
        @return: A one character unicode string.
        """
        name = match.group(1)
        if not name.startswith(u'#'):
            # Named entity such as "amp" or "lt".
            return unichr(name2codepoint[name])
        if name.startswith(u'#x'):
            codepoint = int(name[2:], 16)
        else:
            codepoint = int(name[1:])
        return unichr(codepoint)
    def add_text(self, text):
        s = unicode(text, "utf-16-le")
        if s:
            s = s.translate(self.text_map)            
-            self.content.append(self.entity_pattern.sub(self.handle_entity, s))
+            self.content.append(self.entity_pattern.sub(entity_to_unicode, s))
    def end_container(self, tag, stream):
        self.content.append(None)
--- a/src/libprs500/ebooks/metadata/opf.py
+++ b/src/libprs500/ebooks/metadata/opf.py
@ -14,13 +14,16 @@
 ##    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 '''Read/Write metadata from Open Packaging Format (.opf) files.'''
-import sys
+import sys, re
 from libprs500.ebooks.metadata import MetaInformation
 from libprs500.ebooks.BeautifulSoup import BeautifulStoneSoup
 from libprs500.ebooks.lrf import entity_to_unicode
 class OPFReader(MetaInformation):
    ENTITY_PATTERN = re.compile(r'&(\S+);')
    def __init__(self, stream):
        self.default_title = stream.name if hasattr(stream, 'name') else 'Unknown' 
        if hasattr(stream, 'seek'):
@ -34,7 +37,7 @@ class OPFReader(MetaInformation):
        def fget(self):
            title = self.soup.package.metadata.find('dc:title')
            if title:
-                return title.string
+                return self.ENTITY_PATTERN.sub(entity_to_unicode, title.string)
            return self.default_title
        return property(doc=doc, fget=fget)
@ -52,7 +55,7 @@ class OPFReader(MetaInformation):
                    ans = []
                    for i in au:
                        ans.extend(i.split('&'))
-                    return ans
+                    return self.ENTITY_PATTERN.sub(entity_to_unicode, ans)
            return None
        return property(doc=doc, fget=fget)
@ -67,7 +70,7 @@ class OPFReader(MetaInformation):
                    role = elem.get('opf:role')
                if role == 'aut':
                    fa = elem.get('file-as')
-                    return fa if fa else None
+                    return self.ENTITY_PATTERN.sub(entity_to_unicode, fa) if fa else None
        return property(doc=doc, fget=fget)
    @apply
@ -83,7 +86,7 @@ class OPFReader(MetaInformation):
        def fget(self):
            comments = self.soup.find('dc:description')
            if comments:
-                return comments.string
+                return self.ENTITY_PATTERN.sub(entity_to_unicode, comments.string)
            return None
        return property(doc=doc, fget=fget)
@ -93,7 +96,7 @@ class OPFReader(MetaInformation):
        def fget(self):
            category = self.soup.find('dc:type')
            if category:
-                return category.string
+                return self.ENTITY_PATTERN.sub(entity_to_unicode, category.string)
            return None
        return property(doc=doc, fget=fget)
@ -103,7 +106,7 @@ class OPFReader(MetaInformation):
        def fget(self):
            publisher = self.soup.find('dc:publisher')
            if publisher:
-                return publisher.string
+                return self.ENTITY_PATTERN.sub(entity_to_unicode, publisher.string)
            return None
        return property(doc=doc, fget=fget)