Move entity conversion into LRF parser from renderer

2025-07-09 03:04:10 -04:00 · 2007-10-07 19:30:10 +00:00 · 2007-10-07 19:30:10 +00:00 · 4477a78a5b
commit 4477a78a5b
parent 4914a064c0
2 changed files with 14 additions and 14 deletions
--- a/src/libprs500/ebooks/lrf/objects.py
+++ b/src/libprs500/ebooks/lrf/objects.py
@ -12,7 +12,8 @@
 ##    You should have received a copy of the GNU General Public License along
 ##    with this program; if not, write to the Free Software Foundation, Inc.,
 ##    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-import struct, array, zlib, cStringIO, collections
+import struct, array, zlib, cStringIO, collections, re
 from htmlentitydefs import name2codepoint
 from libprs500.ebooks.lrf import LRFParseError
 from libprs500.ebooks.lrf.tags import Tag
@ -545,6 +546,7 @@ class Text(LRFStream):
    style = property(fget=lambda self : self._document.objects[self.style_id])
    text_map = { 0x22: u'&quot;', 0x26: u'&amp;', 0x27: u'&squot;', 0x3c: u'&lt;', 0x3e: u'&gt;' }
    entity_pattern = re.compile(r'&amp;(\S+?);')
    text_tags = {
           0xF581: ['simple_container', 'Italic'],
@ -605,10 +607,19 @@ class Text(LRFStream):
    lineposition_map = {1:'before', 2:'after'}
    def handle_entity(self, match):
        """Regex substitution callback: decode one entity reference.

        ``match.group(1)`` is the entity body, i.e. the text between the
        ampersand prefix and the trailing semicolon. Three forms are handled:
        ``#x<hex>`` / ``#X<hex>`` (hexadecimal code point), ``#<decimal>``
        (decimal code point) and a name looked up in ``name2codepoint``.

        Returns the decoded character. When the body is not a resolvable
        entity (unknown name, malformed number, code point out of range) the
        matched text is returned unchanged instead of raising, because the
        pattern captures any run of non-whitespace and will therefore also
        match ordinary text that merely contains an ampersand and a semicolon.
        """
        ent = match.group(1)
        try:
            if ent[:2] in (u'#x', u'#X'):
                return unichr(int(ent[2:], 16))
            if ent.startswith(u'#'):
                return unichr(int(ent[1:]))
            return unichr(name2codepoint[ent])
        except (KeyError, ValueError, OverflowError):
            # Not a real entity reference: leave the source text as it was.
            return match.group(0)
    def add_text(self, text):
        # Decode a raw text run and append it to self.content.
        # `text` is decoded as UTF-16 little-endian; an empty result is ignored.
        # The '-' line below is the pre-commit behaviour (escape only); the '+'
        # line and the line after it are the post-commit behaviour: escape the
        # characters listed in self.text_map, then replace every match of
        # self.entity_pattern via self.handle_entity before appending.
        s = unicode(text, "utf-16-le")
        if s:
-            self.content.append(s.translate(self.text_map))
+            s = s.translate(self.text_map)            
            self.content.append(self.entity_pattern.sub(self.handle_entity, s))
    def end_container(self, tag, stream):
        self.content.append(None)
--- a/src/libprs500/gui2/lrf_renderer/text.py
+++ b/src/libprs500/gui2/lrf_renderer/text.py
@ -12,8 +12,6 @@
 ##    You should have received a copy of the GNU General Public License along
 ##    with this program; if not, write to the Free Software Foundation, Inc.,
 ##    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 from libprs500.gui2 import qstring_to_unicode
 import htmlentitydefs
 ''''''
 import sys, collections, operator, copy, re
@ -25,6 +23,7 @@ from PyQt4.QtGui import QFont, QColor, QPixmap, QGraphicsPixmapItem, \
 from libprs500.ebooks.lrf.fonts import FONT_MAP
 from libprs500.ebooks.BeautifulSoup import Tag
 from libprs500.ebooks.hyphenate import hyphenate_word
 from libprs500.gui2 import qstring_to_unicode
 WEIGHT_MAP = lambda wt : int((wt/10.)-1)
 NULL       = lambda a, b: a
@ -176,7 +175,6 @@ class TextBlock(object):
    has_content = property(fget=lambda self: self.peek_index < len(self.lines)-1)
    XML_ENTITIES = dict(zip(Tag.XML_SPECIAL_CHARS_TO_ENTITIES.values(), Tag.XML_SPECIAL_CHARS_TO_ENTITIES.keys())) 
    XML_ENTITIES["quot"] = '"'
    ENTITY_PATTERN = re.compile('&(\S+);')
    def __init__(self, tb, font_loader, respect_max_y, text_width, logger, 
                 opts, ruby_tags, link_activated):
@ -311,18 +309,9 @@ class TextBlock(object):
                                 self.opts.hyphenate, self.block_id)
        self.first_line = False
    def handle_entity(self, match):
        """Regex substitution callback: decode one entity reference.

        ``match.group(1)`` is the text between ``&`` and ``;``. Three forms
        are handled: ``#x<hex>`` / ``#X<hex>`` (hexadecimal code point),
        ``#<decimal>`` (decimal code point) and a name looked up in
        ``htmlentitydefs.name2codepoint``.

        Returns the decoded character. When the captured text is not a
        resolvable entity (unknown name, malformed number, code point out of
        range) the matched text is returned unchanged instead of raising, so
        that stray ampersands in ordinary text do not abort rendering.
        """
        ent = match.group(1)
        try:
            if ent[:2] in (u'#x', u'#X'):
                return unichr(int(ent[2:], 16))
            if ent.startswith(u'#'):
                return unichr(int(ent[1:]))
            return unichr(htmlentitydefs.name2codepoint[ent])
        except (KeyError, ValueError, OverflowError):
            # Not a real entity reference: leave the source text as it was.
            return match.group(0)
    def process_text(self, raw):
        for ent, rep in TextBlock.XML_ENTITIES.items():
            raw = raw.replace(u'&%s;'%ent, rep)
        raw = self.__class__.ENTITY_PATTERN.sub(self.handle_entity, raw)
        while len(raw) > 0:
            if self.current_line is None:
                self.create_line()