From 4477a78a5b95ef10b9ff316cf3b32fb228d923f4 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 7 Oct 2007 19:30:10 +0000 Subject: [PATCH] Move entity conversion into LRF parser from renderer --- src/libprs500/ebooks/lrf/objects.py | 15 +++++++++++++-- src/libprs500/gui2/lrf_renderer/text.py | 13 +------------ 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/src/libprs500/ebooks/lrf/objects.py b/src/libprs500/ebooks/lrf/objects.py index 186c7f3cd7..efcfdae457 100644 --- a/src/libprs500/ebooks/lrf/objects.py +++ b/src/libprs500/ebooks/lrf/objects.py @@ -12,7 +12,8 @@ ## You should have received a copy of the GNU General Public License along ## with this program; if not, write to the Free Software Foundation, Inc., ## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -import struct, array, zlib, cStringIO, collections +import struct, array, zlib, cStringIO, collections, re +from htmlentitydefs import name2codepoint from libprs500.ebooks.lrf import LRFParseError from libprs500.ebooks.lrf.tags import Tag @@ -545,6 +546,7 @@ class Text(LRFStream): style = property(fget=lambda self : self._document.objects[self.style_id]) text_map = { 0x22: u'"', 0x26: u'&', 0x27: u'&squot;', 0x3c: u'<', 0x3e: u'>' } + entity_pattern = re.compile(r'&(\S+?);') text_tags = { 0xF581: ['simple_container', 'Italic'], @@ -605,10 +607,19 @@ class Text(LRFStream): lineposition_map = {1:'before', 2:'after'} + def handle_entity(self, match): + ent = match.group(1) + if ent.startswith(u'#x'): + return unichr(int(ent[2:], 16)) + if ent.startswith(u'#'): + return unichr(int(ent[1:])) + return unichr(name2codepoint[ent]) + def add_text(self, text): s = unicode(text, "utf-16-le") if s: - self.content.append(s.translate(self.text_map)) + s = s.translate(self.text_map) + self.content.append(self.entity_pattern.sub(self.handle_entity, s)) def end_container(self, tag, stream): self.content.append(None) diff --git a/src/libprs500/gui2/lrf_renderer/text.py b/src/libprs500/gui2/lrf_renderer/text.py index 754e5f2b66..88b3d1c564 100644 --- a/src/libprs500/gui2/lrf_renderer/text.py +++ b/src/libprs500/gui2/lrf_renderer/text.py @@ -12,8 +12,6 @@ ## You should have received a copy of the GNU General Public License along ## with this program; if not, write to the Free Software Foundation, Inc., ## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -from libprs500.gui2 import qstring_to_unicode -import htmlentitydefs '''''' import sys, collections, operator, copy, re @@ -25,6 +23,7 @@ from PyQt4.QtGui import QFont, QColor, QPixmap, QGraphicsPixmapItem, \ from libprs500.ebooks.lrf.fonts import FONT_MAP from libprs500.ebooks.BeautifulSoup import Tag from libprs500.ebooks.hyphenate import hyphenate_word +from libprs500.gui2 import qstring_to_unicode WEIGHT_MAP = lambda wt : int((wt/10.)-1) NULL = lambda a, b: a @@ -176,7 +175,6 @@ class TextBlock(object): has_content = property(fget=lambda self: self.peek_index < len(self.lines)-1) XML_ENTITIES = dict(zip(Tag.XML_SPECIAL_CHARS_TO_ENTITIES.values(), Tag.XML_SPECIAL_CHARS_TO_ENTITIES.keys())) XML_ENTITIES["quot"] = '"' - ENTITY_PATTERN = re.compile('&(\S+);') def __init__(self, tb, font_loader, respect_max_y, text_width, logger, opts, ruby_tags, link_activated): @@ -311,18 +309,9 @@ class TextBlock(object): self.opts.hyphenate, self.block_id) self.first_line = False - def handle_entity(self, match): - ent = match.group(1) - if ent.startswith(u'#x'): - return unichr(int(ent[2:], 16)) - if ent.startswith(u'#'): - return unichr(int(ent[1:])) - return unichr(htmlentitydefs.name2codepoint[ent]) - def process_text(self, raw): for ent, rep in TextBlock.XML_ENTITIES.items(): raw = raw.replace(u'&%s;'%ent, rep) - raw = self.__class__.ENTITY_PATTERN.sub(self.handle_entity, raw) while len(raw) > 0: if self.current_line is None: self.create_line()