Move entity conversion into LRF parser from renderer

This commit is contained in:
Kovid Goyal 2007-10-07 19:30:10 +00:00
parent 4914a064c0
commit 4477a78a5b
2 changed files with 14 additions and 14 deletions

View File

@ -12,7 +12,8 @@
## You should have received a copy of the GNU General Public License along ## You should have received a copy of the GNU General Public License along
## with this program; if not, write to the Free Software Foundation, Inc., ## with this program; if not, write to the Free Software Foundation, Inc.,
## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. ## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
import struct, array, zlib, cStringIO, collections import struct, array, zlib, cStringIO, collections, re
from htmlentitydefs import name2codepoint
from libprs500.ebooks.lrf import LRFParseError from libprs500.ebooks.lrf import LRFParseError
from libprs500.ebooks.lrf.tags import Tag from libprs500.ebooks.lrf.tags import Tag
@ -545,6 +546,7 @@ class Text(LRFStream):
style = property(fget=lambda self : self._document.objects[self.style_id]) style = property(fget=lambda self : self._document.objects[self.style_id])
text_map = { 0x22: u'&quot;', 0x26: u'&amp;', 0x27: u'&squot;', 0x3c: u'&lt;', 0x3e: u'&gt;' } text_map = { 0x22: u'&quot;', 0x26: u'&amp;', 0x27: u'&squot;', 0x3c: u'&lt;', 0x3e: u'&gt;' }
entity_pattern = re.compile(r'&(\S+?);')
text_tags = { text_tags = {
0xF581: ['simple_container', 'Italic'], 0xF581: ['simple_container', 'Italic'],
@ -605,10 +607,19 @@ class Text(LRFStream):
lineposition_map = {1:'before', 2:'after'} lineposition_map = {1:'before', 2:'after'}
def handle_entity(self, match):
ent = match.group(1)
if ent.startswith(u'#x'):
return unichr(int(ent[2:], 16))
if ent.startswith(u'#'):
return unichr(int(ent[1:]))
return unichr(name2codepoint[ent])
def add_text(self, text): def add_text(self, text):
s = unicode(text, "utf-16-le") s = unicode(text, "utf-16-le")
if s: if s:
self.content.append(s.translate(self.text_map)) s = s.translate(self.text_map)
self.content.append(self.entity_pattern.sub(self.handle_entity, s))
def end_container(self, tag, stream): def end_container(self, tag, stream):
self.content.append(None) self.content.append(None)

View File

@ -12,8 +12,6 @@
## You should have received a copy of the GNU General Public License along ## You should have received a copy of the GNU General Public License along
## with this program; if not, write to the Free Software Foundation, Inc., ## with this program; if not, write to the Free Software Foundation, Inc.,
## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. ## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
from libprs500.gui2 import qstring_to_unicode
import htmlentitydefs
'''''' ''''''
import sys, collections, operator, copy, re import sys, collections, operator, copy, re
@ -25,6 +23,7 @@ from PyQt4.QtGui import QFont, QColor, QPixmap, QGraphicsPixmapItem, \
from libprs500.ebooks.lrf.fonts import FONT_MAP from libprs500.ebooks.lrf.fonts import FONT_MAP
from libprs500.ebooks.BeautifulSoup import Tag from libprs500.ebooks.BeautifulSoup import Tag
from libprs500.ebooks.hyphenate import hyphenate_word from libprs500.ebooks.hyphenate import hyphenate_word
from libprs500.gui2 import qstring_to_unicode
WEIGHT_MAP = lambda wt : int((wt/10.)-1) WEIGHT_MAP = lambda wt : int((wt/10.)-1)
NULL = lambda a, b: a NULL = lambda a, b: a
@ -176,7 +175,6 @@ class TextBlock(object):
has_content = property(fget=lambda self: self.peek_index < len(self.lines)-1) has_content = property(fget=lambda self: self.peek_index < len(self.lines)-1)
XML_ENTITIES = dict(zip(Tag.XML_SPECIAL_CHARS_TO_ENTITIES.values(), Tag.XML_SPECIAL_CHARS_TO_ENTITIES.keys())) XML_ENTITIES = dict(zip(Tag.XML_SPECIAL_CHARS_TO_ENTITIES.values(), Tag.XML_SPECIAL_CHARS_TO_ENTITIES.keys()))
XML_ENTITIES["quot"] = '"' XML_ENTITIES["quot"] = '"'
ENTITY_PATTERN = re.compile('&(\S+);')
def __init__(self, tb, font_loader, respect_max_y, text_width, logger, def __init__(self, tb, font_loader, respect_max_y, text_width, logger,
opts, ruby_tags, link_activated): opts, ruby_tags, link_activated):
@ -311,18 +309,9 @@ class TextBlock(object):
self.opts.hyphenate, self.block_id) self.opts.hyphenate, self.block_id)
self.first_line = False self.first_line = False
def handle_entity(self, match):
ent = match.group(1)
if ent.startswith(u'#x'):
return unichr(int(ent[2:], 16))
if ent.startswith(u'#'):
return unichr(int(ent[1:]))
return unichr(htmlentitydefs.name2codepoint[ent])
def process_text(self, raw): def process_text(self, raw):
for ent, rep in TextBlock.XML_ENTITIES.items(): for ent, rep in TextBlock.XML_ENTITIES.items():
raw = raw.replace(u'&%s;'%ent, rep) raw = raw.replace(u'&%s;'%ent, rep)
raw = self.__class__.ENTITY_PATTERN.sub(self.handle_entity, raw)
while len(raw) > 0: while len(raw) > 0:
if self.current_line is None: if self.current_line is None:
self.create_line() self.create_line()