Move entity conversion into LRF parser from renderer

This commit is contained in:
Kovid Goyal 2007-10-07 19:30:10 +00:00
parent 4914a064c0
commit 4477a78a5b
2 changed files with 14 additions and 14 deletions

View File

@ -12,7 +12,8 @@
## You should have received a copy of the GNU General Public License along
## with this program; if not, write to the Free Software Foundation, Inc.,
## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
import struct, array, zlib, cStringIO, collections
import struct, array, zlib, cStringIO, collections, re
from htmlentitydefs import name2codepoint
from libprs500.ebooks.lrf import LRFParseError
from libprs500.ebooks.lrf.tags import Tag
@ -545,6 +546,7 @@ class Text(LRFStream):
# Read-only property: the entry of self._document.objects keyed by
# self.style_id.
style = property(fget=lambda self : self._document.objects[self.style_id])
# Translation table (code point -> replacement text) passed to
# unicode.translate() in add_text.
# NOTE(review): 0x27 maps to '&squot;'; 'squot' is not a key of
# htmlentitydefs.name2codepoint, which handle_entity uses for named
# references -- confirm that apostrophes survive entity expansion.
text_map = { 0x22: u'"', 0x26: u'&', 0x27: u'&squot;', 0x3c: u'<', 0x3e: u'>' }
# Matches an '&...;' reference; group 1 is the shortest run of
# non-whitespace characters between the '&' and the ';'.
entity_pattern = re.compile(r'&(\S+?);')
text_tags = {
0xF581: ['simple_container', 'Italic'],
@ -605,10 +607,19 @@ class Text(LRFStream):
# Lookup table from integer code (1 or 2) to the name 'before' / 'after'.
lineposition_map = {1:'before', 2:'after'}
def handle_entity(self, match):
    """
    Return the replacement text for one ``&...;`` reference.

    ``match`` is a regular-expression match whose group 1 holds the text
    between the ``&`` and the ``;``. ``#x<hex>`` and ``#<decimal>`` forms
    are decoded as code points; anything else is looked up by name in
    ``name2codepoint``.

    A reference that cannot be resolved is returned unchanged rather than
    raising. This covers unknown names (for example the ``squot`` that
    ``text_map`` produces for an apostrophe, or ordinary text such as
    ``AT&T;``), malformed digits, and code points that ``unichr`` rejects.
    Without the fallback any such text aborts parsing with ``KeyError`` or
    ``ValueError``.
    """
    ent = match.group(1)
    try:
        if ent.startswith(u'#x'):
            return unichr(int(ent[2:], 16))
        if ent.startswith(u'#'):
            return unichr(int(ent[1:]))
        return unichr(name2codepoint[ent])
    except (KeyError, ValueError, OverflowError):
        # Not decodable: keep the original '&...;' text exactly as it was.
        return match.group(0)
def add_text(self, text):
# Decode a run of UTF-16-LE bytes and append the resulting text to
# self.content; a run that decodes to the empty string is ignored.
s = unicode(text, "utf-16-le")
if s:
# NOTE(review): the append on the next line and the translate/append pair
# after it look like the before and after sides of one change shown
# together in a diff view -- confirm that only one append is live.
self.content.append(s.translate(self.text_map))
# Apply text_map first, then replace every entity_pattern match with the
# value returned by handle_entity.
s = s.translate(self.text_map)
self.content.append(self.entity_pattern.sub(self.handle_entity, s))
def end_container(self, tag, stream):
self.content.append(None)

View File

@ -12,8 +12,6 @@
## You should have received a copy of the GNU General Public License along
## with this program; if not, write to the Free Software Foundation, Inc.,
## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
from libprs500.gui2 import qstring_to_unicode
import htmlentitydefs
''''''
import sys, collections, operator, copy, re
@ -25,6 +23,7 @@ from PyQt4.QtGui import QFont, QColor, QPixmap, QGraphicsPixmapItem, \
from libprs500.ebooks.lrf.fonts import FONT_MAP
from libprs500.ebooks.BeautifulSoup import Tag
from libprs500.ebooks.hyphenate import hyphenate_word
from libprs500.gui2 import qstring_to_unicode
def WEIGHT_MAP(wt):
    """Return ``wt`` divided by ten, minus one, truncated to an int."""
    scaled = wt / 10.
    return int(scaled - 1)
def NULL(a, b):
    """Return the first argument unchanged; the second is ignored."""
    return a
@ -176,7 +175,6 @@ class TextBlock(object):
# Read-only property: True while peek_index is below the last index of
# self.lines.
has_content = property(fget=lambda self: self.peek_index < len(self.lines)-1)
# Tag.XML_SPECIAL_CHARS_TO_ENTITIES inverted (its values become the keys);
# process_text replaces each '&<key>;' with the mapped value.
XML_ENTITIES = dict(zip(Tag.XML_SPECIAL_CHARS_TO_ENTITIES.values(), Tag.XML_SPECIAL_CHARS_TO_ENTITIES.keys()))
# Extra entry so that '&quot;' is replaced by a double quote.
XML_ENTITIES["quot"] = '"'
# Matches '&...;'. The capture is greedy, so with several references and
# no whitespace between them group 1 extends to the last ';'.
ENTITY_PATTERN = re.compile('&(\S+);')
def __init__(self, tb, font_loader, respect_max_y, text_width, logger,
opts, ruby_tags, link_activated):
@ -311,18 +309,9 @@ class TextBlock(object):
self.opts.hyphenate, self.block_id)
self.first_line = False
def handle_entity(self, match):
    """
    Return the replacement text for one ``&...;`` reference.

    ``match`` is a regular-expression match whose group 1 holds the text
    between the ``&`` and the ``;``. ``#x<hex>`` and ``#<decimal>`` forms
    are decoded as code points; anything else is looked up by name in
    ``htmlentitydefs.name2codepoint``.

    A reference that cannot be resolved is returned unchanged rather than
    raising. Because ``ENTITY_PATTERN`` captures greedily, group 1 can span
    several references (for example ``eacute;x&eacute``), which is never a
    known name. Unknown names, malformed digits, and code points that
    ``unichr`` rejects would otherwise abort rendering with ``KeyError``
    or ``ValueError``.
    """
    ent = match.group(1)
    try:
        if ent.startswith(u'#x'):
            return unichr(int(ent[2:], 16))
        if ent.startswith(u'#'):
            return unichr(int(ent[1:]))
        return unichr(htmlentitydefs.name2codepoint[ent])
    except (KeyError, ValueError, OverflowError):
        # Not decodable: keep the original '&...;' text exactly as it was.
        return match.group(0)
def process_text(self, raw):
for ent, rep in TextBlock.XML_ENTITIES.items():
raw = raw.replace(u'&%s;'%ent, rep)
raw = self.__class__.ENTITY_PATTERN.sub(self.handle_entity, raw)
while len(raw) > 0:
if self.current_line is None:
self.create_line()