mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Move entity conversion into LRF parser from renderer
This commit is contained in:
parent
4914a064c0
commit
4477a78a5b
@ -12,7 +12,8 @@
|
|||||||
## You should have received a copy of the GNU General Public License along
|
## You should have received a copy of the GNU General Public License along
|
||||||
## with this program; if not, write to the Free Software Foundation, Inc.,
|
## with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
import struct, array, zlib, cStringIO, collections
|
import struct, array, zlib, cStringIO, collections, re
|
||||||
|
from htmlentitydefs import name2codepoint
|
||||||
|
|
||||||
from libprs500.ebooks.lrf import LRFParseError
|
from libprs500.ebooks.lrf import LRFParseError
|
||||||
from libprs500.ebooks.lrf.tags import Tag
|
from libprs500.ebooks.lrf.tags import Tag
|
||||||
@ -545,6 +546,7 @@ class Text(LRFStream):
|
|||||||
style = property(fget=lambda self : self._document.objects[self.style_id])
|
style = property(fget=lambda self : self._document.objects[self.style_id])
|
||||||
|
|
||||||
text_map = { 0x22: u'"', 0x26: u'&', 0x27: u'&squot;', 0x3c: u'<', 0x3e: u'>' }
|
text_map = { 0x22: u'"', 0x26: u'&', 0x27: u'&squot;', 0x3c: u'<', 0x3e: u'>' }
|
||||||
|
entity_pattern = re.compile(r'&(\S+?);')
|
||||||
|
|
||||||
text_tags = {
|
text_tags = {
|
||||||
0xF581: ['simple_container', 'Italic'],
|
0xF581: ['simple_container', 'Italic'],
|
||||||
@ -605,10 +607,19 @@ class Text(LRFStream):
|
|||||||
lineposition_map = {1:'before', 2:'after'}
|
lineposition_map = {1:'before', 2:'after'}
|
||||||
|
|
||||||
|
|
||||||
|
def handle_entity(self, match):
    '''
    Return the character denoted by one matched entity reference.

    Meant to be used as the replacement callback of
    C{entity_pattern.sub}: group 1 of C{match} is the text between the
    leading C{&} and the trailing C{;}.

    Three forms are decoded: hexadecimal (C{#x41}), decimal (C{#65}) and
    named (looked up in C{name2codepoint}).

    @param match: regular expression match object whose group 1 holds
                  the entity body.
    @return: the decoded character, or the matched text unchanged when
             the reference cannot be decoded.
    '''
    ent = match.group(1)
    try:
        if ent.startswith(u'#x'):
            return unichr(int(ent[2:], 16))
        if ent.startswith(u'#'):
            return unichr(int(ent[1:]))
        return unichr(name2codepoint[ent])
    except (KeyError, ValueError, OverflowError):
        # Unknown entity name (KeyError), malformed digits or a code point
        # unichr rejects (ValueError), or an absurdly large number
        # (OverflowError). A stray "&...;" in book text must not abort
        # parsing of the whole stream, so leave it exactly as found.
        return match.group(0)
|
||||||
|
|
||||||
def add_text(self, text):
|
def add_text(self, text):
|
||||||
s = unicode(text, "utf-16-le")
|
s = unicode(text, "utf-16-le")
|
||||||
if s:
|
if s:
|
||||||
self.content.append(s.translate(self.text_map))
|
s = s.translate(self.text_map)
|
||||||
|
self.content.append(self.entity_pattern.sub(self.handle_entity, s))
|
||||||
|
|
||||||
def end_container(self, tag, stream):
|
def end_container(self, tag, stream):
|
||||||
self.content.append(None)
|
self.content.append(None)
|
||||||
|
@ -12,8 +12,6 @@
|
|||||||
## You should have received a copy of the GNU General Public License along
|
## You should have received a copy of the GNU General Public License along
|
||||||
## with this program; if not, write to the Free Software Foundation, Inc.,
|
## with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
from libprs500.gui2 import qstring_to_unicode
|
|
||||||
import htmlentitydefs
|
|
||||||
''''''
|
''''''
|
||||||
|
|
||||||
import sys, collections, operator, copy, re
|
import sys, collections, operator, copy, re
|
||||||
@ -25,6 +23,7 @@ from PyQt4.QtGui import QFont, QColor, QPixmap, QGraphicsPixmapItem, \
|
|||||||
from libprs500.ebooks.lrf.fonts import FONT_MAP
|
from libprs500.ebooks.lrf.fonts import FONT_MAP
|
||||||
from libprs500.ebooks.BeautifulSoup import Tag
|
from libprs500.ebooks.BeautifulSoup import Tag
|
||||||
from libprs500.ebooks.hyphenate import hyphenate_word
|
from libprs500.ebooks.hyphenate import hyphenate_word
|
||||||
|
from libprs500.gui2 import qstring_to_unicode
|
||||||
|
|
||||||
WEIGHT_MAP = lambda wt : int((wt/10.)-1)
|
WEIGHT_MAP = lambda wt : int((wt/10.)-1)
|
||||||
NULL = lambda a, b: a
|
NULL = lambda a, b: a
|
||||||
@ -176,7 +175,6 @@ class TextBlock(object):
|
|||||||
has_content = property(fget=lambda self: self.peek_index < len(self.lines)-1)
|
has_content = property(fget=lambda self: self.peek_index < len(self.lines)-1)
|
||||||
XML_ENTITIES = dict(zip(Tag.XML_SPECIAL_CHARS_TO_ENTITIES.values(), Tag.XML_SPECIAL_CHARS_TO_ENTITIES.keys()))
|
XML_ENTITIES = dict(zip(Tag.XML_SPECIAL_CHARS_TO_ENTITIES.values(), Tag.XML_SPECIAL_CHARS_TO_ENTITIES.keys()))
|
||||||
XML_ENTITIES["quot"] = '"'
|
XML_ENTITIES["quot"] = '"'
|
||||||
ENTITY_PATTERN = re.compile('&(\S+);')
|
|
||||||
|
|
||||||
def __init__(self, tb, font_loader, respect_max_y, text_width, logger,
|
def __init__(self, tb, font_loader, respect_max_y, text_width, logger,
|
||||||
opts, ruby_tags, link_activated):
|
opts, ruby_tags, link_activated):
|
||||||
@ -311,18 +309,9 @@ class TextBlock(object):
|
|||||||
self.opts.hyphenate, self.block_id)
|
self.opts.hyphenate, self.block_id)
|
||||||
self.first_line = False
|
self.first_line = False
|
||||||
|
|
||||||
def handle_entity(self, match):
|
|
||||||
ent = match.group(1)
|
|
||||||
if ent.startswith(u'#x'):
|
|
||||||
return unichr(int(ent[2:], 16))
|
|
||||||
if ent.startswith(u'#'):
|
|
||||||
return unichr(int(ent[1:]))
|
|
||||||
return unichr(htmlentitydefs.name2codepoint[ent])
|
|
||||||
|
|
||||||
def process_text(self, raw):
|
def process_text(self, raw):
|
||||||
for ent, rep in TextBlock.XML_ENTITIES.items():
|
for ent, rep in TextBlock.XML_ENTITIES.items():
|
||||||
raw = raw.replace(u'&%s;'%ent, rep)
|
raw = raw.replace(u'&%s;'%ent, rep)
|
||||||
raw = self.__class__.ENTITY_PATTERN.sub(self.handle_entity, raw)
|
|
||||||
while len(raw) > 0:
|
while len(raw) > 0:
|
||||||
if self.current_line is None:
|
if self.current_line is None:
|
||||||
self.create_line()
|
self.create_line()
|
||||||
|
Loading…
x
Reference in New Issue
Block a user