This commit is contained in:
Kovid Goyal 2007-10-09 17:26:20 +00:00
parent 9cbe71c384
commit b4d599d118
3 changed files with 23 additions and 18 deletions

View File

@ -18,6 +18,8 @@ The LRF file format is documented at U{http://www.sven.de/librie/Librie/LrfForma
""" """
import sys, os import sys, os
from optparse import OptionParser, OptionValueError from optparse import OptionParser, OptionValueError
from htmlentitydefs import name2codepoint
from ttfquery import describe, findsystem from ttfquery import describe, findsystem
from fontTools.ttLib import TTLibError from fontTools.ttLib import TTLibError
@ -289,3 +291,11 @@ def Book(options, logger, font_delta=0, header=None,
elif not fonts[family].has_key('normal'): elif not fonts[family].has_key('normal'):
raise ConversionError, 'Could not find the normal version of the ' + family + ' font' raise ConversionError, 'Could not find the normal version of the ' + family + ' font'
return book, fonts return book, fonts
def entity_to_unicode(self, match=None):
    """
    Convert one matched HTML/XML entity into the unicode character it names.

    Intended as the replacement callback for C{pattern.sub()}, where the
    pattern captures the entity body (the text between C{&} and C{;}) in
    group 1. The regular expression machinery calls the callback with a
    single argument, the match object, so that call form must work.

    @param self: The match object when called with one argument (the
                 C{pattern.sub()} case). Ignored when C{match} is given; the
                 name is kept so two-argument calls remain valid.
    @param match: The match object when called with two arguments.
    @return: The unicode character for a hexadecimal (C{#x41}), decimal
             (C{#65}) or named (C{amp}) entity. If the entity cannot be
             resolved, the matched text is returned unchanged.
    """
    if match is None:
        # Single-argument call from pattern.sub(): the only positional
        # argument is the match object itself.
        match = self
    ent = match.group(1)
    try:
        if ent.startswith(u'#x'):
            return unichr(int(ent[2:], 16))
        if ent.startswith(u'#'):
            return unichr(int(ent[1:]))
        return unichr(name2codepoint[ent])
    except (KeyError, ValueError, OverflowError):
        # Unknown entity name, malformed number or code point out of range:
        # leave the source text alone instead of aborting the substitution.
        return match.group(0)

View File

@ -12,6 +12,7 @@
## You should have received a copy of the GNU General Public License along ## You should have received a copy of the GNU General Public License along
## with this program; if not, write to the Free Software Foundation, Inc., ## with this program; if not, write to the Free Software Foundation, Inc.,
## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. ## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
from libprs500.ebooks.lrf import entity_to_unicode
import struct, array, zlib, cStringIO, collections, re import struct, array, zlib, cStringIO, collections, re
from htmlentitydefs import name2codepoint from htmlentitydefs import name2codepoint
@ -606,20 +607,11 @@ class Text(LRFStream):
adjustment_map = {1: 'top', 2: 'center', 3: 'baseline', 4: 'bottom'} adjustment_map = {1: 'top', 2: 'center', 3: 'baseline', 4: 'bottom'}
lineposition_map = {1:'before', 2:'after'} lineposition_map = {1:'before', 2:'after'}
def handle_entity(self, match):
    """
    Replacement callback for the entity pattern: return the unicode
    character for the entity body captured in group 1 of C{match}.

    The body is read as a hexadecimal reference when it starts with C{#x},
    as a decimal reference when it starts with C{#}, and otherwise as a
    key of C{name2codepoint}.
    """
    body = match.group(1)
    if body.startswith(u'#x'):
        codepoint = int(body[2:], 16)
    elif body.startswith(u'#'):
        codepoint = int(body[1:])
    else:
        codepoint = name2codepoint[body]
    return unichr(codepoint)
def add_text(self, text): def add_text(self, text):
s = unicode(text, "utf-16-le") s = unicode(text, "utf-16-le")
if s: if s:
s = s.translate(self.text_map) s = s.translate(self.text_map)
self.content.append(self.entity_pattern.sub(self.handle_entity, s)) self.content.append(self.entity_pattern.sub(entity_to_unicode, s))
def end_container(self, tag, stream): def end_container(self, tag, stream):
self.content.append(None) self.content.append(None)

View File

@ -14,13 +14,16 @@
## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. ## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
'''Read/Write metadata from Open Packaging Format (.opf) files.''' '''Read/Write metadata from Open Packaging Format (.opf) files.'''
import sys import sys, re
from libprs500.ebooks.metadata import MetaInformation from libprs500.ebooks.metadata import MetaInformation
from libprs500.ebooks.BeautifulSoup import BeautifulStoneSoup from libprs500.ebooks.BeautifulSoup import BeautifulStoneSoup
from libprs500.ebooks.lrf import entity_to_unicode
class OPFReader(MetaInformation): class OPFReader(MetaInformation):
ENTITY_PATTERN = re.compile(r'&(\S+);')
def __init__(self, stream): def __init__(self, stream):
self.default_title = stream.name if hasattr(stream, 'name') else 'Unknown' self.default_title = stream.name if hasattr(stream, 'name') else 'Unknown'
if hasattr(stream, 'seek'): if hasattr(stream, 'seek'):
@ -34,7 +37,7 @@ class OPFReader(MetaInformation):
def fget(self): def fget(self):
title = self.soup.package.metadata.find('dc:title') title = self.soup.package.metadata.find('dc:title')
if title: if title:
return title.string return self.ENTITY_PATTERN.sub(entity_to_unicode, title.string)
return self.default_title return self.default_title
return property(doc=doc, fget=fget) return property(doc=doc, fget=fget)
@ -52,7 +55,7 @@ class OPFReader(MetaInformation):
ans = [] ans = []
for i in au: for i in au:
ans.extend(i.split('&')) ans.extend(i.split('&'))
return ans return self.ENTITY_PATTERN.sub(entity_to_unicode, ans)
return None return None
return property(doc=doc, fget=fget) return property(doc=doc, fget=fget)
@ -67,7 +70,7 @@ class OPFReader(MetaInformation):
role = elem.get('opf:role') role = elem.get('opf:role')
if role == 'aut': if role == 'aut':
fa = elem.get('file-as') fa = elem.get('file-as')
return fa if fa else None return self.ENTITY_PATTERN.sub(entity_to_unicode, fa) if fa else None
return property(doc=doc, fget=fget) return property(doc=doc, fget=fget)
@apply @apply
@ -83,7 +86,7 @@ class OPFReader(MetaInformation):
def fget(self): def fget(self):
comments = self.soup.find('dc:description') comments = self.soup.find('dc:description')
if comments: if comments:
return comments.string return self.ENTITY_PATTERN.sub(entity_to_unicode, comments.string)
return None return None
return property(doc=doc, fget=fget) return property(doc=doc, fget=fget)
@ -93,7 +96,7 @@ class OPFReader(MetaInformation):
def fget(self): def fget(self):
category = self.soup.find('dc:type') category = self.soup.find('dc:type')
if category: if category:
return category.string return self.ENTITY_PATTERN.sub(entity_to_unicode, category.string)
return None return None
return property(doc=doc, fget=fget) return property(doc=doc, fget=fget)
@ -103,7 +106,7 @@ class OPFReader(MetaInformation):
def fget(self): def fget(self):
publisher = self.soup.find('dc:publisher') publisher = self.soup.find('dc:publisher')
if publisher: if publisher:
return publisher.string return self.ENTITY_PATTERN.sub(entity_to_unicode, publisher.string)
return None return None
return property(doc=doc, fget=fget) return property(doc=doc, fget=fget)