This commit is contained in:
Kovid Goyal 2008-01-14 19:14:39 +00:00
parent 54ea0b12b1
commit 9c75ead5d1
5 changed files with 54 additions and 11 deletions

View File

@ -25,6 +25,7 @@ entry_points = {
'rtf-meta = libprs500.ebooks.metadata.rtf:main',
'pdf-meta = libprs500.ebooks.metadata.pdf:main',
'lit-meta = libprs500.ebooks.metadata.lit:main',
'opf-meta = libprs500.ebooks.metadata.opf:main',
'epub-meta = libprs500.ebooks.metadata.epub:main',
'txt2lrf = libprs500.ebooks.lrf.txt.convert_from:main',
'html2lrf = libprs500.ebooks.lrf.html.convert_from:main',

View File

@ -159,6 +159,7 @@ FLAG_CLOSING = 2
FLAG_BLOCK = 4
FLAG_HEAD = 8
FLAG_ATOM = 16
# Entity references that must be left untouched when escaping bare
# ampersands: an '&' that already begins one of these is well-formed XML.
XML_ENTITIES = ['&amp;', '&apos;', '&lt;', '&gt;', '&quot;']
class UnBinary(object):
def __init__(self, bin, manifest, attr_map=OPF_ATTR_MAP, tag_map=OPF_TAG_MAP,
@ -173,8 +174,28 @@ class UnBinary(object):
self.opf = self.attr_map is OPF_ATTR_MAP
self.bin = bin
self.buf = cStringIO.StringIO()
self.binary_to_text()
self.ampersands = []
self.binary_to_text()
self.raw = self.buf.getvalue().lstrip().decode('utf-8')
self.escape_ampersands()
def escape_ampersands(self):
    """Replace bare ``&`` characters in ``self.raw`` with ``&amp;``.

    ``self.ampersands`` holds the offsets recorded while the text was being
    written out.  An ampersand is left alone when it already starts a
    numeric character reference (``&#`` followed by ``;`` within the next
    six characters) or one of the names listed in ``XML_ENTITIES``.

    The offsets are positions in the byte buffer, captured before the text
    was stripped and decoded, so they are not guaranteed to line up with
    ``self.raw``; an offset that does not land on an ``&`` is ignored
    rather than overwriting an unrelated character.
    """
    raw = self.raw
    pieces = []
    copied_upto = 0  # everything before this index is already in pieces
    for pos in self.ampersands:
        # Skip negative, out-of-order or misaligned offsets.
        if pos < copied_upto or raw[pos:pos + 1] != '&':
            continue
        window = raw[pos:pos + 6]
        if window.startswith('&#') and ';' in window:
            continue  # numeric character reference
        if any(window.startswith(ent) for ent in XML_ENTITIES):
            continue  # already a named entity
        pieces.append(raw[copied_upto:pos])
        pieces.append('&amp;')
        copied_upto = pos + 1
    pieces.append(raw[copied_upto:])
    # Build the result once instead of re-slicing the string per ampersand.
    self.raw = ''.join(pieces)
def write_spaces(self, depth):
    """Write the indentation for nesting level *depth* to the output buffer.

    Emits one space fewer than *depth*, and nothing at all when *depth*
    is 1 or less.
    """
    indent_width = max(depth - 1, 0)
    self.buf.write(u' ' * indent_width)
@ -185,8 +206,7 @@ class UnBinary(object):
raise LitReadError('Could not find item %s'%(internal_id,))
def __unicode__(self):
    """Return the decoded text with bare ampersands already escaped.

    ``self.raw`` is the decoded, ampersand-escaped text prepared in
    ``__init__``; re-decoding the buffer here would hand back the
    unescaped text instead.
    """
    return self.raw
def binary_to_text(self, base=0, depth=0):
space_enabled, saved_space_enabled = 1, 0
@ -213,6 +233,8 @@ class UnBinary(object):
if c == '\v':
c = '\n'
pending_indent = 0
if c == '&':
self.ampersands.append(self.buf.tell()-1)
self.buf.write(c.encode('utf-8') if isinstance(c, unicode) else c)
elif state == 'get flags':
if ord(c) == 0:
@ -693,7 +715,8 @@ class LitFile(object):
def get_metadata(stream):
try:
litfile = LitFile(stream)
mi = OPFReader(cStringIO.StringIO(litfile.meta.encode('utf-8')))
src = litfile.meta.encode('utf-8')
mi = OPFReader(cStringIO.StringIO(src))
except:
title = stream.name if hasattr(stream, 'name') and stream.name else 'Unknown'
mi = MetaInformation(title, ['Unknown'])
@ -705,7 +728,8 @@ def main(args=sys.argv):
if len(args) != 2:
print >>sys.stderr, 'Usage: %s file.lit'%(args[0],)
return 1
print unicode(get_metadata(open(args[1], 'rb')))
mi = get_metadata(open(args[1], 'rb'))
print unicode(mi)
return 0
if __name__ == '__main__':

View File

@ -22,6 +22,7 @@ import xml.dom.minidom as dom
from libprs500.ebooks.metadata import MetaInformation
from libprs500.ebooks.BeautifulSoup import BeautifulStoneSoup, BeautifulSoup
from libprs500.ebooks.lrf import entity_to_unicode
from libprs500.ebooks.metadata import get_parser
class ManifestItem(object):
def __init__(self, item, cwd):
@ -491,7 +492,25 @@ class OPFCreator(OPF):
self.libprs_id = mi.libprs_id
def _escape_xml_text(text):
    """Escape ``&``, ``<`` and ``>`` in *text* before storing it as metadata."""
    # '&' must be replaced first so the entities added afterwards are not
    # escaped a second time.
    return text.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')


def main(args=sys.argv):
    """Command-line entry point: print, and optionally update, OPF metadata.

    ``args`` is the full argument vector (program name first).  Exactly one
    positional argument, the path of the OPF file, is expected; otherwise
    the parser's help is printed and 1 is returned.

    Any of the title, authors (comma separated), category and comment
    options that were supplied replace the corresponding metadata fields
    after XML-escaping.  The metadata is printed and the file is rewritten
    in place from the reader's ``soup``.  Returns 0 on success.
    """
    parser = get_parser('opf')
    opts, args = parser.parse_args(args)
    # Validate before touching args[1]; reading it first raised IndexError
    # when no file was given.
    if len(args) != 2:
        parser.print_help()
        return 1

    stream = open(args[1], 'rb')
    try:
        mi = OPFReader(stream)
        if opts.title is not None:
            mi.title = _escape_xml_text(opts.title)
        if opts.authors is not None:
            mi.authors = [_escape_xml_text(name.strip())
                          for name in opts.authors.split(',')]
        if opts.category is not None:
            mi.category = _escape_xml_text(opts.category)
        if opts.comment is not None:
            mi.comments = _escape_xml_text(opts.comment)
        print(mi)
        # Serialise while the input is still open, in case the reader
        # consumes the stream lazily.
        res = str(mi.soup)
    finally:
        # Explicitly release the input handle before the same path is
        # reopened for writing.
        stream.close()

    out = open(args[1], 'wb')
    try:
        out.write(res)
    finally:
        out.close()
    return 0
if __name__ == '__main__':

View File

@ -295,14 +295,12 @@ class Main(MainWindow, Ui_MainWindow):
format = format[1:] if format else None
stream = open(book, 'rb')
mi = get_metadata(stream, stream_type=format)
if not mi.title:
mi.title = os.path.splitext(os.path.basename(book))[0]
title = mi.title if mi.title else os.path.splitext(os.path.basename(book))[0]
formats.append(format)
metadata.append(mi)
names.append(os.path.basename(book))
if not mi.authors:
mi.authors = ['Unknown']
infos.append({'title':mi.title, 'authors':', '.join(mi.authors),
authors = mi.authors if mi.authors else ['Unknown']
infos.append({'title':title, 'authors':', '.join(authors),
'cover':self.default_thumbnail, 'tags':[]})
if not to_device:

View File

@ -98,6 +98,7 @@ def setup_completion():
f.write(opts_and_exts('rtf-meta', metaop, ['rtf']))
f.write(opts_and_exts('pdf-meta', metaop, ['pdf']))
f.write(opts_and_exts('lit-meta', metaop, ['lit']))
f.write(opts_and_exts('opf-meta', metaop, ['opf']))
f.write(opts_and_exts('epub-meta', metaop, ['epub']))
f.write(opts_and_exts('lrfviewer', lrfviewerop, ['lrf']))
f.write('''