diff --git a/setup.py b/setup.py index 45abce7d0b..a4af61bd55 100644 --- a/setup.py +++ b/setup.py @@ -25,6 +25,7 @@ entry_points = { 'rtf-meta = libprs500.ebooks.metadata.rtf:main', 'pdf-meta = libprs500.ebooks.metadata.pdf:main', 'lit-meta = libprs500.ebooks.metadata.lit:main', + 'opf-meta = libprs500.ebooks.metadata.opf:main', 'epub-meta = libprs500.ebooks.metadata.epub:main', 'txt2lrf = libprs500.ebooks.lrf.txt.convert_from:main', 'html2lrf = libprs500.ebooks.lrf.html.convert_from:main', diff --git a/src/libprs500/ebooks/metadata/lit.py b/src/libprs500/ebooks/metadata/lit.py index f6ce59bab5..1c1a7e0220 100644 --- a/src/libprs500/ebooks/metadata/lit.py +++ b/src/libprs500/ebooks/metadata/lit.py @@ -159,6 +159,7 @@ FLAG_CLOSING = 2 FLAG_BLOCK = 4 FLAG_HEAD = 8 FLAG_ATOM = 16 +XML_ENTITIES = ['&amp;', '&apos;', '&lt;', '&gt;', '&quot;'] class UnBinary(object): def __init__(self, bin, manifest, attr_map=OPF_ATTR_MAP, tag_map=OPF_TAG_MAP, @@ -173,8 +174,28 @@ class UnBinary(object): self.opf = self.attr_map is OPF_ATTR_MAP self.bin = bin self.buf = cStringIO.StringIO() - self.binary_to_text() + self.ampersands = [] + self.binary_to_text() + self.raw = self.buf.getvalue().lstrip().decode('utf-8') + self.escape_ampersands() + def escape_ampersands(self): + offset = 0 + for pos in self.ampersands: + test = self.raw[pos+offset:pos+offset+6] + if test.startswith('&#') and ';' in test: + continue + escape = True + for ent in XML_ENTITIES: + if test.startswith(ent): + escape = False + break + if not escape: + continue + self.raw = self.raw[:pos+offset] + '&amp;' + self.raw[pos+offset+1:] + offset += 4 + + def write_spaces(self, depth): self.buf.write(u' '.join(u'' for i in range(depth))) @@ -185,8 +206,7 @@ class UnBinary(object): raise LitReadError('Could not find item %s'%(internal_id,)) def __unicode__(self): - raw = self.buf.getvalue().lstrip() - return raw.decode('utf-8') + return self.raw def binary_to_text(self, base=0, depth=0): space_enabled, saved_space_enabled = 1, 0 @@ -213,6 +233,8 
@@ class UnBinary(object): if c == '\v': c = '\n' pending_indent = 0 + if c == '&': + self.ampersands.append(self.buf.tell()-1) self.buf.write(c.encode('utf-8') if isinstance(c, unicode) else c) elif state == 'get flags': if ord(c) == 0: @@ -693,7 +715,8 @@ class LitFile(object): def get_metadata(stream): try: litfile = LitFile(stream) - mi = OPFReader(cStringIO.StringIO(litfile.meta.encode('utf-8'))) + src = litfile.meta.encode('utf-8') + mi = OPFReader(cStringIO.StringIO(src)) except: title = stream.name if hasattr(stream, 'name') and stream.name else 'Unknown' mi = MetaInformation(title, ['Unknown']) @@ -705,7 +728,8 @@ def main(args=sys.argv): if len(args) != 2: print >>sys.stderr, 'Usage: %s file.lit'%(args[0],) return 1 - print unicode(get_metadata(open(args[1], 'rb'))) + mi = get_metadata(open(args[1], 'rb')) + print unicode(mi) return 0 if __name__ == '__main__': diff --git a/src/libprs500/ebooks/metadata/opf.py b/src/libprs500/ebooks/metadata/opf.py index 7f0eee751a..966a60f3e6 100644 --- a/src/libprs500/ebooks/metadata/opf.py +++ b/src/libprs500/ebooks/metadata/opf.py @@ -22,6 +22,7 @@ import xml.dom.minidom as dom from libprs500.ebooks.metadata import MetaInformation from libprs500.ebooks.BeautifulSoup import BeautifulStoneSoup, BeautifulSoup from libprs500.ebooks.lrf import entity_to_unicode +from libprs500.ebooks.metadata import get_parser class ManifestItem(object): def __init__(self, item, cwd): @@ -491,7 +492,25 @@ class OPFCreator(OPF): self.libprs_id = mi.libprs_id def main(args=sys.argv): - print OPFReader(open(args[1], 'rb')) + parser = get_parser('opf') + opts, args = parser.parse_args(args) + if len(args) != 2: + parser.print_help() + return 1 + mi = OPFReader(open(args[1], 'rb')) + if opts.title is not None: + mi.title = opts.title.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;') + if opts.authors is not None: + aus = [i.strip().replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;') for i in opts.authors.split(',')] + mi.authors = aus + 
if opts.category is not None: + mi.category = opts.category.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;') + if opts.comment is not None: + mi.comments = opts.comment.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;') + print mi + res = str(mi.soup) + del mi + open(args[1], 'wb').write(res) return 0 if __name__ == '__main__': diff --git a/src/libprs500/gui2/main.py b/src/libprs500/gui2/main.py index 0a75e49981..e2e9cbd134 100644 --- a/src/libprs500/gui2/main.py +++ b/src/libprs500/gui2/main.py @@ -295,14 +295,12 @@ class Main(MainWindow, Ui_MainWindow): format = format[1:] if format else None stream = open(book, 'rb') mi = get_metadata(stream, stream_type=format) - if not mi.title: - mi.title = os.path.splitext(os.path.basename(book))[0] + title = mi.title if mi.title else os.path.splitext(os.path.basename(book))[0] formats.append(format) metadata.append(mi) names.append(os.path.basename(book)) - if not mi.authors: - mi.authors = ['Unknown'] - infos.append({'title':mi.title, 'authors':', '.join(mi.authors), + authors = mi.authors if mi.authors else ['Unknown'] + infos.append({'title':title, 'authors':', '.join(authors), 'cover':self.default_thumbnail, 'tags':[]}) if not to_device: diff --git a/src/libprs500/linux.py b/src/libprs500/linux.py index 268fa48a07..3873e1700e 100644 --- a/src/libprs500/linux.py +++ b/src/libprs500/linux.py @@ -98,6 +98,7 @@ def setup_completion(): f.write(opts_and_exts('rtf-meta', metaop, ['rtf'])) f.write(opts_and_exts('pdf-meta', metaop, ['pdf'])) f.write(opts_and_exts('lit-meta', metaop, ['lit'])) + f.write(opts_and_exts('opf-meta', metaop, ['opf'])) f.write(opts_and_exts('epub-meta', metaop, ['epub'])) f.write(opts_and_exts('lrfviewer', lrfviewerop, ['lrf'])) f.write('''