From 371c1bee5babcbc9237a252d9d3da3480e734d51 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Thu, 11 Dec 2008 13:47:29 -0800
Subject: [PATCH] Add support for reading metadata from LRX files (thanks to
 igorsk)

---
NOTE(review): line breaks and indentation were reconstructed from a
whitespace-collapsed copy of this patch, so context whitespace may not match
the target tree exactly (try `git apply --ignore-whitespace`). That copy had
also lost the text between '<' and '>' in the struct helpers: word_le,
short_le and short_be were restored from the call sites in get_metadata and
the 89-line hunk count (the name short_be is inferred -- confirm against the
original commit). The author e-mail on the From: line was missing as well and
has not been guessed.

 src/calibre/ebooks/metadata/lrx.py  | 89 +++++++++++++++++++++++++++++
 src/calibre/ebooks/metadata/meta.py |  5 +-
 2 files changed, 92 insertions(+), 2 deletions(-)
 create mode 100644 src/calibre/ebooks/metadata/lrx.py

diff --git a/src/calibre/ebooks/metadata/lrx.py b/src/calibre/ebooks/metadata/lrx.py
new file mode 100644
index 0000000000..2106a45450
--- /dev/null
+++ b/src/calibre/ebooks/metadata/lrx.py
@@ -0,0 +1,89 @@
+#!/usr/bin/env python
+__license__ = 'GPL v3'
+__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
+__docformat__ = 'restructuredtext en'
+
+'''
+Read metadata from LRX files
+'''
+
+import sys, struct
+from zlib import decompress
+from lxml import etree
+
+from calibre.ebooks.metadata import MetaInformation, string_to_authors
+
+def _read(f, at, amount):
+    f.seek(at)
+    return f.read(amount)
+
+def word_be(buf):
+    return struct.unpack('>L', buf)[0]
+
+def word_le(buf):
+    return struct.unpack('<L', buf)[0]
+
+def short_le(buf):
+    return struct.unpack('<H', buf)[0]
+
+def short_be(buf):
+    return struct.unpack('>H', buf)[0]
+
+
+def get_metadata(f):
+    read = lambda at, amount: _read(f, at, amount)
+    f.seek(0)
+    buf = f.read(12)
+    if buf[4:] == 'ftypLRX2':
+        offset = 0
+        while True:
+            offset += word_be(buf[:4])
+            try:
+                buf = read(offset, 8)
+            except:
+                raise ValueError('Not a valid LRX file')
+            if buf[4:] == 'bbeb':
+                break
+        offset += 8
+        buf = read(offset, 16)
+        if buf[:8].decode('utf-16-le') != 'LRF\x00':
+            raise ValueError('Not a valid LRX file')
+        lrf_version = word_le(buf[8:12])
+        offset += 0x4c
+        compressed_size = short_le(read(offset, 2))
+        offset += 2
+        if lrf_version >= 800:
+            offset += 6
+        compressed_size -= 4
+        uncompressed_size = word_le(read(offset, 4))
+        info = decompress(f.read(compressed_size))
+        if len(info) != uncompressed_size:
+            raise ValueError('LRX file has malformed metadata section')
+        root = etree.fromstring(info)
+        bi = root.find('BookInfo')
+        title = bi.find('Title')
+        title_sort = title.get('reading', None)
+        title = title.text
+        author = bi.find('Author')
+        author_sort = author.get('reading', None)
+        mi = MetaInformation(title, string_to_authors(author.text))
+        mi.title_sort, mi.author_sort = title_sort, author_sort
+        author = author.text
+        publisher = bi.find('Publisher')
+        mi.publisher = getattr(publisher, 'text', None)
+        mi.tags = [x.text for x in bi.findall('Category')]
+        mi.language = root.find('DocInfo').find('Language').text
+        return mi
+
+    elif buf[4:8] == 'LRX':
+        raise ValueError('Librie LRX format not supported')
+    else:
+        raise ValueError('Not a LRX file')
+
+
+def main(args=sys.argv):
+    print get_metadata(open(args[1], 'rb'))
+    return 0
+
+if __name__ == '__main__':
+    sys.exit(main())
\ No newline at end of file
diff --git a/src/calibre/ebooks/metadata/meta.py b/src/calibre/ebooks/metadata/meta.py
index aff09281da..3264655f9c 100644
--- a/src/calibre/ebooks/metadata/meta.py
+++ b/src/calibre/ebooks/metadata/meta.py
@@ -16,6 +16,7 @@ from calibre.ebooks.metadata.epub import get_metadata as epub_metadata
 from calibre.ebooks.metadata.html import get_metadata as html_metadata
 from calibre.ebooks.mobi.reader import get_metadata as mobi_metadata
 from calibre.ebooks.metadata.odt import get_metadata as odt_metadata
+from calibre.ebooks.metadata.lrx import get_metadata as lrx_metadata
 from calibre.ebooks.metadata.opf2 import OPF
 from calibre.ebooks.metadata.rtf import set_metadata as set_rtf_metadata
 from calibre.ebooks.lrf.meta import set_metadata as set_lrf_metadata
@@ -29,12 +30,12 @@ except OSError:
     from calibre.libunzip import extract_member as zip_extract_first
 
 from calibre.ebooks.metadata import MetaInformation
-from calibre.ptempfile import TemporaryDirectory
 
 _METADATA_PRIORITIES = [
                        'html', 'htm', 'xhtml', 'xhtm',
                        'rtf', 'fb2', 'pdf', 'prc', 'odt',
-                       'epub', 'lit', 'lrf', 'mobi', 'rb', 'imp'
+                       'epub', 'lit', 'lrx', 'lrf', 'mobi',
+                       'rb', 'imp'
                       ]
 
 # The priorities for loading metadata from different file types