Add support for reading metadata from LRX files (thanks to igorsk)

2025-08-11 09:13:57 -04:00 · 2008-12-11 13:47:29 -08:00 · 2008-12-11 13:47:29 -08:00 · 371c1bee5b
commit 371c1bee5b
parent f152f37ec0
2 changed files with 92 additions and 2 deletions
--- a/src/calibre/ebooks/metadata/lrx.py
+++ b/src/calibre/ebooks/metadata/lrx.py
@ -0,0 +1,89 @@
 #!/usr/bin/env  python
 __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
 __docformat__ = 'restructuredtext en'
 '''
 Read metadata from LRX files
 '''
 import sys, struct
 from zlib import decompress
 from lxml import etree
 from calibre.ebooks.metadata import MetaInformation, string_to_authors
 def _read(f, at, amount):
    f.seek(at)
    return f.read(amount)
 def word_be(buf):
    '''Decode ``buf`` (exactly 4 bytes) as a big-endian unsigned 32-bit integer.'''
    (value,) = struct.unpack('>L', buf)
    return value
 def word_le(buf):
    '''Decode ``buf`` (exactly 4 bytes) as a little-endian unsigned 32-bit integer.'''
    (value,) = struct.unpack('<L', buf)
    return value
 def short_le(buf):
    '''Decode ``buf`` (exactly 2 bytes) as a little-endian unsigned 16-bit integer.'''
    (value,) = struct.unpack('<H', buf)
    return value
 def short_be(buf):
    '''Decode ``buf`` (exactly 2 bytes) as a big-endian unsigned 16-bit integer.'''
    (value,) = struct.unpack('>H', buf)
    return value
 def get_metadata(f):
    '''
    Read book metadata from an LRX file.

    :param f: Seekable file-like object holding the LRX data. The signature
              checks compare against plain ``str`` literals, so the stream
              is expected to yield Python 2 byte strings.
    :return: A ``MetaInformation`` object with title, authors, title_sort,
             author_sort, publisher, tags and language taken from the
             compressed XML info block.
    :raises ValueError: If the stream is not an LRX file, is a Librie LRX
                        file, is truncated, or its metadata section is
                        malformed. Errors raised by ``decompress`` or
                        ``etree.fromstring`` on corrupt data are not
                        translated.
    '''
    def read(at, amount):
        # Reading past the end of a file returns short data instead of
        # raising, so truncation has to be detected explicitly.
        data = _read(f, at, amount)
        if len(data) < amount:
            raise ValueError('Not a valid LRX file')
        return data
    f.seek(0)
    buf = f.read(12)
    if buf[4:] == 'ftypLRX2':
        # Walk the container: each entry starts with a 4-byte big-endian
        # size followed by a 4-byte type tag. Stop at the 'bbeb' entry.
        offset = 0
        while True:
            box_size = word_be(buf[:4])
            if box_size == 0:
                # A zero size would never advance the offset (endless loop).
                raise ValueError('Not a valid LRX file')
            offset += box_size
            try:
                buf = read(offset, 8)
            except Exception:
                raise ValueError('Not a valid LRX file')
            if buf[4:] == 'bbeb':
                break
        # The payload of the 'bbeb' entry must begin with u'LRF\0' (UTF-16-LE).
        offset += 8
        buf = read(offset, 16)
        if buf[:8].decode('utf-16-le') != 'LRF\x00':
            raise ValueError('Not a valid LRX file')
        lrf_version = word_le(buf[8:12])
        # 2-byte little-endian compressed size, 0x4c bytes into the header.
        offset += 0x4c
        compressed_size = short_le(read(offset, 2))
        offset += 2
        if lrf_version >= 800:
            # Version 800 and later place the info block 6 bytes further on.
            offset += 6
        # Presumably the stored size includes the 4-byte uncompressed-size
        # prefix that is read separately just below.
        compressed_size -= 4
        if compressed_size < 0:
            # A negative count would make f.read() consume the rest of the file.
            raise ValueError('LRX file has malformed metadata section')
        uncompressed_size = word_le(read(offset, 4))
        # The stream is now positioned directly after the size prefix.
        info = decompress(f.read(compressed_size))
        if len(info) != uncompressed_size:
            raise ValueError('LRX file has malformed metadata section')
        root = etree.fromstring(info)
        # Compare against None explicitly: an element without children is falsy.
        bi = root.find('BookInfo')
        if bi is None:
            raise ValueError('LRX file has malformed metadata section')
        title = bi.find('Title')
        author = bi.find('Author')
        if title is None or author is None:
            raise ValueError('LRX file has malformed metadata section')
        mi = MetaInformation(title.text, string_to_authors(author.text))
        mi.title_sort = title.get('reading', None)
        mi.author_sort = author.get('reading', None)
        publisher = bi.find('Publisher')
        mi.publisher = getattr(publisher, 'text', None)
        mi.tags = [x.text for x in bi.findall('Category')]
        # Language is treated as optional, like Publisher.
        doc_info = root.find('DocInfo')
        language = None
        if doc_info is not None:
            language = doc_info.find('Language')
        mi.language = getattr(language, 'text', None)
        return mi
    elif buf[4:7] == 'LRX':
        # NOTE(review): the previous test compared the 4-byte slice buf[4:8]
        # with the 3-character literal 'LRX' and so could never match;
        # confirm the exact Librie signature bytes.
        raise ValueError('Librie LRX format not supported')
    else:
        raise ValueError('Not a LRX file')
 def main(args=sys.argv):
    '''
    Command line entry point: print the metadata of the LRX file named by
    ``args[1]``.

    :param args: Argument list in ``sys.argv`` form (program name first).
    :return: 0 on success, 1 if no file path was supplied.
    '''
    if len(args) < 2:
        sys.stderr.write('Usage: %s file.lrx\n' % (args[0] if args else 'lrx'))
        return 1
    stream = open(args[1], 'rb')
    try:
        # The parenthesised single-argument form prints the same text whether
        # print is a statement or a function.
        print(get_metadata(stream))
    finally:
        # Always release the file handle, even if parsing fails.
        stream.close()
    return 0
 # When run directly as a script, call main() and use its return value as
 # the process exit status.
 if __name__ == '__main__':
    sys.exit(main())
--- a/src/calibre/ebooks/metadata/meta.py
+++ b/src/calibre/ebooks/metadata/meta.py
@ -16,6 +16,7 @@ from calibre.ebooks.metadata.epub import get_metadata as epub_metadata
 from calibre.ebooks.metadata.html import get_metadata as html_metadata
 from calibre.ebooks.mobi.reader   import get_metadata as mobi_metadata
 from calibre.ebooks.metadata.odt  import get_metadata as odt_metadata
 from calibre.ebooks.metadata.lrx  import get_metadata as lrx_metadata 
 from calibre.ebooks.metadata.opf2 import OPF
 from calibre.ebooks.metadata.rtf  import set_metadata as set_rtf_metadata
 from calibre.ebooks.lrf.meta      import set_metadata as set_lrf_metadata
@ -29,12 +30,12 @@ except OSError:
 from calibre.libunzip import extract_member as zip_extract_first
 from calibre.ebooks.metadata import MetaInformation
 from calibre.ptempfile import TemporaryDirectory
 _METADATA_PRIORITIES = [
                       'html', 'htm', 'xhtml', 'xhtm',
                       'rtf', 'fb2', 'pdf', 'prc', 'odt',
-                       'epub', 'lit', 'lrf', 'mobi', 'rb', 'imp'
+                       'epub', 'lit', 'lrx', 'lrf', 'mobi', 
                       'rb', 'imp'
                      ]
 # The priorities for loading metadata from different file types