mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-07 18:24:30 -04:00
Add support for reading metadata from LRX files (thanks to igorsk)
This commit is contained in:
parent
f152f37ec0
commit
371c1bee5b
89
src/calibre/ebooks/metadata/lrx.py
Normal file
89
src/calibre/ebooks/metadata/lrx.py
Normal file
@ -0,0 +1,89 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
'''
|
||||||
|
Read metadata from LRX files
|
||||||
|
'''
|
||||||
|
|
||||||
|
import sys, struct
|
||||||
|
from zlib import decompress
|
||||||
|
from lxml import etree
|
||||||
|
|
||||||
|
from calibre.ebooks.metadata import MetaInformation, string_to_authors
|
||||||
|
|
||||||
|
def _read(f, at, amount):
|
||||||
|
f.seek(at)
|
||||||
|
return f.read(amount)
|
||||||
|
|
||||||
|
def word_be(buf):
    '''
    Decode ``buf`` as one big-endian unsigned 32-bit integer
    (``struct`` format ``'>L'``).
    '''
    (value,) = struct.unpack('>L', buf)
    return value
|
||||||
|
|
||||||
|
def word_le(buf):
    '''
    Decode ``buf`` as one little-endian unsigned 32-bit integer
    (``struct`` format ``'<L'``).
    '''
    fields = struct.unpack('<L', buf)
    return fields[0]
|
||||||
|
|
||||||
|
def short_le(buf):
    '''
    Decode ``buf`` as one little-endian unsigned 16-bit integer
    (``struct`` format ``'<H'``).
    '''
    (value,) = struct.unpack('<H', buf)
    return value
|
||||||
|
|
||||||
|
def short_be(buf):
    '''
    Decode ``buf`` as one big-endian unsigned 16-bit integer
    (``struct`` format ``'>H'``).
    '''
    fields = struct.unpack('>H', buf)
    return fields[0]
|
||||||
|
|
||||||
|
|
||||||
|
def get_metadata(f):
    '''
    Read metadata from an LRX file.

    :param f: Seekable file-like object opened in binary mode. It is rewound
              to the start before parsing.
    :return: A ``MetaInformation`` object built from the title and authors
             found in the embedded metadata XML, with ``title_sort``,
             ``author_sort``, ``publisher``, ``tags`` and ``language`` set
             from the same document.
    :raises ValueError: If the stream is not an LRX file, if its chunk list
             or embedded LRF header is truncated or invalid, or if the
             compressed metadata section cannot be read back consistently.

    The lookups of ``BookInfo``, ``Title``, ``Author``, ``DocInfo`` and
    ``Language`` in the metadata XML are not guarded; only ``Publisher`` is
    treated as optional.
    '''
    # Imported locally because only ``decompress`` is imported at file level.
    from zlib import error as zlib_error

    def read(at, amount):
        return _read(f, at, amount)

    f.seek(0)
    buf = f.read(12)
    if buf[4:] != b'ftypLRX2':
        # NOTE(review): a 4-byte slice can never equal this 3-byte tag, so
        # the Librie branch is unreachable as written. The comparison is
        # left unchanged because the intended Librie signature cannot be
        # determined from this file -- confirm it before changing the test.
        if buf[4:8] == b'LRX':
            raise ValueError('Librie LRX format not supported')
        raise ValueError('Not a LRX file')

    # Walk the chunk list: each chunk starts with a 4-byte big-endian size
    # followed by a 4-byte tag. Stop at the chunk tagged 'bbeb'.
    offset = 0
    while True:
        chunk_size = word_be(buf[:4])
        if chunk_size == 0:
            # A zero size would never advance and loop forever.
            raise ValueError('Not a valid LRX file')
        offset += chunk_size
        try:
            buf = read(offset, 8)
        except Exception:
            raise ValueError('Not a valid LRX file')
        if len(buf) < 8:
            # Ran off the end of the file without finding the 'bbeb' chunk.
            raise ValueError('Not a valid LRX file')
        if buf[4:] == b'bbeb':
            break

    # Skip the 8-byte chunk header; the payload must start with the
    # UTF-16-LE encoding of 'LRF\x00'.
    offset += 8
    buf = read(offset, 16)
    if buf[:8] != b'L\x00R\x00F\x00\x00\x00':
        raise ValueError('Not a valid LRX file')

    try:
        lrf_version = word_le(buf[8:12])
        offset += 0x4c
        compressed_size = short_le(read(offset, 2))
        offset += 2
        if lrf_version >= 800:
            offset += 6
        # Presumably the stored size also counts the 4-byte uncompressed
        # size field read next -- TODO confirm against the LRF format.
        compressed_size -= 4
        uncompressed_size = word_le(read(offset, 4))
    except struct.error:
        # A short read means the embedded header is truncated.
        raise ValueError('Not a valid LRX file')

    if compressed_size <= 0:
        # A negative count would make f.read() consume the rest of the file.
        raise ValueError('LRX file has malformed metadata section')
    try:
        # The compressed data directly follows the 4 bytes just read, so the
        # file position is already correct here.
        info = decompress(f.read(compressed_size))
    except zlib_error:
        raise ValueError('LRX file has malformed metadata section')
    if len(info) != uncompressed_size:
        raise ValueError('LRX file has malformed metadata section')

    root = etree.fromstring(info)
    bi = root.find('BookInfo')
    title = bi.find('Title')
    title_sort = title.get('reading', None)
    title = title.text
    author = bi.find('Author')
    author_sort = author.get('reading', None)
    mi = MetaInformation(title, string_to_authors(author.text))
    mi.title_sort, mi.author_sort = title_sort, author_sort
    publisher = bi.find('Publisher')
    mi.publisher = getattr(publisher, 'text', None)
    mi.tags = [x.text for x in bi.findall('Category')]
    mi.language = root.find('DocInfo').find('Language').text
    return mi
|
||||||
|
|
||||||
|
|
||||||
|
def main(args=sys.argv):
    '''
    Command line entry point: print the metadata of the LRX file named by
    ``args[1]``.

    :param args: Argument list with the program name first; defaults to
                 ``sys.argv``.
    :return: 0 after the metadata has been printed. Errors from opening or
             parsing the file propagate as exceptions.
    '''
    # Close the file deterministically instead of leaking the handle.
    with open(args[1], 'rb') as stream:
        # A single parenthesised argument prints the same on Python 2 and 3.
        print(get_metadata(stream))
    return 0
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Run the command line interface and exit with its return value.
    raise SystemExit(main())
|
@ -16,6 +16,7 @@ from calibre.ebooks.metadata.epub import get_metadata as epub_metadata
|
|||||||
from calibre.ebooks.metadata.html import get_metadata as html_metadata
|
from calibre.ebooks.metadata.html import get_metadata as html_metadata
|
||||||
from calibre.ebooks.mobi.reader import get_metadata as mobi_metadata
|
from calibre.ebooks.mobi.reader import get_metadata as mobi_metadata
|
||||||
from calibre.ebooks.metadata.odt import get_metadata as odt_metadata
|
from calibre.ebooks.metadata.odt import get_metadata as odt_metadata
|
||||||
|
from calibre.ebooks.metadata.lrx import get_metadata as lrx_metadata
|
||||||
from calibre.ebooks.metadata.opf2 import OPF
|
from calibre.ebooks.metadata.opf2 import OPF
|
||||||
from calibre.ebooks.metadata.rtf import set_metadata as set_rtf_metadata
|
from calibre.ebooks.metadata.rtf import set_metadata as set_rtf_metadata
|
||||||
from calibre.ebooks.lrf.meta import set_metadata as set_lrf_metadata
|
from calibre.ebooks.lrf.meta import set_metadata as set_lrf_metadata
|
||||||
@ -29,12 +30,12 @@ except OSError:
|
|||||||
from calibre.libunzip import extract_member as zip_extract_first
|
from calibre.libunzip import extract_member as zip_extract_first
|
||||||
|
|
||||||
from calibre.ebooks.metadata import MetaInformation
|
from calibre.ebooks.metadata import MetaInformation
|
||||||
from calibre.ptempfile import TemporaryDirectory
|
|
||||||
|
|
||||||
_METADATA_PRIORITIES = [
|
_METADATA_PRIORITIES = [
|
||||||
'html', 'htm', 'xhtml', 'xhtm',
|
'html', 'htm', 'xhtml', 'xhtm',
|
||||||
'rtf', 'fb2', 'pdf', 'prc', 'odt',
|
'rtf', 'fb2', 'pdf', 'prc', 'odt',
|
||||||
'epub', 'lit', 'lrf', 'mobi', 'rb', 'imp'
|
'epub', 'lit', 'lrx', 'lrf', 'mobi',
|
||||||
|
'rb', 'imp'
|
||||||
]
|
]
|
||||||
|
|
||||||
# The priorities for loading metadata from different file types
|
# The priorities for loading metadata from different file types
|
||||||
|
Loading…
x
Reference in New Issue
Block a user