From 6037b6c8205bf95f164f2abca24ca80701fe54f4 Mon Sep 17 00:00:00 2001 From: j-howell Date: Wed, 22 Feb 2017 16:42:24 -0500 Subject: [PATCH] Support getting metadata from sideloaded KFX files by treating the main KFX file of a book as the actual book path instead of the 'metadata.kfx' file. Read metadata.kfx only if the main book file is encrypted. Also improve KFX metadata reading in general. --- src/calibre/devices/kindle/driver.py | 45 ++++++++++------------------ src/calibre/devices/usbms/driver.py | 6 ++-- src/calibre/ebooks/metadata/kfx.py | 18 +++++++++-- 3 files changed, 33 insertions(+), 36 deletions(-) diff --git a/src/calibre/devices/kindle/driver.py b/src/calibre/devices/kindle/driver.py index a84e160c67..8d8d19638d 100644 --- a/src/calibre/devices/kindle/driver.py +++ b/src/calibre/devices/kindle/driver.py @@ -38,10 +38,6 @@ file metadata. ''' -def get_kfx_path(path): - return os.path.dirname(os.path.dirname(path)).rpartition('.')[0] + '.kfx' - - class KINDLE(USBMS): name = 'Kindle Device Interface' @@ -86,40 +82,29 @@ class KINDLE(USBMS): ' Click "Show details" to see the list of books.' ) - def is_a_book_file(self, filename, path, prefix): + def is_allowed_book_file(self, filename, path, prefix): lpath = os.path.join(path, filename).partition(self.normalize_path(prefix))[2].replace('\\', '/') - return lpath.endswith('.sdr/assets/metadata.kfx') - - def delete_single_book(self, path): - if path.replace('\\', '/').endswith('.sdr/assets/metadata.kfx'): - kfx_path = get_kfx_path(path) - if DEBUG: - prints('Kindle driver: Attempting to delete kfx: %r -> %r' % (path, kfx_path)) - if os.path.exists(kfx_path): - os.unlink(kfx_path) - sdr_path = kfx_path.rpartition('.')[0] + '.sdr' - if os.path.exists(sdr_path): - shutil.rmtree(sdr_path) - try: - os.removedirs(os.path.dirname(kfx_path)) - except Exception: - pass - - else: - return USBMS.delete_single_book(self, path) - + return '.sdr/' not in lpath + @classmethod def metadata_from_path(cls, path): - if path.replace('\\', '/').endswith('.sdr/assets/metadata.kfx'): + if path.endswith('.kfx'): from calibre.ebooks.metadata.kfx import read_metadata_kfx try: - with lopen(path, 'rb') as f: - mi = read_metadata_kfx(f) + kfx_path = path + with lopen(kfx_path, 'rb') as f: + if f.read(8) != b'\xeaDRMION\xee': + f.seek(0) + mi = read_metadata_kfx(f) + else: + kfx_path = os.path.join(path.rpartition('.')[0] + '.sdr', 'assets', 'metadata.kfx') + with lopen(kfx_path, 'rb') as mf: + mi = read_metadata_kfx(mf) except Exception: import traceback traceback.print_exc() - path = get_kfx_path(path) - mi = cls.metadata_from_formats([get_kfx_path(path)]) + print('failed kfx path=' + kfx_path) + mi = cls.metadata_from_formats([path]) else: mi = cls.metadata_from_formats([path]) if mi.title == _('Unknown') or ('-asin' in mi.title and '-type' in mi.title): diff --git a/src/calibre/devices/usbms/driver.py b/src/calibre/devices/usbms/driver.py index bb23608e1a..c3c57c9e21 100644 --- a/src/calibre/devices/usbms/driver.py +++ b/src/calibre/devices/usbms/driver.py @@ -177,8 +177,8 @@ class USBMS(CLI, Device): def formats_to_scan_for(self): return set(self.settings().format_map) | set(self.FORMATS) - def is_a_book_file(self, filename, path, prefix): - return False + def is_allowed_book_file(self, filename, path, prefix): + return True def books(self, oncard=None, end_session=True): from calibre.ebooks.metadata.meta import path_to_ext @@ -222,7 +222,7 @@ class USBMS(CLI, Device): def update_booklist(filename, path, prefix): changed = False - if path_to_ext(filename) in all_formats or self.is_a_book_file(filename, path, prefix): + if path_to_ext(filename) in all_formats and self.is_allowed_book_file(filename, path, prefix): try: lpath = os.path.join(path, filename).partition(self.normalize_path(prefix))[2] if lpath.startswith(os.sep): diff --git a/src/calibre/ebooks/metadata/kfx.py b/src/calibre/ebooks/metadata/kfx.py index f6bb24557b..e7dc1685d6 100644 --- a/src/calibre/ebooks/metadata/kfx.py +++ b/src/calibre/ebooks/metadata/kfx.py @@ -38,6 +38,7 @@ DT_STRING = 8 # unicode DT_STRUCT = 11 # tuple DT_LIST = 12 # list DT_OBJECT = 13 # dict of property/value pairs +DT_TYPED_DATA = 14 # type, name, value # property names (non-unicode strings to distinguish them from ION strings in this program) # These are place holders. The correct property names are unknown. @@ -52,7 +53,7 @@ METADATA_PROPERTIES = { b'P10' : "languages", b'P153': "title", b'P154': "description", - b'P222': "authors", + b'P222': "author", b'P232': "publisher", } @@ -217,6 +218,12 @@ class PackedIon(PackedData): return result + if data_type == DT_TYPED_DATA: + ion = PackedIon(self.extract(data_len)) + ion.unpack_number() + ion.unpack_number() + return ion.unpack_typed_value() + # ignore unknown types self.advance(data_len) return None @@ -289,7 +296,7 @@ def read_metadata_kfx(stream, read_cover=True): return ans title = get('title') or _('Unknown') - authors = get('authors', False) or [_('Unknown')] + authors = get('author', False) or [_('Unknown')] auth_pat = re.compile(r'([^,]+?)\s*,\s+([^,]+)$') def fix_author(x): @@ -298,8 +305,13 @@ def read_metadata_kfx(stream, read_cover=True): if m is not None: return m.group(2) + ' ' + m.group(1) return x + + unique_authors = [] # remove duplicates while retaining order + for f in [fix_author(x) for x in authors]: + if f not in unique_authors: + unique_authors.append(f) - mi = Metadata(title, [fix_author(x) for x in authors]) + mi = Metadata(title, unique_authors) if has('author'): mi.author_sort = get('author') if has('ASIN'):