From f2445ad87aab1c1c1864a431d64ef194d19d46e5 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 30 Mar 2023 13:41:20 +0530 Subject: [PATCH] Get rid of author_link_map Also implement support for link_maps metadata in EPUB3 and PDF. Reading proceeds by returning the link_maps if present, otherwise look for a legacy author_link_map and return a link_maps constructed from it. Writing simply sets link_maps, which as per the scheme above takes precedence anyway. --- src/calibre/db/cache.py | 3 -- src/calibre/db/lazy.py | 2 +- src/calibre/db/restore.py | 39 +++++++++-------- src/calibre/db/tests/base.py | 2 +- src/calibre/db/tests/legacy.py | 1 - src/calibre/ebooks/metadata/book/__init__.py | 4 +- src/calibre/ebooks/metadata/book/base.py | 1 - src/calibre/ebooks/metadata/opf2.py | 44 ++++++++++++++++---- src/calibre/ebooks/metadata/opf3.py | 27 +++++++++--- src/calibre/ebooks/metadata/opf3_test.py | 30 ++++++------- src/calibre/ebooks/metadata/xmp.py | 13 ++++-- src/calibre/utils/formatter_functions.py | 5 ++- 12 files changed, 112 insertions(+), 59 deletions(-) diff --git a/src/calibre/db/cache.py b/src/calibre/db/cache.py index aac848f7d5..dbf91f73de 100644 --- a/src/calibre/db/cache.py +++ b/src/calibre/db/cache.py @@ -325,19 +325,16 @@ class Cache: aut_list = [adata[i] for i in author_ids] aum = [] aus = {} - aul = {} for rec in aut_list: aut = rec['name'] aum.append(aut) aus[aut] = rec['sort'] - aul[aut] = rec['link'] mi.title = self._field_for('title', book_id, default_value=_('Unknown')) mi.authors = aum mi.author_sort = self._field_for('author_sort', book_id, default_value=_('Unknown')) mi.author_sort_map = aus - mi.author_link_map = aul mi.comments = self._field_for('comments', book_id) mi.publisher = self._field_for('publisher', book_id) n = utcnow() diff --git a/src/calibre/db/lazy.py b/src/calibre/db/lazy.py index 01053ccb8c..07a73a81aa 100644 --- a/src/calibre/db/lazy.py +++ b/src/calibre/db/lazy.py @@ -315,7 +315,7 @@ getters = { for field in ('comments', 
'publisher', 'identifiers', 'series', 'rating'): getters[field] = simple_getter(field) -for field in ('author_sort_map', 'author_link_map'): +for field in ('author_sort_map',): getters[field] = adata_getter(field) for field in ('timestamp', 'pubdate', 'last_modified'): diff --git a/src/calibre/db/restore.py b/src/calibre/db/restore.py index 726804f2a1..2e03435713 100644 --- a/src/calibre/db/restore.py +++ b/src/calibre/db/restore.py @@ -5,18 +5,22 @@ __license__ = 'GPL v3' __copyright__ = '2010, Kovid Goyal ' __docformat__ = 'restructuredtext en' -import re, os, traceback, shutil, time -from threading import Thread +import os +import re +import shutil +import time +import traceback +from contextlib import suppress from operator import itemgetter +from threading import Thread -from calibre.ptempfile import TemporaryDirectory -from calibre.ebooks.metadata.opf2 import OPF +from calibre import force_unicode, isbytestring +from calibre.constants import filesystem_encoding from calibre.db.backend import DB, DBPrefs from calibre.db.cache import Cache -from calibre.constants import filesystem_encoding +from calibre.ebooks.metadata.opf2 import OPF +from calibre.ptempfile import TemporaryDirectory from calibre.utils.date import utcfromtimestamp -from calibre import isbytestring, force_unicode -from polyglot.builtins import iteritems NON_EBOOK_EXTENSIONS = frozenset(( 'jpg', 'jpeg', 'gif', 'png', 'bmp', @@ -59,7 +63,7 @@ class Restore(Thread): self.mismatched_dirs = [] self.successes = 0 self.tb = None - self.authors_links = {} + self.link_maps = {} @property def errors_occurred(self): @@ -209,11 +213,13 @@ class Restore(Thread): else: self.mismatched_dirs.append(dirpath) - alm = mi.get('author_link_map', {}) - for author, link in iteritems(alm): - existing_link, timestamp = self.authors_links.get(author, (None, None)) - if existing_link is None or existing_link != link and timestamp < mi.timestamp: - self.authors_links[author] = (link, mi.timestamp) + alm = 
mi.get('link_maps', {}) + for field, lmap in alm.items(): + dest = self.link_maps.setdefault(field, {}) + for item, link in lmap.items(): + existing_link, timestamp = dest.get(item, (None, None)) + if existing_link is None or existing_link != link and timestamp < mi.timestamp: + dest[item] = link, mi.timestamp def create_cc_metadata(self): self.books.sort(key=itemgetter('timestamp')) @@ -262,10 +268,9 @@ class Restore(Thread): self.failed_restores.append((book, traceback.format_exc())) self.progress_callback(book['mi'].title, i+1) - id_map = db.get_item_ids('authors', [author for author in self.authors_links]) - link_map = {aid:self.authors_links[name][0] for name, aid in iteritems(id_map) if aid is not None} - if link_map: - db.set_link_for_authors(link_map) + for field, lmap in self.link_maps.items(): + with suppress(Exception): + db.set_link_map(field, {k:v[0] for k, v in lmap.items()}) db.close() def replace_db(self): diff --git a/src/calibre/db/tests/base.py b/src/calibre/db/tests/base.py index 3ccdf5b643..010f1d744e 100644 --- a/src/calibre/db/tests/base.py +++ b/src/calibre/db/tests/base.py @@ -102,7 +102,7 @@ class BaseTest(unittest.TestCase): self.assertEqual(allfk1, allfk2) all_keys = {'format_metadata', 'id', 'application_id', - 'author_sort_map', 'author_link_map', 'book_size', + 'author_sort_map', 'link_maps', 'book_size', 'ondevice_col', 'last_modified', 'has_cover', 'cover_data'}.union(allfk1) for attr in all_keys: diff --git a/src/calibre/db/tests/legacy.py b/src/calibre/db/tests/legacy.py index 2ba66b3ae8..1a7b3b9e81 100644 --- a/src/calibre/db/tests/legacy.py +++ b/src/calibre/db/tests/legacy.py @@ -575,7 +575,6 @@ class LegacyTest(BaseTest): omi = [db.get_metadata(x) for x in (0, 1, 2)] nmi = [ndb.get_metadata(x) for x in (0, 1, 2)] self.assertEqual([x.author_sort_map for x in omi], [x.author_sort_map for x in nmi]) - self.assertEqual([x.author_link_map for x in omi], [x.author_link_map for x in nmi]) db.close() ndb = 
self.init_legacy(self.cloned_library) diff --git a/src/calibre/ebooks/metadata/book/__init__.py b/src/calibre/ebooks/metadata/book/__init__.py index 6835319590..93cc9207eb 100644 --- a/src/calibre/ebooks/metadata/book/__init__.py +++ b/src/calibre/ebooks/metadata/book/__init__.py @@ -85,8 +85,8 @@ CALIBRE_METADATA_FIELDS = frozenset(( # a dict of user category names, where the value is a list of item names # from the book that are in that category 'user_categories', - # a dict of author to an associated hyperlink - 'author_link_map', + # a dict of items to associated hyperlink + 'link_maps', )) ALL_METADATA_FIELDS = SOCIAL_METADATA_FIELDS.union( diff --git a/src/calibre/ebooks/metadata/book/base.py b/src/calibre/ebooks/metadata/book/base.py index df7fcad934..b28ed647ef 100644 --- a/src/calibre/ebooks/metadata/book/base.py +++ b/src/calibre/ebooks/metadata/book/base.py @@ -45,7 +45,6 @@ NULL_VALUES = { 'author_sort' : _('Unknown'), 'title' : _('Unknown'), 'user_categories' : {}, - 'author_link_map' : {}, 'link_maps' : {}, 'language' : 'und' } diff --git a/src/calibre/ebooks/metadata/opf2.py b/src/calibre/ebooks/metadata/opf2.py index b56eb178f4..58b9928bf4 100644 --- a/src/calibre/ebooks/metadata/opf2.py +++ b/src/calibre/ebooks/metadata/opf2.py @@ -17,6 +17,7 @@ import os import re import sys import uuid +from contextlib import suppress from lxml import etree from calibre import guess_type, prints @@ -458,6 +459,37 @@ class MetadataField: obj.set_text(elem, self.renderer(val)) +class LinkMapsField: + + def __get__(self, obj, type=None): + ans = obj.get_metadata_element('link_maps') + if ans is not None: + ans = obj.get_text(ans) + if ans: + with suppress(Exception): + return json.loads(ans) + ans = obj.get_metadata_element('author_link_map') + if ans is not None: + ans = obj.get_text(ans) + if ans: + with suppress(Exception): + return {'authors': json.loads(ans)} + return {} + + def __set__(self, obj, val): + elem = obj.get_metadata_element('author_link_map') + if 
elem is not None: + elem.getparent().remove(elem) + elem = obj.get_metadata_element('link_maps') + if not val: + if elem is not None: + elem.getparent().remove(elem) + return + if elem is None: + elem = obj.create_metadata_element('link_maps', is_dc=False) + obj.set_text(elem, dump_dict(val)) + + class TitleSortField(MetadataField): def __get__(self, obj, type=None): @@ -593,10 +625,7 @@ class OPF: # {{{ user_categories = MetadataField('user_categories', is_dc=False, formatter=json.loads, renderer=dump_dict) - author_link_map = MetadataField('author_link_map', is_dc=False, - formatter=json.loads, renderer=dump_dict) - link_map = MetadataField('link_maps', is_dc=False, - formatter=json.loads, renderer=dump_dict) + link_maps = LinkMapsField() def __init__(self, stream, basedir=os.getcwd(), unquote_urls=True, populate_spine=True, try_to_guess_cover=False, preparsed_opf=None, read_toc=True): @@ -667,6 +696,7 @@ class OPF: # {{{ ans.set_user_metadata(n, v) ans.set_identifiers(self.get_identifiers()) + ans.link_maps = self.link_maps return ans @@ -1312,7 +1342,7 @@ class OPF: # {{{ for attr in ('title', 'authors', 'author_sort', 'title_sort', 'publisher', 'series', 'series_index', 'rating', 'isbn', 'tags', 'category', 'comments', 'book_producer', - 'pubdate', 'user_categories', 'author_link_map', 'link_map'): + 'pubdate', 'user_categories', 'link_maps'): val = getattr(mi, attr, None) if attr == 'rating' and val: val = float(val) @@ -1673,9 +1703,7 @@ def metadata_to_opf(mi, as_string=True, default_lang=None): def meta(n, c): return factory('meta', name='calibre:' + n, content=c) - if getattr(mi, 'author_link_map', None) is not None: - meta('author_link_map', dump_dict(mi.author_link_map)) - if getattr(mi, 'link_maps', None) is not None: + if not mi.is_null('link_maps'): meta('link_maps', dump_dict(mi.link_maps)) if mi.series: meta('series', mi.series) diff --git a/src/calibre/ebooks/metadata/opf3.py b/src/calibre/ebooks/metadata/opf3.py index 4261b7a423..98f3f4a31b 
100644 --- a/src/calibre/ebooks/metadata/opf3.py +++ b/src/calibre/ebooks/metadata/opf3.py @@ -865,16 +865,31 @@ def dict_reader(name, load=json.loads, try2=True): read_user_categories = dict_reader('user_categories') -read_author_link_map = dict_reader('author_link_map') +_read_link_maps = dict_reader('link_maps') +_read_author_link_map = dict_reader('author_link_map') -def dict_writer(name, serialize=dump_dict, remove2=True): +def read_link_maps(root, prefixes, refines): + ans = _read_link_maps(root, prefixes, refines) + if ans is not None: + return ans + ans = _read_author_link_map(root, prefixes, refines) + if ans: + ans = {k: v for k, v in ans.items() if v} + if ans: + return {'authors': ans} + + +def dict_writer(name, serialize=dump_dict, remove2=True, extra_remove=''): pq = f'{CALIBRE_PREFIX}:{name}' def writer(root, prefixes, refines, val): if remove2: for meta in XPath('./opf:metadata/opf:meta[@name="calibre:%s"]' % name)(root): remove_element(meta, refines) + if extra_remove: + for meta in XPath('./opf:metadata/opf:meta[@name="calibre:%s"]' % extra_remove)(root): + remove_element(meta, refines) for meta in XPath('./opf:metadata/opf:meta[@property]')(root): prop = expand_prefix(meta.get('property'), prefixes) if prop.lower() == pq: @@ -889,7 +904,7 @@ def dict_writer(name, serialize=dump_dict, remove2=True): set_user_categories = dict_writer('user_categories') -set_author_link_map = dict_writer('author_link_map') +set_link_maps = dict_writer('link_maps', extra_remove='author_link_map') def deserialize_user_metadata(val): @@ -1054,7 +1069,7 @@ def read_metadata(root, ver=None, return_extra_data=False): s, si = read_series(root, prefixes, refines) if s: ans.series, ans.series_index = s, si - ans.author_link_map = read_author_link_map(root, prefixes, refines) or ans.author_link_map + ans.link_maps = read_link_maps(root, prefixes, refines) or ans.link_maps ans.user_categories = read_user_categories(root, prefixes, refines) or ans.user_categories for name, fm 
in iteritems(read_user_metadata(root, prefixes, refines) or {}): ans.set_user_metadata(name, fm) @@ -1105,8 +1120,8 @@ def apply_metadata(root, mi, cover_prefix='', cover_data=None, apply_null=False, set_rating(root, prefixes, refines, mi.rating) if ok('series'): set_series(root, prefixes, refines, mi.series, mi.series_index or 1) - if ok('author_link_map'): - set_author_link_map(root, prefixes, refines, getattr(mi, 'author_link_map', None)) + if ok('link_maps'): + set_link_maps(root, prefixes, refines, getattr(mi, 'link_maps', None)) if ok('user_categories'): set_user_categories(root, prefixes, refines, getattr(mi, 'user_categories', None)) # We ignore apply_null for the next two to match the behavior with opf2.py diff --git a/src/calibre/ebooks/metadata/opf3_test.py b/src/calibre/ebooks/metadata/opf3_test.py index 3dd8f0657e..db06ad5f78 100644 --- a/src/calibre/ebooks/metadata/opf3_test.py +++ b/src/calibre/ebooks/metadata/opf3_test.py @@ -2,29 +2,29 @@ # License: GPLv3 Copyright: 2016, Kovid Goyal +import unittest from collections import defaultdict from io import BytesIO -import unittest from calibre.ebooks.metadata.book import ALL_METADATA_FIELDS -from calibre.utils.xml_parse import safe_xml_fromstring from calibre.ebooks.metadata.opf2 import OPF from calibre.ebooks.metadata.opf3 import ( - parse_prefixes, reserved_prefixes, expand_prefix, read_identifiers, - read_metadata, set_identifiers, XPath, set_application_id, read_title, - read_refines, set_title, read_title_sort, read_languages, set_languages, - read_authors, Author, set_authors, ensure_prefix, read_prefixes, - read_book_producers, set_book_producers, read_timestamp, set_timestamp, - read_pubdate, set_pubdate, CALIBRE_PREFIX, read_last_modified, read_comments, - set_comments, read_publisher, set_publisher, read_tags, set_tags, read_rating, - set_rating, read_series, set_series, read_user_metadata, set_user_metadata, - read_author_link_map, read_user_categories, set_author_link_map, 
set_user_categories, - apply_metadata, read_raster_cover, ensure_is_only_raster_cover + CALIBRE_PREFIX, Author, XPath, apply_metadata, ensure_is_only_raster_cover, + ensure_prefix, expand_prefix, parse_prefixes, read_authors, read_book_producers, + read_comments, read_identifiers, read_languages, read_last_modified, read_link_maps, + read_metadata, read_prefixes, read_pubdate, read_publisher, read_raster_cover, + read_rating, read_refines, read_series, read_tags, read_timestamp, read_title, + read_title_sort, read_user_categories, read_user_metadata, reserved_prefixes, + set_application_id, set_authors, set_book_producers, set_comments, set_identifiers, + set_languages, set_link_maps, set_pubdate, set_publisher, set_rating, set_series, + set_tags, set_timestamp, set_title, set_user_categories, set_user_metadata, ) + # This import is needed to prevent a test from running slowly from calibre.ebooks.oeb.polish.pretty import pretty_opf, pretty_xml_tree # noqa +from calibre.utils.xml_parse import safe_xml_fromstring -read_author_link_map, read_user_categories, set_author_link_map, set_user_categories +read_user_categories, set_user_categories, read_link_maps, set_link_maps TEMPLATE = '''{metadata}{manifest}''' % CALIBRE_PREFIX # noqa default_refines = defaultdict(list) @@ -288,7 +288,7 @@ class TestOPF3(unittest.TestCase): f = globals()['set_' + name] f(root, read_prefixes(root), read_refines(root), val) return rt(root, name) - for name in 'author_link_map user_categories'.split(): + for name in 'link_maps user_categories'.split(): root = self.get_opf('''''' % name) self.ae({'1':1}, rt(root, name)) root = self.get_opf(f'''{{"2":2}}''') @@ -328,7 +328,7 @@ class TestOPF3(unittest.TestCase): conversion docs ebook - + diff --git a/src/calibre/ebooks/metadata/xmp.py b/src/calibre/ebooks/metadata/xmp.py index 17f8715665..b7c72992d3 100644 --- a/src/calibre/ebooks/metadata/xmp.py +++ b/src/calibre/ebooks/metadata/xmp.py @@ -289,13 +289,20 @@ def 
metadata_from_xmp_packet(raw_bytes): if val: setattr(mi, x, val) break - for x in ('author_link_map', 'user_categories'): + for x in ('link_maps', 'user_categories'): val = first_simple('//calibre:'+x, root) if val: try: setattr(mi, x, json.loads(val)) - except: + except Exception: pass + elif x == 'link_maps': + val = first_simple('//calibre:author_link_map', root) + if val: + try: + setattr(mi, x, {'authors': json.loads(val)}) + except Exception: + pass languages = multiple_sequences('//dc:language', root) if languages: @@ -526,7 +533,7 @@ def metadata_to_xmp_packet(mi): create_series(calibre, mi.series, mi.series_index) if not mi.is_null('timestamp'): create_simple_property(calibre, 'calibre:timestamp', isoformat(mi.timestamp, as_utc=False)) - for x in ('author_link_map', 'user_categories'): + for x in ('link_maps', 'user_categories'): val = getattr(mi, x, None) if val: create_simple_property(calibre, 'calibre:'+x, dump_dict(val)) diff --git a/src/calibre/utils/formatter_functions.py b/src/calibre/utils/formatter_functions.py index 30540ca2d4..89e0150693 100644 --- a/src/calibre/utils/formatter_functions.py +++ b/src/calibre/utils/formatter_functions.py @@ -2014,7 +2014,10 @@ class BuiltinAuthorLinks(BuiltinFormatterFunction): def evaluate(self, formatter, kwargs, mi, locals, val_sep, pair_sep): if hasattr(mi, '_proxy_metadata'): - link_data = mi._proxy_metadata.author_link_map + link_data = mi._proxy_metadata.link_maps + if not link_data: + return '' + link_data = link_data.get('authors') if not link_data: return '' names = sorted(link_data.keys(), key=sort_key)