Get rid of author_link_map

Also implement support for link_maps metadata in EPUB3 and PDF.
Reading proceeds by returning the link_maps if present, otherwise looking
for a legacy author_link_map and returning a link_maps constructed from it.

Writing simply sets link_maps, which as per the scheme above takes
precedence anyway.
This commit is contained in:
Kovid Goyal 2023-03-30 13:41:20 +05:30
parent 44001e2c60
commit f2445ad87a
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
12 changed files with 112 additions and 59 deletions

View File

@ -325,19 +325,16 @@ class Cache:
aut_list = [adata[i] for i in author_ids] aut_list = [adata[i] for i in author_ids]
aum = [] aum = []
aus = {} aus = {}
aul = {}
for rec in aut_list: for rec in aut_list:
aut = rec['name'] aut = rec['name']
aum.append(aut) aum.append(aut)
aus[aut] = rec['sort'] aus[aut] = rec['sort']
aul[aut] = rec['link']
mi.title = self._field_for('title', book_id, mi.title = self._field_for('title', book_id,
default_value=_('Unknown')) default_value=_('Unknown'))
mi.authors = aum mi.authors = aum
mi.author_sort = self._field_for('author_sort', book_id, mi.author_sort = self._field_for('author_sort', book_id,
default_value=_('Unknown')) default_value=_('Unknown'))
mi.author_sort_map = aus mi.author_sort_map = aus
mi.author_link_map = aul
mi.comments = self._field_for('comments', book_id) mi.comments = self._field_for('comments', book_id)
mi.publisher = self._field_for('publisher', book_id) mi.publisher = self._field_for('publisher', book_id)
n = utcnow() n = utcnow()

View File

@ -315,7 +315,7 @@ getters = {
for field in ('comments', 'publisher', 'identifiers', 'series', 'rating'): for field in ('comments', 'publisher', 'identifiers', 'series', 'rating'):
getters[field] = simple_getter(field) getters[field] = simple_getter(field)
for field in ('author_sort_map', 'author_link_map'): for field in ('author_sort_map',):
getters[field] = adata_getter(field) getters[field] = adata_getter(field)
for field in ('timestamp', 'pubdate', 'last_modified'): for field in ('timestamp', 'pubdate', 'last_modified'):

View File

@ -5,18 +5,22 @@ __license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>' __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import re, os, traceback, shutil, time import os
from threading import Thread import re
import shutil
import time
import traceback
from contextlib import suppress
from operator import itemgetter from operator import itemgetter
from threading import Thread
from calibre.ptempfile import TemporaryDirectory from calibre import force_unicode, isbytestring
from calibre.ebooks.metadata.opf2 import OPF from calibre.constants import filesystem_encoding
from calibre.db.backend import DB, DBPrefs from calibre.db.backend import DB, DBPrefs
from calibre.db.cache import Cache from calibre.db.cache import Cache
from calibre.constants import filesystem_encoding from calibre.ebooks.metadata.opf2 import OPF
from calibre.ptempfile import TemporaryDirectory
from calibre.utils.date import utcfromtimestamp from calibre.utils.date import utcfromtimestamp
from calibre import isbytestring, force_unicode
from polyglot.builtins import iteritems
NON_EBOOK_EXTENSIONS = frozenset(( NON_EBOOK_EXTENSIONS = frozenset((
'jpg', 'jpeg', 'gif', 'png', 'bmp', 'jpg', 'jpeg', 'gif', 'png', 'bmp',
@ -59,7 +63,7 @@ class Restore(Thread):
self.mismatched_dirs = [] self.mismatched_dirs = []
self.successes = 0 self.successes = 0
self.tb = None self.tb = None
self.authors_links = {} self.link_maps = {}
@property @property
def errors_occurred(self): def errors_occurred(self):
@ -209,11 +213,13 @@ class Restore(Thread):
else: else:
self.mismatched_dirs.append(dirpath) self.mismatched_dirs.append(dirpath)
alm = mi.get('author_link_map', {}) alm = mi.get('link_maps', {})
for author, link in iteritems(alm): for field, lmap in alm.items():
existing_link, timestamp = self.authors_links.get(author, (None, None)) dest = self.link_maps.setdefault(field, {})
for item, link in lmap.items():
existing_link, timestamp = dest.get(item, (None, None))
if existing_link is None or existing_link != link and timestamp < mi.timestamp: if existing_link is None or existing_link != link and timestamp < mi.timestamp:
self.authors_links[author] = (link, mi.timestamp) dest[item] = link, mi.timestamp
def create_cc_metadata(self): def create_cc_metadata(self):
self.books.sort(key=itemgetter('timestamp')) self.books.sort(key=itemgetter('timestamp'))
@ -262,10 +268,9 @@ class Restore(Thread):
self.failed_restores.append((book, traceback.format_exc())) self.failed_restores.append((book, traceback.format_exc()))
self.progress_callback(book['mi'].title, i+1) self.progress_callback(book['mi'].title, i+1)
id_map = db.get_item_ids('authors', [author for author in self.authors_links]) for field, lmap in self.link_maps.items():
link_map = {aid:self.authors_links[name][0] for name, aid in iteritems(id_map) if aid is not None} with suppress(Exception):
if link_map: db.set_link_map(field, {k:v[0] for k, v in lmap.items()})
db.set_link_for_authors(link_map)
db.close() db.close()
def replace_db(self): def replace_db(self):

View File

@ -102,7 +102,7 @@ class BaseTest(unittest.TestCase):
self.assertEqual(allfk1, allfk2) self.assertEqual(allfk1, allfk2)
all_keys = {'format_metadata', 'id', 'application_id', all_keys = {'format_metadata', 'id', 'application_id',
'author_sort_map', 'author_link_map', 'book_size', 'author_sort_map', 'link_maps', 'book_size',
'ondevice_col', 'last_modified', 'has_cover', 'ondevice_col', 'last_modified', 'has_cover',
'cover_data'}.union(allfk1) 'cover_data'}.union(allfk1)
for attr in all_keys: for attr in all_keys:

View File

@ -575,7 +575,6 @@ class LegacyTest(BaseTest):
omi = [db.get_metadata(x) for x in (0, 1, 2)] omi = [db.get_metadata(x) for x in (0, 1, 2)]
nmi = [ndb.get_metadata(x) for x in (0, 1, 2)] nmi = [ndb.get_metadata(x) for x in (0, 1, 2)]
self.assertEqual([x.author_sort_map for x in omi], [x.author_sort_map for x in nmi]) self.assertEqual([x.author_sort_map for x in omi], [x.author_sort_map for x in nmi])
self.assertEqual([x.author_link_map for x in omi], [x.author_link_map for x in nmi])
db.close() db.close()
ndb = self.init_legacy(self.cloned_library) ndb = self.init_legacy(self.cloned_library)

View File

@ -85,8 +85,8 @@ CALIBRE_METADATA_FIELDS = frozenset((
# a dict of user category names, where the value is a list of item names # a dict of user category names, where the value is a list of item names
# from the book that are in that category # from the book that are in that category
'user_categories', 'user_categories',
# a dict of author to an associated hyperlink # a dict of items to associated hyperlink
'author_link_map', 'link_maps',
)) ))
ALL_METADATA_FIELDS = SOCIAL_METADATA_FIELDS.union( ALL_METADATA_FIELDS = SOCIAL_METADATA_FIELDS.union(

View File

@ -45,7 +45,6 @@ NULL_VALUES = {
'author_sort' : _('Unknown'), 'author_sort' : _('Unknown'),
'title' : _('Unknown'), 'title' : _('Unknown'),
'user_categories' : {}, 'user_categories' : {},
'author_link_map' : {},
'link_maps' : {}, 'link_maps' : {},
'language' : 'und' 'language' : 'und'
} }

View File

@ -17,6 +17,7 @@ import os
import re import re
import sys import sys
import uuid import uuid
from contextlib import suppress
from lxml import etree from lxml import etree
from calibre import guess_type, prints from calibre import guess_type, prints
@ -458,6 +459,37 @@ class MetadataField:
obj.set_text(elem, self.renderer(val)) obj.set_text(elem, self.renderer(val))
class LinkMapsField:
def __get__(self, obj, type=None):
ans = obj.get_metadata_element('link_maps')
if ans is not None:
ans = obj.get_text(ans)
if ans:
with suppress(Exception):
return json.loads(ans)
ans = obj.get_metadata_element('author_link_map')
if ans is not None:
ans = obj.get_text(ans)
if ans:
with suppress(Exception):
return {'authors': json.loads(ans)}
return {}
def __set__(self, obj, val):
elem = obj.get_metadata_element('author_link_map')
if elem is not None:
elem.getparent().remove(elem)
elem = obj.get_metadata_element('link_maps')
if not val:
if elem is not None:
elem.getparent().remove(elem)
return
if elem is None:
elem = obj.create_metadata_element('link_maps', is_dc=False)
obj.set_text(elem, dump_dict(val))
class TitleSortField(MetadataField): class TitleSortField(MetadataField):
def __get__(self, obj, type=None): def __get__(self, obj, type=None):
@ -593,10 +625,7 @@ class OPF: # {{{
user_categories = MetadataField('user_categories', is_dc=False, user_categories = MetadataField('user_categories', is_dc=False,
formatter=json.loads, formatter=json.loads,
renderer=dump_dict) renderer=dump_dict)
author_link_map = MetadataField('author_link_map', is_dc=False, link_maps = LinkMapsField()
formatter=json.loads, renderer=dump_dict)
link_map = MetadataField('link_maps', is_dc=False,
formatter=json.loads, renderer=dump_dict)
def __init__(self, stream, basedir=os.getcwd(), unquote_urls=True, def __init__(self, stream, basedir=os.getcwd(), unquote_urls=True,
populate_spine=True, try_to_guess_cover=False, preparsed_opf=None, read_toc=True): populate_spine=True, try_to_guess_cover=False, preparsed_opf=None, read_toc=True):
@ -667,6 +696,7 @@ class OPF: # {{{
ans.set_user_metadata(n, v) ans.set_user_metadata(n, v)
ans.set_identifiers(self.get_identifiers()) ans.set_identifiers(self.get_identifiers())
ans.link_maps = self.link_maps
return ans return ans
@ -1312,7 +1342,7 @@ class OPF: # {{{
for attr in ('title', 'authors', 'author_sort', 'title_sort', for attr in ('title', 'authors', 'author_sort', 'title_sort',
'publisher', 'series', 'series_index', 'rating', 'publisher', 'series', 'series_index', 'rating',
'isbn', 'tags', 'category', 'comments', 'book_producer', 'isbn', 'tags', 'category', 'comments', 'book_producer',
'pubdate', 'user_categories', 'author_link_map', 'link_map'): 'pubdate', 'user_categories', 'link_maps'):
val = getattr(mi, attr, None) val = getattr(mi, attr, None)
if attr == 'rating' and val: if attr == 'rating' and val:
val = float(val) val = float(val)
@ -1673,9 +1703,7 @@ def metadata_to_opf(mi, as_string=True, default_lang=None):
def meta(n, c): def meta(n, c):
return factory('meta', name='calibre:' + n, content=c) return factory('meta', name='calibre:' + n, content=c)
if getattr(mi, 'author_link_map', None) is not None: if not mi.is_null('link_maps'):
meta('author_link_map', dump_dict(mi.author_link_map))
if getattr(mi, 'link_maps', None) is not None:
meta('link_maps', dump_dict(mi.link_maps)) meta('link_maps', dump_dict(mi.link_maps))
if mi.series: if mi.series:
meta('series', mi.series) meta('series', mi.series)

View File

@ -865,16 +865,31 @@ def dict_reader(name, load=json.loads, try2=True):
read_user_categories = dict_reader('user_categories') read_user_categories = dict_reader('user_categories')
read_author_link_map = dict_reader('author_link_map') _read_link_maps = dict_reader('link_maps')
_read_author_link_map = dict_reader('author_link_map')
def dict_writer(name, serialize=dump_dict, remove2=True): def read_link_maps(root, prefixes, refines):
ans = _read_link_maps(root, prefixes, refines)
if ans is not None:
return ans
ans = _read_author_link_map(root, prefixes, refines)
if ans:
ans = {k: v for k, v in ans.items() if v}
if ans:
return {'authors': ans}
def dict_writer(name, serialize=dump_dict, remove2=True, extra_remove=''):
pq = f'{CALIBRE_PREFIX}:{name}' pq = f'{CALIBRE_PREFIX}:{name}'
def writer(root, prefixes, refines, val): def writer(root, prefixes, refines, val):
if remove2: if remove2:
for meta in XPath('./opf:metadata/opf:meta[@name="calibre:%s"]' % name)(root): for meta in XPath('./opf:metadata/opf:meta[@name="calibre:%s"]' % name)(root):
remove_element(meta, refines) remove_element(meta, refines)
if extra_remove:
for meta in XPath('./opf:metadata/opf:meta[@name="calibre:%s"]' % extra_remove)(root):
remove_element(meta, refines)
for meta in XPath('./opf:metadata/opf:meta[@property]')(root): for meta in XPath('./opf:metadata/opf:meta[@property]')(root):
prop = expand_prefix(meta.get('property'), prefixes) prop = expand_prefix(meta.get('property'), prefixes)
if prop.lower() == pq: if prop.lower() == pq:
@ -889,7 +904,7 @@ def dict_writer(name, serialize=dump_dict, remove2=True):
set_user_categories = dict_writer('user_categories') set_user_categories = dict_writer('user_categories')
set_author_link_map = dict_writer('author_link_map') set_link_maps = dict_writer('link_maps', extra_remove='author_link_map')
def deserialize_user_metadata(val): def deserialize_user_metadata(val):
@ -1054,7 +1069,7 @@ def read_metadata(root, ver=None, return_extra_data=False):
s, si = read_series(root, prefixes, refines) s, si = read_series(root, prefixes, refines)
if s: if s:
ans.series, ans.series_index = s, si ans.series, ans.series_index = s, si
ans.author_link_map = read_author_link_map(root, prefixes, refines) or ans.author_link_map ans.link_maps = read_link_maps(root, prefixes, refines) or ans.link_maps
ans.user_categories = read_user_categories(root, prefixes, refines) or ans.user_categories ans.user_categories = read_user_categories(root, prefixes, refines) or ans.user_categories
for name, fm in iteritems(read_user_metadata(root, prefixes, refines) or {}): for name, fm in iteritems(read_user_metadata(root, prefixes, refines) or {}):
ans.set_user_metadata(name, fm) ans.set_user_metadata(name, fm)
@ -1105,8 +1120,8 @@ def apply_metadata(root, mi, cover_prefix='', cover_data=None, apply_null=False,
set_rating(root, prefixes, refines, mi.rating) set_rating(root, prefixes, refines, mi.rating)
if ok('series'): if ok('series'):
set_series(root, prefixes, refines, mi.series, mi.series_index or 1) set_series(root, prefixes, refines, mi.series, mi.series_index or 1)
if ok('author_link_map'): if ok('link_maps'):
set_author_link_map(root, prefixes, refines, getattr(mi, 'author_link_map', None)) set_link_maps(root, prefixes, refines, getattr(mi, 'link_maps', None))
if ok('user_categories'): if ok('user_categories'):
set_user_categories(root, prefixes, refines, getattr(mi, 'user_categories', None)) set_user_categories(root, prefixes, refines, getattr(mi, 'user_categories', None))
# We ignore apply_null for the next two to match the behavior with opf2.py # We ignore apply_null for the next two to match the behavior with opf2.py

View File

@ -2,29 +2,29 @@
# License: GPLv3 Copyright: 2016, Kovid Goyal <kovid at kovidgoyal.net> # License: GPLv3 Copyright: 2016, Kovid Goyal <kovid at kovidgoyal.net>
import unittest
from collections import defaultdict from collections import defaultdict
from io import BytesIO from io import BytesIO
import unittest
from calibre.ebooks.metadata.book import ALL_METADATA_FIELDS from calibre.ebooks.metadata.book import ALL_METADATA_FIELDS
from calibre.utils.xml_parse import safe_xml_fromstring
from calibre.ebooks.metadata.opf2 import OPF from calibre.ebooks.metadata.opf2 import OPF
from calibre.ebooks.metadata.opf3 import ( from calibre.ebooks.metadata.opf3 import (
parse_prefixes, reserved_prefixes, expand_prefix, read_identifiers, CALIBRE_PREFIX, Author, XPath, apply_metadata, ensure_is_only_raster_cover,
read_metadata, set_identifiers, XPath, set_application_id, read_title, ensure_prefix, expand_prefix, parse_prefixes, read_authors, read_book_producers,
read_refines, set_title, read_title_sort, read_languages, set_languages, read_comments, read_identifiers, read_languages, read_last_modified, read_link_maps,
read_authors, Author, set_authors, ensure_prefix, read_prefixes, read_metadata, read_prefixes, read_pubdate, read_publisher, read_raster_cover,
read_book_producers, set_book_producers, read_timestamp, set_timestamp, read_rating, read_refines, read_series, read_tags, read_timestamp, read_title,
read_pubdate, set_pubdate, CALIBRE_PREFIX, read_last_modified, read_comments, read_title_sort, read_user_categories, read_user_metadata, reserved_prefixes,
set_comments, read_publisher, set_publisher, read_tags, set_tags, read_rating, set_application_id, set_authors, set_book_producers, set_comments, set_identifiers,
set_rating, read_series, set_series, read_user_metadata, set_user_metadata, set_languages, set_link_maps, set_pubdate, set_publisher, set_rating, set_series,
read_author_link_map, read_user_categories, set_author_link_map, set_user_categories, set_tags, set_timestamp, set_title, set_user_categories, set_user_metadata,
apply_metadata, read_raster_cover, ensure_is_only_raster_cover
) )
# This import is needed to prevent a test from running slowly # This import is needed to prevent a test from running slowly
from calibre.ebooks.oeb.polish.pretty import pretty_opf, pretty_xml_tree # noqa from calibre.ebooks.oeb.polish.pretty import pretty_opf, pretty_xml_tree # noqa
from calibre.utils.xml_parse import safe_xml_fromstring
read_author_link_map, read_user_categories, set_author_link_map, set_user_categories read_user_categories, set_user_categories, read_link_maps, set_link_maps
TEMPLATE = '''<package xmlns="http://www.idpf.org/2007/opf" version="3.0" prefix="calibre: %s" unique-identifier="uid"><metadata xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:opf="http://www.idpf.org/2007/opf">{metadata}</metadata><manifest>{manifest}</manifest></package>''' % CALIBRE_PREFIX # noqa TEMPLATE = '''<package xmlns="http://www.idpf.org/2007/opf" version="3.0" prefix="calibre: %s" unique-identifier="uid"><metadata xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:opf="http://www.idpf.org/2007/opf">{metadata}</metadata><manifest>{manifest}</manifest></package>''' % CALIBRE_PREFIX # noqa
default_refines = defaultdict(list) default_refines = defaultdict(list)
@ -288,7 +288,7 @@ class TestOPF3(unittest.TestCase):
f = globals()['set_' + name] f = globals()['set_' + name]
f(root, read_prefixes(root), read_refines(root), val) f(root, read_prefixes(root), read_refines(root), val)
return rt(root, name) return rt(root, name)
for name in 'author_link_map user_categories'.split(): for name in 'link_maps user_categories'.split():
root = self.get_opf('''<meta name="calibre:%s" content='{"1":1}'/>''' % name) root = self.get_opf('''<meta name="calibre:%s" content='{"1":1}'/>''' % name)
self.ae({'1':1}, rt(root, name)) self.ae({'1':1}, rt(root, name))
root = self.get_opf(f'''<meta name="calibre:{name}" content='{{"1":1}}'/><meta property="calibre:{name}">{{"2":2}}</meta>''') root = self.get_opf(f'''<meta name="calibre:{name}" content='{{"1":1}}'/><meta property="calibre:{name}">{{"2":2}}</meta>''')
@ -328,7 +328,7 @@ class TestOPF3(unittest.TestCase):
<dc:subject>conversion</dc:subject> <dc:subject>conversion</dc:subject>
<dc:subject>docs</dc:subject> <dc:subject>docs</dc:subject>
<dc:subject>ebook</dc:subject> <dc:subject>ebook</dc:subject>
<meta content="{&quot;Kovid Goyal&quot;: &quot;&quot;}" name="calibre:author_link_map"/> <meta content="{&quot;Kovid Goyal&quot;: &quot;https://kovidgoyal.net&quot;}" name="calibre:author_link_map"/>
<meta content="Demos" name="calibre:series"/> <meta content="Demos" name="calibre:series"/>
<meta content="1" name="calibre:series_index"/> <meta content="1" name="calibre:series_index"/>
<meta content="10" name="calibre:rating"/> <meta content="10" name="calibre:rating"/>

View File

@ -289,12 +289,19 @@ def metadata_from_xmp_packet(raw_bytes):
if val: if val:
setattr(mi, x, val) setattr(mi, x, val)
break break
for x in ('author_link_map', 'user_categories'): for x in ('link_maps', 'user_categories'):
val = first_simple('//calibre:'+x, root) val = first_simple('//calibre:'+x, root)
if val: if val:
try: try:
setattr(mi, x, json.loads(val)) setattr(mi, x, json.loads(val))
except: except Exception:
pass
elif x == 'link_maps':
val = first_simple('//calibre:author_link_map', root)
if val:
try:
setattr(mi, x, {'authors': json.loads(val)})
except Exception:
pass pass
languages = multiple_sequences('//dc:language', root) languages = multiple_sequences('//dc:language', root)
@ -526,7 +533,7 @@ def metadata_to_xmp_packet(mi):
create_series(calibre, mi.series, mi.series_index) create_series(calibre, mi.series, mi.series_index)
if not mi.is_null('timestamp'): if not mi.is_null('timestamp'):
create_simple_property(calibre, 'calibre:timestamp', isoformat(mi.timestamp, as_utc=False)) create_simple_property(calibre, 'calibre:timestamp', isoformat(mi.timestamp, as_utc=False))
for x in ('author_link_map', 'user_categories'): for x in ('link_maps', 'user_categories'):
val = getattr(mi, x, None) val = getattr(mi, x, None)
if val: if val:
create_simple_property(calibre, 'calibre:'+x, dump_dict(val)) create_simple_property(calibre, 'calibre:'+x, dump_dict(val))

View File

@ -2014,7 +2014,10 @@ class BuiltinAuthorLinks(BuiltinFormatterFunction):
def evaluate(self, formatter, kwargs, mi, locals, val_sep, pair_sep): def evaluate(self, formatter, kwargs, mi, locals, val_sep, pair_sep):
if hasattr(mi, '_proxy_metadata'): if hasattr(mi, '_proxy_metadata'):
link_data = mi._proxy_metadata.author_link_map link_data = mi._proxy_metadata.link_maps
if not link_data:
return ''
link_data = link_data.get('authors')
if not link_data: if not link_data:
return '' return ''
names = sorted(link_data.keys(), key=sort_key) names = sorted(link_data.keys(), key=sort_key)