Get rid of author_link_map

Also implement support for link_maps metadata in EPUB3 and PDF.
Reading proceeds by returning the link_maps if present, otherwise look
for a legacy author_link_map and return a link_maps constructed from it.

Writing simply sets link_maps, which as per the scheme above takes
precedence anyway.
This commit is contained in:
Kovid Goyal 2023-03-30 13:41:20 +05:30
parent 44001e2c60
commit f2445ad87a
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
12 changed files with 112 additions and 59 deletions

View File

@ -325,19 +325,16 @@ class Cache:
aut_list = [adata[i] for i in author_ids]
aum = []
aus = {}
aul = {}
for rec in aut_list:
aut = rec['name']
aum.append(aut)
aus[aut] = rec['sort']
aul[aut] = rec['link']
mi.title = self._field_for('title', book_id,
default_value=_('Unknown'))
mi.authors = aum
mi.author_sort = self._field_for('author_sort', book_id,
default_value=_('Unknown'))
mi.author_sort_map = aus
mi.author_link_map = aul
mi.comments = self._field_for('comments', book_id)
mi.publisher = self._field_for('publisher', book_id)
n = utcnow()

View File

@ -315,7 +315,7 @@ getters = {
for field in ('comments', 'publisher', 'identifiers', 'series', 'rating'):
getters[field] = simple_getter(field)
for field in ('author_sort_map', 'author_link_map'):
for field in ('author_sort_map',):
getters[field] = adata_getter(field)
for field in ('timestamp', 'pubdate', 'last_modified'):

View File

@ -5,18 +5,22 @@ __license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import re, os, traceback, shutil, time
from threading import Thread
import os
import re
import shutil
import time
import traceback
from contextlib import suppress
from operator import itemgetter
from threading import Thread
from calibre.ptempfile import TemporaryDirectory
from calibre.ebooks.metadata.opf2 import OPF
from calibre import force_unicode, isbytestring
from calibre.constants import filesystem_encoding
from calibre.db.backend import DB, DBPrefs
from calibre.db.cache import Cache
from calibre.constants import filesystem_encoding
from calibre.ebooks.metadata.opf2 import OPF
from calibre.ptempfile import TemporaryDirectory
from calibre.utils.date import utcfromtimestamp
from calibre import isbytestring, force_unicode
from polyglot.builtins import iteritems
NON_EBOOK_EXTENSIONS = frozenset((
'jpg', 'jpeg', 'gif', 'png', 'bmp',
@ -59,7 +63,7 @@ class Restore(Thread):
self.mismatched_dirs = []
self.successes = 0
self.tb = None
self.authors_links = {}
self.link_maps = {}
@property
def errors_occurred(self):
@ -209,11 +213,13 @@ class Restore(Thread):
else:
self.mismatched_dirs.append(dirpath)
alm = mi.get('author_link_map', {})
for author, link in iteritems(alm):
existing_link, timestamp = self.authors_links.get(author, (None, None))
alm = mi.get('link_maps', {})
for field, lmap in alm.items():
dest = self.link_maps.setdefault(field, {})
for item, link in lmap.items():
existing_link, timestamp = dest.get(item, (None, None))
if existing_link is None or existing_link != link and timestamp < mi.timestamp:
self.authors_links[author] = (link, mi.timestamp)
dest[item] = link, mi.timestamp
def create_cc_metadata(self):
self.books.sort(key=itemgetter('timestamp'))
@ -262,10 +268,9 @@ class Restore(Thread):
self.failed_restores.append((book, traceback.format_exc()))
self.progress_callback(book['mi'].title, i+1)
id_map = db.get_item_ids('authors', [author for author in self.authors_links])
link_map = {aid:self.authors_links[name][0] for name, aid in iteritems(id_map) if aid is not None}
if link_map:
db.set_link_for_authors(link_map)
for field, lmap in self.link_maps.items():
with suppress(Exception):
db.set_link_map(field, {k:v[0] for k, v in lmap.items()})
db.close()
def replace_db(self):

View File

@ -102,7 +102,7 @@ class BaseTest(unittest.TestCase):
self.assertEqual(allfk1, allfk2)
all_keys = {'format_metadata', 'id', 'application_id',
'author_sort_map', 'author_link_map', 'book_size',
'author_sort_map', 'link_maps', 'book_size',
'ondevice_col', 'last_modified', 'has_cover',
'cover_data'}.union(allfk1)
for attr in all_keys:

View File

@ -575,7 +575,6 @@ class LegacyTest(BaseTest):
omi = [db.get_metadata(x) for x in (0, 1, 2)]
nmi = [ndb.get_metadata(x) for x in (0, 1, 2)]
self.assertEqual([x.author_sort_map for x in omi], [x.author_sort_map for x in nmi])
self.assertEqual([x.author_link_map for x in omi], [x.author_link_map for x in nmi])
db.close()
ndb = self.init_legacy(self.cloned_library)

View File

@ -85,8 +85,8 @@ CALIBRE_METADATA_FIELDS = frozenset((
# a dict of user category names, where the value is a list of item names
# from the book that are in that category
'user_categories',
# a dict of author to an associated hyperlink
'author_link_map',
# a dict of items to associated hyperlink
'link_maps',
))
ALL_METADATA_FIELDS = SOCIAL_METADATA_FIELDS.union(

View File

@ -45,7 +45,6 @@ NULL_VALUES = {
'author_sort' : _('Unknown'),
'title' : _('Unknown'),
'user_categories' : {},
'author_link_map' : {},
'link_maps' : {},
'language' : 'und'
}

View File

@ -17,6 +17,7 @@ import os
import re
import sys
import uuid
from contextlib import suppress
from lxml import etree
from calibre import guess_type, prints
@ -458,6 +459,37 @@ class MetadataField:
obj.set_text(elem, self.renderer(val))
class LinkMapsField:
    """Descriptor for the ``link_maps`` metadata entry of its owner object.

    Reading prefers the ``link_maps`` metadata element. If that element is
    missing, empty or unusable, the legacy ``author_link_map`` element is
    consulted and its content is returned under the ``'authors'`` key.
    Writing always drops the legacy element and stores the value (if any)
    in ``link_maps``.
    """

    def _load_dict(self, obj, name):
        """Return the JSON object stored in metadata element *name*, or None.

        None is returned when the element is absent, has no text, cannot be
        parsed as JSON, or parses to something that is not a dict.
        """
        elem = obj.get_metadata_element(name)
        if elem is None:
            return None
        text = obj.get_text(elem)
        if not text:
            return None
        # Best effort: malformed metadata must not break reading the book,
        # so parse failures are swallowed and treated as "no value".
        with suppress(Exception):
            parsed = json.loads(text)
            # Callers iterate the result with .items(); anything other than
            # a JSON object (null, list, string, number) is unusable.
            if isinstance(parsed, dict):
                return parsed
        return None

    def __get__(self, obj, type=None):
        """Return the link maps dict for *obj* (``{}`` when none is stored).

        Accessing the descriptor on the owning class returns the descriptor
        itself instead of failing on a missing instance.
        """
        if obj is None:
            return self
        ans = self._load_dict(obj, 'link_maps')
        if ans is not None:
            return ans
        legacy = self._load_dict(obj, 'author_link_map')
        if legacy is not None:
            return {'authors': legacy}
        return {}

    def __set__(self, obj, val):
        """Store *val* as ``link_maps``; a falsy *val* removes the element."""
        # The legacy element is always removed so that it cannot shadow or
        # contradict the value written below.
        elem = obj.get_metadata_element('author_link_map')
        if elem is not None:
            elem.getparent().remove(elem)
        elem = obj.get_metadata_element('link_maps')
        if not val:
            if elem is not None:
                elem.getparent().remove(elem)
            return
        if elem is None:
            elem = obj.create_metadata_element('link_maps', is_dc=False)
        obj.set_text(elem, dump_dict(val))
class TitleSortField(MetadataField):
def __get__(self, obj, type=None):
@ -593,10 +625,7 @@ class OPF: # {{{
user_categories = MetadataField('user_categories', is_dc=False,
formatter=json.loads,
renderer=dump_dict)
author_link_map = MetadataField('author_link_map', is_dc=False,
formatter=json.loads, renderer=dump_dict)
link_map = MetadataField('link_maps', is_dc=False,
formatter=json.loads, renderer=dump_dict)
link_maps = LinkMapsField()
def __init__(self, stream, basedir=os.getcwd(), unquote_urls=True,
populate_spine=True, try_to_guess_cover=False, preparsed_opf=None, read_toc=True):
@ -667,6 +696,7 @@ class OPF: # {{{
ans.set_user_metadata(n, v)
ans.set_identifiers(self.get_identifiers())
ans.link_maps = self.link_maps
return ans
@ -1312,7 +1342,7 @@ class OPF: # {{{
for attr in ('title', 'authors', 'author_sort', 'title_sort',
'publisher', 'series', 'series_index', 'rating',
'isbn', 'tags', 'category', 'comments', 'book_producer',
'pubdate', 'user_categories', 'author_link_map', 'link_map'):
'pubdate', 'user_categories', 'link_maps'):
val = getattr(mi, attr, None)
if attr == 'rating' and val:
val = float(val)
@ -1673,9 +1703,7 @@ def metadata_to_opf(mi, as_string=True, default_lang=None):
def meta(n, c):
return factory('meta', name='calibre:' + n, content=c)
if getattr(mi, 'author_link_map', None) is not None:
meta('author_link_map', dump_dict(mi.author_link_map))
if getattr(mi, 'link_maps', None) is not None:
if not mi.is_null('link_maps'):
meta('link_maps', dump_dict(mi.link_maps))
if mi.series:
meta('series', mi.series)

View File

@ -865,16 +865,31 @@ def dict_reader(name, load=json.loads, try2=True):
read_user_categories = dict_reader('user_categories')
read_author_link_map = dict_reader('author_link_map')
_read_link_maps = dict_reader('link_maps')
_read_author_link_map = dict_reader('author_link_map')
def dict_writer(name, serialize=dump_dict, remove2=True):
def read_link_maps(root, prefixes, refines):
    """Return the link maps read from the OPF metadata, or None.

    Whatever the ``link_maps`` reader yields is returned as-is unless it is
    None. Otherwise the legacy ``author_link_map`` reader is consulted:
    entries with a falsy link are dropped and the remainder is returned
    under the ``'authors'`` key. None is returned when neither source
    produces a usable value.
    """
    current = _read_link_maps(root, prefixes, refines)
    if current is not None:
        return current
    legacy = _read_author_link_map(root, prefixes, refines)
    if not legacy:
        return None
    # Legacy maps may carry placeholder entries with empty links; skip them.
    author_links = {name: link for name, link in legacy.items() if link}
    if not author_links:
        return None
    return {'authors': author_links}
def dict_writer(name, serialize=dump_dict, remove2=True, extra_remove=''):
pq = f'{CALIBRE_PREFIX}:{name}'
def writer(root, prefixes, refines, val):
if remove2:
for meta in XPath('./opf:metadata/opf:meta[@name="calibre:%s"]' % name)(root):
remove_element(meta, refines)
if extra_remove:
for meta in XPath('./opf:metadata/opf:meta[@name="calibre:%s"]' % extra_remove)(root):
remove_element(meta, refines)
for meta in XPath('./opf:metadata/opf:meta[@property]')(root):
prop = expand_prefix(meta.get('property'), prefixes)
if prop.lower() == pq:
@ -889,7 +904,7 @@ def dict_writer(name, serialize=dump_dict, remove2=True):
set_user_categories = dict_writer('user_categories')
set_author_link_map = dict_writer('author_link_map')
set_link_maps = dict_writer('link_maps', extra_remove='author_link_map')
def deserialize_user_metadata(val):
@ -1054,7 +1069,7 @@ def read_metadata(root, ver=None, return_extra_data=False):
s, si = read_series(root, prefixes, refines)
if s:
ans.series, ans.series_index = s, si
ans.author_link_map = read_author_link_map(root, prefixes, refines) or ans.author_link_map
ans.link_maps = read_link_maps(root, prefixes, refines) or ans.link_maps
ans.user_categories = read_user_categories(root, prefixes, refines) or ans.user_categories
for name, fm in iteritems(read_user_metadata(root, prefixes, refines) or {}):
ans.set_user_metadata(name, fm)
@ -1105,8 +1120,8 @@ def apply_metadata(root, mi, cover_prefix='', cover_data=None, apply_null=False,
set_rating(root, prefixes, refines, mi.rating)
if ok('series'):
set_series(root, prefixes, refines, mi.series, mi.series_index or 1)
if ok('author_link_map'):
set_author_link_map(root, prefixes, refines, getattr(mi, 'author_link_map', None))
if ok('link_maps'):
set_link_maps(root, prefixes, refines, getattr(mi, 'link_maps', None))
if ok('user_categories'):
set_user_categories(root, prefixes, refines, getattr(mi, 'user_categories', None))
# We ignore apply_null for the next two to match the behavior with opf2.py

View File

@ -2,29 +2,29 @@
# License: GPLv3 Copyright: 2016, Kovid Goyal <kovid at kovidgoyal.net>
import unittest
from collections import defaultdict
from io import BytesIO
import unittest
from calibre.ebooks.metadata.book import ALL_METADATA_FIELDS
from calibre.utils.xml_parse import safe_xml_fromstring
from calibre.ebooks.metadata.opf2 import OPF
from calibre.ebooks.metadata.opf3 import (
parse_prefixes, reserved_prefixes, expand_prefix, read_identifiers,
read_metadata, set_identifiers, XPath, set_application_id, read_title,
read_refines, set_title, read_title_sort, read_languages, set_languages,
read_authors, Author, set_authors, ensure_prefix, read_prefixes,
read_book_producers, set_book_producers, read_timestamp, set_timestamp,
read_pubdate, set_pubdate, CALIBRE_PREFIX, read_last_modified, read_comments,
set_comments, read_publisher, set_publisher, read_tags, set_tags, read_rating,
set_rating, read_series, set_series, read_user_metadata, set_user_metadata,
read_author_link_map, read_user_categories, set_author_link_map, set_user_categories,
apply_metadata, read_raster_cover, ensure_is_only_raster_cover
CALIBRE_PREFIX, Author, XPath, apply_metadata, ensure_is_only_raster_cover,
ensure_prefix, expand_prefix, parse_prefixes, read_authors, read_book_producers,
read_comments, read_identifiers, read_languages, read_last_modified, read_link_maps,
read_metadata, read_prefixes, read_pubdate, read_publisher, read_raster_cover,
read_rating, read_refines, read_series, read_tags, read_timestamp, read_title,
read_title_sort, read_user_categories, read_user_metadata, reserved_prefixes,
set_application_id, set_authors, set_book_producers, set_comments, set_identifiers,
set_languages, set_link_maps, set_pubdate, set_publisher, set_rating, set_series,
set_tags, set_timestamp, set_title, set_user_categories, set_user_metadata,
)
# This import is needed to prevent a test from running slowly
from calibre.ebooks.oeb.polish.pretty import pretty_opf, pretty_xml_tree # noqa
from calibre.utils.xml_parse import safe_xml_fromstring
read_author_link_map, read_user_categories, set_author_link_map, set_user_categories
read_user_categories, set_user_categories, read_link_maps, set_link_maps
TEMPLATE = '''<package xmlns="http://www.idpf.org/2007/opf" version="3.0" prefix="calibre: %s" unique-identifier="uid"><metadata xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:opf="http://www.idpf.org/2007/opf">{metadata}</metadata><manifest>{manifest}</manifest></package>''' % CALIBRE_PREFIX # noqa
default_refines = defaultdict(list)
@ -288,7 +288,7 @@ class TestOPF3(unittest.TestCase):
f = globals()['set_' + name]
f(root, read_prefixes(root), read_refines(root), val)
return rt(root, name)
for name in 'author_link_map user_categories'.split():
for name in 'link_maps user_categories'.split():
root = self.get_opf('''<meta name="calibre:%s" content='{"1":1}'/>''' % name)
self.ae({'1':1}, rt(root, name))
root = self.get_opf(f'''<meta name="calibre:{name}" content='{{"1":1}}'/><meta property="calibre:{name}">{{"2":2}}</meta>''')
@ -328,7 +328,7 @@ class TestOPF3(unittest.TestCase):
<dc:subject>conversion</dc:subject>
<dc:subject>docs</dc:subject>
<dc:subject>ebook</dc:subject>
<meta content="{&quot;Kovid Goyal&quot;: &quot;&quot;}" name="calibre:author_link_map"/>
<meta content="{&quot;Kovid Goyal&quot;: &quot;https://kovidgoyal.net&quot;}" name="calibre:author_link_map"/>
<meta content="Demos" name="calibre:series"/>
<meta content="1" name="calibre:series_index"/>
<meta content="10" name="calibre:rating"/>

View File

@ -289,12 +289,19 @@ def metadata_from_xmp_packet(raw_bytes):
if val:
setattr(mi, x, val)
break
for x in ('author_link_map', 'user_categories'):
for x in ('link_maps', 'user_categories'):
val = first_simple('//calibre:'+x, root)
if val:
try:
setattr(mi, x, json.loads(val))
except:
except Exception:
pass
elif x == 'link_maps':
val = first_simple('//calibre:author_link_map', root)
if val:
try:
setattr(mi, x, {'authors': json.loads(val)})
except Exception:
pass
languages = multiple_sequences('//dc:language', root)
@ -526,7 +533,7 @@ def metadata_to_xmp_packet(mi):
create_series(calibre, mi.series, mi.series_index)
if not mi.is_null('timestamp'):
create_simple_property(calibre, 'calibre:timestamp', isoformat(mi.timestamp, as_utc=False))
for x in ('author_link_map', 'user_categories'):
for x in ('link_maps', 'user_categories'):
val = getattr(mi, x, None)
if val:
create_simple_property(calibre, 'calibre:'+x, dump_dict(val))

View File

@ -2014,7 +2014,10 @@ class BuiltinAuthorLinks(BuiltinFormatterFunction):
def evaluate(self, formatter, kwargs, mi, locals, val_sep, pair_sep):
if hasattr(mi, '_proxy_metadata'):
link_data = mi._proxy_metadata.author_link_map
link_data = mi._proxy_metadata.link_maps
if not link_data:
return ''
link_data = link_data.get('authors')
if not link_data:
return ''
names = sorted(link_data.keys(), key=sort_key)