Refactor grouping code in annotations.py - use db prefs, iter_all_groups, and multi-valued field support

Agent-Logs-Url: https://github.com/kovidgoyal/calibre/sessions/6c6c56e2-ea78-4bc2-95b1-4ee08f08c1dd

Co-authored-by: kovidgoyal <1308621+kovidgoyal@users.noreply.github.com>
This commit is contained in:
copilot-swe-agent[bot] 2026-03-31 08:27:37 +00:00
parent bf1b8f18b4
commit ccd7d3ffc8
2 changed files with 227 additions and 108 deletions

View File

@ -5,8 +5,6 @@ import codecs
import json
import os
import re
from contextlib import suppress
from enum import Enum, auto
from functools import lru_cache, partial
from urllib.parse import quote
@ -49,6 +47,7 @@ from calibre.db.backend import FTSQueryError
from calibre.ebooks.metadata import authors_to_sort_string, authors_to_string, fmt_sidx
from calibre.gui2 import Application, choose_save_file, config, error_dialog, gprefs, is_dark_theme, safe_open_url
from calibre.gui2.dialogs.confirm_delete import confirm
from calibre.gui2.library.bookshelf_view import all_groupings, iter_all_groups
from calibre.gui2.viewer.widgets import ResultsDelegate, SearchBox
from calibre.gui2.widgets import BusyCursor
from calibre.gui2.widgets2 import Dialog, RightClickButton
@ -380,41 +379,22 @@ def current_db():
return (getattr(current_db, 'ans', None) or get_gui().current_db).new_api
class Group(Enum):
BOOK_ID = auto()
AUTHOR = auto()
USER = auto()
DATE = auto()
def annotation_only_groupings() -> dict[str, str]:
'''
Return annotation-specific field names that can be used for grouping but are
not present in the book field metadata (and therefore not in iter_all_groups).
@property
def field_name(self) -> str:
'''The underlying annotation or book field this grouping applies to.'''
return GROUP_INFO[self]['field_name']
@property
def display_name(self) -> str:
'''A localized human-readable name for this grouping.'''
return GROUP_INFO[self]['display_name']
The key 'annot_timestamp' is a virtual field name used to group by the
annotation's own creation date, as distinct from the book's Date Added
field ('timestamp') which comes from the database.
'''
return {
'user': _('User'),
'annot_timestamp': _('Annotation date'),
}
GROUP_INFO: dict[Group, dict[str, str]] = {
Group.BOOK_ID: {
'field_name': 'title',
'display_name': _('Title'),
},
Group.AUTHOR: {
'field_name': 'authors',
'display_name': _('Author'),
},
Group.USER: {
'field_name': 'user',
'display_name': _('User'),
},
Group.DATE: {
'field_name': 'timestamp',
'display_name': _('Annotation date'),
},
}
BROWSE_ANNOTS_GROUP_BY_PREF = 'browse_annots_group_by'
def get_annotation_value(annotation, bid, field, db):
@ -434,16 +414,13 @@ def get_group_key(result, field, db):
'''
Return (sort_key, display_label) for an annotation result grouped by field.
field may be a Group enum member or any string naming a field in the
annotation row (e.g. 'format', 'annot_type', 'user', 'timestamp') or a
book metadata field accessible via db.field_for (e.g. 'title', 'authors').
field is a string naming a field in the annotation row (e.g. 'format',
'annot_type', 'user', 'annot_timestamp') or a book metadata field
accessible via db.field_for (e.g. 'title', 'authors', 'tags').
sort_key is a tuple suitable for use as a dict key and for natural ordering.
display_label is a localized human-readable string for the group header.
'''
if isinstance(field, Group):
field = field.field_name
fm = db.field_metadata
dt = fm.get(field, {}).get('datatype')
bid = result['book_id']
@ -460,8 +437,33 @@ def get_group_key(result, field, db):
case 'user':
user = friendly_username(result['user_type'], result['user'])
return (primary_sort_key(user), user), user
case 'annot_timestamp':
ts = result.get('annotation', {}).get('timestamp', '')
df = 'dd MMM yyyy'
if 'd' in df:
qdt = QDateTime.fromString(ts, Qt.DateFormat.ISODate)
if qdt.isValid():
qdt = qdt.toLocalTime()
qdate = qdt.date()
jd = qdate.toJulianDay()
else:
qdate = QDateTime.currentDateTime().date()
jd = qdate.toJulianDay()
today = QDateTime.currentDateTime().toLocalTime().date()
days_past = today.toJulianDay() - jd
loc = QLocale.system()
label = loc.toString(qdate, loc.dateFormat(QLocale.FormatType.ShortFormat))
if not label:
label = _('Unknown date')
return (days_past, label), label
else: # Assume it's a year
year = QDateTime.fromString(ts, Qt.DateFormat.ISODate).date().year()
current_year = QDateTime.currentDateTime().date().year()
years_past = current_year - year
label = str(year) if year > 0 else _('Unknown year')
return (years_past, label), label
case field if dt == 'datetime':
ts = get_annotation_value(result, bid, field, db)
ts = db.field_for(field, bid)
df = fm[field].get('display', {}).get('date_format') or 'dd MMM yyyy'
if 'd' in df:
qdt = QDateTime.fromString(ts, Qt.DateFormat.ISODate)
@ -495,12 +497,56 @@ def get_group_key(result, field, db):
sort_key = val
if dt == 'text':
sort_key = primary_sort_key(val)
return (sort_key, label := str(val)), label
label = str(val)
return (sort_key, label), label
class ResultsList(QTreeWidget):
def get_group_keys_list(result, field, db):
'''
Return a list of (sort_key, display_label) pairs for an annotation result
grouped by field.
For multi-valued fields (e.g. tags, languages) the list contains one entry
per value so that the result appears under every applicable group. For
single-valued fields the list always contains exactly one entry.
'''
if field in ('title', 'user', 'annot_timestamp'):
key, label = get_group_key(result, field, db)
return [(key, label)]
bid = result['book_id']
fm = db.field_metadata
fm_entry = fm.get(field, {})
is_multiple = bool(fm_entry.get('is_multiple'))
if not is_multiple:
key, label = get_group_key(result, field, db)
return [(key, label)]
# Multi-valued field: yield one entry per value
vals = db.field_for(field, bid)
if not vals:
ungrouped = all_groupings().get(field) or _('Unknown')
if field == 'authors':
ungrouped = _('Unknown author')
return [(('',), ungrouped)]
dt = fm_entry.get('datatype')
entries = []
for v in vals:
v_str = str(v)
if field == 'authors':
sk = primary_sort_key(authors_to_sort_string((v,)))
elif dt == 'text':
sk = primary_sort_key(v_str)
else:
sk = v
entries.append(((sk, v_str), v_str))
return entries
class ResultsList(QTreeWidget):
current_result_changed = pyqtSignal(object)
open_annotation = pyqtSignal(object, object, object)
show_book = pyqtSignal(object, object)
@ -564,32 +610,38 @@ class ResultsList(QTreeWidget):
if isinstance(r, dict):
self.open_annotation.emit(r['book_id'], r['format'], r['annotation'])
def set_results(self, results, emphasize_text, group_order=(Group.BOOK_ID,)):
def set_results(self, results, emphasize_text, group_order=('title',)):
self.clear()
self.delegate.emphasize_text = emphasize_text
self.number_of_results = 0
self.item_map = []
# Ensure deepest level is always Book/Title
# Ensure deepest level is always the book title
if not group_order:
group_order = (Group.BOOK_ID,)
if group_order[-1] != Group.BOOK_ID:
group_order = tuple(group_order) + (Group.BOOK_ID,)
group_order = ('title',)
if group_order[-1] != 'title':
group_order = tuple(group_order) + ('title',)
db = current_db()
tree = {}
for result in results:
node = tree
entry = None
for g in group_order:
key, label = get_group_key(result, g, db)
def insert_result(result, fields, node):
if not fields:
return
field = fields[0]
rest = fields[1:]
# Multi-valued fields expand the result into multiple groups
for key, label in get_group_keys_list(result, field, db):
if key not in node:
node[key] = {'label': label, 'children': {}, 'results': []}
entry = node[key]
node = entry['children']
if entry is not None:
entry['results'].append(result)
if rest:
insert_result(result, rest, node[key]['children'])
else:
node[key]['results'].append(result)
for result in results:
insert_result(result, group_order, tree)
self.add_children(None, tree)
if self.item_map:
@ -829,6 +881,12 @@ class Restrictions(QWidget):
self.setVisible(True)
def _save_annots_group_by_pref(gb):
field = gb.currentData()
if field:
current_db().set_pref(BROWSE_ANNOTS_GROUP_BY_PREF, field)
class GroupOptions(QWidget):
grouping_changed = pyqtSignal()
@ -840,34 +898,38 @@ class GroupOptions(QWidget):
h.addWidget(la)
self.group_box = gb = QComboBox(self)
gb.currentIndexChanged.connect(self.grouping_changed)
connect_lambda(gb.currentIndexChanged, gb, lambda gb: gprefs.set('browse_annots_group_by', gb.currentData().name))
connect_lambda(gb.currentIndexChanged, gb, _save_annots_group_by_pref)
la.setBuddy(gb)
gb.setToolTip(_('Display annotations grouped by this value'))
gb.setSizeAdjustPolicy(QComboBox.SizeAdjustPolicy.AdjustToContents)
h.addWidget(gb)
@property
def selected_group(self):
data = self.group_box.currentData()
return data or Group.BOOK_ID
def selected_field(self):
'''Return the currently selected grouping field name (string).'''
return self.group_box.currentData() or 'title'
@property
def group_order(self):
# Ensure final level is always Book/Title
order = [self.selected_group]
if order[-1] != Group.BOOK_ID:
order.append(Group.BOOK_ID)
return tuple(order)
field = self.selected_field
if field == 'title':
return ('title',)
return (field, 'title')
def re_initialize(self):
def re_initialize(self, db):
gb = self.group_box
if not (before := gb.currentData()):
with suppress(KeyError):
before = Group[gprefs.get('browse_annots_group_by', Group.BOOK_ID.name)]
before = gb.currentData() or db.pref(BROWSE_ANNOTS_GROUP_BY_PREF, 'title')
gb.blockSignals(True)
gb.clear()
for group_type in Group:
gb.addItem(group_type.display_name, group_type)
# Annotation-specific fields first
for field, display_name in annotation_only_groupings().items():
gb.addItem(display_name, field)
# Title grouping
gb.addItem(_('Title'), 'title')
# All groupable DB fields
fm = db.field_metadata
for field, display_name in iter_all_groups(fm):
gb.addItem(display_name, field)
if before:
row = gb.findData(before)
if row > -1:
@ -933,7 +995,7 @@ class BrowsePanel(QWidget):
db = current_db()
self.search_box.setFocus(Qt.FocusReason.OtherFocusReason)
self.restrictions.re_initialize(db, restrict_to_book_ids or set())
self.group_options.re_initialize()
self.group_options.re_initialize(db)
self.current_query = None
self.results_list.clear()
@ -994,7 +1056,7 @@ class BrowsePanel(QWidget):
highlight_start='\x1d', highlight_end='\x1d', snippet_size=64,
ignore_removed=True, **q2
)
group_order = getattr(self.group_options, 'group_order', (Group.BOOK_ID,))
group_order = getattr(self.group_options, 'group_order', ('title',))
self.results_list.set_results(results, bool(q['fts_engine_query']), group_order=group_order)
self.current_query = q
except FTSQueryError as err:

View File

@ -4,7 +4,7 @@
import unittest
from unittest.mock import MagicMock
from calibre.gui2.library.annotations import get_group_key
from calibre.gui2.library.annotations import get_group_key, get_group_keys_list
def _make_result(book_id=1, annot_id=1, **extra):
@ -113,19 +113,6 @@ class GroupKeyTest(unittest.TestCase):
db.field_for.assert_not_called()
self.assertEqual(label, 'PDF')
def test_group_enum_resolves_via_field_name_property(self):
from calibre.gui2.library.annotations import Group
db = _make_mock_db(
field_for_map={'title': 'Dune'},
field_metadata={},
)
(sort_key_enum, label_enum) = get_group_key(_make_result(), Group.BOOK_ID, db)
(sort_key_str, label_str) = get_group_key(_make_result(), 'title', db)
self.assertEqual(label_enum, label_str)
self.assertEqual(sort_key_enum, sort_key_str)
def test_group_by_book_id_title(self):
db = _make_mock_db(
field_for_map={'title': 'The Great Gatsby'},
@ -164,21 +151,14 @@ class GroupKeyTest(unittest.TestCase):
self.assertIsInstance(label, str)
self.assertEqual(sort_key[0], label.lower())
def test_group_by_timestamp_day_bucketing(self):
def test_group_by_annot_timestamp_day_bucketing(self):
from qt.core import QDateTime, Qt
# Use an ISO timestamp from the annotation
ts_iso = '2024-12-25T10:30:00.000Z'
db = _make_mock_db(
field_metadata={
'timestamp': {
'datatype': 'datetime',
'display': {'date_format': 'dd MMM yyyy'},
}
}
)
db = _make_mock_db(field_metadata={})
result = _make_result(annotation={'timestamp': ts_iso})
(sort_key, label) = get_group_key(result, 'timestamp', db)
(sort_key, label) = get_group_key(result, 'annot_timestamp', db)
# Parse the timestamp to verify sort key
qdt = QDateTime.fromString(ts_iso, Qt.DateFormat.ISODate)
@ -193,22 +173,99 @@ class GroupKeyTest(unittest.TestCase):
# Not asserting a particular label due to locale variations
self.assertIsInstance(label, str)
def test_group_by_timestamp_with_invalid_date(self):
db = _make_mock_db(
field_metadata={
'timestamp': {
'datatype': 'datetime',
'display': {'date_format': 'dd MMM yyyy'},
}
}
)
def test_group_by_annot_timestamp_with_invalid_date(self):
db = _make_mock_db(field_metadata={})
result = _make_result(annotation={'timestamp': 'invalid-timestamp'})
(sort_key, _) = get_group_key(result, 'timestamp', db)
(sort_key, _) = get_group_key(result, 'annot_timestamp', db)
# Should use current date when parse fails
self.assertIsInstance(sort_key[0], int)
self.assertIsInstance(sort_key[1], str)
def test_group_by_book_timestamp_uses_db_field(self):
from qt.core import QDateTime, Qt
ts_iso = '2023-06-15T08:00:00.000Z'
db = _make_mock_db(
field_for_map={'timestamp': ts_iso},
field_metadata={
'timestamp': {
'datatype': 'datetime',
'display': {'date_format': 'dd MMM yyyy'},
}
},
)
# Even if the annotation has its own timestamp, grouping by 'timestamp'
# must use the book's Date Added value from the DB.
result = _make_result(annotation={'timestamp': '2000-01-01T00:00:00.000Z'})
(sort_key, label) = get_group_key(result, 'timestamp', db)
qdt = QDateTime.fromString(ts_iso, Qt.DateFormat.ISODate).toLocalTime()
qdate = qdt.date()
today = QDateTime.currentDateTime().toLocalTime().date()
expected_days_past = today.toJulianDay() - qdate.toJulianDay()
self.assertEqual(sort_key[0], expected_days_past)
self.assertIsInstance(label, str)
class GroupKeysListTest(unittest.TestCase):
def test_single_valued_field_returns_one_entry(self):
db = _make_mock_db(
field_for_map={'publisher': 'Tor Books'},
field_metadata={'publisher': {'datatype': 'text', 'is_multiple': {}}},
)
entries = get_group_keys_list(_make_result(), 'publisher', db)
self.assertEqual(len(entries), 1)
self.assertEqual(entries[0][1], 'Tor Books')
def test_multi_valued_field_returns_one_entry_per_value(self):
db = _make_mock_db(
field_for_map={'tags': ('sci-fi', 'fantasy')},
field_metadata={'tags': {'datatype': 'text', 'is_multiple': {'cache_to_list': ','}}},
)
entries = get_group_keys_list(_make_result(), 'tags', db)
labels = [label for _, label in entries]
self.assertEqual(sorted(labels), ['fantasy', 'sci-fi'])
def test_multi_valued_field_empty_returns_ungrouped(self):
db = _make_mock_db(
field_for_map={'tags': ()},
field_metadata={'tags': {'datatype': 'text', 'is_multiple': {'cache_to_list': ','}}},
)
entries = get_group_keys_list(_make_result(), 'tags', db)
self.assertEqual(len(entries), 1)
# Should be the ungrouped label from all_groupings()
self.assertIn('Untagged', entries[0][1])
def test_title_returns_one_entry(self):
db = _make_mock_db(
field_for_map={'title': 'Dune'},
field_metadata={},
)
entries = get_group_keys_list(_make_result(), 'title', db)
self.assertEqual(len(entries), 1)
self.assertEqual(entries[0][1], 'Dune')
def test_annot_timestamp_returns_one_entry(self):
db = _make_mock_db(field_metadata={})
result = _make_result(annotation={'timestamp': '2024-01-01T00:00:00.000Z'})
entries = get_group_keys_list(result, 'annot_timestamp', db)
self.assertEqual(len(entries), 1)
def test_authors_multi_valued_returns_one_per_author(self):
db = _make_mock_db(
field_for_map={'authors': ('Alice', 'Bob')},
field_metadata={'authors': {'datatype': 'text', 'is_multiple': {'cache_to_list': ','}}},
)
entries = get_group_keys_list(_make_result(), 'authors', db)
labels = [label for _, label in entries]
self.assertEqual(sorted(labels), ['Alice', 'Bob'])
def find_tests():
return unittest.defaultTestLoader.loadTestsFromTestCase(GroupKeyTest)
loader = unittest.defaultTestLoader
suite = unittest.TestSuite()
suite.addTests(loader.loadTestsFromTestCase(GroupKeyTest))
suite.addTests(loader.loadTestsFromTestCase(GroupKeysListTest))
return suite