From ccd7d3ffc8f964e82cd89c3e7e003b074e386d56 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 31 Mar 2026 08:27:37 +0000 Subject: [PATCH] Refactor grouping code in annotations.py - use db prefs, iter_all_groups, and multi-valued field support Agent-Logs-Url: https://github.com/kovidgoyal/calibre/sessions/6c6c56e2-ea78-4bc2-95b1-4ee08f08c1dd Co-authored-by: kovidgoyal <1308621+kovidgoyal@users.noreply.github.com> --- src/calibre/gui2/library/annotations.py | 208 ++++++++++++------- src/calibre/gui2/library/test_annotations.py | 127 +++++++---- 2 files changed, 227 insertions(+), 108 deletions(-) diff --git a/src/calibre/gui2/library/annotations.py b/src/calibre/gui2/library/annotations.py index aea6147f5c..3c86e5f3e7 100644 --- a/src/calibre/gui2/library/annotations.py +++ b/src/calibre/gui2/library/annotations.py @@ -5,8 +5,6 @@ import codecs import json import os import re -from contextlib import suppress -from enum import Enum, auto from functools import lru_cache, partial from urllib.parse import quote @@ -49,6 +47,7 @@ from calibre.db.backend import FTSQueryError from calibre.ebooks.metadata import authors_to_sort_string, authors_to_string, fmt_sidx from calibre.gui2 import Application, choose_save_file, config, error_dialog, gprefs, is_dark_theme, safe_open_url from calibre.gui2.dialogs.confirm_delete import confirm +from calibre.gui2.library.bookshelf_view import all_groupings, iter_all_groups from calibre.gui2.viewer.widgets import ResultsDelegate, SearchBox from calibre.gui2.widgets import BusyCursor from calibre.gui2.widgets2 import Dialog, RightClickButton @@ -380,41 +379,22 @@ def current_db(): return (getattr(current_db, 'ans', None) or get_gui().current_db).new_api -class Group(Enum): - BOOK_ID = auto() - AUTHOR = auto() - USER = auto() - DATE = auto() +def annotation_only_groupings() -> dict[str, str]: + ''' + Return annotation-specific field names that can be used for grouping but are + not present in the book field metadata (and therefore not in iter_all_groups). - @property - def field_name(self) -> str: - '''The underlying annotation or book field this grouping applies to.''' - return GROUP_INFO[self]['field_name'] - - @property - def display_name(self) -> str: - '''A localized human-readable name for this grouping.''' - return GROUP_INFO[self]['display_name'] + The key 'annot_timestamp' is a virtual field name used to group by the + annotation's own creation date, as distinct from the book's Date Added + field ('timestamp') which comes from the database. + ''' + return { + 'user': _('User'), + 'annot_timestamp': _('Annotation date'), + } -GROUP_INFO: dict[Group, dict[str, str]] = { - Group.BOOK_ID: { - 'field_name': 'title', - 'display_name': _('Title'), - }, - Group.AUTHOR: { - 'field_name': 'authors', - 'display_name': _('Author'), - }, - Group.USER: { - 'field_name': 'user', - 'display_name': _('User'), - }, - Group.DATE: { - 'field_name': 'timestamp', - 'display_name': _('Annotation date'), - }, -} +BROWSE_ANNOTS_GROUP_BY_PREF = 'browse_annots_group_by' def get_annotation_value(annotation, bid, field, db): @@ -434,16 +414,13 @@ def get_group_key(result, field, db): ''' Return (sort_key, display_label) for an annotation result grouped by field. - field may be a Group enum member or any string naming a field in the - annotation row (e.g. 'format', 'annot_type', 'user', 'timestamp') or a - book metadata field accessible via db.field_for (e.g. 'title', 'authors'). + field is a string naming a field in the annotation row (e.g. 'format', + 'annot_type', 'user', 'annot_timestamp') or a book metadata field + accessible via db.field_for (e.g. 'title', 'authors', 'tags'). sort_key is a tuple suitable for use as a dict key and for natural ordering. display_label is a localized human-readable string for the group header. ''' - if isinstance(field, Group): - field = field.field_name - fm = db.field_metadata dt = fm.get(field, {}).get('datatype') bid = result['book_id'] @@ -460,8 +437,33 @@ def get_group_key(result, field, db): case 'user': user = friendly_username(result['user_type'], result['user']) return (primary_sort_key(user), user), user + case 'annot_timestamp': + ts = result.get('annotation', {}).get('timestamp', '') + df = 'dd MMM yyyy' + if 'd' in df: + qdt = QDateTime.fromString(ts, Qt.DateFormat.ISODate) + if qdt.isValid(): + qdt = qdt.toLocalTime() + qdate = qdt.date() + jd = qdate.toJulianDay() + else: + qdate = QDateTime.currentDateTime().date() + jd = qdate.toJulianDay() + today = QDateTime.currentDateTime().toLocalTime().date() + days_past = today.toJulianDay() - jd + loc = QLocale.system() + label = loc.toString(qdate, loc.dateFormat(QLocale.FormatType.ShortFormat)) + if not label: + label = _('Unknown date') + return (days_past, label), label + else: # Assume it's a year + year = QDateTime.fromString(ts, Qt.DateFormat.ISODate).date().year() + current_year = QDateTime.currentDateTime().date().year() + years_past = current_year - year + label = str(year) if year > 0 else _('Unknown year') + return (years_past, label), label case field if dt == 'datetime': - ts = get_annotation_value(result, bid, field, db) + ts = db.field_for(field, bid) df = fm[field].get('display', {}).get('date_format') or 'dd MMM yyyy' if 'd' in df: qdt = QDateTime.fromString(ts, Qt.DateFormat.ISODate) @@ -495,12 +497,56 @@ def get_group_key(result, field, db): sort_key = val if dt == 'text': sort_key = primary_sort_key(val) + return (sort_key, label := str(val)), label label = str(val) return (sort_key, label), label -class ResultsList(QTreeWidget): +def get_group_keys_list(result, field, db): + ''' + Return a list of (sort_key, display_label) pairs for an annotation result + grouped by field. + For multi-valued fields (e.g. tags, languages) the list contains one entry + per value so that the result appears under every applicable group. For + single-valued fields the list always contains exactly one entry. + ''' + if field in ('title', 'user', 'annot_timestamp'): + key, label = get_group_key(result, field, db) + return [(key, label)] + + bid = result['book_id'] + fm = db.field_metadata + fm_entry = fm.get(field, {}) + is_multiple = bool(fm_entry.get('is_multiple')) + + if not is_multiple: + key, label = get_group_key(result, field, db) + return [(key, label)] + + # Multi-valued field: yield one entry per value + vals = db.field_for(field, bid) + if not vals: + ungrouped = all_groupings().get(field) or _('Unknown') + if field == 'authors': + ungrouped = _('Unknown author') + return [(('',), ungrouped)] + + dt = fm_entry.get('datatype') + entries = [] + for v in vals: + v_str = str(v) + if field == 'authors': + sk = primary_sort_key(authors_to_sort_string((v,))) + elif dt == 'text': + sk = primary_sort_key(v_str) + else: + sk = v + entries.append(((sk, v_str), v_str)) + return entries + + +class ResultsList(QTreeWidget): current_result_changed = pyqtSignal(object) open_annotation = pyqtSignal(object, object, object) show_book = pyqtSignal(object, object) @@ -564,32 +610,38 @@ class ResultsList(QTreeWidget): if isinstance(r, dict): self.open_annotation.emit(r['book_id'], r['format'], r['annotation']) - def set_results(self, results, emphasize_text, group_order=(Group.BOOK_ID,)): + def set_results(self, results, emphasize_text, group_order=('title',)): self.clear() self.delegate.emphasize_text = emphasize_text self.number_of_results = 0 self.item_map = [] - # Ensure deepest level is always Book/Title + # Ensure deepest level is always the book title if not group_order: - group_order = (Group.BOOK_ID,) - if group_order[-1] != Group.BOOK_ID: - group_order = tuple(group_order) + (Group.BOOK_ID,) + group_order = ('title',) + if group_order[-1] != 'title': + group_order = tuple(group_order) + ('title',) db = current_db() tree = {} - for result in results: - node = tree - entry = None - for g in group_order: - key, label = get_group_key(result, g, db) + + def insert_result(result, fields, node): + if not fields: + return + field = fields[0] + rest = fields[1:] + # Multi-valued fields expand the result into multiple groups + for key, label in get_group_keys_list(result, field, db): if key not in node: node[key] = {'label': label, 'children': {}, 'results': []} - entry = node[key] - node = entry['children'] - if entry is not None: - entry['results'].append(result) + if rest: + insert_result(result, rest, node[key]['children']) + else: + node[key]['results'].append(result) + + for result in results: + insert_result(result, group_order, tree) self.add_children(None, tree) if self.item_map: @@ -829,6 +881,12 @@ class Restrictions(QWidget): self.setVisible(True) +def _save_annots_group_by_pref(gb): + field = gb.currentData() + if field: + current_db().set_pref(BROWSE_ANNOTS_GROUP_BY_PREF, field) + + class GroupOptions(QWidget): grouping_changed = pyqtSignal() @@ -840,34 +898,38 @@ class GroupOptions(QWidget): h.addWidget(la) self.group_box = gb = QComboBox(self) gb.currentIndexChanged.connect(self.grouping_changed) - connect_lambda(gb.currentIndexChanged, gb, lambda gb: gprefs.set('browse_annots_group_by', gb.currentData().name)) + connect_lambda(gb.currentIndexChanged, gb, _save_annots_group_by_pref) la.setBuddy(gb) gb.setToolTip(_('Display annotations grouped by this value')) gb.setSizeAdjustPolicy(QComboBox.SizeAdjustPolicy.AdjustToContents) h.addWidget(gb) @property - def selected_group(self): - data = self.group_box.currentData() - return data or Group.BOOK_ID + def selected_field(self): + '''Return the currently selected grouping field name (string).''' + return self.group_box.currentData() or 'title' @property def group_order(self): - # Ensure final level is always Book/Title - order = [self.selected_group] - if order[-1] != Group.BOOK_ID: - order.append(Group.BOOK_ID) - return tuple(order) + field = self.selected_field + if field == 'title': + return ('title',) + return (field, 'title') - def re_initialize(self): + def re_initialize(self, db): gb = self.group_box - if not (before := gb.currentData()): - with suppress(KeyError): - before = Group[gprefs.get('browse_annots_group_by', Group.BOOK_ID.name)] + before = gb.currentData() or db.pref(BROWSE_ANNOTS_GROUP_BY_PREF, 'title') gb.blockSignals(True) gb.clear() - for group_type in Group: - gb.addItem(group_type.display_name, group_type) + # Annotation-specific fields first + for field, display_name in annotation_only_groupings().items(): + gb.addItem(display_name, field) + # Title grouping + gb.addItem(_('Title'), 'title') + # All groupable DB fields + fm = db.field_metadata + for field, display_name in iter_all_groups(fm): + gb.addItem(display_name, field) if before: row = gb.findData(before) if row > -1: @@ -933,7 +995,7 @@ class BrowsePanel(QWidget): db = current_db() self.search_box.setFocus(Qt.FocusReason.OtherFocusReason) self.restrictions.re_initialize(db, restrict_to_book_ids or set()) - self.group_options.re_initialize() + self.group_options.re_initialize(db) self.current_query = None self.results_list.clear() @@ -994,7 +1056,7 @@ class BrowsePanel(QWidget): highlight_start='\x1d', highlight_end='\x1d', snippet_size=64, ignore_removed=True, **q2 ) - group_order = getattr(self.group_options, 'group_order', (Group.BOOK_ID,)) + group_order = getattr(self.group_options, 'group_order', ('title',)) self.results_list.set_results(results, bool(q['fts_engine_query']), group_order=group_order) self.current_query = q except FTSQueryError as err: diff --git a/src/calibre/gui2/library/test_annotations.py b/src/calibre/gui2/library/test_annotations.py index c2e96f19bd..c1d1c29aa0 100644 --- a/src/calibre/gui2/library/test_annotations.py +++ b/src/calibre/gui2/library/test_annotations.py @@ -4,7 +4,7 @@ import unittest from unittest.mock import MagicMock -from calibre.gui2.library.annotations import get_group_key +from calibre.gui2.library.annotations import get_group_key, get_group_keys_list def _make_result(book_id=1, annot_id=1, **extra): @@ -113,19 +113,6 @@ class GroupKeyTest(unittest.TestCase): db.field_for.assert_not_called() self.assertEqual(label, 'PDF') - def test_group_enum_resolves_via_field_name_property(self): - from calibre.gui2.library.annotations import Group - - db = _make_mock_db( - field_for_map={'title': 'Dune'}, - field_metadata={}, - ) - (sort_key_enum, label_enum) = get_group_key(_make_result(), Group.BOOK_ID, db) - (sort_key_str, label_str) = get_group_key(_make_result(), 'title', db) - - self.assertEqual(label_enum, label_str) - self.assertEqual(sort_key_enum, sort_key_str) - def test_group_by_book_id_title(self): db = _make_mock_db( field_for_map={'title': 'The Great Gatsby'}, @@ -164,21 +151,14 @@ class GroupKeyTest(unittest.TestCase): self.assertIsInstance(label, str) self.assertEqual(sort_key[0], label.lower()) - def test_group_by_timestamp_day_bucketing(self): + def test_group_by_annot_timestamp_day_bucketing(self): from qt.core import QDateTime, Qt # Use an ISO timestamp from the annotation ts_iso = '2024-12-25T10:30:00.000Z' - db = _make_mock_db( - field_metadata={ - 'timestamp': { - 'datatype': 'datetime', - 'display': {'date_format': 'dd MMM yyyy'}, - } - } - ) + db = _make_mock_db(field_metadata={}) result = _make_result(annotation={'timestamp': ts_iso}) - (sort_key, label) = get_group_key(result, 'timestamp', db) + (sort_key, label) = get_group_key(result, 'annot_timestamp', db) # Parse the timestamp to verify sort key qdt = QDateTime.fromString(ts_iso, Qt.DateFormat.ISODate) @@ -193,22 +173,99 @@ class GroupKeyTest(unittest.TestCase): # Not asserting a particular label due to locale variations self.assertIsInstance(label, str) - def test_group_by_timestamp_with_invalid_date(self): - db = _make_mock_db( - field_metadata={ - 'timestamp': { - 'datatype': 'datetime', - 'display': {'date_format': 'dd MMM yyyy'}, - } - } - ) + def test_group_by_annot_timestamp_with_invalid_date(self): + db = _make_mock_db(field_metadata={}) result = _make_result(annotation={'timestamp': 'invalid-timestamp'}) - (sort_key, _) = get_group_key(result, 'timestamp', db) + (sort_key, _) = get_group_key(result, 'annot_timestamp', db) # Should use current date when parse fails self.assertIsInstance(sort_key[0], int) self.assertIsInstance(sort_key[1], str) + def test_group_by_book_timestamp_uses_db_field(self): + from qt.core import QDateTime, Qt + ts_iso = '2023-06-15T08:00:00.000Z' + db = _make_mock_db( + field_for_map={'timestamp': ts_iso}, + field_metadata={ + 'timestamp': { + 'datatype': 'datetime', + 'display': {'date_format': 'dd MMM yyyy'}, + } + }, + ) + # Even if the annotation has its own timestamp, grouping by 'timestamp' + # must use the book's Date Added value from the DB. + result = _make_result(annotation={'timestamp': '2000-01-01T00:00:00.000Z'}) + (sort_key, label) = get_group_key(result, 'timestamp', db) + + qdt = QDateTime.fromString(ts_iso, Qt.DateFormat.ISODate).toLocalTime() + qdate = qdt.date() + today = QDateTime.currentDateTime().toLocalTime().date() + expected_days_past = today.toJulianDay() - qdate.toJulianDay() + + self.assertEqual(sort_key[0], expected_days_past) + self.assertIsInstance(label, str) + + +class GroupKeysListTest(unittest.TestCase): + + def test_single_valued_field_returns_one_entry(self): + db = _make_mock_db( + field_for_map={'publisher': 'Tor Books'}, + field_metadata={'publisher': {'datatype': 'text', 'is_multiple': {}}}, + ) + entries = get_group_keys_list(_make_result(), 'publisher', db) + self.assertEqual(len(entries), 1) + self.assertEqual(entries[0][1], 'Tor Books') + + def test_multi_valued_field_returns_one_entry_per_value(self): + db = _make_mock_db( + field_for_map={'tags': ('sci-fi', 'fantasy')}, + field_metadata={'tags': {'datatype': 'text', 'is_multiple': {'cache_to_list': ','}}}, + ) + entries = get_group_keys_list(_make_result(), 'tags', db) + labels = [label for _, label in entries] + self.assertEqual(sorted(labels), ['fantasy', 'sci-fi']) + + def test_multi_valued_field_empty_returns_ungrouped(self): + db = _make_mock_db( + field_for_map={'tags': ()}, + field_metadata={'tags': {'datatype': 'text', 'is_multiple': {'cache_to_list': ','}}}, + ) + entries = get_group_keys_list(_make_result(), 'tags', db) + self.assertEqual(len(entries), 1) + # Should be the ungrouped label from all_groupings() + self.assertIn('Untagged', entries[0][1]) + + def test_title_returns_one_entry(self): + db = _make_mock_db( + field_for_map={'title': 'Dune'}, + field_metadata={}, + ) + entries = get_group_keys_list(_make_result(), 'title', db) + self.assertEqual(len(entries), 1) + self.assertEqual(entries[0][1], 'Dune') + + def test_annot_timestamp_returns_one_entry(self): + db = _make_mock_db(field_metadata={}) + result = _make_result(annotation={'timestamp': '2024-01-01T00:00:00.000Z'}) + entries = get_group_keys_list(result, 'annot_timestamp', db) + self.assertEqual(len(entries), 1) + + def test_authors_multi_valued_returns_one_per_author(self): + db = _make_mock_db( + field_for_map={'authors': ('Alice', 'Bob')}, + field_metadata={'authors': {'datatype': 'text', 'is_multiple': {'cache_to_list': ','}}}, + ) + entries = get_group_keys_list(_make_result(), 'authors', db) + labels = [label for _, label in entries] + self.assertEqual(sorted(labels), ['Alice', 'Bob']) + def find_tests(): - return unittest.defaultTestLoader.loadTestsFromTestCase(GroupKeyTest) + loader = unittest.defaultTestLoader + suite = unittest.TestSuite() + suite.addTests(loader.loadTestsFromTestCase(GroupKeyTest)) + suite.addTests(loader.loadTestsFromTestCase(GroupKeysListTest)) + return suite