Work on adding annotations support to the in-browser viewer

As part of that, the desktop viewer's annotation handling has been made more
robust: annotations are now merged, and the merged lists are sorted by
CFI.
This commit is contained in:
Kovid Goyal 2020-06-30 22:47:42 +05:30
parent 631a907773
commit 0c4010b3af
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
14 changed files with 247 additions and 150 deletions

View File

@ -0,0 +1,88 @@
#!/usr/bin/env python2
# vim:fileencoding=utf-8
# License: GPL v3 Copyright: 2020, Kovid Goyal <kovid at kovidgoyal.net>
from __future__ import absolute_import, division, print_function, unicode_literals
from collections import defaultdict
from itertools import chain
from operator import itemgetter
from calibre.ebooks.epub.cfi.parse import cfi_sort_key
from polyglot.builtins import itervalues
# Fallback sort key returned by the sort key functions below for annotations
# that have no usable CFI.
# NOTE(review): '/99999999' is presumably chosen so that such annotations sort
# after every real position in the book -- confirm against cfi_sort_key().
no_cfi_sort_key = cfi_sort_key('/99999999')
def bookmark_sort_key(b):
    '''
    Return the key used to order the bookmark ``b`` by its position.

    Bookmarks whose ``pos_type`` is ``epubcfi`` are keyed on their ``pos``
    CFI, every other bookmark gets the shared ``no_cfi_sort_key``.
    '''
    has_cfi_position = b.get('pos_type') == 'epubcfi'
    if not has_cfi_position:
        return no_cfi_sort_key
    return cfi_sort_key(b['pos'], only_path=False)
def highlight_sort_key(hl):
    '''
    Return the key used to order the highlight ``hl`` by its position.

    The key is derived from the ``start_cfi`` entry of the highlight. When
    that entry is missing or empty the shared ``no_cfi_sort_key`` is used.
    '''
    start = hl.get('start_cfi')
    return cfi_sort_key(start, only_path=False) if start else no_cfi_sort_key
def sort_annot_list_by_position_in_book(annots, annot_type):
    '''
    Sort the list ``annots`` in place using the sort key function registered
    for ``annot_type``.

    Only ``'bookmark'`` and ``'highlight'`` are supported, any other
    ``annot_type`` raises :class:`KeyError`.
    '''
    key_for_type = {'bookmark': bookmark_sort_key, 'highlight': highlight_sort_key}
    annots.sort(key=key_for_type[annot_type])
def merge_annots_with_identical_field(a, b, field='title'):
    '''
    Merge two collections of annotations, keeping a single annotation for
    every distinct value of ``field``.

    Annotations from ``a`` and ``b`` that share the same ``field`` value are
    treated as versions of the same annotation and only the one with the
    greatest ``timestamp`` is kept. The result preserves the order in which
    each ``field`` value is first seen when iterating ``a`` followed by ``b``.

    :param a: Iterable of annotation dicts, each having ``field`` and ``timestamp`` keys
    :param b: Iterable of annotation dicts, each having ``field`` and ``timestamp`` keys
    :param field: Name of the key that identifies an annotation
    :return: ``(changed, merged)`` where ``merged`` is a new list and
        ``changed`` is True if some group contained differing timestamps or
        if ``merged`` differs in length from either input
    '''
    # The inputs are traversed twice and measured with len(). Materialize
    # them so that generators and other one-shot iterables work as well.
    a, b = list(a), list(b)
    groups = defaultdict(list)
    for annot in chain(a, b):
        groups[annot[field]].append(annot)
    for group in groups.values():
        # Newest first. The sort is stable, so when timestamps are equal the
        # annotation that was seen first wins.
        group.sort(key=itemgetter('timestamp'), reverse=True)

    changed = False
    ans = []
    seen = set()
    for annot in chain(a, b):
        value = annot[field]
        if value in seen:
            continue
        seen.add(value)
        group = groups[value]
        if len(group) > 1 and group[0]['timestamp'] != group[1]['timestamp']:
            changed = True
        ans.append(group[0])
    # Anything added to or dropped from either side also counts as a change
    if len(ans) != len(a) or len(ans) != len(b):
        changed = True
    return changed, ans
def merge_annot_lists(a, b, annot_type):
    '''
    Return a new list that combines the annotation lists ``a`` and ``b``.

    If either list is empty a copy of the other one is returned.
    ``'last-read'`` lists are concatenated and ordered newest first by
    ``timestamp``. Bookmarks are merged on ``title`` and highlights on
    ``uuid``, and the result is re-sorted by position in the book when the
    merge reports a change. Lists of any other type are simply concatenated.
    '''
    if not a:
        return list(b)
    if not b:
        return list(a)

    if annot_type == 'last-read':
        combined = a + b
        combined.sort(key=itemgetter('timestamp'), reverse=True)
        return combined

    merge_fields = {'bookmark': 'title', 'highlight': 'uuid'}
    if annot_type not in merge_fields:
        return a + b

    changed, merged = merge_annots_with_identical_field(a, b, merge_fields[annot_type])
    if changed:
        sort_annot_list_by_position_in_book(merged, annot_type)
    return merged
def merge_annotations(annots, annots_map):
    '''
    Merge the annotations in ``annots`` into ``annots_map``, in place.

    :param annots: Iterable of annotation dicts, each with a ``type`` key
    :param annots_map: Dict mapping annotation type to a list of existing
        annotations of that type. It is modified by this function.

    Incoming ``last-read`` entries are combined with the existing ones and
    only the entry with the greatest ``timestamp`` is kept. Without incoming
    ``last-read`` entries the existing list is only sorted newest first.
    Bookmarks are merged on ``title`` and highlights on ``uuid``. Types with
    no incoming annotations are left untouched.
    '''
    # If you make changes to this algorithm also update the
    # implementation in read_book.annotations
    # NOTE(review): incoming last-read entries used to be dropped here; check
    # that read_book.annotations treats them the same way as this function.
    incoming = defaultdict(list)
    for annot in annots:
        incoming[annot['type']].append(annot)

    existing_lr = annots_map.get('last-read')
    incoming_lr = incoming.get('last-read')
    if incoming_lr:
        # Existing entries go first so that they win on equal timestamps
        # (the sort is stable). Keeping only the newest entry stops the list
        # from growing on every merge.
        candidates = list(existing_lr or ()) + incoming_lr
        candidates.sort(key=itemgetter('timestamp'), reverse=True)
        annots_map['last-read'] = candidates[:1]
    elif existing_lr:
        existing_lr.sort(key=itemgetter('timestamp'), reverse=True)

    for annot_type, field in {'bookmark': 'title', 'highlight': 'uuid'}.items():
        b = incoming.get(annot_type)
        if not b:
            continue
        a = annots_map.get(annot_type) or []
        # Only the merged list is of interest here, not the changed flag
        annots_map[annot_type] = merge_annots_with_identical_field(a, b, field=field)[1]

View File

@ -17,6 +17,7 @@ from calibre import isbytestring, as_unicode
from calibre.constants import iswindows, preferred_encoding
from calibre.customize.ui import run_plugins_on_import, run_plugins_on_postimport, run_plugins_on_postadd
from calibre.db import SPOOL_SIZE, _get_next_series_num_for_list
from calibre.db.annotations import merge_annotations
from calibre.db.categories import get_categories
from calibre.db.locking import create_locks, DowngradeLockError, SafeReadLock
from calibre.db.errors import NoSuchFormat, NoSuchBook
@ -2343,6 +2344,15 @@ class Cache(object):
def set_annotations_for_book(self, book_id, fmt, annots_list, user_type='local', user='viewer'):
self.backend.set_annotations_for_book(book_id, fmt, annots_list, user_type, user)
@write_api
def merge_annotations_for_book(self, book_id, fmt, annots_list, user_type='local', user='viewer'):
    '''
    Merge ``annots_list`` into the annotations currently stored for the
    specified book, format and user, then store the merged result.
    '''
    amap = self._annotations_map_for_book(book_id, fmt, user_type=user_type, user=user)
    merge_annotations(annots_list, amap)
    # Flatten the per-type lists back into the single list the setter expects
    merged = [annot for group in itervalues(amap) for annot in group]
    self._set_annotations_for_book(book_id, fmt, merged, user_type=user_type, user=user)
def import_library(library_key, importer, library_path, progress=None, abort=None):
from calibre.db.backend import DB

View File

@ -4,117 +4,32 @@
import os
from collections import defaultdict
from io import BytesIO
from itertools import chain
from operator import itemgetter
from threading import Thread
from calibre.ebooks.epub.cfi.parse import cfi_sort_key
from calibre.db.annotations import merge_annot_lists
from calibre.gui2.viewer.convert_book import update_book
from calibre.gui2.viewer.integration import save_annotations_list_to_library
from calibre.gui2.viewer.web_view import viewer_config_dir
from calibre.srv.render_book import (
EPUB_FILE_TYPE_MAGIC, parse_annotation, parse_annotations as _parse_annotations
)
from calibre.srv.render_book import EPUB_FILE_TYPE_MAGIC
from calibre.utils.date import EPOCH
from calibre.utils.serialize import json_dumps
from calibre.utils.iso8601 import parse_iso8601
from calibre.utils.serialize import json_dumps, json_loads
from calibre.utils.zipfile import safe_replace
from polyglot.binary import as_base64_bytes
from polyglot.builtins import iteritems, itervalues
from polyglot.builtins import iteritems
from polyglot.queue import Queue
annotations_dir = os.path.join(viewer_config_dir, 'annots')
no_cfi_sort_key = cfi_sort_key('/99999999')
def parse_annotations(raw):
return list(_parse_annotations(raw))
def bookmark_sort_key(b):
if b.get('pos_type') == 'epubcfi':
return cfi_sort_key(b['pos'], only_path=False)
return no_cfi_sort_key
def highlight_sort_key(hl):
cfi = hl.get('start_cfi')
if cfi:
return cfi_sort_key(cfi, only_path=False)
return no_cfi_sort_key
def sort_annot_list_by_position_in_book(annots, annot_type):
annots.sort(key={'bookmark': bookmark_sort_key, 'highlight': highlight_sort_key}[annot_type])
def merge_annots_with_identical_field(a, b, field='title'):
title_groups = defaultdict(list)
for x in chain(a, b):
title_groups[x[field]].append(x)
for tg in itervalues(title_groups):
tg.sort(key=itemgetter('timestamp'), reverse=True)
seen = set()
changed = False
ans = []
for x in chain(a, b):
title = x[field]
if title not in seen:
seen.add(title)
grp = title_groups[title]
if len(grp) > 1 and grp[0]['timestamp'] != grp[1]['timestamp']:
changed = True
ans.append(grp[0])
if len(ans) != len(a) or len(ans) != len(b):
changed = True
return changed, ans
def merge_annot_lists(a, b, annot_type):
if not a:
return list(b)
if not b:
return list(a)
if annot_type == 'last-read':
ans = a + b
ans.sort(key=itemgetter('timestamp'), reverse=True)
return ans
merge_field = {'bookmark': 'title', 'highlight': 'uuid'}.get(annot_type)
if merge_field is None:
return a + b
changed, c = merge_annots_with_identical_field(a, b, merge_field)
if changed:
sort_annot_list_by_position_in_book(c, annot_type)
return c
def merge_annotations(annots, annots_map):
amap = {}
for annot in annots:
annot = parse_annotation(annot)
atype = annot.pop('type')
amap.setdefault(atype, []).append(annot)
lr = annots_map['last-read']
if lr:
lr.sort(key=itemgetter('timestamp'), reverse=True)
for annot_type, field in {'bookmark': 'title', 'highlight': 'uuid'}.items():
a = annots_map.get(annot_type)
if a and len(a) > 1:
annots_map[annot_type] = list(merge_annots_with_identical_field(a, field=field))
def serialize_annotation(annot):
annot = annot.copy()
annot['timestamp'] = annot['timestamp'].isoformat()
return annot
parse_annotations = json_loads
def annotations_as_copied_list(annots_map):
for atype, annots in iteritems(annots_map):
for annot in annots:
ts = (annot['timestamp'] - EPOCH).total_seconds()
annot = serialize_annotation(annot)
ts = (parse_iso8601(annot['timestamp'], assume_utc=True) - EPOCH).total_seconds()
annot = annot.copy()
annot['type'] = atype
yield annot, ts

View File

@ -11,9 +11,7 @@ from PyQt5.Qt import (
)
from calibre.gui2 import choose_files, choose_save_file
from calibre.gui2.viewer.annotations import serialize_annotation
from calibre.gui2.viewer.shortcuts import get_shortcut_for
from calibre.srv.render_book import parse_annotation
from calibre.utils.date import EPOCH, utcnow
from calibre.utils.icu import sort_key
from polyglot.builtins import range, unicode_type
@ -231,8 +229,7 @@ class BookmarkManager(QWidget):
self, 'export-viewer-bookmarks', _('Export bookmarks'),
filters=[(_('Saved bookmarks'), ['calibre-bookmarks'])], all_files=False, initial_filename='bookmarks.calibre-bookmarks')
if filename:
entries = [serialize_annotation(a) for a in self.get_bookmarks()]
data = json.dumps({'type': 'bookmarks', 'entries': entries}, indent=True)
data = json.dumps({'type': 'bookmarks', 'entries': self.get_bookmarks()}, indent=True)
if not isinstance(data, bytes):
data = data.encode('utf-8')
with lopen(filename, 'wb') as fileobj:
@ -273,9 +270,8 @@ class BookmarkManager(QWidget):
return
bookmarks = self.get_bookmarks()
for bm in imported['entries']:
q = parse_annotation(bm)
if q not in bookmarks:
bookmarks.append(q)
if bm not in bookmarks:
bookmarks.append(bm)
self.set_bookmarks(bookmarks)
self.edited.emit(self.get_bookmarks())

View File

@ -41,7 +41,9 @@ def load_annotations_map_from_library(book_library_details):
def save_annotations_list_to_library(book_library_details, alist):
import apsw
from calibre.db.backend import save_annotations_for_book, Connection
from calibre.db.backend import save_annotations_for_book, Connection, annotations_for_book
from calibre.gui2.viewer.annotations import annotations_as_copied_list
from calibre.db.annotations import merge_annotations
dbpath = book_library_details['dbpath']
try:
conn = apsw.Connection(dbpath, flags=apsw.SQLITE_OPEN_READWRITE)
@ -49,7 +51,13 @@ def save_annotations_list_to_library(book_library_details, alist):
return
try:
conn.setbusytimeout(Connection.BUSY_TIMEOUT)
amap = {}
with conn:
save_annotations_for_book(conn.cursor(), book_library_details['book_id'], book_library_details['fmt'], alist)
cursor = conn.cursor()
for annot in annotations_for_book(cursor, book_library_details['book_id'], book_library_details['fmt']):
amap.setdefault(annot['type'], []).append(annot)
merge_annotations((x[0] for x in alist), amap)
alist = tuple(annotations_as_copied_list(amap))
save_annotations_for_book(cursor, book_library_details['book_id'], book_library_details['fmt'], alist)
finally:
conn.close()

View File

@ -19,13 +19,13 @@ from PyQt5.Qt import (
from calibre import prints
from calibre.constants import DEBUG
from calibre.customize.ui import available_input_formats
from calibre.db.annotations import merge_annotations
from calibre.gui2 import choose_files, error_dialog
from calibre.gui2.dialogs.drm_error import DRMErrorMessage
from calibre.gui2.image_popup import ImagePopup
from calibre.gui2.main_window import MainWindow
from calibre.gui2.viewer.annotations import (
AnnotationsSaveWorker, annotations_dir, merge_annotations, parse_annotations,
serialize_annotation
AnnotationsSaveWorker, annotations_dir, parse_annotations
)
from calibre.gui2.viewer.bookmarks import BookmarkManager
from calibre.gui2.viewer.convert_book import clean_running_workers, prepare_book
@ -44,7 +44,6 @@ from calibre.gui2.viewer.web_view import (
from calibre.utils.date import utcnow
from calibre.utils.img import image_from_path
from calibre.utils.ipc.simple_worker import WorkerError
from calibre.utils.iso8601 import parse_iso8601
from calibre.utils.monotonic import monotonic
from calibre.utils.serialize import json_loads
from polyglot.builtins import as_bytes, as_unicode, iteritems, itervalues
@ -531,10 +530,7 @@ class EbookViewer(MainWindow):
initial_position = {'type': 'bookpos', 'data': float(open_at)}
highlights = self.current_book_data['annotations_map']['highlight']
self.highlights_widget.load(highlights)
self.web_view.start_book_load(
initial_position=initial_position,
highlights=list(map(serialize_annotation, highlights))
)
self.web_view.start_book_load(initial_position=initial_position, highlights=highlights)
def load_book_data(self, calibre_book_data=None):
self.current_book_data['book_library_details'] = get_book_library_details(self.current_book_data['pathtoebook'])
@ -599,7 +595,7 @@ class EbookViewer(MainWindow):
if not self.current_book_data:
return
self.current_book_data['annotations_map']['last-read'] = [{
'pos': cfi, 'pos_type': 'epubcfi', 'timestamp': utcnow()}]
'pos': cfi, 'pos_type': 'epubcfi', 'timestamp': utcnow().isoformat()}]
self.save_pos_timer.start()
# }}}
@ -615,8 +611,6 @@ class EbookViewer(MainWindow):
def highlights_changed(self, highlights):
if not self.current_book_data:
return
for h in highlights:
h['timestamp'] = parse_iso8601(h['timestamp'], assume_utc=True)
amap = self.current_book_data['annotations_map']
amap['highlight'] = highlights
self.highlights_widget.refresh(highlights)

View File

@ -21,7 +21,7 @@ from calibre.srv.routes import endpoint, json
from calibre.srv.utils import get_db, get_library_data
from calibre.utils.filenames import rmtree
from calibre.utils.serialize import json_dumps
from polyglot.builtins import as_unicode, map
from polyglot.builtins import as_unicode, itervalues, map
cache_lock = RLock()
queued_jobs = {}
@ -152,6 +152,7 @@ def book_manifest(ctx, rd, book_id, fmt):
ans['metadata'] = book_as_json(db, book_id)
user = rd.username or None
ans['last_read_positions'] = db.get_last_read_positions(book_id, fmt, user) if user else []
ans['annotations_map'] = db.annotations_map_for_book(book_id, fmt, user_type='web', user=user) if user else {}
return ans
except EnvironmentError as e:
if e.errno != errno.ENOENT:
@ -226,6 +227,53 @@ def set_last_read_position(ctx, rd, library_id, book_id, fmt):
return b''
@endpoint('/book-get-annotations/{library_id}/{+which}', postprocess=json)
def get_annotations(ctx, rd, library_id, which):
    '''
    Return the last read positions and the annotations map of the logged in
    user for every requested book. ``which`` has the form:
    book_id1-fmt1_book_id2-fmt2,...

    The result maps ``book_id:fmt`` keys to the data for that book. Items
    whose book id is not an integer, or that the user is not allowed to
    access, are silently left out.
    '''
    db = get_db(ctx, rd, library_id)
    user = rd.username or None
    if not user:
        raise HTTPNotFound('login required for sync')
    allowed_book_ids = ctx.allowed_book_ids(rd, db)
    ans = {}
    for spec in which.split('_'):
        id_part, _sep, fmt = spec.partition('-')
        try:
            book_id = int(id_part)
        except Exception:
            continue
        if book_id in allowed_book_ids:
            # user is known to be set here, because of the login check above
            ans['{}:{}'.format(book_id, fmt)] = {
                'last_read_positions': db.get_last_read_positions(book_id, fmt, user),
                'annotations_map': db.annotations_map_for_book(book_id, fmt, user_type='web', user=user),
            }
    return ans
@endpoint('/book-update-annotations/{library_id}/{book_id}/{+fmt}', types={'book_id': int}, methods=('POST',))
def update_annotations(ctx, rd, library_id, book_id, fmt):
    '''
    Merge the annotations sent in the request body into the annotations
    stored for the logged in user and the specified book.

    The body must be a JSON object mapping annotation type to a list of
    annotations. Anything else is rejected as invalid data.
    '''
    db = get_db(ctx, rd, library_id)
    user = rd.username or None
    if not user:
        raise HTTPNotFound('login required for sync')
    if not ctx.has_id(rd, db, book_id):
        raise BookNotFound(book_id, db)
    try:
        amap = jsonlib.load(rd.request_body_file)
    except Exception:
        raise HTTPNotFound('Invalid data')
    # The body comes from the network, well formed JSON is not necessarily
    # an object of lists, so check the shape before using it
    if not isinstance(amap, dict):
        raise HTTPNotFound('Invalid data')
    alist = []
    for val in itervalues(amap):
        if not val:
            continue
        if not isinstance(val, list):
            raise HTTPNotFound('Invalid data')
        alist.extend(val)
    db.merge_annotations_for_book(book_id, fmt, alist, user_type='web', user=user)
mathjax_lock = Lock()
mathjax_manifest = None

View File

@ -41,7 +41,6 @@ from calibre.srv.opts import grouper
from calibre.utils.date import EPOCH
from calibre.utils.filenames import rmtree
from calibre.utils.ipc.simple_worker import start_pipe_worker
from calibre.utils.iso8601 import parse_iso8601
from calibre.utils.logging import default_log
from calibre.utils.serialize import (
json_dumps, json_loads, msgpack_dumps, msgpack_loads
@ -783,25 +782,13 @@ def serialize_datetimes(d):
EPUB_FILE_TYPE_MAGIC = b'encoding=json+base64:\n'
def parse_annotation(annot):
ts = annot['timestamp']
if hasattr(ts, 'rstrip'):
annot['timestamp'] = parse_iso8601(ts, assume_utc=True)
return annot
def parse_annotations(raw):
for annot in json_loads(raw):
yield parse_annotation(annot)
def get_stored_annotations(container, bookmark_data):
raw = bookmark_data or b''
if not raw:
return
if raw.startswith(EPUB_FILE_TYPE_MAGIC):
raw = raw[len(EPUB_FILE_TYPE_MAGIC):].replace(b'\n', b'')
for annot in parse_annotations(from_base64_bytes(raw)):
for annot in json_loads(from_base64_bytes(raw)):
yield annot
return

View File

@ -62,20 +62,30 @@ def sync_data_received(library_id, lrmap, load_type, xhr, ev):
print('Failed to get book sync data')
return
data = JSON.parse(xhr.responseText)
for key in data:
last_read_data = data.last_read_positions
db = get_db()
for key in last_read_data:
new_vals = data[key]
entry = {'last_read': None, 'last_read_position': None, 'annotations_map': None}
prev_last_read = lrmap[key]
if not prev_last_read:
continue
last_read_positions = data[key]
new_last_read = get_last_read_position(last_read_positions, prev_last_read)
if not new_last_read:
continue
last_read = new Date(new_last_read.epoch * 1000)
cfi = new_last_read.cfi
if cfi:
db = get_db()
if prev_last_read:
last_read_positions = new_vals.last_read_positions
new_last_read = get_last_read_position(last_read_positions, prev_last_read)
if new_last_read:
last_read = new Date(new_last_read.epoch * 1000)
cfi = new_last_read.cfi
if cfi:
entry.last_read = last_read
entry.last_read_position = cfi
new_amap = new_vals.annotations_map or {}
is_empty = True
v'for(var ikey in new_amap) { is_empty = false; break; }'
if !is_empty:
entry.annotations_map = new_amap
if entry.last_read_position or entry.annotations_map:
book_id, fmt = key.partition(':')[::2]
db.update_last_read_data_from_key(library_id, int(book_id), fmt, last_read, cfi)
db.update_annotations_data_from_key(library_id, int(book_id), fmt, entry)
def start_sync(to_sync):

View File

@ -284,7 +284,7 @@ thumbnail_cache = ThumbnailCache()
def sync_library_books(library_id, to_sync, callback):
url = f'book-get-last-read-position/{library_id}/'
url = f'book-get-annotations/{library_id}/'
which = v'[]'
lrmap = {}
for key, last_read in to_sync:

View File

@ -75,6 +75,8 @@ def merge_annot_lists(a, b, field):
def merge_annotation_maps(a, b):
# If you make changes to this algorithm also update the
# implementation in calibre.db.annotations
updated = False
ans = {}
for field in field_map:

View File

@ -7,6 +7,7 @@ from gettext import gettext as _
from book_list.router import is_reading_book
from modals import error_dialog
from read_book.annotations import merge_annotation_maps
from session import get_interface_data
from utils import username_key
@ -59,6 +60,7 @@ def new_book(key, metadata):
'recent_date': new Date(),
'last_read': {},
'last_read_position': {},
'annotations_map': {},
}
@ -188,6 +190,8 @@ class DB:
book_id = int(book_id)
key = v'[library_id, book_id, fmt]'
self.do_op(['books'], key, _('Failed to read from the books database'), def(result):
if result and not result.annotations_map:
result.annotations_map = {}
proceed(result or new_book(key, metadata))
)
@ -208,13 +212,16 @@ class DB:
if newest_epoch is None or pos.epoch > newest_epoch:
newest_epoch = pos.epoch
newest_pos = pos.cfi
unkey = username_key(get_interface_data().username)
if newest_pos:
unkey = username_key(get_interface_data().username)
book.last_read[unkey] = new Date(newest_epoch * 1000)
book.last_read_position[unkey] = newest_pos
if manifest.annotations_map:
book.annotations_map[unkey] = manifest.annotations_map
v'delete manifest["metadata"]'
v'delete manifest["last_read_positions"]'
v'delete manifest.metadata'
v'delete manifest.last_read_positions'
v'delete manifest.annotations_map'
self.do_op(['books'], book, _('Failed to write to the books database'), proceed, op='put')
def store_file(self, book, name, xhr, proceed, is_cover):
@ -290,13 +297,29 @@ class DB:
book.last_read[unkey] = book.recent_date = now
self.do_op(['books'], book, _('Failed to write to the books database'), op='put')
def update_last_read_data_from_key(self, library_id, book_id, fmt, last_read, last_read_position):
def update_annotations_data_from_key(self, library_id, book_id, fmt, new_data):
unkey = username_key(get_interface_data().username)
self.get_book(library_id, book_id, fmt, None, def(book):
if book.metadata: # book exists
book.last_read[unkey] = book.recent_date = last_read
book.last_read_position[unkey] = last_read_position
self.do_op(['books'], book, _('Failed to write to the books database'), op='put')
changed = False
if new_data.last_read_position:
book.last_read[unkey] = book.recent_date = new_data.last_read
book.last_read_position[unkey] = new_data.last_read_position
changed = True
if not book.annotations_map:
book.annotations_map = v'{}'
if new_data.annotations_map:
existing = book.annotations_map[unkey]
if not existing:
changed = True
book.annotations_map[unkey] = new_data.annotations_map
else:
updated, merged = merge_annotation_maps(existing, new_data.annotations_map)
if updated:
changed = True
book.annotations_map[unkey] = merged
if changed:
self.do_op(['books'], book, _('Failed to write to the books database'), op='put')
)
def get_file(self, book, name, proceed):

View File

@ -7,7 +7,7 @@ import traceback
from elementmaker import E
from gettext import gettext as _
from ajax import ajax
from ajax import ajax, ajax_send
from book_list.constants import read_book_container_id
from book_list.library_data import current_library_id, library_data
from book_list.router import home, push_state, read_book_mode, update_window_title
@ -73,6 +73,7 @@ class ReadUI:
ui_operations.focus_iframe = self.focus_iframe.bind(self)
ui_operations.toggle_toc = self.toggle_toc.bind(self)
ui_operations.toggle_full_screen = self.toggle_full_screen.bind(self)
ui_operations.highlights_changed = self.highlights_changed.bind(self)
def on_resize(self):
self.view.on_resize()
@ -192,6 +193,14 @@ class ReadUI:
def update_color_scheme(self):
self.view.update_color_scheme()
def highlights_changed(self, highlights):
    # Store the changed highlights in the local book database and send them
    # to the server to be merged with the annotations stored there.
    amap = {'highlight': highlights}
    library_id = self.base_url_data.library_id
    book_id = self.base_url_data.book_id
    fmt = self.base_url_data.fmt
    # update_annotations_data_from_key() looks for the annotations under the
    # annotations_map key of its new_data argument, so the map has to be
    # wrapped, passing it directly results in nothing being stored locally.
    self.db.update_annotations_data_from_key(library_id, book_id, fmt, {'annotations_map': amap})
    ajax_send(f'book-update-annotations/{library_id}/{book_id}/{fmt}', amap, def (): pass;)
@property
def url_data(self):
ans = {'library_id':self.base_url_data.library_id, 'book_id':self.base_url_data.book_id, 'fmt': self.base_url_data.fmt}

View File

@ -774,14 +774,21 @@ class View:
self.loaded_resources = {}
self.content_popup_overlay.loaded_resources = {}
self.timers.start_book(book)
unkey = username_key(get_interface_data().username)
self.book = current_book.book = book
self.annotations_manager.set_highlights(book.highlights or v'[]')
hl = None
if runtime.is_standalone_viewer:
hl = book.highlights
v'delete book.highlights'
else:
if unkey and book.annotations_map[unkey]:
hl = book.annotations_map[unkey].highlight
self.annotations_manager.set_highlights(hl or v'[]')
if runtime.is_standalone_viewer:
add_book_to_recently_viewed(book)
if ui_operations.update_last_read_time:
ui_operations.update_last_read_time(book)
pos = {'replace_history':True}
unkey = username_key(get_interface_data().username)
name = book.manifest.spine[0]
cfi = None
if initial_position and initial_position.type is 'cfi' and initial_position.data.startswith('epubcfi(/'):