Implement loading of annotations from multiple sources

This commit is contained in:
Kovid Goyal 2019-08-04 14:06:22 +05:30
parent 742f322f89
commit a38defca58
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
6 changed files with 179 additions and 60 deletions

View File

@ -16,40 +16,41 @@ BM_FIELD_SEP = u'*|!|?|*'
BM_LEGACY_ESC = u'esc-text-%&*#%(){}ads19-end-esc' BM_LEGACY_ESC = u'esc-text-%&*#%(){}ads19-end-esc'
def parse_bookmarks(raw):
    """Yield bookmark dicts parsed from serialized bookmark text.

    Each line of *raw* is examined independently:

    * A line containing ``^`` is treated as a legacy bookmark of the form
      ``title^spine#pos``. The split happens at the last ``^``, so the title
      itself may contain ``^``.
    * Otherwise, a line containing ``BM_FIELD_SEP`` is treated as a CFI
      bookmark of the form ``title<SEP>spine<SEP>pos``.

    Lines matching neither form, and lines whose spine index is not an
    integer (or, for the CFI form, that do not split into exactly three
    fields), are skipped silently.

    Every yielded dict has the keys ``type`` (``'legacy'`` or ``'cfi'``),
    ``title``, ``spine`` (int) and ``pos``.
    """
    for entry in raw.splitlines():
        if '^' in entry:
            title, _, ref = entry.rpartition('^')
            spine_text, _, pos = ref.partition('#')
            try:
                spine = int(spine_text.strip())
            except Exception:
                # Not a usable legacy record, ignore the line
                continue
            yield {'type': 'legacy', 'title': title, 'spine': spine, 'pos': pos}
            continue

        if BM_FIELD_SEP not in entry:
            continue

        try:
            title, spine, pos = entry.strip().split(BM_FIELD_SEP)
            spine = int(spine)
        except Exception:
            # Wrong number of fields or a non-integer spine index
            continue
        # Undo the escaping of '^' that was applied during serialization
        pos = pos.replace(BM_LEGACY_ESC, u'^')
        # A position that parses as a number is a scroll fraction, anything
        # else is kept as text
        try:
            pos = float(pos)
        except Exception:
            pass
        yield {'type': 'cfi', 'title': title, 'pos': pos, 'spine': spine}
class BookmarksMixin(object): class BookmarksMixin(object):
def __init__(self, copy_bookmarks_to_file=True): def __init__(self, copy_bookmarks_to_file=True):
self.copy_bookmarks_to_file = copy_bookmarks_to_file self.copy_bookmarks_to_file = copy_bookmarks_to_file
def parse_bookmarks(self, raw): def parse_bookmarks(self, raw):
for line in raw.splitlines(): for bm in parse_bookmarks(raw):
bm = None self.bookmarks.append(bm)
if line.count('^') > 0:
tokens = line.rpartition('^')
title, ref = tokens[0], tokens[2]
try:
spine, _, pos = ref.partition('#')
spine = int(spine.strip())
except:
continue
bm = {'type':'legacy', 'title':title, 'spine':spine, 'pos':pos}
elif BM_FIELD_SEP in line:
try:
title, spine, pos = line.strip().split(BM_FIELD_SEP)
spine = int(spine)
except:
continue
# Unescape from serialization
pos = pos.replace(BM_LEGACY_ESC, u'^')
# Check for pos being a scroll fraction
try:
pos = float(pos)
except:
pass
bm = {'type':'cfi', 'title':title, 'pos':pos, 'spine':spine}
if bm:
self.bookmarks.append(bm)
def serialize_bookmarks(self, bookmarks): def serialize_bookmarks(self, bookmarks):
dat = [] dat = []

View File

@ -0,0 +1,57 @@
#!/usr/bin/env python2
# vim:fileencoding=utf-8
# License: GPL v3 Copyright: 2019, Kovid Goyal <kovid at kovidgoyal.net>
from __future__ import absolute_import, division, print_function, unicode_literals
from collections import defaultdict
from operator import itemgetter
from calibre.utils.iso8601 import parse_iso8601
from calibre.utils.serialize import json_dumps, json_loads
from polyglot.builtins import iteritems, itervalues
def merge_annots_with_identical_titles(annots):
    """Yield one annotation per distinct title, keeping the newest one.

    *annots* is any iterable of mappings that have ``'title'`` and
    ``'timestamp'`` keys. For every title the annotation with the greatest
    timestamp is kept; when several annotations share that greatest
    timestamp, the one that occurs first in *annots* wins. Results are
    yielded in the order in which each title first appears in *annots*.

    The input is consumed in a single pass, so one-shot iterators
    (generators) are handled correctly, and no per-title sorting is needed.
    """
    newest = {}
    # Titles in order of first appearance. Kept separately because plain
    # dicts do not guarantee insertion order on every supported interpreter.
    first_seen_order = []
    for annot in annots:
        title = annot['title']
        if title not in newest:
            first_seen_order.append(title)
            newest[title] = annot
        elif annot['timestamp'] > newest[title]['timestamp']:
            # Strictly greater, so that the earliest entry wins on ties
            newest[title] = annot
    for title in first_seen_order:
        yield newest[title]
def merge_annotations(annots, annots_map):
    """Merge the annotations in *annots* into *annots_map*, in place.

    Each annotation has its ``'type'`` key removed (the annotation dict
    itself is modified) and is appended to the list stored in *annots_map*
    under that type. Afterwards:

    * the ``'last-read'`` list is ordered newest first by ``'timestamp'``
    * a ``'bookmark'`` list with more than one entry is reduced so that each
      title occurs only once

    *annots_map* is indexed with keys that may not exist yet, so it has to
    create missing lists on access, for example ``defaultdict(list)``.
    """
    for entry in annots:
        kind = entry.pop('type')
        annots_map[kind].append(entry)

    last_read = annots_map['last-read']
    if last_read:
        last_read.sort(key=itemgetter('timestamp'), reverse=True)

    for kind in ('bookmark',):
        existing = annots_map.get(kind)
        if not existing or len(existing) <= 1:
            # Nothing that could collide
            continue
        annots_map[kind] = list(merge_annots_with_identical_titles(existing))
def parse_annotations(raw):
    """Decode serialized annotations and return them as a list.

    *raw* is decoded with ``json_loads``. The ``'timestamp'`` value of every
    decoded annotation is replaced, in place, by the result of
    ``parse_iso8601(..., assume_utc=True)``.
    """
    def with_parsed_timestamp(entry):
        entry['timestamp'] = parse_iso8601(entry['timestamp'], assume_utc=True)
        return entry

    return [with_parsed_timestamp(entry) for entry in json_loads(raw)]
def serialize_annotations(annots_map):
    """Serialize a map of annotation type -> list of annotations.

    The map is flattened into a single list. Each annotation is copied, so
    the dicts held in *annots_map* are not modified; the copy gets a
    ``'type'`` key holding its annotation type and its ``'timestamp'``
    replaced by ``timestamp.isoformat()``. The list is then passed through
    ``json_dumps`` and the result returned.
    """
    records = []
    for kind, entries in iteritems(annots_map):
        for entry in entries:
            record = entry.copy()
            record.update({
                'type': kind,
                'timestamp': record['timestamp'].isoformat(),
            })
            records.append(record)
    return json_dumps(records)

View File

@ -109,7 +109,7 @@ def prepare_convert(temp_path, key, st):
def do_convert(path, temp_path, key, instance): def do_convert(path, temp_path, key, instance):
tdir = os.path.join(temp_path, instance['path']) tdir = os.path.join(temp_path, instance['path'])
fork_job('calibre.srv.render_book', 'render', args=( fork_job('calibre.srv.render_book', 'render', args=(
path, tdir, {'size': instance['file_size'], 'mtime': instance['file_mtime'], 'hash': key}, True, path, tdir, {'size': instance['file_size'], 'mtime': instance['file_mtime'], 'hash': key}, True, True,
), timeout=3000, no_output=True ), timeout=3000, no_output=True
) )
size = 0 size = 0

View File

@ -5,22 +5,30 @@
from __future__ import absolute_import, division, print_function, unicode_literals from __future__ import absolute_import, division, print_function, unicode_literals
import os import os
from collections import defaultdict
from hashlib import sha256
from threading import Thread from threading import Thread
from PyQt5.Qt import QDockWidget, Qt, QTimer, pyqtSignal from PyQt5.Qt import QDockWidget, Qt, QTimer, pyqtSignal
from calibre.constants import config_dir
from calibre.gui2 import error_dialog from calibre.gui2 import error_dialog
from calibre.gui2.main_window import MainWindow from calibre.gui2.main_window import MainWindow
from calibre.gui2.viewer.annotations import (
merge_annotations, parse_annotations, serialize_annotations
)
from calibre.gui2.viewer.convert_book import prepare_book from calibre.gui2.viewer.convert_book import prepare_book
from calibre.gui2.viewer.web_view import WebView, set_book_path from calibre.gui2.viewer.web_view import WebView, set_book_path
from calibre.utils.config import JSONConfig from calibre.utils.date import utcnow
from calibre.utils.ipc.simple_worker import WorkerError from calibre.utils.ipc.simple_worker import WorkerError
from calibre.utils.serialize import json_loads
from polyglot.builtins import as_bytes
annotations_dir = os.path.join(config_dir, 'viewer', 'annots')
def viewer_data(): def path_key(path):
if not hasattr(viewer_data, 'ans'): return sha256(as_bytes(path)).hexdigest()
viewer_data.ans = JSONConfig('viewer-data')
return viewer_data.ans
class EbookViewer(MainWindow): class EbookViewer(MainWindow):
@ -30,9 +38,13 @@ class EbookViewer(MainWindow):
def __init__(self): def __init__(self):
MainWindow.__init__(self, None) MainWindow.__init__(self, None)
try:
os.makedirs(annotations_dir)
except EnvironmentError:
pass
self.current_book_data = {} self.current_book_data = {}
self.save_cfi_debounce_timer = t = QTimer(self) self.save_annotations_debounce_timer = t = QTimer(self)
t.setInterval(2000), t.timeout.connect(self.save_cfi) t.setInterval(3000), t.timeout.connect(self.save_annotations)
self.book_prepared.connect(self.load_finished, type=Qt.QueuedConnection) self.book_prepared.connect(self.load_finished, type=Qt.QueuedConnection)
def create_dock(title, name, area, areas=Qt.LeftDockWidgetArea | Qt.RightDockWidgetArea): def create_dock(title, name, area, areas=Qt.LeftDockWidgetArea | Qt.RightDockWidgetArea):
@ -62,8 +74,8 @@ class EbookViewer(MainWindow):
def load_ebook(self, pathtoebook, open_at=None): def load_ebook(self, pathtoebook, open_at=None):
# TODO: Implement open_at # TODO: Implement open_at
if self.save_cfi_debounce_timer.isActive(): if self.save_annotations_debounce_timer.isActive():
self.save_cfi() self.save_annotations()
self.current_book_data = {} self.current_book_data = {}
t = Thread(name='LoadBook', target=self._load_ebook_worker, args=(pathtoebook, open_at)) t = Thread(name='LoadBook', target=self._load_ebook_worker, args=(pathtoebook, open_at))
t.daemon = True t.daemon = True
@ -88,28 +100,45 @@ class EbookViewer(MainWindow):
return return
set_book_path(data['base']) set_book_path(data['base'])
self.current_book_data = data self.current_book_data = data
self.current_book_data['annotations_map'] = defaultdict(list)
self.current_book_data['annotations_path_key'] = path_key(data['pathtoebook']) + '.json'
self.load_book_annotations()
self.web_view.start_book_load(initial_cfi=self.initial_cfi_for_current_book()) self.web_view.start_book_load(initial_cfi=self.initial_cfi_for_current_book())
def load_book_annotations(self):
    """Fill the current book's annotations map from the files on disk.

    Two optional files are consulted, in this order; a file that does not
    exist is skipped:

    1. ``calibre-book-annotations.json`` inside the book's ``base``
       directory, decoded with ``json_loads``
    2. the file named by ``annotations_path_key`` inside
       ``annotations_dir``, decoded with ``parse_annotations``

    The decoded annotations of each file are merged into
    ``self.current_book_data['annotations_map']``.
    """
    amap = self.current_book_data['annotations_map']

    def merge_from(path, decode):
        # Read and merge a single annotations file, if it is present
        if not os.path.exists(path):
            return
        with open(path, 'rb') as f:
            raw = f.read()
        merge_annotations(decode(raw), amap)

    merge_from(
        os.path.join(self.current_book_data['base'], 'calibre-book-annotations.json'),
        json_loads)
    merge_from(
        os.path.join(annotations_dir, self.current_book_data['annotations_path_key']),
        parse_annotations)
def initial_cfi_for_current_book(self): def initial_cfi_for_current_book(self):
vd = viewer_data() lrp = self.current_book_data['annotations_map']['last-read']
lrp = vd.get('last-read-positions', {}) if lrp:
return lrp.get('path', {}).get(self.current_book_data['pathtoebook']) lrp = lrp[0]
if lrp['pos_type'] == 'epubcfi':
return lrp['pos']
def cfi_changed(self, cfi): def cfi_changed(self, cfi):
if not self.current_book_data: if not self.current_book_data:
return return
self.current_book_data['last_known_cfi'] = cfi self.current_book_data['annotations_map']['last-read'] = [{
self.save_cfi_debounce_timer.start() 'pos': cfi, 'pos_type': 'epubcfi', 'timestamp': utcnow()}]
self.save_annotations_debounce_timer.start()
def save_cfi(self): def save_annotations(self):
self.save_cfi_debounce_timer.stop() self.save_annotations_debounce_timer.stop()
vd = viewer_data() amap = self.current_book_data['annotations_map']
lrp = vd.get('last-read-positions', {}) with open(os.path.join(annotations_dir, self.current_book_data['annotations_path_key']), 'wb') as f:
path = lrp.setdefault('path', {}) f.write(as_bytes(serialize_annotations(amap)))
path[self.current_book_data['pathtoebook']] = self.current_book_data['last_known_cfi']
vd.set('last-read-positions', lrp)
def closeEvent(self, ev): def closeEvent(self, ev):
if self.save_cfi_debounce_timer.isActive(): if self.save_annotations_debounce_timer.isActive():
self.save_cfi() self.save_annotations()
return MainWindow.closeEvent(self, ev) return MainWindow.closeEvent(self, ev)

View File

@ -5,6 +5,7 @@
from __future__ import absolute_import, division, print_function, unicode_literals from __future__ import absolute_import, division, print_function, unicode_literals
import os import os
import sys
from PyQt5.Qt import ( from PyQt5.Qt import (
QApplication, QBuffer, QByteArray, QHBoxLayout, QSize, QTimer, QUrl, QWidget, QApplication, QBuffer, QByteArray, QHBoxLayout, QSize, QTimer, QUrl, QWidget,
@ -186,7 +187,8 @@ class WebPage(QWebEnginePage):
msg, linenumber, source_id.partition(':')[2]), show=True) msg, linenumber, source_id.partition(':')[2]), show=True)
prefix = {QWebEnginePage.InfoMessageLevel: 'INFO', QWebEnginePage.WarningMessageLevel: 'WARNING'}.get( prefix = {QWebEnginePage.InfoMessageLevel: 'INFO', QWebEnginePage.WarningMessageLevel: 'WARNING'}.get(
level, 'ERROR') level, 'ERROR')
prints('%s: %s:%s: %s' % (prefix, source_id, linenumber, msg)) prints('%s: %s:%s: %s' % (prefix, source_id, linenumber, msg), file=sys.stderr)
sys.stderr.flush()
def acceptNavigationRequest(self, url, req_type, is_main_frame): def acceptNavigationRequest(self, url, req_type, is_main_frame):
if req_type == self.NavigationTypeReload: if req_type == self.NavigationTypeReload:

View File

@ -9,9 +9,9 @@ import os
import re import re
import sys import sys
from collections import OrderedDict, defaultdict from collections import OrderedDict, defaultdict
from datetime import datetime
from functools import partial from functools import partial
from itertools import count from itertools import count
from datetime import datetime
from css_parser import replaceUrls from css_parser import replaceUrls
from css_parser.css import CSSRule from css_parser.css import CSSRule
@ -29,11 +29,14 @@ from calibre.ebooks.oeb.polish.cover import find_cover_image, set_epub_cover
from calibre.ebooks.oeb.polish.css import transform_css from calibre.ebooks.oeb.polish.css import transform_css
from calibre.ebooks.oeb.polish.toc import get_landmarks, get_toc from calibre.ebooks.oeb.polish.toc import get_landmarks, get_toc
from calibre.ebooks.oeb.polish.utils import extract, guess_type from calibre.ebooks.oeb.polish.utils import extract, guess_type
from calibre.srv.metadata import encode_datetime
from calibre.utils.date import EPOCH
from calibre.utils.logging import default_log from calibre.utils.logging import default_log
from calibre.utils.short_uuid import uuid4 from calibre.utils.short_uuid import uuid4
from calibre.srv.metadata import encode_datetime from polyglot.binary import (
from polyglot.binary import as_base64_unicode as encode_component, from_base64_unicode as decode_component as_base64_unicode as encode_component, from_base64_unicode as decode_component
from polyglot.builtins import iteritems, map, is_py3, unicode_type )
from polyglot.builtins import is_py3, iteritems, map, unicode_type
from polyglot.urllib import quote, urlparse from polyglot.urllib import quote, urlparse
RENDER_VERSION = 1 RENDER_VERSION = 1
@ -169,10 +172,16 @@ class Container(ContainerBase):
tweak_mode = True tweak_mode = True
def __init__(self, path_to_ebook, tdir, log=None, book_hash=None): def __init__(self, path_to_ebook, tdir, log=None, book_hash=None, save_legacy_bookmark_data=False):
log = log or default_log log = log or default_log
book_fmt, opfpath, input_fmt = extract_book(path_to_ebook, tdir, log=log) book_fmt, opfpath, input_fmt = extract_book(path_to_ebook, tdir, log=log)
ContainerBase.__init__(self, tdir, opfpath, log) ContainerBase.__init__(self, tdir, opfpath, log)
if save_legacy_bookmark_data:
bm_file = 'META-INF/calibre_bookmarks.txt'
self.legacy_bookmark_data = None
if self.exists(bm_file):
with self.open(bm_file, 'rb') as f:
self.legacy_bookmark_data = f.read().decode('utf-8')
# We do not add zero byte sized files as the IndexedDB API in the # We do not add zero byte sized files as the IndexedDB API in the
# browser has no good way to distinguish between zero byte files and # browser has no good way to distinguish between zero byte files and
# load failures. # load failures.
@ -532,8 +541,22 @@ def serialize_datetimes(d):
d[k] = v d[k] = v
def render(pathtoebook, output_dir, book_hash=None, serialize_metadata=False): def get_legacy_annotations(container):
container = Container(pathtoebook, output_dir, book_hash=book_hash) from calibre.ebooks.oeb.iterator.bookmarks import parse_bookmarks
raw = container.legacy_bookmark_data or b''
for bm in parse_bookmarks(raw):
if bm['type'] == 'cfi' and isinstance(bm['pos'], unicode_type):
spine_index = (1 + bm['spine']) * 2
epubcfi = 'epubcfi(/{}/{})'.format(spine_index, bm['pos'].lstrip('/'))
title = bm.get('title')
if title and title != 'calibre_current_page_bookmark':
yield {'type': 'bookmark', 'title': title, 'pos': epubcfi, 'pos_type': 'epubcfi', 'timestamp': EPOCH}
else:
yield {'type': 'last-read', 'pos': epubcfi, 'pos_type': 'epubcfi', 'timestamp': EPOCH}
def render(pathtoebook, output_dir, book_hash=None, serialize_metadata=False, extract_annotations=False):
container = Container(pathtoebook, output_dir, book_hash=book_hash, save_legacy_bookmark_data=extract_annotations)
if serialize_metadata: if serialize_metadata:
from calibre.ebooks.metadata.meta import get_metadata from calibre.ebooks.metadata.meta import get_metadata
from calibre.utils.serialize import json_dumps from calibre.utils.serialize import json_dumps
@ -548,6 +571,13 @@ def render(pathtoebook, output_dir, book_hash=None, serialize_metadata=False):
f.write(cdata[1]) f.write(cdata[1])
with lopen(os.path.join(output_dir, 'calibre-book-metadata.json'), 'wb') as f: with lopen(os.path.join(output_dir, 'calibre-book-metadata.json'), 'wb') as f:
f.write(json_dumps(d)) f.write(json_dumps(d))
if extract_annotations:
annotations = None
if container.legacy_bookmark_data:
annotations = json_dumps(tuple(get_legacy_annotations(container)))
if annotations:
with lopen(os.path.join(output_dir, 'calibre-book-annotations.json'), 'wb') as f:
f.write(annotations)
if __name__ == '__main__': if __name__ == '__main__':