diff --git a/src/calibre/ebooks/oeb/iterator/bookmarks.py b/src/calibre/ebooks/oeb/iterator/bookmarks.py index bc16fbaf81..720a8e7cc8 100644 --- a/src/calibre/ebooks/oeb/iterator/bookmarks.py +++ b/src/calibre/ebooks/oeb/iterator/bookmarks.py @@ -16,40 +16,41 @@ BM_FIELD_SEP = u'*|!|?|*' BM_LEGACY_ESC = u'esc-text-%&*#%(){}ads19-end-esc' +def parse_bookmarks(raw): + for line in raw.splitlines(): + if '^' in line: + tokens = line.rpartition('^') + title, ref = tokens[0], tokens[2] + try: + spine, _, pos = ref.partition('#') + spine = int(spine.strip()) + except Exception: + continue + yield {'type':'legacy', 'title':title, 'spine':spine, 'pos':pos} + elif BM_FIELD_SEP in line: + try: + title, spine, pos = line.strip().split(BM_FIELD_SEP) + spine = int(spine) + except Exception: + continue + # Unescape from serialization + pos = pos.replace(BM_LEGACY_ESC, u'^') + # Check for pos being a scroll fraction + try: + pos = float(pos) + except Exception: + pass + yield {'type':'cfi', 'title':title, 'pos':pos, 'spine':spine} + + class BookmarksMixin(object): def __init__(self, copy_bookmarks_to_file=True): self.copy_bookmarks_to_file = copy_bookmarks_to_file def parse_bookmarks(self, raw): - for line in raw.splitlines(): - bm = None - if line.count('^') > 0: - tokens = line.rpartition('^') - title, ref = tokens[0], tokens[2] - try: - spine, _, pos = ref.partition('#') - spine = int(spine.strip()) - except: - continue - bm = {'type':'legacy', 'title':title, 'spine':spine, 'pos':pos} - elif BM_FIELD_SEP in line: - try: - title, spine, pos = line.strip().split(BM_FIELD_SEP) - spine = int(spine) - except: - continue - # Unescape from serialization - pos = pos.replace(BM_LEGACY_ESC, u'^') - # Check for pos being a scroll fraction - try: - pos = float(pos) - except: - pass - bm = {'type':'cfi', 'title':title, 'pos':pos, 'spine':spine} - - if bm: - self.bookmarks.append(bm) + for bm in parse_bookmarks(raw): + self.bookmarks.append(bm) def serialize_bookmarks(self, bookmarks): dat = [] diff --git a/src/calibre/gui2/viewer/annotations.py b/src/calibre/gui2/viewer/annotations.py new file mode 100644 index 0000000000..ff7c134c1e --- /dev/null +++ b/src/calibre/gui2/viewer/annotations.py @@ -0,0 +1,57 @@ +#!/usr/bin/env python2 +# vim:fileencoding=utf-8 +# License: GPL v3 Copyright: 2019, Kovid Goyal + +from __future__ import absolute_import, division, print_function, unicode_literals + +from collections import defaultdict +from operator import itemgetter + +from calibre.utils.iso8601 import parse_iso8601 +from calibre.utils.serialize import json_dumps, json_loads +from polyglot.builtins import iteritems, itervalues + + +def merge_annots_with_identical_titles(annots): + title_groups = defaultdict(list) + for a in annots: + title_groups[a['title']].append(a) + for tg in itervalues(title_groups): + tg.sort(key=itemgetter('timestamp'), reverse=True) + seen = set() + for a in annots: + title = a['title'] + if title not in seen: + seen.add(title) + yield title_groups[title][0] + + +def merge_annotations(annots, annots_map): + for annot in annots: + annots_map[annot.pop('type')].append(annot) + lr = annots_map['last-read'] + if lr: + lr.sort(key=itemgetter('timestamp'), reverse=True) + for annot_type in ('bookmark',): + a = annots_map.get(annot_type) + if a and len(a) > 1: + annots_map[annot_type] = list(merge_annots_with_identical_titles(a)) + + +def parse_annotations(raw): + ans = [] + for annot in json_loads(raw): + annot['timestamp'] = parse_iso8601(annot['timestamp'], assume_utc=True) + ans.append(annot) + return ans + + +def serialize_annotations(annots_map): + ans = [] + for atype, annots in iteritems(annots_map): + for annot in annots: + annot = annot.copy() + annot['type'] = atype + annot['timestamp'] = annot['timestamp'].isoformat() + ans.append(annot) + return json_dumps(ans) diff --git a/src/calibre/gui2/viewer/convert_book.py b/src/calibre/gui2/viewer/convert_book.py index 30eb0337bc..ec26a0316f 100644 --- a/src/calibre/gui2/viewer/convert_book.py +++ b/src/calibre/gui2/viewer/convert_book.py @@ -109,7 +109,7 @@ def prepare_convert(temp_path, key, st): def do_convert(path, temp_path, key, instance): tdir = os.path.join(temp_path, instance['path']) fork_job('calibre.srv.render_book', 'render', args=( - path, tdir, {'size': instance['file_size'], 'mtime': instance['file_mtime'], 'hash': key}, True, + path, tdir, {'size': instance['file_size'], 'mtime': instance['file_mtime'], 'hash': key}, True, True, ), timeout=3000, no_output=True ) size = 0 diff --git a/src/calibre/gui2/viewer/ui.py b/src/calibre/gui2/viewer/ui.py index 2f074041b5..d02bf7a8a8 100644 --- a/src/calibre/gui2/viewer/ui.py +++ b/src/calibre/gui2/viewer/ui.py @@ -5,22 +5,30 @@ from __future__ import absolute_import, division, print_function, unicode_literals import os +from collections import defaultdict +from hashlib import sha256 from threading import Thread from PyQt5.Qt import QDockWidget, Qt, QTimer, pyqtSignal +from calibre.constants import config_dir from calibre.gui2 import error_dialog from calibre.gui2.main_window import MainWindow +from calibre.gui2.viewer.annotations import ( + merge_annotations, parse_annotations, serialize_annotations +) from calibre.gui2.viewer.convert_book import prepare_book from calibre.gui2.viewer.web_view import WebView, set_book_path -from calibre.utils.config import JSONConfig +from calibre.utils.date import utcnow from calibre.utils.ipc.simple_worker import WorkerError +from calibre.utils.serialize import json_loads +from polyglot.builtins import as_bytes + +annotations_dir = os.path.join(config_dir, 'viewer', 'annots') -def viewer_data(): - if not hasattr(viewer_data, 'ans'): - viewer_data.ans = JSONConfig('viewer-data') - return viewer_data.ans +def path_key(path): + return sha256(as_bytes(path)).hexdigest() class EbookViewer(MainWindow): @@ -30,9 +38,13 @@ class EbookViewer(MainWindow): def __init__(self): MainWindow.__init__(self, None) + try: + os.makedirs(annotations_dir) + except EnvironmentError: + pass self.current_book_data = {} - self.save_cfi_debounce_timer = t = QTimer(self) - t.setInterval(2000), t.timeout.connect(self.save_cfi) + self.save_annotations_debounce_timer = t = QTimer(self) + t.setInterval(3000), t.timeout.connect(self.save_annotations) self.book_prepared.connect(self.load_finished, type=Qt.QueuedConnection) def create_dock(title, name, area, areas=Qt.LeftDockWidgetArea | Qt.RightDockWidgetArea): @@ -62,8 +74,8 @@ class EbookViewer(MainWindow): def load_ebook(self, pathtoebook, open_at=None): # TODO: Implement open_at - if self.save_cfi_debounce_timer.isActive(): - self.save_cfi() + if self.save_annotations_debounce_timer.isActive(): + self.save_annotations() self.current_book_data = {} t = Thread(name='LoadBook', target=self._load_ebook_worker, args=(pathtoebook, open_at)) t.daemon = True @@ -88,28 +100,45 @@ class EbookViewer(MainWindow): return set_book_path(data['base']) self.current_book_data = data + self.current_book_data['annotations_map'] = defaultdict(list) + self.current_book_data['annotations_path_key'] = path_key(data['pathtoebook']) + '.json' + self.load_book_annotations() self.web_view.start_book_load(initial_cfi=self.initial_cfi_for_current_book()) + def load_book_annotations(self): + amap = self.current_book_data['annotations_map'] + path = os.path.join(self.current_book_data['base'], 'calibre-book-annotations.json') + if os.path.exists(path): + with open(path, 'rb') as f: + raw = f.read() + merge_annotations(json_loads(raw), amap) + path = os.path.join(annotations_dir, self.current_book_data['annotations_path_key']) + if os.path.exists(path): + with open(path, 'rb') as f: + raw = f.read() + merge_annotations(parse_annotations(raw), amap) + def initial_cfi_for_current_book(self): - vd = viewer_data() - lrp = vd.get('last-read-positions', {}) - return lrp.get('path', {}).get(self.current_book_data['pathtoebook']) + lrp = self.current_book_data['annotations_map']['last-read'] + if lrp: + lrp = lrp[0] + if lrp['pos_type'] == 'epubcfi': + return lrp['pos'] def cfi_changed(self, cfi): if not self.current_book_data: return - self.current_book_data['last_known_cfi'] = cfi - self.save_cfi_debounce_timer.start() + self.current_book_data['annotations_map']['last-read'] = [{ + 'pos': cfi, 'pos_type': 'epubcfi', 'timestamp': utcnow()}] + self.save_annotations_debounce_timer.start() - def save_cfi(self): - self.save_cfi_debounce_timer.stop() - vd = viewer_data() - lrp = vd.get('last-read-positions', {}) - path = lrp.setdefault('path', {}) - path[self.current_book_data['pathtoebook']] = self.current_book_data['last_known_cfi'] - vd.set('last-read-positions', lrp) + def save_annotations(self): + self.save_annotations_debounce_timer.stop() + amap = self.current_book_data['annotations_map'] + with open(os.path.join(annotations_dir, self.current_book_data['annotations_path_key']), 'wb') as f: + f.write(as_bytes(serialize_annotations(amap))) def closeEvent(self, ev): - if self.save_cfi_debounce_timer.isActive(): - self.save_cfi() + if self.save_annotations_debounce_timer.isActive(): + self.save_annotations() return MainWindow.closeEvent(self, ev) diff --git a/src/calibre/gui2/viewer/web_view.py b/src/calibre/gui2/viewer/web_view.py index 1b7e62e5bc..393c2f6f4e 100644 --- a/src/calibre/gui2/viewer/web_view.py +++ b/src/calibre/gui2/viewer/web_view.py @@ -5,6 +5,7 @@ from __future__ import absolute_import, division, print_function, unicode_literals import os +import sys from PyQt5.Qt import ( QApplication, QBuffer, QByteArray, QHBoxLayout, QSize, QTimer, QUrl, QWidget, @@ -186,7 +187,8 @@ class WebPage(QWebEnginePage): msg, linenumber, source_id.partition(':')[2]), show=True) prefix = {QWebEnginePage.InfoMessageLevel: 'INFO', QWebEnginePage.WarningMessageLevel: 'WARNING'}.get( level, 'ERROR') - prints('%s: %s:%s: %s' % (prefix, source_id, linenumber, msg)) + prints('%s: %s:%s: %s' % (prefix, source_id, linenumber, msg), file=sys.stderr) + sys.stderr.flush() def acceptNavigationRequest(self, url, req_type, is_main_frame): if req_type == self.NavigationTypeReload: diff --git a/src/calibre/srv/render_book.py b/src/calibre/srv/render_book.py index a92baaa591..c5f663bcd1 100644 --- a/src/calibre/srv/render_book.py +++ b/src/calibre/srv/render_book.py @@ -9,9 +9,9 @@ import os import re import sys from collections import OrderedDict, defaultdict +from datetime import datetime from functools import partial from itertools import count -from datetime import datetime from css_parser import replaceUrls from css_parser.css import CSSRule @@ -29,11 +29,14 @@ from calibre.ebooks.oeb.polish.cover import find_cover_image, set_epub_cover from calibre.ebooks.oeb.polish.css import transform_css from calibre.ebooks.oeb.polish.toc import get_landmarks, get_toc from calibre.ebooks.oeb.polish.utils import extract, guess_type +from calibre.srv.metadata import encode_datetime +from calibre.utils.date import EPOCH from calibre.utils.logging import default_log from calibre.utils.short_uuid import uuid4 -from calibre.srv.metadata import encode_datetime -from polyglot.binary import as_base64_unicode as encode_component, from_base64_unicode as decode_component -from polyglot.builtins import iteritems, map, is_py3, unicode_type +from polyglot.binary import ( + as_base64_unicode as encode_component, from_base64_unicode as decode_component +) +from polyglot.builtins import is_py3, iteritems, map, unicode_type from polyglot.urllib import quote, urlparse RENDER_VERSION = 1 @@ -169,10 +172,16 @@ class Container(ContainerBase): tweak_mode = True - def __init__(self, path_to_ebook, tdir, log=None, book_hash=None): + def __init__(self, path_to_ebook, tdir, log=None, book_hash=None, save_legacy_bookmark_data=False): log = log or default_log book_fmt, opfpath, input_fmt = extract_book(path_to_ebook, tdir, log=log) ContainerBase.__init__(self, tdir, opfpath, log) + if save_legacy_bookmark_data: + bm_file = 'META-INF/calibre_bookmarks.txt' + self.legacy_bookmark_data = None + if self.exists(bm_file): + with self.open(bm_file, 'rb') as f: + self.legacy_bookmark_data = f.read().decode('utf-8') # We do not add zero byte sized files as the IndexedDB API in the # browser has no good way to distinguish between zero byte files and # load failures. @@ -532,8 +541,22 @@ def serialize_datetimes(d): d[k] = v -def render(pathtoebook, output_dir, book_hash=None, serialize_metadata=False): - container = Container(pathtoebook, output_dir, book_hash=book_hash) +def get_legacy_annotations(container): + from calibre.ebooks.oeb.iterator.bookmarks import parse_bookmarks + raw = container.legacy_bookmark_data or b'' + for bm in parse_bookmarks(raw): + if bm['type'] == 'cfi' and isinstance(bm['pos'], unicode_type): + spine_index = (1 + bm['spine']) * 2 + epubcfi = 'epubcfi(/{}/{})'.format(spine_index, bm['pos'].lstrip('/')) + title = bm.get('title') + if title and title != 'calibre_current_page_bookmark': + yield {'type': 'bookmark', 'title': title, 'pos': epubcfi, 'pos_type': 'epubcfi', 'timestamp': EPOCH} + else: + yield {'type': 'last-read', 'pos': epubcfi, 'pos_type': 'epubcfi', 'timestamp': EPOCH} + + +def render(pathtoebook, output_dir, book_hash=None, serialize_metadata=False, extract_annotations=False): + container = Container(pathtoebook, output_dir, book_hash=book_hash, save_legacy_bookmark_data=extract_annotations) if serialize_metadata: from calibre.ebooks.metadata.meta import get_metadata from calibre.utils.serialize import json_dumps @@ -548,6 +571,13 @@ def render(pathtoebook, output_dir, book_hash=None, serialize_metadata=False): f.write(cdata[1]) with lopen(os.path.join(output_dir, 'calibre-book-metadata.json'), 'wb') as f: f.write(json_dumps(d)) + if extract_annotations: + annotations = None + if container.legacy_bookmark_data: + annotations = json_dumps(tuple(get_legacy_annotations(container))) + if annotations: + with lopen(os.path.join(output_dir, 'calibre-book-annotations.json'), 'wb') as f: + f.write(annotations) if __name__ == '__main__':