From 563b926e4bbe4752f4f3d0598e5ff27c02dd6b66 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 5 Aug 2019 20:01:59 +0530 Subject: [PATCH] Store annotations in EPUB files --- src/calibre/ebooks/oeb/iterator/bookmarks.py | 3 +- src/calibre/gui2/viewer/annotations.py | 32 +++++++++----- src/calibre/gui2/viewer/ui.py | 27 ++++++------ src/calibre/srv/render_book.py | 45 +++++++++++++++----- 4 files changed, 74 insertions(+), 33 deletions(-) diff --git a/src/calibre/ebooks/oeb/iterator/bookmarks.py b/src/calibre/ebooks/oeb/iterator/bookmarks.py index 720a8e7cc8..12a9e26e2f 100644 --- a/src/calibre/ebooks/oeb/iterator/bookmarks.py +++ b/src/calibre/ebooks/oeb/iterator/bookmarks.py @@ -10,13 +10,14 @@ import os, numbers from io import BytesIO from calibre.utils.zipfile import safe_replace -from polyglot.builtins import unicode_type +from polyglot.builtins import unicode_type, as_unicode BM_FIELD_SEP = u'*|!|?|*' BM_LEGACY_ESC = u'esc-text-%&*#%(){}ads19-end-esc' def parse_bookmarks(raw): + raw = as_unicode(raw) for line in raw.splitlines(): if '^' in line: tokens = line.rpartition('^') diff --git a/src/calibre/gui2/viewer/annotations.py b/src/calibre/gui2/viewer/annotations.py index ff7c134c1e..16a1cf6476 100644 --- a/src/calibre/gui2/viewer/annotations.py +++ b/src/calibre/gui2/viewer/annotations.py @@ -5,13 +5,22 @@ from __future__ import absolute_import, division, print_function, unicode_literals from collections import defaultdict +from io import BytesIO from operator import itemgetter -from calibre.utils.iso8601 import parse_iso8601 -from calibre.utils.serialize import json_dumps, json_loads +from calibre.srv.render_book import ( + EPUB_FILE_TYPE_MAGIC, parse_annotation, parse_annotations as _parse_annotations +) +from calibre.utils.serialize import json_dumps +from calibre.utils.zipfile import safe_replace +from polyglot.binary import as_base64_bytes from polyglot.builtins import iteritems, itervalues +def parse_annotations(raw): + return list(_parse_annotations(raw)) + + def merge_annots_with_identical_titles(annots): title_groups = defaultdict(list) for a in annots: @@ -28,6 +37,7 @@ def merge_annots_with_identical_titles(annots): def merge_annotations(annots, annots_map): for annot in annots: + annot = parse_annotation(annot) annots_map[annot.pop('type')].append(annot) lr = annots_map['last-read'] if lr: @@ -38,14 +48,6 @@ def merge_annotations(annots, annots_map): annots_map[annot_type] = list(merge_annots_with_identical_titles(a)) -def parse_annotations(raw): - ans = [] - for annot in json_loads(raw): - annot['timestamp'] = parse_iso8601(annot['timestamp'], assume_utc=True) - ans.append(annot) - return ans - - def serialize_annotations(annots_map): ans = [] for atype, annots in iteritems(annots_map): @@ -55,3 +57,13 @@ def serialize_annotations(annots_map): annot['timestamp'] = annot['timestamp'].isoformat() ans.append(annot) return json_dumps(ans) + + +def save_annots_to_epub(path, serialized_annots): + try: + zf = open(path, 'r+b') + except IOError: + return + with zf: + serialized_annots = EPUB_FILE_TYPE_MAGIC + as_base64_bytes(serialized_annots) + safe_replace(zf, 'META-INF/calibre_bookmarks.txt', BytesIO(serialized_annots), add_missing=True) diff --git a/src/calibre/gui2/viewer/ui.py b/src/calibre/gui2/viewer/ui.py index 906f9d3c8f..29d6e62b8f 100644 --- a/src/calibre/gui2/viewer/ui.py +++ b/src/calibre/gui2/viewer/ui.py @@ -9,15 +9,15 @@ from collections import defaultdict from hashlib import sha256 from threading import Thread -from PyQt5.Qt import QDockWidget, Qt, QTimer, pyqtSignal +from PyQt5.Qt import QDockWidget, Qt, pyqtSignal from calibre.constants import config_dir from calibre.gui2 import error_dialog from calibre.gui2.main_window import MainWindow from calibre.gui2.viewer.annotations import ( - merge_annotations, parse_annotations, serialize_annotations + merge_annotations, parse_annotations, save_annots_to_epub, serialize_annotations ) -from calibre.gui2.viewer.convert_book import prepare_book +from calibre.gui2.viewer.convert_book import prepare_book, update_book from calibre.gui2.viewer.web_view import WebView, set_book_path from calibre.utils.date import utcnow from calibre.utils.ipc.simple_worker import WorkerError @@ -43,8 +43,6 @@ class EbookViewer(MainWindow): except EnvironmentError: pass self.current_book_data = {} - self.save_annotations_debounce_timer = t = QTimer(self) - t.setInterval(3000), t.timeout.connect(self.save_annotations) self.book_prepared.connect(self.load_finished, type=Qt.QueuedConnection) def create_dock(title, name, area, areas=Qt.LeftDockWidgetArea | Qt.RightDockWidgetArea): @@ -75,8 +73,7 @@ class EbookViewer(MainWindow): def load_ebook(self, pathtoebook, open_at=None): # TODO: Implement open_at self.web_view.show_preparing_message() - if self.save_annotations_debounce_timer.isActive(): - self.save_annotations() + self.save_annotations() self.current_book_data = {} t = Thread(name='LoadBook', target=self._load_ebook_worker, args=(pathtoebook, open_at)) t.daemon = True @@ -131,15 +128,21 @@ class EbookViewer(MainWindow): return self.current_book_data['annotations_map']['last-read'] = [{ 'pos': cfi, 'pos_type': 'epubcfi', 'timestamp': utcnow()}] - self.save_annotations_debounce_timer.start() def save_annotations(self): - self.save_annotations_debounce_timer.stop() + if not self.current_book_data: + return amap = self.current_book_data['annotations_map'] + annots = as_bytes(serialize_annotations(amap)) with open(os.path.join(annotations_dir, self.current_book_data['annotations_path_key']), 'wb') as f: - f.write(as_bytes(serialize_annotations(amap))) + f.write(annots) + if self.current_book_data.get('pathtoebook', '').lower().endswith('.epub'): + path = self.current_book_data['pathtoebook'] + if os.access(path, os.W_OK): + before_stat = os.stat(path) + save_annots_to_epub(path, annots) + update_book(path, before_stat, {'calibre-book-annotations.json': annots}) def closeEvent(self, ev): - if self.save_annotations_debounce_timer.isActive(): - self.save_annotations() + self.save_annotations() return MainWindow.closeEvent(self, ev) diff --git a/src/calibre/srv/render_book.py b/src/calibre/srv/render_book.py index c5f663bcd1..5fe91dff16 100644 --- a/src/calibre/srv/render_book.py +++ b/src/calibre/srv/render_book.py @@ -31,10 +31,13 @@ from calibre.ebooks.oeb.polish.toc import get_landmarks, get_toc from calibre.ebooks.oeb.polish.utils import extract, guess_type from calibre.srv.metadata import encode_datetime from calibre.utils.date import EPOCH +from calibre.utils.iso8601 import parse_iso8601 from calibre.utils.logging import default_log +from calibre.utils.serialize import json_loads from calibre.utils.short_uuid import uuid4 from polyglot.binary import ( - as_base64_unicode as encode_component, from_base64_unicode as decode_component + as_base64_unicode as encode_component, from_base64_bytes, + from_base64_unicode as decode_component ) from polyglot.builtins import is_py3, iteritems, map, unicode_type from polyglot.urllib import quote, urlparse @@ -172,16 +175,16 @@ class Container(ContainerBase): tweak_mode = True - def __init__(self, path_to_ebook, tdir, log=None, book_hash=None, save_legacy_bookmark_data=False): + def __init__(self, path_to_ebook, tdir, log=None, book_hash=None, save_bookmark_data=False): log = log or default_log book_fmt, opfpath, input_fmt = extract_book(path_to_ebook, tdir, log=log) ContainerBase.__init__(self, tdir, opfpath, log) - if save_legacy_bookmark_data: + if save_bookmark_data: bm_file = 'META-INF/calibre_bookmarks.txt' - self.legacy_bookmark_data = None + self.bookmark_data = None if self.exists(bm_file): with self.open(bm_file, 'rb') as f: - self.legacy_bookmark_data = f.read().decode('utf-8') + self.bookmark_data = f.read() # We do not add zero byte sized files as the IndexedDB API in the # browser has no good way to distinguish between zero byte files and # load failures. @@ -541,9 +544,31 @@ def serialize_datetimes(d): d[k] = v -def get_legacy_annotations(container): +EPUB_FILE_TYPE_MAGIC = b'encoding=json+base64:\n' + + +def parse_annotation(annot): + ts = annot['timestamp'] + if hasattr(ts, 'rstrip'): + annot['timestamp'] = parse_iso8601(ts, assume_utc=True) + return annot + + +def parse_annotations(raw): + for annot in json_loads(raw): + yield parse_annotation(annot) + + +def get_stored_annotations(container): from calibre.ebooks.oeb.iterator.bookmarks import parse_bookmarks - raw = container.legacy_bookmark_data or b'' + + raw = container.bookmark_data or b'' + if raw.startswith(EPUB_FILE_TYPE_MAGIC): + raw = raw[len(EPUB_FILE_TYPE_MAGIC):] + for annot in parse_annotations(from_base64_bytes(raw)): + yield annot + return + for bm in parse_bookmarks(raw): if bm['type'] == 'cfi' and isinstance(bm['pos'], unicode_type): spine_index = (1 + bm['spine']) * 2 @@ -556,7 +581,7 @@ def get_legacy_annotations(container): def render(pathtoebook, output_dir, book_hash=None, serialize_metadata=False, extract_annotations=False): - container = Container(pathtoebook, output_dir, book_hash=book_hash, save_legacy_bookmark_data=extract_annotations) + container = Container(pathtoebook, output_dir, book_hash=book_hash, save_bookmark_data=extract_annotations) if serialize_metadata: from calibre.ebooks.metadata.meta import get_metadata from calibre.utils.serialize import json_dumps @@ -573,8 +598,8 @@ def render(pathtoebook, output_dir, book_hash=None, serialize_metadata=False, ex f.write(json_dumps(d)) if extract_annotations: annotations = None - if container.legacy_bookmark_data: - annotations = json_dumps(tuple(get_legacy_annotations(container))) + if container.bookmark_data: + annotations = json_dumps(tuple(get_stored_annotations(container))) if annotations: with lopen(os.path.join(output_dir, 'calibre-book-annotations.json'), 'wb') as f: f.write(annotations)