Store annotations in EPUB files

This commit is contained in:
Kovid Goyal 2019-08-05 20:01:59 +05:30
parent bc8fdc4ced
commit 563b926e4b
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
4 changed files with 74 additions and 33 deletions

View File

@ -10,13 +10,14 @@ import os, numbers
from io import BytesIO from io import BytesIO
from calibre.utils.zipfile import safe_replace from calibre.utils.zipfile import safe_replace
from polyglot.builtins import unicode_type from polyglot.builtins import unicode_type, as_unicode
BM_FIELD_SEP = u'*|!|?|*' BM_FIELD_SEP = u'*|!|?|*'
BM_LEGACY_ESC = u'esc-text-%&*#%(){}ads19-end-esc' BM_LEGACY_ESC = u'esc-text-%&*#%(){}ads19-end-esc'
def parse_bookmarks(raw): def parse_bookmarks(raw):
raw = as_unicode(raw)
for line in raw.splitlines(): for line in raw.splitlines():
if '^' in line: if '^' in line:
tokens = line.rpartition('^') tokens = line.rpartition('^')

View File

@ -5,13 +5,22 @@
from __future__ import absolute_import, division, print_function, unicode_literals from __future__ import absolute_import, division, print_function, unicode_literals
from collections import defaultdict from collections import defaultdict
from io import BytesIO
from operator import itemgetter from operator import itemgetter
from calibre.utils.iso8601 import parse_iso8601 from calibre.srv.render_book import (
from calibre.utils.serialize import json_dumps, json_loads EPUB_FILE_TYPE_MAGIC, parse_annotation, parse_annotations as _parse_annotations
)
from calibre.utils.serialize import json_dumps
from calibre.utils.zipfile import safe_replace
from polyglot.binary import as_base64_bytes
from polyglot.builtins import iteritems, itervalues from polyglot.builtins import iteritems, itervalues
def parse_annotations(raw):
    """Parse serialized annotations and return them as a list.

    Thin wrapper around ``parse_annotations`` from calibre.srv.render_book
    (imported here as ``_parse_annotations``). The result of that call is
    materialized with ``list()`` so callers get a concrete, re-iterable
    sequence.

    :param raw: The serialized annotations, passed through unchanged.
    :return: A list of the parsed annotations.
    """
    return list(_parse_annotations(raw))
def merge_annots_with_identical_titles(annots): def merge_annots_with_identical_titles(annots):
title_groups = defaultdict(list) title_groups = defaultdict(list)
for a in annots: for a in annots:
@ -28,6 +37,7 @@ def merge_annots_with_identical_titles(annots):
def merge_annotations(annots, annots_map): def merge_annotations(annots, annots_map):
for annot in annots: for annot in annots:
annot = parse_annotation(annot)
annots_map[annot.pop('type')].append(annot) annots_map[annot.pop('type')].append(annot)
lr = annots_map['last-read'] lr = annots_map['last-read']
if lr: if lr:
@ -38,14 +48,6 @@ def merge_annotations(annots, annots_map):
annots_map[annot_type] = list(merge_annots_with_identical_titles(a)) annots_map[annot_type] = list(merge_annots_with_identical_titles(a))
def parse_annotations(raw):
    """Decode serialized annotations and parse their timestamps.

    ``raw`` is decoded with json_loads(); every decoded annotation has its
    ``'timestamp'`` entry replaced, in place, by the result of
    parse_iso8601(..., assume_utc=True).

    :param raw: Serialized annotations accepted by json_loads().
    :return: A list of the decoded annotations.
    :raises KeyError: If an annotation has no ``'timestamp'`` entry.
    """
    ans = []
    for annot in json_loads(raw):
        annot['timestamp'] = parse_iso8601(annot['timestamp'], assume_utc=True)
        ans.append(annot)
    return ans
def serialize_annotations(annots_map): def serialize_annotations(annots_map):
ans = [] ans = []
for atype, annots in iteritems(annots_map): for atype, annots in iteritems(annots_map):
@ -55,3 +57,13 @@ def serialize_annotations(annots_map):
annot['timestamp'] = annot['timestamp'].isoformat() annot['timestamp'] = annot['timestamp'].isoformat()
ans.append(annot) ans.append(annot)
return json_dumps(ans) return json_dumps(ans)
def save_annots_to_epub(path, serialized_annots):
    """Best-effort embedding of serialized annotations into an EPUB file.

    The annotations are encoded with as_base64_bytes(), prefixed with
    EPUB_FILE_TYPE_MAGIC and handed to safe_replace() as the contents of
    ``META-INF/calibre_bookmarks.txt`` inside the file at ``path``.

    :param path: Filesystem path of the EPUB to update in place.
    :param serialized_annots: The already serialized annotations
        (presumably the bytes produced by serialize_annotations() --
        confirm against callers).
    :return: None. If the file cannot be opened for reading and writing,
        nothing is written and no error is raised.
    """
    try:
        zf = open(path, 'r+b')
    except IOError:
        # Deliberately silent: a book that cannot be opened for update is
        # simply left without embedded annotations.
        return
    with zf:
        # Use a separate local instead of rebinding the parameter, so the
        # caller-supplied value stays distinguishable from what is stored.
        payload = EPUB_FILE_TYPE_MAGIC + as_base64_bytes(serialized_annots)
        safe_replace(zf, 'META-INF/calibre_bookmarks.txt', BytesIO(payload), add_missing=True)

View File

@ -9,15 +9,15 @@ from collections import defaultdict
from hashlib import sha256 from hashlib import sha256
from threading import Thread from threading import Thread
from PyQt5.Qt import QDockWidget, Qt, QTimer, pyqtSignal from PyQt5.Qt import QDockWidget, Qt, pyqtSignal
from calibre.constants import config_dir from calibre.constants import config_dir
from calibre.gui2 import error_dialog from calibre.gui2 import error_dialog
from calibre.gui2.main_window import MainWindow from calibre.gui2.main_window import MainWindow
from calibre.gui2.viewer.annotations import ( from calibre.gui2.viewer.annotations import (
merge_annotations, parse_annotations, serialize_annotations merge_annotations, parse_annotations, save_annots_to_epub, serialize_annotations
) )
from calibre.gui2.viewer.convert_book import prepare_book from calibre.gui2.viewer.convert_book import prepare_book, update_book
from calibre.gui2.viewer.web_view import WebView, set_book_path from calibre.gui2.viewer.web_view import WebView, set_book_path
from calibre.utils.date import utcnow from calibre.utils.date import utcnow
from calibre.utils.ipc.simple_worker import WorkerError from calibre.utils.ipc.simple_worker import WorkerError
@ -43,8 +43,6 @@ class EbookViewer(MainWindow):
except EnvironmentError: except EnvironmentError:
pass pass
self.current_book_data = {} self.current_book_data = {}
self.save_annotations_debounce_timer = t = QTimer(self)
t.setInterval(3000), t.timeout.connect(self.save_annotations)
self.book_prepared.connect(self.load_finished, type=Qt.QueuedConnection) self.book_prepared.connect(self.load_finished, type=Qt.QueuedConnection)
def create_dock(title, name, area, areas=Qt.LeftDockWidgetArea | Qt.RightDockWidgetArea): def create_dock(title, name, area, areas=Qt.LeftDockWidgetArea | Qt.RightDockWidgetArea):
@ -75,7 +73,6 @@ class EbookViewer(MainWindow):
def load_ebook(self, pathtoebook, open_at=None): def load_ebook(self, pathtoebook, open_at=None):
# TODO: Implement open_at # TODO: Implement open_at
self.web_view.show_preparing_message() self.web_view.show_preparing_message()
if self.save_annotations_debounce_timer.isActive():
self.save_annotations() self.save_annotations()
self.current_book_data = {} self.current_book_data = {}
t = Thread(name='LoadBook', target=self._load_ebook_worker, args=(pathtoebook, open_at)) t = Thread(name='LoadBook', target=self._load_ebook_worker, args=(pathtoebook, open_at))
@ -131,15 +128,21 @@ class EbookViewer(MainWindow):
return return
self.current_book_data['annotations_map']['last-read'] = [{ self.current_book_data['annotations_map']['last-read'] = [{
'pos': cfi, 'pos_type': 'epubcfi', 'timestamp': utcnow()}] 'pos': cfi, 'pos_type': 'epubcfi', 'timestamp': utcnow()}]
self.save_annotations_debounce_timer.start()
def save_annotations(self): def save_annotations(self):
self.save_annotations_debounce_timer.stop() if not self.current_book_data:
return
amap = self.current_book_data['annotations_map'] amap = self.current_book_data['annotations_map']
annots = as_bytes(serialize_annotations(amap))
with open(os.path.join(annotations_dir, self.current_book_data['annotations_path_key']), 'wb') as f: with open(os.path.join(annotations_dir, self.current_book_data['annotations_path_key']), 'wb') as f:
f.write(as_bytes(serialize_annotations(amap))) f.write(annots)
if self.current_book_data.get('pathtoebook', '').lower().endswith('.epub'):
path = self.current_book_data['pathtoebook']
if os.access(path, os.W_OK):
before_stat = os.stat(path)
save_annots_to_epub(path, annots)
update_book(path, before_stat, {'calibre-book-annotations.json': annots})
def closeEvent(self, ev): def closeEvent(self, ev):
if self.save_annotations_debounce_timer.isActive():
self.save_annotations() self.save_annotations()
return MainWindow.closeEvent(self, ev) return MainWindow.closeEvent(self, ev)

View File

@ -31,10 +31,13 @@ from calibre.ebooks.oeb.polish.toc import get_landmarks, get_toc
from calibre.ebooks.oeb.polish.utils import extract, guess_type from calibre.ebooks.oeb.polish.utils import extract, guess_type
from calibre.srv.metadata import encode_datetime from calibre.srv.metadata import encode_datetime
from calibre.utils.date import EPOCH from calibre.utils.date import EPOCH
from calibre.utils.iso8601 import parse_iso8601
from calibre.utils.logging import default_log from calibre.utils.logging import default_log
from calibre.utils.serialize import json_loads
from calibre.utils.short_uuid import uuid4 from calibre.utils.short_uuid import uuid4
from polyglot.binary import ( from polyglot.binary import (
as_base64_unicode as encode_component, from_base64_unicode as decode_component as_base64_unicode as encode_component, from_base64_bytes,
from_base64_unicode as decode_component
) )
from polyglot.builtins import is_py3, iteritems, map, unicode_type from polyglot.builtins import is_py3, iteritems, map, unicode_type
from polyglot.urllib import quote, urlparse from polyglot.urllib import quote, urlparse
@ -172,16 +175,16 @@ class Container(ContainerBase):
tweak_mode = True tweak_mode = True
def __init__(self, path_to_ebook, tdir, log=None, book_hash=None, save_legacy_bookmark_data=False): def __init__(self, path_to_ebook, tdir, log=None, book_hash=None, save_bookmark_data=False):
log = log or default_log log = log or default_log
book_fmt, opfpath, input_fmt = extract_book(path_to_ebook, tdir, log=log) book_fmt, opfpath, input_fmt = extract_book(path_to_ebook, tdir, log=log)
ContainerBase.__init__(self, tdir, opfpath, log) ContainerBase.__init__(self, tdir, opfpath, log)
if save_legacy_bookmark_data: if save_bookmark_data:
bm_file = 'META-INF/calibre_bookmarks.txt' bm_file = 'META-INF/calibre_bookmarks.txt'
self.legacy_bookmark_data = None self.bookmark_data = None
if self.exists(bm_file): if self.exists(bm_file):
with self.open(bm_file, 'rb') as f: with self.open(bm_file, 'rb') as f:
self.legacy_bookmark_data = f.read().decode('utf-8') self.bookmark_data = f.read()
# We do not add zero byte sized files as the IndexedDB API in the # We do not add zero byte sized files as the IndexedDB API in the
# browser has no good way to distinguish between zero byte files and # browser has no good way to distinguish between zero byte files and
# load failures. # load failures.
@ -541,9 +544,31 @@ def serialize_datetimes(d):
d[k] = v d[k] = v
def get_legacy_annotations(container): EPUB_FILE_TYPE_MAGIC = b'encoding=json+base64:\n'
def parse_annotation(annot):
    """Normalize the ``'timestamp'`` entry of a single annotation in place.

    A string-like timestamp (any value exposing ``rstrip``) is replaced by
    the result of parse_iso8601(ts, assume_utc=True). Any other value is
    left untouched.

    :param annot: Mapping with a ``'timestamp'`` entry.
    :return: The same ``annot`` object that was passed in.
    :raises KeyError: If ``annot`` has no ``'timestamp'`` entry.
    """
    ts = annot['timestamp']
    # Duck-typed string check: only text-like values need parsing.
    if hasattr(ts, 'rstrip'):
        annot['timestamp'] = parse_iso8601(ts, assume_utc=True)
    return annot
def parse_annotations(raw):
    """Lazily yield the annotations decoded from ``raw``.

    ``raw`` is decoded with json_loads() and each decoded annotation is
    passed through parse_annotation() before being yielded. Because this is
    a generator function, no decoding happens until iteration starts.

    :param raw: Serialized annotations accepted by json_loads().
    :return: A generator over the parsed annotations.
    """
    for annot in json_loads(raw):
        yield parse_annotation(annot)
def get_stored_annotations(container):
from calibre.ebooks.oeb.iterator.bookmarks import parse_bookmarks from calibre.ebooks.oeb.iterator.bookmarks import parse_bookmarks
raw = container.legacy_bookmark_data or b''
raw = container.bookmark_data or b''
if raw.startswith(EPUB_FILE_TYPE_MAGIC):
raw = raw[len(EPUB_FILE_TYPE_MAGIC):]
for annot in parse_annotations(from_base64_bytes(raw)):
yield annot
return
for bm in parse_bookmarks(raw): for bm in parse_bookmarks(raw):
if bm['type'] == 'cfi' and isinstance(bm['pos'], unicode_type): if bm['type'] == 'cfi' and isinstance(bm['pos'], unicode_type):
spine_index = (1 + bm['spine']) * 2 spine_index = (1 + bm['spine']) * 2
@ -556,7 +581,7 @@ def get_legacy_annotations(container):
def render(pathtoebook, output_dir, book_hash=None, serialize_metadata=False, extract_annotations=False): def render(pathtoebook, output_dir, book_hash=None, serialize_metadata=False, extract_annotations=False):
container = Container(pathtoebook, output_dir, book_hash=book_hash, save_legacy_bookmark_data=extract_annotations) container = Container(pathtoebook, output_dir, book_hash=book_hash, save_bookmark_data=extract_annotations)
if serialize_metadata: if serialize_metadata:
from calibre.ebooks.metadata.meta import get_metadata from calibre.ebooks.metadata.meta import get_metadata
from calibre.utils.serialize import json_dumps from calibre.utils.serialize import json_dumps
@ -573,8 +598,8 @@ def render(pathtoebook, output_dir, book_hash=None, serialize_metadata=False, ex
f.write(json_dumps(d)) f.write(json_dumps(d))
if extract_annotations: if extract_annotations:
annotations = None annotations = None
if container.legacy_bookmark_data: if container.bookmark_data:
annotations = json_dumps(tuple(get_legacy_annotations(container))) annotations = json_dumps(tuple(get_stored_annotations(container)))
if annotations: if annotations:
with lopen(os.path.join(output_dir, 'calibre-book-annotations.json'), 'wb') as f: with lopen(os.path.join(output_dir, 'calibre-book-annotations.json'), 'wb') as f:
f.write(annotations) f.write(annotations)