Store annotations in EPUB files

This commit is contained in:
Kovid Goyal 2019-08-05 20:01:59 +05:30
parent bc8fdc4ced
commit 563b926e4b
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
4 changed files with 74 additions and 33 deletions

View File

@ -10,13 +10,14 @@ import os, numbers
from io import BytesIO
from calibre.utils.zipfile import safe_replace
from polyglot.builtins import unicode_type
from polyglot.builtins import unicode_type, as_unicode
BM_FIELD_SEP = u'*|!|?|*'
BM_LEGACY_ESC = u'esc-text-%&*#%(){}ads19-end-esc'
def parse_bookmarks(raw):
raw = as_unicode(raw)
for line in raw.splitlines():
if '^' in line:
tokens = line.rpartition('^')

View File

@ -5,13 +5,22 @@
from __future__ import absolute_import, division, print_function, unicode_literals
from collections import defaultdict
from io import BytesIO
from operator import itemgetter
from calibre.utils.iso8601 import parse_iso8601
from calibre.utils.serialize import json_dumps, json_loads
from calibre.srv.render_book import (
EPUB_FILE_TYPE_MAGIC, parse_annotation, parse_annotations as _parse_annotations
)
from calibre.utils.serialize import json_dumps
from calibre.utils.zipfile import safe_replace
from polyglot.binary import as_base64_bytes
from polyglot.builtins import iteritems, itervalues
def parse_annotations(raw):
    """Return the annotations parsed from *raw* as a list.

    Delegates to ``_parse_annotations`` (the ``parse_annotations`` function
    imported from ``calibre.srv.render_book``) and collects everything it
    produces into a list.
    """
    return [annot for annot in _parse_annotations(raw)]
def merge_annots_with_identical_titles(annots):
title_groups = defaultdict(list)
for a in annots:
@ -28,6 +37,7 @@ def merge_annots_with_identical_titles(annots):
def merge_annotations(annots, annots_map):
for annot in annots:
annot = parse_annotation(annot)
annots_map[annot.pop('type')].append(annot)
lr = annots_map['last-read']
if lr:
@ -38,14 +48,6 @@ def merge_annotations(annots, annots_map):
annots_map[annot_type] = list(merge_annots_with_identical_titles(a))
def parse_annotations(raw):
    """Load annotations from *raw* and convert their timestamps.

    *raw* is passed to ``json_loads``; each resulting entry has its
    ``'timestamp'`` value replaced, in place, by the result of
    ``parse_iso8601(..., assume_utc=True)``. The entries are returned as a
    list, in the order ``json_loads`` produced them.
    """
    def with_parsed_timestamp(entry):
        # The entry is mutated in place and handed back.
        entry['timestamp'] = parse_iso8601(entry['timestamp'], assume_utc=True)
        return entry

    return [with_parsed_timestamp(entry) for entry in json_loads(raw)]
def serialize_annotations(annots_map):
ans = []
for atype, annots in iteritems(annots_map):
@ -55,3 +57,13 @@ def serialize_annotations(annots_map):
annot['timestamp'] = annot['timestamp'].isoformat()
ans.append(annot)
return json_dumps(ans)
def save_annots_to_epub(path, serialized_annots):
    """Store *serialized_annots* inside the file at *path*.

    The file is opened for in-place binary update. If it cannot be opened
    (``IOError``) the function returns without doing anything. Otherwise the
    annotations are passed through ``as_base64_bytes``, prefixed with
    ``EPUB_FILE_TYPE_MAGIC`` and handed to ``safe_replace`` under the name
    ``META-INF/calibre_bookmarks.txt``.
    """
    try:
        epub_file = open(path, 'r+b')
    except IOError:
        # Best effort only: a file that cannot be opened is silently skipped.
        return
    with epub_file:
        payload = EPUB_FILE_TYPE_MAGIC + as_base64_bytes(serialized_annots)
        # add_missing=True: presumably adds the entry when the archive does
        # not have one yet -- confirm against safe_replace.
        safe_replace(
            epub_file,
            'META-INF/calibre_bookmarks.txt',
            BytesIO(payload),
            add_missing=True,
        )

View File

@ -9,15 +9,15 @@ from collections import defaultdict
from hashlib import sha256
from threading import Thread
from PyQt5.Qt import QDockWidget, Qt, QTimer, pyqtSignal
from PyQt5.Qt import QDockWidget, Qt, pyqtSignal
from calibre.constants import config_dir
from calibre.gui2 import error_dialog
from calibre.gui2.main_window import MainWindow
from calibre.gui2.viewer.annotations import (
merge_annotations, parse_annotations, serialize_annotations
merge_annotations, parse_annotations, save_annots_to_epub, serialize_annotations
)
from calibre.gui2.viewer.convert_book import prepare_book
from calibre.gui2.viewer.convert_book import prepare_book, update_book
from calibre.gui2.viewer.web_view import WebView, set_book_path
from calibre.utils.date import utcnow
from calibre.utils.ipc.simple_worker import WorkerError
@ -43,8 +43,6 @@ class EbookViewer(MainWindow):
except EnvironmentError:
pass
self.current_book_data = {}
self.save_annotations_debounce_timer = t = QTimer(self)
t.setInterval(3000), t.timeout.connect(self.save_annotations)
self.book_prepared.connect(self.load_finished, type=Qt.QueuedConnection)
def create_dock(title, name, area, areas=Qt.LeftDockWidgetArea | Qt.RightDockWidgetArea):
@ -75,7 +73,6 @@ class EbookViewer(MainWindow):
def load_ebook(self, pathtoebook, open_at=None):
# TODO: Implement open_at
self.web_view.show_preparing_message()
if self.save_annotations_debounce_timer.isActive():
self.save_annotations()
self.current_book_data = {}
t = Thread(name='LoadBook', target=self._load_ebook_worker, args=(pathtoebook, open_at))
@ -131,15 +128,21 @@ class EbookViewer(MainWindow):
return
self.current_book_data['annotations_map']['last-read'] = [{
'pos': cfi, 'pos_type': 'epubcfi', 'timestamp': utcnow()}]
self.save_annotations_debounce_timer.start()
def save_annotations(self):
self.save_annotations_debounce_timer.stop()
if not self.current_book_data:
return
amap = self.current_book_data['annotations_map']
annots = as_bytes(serialize_annotations(amap))
with open(os.path.join(annotations_dir, self.current_book_data['annotations_path_key']), 'wb') as f:
f.write(as_bytes(serialize_annotations(amap)))
f.write(annots)
if self.current_book_data.get('pathtoebook', '').lower().endswith('.epub'):
path = self.current_book_data['pathtoebook']
if os.access(path, os.W_OK):
before_stat = os.stat(path)
save_annots_to_epub(path, annots)
update_book(path, before_stat, {'calibre-book-annotations.json': annots})
def closeEvent(self, ev):
if self.save_annotations_debounce_timer.isActive():
self.save_annotations()
return MainWindow.closeEvent(self, ev)

View File

@ -31,10 +31,13 @@ from calibre.ebooks.oeb.polish.toc import get_landmarks, get_toc
from calibre.ebooks.oeb.polish.utils import extract, guess_type
from calibre.srv.metadata import encode_datetime
from calibre.utils.date import EPOCH
from calibre.utils.iso8601 import parse_iso8601
from calibre.utils.logging import default_log
from calibre.utils.serialize import json_loads
from calibre.utils.short_uuid import uuid4
from polyglot.binary import (
as_base64_unicode as encode_component, from_base64_unicode as decode_component
as_base64_unicode as encode_component, from_base64_bytes,
from_base64_unicode as decode_component
)
from polyglot.builtins import is_py3, iteritems, map, unicode_type
from polyglot.urllib import quote, urlparse
@ -172,16 +175,16 @@ class Container(ContainerBase):
tweak_mode = True
def __init__(self, path_to_ebook, tdir, log=None, book_hash=None, save_legacy_bookmark_data=False):
def __init__(self, path_to_ebook, tdir, log=None, book_hash=None, save_bookmark_data=False):
log = log or default_log
book_fmt, opfpath, input_fmt = extract_book(path_to_ebook, tdir, log=log)
ContainerBase.__init__(self, tdir, opfpath, log)
if save_legacy_bookmark_data:
if save_bookmark_data:
bm_file = 'META-INF/calibre_bookmarks.txt'
self.legacy_bookmark_data = None
self.bookmark_data = None
if self.exists(bm_file):
with self.open(bm_file, 'rb') as f:
self.legacy_bookmark_data = f.read().decode('utf-8')
self.bookmark_data = f.read()
# We do not add zero byte sized files as the IndexedDB API in the
# browser has no good way to distinguish between zero byte files and
# load failures.
@ -541,9 +544,31 @@ def serialize_datetimes(d):
d[k] = v
def get_legacy_annotations(container):
EPUB_FILE_TYPE_MAGIC = b'encoding=json+base64:\n'
def parse_annotation(annot):
    """Ensure ``annot['timestamp']`` is parsed, then return *annot*.

    A timestamp that has an ``rstrip`` attribute (i.e. looks like a string)
    is replaced, in place, by ``parse_iso8601(timestamp, assume_utc=True)``.
    Any other timestamp value is left untouched. The same mapping object that
    was passed in is returned.
    """
    stamp = annot['timestamp']
    looks_like_text = hasattr(stamp, 'rstrip')
    if not looks_like_text:
        # Not string-like, so there is nothing to parse.
        return annot
    annot['timestamp'] = parse_iso8601(stamp, assume_utc=True)
    return annot
def parse_annotations(raw):
    """Lazily yield the annotations contained in *raw*.

    *raw* is loaded with ``json_loads`` when iteration starts; every entry is
    then passed through ``parse_annotation`` before being yielded.
    """
    for entry in json_loads(raw):
        parsed = parse_annotation(entry)
        yield parsed
def get_stored_annotations(container):
from calibre.ebooks.oeb.iterator.bookmarks import parse_bookmarks
raw = container.legacy_bookmark_data or b''
raw = container.bookmark_data or b''
if raw.startswith(EPUB_FILE_TYPE_MAGIC):
raw = raw[len(EPUB_FILE_TYPE_MAGIC):]
for annot in parse_annotations(from_base64_bytes(raw)):
yield annot
return
for bm in parse_bookmarks(raw):
if bm['type'] == 'cfi' and isinstance(bm['pos'], unicode_type):
spine_index = (1 + bm['spine']) * 2
@ -556,7 +581,7 @@ def get_legacy_annotations(container):
def render(pathtoebook, output_dir, book_hash=None, serialize_metadata=False, extract_annotations=False):
container = Container(pathtoebook, output_dir, book_hash=book_hash, save_legacy_bookmark_data=extract_annotations)
container = Container(pathtoebook, output_dir, book_hash=book_hash, save_bookmark_data=extract_annotations)
if serialize_metadata:
from calibre.ebooks.metadata.meta import get_metadata
from calibre.utils.serialize import json_dumps
@ -573,8 +598,8 @@ def render(pathtoebook, output_dir, book_hash=None, serialize_metadata=False, ex
f.write(json_dumps(d))
if extract_annotations:
annotations = None
if container.legacy_bookmark_data:
annotations = json_dumps(tuple(get_legacy_annotations(container)))
if container.bookmark_data:
annotations = json_dumps(tuple(get_stored_annotations(container)))
if annotations:
with lopen(os.path.join(output_dir, 'calibre-book-annotations.json'), 'wb') as f:
f.write(annotations)