mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Store annotations in EPUB files
This commit is contained in:
parent
bc8fdc4ced
commit
563b926e4b
@ -10,13 +10,14 @@ import os, numbers
|
|||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
|
|
||||||
from calibre.utils.zipfile import safe_replace
|
from calibre.utils.zipfile import safe_replace
|
||||||
from polyglot.builtins import unicode_type
|
from polyglot.builtins import unicode_type, as_unicode
|
||||||
|
|
||||||
BM_FIELD_SEP = u'*|!|?|*'
|
BM_FIELD_SEP = u'*|!|?|*'
|
||||||
BM_LEGACY_ESC = u'esc-text-%&*#%(){}ads19-end-esc'
|
BM_LEGACY_ESC = u'esc-text-%&*#%(){}ads19-end-esc'
|
||||||
|
|
||||||
|
|
||||||
def parse_bookmarks(raw):
|
def parse_bookmarks(raw):
|
||||||
|
raw = as_unicode(raw)
|
||||||
for line in raw.splitlines():
|
for line in raw.splitlines():
|
||||||
if '^' in line:
|
if '^' in line:
|
||||||
tokens = line.rpartition('^')
|
tokens = line.rpartition('^')
|
||||||
|
@ -5,13 +5,22 @@
|
|||||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
|
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
|
from io import BytesIO
|
||||||
from operator import itemgetter
|
from operator import itemgetter
|
||||||
|
|
||||||
from calibre.utils.iso8601 import parse_iso8601
|
from calibre.srv.render_book import (
|
||||||
from calibre.utils.serialize import json_dumps, json_loads
|
EPUB_FILE_TYPE_MAGIC, parse_annotation, parse_annotations as _parse_annotations
|
||||||
|
)
|
||||||
|
from calibre.utils.serialize import json_dumps
|
||||||
|
from calibre.utils.zipfile import safe_replace
|
||||||
|
from polyglot.binary import as_base64_bytes
|
||||||
from polyglot.builtins import iteritems, itervalues
|
from polyglot.builtins import iteritems, itervalues
|
||||||
|
|
||||||
|
|
||||||
|
def parse_annotations(raw):
    """Parse serialized annotations and return them as a list.

    Thin wrapper that materializes the iterable produced by
    ``_parse_annotations`` (imported from ``calibre.srv.render_book``).
    """
    return [annot for annot in _parse_annotations(raw)]
|
||||||
|
|
||||||
|
|
||||||
def merge_annots_with_identical_titles(annots):
|
def merge_annots_with_identical_titles(annots):
|
||||||
title_groups = defaultdict(list)
|
title_groups = defaultdict(list)
|
||||||
for a in annots:
|
for a in annots:
|
||||||
@ -28,6 +37,7 @@ def merge_annots_with_identical_titles(annots):
|
|||||||
|
|
||||||
def merge_annotations(annots, annots_map):
|
def merge_annotations(annots, annots_map):
|
||||||
for annot in annots:
|
for annot in annots:
|
||||||
|
annot = parse_annotation(annot)
|
||||||
annots_map[annot.pop('type')].append(annot)
|
annots_map[annot.pop('type')].append(annot)
|
||||||
lr = annots_map['last-read']
|
lr = annots_map['last-read']
|
||||||
if lr:
|
if lr:
|
||||||
@ -38,14 +48,6 @@ def merge_annotations(annots, annots_map):
|
|||||||
annots_map[annot_type] = list(merge_annots_with_identical_titles(a))
|
annots_map[annot_type] = list(merge_annots_with_identical_titles(a))
|
||||||
|
|
||||||
|
|
||||||
def parse_annotations(raw):
|
|
||||||
ans = []
|
|
||||||
for annot in json_loads(raw):
|
|
||||||
annot['timestamp'] = parse_iso8601(annot['timestamp'], assume_utc=True)
|
|
||||||
ans.append(annot)
|
|
||||||
return ans
|
|
||||||
|
|
||||||
|
|
||||||
def serialize_annotations(annots_map):
|
def serialize_annotations(annots_map):
|
||||||
ans = []
|
ans = []
|
||||||
for atype, annots in iteritems(annots_map):
|
for atype, annots in iteritems(annots_map):
|
||||||
@ -55,3 +57,13 @@ def serialize_annotations(annots_map):
|
|||||||
annot['timestamp'] = annot['timestamp'].isoformat()
|
annot['timestamp'] = annot['timestamp'].isoformat()
|
||||||
ans.append(annot)
|
ans.append(annot)
|
||||||
return json_dumps(ans)
|
return json_dumps(ans)
|
||||||
|
|
||||||
|
|
||||||
|
def save_annots_to_epub(path, serialized_annots):
    """Store serialized annotations inside the EPUB file at ``path``.

    The data is written to ``META-INF/calibre_bookmarks.txt`` as
    ``EPUB_FILE_TYPE_MAGIC`` followed by the output of
    ``as_base64_bytes(serialized_annots)``.

    This is best effort: if the file cannot be opened for updating
    (``IOError``) the function returns without doing anything.
    """
    try:
        epub_stream = open(path, 'r+b')
    except IOError:
        # Book is not updatable; skip embedding the annotations.
        return
    with epub_stream:
        payload = EPUB_FILE_TYPE_MAGIC + as_base64_bytes(serialized_annots)
        # NOTE(review): add_missing=True presumably adds the entry when the
        # archive does not contain it yet -- confirm against safe_replace.
        safe_replace(
            epub_stream,
            'META-INF/calibre_bookmarks.txt',
            BytesIO(payload),
            add_missing=True,
        )
|
||||||
|
@ -9,15 +9,15 @@ from collections import defaultdict
|
|||||||
from hashlib import sha256
|
from hashlib import sha256
|
||||||
from threading import Thread
|
from threading import Thread
|
||||||
|
|
||||||
from PyQt5.Qt import QDockWidget, Qt, QTimer, pyqtSignal
|
from PyQt5.Qt import QDockWidget, Qt, pyqtSignal
|
||||||
|
|
||||||
from calibre.constants import config_dir
|
from calibre.constants import config_dir
|
||||||
from calibre.gui2 import error_dialog
|
from calibre.gui2 import error_dialog
|
||||||
from calibre.gui2.main_window import MainWindow
|
from calibre.gui2.main_window import MainWindow
|
||||||
from calibre.gui2.viewer.annotations import (
|
from calibre.gui2.viewer.annotations import (
|
||||||
merge_annotations, parse_annotations, serialize_annotations
|
merge_annotations, parse_annotations, save_annots_to_epub, serialize_annotations
|
||||||
)
|
)
|
||||||
from calibre.gui2.viewer.convert_book import prepare_book
|
from calibre.gui2.viewer.convert_book import prepare_book, update_book
|
||||||
from calibre.gui2.viewer.web_view import WebView, set_book_path
|
from calibre.gui2.viewer.web_view import WebView, set_book_path
|
||||||
from calibre.utils.date import utcnow
|
from calibre.utils.date import utcnow
|
||||||
from calibre.utils.ipc.simple_worker import WorkerError
|
from calibre.utils.ipc.simple_worker import WorkerError
|
||||||
@ -43,8 +43,6 @@ class EbookViewer(MainWindow):
|
|||||||
except EnvironmentError:
|
except EnvironmentError:
|
||||||
pass
|
pass
|
||||||
self.current_book_data = {}
|
self.current_book_data = {}
|
||||||
self.save_annotations_debounce_timer = t = QTimer(self)
|
|
||||||
t.setInterval(3000), t.timeout.connect(self.save_annotations)
|
|
||||||
self.book_prepared.connect(self.load_finished, type=Qt.QueuedConnection)
|
self.book_prepared.connect(self.load_finished, type=Qt.QueuedConnection)
|
||||||
|
|
||||||
def create_dock(title, name, area, areas=Qt.LeftDockWidgetArea | Qt.RightDockWidgetArea):
|
def create_dock(title, name, area, areas=Qt.LeftDockWidgetArea | Qt.RightDockWidgetArea):
|
||||||
@ -75,8 +73,7 @@ class EbookViewer(MainWindow):
|
|||||||
def load_ebook(self, pathtoebook, open_at=None):
|
def load_ebook(self, pathtoebook, open_at=None):
|
||||||
# TODO: Implement open_at
|
# TODO: Implement open_at
|
||||||
self.web_view.show_preparing_message()
|
self.web_view.show_preparing_message()
|
||||||
if self.save_annotations_debounce_timer.isActive():
|
self.save_annotations()
|
||||||
self.save_annotations()
|
|
||||||
self.current_book_data = {}
|
self.current_book_data = {}
|
||||||
t = Thread(name='LoadBook', target=self._load_ebook_worker, args=(pathtoebook, open_at))
|
t = Thread(name='LoadBook', target=self._load_ebook_worker, args=(pathtoebook, open_at))
|
||||||
t.daemon = True
|
t.daemon = True
|
||||||
@ -131,15 +128,21 @@ class EbookViewer(MainWindow):
|
|||||||
return
|
return
|
||||||
self.current_book_data['annotations_map']['last-read'] = [{
|
self.current_book_data['annotations_map']['last-read'] = [{
|
||||||
'pos': cfi, 'pos_type': 'epubcfi', 'timestamp': utcnow()}]
|
'pos': cfi, 'pos_type': 'epubcfi', 'timestamp': utcnow()}]
|
||||||
self.save_annotations_debounce_timer.start()
|
|
||||||
|
|
||||||
def save_annotations(self):
|
def save_annotations(self):
|
||||||
self.save_annotations_debounce_timer.stop()
|
if not self.current_book_data:
|
||||||
|
return
|
||||||
amap = self.current_book_data['annotations_map']
|
amap = self.current_book_data['annotations_map']
|
||||||
|
annots = as_bytes(serialize_annotations(amap))
|
||||||
with open(os.path.join(annotations_dir, self.current_book_data['annotations_path_key']), 'wb') as f:
|
with open(os.path.join(annotations_dir, self.current_book_data['annotations_path_key']), 'wb') as f:
|
||||||
f.write(as_bytes(serialize_annotations(amap)))
|
f.write(annots)
|
||||||
|
if self.current_book_data.get('pathtoebook', '').lower().endswith('.epub'):
|
||||||
|
path = self.current_book_data['pathtoebook']
|
||||||
|
if os.access(path, os.W_OK):
|
||||||
|
before_stat = os.stat(path)
|
||||||
|
save_annots_to_epub(path, annots)
|
||||||
|
update_book(path, before_stat, {'calibre-book-annotations.json': annots})
|
||||||
|
|
||||||
def closeEvent(self, ev):
|
def closeEvent(self, ev):
|
||||||
if self.save_annotations_debounce_timer.isActive():
|
self.save_annotations()
|
||||||
self.save_annotations()
|
|
||||||
return MainWindow.closeEvent(self, ev)
|
return MainWindow.closeEvent(self, ev)
|
||||||
|
@ -31,10 +31,13 @@ from calibre.ebooks.oeb.polish.toc import get_landmarks, get_toc
|
|||||||
from calibre.ebooks.oeb.polish.utils import extract, guess_type
|
from calibre.ebooks.oeb.polish.utils import extract, guess_type
|
||||||
from calibre.srv.metadata import encode_datetime
|
from calibre.srv.metadata import encode_datetime
|
||||||
from calibre.utils.date import EPOCH
|
from calibre.utils.date import EPOCH
|
||||||
|
from calibre.utils.iso8601 import parse_iso8601
|
||||||
from calibre.utils.logging import default_log
|
from calibre.utils.logging import default_log
|
||||||
|
from calibre.utils.serialize import json_loads
|
||||||
from calibre.utils.short_uuid import uuid4
|
from calibre.utils.short_uuid import uuid4
|
||||||
from polyglot.binary import (
|
from polyglot.binary import (
|
||||||
as_base64_unicode as encode_component, from_base64_unicode as decode_component
|
as_base64_unicode as encode_component, from_base64_bytes,
|
||||||
|
from_base64_unicode as decode_component
|
||||||
)
|
)
|
||||||
from polyglot.builtins import is_py3, iteritems, map, unicode_type
|
from polyglot.builtins import is_py3, iteritems, map, unicode_type
|
||||||
from polyglot.urllib import quote, urlparse
|
from polyglot.urllib import quote, urlparse
|
||||||
@ -172,16 +175,16 @@ class Container(ContainerBase):
|
|||||||
|
|
||||||
tweak_mode = True
|
tweak_mode = True
|
||||||
|
|
||||||
def __init__(self, path_to_ebook, tdir, log=None, book_hash=None, save_legacy_bookmark_data=False):
|
def __init__(self, path_to_ebook, tdir, log=None, book_hash=None, save_bookmark_data=False):
|
||||||
log = log or default_log
|
log = log or default_log
|
||||||
book_fmt, opfpath, input_fmt = extract_book(path_to_ebook, tdir, log=log)
|
book_fmt, opfpath, input_fmt = extract_book(path_to_ebook, tdir, log=log)
|
||||||
ContainerBase.__init__(self, tdir, opfpath, log)
|
ContainerBase.__init__(self, tdir, opfpath, log)
|
||||||
if save_legacy_bookmark_data:
|
if save_bookmark_data:
|
||||||
bm_file = 'META-INF/calibre_bookmarks.txt'
|
bm_file = 'META-INF/calibre_bookmarks.txt'
|
||||||
self.legacy_bookmark_data = None
|
self.bookmark_data = None
|
||||||
if self.exists(bm_file):
|
if self.exists(bm_file):
|
||||||
with self.open(bm_file, 'rb') as f:
|
with self.open(bm_file, 'rb') as f:
|
||||||
self.legacy_bookmark_data = f.read().decode('utf-8')
|
self.bookmark_data = f.read()
|
||||||
# We do not add zero byte sized files as the IndexedDB API in the
|
# We do not add zero byte sized files as the IndexedDB API in the
|
||||||
# browser has no good way to distinguish between zero byte files and
|
# browser has no good way to distinguish between zero byte files and
|
||||||
# load failures.
|
# load failures.
|
||||||
@ -541,9 +544,31 @@ def serialize_datetimes(d):
|
|||||||
d[k] = v
|
d[k] = v
|
||||||
|
|
||||||
|
|
||||||
def get_legacy_annotations(container):
|
EPUB_FILE_TYPE_MAGIC = b'encoding=json+base64:\n'
|
||||||
|
|
||||||
|
|
||||||
|
def parse_annotation(annot):
    """Normalize the ``timestamp`` entry of ``annot`` in place.

    When the timestamp is string-like (it has an ``rstrip`` attribute) it is
    replaced by ``parse_iso8601(timestamp, assume_utc=True)``; any other
    value is left untouched. The same ``annot`` mapping is returned.

    Raises ``KeyError`` if ``annot`` has no ``'timestamp'`` key.
    """
    stamp = annot['timestamp']
    is_text = hasattr(stamp, 'rstrip')
    if not is_text:
        # Already parsed (or not a string at all): nothing to convert.
        return annot
    annot['timestamp'] = parse_iso8601(stamp, assume_utc=True)
    return annot
|
||||||
|
|
||||||
|
|
||||||
|
def parse_annotations(raw):
    """Lazily yield each annotation decoded from ``raw``.

    ``raw`` is decoded with ``json_loads`` and every resulting entry is
    passed through ``parse_annotation`` before being yielded.
    """
    entries = json_loads(raw)
    for entry in entries:
        parsed = parse_annotation(entry)
        yield parsed
|
||||||
|
|
||||||
|
|
||||||
|
def get_stored_annotations(container):
|
||||||
from calibre.ebooks.oeb.iterator.bookmarks import parse_bookmarks
|
from calibre.ebooks.oeb.iterator.bookmarks import parse_bookmarks
|
||||||
raw = container.legacy_bookmark_data or b''
|
|
||||||
|
raw = container.bookmark_data or b''
|
||||||
|
if raw.startswith(EPUB_FILE_TYPE_MAGIC):
|
||||||
|
raw = raw[len(EPUB_FILE_TYPE_MAGIC):]
|
||||||
|
for annot in parse_annotations(from_base64_bytes(raw)):
|
||||||
|
yield annot
|
||||||
|
return
|
||||||
|
|
||||||
for bm in parse_bookmarks(raw):
|
for bm in parse_bookmarks(raw):
|
||||||
if bm['type'] == 'cfi' and isinstance(bm['pos'], unicode_type):
|
if bm['type'] == 'cfi' and isinstance(bm['pos'], unicode_type):
|
||||||
spine_index = (1 + bm['spine']) * 2
|
spine_index = (1 + bm['spine']) * 2
|
||||||
@ -556,7 +581,7 @@ def get_legacy_annotations(container):
|
|||||||
|
|
||||||
|
|
||||||
def render(pathtoebook, output_dir, book_hash=None, serialize_metadata=False, extract_annotations=False):
|
def render(pathtoebook, output_dir, book_hash=None, serialize_metadata=False, extract_annotations=False):
|
||||||
container = Container(pathtoebook, output_dir, book_hash=book_hash, save_legacy_bookmark_data=extract_annotations)
|
container = Container(pathtoebook, output_dir, book_hash=book_hash, save_bookmark_data=extract_annotations)
|
||||||
if serialize_metadata:
|
if serialize_metadata:
|
||||||
from calibre.ebooks.metadata.meta import get_metadata
|
from calibre.ebooks.metadata.meta import get_metadata
|
||||||
from calibre.utils.serialize import json_dumps
|
from calibre.utils.serialize import json_dumps
|
||||||
@ -573,8 +598,8 @@ def render(pathtoebook, output_dir, book_hash=None, serialize_metadata=False, ex
|
|||||||
f.write(json_dumps(d))
|
f.write(json_dumps(d))
|
||||||
if extract_annotations:
|
if extract_annotations:
|
||||||
annotations = None
|
annotations = None
|
||||||
if container.legacy_bookmark_data:
|
if container.bookmark_data:
|
||||||
annotations = json_dumps(tuple(get_legacy_annotations(container)))
|
annotations = json_dumps(tuple(get_stored_annotations(container)))
|
||||||
if annotations:
|
if annotations:
|
||||||
with lopen(os.path.join(output_dir, 'calibre-book-annotations.json'), 'wb') as f:
|
with lopen(os.path.join(output_dir, 'calibre-book-annotations.json'), 'wb') as f:
|
||||||
f.write(annotations)
|
f.write(annotations)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user