Endpoints to load rendered book data

This commit is contained in:
Kovid Goyal 2016-03-16 21:04:56 +05:30
parent a93ca1eaa0
commit da8031d360
6 changed files with 157 additions and 10 deletions

View File

@ -131,6 +131,10 @@ class Cache(object):
def new_api(self): def new_api(self):
return self return self
@property
def library_id(self):
    ''' The library identifier, as reported by ``self.backend.library_id`` '''
    return self.backend.library_id
@property @property
def safe_read_lock(self): def safe_read_lock(self):
''' A safe read lock is a lock that does nothing if the thread already ''' A safe read lock is a lock that does nothing if the thread already

128
src/calibre/srv/books.py Normal file
View File

@ -0,0 +1,128 @@
#!/usr/bin/env python2
# vim:fileencoding=utf-8
# License: GPLv3 Copyright: 2016, Kovid Goyal <kovid at kovidgoyal.net>
from __future__ import (unicode_literals, division, absolute_import,
print_function)
from hashlib import sha1
from functools import partial
from threading import RLock
from cPickle import dumps
import errno, os, tempfile, shutil
from calibre.constants import cache_dir, iswindows
from calibre.customize.ui import plugin_for_input_format
from calibre.srv.render_book import RENDER_VERSION
from calibre.srv.errors import HTTPNotFound
from calibre.srv.routes import endpoint, json
from calibre.srv.utils import get_library_data
# Re-entrant lock held by book_manifest() and job_done() while they touch the
# bookkeeping dicts below and the finished-render directory
cache_lock = RLock()
# Maps book hash -> id of the render job that has been started for that book
queued_jobs = {}
# Maps book hash -> (was_aborted, traceback) for render jobs that failed
failed_jobs = {}
# Memoized return value of books_cache_dir(); None until it is first computed
_books_cache_dir = None
def books_cache_dir():
    ''' Return the absolute path of the ``srvb`` directory inside calibre's
    cache directory, creating its ``s`` and ``f`` sub-directories on the first
    call. The result is remembered in the module-level ``_books_cache_dir``. '''
    global _books_cache_dir
    if not _books_cache_dir:
        root = os.path.abspath(os.path.join(cache_dir(), 'srvb'))
        # 's' is used by queue_job() for staging, 'f' by job_done() for
        # finished renders
        for name in ('s', 'f'):
            subdir = os.path.join(root, name)
            try:
                os.makedirs(subdir)
            except EnvironmentError as err:
                # An already existing directory is fine, anything else is not
                if err.errno != errno.EEXIST:
                    raise
        _books_cache_dir = root
    return _books_cache_dir
def book_hash(library_uuid, book_id, fmt, fmt_metadata):
    ''' Return a hex SHA1 digest (as unicode) identifying one rendering of a
    book format.

    The digest covers the library identifier, the book id, the upper-cased
    format name, the ``size`` entry of ``fmt_metadata`` and RENDER_VERSION, so
    that a change in any of them yields a different cache key.

    :param library_uuid: identifier of the library the book lives in
    :param book_id: id of the book
    :param fmt: format name, case-insensitive
    :param fmt_metadata: mapping that must contain a ``size`` key
    '''
    # RENDER_VERSION has to be part of the hashed payload. Passing it as the
    # second positional argument of dumps() would select the pickle *protocol*
    # instead, which raises ValueError once the version exceeds the highest
    # supported protocol and never actually hashes the version number.
    key = (library_uuid, book_id, fmt.upper(), fmt_metadata['size'], RENDER_VERSION)
    # Pin the protocol so the serialized bytes (and hence the digest) are stable
    raw = dumps(key, 2)
    return sha1(raw).hexdigest().decode('ascii')
# Flipped to True by queue_job() the first time it runs, after which the
# staging directory is no longer purged of leftovers
staging_cleaned = False
def safe_remove(x, is_file=None):
    ''' Best-effort removal of the file or directory tree at path ``x``.

    :param x: path to remove
    :param is_file: True to treat ``x`` as a file, False to treat it as a
        directory tree, None to decide by checking whether ``x`` is an
        existing regular file

    Errors raised while removing are swallowed. '''
    treat_as_file = os.path.isfile(x) if is_file is None else is_file
    try:
        if treat_as_file:
            os.remove(x)
        else:
            shutil.rmtree(x, ignore_errors=True)
    except EnvironmentError:
        # Deliberately ignored: removal is opportunistic cleanup
        pass
def queue_job(ctx, copy_format_to, bhash, fmt, book_id):
    ''' Stage a copy of a book format and start a background job to render it.

    :param ctx: object whose ``start_job()`` is used to launch the render
    :param copy_format_to: callable that writes the book data to the file
        object it is given
    :param bhash: cache key of the book, recorded in ``queued_jobs``
    :param fmt: format name, used (lower-cased) as the staged file's extension
    :param book_id: id of the book, used only in the job's name
    :return: the job id returned by ``ctx.start_job()``

    This function reads and writes module-level state (``staging_cleaned``,
    ``queued_jobs``) without taking ``cache_lock`` itself.
    '''
    global staging_cleaned
    tdir = os.path.join(books_cache_dir(), 's')
    if not staging_cleaned:
        # First call in this process: purge whatever is left in staging
        staging_cleaned = True
        for x in os.listdir(tdir):
            safe_remove(os.path.join(tdir, x))
    fd, pathtoebook = tempfile.mkstemp(prefix='', suffix=('.' + fmt.lower()), dir=tdir)
    try:
        with os.fdopen(fd, 'wb') as f:
            copy_format_to(f)
        # From here on tdir is the per-job output directory inside staging
        tdir = tempfile.mkdtemp('', '', tdir)
    except Exception:
        # Do not leave a partially written copy behind when staging fails.
        # The file is closed by now, so it can be removed on all platforms.
        safe_remove(pathtoebook, True)
        raise
    job_id = ctx.start_job('Render book %s (%s)' % (book_id, fmt), 'calibre.srv.render_book', 'render', args=(pathtoebook, tdir, bhash),
                           job_done_callback=job_done, job_data=(bhash, pathtoebook, tdir))
    queued_jobs[bhash] = job_id
    return job_id
def job_done(job):
    ''' Callback for a finished render job.

    ``job.data`` is unpacked as ``(book hash, staged book path, output
    directory)``. The staged book is always removed. A failed job is recorded
    in ``failed_jobs`` and its output directory deleted; otherwise the output
    directory is moved to its final place under the ``f`` cache directory,
    replacing anything already there. An error during that move is recorded in
    ``failed_jobs`` as well. All of this happens with ``cache_lock`` held. '''
    with cache_lock:
        bhash, staged_book, outdir = job.data
        queued_jobs.pop(bhash, None)
        safe_remove(staged_book)
        if job.failed:
            failed_jobs[bhash] = (job.was_aborted, job.traceback)
            safe_remove(outdir, False)
            return
        try:
            final = os.path.join(books_cache_dir(), 'f', bhash)
            # Clear out any previous render so the rename can succeed
            safe_remove(final, False)
            os.rename(outdir, final)
        except Exception:
            import traceback
            failed_jobs[bhash] = (False, traceback.format_exc())
@endpoint('/book-manifest/{book_id}/{fmt}', postprocess=json, types={'book_id':int})
def book_manifest(ctx, rd, book_id, fmt):
    ''' Return the manifest of the rendered book, or the status of the job
    rendering it.

    If ``calibre-book-manifest.json`` already exists in the cache for this
    book, its modification time is refreshed and the open file is returned.
    Otherwise a dict is returned: for a previously failed render it has the
    keys ``aborted``, ``traceback`` and ``job_status`` (``'finished'``); for a
    running or newly queued render it has ``aborted``, ``traceback``,
    ``job_status`` and ``job_id``.

    :raises HTTPNotFound: if no input plugin handles ``fmt`` or the book has no
        such format
    '''
    db, library_id = get_library_data(ctx, rd)[:2]
    if plugin_for_input_format(fmt) is None:
        raise HTTPNotFound('The format %s cannot be viewed' % fmt.upper())
    with db.safe_read_lock:
        fm = db.format_metadata(book_id, fmt)
        if not fm:
            # Report the requested format name, not the (empty) metadata
            raise HTTPNotFound('No %s format for the book %s in the library: %s' % (fmt, book_id, library_id))
        bhash = book_hash(db.library_id, book_id, fmt, fm)
        with cache_lock:
            mpath = os.path.join(books_cache_dir(), 'f', bhash, 'calibre-book-manifest.json')
            if iswindows:
                # Extended-length path prefix
                mpath = '\\\\?\\' + os.path.abspath(mpath)
            try:
                os.utime(mpath, None)
                return lopen(mpath, 'rb')
            except EnvironmentError as e:
                # A missing manifest just means the book is not rendered yet
                if e.errno != errno.ENOENT:
                    raise
            x = failed_jobs.pop(bhash, None)
            if x is not None:
                return {'aborted':x[0], 'traceback':x[1], 'job_status':'finished'}
            job_id = queued_jobs.get(bhash)
            if job_id is None:
                job_id = queue_job(ctx, partial(db.copy_format_to, book_id, fmt), bhash, fmt, book_id)
    status, result, tb, aborted = ctx.job_status(job_id)
    return {'aborted': aborted, 'traceback':tb, 'job_status':status, 'job_id':job_id}
@endpoint('/book-file/{book_hash}/{name}')
def book_file(ctx, rd, book_hash, name):
    ''' Serve the file ``name`` from the rendered book identified by
    ``book_hash``.

    :raises HTTPNotFound: if the resolved path falls outside the finished
        renders directory or the file does not exist
    '''
    # books_cache_dir is a function and has to be called; the requested path
    # must be resolved relative to the finished renders directory
    base = os.path.abspath(os.path.join(books_cache_dir(), 'f'))
    mpath = os.path.abspath(os.path.join(base, book_hash, name))
    # Compare against base plus a separator so that neither '..' components
    # nor a sibling directory that merely shares the prefix can escape it
    if not mpath.startswith(base + os.sep):
        raise HTTPNotFound('No book file with hash: %s and name: %s' % (book_hash, name))
    try:
        return rd.filesystem_file_with_custom_etag(lopen(mpath, 'rb'), book_hash, name)
    except EnvironmentError as e:
        if e.errno != errno.ENOENT:
            raise
        raise HTTPNotFound('No book file with hash: %s and name: %s' % (book_hash, name))

View File

@ -93,8 +93,8 @@ class Context(object):
self.ignored_fields = frozenset(filter(None, (x.strip() for x in (opts.ignored_fields or '').split(',')))) self.ignored_fields = frozenset(filter(None, (x.strip() for x in (opts.ignored_fields or '').split(','))))
self.displayed_fields = frozenset(filter(None, (x.strip() for x in (opts.displayed_fields or '').split(',')))) self.displayed_fields = frozenset(filter(None, (x.strip() for x in (opts.displayed_fields or '').split(','))))
def start_job(self, name, module, func, args=(), kwargs=None): def start_job(self, name, module, func, args=(), kwargs=None, job_done_callback=None, job_data=None):
return self.jobs_manager.start_job(name, module, func, args, kwargs) return self.jobs_manager.start_job(name, module, func, args, kwargs, job_done_callback, job_data)
def job_status(self, job_id): def job_status(self, job_id):
return self.jobs_manager.job_status(job_id) return self.jobs_manager.job_status(job_id)
@ -188,7 +188,7 @@ class Handler(object):
prefer_basic_auth = {'auto':has_ssl, 'basic':True}.get(opts.auth_mode, 'digest') prefer_basic_auth = {'auto':has_ssl, 'basic':True}.get(opts.auth_mode, 'digest')
self.auth_controller = AuthController(user_credentials=ctx.user_manager, prefer_basic_auth=prefer_basic_auth) self.auth_controller = AuthController(user_credentials=ctx.user_manager, prefer_basic_auth=prefer_basic_auth)
self.router = Router(ctx=ctx, url_prefix=opts.url_prefix, auth_controller=self.auth_controller) self.router = Router(ctx=ctx, url_prefix=opts.url_prefix, auth_controller=self.auth_controller)
for module in ('content', 'ajax', 'code', 'legacy', 'opds'): for module in ('content', 'ajax', 'code', 'legacy', 'opds', 'books'):
module = import_module('calibre.srv.' + module) module = import_module('calibre.srv.' + module)
self.router.load_routes(vars(module).itervalues()) self.router.load_routes(vars(module).itervalues())
self.router.finalize() self.router.finalize()

View File

@ -15,7 +15,7 @@ from calibre import detect_ncpus, force_unicode
from calibre.utils.monotonic import monotonic from calibre.utils.monotonic import monotonic
from calibre.utils.ipc.simple_worker import fork_job, WorkerError from calibre.utils.ipc.simple_worker import fork_job, WorkerError
StartEvent = namedtuple('StartEvent', 'job_id name module function args kwargs') StartEvent = namedtuple('StartEvent', 'job_id name module function args kwargs callback data')
DoneEvent = namedtuple('DoneEvent', 'job_id') DoneEvent = namedtuple('DoneEvent', 'job_id')
class Job(Thread): class Job(Thread):
@ -29,6 +29,7 @@ class Job(Thread):
self.job_name = start_event.name self.job_name = start_event.name
self.job_id = start_event.job_id self.job_id = start_event.job_id
self.func = partial(fork_job, start_event.module, start_event.function, start_event.args, start_event.kwargs, abort=self.abort_event) self.func = partial(fork_job, start_event.module, start_event.function, start_event.args, start_event.kwargs, abort=self.abort_event)
self.data, self.callback = start_event.data, start_event.callback
self.result = self.traceback = None self.result = self.traceback = None
self.done = False self.done = False
self.start_time = monotonic() self.start_time = monotonic()
@ -54,6 +55,10 @@ class Job(Thread):
def was_aborted(self): def was_aborted(self):
return self.done and self.result is None and self.abort_event.is_set() return self.done and self.result is None and self.abort_event.is_set()
@property
def failed(self):
    ''' Truthy if the job has a non-empty traceback or was aborted '''
    return bool(self.traceback) or self.was_aborted
def remove_log(self): def remove_log(self):
lp, self.log_path = self.log_path, None lp, self.log_path = self.log_path, None
if lp: if lp:
@ -95,7 +100,7 @@ class JobsManager(object):
self.shutting_down = False self.shutting_down = False
self.event_loop = None self.event_loop = None
def start_job(self, name, module, func, args=(), kwargs=None): def start_job(self, name, module, func, args=(), kwargs=None, job_done_callback=None, job_data=None):
with self.lock: with self.lock:
if self.shutting_down: if self.shutting_down:
return None return None
@ -104,7 +109,7 @@ class JobsManager(object):
t.daemon = True t.daemon = True
t.start() t.start()
job_id = next(self.job_id) job_id = next(self.job_id)
self.events.put(StartEvent(job_id, name, module, func, args, kwargs or {})) self.events.put(StartEvent(job_id, name, module, func, args, kwargs or {}, job_done_callback, job_data))
self.waiting_job_ids.add(job_id) self.waiting_job_ids.add(job_id)
return job_id return job_id
@ -214,6 +219,12 @@ class JobsManager(object):
def job_finished(self, job_id): def job_finished(self, job_id):
with self.lock: with self.lock:
self.finished_jobs[job_id] = job = self.jobs.pop(job_id) self.finished_jobs[job_id] = job = self.jobs.pop(job_id)
if job.callback is not None:
try:
job.callback(job)
except Exception:
import traceback
self.log.error('Error running callback for job: %s:\n%s' % (job.name, traceback.format_exc()))
self.prune_finished_jobs() self.prune_finished_jobs()
if job.traceback and not job.was_aborted: if job.traceback and not job.was_aborted:
logdata = job.read_log() logdata = job.read_log()

View File

@ -20,6 +20,7 @@ from calibre.ebooks.oeb.polish.utils import guess_type
from calibre.utils.short_uuid import uuid4 from calibre.utils.short_uuid import uuid4
from calibre.utils.logging import default_log from calibre.utils.logging import default_log
RENDER_VERSION = 1
def encode_component(x): def encode_component(x):
return x.replace(',', ',c').replace('|', ',p') return x.replace(',', ',c').replace('|', ',p')
@ -41,10 +42,9 @@ def decode_url(x):
class Container(ContainerBase): class Container(ContainerBase):
RENDER_VERSION = 1
tweak_mode = True tweak_mode = True
def __init__(self, path_to_ebook, tdir, log=None): def __init__(self, path_to_ebook, tdir, log=None, book_hash=None):
log = log or default_log log = log or default_log
book_fmt, opfpath, input_fmt = extract_book(path_to_ebook, tdir, log=log) book_fmt, opfpath, input_fmt = extract_book(path_to_ebook, tdir, log=log)
ContainerBase.__init__(self, tdir, opfpath, log) ContainerBase.__init__(self, tdir, opfpath, log)
@ -53,10 +53,11 @@ class Container(ContainerBase):
name == self.opf_name or mt == guess_type('a.ncx') or name.startswith('META-INF/') name == self.opf_name or mt == guess_type('a.ncx') or name.startswith('META-INF/')
} }
self.book_render_data = data = { self.book_render_data = data = {
'version': self.RENDER_VERSION, 'version': RENDER_VERSION,
'toc':get_toc(self).as_dict, 'toc':get_toc(self).as_dict,
'spine':[name for name, is_linear in self.spine_names], 'spine':[name for name, is_linear in self.spine_names],
'link_uid': uuid4(), 'link_uid': uuid4(),
'book_hash': book_hash,
'is_comic': input_fmt.lower() in {'cbc', 'cbz', 'cbr', 'cb7'}, 'is_comic': input_fmt.lower() in {'cbc', 'cbz', 'cbr', 'cb7'},
'manifest': {name:os.path.getsize(self.name_path_map[name]) for name in set(self.name_path_map) - excluded_names}, 'manifest': {name:os.path.getsize(self.name_path_map[name]) for name in set(self.name_path_map) - excluded_names},
} }
@ -133,5 +134,8 @@ class Container(ContainerBase):
escape_cdata(root) escape_cdata(root)
return tostring(root, encoding='utf-8', xml_declaration=True, with_tail=False, doctype='<!DOCTYPE html>') return tostring(root, encoding='utf-8', xml_declaration=True, with_tail=False, doctype='<!DOCTYPE html>')
def render(pathtoebook, output_dir, book_hash=None):
    ''' Construct a Container for ``pathtoebook`` in ``output_dir``, forwarding
    ``book_hash``. Nothing is returned; the Container object is discarded, so
    any useful work happens as a side effect of constructing it. '''
    Container(pathtoebook, output_dir, book_hash=book_hash)
if __name__ == '__main__': if __name__ == '__main__':
c = Container(sys.argv[-2], sys.argv[-1]) c = Container(sys.argv[-2], sys.argv[-1])

View File

@ -18,7 +18,7 @@ default_methods = frozenset(('HEAD', 'GET'))
def json(ctx, rd, endpoint, output): def json(ctx, rd, endpoint, output):
rd.outheaders.set('Content-Type', 'application/json; charset=UTF-8', replace_all=True) rd.outheaders.set('Content-Type', 'application/json; charset=UTF-8', replace_all=True)
if isinstance(output, bytes): if isinstance(output, bytes) or hasattr(output, 'fileno'):
ans = output # Assume output is already UTF-8 encoded json ans = output # Assume output is already UTF-8 encoded json
else: else:
ans = jsonlib.dumps(output, ensure_ascii=False) ans = jsonlib.dumps(output, ensure_ascii=False)