diff --git a/src/calibre/db/cache.py b/src/calibre/db/cache.py
index 149205a4d6..c4f351a4a0 100644
--- a/src/calibre/db/cache.py
+++ b/src/calibre/db/cache.py
@@ -131,6 +131,10 @@ class Cache(object):
     def new_api(self):
         return self
 
+    @property
+    def library_id(self):
+        return self.backend.library_id
+
     @property
     def safe_read_lock(self):
         ''' A safe read lock is a lock that does nothing if the thread already
diff --git a/src/calibre/srv/books.py b/src/calibre/srv/books.py
new file mode 100644
index 0000000000..5e2f0b938b
--- /dev/null
+++ b/src/calibre/srv/books.py
@@ -0,0 +1,148 @@
+#!/usr/bin/env python2
+# vim:fileencoding=utf-8
+# License: GPLv3 Copyright: 2016, Kovid Goyal
+
+from __future__ import (unicode_literals, division, absolute_import,
+                        print_function)
+from hashlib import sha1
+from functools import partial
+from threading import RLock
+from cPickle import dumps
+import errno, os, tempfile, shutil
+
+from calibre.constants import cache_dir, iswindows
+from calibre.customize.ui import plugin_for_input_format
+from calibre.srv.render_book import RENDER_VERSION
+from calibre.srv.errors import HTTPNotFound
+from calibre.srv.routes import endpoint, json
+from calibre.srv.utils import get_library_data
+
+# Guards queued_jobs, failed_jobs and the on-disk cache of rendered books
+cache_lock = RLock()
+queued_jobs = {}  # book hash -> id of the render job currently queued for it
+failed_jobs = {}  # book hash -> (was_aborted, traceback) of the failed render
+
+_books_cache_dir = None
+def books_cache_dir():
+    ''' Return the base directory of the rendered books cache, creating its
+    "s" (staging) and "f" (rendered books) sub-directories on first use. '''
+    global _books_cache_dir
+    if _books_cache_dir:
+        return _books_cache_dir
+    base = os.path.abspath(os.path.join(cache_dir(), 'srvb'))
+    for d in 'sf':
+        try:
+            os.makedirs(os.path.join(base, d))
+        except EnvironmentError as e:
+            if e.errno != errno.EEXIST:
+                raise
+    _books_cache_dir = base
+    return base
+
+
+def book_hash(library_uuid, book_id, fmt, fmt_metadata):
+    ''' Return a hex digest identifying one rendering of a book format. It
+    changes when the library, book, format, format size or RENDER_VERSION
+    changes. '''
+    # RENDER_VERSION is part of the hashed data. It must not be passed as the
+    # second argument to dumps(), which is the pickle protocol number.
+    raw = dumps((library_uuid, book_id, fmt.upper(), fmt_metadata['size'], RENDER_VERSION))
+    return sha1(raw).hexdigest().decode('ascii')
+
+staging_cleaned = False
+
+def safe_remove(x, is_file=None):
+    ''' Best-effort removal of the file or directory tree at x. If is_file is
+    None the type of x is detected with os.path.isfile(). '''
+    if is_file is None:
+        is_file = os.path.isfile(x)
+    try:
+        os.remove(x) if is_file else shutil.rmtree(x, ignore_errors=True)
+    except EnvironmentError:
+        pass
+
+
+def queue_job(ctx, copy_format_to, bhash, fmt, book_id):
+    ''' Copy the book into the staging directory and start a job to render
+    it. Returns the id of the started job. '''
+    global staging_cleaned
+    tdir = os.path.join(books_cache_dir(), 's')
+    if not staging_cleaned:
+        # Remove anything left in staging, once per process
+        staging_cleaned = True
+        for x in os.listdir(tdir):
+            safe_remove(os.path.join(tdir, x))
+    fd, pathtoebook = tempfile.mkstemp(prefix='', suffix=('.' + fmt.lower()), dir=tdir)
+    with os.fdopen(fd, 'wb') as f:
+        copy_format_to(f)
+    tdir = tempfile.mkdtemp('', '', tdir)
+    job_id = ctx.start_job('Render book %s (%s)' % (book_id, fmt), 'calibre.srv.render_book', 'render', args=(pathtoebook, tdir, bhash),
+                           job_done_callback=job_done, job_data=(bhash, pathtoebook, tdir))
+    queued_jobs[bhash] = job_id
+    return job_id
+
+def job_done(job):
+    ''' Callback run when a render job finishes: move the rendered book into
+    the "f" cache directory, or record the failure in failed_jobs. '''
+    with cache_lock:
+        book_hash, pathtoebook, tdir = job.data
+        queued_jobs.pop(book_hash, None)
+        safe_remove(pathtoebook)
+        if job.failed:
+            failed_jobs[book_hash] = (job.was_aborted, job.traceback)
+            safe_remove(tdir, False)
+        else:
+            try:
+                dest = os.path.join(books_cache_dir(), 'f', book_hash)
+                safe_remove(dest, False)
+                os.rename(tdir, dest)
+            except Exception:
+                import traceback
+                failed_jobs[book_hash] = (False, traceback.format_exc())
+                safe_remove(tdir, False)  # do not leave the output in staging
+
+@endpoint('/book-manifest/{book_id}/{fmt}', postprocess=json, types={'book_id':int})
+def book_manifest(ctx, rd, book_id, fmt):
+    ''' Return the manifest of the rendered book if it is in the cache,
+    otherwise the status of the (possibly newly queued) job rendering it. '''
+    db, library_id = get_library_data(ctx, rd)[:2]
+    if plugin_for_input_format(fmt) is None:
+        raise HTTPNotFound('The format %s cannot be viewed' % fmt.upper())
+    with db.safe_read_lock:
+        fm = db.format_metadata(book_id, fmt)
+        if not fm:
+            raise HTTPNotFound('No %s format for the book %s in the library: %s' % (fmt, book_id, library_id))
+        bhash = book_hash(db.library_id, book_id, fmt, fm)
+        with cache_lock:
+            mpath = os.path.join(books_cache_dir(), 'f', bhash, 'calibre-book-manifest.json')
+            if iswindows:
+                mpath = '\\\\?\\' + os.path.abspath(mpath)
+            try:
+                os.utime(mpath, None)
+                return lopen(mpath, 'rb')
+            except EnvironmentError as e:
+                if e.errno != errno.ENOENT:
+                    raise
+            x = failed_jobs.pop(bhash, None)
+            if x is not None:
+                return {'aborted':x[0], 'traceback':x[1], 'job_status':'finished'}
+            job_id = queued_jobs.get(bhash)
+            if job_id is None:
+                job_id = queue_job(ctx, partial(db.copy_format_to, book_id, fmt), bhash, fmt, book_id)
+    status, result, tb, aborted = ctx.job_status(job_id)
+    return {'aborted': aborted, 'traceback':tb, 'job_status':status, 'job_id':job_id}
+
+@endpoint('/book-file/{book_hash}/{name}')
+def book_file(ctx, rd, book_hash, name):
+    ''' Serve the file called name from the rendered book with the specified
+    hash. Only paths inside the "f" cache directory are served. '''
+    base = os.path.join(books_cache_dir(), 'f')
+    mpath = os.path.abspath(os.path.join(base, book_hash, name))
+    if not mpath.startswith(base + os.sep):
+        raise HTTPNotFound('No book file with hash: %s and name: %s' % (book_hash, name))
+    try:
+        return rd.filesystem_file_with_custom_etag(lopen(mpath, 'rb'), book_hash, name)
+    except EnvironmentError as e:
+        if e.errno != errno.ENOENT:
+            raise
+        raise HTTPNotFound('No book file with hash: %s and name: %s' % (book_hash, name))
diff --git a/src/calibre/srv/handler.py b/src/calibre/srv/handler.py
index b7630a148e..8a9ecba584 100644
--- a/src/calibre/srv/handler.py
+++ b/src/calibre/srv/handler.py
@@ -93,8 +93,8 @@ class Context(object):
         self.ignored_fields = frozenset(filter(None, (x.strip() for x in (opts.ignored_fields or '').split(','))))
         self.displayed_fields = frozenset(filter(None, (x.strip() for x in (opts.displayed_fields or '').split(','))))
 
-    def start_job(self, name, module, func, args=(), kwargs=None):
-        return self.jobs_manager.start_job(name, module, func, args, kwargs)
+    def start_job(self, name, module, func, args=(), kwargs=None, job_done_callback=None, job_data=None):
+        return self.jobs_manager.start_job(name, module, func, args, kwargs, job_done_callback, job_data)
 
     def job_status(self, job_id):
         return self.jobs_manager.job_status(job_id)
@@ -188,7 +188,7 @@ class Handler(object):
         prefer_basic_auth = {'auto':has_ssl, 'basic':True}.get(opts.auth_mode, 'digest')
         self.auth_controller = AuthController(user_credentials=ctx.user_manager, prefer_basic_auth=prefer_basic_auth)
         self.router = Router(ctx=ctx, url_prefix=opts.url_prefix, auth_controller=self.auth_controller)
-        for module in ('content', 'ajax', 'code', 'legacy', 'opds'):
+        for module in ('content', 'ajax', 'code', 'legacy', 'opds', 'books'):
             module = import_module('calibre.srv.' + module)
             self.router.load_routes(vars(module).itervalues())
         self.router.finalize()
diff --git a/src/calibre/srv/jobs.py b/src/calibre/srv/jobs.py
index 68587a04ae..392bb0eba8 100644
--- a/src/calibre/srv/jobs.py
+++ b/src/calibre/srv/jobs.py
@@ -15,7 +15,7 @@ from calibre import detect_ncpus, force_unicode
 from calibre.utils.monotonic import monotonic
 from calibre.utils.ipc.simple_worker import fork_job, WorkerError
 
-StartEvent = namedtuple('StartEvent', 'job_id name module function args kwargs')
+StartEvent = namedtuple('StartEvent', 'job_id name module function args kwargs callback data')
 DoneEvent = namedtuple('DoneEvent', 'job_id')
 
 class Job(Thread):
@@ -29,6 +29,7 @@ class Job(Thread):
         self.job_name = start_event.name
         self.job_id = start_event.job_id
         self.func = partial(fork_job, start_event.module, start_event.function, start_event.args, start_event.kwargs, abort=self.abort_event)
+        self.data, self.callback = start_event.data, start_event.callback
         self.result = self.traceback = None
         self.done = False
         self.start_time = monotonic()
@@ -54,6 +55,10 @@ class Job(Thread):
     def was_aborted(self):
         return self.done and self.result is None and self.abort_event.is_set()
 
+    @property
+    def failed(self):
+        return bool(self.traceback) or self.was_aborted
+
     def remove_log(self):
         lp, self.log_path = self.log_path, None
         if lp:
@@ -95,7 +100,7 @@ class JobsManager(object):
         self.shutting_down = False
         self.event_loop = None
 
-    def start_job(self, name, module, func, args=(), kwargs=None):
+    def start_job(self, name, module, func, args=(), kwargs=None, job_done_callback=None, job_data=None):
         with self.lock:
             if self.shutting_down:
                 return None
@@ -104,7 +109,7 @@ class JobsManager(object):
                 t.daemon = True
                 t.start()
             job_id = next(self.job_id)
-            self.events.put(StartEvent(job_id, name, module, func, args, kwargs or {}))
+            self.events.put(StartEvent(job_id, name, module, func, args, kwargs or {}, job_done_callback, job_data))
             self.waiting_job_ids.add(job_id)
             return job_id
 
@@ -214,6 +219,12 @@ class JobsManager(object):
     def job_finished(self, job_id):
         with self.lock:
             self.finished_jobs[job_id] = job = self.jobs.pop(job_id)
+            if job.callback is not None:
+                try:
+                    job.callback(job)
+                except Exception:
+                    import traceback
+                    self.log.error('Error running callback for job: %s:\n%s' % (job.job_name, traceback.format_exc()))
             self.prune_finished_jobs()
             if job.traceback and not job.was_aborted:
                 logdata = job.read_log()
diff --git a/src/calibre/srv/render_book.py b/src/calibre/srv/render_book.py
index a9f51881fe..964485f493 100644
--- a/src/calibre/srv/render_book.py
+++ b/src/calibre/srv/render_book.py
@@ -20,6 +20,7 @@ from calibre.ebooks.oeb.polish.utils import guess_type
 from calibre.utils.short_uuid import uuid4
 from calibre.utils.logging import default_log
 
+RENDER_VERSION = 1
 
 def encode_component(x):
     return x.replace(',', ',c').replace('|', ',p')
@@ -41,10 +42,9 @@ def decode_url(x):
 
 class Container(ContainerBase):
 
-    RENDER_VERSION = 1
     tweak_mode = True
 
-    def __init__(self, path_to_ebook, tdir, log=None):
+    def __init__(self, path_to_ebook, tdir, log=None, book_hash=None):
         log = log or default_log
         book_fmt, opfpath, input_fmt = extract_book(path_to_ebook, tdir, log=log)
         ContainerBase.__init__(self, tdir, opfpath, log)
@@ -53,10 +53,11 @@ class Container(ContainerBase):
             name == self.opf_name or mt == guess_type('a.ncx') or name.startswith('META-INF/')
         }
         self.book_render_data = data = {
-            'version': self.RENDER_VERSION,
+            'version': RENDER_VERSION,
             'toc':get_toc(self).as_dict,
             'spine':[name for name, is_linear in self.spine_names],
             'link_uid': uuid4(),
+            'book_hash': book_hash,
             'is_comic': input_fmt.lower() in {'cbc', 'cbz', 'cbr', 'cb7'},
             'manifest': {name:os.path.getsize(self.name_path_map[name]) for name in set(self.name_path_map) - excluded_names},
         }
@@ -133,5 +134,8 @@ class Container(ContainerBase):
         escape_cdata(root)
         return tostring(root, encoding='utf-8', xml_declaration=True, with_tail=False, doctype='')
 
+def render(pathtoebook, output_dir, book_hash=None):
+    Container(pathtoebook, output_dir, book_hash=book_hash)
+
 if __name__ == '__main__':
     c = Container(sys.argv[-2], sys.argv[-1])
diff --git a/src/calibre/srv/routes.py b/src/calibre/srv/routes.py
index 3229b6303c..1da306a659 100644
--- a/src/calibre/srv/routes.py
+++ b/src/calibre/srv/routes.py
@@ -18,7 +18,7 @@ default_methods = frozenset(('HEAD', 'GET'))
 
 def json(ctx, rd, endpoint, output):
     rd.outheaders.set('Content-Type', 'application/json; charset=UTF-8', replace_all=True)
-    if isinstance(output, bytes):
+    if isinstance(output, bytes) or hasattr(output, 'fileno'):
         ans = output  # Assume output is already UTF-8 encoded json
     else:
         ans = jsonlib.dumps(output, ensure_ascii=False)