diff --git a/src/calibre/srv/content.py b/src/calibre/srv/content.py
index 0825eef68d..153e51ba4b 100644
--- a/src/calibre/srv/content.py
+++ b/src/calibre/srv/content.py
@@ -7,9 +7,121 @@ __license__ = 'GPL v3'
 __copyright__ = '2015, Kovid Goyal '
 
 import os, errno
+from io import BytesIO
 
+from calibre.db.errors import NoSuchFormat
+from calibre.ebooks.metadata import authors_to_string
+from calibre.ebooks.metadata.meta import set_metadata
+from calibre.library.save_to_disk import find_plugboard
 from calibre.srv.errors import HTTPNotFound
 from calibre.srv.routes import endpoint
+from calibre.utils.config_base import tweaks
+from calibre.utils.date import timestampfromdt
+from calibre.utils.filenames import ascii_filename
+from calibre.utils.magick.draw import thumbnail
+
+plugboard_content_server_value = 'content_server'
+plugboard_content_server_formats = ['epub', 'mobi', 'azw3']
+update_metadata_in_fmts = frozenset(plugboard_content_server_formats)
+
+# Get book formats/cover as a cached filesystem file {{{
+
+def create_file_copy(ctx, rd, prefix, library_id, book_id, ext, mtime, copy_func, extra_etag_data=''):
+    ''' We cannot copy files directly from the library folder to the output
+    socket, as this can potentially lock the library for an extended period. So
+    instead we copy out the data from the library folder into a temp folder. We
+    make sure to only do this copy once, using the previous copy, if there have
+    been no changes to the data for the file since the last copy. '''
+
+    # Avoid too many items in a single directory for performance
+    base = os.path.join(rd.tdir, 'fcache', (('%x' % book_id)[-3:]))
+
+    library_id = library_id.replace('\\', '_').replace('/', '_')
+    bname = '%s-%s-%s.%s' % (prefix, library_id, book_id, ext)
+    fname = os.path.join(base, bname)
+    do_copy = True
+    mtime = timestampfromdt(mtime)
+    try:
+        ans = lopen(fname, 'r+b')
+        do_copy = os.fstat(ans.fileno()).st_mtime < mtime
+    except EnvironmentError:
+        try:
+            ans = lopen(fname, 'w+b')
+        except EnvironmentError:
+            try:
+                os.makedirs(base)
+            except EnvironmentError:
+                pass
+            ans = lopen(fname, 'w+b')
+        do_copy = True
+    if do_copy:
+        # A stale copy opened with r+b still holds its old contents, drop them
+        # so that shorter new data does not leave trailing bytes behind
+        ans.truncate(0)
+        copy_func(ans)
+        ans.seek(0)
+    if ctx.testing:
+        rd.outheaders['Used-Cache'] = 'no' if do_copy else 'yes'
+    return rd.filesystem_file_with_custom_etag(ans, prefix, library_id, book_id, mtime, extra_etag_data)
+
+def cover(ctx, rd, library_id, db, book_id, width=None, height=None):
+    ''' Return a cached copy of the cover (a thumbnail if width/height are given), raises HTTPNotFound if there is no cover '''
+    mtime = db.cover_last_modified(book_id)
+    if mtime is None:
+        raise HTTPNotFound('No cover for book: %r' % book_id)
+    prefix = 'cover'
+    if width is None and height is None:
+        def copy_func(dest):
+            db.copy_cover_to(book_id, dest)
+    else:
+        prefix += '-%sx%s' % (width, height)
+        def copy_func(dest):
+            buf = BytesIO()
+            db.copy_cover_to(book_id, buf)
+            quality = min(99, max(50, tweaks['content_server_thumbnail_compression_quality']))
+            w, h, data = thumbnail(buf.getvalue(), width=width, height=height, compression_quality=quality)
+            dest.write(data)
+    return create_file_copy(ctx, rd, prefix, library_id, book_id, 'jpg', mtime, copy_func)
+
+def book_fmt(ctx, rd, library_id, db, book_id, fmt):
+    ''' Return a cached copy of the format fmt, with updated metadata for the formats in update_metadata_in_fmts, raises NoSuchFormat '''
+    mdata = db.format_metadata(book_id, fmt)
+    if not mdata:
+        raise NoSuchFormat()
+    mtime = mdata['mtime']
+    update_metadata = fmt in update_metadata_in_fmts
+    extra_etag_data = ''
+
+    if update_metadata:
+        mi = db.get_metadata(book_id)
+        mtime = max(mtime, mi.last_modified)
+        # Get any plugboards for the content server
+        plugboards = db.pref('plugboards')
+        if plugboards:
+            cpb = find_plugboard(plugboard_content_server_value, fmt, plugboards)
+            if cpb:
+                # Transform the metadata via the plugboard
+                newmi = mi.deepcopy_metadata()
+                newmi.template_to_attribute(mi, cpb)
+                mi = newmi
+                extra_etag_data = repr(cpb)
+    else:
+        mi = db.get_proxy_metadata(book_id)
+
+    def copy_func(dest):
+        db.copy_format_to(book_id, fmt, dest)
+        if update_metadata:
+            set_metadata(dest, mi, fmt)
+            dest.seek(0)
+
+    au = authors_to_string(mi.authors or [_('Unknown')])
+    title = mi.title or _('Unknown')
+    fname = '%s - %s_%s.%s' % (title[:30], au[:30], book_id, fmt)
+    fname = ascii_filename(fname).replace('"', '_')
+    rd.outheaders['Content-Disposition'] = 'attachment; filename="%s"' % fname
+
+    return create_file_copy(ctx, rd, 'fmt', library_id, book_id, fmt, mtime, copy_func, extra_etag_data=extra_etag_data)
+# }}}
 
 @endpoint('/static/{+what}', auth_required=False, cache_control=24)
 def static(ctx, rd, what):
@@ -29,3 +141,28 @@ def static(ctx, rd, what):
 @endpoint('/favicon.png', auth_required=False, cache_control=24)
 def favicon(ctx, rd):
     return lopen(I('lt.png'), 'rb')
+
+@endpoint('/get/{what}/{book_id}/{library_id=None}', types={'book_id':int})
+def get(ctx, rd, what, book_id, library_id):
+    ''' Serve the cover ("cover"), a thumbnail ("thumb" or "thumb_WxH") or the format named by what, for the specified book '''
+    db = ctx.get_library(library_id)
+    if db is None:
+        raise HTTPNotFound('Library %r not found' % library_id)
+    library_id = db.server_library_id
+    with db.safe_read_lock:
+        if not db.has_id(book_id):
+            raise HTTPNotFound('Book with id %r does not exist' % book_id)
+        if what == 'thumb' or what.startswith('thumb_'):
+            try:
+                w, h = map(int, what.partition('_')[2].partition('x')[::2])
+            except Exception:
+                w, h = 60, 80
+            return cover(ctx, rd, library_id, db, book_id, width=w, height=h)
+        elif what == 'cover':
+            return cover(ctx, rd, library_id, db, book_id)
+        # TODO: Implement opf and json
+        else:
+            try:
+                return book_fmt(ctx, rd, library_id, db, book_id, what.lower())
+            except NoSuchFormat:
+                raise HTTPNotFound('No %r format for the book %r' % (what.lower(), book_id))
diff --git a/src/calibre/srv/handler.py b/src/calibre/srv/handler.py
index 85bb7f95b6..379e63d26c 100644
--- a/src/calibre/srv/handler.py
+++ b/src/calibre/srv/handler.py
@@ -6,23 +6,66 @@ from __future__ import (unicode_literals, division, absolute_import,
 __license__ = 'GPL v3'
 __copyright__ = '2015, Kovid Goyal '
 
+import os
 from importlib import import_module
+from threading import Lock
 
+from calibre import force_unicode
+from calibre.db.cache import Cache
+from calibre.db.legacy import create_backend, LibraryDatabase
 from calibre.srv.routes import Router
 
+def init_library(library_path):
+    ' Create and initialize a Cache for the library at library_path '
+    db = Cache(create_backend(library_path))
+    db.init()
+    return db
+
 class LibraryBroker(object):
 
     def __init__(self, libraries):
-        self.libraries = libraries
+        self.lock = Lock()
+        self.lmap = {}
+        self.default_library = None
+        for path in libraries:
+            if not LibraryDatabase.exists_at(path):
+                continue
+            library_id = base = force_unicode(os.path.basename(path))
+            c = 0
+            while library_id in self.lmap:
+                c += 1
+                # Use the counter, a constant suffix never terminates for a third duplicate
+                library_id = base + ' (%d)' % c
+            if path is libraries[0]:
+                self.default_library = library_id
+            self.lmap[library_id] = path
+
+    def get(self, library_id=None):
+        ' Return the (lazily initialized) db for library_id, or None if there is no such library '
+        with self.lock:
+            library_id = library_id or self.default_library
+            ans = self.lmap.get(library_id)
+            if ans is None:
+                return
+            if not callable(getattr(ans, 'init', None)):
+                try:
+                    self.lmap[library_id] = ans = init_library(ans)
+                    ans.server_library_id = library_id
+                except Exception:
+                    self.lmap[library_id] = ans = None
+                    raise
+            return ans
+
 
 class Context(object):
 
     log = None
     url_for = None
 
-    def __init__(self, libraries, opts):
+    def __init__(self, libraries, opts, testing=False):
         self.opts = opts
         self.library_broker = LibraryBroker(libraries)
+        self.testing = testing
 
     def init_session(self, endpoint, data):
         pass
@@ -30,10 +73,13 @@ class Context(object):
     def finalize_session(self, endpoint, data, output):
         pass
 
+    def get_library(self, library_id=None):
+        return self.library_broker.get(library_id)
+
 class Handler(object):
 
-    def __init__(self, libraries, opts):
-        self.router = Router(ctx=Context(libraries, opts), url_prefix=opts.url_prefix)
+    def __init__(self, libraries, opts, testing=False):
+        self.router = Router(ctx=Context(libraries, opts, testing=testing), url_prefix=opts.url_prefix)
         for module in ('content',):
             module = import_module('calibre.srv.' + module)
             self.router.load_routes(vars(module).itervalues())
diff --git a/src/calibre/srv/http_response.py b/src/calibre/srv/http_response.py
index 609283afa4..07e7c35744 100644
--- a/src/calibre/srv/http_response.py
+++ b/src/calibre/srv/http_response.py
@@ -12,6 +12,7 @@ from io import BytesIO, DEFAULT_BUFFER_SIZE
 from itertools import chain, repeat, izip_longest
 from operator import itemgetter
 from functools import wraps
+from future_builtins import map
 
 from calibre import guess_type, force_unicode
 from calibre.constants import __version__
@@ -178,13 +179,22 @@ def get_range_parts(ranges, content_type, content_length):  # {{{
     return list(map(part, ranges)) + [('--%s--' % MULTIPART_SEPARATOR).encode('ascii')]
 # }}}
 
+class ETaggedFile(object):  # {{{
+
+    def __init__(self, output, etag):
+        self.output, self.etag = output, etag
+
+    def fileno(self):
+        return self.output.fileno()
+# }}}
+
 class RequestData(object):  # {{{
 
     cookies = {}
     username = None
 
     def __init__(self, method, path, query, inheaders, request_body_file, outheaders, response_protocol,
-                 static_cache, opts, remote_addr, remote_port, translator_cache):
+                 static_cache, opts, remote_addr, remote_port, translator_cache, tdir):
 
         (self.method, self.path, self.query, self.inheaders, self.request_body_file, self.outheaders,
          self.response_protocol, self.static_cache, self.translator_cache) = (
@@ -197,6 +207,7 @@ class RequestData(object):  # {{{
         self.outcookie = Cookie()
         self.lang_code = self.gettext_func = self.ngettext_func = None
         self.set_translator(self.get_preferred_language())
+        self.tdir = tdir
 
     def generate_static_output(self, name, generator):
         ans = self.static_cache.get(name)
@@ -204,6 +215,14 @@ class RequestData(object):  # {{{
             ans = self.static_cache[name] = StaticOutput(generator())
         return ans
 
+    def filesystem_file_with_custom_etag(self, output, *etag_parts):
+        ' Wrap output in an ETaggedFile whose etag is the SHA1 hex digest of the text form of etag_parts '
+        etag = hashlib.sha1()
+        for part in etag_parts:
+            # Hash explicitly encoded bytes, so that non-ASCII parts do not fail
+            etag.update(type('')(part).encode('utf-8'))
+        return ETaggedFile(output, etag.hexdigest())
+
     def read(self, size=-1):
         return self.request_body_file.read(size)
 
@@ -249,7 +268,12 @@ class ReadableOutput(object):
         self.src_file.seek(0)
 
 def filesystem_file_output(output, outheaders, stat_result):
-    etag = '"%s"' % hashlib.sha1(type('')(stat_result.st_mtime) + force_unicode(output.name or '')).hexdigest()
+    etag = getattr(output, 'etag', None)
+    if etag is None:
+        etag = hashlib.sha1(type('')(stat_result.st_mtime) + force_unicode(output.name or '')).hexdigest()
+    else:
+        output = output.output
+    etag = '"%s"' % etag
     self = ReadableOutput(output, etag=etag, content_length=stat_result.st_size)
     self.name = output.name
     self.use_sendfile = True
@@ -358,7 +382,7 @@ class HTTPConnection(HTTPRequest):
         data = RequestData(
             self.method, self.path, self.query, inheaders, request_body_file,
             outheaders, self.response_protocol, self.static_cache, self.opts,
-            self.remote_addr, self.remote_port, self.translator_cache
+            self.remote_addr, self.remote_port, self.translator_cache, self.tdir
         )
         self.queue_job(self.run_request_handler, data)
 
diff --git a/src/calibre/srv/tests/base.py b/src/calibre/srv/tests/base.py
index 37e0954dc0..08427532b1 100644
--- a/src/calibre/srv/tests/base.py
+++ b/src/calibre/srv/tests/base.py
@@ -59,6 +59,8 @@ class LibraryBaseTest(BaseTest):
         db.init()
         db.set_cover({1:I('lt.png', data=True), 2:I('polish.png', data=True)})
         db.add_format(1, 'FMT1', BytesIO(b'book1fmt1'), run_hooks=False)
+        with open(P('quick_start/eng.epub'), 'rb') as f:
+            db.add_format(1, 'EPUB', f, run_hooks=False)
         db.add_format(1, 'FMT2', BytesIO(b'book1fmt2'), run_hooks=False)
         db.add_format(2, 'FMT1', BytesIO(b'book2fmt1'), run_hooks=False)
         db.backend.conn.close()
@@ -124,11 +126,12 @@ class LibraryServer(TestServer):
         from calibre.srv.http_response import create_http_handler
         opts = Options(**kwargs)
         self.libraries = libraries or (library_path,)
-        self.handler = Handler(libraries, opts)
+        self.handler = Handler(self.libraries, opts, testing=True)
         self.loop = ServerLoop(
             create_http_handler(self.handler.dispatch),
             opts=opts,
             plugins=plugins,
             log=ServerLog(level=ServerLog.WARN),
         )
+        self.handler.set_log(self.loop.log)
         specialize(self)
diff --git a/src/calibre/srv/tests/content.py b/src/calibre/srv/tests/content.py
index 6d0947a801..024c7f78f8 100644
--- a/src/calibre/srv/tests/content.py
+++ b/src/calibre/srv/tests/content.py
@@ -7,12 +7,15 @@ __license__ = 'GPL v3'
 __copyright__ = '2015, Kovid Goyal '
 
 import httplib
+from io import BytesIO
 
+from calibre.ebooks.metadata.epub import get_metadata
 from calibre.srv.tests.base import LibraryBaseTest
+from calibre.utils.magick.draw import identify_data
 
 class ContentTest(LibraryBaseTest):
 
-    def test_static(self):
+    def test_static(self):  # {{{
         'Test serving of static content'
         with self.create_server() as server:
             conn = server.connect()
@@ -48,3 +51,101 @@ class ContentTest(LibraryBaseTest):
                 test('content-server/empty.html', '/static/empty.html')
 
             test('images/lt.png', '/favicon.png')
+        # }}}
+
+    def test_get(self):  # {{{
+        'Test /get'
+        with self.create_server() as server:
+            db = server.handler.router.ctx.get_library()
+            conn = server.connect()
+
+            def get(what, book_id, library_id=None):
+                conn.request('GET', '/get/%s/%s' % (what, book_id) + (('/' + library_id) if library_id else ''))
+                r = conn.getresponse()
+                return r, r.read()
+
+            # Test various invalid parameters
+            def bad(*args):
+                r, data = get(*args)
+                self.ae(r.status, httplib.NOT_FOUND)
+            bad('xxx', 1)
+            bad('fmt1', 10)
+            bad('fmt1', 1, 'zzzz')
+            bad('fmt1', 'xx')
+
+            # Test simple fetching of format without metadata update
+            r, data = get('fmt1', 1, db.server_library_id)
+            self.ae(data, db.format(1, 'fmt1'))
+            self.assertIsNotNone(r.getheader('Content-Disposition'))
+            self.ae(r.getheader('Used-Cache'), 'no')
+            r, data = get('fmt1', 1)
+            self.ae(data, db.format(1, 'fmt1'))
+            self.ae(r.getheader('Used-Cache'), 'yes')
+
+            # Test fetching of format with metadata update
+            raw = P('quick_start/eng.epub', data=True)
+            r, data = get('epub', 1)
+            self.ae(r.status, httplib.OK)
+            etag = r.getheader('ETag')
+            self.assertIsNotNone(etag)
+            self.ae(r.getheader('Used-Cache'), 'no')
+            self.assertTrue(data.startswith(b'PK'))
+            self.assertGreaterEqual(len(data), len(raw))
+            db.set_field('title', {1:'changed'})
+            r, data = get('epub', 1)
+            self.assertNotEqual(r.getheader('ETag'), etag)
+            etag = r.getheader('ETag')
+            self.ae(r.getheader('Used-Cache'), 'no')
+            mi = get_metadata(BytesIO(data), extract_cover=False)
+            self.ae(mi.title, 'changed')
+            r, data = get('epub', 1)
+            self.ae(r.getheader('Used-Cache'), 'yes')
+
+            # Test plugboards
+            import calibre.library.save_to_disk as c
+            orig, c.DEBUG = c.DEBUG, False
+            try:
+                db.set_pref('plugboards', {u'epub': {u'content_server': [[u'changed, {title}', u'title']]}})
+                # this is needed as the cache is not invalidated for plugboard changes
+                db.set_field('title', {1:'again'})
+                r, data = get('epub', 1)
+                self.assertNotEqual(r.getheader('ETag'), etag)
+                etag = r.getheader('ETag')
+                self.ae(r.getheader('Used-Cache'), 'no')
+                mi = get_metadata(BytesIO(data), extract_cover=False)
+                self.ae(mi.title, 'changed, again')
+            finally:
+                c.DEBUG = orig
+
+            # Test the serving of covers
+            r, data = get('cover', 1)
+            self.ae(r.status, httplib.OK)
+            self.ae(data, db.cover(1))
+            self.ae(r.getheader('Used-Cache'), 'no')
+            r, data = get('cover', 1)
+            self.ae(r.status, httplib.OK)
+            self.ae(data, db.cover(1))
+            self.ae(r.getheader('Used-Cache'), 'yes')
+            r, data = get('cover', 3)
+            self.ae(r.status, httplib.NOT_FOUND)
+            r, data = get('thumb', 1)
+            self.ae(r.status, httplib.OK)
+            self.ae(identify_data(data), (60, 60, 'jpeg'))
+            self.ae(r.getheader('Used-Cache'), 'no')
+            r, data = get('thumb', 1)
+            self.ae(r.status, httplib.OK)
+            self.ae(r.getheader('Used-Cache'), 'yes')
+            r, data = get('thumb_100x100', 1)
+            self.ae(r.status, httplib.OK)
+            self.ae(identify_data(data), (100, 100, 'jpeg'))
+            self.ae(r.getheader('Used-Cache'), 'no')
+            r, data = get('thumb_100x100', 1)
+            self.ae(r.status, httplib.OK)
+            self.ae(r.getheader('Used-Cache'), 'yes')
+            db.set_cover({1:I('lt.png', data=True)})
+            r, data = get('thumb_100x100', 1)
+            self.ae(r.status, httplib.OK)
+            self.ae(identify_data(data), (100, 100, 'jpeg'))
+            self.ae(r.getheader('Used-Cache'), 'no')
+
+        # }}}
diff --git a/src/calibre/utils/magick/draw.py b/src/calibre/utils/magick/draw.py
index 8b9243e7dd..2e94a70674 100644
--- a/src/calibre/utils/magick/draw.py
+++ b/src/calibre/utils/magick/draw.py
@@ -117,6 +117,10 @@ def thumbnail(data, width=120, height=120, bgcolor='#ffffff', fmt='jpg',
     img = Image()
     img.load(data)
     owidth, oheight = img.size
+    if width is None:
+        width = owidth
+    if height is None:
+        height = oheight
     if not preserve_aspect_ratio:
         scaled = owidth > width or oheight > height
         nwidth = width