Implement /get for ebook formats and covers/thumbs

This commit is contained in:
Kovid Goyal 2015-06-14 11:04:55 +05:30
parent a848440da8
commit 0387e6dfc8
6 changed files with 311 additions and 9 deletions

View File

@ -7,9 +7,116 @@ __license__ = 'GPL v3'
__copyright__ = '2015, Kovid Goyal <kovid at kovidgoyal.net>'
import os, errno
from io import BytesIO
from calibre.db.errors import NoSuchFormat
from calibre.ebooks.metadata import authors_to_string
from calibre.ebooks.metadata.meta import set_metadata
from calibre.library.save_to_disk import find_plugboard
from calibre.srv.errors import HTTPNotFound
from calibre.srv.routes import endpoint
from calibre.utils.config_base import tweaks
from calibre.utils.date import timestampfromdt
from calibre.utils.filenames import ascii_filename
from calibre.utils.magick.draw import thumbnail
plugboard_content_server_value = 'content_server'
plugboard_content_server_formats = ['epub', 'mobi', 'azw3']
update_metadata_in_fmts = frozenset(plugboard_content_server_formats)
# Get book formats/cover as a cached filesystem file {{{
def create_file_copy(ctx, rd, prefix, library_id, book_id, ext, mtime, copy_func, extra_etag_data=''):
    ''' Return a cached filesystem copy of some library data, refreshing it if stale.

    We cannot copy files directly from the library folder to the output
    socket, as this can potentially lock the library for an extended period. So
    instead we copy out the data from the library folder into a temp folder. We
    make sure to only do this copy once, using the previous copy, if there have
    been no changes to the data for the file since the last copy.

    :param ctx: Server context; only ``ctx.testing`` is read here.
    :param rd: Request data; ``rd.tdir``, ``rd.outheaders`` and
        ``rd.filesystem_file_with_custom_etag()`` are used.
    :param prefix: Kind of item being cached; part of the cache file name and
        of the ETag.
    :param library_id: Library identifier; path separators in it are replaced
        by underscores before it is used in the file name.
    :param book_id: Integer book id (it is formatted with ``%x``).
    :param ext: Extension given to the cache file.
    :param mtime: Last modification time of the source data, in whatever form
        ``timestampfromdt()`` accepts (presumably a datetime).
    :param copy_func: Callable that receives the open cache file and writes
        the data into it.
    :param extra_etag_data: Extra string mixed into the ETag.
    :return: The result of ``rd.filesystem_file_with_custom_etag()``.
    '''
    # Avoid too many items in a single directory for performance
    base = os.path.join(rd.tdir, 'fcache', (('%x' % book_id)[-3:]))

    library_id = library_id.replace('\\', '_').replace('/', '_')
    bname = '%s-%s-%s.%s' % (prefix, library_id, book_id, ext)
    fname = os.path.join(base, bname)
    do_copy = True
    mtime = timestampfromdt(mtime)
    try:
        ans = lopen(fname, 'r+b')
        # The cached copy is stale if it was written before the last change
        do_copy = os.fstat(ans.fileno()).st_mtime < mtime
    except EnvironmentError:
        try:
            ans = lopen(fname, 'w+b')
        except EnvironmentError:
            # Most likely the cache directory does not exist yet. Creation is
            # best effort, a real problem is reported by the open() below.
            try:
                os.makedirs(base)
            except EnvironmentError:
                pass
            ans = lopen(fname, 'w+b')
        do_copy = True
    if do_copy:
        # A stale copy is opened with r+b, which keeps the old contents. Drop
        # them first, otherwise a shorter replacement would be followed by
        # left over bytes from the previous copy.
        ans.seek(0)
        ans.truncate()
        copy_func(ans)
        ans.seek(0)
    if ctx.testing:
        rd.outheaders['Used-Cache'] = 'no' if do_copy else 'yes'
    return rd.filesystem_file_with_custom_etag(ans, prefix, library_id, book_id, mtime, extra_etag_data)
def cover(ctx, rd, library_id, db, book_id, width=None, height=None):
    ''' Serve the cover of a book, or a thumbnail of it, via the file cache.

    When both width and height are None the cover is served as stored,
    otherwise it is passed through thumbnail() with the requested size.
    Raises HTTPNotFound if the db reports no cover modification time for the
    book. '''
    last_modified = db.cover_last_modified(book_id)
    if last_modified is None:
        raise HTTPNotFound('No cover for book: %r' % book_id)

    full_size = width is None and height is None
    cache_prefix = 'cover'
    if not full_size:
        # Every requested size gets its own cache entry
        cache_prefix += '-%sx%s' % (width, height)

    def write_cover(dest):
        if full_size:
            db.copy_cover_to(book_id, dest)
            return
        raw = BytesIO()
        db.copy_cover_to(book_id, raw)
        # Keep the configured JPEG quality within the range 50-99
        configured = tweaks['content_server_thumbnail_compression_quality']
        quality = min(99, max(50, configured))
        _w, _h, thumb_data = thumbnail(
            raw.getvalue(), width=width, height=height, compression_quality=quality)
        dest.write(thumb_data)

    return create_file_copy(ctx, rd, cache_prefix, library_id, book_id, 'jpg', last_modified, write_cover)
def book_fmt(ctx, rd, library_id, db, book_id, fmt):
    ''' Serve one format of a book via the file cache.

    For the formats in update_metadata_in_fmts the current metadata
    (transformed by a content server plugboard, if one is found) is written
    into the served copy. Also sets the Content-Disposition header. Raises
    NoSuchFormat if the db has no metadata for the format. '''
    fmt_info = db.format_metadata(book_id, fmt)
    if not fmt_info:
        raise NoSuchFormat()
    last_modified = fmt_info['mtime']
    embed_metadata = fmt in update_metadata_in_fmts
    etag_extra = ''

    if not embed_metadata:
        mi = db.get_proxy_metadata(book_id)
    else:
        mi = db.get_metadata(book_id)
        # The served file also changes whenever the metadata changes
        last_modified = max(last_modified, mi.last_modified)
        # Get any plugboards for the content server
        plugboards = db.pref('plugboards')
        if plugboards:
            cpb = find_plugboard(plugboard_content_server_value, fmt, plugboards)
            if cpb:
                # Transform the metadata via the plugboard
                transformed = mi.deepcopy_metadata()
                transformed.template_to_attribute(mi, cpb)
                mi = transformed
                # Make the plugboard part of the ETag
                etag_extra = repr(cpb)

    def write_format(dest):
        db.copy_format_to(book_id, fmt, dest)
        if embed_metadata:
            set_metadata(dest, mi, fmt)
            dest.seek(0)

    authors = authors_to_string(mi.authors or [_('Unknown')])
    title = mi.title or _('Unknown')
    download_name = '%s - %s_%s.%s' % (title[:30], authors[:30], book_id, fmt)
    # The name is placed inside double quotes in the header below
    download_name = ascii_filename(download_name).replace('"', '_')
    rd.outheaders['Content-Disposition'] = 'attachment; filename="%s"' % download_name

    return create_file_copy(
        ctx, rd, 'fmt', library_id, book_id, fmt, last_modified, write_format,
        extra_etag_data=etag_extra)
# }}}
@endpoint('/static/{+what}', auth_required=False, cache_control=24)
def static(ctx, rd, what):
@ -29,3 +136,27 @@ def static(ctx, rd, what):
@endpoint('/favicon.png', auth_required=False, cache_control=24)
def favicon(ctx, rd):
    ''' Serve the lt.png image resource as the favicon. '''
    icon_path = I('lt.png')
    return lopen(icon_path, 'rb')
@endpoint('/get/{what}/{book_id}/{library_id=None}', types={'book_id':int})
def get(ctx, rd, what, book_id, library_id):
    ''' Serve a cover, a thumbnail or an ebook format for a book.

    ``what`` is one of ``cover``, ``thumb``, ``thumb_WxH`` or a format name.
    Raises HTTPNotFound for an unknown library, book or format. '''
    db = ctx.get_library(library_id)
    if db is None:
        raise HTTPNotFound('Library %r not found' % library_id)
    # From here on use the id the server knows the library by
    library_id = db.server_library_id

    with db.safe_read_lock:
        if not db.has_id(book_id):
            raise HTTPNotFound('Book with id %r does not exist' % book_id)

        if what == 'cover':
            return cover(ctx, rd, library_id, db, book_id)

        if what == 'thumb' or what.startswith('thumb_'):
            size_spec = what.partition('_')[2]
            try:
                w, h = map(int, size_spec.partition('x')[::2])
            except Exception:
                # Missing or malformed size, use the default thumbnail size
                w, h = 60, 80
            return cover(ctx, rd, library_id, db, book_id, width=w, height=h)

        # TODO: Implement opf and json
        requested_fmt = what.lower()
        try:
            return book_fmt(ctx, rd, library_id, db, book_id, requested_fmt)
        except NoSuchFormat:
            raise HTTPNotFound('No %r format for the book %r' % (requested_fmt, book_id))

View File

@ -6,23 +6,62 @@ from __future__ import (unicode_literals, division, absolute_import,
__license__ = 'GPL v3'
__copyright__ = '2015, Kovid Goyal <kovid at kovidgoyal.net>'
import os
from importlib import import_module
from threading import Lock
from calibre import force_unicode
from calibre.db.cache import Cache
from calibre.db.legacy import create_backend, LibraryDatabase
from calibre.srv.routes import Router
def init_library(library_path):
    ''' Create a Cache on top of a backend for the library at library_path,
    initialize it and return it. '''
    backend = create_backend(library_path)
    cache = Cache(backend)
    cache.init()
    return cache
class LibraryBroker(object):
    ''' Maps library ids to libraries and opens the libraries on first use.

    The id of a library is the base name of its path, with a numeric suffix
    added when several libraries share the same base name. ``lmap`` holds the
    path of a library until it is first requested, and the initialized library
    object afterwards. '''

    def __init__(self, libraries):
        ''' :param libraries: Sequence of library paths. Paths at which no
        library exists are ignored. The first path is the default library. '''
        self.libraries = libraries
        self.lock = Lock()
        self.lmap = {}
        # Remains None when the first path is not a usable library, so that
        # get() reports "not found" instead of failing with AttributeError
        self.default_library = None
        for i, path in enumerate(libraries):
            if not LibraryDatabase.exists_at(path):
                continue
            library_id = base = force_unicode(os.path.basename(path))
            c = 0
            while library_id in self.lmap:
                # Use the counter in the suffix, a constant suffix would
                # never terminate for a third library with the same name
                c += 1
                library_id = base + ' (%d)' % c
            if i == 0:
                self.default_library = library_id
            self.lmap[library_id] = path

    def get(self, library_id=None):
        ''' Return the library with the specified id, opening it if needed.

        A falsy library_id selects the default library. Returns None if the
        id is unknown. If opening the library fails, the error is re-raised
        and the id is mapped to None, so that later calls return None. '''
        with self.lock:
            library_id = library_id or self.default_library
            ans = self.lmap.get(library_id)
            if ans is None:
                return None
            # A value without an init() method is a path not yet opened
            if not callable(getattr(ans, 'init', None)):
                try:
                    self.lmap[library_id] = ans = init_library(ans)
                    ans.server_library_id = library_id
                except Exception:
                    self.lmap[library_id] = ans = None
                    raise
            return ans
class Context(object):
log = None
url_for = None
def __init__(self, libraries, opts):
# Store the server options, create the broker that hands out libraries by id
# and remember whether the server is running as part of the test suite.
def __init__(self, libraries, opts, testing=False):
self.opts = opts
self.library_broker = LibraryBroker(libraries)
# Read by create_file_copy(), which adds a Used-Cache header when this is set
self.testing = testing
def init_session(self, endpoint, data):
pass
@ -30,10 +69,13 @@ class Context(object):
def finalize_session(self, endpoint, data, output):
pass
# Look up a library by id via the library broker. With no id the broker's
# default library is used; the broker returns None for an unknown id.
def get_library(self, library_id=None):
return self.library_broker.get(library_id)
class Handler(object):
def __init__(self, libraries, opts):
self.router = Router(ctx=Context(libraries, opts), url_prefix=opts.url_prefix)
def __init__(self, libraries, opts, testing=False):
self.router = Router(ctx=Context(libraries, opts, testing=testing), url_prefix=opts.url_prefix)
for module in ('content',):
module = import_module('calibre.srv.' + module)
self.router.load_routes(vars(module).itervalues())

View File

@ -12,6 +12,7 @@ from io import BytesIO, DEFAULT_BUFFER_SIZE
from itertools import chain, repeat, izip_longest
from operator import itemgetter
from functools import wraps
from future_builtins import map
from calibre import guess_type, force_unicode
from calibre.constants import __version__
@ -178,13 +179,22 @@ def get_range_parts(ranges, content_type, content_length): # {{{
return list(map(part, ranges)) + [('--%s--' % MULTIPART_SEPARATOR).encode('ascii')]
# }}}
class ETaggedFile(object): # {{{
    ''' An open file together with the ETag that should be used when it is
    served, instead of an ETag derived from the file itself. '''

    def __init__(self, output, etag):
        self.output = output
        self.etag = etag

    def fileno(self):
        ''' The file descriptor of the wrapped file. '''
        wrapped = self.output
        return wrapped.fileno()
# }}}
class RequestData(object): # {{{
cookies = {}
username = None
def __init__(self, method, path, query, inheaders, request_body_file, outheaders, response_protocol,
static_cache, opts, remote_addr, remote_port, translator_cache):
static_cache, opts, remote_addr, remote_port, translator_cache, tdir):
(self.method, self.path, self.query, self.inheaders, self.request_body_file, self.outheaders,
self.response_protocol, self.static_cache, self.translator_cache) = (
@ -197,6 +207,7 @@ class RequestData(object): # {{{
self.outcookie = Cookie()
self.lang_code = self.gettext_func = self.ngettext_func = None
self.set_translator(self.get_preferred_language())
self.tdir = tdir
def generate_static_output(self, name, generator):
ans = self.static_cache.get(name)
@ -204,6 +215,12 @@ class RequestData(object): # {{{
ans = self.static_cache[name] = StaticOutput(generator())
return ans
def filesystem_file_with_custom_etag(self, output, *etag_parts):
    ''' Wrap an open file so that it is served with an ETag computed from
    etag_parts rather than from the file itself.

    :param output: The open file to serve.
    :param etag_parts: Values identifying the content. Each is converted to
        text and fed, in order, into a SHA1 hash.
    :return: An ETaggedFile holding output and the hex digest.
    '''
    etag = hashlib.sha1()
    string = type('')
    for part in etag_parts:
        # Hash explicitly encoded bytes. Passing text directly relies on an
        # implicit ASCII encoding, which fails for non-ASCII parts such as a
        # library id taken from a folder name. For ASCII the result is the same.
        etag.update(string(part).encode('utf-8'))
    return ETaggedFile(output, etag.hexdigest())
def read(self, size=-1):
return self.request_body_file.read(size)
@ -249,7 +266,12 @@ class ReadableOutput(object):
self.src_file.seek(0)
def filesystem_file_output(output, outheaders, stat_result):
etag = '"%s"' % hashlib.sha1(type('')(stat_result.st_mtime) + force_unicode(output.name or '')).hexdigest()
etag = getattr(output, 'etag', None)
if etag is None:
etag = hashlib.sha1(type('')(stat_result.st_mtime) + force_unicode(output.name or '')).hexdigest()
else:
output = output.output
etag = '"%s"' % etag
self = ReadableOutput(output, etag=etag, content_length=stat_result.st_size)
self.name = output.name
self.use_sendfile = True
@ -358,7 +380,7 @@ class HTTPConnection(HTTPRequest):
data = RequestData(
self.method, self.path, self.query, inheaders, request_body_file,
outheaders, self.response_protocol, self.static_cache, self.opts,
self.remote_addr, self.remote_port, self.translator_cache
self.remote_addr, self.remote_port, self.translator_cache, self.tdir
)
self.queue_job(self.run_request_handler, data)

View File

@ -59,6 +59,7 @@ class LibraryBaseTest(BaseTest):
db.init()
db.set_cover({1:I('lt.png', data=True), 2:I('polish.png', data=True)})
db.add_format(1, 'FMT1', BytesIO(b'book1fmt1'), run_hooks=False)
db.add_format(1, 'EPUB', open(P('quick_start/eng.epub'), 'rb'), run_hooks=False)
db.add_format(1, 'FMT2', BytesIO(b'book1fmt2'), run_hooks=False)
db.add_format(2, 'FMT1', BytesIO(b'book2fmt1'), run_hooks=False)
db.backend.conn.close()
@ -124,11 +125,12 @@ class LibraryServer(TestServer):
from calibre.srv.http_response import create_http_handler
opts = Options(**kwargs)
self.libraries = libraries or (library_path,)
self.handler = Handler(libraries, opts)
self.handler = Handler(self.libraries, opts, testing=True)
self.loop = ServerLoop(
create_http_handler(self.handler.dispatch),
opts=opts,
plugins=plugins,
log=ServerLog(level=ServerLog.WARN),
)
self.handler.set_log(self.loop.log)
specialize(self)

View File

@ -7,12 +7,15 @@ __license__ = 'GPL v3'
__copyright__ = '2015, Kovid Goyal <kovid at kovidgoyal.net>'
import httplib
from io import BytesIO
from calibre.ebooks.metadata.epub import get_metadata
from calibre.srv.tests.base import LibraryBaseTest
from calibre.utils.magick.draw import identify_data
class ContentTest(LibraryBaseTest):
def test_static(self):
def test_static(self): # {{{
'Test serving of static content'
with self.create_server() as server:
conn = server.connect()
@ -48,3 +51,101 @@ class ContentTest(LibraryBaseTest):
test('content-server/empty.html', '/static/empty.html')
test('images/lt.png', '/favicon.png')
# }}}
def test_get(self): # {{{
'Test /get'
with self.create_server() as server:
db = server.handler.router.ctx.get_library()
conn = server.connect()
# Request /get/<what>/<book_id>[/<library_id>] and return (response, body)
def get(what, book_id, library_id=None):
conn.request('GET', '/get/%s/%s' % (what, book_id) + (('/' + library_id) if library_id else ''))
r = conn.getresponse()
return r, r.read()
# Test various invalid parameters
def bad(*args):
r, data = get(*args)
self.ae(r.status, httplib.NOT_FOUND)
# Each of these requests must result in a 404
bad('xxx', 1)
bad('fmt1', 10)
bad('fmt1', 1, 'zzzz')
bad('fmt1', 'xx')
# Test simple fetching of format without metadata update
r, data = get('fmt1', 1, db.server_library_id)
self.ae(data, db.format(1, 'fmt1'))
self.assertIsNotNone(r.getheader('Content-Disposition'))
self.ae(r.getheader('Used-Cache'), 'no')
# The second request for the same data must be served from the cached copy
r, data = get('fmt1', 1)
self.ae(data, db.format(1, 'fmt1'))
self.ae(r.getheader('Used-Cache'), 'yes')
# Test fetching of format with metadata update
raw = P('quick_start/eng.epub', data=True)
r, data = get('epub', 1)
self.ae(r.status, httplib.OK)
etag = r.getheader('ETag')
self.assertIsNotNone(etag)
self.ae(r.getheader('Used-Cache'), 'no')
self.assertTrue(data.startswith(b'PK'))
self.assertGreaterEqual(len(data), len(raw))
# Changing the metadata must give a new ETag and a fresh copy containing the new title
db.set_field('title', {1:'changed'})
r, data = get('epub', 1)
self.assertNotEqual(r.getheader('ETag'), etag)
etag = r.getheader('ETag')
self.ae(r.getheader('Used-Cache'), 'no')
mi = get_metadata(BytesIO(data), extract_cover=False)
self.ae(mi.title, 'changed')
r, data = get('epub', 1)
self.ae(r.getheader('Used-Cache'), 'yes')
# Test plugboards
import calibre.library.save_to_disk as c
# NOTE(review): DEBUG is switched off while the plugboard is in use and restored afterwards, presumably to silence debug output; confirm
orig, c.DEBUG = c.DEBUG, False
try:
db.set_pref('plugboards', {u'epub': {u'content_server': [[u'changed, {title}', u'title']]}})
# this is needed as the cache is not invalidated for plugboard changes
db.set_field('title', {1:'again'})
r, data = get('epub', 1)
self.assertNotEqual(r.getheader('ETag'), etag)
etag = r.getheader('ETag')
self.ae(r.getheader('Used-Cache'), 'no')
mi = get_metadata(BytesIO(data), extract_cover=False)
self.ae(mi.title, 'changed, again')
finally:
c.DEBUG = orig
# Test the serving of covers
r, data = get('cover', 1)
self.ae(r.status, httplib.OK)
self.ae(data, db.cover(1))
self.ae(r.getheader('Used-Cache'), 'no')
r, data = get('cover', 1)
self.ae(r.status, httplib.OK)
self.ae(data, db.cover(1))
self.ae(r.getheader('Used-Cache'), 'yes')
# A cover request for book id 3 must result in a 404
r, data = get('cover', 3)
self.ae(r.status, httplib.NOT_FOUND)
# Thumbnail without an explicit size, served fresh first and from the cache on the second request
r, data = get('thumb', 1)
self.ae(r.status, httplib.OK)
self.ae(identify_data(data), (60, 60, 'jpeg'))
self.ae(r.getheader('Used-Cache'), 'no')
r, data = get('thumb', 1)
self.ae(r.status, httplib.OK)
self.ae(r.getheader('Used-Cache'), 'yes')
# Thumbnail with an explicit size is cached separately from the default one
r, data = get('thumb_100x100', 1)
self.ae(r.status, httplib.OK)
self.ae(identify_data(data), (100, 100, 'jpeg'))
self.ae(r.getheader('Used-Cache'), 'no')
r, data = get('thumb_100x100', 1)
self.ae(r.status, httplib.OK)
self.ae(r.getheader('Used-Cache'), 'yes')
# Setting the cover again must invalidate the cached thumbnail
db.set_cover({1:I('lt.png', data=True)})
r, data = get('thumb_100x100', 1)
self.ae(r.status, httplib.OK)
self.ae(identify_data(data), (100, 100, 'jpeg'))
self.ae(r.getheader('Used-Cache'), 'no')
# }}}

View File

@ -117,6 +117,10 @@ def thumbnail(data, width=120, height=120, bgcolor='#ffffff', fmt='jpg',
img = Image()
img.load(data)
owidth, oheight = img.size
if width is None:
width = owidth
if height is None:
height = oheight
if not preserve_aspect_ratio:
scaled = owidth > width or oheight > height
nwidth = width