Add support for HTTP byte-serving

2025-07-09 03:04:10 -04:00 · 2015-05-22 11:47:28 +05:30 · 2015-05-22 11:47:28 +05:30 · 03347846d9
commit 03347846d9
parent c03ab24377
4 changed files with 205 additions and 39 deletions
--- a/src/calibre/srv/errors.py
+++ b/src/calibre/srv/errors.py
@ -30,3 +30,9 @@ class IfNoneMatch(Exception):

 class BadChunkedInput(ValueError):
    pass
+
+class RangeNotSatisfiable(ValueError):
+    ''' Signals that a request for byte ranges cannot be satisfied. The
+    content_length attribute stores the value passed to the constructor, so
+    that whoever handles the exception can report it to the client. '''
+
+    def __init__(self, content_length):
+        super(RangeNotSatisfiable, self).__init__()
+        self.content_length = content_length
--- a/src/calibre/srv/http.py
+++ b/src/calibre/srv/http.py
@ -16,7 +16,7 @@ from operator import itemgetter
 from calibre import as_unicode
 from calibre.constants import __version__
 from calibre.srv.errors import (
-    MaxSizeExceeded, NonHTTPConnRequest, HTTP404, IfNoneMatch, BadChunkedInput)
+    MaxSizeExceeded, NonHTTPConnRequest, HTTP404, IfNoneMatch, BadChunkedInput, RangeNotSatisfiable)
 from calibre.srv.respond import finalize_output, generate_static_output
 from calibre.srv.utils import MultiDict, http_date, socket_errors_to_ignore

@ -499,14 +499,26 @@ class HTTPPair(object):
        ]
        if etag is not None:
            buf.append('ETag: ' + etag)
-        for header in ('Expires', 'Cache-Control', 'Vary'):
-            val = self.outheaders.get(header)
-            if val:
-                buf.append(header + ': ' + val)
+        self.send_buf(buf)
+
+    def send_buf(self, buf, include_cache_headers=True):
+        ''' Terminate the list of header lines in buf and send it to the
+        client with a single flushed write. When include_cache_headers is
+        True, any non-empty Expires, Cache-Control and Vary values found in
+        self.outheaders are appended to buf first. Note that buf is modified
+        in place. '''
+        if include_cache_headers:
+            for header in ('Expires', 'Cache-Control', 'Vary'):
+                val = self.outheaders.get(header)
+                if val:
+                    buf.append(header + ': ' + val)
+        # The empty entry becomes the blank line that ends the header block
         buf.append('')
         buf = [(x + '\r\n').encode('ascii') for x in buf]
         self.flushed_write(b''.join(buf))

+    def send_range_not_satisfiable(self, content_length):
+        ''' Send a "Requested Range Not Satisfiable" response. It consists of
+        the status line, a Date header and a Content-Range header of the form
+        "bytes */<content_length>", and is handed to send_buf() for
+        transmission. '''
+        code = httplib.REQUESTED_RANGE_NOT_SATISFIABLE
+        status_line = '%s %d %s' % (self.response_protocol, code, httplib.responses[code])
+        self.send_buf([
+            status_line,
+            "Date: " + http_date(),
+            "Content-Range: bytes */%d" % content_length,
+        ])
+
    def flushed_write(self, data):
        self.conn.socket_file.write(data)
        self.conn.socket_file.flush()
@ -552,6 +564,9 @@ class HTTPPair(object):
            else:
                self.simple_response(httplib.PRECONDITION_FAILED)
            return
+        except RangeNotSatisfiable as e:
+            self.send_range_not_satisfiable(e.content_length)
+            return

        with self.conn.corked:
            self.send_headers()
--- a/src/calibre/srv/respond.py
+++ b/src/calibre/srv/respond.py
@ -6,16 +6,22 @@ from __future__ import (unicode_literals, division, absolute_import,
 __license__ = 'GPL v3'
 __copyright__ = '2015, Kovid Goyal <kovid at kovidgoyal.net>'

-import os, hashlib, shutil, httplib, zlib, struct, time
+import os, hashlib, shutil, httplib, zlib, struct, time, uuid
 from io import DEFAULT_BUFFER_SIZE, BytesIO
+from collections import namedtuple
+from functools import partial
+from future_builtins import map
+from itertools import izip_longest

-from calibre import force_unicode
-from calibre.srv.errors import IfNoneMatch
+from calibre import force_unicode, guess_type
+from calibre.srv.errors import IfNoneMatch, RangeNotSatisfiable
+
+# A single byte range. start and stop are inclusive offsets into the content
+# and size is the number of bytes covered (stop - start + 1).
+Range = namedtuple('Range', 'start stop size')
+# Boundary string used to separate the parts of multipart/byteranges
+# responses. It is generated once, when this module is imported.
+MULTIPART_SEPARATOR = uuid.uuid4().hex.decode('ascii')

 def get_ranges(headervalue, content_length):
-    """Return a list of (start, num_of_bytes) indices from a Range header, or None.
-    If this function returns an empty list, it indicates no valid range was found.
-    """
+    ''' Return a list of ranges from the Range header. If this function returns
+    an empty list, it indicates no valid range was found. '''
    if not headervalue:
        return None

@ -40,7 +46,7 @@ def get_ranges(headervalue, content_length):
                continue
            if stop < start:
                continue
-            result.append((start, stop - start + 1))
+            result.append(Range(start, stop, stop - start + 1))
        elif stop:
            # Negative subscript (last N bytes)
            try:
@ -48,9 +54,9 @@ def get_ranges(headervalue, content_length):
            except Exception:
                continue
            if stop > content_length:
-                result.append((0, content_length))
+                result.append(Range(0, content_length-1, content_length))
            else:
-                result.append((content_length - stop, stop))
+                result.append(Range(content_length - stop, content_length - 1, stop))

    return result

@ -111,24 +117,91 @@ def write_compressed_file_obj(input_file, dest, compress_level=6):
    write_chunked_data(dest, data)
    write_chunked_data(dest, b'')

+def get_range_parts(ranges, content_type, content_length):
+    ''' Build the per-part headers of a multipart/byteranges response.
+
+    Returns a list of ASCII encoded byte strings: one header block for every
+    range in ranges (the boundary line, a Content-Range header and, when
+    content_type is non-empty, a Content-Type header, each terminated by
+    CRLF) followed by the closing boundary, which has no trailing CRLF. '''
+    boundary = '--%s' % MULTIPART_SEPARATOR
+
+    def part(r):
+        lines = [boundary, 'Content-Range: bytes %d-%d/%d' % (r.start, r.stop, content_length)]
+        if content_type:
+            lines.append('Content-Type: %s' % content_type)
+        # Every header line, including the last one, ends with CRLF
+        return ('\r\n'.join(lines) + '\r\n').encode('ascii')
+
+    parts = [part(r) for r in ranges]
+    parts.append((boundary + '--').encode('ascii'))
+    return parts
+
+def parse_multipart_byterange(buf, content_type):
+    ''' Parse the body of a multipart/byteranges message.
+
+    buf is a file-like object positioned at the start of the body and
+    content_type is the value of the Content-Type header, whose trailing
+    boundary parameter names the part separator. Returns a list of
+    (start, data) tuples, one per part, in the order the parts appear.
+    Raises ValueError when a delimiter, a Content-Range header or a part
+    body is missing or malformed. '''
+    from calibre.srv.http import read_headers
+    # The boundary is whatever follows the last '=' in the header value. It
+    # may be surrounded by whitespace or enclosed in double quotes.
+    sep = content_type.rsplit('=', 1)[-1].strip().strip('"').encode('utf-8')
+    delimiter = b'--' + sep
+    close_delimiter = delimiter + b'--'
+    ans = []
+
+    def parse_part():
+        line = buf.readline()
+        if not line:
+            raise ValueError('Premature end of message')
+        if not line.startswith(delimiter):
+            raise ValueError('Malformed start of multipart message')
+        # Use startswith so that the closing delimiter is recognised whether
+        # or not it is followed by a line break
+        if line.startswith(close_delimiter):
+            return None
+        headers = read_headers(buf.readline)
+        cr = headers.get('Content-Range')
+        if not cr:
+            raise ValueError('Missing Content-Range header in sub-part')
+        if not cr.startswith('bytes '):
+            raise ValueError('Malformed Content-Range header in sub-part, no prefix')
+        try:
+            # cr looks like "bytes <start>-<stop>/<total>"
+            first, _, last = cr.partition(' ')[-1].partition('/')[0].partition('-')
+            start, stop = int(first.strip()), int(last.strip())
+        except Exception:
+            raise ValueError('Malformed Content-Range header in sub-part, failed to parse byte range')
+        content_length = stop - start + 1
+        ret = buf.read(content_length)
+        if len(ret) != content_length:
+            raise ValueError('Malformed sub-part, length of body not equal to length specified in Content-Range')
+        # Consume the line break that separates the body from the next delimiter
+        buf.readline()
+        return (start, ret)
+    while True:
+        data = parse_part()
+        if data is None:
+            break
+        ans.append(data)
+    return ans

 class FileSystemOutputFile(object):
+    ''' Response body backed by an open file object. The file can be sent
+    whole (write), compressed (write_compressed) or as one or more byte
+    ranges (write_ranges). Each of these drops the reference to the file
+    once it is done. '''

-    def __init__(self, output, outheaders):
-        self.output_file = output
-        pos = output.tell()
-        output.seek(0, os.SEEK_END)
-        self.content_length = output.tell() - pos
+    def __init__(self, output, outheaders, size):
+        # size is stored as the content length of the response
+        self.src_file = output
+        self.name = output.name
+        self.content_length = size
         self.etag = '"%s"' % hashlib.sha1(type('')(os.fstat(output.fileno()).st_mtime) + force_unicode(output.name or '')).hexdigest()
-        output.seek(pos)
         self.accept_ranges = True

     def write(self, dest):
-        shutil.copyfileobj(self.output_file, dest)
-        self.output_file = None
+        # Always send from the start of the file, whatever its current position
+        self.src_file.seek(0)
+        shutil.copyfileobj(self.src_file, dest)
+        self.src_file = None

     def write_compressed(self, dest):
-        write_compressed_file_obj(self.output_file, dest)
+        self.src_file.seek(0)
+        write_compressed_file_obj(self.src_file, dest)
+        self.src_file = None
+
+    def write_ranges(self, ranges, dest):
+        ''' Write the requested byte range(s) to dest. ranges is either a
+        single Range, whose bytes are written as is, or an iterable of
+        (Range, header) pairs. For every pair the header is written first. If
+        the Range is not None its bytes follow, wrapped in CRLF on both
+        sides. '''
+        if isinstance(ranges, Range):
+            r = ranges
+            self.copy_range(r.start, r.size, dest)
+        else:
+            for r, header in ranges:
+                dest.write(header)
+                if r is not None:
+                    dest.write(b'\r\n')
+                    self.copy_range(r.start, r.size, dest)
+                    dest.write(b'\r\n')
+        self.src_file = None
+
+    def copy_range(self, start, size, dest):
+        ''' Copy size bytes, starting at offset start, from the file to dest
+        in chunks of at most DEFAULT_BUFFER_SIZE bytes. Stops early if the
+        file ends before size bytes have been read. '''
+        self.src_file.seek(start)
+        while size > 0:
+            data = self.src_file.read(min(size, DEFAULT_BUFFER_SIZE))
+            if not data:
+                # Premature end of file, for example because the file was
+                # truncated after its size was measured. Without this check
+                # size would never shrink and the loop would spin forever.
+                break
+            dest.write(data)
+            size -= len(data)
+            del data

 class DynamicOutput(object):

@ -193,8 +266,19 @@ def parse_if_none_match(val):
 def finalize_output(output, inheaders, outheaders, status_code, is_http1, method, compress_min_size):
    ct = outheaders.get('Content-Type', '')
    compressible = not ct or ct.startswith('text/') or ct.startswith('image/svg') or ct.startswith('application/json')
-    if isinstance(output, file):
-        output = FileSystemOutputFile(output, outheaders)
+    try:
+        fd = output.fileno()
+        fsize = os.fstat(fd).st_size
+    except Exception:
+        fd = fsize = None
+    if fsize is not None:
+        output = FileSystemOutputFile(output, outheaders, fsize)
+        if 'Content-Type' not in outheaders:
+            # outheaders carries no Content-Type, so guess one from the file name
+            mt = guess_type(output.name)[0]
+            if mt:
+                if mt in ('text/plain', 'text/html'):
+                    # Append the charset parameter. This must be +=, since
+                    # "=+" applies unary plus to the string and raises
+                    # TypeError.
+                    mt += '; charset=UTF-8'
+                outheaders['Content-Type'] = mt
    elif isinstance(output, (bytes, type(''))):
        output = DynamicOutput(output, outheaders)
    elif isinstance(output, StaticGeneratedOutput):
@ -206,7 +290,12 @@ def finalize_output(output, inheaders, outheaders, status_code, is_http1, method
                    acceptable_encoding(inheaders.get('Accept-Encoding', '')) and not is_http1)
    accept_ranges = (not compressible and output.accept_ranges is not None and status_code == httplib.OK and
                     not is_http1)
-    ranges = None
+    ranges = get_ranges(inheaders.get('Range'), output.content_length) if output.accept_ranges and method in ('GET', 'HEAD') else None
+    if_range = (inheaders.get('If-Range') or '').strip()
+    if if_range and if_range != output.etag:
+        ranges = None
+    if ranges is not None and not ranges:
+        raise RangeNotSatisfiable(output.content_length)

    for header in ('Accept-Ranges', 'Content-Encoding', 'Transfer-Encoding', 'ETag', 'Content-Length'):
        outheaders.pop('header', all=True)
@ -216,8 +305,6 @@ def finalize_output(output, inheaders, outheaders, status_code, is_http1, method
    if matched:
        raise IfNoneMatch(output.etag)

-    # TODO: Ranges, If-Range
-
    if output.etag and method in ('GET', 'HEAD'):
        outheaders.set('ETag', output.etag, replace_all=True)
    if accept_ranges:
@ -230,6 +317,20 @@ def finalize_output(output, inheaders, outheaders, status_code, is_http1, method
    if compressible or output.content_length is None:
        outheaders.set('Transfer-Encoding', 'chunked', replace_all=True)

-    output.commit = output.write_compressed if compressible else output.write
+    if ranges:
+        if len(ranges) == 1:
+            r = ranges[0]
+            outheaders.set('Content-Length', '%d' % r.size, replace_all=True)
+            outheaders.set('Content-Range', 'bytes %d-%d/%d' % (r.start, r.stop, output.content_length), replace_all=True)
+            output.commit = partial(output.write_ranges, r)
+        else:
+            range_parts = get_range_parts(ranges, outheaders.get('Content-Type'), output.content_length)
+            size = sum(map(len, range_parts)) + sum(r.size + 4 for r in ranges)
+            outheaders.set('Content-Length', '%d' % size, replace_all=True)
+            outheaders.set('Content-Type', 'multipart/byteranges; boundary=' + MULTIPART_SEPARATOR, replace_all=True)
+            output.commit = partial(output.write_ranges, izip_longest(ranges, range_parts))
+        status_code = httplib.PARTIAL_CONTENT
+    else:
+        output.commit = output.write_compressed if compressible else output.write

    return status_code, output
--- a/src/calibre/srv/tests/http.py
+++ b/src/calibre/srv/tests/http.py
@ -8,8 +8,9 @@ __copyright__ = '2015, Kovid Goyal <kovid at kovidgoyal.net>'

 import textwrap, httplib, hashlib, zlib, string
 from io import BytesIO
+from tempfile import NamedTemporaryFile

-from calibre.ptempfile import PersistentTemporaryFile
+from calibre import guess_type
 from calibre.srv.tests.base import BaseTest, TestServer

 def headers(raw):
@ -73,17 +74,17 @@ class TestHTTP(BaseTest):
            if len(args) == 1 and args[0] is None:
                self.assertIsNone(pval, val)
            else:
-                self.assertListEqual(pval, list(args), val)
+                self.assertListEqual([tuple(x) for x in pval], list(args), val)
        test('crap', None)
        test('crap=', None)
        test('crap=1', None)
        test('crap=1-2', None)
        test('bytes=a-2')
-        test('bytes=0-99', (0, 100))
-        test('bytes=0-0,-1', (0, 1), (99, 1))
-        test('bytes=-5', (95, 5))
-        test('bytes=95-', (95, 5))
-        test('bytes=-200', (0, 100))
+        test('bytes=0-99', (0, 99, 100))
+        test('bytes=0-0,-1', (0, 0, 1), (99, 99, 1))
+        test('bytes=-5', (95, 99, 5))
+        test('bytes=95-', (95, 99, 5))
+        test('bytes=-200', (0, 99, 100))
    # }}}

    def test_http_basic(self):  # {{{
@ -190,12 +191,12 @@ class TestHTTP(BaseTest):

    def test_http_response(self):  # {{{
        'Test HTTP protocol responses'
+        from calibre.srv.respond import parse_multipart_byterange
        def handler(conn):
            return conn.generate_static_output('test', lambda : ''.join(conn.path))
-        with TestServer(handler, timeout=0.1, compress_min_size=0) as server, PersistentTemporaryFile('test.epub') as f:
+        with TestServer(handler, timeout=0.1, compress_min_size=0) as server, NamedTemporaryFile(suffix='test.epub') as f:
            fdata = string.ascii_letters * 100
-            f.write(fdata)
-            f.close()
+            f.write(fdata), f.seek(0)

            # Test ETag
            conn = server.connect()
@ -214,4 +215,47 @@ class TestHTTP(BaseTest):
            r = conn.getresponse()
            self.ae(r.status, httplib.OK), self.ae(zlib.decompress(r.read(), 16+zlib.MAX_WBITS), b'an_etagged_path')

+            # Test getting a filesystem file
+            server.change_handler(lambda conn: f)
+            conn = server.connect()
+            conn.request('GET', '/test')
+            r = conn.getresponse()
+            etag = type('')(r.getheader('ETag'))
+            self.assertTrue(etag)
+            self.ae(r.getheader('Content-Type'), guess_type(f.name)[0])
+            self.ae(type('')(r.getheader('Accept-Ranges')), 'bytes')
+            self.ae(int(r.getheader('Content-Length')), len(fdata))
+            self.ae(r.status, httplib.OK), self.ae(r.read(), fdata)
+
+            conn.request('GET', '/test', headers={'Range':'bytes=0-25'})
+            r = conn.getresponse()
+            self.ae(type('')(r.getheader('Accept-Ranges')), 'bytes')
+            self.ae(type('')(r.getheader('Content-Range')), 'bytes 0-25/%d' % len(fdata))
+            self.ae(int(r.getheader('Content-Length')), 26)
+            self.ae(r.status, httplib.PARTIAL_CONTENT), self.ae(r.read(), fdata[0:26])
+
+            conn.request('GET', '/test', headers={'Range':'bytes=100000-'})
+            r = conn.getresponse()
+            self.ae(type('')(r.getheader('Content-Range')), 'bytes */%d' % len(fdata))
+            self.ae(r.status, httplib.REQUESTED_RANGE_NOT_SATISFIABLE), self.ae(r.read(), b'')
+
+            conn.request('GET', '/test', headers={'Range':'bytes=25-50', 'If-Range':etag})
+            r = conn.getresponse()
+            self.ae(int(r.getheader('Content-Length')), 26)
+            self.ae(r.status, httplib.PARTIAL_CONTENT), self.ae(r.read(), fdata[25:51])
+
+            conn.request('GET', '/test', headers={'Range':'bytes=25-50', 'If-Range':'"nomatch"'})
+            r = conn.getresponse()
+            self.assertFalse(r.getheader('Content-Range'))
+            self.ae(int(r.getheader('Content-Length')), len(fdata))
+            self.ae(r.status, httplib.OK), self.ae(r.read(), fdata)
+
+            conn.request('GET', '/test', headers={'Range':'bytes=0-25,26-50'})
+            r = conn.getresponse()
+            clen = int(r.getheader('Content-Length'))
+            data = r.read()
+            self.ae(clen, len(data))
+            buf = BytesIO(data)
+            self.ae(parse_multipart_byterange(buf, r.getheader('Content-Type')), [(0, fdata[:26]), (26, fdata[26:51])])
+
    # }}}