Merge from trunk

Charles Haley 2010-05-24 22:25:29 +01:00
commit e86b0e2a2e
3 changed files with 98 additions and 259 deletions


@@ -626,20 +626,24 @@ class ResultCache(SearchQueryParser):
         self._map.sort(cmp=fcmp, reverse=not ascending)
         self._map_filtered = [id for id in self._map if id in self._map_filtered]
 
-    def search(self, query, return_matches=False):
+    def search(self, query, return_matches=False,
+            ignore_search_restriction=False):
         if not query or not query.strip():
-            q = self.search_restriction
-        else:
-            q = '%s (%s)' % (self.search_restriction, query)
+            q = ''
+            if not ignore_search_restriction:
+                q = self.search_restriction
+        elif not ignore_search_restriction:
+            q = u'%s (%s)' % (self.search_restriction, query)
         if not q:
             if return_matches:
-                return list(self.map) # when return_matches, do not update the maps!
+                return list(self._map) # when return_matches, do not update the maps!
             self._map_filtered = list(self._map)
             return []
         matches = sorted(self.parse(q))
+        ans = [id for id in self._map if id in matches]
         if return_matches:
-            return [id for id in self._map if id in matches]
-        self._map_filtered = [id for id in self._map if id in matches]
+            return ans
+        self._map_filtered = ans
         return []
 
     def set_search_restriction(self, s):
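
Aside (not part of the commit): the new ignore_search_restriction keyword changes how the active search restriction is combined with the incoming query. A minimal standalone sketch of that composition logic follows; compose_query is an illustrative name, and the final else branch is an assumption, since the hunk above does not show the case where a query is given while the restriction is ignored.

def compose_query(query, search_restriction, ignore_search_restriction=False):
    # Empty query: fall back to the restriction unless it is ignored.
    if not query or not query.strip():
        q = ''
        if not ignore_search_restriction:
            q = search_restriction
    # Non-empty query: AND it with the restriction unless it is ignored.
    elif not ignore_search_restriction:
        q = u'%s (%s)' % (search_restriction, query)
    # Assumed fallback: use the query on its own.
    else:
        q = query
    return q

# compose_query('dickens', 'tag:unread')       -> u'tag:unread (dickens)'
# compose_query('dickens', 'tag:unread', True) -> 'dickens'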


@@ -6,13 +6,28 @@ __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 
 from calibre.utils.date import utcnow
+from calibre.utils.ordered_dict import OrderedDict
 
 class Cache(object):
 
-    @property
-    def categories_cache(self):
-        old = getattr(self, '_category_cache', None)
+    def add_routes(self, c):
+        self._category_cache = OrderedDict()
+        self._search_cache = OrderedDict()
+
+    def search_cache(self, search):
+        old = self._search_cache.get(search, None)
         if old is None or old[0] <= self.db.last_modified():
-            categories = self.db.get_categories()
-            self._category_cache = (utcnow(), categories)
-        return self._category_cache[1]
+            matches = self.db.data.search(search)
+            self._search_cache[search] = frozenset(matches)
+            if len(self._search_cache) > 10:
+                self._search_cache.popitem(last=False)
+
+    def categories_cache(self, restrict_to=frozenset([])):
+        old = self._category_cache.get(frozenset(restrict_to), None)
+        if old is None or old[0] <= self.db.last_modified():
+            categories = self.db.get_categories(ids=restrict_to)
+            self._category_cache[restrict_to] = (utcnow(), categories)
+            if len(self._category_cache) > 10:
+                self._category_cache.popitem(last=False)
+        return self._category_cache[restrict_to][1]
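
Aside (not part of the commit): both caches above are capped at 10 entries by evicting the oldest insertion with OrderedDict.popitem(last=False). A minimal sketch of that bounded-cache pattern, using the stdlib collections.OrderedDict in place of the calibre.utils.ordered_dict backport imported above; BoundedCache, cap and compute are illustrative names and the time-based invalidation from the diff is omitted.

from collections import OrderedDict

class BoundedCache(object):
    '''Keep at most `cap` entries; on overflow drop the oldest insertion,
    which is what popitem(last=False) removes from an OrderedDict.'''

    def __init__(self, cap=10):
        self.cap = cap
        self._data = OrderedDict()

    def get(self, key, compute):
        if key not in self._data:
            self._data[key] = compute(key)
            if len(self._data) > self.cap:
                self._data.popitem(last=False)
        return self._data[key]

# cache = BoundedCache(cap=10)
# matches = cache.get('tag:unread', lambda q: frozenset())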


@@ -5,20 +5,20 @@ __license__ = 'GPL v3'
 __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 
-import re, hashlib
-from itertools import repeat
+import hashlib, binascii
 from functools import partial
 
-import cherrypy
 from lxml import etree
 from lxml.builder import ElementMaker
+import cherrypy
 
-from calibre.utils.genshi.template import MarkupTemplate
-from calibre.library.server.utils import strftime, expose
-from calibre.ebooks.metadata import fmt_sidx, title_sort
-from calibre import guess_type, prepare_string_for_xml
 from calibre.constants import __appname__
 
+BASE_HREFS = {
+        0 : '/stanza',
+        1 : '/opds',
+}
+
 # Vocabulary for building OPDS feeds {{{
 E = ElementMaker(namespace='http://www.w3.org/2005/Atom',
         nsmap={
@@ -42,7 +42,7 @@ NAVLINK = partial(E.link,
 def SEARCH(base_href, *args, **kwargs):
     kwargs['rel'] = 'search'
     kwargs['title'] = 'Search'
-    kwargs['href'] = base_href+'/?search={searchTerms}'
+    kwargs['href'] = base_href+'/search/{searchTerms}'
     return LINK(*args, **kwargs)
 
 def AUTHOR(name, uri=None):
@@ -53,11 +53,9 @@ def AUTHOR(name, uri=None):
 
 SUBTITLE = E.subtitle
 
-def NAVCATALOG_ENTRY(base_href, updated, title, description, query_data):
-    data = [u'%s=%s'%(key, val) for key, val in query_data.items()]
-    data = '&'.join(data)
-    href = base_href+'/?'+data
-    id_ = 'calibre-subcatalog:'+str(hashlib.sha1(href).hexdigest())
+def NAVCATALOG_ENTRY(base_href, updated, title, description, query):
+    href = base_href+'/navcatalog/'+binascii.hexlify(query)
+    id_ = 'calibre-navcatalog:'+str(hashlib.sha1(href).hexdigest())
     return E.entry(
         TITLE(title),
         ID(id_),
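
Aside (not part of the commit): the navigation-catalog query is now carried in the URL as a hex string and decoded again by opds_navcatalog() further down, where the first character selects the catalog type ('O' or 'N'). A small round-trip sketch, assuming Python 2 byte strings as in the surrounding code; encode_navcatalog and decode_navcatalog are illustrative names.

import binascii

def encode_navcatalog(type_, category):
    # type_ is 'O' or 'N', matching the 'Onewest'/'Otitle'/'N'+category
    # values built in opds() below.
    return binascii.hexlify(type_ + category)

def decode_navcatalog(which):
    raw = binascii.unhexlify(which)
    return raw[0], raw[1:]

# encode_navcatalog('N', 'authors')     -> '4e617574686f7273'
# decode_navcatalog('4e617574686f7273') -> ('N', 'authors')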
@@ -79,14 +77,15 @@ class TopLevel(Feed):
     def __init__(self,
             updated, # datetime object in UTC
             categories,
+            version,
             id_ = 'urn:calibre:main',
-            base_href = '/stanza'
             ):
+        base_href = BASE_HREFS[version]
         self.base_href = base_href
         subc = partial(NAVCATALOG_ENTRY, base_href, updated)
 
-        subcatalogs = [subc('By '+title,
-            'Books sorted by '+desc, {'sortby':q}) for title, desc, q in
+        subcatalogs = [subc(_('By ')+title,
+            _('Books sorted by ') + desc, q) for title, desc, q in
             categories]
 
         self.root = \
@@ -100,248 +99,69 @@ class TopLevel(Feed):
                 *subcatalogs
             )
 
-# Templates {{{
-
-STANZA_ENTRY=MarkupTemplate('''\
-<entry xmlns:py="http://genshi.edgewall.org/">
-    <title>${record[FM['title']]}</title>
-    <id>urn:calibre:${urn}</id>
-    <author><name>${authors}</name></author>
-    <updated>${timestamp}</updated>
-    <link type="${mimetype}" href="/get/${fmt}/${record[FM['id']]}" />
-    <link rel="x-stanza-cover-image" type="image/jpeg" href="/get/cover/${record[FM['id']]}" />
-    <link rel="x-stanza-cover-image-thumbnail" type="image/jpeg" href="/get/thumb/${record[FM['id']]}" />
-    <content type="xhtml">
-        <div xmlns="http://www.w3.org/1999/xhtml" style="text-align: center">${Markup(extra)}${record[FM['comments']]}</div>
-    </content>
-</entry>
-''')
-
-STANZA_SUBCATALOG_ENTRY=MarkupTemplate('''\
-<entry xmlns:py="http://genshi.edgewall.org/">
-    <title>${title}</title>
-    <id>urn:calibre:${id}</id>
-    <updated>${updated.strftime('%Y-%m-%dT%H:%M:%S+00:00')}</updated>
-    <link type="application/atom+xml" href="/stanza/?${what}id=${id}" />
-    <content type="text">${count} books</content>
-</entry>
-''')
-
-# Feed of books
-STANZA = MarkupTemplate('''\
-<?xml version="1.0" encoding="utf-8"?>
-<feed xmlns="http://www.w3.org/2005/Atom" xmlns:py="http://genshi.edgewall.org/">
-    <title>calibre Library</title>
-    <id>$id</id>
-    <updated>${updated.strftime('%Y-%m-%dT%H:%M:%S+00:00')}</updated>
-    <link rel="search" title="Search" type="application/atom+xml" href="/stanza/?search={searchTerms}"/>
-    ${Markup(next_link)}
-    <author>
-        <name>calibre</name>
-        <uri>http://calibre-ebook.com</uri>
-    </author>
-    <subtitle>
-        ${subtitle}
-    </subtitle>
-    <py:for each="entry in data">
-        ${Markup(entry)}
-    </py:for>
-</feed>
-''')
-# }}}
+STANZA_FORMATS = frozenset(['epub', 'pdb'])
 
 class OPDSServer(object):
 
-    def build_top_level(self, updated, base_href='/stanza'):
-        categories = self.categories_cache
-        categories = [(x.capitalize(), x.capitalize(), x) for x in
-                categories.keys()]
-        categories.append(('Title', 'Title', '|title|'))
-        categories.append(('Newest', 'Newest', '|newest|'))
+    def add_routes(self, connect):
+        for base in ('stanza', 'opds'):
+            version = 0 if base == 'stanza' else 1
+            base_href = BASE_HREFS[version]
+            connect(base, base_href, self.opds, version=version)
+            connect('opdsnavcatalog_'+base, base_href+'/navcatalog/{which}',
+                    self.opds_navcatalog, version=version)
+            connect('opdssearch_'+base, base_href+'/search/{terms}',
+                    self.opds_search, version=version)
 
-        return TopLevel(updated, categories, base_href=base_href)
+    def get_opds_allowed_ids_for_version(self, version):
+        search = '' if version > 0 else ' '.join(['format:='+x for x in
+            STANZA_FORMATS])
+        self.seach_cache(search)
 
-    def get_matches(self, location, query):
-        base = self.db.data.get_matches(location, query)
-        epub = self.db.data.get_matches('format', '=epub')
-        pdb = self.db.data.get_matches('format', '=pdb')
-        return base.intersection(epub.union(pdb))
+    def opds_search(self, terms=None, version=0):
+        version = int(version)
+        if not terms or version not in BASE_HREFS:
+            raise cherrypy.HTTPError(404, 'Not found')
 
-    def stanza_sortby_subcategory(self, updated, sortby, offset):
-        pat = re.compile(r'\(.*\)')
+    def opds_navcatalog(self, which=None, version=0):
+        version = int(version)
+        if not which or version not in BASE_HREFS:
+            raise cherrypy.HTTPError(404, 'Not found')
+        which = binascii.unhexlify(which)
+        type_ = which[0]
+        which = which[1:]
+        if type_ == 'O':
+            return self.get_opds_all_books(which)
+        elif type_ == 'N':
+            return self.get_opds_navcatalog(which)
+        raise cherrypy.HTTPError(404, 'Not found')
 
-        def clean_author(x):
-            return pat.sub('', x).strip()
-
-        def author_cmp(x, y):
-            x = x if ',' in x else clean_author(x).rpartition(' ')[-1]
-            y = y if ',' in y else clean_author(y).rpartition(' ')[-1]
-            return cmp(x.lower(), y.lower())
-
-        def get_author(x):
-            pref, ___, suff = clean_author(x).rpartition(' ')
-            return suff + (', '+pref) if pref else suff
-
-        what, subtitle = sortby[2:], ''
-        if sortby == 'byseries':
-            data = self.db.all_series()
-            data = [(x[0], x[1], len(self.get_matches('series', '='+x[1]))) for x in data]
-            subtitle = 'Books by series'
-        elif sortby == 'byauthor':
-            data = self.db.all_authors()
-            data = [(x[0], x[1], len(self.get_matches('authors', '='+x[1]))) for x in data]
-            subtitle = 'Books by author'
-        elif sortby == 'bytag':
-            data = self.db.all_tags2()
-            data = [(x[0], x[1], len(self.get_matches('tags', '='+x[1]))) for x in data]
-            subtitle = 'Books by tag'
-        fcmp = author_cmp if sortby == 'byauthor' else cmp
-        data = [x for x in data if x[2] > 0]
-        data.sort(cmp=lambda x, y: fcmp(x[1], y[1]))
-        next_offset = offset + self.max_stanza_items
-        rdata = data[offset:next_offset]
-        if next_offset >= len(data):
-            next_offset = -1
-        gt = get_author if sortby == 'byauthor' else lambda x: x
-        entries = [STANZA_SUBCATALOG_ENTRY.generate(title=gt(title), id=id,
-            what=what, updated=updated, count=c).render('xml').decode('utf-8') for id,
-            title, c in rdata]
-        next_link = ''
-        if next_offset > -1:
-            next_link = ('<link rel="next" title="Next" '
-            'type="application/atom+xml" href="/stanza/?sortby=%s&amp;offset=%d"/>\n'
-            ) % (sortby, next_offset)
-        return STANZA.generate(subtitle=subtitle, data=entries, FM=self.db.FIELD_MAP,
-            updated=updated, id='urn:calibre:main', next_link=next_link).render('xml')
 
-    @expose
-    def stanza(self, search=None, sortby=None, authorid=None, tagid=None,
-            seriesid=None, offset=0):
-        'Feeds to read calibre books on a ipod with stanza.'
-        books = []
+    def opds(self, version=0):
+        version = int(version)
+        if version not in BASE_HREFS:
+            raise cherrypy.HTTPError(404, 'Not found')
+        categories = self.categories_cache(
+                self.get_opds_allowed_ids_for_version(version))
+        category_meta = self.db.get_tag_browser_categories()
+        cats = [
+                (_('Newest'), _('Date'), 'Onewest'),
+                (_('Title'), _('Title'), 'Otitle'),
+                ]
+        for category in categories:
+            if category == 'formats':
+                continue
+            meta = category_meta.get(category, None)
+            if meta is None:
+                continue
+            cats.append((meta['name'], meta['name'], 'N'+category))
         updated = self.db.last_modified()
-        offset = int(offset)
 
         cherrypy.response.headers['Last-Modified'] = self.last_modified(updated)
         cherrypy.response.headers['Content-Type'] = 'text/xml'
 
-        # Top Level feed
-        if not sortby and not search and not authorid and not tagid and not seriesid:
-            return str(self.build_top_level(updated))
+        feed = TopLevel(updated, cats, version)
 
-        if sortby in ('byseries', 'byauthor', 'bytag'):
-            return self.stanza_sortby_subcategory(updated, sortby, offset)
-
-        # Get matching ids
-        if authorid:
-            authorid=int(authorid)
-            au = self.db.author_name(authorid)
-            ids = self.get_matches('authors', au)
-        elif tagid:
-            tagid=int(tagid)
-            ta = self.db.tag_name(tagid)
-            ids = self.get_matches('tags', ta)
-        elif seriesid:
-            seriesid=int(seriesid)
-            se = self.db.series_name(seriesid)
-            ids = self.get_matches('series', se)
-        else:
-            ids = self.db.data.parse(search) if search and search.strip() else self.db.data.universal_set()
-
-        record_list = list(iter(self.db))
-        FM = self.db.FIELD_MAP
-        # Sort the record list
-        if sortby == "bytitle" or authorid or tagid:
-            record_list.sort(lambda x, y:
-                    cmp(title_sort(x[FM['title']]),
-                        title_sort(y[FM['title']])))
-        elif seriesid:
-            record_list.sort(lambda x, y:
-                    cmp(x[FM['series_index']],
-                        y[FM['series_index']]))
-        else: # Sort by date
-            record_list = reversed(record_list)
 
+        return str(feed)
 
-        fmts = FM['formats']
-        pat = re.compile(r'EPUB|PDB', re.IGNORECASE)
-        record_list = [x for x in record_list if x[FM['id']] in ids and
-                pat.search(x[fmts] if x[fmts] else '') is not None]
-        next_offset = offset + self.max_stanza_items
-        nrecord_list = record_list[offset:next_offset]
-        if next_offset >= len(record_list):
-            next_offset = -1
-
-        next_link = ''
-        if next_offset > -1:
-            q = ['offset=%d'%next_offset]
-            for x in ('search', 'sortby', 'authorid', 'tagid', 'seriesid'):
-                val = locals()[x]
-                if val is not None:
-                    val = prepare_string_for_xml(unicode(val), True)
-                    q.append('%s=%s'%(x, val))
-            next_link = ('<link rel="next" title="Next" '
-            'type="application/atom+xml" href="/stanza/?%s"/>\n'
-            ) % '&amp;'.join(q)
-
-        for record in nrecord_list:
-            r = record[FM['formats']]
-            r = r.upper() if r else ''
-
-            z = record[FM['authors']]
-            if not z:
-                z = _('Unknown')
-            authors = ' & '.join([i.replace('|', ',') for i in
-                    z.split(',')])
-
-            # Setup extra description
-            extra = []
-            rating = record[FM['rating']]
-            if rating > 0:
-                rating = ''.join(repeat('&#9733;', rating))
-                extra.append('RATING: %s<br />'%rating)
-            tags = record[FM['tags']]
-            if tags:
-                extra.append('TAGS: %s<br />'%\
-                        prepare_string_for_xml(', '.join(tags.split(','))))
-            series = record[FM['series']]
-            if series:
-                extra.append('SERIES: %s [%s]<br />'%\
-                        (prepare_string_for_xml(series),
-                        fmt_sidx(float(record[FM['series_index']]))))
-
-            fmt = 'epub' if 'EPUB' in r else 'pdb'
-            mimetype = guess_type('dummy.'+fmt)[0]
-
-            # Create the sub-catalog, which is either a list of
-            # authors/tags/series or a list of books
-            data = dict(
-                    record=record,
-                    updated=updated,
-                    authors=authors,
-                    tags=tags,
-                    series=series,
-                    FM=FM,
-                    extra='\n'.join(extra),
-                    mimetype=mimetype,
-                    fmt=fmt,
-                    urn=record[FM['uuid']],
-                    timestamp=strftime('%Y-%m-%dT%H:%M:%S+00:00',
-                        record[FM['timestamp']])
-                    )
-            books.append(STANZA_ENTRY.generate(**data)\
-                    .render('xml').decode('utf8'))
-
-        return STANZA.generate(subtitle='', data=books, FM=FM,
-                next_link=next_link, updated=updated, id='urn:calibre:main').render('xml')
-
-if __name__ == '__main__':
-    from datetime import datetime
-    f = TopLevel(datetime.utcnow())
-    print f
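
Aside (not part of the commit): add_routes() above registers URL templates such as '/opds/navcatalog/{which}' and '/opds/search/{terms}', and the server's dispatcher fills the bracketed segments into the handlers' keyword arguments, with version supplied by the defaults passed to connect(). The connect callable itself comes from the surrounding server code and is not shown in this diff; the toy matcher below only models that template-to-kwargs behaviour and is not the cherrypy/Routes API.

import re

def match_route(template, path):
    # '/opds/navcatalog/{which}' matched against '/opds/navcatalog/4e61...'
    # yields {'which': '4e61...'}.
    pattern = re.sub(r'\{(\w+)\}', r'(?P<\1>[^/]+)', template)
    m = re.match('^%s$' % pattern, path)
    return m.groupdict() if m else None

# match_route('/opds/navcatalog/{which}', '/opds/navcatalog/4e617574686f7273')
#   -> {'which': '4e617574686f7273'}
# The handler would then be invoked as opds_navcatalog(which='4e61...', version=1).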