From e6380faa01835be77c8ad634108c0e24bfba5d99 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 22 Nov 2015 07:52:55 +0530 Subject: [PATCH] Start work on generating JSON representation of Tag Browser tree --- src/calibre/srv/code.py | 4 +- src/calibre/srv/handler.py | 11 +- src/calibre/srv/metadata.py | 365 ++++++++++++++++++++++++++++++++++-- src/pyj/session.pyj | 9 +- src/pyj/srv.pyj | 4 +- 5 files changed, 368 insertions(+), 25 deletions(-) diff --git a/src/calibre/srv/code.py b/src/calibre/srv/code.py index 5b77b4b243..26a2cb2cbd 100644 --- a/src/calibre/srv/code.py +++ b/src/calibre/srv/code.py @@ -90,6 +90,8 @@ def interface_data(ctx, rd): Return the data needed to create the server main UI Optional: ?num=50&sort=timestamp.desc&library_id= + &sort_tags_by=name&partition_method=first letter&collapse_at=25& + &dont_collapse= ''' ans = {'username':rd.username} ans['library_map'], ans['default_library'] = ctx.library_map @@ -118,7 +120,7 @@ def interface_data(ctx, rd): sanitize_sort_field_name(db.field_metadata, k), v) for k, v in sf.iteritems()), key=lambda (field, name):sort_key(name)) ans['field_metadata'] = db.field_metadata.all_metadata() - ans['categories'] = categories_as_json(ctx.get_categories(rd, db)) + ans['categories'] = categories_as_json(ctx, rd, db) mdata = ans['metadata'] = {} for book_id in ans['search_result']['book_ids']: data = book_as_json(db, book_id) diff --git a/src/calibre/srv/handler.py b/src/calibre/srv/handler.py index 23e0d4b4e7..60a52aa469 100644 --- a/src/calibre/srv/handler.py +++ b/src/calibre/srv/handler.py @@ -111,19 +111,20 @@ class Context(object): ans = data.allowed_book_ids[db.server_library_id] = db.all_book_ids() return ans - def get_categories(self, data, db, restrict_to_ids=None): + def get_categories(self, data, db, restrict_to_ids=None, sort='name', first_letter_sort=True): if restrict_to_ids is None: restrict_to_ids = self.allowed_book_ids(data, db) + key = (restrict_to_ids, sort, first_letter_sort) with self.lock: cache = self.library_broker.category_caches[db.server_library_id] - old = cache.pop(restrict_to_ids, None) + old = cache.pop(key, None) if old is None or old[0] <= db.last_modified(): - categories = db.get_categories(book_ids=restrict_to_ids) - cache[restrict_to_ids] = old = (utcnow(), categories) + categories = db.get_categories(book_ids=restrict_to_ids, sort=sort, first_letter_sort=first_letter_sort) + cache[key] = old = (utcnow(), categories) if len(cache) > self.CATEGORY_CACHE_SIZE: cache.popitem(last=False) else: - cache[restrict_to_ids] = old + cache[key] = old return old[1] def search(self, data, db, query, restrict_to_ids=None): diff --git a/src/calibre/srv/metadata.py b/src/calibre/srv/metadata.py index 33237211e6..11904e28ca 100644 --- a/src/calibre/srv/metadata.py +++ b/src/calibre/srv/metadata.py @@ -4,11 +4,15 @@ from __future__ import (unicode_literals, division, absolute_import, print_function) +from copy import copy +from collections import namedtuple from datetime import datetime, time from calibre.db.categories import Tag from calibre.utils.date import isoformat, UNDEFINED_DATE, local_tz -from calibre.utils.icu import sort_key, collation_order +from calibre.utils.config import tweaks +from calibre.utils.formatter import EvalFormatter +from calibre.utils.icu import collation_order IGNORED_FIELDS = frozenset('cover ondevice path marked id au_map'.split()) @@ -48,26 +52,353 @@ _include_fields = frozenset(Tag.__slots__) - frozenset({ 'state', 'is_editable', 'is_searchable', 'original_name', 'use_sort_as_name', 'is_hierarchical' }) -def category_item_as_json(x): - sname = x.sort or x.name - ans = {'sort_key': tuple(bytearray(sort_key(sname))), 'first_letter_sort_key': collation_order(icu_upper(sname or ' '))} +def category_as_json(items, category, display_name, count, tooltip=None, parent=None, + is_editable=True, is_gst=False, is_hierarchical=False, is_searchable=True, + is_user_category=False): + ans = {'category': category, 'name': display_name, 'is_category':True, 'count':count} + if tooltip: + ans['tooltip'] = tooltip + if parent: + ans['parent'] = parent + if is_editable: + ans['is_editable'] = True + if is_gst: + ans['is_gst'] = True + if is_hierarchical: + ans['is_hierarchical'] = is_hierarchical + if is_searchable: + ans['is_searchable'] = True + if is_user_category: + ans['is_user_category'] = True + item_id = 'c' + str(len(items)) + items[item_id] = ans + return item_id + +def category_item_as_json(x, clear_rating=False): + ans = {} for k in _include_fields: val = getattr(x, k) - if isinstance(val, set): - val = tuple(val) if val is not None: ans[k] = val if x.use_sort_as_name: - ans['original_name'], ans['name'] = ans['name'], ans['sort'] - del ans['sort'] - elif x.sort == x.name: - del ans['sort'] + ans['name'] = ans['sort'] + if x.original_name != ans['name']: + ans['original_name'] = x.original_name + ans.pop('sort', None) + if clear_rating: + del ans['avg_rating'] return ans -def categories_as_json(categories): - ans = [] - f = category_item_as_json - for category in sorted(categories, key=sort_key): - items = tuple(f(x) for x in categories[category]) - ans.append((category, items)) - return ans +CategoriesSettings = namedtuple( + 'CategoriesSettings', 'dont_collapse collapse_model collapse_at sort_by template using_hierarchy grouped_search_terms') + +def categories_settings(query, db): + dont_collapse = frozenset(query.get('dont_collapse', '').split(',')) + partition_method = query.get('partition_method', 'first letter') + if partition_method not in {'first letter', 'disable', 'partition'}: + partition_method = 'first letter' + try: + collapse_at = max(0, int(query.get('collapse_at', 25))) + except Exception: + collapse_at = 25 + sort_by = query.get('sort_tags_by', 'name') + if sort_by not in {'name', 'popularity', 'rating'}: + sort_by = 'name' + collapse_model = partition_method if collapse_at else 'disable' + template = None + if collapse_model != 'disable': + if sort_by != 'name': + collapse_model = 'partition' + template = tweaks['categories_collapsed_%s_template' % sort_by] + using_hierarchy = frozenset(db.pref('categories_using_hierarchy', [])) + return CategoriesSettings( + dont_collapse, collapse_model, collapse_at, sort_by, template, using_hierarchy, db.pref('grouped_search_terms', {})) + +def create_toplevel_tree(category_data, items, field_metadata, opts): + # Create the basic tree, containing all top level categories , user + # categories and grouped search terms + last_category_node, category_node_map, root = None, {}, {'id':None, 'children':[]} + node_id_map = {} + category_nodes = [] + order = tweaks['tag_browser_category_order'] + defvalue = order.get('*', 100) + categories = [category for category in field_metadata if category in category_data] + scats = sorted(categories, key=lambda x: order.get(x, defvalue)) + + for category in scats: + is_user_category = category.startswith('@') + is_gst, tooltip = (is_user_category and category[1:] in opts.grouped_search_terms), '' + cdata = category_data[category] + if is_gst: + tooltip = _('The grouped search term name is "{0}"').format(category) + elif category != 'news': + cust_desc = '' + fm = field_metadata[category] + if fm['is_custom']: + cust_desc = fm['display'].get('description', '') + if cust_desc: + cust_desc = '\n' + _('Description:') + ' ' + cust_desc + tooltip = _('The lookup/search name is "{0}"{1}').format(category, cust_desc) + + if is_user_category: + path_parts = category.split('.') + path = '' + last_category_node = None + current_root = root + for i, p in enumerate(path_parts): + path += p + if path not in category_node_map: + last_category_node = category_as_json( + items, path, (p[1:] if i == 0 else p), len(cdata), + parent=last_category_node, tooltip=tooltip, + is_gst=is_gst, is_editable=((not is_gst) and (i == (len(path_parts)-1))), + is_hierarchical=False if is_gst else 5, is_user_category=True + ) + node_id_map[last_category_node] = category_node_map[path] = node = {'id':last_category_node, 'children':[]} + category_nodes.append(last_category_node) + current_root['children'].append(node) + current_root = node + else: + current_root = category_node_map[path] + last_category_node = current_root['id'] + path += '.' + else: + last_category_node = category_as_json( + items, category, field_metadata[category]['name'], len(cdata), + tooltip=tooltip + ) + category_node_map[category] = node_id_map[last_category_node] = node = {'id':last_category_node, 'children':[]} + root['children'].append(node) + category_nodes.append(last_category_node) + + return root, node_id_map, category_nodes + +def build_first_letter_list(category_items): + # Build a list of 'equal' first letters by noticing changes + # in ICU's 'ordinal' for the first letter. In this case, the + # first letter can actually be more than one letter long. + cl_list = [None] * len(category_items) + last_ordnum = 0 + last_c = ' ' + for idx, tag in enumerate(category_items): + if not tag.sort: + c = ' ' + else: + c = icu_upper(tag.sort) + ordnum, ordlen = collation_order(c) + if last_ordnum != ordnum: + last_c = c[0:ordlen] + last_ordnum = ordnum + cl_list[idx] = last_c + return cl_list + +categories_with_ratings = {'authors', 'series', 'publisher', 'tags'} + +def get_name_components(name): + components = filter(None, [t.strip() for t in name.split('.')]) + if not components or '.'.join(components) != name: + components = [name] + return components + +def collapse_partition(items, category_node, idx, tag, opts, top_level_component, + cat_len, category_is_hierarchical, category_items, eval_formatter, is_gst, + last_idx, node_parent): + # Only partition at the top level. This means that we must not do a break + # until the outermost component changes. + if idx >= last_idx + opts.collapse_at and not tag.original_name.startswith(top_level_component+'.'): + last = idx + opts.collapse_at - 1 if cat_len > idx + opts.collapse_at else cat_len - 1 + if category_is_hierarchical: + ct = copy(category_items[last]) + components = get_name_components(ct.original_name) + ct.sort = ct.name = components[0] + # Do the first node after the last node so that the components + # array contains the right values to be used later + ct2 = copy(tag) + components = get_name_components(ct2.original_name) + ct2.sort = ct2.name = components[0] + format_data = {'last': ct, 'first':ct2} + else: + format_data = {'first': tag, 'last': category_items[last]} + + name = eval_formatter.safe_format(opts.template, format_data, '##TAG_VIEW##', None) + if not name.startswith('##TAG_VIEW##'): + # Formatter succeeded + node_id = category_as_json( + items, items[category_node['id']].category, name, 0, + parent=category_node['id'], is_editable=False, is_gst=is_gst, + is_hierarchical=category_is_hierarchical, is_searchable=False) + node_parent = {'id':node_id, 'children':[]} + category_node['children'].append(node_parent) + last_idx = idx # remember where we last partitioned + return last_idx, node_parent + +def collapse_first_letter(items, category_node, cl_list, idx, is_gst, category_is_hierarchical, collapse_letter, node_parent): + cl = cl_list[idx] + if cl != collapse_letter: + collapse_letter = cl + node_id = category_as_json( + items, items[category_node['id']]['category'], collapse_letter, 0, + parent=category_node['id'], is_editable=False, is_gst=is_gst, + is_hierarchical=category_is_hierarchical) + node_parent = {'id':node_id, 'children':[]} + category_node['children'].append(node_parent) + return collapse_letter, node_parent + +def process_category_node(category_node, items, category_data, eval_formatter, field_metadata, opts, tag_map, hierarchical_tags, node_to_tag_map): + category = items[category_node['id']]['category'] + category_items = category_data[category] + cat_len = len(category_items) + if cat_len <= 0: + return + collapse_letter = None + is_gst = items[category_node['id']].get('is_gst', False) + collapse_model = 'disable' if category in opts.dont_collapse else opts.collapse_model + fm = field_metadata[category] + category_child_map = {} + is_user_category = fm['kind'] == 'user' and not is_gst + top_level_component = 'z' + category_items[0].original_name + last_idx = -opts.collapse_at + category_is_hierarchical = ( + category in opts.using_hierarchy and opts.sort_by == 'name' and + category not in {'authors', 'publisher', 'news', 'formats', 'rating'} + ) + clear_rating = category not in categories_with_ratings and not fm['is_custom'] and not fm['kind'] == 'user' + collapsible = collapse_model != 'disable' and cat_len > opts.collapse_at + partitioned = collapse_model == 'partition' + cl_list = build_first_letter_list(category_items) if collapsible and collapse_model == 'first letter' else () + node_parent = category_node + + def create_tag_node(tag, parent): + # User categories contain references to items in other categories, so + # reflect that in the node structure as well. + node_data = tag_map.get(id(tag), None) + if node_data is None: + node_id = 'n%d' % len(tag_map) + node_data = items[node_id] = category_item_as_json(tag, clear_rating=clear_rating) + tag_map[id(tag)] = (node_id, node_data) + node_to_tag_map[node_id] = tag + else: + node_id, node_data = node_data + node = {'id':node_id, 'children':[]} + parent['children'].append(node) + return node, node_data + + for idx, tag in enumerate(category_items): + + if collapsible: + if partitioned: + last_idx, node_parent = collapse_partition( + items, category_node, idx, tag, opts, top_level_component, + cat_len, category_is_hierarchical, category_items, + eval_formatter, is_gst, last_idx, node_parent) + else: # by 'first letter' + collapse_letter, node_parent = collapse_first_letter( + items, category_node, cl_list, idx, is_gst, category_is_hierarchical, collapse_letter, node_parent) + else: + node_parent = category_node + + tag_is_hierarchical = id(tag) in hierarchical_tags + components = get_name_components(tag.original_name) if category_is_hierarchical or tag_is_hierarchical else (tag.original_name,) + + if not tag_is_hierarchical and ( + is_user_category or not category_is_hierarchical or len(components) == 1 or + (fm['is_custom'] and fm['display'].get('is_names', False)) + ): # A non-hierarchical leaf item in a non-hierarchical category + node, item = create_tag_node(tag, node_parent) + category_child_map[item['name'], item['category']] = node + else: + orig_node_parent = node_parent + for i, component in enumerate(components): + if i == 0: + child_map = category_child_map + else: + child_map = {} + for sibling in node_parent['children']: + item = items[sibling['id']] + if not item.get('is_category', False): + child_map[item['name'], item['category']] = sibling + cm_key = component, tag.category + if cm_key in child_map: + node_parent = child_map[cm_key] + items[node_parent['id']]['is_hierarchical'] = 3 if tag.category == 'search' else 5 + hierarchical_tags.add(id(node_to_tag_map[node_parent['id']])) + else: + if i < len(components) - 1: # Non-leaf node + t = copy(tag) + t.original_name, t.count = '.'.join(components[:i+1]), 0 + t.is_editable, t.is_searchable = False, category == 'search' + node_parent, item = create_tag_node(t, node_parent) + hierarchical_tags.add(id(t)) + else: + node_parent, item = create_tag_node(tag, node_parent) + if not is_user_category: + item['original_name'] = tag.name + item['name'] = component + item['is_hierarchical'] = 3 if tag.category == 'search' else 5 + hierarchical_tags.add(id(tag)) + child_map[cm_key] = node_parent + items[node_parent['id']]['id_set'] |= tag.id_set + node_parent = orig_node_parent + + +def fillout_tree(root, items, node_id_map, category_nodes, category_data, field_metadata, opts): + eval_formatter = EvalFormatter() + tag_map, hierarchical_tags, node_to_tag_map = {}, set(), {} + first, later = [], [] + # User categories have to be processed after normal categories as they can + # reference hierarchical nodes that were created only during processing of + # normal categories + for category_node_id in category_nodes: + cnode = items[category_node_id] + coll = later if cnode.get('is_user_category', False) else first + coll.append(node_id_map[category_node_id]) + + for coll in (first, later): + for cnode in coll: + process_category_node(cnode, items, category_data, eval_formatter, field_metadata, opts, tag_map, hierarchical_tags, node_to_tag_map) + + # Do not store id_set in the tag items as it is a lot of data, with not + # much use. Instead only update the counts based on id_set + for item_id, item in tag_map.itervalues(): + id_len = len(item.pop('id_set', ())) + if id_len: + item['count'] = id_len + +def render_categories(field_metadata, opts, category_data): + items = {} + root, node_id_map, category_nodes = create_toplevel_tree(category_data, items, field_metadata, opts) + fillout_tree(root, items, node_id_map, category_nodes, category_data, field_metadata, opts) + return {'root':root, 'item_map': items} + +def categories_as_json(ctx, rd, db): + opts = categories_settings(rd.query, db) + category_data = ctx.get_categories(rd, db, sort=opts.sort_by, first_letter_sort=opts.collapse_model == 'first letter') + render_categories(db.field_metadata, opts, category_data) + +def dump_categories_tree(data): + root, items = data['root'], data['item_map'] + ans, indent = [], ' ' + def dump_node(node, level=0): + item = items[node['id']] + on = item.get('original_name', '') + if on: + on += ' ' + try: + ans.append(indent*level + item['name'] + ' [%scount=%s]' % (on, item['count'])) + except KeyError: + print(item) + raise + for child in node['children']: + dump_node(child, level+1) + if level == 0: + ans.append('') + [dump_node(c) for c in root['children']] + return '\n'.join(ans) + +def test_tag_browser(library_path=None): + from calibre.library import db + db = db(library_path).new_api + opts = categories_settings({}, db) + category_data = db.get_categories(sort=opts.sort_by, first_letter_sort=opts.collapse_model == 'first letter') + data = render_categories(db.field_metadata, opts, category_data) + print(dump_categories_tree(data)) diff --git a/src/pyj/session.pyj b/src/pyj/session.pyj index a1926af0d6..d1d41a456a 100644 --- a/src/pyj/session.pyj +++ b/src/pyj/session.pyj @@ -4,8 +4,15 @@ from ajax import ajax_send defaults = { + # Book list settings 'view_mode': 'cover_grid', - 'sort': 'timestamp.desc', + 'sort': 'timestamp.desc', # comma separated list of items of the form: field.order + + # Tag Browser settings + 'partition_method': 'first letter', # other choices: 'disable', 'partition' + 'collapse_at': 25, # number of items at which sub-groups are created, 0 to disable + 'dont_collapse': '', # comma separated list of category names + 'sort_tags_by': 'name', # other choices: popularity, rating } def storage_available(which): diff --git a/src/pyj/srv.pyj b/src/pyj/srv.pyj index d7031c60c9..ac838eba3e 100644 --- a/src/pyj/srv.pyj +++ b/src/pyj/srv.pyj @@ -32,7 +32,9 @@ def on_library_load_progress(loaded, total): def load_book_list(): temp = UserSessionData(None, {}) # So that settings for anonymous users are preserved - query = {'library_id':temp.get('library_id'), 'sort':temp.get('sort')} + query = {k:temp.get(k) for k in str.split( + 'library_id sort partition_method collapse_at dont_collapse sort_tags_by' + )} ajax('interface-data/init', on_library_loaded, on_library_load_progress, query=query).send() def on_load():