From 8402205d11827cbda6f67d987e91e4d9a8e38e25 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 19 Jan 2013 11:35:14 +0530 Subject: [PATCH 01/11] Fix datetime tests and read many-many fields in id order --- src/calibre/db/tables.py | 2 +- src/calibre/db/tests/reading.py | 38 ++++++++++++++++----------------- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/src/calibre/db/tables.py b/src/calibre/db/tables.py index c5d7ee216c..58768c9ff5 100644 --- a/src/calibre/db/tables.py +++ b/src/calibre/db/tables.py @@ -151,7 +151,7 @@ class ManyToManyTable(ManyToOneTable): def read_maps(self, db): for row in db.conn.execute( - 'SELECT book, {0} FROM {1}'.format( + 'SELECT book, {0} FROM {1} ORDER BY id'.format( self.metadata['link_column'], self.link_table)): if row[1] not in self.col_book_map: self.col_book_map[row[1]] = [] diff --git a/src/calibre/db/tests/reading.py b/src/calibre/db/tests/reading.py index d1ff81440c..b722d30793 100644 --- a/src/calibre/db/tests/reading.py +++ b/src/calibre/db/tests/reading.py @@ -10,7 +10,7 @@ __docformat__ = 'restructuredtext en' import shutil, unittest, tempfile, datetime from cStringIO import StringIO -from calibre.utils.date import local_tz +from calibre.utils.date import utc_tz from calibre.db.tests.base import BaseTest class ReadingTest(BaseTest): @@ -37,12 +37,12 @@ class ReadingTest(BaseTest): 'tags': (), 'formats':(), 'identifiers': {}, - 'timestamp': datetime.datetime(2011, 9, 7, 13, 54, 41, - tzinfo=local_tz), - 'pubdate': datetime.datetime(2011, 9, 7, 13, 54, 41, - tzinfo=local_tz), - 'last_modified': datetime.datetime(2011, 9, 7, 13, 54, 41, - tzinfo=local_tz), + 'timestamp': datetime.datetime(2011, 9, 7, 19, 54, 41, + tzinfo=utc_tz), + 'pubdate': datetime.datetime(2011, 9, 7, 19, 54, 41, + tzinfo=utc_tz), + 'last_modified': datetime.datetime(2011, 9, 7, 19, 54, 41, + tzinfo=utc_tz), 'publisher': None, 'languages': (), 'comments': None, @@ -69,17 +69,17 @@ class ReadingTest(BaseTest): 'formats': (), 'rating': 4.0, 'identifiers': {'test':'one'}, - 'timestamp': datetime.datetime(2011, 9, 5, 15, 6, - tzinfo=local_tz), - 'pubdate': datetime.datetime(2011, 9, 5, 15, 6, - tzinfo=local_tz), + 'timestamp': datetime.datetime(2011, 9, 5, 21, 6, + tzinfo=utc_tz), + 'pubdate': datetime.datetime(2011, 9, 5, 21, 6, + tzinfo=utc_tz), 'publisher': 'Publisher One', 'languages': ('eng',), 'comments': '

Comments One

', '#enum':'One', '#authors':('Custom One', 'Custom Two'), - '#date':datetime.datetime(2011, 9, 5, 0, 0, - tzinfo=local_tz), + '#date':datetime.datetime(2011, 9, 5, 6, 0, + tzinfo=utc_tz), '#rating':2.0, '#series':'My Series One', '#series_index': 1.0, @@ -98,17 +98,17 @@ class ReadingTest(BaseTest): 'tags': ('Tag One',), 'formats':(), 'identifiers': {'test':'two'}, - 'timestamp': datetime.datetime(2011, 9, 6, 0, 0, - tzinfo=local_tz), - 'pubdate': datetime.datetime(2011, 8, 5, 0, 0, - tzinfo=local_tz), + 'timestamp': datetime.datetime(2011, 9, 6, 6, 0, + tzinfo=utc_tz), + 'pubdate': datetime.datetime(2011, 8, 5, 6, 0, + tzinfo=utc_tz), 'publisher': 'Publisher Two', 'languages': ('deu',), 'comments': '

Comments Two

', '#enum':'Two', '#authors':('My Author Two',), - '#date':datetime.datetime(2011, 9, 1, 0, 0, - tzinfo=local_tz), + '#date':datetime.datetime(2011, 9, 1, 6, 0, + tzinfo=utc_tz), '#rating':4.0, '#series':'My Series Two', '#series_index': 3.0, From 5b08c1ed60e754ddda0beb4ca529a0d8c09d97f5 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 19 Jan 2013 11:45:23 +0530 Subject: [PATCH 02/11] Fix get_metadata() test --- src/calibre/db/tests/base.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/calibre/db/tests/base.py b/src/calibre/db/tests/base.py index 3264465050..8e72721c4e 100644 --- a/src/calibre/db/tests/base.py +++ b/src/calibre/db/tests/base.py @@ -7,8 +7,8 @@ __license__ = 'GPL v3' __copyright__ = '2011, Kovid Goyal ' __docformat__ = 'restructuredtext en' - import unittest, os, shutil +from future_builtins import map class BaseTest(unittest.TestCase): @@ -39,7 +39,10 @@ class BaseTest(unittest.TestCase): 'ondevice_col', 'last_modified'}.union(allfk1) for attr in all_keys: if attr == 'user_metadata': continue + if attr == 'format_metadata': continue # TODO: Not implemented yet attr1, attr2 = getattr(mi1, attr), getattr(mi2, attr) + if attr == 'formats': + attr1, attr2 = map(lambda x:tuple(x) if x else (), (attr1, attr2)) self.assertEqual(attr1, attr2, '%s not the same: %r != %r'%(attr, attr1, attr2)) if attr.startswith('#'): From 3e4d847eeefe9d921ab84146d77fd145e6343234 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 19 Jan 2013 21:28:50 +0530 Subject: [PATCH 03/11] Fix Barrons login form parsing --- recipes/barrons.recipe | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/recipes/barrons.recipe b/recipes/barrons.recipe index 9d79aed728..41ed7e26ec 100644 --- a/recipes/barrons.recipe +++ b/recipes/barrons.recipe @@ -64,8 +64,8 @@ class Barrons(BasicNewsRecipe): br = BasicNewsRecipe.get_browser() if self.username is not None and self.password is not None: br.open('http://commerce.barrons.com/auth/login') - br.select_form(name='login_form') - br['user'] = self.username + br.select_form(nr=0) + br['username'] = self.username br['password'] = self.password br.submit() return br From 419f3b63947815ba32b3ab2d770ec046694bf8e1 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 19 Jan 2013 22:37:27 +0530 Subject: [PATCH 04/11] Add language dependent sorting for series like columns --- src/calibre/db/cache.py | 22 +++++++++---- src/calibre/db/fields.py | 33 +++++++++++++++---- src/calibre/db/tests/metadata.db | Bin 230400 -> 230400 bytes src/calibre/db/tests/reading.py | 53 ++++++++++++++++--------------- 4 files changed, 68 insertions(+), 40 deletions(-) diff --git a/src/calibre/db/cache.py b/src/calibre/db/cache.py index 10fe0bb014..a631f9ea46 100644 --- a/src/calibre/db/cache.py +++ b/src/calibre/db/cache.py @@ -269,11 +269,11 @@ class Cache(object): return () @read_api - def all_book_ids(self): + def all_book_ids(self, type=frozenset): ''' Frozen set of all known book ids. ''' - return frozenset(self.fields['uuid']) + return type(self.fields['uuid']) @read_api def all_field_ids(self, name): @@ -316,6 +316,10 @@ class Cache(object): self.format_metadata_cache[book_id][fmt] = ans return ans + @read_api + def pref(self, name): + return self.backend.prefs[name] + @api def get_metadata(self, book_id, get_cover=False, get_user_categories=True, cover_as_data=False): @@ -378,17 +382,21 @@ class Cache(object): all_book_ids = frozenset(self._all_book_ids() if ids_to_sort is None else ids_to_sort) get_metadata = partial(self._get_metadata, get_user_categories=False) + def get_lang(book_id): + ans = self._field_for('languages', book_id) + return ans[0] if ans else None fm = {'title':'sort', 'authors':'author_sort'} def sort_key(field): 'Handle series type fields' - ans = self.fields[fm.get(field, field)].sort_keys_for_books(get_metadata, - all_book_ids) idx = field + '_index' - if idx in self.fields: - idx_ans = self.fields[idx].sort_keys_for_books(get_metadata, - all_book_ids) + is_series = idx in self.fields + ans = self.fields[fm.get(field, field)].sort_keys_for_books( + get_metadata, get_lang, all_book_ids,) + if is_series: + idx_ans = self.fields[idx].sort_keys_for_books( + get_metadata, get_lang, all_book_ids) ans = {k:(v, idx_ans[k]) for k, v in ans.iteritems()} return ans diff --git a/src/calibre/db/fields.py b/src/calibre/db/fields.py index e154900031..3808052549 100644 --- a/src/calibre/db/fields.py +++ b/src/calibre/db/fields.py @@ -11,6 +11,8 @@ __docformat__ = 'restructuredtext en' from threading import Lock from calibre.db.tables import ONE_ONE, MANY_ONE, MANY_MANY +from calibre.ebooks.metadata import title_sort +from calibre.utils.config_base import tweaks from calibre.utils.icu import sort_key from calibre.utils.date import UNDEFINED_DATE from calibre.utils.localization import calibre_langcode_to_name @@ -72,7 +74,7 @@ class Field(object): ''' return iter(()) - def sort_keys_for_books(self, get_metadata, all_book_ids): + def sort_keys_for_books(self, get_metadata, get_lang, all_book_ids): ''' Return a mapping of book_id -> sort_key. The sort key is suitable for use in sorting the list of all books by this field, via the python cmp @@ -96,7 +98,7 @@ class OneToOneField(Field): def __iter__(self): return self.table.book_col_map.iterkeys() - def sort_keys_for_books(self, get_metadata, all_book_ids): + def sort_keys_for_books(self, get_metadata, get_lang, all_book_ids): return {id_ : self._sort_key(self.table.book_col_map.get(id_, self._default_sort_key)) for id_ in all_book_ids} @@ -133,7 +135,7 @@ class CompositeField(OneToOneField): ans = mi.get('#'+self.metadata['label']) return ans - def sort_keys_for_books(self, get_metadata, all_book_ids): + def sort_keys_for_books(self, get_metadata, get_lang, all_book_ids): return {id_ : sort_key(self.get_value_with_cache(id_, get_metadata)) for id_ in all_book_ids} @@ -170,7 +172,7 @@ class OnDeviceField(OneToOneField): def __iter__(self): return iter(()) - def sort_keys_for_books(self, get_metadata, all_book_ids): + def sort_keys_for_books(self, get_metadata, get_lang, all_book_ids): return {id_ : self.for_book(id_) for id_ in all_book_ids} @@ -196,7 +198,7 @@ class ManyToOneField(Field): def __iter__(self): return self.table.id_map.iterkeys() - def sort_keys_for_books(self, get_metadata, all_book_ids): + def sort_keys_for_books(self, get_metadata, get_lang, all_book_ids): ans = {id_ : self.table.book_col_map.get(id_, None) for id_ in all_book_ids} sk_map = {cid : (self._default_sort_key if cid is None else @@ -227,7 +229,7 @@ class ManyToManyField(Field): def __iter__(self): return self.table.id_map.iterkeys() - def sort_keys_for_books(self, get_metadata, all_book_ids): + def sort_keys_for_books(self, get_metadata, get_lang, all_book_ids): ans = {id_ : self.table.book_col_map.get(id_, ()) for id_ in all_book_ids} all_cids = set() @@ -248,7 +250,7 @@ class IdentifiersField(ManyToManyField): ids = default_value return ids - def sort_keys_for_books(self, get_metadata, all_book_ids): + def sort_keys_for_books(self, get_metadata, get_lang, all_book_ids): 'Sort by identifier keys' ans = {id_ : self.table.book_col_map.get(id_, ()) for id_ in all_book_ids} @@ -274,6 +276,21 @@ class FormatsField(ManyToManyField): def format_fname(self, book_id, fmt): return self.table.fname_map[book_id][fmt.upper()] +class SeriesField(ManyToOneField): + + def sort_key_for_series(self, book_id, get_lang, series_sort_order): + sid = self.table.book_col_map.get(book_id, None) + if sid is None: + return self._default_sort_key + return self._sort_key(title_sort(self.table.id_map[sid], + order=series_sort_order, + lang=get_lang(book_id))) + + def sort_keys_for_books(self, get_metadata, get_lang, all_book_ids): + sso = tweaks['title_series_sorting'] + return {book_id:self.sort_key_for_series(book_id, get_lang, sso) for book_id + in all_book_ids} + def create_field(name, table): cls = { ONE_ONE : OneToOneField, @@ -290,5 +307,7 @@ def create_field(name, table): cls = IdentifiersField elif table.metadata['datatype'] == 'composite': cls = CompositeField + elif table.metadata['datatype'] == 'series': + cls = SeriesField return cls(name, table) diff --git a/src/calibre/db/tests/metadata.db b/src/calibre/db/tests/metadata.db index 63a096e2f44d9ecce9f32da2f408d56ebe6949ce..da54c61ad5c83bde8917933e2d73f2e6ae370273 100644 GIT binary patch delta 991 zcmaKrTWC{B9L8tPcbdjnn?zHr_Oi#;P+N_?rlK253%XjWHnv^pO=8jW3085} zq!qmElM>-CvJVvm@nzQ%aUN8#DEK0*^}6e=X?+k8S`>?*4|Ps-7eVyEcjjZh;Wu-> z;lz(-$B$+o&zsZ17#sgAeqzarj1K0SC>ail?D^HMib_}IO0BYXRgHU9m3w(@ZPiM* zJ5gmF{#KTJZ?&GqTf9x$D_$Wdi*t!Kam6ABrZOg@_vy%-#T6q`k~5dNnmb2&Gq*7Q zPMN2aDr=QrltapCZb{@`dt(_Ga0X4Rba1eohCCRP7&ex;Jd&T79I56$dKyD<-h98x z%I#sjKjMq-^aXTZWPi9v4|Z+I36R%=9P{s8JsE7>$(T5J5XBsoH5Cclh=W87i{~1r zGsDC15-m6n9iF7^Mj2c=DCb`x{c};uw}tfHps!Dl`h$L{MOep4f(tTw(sn#1tuGgD zP6Hk|5+gTDcp&-mW`V;r!7zbaNxS(v5`T})E6az@9fyQhejLVq~<0xlGcSE#RV@~!SB-Q2CSh!Ho+}^Yr|pid*XE%R!;Q?kilzc z*b7aR?Z93x{^~><=l4at8*$KwI+W1TcKmGY{y@aH8k4p`Sm|057LuzQtw!>Y&g_Dg zS@h}iXtEB~O0_$UHAT|h(B^g&n$p%s;dTs&b0Ksnv@`$<9k0g%D%pw-gY^$B3cx{s zHcS4Chzz`qFpI2SY{Y-8zB+VX3PM$BGJs>^r(KB1i(u?AjkiFf!cLqJ;V7EYlTT>W zj|y>dKQ<^-*e|JtQFv)6giJBjE=w@3>FF}fqG?5RCWNy8>njpJ3}T$qRWBSOcOQDW zvB@)vt;Q44U56n~<;~bmGn+6=MB}jWOZ9+i{0`|SCEqE^`W1Nv52U>>bx-Q9)E%i& Usg%@h+TDVDN<}55FOCF00>i)|-v9sr delta 1190 zcmbVLU2GIp6ux)9*>%CPZK0J)f0(5PTWE3r+GPu-)&6RU{f|;w+JzmqtJ@!Ux3#u3 z6cd4HjF2H5OiYBtgb+h?vyuBC5426h2NQu(ph0QHgoN-S#J+&B-f0?$FUEK?U*_EN zopa{gIo};WVjn+ZKbpO&kumnG#L_xe;IlNc;yddTL1AC(Dt7vdoj%Q3TIMb-EAehA z_4!;Lzu`_zJmpHtJl-<5XUkTX)8p|bAEl;#iAVS&neM%h%oMK3x>k;rW_gvdCzLHk zS)!0!{En+x7D;nFHlyb8SWy*Cspj7^W@KG_m=oJjrulqD+Dc8X4e@ZV9*YNh2TZTu zxILBU2*-N#sNDLW1VsXUp}{~%H&dNO8YM!=pza_txu0f(DDtf1DsyG->F=wm2FNRjOo#?z0wuxkB0Tw=$Nr)(!<-0+R1#|$Ie|AR(qn**nIJ{ zQ91R2;`bUqOnFHSqAgpV;c|z0;Ahaxa1YHm3muaI(_Yr!`0 zO&wamXXxcIRMF=(C=p{ja0vW5P3*yo^wpbaqc3_97Wbo49<;s!8lBsT6mmxJq4>HP zt(@PWL?sI8+%B}xVjXNW-HO#BuNC{`3>dpf>E+0$GhNt8pH@k0ya8V7eg_V*|1H>X zJ_uE{4ofY%y_BefCj3np;^Ku)bW5!~x`4w&m`|^JdI1G# zC_@allp#tRN4DdJ^jw#ik+>!?Epb)iio~46WqS8@`H#8sr>&oF_IHoXZxsq tx4b0B$NqUqwk}sPJDsD`JqXca4OWQXN07#;p&C{?wHH=#^dJV5zX2&wWI6x< diff --git a/src/calibre/db/tests/reading.py b/src/calibre/db/tests/reading.py index b722d30793..d77d3ac6eb 100644 --- a/src/calibre/db/tests/reading.py +++ b/src/calibre/db/tests/reading.py @@ -63,7 +63,7 @@ class ReadingTest(BaseTest): 'sort': 'One', 'authors': ('Author One',), 'author_sort': 'One, Author', - 'series' : 'Series One', + 'series' : 'A Series One', 'series_index': 1.0, 'tags':('Tag Two', 'Tag One'), 'formats': (), @@ -92,7 +92,7 @@ class ReadingTest(BaseTest): 'sort': 'Title Two', 'authors': ('Author Two', 'Author One'), 'author_sort': 'Two, Author & One, Author', - 'series' : 'Series One', + 'series' : 'A Series One', 'series_index': 2.0, 'rating': 6.0, 'tags': ('Tag One',), @@ -130,30 +130,31 @@ class ReadingTest(BaseTest): 'Test sorting' cache = self.init_cache(self.library_path) for field, order in { - 'title' : [2, 1, 3], - 'authors': [2, 1, 3], - 'series' : [3, 2, 1], - 'tags' : [3, 1, 2], - 'rating' : [3, 2, 1], - # 'identifiers': [3, 2, 1], There is no stable sort since 1 and - # 2 have the same identifier keys - # TODO: Add an empty book to the db and ensure that empty - # fields sort the same as they do in db2 - 'timestamp': [2, 1, 3], - 'pubdate' : [1, 2, 3], - 'publisher': [3, 2, 1], - 'last_modified': [2, 1, 3], - 'languages': [3, 2, 1], - 'comments': [3, 2, 1], - '#enum' : [3, 2, 1], - '#authors' : [3, 2, 1], - '#date': [3, 1, 2], - '#rating':[3, 2, 1], - '#series':[3, 2, 1], - '#tags':[3, 2, 1], - '#yesno':[3, 1, 2], - '#comments':[3, 2, 1], - }.iteritems(): + 'title' : [2, 1, 3], + 'authors': [2, 1, 3], + 'series' : [3, 1, 2], + 'tags' : [3, 1, 2], + 'rating' : [3, 2, 1], + # 'identifiers': [3, 2, 1], There is no stable sort since 1 and + # 2 have the same identifier keys + # 'last_modified': [3, 2, 1], There is no stable sort as two + # records have the exact same value + 'timestamp': [2, 1, 3], + 'pubdate' : [1, 2, 3], + 'publisher': [3, 2, 1], + 'languages': [3, 2, 1], + 'comments': [3, 2, 1], + '#enum' : [3, 2, 1], + '#authors' : [3, 2, 1], + '#date': [3, 1, 2], + '#rating':[3, 2, 1], + '#series':[3, 2, 1], + '#tags':[3, 2, 1], + '#yesno':[3, 1, 2], + '#comments':[3, 2, 1], + # TODO: Add an empty book to the db and ensure that empty + # fields sort the same as they do in db2 + }.iteritems(): x = list(reversed(order)) self.assertEqual(order, cache.multisort([(field, True)], ids_to_sort=x), From 2d72a307593ea937bcd115784363cae0e89df560 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 20 Jan 2013 13:48:34 +0530 Subject: [PATCH 05/11] ... --- recipes/conowego_pl.recipe | 0 recipes/{ => icons}/spiders_web_pl.png | Bin recipes/linux_journal.recipe | 0 3 files changed, 0 insertions(+), 0 deletions(-) mode change 100755 => 100644 recipes/conowego_pl.recipe rename recipes/{ => icons}/spiders_web_pl.png (100%) mode change 100755 => 100644 recipes/linux_journal.recipe diff --git a/recipes/conowego_pl.recipe b/recipes/conowego_pl.recipe old mode 100755 new mode 100644 diff --git a/recipes/spiders_web_pl.png b/recipes/icons/spiders_web_pl.png similarity index 100% rename from recipes/spiders_web_pl.png rename to recipes/icons/spiders_web_pl.png diff --git a/recipes/linux_journal.recipe b/recipes/linux_journal.recipe old mode 100755 new mode 100644 From aff8f66fa1c8d99af9d7ae476641f047e022d7f0 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 20 Jan 2013 14:31:16 +0530 Subject: [PATCH 06/11] Fix Michelle Malkin --- recipes/michellemalkin.recipe | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/recipes/michellemalkin.recipe b/recipes/michellemalkin.recipe index e933ed8f1c..0b1f0ebdaa 100644 --- a/recipes/michellemalkin.recipe +++ b/recipes/michellemalkin.recipe @@ -18,6 +18,8 @@ class MichelleMalkin(BasicNewsRecipe): remove_javascript = True no_stylesheets = True + auto_cleanup = True + use_embedded_content = False conversion_options = { @@ -29,16 +31,16 @@ class MichelleMalkin(BasicNewsRecipe): } - keep_only_tags = [ - dict(name='div', attrs={'class':'article'}) - ] + #keep_only_tags = [ + #dict(name='div', attrs={'class':'article'}) + #] - remove_tags = [ - dict(name=['iframe', 'embed', 'object']), - dict(name='div', attrs={'id':['comments', 'commentForm']}), - dict(name='div', attrs={'class':['postCategories', 'comments', 'blogInfo', 'postInfo']}) + #remove_tags = [ + #dict(name=['iframe', 'embed', 'object']), + #dict(name='div', attrs={'id':['comments', 'commentForm']}), + #dict(name='div', attrs={'class':['postCategories', 'comments', 'blogInfo', 'postInfo']}) - ] + #] feeds = [(u'http://feeds.feedburner.com/michellemalkin/posts')] From 63b164241a5846ca40aa20f361c20a1b29333068 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 20 Jan 2013 14:34:26 +0530 Subject: [PATCH 07/11] Start work on implementing search in the new backend. Searching for date columns working. --- src/calibre/db/cache.py | 6 + src/calibre/db/fields.py | 46 ++++++ src/calibre/db/search.py | 284 ++++++++++++++++++++++++++++++++ src/calibre/db/tests/reading.py | 20 +++ 4 files changed, 356 insertions(+) create mode 100644 src/calibre/db/search.py diff --git a/src/calibre/db/cache.py b/src/calibre/db/cache.py index a631f9ea46..88a2196a61 100644 --- a/src/calibre/db/cache.py +++ b/src/calibre/db/cache.py @@ -13,6 +13,7 @@ from functools import wraps, partial from calibre.db.locking import create_locks, RecordLock from calibre.db.fields import create_field +from calibre.db.search import Search from calibre.db.tables import VirtualTable from calibre.db.lazy import FormatMetadata, FormatsList from calibre.ebooks.metadata.book.base import Metadata @@ -50,6 +51,7 @@ class Cache(object): self.record_lock = RecordLock(self.read_lock) self.format_metadata_cache = defaultdict(dict) self.formatter_template_cache = {} + self._search_api = Search(self.field_metadata.get_search_terms()) # Implement locking for all simple read/write API methods # An unlocked version of the method is stored with the name starting @@ -409,6 +411,10 @@ class Cache(object): else: return sorted(all_book_ids, key=partial(SortKey, fields, sort_keys)) + @read_api + def search(self, query, restriction): + return self._search_api(self, query, restriction) + # }}} class SortKey(object): diff --git a/src/calibre/db/fields.py b/src/calibre/db/fields.py index 3808052549..43e89cdc6f 100644 --- a/src/calibre/db/fields.py +++ b/src/calibre/db/fields.py @@ -9,6 +9,7 @@ __copyright__ = '2011, Kovid Goyal ' __docformat__ = 'restructuredtext en' from threading import Lock +from collections import defaultdict from calibre.db.tables import ONE_ONE, MANY_ONE, MANY_MANY from calibre.ebooks.metadata import title_sort @@ -83,6 +84,15 @@ class Field(object): ''' raise NotImplementedError() + def iter_searchable_values(self, get_metadata, candidates, default_value=None): + ''' + Return a generator that yields items of the form (value, set of books + ids that have this value). Here, value is a searchable value. For + OneToOneField the set of books ids will contain only a single id, but for + other fields it will generally have more than one id. Returned books_ids + are restricted to the set of ids in candidates. + ''' + raise NotImplementedError() class OneToOneField(Field): @@ -102,6 +112,11 @@ class OneToOneField(Field): return {id_ : self._sort_key(self.table.book_col_map.get(id_, self._default_sort_key)) for id_ in all_book_ids} + def iter_searchable_values(self, get_metadata, candidates, default_value=None): + cbm = self.table.book_col_map + for book_id in candidates: + yield cbm.get(book_id, default_value), {book_id} + class CompositeField(OneToOneField): def __init__(self, *args, **kwargs): @@ -139,6 +154,9 @@ class CompositeField(OneToOneField): return {id_ : sort_key(self.get_value_with_cache(id_, get_metadata)) for id_ in all_book_ids} + def iter_searchable_values(self, get_metadata, candidates, default_value=None): + for book_id in candidates: + yield self.get_value_with_cache(book_id, get_metadata), {book_id} class OnDeviceField(OneToOneField): @@ -176,6 +194,10 @@ class OnDeviceField(OneToOneField): return {id_ : self.for_book(id_) for id_ in all_book_ids} + def iter_searchable_values(self, get_metadata, candidates, default_value=None): + for book_id in candidates: + yield self.for_book(book_id, default_value=default_value), {book_id} + class ManyToOneField(Field): def for_book(self, book_id, default_value=None): @@ -206,6 +228,13 @@ class ManyToOneField(Field): for cid in ans.itervalues()} return {id_ : sk_map[cid] for id_, cid in ans.iteritems()} + def iter_searchable_values(self, get_metadata, candidates, default_value=None): + cbm = self.table.col_book_map + for item_id, val in self.table.id_map.iteritems(): + book_ids = set(cbm.get(item_id, ())).intersection(candidates) + if book_ids: + yield val, book_ids + class ManyToManyField(Field): def __init__(self, *args, **kwargs): @@ -241,6 +270,12 @@ class ManyToManyField(Field): (self._default_sort_key,)) for id_, cids in ans.iteritems()} + def iter_searchable_values(self, get_metadata, candidates, default_value=None): + cbm = self.table.col_book_map + for item_id, val in self.table.id_map.iteritems(): + book_ids = set(cbm.get(item_id, ())).intersection(candidates) + if book_ids: + yield val, book_ids class IdentifiersField(ManyToManyField): @@ -276,6 +311,17 @@ class FormatsField(ManyToManyField): def format_fname(self, book_id, fmt): return self.table.fname_map[book_id][fmt.upper()] + def iter_searchable_values(self, get_metadata, candidates, default_value=None): + val_map = defaultdict(set) + cbm = self.table.book_col_map + for book_id in candidates: + vals = cbm.get(book_id, ()) + for val in vals: + val_map[val].add(book_id) + + for val, book_ids in val_map.iteritems(): + yield val, book_ids + class SeriesField(ManyToOneField): def sort_key_for_series(self, book_id, get_lang, series_sort_order): diff --git a/src/calibre/db/search.py b/src/calibre/db/search.py new file mode 100644 index 0000000000..d304deeb9a --- /dev/null +++ b/src/calibre/db/search.py @@ -0,0 +1,284 @@ +#!/usr/bin/env python +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai +from __future__ import (unicode_literals, division, absolute_import, + print_function) + +__license__ = 'GPL v3' +__copyright__ = '2013, Kovid Goyal ' +__docformat__ = 'restructuredtext en' + +import re +from functools import partial +from datetime import timedelta + +from calibre.utils.config_base import prefs +from calibre.utils.date import parse_date, UNDEFINED_DATE, now +from calibre.utils.search_query_parser import SearchQueryParser, ParseException + +# TODO: Thread safety of saved searches + +class DateSearch(object): # {{{ + + def __init__(self): + self.operators = { + '=' : (1, self.eq), + '!=' : (2, self.ne), + '>' : (1, self.gt), + '>=' : (2, self.ge), + '<' : (1, self.lt), + '<=' : (2, self.le), + } + self.local_today = { '_today', 'today', icu_lower(_('today')) } + self.local_yesterday = { '_yesterday', 'yesterday', icu_lower(_('yesterday')) } + self.local_thismonth = { '_thismonth', 'thismonth', icu_lower(_('thismonth')) } + self.daysago_pat = re.compile(r'(%s|daysago|_daysago)$'%_('daysago')) + + def eq(self, dbdate, query, field_count): + if dbdate.year == query.year: + if field_count == 1: + return True + if dbdate.month == query.month: + if field_count == 2: + return True + return dbdate.day == query.day + return False + + def ne(self, *args): + return not self.eq(*args) + + def gt(self, dbdate, query, field_count): + if dbdate.year > query.year: + return True + if field_count > 1 and dbdate.year == query.year: + if dbdate.month > query.month: + return True + return (field_count == 3 and dbdate.month == query.month and + dbdate.day > query.day) + return False + + def le(self, *args): + return not self.gt(*args) + + def lt(self, dbdate, query, field_count): + if dbdate.year < query.year: + return True + if field_count > 1 and dbdate.year == query.year: + if dbdate.month < query.month: + return True + return (field_count == 3 and dbdate.month == query.month and + dbdate.day < query.day) + return False + + def ge(self, *args): + return not self.lt(*args) + + def __call__(self, query, field_iter): + matches = set() + if len(query) < 2: + return matches + + if query == 'false': + for v, book_ids in field_iter(): + if isinstance(v, (str, unicode)): + v = parse_date(v) + if v is None or v <= UNDEFINED_DATE: + matches |= book_ids + return matches + + if query == 'true': + for v, book_ids in field_iter(): + if isinstance(v, (str, unicode)): + v = parse_date(v) + if v is not None and v > UNDEFINED_DATE: + matches |= book_ids + return matches + + relop = None + for k, op in self.operators.iteritems(): + if query.startswith(k): + p, relop = op + query = query[p:] + if relop is None: + relop = self.operators['='][-1] + + if query in self.local_today: + qd = now() + field_count = 3 + elif query in self.local_yesterday: + qd = now() - timedelta(1) + field_count = 3 + elif query in self.local_thismonth: + qd = now() + field_count = 2 + else: + m = self.daysago_pat.search(query) + if m is not None: + num = query[:-len(m.group(1))] + try: + qd = now() - timedelta(int(num)) + except: + raise ParseException(query, len(query), 'Number conversion error') + field_count = 3 + else: + try: + qd = parse_date(query, as_utc=False) + except: + raise ParseException(query, len(query), 'Date conversion error') + if '-' in query: + field_count = query.count('-') + 1 + else: + field_count = query.count('/') + 1 + + for v, book_ids in field_iter(): + if isinstance(v, (str, unicode)): + v = parse_date(v) + if v is not None and relop(v, qd, field_count): + matches |= book_ids + + return matches +# }}} + +class Parser(SearchQueryParser): + + def __init__(self, dbcache, all_book_ids, gst, date_search, + limit_search_columns, limit_search_columns_to, locations): + self.dbcache, self.all_book_ids = dbcache, all_book_ids + self.all_search_locations = frozenset(locations) + self.grouped_search_terms = gst + self.date_search = date_search + self.limit_search_columns, self.limit_search_columns_to = ( + limit_search_columns, limit_search_columns_to) + super(Parser, self).__init__(locations, optimize=True) + + @property + def field_metadata(self): + return self.dbcache.field_metadata + + def universal_set(self): + return self.all_book_ids + + def field_iter(self, name, candidates): + get_metadata = partial(self.dbcache._get_metadata, get_user_categories=False) + return self.dbcache.fields[name].iter_searchable_values(get_metadata, + candidates) + + def get_matches(self, location, query, candidates=None, + allow_recursion=True): + # If candidates is not None, it must not be modified. Changing its + # value will break query optimization in the search parser + matches = set() + + if candidates is None: + candidates = self.all_book_ids + if not candidates or not query or not query.strip(): + return matches + if location not in self.all_search_locations: + return matches + + if (len(location) > 2 and location.startswith('@') and + location[1:] in self.grouped_search_terms): + location = location[1:] + + # get metadata key associated with the search term. Eliminates + # dealing with plurals and other aliases + # original_location = location + location = self.field_metadata.search_term_to_field_key( + icu_lower(location.strip())) + # grouped search terms + if isinstance(location, list): + if allow_recursion: + if query.lower() == 'false': + invert = True + query = 'true' + else: + invert = False + for loc in location: + c = candidates.copy() + m = self.get_matches(loc, query, + candidates=c, allow_recursion=False) + matches |= m + c -= m + if len(c) == 0: + break + if invert: + matches = self.all_book_ids - matches + return matches + raise ParseException(query, len(query), 'Recursive query group detected') + + # If the user has asked to restrict searching over all field, apply + # that restriction + if (location == 'all' and self.limit_search_columns and + self.limit_search_columns_to): + terms = set() + for l in self.limit_search_columns_to: + l = icu_lower(l.strip()) + if l and l != 'all' and l in self.all_search_locations: + terms.add(l) + if terms: + c = candidates.copy() + for l in terms: + try: + m = self.get_matches(l, query, + candidates=c, allow_recursion=allow_recursion) + matches |= m + c -= m + if len(c) == 0: + break + except: + pass + return matches + + if location in self.field_metadata: + fm = self.field_metadata[location] + # take care of dates special case + if (fm['datatype'] == 'datetime' or + (fm['datatype'] == 'composite' and + fm['display'].get('composite_sort', '') == 'date')): + if location == 'date': + location = 'timestamp' + return self.date_search( + icu_lower(query), partial(self.field_iter, location, candidates)) + + return matches + + +class Search(object): + + def __init__(self, all_search_locations): + self.all_search_locations = all_search_locations + self.date_search = DateSearch() + + def change_locations(self, newlocs): + self.all_search_locations = newlocs + + def __call__(self, dbcache, query, search_restriction): + ''' + Return the set of ids of all records that match the specified + query and restriction + ''' + q = '' + if not query or not query.strip(): + q = search_restriction + else: + q = query + if search_restriction: + q = u'(%s) and (%s)' % (search_restriction, query) + + all_book_ids = dbcache.all_book_ids(type=set) + if not q: + return all_book_ids + + # We construct a new parser instance per search as pyparsing is not + # thread safe. On my desktop, constructing a SearchQueryParser instance + # takes 0.000975 seconds and restoring it from a pickle takes + # 0.000974 seconds. + sqp = Parser( + dbcache, all_book_ids, dbcache.pref('grouped_search_terms'), + self.date_search, prefs[ 'limit_search_columns' ], + prefs[ 'limit_search_columns_to' ], self.all_search_locations) + try: + ret = sqp.parse(query) + finally: + sqp.dbcache = None + return ret + diff --git a/src/calibre/db/tests/reading.py b/src/calibre/db/tests/reading.py index d77d3ac6eb..22d1bba37e 100644 --- a/src/calibre/db/tests/reading.py +++ b/src/calibre/db/tests/reading.py @@ -191,6 +191,26 @@ class ReadingTest(BaseTest): # }}} + def test_searching(self): # {{{ + 'Test searching returns the same data for both backends' + from calibre.library.database2 import LibraryDatabase2 + old = LibraryDatabase2(self.library_path) + oldvals = {query:set(old.search_getting_ids(query, '')) for query in ( + 'date:9/6/2011', 'date:true', 'date:false', 'pubdate:9/2011', + '#date:true', 'date:<100daysago', 'date:>9/6/2011', + '#date:>9/1/2011', '#date:=2011', + )} + old = None + + cache = self.init_cache(self.library_path) + for query, ans in oldvals.iteritems(): + nr = cache.search(query, '') + self.assertEqual(ans, nr, + 'Old result: %r != New result: %r for search: %s'%( + ans, nr, query)) + + # }}} + def tests(): return unittest.TestLoader().loadTestsFromTestCase(ReadingTest) From 556582f1bac11bfd6ef1119770debf884425d453 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 20 Jan 2013 16:33:28 +0530 Subject: [PATCH 08/11] Implement numeric field searches --- src/calibre/db/fields.py | 14 ++++ src/calibre/db/search.py | 118 +++++++++++++++++++++++++++++-- src/calibre/db/tables.py | 6 +- src/calibre/db/tests/metadata.db | Bin 230400 -> 235520 bytes src/calibre/db/tests/reading.py | 12 +++- 5 files changed, 141 insertions(+), 9 deletions(-) diff --git a/src/calibre/db/fields.py b/src/calibre/db/fields.py index 43e89cdc6f..194cb33011 100644 --- a/src/calibre/db/fields.py +++ b/src/calibre/db/fields.py @@ -20,6 +20,8 @@ from calibre.utils.localization import calibre_langcode_to_name class Field(object): + is_many = False + def __init__(self, name, table): self.name, self.table = name, table self.has_text_data = self.metadata['datatype'] in ('text', 'comments', @@ -200,6 +202,8 @@ class OnDeviceField(OneToOneField): class ManyToOneField(Field): + is_many = True + def for_book(self, book_id, default_value=None): ids = self.table.book_col_map.get(book_id, None) if ids is not None: @@ -237,6 +241,8 @@ class ManyToOneField(Field): class ManyToManyField(Field): + is_many = True + def __init__(self, *args, **kwargs): Field.__init__(self, *args, **kwargs) self.alphabetical_sort = self.name != 'authors' @@ -277,6 +283,14 @@ class ManyToManyField(Field): if book_ids: yield val, book_ids + def iter_counts(self, candidates): + val_map = defaultdict(set) + cbm = self.table.book_col_map + for book_id in candidates: + val_map[len(cbm.get(book_id, ()))].add(book_id) + for count, book_ids in val_map.iteritems(): + yield count, book_ids + class IdentifiersField(ManyToManyField): def for_book(self, book_id, default_value=None): diff --git a/src/calibre/db/search.py b/src/calibre/db/search.py index d304deeb9a..fe9cec79c8 100644 --- a/src/calibre/db/search.py +++ b/src/calibre/db/search.py @@ -138,14 +138,101 @@ class DateSearch(object): # {{{ return matches # }}} +class NumericSearch(object): # {{{ + + def __init__(self): + self.operators = { + '=':( 1, lambda r, q: r == q ), + '>':( 1, lambda r, q: r is not None and r > q ), + '<':( 1, lambda r, q: r is not None and r < q ), + '!=':( 2, lambda r, q: r != q ), + '>=':( 2, lambda r, q: r is not None and r >= q ), + '<=':( 2, lambda r, q: r is not None and r <= q ) + } + + def __call__(self, query, field_iter, location, datatype, candidates, is_many=False): + matches = set() + if not query: + return matches + + q = '' + cast = adjust = lambda x: x + dt = datatype + + if is_many and query in {'true', 'false'}: + valcheck = lambda x: True + if datatype == 'rating': + valcheck = lambda x: x is not None and x > 0 + found = set() + for val, book_ids in field_iter(): + if valcheck(val): + found |= book_ids + return found if query == 'true' else candidates - found + + if query == 'false': + if location == 'cover': + relop = lambda x,y: not bool(x) + else: + relop = lambda x,y: x is None + elif query == 'true': + if location == 'cover': + relop = lambda x,y: bool(x) + else: + relop = lambda x,y: x is not None + else: + relop = None + for k, op in self.operators.iteritems(): + if query.startswith(k): + p, relop = op + query = query[p:] + if relop is None: + p, relop = self.operators['='] + + cast = int + if dt == 'rating': + cast = lambda x: 0 if x is None else int(x) + adjust = lambda x: x/2 + elif dt in ('float', 'composite'): + cast = float + + mult = 1.0 + if len(query) > 1: + mult = query[-1].lower() + mult = {'k': 1024.,'m': 1024.**2, 'g': 1024.**3}.get(mult, 1.0) + if mult != 1.0: + query = query[:-1] + else: + mult = 1.0 + + try: + q = cast(query) * mult + except: + raise ParseException(query, len(query), + 'Non-numeric value in query: %r'%query) + + for val, book_ids in field_iter(): + if val is None: + continue + try: + v = cast(val) + except: + v = None + if v: + v = adjust(v) + if relop(v, q): + matches |= book_ids + return matches + +# }}} + class Parser(SearchQueryParser): - def __init__(self, dbcache, all_book_ids, gst, date_search, + def __init__(self, dbcache, all_book_ids, gst, date_search, num_search, limit_search_columns, limit_search_columns_to, locations): self.dbcache, self.all_book_ids = dbcache, all_book_ids self.all_search_locations = frozenset(locations) self.grouped_search_terms = gst - self.date_search = date_search + self.date_search, self.num_search = date_search, num_search self.limit_search_columns, self.limit_search_columns_to = ( limit_search_columns, limit_search_columns_to) super(Parser, self).__init__(locations, optimize=True) @@ -230,15 +317,33 @@ class Parser(SearchQueryParser): if location in self.field_metadata: fm = self.field_metadata[location] + dt = fm['datatype'] + # take care of dates special case - if (fm['datatype'] == 'datetime' or - (fm['datatype'] == 'composite' and - fm['display'].get('composite_sort', '') == 'date')): + if (dt == 'datetime' or ( + dt == 'composite' and + fm['display'].get('composite_sort', '') == 'date')): if location == 'date': location = 'timestamp' return self.date_search( icu_lower(query), partial(self.field_iter, location, candidates)) + # take care of numbers special case + if (dt in ('rating', 'int', 'float') or + (dt == 'composite' and + fm['display'].get('composite_sort', '') == 'number')): + field = self.dbcache.fields[location] + return self.num_search( + icu_lower(query), partial(self.field_iter, location, candidates), + location, dt, candidates, is_many=field.is_many) + + # take care of the 'count' operator for is_multiples + if (fm['is_multiple'] and + len(query) > 1 and query[0] == '#' and query[1] in '=<>!'): + return self.num_search(icu_lower(query[1:]), partial( + self.dbcache.fields[location].iter_counts, candidates), + location, dt, candidates) + return matches @@ -247,6 +352,7 @@ class Search(object): def __init__(self, all_search_locations): self.all_search_locations = all_search_locations self.date_search = DateSearch() + self.num_search = NumericSearch() def change_locations(self, newlocs): self.all_search_locations = newlocs @@ -274,7 +380,7 @@ class Search(object): # 0.000974 seconds. sqp = Parser( dbcache, all_book_ids, dbcache.pref('grouped_search_terms'), - self.date_search, prefs[ 'limit_search_columns' ], + self.date_search, self.num_search, prefs[ 'limit_search_columns' ], prefs[ 'limit_search_columns_to' ], self.all_search_locations) try: ret = sqp.parse(query) diff --git a/src/calibre/db/tables.py b/src/calibre/db/tables.py index 58768c9ff5..234a7fe4a8 100644 --- a/src/calibre/db/tables.py +++ b/src/calibre/db/tables.py @@ -148,11 +148,11 @@ class ManyToManyTable(ManyToOneTable): ''' table_type = MANY_MANY + selectq = 'SELECT book, {0} FROM {1}' def read_maps(self, db): for row in db.conn.execute( - 'SELECT book, {0} FROM {1} ORDER BY id'.format( - self.metadata['link_column'], self.link_table)): + self.selectq.format(self.metadata['link_column'], self.link_table)): if row[1] not in self.col_book_map: self.col_book_map[row[1]] = [] self.col_book_map[row[1]].append(row[0]) @@ -168,6 +168,8 @@ class ManyToManyTable(ManyToOneTable): class AuthorsTable(ManyToManyTable): + selectq = 'SELECT book, {0} FROM {1} ORDER BY id' + def read_id_maps(self, db): self.alink_map = {} self.asort_map = {} diff --git a/src/calibre/db/tests/metadata.db b/src/calibre/db/tests/metadata.db index da54c61ad5c83bde8917933e2d73f2e6ae370273..4bd6dfe4f97d037ed7c343a87e6cc929d153b5d9 100644 GIT binary patch delta 2440 zcmdT_dr(x@8NcWJ?%mxB3$kX-f+#HD10&$x<)Kg=*8y3RphA#L6EVvI3!?jAb{B&@ z3LBD4lGah?)|^Qv)7a@hjZ-_ycDU^{ol)sjTaB?XF|AF-SCwfVXqz^vIt4dY(obmhO^2(A5_sYt$%H-`i zDHdeV5!IZTOX)~b$=pgGwr8N2wDrW|k&v$~66^_ueTqVpug&iuBz0_o>`cl1g&z!<5-^3E_#$a5yud+MnAK^YMYfnl-38{c7Fon;T<7YeP)~Pe^O#ee4n1a2 zw9ZczMg^S1kI{ooc$og3PEacu7Tyy^Rhu=oZe41M;BYtyX(FT}81ctz^_Q^{NPm8~ zC)65<`Z^-fkUySZ<;<^-cC`hnoQwKhMJ4Wz!TiDGyS;9tr(%zQ5Aipg#1FA2nK96h z6d8^g$kZeoik9IvK1<8`0W{!5) z*0RBHXfogqQ~wZbofPKg)JO-Hs*^*e60arYy6pG@w;iIe%og+S3bgxT z0e`e@yY4feh=MY&H4+KNeEw*_7ms$u;{G^~_2=N&ui7Dn^57->qzk2>v3mUYLb5}B z^F#|S;1f>q8K@Ud**V42r!sVk&&u|sJT(b*>P8wt@*g*3kznj&c|%h6Ov56I5(tru zx+K#0YH| z%LBOASSQ)e6CNKR_sj$@K2OG`81iIU0-E zfh@WtOHq7ZN4UZxvY#uwQjVM;b|}JCcI#!DO%!(d1j)22&O*-9&SjjNNp_?OOSDI7 z@CT5y+TBMnKqR9_Se_S)*@75kswm7~jYrttCfFsTi`ao}C}w4Ak#8!Pv0m4J@~<36 zYq-u8L=Ky*!BQS{ID!i2gZhn4_9KsNUJny{_HlfN{7$>O5h7*X^(bVUtMMpvHsTxF z;SH$cl_m)9vic!-wdyDEOTyL+qK3U*i*jwS4nxGm_2IqO+4|%&8Q+3?8pW#NQF6d+6{t(z5h=n6uvPzll?YO_wm=sF6 zdU7I%X`LLrr5$-`1}FLqadCHb&BVge?w;0QS8RJAs*jO>4~K#Pr-e&amd-*J%wkq3 z?AFtGNc*xKeMFuGbZg-byh>Po1D3G$y;#Z)Jq4@!*0DM4U@dIyQVkYs-;1Jyuv`zE zhF0T;K5ZQJkF3FqZR}_@ve+}t$k)ER3qita8{pE)x^dDl`j9DYh-f?eu%FD-!+|!K zw0#MDOvVQQkzCS54|tQL2DZSXcSij+`<*jI*jwkNu&J6F;h>O zl=HKOk6uXQF^HLX7R_wOZ}PdI$%W(&Qm9ZX2(>hqF6CXjN*rlQY-Y>dD71a8!IqF$=XV9uYA$Jp5qh%0quTcX~aU0qCG zY-A? xk}Z(zq|It0DTx;rhD^e;=U&NG?Z+mWryr6j>W_DYJN0>xqZRk!9i!0x^e>HH*n$86 delta 1544 zcmaJ>eQZ-z6o2>rUc0_p)^!4#gGoC;zHx=EYy%viIK)X7G6@2s>()nC9PQ|bjIV41 zf(mFL4|jtxK_Vtfh`~MV)riQ3$v_Z>OeAyiVT=rcfDr`t58l23N@6_8yEo_D^Z7gH zyvBX0jc=rG9prdLlBA3GKO*iAw~d`BgyVi0VG>+9^@vkCxvlOa-C=VPy^vnArMnBA zZckBOe$j;d@gC2F!u)(~f<5scV!hXHwZKkyXepUPbjQdj)pukqA;j7;SaIsg?jG1> zSt5kUW+ZQlmTC7u(6ajvD19%~486C+Bnx4wBLCY4kJ=>m)5~y}4R_RRGTc$K%zHV# z1?{@mqL@--5sDJIrBadc8;*;N4naE}7(}4=1TvB_YGqrdRqB**8ha7P91Ntl=wWIj zjZ&xPuzjv~*5@MA3MQc&f8q!}027)$IEBw}P&ix-oi#-Aa1%FmN8S}^w|Tl?dJRUu z{iPz0=_e4i5UWIP)5qy3I+HG?>*>2f@DFV)OEF0$TY}n^Lwo3?0&Q<=c2>+6h~^YI zJ^5puR;Qqxs2T~XO-;l7+K#rK?DJVfmfQoX!r|Je*B4Q}u}Cl)^TpIyB(7>F4rjOo zp!tqKBpk0({obhRi&WQmV`^ksboJVyURf$}Xq(%!ixyU7_+;s6M@U@}^~S>9D%BfZ z8LC$O3o8ax$ABwN5Nehk)j zwvJ}Q{7Bc1w)STpKL%&McPP3lnk3rKqeq}|YZF7+cs|Lc+>n%i3XC(6lPq!pH=gh@9LPKHZ zTZbpv;BsW}$))(tP&v~aM^^K6q_d7Sn99<^NaIcO&_J4}!c216ry+RQL7y<0Uxs~# z$&YN;dN|q01)`|cwW84T5t#X=1$d3HD-mRJ5#g$#d6vCD13BzKF>Ktk7~c}MZ!xCw z>GRP}Qj;4JjMk_TFIvvXVqANf)@S5of@5zKpcgiFpqSPyPb_26`yMPyhe` diff --git a/src/calibre/db/tests/reading.py b/src/calibre/db/tests/reading.py index 22d1bba37e..35f4a7333d 100644 --- a/src/calibre/db/tests/reading.py +++ b/src/calibre/db/tests/reading.py @@ -65,7 +65,7 @@ class ReadingTest(BaseTest): 'author_sort': 'One, Author', 'series' : 'A Series One', 'series_index': 1.0, - 'tags':('Tag Two', 'Tag One'), + 'tags':('Tag One', 'Tag Two'), 'formats': (), 'rating': 4.0, 'identifiers': {'test':'one'}, @@ -196,9 +196,19 @@ class ReadingTest(BaseTest): from calibre.library.database2 import LibraryDatabase2 old = LibraryDatabase2(self.library_path) oldvals = {query:set(old.search_getting_ids(query, '')) for query in ( + # Date tests 'date:9/6/2011', 'date:true', 'date:false', 'pubdate:9/2011', '#date:true', 'date:<100daysago', 'date:>9/6/2011', '#date:>9/1/2011', '#date:=2011', + + # Number tests + 'rating:3', 'rating:>2', 'rating:=2', 'rating:true', + 'rating:false', 'rating:>4', 'tags:#<2', 'tags:#>7', + 'cover:false', 'cover:true', '#float:>11', '#float:<1k', + '#float:10.01', + + # TODO: Tests for searching the size column and + # cover:true|false )} old = None From 9d29d7ab3470da23828cb96fccdf6faa281c7096 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 20 Jan 2013 17:37:39 +0530 Subject: [PATCH 09/11] ... --- src/calibre/db/tests/reading.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/db/tests/reading.py b/src/calibre/db/tests/reading.py index 35f4a7333d..7c1ff45968 100644 --- a/src/calibre/db/tests/reading.py +++ b/src/calibre/db/tests/reading.py @@ -205,7 +205,7 @@ class ReadingTest(BaseTest): 'rating:3', 'rating:>2', 'rating:=2', 'rating:true', 'rating:false', 'rating:>4', 'tags:#<2', 'tags:#>7', 'cover:false', 'cover:true', '#float:>11', '#float:<1k', - '#float:10.01', + '#float:10.01', 'series_index:1', 'series_index:<3', # TODO: Tests for searching the size column and # cover:true|false From 62805ec26444891cbdc212bad949698903adcf79 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 20 Jan 2013 19:33:43 +0530 Subject: [PATCH 10/11] ... --- src/calibre/db/fields.py | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/src/calibre/db/fields.py b/src/calibre/db/fields.py index 194cb33011..34a12c9491 100644 --- a/src/calibre/db/fields.py +++ b/src/calibre/db/fields.py @@ -89,10 +89,8 @@ class Field(object): def iter_searchable_values(self, get_metadata, candidates, default_value=None): ''' Return a generator that yields items of the form (value, set of books - ids that have this value). Here, value is a searchable value. For - OneToOneField the set of books ids will contain only a single id, but for - other fields it will generally have more than one id. Returned books_ids - are restricted to the set of ids in candidates. + ids that have this value). Here, value is a searchable value. Returned + books_ids are restricted to the set of ids in candidates. ''' raise NotImplementedError() @@ -116,8 +114,17 @@ class OneToOneField(Field): def iter_searchable_values(self, get_metadata, candidates, default_value=None): cbm = self.table.book_col_map - for book_id in candidates: - yield cbm.get(book_id, default_value), {book_id} + if (self.name in {'id', 'uuid', 'title'} or + self.metadata['datatype'] == 'datetime'): + # Values are likely to be unique + for book_id in candidates: + yield cbm.get(book_id, default_value), {book_id} + else: + val_map = defaultdict(set) + for book_id in candidates: + val_map[cbm.get(book_id, default_value)].add(book_id) + for val, book_ids in val_map.iteritems(): + yield val, book_ids class CompositeField(OneToOneField): @@ -157,8 +164,11 @@ class CompositeField(OneToOneField): all_book_ids} def iter_searchable_values(self, get_metadata, candidates, default_value=None): + val_map = defaultdict(set) for book_id in candidates: - yield self.get_value_with_cache(book_id, get_metadata), {book_id} + val_map[self.get_value_with_cache(book_id, get_metadata)].add(book_id) + for val, book_ids in val_map.iteritems(): + yield val, book_ids class OnDeviceField(OneToOneField): @@ -197,8 +207,11 @@ class OnDeviceField(OneToOneField): all_book_ids} def iter_searchable_values(self, get_metadata, candidates, default_value=None): + val_map = defaultdict(set) for book_id in candidates: - yield self.for_book(book_id, default_value=default_value), {book_id} + val_map[self.for_book(book_id, default_value=default_value)].add(book_id) + for val, book_ids in val_map.iteritems(): + yield val, book_ids class ManyToOneField(Field): From 3a299104fae93ee7807d71d5b2935f5de3d444e5 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 20 Jan 2013 19:43:12 +0530 Subject: [PATCH 11/11] ... --- src/calibre/db/tests/reading.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/calibre/db/tests/reading.py b/src/calibre/db/tests/reading.py index 7c1ff45968..4792f498f8 100644 --- a/src/calibre/db/tests/reading.py +++ b/src/calibre/db/tests/reading.py @@ -205,7 +205,8 @@ class ReadingTest(BaseTest): 'rating:3', 'rating:>2', 'rating:=2', 'rating:true', 'rating:false', 'rating:>4', 'tags:#<2', 'tags:#>7', 'cover:false', 'cover:true', '#float:>11', '#float:<1k', - '#float:10.01', 'series_index:1', 'series_index:<3', + '#float:10.01', 'series_index:1', 'series_index:<3', 'id:1', + 'id:>2', # TODO: Tests for searching the size column and # cover:true|false