From 86e68579f32972a2424771a7f3e84d046d630283 Mon Sep 17 00:00:00 2001 From: John Schember Date: Sat, 11 Sep 2010 08:39:40 -0400 Subject: [PATCH 01/23] PDF Input: Fix bug #6734, add additional matching for unicode characters. --- src/calibre/ebooks/conversion/preprocess.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/calibre/ebooks/conversion/preprocess.py b/src/calibre/ebooks/conversion/preprocess.py index f7b803974f..256bcce6fc 100644 --- a/src/calibre/ebooks/conversion/preprocess.py +++ b/src/calibre/ebooks/conversion/preprocess.py @@ -166,6 +166,17 @@ class HTMLPreProcessor(object): (re.compile(u'`\s*()*\s*O', re.UNICODE), lambda match: u'Ò'), (re.compile(u'`\s*()*\s*u', re.UNICODE), lambda match: u'ù'), (re.compile(u'`\s*()*\s*U', re.UNICODE), lambda match: u'Ù'), + # ` with letter before + (re.compile(u'a\s*()*\s*`', re.UNICODE), lambda match: u'à'), + (re.compile(u'A\s*()*\s*`', re.UNICODE), lambda match: u'À'), + (re.compile(u'e\s*()*\s*`', re.UNICODE), lambda match: u'è'), + (re.compile(u'E\s*()*\s*`', re.UNICODE), lambda match: u'È'), + (re.compile(u'i\s*()*\s*`', re.UNICODE), lambda match: u'ì'), + (re.compile(u'I\s*()*\s*`', re.UNICODE), lambda match: u'Ì'), + (re.compile(u'o\s*()*\s*`', re.UNICODE), lambda match: u'ò'), + (re.compile(u'O\s*()*\s*`', re.UNICODE), lambda match: u'Ò'), + (re.compile(u'u\s*()*\s*`', re.UNICODE), lambda match: u'ù'), + (re.compile(u'U\s*()*\s*`', re.UNICODE), lambda match: u'Ù'), # ´ (re.compile(u'´\s*()*\s*a', re.UNICODE), lambda match: u'á'), From c4071a245d256642568aa8fc827a8e8516f0df98 Mon Sep 17 00:00:00 2001 From: Charles Haley <> Date: Sat, 11 Sep 2010 13:40:27 +0100 Subject: [PATCH 02/23] Fix library sorting problem introduced by calling model.refresh() in the device connection sequence. 
--- src/calibre/gui2/library/models.py | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/src/calibre/gui2/library/models.py b/src/calibre/gui2/library/models.py index bb47508531..8ad0cd6818 100644 --- a/src/calibre/gui2/library/models.py +++ b/src/calibre/gui2/library/models.py @@ -121,10 +121,8 @@ class BooksModel(QAbstractTableModel): # {{{ def set_device_connected(self, is_connected): self.device_connected = is_connected self.db.refresh_ondevice() - self.refresh() + self.refresh() # does a resort() self.research() - if is_connected and self.sorted_on[0] == 'ondevice': - self.resort() def set_book_on_device_func(self, func): self.book_on_device = func @@ -249,7 +247,7 @@ class BooksModel(QAbstractTableModel): # {{{ # the search and count records for restrictions self.searched.emit(True) - def sort(self, col, order, reset=True): + def sort(self, col, order, reset=True, update_history=True): if not self.db: return self.about_to_be_sorted.emit(self.db.id) @@ -260,23 +258,23 @@ class BooksModel(QAbstractTableModel): # {{{ self.clear_caches() self.reset() self.sorted_on = (label, order) - self.sort_history.insert(0, self.sorted_on) + if update_history: + self.sort_history.insert(0, self.sorted_on) self.sorting_done.emit(self.db.index) def refresh(self, reset=True): - try: - col = self.column_map.index(self.sorted_on[0]) - except: - col = 0 self.db.refresh(field=None) - self.sort(col, self.sorted_on[1], reset=reset) + self.resort(reset=reset) - def resort(self, reset=True): - try: - col = self.column_map.index(self.sorted_on[0]) - except ValueError: - col = 0 - self.sort(col, self.sorted_on[1], reset=reset) + def resort(self, reset=True, history=5): # Bug report needed history=4 :) + for col,ord in reversed(self.sort_history[:history]): + try: + col = self.column_map.index(col) + except ValueError: + col = 0 + self.sort(col, ord, reset=False, update_history=False) + if reset: + self.reset() def research(self, 
reset=True): self.search(self.last_search, reset=reset) From 96478da323e642febb94c2c1a2c9826a6b3dddb7 Mon Sep 17 00:00:00 2001 From: John Schember Date: Sat, 11 Sep 2010 08:48:47 -0400 Subject: [PATCH 03/23] PML Input: Fix cleanup code. --- src/calibre/ebooks/pml/pmlconverter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/ebooks/pml/pmlconverter.py b/src/calibre/ebooks/pml/pmlconverter.py index 166695ff5c..3a4454725a 100644 --- a/src/calibre/ebooks/pml/pmlconverter.py +++ b/src/calibre/ebooks/pml/pmlconverter.py @@ -216,7 +216,7 @@ class PML_HTMLizer(object): html = re.sub(r'(?u)%s\s*%s' % (open % '.*?', close), '', html) else: html = re.sub(r'(?u)%s\s*%s' % (open, close), '', html) - html = re.sub(r'

\s*

', '', html) + html = re.sub(r'(?imu)

\s*

', '', html) return html def start_line(self): From dc7bc5dd5d890278d7f43377e9df944675888fc6 Mon Sep 17 00:00:00 2001 From: John Schember Date: Sat, 11 Sep 2010 09:01:34 -0400 Subject: [PATCH 04/23] PML Input: Fix bug #6770, put toc link after header so toc link goes to correct page. --- src/calibre/ebooks/pml/pmlconverter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/ebooks/pml/pmlconverter.py b/src/calibre/ebooks/pml/pmlconverter.py index 3a4454725a..6e479a71ef 100644 --- a/src/calibre/ebooks/pml/pmlconverter.py +++ b/src/calibre/ebooks/pml/pmlconverter.py @@ -556,7 +556,7 @@ class PML_HTMLizer(object): text = t else: self.toc.add_item(os.path.basename(self.file_name), id, value) - text = '%s' % (id, t) + text = '%s' % (t, id) elif c == 'm': empty = False src = self.code_value(line) From c2b3c445e17a38b5599393c943036c6c448886da Mon Sep 17 00:00:00 2001 From: John Schember Date: Sat, 11 Sep 2010 09:09:08 -0400 Subject: [PATCH 05/23] PML Input: Remove emtpy lines. --- src/calibre/ebooks/pml/pmlconverter.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/calibre/ebooks/pml/pmlconverter.py b/src/calibre/ebooks/pml/pmlconverter.py index 6e479a71ef..b0fc15197a 100644 --- a/src/calibre/ebooks/pml/pmlconverter.py +++ b/src/calibre/ebooks/pml/pmlconverter.py @@ -207,6 +207,7 @@ class PML_HTMLizer(object): while html != old: old = html html = self.cleanup_html_remove_redundant(html) + html = re.sub(r'(?imu)^\s*', '', html) return html def cleanup_html_remove_redundant(self, html): From ef8408869cebac380474deb971c4b6910680c895 Mon Sep 17 00:00:00 2001 From: John Schember Date: Sat, 11 Sep 2010 09:13:23 -0400 Subject: [PATCH 06/23] TXT Output: preserve spaces, handle tab character correct. is reduced to a single space by many renderers. 
--- src/calibre/ebooks/txt/processor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/ebooks/txt/processor.py b/src/calibre/ebooks/txt/processor.py index a12e8a0761..dac1e34df7 100644 --- a/src/calibre/ebooks/txt/processor.py +++ b/src/calibre/ebooks/txt/processor.py @@ -77,7 +77,7 @@ def separate_paragraphs_print_formatted(txt): def preserve_spaces(txt): txt = txt.replace(' ', ' ') - txt = txt.replace('\t', ' ') + txt = txt.replace('\t', '    ') return txt def opf_writer(path, opf_name, manifest, spine, mi): From a58aa5f0e5f455defefe94c10f372d33763e9b75 Mon Sep 17 00:00:00 2001 From: Charles Haley <> Date: Sat, 11 Sep 2010 15:37:11 +0100 Subject: [PATCH 07/23] Fix bug reported in forum: http://www.mobileread.com/forums/showthread.php?t=98242 cache.refresh still used a parameter when calling search that was removed some releases ago. --- src/calibre/library/caches.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/library/caches.py b/src/calibre/library/caches.py index b9c1211c7f..2096180f3c 100644 --- a/src/calibre/library/caches.py +++ b/src/calibre/library/caches.py @@ -549,7 +549,7 @@ class ResultCache(SearchQueryParser): self.sort(field, ascending) self._map_filtered = list(self._map) if self.search_restriction: - self.search('', return_matches=False, ignore_search_restriction=False) + self.search('', return_matches=False) def seriescmp(self, sidx, siidx, x, y, library_order=None): try: From 3766f34aab8b6ae8b78570fb51d17bd92edc39a7 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 11 Sep 2010 11:54:54 -0600 Subject: [PATCH 08/23] Fix regression in filename shortening that caused loss of filename extension --- src/calibre/utils/filenames.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/calibre/utils/filenames.py b/src/calibre/utils/filenames.py index 9fd57ab53c..47ccbe73c2 100644 --- a/src/calibre/utils/filenames.py +++ b/src/calibre/utils/filenames.py @@ -54,10 
+54,8 @@ def shorten_components_to(length, components): r = x[0] if x is components[-1] else '' else: if x is components[-1]: - b, _, e = x.rpartition('.') - if not b and e: - b = e - e = '' + b, e = os.path.splitext(x) + if e == '.': e = '' r = b[:-delta]+e if r.startswith('.'): r = x[0]+r else: From 6eaa75527b5754cfbb8df833ad3375b724d51cfd Mon Sep 17 00:00:00 2001 From: Charles Haley <> Date: Sat, 11 Sep 2010 21:01:26 +0100 Subject: [PATCH 09/23] resort maximum_resort_levels tweak implemented --- resources/default_tweaks.py | 7 +++++++ src/calibre/gui2/library/models.py | 4 ++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/resources/default_tweaks.py b/resources/default_tweaks.py index 66ee4d1471..9d9bc7651c 100644 --- a/resources/default_tweaks.py +++ b/resources/default_tweaks.py @@ -114,3 +114,10 @@ add_new_book_tags_when_importing_books = False # Set the maximum number of tags to show per book in the content server max_content_server_tags_shown=5 + +# Set the maximum number of sort 'levels' that calibre will use to resort the +# library after certain operations such as searches or device insertion. Each +# sort level adds a performance penalty. If the database is large (thousands of +# books) the penalty might be noticeable. If you are not concerned about multi- +# level sorts, and if you are seeing a slowdown, reduce the value of this tweak. 
+maximum_resort_levels = 5 \ No newline at end of file diff --git a/src/calibre/gui2/library/models.py b/src/calibre/gui2/library/models.py index 8ad0cd6818..d2f38cc0a1 100644 --- a/src/calibre/gui2/library/models.py +++ b/src/calibre/gui2/library/models.py @@ -266,8 +266,8 @@ class BooksModel(QAbstractTableModel): # {{{ self.db.refresh(field=None) self.resort(reset=reset) - def resort(self, reset=True, history=5): # Bug report needed history=4 :) - for col,ord in reversed(self.sort_history[:history]): + def resort(self, reset=True): + for col,ord in reversed(self.sort_history[:tweaks['maximum_resort_levels']]): try: col = self.column_map.index(col) except ValueError: From 721e61ef2a1fd090566e232ff9ca65e37400fe44 Mon Sep 17 00:00:00 2001 From: Charles Haley <> Date: Sat, 11 Sep 2010 21:05:05 +0100 Subject: [PATCH 10/23] Clean up tweaks.py formatting (add blank lines) --- resources/default_tweaks.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/resources/default_tweaks.py b/resources/default_tweaks.py index 9d9bc7651c..71bf2c6c37 100644 --- a/resources/default_tweaks.py +++ b/resources/default_tweaks.py @@ -120,4 +120,5 @@ max_content_server_tags_shown=5 # sort level adds a performance penalty. If the database is large (thousands of # books) the penalty might be noticeable. If you are not concerned about multi- # level sorts, and if you are seeing a slowdown, reduce the value of this tweak. 
-maximum_resort_levels = 5 \ No newline at end of file +maximum_resort_levels = 5 + From e531b517670e90cf99b8255fd47775e50450d7d1 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 11 Sep 2010 16:16:57 -0600 Subject: [PATCH 11/23] Code organization --- src/calibre/library/caches.py | 48 ++++++++++++++++----------- src/calibre/library/field_metadata.py | 5 ++- 2 files changed, 33 insertions(+), 20 deletions(-) diff --git a/src/calibre/library/caches.py b/src/calibre/library/caches.py index 2096180f3c..eb0ceb3fe4 100644 --- a/src/calibre/library/caches.py +++ b/src/calibre/library/caches.py @@ -141,6 +141,8 @@ class ResultCache(SearchQueryParser): for x in self.iterall(): yield x[idx] + # Search functions {{{ + def universal_set(self): return set([i[0] for i in self._data if i is not None]) @@ -462,6 +464,30 @@ class ResultCache(SearchQueryParser): continue return matches + def search(self, query, return_matches=False): + ans = self.search_getting_ids(query, self.search_restriction) + if return_matches: + return ans + self._map_filtered = ans + + def search_getting_ids(self, query, search_restriction): + q = '' + if not query or not query.strip(): + q = search_restriction + else: + q = query + if search_restriction: + q = u'%s (%s)' % (search_restriction, query) + if not q: + return list(self._map) + matches = sorted(self.parse(q)) + return [id for id in self._map if id in matches] + + def set_search_restriction(self, s): + self.search_restriction = s + + # }}} + def remove(self, id): self._data[id] = None if id in self._map: @@ -551,6 +577,8 @@ class ResultCache(SearchQueryParser): if self.search_restriction: self.search('', return_matches=False) + # Sorting functions {{{ + def seriescmp(self, sidx, siidx, x, y, library_order=None): try: if library_order: @@ -615,24 +643,6 @@ class ResultCache(SearchQueryParser): self._map.sort(cmp=fcmp, reverse=not ascending) self._map_filtered = [id for id in self._map if id in self._map_filtered] - def search(self, query, 
return_matches=False): - ans = self.search_getting_ids(query, self.search_restriction) - if return_matches: - return ans - self._map_filtered = ans + # }}} - def search_getting_ids(self, query, search_restriction): - q = '' - if not query or not query.strip(): - q = search_restriction - else: - q = query - if search_restriction: - q = u'%s (%s)' % (search_restriction, query) - if not q: - return list(self._map) - matches = sorted(self.parse(q)) - return [id for id in self._map if id in matches] - def set_search_restriction(self, s): - self.search_restriction = s diff --git a/src/calibre/library/field_metadata.py b/src/calibre/library/field_metadata.py index 66cdee51f0..096dfa66fe 100644 --- a/src/calibre/library/field_metadata.py +++ b/src/calibre/library/field_metadata.py @@ -69,6 +69,8 @@ class FieldMetadata(dict): VALID_DATA_TYPES = frozenset([None, 'rating', 'text', 'comments', 'datetime', 'int', 'float', 'bool', 'series']) + # Builtin metadata {{{ + _field_metadata = [ ('authors', {'table':'authors', 'column':'name', @@ -287,7 +289,8 @@ class FieldMetadata(dict): 'search_terms':[], 'is_custom':False, 'is_category':False}), - ] + ] + # }}} # search labels that are not db columns search_items = [ 'all', From 7382552d18d604dff3b5472195fa9f3c07b0186c Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 11 Sep 2010 19:11:30 -0600 Subject: [PATCH 12/23] Much faster sorting code --- src/calibre/library/caches.py | 178 ++++++++++++++++++++++++++++++++-- 1 file changed, 171 insertions(+), 7 deletions(-) diff --git a/src/calibre/library/caches.py b/src/calibre/library/caches.py index eb0ceb3fe4..59d5b45d5f 100644 --- a/src/calibre/library/caches.py +++ b/src/calibre/library/caches.py @@ -607,16 +607,22 @@ class ResultCache(SearchQueryParser): y = UNDEFINED_DATE return cmp(x, y) if subsort and ans == 0: - return cmp(self._data[x][11].lower(), self._data[y][11].lower()) + idx = self.FIELD_MAP['sort'] + return cmp(self._data[x][idx].lower(), self._data[y][idx].lower()) 
return ans - def sort(self, field, ascending, subsort=False): + def sanitize_field_name(self, field): field = field.lower().strip() - if field in ('author', 'tag', 'comment'): - field += 's' - if field == 'date': field = 'timestamp' - elif field == 'title': field = 'sort' - elif field == 'authors': field = 'author_sort' + if field not in self.field_metadata.iterkeys(): + if field in ('author', 'tag', 'comment'): + field += 's' + if field == 'date': field = 'timestamp' + elif field == 'title': field = 'sort' + elif field == 'authors': field = 'author_sort' + return field + + def sort(self, field, ascending, subsort=False): + field = self.sanitize_field_name(field) as_string = field not in ('size', 'rating', 'timestamp') if self.first_sort: @@ -643,6 +649,164 @@ class ResultCache(SearchQueryParser): self._map.sort(cmp=fcmp, reverse=not ascending) self._map_filtered = [id for id in self._map if id in self._map_filtered] + def multisort(self, fields=[], subsort=False): + fields = [(self.sanitize_field_name(x), bool(y)) for x, y in fields] + if subsort and 'sort' not in [x[0] for x in fields]: + fields += [('sort', True)] + if not fields: + fields = [('timestamp', False)] + keys = self.field_metadata.keys() + for f, order in fields: + if f not in keys: + raise ValueError(f + ' not an existing field name') + + keyg = SortKeyGenerator(fields, self.field_metadata, self._data) + if len(fields) == 1: + self._map.sort(key=keyg, reverse=not fields[0][1]) + else: + self._map.sort(key=keyg) + self._map_filtered = [id for id in self._map if id in self._map_filtered] + + +class SortKey(object): + + def __init__(self, orders, values): + self.orders, self.values = orders, values + + def __cmp__(self, other): + for i, ascending in enumerate(self.orders): + ans = cmp(self.values[i], other.values[i]) + if ans != 0: + if not ascending: + ans *= -1 + return ans + return 0 + +class SortKeyGenerator(object): + + def __init__(self, fields, field_metadata, data): + self.field_metadata = 
field_metadata + self.orders = [x[1] for x in fields] + self.entries = [(x[0], field_metadata[x[0]]) for x in fields] + self.library_order = tweaks['title_series_sorting'] == 'library_order' + self.data = data + + def __call__(self, record): + values = tuple(self.itervals(self.data[record])) + if len(values) == 1: + return values[0] + return SortKey(self.orders, values) + + def itervals(self, record): + for name, fm in self.entries: + dt = fm['datatype'] + val = record[fm['rec_index']] + + if dt == 'datetime': + if val is None: + val = UNDEFINED_DATE + + elif dt == 'series': + if val is None: + val = ('', 1) + else: + val = val.lower() + if self.library_order: + val = title_sort(val) + sidx_fm = self.field_metadata[name + '_index'] + sidx = record[sidx_fm['rec_index']] + val = (val, sidx) + + elif dt in ('text', 'comments'): + if val is None: + val = '' + val = val.lower() + yield val + # }}} +if __name__ == '__main__': + # Testing.timing for new multi-sort {{{ + import time + + from calibre.library import db + db = db() + + db.refresh() + + fields = db.field_metadata.keys() + + print fields + + + def do_single_sort(meth, field, order): + if meth == 'old': + db.data.sort(field, order) + else: + db.data.multisort([(field, order)]) + + def test_single_sort(field): + for meth in ('old', 'new'): + ttime = 0 + NUM = 10 + asc = desc = None + for i in range(NUM): + db.data.sort('id', False) + st = time.time() + do_single_sort(meth, field, True) + asc = db.data._map + do_single_sort(meth, field, False) + desc = db.data._map + ttime += time.time() - st + yield (ttime/NUM, asc, desc) + + + print 'Running single sort differentials' + for field in fields: + if field in ('search', 'id', 'news', 'flags'): continue + print '\t', field + old, new = test_single_sort(field) + if old[1] != new[1] or old[2] != new[2]: + print '\t\t', 'Sort failure!' 
+ raise SystemExit(1) + print '\t\t', 'Old:', old[0], 'New:', new[0], 'Ratio: %.2f'%(new[0]/old[0]) + + def do_multi_sort(meth, ms): + if meth == 'new': + db.data.multisort(ms) + else: + for s in reversed(ms): + db.data.sort(*s) + + def test_multi_sort(ms): + for meth in ('old', 'new'): + ttime = 0 + NUM = 10 + for i in range(NUM): + db.data.sort('id', False) + st = time.time() + do_multi_sort(meth, ms) + ttime += time.time() - st + yield (ttime/NUM, db.data._map) + + print 'Running multi-sort differentials' + + for ms in [ + [('timestamp', False), ('author', True), ('title', False)], + [('size', True), ('tags', True), ('author', False)], + [('series', False), ('title', True)], + [('size', True), ('tags', True), ('author', False), ('pubdate', + True), ('tags', False), ('formats', False), ('uuid', True)], + + ]: + print '\t', ms + db.data.sort('id', False) + old, new = test_multi_sort(ms) + if old[1] != new[1]: + print '\t\t', 'Sort failure!' + raise SystemExit() + print '\t\t', 'Old:', old[0], 'New:', new[0], 'Ratio: %.2f'%(new[0]/old[0]) + + # }}} + From bcd0430791f44ec926910eeb8bb18d7cbbff5fc9 Mon Sep 17 00:00:00 2001 From: Charles Haley <> Date: Sun, 12 Sep 2010 13:37:28 +0100 Subject: [PATCH 13/23] Starting from Kovid's multisort: 1) change _map_filtered to an ordered dict to make 'in' operations much faster 2) add a method to field_metadata to return a dict of database fields. 3) fix a couple of places where field_metadata needed to be used. 
4) make changes so gui2.library.models.resort uses multisort --- src/calibre/gui2/library/models.py | 14 +++---- src/calibre/library/caches.py | 59 ++++++++++++++++----------- src/calibre/library/database2.py | 1 + src/calibre/library/field_metadata.py | 3 ++ 4 files changed, 45 insertions(+), 32 deletions(-) diff --git a/src/calibre/gui2/library/models.py b/src/calibre/gui2/library/models.py index d2f38cc0a1..d18516493a 100644 --- a/src/calibre/gui2/library/models.py +++ b/src/calibre/gui2/library/models.py @@ -247,7 +247,7 @@ class BooksModel(QAbstractTableModel): # {{{ # the search and count records for restrictions self.searched.emit(True) - def sort(self, col, order, reset=True, update_history=True): + def sort(self, col, order, reset=True): if not self.db: return self.about_to_be_sorted.emit(self.db.id) @@ -258,8 +258,7 @@ class BooksModel(QAbstractTableModel): # {{{ self.clear_caches() self.reset() self.sorted_on = (label, order) - if update_history: - self.sort_history.insert(0, self.sorted_on) + self.sort_history.insert(0, self.sorted_on) self.sorting_done.emit(self.db.index) def refresh(self, reset=True): @@ -267,12 +266,9 @@ class BooksModel(QAbstractTableModel): # {{{ self.resort(reset=reset) def resort(self, reset=True): - for col,ord in reversed(self.sort_history[:tweaks['maximum_resort_levels']]): - try: - col = self.column_map.index(col) - except ValueError: - col = 0 - self.sort(col, ord, reset=False, update_history=False) + if not self.db: + return + self.db.multisort(self.sort_history[:tweaks['maximum_resort_levels']]) if reset: self.reset() diff --git a/src/calibre/library/caches.py b/src/calibre/library/caches.py index 59d5b45d5f..c342d5ff15 100644 --- a/src/calibre/library/caches.py +++ b/src/calibre/library/caches.py @@ -20,6 +20,7 @@ from calibre.utils.search_query_parser import SearchQueryParser from calibre.utils.pyparsing import ParseException from calibre.ebooks.metadata import title_sort from calibre import fit_image +from 
calibre.utils.ordered_dict import OrderedDict class CoverCache(Thread): @@ -112,7 +113,8 @@ class ResultCache(SearchQueryParser): ''' def __init__(self, FIELD_MAP, field_metadata): self.FIELD_MAP = FIELD_MAP - self._map = self._map_filtered = self._data = [] + self._map = self._data = [] + self._map_filtered = OrderedDict() self.first_sort = True self.search_restriction = '' self.field_metadata = field_metadata @@ -122,14 +124,14 @@ class ResultCache(SearchQueryParser): self.build_numeric_relop_dict() def __getitem__(self, row): - return self._data[self._map_filtered[row]] + return self._data[self._map_filtered.keys()[row]] def __len__(self): return len(self._map_filtered) def __iter__(self): for id in self._map_filtered: - yield self._data[id] + yield id def iterall(self): for x in self._data: @@ -468,7 +470,7 @@ class ResultCache(SearchQueryParser): ans = self.search_getting_ids(query, self.search_restriction) if return_matches: return ans - self._map_filtered = ans + self._map_filtered = OrderedDict.fromkeys(ans, True) def search_getting_ids(self, query, search_restriction): q = '' @@ -480,7 +482,7 @@ class ResultCache(SearchQueryParser): q = u'%s (%s)' % (search_restriction, query) if not q: return list(self._map) - matches = sorted(self.parse(q)) + matches = self.parse(q) return [id for id in self._map if id in matches] def set_search_restriction(self, s): @@ -493,18 +495,18 @@ class ResultCache(SearchQueryParser): if id in self._map: self._map.remove(id) if id in self._map_filtered: - self._map_filtered.remove(id) + del self._map_filtered[id] def set(self, row, col, val, row_is_id=False): - id = row if row_is_id else self._map_filtered[row] + id = row if row_is_id else self._map_filtered.keys()[row] self._data[id][col] = val def get(self, row, col, row_is_id=False): - id = row if row_is_id else self._map_filtered[row] + id = row if row_is_id else self._map_filtered.keys()[row] return self._data[id][col] def index(self, id, cache=False): - x = self._map if 
cache else self._map_filtered + x = self._map if cache else self._map_filtered.keys() return x.index(id) def row(self, id): @@ -544,13 +546,18 @@ class ResultCache(SearchQueryParser): self._data[id].append(db.has_cover(id, index_is_id=True)) self._data[id].append(db.book_on_device_string(id)) self._map[0:0] = ids - self._map_filtered[0:0] = ids + mf = OrderedDict() + for id in ids: + mf[id] = True + for id in self._map_filtered: + mf[id] = True + self._map_filtered = mf def books_deleted(self, ids): for id in ids: self._data[id] = None if id in self._map: self._map.remove(id) - if id in self._map_filtered: self._map_filtered.remove(id) + if id in self._map_filtered: del self._map_filtered[id] def count(self): return len(self._map) @@ -573,7 +580,7 @@ class ResultCache(SearchQueryParser): self._map = [i[0] for i in self._data if i is not None] if field is not None: self.sort(field, ascending) - self._map_filtered = list(self._map) + self._map_filtered = OrderedDict.fromkeys(self._map, True) if self.search_restriction: self.search('', return_matches=False) @@ -644,10 +651,14 @@ class ResultCache(SearchQueryParser): self.FIELD_MAP['series_index'], library_order=tweaks['title_series_sorting'] == 'library_order') else: - fcmp = functools.partial(self.cmp, self.FIELD_MAP[field], + fcmp = functools.partial(self.cmp, self.field_metadata[field]['rec_index'], subsort=subsort, asstr=as_string) self._map.sort(cmp=fcmp, reverse=not ascending) - self._map_filtered = [id for id in self._map if id in self._map_filtered] + mf = OrderedDict() + for id in self._map: + if id in self._map_filtered: + mf[id] = True + self._map_filtered = mf def multisort(self, fields=[], subsort=False): fields = [(self.sanitize_field_name(x), bool(y)) for x, y in fields] @@ -655,7 +666,7 @@ class ResultCache(SearchQueryParser): fields += [('sort', True)] if not fields: fields = [('timestamp', False)] - keys = self.field_metadata.keys() + keys = self.field_metadata.field_keys() for f, order in fields: if 
f not in keys: raise ValueError(f + ' not an existing field name') @@ -665,7 +676,11 @@ class ResultCache(SearchQueryParser): self._map.sort(key=keyg, reverse=not fields[0][1]) else: self._map.sort(key=keyg) - self._map_filtered = [id for id in self._map if id in self._map_filtered] + mf = OrderedDict() + for id in self._map: + if id in self._map_filtered: + mf[id] = id + self._map_filtered = mf class SortKey(object): @@ -677,16 +692,14 @@ class SortKey(object): for i, ascending in enumerate(self.orders): ans = cmp(self.values[i], other.values[i]) if ans != 0: - if not ascending: - ans *= -1 - return ans + return ans * ascending return 0 class SortKeyGenerator(object): def __init__(self, fields, field_metadata, data): self.field_metadata = field_metadata - self.orders = [x[1] for x in fields] + self.orders = [-1 if x[1] else 1 for x in fields] self.entries = [(x[0], field_metadata[x[0]]) for x in fields] self.library_order = tweaks['title_series_sorting'] == 'library_order' self.data = data @@ -735,7 +748,7 @@ if __name__ == '__main__': db.refresh() - fields = db.field_metadata.keys() + fields = db.field_metadata.field_keys() print fields @@ -765,7 +778,7 @@ if __name__ == '__main__': print 'Running single sort differentials' for field in fields: if field in ('search', 'id', 'news', 'flags'): continue - print '\t', field + print '\t', field, db.field_metadata[field]['datatype'] old, new = test_single_sort(field) if old[1] != new[1] or old[2] != new[2]: print '\t\t', 'Sort failure!' 
@@ -797,7 +810,7 @@ if __name__ == '__main__': [('size', True), ('tags', True), ('author', False)], [('series', False), ('title', True)], [('size', True), ('tags', True), ('author', False), ('pubdate', - True), ('tags', False), ('formats', False), ('uuid', True)], + True), ('series', False), ('formats', False), ('uuid', True)], ]: print '\t', ms diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py index 4106f8c965..8a5ab75c3c 100644 --- a/src/calibre/library/database2.py +++ b/src/calibre/library/database2.py @@ -311,6 +311,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): self.search_getting_ids = self.data.search_getting_ids self.refresh = functools.partial(self.data.refresh, self) self.sort = self.data.sort + self.multisort = self.data.multisort self.index = self.data.index self.refresh_ids = functools.partial(self.data.refresh_ids, self) self.row = self.data.row diff --git a/src/calibre/library/field_metadata.py b/src/calibre/library/field_metadata.py index 096dfa66fe..276a6ba971 100644 --- a/src/calibre/library/field_metadata.py +++ b/src/calibre/library/field_metadata.py @@ -335,6 +335,9 @@ class FieldMetadata(dict): def keys(self): return self._tb_cats.keys() + def field_keys(self): + return [k for k in self._tb_cats.keys() if self._tb_cats[k]['kind']=='field'] + def iterkeys(self): for key in self._tb_cats: yield key From 8b09f4c293e82ff797635320c42487d9be190831 Mon Sep 17 00:00:00 2001 From: Charles Haley <> Date: Sun, 12 Sep 2010 13:42:37 +0100 Subject: [PATCH 14/23] Restore the second 'tags' to the tests --- src/calibre/library/caches.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/library/caches.py b/src/calibre/library/caches.py index c342d5ff15..882de975db 100644 --- a/src/calibre/library/caches.py +++ b/src/calibre/library/caches.py @@ -810,7 +810,7 @@ if __name__ == '__main__': [('size', True), ('tags', True), ('author', False)], [('series', False), ('title', 
True)], [('size', True), ('tags', True), ('author', False), ('pubdate', - True), ('series', False), ('formats', False), ('uuid', True)], + True), ('tags', False), ('formats', False), ('uuid', True)], ]: print '\t', ms From 5626418d1a6993b16f3d6a83c22a761a7490b7ee Mon Sep 17 00:00:00 2001 From: Charles Haley <> Date: Sun, 12 Sep 2010 14:51:21 +0100 Subject: [PATCH 15/23] Correct regression in device handling -- sorting after sending a book. --- src/calibre/gui2/library/models.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/calibre/gui2/library/models.py b/src/calibre/gui2/library/models.py index d18516493a..c746a5aa56 100644 --- a/src/calibre/gui2/library/models.py +++ b/src/calibre/gui2/library/models.py @@ -1024,6 +1024,11 @@ class DeviceBooksModel(BooksModel): # {{{ if reset: self.reset() + def resort(self, reset=True): + if self.sorted_on: + self.sort(self.column_map.index(self.sorted_on[0]), + self.sorted_on[1], reset=reset) + def columnCount(self, parent): if parent and parent.isValid(): return 0 From 6cc332089a421e6100fa4937c5126309c483e132 Mon Sep 17 00:00:00 2001 From: Starson17 Date: Sun, 12 Sep 2010 11:28:24 -0400 Subject: [PATCH 16/23] Change Merge and Safe Merge warnings re ISBN --- src/calibre/gui2/actions/edit_metadata.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/calibre/gui2/actions/edit_metadata.py b/src/calibre/gui2/actions/edit_metadata.py index f0232d9859..878ba77a43 100644 --- a/src/calibre/gui2/actions/edit_metadata.py +++ b/src/calibre/gui2/actions/edit_metadata.py @@ -209,8 +209,9 @@ class EditMetadataAction(InterfaceAction): dest_id, src_books, src_ids = self.books_to_merge(rows) if safe_merge: if not confirm('

'+_( - 'All book formats and metadata from the selected books ' - 'will be added to the first selected book.

' + 'Book formats and metadata from the selected books ' + 'will be added to the first selected book. ' + 'ISBN will not be merged.

' 'The second and subsequently selected books will not ' 'be deleted or changed.

' 'Please confirm you want to proceed.') @@ -220,8 +221,9 @@ class EditMetadataAction(InterfaceAction): self.merge_metadata(dest_id, src_ids) else: if not confirm('

'+_( - 'All book formats and metadata from the selected books will be merged ' - 'into the first selected book.

' + 'Book formats and metadata from the selected books will be merged ' + 'into the first selected book. ' + 'ISBN will not be merged.

' 'After merger the second and ' 'subsequently selected books will be deleted.

' 'All book formats of the first selected book will be kept ' From 78874a9117941de749f3b09934be8588181dd4b7 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 12 Sep 2010 09:32:16 -0600 Subject: [PATCH 17/23] Use the new sorting code in the content server as well. --- src/calibre/library/caches.py | 153 +------------------------- src/calibre/library/server/content.py | 38 +++---- 2 files changed, 18 insertions(+), 173 deletions(-) diff --git a/src/calibre/library/caches.py b/src/calibre/library/caches.py index dfd7086076..4f795ab733 100644 --- a/src/calibre/library/caches.py +++ b/src/calibre/library/caches.py @@ -6,7 +6,7 @@ __license__ = 'GPL v3' __copyright__ = '2010, Kovid Goyal ' __docformat__ = 'restructuredtext en' -import re, itertools, functools +import re, itertools from itertools import repeat from datetime import timedelta from threading import Thread, RLock @@ -584,39 +584,7 @@ class ResultCache(SearchQueryParser): # Sorting functions {{{ - def seriescmp(self, sidx, siidx, x, y, library_order=None): - try: - if library_order: - ans = cmp(title_sort(self._data[x][sidx].lower()), - title_sort(self._data[y][sidx].lower())) - else: - ans = cmp(self._data[x][sidx].lower(), - self._data[y][sidx].lower()) - except AttributeError: # Some entries may be None - ans = cmp(self._data[x][sidx], self._data[y][sidx]) - if ans != 0: return ans - return cmp(self._data[x][siidx], self._data[y][siidx]) - - def cmp(self, loc, x, y, asstr=True, subsort=False): - try: - ans = cmp(self._data[x][loc].lower(), self._data[y][loc].lower()) if \ - asstr else cmp(self._data[x][loc], self._data[y][loc]) - except AttributeError: # Some entries may be None - ans = cmp(self._data[x][loc], self._data[y][loc]) - except TypeError: ## raised when a datetime is None - x = self._data[x][loc] - if x is None: - x = UNDEFINED_DATE - y = self._data[y][loc] - if y is None: - y = UNDEFINED_DATE - return cmp(x, y) - if subsort and ans == 0: - idx = self.FIELD_MAP['sort'] - return 
cmp(self._data[x][idx].lower(), self._data[y][idx].lower()) - return ans - - def sanitize_field_name(self, field): + def sanitize_sort_field_name(self, field): field = field.lower().strip() if field not in self.field_metadata.iterkeys(): if field in ('author', 'tag', 'comment'): @@ -627,38 +595,10 @@ class ResultCache(SearchQueryParser): return field def sort(self, field, ascending, subsort=False): - field = self.sanitize_field_name(field) - as_string = field not in ('size', 'rating', 'timestamp') - - if self.first_sort: - subsort = True - self.first_sort = False - if self.field_metadata[field]['is_custom']: - if self.field_metadata[field]['datatype'] == 'series': - fcmp = functools.partial(self.seriescmp, - self.field_metadata[field]['rec_index'], - self.field_metadata.cc_series_index_column_for(field), - library_order=tweaks['title_series_sorting'] == 'library_order') - else: - as_string = self.field_metadata[field]['datatype'] in ('comments', 'text') - field = self.field_metadata[field]['colnum'] - fcmp = functools.partial(self.cmp, self.FIELD_MAP[field], - subsort=subsort, asstr=as_string) - elif field == 'series': - fcmp = functools.partial(self.seriescmp, self.FIELD_MAP['series'], - self.FIELD_MAP['series_index'], - library_order=tweaks['title_series_sorting'] == 'library_order') - else: - fcmp = functools.partial(self.cmp, self.field_metadata[field]['rec_index'], - subsort=subsort, asstr=as_string) - self._map.sort(cmp=fcmp, reverse=not ascending) - tmap = list(itertools.repeat(False, len(self._data))) - for x in self._map_filtered: - tmap[x] = True - self._map_filtered = [x for x in self._map if tmap[x]] + self.multisort([(field, ascending)]) def multisort(self, fields=[], subsort=False): - fields = [(self.sanitize_field_name(x), bool(y)) for x, y in fields] + fields = [(self.sanitize_sort_field_name(x), bool(y)) for x, y in fields] keys = self.field_metadata.field_keys() fields = [x for x in fields if x[0] in keys] if subsort and 'sort' not in [x[0] for x 
in fields]: @@ -671,6 +611,7 @@ class ResultCache(SearchQueryParser): self._map.sort(key=keyg, reverse=not fields[0][1]) else: self._map.sort(key=keyg) + tmap = list(itertools.repeat(False, len(self._data))) for x in self._map_filtered: tmap[x] = True @@ -733,87 +674,3 @@ class SortKeyGenerator(object): # }}} -if __name__ == '__main__': - # Testing.timing for new multi-sort {{{ - import time - - from calibre.library import db - db = db() - - db.refresh() - - fields = db.field_metadata.field_keys() - - print fields - - - def do_single_sort(meth, field, order): - if meth == 'old': - db.data.sort(field, order) - else: - db.data.multisort([(field, order)]) - - def test_single_sort(field): - for meth in ('old', 'new'): - ttime = 0 - NUM = 10 - asc = desc = None - for i in range(NUM): - db.data.sort('id', False) - st = time.time() - do_single_sort(meth, field, True) - asc = db.data._map - do_single_sort(meth, field, False) - desc = db.data._map - ttime += time.time() - st - yield (ttime/NUM, asc, desc) - - - print 'Running single sort differentials' - for field in fields: - if field in ('search', 'id', 'news', 'flags'): continue - print '\t', field, db.field_metadata[field]['datatype'] - old, new = test_single_sort(field) - if old[1] != new[1] or old[2] != new[2]: - print '\t\t', 'Sort failure!' 
- raise SystemExit(1) - print '\t\t', 'Old:', old[0], 'New:', new[0], 'Ratio: %.2f'%(new[0]/old[0]) - - def do_multi_sort(meth, ms): - if meth == 'new': - db.data.multisort(ms) - else: - for s in reversed(ms): - db.data.sort(*s) - - def test_multi_sort(ms): - for meth in ('old', 'new'): - ttime = 0 - NUM = 10 - for i in range(NUM): - db.data.sort('id', False) - st = time.time() - do_multi_sort(meth, ms) - ttime += time.time() - st - yield (ttime/NUM, db.data._map) - - print 'Running multi-sort differentials' - - for ms in [ - [('timestamp', False), ('author', True), ('title', False)], - [('size', True), ('tags', True), ('author', False)], - [('series', False), ('title', True)], - [('size', True), ('tags', True), ('author', False), ('pubdate', - True), ('tags', False), ('formats', False), ('uuid', True)], - - ]: - print '\t', ms - db.data.sort('id', False) - old, new = test_multi_sort(ms) - if old[1] != new[1]: - print '\t\t', 'Sort failure!' - raise SystemExit() - print '\t\t', 'Old:', old[0], 'New:', new[0], 'Ratio: %.2f'%(new[0]/old[0]) - - # }}} - diff --git a/src/calibre/library/server/content.py b/src/calibre/library/server/content.py index 6784abd8f4..ecb467b4c2 100644 --- a/src/calibre/library/server/content.py +++ b/src/calibre/library/server/content.py @@ -5,7 +5,7 @@ __license__ = 'GPL v3' __copyright__ = '2010, Kovid Goyal ' __docformat__ = 'restructuredtext en' -import re, os, cStringIO, operator +import re, os, cStringIO import cherrypy try: @@ -16,7 +16,15 @@ except ImportError: from calibre import fit_image, guess_type from calibre.utils.date import fromtimestamp -from calibre.ebooks.metadata import title_sort +from calibre.library.caches import SortKeyGenerator + +class CSSortKeyGenerator(SortKeyGenerator): + + def __init__(self, fields, fm): + SortKeyGenerator.__init__(self, fields, fm, None) + + def __call__(self, record): + return self.itervals(record).next() class ContentServer(object): @@ -47,32 +55,12 @@ class ContentServer(object): def 
sort(self, items, field, order): - field = field.lower().strip() - if field == 'author': - field = 'authors' - if field == 'date': - field = 'timestamp' + field = self.db.data.sanitize_sort_field_name(field) if field not in ('title', 'authors', 'rating', 'timestamp', 'tags', 'size', 'series'): raise cherrypy.HTTPError(400, '%s is not a valid sort field'%field) - cmpf = cmp if field in ('rating', 'size', 'timestamp') else \ - lambda x, y: cmp(x.lower() if x else '', y.lower() if y else '') - if field == 'series': - items.sort(cmp=self.seriescmp, reverse=not order) - else: - lookup = 'sort' if field == 'title' else field - lookup = 'author_sort' if field == 'authors' else field - field = self.db.FIELD_MAP[lookup] - getter = operator.itemgetter(field) - items.sort(cmp=lambda x, y: cmpf(getter(x), getter(y)), reverse=not order) + keyg = CSSortKeyGenerator([(field, order)], self.db.field_metadata) + items.sort(key=keyg, reverse=not order) - def seriescmp(self, x, y): - si = self.db.FIELD_MAP['series'] - try: - ans = cmp(title_sort(x[si].lower()), title_sort(y[si].lower())) - except AttributeError: # Some entries may be None - ans = cmp(x[si], y[si]) - if ans != 0: return ans - return cmp(x[self.db.FIELD_MAP['series_index']], y[self.db.FIELD_MAP['series_index']]) # }}} From 80c976e0f24f05a5ee7a9bfce50bf7745215e339 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 12 Sep 2010 11:11:00 -0600 Subject: [PATCH 18/23] Fix #6794 (Updated recipes for Infobae and NSPM) --- resources/recipes/infobae.recipe | 82 ++++++++------------------------ resources/recipes/nspm.recipe | 11 ++++- 2 files changed, 30 insertions(+), 63 deletions(-) diff --git a/resources/recipes/infobae.recipe b/resources/recipes/infobae.recipe index cda9bf83d2..b7f9cd3c6c 100644 --- a/resources/recipes/infobae.recipe +++ b/resources/recipes/infobae.recipe @@ -1,12 +1,8 @@ -#!/usr/bin/env python - __license__ = 'GPL v3' -__copyright__ = '2008-2009, Darko Miletic ' +__copyright__ = '2008-2010, Darko Miletic ' 
''' infobae.com ''' -import re -import urllib, urlparse from calibre.web.feeds.news import BasicNewsRecipe @@ -20,35 +16,24 @@ class Infobae(BasicNewsRecipe): max_articles_per_feed = 100 no_stylesheets = True use_embedded_content = False - language = 'es' - lang = 'es-AR' - + language = 'es' encoding = 'cp1252' - cover_url = 'http://www.infobae.com/imgs/header/header.gif' + masthead_url = 'http://www.infobae.com/imgs/header/header.gif' remove_javascript = True - preprocess_regexps = [(re.compile( - r''), lambda m:'')] - - - html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True' - - extra_css = ''' - .col-center{font-family:Arial,Helvetica,sans-serif;} - h1{font-family:Arial,Helvetica,sans-serif; color:#0D4261;} - .fuenteIntNota{font-family:Arial,Helvetica,sans-serif; color:#1D1D1D; font-size:x-small;} - ''' - - keep_only_tags = [dict(name='div', attrs={'class':['content']})] - - - remove_tags = [ - dict(name='div', attrs={'class':['options','col-right','controles', 'bannerLibre','tiulo-masleidas','masleidas-h']}), - dict(name='a', attrs={'name' : 'comentario',}), - dict(name='iframe'), - dict(name='img', alt = "Ver galerias de imagenes"), - - ] - + remove_empty_feeds = True + extra_css = ''' + body{font-family:Arial,Helvetica,sans-serif;} + .popUpTitulo{color:#0D4261; font-size: xx-large} + ''' + + conversion_options = { + 'comment' : description + , 'tags' : category + , 'publisher' : publisher + , 'language' : language + , 'linearize_tables' : True + } + feeds = [ (u'Noticias' , u'http://www.infobae.com/adjuntos/html/RSS/hoy.xml' ) @@ -57,39 +42,14 @@ class Infobae(BasicNewsRecipe): ,(u'Deportes' , u'http://www.infobae.com/adjuntos/html/RSS/deportes.xml' ) ] -# def print_version(self, url): -# main, sep, article_part = url.partition('contenidos/') -# article_id, rsep, rrest = article_part.partition('-') -# return u'http://www.infobae.com/notas/nota_imprimir.php?Idx=' + article_id - - def 
get_article_url(self, article): - ans = article.get('link').encode('utf-8') - parts = list(urlparse.urlparse(ans)) - parts[2] = urllib.quote(parts[2]) - ans = urlparse.urlunparse(parts) - return ans.decode('utf-8') - - - def preprocess_html(self, soup): - - for tag in soup.head.findAll('strong'): - tag.extract() - for tag in soup.findAll('meta'): - del tag['content'] - tag.extract() - - mtag = '\n\n' - soup.head.insert(0,mtag) - for item in soup.findAll(style=True): - del item['style'] - - return soup + def print_version(self, url): + article_part = url.rpartition('/')[2] + article_id= article_part.partition('-')[0] + return 'http://www.infobae.com/notas/nota_imprimir.php?Idx=' + article_id def postprocess_html(self, soup, first): - for tag in soup.findAll(name='strong'): tag.name = 'b' - return soup diff --git a/resources/recipes/nspm.recipe b/resources/recipes/nspm.recipe index 13ff42b277..29f2cfc5e3 100644 --- a/resources/recipes/nspm.recipe +++ b/resources/recipes/nspm.recipe @@ -6,6 +6,7 @@ nspm.rs import re from calibre.web.feeds.news import BasicNewsRecipe +from calibre.ebooks.BeautifulSoup import Tag, NavigableString class Nspm(BasicNewsRecipe): title = 'Nova srpska politicka misao' @@ -21,6 +22,7 @@ class Nspm(BasicNewsRecipe): encoding = 'utf-8' language = 'sr' delay = 2 + remove_empty_feeds = True publication_type = 'magazine' masthead_url = 'http://www.nspm.rs/templates/jsn_epic_pro/images/logol.jpg' extra_css = """ @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @@ -45,8 +47,9 @@ class Nspm(BasicNewsRecipe): dict(name=['link','object','embed','script','meta','base','iframe']) ,dict(attrs={'class':'buttonheading'}) ] - remove_tags_after = dict(attrs={'class':'article_separator'}) - remove_attributes = ['width','height'] + remove_tags_before = dict(attrs={'class':'contentheading'}) + remove_tags_after = dict(attrs={'class':'article_separator'}) + remove_attributes = ['width','height'] def get_browser(self): br = 
BasicNewsRecipe.get_browser() @@ -67,4 +70,8 @@ class Nspm(BasicNewsRecipe): def preprocess_html(self, soup): for item in soup.body.findAll(style=True): del item['style'] + for item in soup.body.findAll('h1'): + nh = NavigableString(item.a.string) + item.a.extract() + item.insert(0,nh) return self.adeify_images(soup) From de6aadee76d4dafe9b84133dc3af43ddef22fd0a Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 13 Sep 2010 10:15:35 -0600 Subject: [PATCH 19/23] News download: Fix bug that could break some downloads in non ASCII locales --- resources/recipes/xkcd.recipe | 6 +++--- src/calibre/web/feeds/__init__.py | 4 +++- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/resources/recipes/xkcd.recipe b/resources/recipes/xkcd.recipe index 312027004e..ad0d420deb 100644 --- a/resources/recipes/xkcd.recipe +++ b/resources/recipes/xkcd.recipe @@ -24,18 +24,18 @@ class XkcdCom(BasicNewsRecipe): (re.compile(r'()'), lambda m: '%s%s

%s

' % (m.group(1), m.group(3), m.group(2))) ] - + def parse_index(self): INDEX = 'http://xkcd.com/archive/' - soup = self.index_to_soup(INDEX) + soup = self.index_to_soup(INDEX) articles = [] for item in soup.findAll('a', title=True): articles.append({ 'date': item['title'], 'timestamp': time.mktime(time.strptime(item['title'], '%Y-%m-%d'))+1, 'url': 'http://xkcd.com' + item['href'], - 'title': self.tag_to_string(item).encode('UTF-8'), + 'title': self.tag_to_string(item), 'description': '', 'content': '', }) diff --git a/src/calibre/web/feeds/__init__.py b/src/calibre/web/feeds/__init__.py index a70cf8b664..8aef350498 100644 --- a/src/calibre/web/feeds/__init__.py +++ b/src/calibre/web/feeds/__init__.py @@ -165,7 +165,9 @@ class Feed(object): if delta.days*24*3600 + delta.seconds <= 24*3600*self.oldest_article: self.articles.append(article) else: - self.logger.debug('Skipping article %s (%s) from feed %s as it is too old.'%(title, article.localtime.strftime('%a, %d %b, %Y %H:%M'), self.title)) + t = strftime(u'%a, %d %b, %Y %H:%M', article.localtime.timetuple()) + self.logger.debug('Skipping article %s (%s) from feed %s as it is too old.'% + (title, t, self.title)) d = item.get('date', '') article.formatted_date = d From 8b73bb52e8d551538d0c0e55e7b91b6b16f69977 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 13 Sep 2010 16:42:22 -0600 Subject: [PATCH 20/23] Fix #6802 (Sovos E Reader Not Recognised / Floppy Drive Activation) --- src/calibre/customize/builtins.py | 3 ++- src/calibre/devices/teclast/driver.py | 11 +++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py index 4c87236e71..68df832048 100644 --- a/src/calibre/customize/builtins.py +++ b/src/calibre/customize/builtins.py @@ -459,7 +459,7 @@ from calibre.devices.iriver.driver import IRIVER_STORY from calibre.devices.binatone.driver import README from calibre.devices.hanvon.driver import N516, EB511, ALEX, AZBOOKA, 
THEBOOK from calibre.devices.edge.driver import EDGE -from calibre.devices.teclast.driver import TECLAST_K3, NEWSMY, IPAPYRUS +from calibre.devices.teclast.driver import TECLAST_K3, NEWSMY, IPAPYRUS, SOVOS from calibre.devices.sne.driver import SNE from calibre.devices.misc import PALMPRE, AVANT, SWEEX, PDNOVEL, KOGAN, GEMEI from calibre.devices.folder_device.driver import FOLDER_DEVICE_FOR_CONFIG @@ -557,6 +557,7 @@ plugins += [ TECLAST_K3, NEWSMY, IPAPYRUS, + SOVOS, EDGE, SNE, ALEX, diff --git a/src/calibre/devices/teclast/driver.py b/src/calibre/devices/teclast/driver.py index 0c60a367cf..2055ff9306 100644 --- a/src/calibre/devices/teclast/driver.py +++ b/src/calibre/devices/teclast/driver.py @@ -52,3 +52,14 @@ class IPAPYRUS(TECLAST_K3): VENDOR_NAME = 'E_READER' WINDOWS_MAIN_MEM = '' +class SOVOS(TECLAST_K3): + + name = 'Sovos device interface' + gui_name = 'Sovos' + description = _('Communicate with the Sovos reader.') + + FORMATS = ['epub', 'fb2', 'pdf', 'txt'] + + VENDOR_NAME = 'RK28XX' + WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = 'USB-MSC' + From fb053fe3f37d531a170bb2a1d67ccf70ea030351 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 13 Sep 2010 16:58:09 -0600 Subject: [PATCH 21/23] Fix #6773 (Slightly broken CHM file) --- src/calibre/ebooks/chm/reader.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/calibre/ebooks/chm/reader.py b/src/calibre/ebooks/chm/reader.py index 67a2d36607..831c16bf6a 100644 --- a/src/calibre/ebooks/chm/reader.py +++ b/src/calibre/ebooks/chm/reader.py @@ -132,7 +132,11 @@ class CHMReader(CHMFile): for path in self.Contents(): lpath = os.path.join(output_dir, path) self._ensure_dir(lpath) - data = self.GetFile(path) + try: + data = self.GetFile(path) + except: + self.log.exception('Failed to extract %s from CHM, ignoring'%path) + continue if lpath.find(';') != -1: # fix file names with ";" at the end, see _reformat() lpath = lpath.split(';')[0] From ba5de1c92d797abc1f82782c7e15bd61dfa387c5 Mon Sep 17 
00:00:00 2001 From: Kovid Goyal Date: Mon, 13 Sep 2010 18:18:32 -0600 Subject: [PATCH 22/23] Conversion pipeline: When setting margins on the body element, explicitly set padding to 0 to override any existing padding in the input document --- src/calibre/ebooks/oeb/transforms/flatcss.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/calibre/ebooks/oeb/transforms/flatcss.py b/src/calibre/ebooks/oeb/transforms/flatcss.py index f48bdb9934..ffdc641d1e 100644 --- a/src/calibre/ebooks/oeb/transforms/flatcss.py +++ b/src/calibre/ebooks/oeb/transforms/flatcss.py @@ -138,6 +138,7 @@ class CSSFlattener(object): float(self.context.margin_left)) bs.append('margin-right : %fpt'%\ float(self.context.margin_right)) + bs.extend(['padding-left: 0pt', 'padding-right: 0pt']) if self.context.change_justification != 'original': bs.append('text-align: '+ self.context.change_justification) body.set('style', '; '.join(bs)) From c5063b8633506f3b661d3e3dcc84d7ec68e74345 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 13 Sep 2010 18:26:51 -0600 Subject: [PATCH 23/23] Fix #6804 (Timeout error when browsing content server via browser) --- resources/content_server/gui.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/resources/content_server/gui.js b/resources/content_server/gui.js index 631fb8b617..d0fb49cc8e 100644 --- a/resources/content_server/gui.js +++ b/resources/content_server/gui.js @@ -26,7 +26,7 @@ var current_library_request = null; ////////////////////////////// GET BOOK LIST ////////////////////////////// -var LIBRARY_FETCH_TIMEOUT = 30000; // milliseconds +var LIBRARY_FETCH_TIMEOUT = 5*60000; // milliseconds function create_table_headers() { var thead = $('table#book_list thead tr');