diff --git a/recipes/icons/le_monde_diplomatique_fr.png b/recipes/icons/le_monde_diplomatique_fr.png new file mode 100644 index 0000000000..ae4547977f Binary files /dev/null and b/recipes/icons/le_monde_diplomatique_fr.png differ diff --git a/recipes/le_monde_diplomatique_fr.recipe b/recipes/le_monde_diplomatique_fr.recipe new file mode 100644 index 0000000000..f7c3b30fa0 --- /dev/null +++ b/recipes/le_monde_diplomatique_fr.recipe @@ -0,0 +1,111 @@ +# vim:fileencoding=utf-8 +from __future__ import unicode_literals + +__license__ = 'GPL v3' +__copyright__ = '2013' +''' +monde-diplomatique.fr +''' + +import re +from calibre.web.feeds.news import BasicNewsRecipe +from calibre.web.feeds import feeds_from_index + +class LeMondeDiplomatiqueSiteWeb(BasicNewsRecipe): + title = u'Le Monde diplomatique.fr' + __author__ = 'Gaëtan Lehmann' + description = "Le Monde diplomatique est un mensuel français d’information et d’opinion à la ligne éditoriale nettement engagée en faveur d'une gauche de rupture avec le capitalisme. Il aborde de nombreux sujets — géopolitique, relations internationales, économie, questions sociales, écologie, culture, médias, …" # noqa + oldest_article = 7 + max_articles_per_feed = 100 + auto_cleanup = True + publisher = 'monde-diplomatique.fr' + category = 'news, France, world' + language = 'fr' + masthead_url = 'http://www.monde-diplomatique.fr/squelettes/images/logotyfa.png' + timefmt = ' [%d %b %Y]' + no_stylesheets = True + + feeds = [(u'Blogs', u'http://blog.mondediplo.net/spip.php?page=backend'), (u'Archives', u'http://www.monde-diplomatique.fr/rss/')] + + preprocess_regexps = [ + (re.compile(r'(.*) - Les blogs du Diplo'), lambda m: '' + m.group(1) + ''), + (re.compile(r'

(.*) - Les blogs du Diplo

'), lambda m: '

' + m.group(1) + '

'), + (re.compile(r'(.*) \(Le Monde diplomatique\)'), lambda m: '' + m.group(1) + ''), + (re.compile(r'

(.*) \(Le Monde diplomatique\)

'), lambda m: '

' + m.group(1) + '

'), + (re.compile(r'

Grand format

'), lambda m: '')] + + remove_tags = [dict(name='div', attrs={'class':'voiraussi liste'}), + dict(name='ul', attrs={'class':'hermetique carto hombre_demi_inverse'}), + dict(name='a', attrs={'class':'tousles'}), + dict(name='h3', attrs={'class':'cat'}), + dict(name='div', attrs={'class':'logodiplo'}), + dict(name='img', attrs={'class':'spip_logos'}), + dict(name='p', attrs={'id':'hierarchie'}), + dict(name='div', attrs={'class':'espace'})] + + conversion_options = { + 'comments' : description + ,'tags' : category + ,'language' : language + ,'publisher' : publisher + ,'linearize_tables': True + } + + remove_empty_feeds = True + + filterDuplicates = True + + # don't use parse_index - we need it to send an exception so we can mix + # feed and parse_index results in parse_feeds + def parse_index_valise(self): + articles = [] + soup = self.index_to_soup('http://www.monde-diplomatique.fr/carnet/') + cnt = soup.find('ul',attrs={'class':'hermetique liste'}) + for item in cnt.findAll('li'): + description = '' + feed_link = item.find('a') + desc = item.find('div',attrs={'class':'intro'}) + date = item.find('div',attrs={'class':'dates_auteurs'}) + if desc: + description = desc.string + if feed_link and feed_link.has_key('href'): + url = 'http://www.monde-diplomatique.fr' + feed_link['href'] + title = self.tag_to_string(feed_link) + articles.append({ + 'title' :title + ,'date' :date.string.strip() + ,'url' :url + ,'description':description + }) + return [("La valise diplomatique", articles)] + + def parse_index_cartes(self): + articles = [] + soup = self.index_to_soup('http://www.monde-diplomatique.fr/cartes/') + cnt = soup.find('div',attrs={'class':'decale hermetique'}) + for item in cnt.findAll('div',attrs={'class':re.compile('grid_3 filet hombre_demi')}): + feed_link = item.find('a',attrs={'class':'couve'}) + h3 = item.find('h3') + authorAndDate = item.find('div',attrs={'class':'dates_auteurs'}) + author, date = authorAndDate.string.strip().split(', ') + if feed_link and feed_link.has_key('href'): + url = 'http://www.monde-diplomatique.fr' + feed_link['href'] + title = self.tag_to_string(h3) + articles.append({ + 'title' :title + ,'date' :date + ,'url' :url + ,'description': author + }) + return [("Cartes", articles)] + + def parse_feeds(self): + feeds = BasicNewsRecipe.parse_feeds(self) + valise = feeds_from_index(self.parse_index_valise(), oldest_article=self.oldest_article, + max_articles_per_feed=self.max_articles_per_feed, + log=self.log) + cartes = feeds_from_index(self.parse_index_cartes(), oldest_article=self.oldest_article, + max_articles_per_feed=self.max_articles_per_feed, + log=self.log) + feeds = valise + feeds + cartes + return feeds diff --git a/src/calibre/db/backend.py b/src/calibre/db/backend.py index 8ebdc4a154..0ebc9679b7 100644 --- a/src/calibre/db/backend.py +++ b/src/calibre/db/backend.py @@ -8,7 +8,7 @@ __copyright__ = '2011, Kovid Goyal ' __docformat__ = 'restructuredtext en' # Imports {{{ -import os, shutil, uuid, json, glob, time +import os, shutil, uuid, json, glob, time, cPickle from functools import partial import apsw @@ -1216,5 +1216,27 @@ class DB(object): def get_ids_for_custom_book_data(self, name): return frozenset(r[0] for r in self.conn.execute('SELECT book FROM books_plugin_data WHERE name=?', (name,))) + def conversion_options(self, book_id, fmt): + for (data,) in self.conn.get('SELECT data FROM conversion_options WHERE book=? AND format=?', (book_id, fmt.upper())): + if data: + return cPickle.loads(bytes(data)) + + def has_conversion_options(self, ids, fmt='PIPE'): + ids = frozenset(ids) + self.conn.execute('DROP TABLE IF EXISTS conversion_options_temp; CREATE TEMP TABLE conversion_options_temp (id INTEGER PRIMARY KEY);') + self.conn.executemany('INSERT INTO conversion_options_temp VALUES (?)', [(x,) for x in ids]) + for (book_id,) in self.conn.get( + 'SELECT book FROM conversion_options WHERE format=? AND book IN (SELECT id FROM conversion_options_temp)', (fmt.upper(),)): + return True + return False + + def delete_conversion_options(self, book_ids, fmt): + self.conn.executemany('DELETE FROM conversion_options WHERE book=? AND format=?', + [(book_id, fmt.upper()) for book_id in book_ids]) + + def set_conversion_options(self, options, fmt): + options = [(book_id, fmt.upper(), buffer(cPickle.dumps(data, -1))) for book_id, data in options.iteritems()] + self.conn.executemany('INSERT OR REPLACE INTO conversion_options(book,format,data) VALUES (?,?,?)', options) + # }}} diff --git a/src/calibre/db/cache.py b/src/calibre/db/cache.py index bc8f1024f5..119e166c49 100644 --- a/src/calibre/db/cache.py +++ b/src/calibre/db/cache.py @@ -265,8 +265,10 @@ class Cache(object): for name, field in self.fields.iteritems(): if name[0] == '#' and name.endswith('_index'): field.series_field = self.fields[name[:-len('_index')]] + self.fields[name[:-len('_index')]].index_field = field elif name == 'series_index': field.series_field = self.fields['series'] + self.fields['series'].index_field = field elif name == 'authors': field.author_sort_field = self.fields['author_sort'] elif name == 'title': @@ -1179,6 +1181,18 @@ class Cache(object): else: table.remove_books(book_ids, self.backend) + @write_api + def remove_items(self, field, item_ids): + ''' Delete all items in the specified field with the specified ids. Returns the set of affected book ids. ''' + field = self.fields[field] + affected_books = field.table.remove_items(item_ids, self.backend) + if affected_books: + if hasattr(field, 'index_field'): + self._set_field(field.index_field.name, {bid:1.0 for bid in affected_books}) + else: + self._mark_as_dirty(affected_books) + return affected_books + @write_api def add_custom_book_data(self, name, val_map, delete_first=False): ''' Add data for name where val_map is a map of book_ids to values. If @@ -1208,6 +1222,22 @@ class Cache(object): ''' Return the set of book ids for which name has data. ''' return self.backend.get_ids_for_custom_book_data(name) + @read_api + def conversion_options(self, book_id, fmt='PIPE'): + return self.backend.conversion_options(book_id, fmt) + + @read_api + def has_conversion_options(self, ids, fmt='PIPE'): + return self.backend.has_conversion_options(ids, fmt) + + @write_api + def delete_conversion_options(self, book_ids, fmt='PIPE'): + return self.backend.delete_conversion_options(book_ids, fmt) + + @write_api + def set_conversion_options(self, options, fmt='PIPE'): + ''' options must be a map of the form {book_id:conversion_options} ''' + return self.backend.set_conversion_options(options, fmt) # }}} diff --git a/src/calibre/db/legacy.py b/src/calibre/db/legacy.py index 8c5fa5bd31..6128d6a09a 100644 --- a/src/calibre/db/legacy.py +++ b/src/calibre/db/legacy.py @@ -98,6 +98,13 @@ class LibraryDatabase(object): return self.new_api.get_item_name(field, item_id) return func setattr(self, '%s_name' % field, MT(getter(field))) + for field in ('publisher', 'series', 'tag'): + def getter(field): + fname = 'tags' if field == 'tag' else field + def func(self, item_id): + self.new_api.remove_items(fname, (item_id,)) + return func + setattr(self, 'delete_%s_using_id' % field, MT(getter(field))) # Legacy field API for func in ( @@ -383,6 +390,18 @@ class LibraryDatabase(object): break return ans + def set_conversion_options(self, book_id, fmt, options): + self.new_api.set_conversion_options({book_id:options}, fmt=fmt) + + def conversion_options(self, book_id, fmt): + return self.new_api.conversion_options(book_id, fmt=fmt) + + def has_conversion_options(self, ids, format='PIPE'): + return self.new_api.has_conversion_options(ids, fmt=format) + + def delete_conversion_options(self, book_id, fmt, commit=True): + self.new_api.delete_conversion_options((book_id,), fmt=fmt) + # Private interface {{{ def __iter__(self): for row in self.data.iterall(): diff --git a/src/calibre/db/tables.py b/src/calibre/db/tables.py index 19c4ade10c..7715f6abef 100644 --- a/src/calibre/db/tables.py +++ b/src/calibre/db/tables.py @@ -204,6 +204,21 @@ class ManyToOneTable(Table): [(x,) for x in clean]) return clean + def remove_items(self, item_ids, db): + affected_books = set() + for item_id in item_ids: + val = self.id_map.pop(item_id, null) + if val is null: + continue + book_ids = self.col_book_map.pop(item_id, set()) + for book_id in book_ids: + self.book_col_map.pop(book_id, None) + affected_books.update(book_ids) + item_ids = tuple((x,) for x in item_ids) + db.conn.executemany('DELETE FROM {0} WHERE {1}=?'.format(self.link_table, self.metadata['link_column']), item_ids) + db.conn.executemany('DELETE FROM {0} WHERE id=?'.format(self.metadata['table']), item_ids) + return affected_books + class ManyToManyTable(ManyToOneTable): ''' @@ -250,6 +265,21 @@ class ManyToManyTable(ManyToOneTable): [(x,) for x in clean]) return clean + def remove_items(self, item_ids, db): + affected_books = set() + for item_id in item_ids: + val = self.id_map.pop(item_id, null) + if val is null: + continue + book_ids = self.col_book_map.pop(item_id, set()) + for book_id in book_ids: + self.book_col_map[book_id] = tuple(x for x in self.book_col_map.get(book_id, ()) if x != item_id) + affected_books.update(book_ids) + item_ids = tuple((x,) for x in item_ids) + db.conn.executemany('DELETE FROM {0} WHERE {1}=?'.format(self.link_table, self.metadata['link_column']), item_ids) + db.conn.executemany('DELETE FROM {0} WHERE id=?'.format(self.metadata['table']), item_ids) + return affected_books + class AuthorsTable(ManyToManyTable): def read_id_maps(self, db): @@ -274,6 +304,9 @@ class AuthorsTable(ManyToManyTable): self.asort_map.pop(item_id, None) return clean + def remove_items(self, item_ids, db): + raise ValueError('Direct removal of authors is not allowed') + class FormatsTable(ManyToManyTable): do_clean_on_remove = False @@ -331,6 +364,9 @@ class FormatsTable(ManyToManyTable): return {book_id:zero_max(book_id) for book_id in formats_map} + def remove_items(self, item_ids, db): + raise NotImplementedError('Cannot delete a format directly') + def update_fmt(self, book_id, fmt, fname, size, db): fmts = list(self.book_col_map.get(book_id, [])) try: @@ -381,4 +417,6 @@ class IdentifiersTable(ManyToManyTable): clean.add(item_id) return clean + def remove_items(self, item_ids, db): + raise NotImplementedError('Direct deletion of identifiers is not implemented') diff --git a/src/calibre/db/tests/legacy.py b/src/calibre/db/tests/legacy.py index 2f5c879532..76deaea792 100644 --- a/src/calibre/db/tests/legacy.py +++ b/src/calibre/db/tests/legacy.py @@ -191,6 +191,52 @@ class LegacyTest(BaseTest): db.close() # }}} + def test_legacy_conversion_options(self): # {{{ + 'Test conversion options API' + ndb = self.init_legacy() + db = self.init_old() + all_ids = ndb.new_api.all_book_ids() + op1, op2 = {'xx':'yy'}, {'yy':'zz'} + for x in ( + ('has_conversion_options', all_ids), + ('conversion_options', 1, 'PIPE'), + ('set_conversion_options', 1, 'PIPE', op1), + ('has_conversion_options', all_ids), + ('conversion_options', 1, 'PIPE'), + ('delete_conversion_options', 1, 'PIPE'), + ('has_conversion_options', all_ids), + ): + meth, args = x[0], x[1:] + self.assertEqual((getattr(db, meth)(*args)), (getattr(ndb, meth)(*args)), + 'The method: %s() returned different results for argument %s' % (meth, args)) + db.close() + # }}} + + def test_legacy_delete_using(self): # {{{ + 'Test delete_using() API' + ndb = self.init_legacy() + db = self.init_old() + cache = ndb.new_api + tmap = cache.get_id_map('tags') + t = next(tmap.iterkeys()) + pmap = cache.get_id_map('publisher') + p = next(pmap.iterkeys()) + for x in ( + ('delete_tag_using_id', t), + ('delete_publisher_using_id', p), + (db.refresh,), + ('all_tag_names',), ('tags', 0), ('tags', 1), ('tags', 2), + ('all_publisher_names',), ('publisher', 0), ('publisher', 1), ('publisher', 2), + ): + meth, args = x[0], x[1:] + if callable(meth): + meth(*args) + else: + self.assertEqual((getattr(db, meth)(*args)), (getattr(ndb, meth)(*args)), + 'The method: %s() returned different results for argument %s' % (meth, args)) + db.close() + # }}} + def test_legacy_adding_books(self): # {{{ 'Test various adding books methods' from calibre.ebooks.metadata.book.base import Metadata @@ -269,7 +315,10 @@ class LegacyTest(BaseTest): 'books_in_old_database', # unused # Internal API - 'clean_user_categories', 'cleanup_tags', 'books_list_filter', + 'clean_user_categories', 'cleanup_tags', 'books_list_filter', 'conn', 'connect', 'construct_file_name', + 'construct_path_name', 'clear_dirtied', 'commit_dirty_cache', 'initialize_database', 'initialize_dynamic', + 'run_import_plugins', 'vacuum', 'set_path', 'row', 'row_factory', 'rows', 'rmtree', 'series_index_pat', + 'import_old_database', 'dirtied_lock', 'dirtied_cache', 'dirty_queue_length', 'dirty_books_referencing', } SKIP_ARGSPEC = { '__init__', 'get_next_series_num_for', 'has_book', 'author_sort_from_authors', @@ -280,7 +329,7 @@ class LegacyTest(BaseTest): try: total = 0 for attr in dir(db): - if attr in SKIP_ATTRS: + if attr in SKIP_ATTRS or attr.startswith('upgrade_version'): continue total += 1 if not hasattr(ndb, attr): @@ -302,7 +351,7 @@ class LegacyTest(BaseTest): if missing: pc = len(missing)/total - raise AssertionError('{0:.1%} of API ({2} attrs) are missing. For example: {1}'.format(pc, ', '.join(missing[:5]), len(missing))) + raise AssertionError('{0:.1%} of API ({2} attrs) are missing: {1}'.format(pc, ', '.join(missing), len(missing))) # }}} diff --git a/src/calibre/db/tests/writing.py b/src/calibre/db/tests/writing.py index cb525900ee..c4918b4c4b 100644 --- a/src/calibre/db/tests/writing.py +++ b/src/calibre/db/tests/writing.py @@ -419,3 +419,58 @@ class WritingTest(BaseTest): # }}} + def test_conversion_options(self): # {{{ + ' Test saving of conversion options ' + cache = self.init_cache() + all_ids = cache.all_book_ids() + self.assertFalse(cache.has_conversion_options(all_ids)) + self.assertIsNone(cache.conversion_options(1)) + op1, op2 = {'xx':'yy'}, {'yy':'zz'} + cache.set_conversion_options({1:op1, 2:op2}) + self.assertTrue(cache.has_conversion_options(all_ids)) + self.assertEqual(cache.conversion_options(1), op1) + self.assertEqual(cache.conversion_options(2), op2) + cache.set_conversion_options({1:op2}) + self.assertEqual(cache.conversion_options(1), op2) + cache.delete_conversion_options(all_ids) + self.assertFalse(cache.has_conversion_options(all_ids)) + # }}} + + def test_remove_items(self): # {{{ + ' Test removal of many-(many,one) items ' + cache = self.init_cache() + tmap = cache.get_id_map('tags') + self.assertEqual(cache.remove_items('tags', tmap), {1, 2}) + tmap = cache.get_id_map('#tags') + t = {v:k for k, v in tmap.iteritems()}['My Tag Two'] + self.assertEqual(cache.remove_items('#tags', (t,)), {1, 2}) + + smap = cache.get_id_map('series') + self.assertEqual(cache.remove_items('series', smap), {1, 2}) + smap = cache.get_id_map('#series') + s = {v:k for k, v in smap.iteritems()}['My Series Two'] + self.assertEqual(cache.remove_items('#series', (s,)), {1}) + + for c in (cache, self.init_cache()): + self.assertFalse(c.get_id_map('tags')) + self.assertFalse(c.all_field_names('tags')) + for bid in c.all_book_ids(): + self.assertFalse(c.field_for('tags', bid)) + + self.assertEqual(len(c.get_id_map('#tags')), 1) + self.assertEqual(c.all_field_names('#tags'), {'My Tag One'}) + for bid in c.all_book_ids(): + self.assertIn(c.field_for('#tags', bid), ((), ('My Tag One',))) + + for bid in (1, 2): + self.assertEqual(c.field_for('series_index', bid), 1.0) + self.assertFalse(c.get_id_map('series')) + self.assertFalse(c.all_field_names('series')) + for bid in c.all_book_ids(): + self.assertFalse(c.field_for('series', bid)) + + self.assertEqual(c.field_for('series_index', 1), 1.0) + self.assertEqual(c.all_field_names('#series'), {'My Series One'}) + for bid in c.all_book_ids(): + self.assertIn(c.field_for('#series', bid), (None, 'My Series One')) + # }}} diff --git a/src/calibre/gui2/store/stores/amazon_plugin.py b/src/calibre/gui2/store/stores/amazon_plugin.py index 33f8f9b048..82e83401e8 100644 --- a/src/calibre/gui2/store/stores/amazon_plugin.py +++ b/src/calibre/gui2/store/stores/amazon_plugin.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- from __future__ import (unicode_literals, division, absolute_import, print_function) -store_version = 3 # Needed for dynamic plugin loading +store_version = 4 # Needed for dynamic plugin loading __license__ = 'GPL 3' __copyright__ = '2011, John Schember ' @@ -126,15 +126,47 @@ class AmazonKindleStore(StorePlugin): counter = max_results with closing(br.open(url, timeout=timeout)) as f: - doc = html.fromstring(f.read().decode('latin-1', 'replace')) + doc = html.fromstring(f.read()) - data_xpath = '//div[contains(@class, "prod")]' - format_xpath = './/ul[contains(@class, "rsltL") or contains(@class, "rsltGridList")]//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()' - asin_xpath = '@name' - cover_xpath = './/img[@class="productImage"]/@src' - title_xpath = './/h3[@class="newaps"]/a//text()' - author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()' - price_xpath = './/ul[contains(@class, "rsltL") or contains(@class, "rsltGridList")]//span[contains(@class, "lrg") and contains(@class, "bld")]/text()' + if doc.xpath('//div[@id = "atfResults" and contains(@class, "grid")]'): + data_xpath = '//div[contains(@class, "prod")]' + format_xpath = ( + './/ul[contains(@class, "rsltGridList")]' + '//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()') + asin_xpath = '@name' + cover_xpath = './/img[@class="productImage"]/@src' + title_xpath = './/h3[@class="newaps"]/a//text()' + author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()' + price_xpath = ( + './/ul[contains(@class, "rsltGridList")]' + '//span[contains(@class, "lrg") and contains(@class, "bld")]/text()') + elif doc.xpath('//div[@id = "atfResults" and contains(@class, "ilresults")]'): + data_xpath = '//li[(@class="ilo")]' + format_xpath = ( + './/ul[contains(@class, "rsltGridList")]' + '//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()') + asin_xpath = '@name' + cover_xpath = './div[@class = "ilf"]/a/img[contains(@class, "ilo")]/@src' + title_xpath = './/h3[@class="newaps"]/a//text()' + author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()' + # Results can be in a grid (table) or a column + price_xpath = ( + './/ul[contains(@class, "rsltL") or contains(@class, "rsltGridList")]' + '//span[contains(@class, "lrg") and contains(@class, "bld")]/text()') + elif doc.xpath('//div[@id = "atfResults" and contains(@class, "list")]'): + data_xpath = '//div[contains(@class, "prod")]' + format_xpath = ( + './/ul[contains(@class, "rsltL")]' + '//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()') + asin_xpath = '@name' + cover_xpath = './/img[@class="productImage"]/@src' + title_xpath = './/h3[@class="newaps"]/a//text()' + author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()' + price_xpath = ( + './/ul[contains(@class, "rsltL")]' + '//span[contains(@class, "lrg") and contains(@class, "bld")]/text()') + else: + return for data in doc.xpath(data_xpath): if counter <= 0: