Mirror of https://github.com/kovidgoyal/calibre.git (synced 2025-07-08 10:44:09 -04:00)

Commit b3dbda5492: Merge branch 'kovidgoyal/master'
BIN  recipes/icons/le_monde_diplomatique_fr.png (new file; binary not shown; 446 B)
recipes/le_monde_diplomatique_fr.recipe (new file, 111 lines)
@@ -0,0 +1,111 @@
# vim:fileencoding=utf-8
from __future__ import unicode_literals

__license__ = 'GPL v3'
__copyright__ = '2013'
'''
monde-diplomatique.fr
'''

import re
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.web.feeds import feeds_from_index

class LeMondeDiplomatiqueSiteWeb(BasicNewsRecipe):
    title = u'Le Monde diplomatique.fr'
    __author__ = 'Gaëtan Lehmann'
    description = "Le Monde diplomatique est un mensuel français d’information et d’opinion à la ligne éditoriale nettement engagée en faveur d'une gauche de rupture avec le capitalisme. Il aborde de nombreux sujets — géopolitique, relations internationales, économie, questions sociales, écologie, culture, médias, …"  # noqa
    oldest_article = 7
    max_articles_per_feed = 100
    auto_cleanup = True
    publisher = 'monde-diplomatique.fr'
    category = 'news, France, world'
    language = 'fr'
    masthead_url = 'http://www.monde-diplomatique.fr/squelettes/images/logotyfa.png'
    timefmt = ' [%d %b %Y]'
    no_stylesheets = True

    feeds = [(u'Blogs', u'http://blog.mondediplo.net/spip.php?page=backend'), (u'Archives', u'http://www.monde-diplomatique.fr/rss/')]

    preprocess_regexps = [
        (re.compile(r'<title>(.*) - Les blogs du Diplo</title>'), lambda m: '<title>' + m.group(1) + '</title>'),
        (re.compile(r'<h2>(.*) - Les blogs du Diplo</h2>'), lambda m: '<h2>' + m.group(1) + '</h2>'),
        (re.compile(r'<title>(.*) \(Le Monde diplomatique\)</title>'), lambda m: '<title>' + m.group(1) + '</title>'),
        (re.compile(r'<h2>(.*) \(Le Monde diplomatique\)</h2>'), lambda m: '<h2>' + m.group(1) + '</h2>'),
        (re.compile(r'<h3>Grand format</h3>'), lambda m: '')]

    remove_tags = [dict(name='div', attrs={'class':'voiraussi liste'}),
                   dict(name='ul', attrs={'class':'hermetique carto hombre_demi_inverse'}),
                   dict(name='a', attrs={'class':'tousles'}),
                   dict(name='h3', attrs={'class':'cat'}),
                   dict(name='div', attrs={'class':'logodiplo'}),
                   dict(name='img', attrs={'class':'spip_logos'}),
                   dict(name='p', attrs={'id':'hierarchie'}),
                   dict(name='div', attrs={'class':'espace'})]

    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
        'linearize_tables': True,
    }

    remove_empty_feeds = True

    filterDuplicates = True

    # don't use parse_index - we need it to raise an exception so we can mix
    # feed and parse_index results in parse_feeds
    def parse_index_valise(self):
        articles = []
        soup = self.index_to_soup('http://www.monde-diplomatique.fr/carnet/')
        cnt = soup.find('ul', attrs={'class':'hermetique liste'})
        for item in cnt.findAll('li'):
            description = ''
            feed_link = item.find('a')
            desc = item.find('div', attrs={'class':'intro'})
            date = item.find('div', attrs={'class':'dates_auteurs'})
            if desc:
                description = desc.string
            if feed_link and feed_link.has_key('href'):
                url = 'http://www.monde-diplomatique.fr' + feed_link['href']
                title = self.tag_to_string(feed_link)
                articles.append({
                    'title': title,
                    'date': date.string.strip(),
                    'url': url,
                    'description': description,
                })
        return [("La valise diplomatique", articles)]

    def parse_index_cartes(self):
        articles = []
        soup = self.index_to_soup('http://www.monde-diplomatique.fr/cartes/')
        cnt = soup.find('div', attrs={'class':'decale hermetique'})
        for item in cnt.findAll('div', attrs={'class':re.compile('grid_3 filet hombre_demi')}):
            feed_link = item.find('a', attrs={'class':'couve'})
            h3 = item.find('h3')
            authorAndDate = item.find('div', attrs={'class':'dates_auteurs'})
            author, date = authorAndDate.string.strip().split(', ')
            if feed_link and feed_link.has_key('href'):
                url = 'http://www.monde-diplomatique.fr' + feed_link['href']
                title = self.tag_to_string(h3)
                articles.append({
                    'title': title,
                    'date': date,
                    'url': url,
                    'description': author,
                })
        return [("Cartes", articles)]

    def parse_feeds(self):
        feeds = BasicNewsRecipe.parse_feeds(self)
        valise = feeds_from_index(self.parse_index_valise(), oldest_article=self.oldest_article,
                                  max_articles_per_feed=self.max_articles_per_feed,
                                  log=self.log)
        cartes = feeds_from_index(self.parse_index_cartes(), oldest_article=self.oldest_article,
                                  max_articles_per_feed=self.max_articles_per_feed,
                                  log=self.log)
        feeds = valise + feeds + cartes
        return feeds
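The notable move in this recipe is at the end: rather than implementing parse_index(), it keeps its RSS feeds and splices in two scraped sections by converting parse_index()-style results with feeds_from_index(). A reduced sketch of that mixing pattern, with placeholder feed and article values (the actual index scraping is elided):

from calibre.web.feeds.news import BasicNewsRecipe
from calibre.web.feeds import feeds_from_index

class MixedSourcesRecipe(BasicNewsRecipe):
    title = 'Mixing RSS and scraped indices'  # illustrative recipe only
    feeds = [('Some RSS feed', 'http://example.com/rss')]

    def parse_index_extra(self):
        # parse_index()-style (section, articles) tuples, normally
        # built by scraping an index page as the recipe above does
        return [('Extra section', [{'title': 'An article', 'url': 'http://example.com/a',
                                    'date': '', 'description': ''}])]

    def parse_feeds(self):
        feeds = BasicNewsRecipe.parse_feeds(self)  # the regular RSS-driven feeds
        extra = feeds_from_index(self.parse_index_extra(),
                                 oldest_article=self.oldest_article,
                                 max_articles_per_feed=self.max_articles_per_feed,
                                 log=self.log)
        return extra + feeds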
@@ -8,7 +8,7 @@ __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

# Imports {{{
-import os, shutil, uuid, json, glob, time
+import os, shutil, uuid, json, glob, time, cPickle
from functools import partial

import apsw
@@ -1216,5 +1216,27 @@ class DB(object):
    def get_ids_for_custom_book_data(self, name):
        return frozenset(r[0] for r in self.conn.execute('SELECT book FROM books_plugin_data WHERE name=?', (name,)))

    def conversion_options(self, book_id, fmt):
        for (data,) in self.conn.get('SELECT data FROM conversion_options WHERE book=? AND format=?', (book_id, fmt.upper())):
            if data:
                return cPickle.loads(bytes(data))

    def has_conversion_options(self, ids, fmt='PIPE'):
        ids = frozenset(ids)
        self.conn.execute('DROP TABLE IF EXISTS conversion_options_temp; CREATE TEMP TABLE conversion_options_temp (id INTEGER PRIMARY KEY);')
        self.conn.executemany('INSERT INTO conversion_options_temp VALUES (?)', [(x,) for x in ids])
        for (book_id,) in self.conn.get(
            'SELECT book FROM conversion_options WHERE format=? AND book IN (SELECT id FROM conversion_options_temp)', (fmt.upper(),)):
            return True
        return False

    def delete_conversion_options(self, book_ids, fmt):
        self.conn.executemany('DELETE FROM conversion_options WHERE book=? AND format=?',
            [(book_id, fmt.upper()) for book_id in book_ids])

    def set_conversion_options(self, options, fmt):
        options = [(book_id, fmt.upper(), buffer(cPickle.dumps(data, -1))) for book_id, data in options.iteritems()]
        self.conn.executemany('INSERT OR REPLACE INTO conversion_options(book,format,data) VALUES (?,?,?)', options)

    # }}}
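Conversion options are stored pickled in a BLOB column. A minimal standalone sketch of the same serialization round-trip, using the stdlib sqlite3 module in place of calibre's apsw-backed connection (the schema and sample options here are illustrative):

import cPickle, sqlite3

conn = sqlite3.connect(':memory:')
conn.execute('CREATE TABLE conversion_options (book INTEGER, format TEXT, data BLOB)')

options = {'margin_top': 10.0, 'output_profile': 'tablet'}
# Highest pickle protocol, wrapped in buffer() so sqlite stores a BLOB
conn.execute('INSERT INTO conversion_options VALUES (?,?,?)',
             (1, 'EPUB', buffer(cPickle.dumps(options, -1))))

for (data,) in conn.execute(
        'SELECT data FROM conversion_options WHERE book=? AND format=?', (1, 'EPUB')):
    print cPickle.loads(bytes(data)) == options  # True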
@@ -265,8 +265,10 @@ class Cache(object):
        for name, field in self.fields.iteritems():
            if name[0] == '#' and name.endswith('_index'):
                field.series_field = self.fields[name[:-len('_index')]]
+                self.fields[name[:-len('_index')]].index_field = field
            elif name == 'series_index':
                field.series_field = self.fields['series']
+                self.fields['series'].index_field = field
            elif name == 'authors':
                field.author_sort_field = self.fields['author_sort']
            elif name == 'title':

@@ -1179,6 +1181,18 @@ class Cache(object):
            else:
                table.remove_books(book_ids, self.backend)

    @write_api
    def remove_items(self, field, item_ids):
        ''' Delete all items in the specified field with the specified ids. Returns the set of affected book ids. '''
        field = self.fields[field]
        affected_books = field.table.remove_items(item_ids, self.backend)
        if affected_books:
            if hasattr(field, 'index_field'):
                self._set_field(field.index_field.name, {bid:1.0 for bid in affected_books})
            else:
                self._mark_as_dirty(affected_books)
        return affected_books

    @write_api
    def add_custom_book_data(self, name, val_map, delete_first=False):
        ''' Add data for name where val_map is a map of book_ids to values. If
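A hedged usage note, assuming an initialized calibre.db.cache.Cache: remove_items() takes a field name and an iterable of item ids, and returns the ids of the books that referenced them; for series-like fields the matching *_index values are reset to 1.0 by the _set_field() call above. For example (the tag name is invented):

tag_map = cache.get_id_map('tags')  # {item_id: tag name}
doomed = [i for i, name in tag_map.iteritems() if name == 'Unread']
affected = cache.remove_items('tags', doomed)
print affected  # the set of book ids that carried the removed tag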
@@ -1208,6 +1222,22 @@ class Cache(object):
        ''' Return the set of book ids for which name has data. '''
        return self.backend.get_ids_for_custom_book_data(name)

    @read_api
    def conversion_options(self, book_id, fmt='PIPE'):
        return self.backend.conversion_options(book_id, fmt)

    @read_api
    def has_conversion_options(self, ids, fmt='PIPE'):
        return self.backend.has_conversion_options(ids, fmt)

    @write_api
    def delete_conversion_options(self, book_ids, fmt='PIPE'):
        return self.backend.delete_conversion_options(book_ids, fmt)

    @write_api
    def set_conversion_options(self, options, fmt='PIPE'):
        ''' options must be a map of the form {book_id:conversion_options} '''
        return self.backend.set_conversion_options(options, fmt)

    # }}}
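Taken together, these four thin wrappers are the public face of the backend methods above. A usage sketch, again assuming an initialized Cache with books 1 and 2 (fmt defaults to 'PIPE' throughout, matching the tests further down):

cache.set_conversion_options({1: {'margin_top': 10.0}})
print cache.conversion_options(1)           # -> {'margin_top': 10.0}
print cache.has_conversion_options({1, 2})  # -> True
cache.delete_conversion_options((1,))
print cache.conversion_options(1)           # -> None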
@@ -98,6 +98,13 @@ class LibraryDatabase(object):
                return self.new_api.get_item_name(field, item_id)
            return func
        setattr(self, '%s_name' % field, MT(getter(field)))
+        for field in ('publisher', 'series', 'tag'):
+            def getter(field):
+                fname = 'tags' if field == 'tag' else field
+                def func(self, item_id):
+                    self.new_api.remove_items(fname, (item_id,))
+                return func
+            setattr(self, 'delete_%s_using_id' % field, MT(getter(field)))

        # Legacy field API
        for func in (
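The delete_*_using_id generation relies on a classic closure trick: calling getter(field) immediately captures the current loop value, sidestepping Python's late-binding closures, and the resulting function is bound to the instance (MT in the diff; types.MethodType plays that role in this standalone sketch, with invented class names):

import types

class API(object):
    def remove_items(self, field, ids):
        print 'removing %r from %r' % (ids, field)

class Legacy(object):
    def __init__(self):
        self.new_api = API()
        for field in ('publisher', 'series', 'tag'):
            def getter(field):  # freezes the current value of field
                fname = 'tags' if field == 'tag' else field
                def func(self, item_id):
                    self.new_api.remove_items(fname, (item_id,))
                return func
            setattr(self, 'delete_%s_using_id' % field,
                    types.MethodType(getter(field), self))

Legacy().delete_tag_using_id(3)  # prints: removing (3,) from 'tags'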
@@ -383,6 +390,18 @@ class LibraryDatabase(object):
                break
        return ans

    def set_conversion_options(self, book_id, fmt, options):
        self.new_api.set_conversion_options({book_id:options}, fmt=fmt)

    def conversion_options(self, book_id, fmt):
        return self.new_api.conversion_options(book_id, fmt=fmt)

    def has_conversion_options(self, ids, format='PIPE'):
        return self.new_api.has_conversion_options(ids, fmt=format)

    def delete_conversion_options(self, book_id, fmt, commit=True):
        self.new_api.delete_conversion_options((book_id,), fmt=fmt)

    # Private interface {{{
    def __iter__(self):
        for row in self.data.iterall():
@@ -204,6 +204,21 @@ class ManyToOneTable(Table):
                [(x,) for x in clean])
        return clean

    def remove_items(self, item_ids, db):
        affected_books = set()
        for item_id in item_ids:
            val = self.id_map.pop(item_id, null)
            if val is null:
                continue
            book_ids = self.col_book_map.pop(item_id, set())
            for book_id in book_ids:
                self.book_col_map.pop(book_id, None)
            affected_books.update(book_ids)
        item_ids = tuple((x,) for x in item_ids)
        db.conn.executemany('DELETE FROM {0} WHERE {1}=?'.format(self.link_table, self.metadata['link_column']), item_ids)
        db.conn.executemany('DELETE FROM {0} WHERE id=?'.format(self.metadata['table']), item_ids)
        return affected_books

class ManyToManyTable(ManyToOneTable):

    '''
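Note the val = self.id_map.pop(item_id, null) idiom: null is presumably a module-level object() sentinel, used because it can never collide with a stored value the way None could. The pattern in isolation:

null = object()  # unique sentinel, distinct from any stored value

id_map = {1: 'Tag One', 2: None}
for item_id in (1, 2, 3):
    val = id_map.pop(item_id, null)
    if val is null:
        print '%d: not present, skipped' % item_id
    else:
        print '%d: removed %r' % (item_id, val)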
@@ -250,6 +265,21 @@ class ManyToManyTable(ManyToOneTable):
                [(x,) for x in clean])
        return clean

    def remove_items(self, item_ids, db):
        affected_books = set()
        for item_id in item_ids:
            val = self.id_map.pop(item_id, null)
            if val is null:
                continue
            book_ids = self.col_book_map.pop(item_id, set())
            for book_id in book_ids:
                self.book_col_map[book_id] = tuple(x for x in self.book_col_map.get(book_id, ()) if x != item_id)
            affected_books.update(book_ids)
        item_ids = tuple((x,) for x in item_ids)
        db.conn.executemany('DELETE FROM {0} WHERE {1}=?'.format(self.link_table, self.metadata['link_column']), item_ids)
        db.conn.executemany('DELETE FROM {0} WHERE id=?'.format(self.metadata['table']), item_ids)
        return affected_books

class AuthorsTable(ManyToManyTable):

    def read_id_maps(self, db):
@@ -274,6 +304,9 @@ class AuthorsTable(ManyToManyTable):
            self.asort_map.pop(item_id, None)
        return clean

    def remove_items(self, item_ids, db):
        raise ValueError('Direct removal of authors is not allowed')

class FormatsTable(ManyToManyTable):

    do_clean_on_remove = False

@@ -331,6 +364,9 @@ class FormatsTable(ManyToManyTable):

        return {book_id:zero_max(book_id) for book_id in formats_map}

    def remove_items(self, item_ids, db):
        raise NotImplementedError('Cannot delete a format directly')

    def update_fmt(self, book_id, fmt, fname, size, db):
        fmts = list(self.book_col_map.get(book_id, []))
        try:

@@ -381,4 +417,6 @@ class IdentifiersTable(ManyToManyTable):
            clean.add(item_id)
        return clean

    def remove_items(self, item_ids, db):
        raise NotImplementedError('Direct deletion of identifiers is not implemented')
@@ -191,6 +191,52 @@ class LegacyTest(BaseTest):
        db.close()
    # }}}

    def test_legacy_conversion_options(self): # {{{
        'Test conversion options API'
        ndb = self.init_legacy()
        db = self.init_old()
        all_ids = ndb.new_api.all_book_ids()
        op1, op2 = {'xx':'yy'}, {'yy':'zz'}
        for x in (
            ('has_conversion_options', all_ids),
            ('conversion_options', 1, 'PIPE'),
            ('set_conversion_options', 1, 'PIPE', op1),
            ('has_conversion_options', all_ids),
            ('conversion_options', 1, 'PIPE'),
            ('delete_conversion_options', 1, 'PIPE'),
            ('has_conversion_options', all_ids),
        ):
            meth, args = x[0], x[1:]
            self.assertEqual((getattr(db, meth)(*args)), (getattr(ndb, meth)(*args)),
                'The method: %s() returned different results for argument %s' % (meth, args))
        db.close()
    # }}}

    def test_legacy_delete_using(self): # {{{
        'Test delete_using() API'
        ndb = self.init_legacy()
        db = self.init_old()
        cache = ndb.new_api
        tmap = cache.get_id_map('tags')
        t = next(tmap.iterkeys())
        pmap = cache.get_id_map('publisher')
        p = next(pmap.iterkeys())
        for x in (
            ('delete_tag_using_id', t),
            ('delete_publisher_using_id', p),
            (db.refresh,),
            ('all_tag_names',), ('tags', 0), ('tags', 1), ('tags', 2),
            ('all_publisher_names',), ('publisher', 0), ('publisher', 1), ('publisher', 2),
        ):
            meth, args = x[0], x[1:]
            if callable(meth):
                meth(*args)
            else:
                self.assertEqual((getattr(db, meth)(*args)), (getattr(ndb, meth)(*args)),
                    'The method: %s() returned different results for argument %s' % (meth, args))
        db.close()
    # }}}

    def test_legacy_adding_books(self): # {{{
        'Test various adding books methods'
        from calibre.ebooks.metadata.book.base import Metadata
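Both tests use the same driving pattern: feed identical (method_name, args) tuples to the old and new implementations and assert that the results agree. Distilled into a standalone helper (all names illustrative):

def assert_parity(old, new, calls):
    # Run each call against both implementations and compare results
    for call in calls:
        meth, args = call[0], call[1:]
        old_res = getattr(old, meth)(*args)
        new_res = getattr(new, meth)(*args)
        assert old_res == new_res, '%s%r: %r != %r' % (meth, args, old_res, new_res)

class Old(object):
    def double(self, x):
        return 2 * x

class New(object):
    def double(self, x):
        return x + x

assert_parity(Old(), New(), [('double', 2), ('double', 5)])  # passes silently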
@@ -269,7 +315,10 @@ class LegacyTest(BaseTest):
            'books_in_old_database',  # unused

            # Internal API
-            'clean_user_categories', 'cleanup_tags', 'books_list_filter',
+            'clean_user_categories', 'cleanup_tags', 'books_list_filter', 'conn', 'connect', 'construct_file_name',
+            'construct_path_name', 'clear_dirtied', 'commit_dirty_cache', 'initialize_database', 'initialize_dynamic',
+            'run_import_plugins', 'vacuum', 'set_path', 'row', 'row_factory', 'rows', 'rmtree', 'series_index_pat',
+            'import_old_database', 'dirtied_lock', 'dirtied_cache', 'dirty_queue_length', 'dirty_books_referencing',
        }
        SKIP_ARGSPEC = {
            '__init__', 'get_next_series_num_for', 'has_book', 'author_sort_from_authors',

@@ -280,7 +329,7 @@ class LegacyTest(BaseTest):
        try:
            total = 0
            for attr in dir(db):
-                if attr in SKIP_ATTRS:
+                if attr in SKIP_ATTRS or attr.startswith('upgrade_version'):
                    continue
                total += 1
                if not hasattr(ndb, attr):

@@ -302,7 +351,7 @@ class LegacyTest(BaseTest):

        if missing:
            pc = len(missing)/total
-            raise AssertionError('{0:.1%} of API ({2} attrs) are missing. For example: {1}'.format(pc, ', '.join(missing[:5]), len(missing)))
+            raise AssertionError('{0:.1%} of API ({2} attrs) are missing: {1}'.format(pc, ', '.join(missing), len(missing)))

    # }}}
@@ -419,3 +419,58 @@ class WritingTest(BaseTest):

    # }}}

    def test_conversion_options(self): # {{{
        ' Test saving of conversion options '
        cache = self.init_cache()
        all_ids = cache.all_book_ids()
        self.assertFalse(cache.has_conversion_options(all_ids))
        self.assertIsNone(cache.conversion_options(1))
        op1, op2 = {'xx':'yy'}, {'yy':'zz'}
        cache.set_conversion_options({1:op1, 2:op2})
        self.assertTrue(cache.has_conversion_options(all_ids))
        self.assertEqual(cache.conversion_options(1), op1)
        self.assertEqual(cache.conversion_options(2), op2)
        cache.set_conversion_options({1:op2})
        self.assertEqual(cache.conversion_options(1), op2)
        cache.delete_conversion_options(all_ids)
        self.assertFalse(cache.has_conversion_options(all_ids))
    # }}}

    def test_remove_items(self): # {{{
        ' Test removal of many-(many,one) items '
        cache = self.init_cache()
        tmap = cache.get_id_map('tags')
        self.assertEqual(cache.remove_items('tags', tmap), {1, 2})
        tmap = cache.get_id_map('#tags')
        t = {v:k for k, v in tmap.iteritems()}['My Tag Two']
        self.assertEqual(cache.remove_items('#tags', (t,)), {1, 2})

        smap = cache.get_id_map('series')
        self.assertEqual(cache.remove_items('series', smap), {1, 2})
        smap = cache.get_id_map('#series')
        s = {v:k for k, v in smap.iteritems()}['My Series Two']
        self.assertEqual(cache.remove_items('#series', (s,)), {1})

        for c in (cache, self.init_cache()):
            self.assertFalse(c.get_id_map('tags'))
            self.assertFalse(c.all_field_names('tags'))
            for bid in c.all_book_ids():
                self.assertFalse(c.field_for('tags', bid))

            self.assertEqual(len(c.get_id_map('#tags')), 1)
            self.assertEqual(c.all_field_names('#tags'), {'My Tag One'})
            for bid in c.all_book_ids():
                self.assertIn(c.field_for('#tags', bid), ((), ('My Tag One',)))

            for bid in (1, 2):
                self.assertEqual(c.field_for('series_index', bid), 1.0)
            self.assertFalse(c.get_id_map('series'))
            self.assertFalse(c.all_field_names('series'))
            for bid in c.all_book_ids():
                self.assertFalse(c.field_for('series', bid))

            self.assertEqual(c.field_for('series_index', 1), 1.0)
            self.assertEqual(c.all_field_names('#series'), {'My Series One'})
            for bid in c.all_book_ids():
                self.assertIn(c.field_for('#series', bid), (None, 'My Series One'))
    # }}}
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-

from __future__ import (unicode_literals, division, absolute_import, print_function)
-store_version = 3  # Needed for dynamic plugin loading
+store_version = 4  # Needed for dynamic plugin loading

__license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
@@ -126,15 +126,47 @@ class AmazonKindleStore(StorePlugin):

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
-            doc = html.fromstring(f.read().decode('latin-1', 'replace'))
+            doc = html.fromstring(f.read())

-            data_xpath = '//div[contains(@class, "prod")]'
-            format_xpath = './/ul[contains(@class, "rsltL") or contains(@class, "rsltGridList")]//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()'
-            asin_xpath = '@name'
-            cover_xpath = './/img[@class="productImage"]/@src'
-            title_xpath = './/h3[@class="newaps"]/a//text()'
-            author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
-            price_xpath = './/ul[contains(@class, "rsltL") or contains(@class, "rsltGridList")]//span[contains(@class, "lrg") and contains(@class, "bld")]/text()'
+            if doc.xpath('//div[@id = "atfResults" and contains(@class, "grid")]'):
+                data_xpath = '//div[contains(@class, "prod")]'
+                format_xpath = (
+                    './/ul[contains(@class, "rsltGridList")]'
+                    '//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()')
+                asin_xpath = '@name'
+                cover_xpath = './/img[@class="productImage"]/@src'
+                title_xpath = './/h3[@class="newaps"]/a//text()'
+                author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
+                price_xpath = (
+                    './/ul[contains(@class, "rsltGridList")]'
+                    '//span[contains(@class, "lrg") and contains(@class, "bld")]/text()')
+            elif doc.xpath('//div[@id = "atfResults" and contains(@class, "ilresults")]'):
+                data_xpath = '//li[(@class="ilo")]'
+                format_xpath = (
+                    './/ul[contains(@class, "rsltGridList")]'
+                    '//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()')
+                asin_xpath = '@name'
+                cover_xpath = './div[@class = "ilf"]/a/img[contains(@class, "ilo")]/@src'
+                title_xpath = './/h3[@class="newaps"]/a//text()'
+                author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
+                # Results can be in a grid (table) or a column
+                price_xpath = (
+                    './/ul[contains(@class, "rsltL") or contains(@class, "rsltGridList")]'
+                    '//span[contains(@class, "lrg") and contains(@class, "bld")]/text()')
+            elif doc.xpath('//div[@id = "atfResults" and contains(@class, "list")]'):
+                data_xpath = '//div[contains(@class, "prod")]'
+                format_xpath = (
+                    './/ul[contains(@class, "rsltL")]'
+                    '//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()')
+                asin_xpath = '@name'
+                cover_xpath = './/img[@class="productImage"]/@src'
+                title_xpath = './/h3[@class="newaps"]/a//text()'
+                author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()'
+                price_xpath = (
+                    './/ul[contains(@class, "rsltL")]'
+                    '//span[contains(@class, "lrg") and contains(@class, "bld")]/text()')
+            else:
+                return

            for data in doc.xpath(data_xpath):
                if counter <= 0:
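The store change replaces one fixed set of XPaths with a probe-then-select structure: detect which layout Amazon served (grid, image list, or plain list) before binding any XPaths, and bail out on unknown layouts instead of scraping garbage. The same technique in miniature, against invented markup:

from lxml import html

page = html.fromstring(
    '<div id="atfResults" class="grid">'
    '<div class="prod"><h3 class="newaps"><a>Title A</a></h3></div>'
    '</div>')

if page.xpath('//div[@id="atfResults" and contains(@class, "grid")]'):
    data_xpath = '//div[contains(@class, "prod")]'  # grid layout
elif page.xpath('//div[@id="atfResults" and contains(@class, "list")]'):
    data_xpath = '//div[contains(@class, "prod")]'  # list layout
else:
    data_xpath = None  # unknown layout: refuse to guess

if data_xpath is not None:
    for node in page.xpath(data_xpath):
        print ''.join(node.xpath('.//h3[@class="newaps"]/a//text()'))  # Title A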