mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
IGN: Add a plugin to download social metadata (tags, rating, reviews, etc.) from Amazon
This commit is contained in:
parent
289455c1d7
commit
7e05464776
@ -193,7 +193,7 @@ def extract(path, dir):
|
||||
raise Exception('Unknown archive type')
|
||||
extractor(path, dir)
|
||||
|
||||
def get_proxies():
|
||||
def get_proxies(debug=True):
|
||||
proxies = {}
|
||||
|
||||
for q in ('http', 'ftp'):
|
||||
@ -226,10 +226,40 @@ def get_proxies():
|
||||
if len(proxies[x]) < 5:
|
||||
prints('Removing invalid', x, 'proxy:', proxies[x])
|
||||
del proxies[x]
|
||||
if proxies:
|
||||
if proxies and debug:
|
||||
prints('Using proxies:', proxies)
|
||||
return proxies
|
||||
|
||||
def get_parsed_proxy(typ='http', debug=True):
    '''
    Return the configured proxy of type ``typ`` split into its components.

    The proxy string is looked up in the mapping returned by
    :func:`get_proxies` and matched against the form
    ``[typ://][user:pass@]host[:port]``.

    :param typ: Key of the proxy to look up in the proxies mapping
                (for example ``'http'`` or ``'ftp'``).
    :param debug: If True, print diagnostic output (the proxy in use and
                  any traceback raised while parsing).
    :return: A dict with the keys ``host``, ``port`` (an ``int`` or
             ``None``), ``user`` and ``pass``, or ``None`` if no proxy of
             this type is configured or the proxy string cannot be parsed.
    '''
    proxies = get_proxies(debug)
    if typ not in proxies:
        return None

    pattern = re.compile(
        r'(?:%s://)?' % re.escape(typ) +
        r'(?:(?P<user>\w+):(?P<pass>.*)@)?'
        r'(?P<host>[\w\-\.]+)'
        r'(?::(?P<port>\d+))?'
    )

    # Look up the entry for the requested type. Indexing with the literal
    # string 'typ' raised KeyError for every configured proxy.
    match = pattern.match(proxies[typ])
    if not match:
        return None

    try:
        ans = {
            'host': match.group('host'),
            'port': match.group('port'),
            'user': match.group('user'),
            'pass': match.group('pass'),
        }
        if ans['port']:
            ans['port'] = int(ans['port'])
    except Exception:
        # Parsing is best effort: report the problem and behave as if no
        # proxy were configured.
        if debug:
            traceback.print_exc()
        return None

    if debug:
        prints('Using', typ, 'proxy', ans)
    return ans
|
||||
|
||||
|
||||
def browser(honor_time=True, max_time=2, mobile_browser=False):
|
||||
'''
|
||||
|
@ -374,8 +374,8 @@ from calibre.devices.eslick.driver import ESLICK
|
||||
from calibre.devices.nuut2.driver import NUUT2
|
||||
from calibre.devices.iriver.driver import IRIVER_STORY
|
||||
|
||||
from calibre.ebooks.metadata.fetch import GoogleBooks, ISBNDB
|
||||
plugins = [HTML2ZIP, GoogleBooks, ISBNDB]
|
||||
from calibre.ebooks.metadata.fetch import GoogleBooks, ISBNDB, Amazon
|
||||
plugins = [HTML2ZIP, GoogleBooks, ISBNDB, Amazon]
|
||||
plugins += [
|
||||
ComicInput,
|
||||
EPUBInput,
|
||||
|
@ -90,9 +90,10 @@ def output_profiles():
|
||||
if isinstance(plugin, OutputProfile):
|
||||
yield plugin
|
||||
|
||||
def metadata_sources(customize=True, isbndb_key=None):
|
||||
def metadata_sources(metadata_type='basic', customize=True, isbndb_key=None):
|
||||
for plugin in _initialized_plugins:
|
||||
if isinstance(plugin, MetadataSource):
|
||||
if isinstance(plugin, MetadataSource) and \
|
||||
plugin.metadata_type == metadata_type:
|
||||
if is_disabled(plugin):
|
||||
continue
|
||||
if customize:
|
||||
|
@ -6,45 +6,83 @@ __docformat__ = 'restructuredtext en'
|
||||
'''
|
||||
Fetch metadata using Amazon AWS
|
||||
'''
|
||||
import re
|
||||
import sys, re
|
||||
from datetime import datetime
|
||||
|
||||
from lxml import etree
|
||||
from dateutil import parser
|
||||
|
||||
from calibre import browser
|
||||
from calibre.ebooks.metadata import MetaInformation, string_to_authors
|
||||
|
||||
AWS_NS = 'http://webservices.amazon.com/AWSECommerceService/2005-10-05'
|
||||
|
||||
def AWS(tag):
    '''Return ``tag`` qualified with the AWS namespace as ``{namespace}tag``.'''
    namespace_prefix = '{%s}' % AWS_NS
    return namespace_prefix + '%s' % tag
|
||||
|
||||
def check_for_errors(root):
    '''
    Raise an Exception if the response tree contains an AWS ``Error`` element.

    :param root: Parsed response element to search.
    :raises Exception: With the text content of the first ``Error`` element
                       found anywhere below ``root``.
    '''
    error_node = root.find('.//' + AWS('Error'))
    if error_node is None:
        return
    details = etree.tostring(error_node, method='text', pretty_print=True,
            encoding=unicode)
    raise Exception('Failed to get metadata with error: ' + details)
|
||||
|
||||
def get_social_metadata(title, authors, publisher, isbn):
    '''
    Fetch social metadata (rating, tags, editorial review) for a book.

    Requests ``http://status.calibre-ebook.com/aws/metadata/<isbn>`` and
    copies whatever the XML response provides onto a new
    :class:`MetaInformation` object.

    :param title: Title used to create the returned object; replaced by the
                  title in the response when one is present.
    :param authors: Authors used to create the returned object; replaced by
                    the authors in the response when any are present.
    :param publisher: Not used by this function.
    :param isbn: ISBN to look up. If it is empty, no request is made.
    :return: A :class:`MetaInformation` object.
    :raises Exception: If the response contains an AWS ``Error`` element
                       (see :func:`check_for_errors`).
    '''
    mi = MetaInformation(title, authors)
    if not isbn:
        return mi

    br = browser()
    response_xml = br.open(
            'http://status.calibre-ebook.com/aws/metadata/'+isbn).read()
    root = etree.fromstring(response_xml)
    check_for_errors(root)

    # Only override the caller supplied values when the response actually
    # contains a replacement.
    found_title = root.findtext('.//'+AWS('Title'))
    if found_title is not None:
        mi.title = found_title

    found_authors = [x.text for x in root.findall('.//'+AWS('Author'))]
    if found_authors:
        mi.authors = []
        for x in found_authors:
            mi.authors.extend(string_to_authors(x))

    found_publisher = root.findtext('.//'+AWS('Publisher'))
    if found_publisher is not None:
        mi.publisher = found_publisher

    # findtext() returns the text itself, so it is parsed directly. The
    # previous d[0].text always raised AttributeError, which was silently
    # swallowed, and pubdate was never set.
    pubdate = root.findtext('.//'+AWS('PublicationDate'))
    if pubdate:
        try:
            now = datetime.utcnow()
            # Fields missing from the date string are taken from this
            # default: current year and month, day 15.
            default = datetime(now.year, now.month, 15)
            mi.pubdate = parser.parse(pubdate, default=default)
        except Exception:
            # Best effort: an unparseable date is ignored.
            pass

    try:
        rating = float(root.findtext('.//'+AWS('AverageRating')))
        num_of_reviews = int(root.findtext('.//'+AWS('TotalReviews')))
    except (TypeError, ValueError):
        # Elements missing (findtext returned None) or not numeric.
        pass
    else:
        if num_of_reviews > 4 and rating > 0 and rating < 5:
            mi.rating = rating

    subjects = [x.text for x in root.findall('.//%s/%s'%(AWS('Subjects'),
        AWS('Subject')))]
    if subjects:
        mi.tags = []
        for x in subjects:
            # A subject may hold several '/' separated tags.
            mi.tags.extend([y.strip() for y in x.split('/')])

    review = root.find('.//%s/%s'%(AWS('EditorialReview'),
        AWS('Content')))
    if review is not None:
        text = etree.tostring(review, method='text', encoding=unicode)
        # Convert the HTML markup embedded in the review text into a plain
        # text approximation, then drop any remaining tags.
        text = re.sub('<([pP]|DIV)>', '\n\n', text)
        text = re.sub('</?[iI]>', '*', text)
        text = re.sub('</?[bB]>', '**', text)
        text = re.sub('<BR>', '\n\n', text)
        text = re.sub('<[^>]+>', '', text)
        mi.comments = _('EDITORIAL REVIEW')+':\n\n'+text.strip()

    return mi
|
||||
|
||||
|
||||
BASE_URL = 'http://ecs.amazonaws.com/onca/xml?Service=AWSECommerceService&AWSAccessKeyId=%(key)s&Operation=ItemLookup&ItemId=1416551727&ResponseGroup=%(group)s'
|
||||
|
||||
import sys
|
||||
|
||||
def get_rating(isbn, key):
|
||||
br = browser()
|
||||
url = BASE_URL%dict(key=key, group='Reviews')
|
||||
raw = br.open(url).read()
|
||||
match = re.search(r'<AverageRating>([\d.]+)</AverageRating>', raw)
|
||||
if match:
|
||||
return float(match.group(1))
|
||||
|
||||
def get_cover_url(isbn, key):
|
||||
br = browser()
|
||||
url = BASE_URL%dict(key=key, group='Images')
|
||||
raw = br.open(url).read()
|
||||
match = re.search(r'<LargeImage><URL>(.+?)</URL>', raw)
|
||||
if match:
|
||||
return match.group(1)
|
||||
|
||||
def get_editorial_review(isbn, key):
|
||||
br = browser()
|
||||
url = BASE_URL%dict(key=key, group='EditorialReview')
|
||||
raw = br.open(url).read()
|
||||
match = re.compile(r'<EditorialReview>.*?<Content>(.+?)</Content>', re.DOTALL).search(raw)
|
||||
if match:
|
||||
return match.group(1)
|
||||
|
||||
def main(args=sys.argv):
|
||||
print 'Rating:', get_rating(args[1], args[2])
|
||||
print 'Cover:', get_rating(args[1], args[2])
|
||||
print 'EditorialReview:', get_editorial_review(args[1], args[2])
|
||||
|
||||
print get_social_metadata(None, None, None, '9781416551720')
|
||||
return 0
|
||||
|
||||
if __name__ == '__main__':
|
||||
sys.exit(main())
|
||||
sys.exit(main())
|
||||
|
@ -6,7 +6,7 @@ __docformat__ = 'restructuredtext en'
|
||||
import traceback, sys, textwrap, re
|
||||
from threading import Thread
|
||||
|
||||
from calibre import preferred_encoding
|
||||
from calibre import prints
|
||||
from calibre.utils.config import OptionParser
|
||||
from calibre.utils.logging import default_log
|
||||
|
||||
@ -15,7 +15,14 @@ from calibre.customize import Plugin
|
||||
class MetadataSource(Plugin):
|
||||
|
||||
author = 'Kovid Goyal'
|
||||
|
||||
supported_platforms = ['windows', 'osx', 'linux']
|
||||
|
||||
#: The type of metadata fetched. 'basic' means basic metadata like
|
||||
#: title/author/isbn/etc. 'social' means social metadata like
|
||||
#: tags/rating/reviews/etc.
|
||||
metadata_type = 'basic'
|
||||
|
||||
type = _('Metadata download')
|
||||
|
||||
def __call__(self, title, author, publisher, isbn, verbose, log=None,
|
||||
@ -49,6 +56,7 @@ class MetadataSource(Plugin):
|
||||
def join(self):
|
||||
return self.worker.join()
|
||||
|
||||
|
||||
class GoogleBooks(MetadataSource):
|
||||
|
||||
name = 'Google Books'
|
||||
@ -104,6 +112,22 @@ class ISBNDB(MetadataSource):
|
||||
ans = ans.replace('%s', '')
|
||||
return ans
|
||||
|
||||
class Amazon(MetadataSource):
|
||||
|
||||
name = 'Amazon'
|
||||
metadata_type = 'social'
|
||||
|
||||
def fetch(self):
|
||||
if not self.isbn:
|
||||
return
|
||||
from calibre.ebooks.metadata.amazon import get_social_metadata
|
||||
try:
|
||||
self.results = get_social_metadata(self.title, self.author,
|
||||
self.publisher, self.isbn)
|
||||
except Exception, e:
|
||||
self.exception = e
|
||||
self.tb = traceback.format_exc()
|
||||
|
||||
def result_index(source, result):
|
||||
if not result.isbn:
|
||||
return -1
|
||||
@ -134,16 +158,56 @@ def search(title=None, author=None, publisher=None, isbn=None, isbndb_key=None,
|
||||
fetcher(title, author, publisher, isbn, verbose)
|
||||
for fetcher in fetchers:
|
||||
fetcher.join()
|
||||
results = list(fetchers[0].results)
|
||||
for fetcher in fetchers[1:]:
|
||||
merge_results(fetchers[0].results, fetcher.results)
|
||||
merge_results(results, fetcher.results)
|
||||
|
||||
results = sorted(fetchers[0].results, cmp=lambda x, y : cmp(
|
||||
results = sorted(results, cmp=lambda x, y : cmp(
|
||||
(x.comments.strip() if x.comments else ''),
|
||||
(y.comments.strip() if y.comments else '')
|
||||
), reverse=True)
|
||||
|
||||
return results, [(x.name, x.exception, x.tb) for x in fetchers]
|
||||
|
||||
def get_social_metadata(mi, verbose=0):
    '''
    Update ``mi`` in place with social metadata from all 'social' sources.

    Every plugin yielded by ``metadata_sources(metadata_type='social')`` is
    called with the title, authors, publisher and isbn of ``mi`` and then
    joined. The collected ratings are averaged (and averaged again with an
    existing ``mi.rating``), tags are merged, de-duplicated and sorted,
    ``mi.pubdate`` is filled in only if it is None, and ``mi.comments`` is
    replaced by the collected comments.

    :param mi: Metadata object to update.
    :param verbose: Passed through to every source.
    :return: A list of ``(name, exception, traceback)`` tuples, one per
             source.
    '''
    from calibre.customize.ui import metadata_sources

    sources = list(metadata_sources(metadata_type='social'))
    for source in sources:
        source(mi.title, mi.authors, mi.publisher, mi.isbn, verbose)
    for source in sources:
        source.join()

    ratings = []
    tags = set([])
    comments = set([])
    for source in sources:
        if not source.results:
            continue
        dmi = source.results
        if dmi.rating is not None:
            ratings.append(dmi.rating)
        if dmi.tags:
            tags.update(dmi.tags)
        if mi.pubdate is None and dmi.pubdate is not None:
            mi.pubdate = dmi.pubdate
        if dmi.comments:
            comments.add(dmi.comments)

    if ratings:
        average = sum(ratings)/float(len(ratings))
        if mi.rating is not None:
            average = (mi.rating + average)/2.0
        mi.rating = average

    if tags:
        if not mi.tags:
            mi.tags = []
        mi.tags += list(tags)
        mi.tags = sorted(set(mi.tags))

    if comments:
        # Every comment is preceded by a blank line, including the first.
        mi.comments = ''.join(['\n\n'+text for text in comments])

    return [(source.name, source.exception, source.tb) for source in sources]
|
||||
|
||||
|
||||
|
||||
def option_parser():
|
||||
parser = OptionParser(textwrap.dedent(
|
||||
@ -174,11 +238,13 @@ def main(args=sys.argv):
|
||||
opts, args = parser.parse_args(args)
|
||||
results, exceptions = search(opts.title, opts.author, opts.publisher,
|
||||
opts.isbn, opts.isbndb_key, opts.verbose)
|
||||
social_exceptions = []
|
||||
for result in results:
|
||||
print unicode(result).encode(preferred_encoding)
|
||||
social_exceptions.extend(get_social_metadata(result, opts.verbose))
|
||||
prints(unicode(result))
|
||||
print
|
||||
|
||||
for name, exception, tb in exceptions:
|
||||
for name, exception, tb in exceptions+social_exceptions:
|
||||
if exception is not None:
|
||||
print 'WARNING: Fetching from', name, 'failed with error:'
|
||||
print exception
|
||||
|
@ -135,7 +135,11 @@ class ResultList(list):
|
||||
|
||||
def get_tags(self, entry, verbose):
|
||||
try:
|
||||
tags = [x.text for x in subject(entry)]
|
||||
btags = [x.text for x in subject(entry)]
|
||||
tags = []
|
||||
for t in btags:
|
||||
tags.extend([y.strip() for y in t.split('/')])
|
||||
tags = list(sorted(list(set(tags))))
|
||||
except:
|
||||
report(verbose)
|
||||
tags = []
|
||||
|
@ -125,7 +125,16 @@ def create_books(opts, args, timeout=5.):
|
||||
if opts.verbose:
|
||||
print ('ISBNDB query: '+url)
|
||||
|
||||
return [ISBNDBMetadata(book) for book in fetch_metadata(url, timeout=timeout)]
|
||||
tans = [ISBNDBMetadata(book) for book in fetch_metadata(url, timeout=timeout)]
|
||||
ans = []
|
||||
for x in tans:
|
||||
add = True
|
||||
for y in ans:
|
||||
if y.isbn == x.isbn:
|
||||
add = False
|
||||
if add:
|
||||
ans.append(x)
|
||||
return ans
|
||||
|
||||
def main(args=sys.argv):
|
||||
parser = option_parser()
|
||||
|
@ -122,6 +122,8 @@ Metadata download plugins
|
||||
:class:`MetaInformation` objects. If there is an error, it should be stored
|
||||
in `self.exception` and `self.tb` (for the traceback).
|
||||
|
||||
.. automember:: calibre.ebooks.metadata.fetch.MetadataSource.metadata_type
|
||||
|
||||
.. automethod:: calibre.ebooks.metadata.fetch.MetadataSource.fetch
|
||||
|
||||
.. automethod:: calibre.ebooks.metadata.fetch.MetadataSource.is_ok
|
||||
|
Loading…
x
Reference in New Issue
Block a user