Metadata compatibility

commit 888aaec88f
parent e3ec837fd1
@@ -580,12 +580,12 @@ from calibre.devices.folder_device.driver import FOLDER_DEVICE_FOR_CONFIG
from calibre.devices.kobo.driver import KOBO
from calibre.devices.bambook.driver import BAMBOOK

from calibre.ebooks.metadata.fetch import KentDistrictLibrary
from calibre.ebooks.metadata.fetch import KentDistrictLibrary, Amazon
from calibre.ebooks.metadata.douban import DoubanBooks
from calibre.ebooks.metadata.isbndb import ISBNDB
from calibre.ebooks.metadata.google_books import GoogleBooks
from calibre.ebooks.metadata.nicebooks import NiceBooks, NiceBooksCovers
from calibre.ebooks.metadata.amazon import Amazon, AmazonSocial
# from calibre.ebooks.metadata.amazon import Amazon , AmazonSocial
from calibre.ebooks.metadata.fictionwise import Fictionwise
from calibre.ebooks.metadata.covers import OpenLibraryCovers, \
        AmazonCovers, DoubanCovers, LibrarythingCovers
@@ -593,7 +593,7 @@ from calibre.library.catalog import CSV_XML, EPUB_MOBI, BIBTEX
from calibre.ebooks.epub.fix.unmanifested import Unmanifested
from calibre.ebooks.epub.fix.epubcheck import Epubcheck

plugins = [HTML2ZIP, PML2PMLZ, TXT2TXTZ, ArchiveExtract, GoogleBooks, ISBNDB, Amazon, AmazonSocial,
plugins = [HTML2ZIP, PML2PMLZ, TXT2TXTZ, ArchiveExtract, GoogleBooks, ISBNDB, Amazon, #AmazonSocial,
        KentDistrictLibrary, DoubanBooks, NiceBooks, CSV_XML, EPUB_MOBI, BIBTEX, Unmanifested,
        Epubcheck, OpenLibraryCovers, AmazonCovers, DoubanCovers, LibrarythingCovers,
        NiceBooksCovers]
@@ -1,7 +1,11 @@
from __future__ import with_statement
__license__ = 'GPL 3'
__copyright__ = '2010, sengian <sengian1@gmail.com>'
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'

'''
Fetch metadata using Amazon AWS
'''
import sys, re
from threading import RLock

@@ -12,10 +16,6 @@ from calibre import browser
from calibre.ebooks.metadata import check_isbn
from calibre.ebooks.metadata.book.base import Metadata
from calibre.ebooks.chardet import xml_to_unicode
from calibre.ebooks.metadata import MetaInformation, check_isbn, \
        authors_to_sort_string
from calibre.ebooks.metadata.fetch import MetadataSource
from calibre.utils.config import OptionParser
from calibre.library.comments import sanitize_comments_html

asin_cache = {}
@@ -160,229 +160,31 @@ def get_metadata(br, asin, mi):
            m = pat.match(t)
            if m is not None:
                try:
                    default = utcnow().replace(day=15)
                    if self.lang != 'all':
                        d = replace_months(d, self.lang)
                    d = parse_date(d, assume_utc=True, default=default)
                    mi.pubdate = d
                    mi.rating = float(m.group(1))/float(m.group(2)) * 5
                    break
                except:
                    report(verbose)
        #ISBN
        elt = filter(lambda x: self.reisbn.search(x.find('b').text), elts)
        if elt:
            isbn = elt[0].find('b').tail.replace('-', '').strip()
            if check_isbn(isbn):
                mi.isbn = unicode(isbn)
            elif len(elt) > 1:
                isbnone = elt[1].find('b').tail.replace('-', '').strip()
                if check_isbn(isbnone):
                    mi.isbn = unicode(isbnone)
            else:
                #assume ASIN-> find a check for asin
                mi.isbn = unicode(isbn)
        #Langue
        elt = filter(lambda x: self.relang.search(x.find('b').text), elts)
        if elt:
            langue = elt[0].find('b').tail.strip()
            if langue:
                mi.language = unicode(langue)
        #ratings
        elt = filter(lambda x: self.reratelt.search(x.find('b').text), elts)
        if elt:
            ratings = elt[0].find_class('swSprite')
            if ratings:
                ratings = self.rerat.findall(ratings[0].get('title'))
                if len(ratings) == 2:
                    mi.rating = float(ratings[0])/float(ratings[1]) * 5
        return mi
        pass

    def fill_MI(self, entry, verbose):
        try:
            title = self.get_title(entry)
            authors = self.get_authors(entry)
        except Exception, e:
            if verbose:
                print _('Failed to get all details for an entry')
                print e
                print _('URL who failed: %s') % x
            report(verbose)
            return None
        mi = MetaInformation(title, authors)
        mi.author_sort = authors_to_sort_string(authors)
        try:
            mi.comments = self.get_description(entry, verbose)
            mi = self.get_book_info(entry, mi, verbose)
        except:
            pass
        return mi
    desc = root.xpath('//div[@id="productDescription"]/*[@class="content"]')
    if desc:
        desc = desc[0]
        for c in desc.xpath('descendant::*[@class="seeAll" or'
                ' @class="emptyClear" or @href]'):
            c.getparent().remove(c)
        desc = html.tostring(desc, method='html', encoding=unicode).strip()
        # remove all attributes from tags
        desc = re.sub(r'<([a-zA-Z0-9]+)\s[^>]+>', r'<\1>', desc)
        # Collapse whitespace
        #desc = re.sub('\n+', '\n', desc)
        #desc = re.sub(' +', ' ', desc)
        # Remove the notice about text referring to out of print editions
        desc = re.sub(r'(?s)<em>--This text ref.*?</em>', '', desc)
        # Remove comments
        desc = re.sub(r'(?s)<!--.*?-->', '', desc)
        mi.comments = sanitize_comments_html(desc)

    def get_individual_metadata(self, url, br, verbose):
        try:
            raw = br.open_novisit(url).read()
        except Exception, e:
            import socket
            report(verbose)
            if callable(getattr(e, 'getcode', None)) and \
                    e.getcode() == 404:
                return None
            attr = getattr(e, 'args', [None])
            attr = attr if attr else [None]
            if isinstance(attr[0], socket.timeout):
                raise AmazonError(_('Amazon timed out. Try again later.'))
            raise AmazonError(_('Amazon encountered an error.'))
        if '<title>404 - ' in raw:
            report(verbose)
            return None
        raw = xml_to_unicode(raw, strip_encoding_pats=True,
                resolve_entities=True)[0]
        try:
            return soupparser.fromstring(raw)
        except:
            try:
                #remove ASCII invalid chars
                return soupparser.fromstring(clean_ascii_chars(raw))
            except:
                report(verbose)
                return None
    return True

    def fetchdatathread(self, qbr, qsync, nb, url, verbose):
        try:
            browser = qbr.get(True)
            entry = self.get_individual_metadata(url, browser, verbose)
        except:
            report(verbose)
            entry = None
        finally:
            qbr.put(browser, True)
            qsync.put((nb, entry), True)

    def producer(self, sync, urls, br, verbose=False):
        for i in xrange(len(urls)):
            thread = Thread(target=self.fetchdatathread,
                    args=(br, sync, i, urls[i], verbose))
            thread.start()

    def consumer(self, sync, syncbis, br, total_entries, verbose=False):
        i=0
        self.extend([None]*total_entries)
        while i < total_entries:
            rq = sync.get(True)
            nb = int(rq[0])
            entry = rq[1]
            i+=1
            if entry is not None:
                mi = self.fill_MI(entry, verbose)
                if mi is not None:
                    mi.tags, atag = self.get_tags(entry, verbose)
                    self[nb] = mi
                    if atag:
                        thread = Thread(target=self.fetchdatathread,
                                args=(br, syncbis, nb, mi.tags, verbose))
                        thread.start()
                    else:
                        syncbis.put((nb, None), True)

    def final(self, sync, total_entries, verbose):
        i=0
        while i < total_entries:
            rq = sync.get(True)
            nb = int(rq[0])
            tags = rq[1]
            i+=1
            if tags is not None:
                self[nb].tags = self.get_tags(tags, verbose)[0]

    def populate(self, entries, ibr, verbose=False, brcall=3):
        br = Queue(brcall)
        cbr = Queue(brcall-1)

        syncp = Queue(1)
        syncc = Queue(1)

        for i in xrange(brcall-1):
            br.put(browser(), True)
            cbr.put(browser(), True)
        br.put(ibr, True)

        prod_thread = Thread(target=self.producer, args=(syncp, entries, br, verbose))
        cons_thread = Thread(target=self.consumer, args=(syncp, syncc, cbr, len(entries), verbose))
        fin_thread = Thread(target=self.final, args=(syncc, len(entries), verbose))
        prod_thread.start()
        cons_thread.start()
        fin_thread.start()
        prod_thread.join()
        cons_thread.join()
        fin_thread.join()
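populate() above wires three stages together with bounded Queues: producer() starts one fetchdatathread per URL, consumer() files each parsed result into its slot as entries arrive on syncp, and final() merges the tag lookups delivered on syncc. Below is a minimal standalone sketch of that producer/consumer idea (Python 2, matching this module); fetch_one and the example URLs are illustrative stand-ins, not calibre API.

# Illustrative sketch only: the bounded-queue producer/consumer pattern used
# by ResultList.populate(), reduced to its essentials.
from Queue import Queue
from threading import Thread

def fetch_one(url):
    # Stand-in for get_individual_metadata(); assumed helper, not calibre API.
    return 'metadata for %s' % url

def producer(sync, urls):
    # One worker thread per URL; each puts (index, result) on the queue.
    for i in xrange(len(urls)):
        Thread(target=lambda n=i, u=urls[i]: sync.put((n, fetch_one(u)), True)).start()

def consumer(sync, total):
    # Results arrive in completion order but are slotted by index, so the
    # original ordering is preserved, as ResultList.consumer() does.
    results = [None]*total
    for _ in xrange(total):
        nb, entry = sync.get(True)
        results[nb] = entry
    return results

if __name__ == '__main__':
    urls = ['http://www.example.com/book/%d' % i for i in xrange(3)]
    sync = Queue(1)     # bounded queue, as in populate()
    Thread(target=producer, args=(sync, urls)).start()
    print consumer(sync, len(urls))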
def search(title=None, author=None, publisher=None, isbn=None,
           max_results=5, verbose=False, keywords=None, lang='all'):
    br = browser()
    entries, baseurl = Query(title=title, author=author, isbn=isbn, publisher=publisher,
        keywords=keywords, max_results=max_results,rlang=lang)(br, verbose)

    if entries is None or len(entries) == 0:
        return None

    #List of entry
    ans = ResultList(baseurl, lang)
    ans.populate(entries, br, verbose)
    return [x for x in ans if x is not None]

def get_social_metadata(title, authors, publisher, isbn, verbose=False,
                        max_results=1, lang='all'):
    mi = MetaInformation(title, authors)
    if not isbn or not check_isbn(isbn):
        return [mi]

    amazresults = search(isbn=isbn, verbose=verbose,
                max_results=max_results, lang=lang)
    if amazresults is None or amazresults[0] is None:
        from calibre.ebooks.metadata.xisbn import xisbn
        for i in xisbn.get_associated_isbns(isbn):
            amazresults = search(isbn=i, verbose=verbose,
                max_results=max_results, lang=lang)
            if amazresults is not None and amazresults[0] is not None:
                break
    if amazresults is None or amazresults[0] is None:
        return [mi]

    miaz = amazresults[0]
    if miaz.rating is not None:
        mi.rating = miaz.rating
    if miaz.comments is not None:
        mi.comments = miaz.comments
    if miaz.tags is not None:
        mi.tags = miaz.tags
    return [mi]
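get_social_metadata() above is the entry point that the Amazon plugin in fetch.py (further down in this commit) calls with title, authors, publisher and ISBN. A hedged usage sketch, assuming a calibre source checkout on the import path and network access; the title and ISBN are made-up example values.

# Illustrative only (Python 2): exercising the social-metadata entry point
# restored above. The title, author and ISBN are example values.
from calibre.ebooks.metadata.amazon import get_social_metadata

# Returns a one-element list of MetaInformation; rating, comments and tags
# are filled in when an Amazon record matching the ISBN is found.
for mi in get_social_metadata('Foundation', ['Isaac Asimov'], None, '9780553293357'):
    print mi.rating
    print mi.tags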
def option_parser():
    import textwrap
    parser = OptionParser(textwrap.dedent(\
        _('''\
        %prog [options]

        Fetch book metadata from Amazon. You must specify one of title, author,
        ISBN, publisher or keywords. Will fetch a maximum of 20 matches,
        so you should make your query as specific as possible.
        You can chose the language for metadata retrieval:
        english & french & german
        '''
        )))
    parser.add_option('-t', '--title', help=_('Book title'))
    parser.add_option('-a', '--author', help=_('Book author(s)'))
    parser.add_option('-p', '--publisher', help=_('Book publisher'))
    parser.add_option('-i', '--isbn', help=_('Book ISBN'))
    parser.add_option('-k', '--keywords', help=_('Keywords'))
    parser.add_option('-s', '--social', default=0, action='count',
                      help=_('Get social data only'))
    parser.add_option('-m', '--max-results', default=10,
                      help=_('Maximum number of results to fetch'))
    parser.add_option('-l', '--lang', default='all',
                      help=_('Chosen language for metadata search (en, fr, de)'))
    parser.add_option('-v', '--verbose', default=0, action='count',
                      help=_('Be more verbose about errors'))
    return parser

def main(args=sys.argv):
    import tempfile, os
@@ -412,8 +214,3 @@ def main(args=sys.argv):

if __name__ == '__main__':
    sys.exit(main())
    # import cProfile
    # sys.exit(cProfile.run("import calibre.ebooks.metadata.amazonbis; calibre.ebooks.metadata.amazonbis.main()"))
    # sys.exit(cProfile.run("import calibre.ebooks.metadata.amazonbis; calibre.ebooks.metadata.amazonbis.main()", "profile"))

# calibre-debug -e "D:\Mes eBooks\Developpement\calibre\src\calibre\ebooks\metadata\amazon.py" -m 5 -a gore -v>data.html
@@ -212,6 +212,27 @@ class MetadataSource(Plugin): # {{{

# }}}

class Amazon(MetadataSource): # {{{

    name = 'Amazon'
    metadata_type = 'social'
    description = _('Downloads social metadata from amazon.com')

    has_html_comments = True

    def fetch(self):
        if not self.isbn:
            return
        from calibre.ebooks.metadata.amazon import get_social_metadata
        try:
            self.results = get_social_metadata(self.title, self.book_author,
                    self.publisher, self.isbn)
        except Exception, e:
            self.exception = e
            self.tb = traceback.format_exc()

# }}}

class KentDistrictLibrary(MetadataSource): # {{{

    name = 'Kent District Library'