mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Download tags from Amazon
When downloading metadata from Amazon, convert the amazon categories into tags. You can turn this off by going to Preferences->Metadata download and configuring the Amazon source. Fixes #1206763 [[Enhancement] Get tags from Amazon](https://bugs.launchpad.net/calibre/+bug/1206763)
This commit is contained in:
parent
fda8ce7abb
commit
d38dffa86d
@ -162,6 +162,18 @@ class Worker(Thread): # Get details {{{
|
|||||||
'''
|
'''
|
||||||
self.language_names = {'Language', 'Sprache', 'Lingua', 'Idioma', 'Langue', '言語'}
|
self.language_names = {'Language', 'Sprache', 'Lingua', 'Idioma', 'Langue', '言語'}
|
||||||
|
|
||||||
|
self.tags_xpath = '''
|
||||||
|
descendant::h2[
|
||||||
|
text() = "Look for Similar Items by Category" or
|
||||||
|
text() = "Ähnliche Artikel finden" or
|
||||||
|
text() = "Buscar productos similares por categoría" or
|
||||||
|
text() = "Ricerca articoli simili per categoria" or
|
||||||
|
text() = "Rechercher des articles similaires par rubrique" or
|
||||||
|
text() = "Procure por itens similares por categoria" or
|
||||||
|
text() = "関連商品を探す"
|
||||||
|
]/../descendant::ul/li
|
||||||
|
'''
|
||||||
|
|
||||||
self.ratings_pat = re.compile(
|
self.ratings_pat = re.compile(
|
||||||
r'([0-9.]+) ?(out of|von|su|étoiles sur|つ星のうち|de un máximo de|de) ([\d\.]+)( (stars|Sternen|stelle|estrellas|estrelas)){0,1}')
|
r'([0-9.]+) ?(out of|von|su|étoiles sur|つ星のうち|de un máximo de|de) ([\d\.]+)( (stars|Sternen|stelle|estrellas|estrelas)){0,1}')
|
||||||
|
|
||||||
@ -312,6 +324,11 @@ class Worker(Thread): # Get details {{{
|
|||||||
except:
|
except:
|
||||||
self.log.exception('Error parsing series for url: %r'%self.url)
|
self.log.exception('Error parsing series for url: %r'%self.url)
|
||||||
|
|
||||||
|
try:
|
||||||
|
mi.tags = self.parse_tags(root)
|
||||||
|
except:
|
||||||
|
self.log.exception('Error parsing tags for url: %r'%self.url)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
self.cover_url = self.parse_cover(root, raw)
|
self.cover_url = self.parse_cover(root, raw)
|
||||||
except:
|
except:
|
||||||
@ -490,6 +507,23 @@ class Worker(Thread): # Get details {{{
|
|||||||
ans = (s, i)
|
ans = (s, i)
|
||||||
return ans
|
return ans
|
||||||
|
|
||||||
|
def parse_tags(self, root):
|
||||||
|
ans = []
|
||||||
|
exclude_tokens = {'kindle', 'a-z'}
|
||||||
|
exclude = {'special features', 'by authors', 'authors & illustrators', 'books', 'new; used & rental textbooks'}
|
||||||
|
seen = set()
|
||||||
|
for li in root.xpath(self.tags_xpath):
|
||||||
|
for i, a in enumerate(li.iterdescendants('a')):
|
||||||
|
if i > 0:
|
||||||
|
# we ignore the first category since it is almost always too broad
|
||||||
|
raw = (a.text or '').strip().replace(',', ';')
|
||||||
|
lraw = icu_lower(raw)
|
||||||
|
tokens = frozenset(lraw.split())
|
||||||
|
if raw and lraw not in exclude and not tokens.intersection(exclude_tokens) and lraw not in seen:
|
||||||
|
ans.append(raw)
|
||||||
|
seen.add(lraw)
|
||||||
|
return ans
|
||||||
|
|
||||||
def parse_cover(self, root, raw=b""):
|
def parse_cover(self, root, raw=b""):
|
||||||
imgs = root.xpath('//img[(@id="prodImage" or @id="original-main-image" or @id="main-image") and @src]')
|
imgs = root.xpath('//img[(@id="prodImage" or @id="original-main-image" or @id="main-image") and @src]')
|
||||||
if not imgs:
|
if not imgs:
|
||||||
@ -588,7 +622,7 @@ class Amazon(Source):
|
|||||||
capabilities = frozenset(['identify', 'cover'])
|
capabilities = frozenset(['identify', 'cover'])
|
||||||
touched_fields = frozenset(['title', 'authors', 'identifier:amazon',
|
touched_fields = frozenset(['title', 'authors', 'identifier:amazon',
|
||||||
'identifier:isbn', 'rating', 'comments', 'publisher', 'pubdate',
|
'identifier:isbn', 'rating', 'comments', 'publisher', 'pubdate',
|
||||||
'languages', 'series'])
|
'languages', 'series', 'tags'])
|
||||||
has_html_comments = True
|
has_html_comments = True
|
||||||
supports_gzip_transfer_encoding = True
|
supports_gzip_transfer_encoding = True
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user