mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-08 18:54:09 -04:00
Amazon plugin: Work around broken encoding. Detect and use the search results page markup served to mobile user agents
This commit is contained in:
parent
0d924d81ef
commit
6e98d78dd7
@ -218,6 +218,9 @@ class Worker(Thread): # Get details {{{
|
||||
' @class="emptyClear" or @href]'):
|
||||
c.getparent().remove(c)
|
||||
desc = tostring(desc, method='html', encoding=unicode).strip()
|
||||
# Encoding bug in Amazon data U+fffd (replacement char)
|
||||
# in some examples it is present in place of '
|
||||
desc = desc.replace('\ufffd', "'")
|
||||
# remove all attributes from tags
|
||||
desc = re.sub(r'<([a-zA-Z0-9]+)\s[^>]+>', r'<\1>', desc)
|
||||
# Collapse whitespace
|
||||
@ -410,6 +413,18 @@ class Amazon(Source):
|
||||
if 'bulk pack' not in title:
|
||||
matches.append(a.get('href'))
|
||||
break
|
||||
if not matches:
|
||||
# This can happen for some user agents that Amazon thinks are
|
||||
# mobile/less capable
|
||||
log('Trying alternate results page markup')
|
||||
for td in root.xpath(
|
||||
r'//div[@id="Results"]/descendant::td[starts-with(@id, "search:Td:")]'):
|
||||
for a in td.xpath(r'descendant::td[@class="dataColumn"]/descendant::a[@href]/span[@class="srTitle"]/..'):
|
||||
title = tostring(a, method='text', encoding=unicode).lower()
|
||||
if 'bulk pack' not in title:
|
||||
matches.append(a.get('href'))
|
||||
break
|
||||
|
||||
|
||||
# Keep only the top 5 matches as the matches are sorted by relevance by
|
||||
# Amazon so lower matches are not likely to be very relevant
|
||||
|
Loading…
x
Reference in New Issue
Block a user