Implement id_from_url() for amazon as well

This commit is contained in:
Kovid Goyal 2022-04-15 09:30:26 +05:30
parent 6c4a979058
commit ddb2e5d951
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C

View File

@ -1033,6 +1033,22 @@ class Amazon(Source):
Source.__init__(self, *args, **kwargs) Source.__init__(self, *args, **kwargs)
self.set_amazon_id_touched_fields() self.set_amazon_id_touched_fields()
def id_from_url(self, url):
    '''
    Extract an identifier from an Amazon product URL.

    The URL must have a host with ``amazon`` as one of its dot-separated
    labels and a path containing ``/dp/``. The path segment immediately
    after the first ``/dp/`` is used as the identifier value (presumably
    the ASIN -- confirm against callers).

    :param url: The URL to inspect, as a text string.
    :return: A tuple ``(identifier_type, identifier_value)`` where
        ``identifier_type`` is ``'amazon'`` when the last host label is
        ``com`` and ``'amazon_<label>'`` otherwise (for example
        ``amazon_uk`` for ``amazon.co.uk``). ``None`` if the URL is not
        recognised or the segment after ``/dp/`` is empty.
    '''
    try:
        from polyglot.urllib import urlparse
    except ImportError:
        # polyglot is project-local; fall back to the standard library
        # when it is not importable (assumed to be an equivalent function).
        from urllib.parse import urlparse
    purl = urlparse(url)
    path = purl.path or ''
    if '/dp/' not in path:
        return None
    # hostname (unlike netloc) is lower-cased and excludes any
    # user-info and port, so 'amazon.de:443' still yields 'de' below.
    host_parts = (purl.hostname or '').split('.')
    if 'amazon' not in host_parts:
        return None
    # Take the single path segment that follows the first '/dp/'.
    val = path.partition('/dp/')[2].split('/', 1)[0]
    if not val:
        # e.g. 'https://www.amazon.com/dp/' carries no identifier
        return None
    domain = host_parts[-1]
    aid = 'amazon' if domain == 'com' else 'amazon_' + domain
    return aid, val
def test_fields(self, mi): def test_fields(self, mi):
''' '''
Return the first field from self.touched_fields that is null on the Return the first field from self.touched_fields that is null on the