mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-07 10:14:46 -04:00
Amazon metadata download: When downloading from amazon.co.jp, handle the 'Black curtain redirect' for adult titles. Fixes #1165628 (Amazon.co.jp : Cover and info fail collecting)
This commit is contained in:
parent
9c3591a467
commit
f539b863dd
@ -132,7 +132,7 @@ class Worker(Thread): # Get details {{{
|
|||||||
text()="Détails sur le produit" or \
|
text()="Détails sur le produit" or \
|
||||||
text()="Detalles del producto" or \
|
text()="Detalles del producto" or \
|
||||||
text()="Detalhes do produto" or \
|
text()="Detalhes do produto" or \
|
||||||
text()="登録情報"]/../div[@class="content"]
|
starts-with(text(), "登録情報")]/../div[@class="content"]
|
||||||
'''
|
'''
|
||||||
# Editor: is for Spanish
|
# Editor: is for Spanish
|
||||||
self.publisher_xpath = '''
|
self.publisher_xpath = '''
|
||||||
@ -235,6 +235,12 @@ class Worker(Thread): # Get details {{{
|
|||||||
msg = 'Failed to parse amazon details page: %r'%self.url
|
msg = 'Failed to parse amazon details page: %r'%self.url
|
||||||
self.log.exception(msg)
|
self.log.exception(msg)
|
||||||
return
|
return
|
||||||
|
if self.domain == 'jp':
|
||||||
|
for a in root.xpath('//a[@href]'):
|
||||||
|
if 'black-curtain-redirect.html' in a.get('href'):
|
||||||
|
self.url = 'http://amazon.co.jp'+a.get('href')
|
||||||
|
self.log('Black curtain redirect found, following')
|
||||||
|
return self.get_details()
|
||||||
|
|
||||||
errmsg = root.xpath('//*[@id="errorMessage"]')
|
errmsg = root.xpath('//*[@id="errorMessage"]')
|
||||||
if errmsg:
|
if errmsg:
|
||||||
@ -252,8 +258,8 @@ class Worker(Thread): # Get details {{{
|
|||||||
self.log.exception('Error parsing asin for url: %r'%self.url)
|
self.log.exception('Error parsing asin for url: %r'%self.url)
|
||||||
asin = None
|
asin = None
|
||||||
if self.testing:
|
if self.testing:
|
||||||
import tempfile
|
import tempfile, uuid
|
||||||
with tempfile.NamedTemporaryFile(prefix=asin + '_',
|
with tempfile.NamedTemporaryFile(prefix=(asin or str(uuid.uuid4()))+ '_',
|
||||||
suffix='.html', delete=False) as f:
|
suffix='.html', delete=False) as f:
|
||||||
f.write(raw)
|
f.write(raw)
|
||||||
print ('Downloaded html for', asin, 'saved in', f.name)
|
print ('Downloaded html for', asin, 'saved in', f.name)
|
||||||
@ -499,7 +505,7 @@ class Worker(Thread): # Get details {{{
|
|||||||
def parse_language(self, pd):
|
def parse_language(self, pd):
|
||||||
for x in reversed(pd.xpath(self.language_xpath)):
|
for x in reversed(pd.xpath(self.language_xpath)):
|
||||||
if x.tail:
|
if x.tail:
|
||||||
raw = x.tail.strip()
|
raw = x.tail.strip().partition(',')[0].strip()
|
||||||
ans = self.lang_map.get(raw, None)
|
ans = self.lang_map.get(raw, None)
|
||||||
if ans:
|
if ans:
|
||||||
return ans
|
return ans
|
||||||
@ -1004,6 +1010,11 @@ if __name__ == '__main__': # tests {{{
|
|||||||
] # }}}
|
] # }}}
|
||||||
|
|
||||||
jp_tests = [ # {{{
|
jp_tests = [ # {{{
|
||||||
|
( # Adult filtering test
|
||||||
|
{'identifiers':{'isbn':'4799500066'}},
|
||||||
|
[title_test(u'Bitch Trap'),]
|
||||||
|
),
|
||||||
|
|
||||||
( # isbn -> title, authors
|
( # isbn -> title, authors
|
||||||
{'identifiers':{'isbn': '9784101302720' }},
|
{'identifiers':{'isbn': '9784101302720' }},
|
||||||
[title_test(u'精霊の守り人',
|
[title_test(u'精霊の守り人',
|
||||||
|
Loading…
x
Reference in New Issue
Block a user