This commit is contained in:
Kovid Goyal 2021-04-11 13:25:29 +05:30
parent eb3bac259d
commit 8cdd52cb3b
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C

View File

@ -215,6 +215,20 @@ class Worker(Thread): # Get details {{{
11: ['noviembre'], 11: ['noviembre'],
12: ['diciembre'], 12: ['diciembre'],
}, },
'se': {
1: ['januari'],
2: ['februari'],
3: ['mars'],
4: ['april'],
5: ['maj'],
6: ['juni'],
7: ['juli'],
8: ['augusti'],
9: ['september'],
10: ['oktober'],
11: ['november'],
12: ['december'],
},
'jp': { 'jp': {
1: ['1月'], 1: ['1月'],
2: ['2月'], 2: ['2月'],
@ -260,13 +274,14 @@ class Worker(Thread): # Get details {{{
starts-with(text(), "Editor:") or \ starts-with(text(), "Editor:") or \
starts-with(text(), "Editora:") or \ starts-with(text(), "Editora:") or \
starts-with(text(), "Uitgever:") or \ starts-with(text(), "Uitgever:") or \
starts-with(text(), "Utgivare:") or \
starts-with(text(), "出版社:")] starts-with(text(), "出版社:")]
''' '''
self.pubdate_xpath = ''' self.pubdate_xpath = '''
descendant::*[starts-with(text(), "Publication Date:") or \ descendant::*[starts-with(text(), "Publication Date:") or \
starts-with(text(), "Audible.com Release Date:")] starts-with(text(), "Audible.com Release Date:")]
''' '''
self.publisher_names = {'Publisher', 'Uitgever', 'Verlag', self.publisher_names = {'Publisher', 'Uitgever', 'Verlag', 'Utgivare',
'Editore', 'Editeur', 'Editor', 'Editora', '出版社'} 'Editore', 'Editeur', 'Editor', 'Editora', '出版社'}
self.language_xpath = ''' self.language_xpath = '''
@ -278,10 +293,11 @@ class Worker(Thread): # Get details {{{
or text() = "Idioma:" \ or text() = "Idioma:" \
or starts-with(text(), "Langue") \ or starts-with(text(), "Langue") \
or starts-with(text(), "言語") \ or starts-with(text(), "言語") \
or starts-with(text(), "Språk") \
or starts-with(text(), "语种") or starts-with(text(), "语种")
] ]
''' '''
self.language_names = {'Language', 'Sprache', self.language_names = {'Language', 'Sprache', 'Språk',
'Lingua', 'Idioma', 'Langue', '言語', 'Taal', '语种'} 'Lingua', 'Idioma', 'Langue', '言語', 'Taal', '语种'}
self.tags_xpath = ''' self.tags_xpath = '''
@ -297,12 +313,14 @@ class Worker(Thread): # Get details {{{
''' '''
self.ratings_pat = re.compile( self.ratings_pat = re.compile(
r'([0-9.,]+) ?(out of|von|van|su|étoiles sur|つ星のうち|de un máximo de|de) ([\d\.]+)( (stars|Sternen|stelle|estrellas|estrelas|sterren)){0,1}') r'([0-9.,]+) ?(out of|von|van|su|étoiles sur|つ星のうち|de un máximo de|de|av) '
r'([\d\.]+)( (stars|Sternen|stelle|estrellas|estrelas|sterren|stjärnor)){0,1}'
)
self.ratings_pat_cn = re.compile('平均([0-9.]+)') self.ratings_pat_cn = re.compile('平均([0-9.]+)')
self.ratings_pat_jp = re.compile(r'\d+つ星のうち([\d\.]+)') self.ratings_pat_jp = re.compile(r'\d+つ星のうち([\d\.]+)')
lm = { lm = {
'eng': ('English', 'Englisch', 'Engels'), 'eng': ('English', 'Englisch', 'Engels', 'Engelska'),
'fra': ('French', 'Français'), 'fra': ('French', 'Français'),
'ita': ('Italian', 'Italiano'), 'ita': ('Italian', 'Italiano'),
'deu': ('German', 'Deutsch'), 'deu': ('German', 'Deutsch'),
@ -311,6 +329,7 @@ class Worker(Thread): # Get details {{{
'por': ('Portuguese', 'Português'), 'por': ('Portuguese', 'Português'),
'nld': ('Dutch', 'Nederlands',), 'nld': ('Dutch', 'Nederlands',),
'chs': ('Chinese', '中文', '简体中文'), 'chs': ('Chinese', '中文', '简体中文'),
'swe': ('Swedish', 'Svenska'),
} }
self.lang_map = {} self.lang_map = {}
for code, names in lm.items(): for code, names in lm.items():
@ -427,7 +446,8 @@ class Worker(Thread): # Get details {{{
detail_bullets = root.xpath('//*[@data-feature-name="detailBullets"]') detail_bullets = root.xpath('//*[@data-feature-name="detailBullets"]')
non_hero = tuple(self.selector( non_hero = tuple(self.selector(
'div#bookDetails_container_div div#nonHeroSection')) 'div#bookDetails_container_div div#nonHeroSection')) or tuple(self.selector(
'#productDetails_techSpec_sections'))
if detail_bullets: if detail_bullets:
self.parse_detail_bullets(root, mi, detail_bullets[0]) self.parse_detail_bullets(root, mi, detail_bullets[0])
elif non_hero: elif non_hero:
@ -527,6 +547,7 @@ class Worker(Thread): # Get details {{{
'#byline .author a.a-link-normal', '#byline .author a.a-link-normal',
'#bylineInfo .author .contributorNameID', '#bylineInfo .author .contributorNameID',
'#bylineInfo .author a.a-link-normal', '#bylineInfo .author a.a-link-normal',
'#bylineInfo #bylineContributor',
): ):
matches = tuple(self.selector(sel)) matches = tuple(self.selector(sel))
if matches: if matches:
@ -671,6 +692,10 @@ class Worker(Thread): # Get details {{{
except Exception as e: except Exception as e:
self.log.warn( self.log.warn(
'Parsing of obfuscated product description failed with error: %s' % as_unicode(e)) 'Parsing of obfuscated product description failed with error: %s' % as_unicode(e))
else:
desc = root.xpath('//div[@id="productDescription_fullView"]')
if desc:
ans += self._render_comments(desc[0])
return ans return ans
@ -853,7 +878,7 @@ class Worker(Thread): # Get details {{{
def parse_new_details(self, root, mi, non_hero):
    """Hand every two-cell row of the first table under ``non_hero`` to
    ``self.parse_detail_cells``.

    :param root: accepted for signature compatibility; not used in this body.
    :param mi: passed through unchanged as the first argument of
        ``self.parse_detail_cells``.
    :param non_hero: node exposing an ``xpath()`` method; only the first
        result of its ``descendant::table`` query is examined.

    The ``[0]`` index on the table query is unguarded, so an empty result
    propagates the indexing error to the caller.
    """
    first_table = non_hero.xpath('descendant::table')[0]
    rows = first_table.xpath('descendant::tr')
    for row in rows:
        # Match both <td> and <th>, so rows that use a header cell for
        # the label are picked up as well.
        pair = row.xpath('descendant::*[local-name()="td" or local-name()="th"]')
        if len(pair) != 2:
            # Only rows with exactly two cells are processed.
            continue
        first_cell, second_cell = pair
        self.parse_detail_cells(mi, first_cell, second_cell)
@ -938,7 +963,7 @@ class Worker(Thread): # Get details {{{
class Amazon(Source): class Amazon(Source):
name = 'Amazon.com' name = 'Amazon.com'
version = (1, 2, 17) version = (1, 2, 18)
minimum_calibre_version = (2, 82, 0) minimum_calibre_version = (2, 82, 0)
description = _('Downloads metadata and covers from Amazon') description = _('Downloads metadata and covers from Amazon')
@ -963,6 +988,7 @@ class Amazon(Source):
'nl': _('Netherlands'), 'nl': _('Netherlands'),
'cn': _('China'), 'cn': _('China'),
'ca': _('Canada'), 'ca': _('Canada'),
'se': _('Sweden'),
} }
SERVERS = { SERVERS = {
@ -1687,6 +1713,16 @@ def manual_tests(domain, **kw): # {{{
), ),
] # }}} ] # }}}
all_tests['se'] = [ # {{{
(
{'identifiers': {'isbn': '9780552140287'}},
[title_test('Men At Arms: A Discworld Novel: 14',
exact=False), authors_test(['Terry Pratchett'])
]
),
] # }}}
all_tests['jp'] = [ # {{{ all_tests['jp'] = [ # {{{
( # Adult filtering test ( # Adult filtering test
{'identifiers': {'isbn': '4799500066'}}, {'identifiers': {'isbn': '4799500066'}},