Amazon metadata download: Fix Editorial Reviews not downloading into the comments section

This commit is contained in:
Kovid Goyal 2014-04-13 16:36:25 +05:30
parent 9b18bd674e
commit 0c82e9c0d2

View File

@@ -487,6 +487,7 @@ class Worker(Thread): # Get details {{{
return sanitize_comments_html(desc) return sanitize_comments_html(desc)
def parse_comments(self, root): def parse_comments(self, root):
ans = ''
ns = CSSSelect('#bookDescription_feature_div noscript')(root) ns = CSSSelect('#bookDescription_feature_div noscript')(root)
if ns: if ns:
ns = ns[0] ns = ns[0]
@@ -496,9 +497,8 @@ class Worker(Thread): # Get details {{{
ns = html5lib.parseFragment('<div>%s</div>' % (ns.text), treebuilder='lxml', namespaceHTMLElements=False)[0] ns = html5lib.parseFragment('<div>%s</div>' % (ns.text), treebuilder='lxml', namespaceHTMLElements=False)[0]
else: else:
ns.tag = 'div' ns.tag = 'div'
return self._render_comments(ns) ans = self._render_comments(ns)
else:
ans = ''
desc = root.xpath('//div[@id="ps-content"]/div[@class="content"]') desc = root.xpath('//div[@id="ps-content"]/div[@class="content"]')
if desc: if desc:
ans = self._render_comments(desc[0]) ans = self._render_comments(desc[0])
@@ -1038,11 +1038,12 @@ if __name__ == '__main__': # tests {{{
] ]
), ),
( # noscript description ( # noscript description
{'identifiers':{'amazon':'0756407117'}}, {'identifiers':{'amazon':'0756407117'}},
[title_test( [title_test(
"Throne of the Crescent Moon"), "Throne of the Crescent Moon"),
comments_test('Makhslood'), comments_test('Makhslood'), comments_test('Publishers Weekly'),
] ]
), ),