mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Amazon metadata download plugin: Improved parsing of broken HTML
This commit is contained in:
parent
e268beaa90
commit
92fe7d3725
@@ -9,6 +9,7 @@ Fetch metadata using Amazon AWS
|
|||||||
import sys, re
|
import sys, re
|
||||||
|
|
||||||
from lxml import html
|
from lxml import html
|
||||||
|
from lxml.html import soupparser
|
||||||
|
|
||||||
from calibre import browser
|
from calibre import browser
|
||||||
from calibre.ebooks.metadata import check_isbn
|
from calibre.ebooks.metadata import check_isbn
|
||||||
@@ -71,7 +72,7 @@ def get_metadata(br, asin, mi):
|
|||||||
return False
|
return False
|
||||||
raw = xml_to_unicode(raw, strip_encoding_pats=True,
|
raw = xml_to_unicode(raw, strip_encoding_pats=True,
|
||||||
resolve_entities=True)[0]
|
resolve_entities=True)[0]
|
||||||
root = html.fromstring(raw)
|
root = soupparser.fromstring(raw)
|
||||||
ratings = root.xpath('//form[@id="handleBuy"]/descendant::*[@class="asinReviewsSummary"]')
|
ratings = root.xpath('//form[@id="handleBuy"]/descendant::*[@class="asinReviewsSummary"]')
|
||||||
if ratings:
|
if ratings:
|
||||||
pat = re.compile(r'([0-9.]+) out of (\d+) stars')
|
pat = re.compile(r'([0-9.]+) out of (\d+) stars')
|
||||||
|
Loading…
x
Reference in New Issue
Block a user