From 92fe7d3725f7785278c4bd2dfd5ad81e290827f5 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 31 Oct 2010 12:01:16 -0600 Subject: [PATCH] Amazon metadata download plugin: Improved parsing of broken HTML --- src/calibre/ebooks/metadata/amazon.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/calibre/ebooks/metadata/amazon.py b/src/calibre/ebooks/metadata/amazon.py index e61e0b2748..5d7d0358f0 100644 --- a/src/calibre/ebooks/metadata/amazon.py +++ b/src/calibre/ebooks/metadata/amazon.py @@ -9,6 +9,7 @@ Fetch metadata using Amazon AWS import sys, re from lxml import html +from lxml.html import soupparser from calibre import browser from calibre.ebooks.metadata import check_isbn @@ -71,7 +72,7 @@ def get_metadata(br, asin, mi): return False raw = xml_to_unicode(raw, strip_encoding_pats=True, resolve_entities=True)[0] - root = html.fromstring(raw) + root = soupparser.fromstring(raw) ratings = root.xpath('//form[@id="handleBuy"]/descendant::*[@class="asinReviewsSummary"]') if ratings: pat = re.compile(r'([0-9.]+) out of (\d+) stars')