From a18598a7c725b7b8c2e487c4eb58b2b975e49cf4 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 30 Oct 2010 11:21:33 -0600 Subject: [PATCH] ... --- src/calibre/ebooks/metadata/amazon.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/calibre/ebooks/metadata/amazon.py b/src/calibre/ebooks/metadata/amazon.py index 10590b8cff..a8ff0f1ad0 100644 --- a/src/calibre/ebooks/metadata/amazon.py +++ b/src/calibre/ebooks/metadata/amazon.py @@ -92,10 +92,14 @@ def get_metadata(br, asin, mi): ' @class="emptyClear" or @href]'): c.getparent().remove(c) desc = html.tostring(desc, method='html', encoding=unicode).strip() - desc = re.sub(r' class=[^>]+>', '>', desc) + # remove all attributes from tags + desc = re.sub(r'<([a-zA-Z0-9]+)\s[^>]+>', r'<\1>', desc) + # Collapse whitespace desc = re.sub('\n+', '\n', desc) desc = re.sub(' +', ' ', desc) + # Remove the notice about text referring to out of print editions desc = re.sub(r'(?s)--This text ref.*?', '', desc) + # Remove comments desc = re.sub(r'(?s)<!--.*?-->', '', desc) mi.comments = desc