Apply an easily identifiable pattern for bad catalog entries

This commit is contained in:
Kovid Goyal 2016-06-05 12:41:42 +05:30
parent c8374fd05b
commit 6c8a8142bd

View File

@@ -987,6 +987,9 @@ class Amazon(Source):
for x in bad: for x in bad:
if x in title: if x in title:
return False return False
if title and title[0] in '[{' and re.search(r'\(\s*author\s*\)', title) is not None:
# Bad entries in the catalog
return False
return True return True
for a in root.xpath(r'//li[starts-with(@id, "result_")]//a[@href and contains(@class, "s-access-detail-page")]'): for a in root.xpath(r'//li[starts-with(@id, "result_")]//a[@href and contains(@class, "s-access-detail-page")]'):