Filter audiobooks from Amazon metadata download results

This commit is contained in:
Kovid Goyal 2011-05-20 10:23:16 -06:00
parent 773c564174
commit 4cc36cac1a
2 changed files with 3 additions and 2 deletions

View File

@@ -549,7 +549,8 @@ class Amazon(Source):
r'//div[@id="Results"]/descendant::td[starts-with(@id, "search:Td:")]'):
for a in td.xpath(r'descendant::td[@class="dataColumn"]/descendant::a[@href]/span[@class="srTitle"]/..'):
title = tostring(a, method='text', encoding=unicode).lower()
if 'bulk pack' not in title:
if ('bulk pack' not in title and '[audiobook]' not in
title and '[audio cd]' not in title):
matches.append(a.get('href'))
break

View File

@@ -313,7 +313,7 @@ class Source(Plugin):
title_patterns = [(re.compile(pat, re.IGNORECASE), repl) for pat, repl in
[
# Remove things like: (2010) (Omnibus) etc.
(r'(?i)[({\[](\d{4}|omnibus|anthology|hardcover|paperback|turtleback|mass\s*market|edition|ed\.)[\])}]', ''),
(r'(?i)[({\[](\d{4}|omnibus|anthology|hardcover|audiobook|audio\scd|paperback|turtleback|mass\s*market|edition|ed\.)[\])}]', ''),
# Remove any strings that contain the substring edition inside
# parentheses
(r'(?i)[({\[].*?(edition|ed.).*?[\]})]', ''),