calibre/recipes/villagevoice.recipe
Kovid Goyal 567040ee1e Perform PEP8 compliance checks on the entire codebase
Some bits of PEP 8 are turned off via setup.cfg
2016-07-29 21:25:17 +05:30

48 lines
1.7 KiB
Python

#!/usr/bin/env python2
import re
from calibre.web.feeds.news import BasicNewsRecipe
class VillageVoice(BasicNewsRecipe):
    """Recipe that fetches Village Voice articles from its syndication feeds.

    Each article URL handed to :meth:`print_version` is downloaded and
    searched for a link to the single-page print version, which is then
    used in place of the original URL.
    """

    # --- Recipe metadata -------------------------------------------------
    __author__ = 'Barty'
    title = 'Village Voice'
    language = 'en'
    masthead_url = "http://assets.villagevoice.com/img/citylogo.png"

    # --- Fetch settings --------------------------------------------------
    max_articles_per_feed = 50
    auto_cleanup = True

    # (feed title, feed URL) pairs. The commented-out entries are feeds
    # that are known but currently disabled.
    feeds = [
        ("Complete Issue", "http://villagevoice.com/syndication/issue"),
        ("News", "http://villagevoice.com/syndication/section/news"),
        ("Music", "http://villagevoice.com/syndication/section/music"),
        ("Movies", "http://villagevoice.com/syndication/section/film"),
        # ("Restaurants", "http://villagevoice.com/syndication/section/dining"),
        # ("Music Events", "http://villagevoice.com/syndication/events?type=music"),
        # ("Calendar Events", "http://villagevoice.com/syndication/events"),
        # ("Promotional Events", "http://villagevoice.com/syndication/promoEvents"),
        # ("Restaurant Guide", "http://villagevoice.com/syndication/restaurants/search")
    ]

    # URLs already passed to print_version(), used to drop repeats.
    # NOTE(review): this list is defined on the class and mutated through
    # ``self``, so it is shared by every instance of the recipe.
    seen_urls = []

    # Village Voice splits an article across several pages, so the article
    # page is parsed for the link to its print version instead.
    # Matched case-insensitively against anchor hrefs.
    url_regex = re.compile(r'\/content\/printVersion\/\d+', re.I)

    def print_version(self, url):
        """Return the print-version URL for the article at *url*.

        Returns ``None`` when *url* has been seen before (presumably so the
        caller skips the duplicate -- confirm against ``BasicNewsRecipe``).
        Returns *url* unchanged when no print link can be located on the
        page; a warning is logged if no matching anchor exists at all.
        """
        if url in self.seen_urls:
            return None
        self.seen_urls.append(url)

        page = self.index_to_soup(url)
        print_link = page.find('a', attrs={'href': self.url_regex})
        if print_link is None:
            self.log('Warning: no print url found for ' + url)
            return url

        # Keep only the matched path; the host below is prepended to it.
        found = self.url_regex.search(print_link['href'])
        if not found:
            return url
        return 'http://www.villagevoice.com' + found.group(0)