mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-08-07 09:01:38 -04:00
Village Voice by Barty
This commit is contained in:
parent
7a6963dac3
commit
ffeb865728
46
recipes/villagevoice.recipe
Normal file
46
recipes/villagevoice.recipe
Normal file
@ -0,0 +1,46 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class VillageVoice(BasicNewsRecipe):
    """Recipe for the Village Voice syndication feeds.

    Articles on the site are split across several pages, so each article
    URL is swapped for the URL of its single-page print version whenever
    the article page links to one.
    """

    title = 'Village Voice'
    __author__ = 'Barty'
    language = 'en'
    masthead_url = "http://assets.villagevoice.com/img/citylogo.png"

    auto_cleanup = True
    max_articles_per_feed = 50

    feeds = [
        ("Complete Issue", "http://villagevoice.com/syndication/issue"),
        ("News", "http://villagevoice.com/syndication/section/news"),
        ("Music", "http://villagevoice.com/syndication/section/music"),
        ("Movies", "http://villagevoice.com/syndication/section/film"),
        # Feeds that are currently switched off:
        #("Restaurants", "http://villagevoice.com/syndication/section/dining"),
        #("Music Events", "http://villagevoice.com/syndication/events?type=music"),
        #("Calendar Events", "http://villagevoice.com/syndication/events"),
        #("Promotional Events", "http://villagevoice.com/syndication/promoEvents"),
        #("Restaurant Guide", "http://villagevoice.com/syndication/restaurants/search")
    ]

    # Article URLs already handed to print_version(); consulted there so
    # that a URL seen a second time is answered with None.
    seen_urls = []

    # Matches the path portion of a link to an article's print version.
    url_regex = re.compile(r'\/content\/printVersion\/\d+', re.I)

    def print_version(self, url):
        """Return the print-version URL for the article at *url*.

        Returns None when *url* has been seen before (presumably so the
        caller skips the duplicate -- confirm against BasicNewsRecipe).
        When the article page has no link matching ``url_regex`` a
        warning is logged and *url* is returned unchanged.
        """
        if url in self.seen_urls:
            return None
        self.seen_urls.append(url)

        # Fetch the article page and look for the anchor whose href
        # points at the print version.
        page = self.index_to_soup(url)
        print_link = page.find('a', attrs={'href': self.url_regex})
        if print_link is None:
            self.log('Warning: no print url found for '+url)
            return url

        # Keep only the matched path, so the result is always an absolute
        # URL on the www host regardless of how the href was written.
        found = self.url_regex.search(print_link['href'])
        if found is None:
            return url
        return 'http://www.villagevoice.com' + found.group(0)
|
Loading…
x
Reference in New Issue
Block a user