diff --git a/resources/recipes/pajama.recipe b/resources/recipes/pajama.recipe new file mode 100644 index 0000000000..8c5ba74317 --- /dev/null +++ b/resources/recipes/pajama.recipe @@ -0,0 +1,48 @@ +from calibre.web.feeds.news import BasicNewsRecipe +from calibre.ebooks.BeautifulSoup import BeautifulSoup + +class PajamasMedia(BasicNewsRecipe): + title = u'Pajamas Media' + description = u'Provides exclusive news and opinion for forty countries.' + language = 'en' + __author__ = 'Krittika Goyal' + oldest_article = 1 #days + max_articles_per_feed = 25 + recursions = 1 + match_regexps = [r'http://pajamasmedia.com/blog/.*/2/$'] + #encoding = 'latin1' + + remove_stylesheets = True + #remove_tags_before = dict(name='h1', attrs={'class':'heading'}) + remove_tags_after = dict(name='div', attrs={'class':'paged-nav'}) + remove_tags = [ + dict(name='iframe'), + dict(name='div', attrs={'class':['pages']}), + #dict(name='div', attrs={'id':['bookmark']}), + #dict(name='span', attrs={'class':['related_link', 'slideshowcontrols']}), + #dict(name='ul', attrs={'class':'articleTools'}), + ] + + feeds = [ +('pajamas Media', + 'http://feeds.feedburner.com/PajamasMedia'), + +] + + def preprocess_html(self, soup): + story = soup.find(name='div', attrs={'id':'innerpage-content'}) + #td = heading.findParent(name='td') + #td.extract() + + soup = BeautifulSoup('