From 8b7eda245e4f9e1fa0996a9338fc72f8efef63ac Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Mon, 24 Dec 2012 16:10:54 +0530
Subject: [PATCH] Update Pajamas Media

---
 recipes/pajama.recipe | 52 +++++++++++++++++++++----------------------
 1 file changed, 26 insertions(+), 26 deletions(-)

diff --git a/recipes/pajama.recipe b/recipes/pajama.recipe
index 8c5ba74317..9b474b6e65 100644
--- a/recipes/pajama.recipe
+++ b/recipes/pajama.recipe
@@ -1,27 +1,27 @@
 from calibre.web.feeds.news import BasicNewsRecipe
-from calibre.ebooks.BeautifulSoup import BeautifulSoup
 
 class PajamasMedia(BasicNewsRecipe):
     title = u'Pajamas Media'
     description = u'Provides exclusive news and opinion for forty countries.'
     language = 'en'
     __author__ = 'Krittika Goyal'
-    oldest_article = 1 #days
+    oldest_article = 2 #days
     max_articles_per_feed = 25
     recursions = 1
     match_regexps = [r'http://pajamasmedia.com/blog/.*/2/$']
     #encoding = 'latin1'
 
     remove_stylesheets = True
-    #remove_tags_before = dict(name='h1', attrs={'class':'heading'})
-    remove_tags_after = dict(name='div', attrs={'class':'paged-nav'})
-    remove_tags = [
-        dict(name='iframe'),
-        dict(name='div', attrs={'class':['pages']}),
-        #dict(name='div', attrs={'id':['bookmark']}),
-        #dict(name='span', attrs={'class':['related_link', 'slideshowcontrols']}),
-        #dict(name='ul', attrs={'class':'articleTools'}),
-    ]
+    auto_cleanup = True
+    ##remove_tags_before = dict(name='h1', attrs={'class':'heading'})
+    #remove_tags_after = dict(name='div', attrs={'class':'paged-nav'})
+    #remove_tags = [
+        #dict(name='iframe'),
+        #dict(name='div', attrs={'class':['pages']}),
+        ##dict(name='div', attrs={'id':['bookmark']}),
+        ##dict(name='span', attrs={'class':['related_link', 'slideshowcontrols']}),
+        ##dict(name='ul', attrs={'class':'articleTools'}),
+    #]
 
     feeds = [
 ('pajamas Media',
@@ -29,20 +29,20 @@ class PajamasMedia(BasicNewsRecipe):
 
 ]
 
-    def preprocess_html(self, soup):
-        story = soup.find(name='div', attrs={'id':'innerpage-content'})
-        #td = heading.findParent(name='td')
-        #td.extract()
+    #def preprocess_html(self, soup):
+        #story = soup.find(name='div', attrs={'id':'innerpage-content'})
+        ##td = heading.findParent(name='td')
+        ##td.extract()
 
-        soup = BeautifulSoup('t')
-        body = soup.find(name='body')
-        body.insert(0, story)
-        return soup
+        #soup = BeautifulSoup('t')
+        #body = soup.find(name='body')
+        #body.insert(0, story)
+        #return soup
 
-    def postprocess_html(self, soup, first):
-        if not first:
-            h = soup.find(attrs={'class':'innerpage-header'})
-            if h: h.extract()
-            auth = soup.find(attrs={'class':'author'})
-            if auth: auth.extract()
-        return soup
+    #def postprocess_html(self, soup, first):
+        #if not first:
+            #h = soup.find(attrs={'class':'innerpage-header'})
+            #if h: h.extract()
+            #auth = soup.find(attrs={'class':'author'})
+            #if auth: auth.extract()
+        #return soup