diff --git a/resources/recipes/guardian.recipe b/resources/recipes/guardian.recipe
index 6327b2ccea..8055be0474 100644
--- a/resources/recipes/guardian.recipe
+++ b/resources/recipes/guardian.recipe
@@ -6,7 +6,7 @@ __docformat__ = 'restructuredtext en'
 '''
 www.guardian.co.uk
 '''
-from calibre import strftime
+#from calibre import strftime
 from calibre.web.feeds.news import BasicNewsRecipe
 
 class Guardian(BasicNewsRecipe):
@@ -15,8 +15,8 @@ class Guardian(BasicNewsRecipe):
     __author__ = 'Seabound and Sujata Raman'
     language = 'en_GB'
 
-    #oldest_article = 7
-    #max_articles_per_feed = 100
+    oldest_article = 7
+    max_articles_per_feed = 25
     remove_javascript = True
 
     timefmt = ' [%a, %d %b %Y]'
@@ -43,6 +43,44 @@ class Guardian(BasicNewsRecipe):
         #match-stats-summary{font-size:small; font-family:Arial,Helvetica,sans-serif;font-weight:normal;}
         '''
 
+    feeds = [
+        ('Front Page', 'http://www.guardian.co.uk/rss'),
+        ('Business', 'http://www.guardian.co.uk/business/rss'),
+        ('Sport', 'http://www.guardian.co.uk/sport/rss'),
+        ('Culture', 'http://www.guardian.co.uk/culture/rss'),
+        ('Money', 'http://www.guardian.co.uk/money/rss'),
+        ('Life & Style', 'http://www.guardian.co.uk/lifeandstyle/rss'),
+        ('Travel', 'http://www.guardian.co.uk/travel/rss'),
+        ('Environment', 'http://www.guardian.co.uk/environment/rss'),
+        ('Comment','http://www.guardian.co.uk/commentisfree/rss'),
+    ]
+
+    def get_article_url(self, article):
+        url = article.get('guid', None)
+        # Feed items may lack a 'guid'; guard before the substring tests below.
+        if url is None:
+            return None
+        if '/video/' in url or '/flyer/' in url or '/quiz/' in url or \
+            '/gallery/' in url or 'ivebeenthere' in url or \
+            'pickthescore' in url or 'audioslideshow' in url :
+            url = None
+        return url
+
+    def preprocess_html(self, soup):
+
+        for item in soup.findAll(style=True):
+            del item['style']
+
+        for item in soup.findAll(face=True):
+            del item['face']
+        for tag in soup.findAll(name=['ul','li']):
+            tag.name = 'div'
+
+        return soup
+
+
+
+'''
     def find_sections(self):
         soup = self.index_to_soup('http://www.guardian.co.uk/theguardian')
         # find cover pic
@@ -82,20 +120,7 @@ class Guardian(BasicNewsRecipe):
         for title, href in self.find_sections():
             feeds.append((title, list(self.find_articles(href))))
         return feeds
-
-    def preprocess_html(self, soup):
-
-        for item in soup.findAll(style=True):
-            del item['style']
-
-        for item in soup.findAll(face=True):
-            del item['face']
-        for tag in soup.findAll(name=['ul','li']):
-            tag.name = 'div'
-
-        return soup
-
-
+'''
 
 
 