From 264d032646a009a7ca2bf9c62e224ca58061a09b Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Thu, 3 Sep 2009 10:32:25 -0600
Subject: [PATCH] Updated recipe for Telegraph UK

---
 .../web/feeds/recipes/recipe_telegraph_uk.py | 96 +++++++++++--------
 1 file changed, 58 insertions(+), 38 deletions(-)

diff --git a/src/calibre/web/feeds/recipes/recipe_telegraph_uk.py b/src/calibre/web/feeds/recipes/recipe_telegraph_uk.py
index 7ff0815ae1..8f8eaf706f 100644
--- a/src/calibre/web/feeds/recipes/recipe_telegraph_uk.py
+++ b/src/calibre/web/feeds/recipes/recipe_telegraph_uk.py
@@ -1,38 +1,58 @@
-#!/usr/bin/env python
-
-__license__ = 'GPL v3'
-__copyright__ = '2008, Darko Miletic '
-'''
-telegraph.co.uk
-'''
-
-from calibre.web.feeds.news import BasicNewsRecipe
-
-class TelegraphUK(BasicNewsRecipe):
-    title = u'Telegraph.co.uk'
-    __author__ = 'Darko Miletic'
-    description = 'News from United Kingdom'
-    oldest_article = 7
-    max_articles_per_feed = 100
-    no_stylesheets = True
-    language = _('English')
-    use_embedded_content = False
-
-    keep_only_tags = [
-        dict(name='div', attrs={'class':'storyHead'})
-        ,dict(name='div', attrs={'class':'story' })
-    ]
-    remove_tags = [dict(name='div', attrs={'class':'slideshow'})]
-
-    feeds = [
-        (u'UK News' , u'http://www.telegraph.co.uk/news/uknews/rss' )
-        ,(u'World News' , u'http://www.telegraph.co.uk/news/worldnews/rss' )
-        ,(u'Politics' , u'http://www.telegraph.co.uk/news/newstopics/politics/rss' )
-        ,(u'Technology News', u'http://www.telegraph.co.uk/scienceandtechnology/technology/technologynews/rss' )
-        ,(u'UK News' , u'http://www.telegraph.co.uk/scienceandtechnology/technology/technologyreviews/rss')
-        ,(u'Science News' , u'http://www.telegraph.co.uk/scienceandtechnology/science/sciencenews/rss' )
-        ,(u'Sport' , u'http://www.telegraph.co.uk/sport/rss' )
-        ,(u'Earth News' , u'http://www.telegraph.co.uk/earth/earthnews/rss' )
-        ,(u'Comment' , u'http://www.telegraph.co.uk/comment/rss' )
-        ,(u'How about that?', u'http://www.telegraph.co.uk/news/newstopics/howaboutthat/rss' )
-    ]
+#!/usr/bin/env python
+__license__ = 'GPL v3'
+__copyright__ = '2008, Darko Miletic '
+'''
+telegraph.co.uk
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class TelegraphUK(BasicNewsRecipe):
+    title = u'Telegraph.co.uk'
+    __author__ = 'Darko Miletic and Sujata Raman'
+    description = 'News from United Kingdom'
+    oldest_article = 7
+    max_articles_per_feed = 100
+    no_stylesheets = True
+    language = _('English')
+    use_embedded_content = False
+
+    extra_css = '''
+        h1{font-family :Arial,Helvetica,sans-serif; font-size:large; color:#666666}
+        h2{font-family :Arial,Helvetica,sans-serif; font-size:small; color:#444444}
+        .story{font-family :Arial,Helvetica,sans-serif; font-size: x-small; color:#444444 }
+        .byline{color:#666666; font-family :Arial,Helvetica,sans-serif; font-size: xx-small;}
+        a{color:#234B7B; }
+        .imageExtras{color:#666666; font-family :Arial,Helvetica,sans-serif; font-size: xx-small;}
+        '''
+
+    keep_only_tags = [
+        dict(name='div', attrs={'class':'storyHead'})
+        ,dict(name='div', attrs={'class':'story' })
+        #,dict(name='div', attrs={'class':['slideshowHD gutterUnder',"twoThirds gutter","caption" ] })
+    ]
+    remove_tags = [dict(name='div', attrs={'class':['related_links_inline',"imgindex","next","prev","gutterUnder"]})]
+
+    feeds = [
+        (u'UK News' , u'http://www.telegraph.co.uk/news/uknews/rss' )
+        ,(u'World News' , u'http://www.telegraph.co.uk/news/worldnews/rss' )
+        ,(u'Politics' , u'http://www.telegraph.co.uk/news/newstopics/politics/rss' )
+        ,(u'Technology News', u'http://www.telegraph.co.uk/scienceandtechnology/technology/technologynews/rss' )
+        ,(u'UK News' , u'http://www.telegraph.co.uk/scienceandtechnology/technology/technologyreviews/rss')
+        ,(u'Science News' , u'http://www.telegraph.co.uk/scienceandtechnology/science/sciencenews/rss' )
+        ,(u'Sport' , u'http://www.telegraph.co.uk/sport/rss' )
+        ,(u'Earth News' , u'http://www.telegraph.co.uk/earth/earthnews/rss' )
+        ,(u'Comment' , u'http://www.telegraph.co.uk/comment/rss' )
+        ,(u'How about that?', u'http://www.telegraph.co.uk/news/newstopics/howaboutthat/rss' )
+    ]
+
+    def get_article_url(self, article):
+
+        url = article.get('guid', None)
+
+        if 'picture-galleries' in url or 'pictures' in url or 'picturegalleries' in url :
+            url = None
+
+        return url
+
+