From 327158b6340c8d38da75696237eda29cf80ca9f4 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 8 Oct 2009 15:46:21 -0600 Subject: [PATCH] IGN:... --- .../web/feeds/recipes/recipe_msdnmag_en.py | 86 +++++++-------- .../web/feeds/recipes/recipe_straitstimes.py | 100 +++++++++--------- .../web/feeds/recipes/recipe_thestar.py | 98 ++++++++--------- 3 files changed, 142 insertions(+), 142 deletions(-) diff --git a/src/calibre/web/feeds/recipes/recipe_msdnmag_en.py b/src/calibre/web/feeds/recipes/recipe_msdnmag_en.py index 3f043883fe..77b8da17a8 100644 --- a/src/calibre/web/feeds/recipes/recipe_msdnmag_en.py +++ b/src/calibre/web/feeds/recipes/recipe_msdnmag_en.py @@ -1,43 +1,43 @@ -#!/usr/bin/env python - -__license__ = 'GPL v3' -__copyright__ = '2009, Darko Miletic ' -''' -msdn.microsoft.com/en-us/magazine -''' -from calibre.web.feeds.news import BasicNewsRecipe - -class MSDNMagazine_en(BasicNewsRecipe): - title = 'MSDN Magazine' - __author__ = 'Darko Miletic' - description = 'The Microsoft Journal for Developers' - publisher = 'Microsoft Press' - category = 'news, IT, Microsoft, programming, windows' - oldest_article = 31 - max_articles_per_feed = 100 - no_stylesheets = True - use_embedded_content = False - encoding = 'utf-8' - language = 'en' - - - - feeds = [(u'Articles', u'http://msdn.microsoft.com/en-us/magazine/rss/default.aspx?z=z&iss=1')] - - keep_only_tags = [dict(name='div', attrs={'class':'navpage'})] - - remove_tags = [ - dict(name=['object','link','base','table']) - ,dict(name='div', attrs={'class':'MTPS_CollapsibleRegion'}) - ] - remove_tags_after = dict(name='div', attrs={'class':'navpage'}) - - def preprocess_html(self, soup): - for item in soup.findAll('div',attrs={'class':['FeatureSmallHead','ColumnTypeSubTitle']}): - item.name="h2" - for item in soup.findAll('div',attrs={'class':['FeatureHeadline','ColumnTypeTitle']}): - item.name="h1" - for item in soup.findAll('div',attrs={'class':'ArticleTypeTitle'}): - item.name="h3" - return soup - +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = '2009, Darko Miletic ' +''' +msdn.microsoft.com/en-us/magazine +''' +from calibre.web.feeds.news import BasicNewsRecipe + +class MSDNMagazine_en(BasicNewsRecipe): + title = 'MSDN Magazine' + __author__ = 'Darko Miletic' + description = 'The Microsoft Journal for Developers' + publisher = 'Microsoft Press' + category = 'news, IT, Microsoft, programming, windows' + oldest_article = 31 + max_articles_per_feed = 100 + no_stylesheets = True + use_embedded_content = False + encoding = 'utf-8' + language = 'en' + + + + feeds = [(u'Articles', u'http://msdn.microsoft.com/en-us/magazine/rss/default.aspx?z=z&iss=1')] + + keep_only_tags = [dict(name='div', attrs={'class':'navpage'})] + + remove_tags = [ + dict(name=['object','link','base','table']) + ,dict(name='div', attrs={'class':'MTPS_CollapsibleRegion'}) + ] + remove_tags_after = dict(name='div', attrs={'class':'navpage'}) + + def preprocess_html(self, soup): + for item in soup.findAll('div',attrs={'class':['FeatureSmallHead','ColumnTypeSubTitle']}): + item.name="h2" + for item in soup.findAll('div',attrs={'class':['FeatureHeadline','ColumnTypeTitle']}): + item.name="h1" + for item in soup.findAll('div',attrs={'class':'ArticleTypeTitle'}): + item.name="h3" + return soup + diff --git a/src/calibre/web/feeds/recipes/recipe_straitstimes.py b/src/calibre/web/feeds/recipes/recipe_straitstimes.py index 522e8f9ff5..64e50e2f60 100644 --- a/src/calibre/web/feeds/recipes/recipe_straitstimes.py +++ b/src/calibre/web/feeds/recipes/recipe_straitstimes.py @@ -1,50 +1,50 @@ -#!/usr/bin/env python - -__license__ = 'GPL v3' -__copyright__ = '2009, Darko Miletic ' -''' -www.straitstimes.com -''' - -from calibre.web.feeds.recipes import BasicNewsRecipe - -class StraitsTimes(BasicNewsRecipe): - title = 'The Straits Times' - __author__ = 'Darko Miletic' - description = 'Singapore newspaper' - oldest_article = 2 - max_articles_per_feed = 100 - no_stylesheets = True - use_embedded_content = False - encoding = 'cp1252' - publisher = 'Singapore Press Holdings Ltd.' - category = 'news, politics, singapore, asia' - language = 'en' - extra_css = ' .top_headline{font-size: x-large; font-weight: bold} ' - - conversion_options = { - 'comments' : description - ,'tags' : category - ,'language' : language - ,'publisher' : publisher - } - - remove_tags = [dict(name=['object','link','map'])] - - keep_only_tags = [dict(name='div', attrs={'class':['top_headline','story_text']})] - - feeds = [ - (u'Singapore' , u'http://www.straitstimes.com/STI/STIFILES/rss/break_singapore.xml' ) - ,(u'SE Asia' , u'http://www.straitstimes.com/STI/STIFILES/rss/break_sea.xml' ) - ,(u'Money' , u'http://www.straitstimes.com/STI/STIFILES/rss/break_money.xml' ) - ,(u'Sport' , u'http://www.straitstimes.com/STI/STIFILES/rss/break_sport.xml' ) - ,(u'World' , u'http://www.straitstimes.com/STI/STIFILES/rss/break_world.xml' ) - ,(u'Tech & Science' , u'http://www.straitstimes.com/STI/STIFILES/rss/break_tech.xml' ) - ,(u'Lifestyle' , u'http://www.straitstimes.com/STI/STIFILES/rss/break_lifestyle.xml' ) - ] - - def preprocess_html(self, soup): - for item in soup.findAll(style=True): - del item['style'] - return soup - +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = '2009, Darko Miletic ' +''' +www.straitstimes.com +''' + +from calibre.web.feeds.recipes import BasicNewsRecipe + +class StraitsTimes(BasicNewsRecipe): + title = 'The Straits Times' + __author__ = 'Darko Miletic' + description = 'Singapore newspaper' + oldest_article = 2 + max_articles_per_feed = 100 + no_stylesheets = True + use_embedded_content = False + encoding = 'cp1252' + publisher = 'Singapore Press Holdings Ltd.' + category = 'news, politics, singapore, asia' + language = 'en' + extra_css = ' .top_headline{font-size: x-large; font-weight: bold} ' + + conversion_options = { + 'comments' : description + ,'tags' : category + ,'language' : language + ,'publisher' : publisher + } + + remove_tags = [dict(name=['object','link','map'])] + + keep_only_tags = [dict(name='div', attrs={'class':['top_headline','story_text']})] + + feeds = [ + (u'Singapore' , u'http://www.straitstimes.com/STI/STIFILES/rss/break_singapore.xml' ) + ,(u'SE Asia' , u'http://www.straitstimes.com/STI/STIFILES/rss/break_sea.xml' ) + ,(u'Money' , u'http://www.straitstimes.com/STI/STIFILES/rss/break_money.xml' ) + ,(u'Sport' , u'http://www.straitstimes.com/STI/STIFILES/rss/break_sport.xml' ) + ,(u'World' , u'http://www.straitstimes.com/STI/STIFILES/rss/break_world.xml' ) + ,(u'Tech & Science' , u'http://www.straitstimes.com/STI/STIFILES/rss/break_tech.xml' ) + ,(u'Lifestyle' , u'http://www.straitstimes.com/STI/STIFILES/rss/break_lifestyle.xml' ) + ] + + def preprocess_html(self, soup): + for item in soup.findAll(style=True): + del item['style'] + return soup + diff --git a/src/calibre/web/feeds/recipes/recipe_thestar.py b/src/calibre/web/feeds/recipes/recipe_thestar.py index e9ea8d1834..695e50762a 100644 --- a/src/calibre/web/feeds/recipes/recipe_thestar.py +++ b/src/calibre/web/feeds/recipes/recipe_thestar.py @@ -1,49 +1,49 @@ -#!/usr/bin/env python - -__license__ = 'GPL v3' -__copyright__ = '2009, Darko Miletic ' -''' -www.thestar.com -''' - -from calibre.web.feeds.news import BasicNewsRecipe - -class TheTorontoStar(BasicNewsRecipe): - title = 'The Toronto Star' - __author__ = 'Darko Miletic' - description = "Canada's largest daily newspaper" - oldest_article = 2 - language = 'en_CA' - max_articles_per_feed = 100 - no_stylesheets = True - use_embedded_content = False - delay = 2 - publisher = 'The Toronto Star' - category = "Toronto Star,Canada's largest daily newspaper,breaking news,classifieds,careers,GTA,Toronto Maple Leafs,sports,Toronto,news,editorial,The Star,Ontario,information,columnists,business,entertainment,births,deaths,automotive,rentals,weather,archives,Torstar,technology,Joseph Atkinson" - encoding = 'utf-8' - - conversion_options = { - 'comments' : description - ,'tags' : category - ,'publisher' : publisher - } - - keep_only_tags = [dict(name='div', attrs={'class':'ts-article'})] - remove_tags_before = dict(name='div',attrs={'id':'ts-article_header'}) - - feeds = [ - (u'News' , u'http://www.thestar.com/rss/0?searchMode=Query&categories=296' ) - ,(u'Opinions' , u'http://www.thestar.com/rss/0?searchMode=Query&categories=311' ) - ,(u'Business' , u'http://www.thestar.com/rss/0?searchMode=Query&categories=294' ) - ,(u'Sports' , u'http://www.thestar.com/rss/0?searchMode=Query&categories=295' ) - ,(u'Entertainment', u'http://www.thestar.com/rss/0?searchMode=Query&categories=296' ) - ,(u'Living' , u'http://www.thestar.com/rss/0?searchMode=Query&categories=296' ) - ,(u'Travel' , u'http://www.thestar.com/rss/82858?searchMode=Lineup' ) - ,(u'Science' , u'http://www.thestar.com/rss/82848?searchMode=Query&categories=300') - ] - - def print_version(self, url): - artl = url.rpartition('--')[0] - artid = artl.rpartition('/')[2] - return 'http://www.thestar.com/printarticle/' + artid - +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = '2009, Darko Miletic ' +''' +www.thestar.com +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class TheTorontoStar(BasicNewsRecipe): + title = 'The Toronto Star' + __author__ = 'Darko Miletic' + description = "Canada's largest daily newspaper" + oldest_article = 2 + language = 'en_CA' + max_articles_per_feed = 100 + no_stylesheets = True + use_embedded_content = False + delay = 2 + publisher = 'The Toronto Star' + category = "Toronto Star,Canada's largest daily newspaper,breaking news,classifieds,careers,GTA,Toronto Maple Leafs,sports,Toronto,news,editorial,The Star,Ontario,information,columnists,business,entertainment,births,deaths,automotive,rentals,weather,archives,Torstar,technology,Joseph Atkinson" + encoding = 'utf-8' + + conversion_options = { + 'comments' : description + ,'tags' : category + ,'publisher' : publisher + } + + keep_only_tags = [dict(name='div', attrs={'class':'ts-article'})] + remove_tags_before = dict(name='div',attrs={'id':'ts-article_header'}) + + feeds = [ + (u'News' , u'http://www.thestar.com/rss/0?searchMode=Query&categories=296' ) + ,(u'Opinions' , u'http://www.thestar.com/rss/0?searchMode=Query&categories=311' ) + ,(u'Business' , u'http://www.thestar.com/rss/0?searchMode=Query&categories=294' ) + ,(u'Sports' , u'http://www.thestar.com/rss/0?searchMode=Query&categories=295' ) + ,(u'Entertainment', u'http://www.thestar.com/rss/0?searchMode=Query&categories=296' ) + ,(u'Living' , u'http://www.thestar.com/rss/0?searchMode=Query&categories=296' ) + ,(u'Travel' , u'http://www.thestar.com/rss/82858?searchMode=Lineup' ) + ,(u'Science' , u'http://www.thestar.com/rss/82848?searchMode=Query&categories=300') + ] + + def print_version(self, url): + artl = url.rpartition('--')[0] + artid = artl.rpartition('/')[2] + return 'http://www.thestar.com/printarticle/' + artid +