diff --git a/src/calibre/web/feeds/recipes/recipe_msdnmag_en.py b/src/calibre/web/feeds/recipes/recipe_msdnmag_en.py
index 7fc5adb93a..3f043883fe 100644
--- a/src/calibre/web/feeds/recipes/recipe_msdnmag_en.py
+++ b/src/calibre/web/feeds/recipes/recipe_msdnmag_en.py
@@ -1,62 +1,43 @@
-#!/usr/bin/env python
-
-__license__   = 'GPL v3'
-__copyright__ = '2009, Darko Miletic '
-'''
-msdn.microsoft.com/en-us/magazine
-'''
-from calibre.web.feeds.news import BasicNewsRecipe
-
-class MSDNMagazine_en(BasicNewsRecipe):
-    title                 = 'MSDN Magazine'
-    __author__            = 'Darko Miletic'
-    description           = 'The Microsoft Journal for Developers'
-    publisher             = 'Microsoft Press'
-    category              = 'news, IT, Microsoft, programming, windows'
-    oldest_article        = 31
-    max_articles_per_feed = 100
-    no_stylesheets        = True
-    use_embedded_content  = False
-    encoding              = 'utf-8'
-    remove_javascript     = True
-    current_issue         = 'http://msdn.microsoft.com/en-us/magazine/default.aspx'
-    language              = 'en'
-
-
-    html2lrf_options = [
-                          '--comment', description
-                        , '--category', category
-                        , '--publisher', publisher
-                        ]
-
-    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
-
-    feeds = [(u'Articles', u'http://msdn.microsoft.com/en-us/magazine/rss/default.aspx?z=z&iss=1')]
-
-    keep_only_tags = [dict(name='div', attrs={'class':'topic'})]
-
-    remove_tags = [
-                     dict(name=['object','link','base','table'])
-                    ,dict(name='div', attrs={'class':'MTPS_CollapsibleRegion'})
-                  ]
-
-    def get_cover_url(self):
-        cover_url = None
-        soup = self.index_to_soup(self.current_issue)
-        link_item = soup.find('span',attrs={'class':'ContentsImageSpacer'})
-        if link_item:
-            imgt = link_item.find('img')
-            if imgt:
-                cover_url = imgt['src']
-        return cover_url
-
-
-    def preprocess_html(self, soup):
-        for item in soup.findAll('div',attrs={'class':['FeatureSmallHead','ColumnTypeSubTitle']}):
-            item.name="h2"
-        for item in soup.findAll('div',attrs={'class':['FeatureHeadline','ColumnTypeTitle']}):
-            item.name="h1"
-        for item in soup.findAll('div',attrs={'class':'ArticleTypeTitle'}):
-            item.name="h3"
-        return soup
-
+#!/usr/bin/env python
+
+__license__   = 'GPL v3'
+__copyright__ = '2009, Darko Miletic '
+'''
+msdn.microsoft.com/en-us/magazine
+'''
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class MSDNMagazine_en(BasicNewsRecipe):
+    title                 = 'MSDN Magazine'
+    __author__            = 'Darko Miletic'
+    description           = 'The Microsoft Journal for Developers'
+    publisher             = 'Microsoft Press'
+    category              = 'news, IT, Microsoft, programming, windows'
+    oldest_article        = 31
+    max_articles_per_feed = 100
+    no_stylesheets        = True
+    use_embedded_content  = False
+    encoding              = 'utf-8'
+    language              = 'en'
+
+
+
+    feeds = [(u'Articles', u'http://msdn.microsoft.com/en-us/magazine/rss/default.aspx?z=z&iss=1')]
+
+    keep_only_tags = [dict(name='div', attrs={'class':'navpage'})]
+
+    remove_tags = [
+                     dict(name=['object','link','base','table'])
+                    ,dict(name='div', attrs={'class':'MTPS_CollapsibleRegion'})
+                  ]
+    remove_tags_after = dict(name='div', attrs={'class':'navpage'})
+
+    def preprocess_html(self, soup):
+        for item in soup.findAll('div',attrs={'class':['FeatureSmallHead','ColumnTypeSubTitle']}):
+            item.name="h2"
+        for item in soup.findAll('div',attrs={'class':['FeatureHeadline','ColumnTypeTitle']}):
+            item.name="h1"
+        for item in soup.findAll('div',attrs={'class':'ArticleTypeTitle'}):
+            item.name="h3"
+        return soup
+
diff --git a/src/calibre/web/feeds/recipes/recipe_straitstimes.py b/src/calibre/web/feeds/recipes/recipe_straitstimes.py
index 9a87f03a6d..522e8f9ff5 100644
--- a/src/calibre/web/feeds/recipes/recipe_straitstimes.py
+++ b/src/calibre/web/feeds/recipes/recipe_straitstimes.py
@@ -1,57 +1,50 @@
-#!/usr/bin/env python
-
-__license__   = 'GPL v3'
-__copyright__ = '2009, Darko Miletic '
-'''
-www.straitstimes.com
-'''
-
-from calibre.web.feeds.recipes import BasicNewsRecipe
-
-class StraitsTimes(BasicNewsRecipe):
-    title                 = 'The Straits Times'
-    __author__            = 'Darko Miletic'
-    description           = 'Singapore newspaper'
-    oldest_article        = 2
-    max_articles_per_feed = 100
-    no_stylesheets        = True
-    use_embedded_content  = False
-    encoding              = 'cp1252'
-    publisher             = 'Singapore Press Holdings Ltd.'
-    category              = 'news, politics, singapore, asia'
-    language              = 'en'
-
-
-    html2lrf_options = [
-                          '--comment', description
-                        , '--category', category
-                        , '--publisher', publisher
-                        , '--ignore-tables'
-                        ]
-
-    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
-
-    remove_tags = [
-                     dict(name=['object','link'])
-                    ,dict(name='table', attrs={'width':'980'})
-                    ,dict(name='td'   , attrs={'class':'padlrt10'})
-                  ]
-
-    feeds = [
-              (u'Singapore'      , u'http://www.straitstimes.com/STI/STIFILES/rss/break_singapore.xml')
-             ,(u'SE Asia'        , u'http://www.straitstimes.com/STI/STIFILES/rss/break_sea.xml')
-             ,(u'Money'          , u'http://www.straitstimes.com/STI/STIFILES/rss/break_money.xml')
-             ,(u'Sport'          , u'http://www.straitstimes.com/STI/STIFILES/rss/break_sport.xml')
-             ,(u'World'          , u'http://www.straitstimes.com/STI/STIFILES/rss/break_world.xml')
-             ,(u'Tech & Science' , u'http://www.straitstimes.com/STI/STIFILES/rss/break_tech.xml')
-             ,(u'Lifestyle'      , u'http://www.straitstimes.com/STI/STIFILES/rss/break_lifestyle.xml')
-            ]
-
-    def preprocess_html(self, soup):
-        for item in soup.findAll(style=True):
-            del item['style']
-        return soup
-
-    def print_version(self, url):
-        return url.replace('http://www.straitstimes.com','http://www.straitstimes.com/print')
-
+#!/usr/bin/env python
+
+__license__   = 'GPL v3'
+__copyright__ = '2009, Darko Miletic '
+'''
+www.straitstimes.com
+'''
+
+from calibre.web.feeds.recipes import BasicNewsRecipe
+
+class StraitsTimes(BasicNewsRecipe):
+    title                 = 'The Straits Times'
+    __author__            = 'Darko Miletic'
+    description           = 'Singapore newspaper'
+    oldest_article        = 2
+    max_articles_per_feed = 100
+    no_stylesheets        = True
+    use_embedded_content  = False
+    encoding              = 'cp1252'
+    publisher             = 'Singapore Press Holdings Ltd.'
+    category              = 'news, politics, singapore, asia'
+    language              = 'en'
+    extra_css             = ' .top_headline{font-size: x-large; font-weight: bold} '
+
+    conversion_options = {
+                          'comments'  : description
+                         ,'tags'      : category
+                         ,'language'  : language
+                         ,'publisher' : publisher
+                         }
+
+    remove_tags = [dict(name=['object','link','map'])]
+
+    keep_only_tags = [dict(name='div', attrs={'class':['top_headline','story_text']})]
+
+    feeds = [
+              (u'Singapore'      , u'http://www.straitstimes.com/STI/STIFILES/rss/break_singapore.xml')
+             ,(u'SE Asia'        , u'http://www.straitstimes.com/STI/STIFILES/rss/break_sea.xml')
+             ,(u'Money'          , u'http://www.straitstimes.com/STI/STIFILES/rss/break_money.xml')
+             ,(u'Sport'          , u'http://www.straitstimes.com/STI/STIFILES/rss/break_sport.xml')
+             ,(u'World'          , u'http://www.straitstimes.com/STI/STIFILES/rss/break_world.xml')
+             ,(u'Tech & Science' , u'http://www.straitstimes.com/STI/STIFILES/rss/break_tech.xml')
+             ,(u'Lifestyle'      , u'http://www.straitstimes.com/STI/STIFILES/rss/break_lifestyle.xml')
+            ]
+
+    def preprocess_html(self, soup):
+        for item in soup.findAll(style=True):
+            del item['style']
+        return soup
+
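Both updated recipes key on class-based selectors ('navpage' for MSDN Magazine, 'top_headline'/'story_text' for The Straits Times), which break silently if the sites change their markup. The sketch below is a hypothetical helper, not part of the patch: it checks whether those div classes still appear on a given article page. It assumes requests and bs4 are installed, and the URL passed in is a placeholder to be replaced with a current article link from one of the feeds.

    # Hypothetical selector smoke-test (assumption: requests + bs4 available).
    # Not part of the patch; only verifies that the div classes the updated
    # keep_only_tags/remove_tags rules depend on are still present in the markup.
    import requests
    from bs4 import BeautifulSoup

    def selector_classes_present(url, classes):
        # Fetch the page and parse it into a soup, roughly as BasicNewsRecipe
        # does internally, then look for <div> elements carrying each class.
        html = requests.get(url, timeout=30).text
        soup = BeautifulSoup(html, 'html.parser')
        return {cls: bool(soup.find_all('div', attrs={'class': cls})) for cls in classes}

    if __name__ == '__main__':
        # Placeholder URL; substitute a real article link taken from the RSS feeds.
        print(selector_classes_present('http://www.straitstimes.com/some-article',
                                       ['top_headline', 'story_text']))

For an end-to-end check, the recipes can also be run through calibre itself (for example, ebook-convert on a copy of the recipe file saved with a .recipe extension), but the quick selector check above is faster when iterating on markup changes like these.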