From 55553753d16496306ff8cc6dc9411b04445ab88d Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Tue, 6 Oct 2009 11:23:50 -0600
Subject: [PATCH] New recipe for The Toronto Sun by Darko Miletic. Improved recipe for The Moscow Times.

---
 .../web/feeds/recipes/recipe_moscow_times.py | 56 +++++++++++++++++--
 .../web/feeds/recipes/recipe_toronto_sun.py  | 50 +++++++++++++++++
 2 files changed, 101 insertions(+), 5 deletions(-)
 create mode 100644 src/calibre/web/feeds/recipes/recipe_toronto_sun.py

diff --git a/src/calibre/web/feeds/recipes/recipe_moscow_times.py b/src/calibre/web/feeds/recipes/recipe_moscow_times.py
index 93dcb3d226..3105aba58e 100644
--- a/src/calibre/web/feeds/recipes/recipe_moscow_times.py
+++ b/src/calibre/web/feeds/recipes/recipe_moscow_times.py
@@ -10,18 +10,64 @@ from calibre.web.feeds.news import BasicNewsRecipe
 
 class Moscowtimes(BasicNewsRecipe):
     title = u'The Moscow Times'
-    __author__ = 'Darko Miletic'
+    __author__ = 'Darko Miletic and Sujata Raman'
     description = 'News from Russia'
     language = 'en'
-
+    lang = 'en'
     oldest_article = 7
     max_articles_per_feed = 100
     no_stylesheets = True
     use_embedded_content = False
+    #encoding = 'utf-8'
+    encoding = 'cp1252'
+    remove_javascript = True
+    conversion_options = {
+        'comment' : description
+        , 'language' : lang
+    }
+
+    extra_css = '''
+        h1{ color:#0066B3; font-family: Georgia,serif ; font-size: large}
+        .article_date{ font-family:Arial,Tahoma,Verdana,Helvetica,sans-serif ; color:#000000; font-size: x-small;}
+        .autors{color:#999999 ; font-weight: bold ; font-family:Arial,Tahoma,Verdana,Helvetica,sans-serif ; font-size: x-small; }
+        .photoautors{ color:#999999 ; font-family:Arial,Tahoma,Verdana,Helvetica,sans-serif ; font-size: x-small; }
+        .text{font-family:Arial,Tahoma,Verdana,Helvetica,sans-serif ; font-size:75%; }
+    '''
 
     feeds = [
-        (u'The Moscow Times' , u'http://www.themoscowtimes.com/rss.xml' )
+        (u'The Moscow Times Top Stories' , u'http://www.themoscowtimes.com/rss/top'),
+        (u'The Moscow Times Current Issue' , u'http://www.themoscowtimes.com/rss/issue'),
+        (u'The Moscow Times News' , u'http://www.themoscowtimes.com/rss/news'),
+        (u'The Moscow Times Business' , u'http://www.themoscowtimes.com/rss/business'),
+        (u'The Moscow Times Art and Ideas' , u'http://www.themoscowtimes.com/rss/art'),
+        (u'The Moscow Times Opinion' , u'http://www.themoscowtimes.com/rss/opinion')
     ]
 
-    def print_version(self, url):
-        return url + '&print=Y'
\ No newline at end of file
+    keep_only_tags = [
+        dict(name='div', attrs={'class':['newstextblock']})
+    ]
+
+    remove_tags = [
+        dict(name='div', attrs={'class':['photo_nav']})
+    ]
+
+    def preprocess_html(self, soup):
+        soup.html['xml:lang'] = self.lang
+        soup.html['lang'] = self.lang
+        mtag = ''
+        soup.head.insert(0,mtag)
+
+        return self.adeify_images(soup)
+
+
+    def get_cover_url(self):
+
+        href = 'http://www.themoscowtimes.com/pdf/'
+
+        soup = self.index_to_soup(href)
+        div = soup.find('div',attrs={'class':'left'})
+        a = div.find('a')
+        print a
+        if a :
+            cover_url = a.img['src']
+            return cover_url
diff --git a/src/calibre/web/feeds/recipes/recipe_toronto_sun.py b/src/calibre/web/feeds/recipes/recipe_toronto_sun.py
new file mode 100644
index 0000000000..996b27c1bd
--- /dev/null
+++ b/src/calibre/web/feeds/recipes/recipe_toronto_sun.py
@@ -0,0 +1,50 @@
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__copyright__ = '2009, Darko Miletic '
+'''
+www.torontosun.com
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class TorontoSun(BasicNewsRecipe):
+    title = 'Toronto SUN'
+    __author__ = 'Darko Miletic'
+    description = 'News from Canada'
+    publisher = 'Toronto Sun'
+    category = 'news, politics, Canada'
+    oldest_article = 2
+    max_articles_per_feed = 100
+    no_stylesheets = True
+    use_embedded_content = False
+    encoding = 'cp1252'
+    language = 'en_CA'
+
+    conversion_options = {
+        'comment' : description
+        , 'tags' : category
+        , 'publisher' : publisher
+        , 'language' : language
+    }
+
+    keep_only_tags =[
+        dict(name='div', attrs={'class':'articleHead'})
+        ,dict(name='div', attrs={'id':'channelContent'})
+    ]
+    remove_tags = [
+        dict(name='div',attrs={'class':['leftBox','bottomBox clear','bottomBox','breadCrumb']})
+        ,dict(name=['link','iframe','object'])
+        ,dict(name='a',attrs={'rel':'swap'})
+        ,dict(name='ul',attrs={'class':'tabs dl contentSwap'})
+    ]
+
+    remove_tags_after = dict(name='div',attrs={'class':'bottomBox clear'})
+
+    feeds = [
+        (u'News' , u'http://www.torontosun.com/news/rss.xml' )
+        ,(u'Canada' , u'http://www.torontosun.com/news/canada/rss.xml' )
+        ,(u'Columnists' , u'http://www.torontosun.com/news/columnists/rss.xml')
+        ,(u'World' , u'http://www.torontosun.com/news/world/rss.xml' )
+        ,(u'Money' , u'http://www.torontosun.com/money/rss.xml' )
+    ]