Updated recipe for Discover Magazine and new recipes for The Metro Montreal and The Gazette by Jerry Clapperton

This commit is contained in:
Kovid Goyal 2010-01-26 10:08:32 -07:00
parent 0a73a7cf51
commit b476baeaca
4 changed files with 85 additions and 21 deletions

View File

@ -10,26 +10,44 @@ doscovermagazine.com
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class DiscoverMagazine(BasicNewsRecipe): class DiscoverMagazine(BasicNewsRecipe):
title = u'Discover Magazine' title = u'Discover Magazine'
description = u'Science, Technology and the Future' description = u'Science, Technology and the Future'
__author__ = 'Mike Diaz' __author__ = 'Mike Diaz'
oldest_article = 33
language = 'en' language = 'en'
max_articles_per_feed = 20 oldest_article = 33
max_articles_per_feed = 20
no_stylesheets = True
remove_javascript = True
use_embedded_content = False
encoding = 'utf-8'
extra_css = '.headline {font-size: x-large;} \n .fact {padding-top: 10pt}'
remove_tags_before = dict(id='articlePage')
keep_only_tags = [dict(name='div', attrs={'id':'articlePage'})]
remove_tags = [dict(attrs={'id':['buttons', 'tool-box', 'teaser', 'already-subscriber', 'teaser-suite', 'related-articles', 'relatedItem', 'box-popular', 'box-blogs', 'box-news', 'footer']}),
dict(attrs={'class':'popularNewsBox'}),
dict(name=['img', 'style', 'head'])]
remove_tags_after = dict(id='articlePage')
feeds = [ feeds = [
(u'Technology', u'http://discovermagazine.com/topics/technology/rss.xml'), (u'Technology', u'http://discovermagazine.com/topics/technology/rss.xml'),
(u'Health - Medicine', u'http://discovermagazine.com/topics/health-medicine/rss.xml'), (u'Health - Medicine', u'http://discovermagazine.com/topics/health-medicine/rss.xml'),
(u'Mind Brain', u'http://discovermagazine.com/topics/mind-brain/rss.xml'), (u'Mind Brain', u'http://discovermagazine.com/topics/mind-brain/rss.xml'),
(u'Space', u'http://discovermagazine.com/topics/space/rss.xml'), (u'Space', u'http://discovermagazine.com/topics/space/rss.xml'),
(u'Human Origins', u'http://discovermagazine.com/topics/human-origins/rss.xml'), (u'Human Origins', u'http://discovermagazine.com/topics/human-origins/rss.xml'),
(u'Living World', u'http://discovermagazine.com/topics/living-world/rss.xml'), (u'Living World', u'http://discovermagazine.com/topics/living-world/rss.xml'),
(u'Environment', u'http://discovermagazine.com/topics/environment/rss.xml'), (u'Environment', u'http://discovermagazine.com/topics/environment/rss.xml'),
(u'Physics & Math', u'http://discovermagazine.com/topics/physics-math/rss.xml'), (u'Physics & Math', u'http://discovermagazine.com/topics/physics-math/rss.xml'),
(u'Vital Signs', u'http://discovermagazine.com/columns/vital-signs/rss.xml'), (u'Vital Signs', u'http://discovermagazine.com/columns/vital-signs/rss.xml'),
(u"20 Things you didn't know about...", u'http://discovermagazine.com/columns/20-things-you-didnt-know/rss.xml'), (u"20 Things you didn't know about...", u'http://discovermagazine.com/columns/20-things-you-didnt-know/rss.xml'),
(u'Fuzzy Math', u'http://discovermagazine.com/columns/fuzzy-math/rss.xml'), (u'Fuzzy Math', u'http://discovermagazine.com/columns/fuzzy-math/rss.xml'),
(u'The Brain', u'http://discovermagazine.com/columns/the-brain/rss.xml'), (u'The Brain', u'http://discovermagazine.com/columns/the-brain/rss.xml'),
(u'Stupid Science Word of the Month', u'http://discovermagazine.com/columns/stupid-science-word-of-the-month/rss.xml'), (u'Stupid Science Word of the Month', u'http://discovermagazine.com/columns/stupid-science-word-of-the-month/rss.xml'),
(u'Science Not Fiction', u'http://blogs.discovermagazine.com/sciencenotfiction/wp-rss.php') (u'Science Not Fiction', u'http://blogs.discovermagazine.com/sciencenotfiction/wp-rss.php')
] ]

View File

@ -0,0 +1,24 @@
from calibre.web.feeds.news import BasicNewsRecipe
class Metro_Montreal(BasicNewsRecipe):
    """Calibre news recipe for the journalmetro.com RSS feeds (French).

    All configuration is declared as class attributes read by
    ``BasicNewsRecipe``; the only custom behaviour is ``print_version``,
    which rewrites article URLs to their print-view form.
    """

    title = u'M\xe9tro Montr\xe9al'
    __author__ = 'Jerry Clapperton'
    # Unicode literal with an escaped e-acute, like ``title``. A raw
    # non-ASCII character inside a plain (byte) string literal makes the
    # value depend on the source file's encoding under Python 2.
    description = u'Le quotidien le plus branch\xe9 sur le monde'
    language = 'fr'

    oldest_article = 7
    max_articles_per_feed = 20

    use_embedded_content = False
    remove_javascript = True
    no_stylesheets = True
    encoding = 'utf-8'

    extra_css = '.headline {font-size: x-large;} \n .fact {padding-top: 10pt}'

    # Drop the element with id "buttons" and every <img>/<style> tag.
    remove_tags = [
        dict(attrs={'id':'buttons'}),
        dict(name=['img', 'style']),
    ]

    # (section title, RSS URL) pairs.
    feeds = [
        (u"L'info", u'http://journalmetro.com/linfo/rss'),
        (u'Monde', u'http://journalmetro.com/monde/rss'),
        (u'Culture', u'http://journalmetro.com/culture/rss'),
        (u'Sports', u'http://journalmetro.com/sports/rss'),
        (u'Paroles', u'http://journalmetro.com/paroles/rss'),
    ]

    def print_version(self, url):
        """Return the print-view URL derived from an article URL.

        Every occurrence of ``'article'`` in *url* is replaced with
        ``'ArticlePrint'`` and ``'?language=fr'`` is appended.
        """
        # NOTE(review): the replacement is not limited to a single path
        # segment, so 'article' appearing elsewhere in the URL (e.g. in a
        # slug) is rewritten as well -- confirm against real feed URLs.
        return url.replace('article', 'ArticlePrint') + '?language=fr'

View File

@ -32,7 +32,7 @@ class Pagina12(BasicNewsRecipe):
} }
remove_tags = [dict(name='div', attrs={'id':['volver','logo','logo_suple','fin','permalink']})] remove_tags = [dict(name='div', attrs={'id':['volver','logo','logo_suple','fin','permalink']})]
feeds = [ feeds = [
(u'Edicion impresa', u'http://www.pagina12.com.ar/diario/rss/principal.xml' ) (u'Edicion impresa', u'http://www.pagina12.com.ar/diario/rss/principal.xml' )
@ -55,4 +55,4 @@ class Pagina12(BasicNewsRecipe):
imgnames = ['tapan.jpg','tapagn.jpg','tapan_gr.jpg','tapagn.jpg','tapagn.jpg','tapan.jpg','tapagn.jpg'] imgnames = ['tapan.jpg','tapagn.jpg','tapan_gr.jpg','tapagn.jpg','tapagn.jpg','tapan.jpg','tapagn.jpg']
weekday = time.localtime().tm_wday weekday = time.localtime().tm_wday
return strftime('http://www.pagina12.com.ar/fotos/%Y%m%d/diario/') + imgnames[weekday] return strftime('http://www.pagina12.com.ar/fotos/%Y%m%d/diario/') + imgnames[weekday]

View File

@ -0,0 +1,22 @@
from calibre.web.feeds.news import BasicNewsRecipe
class The_Gazette(BasicNewsRecipe):
    """Calibre news recipe for The Gazette (Montreal, English).

    All configuration is declared as class attributes read by
    ``BasicNewsRecipe``; articles come from the canada.com RSS feeds
    listed in ``feeds``.
    """

    # No cover image is configured. A ``file:///D:/...`` URL only resolves
    # on a machine that happens to have that file on its D: drive, so it
    # must not be shipped in a shared recipe.
    # NOTE(review): None is assumed to be the BasicNewsRecipe default,
    # meaning "no recipe-supplied cover" -- confirm against the calibre docs.
    cover_url = None

    title = u'The Gazette'
    __author__ = 'Jerry Clapperton'
    description = 'Montreal news in English'
    language = 'en_CA'

    oldest_article = 7
    max_articles_per_feed = 20

    use_embedded_content = False
    remove_javascript = True
    no_stylesheets = True
    encoding = 'utf-8'

    # Keep only the <div> elements whose id is "storyheader" or "page1".
    keep_only_tags = [dict(name='div', attrs={'id':['storyheader','page1']})]

    extra_css = '.headline {font-size: x-large;} \n .fact {padding-top: 10pt}'

    # (section title, RSS URL) pairs.
    feeds = [
        (u'News', u'http://feeds.canada.com/canwest/F297'),
        (u'Opinion', u'http://feeds.canada.com/canwest/F7383'),
        (u'Arts', u'http://feeds.canada.com/canwest/F7366'),
        (u'Life', u'http://rss.canada.com/get/?F6934'),
        (u'Business', u'http://feeds.canada.com/canwest/F6939'),
        (u'Travel', u'http://rss.canada.com/get/?F6938'),
        (u'Health', u'http://feeds.canada.com/canwest/F7397'),
        (u'Technology', u'http://feeds.canada.com/canwest/F7411'),
    ]