Astronomy Pic of the Day by Starson17. Fixes #5045 (New Recipe: Astronomy Picture of the Day)

This commit is contained in:
Kovid Goyal 2010-03-03 01:29:14 -07:00
parent 80cbfb1e41
commit 13a9733d42
2 changed files with 95 additions and 58 deletions

View File

@ -0,0 +1,37 @@
from calibre.web.feeds.news import BasicNewsRecipe
class APOD(BasicNewsRecipe):
title = u'Astronomy Picture of the Day'
__author__ = 'Starson17'
description = 'Astronomy Pictures'
language = 'en'
use_embedded_content = False
no_stylesheets = True
cover_url = 'http://apod.nasa.gov/apod/image/1003/m78_torregrosa.jpg'
remove_javascript = True
recursions = 0
oldest_article = 14
feeds = [
(u'Astronomy Picture of the Day', u'http://apod.nasa.gov/apod.rss')
]
extra_css = '''
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
'''
def postprocess_html(self, soup, first_fetch):
center_tags = soup.findAll(['center'])
p_tags = soup.findAll(['p'])
last_center = center_tags[-1:]
last_center[0].extract()
first_p = p_tags[:1]
for tag in first_p:
tag.extract()
last2_p = p_tags[-2:]
for tag in last2_p:
tag.extract()
return soup

View File

@ -1,58 +1,58 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2010, Starson17'
'''
www.epicurious.com
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class Epicurious(BasicNewsRecipe):
title = u'Epicurious'
__author__ = 'Starson17'
description = 'Food and Recipes from Epicurious'
cover_url = 'http://up6.podbean.com/image-logos/21849_logo.jpg'
publisher = 'Epicurious'
tags = 'news, food, gourmet, recipes'
language = 'en'
use_embedded_content = False
no_stylesheets = True
remove_javascript = True
recursions = 3
oldest_article = 14
max_articles_per_feed = 20
keep_only_tags = [dict(name='div', attrs={'class':['mainconsolewrapper','videoheader','content_unit','entry-content','see_more_block']}),
dict(name='div', attrs={'id':['headline','introBlock','ingredients','preparation','articleContent','in_categories_block']})
]
remove_tags = [{'id':['printShoppingList','addnoteLnk','btnUploadVideo','enlarge_image']},
{'class':['subLnk','sbmWrapper','detail_division','entry-footer','comment-footer']},
dict(name='div', attrs={'class':['tagged','comments']})
]
remove_tags_after = [dict(name='div', attrs={'class':'entry-content'})]
feeds = [
(u'Recipes: Healthy dinner ', u'http://feeds.epicurious.com/healthy_recipes'),
(u'New Recipes ', u'http://feeds.epicurious.com/newrecipes'),
(u'Features ', u'http://feeds.epicurious.com/latestfeatures'),
(u'Blogs ', u'http://feeds.feedburner.com/epicurious/epiblog')
]
match_regexps = [
r'http://www.epicurious.com/.*recipes/.*/views'
]
preprocess_regexps = [
(re.compile(r'/\n', re.DOTALL|re.IGNORECASE), lambda match: '/'),
(re.compile(r'_116.jpg', re.DOTALL|re.IGNORECASE), lambda match: '.jpg'),
(re.compile('<div class=\"comments\".*</body>', re.DOTALL|re.IGNORECASE), lambda match: '</body>')
]
def postprocess_html(self, soup, first_fetch):
for t in soup.findAll(['table', 'tr', 'td']):
t.name = 'div'
return soup
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2010, Starson17'
'''
www.epicurious.com
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class Epicurious(BasicNewsRecipe):
title = u'Epicurious'
__author__ = 'Starson17'
description = 'Food and Recipes from Epicurious'
cover_url = 'http://up6.podbean.com/image-logos/21849_logo.jpg'
publisher = 'Epicurious'
tags = 'news, food, gourmet, recipes'
language = 'en'
use_embedded_content = False
no_stylesheets = True
remove_javascript = True
recursions = 3
oldest_article = 14
max_articles_per_feed = 20
keep_only_tags = [dict(name='div', attrs={'class':['mainconsolewrapper','videoheader','content_unit','entry-content','see_more_block']}),
dict(name='div', attrs={'id':['headline','introBlock','ingredients','preparation','articleContent','in_categories_block']})
]
remove_tags = [{'id':['printShoppingList','addnoteLnk','btnUploadVideo','enlarge_image']},
{'class':['subLnk','sbmWrapper','detail_division','entry-footer','comment-footer']},
dict(name='div', attrs={'class':['tagged','comments']})
]
remove_tags_after = [dict(name='div', attrs={'class':'entry-content'})]
feeds = [
(u'Recipes: Healthy dinner ', u'http://feeds.epicurious.com/healthy_recipes'),
(u'New Recipes ', u'http://feeds.epicurious.com/newrecipes'),
(u'Features ', u'http://feeds.epicurious.com/latestfeatures'),
(u'Blogs ', u'http://feeds.feedburner.com/epicurious/epiblog')
]
match_regexps = [
r'http://www.epicurious.com/.*recipes/.*/views'
]
preprocess_regexps = [
(re.compile(r'/\n', re.DOTALL|re.IGNORECASE), lambda match: '/'),
(re.compile(r'_116.jpg', re.DOTALL|re.IGNORECASE), lambda match: '.jpg'),
(re.compile('<div class=\"comments\".*</body>', re.DOTALL|re.IGNORECASE), lambda match: '</body>')
]
def postprocess_html(self, soup, first_fetch):
for t in soup.findAll(['table', 'tr', 'td']):
t.name = 'div'
return soup