# ---------------------------------------------------------------------------
# resources/recipes/apod.recipe
# (added as a new file by this change)
# ---------------------------------------------------------------------------
from calibre.web.feeds.news import BasicNewsRecipe


class APOD(BasicNewsRecipe):
    '''
    Recipe that fetches the "Astronomy Picture of the Day" RSS feed and
    trims each downloaded page in postprocess_html().
    '''

    title = u'Astronomy Picture of the Day'
    __author__ = 'Starson17'
    description = 'Astronomy Pictures'
    language = 'en'

    # Fetch options understood by BasicNewsRecipe.
    use_embedded_content = False
    no_stylesheets = True
    cover_url = 'http://apod.nasa.gov/apod/image/1003/m78_torregrosa.jpg'
    remove_javascript = True
    recursions = 0
    oldest_article = 14

    # (feed title, feed URL) pairs.
    feeds = [
        (u'Astronomy Picture of the Day', u'http://apod.nasa.gov/apod.rss')
    ]

    extra_css = '''
    h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
    h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
    p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
    body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
    '''

    def postprocess_html(self, soup, first_fetch):
        '''
        Detach unwanted elements from a downloaded page.

        The last <center> element, the first <p> element and the last two
        <p> elements are removed (presumably page furniture around the
        picture and its explanation -- confirm against a live page).

        Pages that contain no <center> element, or fewer than three <p>
        elements, are handled without raising: whatever subset of those
        elements exists is removed, and each one is detached only once.

        :param soup: parsed page; modified in place.
        :param first_fetch: accepted for interface compatibility; unused.
        :return: the same ``soup`` object.
        '''
        center_tags = soup.findAll(['center'])
        # Collected before anything is detached, so the selection of <p>
        # elements does not depend on the <center> removal below.
        p_tags = soup.findAll(['p'])

        # Slicing (rather than indexing [0]) yields an empty list when the
        # page has no <center>, so nothing is dereferenced in that case.
        unwanted = list(center_tags[-1:])

        # First <p>, then the last two of the *remaining* <p> elements.
        # For three or more paragraphs this is exactly "first one and last
        # two"; for shorter pages it avoids selecting the same tag twice.
        unwanted.extend(p_tags[:1])
        unwanted.extend(p_tags[1:][-2:])

        for tag in unwanted:
            tag.extract()
        return soup


# ---------------------------------------------------------------------------
# resources/recipes/epicurious.recipe
# (this change removes and re-adds all 58 lines of the file; the visible
#  text of both copies is the same, so only the resulting version is kept)
# ---------------------------------------------------------------------------
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2010, Starson17'
'''
www.epicurious.com
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe


class Epicurious(BasicNewsRecipe):
    '''
    Recipe that fetches the Epicurious feeds listed in ``feeds`` and
    rewrites table markup to <div> elements in postprocess_html().
    '''

    title = u'Epicurious'
    __author__ = 'Starson17'
    description = 'Food and Recipes from Epicurious'
    cover_url = 'http://up6.podbean.com/image-logos/21849_logo.jpg'
    publisher = 'Epicurious'
    tags = 'news, food, gourmet, recipes'
    language = 'en'

    # Fetch options understood by BasicNewsRecipe.
    use_embedded_content = False
    no_stylesheets = True
    remove_javascript = True
    recursions = 3
    oldest_article = 14
    max_articles_per_feed = 20

    # <div> elements to keep, selected by class and by id.
    keep_only_tags = [
        dict(name='div', attrs={'class': ['mainconsolewrapper', 'videoheader', 'content_unit', 'entry-content', 'see_more_block']}),
        dict(name='div', attrs={'id': ['headline', 'introBlock', 'ingredients', 'preparation', 'articleContent', 'in_categories_block']}),
    ]

    # Elements to drop, selected by id, by class, and by <div> class.
    remove_tags = [
        {'id': ['printShoppingList', 'addnoteLnk', 'btnUploadVideo', 'enlarge_image']},
        {'class': ['subLnk', 'sbmWrapper', 'detail_division', 'entry-footer', 'comment-footer']},
        dict(name='div', attrs={'class': ['tagged', 'comments']}),
    ]

    remove_tags_after = [dict(name='div', attrs={'class': 'entry-content'})]

    # (feed title, feed URL) pairs; the titles carry a trailing space.
    feeds = [
        (u'Recipes: Healthy dinner ', u'http://feeds.epicurious.com/healthy_recipes'),
        (u'New Recipes ', u'http://feeds.epicurious.com/newrecipes'),
        (u'Features ', u'http://feeds.epicurious.com/latestfeatures'),
        (u'Blogs ', u'http://feeds.feedburner.com/epicurious/epiblog'),
    ]

    match_regexps = [
        r'http://www.epicurious.com/.*recipes/.*/views'
    ]

    # (compiled pattern, replacement callable) pairs.
    preprocess_regexps = [
        # Replace "/" followed by a newline with a bare "/".
        (re.compile(r'/\n', re.DOTALL|re.IGNORECASE), lambda match: '/'),
        # Replace a "_116.jpg" suffix with ".jpg".
        (re.compile(r'_116.jpg', re.DOTALL|re.IGNORECASE), lambda match: '.jpg'),
        # NOTE(review): in the patch text this string literal spans a raw
        # line break and its replacement is the empty string; the literal
        # may have been altered in transit -- verify the intended pattern
        # and replacement against the repository copy of the recipe.
        (re.compile('\n', re.DOTALL|re.IGNORECASE), lambda match: ''),
    ]

    def postprocess_html(self, soup, first_fetch):
        '''
        Rename every <table>, <tr> and <td> element to <div>.

        :param soup: parsed page; modified in place.
        :param first_fetch: accepted for interface compatibility; unused.
        :return: the same ``soup`` object.
        '''
        for cell in soup.findAll(['table', 'tr', 'td']):
            cell.name = 'div'
        return soup