#!/usr/bin/env python
"""Calibre news recipe for www.epicurious.com."""
# Provenance: distributed as patch 768fb2305705e00dc1e1207c5b25932ce8da5842
# (From: Kovid Goyal, Mon, 1 Mar 2010 18:45:08 -0700), subject
# "Epicurious by Starson17. Fixes #5033 (New recipe: Epicurious (food
# recipes, gourmet))", creating resources/recipes/epicurious.recipe.

__license__ = 'GPL v3'
__copyright__ = '2010, Starson17'

import re

from calibre.web.feeds.news import BasicNewsRecipe


class Epicurious(BasicNewsRecipe):
    """Recipe that builds an e-book from the Epicurious RSS feeds.

    All configuration is declarative (class attributes read by
    ``BasicNewsRecipe``); the only custom code is ``postprocess_html``.
    """

    # --- Publication metadata ------------------------------------------
    title = u'Epicurious'
    __author__ = 'Starson17'
    description = 'Food and Recipes from Epicurious'
    cover_url = 'http://up6.podbean.com/image-logos/21849_logo.jpg'
    publisher = 'Epicurious'
    tags = 'news, food, gourmet, recipes'
    language = 'en'

    # --- Download behaviour --------------------------------------------
    use_embedded_content = False
    no_stylesheets = True
    remove_javascript = True
    recursions = 3
    oldest_article = 14
    max_articles_per_feed = 20

    # --- Page clean-up ---------------------------------------------------
    # Only <div> elements carrying one of these classes / ids are kept.
    keep_only_tags = [
        dict(name='div', attrs={'class': [
            'mainconsolewrapper',
            'videoheader',
            'content_unit',
            'entry-content',
            'see_more_block',
        ]}),
        dict(name='div', attrs={'id': [
            'headline',
            'introBlock',
            'ingredients',
            'preparation',
            'articleContent',
            'in_categories_block',
        ]}),
    ]

    # The first two entries name no tag, so they are keyed on id / class
    # alone; the last one is restricted to <div>.
    remove_tags = [
        {'id': [
            'printShoppingList',
            'addnoteLnk',
            'btnUploadVideo',
            'enlarge_image',
        ]},
        {'class': [
            'subLnk',
            'sbmWrapper',
            'detail_division',
            'entry-footer',
            'comment-footer',
        ]},
        dict(name='div', attrs={'class': ['tagged', 'comments']}),
    ]

    remove_tags_after = [dict(name='div', attrs={'class': 'entry-content'})]

    # --- Feeds: (section title, feed URL) -------------------------------
    feeds = [
        (u'Recipes: Healthy dinner ',
         u'http://feeds.epicurious.com/healthy_recipes'),
        (u'New Recipes ',
         u'http://feeds.epicurious.com/newrecipes'),
        (u'Features ',
         u'http://feeds.epicurious.com/latestfeatures'),
        (u'Blogs ',
         u'http://feeds.feedburner.com/epicurious/epiblog'),
    ]

    # Link filter used together with ``recursions``.  The dots of the host
    # name are escaped so that they only match a literal '.'.
    match_regexps = [
        r'http://www\.epicurious\.com/.*recipes/.*/views',
    ]

    # (compiled pattern, replacement callable) pairs, listed in the order
    # they were originally declared.
    preprocess_regexps = [
        # Drop a line break that directly follows a '/'.
        (re.compile(r'/\n', re.DOTALL | re.IGNORECASE),
         lambda match: '/'),
        # Strip the '_116' suffix from JPEG file names (dot escaped so it
        # no longer matches an arbitrary character).
        (re.compile(r'_116\.jpg', re.DOTALL | re.IGNORECASE),
         lambda match: '.jpg'),
        # NOTE(review): in the patch this pattern was a quoted string split
        # across two physical lines, which is not valid Python.  It is kept
        # here as a literal newline; the original may instead have been an
        # HTML tag that was lost in transit -- confirm against upstream.
        (re.compile('\n', re.DOTALL | re.IGNORECASE),
         lambda match: ''),
    ]

    def postprocess_html(self, soup, first_fetch):
        """Rename every ``table``, ``tr`` and ``td`` element to ``div``.

        :param soup: parsed page; modified in place.
        :param first_fetch: unused by this implementation.
        :return: the same ``soup`` object.
        """
        # Presumably done so table-based layout reflows as plain blocks.
        for element in soup.findAll(['table', 'tr', 'td']):
            element.name = 'div'
        return soup