#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2010, Starson17'
'''
www.epicurious.com
'''
import re

from calibre.web.feeds.news import BasicNewsRecipe


class Epicurious(BasicNewsRecipe):
    """Calibre recipe that downloads recipes, features and blog posts from Epicurious feeds."""

    # --- Publication metadata -------------------------------------------
    title = u'Epicurious'
    __author__ = 'Starson17'
    description = 'Food and Recipes from Epicurious'
    cover_url = 'http://up6.podbean.com/image-logos/21849_logo.jpg'
    publisher = 'Epicurious'
    tags = 'news, food, gourmet, recipes'
    language = 'en'

    # --- Download behaviour ---------------------------------------------
    use_embedded_content = False
    no_stylesheets = True
    remove_javascript = True
    recursions = 3
    oldest_article = 14
    max_articles_per_feed = 20

    # --- Page clean-up selectors ----------------------------------------
    # Only these <div> containers (selected by class or by id) are kept.
    keep_only_tags = [
        dict(name='div', attrs={'class': ['mainconsolewrapper', 'videoheader',
                                          'content_unit', 'entry-content',
                                          'see_more_block']}),
        dict(name='div', attrs={'id': ['headline', 'introBlock', 'ingredients',
                                       'preparation', 'articleContent',
                                       'in_categories_block']}),
    ]

    # Elements dropped from the kept content, matched by id, by class, or
    # (last entry) <div> elements with the listed classes.
    remove_tags = [
        {'id': ['printShoppingList', 'addnoteLnk', 'btnUploadVideo',
                'enlarge_image']},
        {'class': ['subLnk', 'sbmWrapper', 'detail_division', 'entry-footer',
                   'comment-footer']},
        dict(name='div', attrs={'class': ['tagged', 'comments']}),
    ]

    remove_tags_after = [dict(name='div', attrs={'class': 'entry-content'})]

    # --- Feeds: (title, url) pairs --------------------------------------
    feeds = [
        (u'Recipes: Healthy dinner ', u'http://feeds.epicurious.com/healthy_recipes'),
        (u'New Recipes ', u'http://feeds.epicurious.com/newrecipes'),
        (u'Features ', u'http://feeds.epicurious.com/latestfeatures'),
        (u'Blogs ', u'http://feeds.feedburner.com/epicurious/epiblog'),
    ]

    # Links that may be followed while recursing. The dots of the host name
    # are escaped so that only the literal "www.epicurious.com" matches.
    match_regexps = [
        r'http://www\.epicurious\.com/.*recipes/.*/views',
    ]

    # (compiled pattern, replacement callable) pairs applied to the raw HTML.
    preprocess_regexps = [
        # Re-join anything that was broken onto a new line right after a "/".
        (re.compile(r'/\n', re.DOTALL | re.IGNORECASE), lambda match: '/'),
        # Drop the "_116" suffix from image file names; the dot is escaped so
        # that only a real ".jpg" extension is rewritten.
        (re.compile(r'_116\.jpg', re.DOTALL | re.IGNORECASE), lambda match: '.jpg'),
        # NOTE(review): in the file as received this pattern literal held a
        # raw line-break character, which is a syntax error inside a
        # single-quoted string. It is spelled as the '\n' escape here;
        # confirm against the upstream recipe that this is the intended
        # pattern.
        (re.compile('\n', re.DOTALL | re.IGNORECASE), lambda match: ''),
    ]

    def postprocess_html(self, soup, first_fetch):
        """Rename every table, tr and td element in *soup* to div.

        :param soup: parsed document; must provide ``findAll`` and yield
            elements with a writable ``name`` attribute.
        :param first_fetch: accepted for interface compatibility; not used.
        :return: the same ``soup`` object, modified in place.
        """
        for t in soup.findAll(['table', 'tr', 'td']):
            t.name = 'div'
        return soup