from __future__ import print_function

import re

from calibre.web.feeds.news import BasicNewsRecipe


class DrawAndCook(BasicNewsRecipe):
    """Recipe that builds one feed from the theydrawandcook.com front page.

    The article list is scraped from the ``featured_major_slider`` element
    of the index page; each article page is then reduced to its title
    heading and artwork section via ``keep_only_tags``/``remove_tags``.
    """

    title = 'DrawAndCook'
    __author__ = 'Starson17'
    __version__ = 'v1.10'
    __date__ = '13 March 2011'
    description = 'Drawings of recipes!'
    language = 'en'
    publisher = 'Starson17'
    category = 'news, food, recipes'
    use_embedded_content = False
    no_stylesheets = True
    oldest_article = 24
    remove_javascript = True
    remove_empty_feeds = True
    cover_url = 'http://farm5.static.flickr.com/4043/4471139063_4dafced67f_o.jpg'
    # Site root; entry hrefs found on the index page are appended to it.
    INDEX = 'http://www.theydrawandcook.com'
    max_articles_per_feed = 30
    remove_attributes = ['style', 'font']

    def parse_index(self):
        """Return the feeds as a list of ``(feed_title, articles)`` tuples.

        A feed is only included when ``make_links`` found at least one
        article for it.
        """
        feeds = []
        for title, url in [
            ("They Draw and Cook", "http://www.theydrawandcook.com/"),
        ]:
            articles = self.make_links(url)
            if articles:
                feeds.append((title, articles))
        print('feeds are: ', feeds)
        return feeds

    def make_links(self, url):
        """Collect the article entries listed in the featured slider at *url*.

        Returns a list of dicts with the keys ``title``, ``url``,
        ``description`` and ``date`` (the last two are always empty
        strings). Returns an empty list when the slider element is not
        present, and skips entries that lack a link or a ``<strong>``
        title instead of raising.
        """
        # Fetch and parse the index page once.
        soup = self.index_to_soup(url)
        slider = soup.find(name='div', attrs={'id': 'featured_major_slider'})
        if slider is None:
            # Page layout differs from what this recipe expects: no articles.
            print('featured_major_slider not found at: ', url)
            return []

        entries = slider.findAll(
            'li',
            attrs={'data-id': re.compile(r'artwork_entry_\d+', re.DOTALL)})

        current_articles = []
        for recipe in entries:
            link = recipe.a
            heading = recipe.find('strong')
            # A malformed entry must not abort the whole feed.
            if link is None or heading is None or not link.get('href'):
                continue
            page_url = self.INDEX + link['href']
            print('page_url is: ', page_url)
            title = heading.string
            print('title is: ', title)
            current_articles.append({
                'title': title,
                'url': page_url,
                'description': '',
                'date': '',
            })
        return current_articles

    # Keep only the page heading and the artwork itself ...
    keep_only_tags = [
        dict(name='h1', attrs={'id': 'page_title'}),
        dict(name='section', attrs={'id': 'artwork'}),
    ]

    # ... and drop the action/metadata panels.
    remove_tags = [
        dict(name='article', attrs={'id': ['recipe_actions', 'metadata']}),
    ]

    extra_css = '''
        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
        h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
        img {max-width:100%; min-width:100%;}
        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
        '''