Update Draw And Cook

This commit is contained in:
Kovid Goyal 2011-03-13 10:08:38 -06:00
parent 6e9108e9b3
commit 1cc3038f0e
2 changed files with 13 additions and 8 deletions

Binary file not shown.

After

Width:  |  Height:  |  Size: 575 B

View File

@@ -1,8 +1,11 @@
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
import re
class DrawAndCook(BasicNewsRecipe): class DrawAndCook(BasicNewsRecipe):
title = 'DrawAndCook' title = 'DrawAndCook'
__author__ = 'Starson17' __author__ = 'Starson17'
__version__ = 'v1.10'
__date__ = '13 March 2011'
description = 'Drawings of recipes!' description = 'Drawings of recipes!'
language = 'en' language = 'en'
publisher = 'Starson17' publisher = 'Starson17'
@@ -13,6 +16,7 @@ class DrawAndCook(BasicNewsRecipe):
remove_javascript = True remove_javascript = True
remove_empty_feeds = True remove_empty_feeds = True
cover_url = 'http://farm5.static.flickr.com/4043/4471139063_4dafced67f_o.jpg' cover_url = 'http://farm5.static.flickr.com/4043/4471139063_4dafced67f_o.jpg'
INDEX = 'http://www.theydrawandcook.com'
max_articles_per_feed = 30 max_articles_per_feed = 30
remove_attributes = ['style', 'font'] remove_attributes = ['style', 'font']
@@ -34,20 +38,21 @@ class DrawAndCook(BasicNewsRecipe):
date = '' date = ''
current_articles = [] current_articles = []
soup = self.index_to_soup(url) soup = self.index_to_soup(url)
recipes = soup.findAll('div', attrs={'class': 'date-outer'}) featured_major_slider = soup.find(name='div', attrs={'id':'featured_major_slider'})
recipes = featured_major_slider.findAll('li', attrs={'data-id': re.compile(r'artwork_entry_\d+', re.DOTALL)})
for recipe in recipes: for recipe in recipes:
title = recipe.h3.a.string page_url = self.INDEX + recipe.a['href']
page_url = recipe.h3.a['href'] print 'page_url is: ', page_url
title = recipe.find('strong').string
print 'title is: ', title
current_articles.append({'title': title, 'url': page_url, 'description':'', 'date':date}) current_articles.append({'title': title, 'url': page_url, 'description':'', 'date':date})
return current_articles return current_articles
keep_only_tags = [dict(name='h1', attrs={'id':'page_title'})
keep_only_tags = [dict(name='h3', attrs={'class':'post-title entry-title'}) ,dict(name='section', attrs={'id':'artwork'})
,dict(name='div', attrs={'class':'post-body entry-content'})
] ]
remove_tags = [dict(name='div', attrs={'class':['separator']}) remove_tags = [dict(name='article', attrs={'id':['recipe_actions', 'metadata']})
,dict(name='div', attrs={'class':['post-share-buttons']})
] ]
extra_css = ''' extra_css = '''