diff --git a/resources/images/news/DrawAndCook.png b/resources/images/news/DrawAndCook.png new file mode 100644 index 0000000000..8b40b75344 Binary files /dev/null and b/resources/images/news/DrawAndCook.png differ diff --git a/resources/recipes/DrawAndCook.recipe b/resources/recipes/DrawAndCook.recipe index 1c080b85db..8db4f71014 100644 --- a/resources/recipes/DrawAndCook.recipe +++ b/resources/recipes/DrawAndCook.recipe @@ -1,8 +1,11 @@ from calibre.web.feeds.news import BasicNewsRecipe +import re class DrawAndCook(BasicNewsRecipe): title = 'DrawAndCook' __author__ = 'Starson17' + __version__ = 'v1.10' + __date__ = '13 March 2011' description = 'Drawings of recipes!' language = 'en' publisher = 'Starson17' @@ -13,6 +16,7 @@ class DrawAndCook(BasicNewsRecipe): remove_javascript = True remove_empty_feeds = True cover_url = 'http://farm5.static.flickr.com/4043/4471139063_4dafced67f_o.jpg' + INDEX = 'http://www.theydrawandcook.com' max_articles_per_feed = 30 remove_attributes = ['style', 'font'] @@ -34,20 +38,21 @@ class DrawAndCook(BasicNewsRecipe): date = '' current_articles = [] soup = self.index_to_soup(url) - recipes = soup.findAll('div', attrs={'class': 'date-outer'}) + featured_major_slider = soup.find(name='div', attrs={'id':'featured_major_slider'}) + recipes = featured_major_slider.findAll('li', attrs={'data-id': re.compile(r'artwork_entry_\d+', re.DOTALL)}) for recipe in recipes: - title = recipe.h3.a.string - page_url = recipe.h3.a['href'] + page_url = self.INDEX + recipe.a['href'] + print 'page_url is: ', page_url + title = recipe.find('strong').string + print 'title is: ', title current_articles.append({'title': title, 'url': page_url, 'description':'', 'date':date}) return current_articles - - keep_only_tags = [dict(name='h3', attrs={'class':'post-title entry-title'}) - ,dict(name='div', attrs={'class':'post-body entry-content'}) + keep_only_tags = [dict(name='h1', attrs={'id':'page_title'}) + ,dict(name='section', attrs={'id':'artwork'}) ] - remove_tags = [dict(name='div', attrs={'class':['separator']}) - ,dict(name='div', attrs={'class':['post-share-buttons']}) + remove_tags = [dict(name='article', attrs={'id':['recipe_actions', 'metadata']}) ] extra_css = ''' diff --git a/resources/recipes/instapaper.recipe b/resources/recipes/instapaper.recipe index 73c32d08a7..0eb5cf0f09 100644 --- a/resources/recipes/instapaper.recipe +++ b/resources/recipes/instapaper.recipe @@ -1,23 +1,12 @@ -__license__ = 'GPL v3' -__copyright__ = '2009-2010, Darko Miletic ' -''' -www.instapaper.com -''' - -import urllib from calibre import strftime from calibre.web.feeds.news import BasicNewsRecipe -class Instapaper(BasicNewsRecipe): - title = 'Instapaper.com' +class AdvancedUserRecipe1299694372(BasicNewsRecipe): + title = u'Instapaper' __author__ = 'Darko Miletic' - description = '''Personalized news feeds. Go to instapaper.com to - setup up your news. Fill in your instapaper - username, and leave the password field - below blank.''' publisher = 'Instapaper.com' - category = 'news, custom' - oldest_article = 7 + category = 'info, custom, Instapaper' + oldest_article = 365 max_articles_per_feed = 100 no_stylesheets = True use_embedded_content = False @@ -25,16 +14,9 @@ class Instapaper(BasicNewsRecipe): INDEX = u'http://www.instapaper.com' LOGIN = INDEX + u'/user/login' - conversion_options = { - 'comment' : description - , 'tags' : category - , 'publisher' : publisher - } - feeds = [ - (u'Unread articles' , INDEX + u'/u' ) - ,(u'Starred articles', INDEX + u'/starred') - ] + + feeds = [(u'Instapaper Unread', u'http://www.instapaper.com/u'), (u'Instapaper Starred', u'http://www.instapaper.com/starred')] def get_browser(self): br = BasicNewsRecipe.get_browser() @@ -70,7 +52,3 @@ class Instapaper(BasicNewsRecipe): }) totalfeeds.append((feedtitle, articles)) return totalfeeds - - def print_version(self, url): - return self.INDEX + '/text?u=' + urllib.quote(url) - diff --git a/resources/recipes/modoros.recipe b/resources/recipes/modoros.recipe new file mode 100644 index 0000000000..72980298d6 --- /dev/null +++ b/resources/recipes/modoros.recipe @@ -0,0 +1,89 @@ +import re +from calibre.web.feeds.recipes import BasicNewsRecipe +from calibre.constants import config_dir, CONFIG_DIR_MODE +import os, os.path, urllib +from hashlib import md5 + +class ModorosBlogHu(BasicNewsRecipe): + __author__ = 'Zsolt Botykai' + title = u'Modoros Blog' + description = u"Modoros.blog.hu" + oldest_article = 10000 + max_articles_per_feed = 10000 + reverse_article_order = True + language = 'hu' + remove_javascript = True + remove_empty_feeds = True + no_stylesheets = True + feeds = [(u'Modoros Blog', u'http://modoros.blog.hu/rss')] + remove_javascript = True + use_embedded_content = False + preprocess_regexps = [ + (re.compile(r'.*?', re.DOTALL|re.IGNORECASE), + lambda match: ''), + (re.compile(r'

', re.DOTALL|re.IGNORECASE), lambda m: ''), + (re.compile(r'( | )*?

', re.DOTALL|re.IGNORECASE), lambda match: ''), + (re.compile(r']+>.*?
.*?', re.DOTALL|re.IGNORECASE), lambda match: ''), + (re.compile(r'