# Source: patch 96e32a2deff9676570e15ff33a1a6f168cdef16d
# From: Kovid Goyal, Sun, 17 Jan 2010 20:04:19 -0700
# Subject: New recipe for drivelry.com by Krittika Goyal
# Creates resources/recipes/drivelry.recipe

from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup


class drivelrycom(BasicNewsRecipe):
    """News recipe for the drivelry.com blog.

    Articles come from the blog's FeedBurner feed. Each downloaded page is
    reduced to its ``div#main`` element and re-wrapped in a small skeleton
    document that also carries a donation note (see ``preprocess_html``).
    """

    title = u'drivelry.com'
    language = 'en'
    description = 'A blog by Mike Abrahams'
    __author__ = 'Krittika Goyal'
    oldest_article = 60  # days
    max_articles_per_feed = 25

    remove_stylesheets = True
    # The bookmark widget is both the cut-off marker (remove_tags_after) and
    # itself listed in remove_tags below.
    remove_tags_after = dict(name='div', attrs={'id': 'bookmark'})
    remove_tags = [
        dict(name='iframe'),
        dict(name='div', attrs={'class': ['sidebar']}),
        dict(name='div', attrs={'id': ['bookmark']}),
    ]

    # (feed title, feed URL) pairs.
    feeds = [
        ('drivelry.com', 'http://feeds.feedburner.com/drivelry'),
    ]

    # Skeleton document that the article container is moved into.
    # NOTE(review): in the copy under review the markup of this template had
    # been stripped -- only the text "t" and the donation sentence survived,
    # which left no <body> for preprocess_html to find. The tags below are a
    # reconstruction. "click here" was presumably a hyperlink; its target is
    # not recoverable here, so restore the <a href=...> from the upstream
    # recipe before shipping.
    _PAGE_TEMPLATE = '''
<html><head><title>t</title></head><body>
<p>To donate to this blog: click here</p>
</body></html>
'''

    def preprocess_html(self, soup):
        """Re-wrap the article container in a minimal page.

        Looks up ``<div id="main">`` in *soup*, parses the skeleton template
        into a fresh document and inserts the container as the first child of
        that document's ``<body>``, i.e. ahead of the donation note.

        :param soup: parsed HTML of one downloaded page.
        :return: the new skeleton document containing the article, or *soup*
                 unchanged when the page has no ``div#main``.
        """
        story = soup.find(name='div', attrs={'id': 'main'})
        if story is None:
            # Nothing to transplant (unexpected page layout): keep the page
            # as downloaded instead of failing on insert(0, None).
            return soup

        page = BeautifulSoup(self._PAGE_TEMPLATE)
        body = page.find(name='body')
        body.insert(0, story)
        return page