diff --git a/recipes/icons/techcrunch.png b/recipes/icons/techcrunch.png new file mode 100644 index 0000000000..b5a8c719a1 Binary files /dev/null and b/recipes/icons/techcrunch.png differ diff --git a/recipes/techcrunch.recipe b/recipes/techcrunch.recipe new file mode 100644 index 0000000000..4975f71468 --- /dev/null +++ b/recipes/techcrunch.recipe @@ -0,0 +1,63 @@ +__license__ = 'GPL v3' +__copyright__ = '2011, Darko Miletic ' +''' +techcrunch.com +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class TechCrunch(BasicNewsRecipe): + title = 'TechCrunch' + __author__ = 'Darko Miletic' + description = 'IT News' + publisher = 'AOL Inc.' + category = 'news, IT' + oldest_article = 2 + max_articles_per_feed = 200 + no_stylesheets = True + encoding = 'utf8' + use_embedded_content = False + language = 'en' + remove_empty_feeds = True + publication_type = 'newsportal' + masthead_url = 'http://s2.wp.com/wp-content/themes/vip/tctechcrunch2/images/site-logo.png' + extra_css = """ + body{font-family: Helvetica,Arial,sans-serif } + img{margin-bottom: 0.4em; display:block} + """ + + conversion_options = { + 'comment' : description + , 'tags' : category + , 'publisher' : publisher + , 'language' : language + } + + remove_tags = [dict(name=['meta','link'])] + remove_attributes=['lang'] + keep_only_tags=[ + dict(name='h1', attrs={'class':'headline'}) + ,dict(attrs={'class':['author','post-time','body-copy']}) + ] + + feeds = [(u'News', u'http://feeds.feedburner.com/TechCrunch/')] + + def preprocess_html(self, soup): + for item in soup.findAll(style=True): + del item['style'] + for item in soup.findAll('a'): + limg = item.find('img') + if item.string is not None: + str = item.string + item.replaceWith(str) + else: + if limg: + item.name = 'div' + item.attrs = [] + else: + str = self.tag_to_string(item) + item.replaceWith(str) + for item in soup.findAll('img'): + if not item.has_key('alt'): + item['alt'] = 'image' + return soup