#!/usr/bin/env python
# Calibre news recipe that builds a daily edition from Wired's RSS feeds.

__license__ = 'GPL v3'
__docformat__ = 'restructuredtext en'

import re

from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.chardet import xml_to_unicode


class Wired_Daily(BasicNewsRecipe):
    """Recipe definition for the 'Wired Daily Edition' periodical.

    The class is purely declarative apart from two hooks:
    ``populate_article_metadata`` (normalises the article summary) and
    ``print_version`` (rewrites an article URL to its ``/all/1`` variant).
    """

    title = 'Wired Daily Edition'
    __author__ = 'Kovid Goyal'
    description = 'Technology news'
    timefmt = ' [%Y%b%d %H%M]'
    language = 'en'

    no_stylesheets = True

    # NOTE(review): the pattern below is an empty string, so substituting ''
    # for every (empty) match leaves the input unchanged. Presumably the real
    # pattern was lost at some point -- confirm against the upstream recipe
    # before relying on this pre-processing step.
    preprocess_regexps = [(re.compile(r'', re.DOTALL), lambda m: '')]

    # Tag selectors used to trim the downloaded page down to the article body.
    remove_tags_before = dict(name='div', id='content')
    remove_tags = [
        dict(id=['header', 'commenting_module', 'post_nav', 'social_tools',
                 'sidebar', 'footer', 'social_wishlist', 'pgwidget',
                 'outerWrapper', 'inf_widget']),
        {'class': ['entryActions', 'advertisement', 'entryTags']},
        dict(name=['noscript', 'script']),
        dict(name='h4', attrs={'class': re.compile(r'rat\d+')}),
        # Matches any element whose class starts with 'contentjump'; the
        # ``x and`` guard skips elements that have no class at all.
        {'class': lambda x: x and x.startswith('contentjump')},
        dict(name='li', attrs={'class': ['entryCategories', 'entryEdit']}),
    ]

    # (section title, feed URL) pairs.
    feeds = [
        ('Top News', 'http://feeds.wired.com/wired/index'),
        ('Product Reviews',
         'http://www.wired.com/reviews/feeds/latestProductsRss'),
        ('Autopia', 'http://www.wired.com/autopia/feed/'),
        ('Danger Room', 'http://www.wired.com/dangerroom/feed/'),
        ('Epicenter', 'http://www.wired.com/epicenter/feed/'),
        ('Gadget Lab', 'http://www.wired.com/gadgetlab/feed/'),
        ('Geek Dad', 'http://www.wired.com/geekdad/feed/'),
        ('Playbook', 'http://www.wired.com/playbook/feed/'),
        ('Rawfile', 'http://www.wired.com/rawfile/feed/'),
        ('This Day in Tech', 'http://www.wired.com/thisdayintech/feed/'),
        ('Threat Level', 'http://www.wired.com/threatlevel/feed/'),
        ('Underwire', 'http://www.wired.com/underwire/feed/'),
        ('Web Monkey', 'http://www.webmonkey.com/feed/'),
        ('Science', 'http://www.wired.com/wiredscience/feed/'),
    ]

    def populate_article_metadata(self, article, soup, first):
        """Replace the article's text summary with its decoded form.

        When ``article.text_summary`` is non-empty it is passed through
        ``xml_to_unicode`` with ``resolve_entities=True`` and the first
        element of the result is stored back on the article. ``soup`` and
        ``first`` are accepted for the hook signature but not used here.
        """
        if article.text_summary:
            article.text_summary = xml_to_unicode(
                article.text_summary, resolve_entities=True)[0]

    def print_version(self, url):
        """Return ``url`` with the ``/all/1`` suffix appended.

        Trailing slashes are stripped first so that a URL ending in ``/``
        yields ``.../all/1`` rather than ``...//all/1``.
        """
        return url.rstrip('/') + '/all/1'