Add recipe for Wired.com (thanks to David Chen)

This commit is contained in:
Kovid Goyal 2008-03-27 04:06:19 +00:00
parent 095f0eaec9
commit 79134753a2
2 changed files with 53 additions and 0 deletions

View File

@@ -7,6 +7,7 @@ Builtin recipes.
# Names of the builtin recipes. Presumably each entry matches a recipe module
# shipped alongside this file -- confirm against the loader before relying on it.
recipes = [
    'newsweek', 'atlantic', 'economist', 'dilbert', 'portfolio',
    'nytimes', 'usatoday', 'outlook_india', 'bbc', 'greader', 'wsj',
    'wired',
]
import re, imp, inspect, time

View File

@@ -0,0 +1,52 @@
#!/usr/bin/env python
# Recipe module for Wired.com.
# Module metadata: the licence identifier and the markup format used for
# docstrings in this module.
__license__ = 'GPL v3'
__docformat__ = 'restructuredtext en'
import re
from libprs500.web.feeds.news import BasicNewsRecipe
class Wired(BasicNewsRecipe):
    """Recipe for Wired.com: feed list, HTML clean-up rules and print-URL mapping.

    The class only declares data and one URL helper; how these attributes
    are consumed is decided by ``BasicNewsRecipe``, which is defined outside
    this file.
    """

    title = 'Wired.com'
    __author__ = 'David Chen <SonyReader<at>DaveChen<dot>org>'
    description = 'Technology news'
    timefmt = ' [%Y%b%d %H%M]'
    no_stylesheets = True

    # (compiled pattern, replacement callable) pairs. Every pattern is
    # compiled case-insensitively and with DOTALL, so '.' also spans
    # newlines -- which is why the patterns below must be careful about how
    # far a lazy '.*?' is allowed to run.
    preprocess_regexps = [
        (re.compile(_pattern, re.IGNORECASE | re.DOTALL), _replacement)
        for _pattern, _replacement in [
            # Remove any banners/links/ads/cruft before the body of the
            # article: everything from <body up to the first known
            # article-start marker collapses into '<body><div>'.
            (r'<body.*?((<div id="article_body">)|(<div id="st-page-maincontent">)|(<div id="containermain">)|(<p class="ap-story-p">)|(<!-- img_nav -->))',
             lambda match: '<body><div>'),
            # Remove any links/ads/comments/cruft from the end of the body
            # of the article: everything from the first known article-end
            # marker through </html> collapses into the closing tags.
            (r'((<!-- end article content -->)|(<div id="st-custom-afterpagecontent">)|(<p class="ap-story-p">&copy;)|(<div class="entry-footer">)|(<div id="see_also">)|(<p>Via <a href=)|(<div id="ss_nav">)).*?</html>',
             lambda match: '</div></body></html>'),
            # Correctly embed in-line images by removing the surrounding
            # javascript link that will be ignored in the conversion.
            # The opening tag is matched with [^>]* so the search for
            # 'onclick' stays inside a single <a ...> tag, and the lookahead
            # stops the match from running past that link's </a>. Without
            # these limits DOTALL let the pattern start at the first '<a' of
            # the page and swallow the article text up to a later image.
            (r'<a\s[^>]*?onclick[^>]*>(?:(?!</a>).)*?(<img .*?>)',
             lambda match: match.group(1)),
        ]
    ]

    # (section title, feed URL) pairs.
    feeds = [
        ('Top News', 'http://feeds.wired.com/wired/index'),
        ('Culture', 'http://feeds.wired.com/wired/culture'),
        ('Software', 'http://feeds.wired.com/wired/software'),
        ('Mac', 'http://feeds.feedburner.com/cultofmac/bFow'),
        ('Gadgets', 'http://feeds.wired.com/wired/gadgets'),
        ('Cars', 'http://feeds.wired.com/wired/cars'),
        ('Entertainment', 'http://feeds.wired.com/wired/entertainment'),
        ('Gaming', 'http://feeds.wired.com/wired/gaming'),
        ('Science', 'http://feeds.wired.com/wired/science'),
        ('Med Tech', 'http://feeds.wired.com/wired/medtech'),
        ('Politics', 'http://feeds.wired.com/wired/politics'),
        ('Tech Biz', 'http://feeds.wired.com/wired/techbiz'),
        ('Commentary', 'http://feeds.wired.com/wired/commentary'),
    ]

    def print_version(self, url):
        """Return the print-section URL for a ``www.wired.com`` article URL.

        :param url: article URL as a string.
        :return: ``url`` with ``http://www.wired.com/`` rewritten to
                 ``http://www.wired.com/print/``. URLs that do not contain
                 that prefix, or that already start with the print prefix,
                 are returned unchanged.
        """
        site = 'http://www.wired.com/'
        print_site = site + 'print/'
        # Already a print URL: rewriting again would yield '/print/print/'.
        if url.startswith(print_site):
            return url
        # Rewrite only the first occurrence so a wired.com address embedded
        # later in the URL (e.g. in a query string) is left intact.
        return url.replace(site, print_site, 1)