Add recipe for Wired.com (thanks to David Chen)

2025-07-09 03:04:10 -04:00 · 2008-03-27 04:06:19 +00:00 · 2008-03-27 04:06:19 +00:00 · 79134753a2
commit 79134753a2
parent 095f0eaec9
2 changed files with 53 additions and 0 deletions
--- a/src/libprs500/web/feeds/recipes/__init__.py
+++ b/src/libprs500/web/feeds/recipes/__init__.py
@@ -7,6 +7,7 @@ Builtin recipes.
 # Names of the builtin recipes. Each entry presumably matches a module in
 # this package (e.g. 'wired' for wired.py) -- confirm against the loader.
 recipes = [
     'newsweek',
     'atlantic',
     'economist',
     'dilbert',
     'portfolio',
     'nytimes',
     'usatoday',
     'outlook_india',
     'bbc',
     'greader',
     'wsj',
     'wired',
 ]
 import re, imp, inspect, time
--- a/src/libprs500/web/feeds/recipes/wired.py
+++ b/src/libprs500/web/feeds/recipes/wired.py
@@ -0,0 +1,52 @@
 #!/usr/bin/env  python
 __license__   = 'GPL v3'
 __docformat__ = 'restructuredtext en'
 import re
 from libprs500.web.feeds.news import BasicNewsRecipe
 class Wired(BasicNewsRecipe):
    """Recipe for the Wired.com technology news feeds.

    ``preprocess_regexps`` is a list of ``(compiled pattern, replacement
    callable)`` pairs that strip page chrome around the article body and
    unwrap inline images; presumably the base class applies them to each
    downloaded page before conversion (confirm in BasicNewsRecipe).
    """
    title = 'Wired.com'
    __author__ = 'David Chen <SonyReader<at>DaveChen<dot>org>'
    description = 'Technology news'
    timefmt = ' [%Y%b%d  %H%M]'
    no_stylesheets = True
    # Every pattern is compiled case-insensitively and with DOTALL so that
    # '.' also matches newlines and a match can span several lines of HTML.
    # The patterns are compiled explicitly (not in a list comprehension) so
    # no loop variable is left behind as a class attribute on Python 2.
    preprocess_regexps = [
        # Remove any banners/links/ads/cruft before the body of the article:
        # everything from <body up to the first known content marker is
        # replaced by a fresh '<body><div>'.
        (re.compile(r'<body.*?((<div id="article_body">)|(<div id="st-page-maincontent">)|(<div id="containermain">)|(<p class="ap-story-p">)|(<!-- img_nav -->))',
                    re.IGNORECASE | re.DOTALL),
         lambda match: '<body><div>'),
        # Remove any links/ads/comments/cruft from the end of the body of
        # the article: everything from the first known footer marker up to
        # </html> is replaced by the closing tags.
        (re.compile(r'((<!-- end article content -->)|(<div id="st-custom-afterpagecontent">)|(<p class="ap-story-p">&copy;)|(<div class="entry-footer">)|(<div id="see_also">)|(<p>Via <a href=)|(<div id="ss_nav">)).*?</html>',
                    re.IGNORECASE | re.DOTALL),
         lambda match: '</div></body></html>'),
        # Correctly embed in-line images by removing the surrounding
        # javascript link that will be ignored in the conversion.
        # The onclick attribute must sit inside one <a ...> tag ([^>]*), and
        # the image must come before that link's </a>. With a bare '.*?'
        # under DOTALL the match could start at an earlier, unrelated <a
        # and swallow all the markup up to the next onclick/<img>.
        (re.compile(r'<a\s[^>]*?onclick[^>]*>(?:(?!</a>).)*?(<img .*?>)',
                    re.IGNORECASE | re.DOTALL),
         lambda match: match.group(1)),
    ]
    # (section title, feed URL) pairs.
    feeds = [
        ('Top News', 'http://feeds.wired.com/wired/index'),
        ('Culture', 'http://feeds.wired.com/wired/culture'),
        ('Software', 'http://feeds.wired.com/wired/software'),
        ('Mac', 'http://feeds.feedburner.com/cultofmac/bFow'),
        ('Gadgets', 'http://feeds.wired.com/wired/gadgets'),
        ('Cars', 'http://feeds.wired.com/wired/cars'),
        ('Entertainment', 'http://feeds.wired.com/wired/entertainment'),
        ('Gaming', 'http://feeds.wired.com/wired/gaming'),
        ('Science', 'http://feeds.wired.com/wired/science'),
        ('Med Tech', 'http://feeds.wired.com/wired/medtech'),
        ('Politics', 'http://feeds.wired.com/wired/politics'),
        ('Tech Biz', 'http://feeds.wired.com/wired/techbiz'),
        ('Commentary', 'http://feeds.wired.com/wired/commentary'),
    ]
    def print_version(self, url):
        """Return ``url`` with the www.wired.com prefix pointed at /print/.

        URLs that do not contain 'http://www.wired.com/' are returned
        unchanged.
        """
        return url.replace('http://www.wired.com/', 'http://www.wired.com/print/')