# Copyright (C) 2008 B.Scott Wxby [bswxby] & # Copyright (C) 2007 David Chen SonyReaderDaveChenorg ## # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. ## # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. ## # Version 0.3-2008_2_28 # Based on WIRED.py by David Chen, 2007, and newsweek.py, bbc.py, nytimes.py by Kovid Goyal ## # Usage: # >web2lrf --user-profile nasa.py # Comment out the RSS feeds you don't want in the last section below ## # Output: # NASA [YearMonthDate Time].lrf ## ''' Custom User Profile to download RSS News Feeds and Articles from NASA ''' import re from calibre.web.feeds.news import BasicNewsRecipe class NASA(BasicNewsRecipe): title = 'NASA' timefmt = ' [%Y%b%d %H%M]' language = 'en' description = 'News from NASA' __author__ = 'Scott Wxby & David Chen' no_stylesheets = True # Don't grab articles more than 30 days old oldest_article = 30 preprocess_regexps = [(re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in [ # Fix the encoding to UTF-8 (r')|(
)|(
)|(

)|())', lambda match: '

'), # noqa # Remove any links/ads/comments/cruft from the end of the body # of the article. (r'(()|(
)|(

©)|(