Add CNN and Dilbert feeds

2025-07-09 03:04:10 -04:00 · 2008-01-03 19:49:52 +00:00 · 2008-01-03 19:49:52 +00:00 · 82d4c5a612
commit 82d4c5a612
parent 740798c515
6 changed files with 108 additions and 2 deletions
--- a/src/libprs500/ebooks/lrf/web/init.py
+++ b/src/libprs500/ebooks/lrf/web/init.py
@ -24,8 +24,10 @@ from libprs500.ebooks.lrf.web.profiles.faznet        import FazNet
 from libprs500.ebooks.lrf.web.profiles.wsj           import WallStreetJournal
 from libprs500.ebooks.lrf.web.profiles.barrons       import Barrons
 from libprs500.ebooks.lrf.web.profiles.portfolio     import Portfolio
 from libprs500.ebooks.lrf.web.profiles.dilbert       import Dilbert  
 from libprs500.ebooks.lrf.web.profiles.cnn           import CNN 
-builtin_profiles   = [Barrons, BBC, Economist, FazNet, Newsweek, NewYorkReviewOfBooks, NYTimes,  \
+builtin_profiles   = [Barrons, BBC, CNN, Dilbert, Economist, FazNet, Newsweek, NewYorkReviewOfBooks, NYTimes,  \
                      Portfolio, SpiegelOnline, WallStreetJournal, ZeitNachrichten,   \
                     ]
--- a/src/libprs500/ebooks/lrf/web/profiles/cnn.py
+++ b/src/libprs500/ebooks/lrf/web/profiles/cnn.py
@ -0,0 +1,63 @@
 ##    Copyright (C) 2007 Kovid Goyal kovid@kovidgoyal.net
 ##    This program is free software; you can redistribute it and/or modify
 ##    it under the terms of the GNU General Public License as published by
 ##    the Free Software Foundation; either version 2 of the License, or
 ##    (at your option) any later version.
 ##
 ##    This program is distributed in the hope that it will be useful,
 ##    but WITHOUT ANY WARRANTY; without even the implied warranty of
 ##    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 ##    GNU General Public License for more details.
 ##
 ##    You should have received a copy of the GNU General Public License along
 ##    with this program; if not, write to the Free Software Foundation, Inc.,
 ##    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 '''
 Profile to download CNN
 '''
 import re
 from libprs500.ebooks.lrf.web.profiles import DefaultProfile
 class CNN(DefaultProfile):
    '''
    Download profile for CNN. Articles come from the RSS feeds listed in
    get_feeds() and each one is fetched through the print service URL
    built by print_version().
    '''
    title = 'CNN'
    timefmt  = ' [%d %b %Y]'
    # NOTE(review): the exact meaning/units of the limits below are defined by
    # DefaultProfile, which is not visible here -- confirm before changing.
    max_recursions = 2
    oldest_article        = 15
    html_description = True
    no_stylesheets = True
    # (compiled pattern, replacement callable) pairs. Every pattern is
    # compiled case-insensitively and with DOTALL so '.' also spans newlines.
    preprocess_regexps = [
        (re.compile(pattern, re.IGNORECASE | re.DOTALL), replacement)
        for pattern, replacement in (
            # keep only the <title> element inside <head>
            (r'<head>.*?<title', lambda m: '<head><title'),
            (r'</title>.*?</head>', lambda m: '</title></head>'),
            # strip everything outside the Article / Article End comment markers
            (r'<body.*?<\!\-\-Article.*?>', lambda m: ''),
            (r'<\!\-\-Article End\-\->.*?</body>', lambda m: '</body>'),
            # remove the story highlights list that follows a heading
            (r'(</h\d>)<ul>.*?</ul>', lambda m: m.group(1)),
            # sports pages put the main title in h2 and the subtitle in h1;
            # swap the two so the main title ends up in h1
            (r'<h2>(.*?)</h2><h1>(.*?)</h1>',
             lambda m: '<h1>' + m.group(1) + '</h1><h2>' + m.group(2) + '</h2>'),
            # remove 'watch more' links
            (r'<span class="cnnEmbeddedMosLnk">.*?</span>', lambda m: ''),
            # remove sports photos placed before the first paragraph
            (r'(<div class="cnnstorybody">).*?(<p)', lambda m: m.group(1) + m.group(2)),
            # remove table formatting tags (cell contents are kept)
            (r'</?table.*?>|</?tr.*?>|</?td.*?>', lambda m: ''),
            # remove the extra links at the end of business stories
            (r'<div class="cnnendofstorycontent".*?>.*?</div>', lambda m: ''),
            # remove the business 'to top' link
            (r'<a href="#TOP">.*?</a>', lambda m: ''),
        )
    ]
    def print_version(self, url):
        '''
        Return the print service address for the article at `url`.
        The article URL is appended unmodified (no escaping) as the value
        of the service's `url` query parameter.
        '''
        printer_prefix = 'http://www.printthis.clickability.com/pt/printThis?clickMap=printThis&fb=Y&url='
        return printer_prefix + url
    def get_feeds(self):
        '''
        Return the CNN feeds as a list of (section title, RSS URL) tuples.
        '''
        feeds = [
            ('Top News', 'http://rss.cnn.com/rss/cnn_topstories.rss'),
            ('World', 'http://rss.cnn.com/rss/cnn_world.rss'),
            ('U.S.', 'http://rss.cnn.com/rss/cnn_us.rss'),
            ('Sports', 'http://rss.cnn.com/rss/si_topstories.rss'),
            ('Business', 'http://rss.cnn.com/rss/money_latest.rss'),
            ('Politics', 'http://rss.cnn.com/rss/cnn_allpolitics.rss'),
            ('Law', 'http://rss.cnn.com/rss/cnn_law.rss'),
            ('Technology', 'http://rss.cnn.com/rss/cnn_tech.rss'),
            ('Science & Space', 'http://rss.cnn.com/rss/cnn_space.rss'),
            ('Health', 'http://rss.cnn.com/rss/cnn_health.rss'),
            ('Entertainment', 'http://rss.cnn.com/rss/cnn_showbiz.rss'),
            ('Education', 'http://rss.cnn.com/rss/cnn_education.rss'),
            ('Offbeat', 'http://rss.cnn.com/rss/cnn_offbeat.rss'),
            ('Most Popular', 'http://rss.cnn.com/rss/cnn_mostpopular.rss'),
        ]
        return feeds
--- a/src/libprs500/ebooks/lrf/web/profiles/dilbert.py
+++ b/src/libprs500/ebooks/lrf/web/profiles/dilbert.py
@ -0,0 +1,39 @@
 ##    Copyright (C) 2007 Kovid Goyal kovid@kovidgoyal.net
 ##    Customized to Dilbert by S. Dorscht and "Stenis"
 ##    Version 0.02
 ##    This program is free software; you can redistribute it and/or modify
 ##    it under the terms of the GNU General Public License as published by
 ##    the Free Software Foundation; either version 2 of the License, or
 ##    (at your option) any later version.
 ##
 ##    This program is distributed in the hope that it will be useful,
 ##    but WITHOUT ANY WARRANTY; without even the implied warranty of
 ##    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 ##    GNU General Public License for more details.
 ##
 ##    You should have received a copy of the GNU General Public License along
 ##    with this program; if not, write to the Free Software Foundation, Inc.,
 ##    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 '''
 Fetch Dilbert.
 '''
 from libprs500.ebooks.lrf.web.profiles import DefaultProfile
 import re
 class Dilbert(DefaultProfile):
    '''
    Download profile for Dilbert, read from the single feed returned by
    get_feeds().
    '''
    title = 'Dilbert'
    timefmt = ' [%d %b %Y]'
    html_description = True
    no_stylesheets = True
    # NOTE(review): how these limits are applied is decided by DefaultProfile,
    # which is not visible here -- confirm before changing.
    max_recursions = 1
    max_articles_per_feed = 6
    def get_feeds(self):
        '''
        Return the Dilbert feed as a one-element list of (title, feed URL) tuples.
        '''
        feeds = [('Dilbert', 'http://feeds.feedburner.com/tapestrydilbert')]
        return feeds
--- a/src/libprs500/gui2/images.qrc
+++ b/src/libprs500/gui2/images.qrc
@ -36,12 +36,14 @@
        <file>images/mimetypes/zip.svg</file>
        <file>images/news.svg</file>
        <file>images/news/bbc.png</file>
        <file>images/news/cnn.png</file>
        <file>images/news/newsweek.png</file>
        <file>images/news/nytimes.png</file>
        <file>images/news/economist.png</file>
        <file>images/news/zeitde.png</file>
        <file>images/news/spiegelde.png</file>
        <file>images/news/faznet.png</file>
        <file>images/news/dilbert.png</file>
        <file>images/next.svg</file>
        <file>images/page.svg</file>
        <file>images/plus.svg</file>
--- a/src/libprs500/gui2/images/news/cnn.png
+++ b/src/libprs500/gui2/images/news/cnn.png
--- a/src/libprs500/gui2/images/news/dilbert.png
+++ b/src/libprs500/gui2/images/news/dilbert.png