Add CNN and Dilbert feeds

2025-07-09 03:04:10 -04:00 · 2008-01-03 19:49:52 +00:00 · 2008-01-03 19:49:52 +00:00 · 82d4c5a612
commit 82d4c5a612
parent 740798c515
6 changed files with 108 additions and 2 deletions
--- a/src/libprs500/ebooks/lrf/web/init.py
+++ b/src/libprs500/ebooks/lrf/web/init.py
@ -24,8 +24,10 @@ from libprs500.ebooks.lrf.web.profiles.faznet        import FazNet
 from libprs500.ebooks.lrf.web.profiles.wsj           import WallStreetJournal
 from libprs500.ebooks.lrf.web.profiles.barrons       import Barrons
 from libprs500.ebooks.lrf.web.profiles.portfolio     import Portfolio
+from libprs500.ebooks.lrf.web.profiles.dilbert       import Dilbert  
+from libprs500.ebooks.lrf.web.profiles.cnn           import CNN 

-builtin_profiles   = [Barrons, BBC, Economist, FazNet, Newsweek, NewYorkReviewOfBooks, NYTimes,  \
+builtin_profiles   = [Barrons, BBC, CNN, Dilbert, Economist, FazNet, Newsweek, NewYorkReviewOfBooks, NYTimes,  \
                      Portfolio, SpiegelOnline, WallStreetJournal, ZeitNachrichten,   \
                     ]

--- a/src/libprs500/ebooks/lrf/web/profiles/cnn.py
+++ b/src/libprs500/ebooks/lrf/web/profiles/cnn.py
@ -0,0 +1,63 @@
+##    Copyright (C) 2007 Kovid Goyal kovid@kovidgoyal.net
+##    This program is free software; you can redistribute it and/or modify
+##    it under the terms of the GNU General Public License as published by
+##    the Free Software Foundation; either version 2 of the License, or
+##    (at your option) any later version.
+##
+##    This program is distributed in the hope that it will be useful,
+##    but WITHOUT ANY WARRANTY; without even the implied warranty of
+##    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+##    GNU General Public License for more details.
+##
+##    You should have received a copy of the GNU General Public License along
+##    with this program; if not, write to the Free Software Foundation, Inc.,
+##    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+'''
+Profile to download CNN
+'''
+import re
+from libprs500.ebooks.lrf.web.profiles import DefaultProfile
+
+class CNN(DefaultProfile):
+    # Download profile for CNN; the settings below are interpreted by DefaultProfile (not visible here).
+    title = 'CNN'
+    max_recursions = 2 # presumably the link-follow depth -- confirm in DefaultProfile
+    timefmt  = ' [%d %b %Y]' # strftime-style directives: day, abbreviated month name, year
+    html_description = True
+    no_stylesheets = True
+    oldest_article        = 15 # presumably an age limit in days -- confirm in DefaultProfile
+    # (pattern, replacement) pairs; each pattern is compiled with IGNORECASE and DOTALL so '.' also spans newlines.
+    preprocess_regexps = [(re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in [
+        (r'<head>.*?<title', lambda match : '<head><title'), # drop whatever sits between <head> and <title
+        (r'</title>.*?</head>', lambda match : '</title></head>'), # drop whatever sits between </title> and </head>
+        (r'<body.*?<\!\-\-Article.*?>', lambda match : ''), # drop from <body through the end of the first <!--Article...> marker
+        (r'<\!\-\-Article End\-\->.*?</body>', lambda match : '</body>'), # drop from <!--Article End--> up to </body>, keeping </body>
+        (r'(</h\d>)<ul>.*?</ul>', lambda match : match.group(1)), # drop story highlights
+        (r'<h2>(.*?)</h2><h1>(.*?)</h1>', lambda match : '<h1>' + match.group(1) + '</h1><h2>' + match.group(2) + '</h2>'), # sports uses h2 for main title and h1 for subtitle (???) switch these around
+        (r'<span class="cnnEmbeddedMosLnk">.*?</span>', lambda match : ''), # drop 'watch more' links
+        (r'(<div class="cnnstorybody">).*?(<p)', lambda match : match.group(1) + match.group(2)), # drop sports photos
+        (r'</?table.*?>|</?tr.*?>|</?td.*?>', lambda match : ''), # drop table formatting
+        (r'<div class="cnnendofstorycontent".*?>.*?</div>', lambda match : ''), # drop extra business links
+        (r'<a href="#TOP">.*?</a>', lambda match : '') # drop business 'to top' link
+        ] ]
+
+    def print_version(self, url): # returns the clickability printThis URL for an article; url is appended as-is (not URL-escaped)
+        return 'http://www.printthis.clickability.com/pt/printThis?clickMap=printThis&fb=Y&url=' + url
+    # get_feeds returns a list of (section title, RSS feed URL) pairs.
+    def get_feeds(self):
+        return [
+             ('Top News', 'http://rss.cnn.com/rss/cnn_topstories.rss'),
+             ('World', 'http://rss.cnn.com/rss/cnn_world.rss'),
+             ('U.S.', 'http://rss.cnn.com/rss/cnn_us.rss'),
+             ('Sports', 'http://rss.cnn.com/rss/si_topstories.rss'),
+             ('Business', 'http://rss.cnn.com/rss/money_latest.rss'),
+             ('Politics', 'http://rss.cnn.com/rss/cnn_allpolitics.rss'),
+             ('Law', 'http://rss.cnn.com/rss/cnn_law.rss'),
+             ('Technology', 'http://rss.cnn.com/rss/cnn_tech.rss'),
+             ('Science & Space', 'http://rss.cnn.com/rss/cnn_space.rss'),
+             ('Health', 'http://rss.cnn.com/rss/cnn_health.rss'),
+             ('Entertainment', 'http://rss.cnn.com/rss/cnn_showbiz.rss'),
+             ('Education', 'http://rss.cnn.com/rss/cnn_education.rss'),
+             ('Offbeat', 'http://rss.cnn.com/rss/cnn_offbeat.rss'),
+             ('Most Popular', 'http://rss.cnn.com/rss/cnn_mostpopular.rss')
+             ]
--- a/src/libprs500/ebooks/lrf/web/profiles/dilbert.py
+++ b/src/libprs500/ebooks/lrf/web/profiles/dilbert.py
@ -0,0 +1,39 @@
+##    Copyright (C) 2007 Kovid Goyal kovid@kovidgoyal.net
+##    Customized to Dilbert by S. Dorscht and "Stenis"
+##    Version 0.02
+##    This program is free software; you can redistribute it and/or modify
+##    it under the terms of the GNU General Public License as published by
+##    the Free Software Foundation; either version 2 of the License, or
+##    (at your option) any later version.
+##
+##    This program is distributed in the hope that it will be useful,
+##    but WITHOUT ANY WARRANTY; without even the implied warranty of
+##    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+##    GNU General Public License for more details.
+##
+##    You should have received a copy of the GNU General Public License along
+##    with this program; if not, write to the Free Software Foundation, Inc.,
+##    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+
+'''
+Fetch Dilbert.
+'''
+
+
+from libprs500.ebooks.lrf.web.profiles import DefaultProfile
+
+import re
+
+class Dilbert(DefaultProfile):
+    # Download profile for Dilbert; the settings below are interpreted by DefaultProfile (not visible here).
+    title = 'Dilbert'
+    timefmt = ' [%d %b %Y]' # strftime-style directives: day, abbreviated month name, year
+    max_recursions = 1 # presumably the link-follow depth -- confirm in DefaultProfile
+    max_articles_per_feed = 6 # presumably caps the items taken from each feed -- confirm in DefaultProfile
+    html_description = True
+    no_stylesheets = True
+
+    def get_feeds(self): # returns a one-element list holding a (section title, feed URL) pair
+        return [ ('Dilbert', 'http://feeds.feedburner.com/tapestrydilbert') ] 
+
--- a/src/libprs500/gui2/images.qrc
+++ b/src/libprs500/gui2/images.qrc
@ -36,12 +36,14 @@
        <file>images/mimetypes/zip.svg</file>
        <file>images/news.svg</file>
        <file>images/news/bbc.png</file>
+        <file>images/news/cnn.png</file>
        <file>images/news/newsweek.png</file>
        <file>images/news/nytimes.png</file>
        <file>images/news/economist.png</file>
        <file>images/news/zeitde.png</file>
        <file>images/news/spiegelde.png</file>
        <file>images/news/faznet.png</file>
+        <file>images/news/dilbert.png</file>
        <file>images/next.svg</file>
        <file>images/page.svg</file>
        <file>images/plus.svg</file>
--- a/src/libprs500/gui2/images/news/cnn.png
+++ b/src/libprs500/gui2/images/news/cnn.png
--- a/src/libprs500/gui2/images/news/dilbert.png
+++ b/src/libprs500/gui2/images/news/dilbert.png