Add CNN and Dilbert feeds

This commit is contained in:
Kovid Goyal 2008-01-03 19:49:52 +00:00
parent 740798c515
commit 82d4c5a612
6 changed files with 108 additions and 2 deletions

View File

@ -24,8 +24,10 @@ from libprs500.ebooks.lrf.web.profiles.faznet import FazNet
from libprs500.ebooks.lrf.web.profiles.wsj import WallStreetJournal
from libprs500.ebooks.lrf.web.profiles.barrons import Barrons
from libprs500.ebooks.lrf.web.profiles.portfolio import Portfolio
from libprs500.ebooks.lrf.web.profiles.dilbert import Dilbert
from libprs500.ebooks.lrf.web.profiles.cnn import CNN
builtin_profiles = [Barrons, BBC, Economist, FazNet, Newsweek, NewYorkReviewOfBooks, NYTimes, \
builtin_profiles = [Barrons, BBC, CNN, Dilbert, Economist, FazNet, Newsweek, NewYorkReviewOfBooks, NYTimes, \
Portfolio, SpiegelOnline, WallStreetJournal, ZeitNachrichten, \
]

View File

@ -0,0 +1,63 @@
## Copyright (C) 2007 Kovid Goyal kovid@kovidgoyal.net
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 2 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License along
## with this program; if not, write to the Free Software Foundation, Inc.,
## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
'''
Profile to download CNN
'''
import re
from libprs500.ebooks.lrf.web.profiles import DefaultProfile
class CNN(DefaultProfile):
    '''
    Profile to download CNN.

    Declares the CNN RSS feeds, a list of regular-expression clean-ups and
    a rewrite of article URLs to a print-friendly service. How the
    attributes below are consumed is decided by DefaultProfile, which is
    defined outside this file.
    '''

    title = 'CNN'
    timefmt = ' [%d %b %Y]'
    oldest_article = 15
    max_recursions = 2
    no_stylesheets = True
    html_description = True

    # Each entry pairs a pattern with a callable that receives the match
    # object and returns the replacement text. Every pattern is compiled
    # case-insensitively and with DOTALL so that '.' also spans newlines.
    preprocess_regexps = [
        (re.compile(pattern, re.IGNORECASE | re.DOTALL), substitute)
        for pattern, substitute in (
            # Empty out <head> except for the <title> element.
            (r'<head>.*?<title', lambda match : '<head><title'),
            (r'</title>.*?</head>', lambda match : '</title></head>'),
            # Remove everything from the <body tag through the first
            # '<!--Article...>' comment.
            (r'<body.*?<\!\-\-Article.*?>', lambda match : ''),
            # Remove everything between the '<!--Article End-->' comment
            # and </body>, keeping only the closing tag.
            (r'<\!\-\-Article End\-\->.*?</body>', lambda match : '</body>'),
            # Story highlights: a <ul> directly following a closing heading.
            (r'(</h\d>)<ul>.*?</ul>', lambda match : match.group(1)),
            # Sports pages put the main title in <h2> and the subtitle in
            # <h1>; swap the two heading levels.
            (r'<h2>(.*?)</h2><h1>(.*?)</h1>',
             lambda match : '<h1>' + match.group(1) + '</h1><h2>' + match.group(2) + '</h2>'),
            # 'Watch more' links.
            (r'<span class="cnnEmbeddedMosLnk">.*?</span>', lambda match : ''),
            # Sports photos: whatever sits between the story body <div>
            # and its first <p.
            (r'(<div class="cnnstorybody">).*?(<p)',
             lambda match : match.group(1) + match.group(2)),
            # Table formatting: strip the table/tr/td tags themselves.
            (r'</?table.*?>|</?tr.*?>|</?td.*?>', lambda match : ''),
            # Extra business links appended after the story.
            (r'<div class="cnnendofstorycontent".*?>.*?</div>', lambda match : ''),
            # Business 'to top' link.
            (r'<a href="#TOP">.*?</a>', lambda match : ''),
        )
    ]

    def print_version(self, url):
        '''
        Return the clickability "printThis" address for C{url}.

        C{url} is appended verbatim (no escaping) as the value of the
        trailing C{url=} query parameter.
        '''
        prefix = 'http://www.printthis.clickability.com/pt/printThis?clickMap=printThis&fb=Y&url='
        return prefix + url

    def get_feeds(self):
        '''
        Return the CNN feeds as a list of (section title, RSS URL) tuples.
        '''
        feeds = [
            ('Top News', 'http://rss.cnn.com/rss/cnn_topstories.rss'),
            ('World', 'http://rss.cnn.com/rss/cnn_world.rss'),
            ('U.S.', 'http://rss.cnn.com/rss/cnn_us.rss'),
            ('Sports', 'http://rss.cnn.com/rss/si_topstories.rss'),
            ('Business', 'http://rss.cnn.com/rss/money_latest.rss'),
            ('Politics', 'http://rss.cnn.com/rss/cnn_allpolitics.rss'),
            ('Law', 'http://rss.cnn.com/rss/cnn_law.rss'),
            ('Technology', 'http://rss.cnn.com/rss/cnn_tech.rss'),
            ('Science & Space', 'http://rss.cnn.com/rss/cnn_space.rss'),
            ('Health', 'http://rss.cnn.com/rss/cnn_health.rss'),
            ('Entertainment', 'http://rss.cnn.com/rss/cnn_showbiz.rss'),
            ('Education', 'http://rss.cnn.com/rss/cnn_education.rss'),
            ('Offbeat', 'http://rss.cnn.com/rss/cnn_offbeat.rss'),
            ('Most Popular', 'http://rss.cnn.com/rss/cnn_mostpopular.rss'),
        ]
        return feeds

View File

@ -0,0 +1,39 @@
## Copyright (C) 2007 Kovid Goyal kovid@kovidgoyal.net
## Customized for Dilbert by S. Dorscht and "Stenis"
## Version 0.02
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 2 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License along
## with this program; if not, write to the Free Software Foundation, Inc.,
## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
'''
Fetch Dilbert.
'''
from libprs500.ebooks.lrf.web.profiles import DefaultProfile
import re
class Dilbert(DefaultProfile):
    '''
    Profile to fetch Dilbert.

    Only declares settings and a single feed; how the attributes are
    interpreted is decided by DefaultProfile, which is defined outside
    this file.
    '''

    title = 'Dilbert'
    timefmt = ' [%d %b %Y]'
    max_articles_per_feed = 6
    max_recursions = 1
    no_stylesheets = True
    html_description = True

    def get_feeds(self):
        '''
        Return the Dilbert feed as a one-element list of (title, RSS URL).
        '''
        dilbert_feed = ('Dilbert', 'http://feeds.feedburner.com/tapestrydilbert')
        return [dilbert_feed]

View File

@ -36,12 +36,14 @@
<file>images/mimetypes/zip.svg</file>
<file>images/news.svg</file>
<file>images/news/bbc.png</file>
<file>images/news/cnn.png</file>
<file>images/news/newsweek.png</file>
<file>images/news/nytimes.png</file>
<file>images/news/economist.png</file>
<file>images/news/zeitde.png</file>
<file>images/news/spiegelde.png</file>
<file>images/news/faznet.png</file>
<file>images/news/dilbert.png</file>
<file>images/next.svg</file>
<file>images/page.svg</file>
<file>images/plus.svg</file>

Binary file not shown.

After

Width:  |  Height:  |  Size: 758 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.3 KiB