Add CNN and Dilbert feeds

This commit is contained in:
Kovid Goyal 2008-01-03 19:49:52 +00:00
parent 740798c515
commit 82d4c5a612
6 changed files with 108 additions and 2 deletions

View File

@ -24,8 +24,10 @@ from libprs500.ebooks.lrf.web.profiles.faznet import FazNet
from libprs500.ebooks.lrf.web.profiles.wsj import WallStreetJournal from libprs500.ebooks.lrf.web.profiles.wsj import WallStreetJournal
from libprs500.ebooks.lrf.web.profiles.barrons import Barrons from libprs500.ebooks.lrf.web.profiles.barrons import Barrons
from libprs500.ebooks.lrf.web.profiles.portfolio import Portfolio from libprs500.ebooks.lrf.web.profiles.portfolio import Portfolio
from libprs500.ebooks.lrf.web.profiles.dilbert import Dilbert
from libprs500.ebooks.lrf.web.profiles.cnn import CNN
builtin_profiles = [Barrons, BBC, Economist, FazNet, Newsweek, NewYorkReviewOfBooks, NYTimes, \ builtin_profiles = [Barrons, BBC, CNN, Dilbert, Economist, FazNet, Newsweek, NewYorkReviewOfBooks, NYTimes, \
Portfolio, SpiegelOnline, WallStreetJournal, ZeitNachrichten, \ Portfolio, SpiegelOnline, WallStreetJournal, ZeitNachrichten, \
] ]

View File

@ -0,0 +1,63 @@
## Copyright (C) 2007 Kovid Goyal kovid@kovidgoyal.net
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 2 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License along
## with this program; if not, write to the Free Software Foundation, Inc.,
## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
'''
Profile to download CNN
'''
import re
from libprs500.ebooks.lrf.web.profiles import DefaultProfile
class CNN(DefaultProfile):
    '''
    Profile to download CNN.
    '''

    title = 'CNN'
    timefmt = ' [%d %b %Y]'
    oldest_article = 15
    max_recursions = 2
    html_description = True
    no_stylesheets = True

    # (compiled pattern, replacement callable) pairs. Every pattern is
    # compiled case-insensitively and with '.' also matching newlines.
    preprocess_regexps = [
        (re.compile(pattern, re.IGNORECASE | re.DOTALL), replacement)
        for pattern, replacement in [
            # Drop everything between <head> and the start of <title
            (r'<head>.*?<title', lambda match: '<head><title'),
            # Drop everything between </title> and </head>
            (r'</title>.*?</head>', lambda match: '</title></head>'),
            # Drop from <body up to the end of the "<!--Article" marker
            (r'<body.*?<\!\-\-Article.*?>', lambda match: ''),
            # Drop everything from the "Article End" marker up to </body>
            (r'<\!\-\-Article End\-\->.*?</body>', lambda match: '</body>'),
            # Drop story highlights: a <ul> right after a closing heading tag
            (r'(</h\d>)<ul>.*?</ul>', lambda match: match.group(1)),
            # Sports uses h2 for the main title and h1 for the subtitle;
            # swap the two tags around
            (r'<h2>(.*?)</h2><h1>(.*?)</h1>',
             lambda match: '<h1>' + match.group(1) + '</h1><h2>' + match.group(2) + '</h2>'),
            # Drop 'watch more' links
            (r'<span class="cnnEmbeddedMosLnk">.*?</span>', lambda match: ''),
            # Drop sports photos: whatever sits between the story body div
            # and the first <p
            (r'(<div class="cnnstorybody">).*?(<p)',
             lambda match: match.group(1) + match.group(2)),
            # Drop table formatting
            (r'</?table.*?>|</?tr.*?>|</?td.*?>', lambda match: ''),
            # Drop extra business links
            (r'<div class="cnnendofstorycontent".*?>.*?</div>', lambda match: ''),
            # Drop business 'to top' link
            (r'<a href="#TOP">.*?</a>', lambda match: ''),
        ]
    ]

    def print_version(self, url):
        '''
        Return the clickability "printThis" address for C{url}, built by
        appending C{url} unchanged to the fixed printThis query string.
        '''
        print_this = 'http://www.printthis.clickability.com/pt/printThis?clickMap=printThis&fb=Y&url='
        return print_this + url

    def get_feeds(self):
        '''
        Return a list of (section title, RSS feed URL) tuples.
        '''
        feeds = [
            ('Top News', 'http://rss.cnn.com/rss/cnn_topstories.rss'),
            ('World', 'http://rss.cnn.com/rss/cnn_world.rss'),
            ('U.S.', 'http://rss.cnn.com/rss/cnn_us.rss'),
            ('Sports', 'http://rss.cnn.com/rss/si_topstories.rss'),
            ('Business', 'http://rss.cnn.com/rss/money_latest.rss'),
            ('Politics', 'http://rss.cnn.com/rss/cnn_allpolitics.rss'),
            ('Law', 'http://rss.cnn.com/rss/cnn_law.rss'),
            ('Technology', 'http://rss.cnn.com/rss/cnn_tech.rss'),
            ('Science & Space', 'http://rss.cnn.com/rss/cnn_space.rss'),
            ('Health', 'http://rss.cnn.com/rss/cnn_health.rss'),
            ('Entertainment', 'http://rss.cnn.com/rss/cnn_showbiz.rss'),
            ('Education', 'http://rss.cnn.com/rss/cnn_education.rss'),
            ('Offbeat', 'http://rss.cnn.com/rss/cnn_offbeat.rss'),
            ('Most Popular', 'http://rss.cnn.com/rss/cnn_mostpopular.rss'),
        ]
        return feeds

View File

@ -0,0 +1,39 @@
## Copyright (C) 2007 Kovid Goyal kovid@kovidgoyal.net
## Customized for Dilbert by S. Dorscht and "Stenis"
## Version 0.02
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 2 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License along
## with this program; if not, write to the Free Software Foundation, Inc.,
## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
'''
Fetch Dilbert.
'''
from libprs500.ebooks.lrf.web.profiles import DefaultProfile
import re
class Dilbert(DefaultProfile):
    '''
    Profile to fetch Dilbert.
    '''

    title = 'Dilbert'
    timefmt = ' [%d %b %Y]'
    max_articles_per_feed = 6
    max_recursions = 1
    html_description = True
    no_stylesheets = True

    def get_feeds(self):
        '''
        Return a one-element list holding the (title, feed URL) tuple
        for Dilbert.
        '''
        dilbert_feed = ('Dilbert', 'http://feeds.feedburner.com/tapestrydilbert')
        return [dilbert_feed]

View File

@ -36,12 +36,14 @@
<file>images/mimetypes/zip.svg</file> <file>images/mimetypes/zip.svg</file>
<file>images/news.svg</file> <file>images/news.svg</file>
<file>images/news/bbc.png</file> <file>images/news/bbc.png</file>
<file>images/news/cnn.png</file>
<file>images/news/newsweek.png</file> <file>images/news/newsweek.png</file>
<file>images/news/nytimes.png</file> <file>images/news/nytimes.png</file>
<file>images/news/economist.png</file> <file>images/news/economist.png</file>
<file>images/news/zeitde.png</file> <file>images/news/zeitde.png</file>
<file>images/news/spiegelde.png</file> <file>images/news/spiegelde.png</file>
<file>images/news/faznet.png</file> <file>images/news/faznet.png</file>
<file>images/news/dilbert.png</file>
<file>images/next.svg</file> <file>images/next.svg</file>
<file>images/page.svg</file> <file>images/page.svg</file>
<file>images/plus.svg</file> <file>images/plus.svg</file>

Binary file not shown.

After

Width:  |  Height:  |  Size: 758 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.3 KiB