diff --git a/src/libprs500/ebooks/lrf/web/__init__.py b/src/libprs500/ebooks/lrf/web/__init__.py
index aaf49de99e..94743b0eca 100644
--- a/src/libprs500/ebooks/lrf/web/__init__.py
+++ b/src/libprs500/ebooks/lrf/web/__init__.py
@@ -12,3 +12,88 @@
## You should have received a copy of the GNU General Public License along
## with this program; if not, write to the Free Software Foundation, Inc.,
## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+import os, time, calendar, operator
+
+from libprs500 import iswindows
+from libprs500.ebooks.BeautifulSoup import BeautifulStoneSoup
+
+def parse_feeds(feeds, browser, print_version, max_articles_per_feed=10):
+ articles = {}
+ for title, url in feeds:
+ src = browser.open(url).read()
+ articles[title] = []
+ soup = BeautifulStoneSoup(src)
+ for item in soup.findAll('item'):
+ try:
+ pubdate = item.find('pubdate').string
+ if not pubdate:
+ continue
+ pubdate = pubdate.replace('+0000', 'GMT')
+ d = {
+ 'title' : item.find('title').string,
+ 'url' : print_version(item.find('guid').string),
+ 'timestamp': calendar.timegm(time.strptime(pubdate,
+ '%a, %d %b %Y %H:%M:%S %Z')),
+ 'date' : pubdate
+ }
+        except:
+            # skip items with missing fields or unparseable dates
+            continue
+ try:
+ d['description'] = item.find('description').string
+ except:
+ d['description'] = ''
+ articles[title].append(d)
+ articles[title].sort(key=operator.itemgetter('timestamp'), reverse=True)
+ articles[title][max_articles_per_feed:] = []
+ for item in articles[title]:
+ item.pop('timestamp')
+ return articles
+
+
+def build_index(title, articles, dir):
+ '''Build an RSS based index.html'''
+
+ def build_sub_index(title, items):
+ ilist = ''
+        li = u'<li><a href="%(url)s">%(title)s</a> [%(date)s]<br/>\n'+\
+             u'<div style="font-size:small">%(description)s</div></li>\n'
+ for item in items:
+ ilist += li%item
+        return u'''\
+<html>
+<body>
+<h2>%(title)s</h2>
+<ul>
+%(items)s
+</ul>
+</body>
+</html>
+        '''%dict(title=title, items=ilist.rstrip())
+
+ cnum = 0
+ clist = ''
+ categories = articles.keys()
+ categories.sort()
+ for category in categories:
+ cnum += 1
+ cfile = os.path.join(dir, 'category'+str(cnum)+'.html')
+ prefix = 'file:' if iswindows else ''
+        clist += u'<li><a href="%s">%s</a></li>\n'%(prefix+cfile, category)
+ src = build_sub_index(category, articles[category])
+ open(cfile, 'wb').write(src.encode('utf-8'))
+
+    src = '''\
+<html>
+<body>
+<h1>%(title)s</h1>
+<div>%(date)s</div>
+<ul>
+%(categories)s
+</ul>
+</body>
+</html>
+    '''%dict(title=title, date=time.strftime('%a, %d %B, %Y', time.localtime()),
+             categories=clist)
+ index = os.path.join(dir, 'index.html')
+ open(index, 'wb').write(src.encode('utf-8'))
+ return index
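
A quick usage sketch of the two new helpers (not part of the patch; the feed list is made up, and browser() is libprs500's mechanize-based factory):

    import tempfile
    from libprs500 import browser
    from libprs500.ebooks.lrf.web import parse_feeds, build_index

    feeds = [('Top Stories', 'http://example.com/rss.xml')]  # hypothetical feed
    # print_version is the identity here, so article links are fetched as-is
    articles = parse_feeds(feeds, browser(), lambda url: url,
                           max_articles_per_feed=5)
    # Writes category1.html, ... plus an index.html linking them,
    # and returns the path to index.html
    print build_index('Example News', articles, tempfile.mkdtemp())
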
diff --git a/src/libprs500/ebooks/lrf/web/bbc.py b/src/libprs500/ebooks/lrf/web/bbc.py
new file mode 100644
index 0000000000..01c63eb4d9
--- /dev/null
+++ b/src/libprs500/ebooks/lrf/web/bbc.py
@@ -0,0 +1,53 @@
+## Copyright (C) 2007 Kovid Goyal kovid@kovidgoyal.net
+## This program is free software; you can redistribute it and/or modify
+## it under the terms of the GNU General Public License as published by
+## the Free Software Foundation; either version 2 of the License, or
+## (at your option) any later version.
+##
+## This program is distributed in the hope that it will be useful,
+## but WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+## GNU General Public License for more details.
+##
+## You should have received a copy of the GNU General Public License along
+## with this program; if not, write to the Free Software Foundation, Inc.,
+## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+
+import tempfile, shutil, os
+from libprs500.ebooks.lrf.web import build_index, parse_feeds
+
+RSS = 'http://news.bbc.co.uk/1/hi/help/3223484.stm'
+
+from libprs500 import __appname__, iswindows, browser
+from libprs500.ebooks.BeautifulSoup import BeautifulSoup
+
+
+def get_feeds(browser):
+ src = browser.open(RSS).read()
+    soup = BeautifulSoup(src[src.index('<html'):])
+    feeds = []
+    for link in soup.findAll('a'):
+        url = link.get('href', '')
+        if url.endswith('.xml') and link.string is not None:
+            feeds.append((link.string.strip(), url))
+    return feeds
+
+
+def initialize(profile):
+    profile['temp dir'] = tempfile.mkdtemp(prefix=__appname__+'_')
+    profile['browser'] = browser()
+    feeds = get_feeds(profile['browser'])
+    articles = parse_feeds(feeds, profile['browser'],
+        lambda x: x.replace('http://', 'http://newsvote.bbc.co.uk/mpapps/pagetools/print/'))
+    index = build_index('The BBC', articles, profile['temp dir'])
+    profile['url'] = 'file:'+ ('' if iswindows else '//') + index
+    profile['timefmt'] = ' [%a, %d %b, %Y]'
+    profile['max_recursions'] = 2
+    profile['title'] = 'The BBC'
+    profile['no_stylesheets'] = True
+
+
+def finalize(profile):
+    if os.path.isdir(profile['temp dir']):
+        shutil.rmtree(profile['temp dir'])
diff --git a/src/libprs500/ebooks/lrf/web/nytimes.py b/src/libprs500/ebooks/lrf/web/nytimes.py
--- a/src/libprs500/ebooks/lrf/web/nytimes.py
+++ b/src/libprs500/ebooks/lrf/web/nytimes.py
+from libprs500.ebooks.lrf.web import build_index, parse_feeds
+
-def build_index(articles, dir):
-    '''Build an RSS based index.html'''
-
-    def build_sub_index(title, items):
-        ilist = ''
-        li = u'<li><a href="%(url)s">%(title)s</a> [%(date)s]<br/>\n'+\
-             u'<div style="font-size:small">%(description)s</div></li>\n'
- for item in items:
- ilist += li%item
-        return u'''\
-<html>
-<body>
-<h2>%(title)s</h2>
-<ul>
-%(items)s
-</ul>
-</body>
-</html>
-        '''%dict(title=title, items=ilist.rstrip())
-
- cnum = 0
- clist = ''
- categories = articles.keys()
- categories.sort()
- for category in categories:
- cnum += 1
- cfile = os.path.join(dir, 'category'+str(cnum)+'.html')
- prefix = 'file:' if iswindows else ''
-        clist += u'<li><a href="%s">%s</a></li>\n'%(prefix+cfile, category)
- src = build_sub_index(category, articles[category])
- open(cfile, 'wb').write(src.encode('utf-8'))
-
-    src = '''\
-<html>
-<body>
-<h1>The New York Times</h1>
-<div>%(date)s</div>
-<ul>
-%(categories)s
-</ul>
-</body>
-</html>
-    '''%dict(date=time.strftime('%a, %d %B, %Y', time.localtime()), categories=clist)
- index = os.path.join(dir, 'index.html')
- open(index, 'wb').write(src.encode('utf-8'))
- return index
-
-
def initialize(profile):
profile['temp dir'] = tempfile.mkdtemp(prefix=__appname__+'_')
profile['browser'] = login(profile)
feeds = get_feeds(profile['browser'])
- articles = parse_feeds(feeds, profile['browser'])
- index = build_index(articles, profile['temp dir'])
+ articles = parse_feeds(feeds, profile['browser'], lambda x: x + '?&pagewanted=print')
+ index = build_index('The New York Times', articles, profile['temp dir'])
profile['url'] = 'file:'+ ('' if iswindows else '//') + index
+ profile['timefmt'] = ' [%a, %d %b, %Y]'
+ profile['max_recursions'] = 2
+ profile['title'] = 'The New York Times'
def finalize(profile):
- shutil.rmtree(profile['temp dir'])
+ if os.path.isdir(profile['temp dir']):
+ shutil.rmtree(profile['temp dir'])
def login(profile):
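
For reference, a sketch of how a profile built this way is driven end to end: initialize() logs in and writes the index, the converter consumes profile['url'], and finalize() cleans up. The fetch_and_convert call and the credential keys are assumptions, not part of this patch:

    from libprs500.ebooks.lrf.web.nytimes import initialize, finalize

    profile = {'username': USERNAME, 'password': PASSWORD}  # assumed keys used by login()
    initialize(profile)                    # builds index.html in profile['temp dir']
    try:
        fetch_and_convert(profile['url'])  # hypothetical consumer of the file: URL
    finally:
        finalize(profile)                  # removes the temp dir if it still exists
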
diff --git a/src/libprs500/ebooks/lrf/web/profiles.py b/src/libprs500/ebooks/lrf/web/profiles.py
index b076858059..951b9173fa 100644
--- a/src/libprs500/ebooks/lrf/web/profiles.py
+++ b/src/libprs500/ebooks/lrf/web/profiles.py
@@ -19,6 +19,8 @@ from libprs500.ebooks.lrf.web.newsweek import initialize as newsweek_initialize
from libprs500.ebooks.lrf.web.newsweek import finalize as newsweek_finalize
from libprs500.ebooks.lrf.web.nytimes import initialize as nytimes_initialize
from libprs500.ebooks.lrf.web.nytimes import finalize as nytimes_finalize
+from libprs500.ebooks.lrf.web.bbc import initialize as bbc_initialize
+from libprs500.ebooks.lrf.web.bbc import finalize as bbc_finalize
profiles = {
@@ -42,9 +44,7 @@ profiles = {
'nytimes' : {
'initialize' : nytimes_initialize,
'finalize' : nytimes_finalize,
- 'timefmt' : ' [%a, %d %b, %Y]',
- 'max_recursions' : 2,
- 'title' : 'The New York Times',
+
'preprocess_regexps' :
[ (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
[
@@ -59,26 +59,24 @@ profiles = {
},
'bbc' : {
- 'title' : 'The BBC',
- 'no_stylesheets' : True,
- 'preprocess_regexps' :
+ 'initialize' : bbc_initialize,
+ 'finalize' : bbc_finalize,
+ 'preprocess_regexps' :
[ (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
[
-                  # Remove help link and replace by title
-                  (r'<a href="http://news.bbc.co.uk/1/hi/help/3223484.stm">',
-                   lambda match: '<h1>The BBC</h1>\n<b>%s</b>'%(time.strftime('%a %d %b %Y', time.localtime()),)),
-                  # Blank line before categories
-                  (r'<br/>\s*<b>BBC', lambda match: '<br/><br/><b>BBC'),
# Remove footer from individual stories
(r'
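
The preprocess_regexps entries are (compiled pattern, substitution callable) pairs, as built by the list comprehension above. A consumer would apply them to fetched HTML roughly as follows; this is a sketch, and the preprocess name is ours, not the library's:

    def preprocess(page, regexps):
        # pattern.sub() calls the substitution with each match object,
        # which is how the lambda match: ... entries above are invoked
        for pattern, func in regexps:
            page = pattern.sub(func, page)
        return page

    page = preprocess(page, profiles['bbc']['preprocess_regexps'])
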