diff --git a/src/libprs500/ebooks/lrf/web/convert_from.py b/src/libprs500/ebooks/lrf/web/convert_from.py
index 59a90f24e3..29622de94e 100644
--- a/src/libprs500/ebooks/lrf/web/convert_from.py
+++ b/src/libprs500/ebooks/lrf/web/convert_from.py
@@ -39,6 +39,10 @@ def option_parser():
parser.add_option('-u', '--url', dest='url', default=None,
help='The URL to download. You only need to specify this if you are not specifying a website_profile.')
+ parser.add_option('--username', dest='username', default=None,
+ help='Specify the username to be used while downloading. Only used if the profile supports it.')
+ parser.add_option('--password', dest='password', default=None,
+ help='Specify the password to be used while downloading. Only used if the profile supports it.')
parser.add_option('--timeout', help='Timeout in seconds to wait for a response from the server. Default: %default s',
default=None, type='int', dest='timeout')
parser.add_option('-r', '--max-recursions', help='Maximum number of levels to recurse i.e. depth of links to follow. Default %default',
@@ -64,7 +68,7 @@ def fetch_website(options, logger):
return fetcher.start_fetch(options.url), tdir
def create_lrf(htmlfile, options, logger):
- if not options.author:
+ if not options.author or options.author.lower() == 'unknown':
options.author = __appname__
options.header = True
if options.output:
@@ -83,9 +87,12 @@ def process_profile(args, options, logger=None):
if not profiles.has_key(args[1]):
raise CommandLineError('Unknown profile: %s\nValid profiles: %s'%(args[1], profiles.keys()))
profile = profiles[args[1]] if len(args) == 2 else profiles['default']
-
+ profile['username'] = options.username
+ profile['password'] = options.password
if profile.has_key('initialize'):
profile['initialize'](profile)
+ if profile.has_key('browser'):
+ options.browser = profile['browser']
for opt in ('url', 'timeout', 'max_recursions', 'max_files', 'delay', 'no_stylesheets'):
val = getattr(options, opt)
@@ -104,12 +111,15 @@ def process_profile(args, options, logger=None):
options.match_regexps += profile['match_regexps']
options.preprocess_regexps = profile['preprocess_regexps']
options.filter_regexps += profile['filter_regexps']
+ if len(args) == 2 and args[1] != 'default':
+ options.anchor_ids = False
htmlfile, tdir = fetch_website(options, logger)
create_lrf(htmlfile, options, logger)
if profile.has_key('finalize'):
profile['finalize'](profile)
shutil.rmtree(tdir)
+
def main(args=sys.argv, logger=None):
parser = option_parser()
diff --git a/src/libprs500/ebooks/lrf/web/nytimes.py b/src/libprs500/ebooks/lrf/web/nytimes.py
new file mode 100644
index 0000000000..d7602405e2
--- /dev/null
+++ b/src/libprs500/ebooks/lrf/web/nytimes.py
@@ -0,0 +1,146 @@
+## Copyright (C) 2007 Kovid Goyal kovid@kovidgoyal.net
+## This program is free software; you can redistribute it and/or modify
+## it under the terms of the GNU General Public License as published by
+## the Free Software Foundation; either version 2 of the License, or
+## (at your option) any later version.
+##
+## This program is distributed in the hope that it will be useful,
+## but WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+## GNU General Public License for more details.
+##
+## You should have received a copy of the GNU General Public License along
+## with this program; if not, write to the Free Software Foundation, Inc.,
+## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+'''New York Times from RSS feeds.'''
+import time, tempfile, os, shutil, calendar, operator
+
+from libprs500 import __appname__, iswindows, browser
+from libprs500.ebooks.BeautifulSoup import BeautifulSoup, BeautifulStoneSoup
+
+RSS = 'http://www.nytimes.com/services/xml/rss/index.html'
+LOGIN = 'http://www.nytimes.com/auth/login'
+
+def get_feeds(browser):
+ src = browser.open(RSS).read()
+ soup = BeautifulSoup(src[src.index('%(title)s [%(date)s]
\n'+\
+ u'
%s
'%(time.strftime('%a %d %b %Y', time.localtime()),)), - # Blank line before categories - (r'\s*NYT', lambda match: 'NYT'), - # Blank line between articles - (r'