Fix nrc_next recipe to work with changed login

Changed from jsbrowser to mechanize, as it is simpler to use (or maybe
just better documented) and no JavaScript was needed.
This commit is contained in:
Niels Giesen 2014-10-08 09:31:10 +02:00
parent 3b8cf69246
commit fcdb142a01

View File

@ -1,9 +1,10 @@
#!/usr/bin/env python2 #!/usr/bin/env python2
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Based on veezh's original recipe, Kovid Goyal's New York Times recipe and Snaabs nrc Handelsblad recipe # Based on veezh's original recipe, Kovid Goyal's New York Times
# recipe and Snaabs nrc Handelsblad recipe
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2014, Niels Giesen' __copyright__ = '2014 Niels Giesen'
''' '''
www.nrc.nl www.nrc.nl
@ -30,25 +31,26 @@ class NRCNext(BasicNewsRecipe):
} }
def build_index(self): def build_index(self):
from calibre.web.jsbrowser.browser import Browser, ElementNotFound import mechanize
br = Browser() br = mechanize.Browser()
br.visit('http://login.nrc.nl/login', timeout=60) br.open('https://login.nrc.nl/login', timeout=60)
f = br.select_form('#command') br.select_form(nr=0)
f['username'] = self.username br['username'] = self.username
f['password'] = self.password br['password'] = self.password
br.submit() response2 = br.submit()
raw = br.html raw = response2.get_data()
if '>log out<' not in raw: if 'ingelogd' not in raw: # in body class
raise ValueError('Failed to login, check username and password') raise ValueError('Failed to login, check username and password')
epubraw = None epubraw = None
for today in (date.today(), date.today() - timedelta(days=1),): for today in (date.today(), date.today() - timedelta(days=1),):
url = 'http://digitaleeditie.nrc.nl/digitaleeditie/NN/%s/%d/%s___/downloads.html' % (today.strftime('%Y'), today.month - 1, today.strftime('%Y%m%d')) url = 'http://digitaleeditie.nrc.nl/digitaleeditie/helekrant/epub/nn_%s.epub' \
% today.strftime('%Y%m%d')
self.log('Trying to download epub from:', url) self.log('Trying to download epub from:', url)
br.start_load(url, timeout=60)
try: try:
epubraw = br.download_file('#CompleteDownloads .download-list .download-button') response3 = br.open(url, timeout=60)
epubraw = response3.get_data()
break break
except ElementNotFound: except mechanize.HTTPError:
self.log('%r not available yet' % url) self.log('%r not available yet' % url)
continue continue