Fix nrc_next recipe to work with changed login

Changed from jsbrowser to mechanize, as it is simpler to use (or maybe
just better documented) and no JavaScript is needed.
This commit is contained in:
Niels Giesen 2014-10-08 09:31:10 +02:00
parent 3b8cf69246
commit fcdb142a01

View File

@ -1,9 +1,10 @@
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
# Based on veezh's original recipe, Kovid Goyal's New York Times recipe and Snaabs nrc Handelsblad recipe
# Based on veezh's original recipe, Kovid Goyal's New York Times
# recipe and Snaabs nrc Handelsblad recipe
__license__ = 'GPL v3'
__copyright__ = '2014, Niels Giesen'
__copyright__ = '2014 Niels Giesen'
'''
www.nrc.nl
@ -30,25 +31,26 @@ class NRCNext(BasicNewsRecipe):
}
def build_index(self):
from calibre.web.jsbrowser.browser import Browser, ElementNotFound
br = Browser()
br.visit('http://login.nrc.nl/login', timeout=60)
f = br.select_form('#command')
f['username'] = self.username
f['password'] = self.password
br.submit()
raw = br.html
if '>log out<' not in raw:
import mechanize
br = mechanize.Browser()
br.open('https://login.nrc.nl/login', timeout=60)
br.select_form(nr=0)
br['username'] = self.username
br['password'] = self.password
response2 = br.submit()
raw = response2.get_data()
if 'ingelogd' not in raw: # in body class
raise ValueError('Failed to login, check username and password')
epubraw = None
for today in (date.today(), date.today() - timedelta(days=1),):
url = 'http://digitaleeditie.nrc.nl/digitaleeditie/NN/%s/%d/%s___/downloads.html' % (today.strftime('%Y'), today.month - 1, today.strftime('%Y%m%d'))
url = 'http://digitaleeditie.nrc.nl/digitaleeditie/helekrant/epub/nn_%s.epub' \
% today.strftime('%Y%m%d')
self.log('Trying to download epub from:', url)
br.start_load(url, timeout=60)
try:
epubraw = br.download_file('#CompleteDownloads .download-list .download-button')
response3 = br.open(url, timeout=60)
epubraw = response3.get_data()
break
except ElementNotFound:
except mechanize.HTTPError:
self.log('%r not available yet' % url)
continue