From fcdb142a01692ba6e33945f84b16ee49bfe42043 Mon Sep 17 00:00:00 2001 From: Niels Giesen Date: Wed, 8 Oct 2014 09:31:10 +0200 Subject: [PATCH] Fix nrc_next recipe to work with changed login Changed from jsbrowser to mechanize, as it is simpler in use (or maybe just better documented) and no JavaScript was needed. --- recipes/nrc_next.recipe | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/recipes/nrc_next.recipe b/recipes/nrc_next.recipe index f4c8b5e2fc..c2b61caef3 100644 --- a/recipes/nrc_next.recipe +++ b/recipes/nrc_next.recipe @@ -1,9 +1,10 @@ #!/usr/bin/env python2 # -*- coding: utf-8 -*- -# Based on veezh's original recipe, Kovid Goyal's New York Times recipe and Snaabs nrc Handelsblad recipe +# Based on veezh's original recipe, Kovid Goyal's New York Times +# recipe and Snaabs nrc Handelsblad recipe __license__ = 'GPL v3' -__copyright__ = '2014, Niels Giesen' +__copyright__ = '2014 Niels Giesen' ''' www.nrc.nl @@ -30,25 +31,26 @@ class NRCNext(BasicNewsRecipe): } def build_index(self): - from calibre.web.jsbrowser.browser import Browser, ElementNotFound - br = Browser() - br.visit('http://login.nrc.nl/login', timeout=60) - f = br.select_form('#command') - f['username'] = self.username - f['password'] = self.password - br.submit() - raw = br.html - if '>log out<' not in raw: + import mechanize + br = mechanize.Browser() + br.open('https://login.nrc.nl/login', timeout=60) + br.select_form(nr=0) + br['username'] = self.username + br['password'] = self.password + response2 = br.submit() + raw = response2.get_data() + if 'ingelogd' not in raw: # in body class raise ValueError('Failed to login, check username and password') epubraw = None for today in (date.today(), date.today() - timedelta(days=1),): - url = 'http://digitaleeditie.nrc.nl/digitaleeditie/NN/%s/%d/%s___/downloads.html' % (today.strftime('%Y'), today.month - 1, today.strftime('%Y%m%d')) + url = 'http://digitaleeditie.nrc.nl/digitaleeditie/helekrant/epub/nn_%s.epub' \ + % today.strftime('%Y%m%d') self.log('Trying to download epub from:', url) - br.start_load(url, timeout=60) try: - epubraw = br.download_file('#CompleteDownloads .download-list .download-button') + response3 = br.open(url, timeout=60) + epubraw = response3.get_data() break - except ElementNotFound: + except mechanize.HTTPError: self.log('%r not available yet' % url) continue