Update nrcnext

Merge branch 'nrcnextmechanize' of https://github.com/pft/calibre
This commit is contained in:
Kovid Goyal 2014-10-08 15:25:16 +05:30
commit fe9f7bb37f

View File

@@ -1,9 +1,10 @@
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
# Based on veezh's original recipe, Kovid Goyal's New York Times recipe and Snaabs nrc Handelsblad recipe
# Based on veezh's original recipe, Kovid Goyal's New York Times
# recipe and Snaabs nrc Handelsblad recipe
__license__ = 'GPL v3'
__copyright__ = '2014, Niels Giesen'
__copyright__ = '2014 Niels Giesen'
'''
www.nrc.nl
@@ -30,25 +31,26 @@ class NRCNext(BasicNewsRecipe):
}
def build_index(self):
from calibre.web.jsbrowser.browser import Browser, ElementNotFound
br = Browser()
br.visit('http://login.nrc.nl/login', timeout=60)
f = br.select_form('#command')
f['username'] = self.username
f['password'] = self.password
br.submit()
raw = br.html
if '>log out<' not in raw:
import mechanize
br = mechanize.Browser()
br.open('https://login.nrc.nl/login', timeout=60)
br.select_form(nr=0)
br['username'] = self.username
br['password'] = self.password
response2 = br.submit()
raw = response2.get_data()
if 'ingelogd' not in raw: # in body class
raise ValueError('Failed to login, check username and password')
epubraw = None
for today in (date.today(), date.today() - timedelta(days=1),):
url = 'http://digitaleeditie.nrc.nl/digitaleeditie/NN/%s/%d/%s___/downloads.html' % (today.strftime('%Y'), today.month - 1, today.strftime('%Y%m%d'))
url = 'http://digitaleeditie.nrc.nl/digitaleeditie/helekrant/epub/nn_%s.epub' \
% today.strftime('%Y%m%d')
self.log('Trying to download epub from:', url)
br.start_load(url, timeout=60)
try:
epubraw = br.download_file('#CompleteDownloads .download-list .download-button')
response3 = br.open(url, timeout=60)
epubraw = response3.get_data()
break
except ElementNotFound:
except mechanize.HTTPError:
self.log('%r not available yet' % url)
continue