Update nrcnext

Merge branch 'nrcnextmechanize' of https://github.com/pft/calibre
This commit is contained in:
Kovid Goyal 2014-10-08 15:25:16 +05:30
commit fe9f7bb37f

View File

@ -1,9 +1,10 @@
#!/usr/bin/env python2 #!/usr/bin/env python2
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Based on veezh's original recipe, Kovid Goyal's New York Times recipe and Snaabs nrc Handelsblad recipe # Based on veezh's original recipe, Kovid Goyal's New York Times
# recipe and Snaabs nrc Handelsblad recipe
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2014, Niels Giesen' __copyright__ = '2014 Niels Giesen'
''' '''
www.nrc.nl www.nrc.nl
@ -30,25 +31,26 @@ class NRCNext(BasicNewsRecipe):
} }
def build_index(self): def build_index(self):
from calibre.web.jsbrowser.browser import Browser, ElementNotFound import mechanize
br = Browser() br = mechanize.Browser()
br.visit('http://login.nrc.nl/login', timeout=60) br.open('https://login.nrc.nl/login', timeout=60)
f = br.select_form('#command') br.select_form(nr=0)
f['username'] = self.username br['username'] = self.username
f['password'] = self.password br['password'] = self.password
br.submit() response2 = br.submit()
raw = br.html raw = response2.get_data()
if '>log out<' not in raw: if 'ingelogd' not in raw: # in body class
raise ValueError('Failed to login, check username and password') raise ValueError('Failed to login, check username and password')
epubraw = None epubraw = None
for today in (date.today(), date.today() - timedelta(days=1),): for today in (date.today(), date.today() - timedelta(days=1),):
url = 'http://digitaleeditie.nrc.nl/digitaleeditie/NN/%s/%d/%s___/downloads.html' % (today.strftime('%Y'), today.month - 1, today.strftime('%Y%m%d')) url = 'http://digitaleeditie.nrc.nl/digitaleeditie/helekrant/epub/nn_%s.epub' \
% today.strftime('%Y%m%d')
self.log('Trying to download epub from:', url) self.log('Trying to download epub from:', url)
br.start_load(url, timeout=60)
try: try:
epubraw = br.download_file('#CompleteDownloads .download-list .download-button') response3 = br.open(url, timeout=60)
epubraw = response3.get_data()
break break
except ElementNotFound: except mechanize.HTTPError:
self.log('%r not available yet' % url) self.log('%r not available yet' % url)
continue continue