Fix recipe "nrc•next" (new URL etcetera)

Follows recent changes in news source "NRC Handelsblad"
This commit is contained in:
Niels Giesen 2014-04-30 09:02:58 +02:00 committed by Kovid Goyal
parent d6e7df6b6f
commit 3726f69d54

View File

@ -3,15 +3,16 @@
# Based on veezh's original recipe, Kovid Goyal's New York Times recipe and Snaabs nrc Handelsblad recipe # Based on veezh's original recipe, Kovid Goyal's New York Times recipe and Snaabs nrc Handelsblad recipe
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2013, Niels Giesen' __copyright__ = '2014, Niels Giesen'
''' '''
www.nrc.nl www.nrc.nl
''' '''
import os, zipfile import os, zipfile
import time from io import BytesIO
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ptempfile import PersistentTemporaryFile from datetime import date, timedelta
class NRCNext(BasicNewsRecipe): class NRCNext(BasicNewsRecipe):
@ -19,8 +20,8 @@ class NRCNext(BasicNewsRecipe):
title = u'nrc•next' title = u'nrc•next'
description = u'De ePaper-versie van nrc•next' description = u'De ePaper-versie van nrc•next'
language = 'nl' language = 'nl'
lang = 'nl-NL'
needs_subscription = True needs_subscription = True
requires_version = (1, 24, 0)
__author__ = 'Niels Giesen' __author__ = 'Niels Giesen'
@ -28,48 +29,33 @@ class NRCNext(BasicNewsRecipe):
'no_default_epub_cover' : True 'no_default_epub_cover' : True
} }
def get_browser(self):
br = BasicNewsRecipe.get_browser(self)
if self.username is not None and self.password is not None:
br.open('http://login.nrc.nl/login')
br.select_form(nr=0)
br['username'] = self.username
br['password'] = self.password
br.submit()
return br
def build_index(self): def build_index(self):
from calibre.web.jsbrowser.browser import Browser, ElementNotFound
br = Browser()
br.visit('http://login.nrc.nl/login', timeout=60)
f = br.select_form('#command')
f['username'] = self.username
f['password'] = self.password
br.submit()
raw = br.html
if '>log out<' not in raw:
raise ValueError('Failed to login, check username and password')
epubraw = None
for today in (date.today(), date.today() - timedelta(days=1),):
url = 'http://digitaleeditie.nrc.nl/digitaleeditie/NN/%s/3/%s___/downloads.html' % (today.strftime('%Y'), today.strftime('%Y%m%d'))
self.log('Trying to download epub from:', url)
br.start_load(url, timeout=60)
try:
epubraw = br.download_file('#CompleteDownloads .download-list .download-button')
break
except ElementNotFound:
self.log('%r not available yet' % url)
continue
today = time.strftime("%Y%m%d") if epubraw is None:
domain = "http://digitaleeditie.nrc.nl"
url = domain + "/digitaleeditie/helekrant/epub/nn_" + today + ".epub"
#print url
try:
br = self.get_browser()
f = br.open(url)
except:
self.report_progress(0,_('Kan niet inloggen om editie te downloaden'))
raise ValueError('Krant van vandaag nog niet beschikbaar') raise ValueError('Krant van vandaag nog niet beschikbaar')
tmp = PersistentTemporaryFile(suffix='.epub') zfile = zipfile.ZipFile(BytesIO(epubraw), 'r')
self.report_progress(0,_('downloading epub')) zfile.extractall(self.output_dir)
tmp.write(f.read())
f.close()
br.close()
if zipfile.is_zipfile(tmp):
try:
zfile = zipfile.ZipFile(tmp.name, 'r')
zfile.extractall(self.output_dir)
self.report_progress(0,_('extracting epub'))
except zipfile.BadZipfile:
self.report_progress(0,_('BadZip error, continuing'))
tmp.close()
index = os.path.join(self.output_dir, 'metadata.opf') index = os.path.join(self.output_dir, 'metadata.opf')
self.report_progress(1,_('epub downloaded and extracted'))
return index return index