Update NRC Handelsblad (subscription version)

Fixes #1276962 [Private bug](https://bugs.launchpad.net/calibre/+bug/1276962)
This commit is contained in:
Kovid Goyal 2014-02-13 16:45:32 +05:30
parent 1c7c2ec460
commit c1eee81ff3

View File

@@ -9,9 +9,10 @@ __copyright__ = '2011, Snaab'
www.nrc.nl www.nrc.nl
''' '''
import os, zipfile import os, zipfile
import time from io import BytesIO
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ptempfile import PersistentTemporaryFile from datetime import date, timedelta
class NRCHandelsblad(BasicNewsRecipe): class NRCHandelsblad(BasicNewsRecipe):
@@ -19,58 +20,42 @@ class NRCHandelsblad(BasicNewsRecipe):
title = u'NRC Handelsblad' title = u'NRC Handelsblad'
description = u'De ePaper-versie van NRC' description = u'De ePaper-versie van NRC'
language = 'nl' language = 'nl'
lang = 'nl-NL'
needs_subscription = True needs_subscription = True
requires_version = (1, 24, 0)
__author__ = 'Snaab' __author__ = 'Kovid Goyal'
conversion_options = { conversion_options = {
'no_default_epub_cover' : True 'no_default_epub_cover' : True
} }
def get_browser(self):
br = BasicNewsRecipe.get_browser(self)
if self.username is not None and self.password is not None:
br.open('http://login.nrc.nl/login')
br.select_form(nr=0)
br['username'] = self.username
br['password'] = self.password
br.submit()
return br
def build_index(self): def build_index(self):
from calibre.web.jsbrowser.browser import Browser, ElementNotFound
today = time.strftime("%Y%m%d") br = Browser()
br.visit('http://login.nrc.nl/login', timeout=60)
domain = "http://digitaleeditie.nrc.nl" f = br.select_form('#command')
f['username'] = self.username
url = domain + "/digitaleeditie/helekrant/epub/nrc_" + today + ".epub" f['password'] = self.password
#print url br.submit()
raw = br.html
if '>log out<' not in raw:
raise ValueError('Failed to login, check username and password')
epubraw = None
for today in (date.today(), date.today() - timedelta(days=1),):
url = 'http://digitaleeditie.nrc.nl/digitaleeditie/NH/%s/1/%s___/downloads.html' % (today.strftime('%Y'), today.strftime('%Y%m%d'))
self.log('Trying to download epub from:', url)
br.start_load(url, timeout=60)
try: try:
br = self.get_browser() epubraw = br.download_file('#CompleteDownloads .download-list .download-button')
f = br.open(url) break
except: except ElementNotFound:
self.report_progress(0,_('Kan niet inloggen om editie te downloaden')) self.log('%r not available yet' % url)
continue
if epubraw is None:
raise ValueError('Krant van vandaag nog niet beschikbaar') raise ValueError('Krant van vandaag nog niet beschikbaar')
zfile = zipfile.ZipFile(BytesIO(epubraw), 'r')
tmp = PersistentTemporaryFile(suffix='.epub')
self.report_progress(0,_('downloading epub'))
tmp.write(f.read())
f.close()
br.close()
if zipfile.is_zipfile(tmp):
try:
zfile = zipfile.ZipFile(tmp.name, 'r')
zfile.extractall(self.output_dir) zfile.extractall(self.output_dir)
self.report_progress(0,_('extracting epub'))
except zipfile.BadZipfile:
self.report_progress(0,_('BadZip error, continuing'))
tmp.close()
index = os.path.join(self.output_dir, 'metadata.opf') index = os.path.join(self.output_dir, 'metadata.opf')
self.report_progress(1,_('epub downloaded and extracted'))
return index return index