mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Update NRC Handelsblad (subscription version)
Fixes #1276962 [Private bug](https://bugs.launchpad.net/calibre/+bug/1276962)
This commit is contained in:
parent
1c7c2ec460
commit
c1eee81ff3
@@ -9,9 +9,10 @@ __copyright__ = '2011, Snaab'
|
||||
www.nrc.nl
|
||||
'''
|
||||
import os, zipfile
|
||||
import time
|
||||
from io import BytesIO
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ptempfile import PersistentTemporaryFile
|
||||
from datetime import date, timedelta
|
||||
|
||||
|
||||
class NRCHandelsblad(BasicNewsRecipe):
|
||||
@@ -19,58 +20,42 @@ class NRCHandelsblad(BasicNewsRecipe):
|
||||
title = u'NRC Handelsblad'
|
||||
description = u'De ePaper-versie van NRC'
|
||||
language = 'nl'
|
||||
lang = 'nl-NL'
|
||||
needs_subscription = True
|
||||
requires_version = (1, 24, 0)
|
||||
|
||||
__author__ = 'Snaab'
|
||||
__author__ = 'Kovid Goyal'
|
||||
|
||||
conversion_options = {
|
||||
'no_default_epub_cover' : True
|
||||
}
|
||||
|
||||
def get_browser(self):
|
||||
br = BasicNewsRecipe.get_browser(self)
|
||||
if self.username is not None and self.password is not None:
|
||||
br.open('http://login.nrc.nl/login')
|
||||
br.select_form(nr=0)
|
||||
br['username'] = self.username
|
||||
br['password'] = self.password
|
||||
br.submit()
|
||||
return br
|
||||
|
||||
def build_index(self):
|
||||
from calibre.web.jsbrowser.browser import Browser, ElementNotFound
|
||||
br = Browser()
|
||||
br.visit('http://login.nrc.nl/login', timeout=60)
|
||||
f = br.select_form('#command')
|
||||
f['username'] = self.username
|
||||
f['password'] = self.password
|
||||
br.submit()
|
||||
raw = br.html
|
||||
if '>log out<' not in raw:
|
||||
raise ValueError('Failed to login, check username and password')
|
||||
epubraw = None
|
||||
for today in (date.today(), date.today() - timedelta(days=1),):
|
||||
url = 'http://digitaleeditie.nrc.nl/digitaleeditie/NH/%s/1/%s___/downloads.html' % (today.strftime('%Y'), today.strftime('%Y%m%d'))
|
||||
self.log('Trying to download epub from:', url)
|
||||
br.start_load(url, timeout=60)
|
||||
try:
|
||||
epubraw = br.download_file('#CompleteDownloads .download-list .download-button')
|
||||
break
|
||||
except ElementNotFound:
|
||||
self.log('%r not available yet' % url)
|
||||
continue
|
||||
|
||||
today = time.strftime("%Y%m%d")
|
||||
|
||||
domain = "http://digitaleeditie.nrc.nl"
|
||||
|
||||
url = domain + "/digitaleeditie/helekrant/epub/nrc_" + today + ".epub"
|
||||
#print url
|
||||
|
||||
try:
|
||||
br = self.get_browser()
|
||||
f = br.open(url)
|
||||
except:
|
||||
self.report_progress(0,_('Kan niet inloggen om editie te downloaden'))
|
||||
if epubraw is None:
|
||||
raise ValueError('Krant van vandaag nog niet beschikbaar')
|
||||
|
||||
|
||||
tmp = PersistentTemporaryFile(suffix='.epub')
|
||||
self.report_progress(0,_('downloading epub'))
|
||||
tmp.write(f.read())
|
||||
f.close()
|
||||
br.close()
|
||||
if zipfile.is_zipfile(tmp):
|
||||
try:
|
||||
zfile = zipfile.ZipFile(tmp.name, 'r')
|
||||
zfile.extractall(self.output_dir)
|
||||
self.report_progress(0,_('extracting epub'))
|
||||
except zipfile.BadZipfile:
|
||||
self.report_progress(0,_('BadZip error, continuing'))
|
||||
|
||||
tmp.close()
|
||||
zfile = zipfile.ZipFile(BytesIO(epubraw), 'r')
|
||||
zfile.extractall(self.output_dir)
|
||||
index = os.path.join(self.output_dir, 'metadata.opf')
|
||||
|
||||
self.report_progress(1,_('epub downloaded and extracted'))
|
||||
|
||||
return index
|
||||
|
Loading…
x
Reference in New Issue
Block a user