#!/usr/bin/env python
# -*- coding: utf-8 -*-

# Based on veezh's original recipe and Kovid Goyal's New York Times recipe
__license__ = 'GPL v3'
__copyright__ = '2011, Snaab'

'''
www.nrc.nl
'''
import os
import zipfile
from io import BytesIO
from calibre.web.feeds.news import BasicNewsRecipe
from datetime import date, timedelta


class NRCHandelsblad(BasicNewsRecipe):
    """Recipe for the NRC Handelsblad digital edition.

    Rather than assembling articles from feeds, this recipe overrides
    ``build_index`` to log in to the subscriber site, download the EPUB
    offered on the edition's downloads page and unpack it into the
    recipe's output directory.
    """

    title = u'NRC Handelsblad'
    description = u'De ePaper-versie van NRC'
    language = 'nl'
    needs_subscription = True
    requires_version = (1, 24, 0)

    __author__ = 'Kovid Goyal'
    conversion_options = {
        'no_default_epub_cover': True
    }

    def build_index(self):
        """Download the most recent edition and return the path to its OPF.

        Logs in with ``self.username`` / ``self.password``, then tries the
        downloads page for today and, failing that, for yesterday. The
        downloaded EPUB is extracted into ``self.output_dir``.

        Returns:
            The path ``<self.output_dir>/metadata.opf``.

        Raises:
            ValueError: if the page after login does not contain the
                logout link, or if no download button was found for
                either candidate date.
        """
        # Imported here, as in the original recipe, so the JS browser is
        # only loaded when the recipe actually runs.
        from calibre.web.jsbrowser.browser import Browser, ElementNotFound

        br = Browser()
        br.visit('http://login.nrc.nl/login', timeout=60)
        f = br.select_form('#fm1')
        f['username'] = self.username
        f['password'] = self.password
        br.submit('button[type="submit"]')
        raw = br.html
        # The logout link is used as the marker of a successful login.
        if '>Uitloggen<' not in raw:
            raise ValueError('Failed to login, check username and password')

        # Evaluate "today" exactly once so that both candidates stay
        # consistent even if the clock passes midnight during the run.
        newest = date.today()
        candidates = (newest, newest - timedelta(days=1))

        epubraw = None
        for today in candidates:
            # NOTE(review): the month path component is month - 1,
            # presumably because the site numbers months from zero - confirm.
            url = 'http://digitaleeditie.nrc.nl/digitaleeditie/NH/%s/%d/%s___/downloads.html' % (
                today.strftime('%Y'), today.month - 1, today.strftime('%Y%m%d'))
            self.log('Trying to download epub from:', url)
            br.start_load(url, timeout=60)
            try:
                epubraw = br.download_file(
                    '#CompleteDownloads .download-list .download-button')
            except ElementNotFound:
                # No download button on this page; fall back to the
                # next (older) candidate date.
                self.log('%r not available yet' % url)
            else:
                break

        if epubraw is None:
            raise ValueError('Krant van vandaag nog niet beschikbaar')

        # Close the archive deterministically once extraction is done.
        with zipfile.ZipFile(BytesIO(epubraw), 'r') as zfile:
            zfile.extractall(self.output_dir)

        index = os.path.join(self.output_dir, 'metadata.opf')
        return index