From bf68165605aa3465e4678ecc9abc07c2a020677a Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 7 Mar 2011 08:50:14 -0700 Subject: [PATCH] Update NRC Handelsblad Epub version --- resources/recipes/nrc-nl-epub.recipe | 50 +++++++++++++++++++--------- 1 file changed, 34 insertions(+), 16 deletions(-) diff --git a/resources/recipes/nrc-nl-epub.recipe b/resources/recipes/nrc-nl-epub.recipe index da9b9195ce..2d190e4d0a 100644 --- a/resources/recipes/nrc-nl-epub.recipe +++ b/resources/recipes/nrc-nl-epub.recipe @@ -1,14 +1,14 @@ -#!/usr/bin/env python +#!/usr/bin/env python2 # -*- coding: utf-8 -*- -#Based on Lars Jacob's Taz Digiabo recipe +#Based on veezh's original recipe and Kovid Goyal's New York Times recipe __license__ = 'GPL v3' -__copyright__ = '2010, veezh' +__copyright__ = '2011, Snaab' ''' www.nrc.nl ''' -import os, urllib2, zipfile +import os, zipfile import time from calibre.web.feeds.news import BasicNewsRecipe from calibre.ptempfile import PersistentTemporaryFile @@ -17,41 +17,59 @@ from calibre.ptempfile import PersistentTemporaryFile class NRCHandelsblad(BasicNewsRecipe): title = u'NRC Handelsblad' - description = u'De EPUB-versie van NRC' + description = u'De ePaper-versie van NRC' language = 'nl' lang = 'nl-NL' + needs_subscription = True - __author__ = 'veezh' + __author__ = 'Snaab' conversion_options = { 'no_default_epub_cover' : True } + def get_browser(self): + br = BasicNewsRecipe.get_browser() + if self.username is not None and self.password is not None: + br.open('http://login.nrc.nl/login') + br.select_form(nr=0) + br['username'] = self.username + br['password'] = self.password + br.submit() + return br + def build_index(self): + today = time.strftime("%Y%m%d") + domain = "http://digitaleeditie.nrc.nl" url = domain + "/digitaleeditie/helekrant/epub/nrc_" + today + ".epub" -# print url + #print url try: - f = urllib2.urlopen(url) - except urllib2.HTTPError: + br = self.get_browser() + f = br.open(url) + except: self.report_progress(0,_('Kan niet inloggen om editie te downloaden')) raise ValueError('Krant van vandaag nog niet beschikbaar') + tmp = PersistentTemporaryFile(suffix='.epub') self.report_progress(0,_('downloading epub')) tmp.write(f.read()) - tmp.close() - - zfile = zipfile.ZipFile(tmp.name, 'r') - self.report_progress(0,_('extracting epub')) - - zfile.extractall(self.output_dir) + f.close() + br.close() + if zipfile.is_zipfile(tmp): + try: + zfile = zipfile.ZipFile(tmp.name, 'r') + zfile.extractall(self.output_dir) + self.report_progress(0,_('extracting epub')) + except zipfile.BadZipfile: + self.report_progress(0,_('BadZip error, continuing')) tmp.close() - index = os.path.join(self.output_dir, 'content.opf') + index = os.path.join(self.output_dir, 'metadata.opf') self.report_progress(1,_('epub downloaded and extracted'))