diff --git a/recipes/nrc-nl-epub.recipe b/recipes/nrc-nl-epub.recipe index 961eb723c2..dbea129eca 100644 --- a/recipes/nrc-nl-epub.recipe +++ b/recipes/nrc-nl-epub.recipe @@ -9,9 +9,10 @@ __copyright__ = '2011, Snaab' www.nrc.nl ''' import os, zipfile -import time +from io import BytesIO + from calibre.web.feeds.news import BasicNewsRecipe -from calibre.ptempfile import PersistentTemporaryFile +from datetime import date, timedelta class NRCHandelsblad(BasicNewsRecipe): @@ -19,58 +20,42 @@ class NRCHandelsblad(BasicNewsRecipe): title = u'NRC Handelsblad' description = u'De ePaper-versie van NRC' language = 'nl' - lang = 'nl-NL' needs_subscription = True + requires_version = (1, 24, 0) - __author__ = 'Snaab' + __author__ = 'Kovid Goyal' conversion_options = { 'no_default_epub_cover' : True } - def get_browser(self): - br = BasicNewsRecipe.get_browser(self) - if self.username is not None and self.password is not None: - br.open('http://login.nrc.nl/login') - br.select_form(nr=0) - br['username'] = self.username - br['password'] = self.password - br.submit() - return br - def build_index(self): + from calibre.web.jsbrowser.browser import Browser, ElementNotFound + br = Browser() + br.visit('http://login.nrc.nl/login', timeout=60) + f = br.select_form('#command') + f['username'] = self.username + f['password'] = self.password + br.submit() + raw = br.html + if '>log out<' not in raw: + raise ValueError('Failed to login, check username and password') + epubraw = None + for today in (date.today(), date.today() - timedelta(days=1),): + url = 'http://digitaleeditie.nrc.nl/digitaleeditie/NH/%s/1/%s___/downloads.html' % (today.strftime('%Y'), today.strftime('%Y%m%d')) + self.log('Trying to download epub from:', url) + br.start_load(url, timeout=60) + try: + epubraw = br.download_file('#CompleteDownloads .download-list .download-button') + break + except ElementNotFound: + self.log('%r not available yet' % url) + continue - today = time.strftime("%Y%m%d") - - domain = "http://digitaleeditie.nrc.nl" - - url = domain + "/digitaleeditie/helekrant/epub/nrc_" + today + ".epub" - #print url - - try: - br = self.get_browser() - f = br.open(url) - except: - self.report_progress(0,_('Kan niet inloggen om editie te downloaden')) + if epubraw is None: raise ValueError('Krant van vandaag nog niet beschikbaar') - - tmp = PersistentTemporaryFile(suffix='.epub') - self.report_progress(0,_('downloading epub')) - tmp.write(f.read()) - f.close() - br.close() - if zipfile.is_zipfile(tmp): - try: - zfile = zipfile.ZipFile(tmp.name, 'r') - zfile.extractall(self.output_dir) - self.report_progress(0,_('extracting epub')) - except zipfile.BadZipfile: - self.report_progress(0,_('BadZip error, continuing')) - - tmp.close() + zfile = zipfile.ZipFile(BytesIO(epubraw), 'r') + zfile.extractall(self.output_dir) index = os.path.join(self.output_dir, 'metadata.opf') - - self.report_progress(1,_('epub downloaded and extracted')) - return index