Update NRC Handelsblad Epub version

2025-07-31 14:33:54 -04:00 · 2011-03-07 08:50:14 -07:00 · 2011-03-07 08:50:14 -07:00 · bf68165605
commit bf68165605
parent 9526d1b8b4
1 changed files with 34 additions and 16 deletions
--- a/resources/recipes/nrc-nl-epub.recipe
+++ b/resources/recipes/nrc-nl-epub.recipe
@@ -1,14 +1,14 @@
-#!/usr/bin/env  python
+#!/usr/bin/env  python2
 # -*- coding: utf-8 -*-
-#Based on Lars Jacob's Taz Digiabo recipe
+#Based on veezh's original recipe and Kovid Goyal's New York Times recipe

 __license__   = 'GPL v3'
-__copyright__ = '2010, veezh'
+__copyright__ = '2011, Snaab'

 '''
 www.nrc.nl
 '''
-import os, urllib2, zipfile
+import os, zipfile
 import time
 from calibre.web.feeds.news import BasicNewsRecipe
 from calibre.ptempfile import PersistentTemporaryFile
@@ -17,41 +17,59 @@ from calibre.ptempfile import PersistentTemporaryFile
 class NRCHandelsblad(BasicNewsRecipe):

    title = u'NRC Handelsblad'
-    description = u'De EPUB-versie van NRC'
+    description = u'De ePaper-versie van NRC'
    language = 'nl'
    lang = 'nl-NL'
+    needs_subscription = True

-    __author__ = 'veezh'
+    __author__ = 'Snaab'

    conversion_options = {
        'no_default_epub_cover' : True
    }

+    def get_browser(self):  # Return a browser; submits the nrc.nl login form first when both credentials are set.
+        br = BasicNewsRecipe.get_browser()  # NOTE(review): called without an instance - confirm the base method allows this in the targeted calibre version
+        if self.username is not None and self.password is not None:  # log in only when both credentials are present
+            br.open('http://login.nrc.nl/login')
+            br.select_form(nr=0)  # form is picked by index 0, not by name
+            br['username']   = self.username
+            br['password'] = self.password
+            br.submit()
+        return br  # returned in every case, whether or not the login form was submitted
+
    def build_index(self):
+
        today = time.strftime("%Y%m%d")
+
        domain = "http://digitaleeditie.nrc.nl"

        url = domain + "/digitaleeditie/helekrant/epub/nrc_" + today + ".epub"
-#        print url
+        #print url

        try:
-            f = urllib2.urlopen(url)
-        except urllib2.HTTPError:
+            br = self.get_browser()
+            f = br.open(url)
+        except:
            self.report_progress(0,_('Kan niet inloggen om editie te downloaden'))
            raise ValueError('Krant van vandaag nog niet beschikbaar')

+
        tmp = PersistentTemporaryFile(suffix='.epub')
        self.report_progress(0,_('downloading epub'))
        tmp.write(f.read())
-        tmp.close()
-
-        zfile = zipfile.ZipFile(tmp.name, 'r')
-        self.report_progress(0,_('extracting epub'))
-
-        zfile.extractall(self.output_dir)
+        f.close()
+        br.close()
+        if zipfile.is_zipfile(tmp):
+            try:
+                zfile = zipfile.ZipFile(tmp.name, 'r')
+                zfile.extractall(self.output_dir)
+                self.report_progress(0,_('extracting epub'))
+            except zipfile.BadZipfile:
+                self.report_progress(0,_('BadZip error, continuing'))

        tmp.close()
-        index = os.path.join(self.output_dir, 'content.opf')
+        index = os.path.join(self.output_dir, 'metadata.opf')

        self.report_progress(1,_('epub downloaded and extracted'))