Update NRC Handelsblad Epub version

This commit is contained in:
Kovid Goyal 2011-03-07 08:50:14 -07:00
parent 9526d1b8b4
commit bf68165605

View File

@ -1,14 +1,14 @@
#!/usr/bin/env python #!/usr/bin/env python2
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
#Based on Lars Jacob's Taz Digiabo recipe #Based on veezh's original recipe and Kovid Goyal's New York Times recipe
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2010, veezh' __copyright__ = '2011, Snaab'
''' '''
www.nrc.nl www.nrc.nl
''' '''
import os, urllib2, zipfile import os, zipfile
import time import time
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ptempfile import PersistentTemporaryFile from calibre.ptempfile import PersistentTemporaryFile
@ -17,41 +17,59 @@ from calibre.ptempfile import PersistentTemporaryFile
class NRCHandelsblad(BasicNewsRecipe): class NRCHandelsblad(BasicNewsRecipe):
title = u'NRC Handelsblad' title = u'NRC Handelsblad'
description = u'De EPUB-versie van NRC' description = u'De ePaper-versie van NRC'
language = 'nl' language = 'nl'
lang = 'nl-NL' lang = 'nl-NL'
needs_subscription = True
__author__ = 'veezh' __author__ = 'Snaab'
conversion_options = { conversion_options = {
'no_default_epub_cover' : True 'no_default_epub_cover' : True
} }
def get_browser(self):
    """Return the browser used for downloads, logged in when possible.

    When both ``self.username`` and ``self.password`` are set, the NRC
    login page is opened, the first form on that page is filled with the
    credentials and submitted. Otherwise the browser is returned without
    any login attempt.
    """
    # NOTE(review): the base method is called on the class, without an
    # instance -- confirm this matches the get_browser signature of the
    # calibre version this recipe targets.
    browser = BasicNewsRecipe.get_browser()

    # No (complete) credentials: hand back the plain browser.
    if self.username is None or self.password is None:
        return browser

    browser.open('http://login.nrc.nl/login')
    # The login form is addressed by position (first form on the page).
    browser.select_form(nr=0)
    browser['username'] = self.username
    browser['password'] = self.password
    browser.submit()
    return browser
def build_index(self): def build_index(self):
today = time.strftime("%Y%m%d") today = time.strftime("%Y%m%d")
domain = "http://digitaleeditie.nrc.nl" domain = "http://digitaleeditie.nrc.nl"
url = domain + "/digitaleeditie/helekrant/epub/nrc_" + today + ".epub" url = domain + "/digitaleeditie/helekrant/epub/nrc_" + today + ".epub"
# print url #print url
try: try:
f = urllib2.urlopen(url) br = self.get_browser()
except urllib2.HTTPError: f = br.open(url)
except:
self.report_progress(0,_('Kan niet inloggen om editie te downloaden')) self.report_progress(0,_('Kan niet inloggen om editie te downloaden'))
raise ValueError('Krant van vandaag nog niet beschikbaar') raise ValueError('Krant van vandaag nog niet beschikbaar')
tmp = PersistentTemporaryFile(suffix='.epub') tmp = PersistentTemporaryFile(suffix='.epub')
self.report_progress(0,_('downloading epub')) self.report_progress(0,_('downloading epub'))
tmp.write(f.read()) tmp.write(f.read())
tmp.close() f.close()
br.close()
zfile = zipfile.ZipFile(tmp.name, 'r') if zipfile.is_zipfile(tmp):
self.report_progress(0,_('extracting epub')) try:
zfile = zipfile.ZipFile(tmp.name, 'r')
zfile.extractall(self.output_dir) zfile.extractall(self.output_dir)
self.report_progress(0,_('extracting epub'))
except zipfile.BadZipfile:
self.report_progress(0,_('BadZip error, continuing'))
tmp.close() tmp.close()
index = os.path.join(self.output_dir, 'content.opf') index = os.path.join(self.output_dir, 'metadata.opf')
self.report_progress(1,_('epub downloaded and extracted')) self.report_progress(1,_('epub downloaded and extracted'))