mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-09-29 15:31:08 -04:00
74 lines
2.5 KiB
Python
74 lines
2.5 KiB
Python
#!/usr/bin/env python2
|
|
# -*- coding: utf-8 -*-
|
|
# Based on veezh's original recipe, Kovid Goyal's New York Times recipe and Snaabs nrc Handelsblad recipe
|
|
|
|
__license__ = 'GPL v3'
|
|
__copyright__ = '2014, Niels Giesen'
|
|
|
|
'''
|
|
www.nrc.nl
|
|
'''
|
|
import os, zipfile, re
|
|
from io import BytesIO
|
|
|
|
from calibre.web.feeds.news import BasicNewsRecipe
|
|
from datetime import date, timedelta
|
|
|
|
|
|
class NRCNext(BasicNewsRecipe):
    '''
    Recipe for the ePaper edition of nrc next (www.nrc.nl).

    Instead of scraping feeds, this recipe logs in with the subscriber's
    credentials, downloads the publisher's ready-made EPUB and unpacks it
    into the recipe output directory.
    '''

    title = u'nrc•next'
    description = u'De ePaper-versie van nrc•next'
    language = 'nl'
    needs_subscription = True
    requires_version = (1, 24, 0)

    __author__ = 'Niels Giesen'

    conversion_options = {
        'no_default_epub_cover' : True
    }

    def build_index(self):
        '''
        Download and unpack the most recent edition.

        Logs in at login.nrc.nl, then tries the download page of today's
        edition and, if that has no download button, yesterday's. The EPUB
        is extracted into ``self.output_dir`` and ``<em>`` tags are stripped
        from the extracted .html and .ncx files.

        Returns the path of ``metadata.opf`` inside ``self.output_dir``.

        Raises ValueError if the login fails or if neither edition can be
        downloaded.
        '''
        from calibre.web.jsbrowser.browser import Browser, ElementNotFound

        br = Browser()
        br.visit('http://login.nrc.nl/login', timeout=60)
        f = br.select_form('#command')
        f['username'] = self.username
        f['password'] = self.password
        br.submit()
        raw = br.html
        # The page only contains a "log out" link once authenticated.
        if '>log out<' not in raw:
            raise ValueError('Failed to login, check username and password')

        epubraw = None
        # Evaluate the current date once so both candidates are consistent
        # even if the recipe happens to run across midnight.
        today = date.today()
        for day in (today, today - timedelta(days=1)):
            # NOTE(review): the month segment is sent as month - 1, which
            # presumably matches a zero-based month in the server's URL
            # scheme -- kept exactly as the original recipe had it.
            url = 'http://digitaleeditie.nrc.nl/digitaleeditie/NN/%s/%d/%s___/downloads.html' % (
                day.strftime('%Y'), day.month - 1, day.strftime('%Y%m%d'))
            self.log('Trying to download epub from:', url)
            br.start_load(url, timeout=60)
            try:
                epubraw = br.download_file('#CompleteDownloads .download-list .download-button')
            except ElementNotFound:
                self.log('%r not available yet' % url)
            else:
                break

        if epubraw is None:
            raise ValueError('Krant van vandaag nog niet beschikbaar')

        # Close the archive explicitly once its contents are on disk.
        zfile = zipfile.ZipFile(BytesIO(epubraw), 'r')
        try:
            zfile.extractall(self.output_dir)
            namelist = zfile.namelist()
        finally:
            zfile.close()

        # Unwrap <em> elements, keeping their text. The attribute part is
        # limited to a single tag and the body is non-greedy, so several
        # <em> elements on one line are each unwrapped on their own instead
        # of everything between the first <em and the last </em> being
        # dropped; requiring whitespace or '>' after "em" also avoids
        # matching other tags that merely start with "<em".
        emre = re.compile(r'<em(?:\s[^>]*)?>(.*?)</em>')
        subst = '\\1'
        for name in namelist:
            ext = os.path.splitext(name)[1]
            if ext in ('.html', '.ncx'):
                fname = os.path.join(self.output_dir, name)
                with open(fname) as f:
                    s = f.read()
                s = emre.sub(subst, s)
                with open(fname, 'w') as f:
                    f.write(s)

        index = os.path.join(self.output_dir, 'metadata.opf')
        return index
|