calibre/recipes/denikn.cz.recipe
Robert Mihaly e86be7b3bc
Fix Czech local time handling in Denik N recipe
Using strftime requires locale change which is not thread safe.
Keeping it simple and using a hardcoded list of day and month names,
this should work the same way on python 2 and 3 as well.
2019-11-26 20:21:40 +01:00

108 lines
3.4 KiB
Python

#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import unicode_literals, division, absolute_import, print_function
from calibre.web.feeds.news import BasicNewsRecipe
from datetime import datetime
# Czech weekday abbreviations, indexed by datetime.weekday() (Monday == 0).
# Every slot must be non-empty, otherwise the generated title starts with
# a bare comma on that day (Friday is 'Pá', short for 'pátek').
CZ_DAYS = ['Po', 'Út', 'St', 'Čt', 'Pá', 'So', 'Ne']
# Czech month abbreviations, indexed by (month number - 1).
CZ_MONTHS = ['led', 'úno', 'bře', 'dub', 'kvě', 'čen', 'čec', 'srp', 'zář', 'říj', 'lis', 'pro']
def cz_title_time():
    """
    Build today's date as a Czech-formatted string.

    The weekday and month abbreviations come from the hardcoded CZ_DAYS and
    CZ_MONTHS lookup tables instead of strftime, because strftime would
    require a locale change and that is not thread safe.

    Returns a string shaped like '<weekday>, <day> <month> <year>'.
    """
    template = '{weekday}, {day} {month} {year}'
    now = datetime.today()
    day_name = CZ_DAYS[now.weekday()]
    # Months are numbered from 1, the lookup table from 0
    month_name = CZ_MONTHS[now.month - 1]
    return template.format(
        weekday=day_name,
        day=now.day,
        month=month_name,
        year=now.year,
    )
class DenikNRecipe(BasicNewsRecipe):
    """
    Calibre news recipe built on the RSS feeds of https://denikn.cz/
    """

    # ---- Publication metadata ----
    title = u'Deník N'
    __author__ = 'Robert Mihaly'
    publisher = u'N Media'
    description = u'Deník N - Nezávislý český deník'
    language = 'cs'
    publication_type = 'newspaper'
    tags = 'news'
    cover_url = 'https://denikn.cz/wp-content/themes/dn-2-cz/logo.svg'

    # Title with a Czech-formatted date; evaluated once, when the class
    # body runs, and passed to conversion_options further down
    custom_title = u'Deník N - ' + cz_title_time()

    # ---- Feed selection ----
    # Settings chosen for a daily feed
    oldest_article = 1
    max_articles_per_feed = 20

    # Sections to download; commented-out entries are available but disabled
    feeds = [
        # (u'Vše', 'https://denikn.cz/feed'),
        (u'Česko', 'https://denikn.cz/cesko/feed/'),
        (u'Svět', 'https://denikn.cz/svet/feed'),
        (u'Ekonomika', 'https://denikn.cz/ekonomika/feed'),
        # (u'Komentáře', 'https://denikn.cz/nazory/feed'),
        (u'Kultura', 'https://denikn.cz/kultura/feed'),
        (u'Věda a technologie', 'https://denikn.cz/veda/feed'),
        (u'Sport', 'https://denikn.cz/sport/feed'),
    ]
    remove_empty_feeds = True
    ignore_duplicate_articles = {'title', 'url'}

    # ---- Download behaviour ----
    # Shorter than the default of 120, which seems too long
    timeout = 30
    # Transfer content gzip-compressed
    handle_gzip = True
    # 'optional' would also be possible, but without a subscription
    # there is not much content to fetch
    needs_subscription = True

    # ---- Article processing ----
    # The feeds do not embed the full article text
    use_embedded_content = False
    # None means auto-detect; 'utf8' could be set explicitly instead
    encoding = None
    auto_cleanup = True
    remove_javascript = True
    resolve_internal_links = True
    compress_news_images = True
    scale_news_images_to_device = True

    # ---- Conversion ----
    # Supplies the Czech-dated title and makes the publisher show correctly
    conversion_options = {
        'title': custom_title,
        # 'comments' : description,
        # 'tags' : tags,
        # 'language' : language,
        'publisher': publisher,
        # 'authors' : publisher,
        # 'smarten_punctuation' : True
    }

    def get_browser(self):
        """
        Return the browser obtained from BasicNewsRecipe.get_browser.

        When both username and password are set, the sign-in form at
        predplatne.denikn.cz is filled in and submitted first.
        """
        browser = BasicNewsRecipe.get_browser(self)
        if self.username is None or self.password is None:
            # Incomplete credentials: return the browser without signing in
            return browser
        browser.open('https://predplatne.denikn.cz/sign/in/')
        browser.select_form(id='frm-signInForm')
        browser['username'] = self.username
        browser['password'] = self.password
        browser.submit()
        return browser