calibre/recipes/der_standard.recipe
Kovid Goyal 29cd8d64ea
Change shebangs to python from python2
Also remove a few other miscellaneous references to python2
2020-08-22 18:47:51 +05:30

92 lines
3.9 KiB
Python

#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import unicode_literals, division, absolute_import, print_function
__license__ = 'GPL v3'
__copyright__ = '2009, Gerhard Aigner <gerhard.aigner at gmail.com>'
''' http://www.derstandard.at - Austrian Newspaper '''
from calibre.web.feeds.news import BasicNewsRecipe
def classes(classes):
    """Build a tag-matching spec that selects elements by CSS class.

    ``classes`` is a string of class names separated by single spaces.
    The result is a dict of the form ``{'attrs': {'class': predicate}}``
    where ``predicate`` receives an element's ``class`` attribute value and
    yields a truthy result when that value shares at least one class name
    with the requested ones.
    """
    wanted = frozenset(classes.split(' '))

    def has_wanted_class(value):
        # A missing or empty class attribute is handed back unchanged
        # (None or ''), which is falsy and therefore never matches.
        if not value:
            return value
        return frozenset(value.split()) & wanted

    return {'attrs': {'class': has_wanted_class}}
class DerStandardRecipe(BasicNewsRecipe):
    """Calibre news recipe for derStandard.at, an Austrian newspaper.

    The class configures ``BasicNewsRecipe`` through class attributes (feed
    list, tag filters, metadata) and overrides ``get_browser`` to send a
    privacy-agreement request before any content is downloaded.
    """

    title = u'derStandard'
    __author__ = 'Gerhard Aigner and Sujata Raman and Marcel Jira and Peter Reschenhofer'
    description = u'Nachrichten aus Österreich'
    publisher = 'derStandard.at'
    category = 'news, politics, nachrichten, Austria'
    use_embedded_content = False
    remove_empty_feeds = True
    no_stylesheets = True
    encoding = 'utf-8'
    language = 'de_AT'
    oldest_article = 1
    max_articles_per_feed = 100
    ignore_duplicate_articles = {'title', 'url'}
    masthead_url = 'http://images.derstandard.at/2012/06/19/derStandardat_1417x274.gif'

    # Section feeds as (feed title, RSS URL) pairs. Each section is listed
    # exactly once; a second 'Meinung' entry would only trigger a redundant
    # download of the same feed.
    feeds = [
        (u'Newsroom', u'https://derStandard.at/?page=rss&ressort=Seite1'),
        (u'International', u'https://derstandard.at/?page=rss&ressort=International'),
        (u'Inland', u'https://derstandard.at/?page=rss&ressort=Inland'),
        (u'Wirtschaft', u'https://derStandard.at/?page=rss&ressort=Wirtschaft'),
        (u'Web', u'https://derStandard.at/?page=rss&ressort=Web'),
        (u'Sport', u'https://derStandard.at/?page=rss&ressort=Sport'),
        (u'Panorama', u'https://derStandard.at/?page=rss&ressort=Panorama'),
        (u'Etat', u'https://derStandard.at/?page=rss&ressort=Etat'),
        (u'Kultur', u'https://derStandard.at/?page=rss&ressort=Kultur'),
        (u'Wissenschaft', u'https://derStandard.at/?page=rss&ressort=Wissenschaft'),
        (u'Gesundheit', u'https://derStandard.at/?page=rss&ressort=Gesundheit'),
        (u'Bildung', u'https://derStandard.at/?page=rss&ressort=Bildung'),
        (u'Meinung', u'https://derStandard.at/?page=rss&ressort=Meinung'),
        (u'Lifestyle', u'https://derStandard.at/?page=rss&ressort=Lifestyle'),
        (u'Reisen', u'https://derStandard.at/?page=rss&ressort=Reisen'),
        (u'Familie', u'https://derstandard.at/?page=rss&ressort=Familie'),
        (u'User', u'https://derStandard.at/?page=rss&ressort=User'),
        (u'Karriere', u'https://derStandard.at/?page=rss&ressort=Karriere'),
        (u'Immobilien', u'https://derstandard.at/?page=rss&ressort=Immobilien'),
        (u'Automobil', u'https://derstandard.at/?page=rss&ressort=Automobil'),
        (u'dieStandard', u'https://derStandard.at/?page=rss&ressort=diestandard'),
    ]

    def get_browser(self, *args, **kwargs):
        """Return the base recipe's browser after a privacy-agreement request.

        Any positional or keyword arguments are forwarded unchanged to
        ``BasicNewsRecipe.get_browser``.

        An empty ``DSGVO_Check`` cookie is set for ``.derstandard.at`` and a
        body-less POST is sent to the site's ``privacyprotection/api/agree``
        endpoint using the returned browser. Errors raised by that request
        are not caught here and propagate to the caller.

        NOTE(review): presumably this registers consent so that article
        pages are served instead of a GDPR (DSGVO) consent page - confirm
        against the live site.
        """
        # Imported locally, as in the original recipe, so the module has no
        # top-level dependency on mechanize.
        import mechanize

        br = BasicNewsRecipe.get_browser(self, *args, **kwargs)
        br.set_simple_cookie('DSGVO_Check', '', '.derstandard.at')
        headers = {
            'X-Requested-With': 'XMLHttpRequest',
            'Content-Type': 'application/json; charset=UTF-8',
            'DNT': '1',
            'Pragma': 'no-cache',
            'Cache-Control': 'no-cache'
        }
        # data is None, so the HTTP verb has to be forced to POST explicitly.
        req = mechanize.Request(url='https://apps.derstandard.at/privacyprotection/api/agree', data=None, headers=headers, method='POST')
        br.open(req)
        return br

    # Matcher for elements carrying any of these CSS classes, built by the
    # module-level classes() helper.
    keep_only_tags = [
        classes('article-header article-body article-origins article-subtitle article-pubdate'),
    ]

    # Tag matchers (by tag name, class, id or form name) listed for removal.
    remove_tags = [
        dict(name=['link', 'iframe', 'style', 'hr']),
        dict(attrs={'class': ['lookup-links', 'media-list']}),
        dict(name='form', attrs={'name': 'sitesearch'}),
        dict(name='div', attrs={'class': ['socialsharing', 'block video',
                                          'blog-browsing section',
                                          'diashow', 'supplemental']}),
        dict(name='div', attrs={'id': 'highlighted'})
    ]

    remove_attributes = ['width', 'height']