From acb0881aadad607e9ed14b188d65ef31facce31e Mon Sep 17 00:00:00 2001
From: Alistair Francis
Date: Sun, 18 Apr 2021 11:45:47 +1000
Subject: [PATCH] recipes: Initial commit of Crikey

Signed-off-by: Alistair Francis
---
 recipes/crikey.recipe | 74 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 74 insertions(+)
 create mode 100644 recipes/crikey.recipe

diff --git a/recipes/crikey.recipe b/recipes/crikey.recipe
new file mode 100644
index 0000000000..b917201812
--- /dev/null
+++ b/recipes/crikey.recipe
@@ -0,0 +1,74 @@
+#!/usr/bin/env python
+__license__ = 'GPL v3'
+__copyright__ = '2021, Alistair Francis alistair@alistair23.me'
+__docformat__ = 'restructuredtext en'
+
+import re
+from urllib.parse import urlencode
+from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.ptempfile import TemporaryFile
+
+
+def classes(classes):
+    """Return a tag-matching spec for any of the given class names.
+
+    ``classes`` is a space-separated string of class names. The result is
+    a dict whose ``attrs['class']`` predicate is truthy when a class
+    attribute value shares at least one name with that string, and falsy
+    when the value is missing or empty.
+    """
+    q = frozenset(classes.split(' '))
+    return dict(attrs={
+        'class': lambda x: x and frozenset(x.split()).intersection(q)})
+
+
+class Crikey(BasicNewsRecipe):
+    """calibre news recipe for Crikey (https://www.crikey.com.au/)."""
+
+    title = 'Crikey'
+    description = 'Australian News'
+    __author__ = 'Alistair Francis'
+    language = 'en'
+
+    use_embedded_content = True
+    needs_subscription = True
+
+    keep_only_tags = [
+        classes('article-body'),
+    ]
+
+    feeds = [
+        ('Politics', 'https://www.crikey.com.au/politics/feed'),
+        ('World', 'https://www.crikey.com.au/world/feed'),
+        ('Media', 'https://www.crikey.com.au/media/feed'),
+        ('Business', 'https://www.crikey.com.au/business/feed'),
+        ('Coronavirus', 'https://www.crikey.com.au/coronavirus/feed'),
+        ('Culture', 'https://www.crikey.com.au/life/feed'),
+    ]
+
+    def get_browser(self, *args, **kwargs):
+        """Return a browser, posting the credentials when both are set.
+
+        Extra positional and keyword arguments are forwarded to
+        ``BasicNewsRecipe.get_browser`` so callers that pass options to
+        the base implementation keep working with this override.
+        """
+        br = BasicNewsRecipe.get_browser(self, *args, **kwargs)
+        # Load the front page once before attempting the login POST.
+        br.open('https://www.crikey.com.au/')
+
+        if self.username is not None and self.password is not None:
+            postdata = urlencode({
+                'username': self.username,
+                'password': self.password
+            })
+            # The response body is not inspected, so it is not kept.
+            br.open(
+                'https://www.crikey.com.au',
+                data=postdata,
+                timeout=self.timeout
+            )
+
+        br.set_handle_refresh(True)
+
+        return br