From 3fe8bfd89a8fbe8c961e309fc661830d579e957b Mon Sep 17 00:00:00 2001 From: unkn0w7n <51942695+unkn0w7n@users.noreply.github.com> Date: Sat, 27 Jul 2024 13:19:55 +0530 Subject: [PATCH] update sportstar --- recipes/open_magazine.recipe | 26 +++++++++++++++++++++----- recipes/reuters.recipe | 3 ++- recipes/sportstar.recipe | 22 ++++++++++++++++++---- 3 files changed, 41 insertions(+), 10 deletions(-) diff --git a/recipes/open_magazine.recipe b/recipes/open_magazine.recipe index be8c04930b..4d7aaa2952 100644 --- a/recipes/open_magazine.recipe +++ b/recipes/open_magazine.recipe @@ -20,12 +20,28 @@ class OpenMagazine(BasicNewsRecipe): 'blockquote{color:#404040;}' '.about-author{font-size:small;}' + recipe_specific_options = { + 'days': { + 'short': 'Oldest article to download from this news source. In days ', + 'long': 'For example, 0.5, gives you articles from the past 12 hours', + 'default': str(oldest_article) + } + } + + def __init__(self, *args, **kwargs): + BasicNewsRecipe.__init__(self, *args, **kwargs) + d = self.recipe_specific_options.get('days') + if d and isinstance(d, str): + self.oldest_article = float(d) + def get_cover_url(self): - soup = self.index_to_soup('https://openthemagazine.com/') - tag = soup.find(attrs={'class': 'magazine-item mr-1'}) - if tag: - self.cover_url = tag.find('img')['src'] - return getattr(self, 'cover_url', None) + d = self.recipe_specific_options.get('days') + if not (d and isinstance(d, str)): + soup = self.index_to_soup('https://openthemagazine.com/') + tag = soup.find(attrs={'class': 'magazine-item mr-1'}) + if tag: + self.cover_url = tag.find('img')['src'] + return getattr(self, 'cover_url', None) keep_only_tags = [ classes('post-data post-thumb post-meta post-excerp'), diff --git a/recipes/reuters.recipe b/recipes/reuters.recipe index 7fdbbbb997..a9abcc5416 100644 --- a/recipes/reuters.recipe +++ b/recipes/reuters.recipe @@ -20,7 +20,8 @@ class Reuters(BasicNewsRecipe): 'reaching billions of people worldwide every day. Reuters provides business, financial, national and international ' 'news to professionals via desktop terminals, the world’s media organizations, industry events and directly to consumers.' ) - masthead_url = 'https://www.reutersprofessional.com/wp-content/uploads/2024/03/primary-logo.svg' + masthead_url = 'https://www.reutersagency.com/wp-content/uploads/2024/06/reuters-logo.png' + cover_url = 'https://yt3.googleusercontent.com/ytc/AIdro_mk43b9eQwN15ZBDyMPDaElxvw4V-oUS9XDUvVnYB3gA9yA=s1024' language = 'en' encoding = 'utf-8' oldest_article = 1.2 # days diff --git a/recipes/sportstar.recipe b/recipes/sportstar.recipe index bb7c49091e..90ae0cd0ed 100644 --- a/recipes/sportstar.recipe +++ b/recipes/sportstar.recipe @@ -1,3 +1,5 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 from collections import OrderedDict from calibre.web.feeds.news import BasicNewsRecipe, classes @@ -25,6 +27,13 @@ class Sportstar(BasicNewsRecipe): .author, .publish-time {font-size:small;} ''' + recipe_specific_options = { + 'issue': { + 'short': 'Enter the Issue Number you want to download\n(Volume-Issue format)', + 'long': 'For example, 47-16' + } + } + keep_only_tags = [ dict(name='h1', attrs={'class':'title'}), dict(name='h2', attrs={'class':'sub-title'}), @@ -39,10 +48,15 @@ class Sportstar(BasicNewsRecipe): ] def parse_index(self): - soup = self.index_to_soup('https://sportstar.thehindu.com/magazine/') - url = soup.find('a', href=lambda x: x and x.startswith('https://sportstar.thehindu.com/magazine/issue/'))['href'] - self.log('Downloading Issue: ', url) - soup = self.index_to_soup(url) + d = self.recipe_specific_options.get('issue') + if d and isinstance(d, str): + issue_url = 'https://sportstar.thehindu.com/magazine/issue/vol' + d + else: + soup = self.index_to_soup('https://sportstar.thehindu.com/magazine/') + issue_url = soup.find('a', href=lambda x: x and x.startswith('https://sportstar.thehindu.com/magazine/issue/'))['href'] + self.log('Downloading Issue: ', issue_url) + + soup = self.index_to_soup(issue_url) feeds = OrderedDict()