From 230812bc748903b4f5f876b1c59dd946ddd92b7d Mon Sep 17 00:00:00 2001 From: unkn0w7n <51942695+unkn0w7n@users.noreply.github.com> Date: Fri, 26 Jul 2024 10:46:14 +0530 Subject: [PATCH 1/2] ... --- recipes/20_minutos.recipe | 2 ++ recipes/abc_es.recipe | 2 ++ recipes/horizons.recipe | 7 ++----- recipes/la_jornada.recipe | 2 ++ recipes/nhk_news.recipe | 2 ++ recipes/scmp.recipe | 2 ++ 6 files changed, 12 insertions(+), 5 deletions(-) diff --git a/recipes/20_minutos.recipe b/recipes/20_minutos.recipe index ba5da87954..cb75c2e71a 100644 --- a/recipes/20_minutos.recipe +++ b/recipes/20_minutos.recipe @@ -1,3 +1,5 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 __license__ = 'GPL v3' __author__ = 'Luis Hernandez' __copyright__ = 'Luis Hernandez' diff --git a/recipes/abc_es.recipe b/recipes/abc_es.recipe index 3a55ac44e5..f5b036b359 100644 --- a/recipes/abc_es.recipe +++ b/recipes/abc_es.recipe @@ -1,3 +1,5 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 __license__ = 'GPL v3' __author__ = 'Ricardo Jurado' __copyright__ = 'Ricardo Jurado' diff --git a/recipes/horizons.recipe b/recipes/horizons.recipe index 098ff00406..f48d825f03 100644 --- a/recipes/horizons.recipe +++ b/recipes/horizons.recipe @@ -53,11 +53,8 @@ class horizons(BasicNewsRecipe): url = 'https://www.cirsd.org' + url self.cover_url = a.find('img')['src'] self.log(self.cover_url) - issue = a.find('div', attrs={'class':'horizon-gallery-title'}) - if issue: - self.title = self.tag_to_string(issue).strip() - self.timefmt = ' [' + self.tag_to_string(issue).strip().replace('Horizons ', '') + ']' - self.log('Downloading Issue: ', self.timefmt, self.title) + self.title = url.split('/')[-1].replace('-', ' ').title() + self.log('Downloading Issue: ', self.title) soup = self.index_to_soup(url) feeds = [] diff --git a/recipes/la_jornada.recipe b/recipes/la_jornada.recipe index e85c815fee..cbf806c3ef 100644 --- a/recipes/la_jornada.recipe +++ b/recipes/la_jornada.recipe @@ -1,3 +1,5 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 __license__ = 'GPL v3' __copyright__ = '2010-2012, Darko Miletic , Rogelio Domínguez ' ''' diff --git a/recipes/nhk_news.recipe b/recipes/nhk_news.recipe index 13ab1fad60..b9dcc0ebd3 100644 --- a/recipes/nhk_news.recipe +++ b/recipes/nhk_news.recipe @@ -1,3 +1,5 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 from calibre.web.feeds.news import BasicNewsRecipe # feed source: https://www.nhk.or.jp/toppage/rss/index.html diff --git a/recipes/scmp.recipe b/recipes/scmp.recipe index c2d49abcc8..a70d8a4762 100644 --- a/recipes/scmp.recipe +++ b/recipes/scmp.recipe @@ -1,3 +1,5 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 """ scmp.com """ From 0a567a206acbe8cb669ff5fbc3b143c72c797177 Mon Sep 17 00:00:00 2001 From: unkn0w7n <51942695+unkn0w7n@users.noreply.github.com> Date: Fri, 26 Jul 2024 10:48:05 +0530 Subject: [PATCH 2/2] ... --- recipes/huffingtonpost.recipe | 16 ++++++++++++++++ recipes/new_yorker.recipe | 14 ++++++++++++-- recipes/nymag.recipe | 15 +++++++++++++-- 3 files changed, 41 insertions(+), 4 deletions(-) diff --git a/recipes/huffingtonpost.recipe b/recipes/huffingtonpost.recipe index 253e9c77f2..e949dc4016 100644 --- a/recipes/huffingtonpost.recipe +++ b/recipes/huffingtonpost.recipe @@ -1,3 +1,5 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 from __future__ import print_function from calibre.web.feeds.news import BasicNewsRecipe @@ -28,6 +30,20 @@ class HuffingtonPostRecipe(BasicNewsRecipe): no_stylesheets = True remove_javascript = True + recipe_specific_options = { + 'days': { + 'short': 'Oldest article to download from this news source. In days ', + 'long': 'For example, 0.5, gives you articles from the past 12 hours', + 'default': str(oldest_article) + } + } + + def __init__(self, *args, **kwargs): + BasicNewsRecipe.__init__(self, *args, **kwargs) + d = self.recipe_specific_options.get('days') + if d and isinstance(d, str): + self.oldest_article = float(d) + keep_only_tags = [ classes('entry__header entry__body') ] diff --git a/recipes/new_yorker.recipe b/recipes/new_yorker.recipe index 8629c79f2b..6d8b49f863 100644 --- a/recipes/new_yorker.recipe +++ b/recipes/new_yorker.recipe @@ -79,9 +79,19 @@ class NewYorker(BasicNewsRecipe): # img.save(buf, format='JPEG') # return buf.getvalue() + recipe_specific_options = { + 'date': { + 'short': 'The date of the edition to download (YYYY/MM/DD format)', + 'long': 'For example, 2024/07/08' + } + } + def parse_index(self): - soup = self.index_to_soup( - 'https://www.newyorker.com/magazine?intcid=magazine') + issue_url = 'https://www.newyorker.com/magazine?intcid=magazine' + d = self.recipe_specific_options.get('date') + if d and isinstance(d, str): + issue_url = 'https://www.newyorker.com/magazine/' + d + soup = self.index_to_soup(issue_url) cover_img = soup.find('picture', attrs={'class': lambda x: x and 'asset-embed__responsive-asset' in x}) if cover_img is not None: diff --git a/recipes/nymag.recipe b/recipes/nymag.recipe index 46050559af..73abaad17a 100644 --- a/recipes/nymag.recipe +++ b/recipes/nymag.recipe @@ -19,7 +19,7 @@ class NewYorkMagazine(BasicNewsRecipe): title = 'New York Magazine' __author__ = 'Kovid Goyal' description = 'Food, culture, arts and entertainment in New York' - language = 'en' + language = 'en_US' no_stylesheets = True remove_javascript = True encoding = 'utf-8' @@ -32,8 +32,19 @@ class NewYorkMagazine(BasicNewsRecipe): ] remove_attributes = ['srcset'] + recipe_specific_options = { + 'date': { + 'short': 'The date of the edition to download (YYYY-MM-DD format)', + 'long': 'For example, 2024-07-01' + } + } + def nymag_get_index(self): - return self.index_to_soup('https://nymag.com/maglinks/nym-home-05') + issue_url = 'https://nymag.com/maglinks/nym-home-05' + d = self.recipe_specific_options.get('date') + if d and isinstance(d, str): + issue_url = 'https://nymag.com/magazine/toc/' + d + '.html' + return self.index_to_soup(issue_url) def parse_index(self): soup = self.nymag_get_index()