From 3ff582c6259b4b09e9b44d3ca7342e9b9a1967f3 Mon Sep 17 00:00:00 2001
From: claudehenchoz
Date: Sun, 24 Mar 2024 11:25:07 +0100
Subject: [PATCH] Update nzz_ger.recipe to add cover retrieval

---
 recipes/nzz_ger.recipe | 69 +++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 68 insertions(+), 1 deletion(-)

diff --git a/recipes/nzz_ger.recipe b/recipes/nzz_ger.recipe
index 3770d997f3..5dd98eb2a3 100644
--- a/recipes/nzz_ger.recipe
+++ b/recipes/nzz_ger.recipe
@@ -1,4 +1,8 @@
 from calibre.web.feeds.recipes import BasicNewsRecipe
+import mechanize
+import json
+from datetime import datetime
+
 
 class Nzz(BasicNewsRecipe):
     title = 'NZZ'
@@ -6,11 +10,12 @@ class Nzz(BasicNewsRecipe):
     description = 'Neue Zürcher Zeitung'
     publisher = 'Neue Zürcher Zeitung'
     category = 'news, politics'
+    oldest_article = 30
+    max_articles_per_feed = 15
     language = 'de'
     no_stylesheets = True
     use_embedded_content = False
     encoding = 'utf-8'
-    timefmt = ' [%a, %d %b %Y %H:%M:%S %z]'
     scale_news_images = (600, 400)
     scale_news_images_to_device = True
 
@@ -45,6 +50,68 @@ class Nzz(BasicNewsRecipe):
         ('Technologie', 'https://www.nzz.ch/technologie.rss'),
     ]
 
+    def get_cover_url(self):
+        '''
+        Return the preview image URL of the most recent e-paper edition.
+
+        Sends a JSON query to the epaper.nzz.ch findEditionsFromDate
+        endpoint asking for a single edition, searching backwards from
+        today's local date, and returns the PREVIEW URL of the first page
+        of the first edition in the response.
+
+        Any error raised by the request, by the JSON decoding or by the
+        lookups into the decoded response is propagated to the caller.
+        '''
+        # Dates are sent as YYYY-MM-DD strings
+        today_date = datetime.now().strftime('%Y-%m-%d')
+        json_data = {
+            'editions': [
+                {
+                    'publicationDate': today_date,
+                    # NOTE(review): presumably the id of the daily NZZ
+                    # edition; confirm against the e-paper service
+                    'defId': 6,
+                },
+            ],
+            'startDate': today_date,
+            'maxHits': 1,
+            'direction': 'BACKWARD',
+        }
+
+        # Request headers, including a desktop browser User-Agent
+        headers = {
+            'Accept': 'application/json',
+            'Accept-Language': 'en-US,en;q=0.9,de;q=0.8',
+            'Content-Type': 'application/json',
+            'Origin': 'https://epaper.nzz.ch',
+            'Referer': 'https://epaper.nzz.ch/',
+            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36 Edg/122.0.0.0',
+            'X-Requested-With': 'XMLHttpRequest',
+        }
+
+        # The query is sent as a UTF-8 encoded JSON body
+        encoded_data = json.dumps(json_data).encode('utf-8')
+
+        # Build the POST request from the URL, body and headers
+        req = mechanize.Request(url='https://epaper.nzz.ch/epaper/1.0/findEditionsFromDate',
+                                data=encoded_data,
+                                headers=headers,
+                                method='POST')
+
+        # A fresh mechanize browser performs this single request
+        browser = mechanize.Browser()
+        response = browser.open(req)
+        try:
+            response_data = json.loads(response.read())
+        finally:
+            # Close the response even when the body is not valid JSON
+            response.close()
+
+        # Preview image of the first page of the first edition returned
+        first_page = response_data['data'][0]['pages'][0]
+        return first_page['pageDocUrl']['PREVIEW']['url']
+
+
     def get_browser(self, *args, **kwargs):
         kwargs['user_agent'] = 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)'
         br = BasicNewsRecipe.get_browser(self, *args, **kwargs)