From ba6eb032785c6e58f81b051b044d5729f696181c Mon Sep 17 00:00:00 2001 From: Henrik Holm Date: Fri, 18 Apr 2025 21:35:16 +0200 Subject: [PATCH 1/4] Use `oldest_article` instead of `max_age` to conform to naming convention --- recipes/fokus.recipe | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/recipes/fokus.recipe b/recipes/fokus.recipe index 9559086188..282dab100b 100644 --- a/recipes/fokus.recipe +++ b/recipes/fokus.recipe @@ -21,7 +21,7 @@ class Fokus(BasicNewsRecipe): no_stylesheets = True compress_news_images = True needs_subscription = 'optional' - max_age = 7 # days + oldest_article = 7 # days remove_empty_feeds = True extra_css = 'img { display: block; width: 75%; height: auto }' @@ -128,12 +128,12 @@ class Fokus(BasicNewsRecipe): if time_tag := a_tag.find('time', {'class': 'Blurb__date'}): swedish_date_str = self.tag_to_string(time_tag).rstrip() - # Skip articles older than `self.max_age`. + # Skip articles older than `self.oldest_article`. datetime_str = time_tag['datetime'] datetime_time = datetime.strptime(datetime_str, '%Y-%m-%dT%H:%M:%S%z') now = datetime.now(timezone.utc) delta = now - datetime_time - if delta.days > self.max_age: + if delta.days > self.oldest_article: self.log.debug(f"\tSkipping article as it is too old: '{title}'") return @@ -251,9 +251,9 @@ class Fokus(BasicNewsRecipe): section_to_articles[section_title] = [] section_to_articles[section_title].append(article_dict) - # Log how many sections contained no articles younger than `self.max_age`. + # Log how many sections contained no articles younger than `self.oldest_article`. 
if diff := len(sections) - len(section_to_articles): - self.log(f'{diff} sections contained no articles younger than {self.max_age} days.') + self.log(f'{diff} sections contained no articles younger than {self.oldest_article} days.') return section_to_articles From 8d2a1e2d497222ba345c2caffa999d3e901aeda3 Mon Sep 17 00:00:00 2001 From: Henrik Holm Date: Fri, 18 Apr 2025 21:36:42 +0200 Subject: [PATCH 2/4] Log error if no article blurbs were found under section page --- recipes/fokus.recipe | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/recipes/fokus.recipe b/recipes/fokus.recipe index 282dab100b..12137e0500 100644 --- a/recipes/fokus.recipe +++ b/recipes/fokus.recipe @@ -172,7 +172,8 @@ class Fokus(BasicNewsRecipe): article_blurbs = [] if not article_blurbs: - raise ValueError('Failed to identify any article blurbs.') + self.log.error('Failed to identify any article blurbs.') + return {} parsed_blurbs = {} for article_blurb in article_blurbs: From 43c9f028499d1200f7f5e0a336e68c9e7ce40ae5 Mon Sep 17 00:00:00 2001 From: Henrik Holm Date: Fri, 18 Apr 2025 21:58:54 +0200 Subject: [PATCH 3/4] Set `use_embedded_content` and `scale_news_images` --- recipes/fokus.recipe | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/recipes/fokus.recipe b/recipes/fokus.recipe index 12137e0500..d8360d530b 100644 --- a/recipes/fokus.recipe +++ b/recipes/fokus.recipe @@ -25,6 +25,10 @@ class Fokus(BasicNewsRecipe): remove_empty_feeds = True extra_css = 'img { display: block; width: 75%; height: auto }' + use_embedded_content = False + scale_news_images_to_device = True + scale_news_images = (800, 600) + remove_tags = [ dict(name='div', attrs={'class': 'External-ad'}), dict(name='header', attrs={'class': 'Header'}), From 0fa1a0d6b1cc0a1bbfc557ae1e276aa75b30fcf5 Mon Sep 17 00:00:00 2001 From: Henrik Holm Date: Fri, 18 Apr 2025 22:51:15 +0200 Subject: [PATCH 4/4] Define the `get_cover_url()` method --- recipes/fokus.recipe | 28 ++++++++++++++++++++++++++++ 1 file 
changed, 28 insertions(+) diff --git a/recipes/fokus.recipe b/recipes/fokus.recipe index d8360d530b..1067204753 100644 --- a/recipes/fokus.recipe +++ b/recipes/fokus.recipe @@ -2,6 +2,9 @@ # vim:fileencoding=utf-8 from datetime import datetime, timezone +from mechanize import Request + +from calibre.ebooks.BeautifulSoup import BeautifulSoup from calibre.web.feeds.news import BasicNewsRecipe @@ -67,6 +70,31 @@ class Fokus(BasicNewsRecipe): dict(name='div', class_='wp-block-core-paragraph'), ] + def get_cover_url(self) -> str: + # Create a `mechanize.Request` object. + req = Request(url=self.main_url, method='POST') + + # Open the requested URL in the built-in browser of the `BasicNewsRecipe` parent class. + browser = self.get_browser() + response = browser.open(req) + + # Parse the response into a BeautifulSoup soup. + soup = BeautifulSoup(response.get_data(), "html.parser") + + # The cover image of the current edition is located in a <figure>
tag with class 'Issue__thumbnail'. + try: + figure_tag = soup.find('figure', class_='Issue__thumbnail') + img_tag = figure_tag.find('img') + # Set the `img_tag` to `None` if it is falsy. This way, we can force an `AttributeError` if no cover URL + # can be found. + img_tag = img_tag if img_tag else None + cover_url = img_tag["src"] + except AttributeError: + self.log.error("Failed to identify the cover image URL. Does an 'Issue__thumbnail' figure still exist?") + return '' + + return cover_url + def get_browser(self): br = BasicNewsRecipe.get_browser(self) if self.username and self.password: