From c1634e6df5093d1e70676f329ed64208e1b2b711 Mon Sep 17 00:00:00 2001 From: ping Date: Wed, 28 Jun 2023 13:50:26 +0800 Subject: [PATCH] Fix recipe cover_url for Foreign Affairs, Nature --- recipes/foreignaffairs.recipe | 19 +++++++++---------- recipes/nature.recipe | 2 +- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/recipes/foreignaffairs.recipe b/recipes/foreignaffairs.recipe index a646c7f1cc..c0790856ce 100644 --- a/recipes/foreignaffairs.recipe +++ b/recipes/foreignaffairs.recipe @@ -6,13 +6,7 @@ import html5lib import mechanize from lxml import html -from calibre.web.feeds.news import BasicNewsRecipe - - -def classes(classes): - q = frozenset(classes.split(' ')) - return dict(attrs={ - 'class': lambda x: x and frozenset(x.split()).intersection(q)}) +from calibre.web.feeds.news import BasicNewsRecipe, classes def as_article(source, log): @@ -155,9 +149,14 @@ class ForeignAffairsRecipe(BasicNewsRecipe): self.timefmt = u' [%s]' % date link = soup.find('link', rel='canonical', href=True)['href'] year, volnum, issue_vol = link.split('/')[-3:] - self.cover_url = soup.find(**classes('subscribe-callout-image'))['srcset'].split()[-3] - self.cover_url = self.cover_url.split('?')[0] - self.cover_url = self.cover_url.replace('_webp_issue_small_2x', '_webp_issue_large_2x') + self.cover_url = re.sub( + r"_webp_issue_small_\dx", + "_webp_issue_large_2x", + soup.find(class_="subscribe-callout-image")["srcset"] + .split(",")[0] + .strip() + .split(" ")[0], + ) cls = soup.find('body')['class'] if isinstance(cls, (list, tuple)): diff --git a/recipes/nature.recipe b/recipes/nature.recipe index 60ff8864a7..5809737e0b 100644 --- a/recipes/nature.recipe +++ b/recipes/nature.recipe @@ -51,7 +51,7 @@ class Nature(BasicNewsRecipe): def parse_index(self): soup = self.index_to_soup(BASE + '/nature/current-issue') - self.cover_url = 'https:' + soup.find( + self.cover_url = soup.find( 'img', attrs={'data-test': check_words('issue-cover-image')} )['src'] try: