diff --git a/recipes/epoch_times.recipe b/recipes/epoch_times.recipe
index 9ce05c09d6..d453ff5ba8 100644
--- a/recipes/epoch_times.recipe
+++ b/recipes/epoch_times.recipe
@@ -24,7 +24,7 @@ class EpochTimes(BasicNewsRecipe):
     ]
     remove_tags = [
         classes('print:hidden h-header shortcode aspect-square'),
-        dict(name='button'),
+        dict(name=['button', 'svg']),
         dict(name='img', attrs={'src':lambda x: x and x.endswith('svg')})
     ]
 
diff --git a/recipes/mit_technology_review.recipe b/recipes/mit_technology_review.recipe
index a61fc142c9..486b9634e4 100644
--- a/recipes/mit_technology_review.recipe
+++ b/recipes/mit_technology_review.recipe
@@ -50,6 +50,7 @@ class MitTechnologyReview(BasicNewsRecipe):
         #cre-d{font-size:xx-small; text-align:center; color:gray;}
         #cap-d{font-size:small; text-align:center;}
         blockquote{text-align:center; color:#404040;}
+        em { color:#202020;}
     '''
     keep_only_tags = [
         prefixed_classes('contentHeader contentArticleHeader contentBody')
@@ -65,14 +66,17 @@ class MitTechnologyReview(BasicNewsRecipe):
     def get_cover_url(self):
         soup = self.index_to_soup('https://www.technologyreview.com/')
         if script := soup.find('script', id='preload'):
-            link = re.findall('https\S+?front_cover\S+?.png', self.tag_to_string(script))
-            return link[-1] + '?fit=572,786'
+            link = re.search(r'(https\S+?front_cover\S+?(jpg|png))', self.tag_to_string(script))
+            # re.search returns None when the script has no cover link; do not dereference it
+            if link:
+                return link.group(1) + '?fit=572,786'
 
     def parse_index(self):
         soup = self.index_to_soup(self.INDEX)
-        issue = soup.find('h1', attrs={'class':lambda x: x and x.startswith('magazineHero__title')})
+        issue = soup.find(attrs={'class':lambda x: x and x.startswith('magazineHero__title')})
         time = soup.find(attrs={'class': lambda x: x and x.startswith('magazineHero__date')})
-        self.timefmt = ' (' + self.tag_to_string(issue) + ') [' + self.tag_to_string(time) + ']'
+        self.title = 'MIT Tech Review ' + self.tag_to_string(issue)
+        self.timefmt = ' [' + self.tag_to_string(time) + ']'
         self.log('Downloading issue: ', self.timefmt)
 
         # parse articles
diff --git a/recipes/phillosophy_now.recipe b/recipes/phillosophy_now.recipe
index 2354cd0651..486550c774 100644
--- a/recipes/phillosophy_now.recipe
+++ b/recipes/phillosophy_now.recipe
@@ -21,16 +21,28 @@ class PhilosophyNow(BasicNewsRecipe):
     remove_attributes = ['height', 'width', 'style']
     encoding = 'utf-8'
     ignore_duplicate_articles = {'url'}
+    masthead_url = 'https://philosophynow.org/media/images/regulars/logoStructuredData.png'
 
     keep_only_tags = [classes('article_page')]
     remove_tags = [dict(name='div', attrs={'id':'welcome_box'})]
+    extra_css = '''
+        img {display:block; margin:0 auto;}
+        .articleImage { font-size:small; text-align:center; }
+        em, blockquote { color:#202020; }
+    '''
 
     def parse_index(self):
         soup = self.index_to_soup('https://philosophynow.org/')
         div = soup.find('div', attrs={'id': 'aside_issue_cover'})
         url = div.find('a', href=True)['href']
-        for issue in div.findAll('div', attrs={'id':'aside_issue_text'}):
+        # Take the issue date and number from the aside_issue_text block of the cover box
+        issue = div.find('div', attrs={'id':'aside_issue_text'})
+        if issue:
             self.log('Downloading issue:', self.tag_to_string(issue).strip())
+            date_tag = issue.find(attrs={'id':'aside_issue_date'})
+            self.timefmt = ' [' + self.tag_to_string(date_tag) + ']'
+            number_tag = issue.find(attrs={'id':'aside_issue_number'})
+            self.title = 'Philosophy Now ' + self.tag_to_string(number_tag)
         cov_url = div.find('img', src=True)['src']
         self.cover_url = 'https://philosophynow.org' + cov_url
         soup = self.index_to_soup('https://philosophynow.org' + url)