# Patch summary: unified git diff touching three paths under recipes/.
#
# 1. recipes/icons/newslaundry.png
#    New binary file (mode 100644); the diff carries no textual hunk for it.
#
# 2. recipes/newslaundry.recipe
#    New 36-line file defining class newslaundry(BasicNewsRecipe):
#      - sets title, __author__, description, language ('en_IN'),
#        masthead_url, encoding ('utf-8'), no_stylesheets, remove_javascript,
#        oldest_article (7, commented as days), resolve_internal_links and
#        ignore_duplicate_articles ({'url'});
#      - declares one feed, 'Articles', whose URL is the stories.rss endpoint
#        with time-period=last-7-days;
#      - print_version(self, url) calls
#        self.abort_article('Skipping hindi article') when the substring
#        'hindi.newslaundry' occurs in url; its final statement returns url
#        unmodified.
#    NOTE(review): `classes` is imported but referenced only from the
#    commented-out keep_only_tags / preprocess_html lines.
#    NOTE(review): the hindi guard is written as a one-line compound
#    statement (`if ...: self.abort_article(...)`), followed by an inline
#    comment telling users to remove the line to keep hindi articles.
#
# 3. recipes/reuters.recipe (two hunks, both under class Reuters)
#      - remove_tags: additional class prefixes are passed to
#        prefixed_classes (toolbar, read-next, trust-badge, author-bio and
#        spinner entries) and 'svg' joins 'button' and 'link' in the
#        tag-name filter;
#      - remove_attributes grows from ['style'] to
#        ['style', 'height', 'width'];
#      - extra_css is re-added after remove_attributes with extra rules that
#        set font-size:small on header tags / author card / author-date and
#        on the primary gallery / primary image (the latter also centred);
#      - the preprocess_html(self, soup, *a) definition is removed and its
#        loop (rename <noscript> elements containing <img> to <div>) is added
#        at the top of the remaining preprocess_html(self, soup), ahead of
#        the existing srcset -> src loop.
#    NOTE(review): both preprocess_html definitions appear under the same
#    `class Reuters` hunk header, so the earlier one was presumably shadowed
#    by the later one before this patch -- confirm against the full file.
#
# NOTE(review): in this copy the diff's original line breaks and indentation
# are not preserved (many diff lines share one physical line). Restore them
# from the upstream commit before attempting to apply the patch.
diff --git a/recipes/icons/newslaundry.png b/recipes/icons/newslaundry.png new file mode 100644 index 0000000000..342e059d0d Binary files /dev/null and b/recipes/icons/newslaundry.png differ diff --git a/recipes/newslaundry.recipe b/recipes/newslaundry.recipe new file mode 100644 index 0000000000..7b105a59ce --- /dev/null +++ b/recipes/newslaundry.recipe @@ -0,0 +1,36 @@ +from calibre.web.feeds.news import BasicNewsRecipe, classes + +class newslaundry(BasicNewsRecipe): + title = 'Newslaundry' + __author__ = 'unkn0wn' + description = ( + 'Newslaundry is a reader-supported, independent news media company. In an industry driven by corporate' + ' and government interests, we strongly believe in the need for an independent news model, and a free' + ' and accountable press.' + ) + language = 'en_IN' + masthead_url = 'https://images.assettype.com/newslaundry/2020-01/d91cad07-9650-47e9-8bdc-9a6247354d95/Header_logo_NL__2_New.png' + encoding = 'utf-8' + no_stylesheets = True + remove_javascript = True + oldest_article = 7 # days + resolve_internal_links = True + + ignore_duplicate_articles = {'url'} + + # keep_only_tags = [classes('headline subheadline authorWithTimeStamp story-card')] + + feeds = [ + ('Articles', 'https://www.newslaundry.com/stories.rss?time-period=last-7-days') + ] + + # def preprocess_html(self, soup): + # if h1 := soup.find(**classes('headline')): + # h1.name = 'h1' + # if h3 := soup.find(**classes('subheadline')): + # h3.name = 'h3' + # return soup + + def print_version(self, url): + if 'hindi.newslaundry' in url: self.abort_article('Skipping hindi article') # remove this line if you want hindi articles. 
+ return url diff --git a/recipes/reuters.recipe b/recipes/reuters.recipe index f9d0e40f3a..92ddb4c0dd 100644 --- a/recipes/reuters.recipe +++ b/recipes/reuters.recipe @@ -58,27 +58,29 @@ class Reuters(BasicNewsRecipe): __author__ = 'Kovid Goyal' language = 'en' + keep_only_tags = [ prefixed_classes('article-body__container__ article-header__container__'), ] remove_tags = [ prefixed_classes( - 'context-widget__tabs___' + 'context-widget__tabs___ article-header__toolbar__ read-next-mobile__container__ toolbar__container__ button__link__' ' ArticleBody-read-time-and-social Slideshow-expand-button- TwoColumnsLayout-footer- RegistrationPrompt__container___' - ' SocialEmbed__inner___' + ' SocialEmbed__inner___ trust-badge author-bio__social__ with-spinner__spinner__ author-bio__author-image__' ), - dict(name=['button', 'link']), + dict(name=['button', 'link', 'svg']), ] - remove_attributes = ['style'] - extra_css = ''' - img { max-width: 100%; } - ''' + remove_attributes = ['style', 'height', 'width'] - def preprocess_html(self, soup, *a): - for noscript in soup.findAll('noscript'): - if noscript.findAll('img'): - noscript.name = 'div' - return soup + extra_css = ''' + img { max-width: 100%; } + [class^="article-header__tags__"], + [class^="author-bio__author-card__"], + [class^="article-header__author-date__"] { + font-size:small; + } + [data-testid="primary-gallery"], [data-testid="primary-image"] { font-size:small; text-align:center; } + ''' def parse_index(self): base, sections = country_defs[country] @@ -103,6 +105,9 @@ class Reuters(BasicNewsRecipe): self.log('\t', article['title'], article['url']) def preprocess_html(self, soup): + for noscript in soup.findAll('noscript'): + if noscript.findAll('img'): + noscript.name = 'div' for img in soup.findAll('img', attrs={'srcset':True}): img['src'] = img['srcset'].split()[0] return soup