#!/usr/bin/env python
# calibre news recipe stored as recipes/muy_interesante_mexico.recipe
# (its icon is recipes/icons/muy_interesante_mexico.png).
from calibre.web.feeds.news import BasicNewsRecipe, classes


class MuyInteresante(BasicNewsRecipe):
    """Recipe for the Muy Interesante México website.

    The front page is scanned for ``<article>`` teasers and every linked
    story is collected into a single section named 'Articles'.
    """

    # --- Publication metadata -------------------------------------------
    title = 'Muy Interesante México'
    __author__ = 'unkn0wn'
    description = (
        'Muy Interesante México, de la Revista Muy Interesante, es un sitio con información '
        'sobre ciencia, tecnología, historia, sociedad, medio ambiente, etc.'
    )
    language = 'es_MX'
    encoding = 'utf-8'
    masthead_url = 'https://www.muyinteresante.com/static/img/logo_web.svg'

    # --- Download / clean-up options ------------------------------------
    no_stylesheets = True
    remove_javascript = True
    remove_attributes = ['style', 'height', 'width']
    ignore_duplicate_articles = {'url'}
    resolve_internal_links = True

    # --- Article page layout --------------------------------------------
    extra_css = '''
        .c-detail__bar, .c-detail__author, .c-detail__media__txt { font-size:small; }
        .default-epigraph { font-style:italic; }
    '''

    keep_only_tags = [{'name': 'article', 'attrs': {'class': 'c-detail'}}]

    remove_tags = [
        {'name': ['aside', 'svg', 'script']},
        classes('c-detail__share'),
    ]

    def get_cover_url(self):
        """Return the cover image URL found on the Magzter listing, or None.

        The listing page is searched for ``<meta>`` tags whose ``content``
        value ends with 'view/3.jpg'; the first match's ``content`` is
        returned. When nothing matches, the result is None.
        """
        def looks_like_cover(value):
            # The value may be falsy, so test it before calling endswith().
            return value and value.endswith('view/3.jpg')

        listing = self.index_to_soup(
            'https://www.magzter.com/ES/Zinet-Media-Global/Muy-Interesante-Espa%C3%B1a/Science/1806044'
        )
        candidates = listing.findAll('meta', content=looks_like_cover)
        if not candidates:
            return None
        return candidates[0]['content']

    def preprocess_html(self, soup):
        """Normalise article markup in place and return the same soup.

        Paragraphs inside the author box become ``<div>`` elements, and every
        ``<h2>``/``<h3>`` in the document is demoted to ``<h4>``.
        """
        author_box = soup.find(**classes('c-detail__author'))
        author_paragraphs = author_box.findAll('p') if author_box else []
        for paragraph in author_paragraphs:
            paragraph.name = 'div'

        for heading in soup.findAll(['h2', 'h3']):
            heading.name = 'h4'

        return soup

    def parse_index(self):
        """Build the article list from the site's front page.

        Returns a one-element list holding the tuple ``('Articles', entries)``,
        where each entry is a dict with 'title', 'url' and 'description' keys.
        Teasers that have no ``a.page-link`` anchor with an href are skipped.
        """
        front_page = self.index_to_soup('https://www.muyinteresante.com.mx/')
        entries = []

        for teaser in front_page.findAll('article'):
            link = teaser.find('a', attrs={'class': 'page-link', 'href': True})
            if not link:
                continue

            title = self.tag_to_string(link)
            url = link['href']

            # The description is optional; fall back to an empty string.
            info = teaser.find(**classes('c-article__info_content'))
            desc = self.tag_to_string(info) if info else ''

            self.log('\t', title, '\n\t', desc, '\n\t\t', url)
            entries.append(dict(title=title, url=url, description=desc))

        return [('Articles', entries)]