From 6c0bcf75cc3d0ff78478d3577839794611e1cc0c Mon Sep 17 00:00:00 2001 From: unkn0w7n <51942695+unkn0w7n@users.noreply.github.com> Date: Thu, 21 Sep 2023 11:34:25 +0530 Subject: [PATCH] Update ambito.recipe --- recipes/ambito.recipe | 21 ++++++++++++++------- recipes/toiprint.recipe | 6 +++--- 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/recipes/ambito.recipe b/recipes/ambito.recipe index b3eae78a93..7a5a177cc6 100644 --- a/recipes/ambito.recipe +++ b/recipes/ambito.recipe @@ -8,7 +8,7 @@ __copyright__ = '2008-2021, Darko Miletic ' ambito.com ''' -from calibre.web.feeds.news import BasicNewsRecipe +from calibre.web.feeds.news import BasicNewsRecipe, classes class Ambito(BasicNewsRecipe): @@ -17,12 +17,11 @@ class Ambito(BasicNewsRecipe): description = 'Ambito.com con noticias del Diario Ambito Financiero de Buenos Aires' publisher = 'Editorial Nefir S.A.' category = 'news, politics, economy, finances, Argentina' - oldest_article = 2 + oldest_article = 1.2 no_stylesheets = True - encoding = 'utf8' + encoding = 'utf-8' use_embedded_content = False remove_empty_feeds = True - handle_gzip = True compress_news_images = True scale_news_images_to_device = True ignore_duplicate_articles = {'url'} @@ -41,11 +40,14 @@ class Ambito(BasicNewsRecipe): } keep_only_tags = [ - dict(name='div', attrs={'class': 'detail-header-wrapper'}), - dict(attrs={'class': lambda x: x and 'detail-body' in x.split()}), + classes( + 'detail-highlighted-multimedia news-headline__publication-date news-headline__title' + ' news-headline__author-wrapper news-headline__article-summary' + ), + dict(name='article', attrs={'class': lambda x: x and 'article-body' in x.split()}), ] remove_tags = [ - dict(name=['object', 'link', 'embed', 'iframe', 'meta', 'link', 'img']) + dict(name=['object', 'link', 'embed', 'iframe', 'meta', 'link']) ] feeds = [ @@ -60,3 +62,8 @@ class Ambito(BasicNewsRecipe): (u'Espectaculos', u'https://www.ambito.com/rss/espectaculos.xml'), (u'Nacional', u'https://www.ambito.com/rss/nacional.xml') ] + + def preprocess_html(self, soup): + for img in soup.findAll('img', attrs={'data-td-src-property':True}): + img['src'] = img['data-td-src-property'] + return soup diff --git a/recipes/toiprint.recipe b/recipes/toiprint.recipe index 71883fd02b..87c14307cf 100644 --- a/recipes/toiprint.recipe +++ b/recipes/toiprint.recipe @@ -37,7 +37,7 @@ class toiprint(BasicNewsRecipe): self.title = 'TOI Print Edition ' + dt.strftime('%b %d, %Y') extra_css = ''' - .sub { color:#5c5c5c; } + .sub { color:#202020; } .auth { font-size:small; font-weight:bold; color:#202020; } .cap { text-align:center; font-size:small; } img { display:block; margin:0 auto; } @@ -71,7 +71,7 @@ class toiprint(BasicNewsRecipe): continue link = art['ArticleName'] page = link.split('_')[-3] - url = page + '-' + link + url = page + '/' + link title = art.get('ArticleTitle', 'unknown') if art.get('ColumnTitle', '') == '': desc = 'Page No.' + page + ' | ' + art.get('ArticleBody', '') @@ -106,4 +106,4 @@ class toiprint(BasicNewsRecipe): return '
' + body.replace('
', '

').replace('
', '

') + '

' def print_version(self, url): - return index + '/ArticleZoneJson/' + url.replace('-', '/') + '.json' + return index + '/ArticleZoneJson/' + url + '.json'