Update ambito.recipe

This commit is contained in:
unkn0w7n 2023-09-21 11:34:25 +05:30
parent 85ec0e6fbf
commit 6c0bcf75cc
2 changed files with 17 additions and 10 deletions

View File

@ -8,7 +8,7 @@ __copyright__ = '2008-2021, Darko Miletic <darko.miletic at gmail.com>'
ambito.com ambito.com
''' '''
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe, classes
class Ambito(BasicNewsRecipe): class Ambito(BasicNewsRecipe):
@ -17,12 +17,11 @@ class Ambito(BasicNewsRecipe):
description = 'Ambito.com con noticias del Diario Ambito Financiero de Buenos Aires' description = 'Ambito.com con noticias del Diario Ambito Financiero de Buenos Aires'
publisher = 'Editorial Nefir S.A.' publisher = 'Editorial Nefir S.A.'
category = 'news, politics, economy, finances, Argentina' category = 'news, politics, economy, finances, Argentina'
oldest_article = 2 oldest_article = 1.2
no_stylesheets = True no_stylesheets = True
encoding = 'utf8' encoding = 'utf-8'
use_embedded_content = False use_embedded_content = False
remove_empty_feeds = True remove_empty_feeds = True
handle_gzip = True
compress_news_images = True compress_news_images = True
scale_news_images_to_device = True scale_news_images_to_device = True
ignore_duplicate_articles = {'url'} ignore_duplicate_articles = {'url'}
@ -41,11 +40,14 @@ class Ambito(BasicNewsRecipe):
} }
keep_only_tags = [ keep_only_tags = [
dict(name='div', attrs={'class': 'detail-header-wrapper'}), classes(
dict(attrs={'class': lambda x: x and 'detail-body' in x.split()}), 'detail-highlighted-multimedia news-headline__publication-date news-headline__title'
' news-headline__author-wrapper news-headline__article-summary'
),
dict(name='article', attrs={'class': lambda x: x and 'article-body' in x.split()}),
] ]
remove_tags = [ remove_tags = [
dict(name=['object', 'link', 'embed', 'iframe', 'meta', 'link', 'img']) dict(name=['object', 'link', 'embed', 'iframe', 'meta', 'link'])
] ]
feeds = [ feeds = [
@ -60,3 +62,8 @@ class Ambito(BasicNewsRecipe):
(u'Espectaculos', u'https://www.ambito.com/rss/espectaculos.xml'), (u'Espectaculos', u'https://www.ambito.com/rss/espectaculos.xml'),
(u'Nacional', u'https://www.ambito.com/rss/nacional.xml') (u'Nacional', u'https://www.ambito.com/rss/nacional.xml')
] ]
def preprocess_html(self, soup):
    """Copy each image's ``data-td-src-property`` value into its ``src``.

    Only ``<img>`` tags that carry a ``data-td-src-property`` attribute are
    touched. NOTE(review): presumably the site keeps the real image URL in
    that attribute (lazy loading) -- confirm against a live article page.

    :param soup: parsed article document exposing ``findAll``.
    :return: the same ``soup`` object, modified in place.
    """
    for img in soup.findAll('img', attrs={'data-td-src-property': True}):
        real_src = img['data-td-src-property']
        # An empty attribute would blank out a usable ``src``; keep the
        # existing value in that case.
        if real_src:
            img['src'] = real_src
    return soup

View File

@ -37,7 +37,7 @@ class toiprint(BasicNewsRecipe):
self.title = 'TOI Print Edition ' + dt.strftime('%b %d, %Y') self.title = 'TOI Print Edition ' + dt.strftime('%b %d, %Y')
extra_css = ''' extra_css = '''
.sub { color:#5c5c5c; } .sub { color:#202020; }
.auth { font-size:small; font-weight:bold; color:#202020; } .auth { font-size:small; font-weight:bold; color:#202020; }
.cap { text-align:center; font-size:small; } .cap { text-align:center; font-size:small; }
img { display:block; margin:0 auto; } img { display:block; margin:0 auto; }
@ -71,7 +71,7 @@ class toiprint(BasicNewsRecipe):
continue continue
link = art['ArticleName'] link = art['ArticleName']
page = link.split('_')[-3] page = link.split('_')[-3]
url = page + '-' + link url = page + '/' + link
title = art.get('ArticleTitle', 'unknown') title = art.get('ArticleTitle', 'unknown')
if art.get('ColumnTitle', '') == '': if art.get('ColumnTitle', '') == '':
desc = 'Page No.' + page + ' | ' + art.get('ArticleBody', '') desc = 'Page No.' + page + ' | ' + art.get('ArticleBody', '')
@ -106,4 +106,4 @@ class toiprint(BasicNewsRecipe):
return '<html><body><div>' + body.replace('<br>', '<p>').replace('<br/>', '<p>') + '</div></body></html>' return '<html><body><div>' + body.replace('<br>', '<p>').replace('<br/>', '<p>') + '</div></body></html>'
def print_version(self, url): def print_version(self, url):
return index + '/ArticleZoneJson/' + url.replace('-', '/') + '.json' return index + '/ArticleZoneJson/' + url + '.json'