mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
Update ambito.recipe
This commit is contained in:
parent
85ec0e6fbf
commit
6c0bcf75cc
@ -8,7 +8,7 @@ __copyright__ = '2008-2021, Darko Miletic <darko.miletic at gmail.com>'
|
|||||||
ambito.com
|
ambito.com
|
||||||
'''
|
'''
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe, classes
|
||||||
|
|
||||||
|
|
||||||
class Ambito(BasicNewsRecipe):
|
class Ambito(BasicNewsRecipe):
|
||||||
@ -17,12 +17,11 @@ class Ambito(BasicNewsRecipe):
|
|||||||
description = 'Ambito.com con noticias del Diario Ambito Financiero de Buenos Aires'
|
description = 'Ambito.com con noticias del Diario Ambito Financiero de Buenos Aires'
|
||||||
publisher = 'Editorial Nefir S.A.'
|
publisher = 'Editorial Nefir S.A.'
|
||||||
category = 'news, politics, economy, finances, Argentina'
|
category = 'news, politics, economy, finances, Argentina'
|
||||||
oldest_article = 2
|
oldest_article = 1.2
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
encoding = 'utf8'
|
encoding = 'utf-8'
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
remove_empty_feeds = True
|
remove_empty_feeds = True
|
||||||
handle_gzip = True
|
|
||||||
compress_news_images = True
|
compress_news_images = True
|
||||||
scale_news_images_to_device = True
|
scale_news_images_to_device = True
|
||||||
ignore_duplicate_articles = {'url'}
|
ignore_duplicate_articles = {'url'}
|
||||||
@ -41,11 +40,14 @@ class Ambito(BasicNewsRecipe):
|
|||||||
}
|
}
|
||||||
|
|
||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
dict(name='div', attrs={'class': 'detail-header-wrapper'}),
|
classes(
|
||||||
dict(attrs={'class': lambda x: x and 'detail-body' in x.split()}),
|
'detail-highlighted-multimedia news-headline__publication-date news-headline__title'
|
||||||
|
' news-headline__author-wrapper news-headline__article-summary'
|
||||||
|
),
|
||||||
|
dict(name='article', attrs={'class': lambda x: x and 'article-body' in x.split()}),
|
||||||
]
|
]
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name=['object', 'link', 'embed', 'iframe', 'meta', 'link', 'img'])
|
dict(name=['object', 'link', 'embed', 'iframe', 'meta', 'link'])
|
||||||
]
|
]
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
@ -60,3 +62,8 @@ class Ambito(BasicNewsRecipe):
|
|||||||
(u'Espectaculos', u'https://www.ambito.com/rss/espectaculos.xml'),
|
(u'Espectaculos', u'https://www.ambito.com/rss/espectaculos.xml'),
|
||||||
(u'Nacional', u'https://www.ambito.com/rss/nacional.xml')
|
(u'Nacional', u'https://www.ambito.com/rss/nacional.xml')
|
||||||
]
|
]
|
||||||
|
|
||||||
|
def preprocess_html(self, soup):
|
||||||
|
for img in soup.findAll('img', attrs={'data-td-src-property':True}):
|
||||||
|
img['src'] = img['data-td-src-property']
|
||||||
|
return soup
|
||||||
|
@ -37,7 +37,7 @@ class toiprint(BasicNewsRecipe):
|
|||||||
self.title = 'TOI Print Edition ' + dt.strftime('%b %d, %Y')
|
self.title = 'TOI Print Edition ' + dt.strftime('%b %d, %Y')
|
||||||
|
|
||||||
extra_css = '''
|
extra_css = '''
|
||||||
.sub { color:#5c5c5c; }
|
.sub { color:#202020; }
|
||||||
.auth { font-size:small; font-weight:bold; color:#202020; }
|
.auth { font-size:small; font-weight:bold; color:#202020; }
|
||||||
.cap { text-align:center; font-size:small; }
|
.cap { text-align:center; font-size:small; }
|
||||||
img { display:block; margin:0 auto; }
|
img { display:block; margin:0 auto; }
|
||||||
@ -71,7 +71,7 @@ class toiprint(BasicNewsRecipe):
|
|||||||
continue
|
continue
|
||||||
link = art['ArticleName']
|
link = art['ArticleName']
|
||||||
page = link.split('_')[-3]
|
page = link.split('_')[-3]
|
||||||
url = page + '-' + link
|
url = page + '/' + link
|
||||||
title = art.get('ArticleTitle', 'unknown')
|
title = art.get('ArticleTitle', 'unknown')
|
||||||
if art.get('ColumnTitle', '') == '':
|
if art.get('ColumnTitle', '') == '':
|
||||||
desc = 'Page No.' + page + ' | ' + art.get('ArticleBody', '')
|
desc = 'Page No.' + page + ' | ' + art.get('ArticleBody', '')
|
||||||
@ -106,4 +106,4 @@ class toiprint(BasicNewsRecipe):
|
|||||||
return '<html><body><div>' + body.replace('<br>', '<p>').replace('<br/>', '<p>') + '</div></body></html>'
|
return '<html><body><div>' + body.replace('<br>', '<p>').replace('<br/>', '<p>') + '</div></body></html>'
|
||||||
|
|
||||||
def print_version(self, url):
|
def print_version(self, url):
|
||||||
return index + '/ArticleZoneJson/' + url.replace('-', '/') + '.json'
|
return index + '/ArticleZoneJson/' + url + '.json'
|
||||||
|
Loading…
x
Reference in New Issue
Block a user