This commit is contained in:
Kovid Goyal 2024-07-26 10:47:11 +05:30
parent 21583ad1d0
commit 28340111d1
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
2 changed files with 10 additions and 16 deletions

View File

@@ -37,12 +37,6 @@ class ilCorriereEn(BasicNewsRecipe):
remove_javascript = True remove_javascript = True
no_stylesheets = True no_stylesheets = True
def get_article_url(self, article):
articleUrl = article.get('link')
segments = articleUrl.split('/')
basename = '/'.join(segments[:3]) + '/' + \
'International/english/articoli/'
recipe_specific_options = { recipe_specific_options = {
'days': { 'days': {
'short': 'Oldest article to download from this news source. In days ', 'short': 'Oldest article to download from this news source. In days ',
@@ -57,7 +51,13 @@ class ilCorriereEn(BasicNewsRecipe):
if d and isinstance(d, str): if d and isinstance(d, str):
self.oldest_article = float(d) self.oldest_article = float(d)
# the date has to be redone with the url structure def get_article_url(self, article):
articleUrl = article.get('link')
segments = articleUrl.split('/')
basename = '/'.join(segments[:3]) + '/' + \
'International/english/articoli/'
# the date has to be redone with the url structure
mlist1 = ['gennaio', 'febbraio', 'marzo', 'aprile', 'maggio', 'giugno', mlist1 = ['gennaio', 'febbraio', 'marzo', 'aprile', 'maggio', 'giugno',
'luglio', 'agosto', 'settembre', 'ottobre', 'novembre', 'dicembre'] 'luglio', 'agosto', 'settembre', 'ottobre', 'novembre', 'dicembre']
mlist2 = ['01', '02', '03', '04', '05', mlist2 = ['01', '02', '03', '04', '05',
@@ -71,7 +71,7 @@ class ilCorriereEn(BasicNewsRecipe):
newDateUrl = '20' + myDate[0] + '/' + noMonth + '/' + myDate[2] + '/' newDateUrl = '20' + myDate[0] + '/' + noMonth + '/' + myDate[2] + '/'
# clean the article title # clean the article title
articleURLseg = segments[5].split('-') articleURLseg = segments[5].split('-')
myArticle = (articleURLseg[0])[:-9] + '.shtml' myArticle = (articleURLseg[0])[:-9] + '.shtml'

View File

@@ -22,7 +22,7 @@ class elcorreo(BasicNewsRecipe):
remove_empty_feeds = True remove_empty_feeds = True
resolve_internal_links = True resolve_internal_links = True
max_articles_per_feed = 25 # articles max_articles_per_feed = 25 # articles
compress_news_images = True compress_news_images = True
recipe_specific_options = { recipe_specific_options = {
'days': { 'days': {
@@ -63,7 +63,7 @@ class elcorreo(BasicNewsRecipe):
dict(attrs={'data-voc-component':['dropdown', 'modal', 'slider-grab']}), dict(attrs={'data-voc-component':['dropdown', 'modal', 'slider-grab']}),
classes( classes(
'v-mdl-ath__img-c v-adv v-i-b v-mdl-ath__c--2 v-d-cmp-adv v-d-cmp-nws ' 'v-mdl-ath__img-c v-adv v-i-b v-mdl-ath__c--2 v-d-cmp-adv v-d-cmp-nws '
'v-pill-m--zoom v-stk-adv slider-grab g-artboard v-d-cmp-rld v-pill-m--glly' 'v-pill-m--zoom v-stk-adv slider-grab g-artboard v-d-cmp-rld v-pill-m--glly'
) )
] ]
@@ -96,12 +96,6 @@ class elcorreo(BasicNewsRecipe):
} }
} }
def __init__(self, *args, **kwargs):
BasicNewsRecipe.__init__(self, *args, **kwargs)
d = self.recipe_specific_options.get('days')
if d and isinstance(d, str):
self.oldest_article = float(d)
def get_browser(self, *args, **kwargs): def get_browser(self, *args, **kwargs):
kwargs['user_agent'] = 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)' kwargs['user_agent'] = 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)'
br = BasicNewsRecipe.get_browser(self, *args, **kwargs) br = BasicNewsRecipe.get_browser(self, *args, **kwargs)