calibre/recipes/laprensa_ni.recipe
Kovid Goyal 29cd8d64ea
Change shebangs to python from python2
Also remove a few other miscellaneous references to python2
2020-08-22 18:47:51 +05:30

71 lines
2.4 KiB
Python

#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
laprensa.com.ni
'''
import datetime
import time
from calibre.web.feeds.news import BasicNewsRecipe
class LaPrensa_ni(BasicNewsRecipe):
title = 'La Prensa - Nicaragua'
__author__ = 'Darko Miletic'
description = 'LA PRENSA - EL Diario de los Nicaraguenses'
publisher = 'La Prensa'
oldest_article = 1
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
encoding = 'cp1252'
remove_javascript = True
language = 'es_NI'
months_es = ['enero', 'febrero', 'marzo', 'abril', 'mayo', 'junio',
'julio', 'agosto', 'septiembre', 'octubre', 'noviembre', 'diciembre']
current_month = months_es[datetime.date.today().month - 1]
current_index = time.strftime(
"http://www.laprensa.com.ni/archivo/%Y/" + current_month + "/%d/noticias/")
feeds = [(u'Portada', current_index + 'portada/')]
def print_version(self, url):
return url.replace('.shtml', '_print.shtml')
def preprocess_html(self, soup):
del soup.body['onload']
mtag = '<meta http-equiv="Content-Language" content="es-NI"/>'
soup.head.insert(0, mtag)
atag = soup.find('span', attrs={'class': 'mas_noticias'})
if atag:
atag.extract()
btag = soup.find('a', attrs={'href': '/archivo'})
if btag:
btag.extract()
for item in soup.findAll(style=True):
del item['style']
return soup
def parse_index(self):
totalfeeds = []
lfeeds = self.get_feeds()
for feedobj in lfeeds:
feedtitle, feedurl = feedobj
self.report_progress(0, _('Fetching feed') + ' %s...' %
(feedtitle if feedtitle else feedurl))
articles = []
soup = self.index_to_soup(feedurl)
for item in soup.findAll('a', attrs={'class': ['titular', 'titulonotamed']}):
description = ''
url = feedurl + item['href']
title = self.tag_to_string(item)
date = time.strftime(self.timefmt)
articles.append({
'title': title, 'date': date, 'url': url, 'description': description
})
totalfeeds.append((feedtitle, articles))
return totalfeeds