#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic '
'''
laprensa.com.ni
'''

import datetime
import time

from calibre.web.feeds.news import BasicNewsRecipe


class LaPrensa_ni(BasicNewsRecipe):
    """Calibre recipe for La Prensa (Nicaragua).

    The feed list is a single page, "Portada", whose URL is built from
    today's date and the Spanish month name. Articles are discovered by
    scraping that page in ``parse_index``.
    """

    title = 'La Prensa - Nicaragua'
    __author__ = 'Darko Miletic'
    description = 'LA PRENSA - EL Diario de los Nicaraguenses'
    publisher = 'La Prensa'
    oldest_article = 1
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'cp1252'
    remove_javascript = True
    language = 'es_NI'

    # Spanish month names, indexed by (month number - 1); used as a path
    # segment of the archive URL.
    months_es = [
        'enero', 'febrero', 'marzo', 'abril', 'mayo', 'junio',
        'julio', 'agosto', 'septiembre', 'octubre', 'noviembre', 'diciembre',
    ]

    # Take a single snapshot of "today" so the month name and the %Y/%d
    # fields always describe the same day, even when the class body is
    # evaluated right around midnight.
    _today = datetime.date.today()
    current_month = months_es[_today.month - 1]
    current_index = _today.strftime(
        "http://www.laprensa.com.ni/archivo/%Y/" + current_month + "/%d/noticias/")

    feeds = [(u'Portada', current_index + 'portada/')]

    def print_version(self, url):
        """Return ``url`` with every '.shtml' replaced by '_print.shtml'."""
        return url.replace('.shtml', '_print.shtml')

    def preprocess_html(self, soup):
        """Strip unwanted attributes and elements from a parsed article.

        Removes the body's ``onload`` attribute, the first
        ``span.mas_noticias`` element, the first anchor whose href is
        '/archivo', and every inline ``style`` attribute.

        :param soup: parsed document (BeautifulSoup-style tree).
        :return: the same ``soup`` object, modified in place.
        """
        body = soup.body
        if body is not None:  # tolerate documents without a <body>
            del body['onload']

        # NOTE(review): mtag is an empty string in this source; presumably
        # it once held a <meta> tag whose markup was lost -- confirm
        # against the upstream recipe before changing it.
        mtag = ''
        head = soup.head
        if head is not None:  # tolerate documents without a <head>
            head.insert(0, mtag)

        more_news = soup.find('span', attrs={'class': 'mas_noticias'})
        if more_news is not None:
            more_news.extract()

        archive_link = soup.find('a', attrs={'href': '/archivo'})
        if archive_link is not None:
            archive_link.extract()

        for styled in soup.findAll(style=True):
            del styled['style']
        return soup

    def parse_index(self):
        """Build the list of feeds by scraping each feed's index page.

        For every ``(title, url)`` pair returned by ``self.get_feeds()``
        the page is fetched and each anchor with class 'titular' or
        'titulonotamed' becomes one article entry.

        :return: list of ``(feed_title, articles)`` tuples, where
            ``articles`` is a list of dicts with the keys 'title',
            'date', 'url' and 'description'.
        """
        totalfeeds = []
        for feedtitle, feedurl in self.get_feeds():
            self.report_progress(
                0,
                _('Fetching feed') + ' %s...' % (feedtitle if feedtitle else feedurl))
            soup = self.index_to_soup(feedurl)
            # Every article on the page gets the same date stamp, so it is
            # computed once per feed.
            date = time.strftime(self.timefmt)
            articles = []
            for item in soup.findAll('a', attrs={'class': ['titular', 'titulonotamed']}):
                href = item.get('href')
                if not href:
                    # An anchor without a target cannot be fetched; skip it
                    # instead of failing the whole download.
                    continue
                articles.append({
                    'title': self.tag_to_string(item),
                    'date': date,
                    # assumes hrefs are relative to the feed page -- TODO confirm
                    'url': feedurl + href,
                    'description': '',
                })
            totalfeeds.append((feedtitle, articles))
        return totalfeeds