#!/usr/bin/env python2
'''
impreso.milenio.com
'''
__license__ = 'GPL v3'
__copyright__ = '2010, Brendan Sleight '

from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe

import datetime


class Milenio(BasicNewsRecipe):
    '''Recipe that builds a single feed from the daily "Nacional" index page
    of impreso.milenio.com.'''

    title = u'Milenio-diario'
    __author__ = 'Bmsleight'
    language = 'es_MX'
    description = 'Milenio-diario'
    oldest_article = 10
    max_articles_per_feed = 100
    no_stylesheets = False
    index = 'http://impreso.milenio.com'

    # Only the main content container of each downloaded page is kept.
    keep_only_tags = [
        {'name': 'div', 'attrs': {'class': 'content'}}
    ]

    def parse_index(self):
        '''Collect the article links listed on today's "Nacional" index page.

        The index URL is built from ``self.index`` and today's date in
        ``%Y/%m/%d`` form, e.g.
        http://impreso.milenio.com/Nacional/2010/09/01/

        Every anchor inside ``<div class="content">`` whose ``href`` contains
        ``/node/`` becomes one article entry.

        Returns a list holding one ``('Articles', articles)`` tuple, where
        ``articles`` is a list of dicts with the keys ``title``, ``date``,
        ``url`` and ``description``.  The list of articles is empty when the
        content container is missing or holds no matching links.
        '''
        today_path = datetime.date.today().strftime("%Y/%m/%d")
        soup = self.index_to_soup(self.index + "/Nacional/" + today_path)
        maincontent = soup.find('div', attrs={'class': 'content'})

        articles = []
        if maincontent:
            for link in maincontent.findAll('a', href=True):
                href = link['href']
                # Test the attribute value directly: wrapping it in str()
                # can raise UnicodeEncodeError on Python 2 for non-ASCII
                # hrefs.
                if "/node/" not in href:
                    continue
                # Relative links need the site prefix; an absolute link
                # would be corrupted by prefixing it again.
                if href.startswith(('http://', 'https://')):
                    url = href
                else:
                    url = self.index + href
                articles.append({
                    'title': self.tag_to_string(link),
                    # Every entry is stamped with the current time rendered
                    # through the recipe's timefmt.
                    'date': strftime(self.timefmt),
                    'url': url,
                    'description': '',
                })

        return [('Articles', articles)]