diff --git a/recipes/ba_herald.recipe b/recipes/ba_herald.recipe new file mode 100644 index 0000000000..e966fd5676 --- /dev/null +++ b/recipes/ba_herald.recipe @@ -0,0 +1,82 @@ +__license__ = 'GPL v3' +__copyright__ = '2012, Darko Miletic ' +''' +www.buenosairesherald.com +''' + +import re +from calibre import strftime +from calibre.web.feeds.news import BasicNewsRecipe +from calibre.ebooks.BeautifulSoup import BeautifulSoup + +class BuenosAiresHerald(BasicNewsRecipe): + title = 'Buenos Aires Herald' + __author__ = 'Darko Miletic' + description = 'A world of information in a few words' + publisher = 'Editorial Nefir S.A.' + category = 'news, politics, Argentina' + oldest_article = 2 + max_articles_per_feed = 200 + no_stylesheets = True + encoding = 'utf8' + use_embedded_content = False + language = 'en_AR' + remove_empty_feeds = True + publication_type = 'newspaper' + masthead_url = 'http://www.buenosairesherald.com/img/logo.jpg' + INDEX = 'http://www.buenosairesherald.com' + extra_css = """ + body{font-family: Arial,Helvetica,sans-serif } + img{margin-bottom: 0.4em; display:block} + h1{font-family: Georgia,serif} + #fecha{text-align: right; font-size: small} + """ + + conversion_options = { + 'comment' : description + , 'tags' : category + , 'publisher' : publisher + , 'language' : language + } + + remove_tags = [dict(name=['meta','link','iframe'])] + keep_only_tags = [dict(attrs={'class':'nota_texto p'})] + + + feeds = [ + (u'Argentina' , u'http://www.buenosairesherald.com/argentina' ) + ,(u'World' , u'http://www.buenosairesherald.com/world' ) + ,(u'Latin America' , u'http://www.buenosairesherald.com/latin-america' ) + ,(u'Entertainment' , u'http://www.buenosairesherald.com/entertainment' ) + ,(u'Sports' , u'http://www.buenosairesherald.com/sports' ) + ] + + def print_version(self, url): + artidraw = url.rpartition('/article/')[2] + artid = artidraw.partition('/')[0] + return 'http://www.buenosairesherald.com/articles/print.aspx?ix=' + artid + + + def parse_index(self): + totalfeeds = [] + lfeeds = self.get_feeds() + for feedobj in lfeeds: + feedtitle, feedurl = feedobj + self.report_progress(0, _('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl)) + articles = [] + soup = self.index_to_soup(feedurl) + for item in soup.findAll('div', attrs={'class':'nota_texto_seccion'}): + description = self.tag_to_string(item.h2) + atag = item.h2.find('a') + if atag and atag.has_key('href'): + url = self.INDEX + atag['href'] + title = description + date = strftime(self.timefmt) + articles.append({ + 'title' :title + ,'date' :date + ,'url' :url + ,'description':description + }) + totalfeeds.append((feedtitle, articles)) + return totalfeeds diff --git a/recipes/icons/ba_herald.png b/recipes/icons/ba_herald.png new file mode 100644 index 0000000000..2b02a4ae93 Binary files /dev/null and b/recipes/icons/ba_herald.png differ