From 16cad7b1bc0accc11ba19cf168ef926bd1ea2e70 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Mon, 1 Dec 2008 10:24:45 -0800
Subject: [PATCH] Add new recipes for The St. Petersburg Times and Clarin (thanks to Darko Miletic)

---
Reviewer notes (commentary placed after the '---' separator, outside the
commit message and before the first diff header):

* This patch registers two recipe modules ('st_petersburg_times', 'clarin')
  in recipe_modules and adds one new file for each.
* The line structure below was rebuilt from a copy in which all newlines and
  runs of whitespace had been collapsed. Line counts were checked against the
  hunk headers and the diffstat, but the original indentation and column
  alignment are not recoverable; 4-space indentation is used instead.
  TODO confirm: the three leading context lines of the __init__.py hunk are a
  best-effort split and their indentation is assumed. Verify against the
  target tree, or apply with a whitespace-tolerant option, before relying on
  this patch applying cleanly.
* The sender address in the From: header and the address inside the
  __copyright__ strings appear to have been stripped from the copy this was
  rebuilt from; they were not re-invented here.
* NOTE(review): Clarin.get_article_url calls .partition() directly on
  article.get('link', None), so a feed entry without a 'link' key raises
  AttributeError instead of being skipped.

 src/calibre/web/feeds/recipes/__init__.py     |  3 +-
 src/calibre/web/feeds/recipes/clarin.py       | 42 +++++++++++++++++++
 .../web/feeds/recipes/st_petersburg_times.py  | 38 +++++++++++++++++
 3 files changed, 82 insertions(+), 1 deletion(-)
 create mode 100644 src/calibre/web/feeds/recipes/clarin.py
 create mode 100644 src/calibre/web/feeds/recipes/st_petersburg_times.py

diff --git a/src/calibre/web/feeds/recipes/__init__.py b/src/calibre/web/feeds/recipes/__init__.py
index 86fe996a7d..a18d42e817 100644
--- a/src/calibre/web/feeds/recipes/__init__.py
+++ b/src/calibre/web/feeds/recipes/__init__.py
@@ -15,7 +15,8 @@ recipe_modules = [
     'demorgen_be', 'de_standaard', 'ap', 'barrons', 'chr_mon', 'cnn', 'faznet',
     'jpost', 'jutarnji', 'nasa', 'reuters', 'spiegelde', 'wash_post', 'zeitde',
     'blic', 'novosti', 'danas', 'vreme', 'times_online', 'the_scotsman',
-    'nytimes_sub', 'security_watch', 'cyberpresse',
+    'nytimes_sub', 'security_watch', 'cyberpresse', 'st_petersburg_times',
+    'clarin',
     ]
 
 import re, imp, inspect, time, os
diff --git a/src/calibre/web/feeds/recipes/clarin.py b/src/calibre/web/feeds/recipes/clarin.py
new file mode 100644
index 0000000000..e4f3c4b501
--- /dev/null
+++ b/src/calibre/web/feeds/recipes/clarin.py
@@ -0,0 +1,42 @@
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__copyright__ = '2008, Darko Miletic '
+'''
+clarin.com
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Clarin(BasicNewsRecipe):
+    title = u'Clarin'
+    __author__ = 'Darko Miletic'
+    description = 'Noticias de Argentina y mundo'
+    oldest_article = 2
+    max_articles_per_feed = 100
+    use_embedded_content = False
+    simultaneous_downloads = 1
+    delay = 1
+
+    remove_tags = [
+        dict(name='a' , attrs={'class':'Imp' })
+        ,dict(name='div' , attrs={'class':'Perma' })
+        ,dict(name='h1' , text='Imprimir' )
+    ]
+
+    feeds = [
+        (u'Ultimo Momento', u'http://www.clarin.com/diario/hoy/um/sumariorss.xml')
+        ,(u'El Pais' , u'http://www.clarin.com/diario/hoy/elpais.xml' )
+        ,(u'Opinion' , u'http://www.clarin.com/diario/hoy/opinion.xml' )
+        ,(u'El Mundo' , u'http://www.clarin.com/diario/hoy/elmundo.xml' )
+        ,(u'Sociedad' , u'http://www.clarin.com/diario/hoy/sociedad.xml' )
+        ,(u'La Ciudad' , u'http://www.clarin.com/diario/hoy/laciudad.xml' )
+        ,(u'Policiales' , u'http://www.clarin.com/diario/hoy/policiales.xml' )
+        ,(u'Deportes' , u'http://www.clarin.com/diario/hoy/deportes.xml' )
+    ]
+
+    def get_article_url(self, article):
+        artl = article.get('link', None)
+        rest = artl.partition('-0')[-1]
+        lmain = rest.partition('.')[0]
+        return 'http://www.servicios.clarin.com/notas/jsp/clarin/v9/notas/imprimir.jsp?pagid=' + lmain
\ No newline at end of file
diff --git a/src/calibre/web/feeds/recipes/st_petersburg_times.py b/src/calibre/web/feeds/recipes/st_petersburg_times.py
new file mode 100644
index 0000000000..8734136752
--- /dev/null
+++ b/src/calibre/web/feeds/recipes/st_petersburg_times.py
@@ -0,0 +1,38 @@
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__copyright__ = '2008, Darko Miletic '
+'''
+sptimes.ru
+'''
+
+from calibre import strftime
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class PetersburgTimes(BasicNewsRecipe):
+    title = u'The St. Petersburg Times'
+    __author__ = 'Darko Miletic'
+    description = 'News from Russia'
+    oldest_article = 7
+    max_articles_per_feed = 100
+    no_stylesheets = True
+    use_embedded_content = False
+    INDEX = 'http://www.sptimes.ru'
+
+    def parse_index(self):
+        articles = []
+        soup = self.index_to_soup(self.INDEX)
+
+        for item in soup.findAll('a', attrs={'class':'story_link_o'}):
+            if item.has_key('href'):
+                url = self.INDEX + item['href'].replace('action_id=2','action_id=100')
+                title = self.tag_to_string(item)
+                c_date = strftime('%A, %d %B, %Y')
+                description = ''
+                articles.append({
+                    'title':title,
+                    'date':c_date,
+                    'url':url,
+                    'description':description
+                    })
+        return [(soup.head.title.string, articles)]
\ No newline at end of file