Add new recipes for The St. Petersburg Times and Clarin (thanks to Darko Miletic)

2025-07-09 03:04:10 -04:00 · 2008-12-01 10:24:45 -08:00 · 2008-12-01 10:24:45 -08:00 · 16cad7b1bc
commit 16cad7b1bc
parent 02c9864c6a
3 changed files with 82 additions and 1 deletions
--- a/src/calibre/web/feeds/recipes/init.py
+++ b/src/calibre/web/feeds/recipes/init.py
@ -15,7 +15,8 @@ recipe_modules = [
           'demorgen_be', 'de_standaard', 'ap', 'barrons', 'chr_mon', 'cnn', 'faznet',
           'jpost', 'jutarnji', 'nasa', 'reuters', 'spiegelde', 'wash_post', 'zeitde',
           'blic', 'novosti', 'danas', 'vreme', 'times_online', 'the_scotsman',
-           'nytimes_sub', 'security_watch', 'cyberpresse',
+           'nytimes_sub', 'security_watch', 'cyberpresse', 'st_petersburg_times',
+           'clarin',
          ]

 import re, imp, inspect, time, os
--- a/src/calibre/web/feeds/recipes/clarin.py
+++ b/src/calibre/web/feeds/recipes/clarin.py
@ -0,0 +1,42 @@
+#!/usr/bin/env  python
+
+__license__   = 'GPL v3'
+__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
+'''
+clarin.com
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Clarin(BasicNewsRecipe):
+    '''
+    Recipe for the Argentine newspaper Clarin (clarin.com).
+
+    Articles are listed from the RSS feeds below; each one is then fetched
+    through the site's print page (see get_article_url).
+    '''
+    title                 = u'Clarin'
+    __author__            = 'Darko Miletic'
+    description           = 'Noticias de Argentina y mundo'
+    oldest_article        = 2
+    max_articles_per_feed = 100
+    use_embedded_content  = False
+    # A single download at a time, with a delay of 1 between requests
+    # (presumably seconds -- confirm against BasicNewsRecipe).
+    simultaneous_downloads = 1
+    delay = 1
+
+    # Print-page chrome that should not end up in the output.
+    remove_tags = [
+        dict(name='a'   , attrs={'class':'Imp'   }),
+        dict(name='div' , attrs={'class':'Perma' }),
+        dict(name='h1'  , text='Imprimir'         ),
+    ]
+
+    feeds = [
+        (u'Ultimo Momento', u'http://www.clarin.com/diario/hoy/um/sumariorss.xml'),
+        (u'El Pais'       , u'http://www.clarin.com/diario/hoy/elpais.xml'       ),
+        (u'Opinion'       , u'http://www.clarin.com/diario/hoy/opinion.xml'      ),
+        (u'El Mundo'      , u'http://www.clarin.com/diario/hoy/elmundo.xml'      ),
+        (u'Sociedad'      , u'http://www.clarin.com/diario/hoy/sociedad.xml'     ),
+        (u'La Ciudad'     , u'http://www.clarin.com/diario/hoy/laciudad.xml'     ),
+        (u'Policiales'    , u'http://www.clarin.com/diario/hoy/policiales.xml'   ),
+        (u'Deportes'      , u'http://www.clarin.com/diario/hoy/deportes.xml'     ),
+    ]
+
+    def get_article_url(self, article):
+        '''
+        Return the URL of the print version of `article`.
+
+        The page id is the part of the feed link between the first '-0'
+        and the next '.'; it is appended to the imprimir.jsp print URL.
+
+        Returns None when the feed entry has no link, and the unmodified
+        link when no page id can be extracted from it.
+        '''
+        link = article.get('link', None)
+        if not link:
+            # Calling partition() on a missing link used to raise
+            # AttributeError and abort the download.
+            return None
+        page_id = link.partition('-0')[-1].partition('.')[0]
+        if not page_id:
+            # No '-0<id>.' part in the link: a print URL with an empty
+            # pagid is useless, so keep the link the feed supplied.
+            return link
+        return 'http://www.servicios.clarin.com/notas/jsp/clarin/v9/notas/imprimir.jsp?pagid=' + page_id
--- a/src/calibre/web/feeds/recipes/st_petersburg_times.py
+++ b/src/calibre/web/feeds/recipes/st_petersburg_times.py
@ -0,0 +1,38 @@
+#!/usr/bin/env  python
+
+__license__   = 'GPL v3'
+__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
+'''
+sptimes.ru
+'''
+
+from calibre import strftime
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class PetersburgTimes(BasicNewsRecipe):
+    '''
+    Recipe for The St. Petersburg Times (sptimes.ru).
+
+    No RSS feed is used: parse_index collects the story links found on
+    the front page at INDEX.
+    '''
+    title                 = u'The St. Petersburg Times'
+    __author__            = 'Darko Miletic'
+    description           = 'News from Russia'
+    oldest_article        = 7
+    max_articles_per_feed = 100
+    no_stylesheets        = True
+    use_embedded_content  = False
+    INDEX = 'http://www.sptimes.ru'
+
+    def parse_index(self):
+        '''
+        Build the article list from the front page.
+
+        Returns a list holding one (feed title, articles) tuple. Each
+        article is a dict with the keys 'title', 'date', 'url' and
+        'description'. The feed title is the text of the page's <title>,
+        or the recipe title when the page does not provide one.
+        '''
+        soup = self.index_to_soup(self.INDEX)
+
+        # No date is read from the page: every article gets the same
+        # stamp, so it is formatted once instead of once per link.
+        c_date = strftime('%A, %d %B, %Y')
+
+        articles = []
+        for item in soup.findAll('a', attrs={'class':'story_link_o'}):
+            if not item.has_key('href'):
+                continue
+            articles.append({
+                'title': self.tag_to_string(item),
+                'date': c_date,
+                # hrefs are appended to INDEX with action_id=2 swapped for
+                # action_id=100 (presumably the print view -- confirm).
+                'url': self.INDEX + item['href'].replace('action_id=2','action_id=100'),
+                'description': '',
+            })
+
+        # soup.head / head.title are None when the tag is missing, and
+        # .string is None for an empty or nested <title>; fall back to the
+        # recipe title rather than raising or returning None as the title.
+        head = soup.head
+        title_tag = None
+        if head is not None:
+            title_tag = head.title
+        feed_title = None
+        if title_tag is not None:
+            feed_title = title_tag.string
+        if not feed_title:
+            feed_title = self.title
+        return [(feed_title, articles)]