Add new recipes for The St. Petersburg Times and Clarin (thanks to Darko Miletic)

2025-07-09 03:04:10 -04:00 · 2008-12-01 10:24:45 -08:00 · 2008-12-01 10:24:45 -08:00 · 16cad7b1bc
commit 16cad7b1bc
parent 02c9864c6a
3 changed files with 82 additions and 1 deletions
--- a/src/calibre/web/feeds/recipes/__init__.py
+++ b/src/calibre/web/feeds/recipes/__init__.py
@@ -15,7 +15,8 @@ recipe_modules = [
           'demorgen_be', 'de_standaard', 'ap', 'barrons', 'chr_mon', 'cnn', 'faznet',
           'jpost', 'jutarnji', 'nasa', 'reuters', 'spiegelde', 'wash_post', 'zeitde',
           'blic', 'novosti', 'danas', 'vreme', 'times_online', 'the_scotsman',
-           'nytimes_sub', 'security_watch', 'cyberpresse',
+           'nytimes_sub', 'security_watch', 'cyberpresse', 'st_petersburg_times',
           'clarin',
          ]
 import re, imp, inspect, time, os
--- a/src/calibre/web/feeds/recipes/clarin.py
+++ b/src/calibre/web/feeds/recipes/clarin.py
@@ -0,0 +1,42 @@
 #!/usr/bin/env  python
 __license__   = 'GPL v3'
 __copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
 '''
 clarin.com
 '''
 from calibre.web.feeds.news import BasicNewsRecipe
 class Clarin(BasicNewsRecipe):
    """Recipe for the newspaper Clarin (clarin.com).

    Articles are discovered through the section RSS feeds listed in
    ``feeds``; ``get_article_url`` rewrites each entry's link so that the
    printer-friendly page is downloaded instead of the regular one.
    """
    title                 = u'Clarin'
    __author__            = 'Darko Miletic'
    description           = 'Noticias de Argentina y mundo'
    oldest_article        = 2
    max_articles_per_feed = 100
    use_embedded_content  = False
    # One download at a time with a delay between requests.
    # NOTE(review): presumably chosen to go easy on the server - confirm.
    simultaneous_downloads = 1
    delay = 1

    # Elements removed from every downloaded page.
    remove_tags = [
        dict(name='a',   attrs={'class': 'Imp'}),
        dict(name='div', attrs={'class': 'Perma'}),
        dict(name='h1',  text='Imprimir'),
    ]

    # (section title, RSS feed URL) pairs.
    feeds = [
        (u'Ultimo Momento', u'http://www.clarin.com/diario/hoy/um/sumariorss.xml'),
        (u'El Pais',        u'http://www.clarin.com/diario/hoy/elpais.xml'),
        (u'Opinion',        u'http://www.clarin.com/diario/hoy/opinion.xml'),
        (u'El Mundo',       u'http://www.clarin.com/diario/hoy/elmundo.xml'),
        (u'Sociedad',       u'http://www.clarin.com/diario/hoy/sociedad.xml'),
        (u'La Ciudad',      u'http://www.clarin.com/diario/hoy/laciudad.xml'),
        (u'Policiales',     u'http://www.clarin.com/diario/hoy/policiales.xml'),
        (u'Deportes',       u'http://www.clarin.com/diario/hoy/deportes.xml'),
    ]

    def get_article_url(self, article):
        """Return the printer-friendly URL for a feed entry.

        The page id is the text between the first ``-0`` in the entry's
        link and the following ``.``; it is appended to the address of the
        site's print page.

        :param article: feed entry; only its ``'link'`` value is read.
        :return: the print-page URL, or the entry's link unchanged (possibly
                 ``None``) when the link is missing or holds no page id.
        """
        link = article.get('link', None)
        if not link:
            # No link on this entry: calling .partition() on None would
            # raise AttributeError, so hand back what we were given.
            return link
        page_id = link.partition('-0')[-1].partition('.')[0]
        if not page_id:
            # Link does not follow the expected pattern; a print URL with
            # an empty pagid would be useless, so keep the original link.
            return link
        return 'http://www.servicios.clarin.com/notas/jsp/clarin/v9/notas/imprimir.jsp?pagid=' + page_id
--- a/src/calibre/web/feeds/recipes/st_petersburg_times.py
+++ b/src/calibre/web/feeds/recipes/st_petersburg_times.py
@@ -0,0 +1,38 @@
 #!/usr/bin/env  python
 __license__   = 'GPL v3'
 __copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
 '''
 sptimes.ru
 '''
 from calibre import strftime
 from calibre.web.feeds.news import BasicNewsRecipe
 class PetersburgTimes(BasicNewsRecipe):
    """Recipe for The St. Petersburg Times (sptimes.ru).

    No feeds are declared; ``parse_index`` builds the article list from
    the links found on the page at ``INDEX``.
    """
    title                 = u'The St. Petersburg Times'
    __author__            = 'Darko Miletic'
    description           = 'News from Russia'
    oldest_article        = 7
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
    # Front page address; also used as the prefix for article hrefs.
    INDEX = 'http://www.sptimes.ru'

    def parse_index(self):
        """Collect the articles linked from the front page.

        Every ``<a class="story_link_o">`` that has an ``href`` becomes one
        article entry stamped with the current date.

        :return: a one-element list ``[(feed_title, articles)]`` where
                 ``articles`` is a list of dicts with the keys ``title``,
                 ``date``, ``url`` and ``description``.
        """
        soup = self.index_to_soup(self.INDEX)
        # The date is the same for every entry, so format it once.
        c_date = strftime('%A, %d %B, %Y')

        articles = []
        for item in soup.findAll('a', attrs={'class':'story_link_o'}):
            if not item.has_key('href'):
                continue
            # NOTE(review): action_id=100 is presumably the print view of
            # the story - confirm against the site.
            url = self.INDEX + item['href'].replace('action_id=2','action_id=100')
            articles.append({
                'title': self.tag_to_string(item),
                'date': c_date,
                'url': url,
                'description': '',
            })

        # Use the page's <title> as the feed title, falling back to the
        # recipe title when <head>/<title> is missing or has no plain text.
        head = soup.head
        title_tag = head.title if head is not None else None
        feed_title = title_tag.string if title_tag is not None else None
        if not feed_title:
            feed_title = self.title
        return [(feed_title, articles)]