# calibre/recipes/argnoticias.recipe


__license__   = 'GPL v3'
__copyright__ = '2013, Darko Miletic <darko.miletic at gmail.com>'

'''
www.argnoticias.com
'''

import time
from calibre import strftime
from calibre.web.feeds.recipes import BasicNewsRecipe

class ArgNoticias(BasicNewsRecipe):
    '''
    Recipe for www.argnoticias.com.

    The section pages listed in ``feeds`` are downloaded as HTML and
    scraped for article links by ``parse_index``.
    '''
    title                 = 'ARG Noticias'
    __author__            = 'Darko Miletic'
    description           = 'Ultimas noticias de Argentina'
    publisher             = 'ARG Noticias'
    category              = 'news, politics, Argentina'
    oldest_article        = 2
    max_articles_per_feed = 100
    no_stylesheets        = True
    encoding              = 'utf-8'
    use_embedded_content  = False
    masthead_url          = 'http://www.argnoticias.com/images/arg-logo-footer.png'
    language              = 'es_AR'
    publication_type      = 'newsportal'
    # Site root; prepended to the site-relative hrefs found on section pages.
    INDEX                 = 'http://www.argnoticias.com'
    extra_css             = ''

    conversion_options = {
        'comment'  : description,
        'tags'     : category,
        'publisher': publisher,
        'language' : language,
    }

    keep_only_tags = [dict(name='div', attrs={'class':['itemHeader','itemBody','itemAuthorBlock']})]

    remove_tags = [
        dict(name=['object','link','base','iframe']),
        dict(name='div', attrs={'class':['b2jsocial_parent','itemSocialSharing']}),
    ]

    # (section title, section page URL) pairs consumed by parse_index.
    feeds = [
        (u'Politica'    , u'http://www.argnoticias.com/index.php/politica'    ),
        (u'Economia'    , u'http://www.argnoticias.com/index.php/economia'    ),
        (u'Sociedad'    , u'http://www.argnoticias.com/index.php/sociedad'    ),
        (u'Mundo'       , u'http://www.argnoticias.com/index.php/mundo'       ),
        (u'Deportes'    , u'http://www.argnoticias.com/index.php/deportes'    ),
        (u'Espectaculos', u'http://www.argnoticias.com/index.php/espectaculos'),
        (u'Tendencias'  , u'http://www.argnoticias.com/index.php/tendencias'  ),
    ]

    def _article_from_item(self, item, date):
        '''
        Build an article dict from one listing element.

        :param item: parsed element (a ``div.Nota`` or an ``li``) that may
                     contain an ``a.moduleItemTitle`` link.
        :param date: preformatted date string stored in the article.
        :return: dict with ``title``, ``date``, ``url`` and ``description``
                 keys, or ``None`` when the element has no usable link.
        '''
        atag = item.find('a', attrs={'class':'moduleItemTitle'})
        if atag is None:
            return None
        href = atag.get('href')
        if not href:
            return None
        if href.startswith(('http://', 'https://')):
            # Already absolute: prefixing INDEX would produce an invalid URL.
            url = href
        else:
            url = self.INDEX + href
        ptag = item.find('div', attrs={'class':'moduleItemIntrotext'})
        return {
            'title'      : self.tag_to_string(atag),
            'date'       : date,
            'url'        : url,
            'description': self.tag_to_string(ptag),
        }

    def parse_index(self):
        '''
        Scrape every section page and collect its article links.

        For each section, ``div.Nota`` elements are processed first and
        ``li`` elements second. A URL is listed only once across all
        sections: the first section in which it appears keeps it.

        :return: list of ``(section title, [article dict, ...])`` tuples,
                 one per entry returned by ``get_feeds()``.
        '''
        totalfeeds = []
        seen_urls = set()
        # The pages are not read for a publication date, so every article
        # is stamped with the time of this run.
        date = strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime())
        for feedtitle, feedurl in self.get_feeds():
            self.report_progress(0, _('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl))
            articles = []
            soup = self.index_to_soup(feedurl)
            candidate_groups = (
                soup.findAll('div', attrs={'class':'Nota'}),
                soup.findAll('li'),
            )
            for group in candidate_groups:
                for item in group:
                    article = self._article_from_item(item, date)
                    if article is None or article['url'] in seen_urls:
                        continue
                    seen_urls.add(article['url'])
                    articles.append(article)
            totalfeeds.append((feedtitle, articles))
        return totalfeeds