# calibre/recipes/strategic_culture.recipe


__license__   = 'GPL v3'
__copyright__ = '2012, Darko Miletic <darko.miletic at gmail.com>'

'''
www.strategic-culture.org
'''

import time
from calibre import strftime
from calibre.web.feeds.recipes import BasicNewsRecipe

class StrategicCulture(BasicNewsRecipe):
    """Recipe for www.strategic-culture.org (Strategic Culture Foundation).

    The entries in ``feeds`` are plain HTML listing pages, so ``parse_index``
    is overridden to scrape the article links from each of them.
    """

    title                 = 'Strategic Culture Foundation'
    __author__            = 'Darko Miletic'
    description           = 'Online Journal'
    publisher             = 'Strategic Culture Foundation'
    category              = 'news, politics'
    oldest_article        = 7
    max_articles_per_feed = 100
    no_stylesheets        = True
    encoding              = 'utf-8'
    use_embedded_content  = False
    language              = 'en'
    publication_type      = 'newsportal'
    masthead_url          = 'http://www.strategic-culture.org/img/logo.jpg'
    extra_css             = '''
                             body{font-family: Arial, sans-serif}
                             h1{font-family: "Times New Roman",Times,serif}
                             img{margin-bottom: 0.8em}
                            '''

    # Metadata handed to the conversion step; mirrors the attributes above.
    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    # Only headings, paragraphs and the 'cke_pastebin' div are kept.
    keep_only_tags = [
        dict(name=['h1', 'p']),
        dict(name='div', attrs={'id': 'cke_pastebin'}),
    ]

    remove_tags = [dict(name=['object', 'link', 'base', 'meta', 'iframe'])]

    # (section title, listing page URL) pairs consumed by parse_index().
    feeds = [
        (u'News', u'http://www.strategic-culture.org/blocks/news.html'),
        (u'Politics', u'http://www.strategic-culture.org/rubrics/politics.html'),
        (u'Economics', u'http://www.strategic-culture.org/rubrics/economics.html'),
        (u'History & Culture', u'http://www.strategic-culture.org/rubrics/history-and-culture.html'),
        (u'Columnists', u'http://www.strategic-culture.org/rubrics/columnists.html'),
    ]

    def print_version(self, url):
        """Return *url* with the '/news/' path rewritten to '/pview/'."""
        return url.replace('-culture.org/news/', '-culture.org/pview/')

    def _date_from_url(self, url):
        """Build the article date string from the path segments of *url*.

        The three segments before the last one are read as year, month and
        day (``.../YYYY/MM/DD/<page>``).  Returns an empty string when the
        URL does not follow that layout, so that a single unexpected link
        cannot abort the whole download.
        """
        try:
            year, month, day = url.split('/')[-4:-1]
            parsed = time.strptime(day + "/" + month + "/" + year, "%d/%m/%Y")
        except ValueError:
            # Too few path segments, or segments that are not a valid date.
            return ''
        return strftime("%a, %d %b %Y %H:%M:%S +0000", parsed)

    def parse_index(self):
        """Scrape every listing page in ``feeds`` for article links.

        Returns a list of ``(feed title, articles)`` tuples where each
        article is a dict with 'title', 'date', 'url' and 'description'
        keys.  Links repeated within one listing page are reported once,
        and anchors without an ``href`` are skipped.
        """
        totalfeeds = []
        for feedtitle, feedurl in self.get_feeds():
            self.report_progress(0, _('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl))
            soup = self.index_to_soup(feedurl)
            # The news block marks its article links with a different CSS
            # class than the rubric pages do.
            clname = 'sini14' if feedurl.endswith('news.html') else 'h22'
            articles = []
            seen = set()
            for atag in soup.findAll('a', attrs={'class': clname}):
                url = atag.get('href')
                if not url or url in seen:
                    continue
                seen.add(url)
                articles.append({
                    'title': self.tag_to_string(atag),
                    'date': self._date_from_url(url),
                    'url': url,
                    'description': '',
                })
            totalfeeds.append((feedtitle, articles))
        return totalfeeds