__license__ = 'GPL v3'
__copyright__ = '2012, Darko Miletic '
'''
www.strategic-culture.org
'''

import time
from calibre import strftime
from calibre.web.feeds.recipes import BasicNewsRecipe


class StrategicCulture(BasicNewsRecipe):
    """Recipe that scrapes the section pages of www.strategic-culture.org.

    The URLs listed in ``feeds`` are fetched as HTML pages and scraped by
    ``parse_index``, which pulls article links out of anchors selected by
    CSS class.
    """

    title = 'Strategic Culture Foundation'
    __author__ = 'Darko Miletic'
    description = 'Online Journal'
    publisher = 'Strategic Culture Foundation'
    category = 'news, politics'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    encoding = 'utf-8'
    use_embedded_content = False
    language = 'en'
    publication_type = 'newsportal'
    masthead_url = 'http://www.strategic-culture.org/img/logo.jpg'
    extra_css = (
        ' body{font-family: Arial, sans-serif}'
        ' h1{font-family: "Times New Roman",Times,serif}'
        ' img{margin-bottom: 0.8em} '
    )

    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    # Keep only headings, paragraphs and the 'cke_pastebin' div.
    keep_only_tags = [
        dict(name=['h1', 'p']),
        dict(name='div', attrs={'id': 'cke_pastebin'}),
    ]
    remove_tags = [dict(name=['object', 'link', 'base', 'meta', 'iframe'])]

    # (section title, section page URL) pairs consumed by parse_index().
    feeds = [
        (u'News', u'http://www.strategic-culture.org/blocks/news.html'),
        (u'Politics', u'http://www.strategic-culture.org/rubrics/politics.html'),
        (u'Economics', u'http://www.strategic-culture.org/rubrics/economics.html'),
        (u'History & Culture', u'http://www.strategic-culture.org/rubrics/history-and-culture.html'),
        (u'Columnists', u'http://www.strategic-culture.org/rubrics/columnists.html'),
    ]

    def print_version(self, url):
        """Return *url* with its ``/news/`` path segment swapped for ``/pview/``.

        NOTE(review): ``/pview/`` is presumably the site's print view --
        confirm against the live site.
        """
        return url.replace('-culture.org/news/', '-culture.org/pview/')

    @staticmethod
    def _date_from_url(url):
        """Format the date embedded in an article URL.

        The last three path segments before the final one are read as
        ``<year>/<month>/<day>``.

        Returns the date formatted as ``%a, %d %b %Y %H:%M:%S +0000``, or an
        empty string when the URL does not carry a parseable date.
        """
        segments = url.split('/')
        try:
            # Too few segments makes the unpacking raise ValueError, the
            # same exception strptime raises for non-date text.
            year, month, day = segments[-4:-1]
            parsed = time.strptime(day + "/" + month + "/" + year, "%d/%m/%Y")
        except ValueError:
            return ''
        return strftime("%a, %d %b %Y %H:%M:%S +0000", parsed)

    def parse_index(self):
        """Scrape every page in ``feeds`` and return the article index.

        Returns a list of ``(feed title, articles)`` tuples, where each
        article is a dict with ``title``, ``date``, ``url`` and
        ``description`` keys.  Within one feed each URL is listed once, in
        page order.  Anchors without an ``href`` are skipped, and a link
        whose URL holds no parseable date gets an empty ``date`` rather than
        aborting the whole download.
        """
        totalfeeds = []
        for feedtitle, feedurl in self.get_feeds():
            self.report_progress(
                0, _('Fetching feed') + ' %s...' % (feedtitle or feedurl)
            )
            soup = self.index_to_soup(feedurl)
            # The news page and the rubric pages are matched with different
            # anchor classes.
            clname = 'sini14' if feedurl.endswith('news.html') else 'h22'

            articles = []
            seen = set()
            for atag in soup.findAll('a', attrs={'class': clname}):
                url = atag.get('href')
                if not url or url in seen:
                    continue
                seen.add(url)
                articles.append({
                    'title': self.tag_to_string(atag),
                    'date': self._date_from_url(url),
                    'url': url,
                    'description': '',
                })
            totalfeeds.append((feedtitle, articles))
        return totalfeeds