__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic'
'''
akter.co.rs
'''

import re

from calibre.web.feeds.news import BasicNewsRecipe


class Akter(BasicNewsRecipe):
    title = 'AKTER'
    __author__ = 'Darko Miletic'
    description = 'AKTER - nedeljni politicki magazin savremene Srbije'
    publisher = 'Akter Media Group d.o.o.'
    category = ('vesti, online vesti, najnovije vesti, politika, sport, ekonomija, biznis, '
                'finansije, berza, kultura, zivot, putovanja, auto, automobili, tehnologija, '
                'politicki magazin, dogadjaji, desavanja, lifestyle, zdravlje, zdravstvo, vest, '
                'novine, nedeljnik, srbija, novi sad, vojvodina, svet, drustvo, zabava, '
                'republika srpska, beograd, intervju, komentar, reportaza, arhiva vesti, '
                'news, serbia, politics')
    oldest_article = 8
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'
    masthead_url = 'http://www.akter.co.rs/templates/gk_thenews2/images/style2/logo.png'
    language = 'sr'
    publication_type = 'magazine'
    remove_empty_feeds = True
    PREFIX = 'http://www.akter.co.rs'
    extra_css = """
        @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
        .article_description,body{font-family: Arial,Helvetica,sans1,sans-serif}
        .color-2{display:block; margin-bottom: 10px; padding: 5px 10px;
                 border-left: 1px solid #D00000; color: #D00000}
        img{margin-bottom: 0.8em}
    """

    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
        'linearize_tables': True,
    }

    # Replace the Serbian letter Dj (U+0110) with the visually similar Eth (U+00D0),
    # a common workaround for reader fonts that lack the former glyph.
    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

    feeds = [
        (u'Politika', u'http://www.akter.co.rs/index.php/politikaprint.html'),
        (u'Ekonomija', u'http://www.akter.co.rs/index.php/ekonomijaprint.html'),
        (u'Life&Style', u'http://www.akter.co.rs/index.php/lsprint.html'),
        (u'Sport', u'http://www.akter.co.rs/index.php/sportprint.html'),
    ]

    def preprocess_html(self, soup):
        # Strip inline styles and normalize images for Adobe Digital Editions rendering.
        for item in soup.findAll(style=True):
            del item['style']
        return self.adeify_images(soup)

    def print_version(self, url):
        # The site runs on Joomla; this query string requests the
        # printer-friendly, single-page view of an article.
        return url + '?tmpl=component&print=1&page='

    def parse_index(self):
        # The feed URLs are section index pages, not RSS feeds, so scrape the
        # article links out of the listing table rows instead.
        totalfeeds = []
        lfeeds = self.get_feeds()
        for feedobj in lfeeds:
            feedtitle, feedurl = feedobj
            self.report_progress(0, _('Fetching feed') + ' %s...' % (feedtitle if feedtitle else feedurl))
            articles = []
            soup = self.index_to_soup(feedurl)
            for item in soup.findAll(attrs={'class': ['sectiontableentry1', 'sectiontableentry2']}):
                link = item.find('a')
                url = self.PREFIX + link['href']
                title = self.tag_to_string(link)
                articles.append({
                    'title': title,
                    'date': '',
                    'url': url,
                    'description': '',
                })
            totalfeeds.append((feedtitle, articles))
        return totalfeeds
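
# A quick way to exercise this recipe locally (a sketch, assuming a standard
# calibre installation; the filename 'akter.recipe' is illustrative):
#
#   ebook-convert akter.recipe output.epub --test
#
# The --test flag makes calibre download only a couple of articles from the
# first two feeds, which is enough to verify parse_index() and print_version()
# without fetching every section.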