__license__ = 'GPL v3'
__copyright__ = '2013, Darko Miletic '
'''
www.nezavisne.com
'''

from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe


class NezavisneNovine(BasicNewsRecipe):
    """Calibre recipe that fetches the Nezavisne novine FeedBurner feeds."""

    # --- Descriptive metadata -------------------------------------------
    title = 'Nezavisne novine'
    __author__ = 'Darko Miletic'
    description = 'Nezavisne novine - Najnovije vijesti iz BiH, Srbije, Hrvatske, Crne Gore i svijeta'
    publisher = 'NIGP "DNN"'
    category = 'news, politics, Bosnia, Balcans'
    language = 'sr'
    publication_type = 'newspaper'

    # --- Download behaviour ---------------------------------------------
    oldest_article = 2
    max_articles_per_feed = 200
    no_stylesheets = True
    encoding = 'utf8'
    use_embedded_content = False
    remove_empty_feeds = True

    # --- Artwork --------------------------------------------------------
    # The ``v`` query value is produced by strftime from the %Y%m%d pattern.
    cover_url = strftime(
        'http://pdf.nezavisne.com/slika/novina/nezavisne_novine.jpg'
        '?v=%Y%m%d'
    )
    masthead_url = (
        'http://www.nezavisne.com/slika/osnova/nezavisne-novine-logo.gif'
    )

    # --- Styling and conversion -----------------------------------------
    extra_css = """ body{font-family: Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em; display:block} """

    # Reuse the metadata declared above for the converted book.
    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    # --- Markup filtering -----------------------------------------------
    # Tag specs are plain dicts with ``name`` and ``attrs`` keys.
    keep_only_tags = [
        {'name': 'div', 'attrs': {'class': 'vijest'}},
    ]
    remove_tags_after = {'name': 'div', 'attrs': {'id': 'wrap'}}
    remove_tags = [
        {'name': ['meta', 'link', 'iframe', 'object']},
        {'name': 'div', 'attrs': {'id': 'wrap'}},
    ]
    remove_attributes = ['lang', 'xmlns:fb', 'xmlns:og']

    # --- Feeds: (section title, feed URL) pairs --------------------------
    feeds = [
        (u'Novosti', u'http://feeds.feedburner.com/Novosti-NezavisneNovine'),
        (u'Posao', u'http://feeds.feedburner.com/Posao-NezavisneNovine'),
        (u'Sport', u'http://feeds.feedburner.com/Sport-NezavisneNovine'),
        (u'Komentar', u'http://feeds.feedburner.com/Komentari-NezavisneNovine'),
        (u'Umjetnost i zabava', u'http://feeds.feedburner.com/UmjetnostIZabava-NezavisneNovine'),
        (u'Život i stil', u'http://feeds.feedburner.com/ZivotIStil-NezavisneNovine'),
        (u'Auto', u'http://feeds.feedburner.com/Auto-NezavisneNovine'),
        (u'Nauka i tehnologija', u'http://feeds.feedburner.com/NaukaITehnologija-NezavisneNovine'),
    ]

    def preprocess_html(self, soup):
        """Delete the ``style`` attribute from every tag that has one.

        The soup is modified in place and the same object is returned.
        """
        for styled_tag in soup.findAll(style=True):
            del styled_tag['style']
        return soup