__license__ = 'GPL v3'
__copyright__ = '2009-2010, Darko Miletic '
''' e-novine.com '''

import re

from calibre.web.feeds.news import BasicNewsRecipe


class E_novine(BasicNewsRecipe):
    """Calibre news recipe that builds an e-book from the e-novine.com RSS feed.

    The class is almost entirely declarative: the attributes below configure
    the inherited ``BasicNewsRecipe`` machinery, and the two methods adjust
    the fetched HTML and the URL that is downloaded for each article.
    """

    # --- Publication metadata -------------------------------------------
    title = 'E-Novine'
    __author__ = 'Darko Miletic'
    description = 'News from Serbia'
    publisher = 'E-novine'
    # NOTE(review): 'Balcans' looks like a misspelling of 'Balkans'; it is
    # kept as-is because the value is emitted as e-book tag metadata.
    category = 'news, politics, Balcans'
    language = 'sr'
    publication_type = 'newsportal'
    masthead_url = 'http://www.e-novine.com/themes/e_novine/img/logo.gif'

    # --- Fetch limits and input handling --------------------------------
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    encoding = 'utf-8'
    use_embedded_content = False

    # Extra stylesheet: declares a "sans1" font face pointing at a font file
    # under /opt/sony/ebook/FONT and un-floats images.
    extra_css = ' @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} .article_description,body{font-family: Arial,Helvetica,sans1,sans-serif} img{float: none; margin-bottom: 0.8em} '

    # Metadata forwarded to the conversion pipeline; values reuse the class
    # attributes defined above.
    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    # Replace every U+0110 in the raw page with U+00D0 before parsing.
    # Presumably a workaround for a glyph missing from the target font --
    # TODO confirm.
    preprocess_regexps = [
        (re.compile(u'\u0110'), lambda match: u'\u00D0'),
    ]

    # --- HTML clean-up rules --------------------------------------------
    # Only the article header and article body containers are selected.
    keep_only_tags = [
        dict(name='div', attrs={'class': 'article_head'}),
        dict(name='div', attrs={'id': 'article_body'}),
    ]
    # Embedded/linked resources and the article tools box are matched for
    # removal.
    remove_tags = [
        dict(name=['object', 'link', 'embed', 'iframe']),
        dict(attrs={'id': 'box_article_tools'}),
    ]
    remove_attributes = ['height', 'width', 'lang']

    # --- Feeds ----------------------------------------------------------
    feeds = [
        (u'Sve vesti', u'http://www.e-novine.com/feed/index.1.rss'),
    ]

    def preprocess_html(self, soup):
        """Strip inline ``style`` attributes, then post-process images.

        :param soup: parsed article document; every element carrying a
            ``style`` attribute has that attribute deleted in place.
        :return: whatever the inherited ``adeify_images`` returns for the
            cleaned document.
        """
        styled_tags = soup.findAll(style=True)
        for tag in styled_tags:
            del tag['style']
        return self.adeify_images(soup)

    def print_version(self, url):
        """Return the printer-friendly address for an article.

        :param url: article URL as taken from the feed.
        :return: ``url`` with ``'?print'`` appended.
        """
        # NOTE(review): assumes feed URLs never already carry a query
        # string -- verify against the live feed.
        print_suffix = '?print'
        return url + print_suffix