__license__ = 'GPL v3'
__copyright__ = '2008-2012, Darko Miletic '
'''
www.nin.co.rs
'''

import re

from calibre.web.feeds.news import BasicNewsRecipe


class Nin(BasicNewsRecipe):
    """Calibre news recipe for NIN online (www.nin.co.rs).

    Articles are listed by the site's RSS 2.0 feed and each article URL is
    rewritten by :meth:`print_version` before download. The recipe sets
    ``needs_subscription``; when both a username and a password are
    configured, :meth:`get_browser` submits them through the form named
    ``form1`` on the index page.
    """

    # --- Publication metadata -------------------------------------------
    title = 'NIN online'
    __author__ = 'Darko Miletic'
    description = 'Nedeljne Informativne Novine'
    publisher = 'NIN d.o.o. - Ringier d.o.o.'
    category = 'news, politics, Serbia'
    language = 'sr'
    publication_type = 'magazine'

    # --- Site addresses -------------------------------------------------
    PREFIX = 'http://www.nin.co.rs'
    # Index page; also the page that carries the login form and the cover.
    INDEX = PREFIX + '/?change_lang=ls'
    masthead_url = 'http://www.nin.co.rs/img/logo_print.jpg'
    feeds = [(u'NIN Online', u'http://www.nin.co.rs/misc/rss.php?feed=RSS2.0')]

    # --- Download behaviour ---------------------------------------------
    no_stylesheets = True
    oldest_article = 180
    encoding = 'utf-8'
    needs_subscription = True
    remove_empty_feeds = True
    auto_cleanup = False
    use_embedded_content = False

    # --- Presentation ---------------------------------------------------
    # Built from adjacent literals so the stylesheet stays readable here
    # while the resulting string value is kept exactly as it was.
    extra_css = (
        ' @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}'
        ' body{font-family: Verdana, Lucida, sans1, sans-serif}'
        ' .article_description{font-family: Verdana, Lucida, sans1, sans-serif}'
        ' .artTitle{font-size: x-large; font-weight: bold; color: #900}'
        ' .izjava{font-size: x-large; font-weight: bold}'
        ' .columnhead{font-size: small; font-weight: bold;}'
        ' img{margin-top:0.5em; margin-bottom: 0.7em; display: block}'
        ' b{margin-top: 1em} '
    )

    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
        'linearize_tables': True,
    }

    # --- Markup clean-up ------------------------------------------------
    preprocess_regexps = [
        # NOTE(review): as written this pattern is a line break followed by
        # `.*` under DOTALL, i.e. it matches from the first line break to
        # the end of the text and replaces it with nothing. That looks like
        # a truncated pattern (leading markup may have been lost) --
        # confirm against the upstream recipe before relying on it.
        (re.compile('\n.*', re.DOTALL | re.IGNORECASE), lambda match: ''),
        # Swap U+0110 (capital D with stroke) for U+00D0 (capital Eth);
        # presumably a glyph-coverage workaround -- TODO confirm.
        (re.compile(u'\u0110'), lambda match: u'\u00D0'),
    ]

    remove_tags_before = dict(name='div', attrs={'class': 'titleFont'})
    remove_tags_after = dict(name='div', attrs={'class': 'standardFont'})
    remove_tags = [dict(name=['object', 'link', 'iframe', 'meta', 'base'])]
    remove_attributes = [
        'border', 'background', 'height', 'width', 'align', 'valign',
    ]

    def get_browser(self):
        """Return a browser, logged in when credentials are configured.

        The login is attempted only when both ``self.username`` and
        ``self.password`` are set; otherwise the plain browser from the
        base class is returned untouched.
        """
        br = BasicNewsRecipe.get_browser(self)
        if self.username is not None and self.password is not None:
            br.open(self.INDEX)
            br.select_form(name='form1')
            br['login_name'] = self.username
            br['login_password'] = self.password
            br.submit()
        return br

    def get_cover_url(self):
        """Return the cover image URL found on the index page, or ``None``.

        The cover is the first ``<img class="issueImg">`` on the index
        page; its ``src`` is appended to ``PREFIX``. ``None`` is returned
        when no such image exists or when the image has no ``src``
        attribute (instead of raising ``KeyError``).
        """
        soup = self.index_to_soup(self.INDEX)
        cover = soup.find('img', attrs={'class': 'issueImg'})
        if not cover:
            return None
        src = cover.get('src')
        if not src:
            return None
        return self.PREFIX + src

    def print_version(self, url):
        """Return ``url`` with the ``pf=1`` query parameter appended.

        ``&`` is used when ``url`` already carries a query string and ``?``
        otherwise, so the result is a well-formed address in both cases.
        """
        separator = '&' if '?' in url else '?'
        return url + separator + 'pf=1'