__license__ = 'GPL v3'
__copyright__ = '2008-2009, Darko Miletic '
'''
vreme.com
'''

import re

from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe


class Vreme(BasicNewsRecipe):
    """Calibre recipe that builds an e-book from the vreme.com front page.

    The recipe is flagged as requiring a subscription: when a username and
    password are configured, ``get_browser`` logs in through ``LOGIN`` before
    any article is fetched.  Articles are discovered from the ``h3``/``h4``
    headings of the ``INDEX`` page and downloaded in their print version.
    """

    title = 'Vreme'
    __author__ = 'Darko Miletic'
    description = 'Politicki Nedeljnik Srbije'
    publisher = 'NP Vreme d.o.o.'
    category = 'news, politics, Serbia'
    delay = 1
    no_stylesheets = True
    needs_subscription = True
    INDEX = 'http://www.vreme.com'
    LOGIN = 'http://www.vreme.com/account/login.php?url=%2F'
    use_embedded_content = False
    encoding = 'utf-8'
    language = 'sr'
    publication_type = 'magazine'
    masthead_url = 'http://www.vreme.com/g/vreme-logo.gif'
    # Adjacent literals are concatenated into one stylesheet string; every
    # fragment starts with the separating space so the text stays unchanged.
    extra_css = (
        ' @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)}'
        ' body{font-family: serif1, serif}'
        ' .article_description{font-family: serif1, serif}'
        ' @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}'
        ' .heading1{font-family: sans1, sans-serif; font-size: x-large; font-weight: bold}'
        ' .heading2{font-family: sans1, sans-serif; font-size: large; font-weight: bold}'
        ' .toc-heading{font-family: sans1, sans-serif; font-size: small}'
        ' .column-heading2{font-family: sans1, sans-serif; font-size: large}'
        ' .column-heading1{font-family: sans1, sans-serif; font-size: x-large}'
        ' .column-normal{font-family: sans1, sans-serif; font-size: medium}'
        ' .large{font-family: sans1, sans-serif; font-size: large}'
        ' '
    )

    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
        'linearize_tables': True,
    }

    # Replace U+0110 (capital D with stroke) by U+00D0 (capital Eth) in the
    # downloaded markup.
    # NOTE(review): presumably a glyph-coverage workaround for the embedded
    # reader fonts -- confirm before removing.
    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

    remove_tags_before = dict(attrs={'class': 'toc-heading'})
    remove_tags_after = dict(attrs={'class': 'footer'})
    # NOTE(review): 'xclass' is not a standard HTML attribute, so the table
    # rule looks like a deliberately disabled 'class' filter -- confirm.
    remove_tags = [
        dict(name=['object', 'link']),
        dict(name='table', attrs={'xclass': 'image'}),
    ]

    def get_browser(self):
        """Return the browser used for downloads, logged in when possible.

        When both ``self.username`` and ``self.password`` are set, the login
        page at ``LOGIN`` is opened and its form named ``f`` is submitted
        with those credentials.  Otherwise the plain browser is returned.
        """
        # The base implementation has to be called with the recipe instance.
        br = BasicNewsRecipe.get_browser(self)
        if self.username is not None and self.password is not None:
            br.open(self.LOGIN)
            br.select_form(name='f')
            br['username'] = self.username
            br['password'] = self.password
            br.submit()
        return br

    def parse_index(self):
        """Build the article list from the headings of the index page.

        Sets ``self.cover_url`` from the image inside the ``div`` with id
        ``najava`` when such an image exists.

        Returns:
            A one-element list ``[('Nedeljnik Vreme', articles)]`` where each
            article is a dict with ``title``, ``date``, ``url`` and
            ``description`` keys.
        """
        articles = []
        soup = self.index_to_soup(self.INDEX)

        cover_item = soup.find('div', attrs={'id': 'najava'})
        if cover_item:
            # The cover block may lack an image (or the image a src); leave
            # cover_url untouched in that case instead of raising.
            cover_img = cover_item.find('img')
            cover_src = cover_img.get('src') if cover_img is not None else None
            if cover_src:
                self.cover_url = self.INDEX + cover_src

        # Every article of one run shares the same formatted date.
        date = strftime(self.timefmt)
        for item in soup.findAll(['h3', 'h4']):
            feed_link = item.find('a')
            if not feed_link:
                continue
            # Tag.get() is available in both BeautifulSoup 3 and 4, unlike
            # the BS3-only has_key().
            href = feed_link.get('href', '')
            if not href.startswith('/cms/view.php'):
                continue
            articles.append({
                'title': self.tag_to_string(feed_link),
                'date': date,
                'url': self.INDEX + href,
                'description': u'',
            })
        return [('Nedeljnik Vreme', articles)]

    def print_version(self, url):
        """Return the print-version address for *url* by appending ``&print=yes``."""
        return url + '&print=yes'