__license__ = 'GPL v3'
__copyright__ = '2008-2012, Darko Miletic '
'''
vreme.com
'''

import re

from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe


class Vreme(BasicNewsRecipe):
    """Recipe for the Serbian weekly magazine Vreme (vreme.com).

    Logs in through the site's login form when credentials are supplied,
    collects article links from the front page and fetches each article
    through its ``&print=yes`` variant.
    """

    title = 'Vreme'
    __author__ = 'Darko Miletic'
    description = 'Politicki Nedeljnik Srbije'
    publisher = 'NP Vreme d.o.o.'
    category = 'news, politics, Serbia'
    delay = 1
    no_stylesheets = True
    needs_subscription = True
    # Site root; also used as the prefix for relative article/cover links.
    INDEX = 'http://www.vreme.com'
    # Page holding the login form submitted in get_browser().
    LOGIN = 'http://www.vreme.com/account/login.php?url=%2F'
    use_embedded_content = False
    encoding = 'utf-8'
    language = 'sr'
    publication_type = 'magazine'
    masthead_url = 'http://www.vreme.com/g/vreme-logo.gif'
    # Built from adjacent literals so the resulting CSS text stays exactly
    # what it was while keeping the source lines readable.
    extra_css = (
        ' @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)}'
        ' @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}'
        ' body{font-family: serif1, serif}'
        ' .article_description{font-family: serif1, serif}'
        ' .column-heading2{font-family: sans1, sans-serif; font-size: large}'
        ' .column-heading1{font-family: sans1, sans-serif; font-size: x-large}'
        ' .column-normal{font-family: sans1, sans-serif; font-size: medium}'
        ' .large{font-family: sans1, sans-serif; font-size: large}'
        ' '
    )

    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
        'linearize_tables': True
    }

    # Rewrites U+0110 (LATIN CAPITAL LETTER D WITH STROKE) as U+00D0
    # (LATIN CAPITAL LETTER ETH) in downloaded pages.
    # NOTE(review): presumably a workaround for a missing glyph in the
    # embedded reader fonts -- confirm before removing.
    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

    remove_tags_before = dict(attrs={'class': 'toc-heading'})
    remove_tags_after = dict(attrs={'class': 'footer'})
    # NOTE(review): 'xclass' is not a standard HTML attribute, so the table
    # filter most likely never matches; it looks like a deliberately
    # disabled 'class' filter -- confirm before "fixing" it.
    remove_tags = [
        dict(name=['object', 'link']),
        dict(name='table', attrs={'xclass': 'image'})
    ]

    def get_browser(self):
        """Return the recipe browser, logged in when credentials are set.

        The login form (named ``f`` on the LOGIN page) is submitted only
        when both ``self.username`` and ``self.password`` are not None.
        """
        br = BasicNewsRecipe.get_browser(self)
        if self.username is not None and self.password is not None:
            br.open(self.LOGIN)
            br.select_form(name='f')
            br['username'] = self.username
            br['password'] = self.password
            br.submit()
        return br

    def parse_index(self):
        """Build the single 'Nedeljnik Vreme' feed from the front page.

        Sets ``self.cover_url`` from the image inside the ``najava`` div
        when one is present, then collects every ``h3``/``h4`` headline
        whose link points at ``cms/view.php``.

        Returns:
            A one-element list ``[('Nedeljnik Vreme', articles)]`` where
            each article is a dict with ``title``, ``date``, ``url`` and
            ``description`` keys.
        """
        articles = []
        soup = self.index_to_soup(self.INDEX)

        # The cover is optional: a promo box without an <img>, or an image
        # without a src, must not abort the whole download.
        cover_item = soup.find('div', attrs={'id': 'najava'})
        cover_img = cover_item.img if cover_item is not None else None
        if cover_img is not None and cover_img.get('src'):
            self.cover_url = self.INDEX + cover_img['src']

        # Every article of the issue carries the same download timestamp.
        date = strftime(self.timefmt)

        for item in soup.findAll(['h3', 'h4']):
            feed_link = item.find('a', href=True)
            if feed_link is None:
                continue
            href = feed_link['href']
            # Article links appear both with and without a leading slash.
            if href.startswith('cms/view.php'):
                url = self.INDEX + '/' + href
            elif href.startswith('/cms/view.php'):
                url = self.INDEX + href
            else:
                continue
            articles.append({
                'title': self.tag_to_string(feed_link),
                'date': date,
                'url': url,
                'description': u''
            })
        return [('Nedeljnik Vreme', articles)]

    def print_version(self, url):
        """Return *url* with ``&print=yes`` appended.

        NOTE(review): appending with '&' assumes the article URL already
        has a query string (the links collected in parse_index point at
        ``cms/view.php``) -- confirm against live links.
        """
        return url + '&print=yes'