#!/usr/bin/env python2
# vim:fileencoding=utf-8
#
# Copyright 2014 - 2015 Martin Račák
# Copyright 2011 Miroslav Vasko
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

__license__ = 'GPL v3'
__copyright__ = ('2014 - 2015 Martin Račák,'
                 ' 2011 Miroslav Vasko')

'''
.týždeň - iný pohľad na spoločnosť
'''

from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe


class Tyzden(BasicNewsRecipe):
    title = u'.týždeň'
    __author__ = u'Martin Račák, zemiak'
    description = u'Politicko-spoločenský týždenník.'
    publisher = 'www.tyzden.sk'
    publication_type = 'magazine'
    language = 'sk'
    needs_subscription = 'optional'
    use_embedded_content = False
    no_stylesheets = True

    base_url = 'http://www.tyzden.sk'
    piano_param = '?piano_d=1'
    issue_url = base_url + '/casopis/'

    # Keep only the article title and body; the rest of the page is chrome.
    keep_only_tags = [
        dict(name='div', attrs={'class': 'detail__title article__title'}),
        dict(name='div', attrs={'class': 'article'}),
    ]

    def get_browser(self):
        br = BasicNewsRecipe.get_browser(self)
        # Open the site with the Piano paywall parameter and reset the Piano
        # visit-key cookie, then log in if credentials were supplied.
        br.open(self.base_url + '/' + self.piano_param)
        br.set_cookie('pianovisitkey=""')
        if self.username is not None and self.password is not None:
            br.select_form(nr=2)
            br['username'] = self.username
            br['password'] = self.password
            br.submit()
        return br

    def find_sections(self):
        '''Yield (title, soup) pairs for each section of the current issue.'''
        soup = self.index_to_soup(self.issue_url)
        # Reuse the issue's title image as the e-book cover.
        img_wrapper = soup.find('div', 'mag__title-img-wrapper')
        if img_wrapper is not None:
            self.cover_url = img_wrapper.img['src']
        for section in soup.findAll('div', 'mag__section'):
            section_title = section.find('span', 'mag__section-title')
            yield (self.tag_to_string(section_title), section)

    def find_articles(self, soup):
        '''Yield article descriptions for every teaser in a section.'''
        for title in soup.findAll('h1', 'teaser__title'):
            yield {
                'title': self.tag_to_string(title.a),
                'url': title.a['href'],
                'date': strftime(' %a, %d %b'),
            }

    def parse_index(self):
        feeds = []
        for title, section in self.find_sections():
            feeds.append((title, list(self.find_articles(section))))
        return feeds
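
# A minimal sketch of how this recipe could be exercised locally, assuming the
# calibre command-line tools are installed; the file name "tyzden.recipe" and
# the output path are illustrative, not part of this repository:
#
#   ebook-convert tyzden.recipe tyzden.epub --test -vv
#
# For subscribers, ebook-convert's --username and --password options populate
# self.username/self.password, which get_browser() above uses to fill in the
# login form.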