#!/usr/bin/env python
# vim:fileencoding=utf-8
#
# Copyright 2014 - 2017 Martin Račák
# Copyright 2011 Miroslav Vasko
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

import re

from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe

__copyright__ = (
    '2014 - 2017 Martin Račák ,'
    '2011 Miroslav Vasko ')
__license__ = 'GPL-3.0+'


class Tyzden(BasicNewsRecipe):
    """Calibre recipe that builds an issue of the .týždeň weekly magazine.

    The issue overview page (``issue_url``) is scraped for article teasers,
    which are grouped into feeds by the section heading each teaser carries.
    Credentials are optional; when supplied they are submitted to the
    publisher's sign-in form before any page is fetched.
    """

    title = u'.týždeň'
    __author__ = u'Martin Račák, zemiak'
    description = u'Politicko-spoločenský týždenník.'
    publisher = 'www.tyzden.sk'
    publication_type = 'magazine'
    language = 'sk'
    needs_subscription = 'optional'
    use_embedded_content = False
    no_stylesheets = True

    # Overview page of the current issue; parse_index() starts here.
    issue_url = 'http://www.tyzden.sk/casopis/'

    keep_only_tags = [
        {
            'name': 'div',
            'attrs': {
                'class': 'section__content section__content--archive'}},
        {
            'name': 'article',
            'attrs': {
                'class': re.compile(r'\barticle\b')}},
    ]

    # Built from adjacent literals purely to keep the source lines short; the
    # resulting value is one single-line stylesheet string.
    extra_css = (
        '.theme-highlight { color: #999; text-decoration: none; } '
        '.author-highlight { color: #bf1f10; text-decoration: none; } '
        ".article__content h2::before { content: '.'; display: inline; } "
        '.article__image-credit, .content-photo__image-credit { '
        'display: block; font: 12px "TheSerifSemiLight", arial; '
        'text-transform: uppercase; } '
        '.article__image-title, .image-title { '
        'padding-top: 2px; padding-bottom: 2px; margin: 0; '
        'font: 15px "TheSerifBold", arial; '
        'border-bottom: 2px solid #bf1f10; display: block; } '
        '.teaser__title { font: 18px "TheSerifBold", arial; color: #bf1f10; } '
        '.teaser__title .highlight { color: #000; } '
    )

    def get_browser(self):
        """Return the recipe browser, signed in when credentials are set.

        The sign-in form is only submitted when both ``self.username`` and
        ``self.password`` are not None; otherwise the browser is returned
        as created by the base class.
        """
        br = BasicNewsRecipe.get_browser(self)
        if self.username is not None and self.password is not None:
            br.open('https://crm.tyzden.sk/sign/in/')
            # The first form on the page is taken to be the login form.
            br.select_form(nr=0)
            br['username'] = self.username
            br['password'] = self.password
            br.submit()
        return br

    def parse_index(self):
        """Scrape the issue page and return its articles grouped by section.

        Returns:
            A list of ``(section_title, articles)`` tuples in page order,
            where ``articles`` is a list of dicts with the keys ``title``,
            ``url`` and ``date``. Consecutive teasers that share a section
            title end up in the same tuple.

        Side effects:
            Sets ``self.cover_url`` to the ``src`` of the last
            ``img.teaser__image`` on the page, when such an image exists.
        """
        soup = self.index_to_soup(self.issue_url)

        # findAll() returns a (possibly empty) list, so test for emptiness
        # before indexing; blindly taking [-1] raises IndexError on a page
        # without teaser images.
        cover_imgs = soup.findAll('img', 'teaser__image')
        if cover_imgs:
            cover_src = cover_imgs[-1].get('src')
            if cover_src:
                self.cover_url = cover_src

        # Compiled once here instead of once per teaser.
        title_class = re.compile(r'\bteaser__title\b')

        feeds = []
        teasers = soup.findAll(
            'div', {'class': re.compile(r'\bteaser--list\b')})
        for teaser in teasers:
            section = self.tag_to_string(
                teaser.find('a', 'theme-heading__wrapper'))
            article_title = self.tag_to_string(
                teaser.find('h1', {'class': title_class}))
            article_link = teaser.find('a', 'teaser__link--main')

            # A teaser without a usable link cannot be downloaded; skip it
            # rather than letting one malformed entry abort the whole issue.
            article_url = None
            if article_link is not None:
                article_url = article_link.get('href')
            if not article_url:
                self.log.warn(
                    'Skipping teaser without article link: %r'
                    % (article_title,))
                continue

            article = {
                'title': article_title,
                'url': article_url,
                'date': strftime(' %a, %d %b')}

            # Extend the current feed while the section title repeats;
            # otherwise (including the very first article) open a new feed.
            if feeds and feeds[-1][0] == section:
                feeds[-1][1].append(article)
            else:
                feeds.append((section, [article]))
        return feeds