#!/usr/bin/env python __license__ = 'GPL v3' __copyright__ = '''2010, matek09, matek09@gmail.com Modified 2011, Mariusz Wolek Modified 2012, Artur Stachecki ''' from calibre.web.feeds.news import BasicNewsRecipe import re class Wprost(BasicNewsRecipe): title = u'Wprost (RSS)' __author__ = 'matek09' description = u'Portal informacyjny. Najświeższe wiadomości, najciekawsze komentarze i opinie. Blogi najlepszych publicystów.' encoding = 'ISO-8859-2' no_stylesheets = True language = 'pl' remove_javascript = True recursions = 0 use_embedded_content = False ignore_duplicate_articles = {'title', 'url'} remove_empty_feeds = True remove_tags_before = dict(dict(name = 'div', attrs = {'id' : 'print-layer'})) remove_tags_after = dict(dict(name = 'div', attrs = {'id' : 'print-layer'})) ''' keep_only_tags =[] keep_only_tags.append(dict(name = 'table', attrs = {'id' : 'title-table'})) keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'div-header'})) keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'div-content'})) keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'def element-autor'})) ''' preprocess_regexps = [(re.compile(r'style="display: none;"'), lambda match: ''), (re.compile(r'display: block;'), lambda match: ''), (re.compile(r'\\\<\/table\>'), lambda match: ''), (re.compile(r'\'), lambda match: ''), (re.compile(r'\'), lambda match: ''), (re.compile(r'\
'), lambda match: ''), (re.compile(r'\