#!/usr/bin/env python2 __license__ = 'GPL v3' __copyright__ = '''2010, matek09, matek09@gmail.com Modified 2011, Mariusz Wolek Modified 2012, Artur Stachecki ''' from calibre.web.feeds.news import BasicNewsRecipe import re class Wprost(BasicNewsRecipe): EDITION = 0 FIND_LAST_FULL_ISSUE = True EXCLUDE_LOCKED = True ICO_BLOCKED = 'http://www.wprost.pl/G/layout2/ico_blocked.png' title = u'Wprost' __author__ = 'matek09' description = u'Popularny tygodnik ogólnopolski - Wprost. Najlepszy wśród polskich tygodników - opiniotwórczy - społeczno-informacyjny - społeczno-kulturalny.' # noqa encoding = 'ISO-8859-2' no_stylesheets = True language = 'pl' remove_javascript = True recursions = 0 remove_tags_before = dict(dict(name='div', attrs={'id': 'print-layer'})) remove_tags_after = dict(dict(name='div', attrs={'id': 'print-layer'})) ''' keep_only_tags =[] keep_only_tags.append(dict(name = 'table', attrs = {'id' : 'title-table'})) keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'div-header'})) keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'div-content'})) keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'def element-autor'})) ''' preprocess_regexps = [(re.compile(r'style="display: none;"'), lambda match: ''), (re.compile(r'display: block;'), lambda match: ''), (re.compile(r'\\\<\/table\>'), lambda match: ''), (re.compile(r'\'), lambda match: ''), (re.compile(r'\'), lambda match: ''), (re.compile(r'\
'), lambda match: ''), (re.compile(r'\