# calibre news recipe for Racjonalista.pl.
#
# Defines `Racjonalista`, a `BasicNewsRecipe` subclass configured purely
# through class attributes:
#   * a single RSS feed, http://www.racjonalista.pl/rss.php, with pages
#     declared as iso-8859-2 encoded;
#   * oldest_article = 7 and max_articles_per_feed = 20;
#   * two simultaneous downloads with a timeout of 30;
#   * JavaScript and stylesheets removed, and the `target`, `width` and
#     `height` attributes dropped from the markup.
#
# `print_version(url)` returns `url` with every '/s,' replaced by '/t,'
# (presumably the site's printer-friendly address -- TODO confirm).
#
# `preprocess_regexps` is built by a comprehension that turns each
# (pattern, replacement-callable) pair into
# (re.compile(pattern, re.DOTALL), replacement-callable).
#
# NOTE(review): this copy of the file looks damaged. The line breaks are gone
# and the markup inside the regex literals appears to have been stripped:
# patterns such as `]*>` have no opening tag, `(?P[^<]+)` has no group name
# although the lambdas call match.group('a') / ('t') / ('p'), and several
# string literals are split across lines. It is not valid Python as shown.
# Restore the patterns from a pristine copy of the recipe rather than
# repairing them by hand.
__copyright__ = '2012, Micha\u0142 ' ''' Racjonalista.pl ''' from calibre.web.feeds.news import BasicNewsRecipe import re class Racjonalista(BasicNewsRecipe): __author__ = u'Micha\u0142 ' publisher = u'Fundacja Wolnej My\u015bli' title = u'Racjonalista.pl' description = u'Racjonalista.pl' category = 'newspaper' language = 'pl' encoding = 'iso-8859-2' oldest_article = 7 max_articles_per_feed = 20 remove_javascript = True no_stylesheets = True use_embedded_content = False simultaneous_downloads = 2 timeout = 30 cover_url = 'http://www.racjonalista.pl/img/uimg/rac.gif' feeds = [(u'Racjonalista.pl', u'http://www.racjonalista.pl/rss.php')] match_regexps = [r'kk\.php'] def print_version(self, url): return url.replace('/s,', '/t,') extra_css = 'h2 {font: serif large} .cytat {text-align: right}' remove_attributes = ['target', 'width', 'height'] preprocess_regexps = [ (re.compile(i[0], re.DOTALL), i[1]) for i in [(r']*> 

', lambda match: ''), (r' ', lambda match: ' '), (r']+>', lambda match: ''), (r']+>', lambda match: ''), (r'', lambda match: ''), (r'[^<]+)', lambda match: '' + match.group('a') + ''), (r'
(?P[^<]+)
', lambda match: '

' + match.group('t') + '

'), (r'
', lambda match: ''), # noqa (r'
', lambda match: ''), (r'
', lambda match: ''), (r']+>(?P

[^<]+)', lambda match: '' + match.group('p') + ''), (r']+>(?P[^<]+)', lambda match: match.group('a')), (r'Orygin[^<]+', lambda match: ''), (r'Poka[^<]+', lambda match: '')] ]