#!/usr/bin/env python2
# vim:fileencoding=utf-8
__license__ = 'GPL v3'
__author__ = 'teepel '

'''
http://prawica.net
'''

from calibre.web.feeds.news import BasicNewsRecipe


class prawica_recipe(BasicNewsRecipe):
    """Calibre news recipe for prawica.net.

    Pulls articles from the site's single "all" feed and fetches the
    ``/print/`` variant of each article page (see ``print_version``),
    keeping only the title and content elements listed in
    ``keep_only_tags``.
    """

    title = u'prawica.net'
    __author__ = 'teepel '
    language = 'pl'
    # Polish for "News from the prawica.net site".
    description = u'Wiadomości ze strony prawica.net'

    # Site root; also the URL prefix rewritten by print_version().
    INDEX = 'http://prawica.net/'

    remove_empty_feeds = True
    oldest_article = 1
    max_articles_per_feed = 100
    remove_javascript = True
    no_stylesheets = True

    feeds = [(u'all', u'http://prawica.net/all/feed')]

    keep_only_tags = [
        # This entry should show the title of the article, but it does not
        # work (known issue carried over from the original author).
        dict(name='h1', attrs={'class': 'print-title'}),
        dict(name='div', attrs={'class': 'content'}),
    ]

    remove_tags = [
        dict(name='div', attrs={
            'class': 'field field-type-viewfield field-field-autor2'}),
        dict(name='div', attrs={
            'class': 'field field-type-viewfield field-field-publikacje-autora'}),
        # NOTE(review): this value looks like a full class list rather than
        # an element id, so the filter may never match -- confirm against the
        # live page markup before changing it.
        dict(name='div', attrs={
            'id': 'rate-widget-2 rate-widget clear-block rate-average rate-widget-fivestar rate-daa7512627f21dcf15e0af47e5279f0e rate-processed'}),
    ]

    remove_tags_after = [
        dict(name='div', attrs={'class': 'field-label-inline-first'}),
    ]

    def print_version(self, url):
        """Return the printer-friendly URL for an article.

        The ``print/`` path segment is inserted directly after the site
        root (``INDEX``). Only the first occurrence of the root is
        rewritten, so a URL that happens to embed the site address again
        (e.g. in a query string) is not corrupted. URLs that do not
        contain the site root are returned unchanged.
        """
        return url.replace(self.INDEX, self.INDEX + 'print/', 1)