calibre/recipes/prawica_net.recipe
Kovid Goyal c73616b26c ...
2012-11-23 08:34:17 +05:30

41 lines
1.5 KiB
Python

#!/usr/bin/env python
__license__ = 'GPL v3'
__author__ = 'teepel <teepel44@gmail.com>'
'''
http://prawica.net
'''
from calibre.web.feeds.news import BasicNewsRecipe
class prawica_recipe(BasicNewsRecipe):
title = u'prawica.net'
__author__ = 'teepel <teepel44@gmail.com>'
language = 'pl'
description ='Wiadomości ze strony prawica.net'
INDEX='http://prawica.net/'
remove_empty_feeds= True
oldest_article = 1
max_articles_per_feed = 100
remove_javascript=True
no_stylesheets=True
feeds = [(u'all', u'http://prawica.net/all/feed')]
keep_only_tags =[]
#this line should show title of the article, but it doesnt work
keep_only_tags.append(dict(name = 'h1', attrs = {'class' : 'print-title'}))
keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'content'}))
remove_tags =[]
remove_tags.append(dict(name = 'div', attrs = {'class' : 'field field-type-viewfield field-field-autor2'}))
remove_tags.append(dict(name = 'div', attrs = {'class' : 'field field-type-viewfield field-field-publikacje-autora'}))
remove_tags.append(dict(name = 'div', attrs = {'id' : 'rate-widget-2 rate-widget clear-block rate-average rate-widget-fivestar rate-daa7512627f21dcf15e0af47e5279f0e rate-processed'}))
remove_tags_after =[(dict(name = 'div', attrs = {'class' : 'field-label-inline-first'}))]
def print_version(self, url):
return url.replace('http://prawica.net/', 'http://prawica.net/print/')