calibre/recipes/dzieje_pl.recipe
2012-10-17 19:42:08 +05:30

24 lines
963 B
Plaintext

from calibre.web.feeds.news import BasicNewsRecipe
class Dzieje(BasicNewsRecipe):
title = u'dzieje.pl'
__author__ = 'fenuks'
description = 'Dzieje - history of Poland'
cover_url = 'http://www.dzieje.pl/sites/default/files/dzieje_logo.png'
category = 'history'
language = 'pl'
index='http://dzieje.pl'
oldest_article = 8
max_articles_per_feed = 100
remove_javascript=True
no_stylesheets= True
keep_only_tags = [dict(name='h1', attrs={'class':'title'}), dict(id='content-area')]
remove_tags = [dict(attrs={'class':'field field-type-computed field-field-tagi'}), dict(id='dogory')]
feeds = [(u'Dzieje', u'http://dzieje.pl/rss.xml')]
def preprocess_html(self, soup):
for a in soup('a'):
if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
a['href']=self.index + a['href']
return soup