calibre/recipes/zycie_warszawy.recipe
2015-01-23 19:08:21 +05:30

50 lines
1.3 KiB
Python

#!/usr/bin/env python2
# -*- coding: utf-8 -*-
# File-level recipe metadata: license, copyright holder and recipe version.
__license__ = 'GPL v3'
__copyright__ = u'Łukasz Grąbczewski 2012-2013'
__version__ = '1.2'
# Bare string naming the site this recipe targets. It follows the assignments
# above, so it is a plain expression statement, not the module docstring.
'''
zw.com.pl
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class zyciewarszawy(BasicNewsRecipe):
    """Calibre news recipe for the zw.com.pl newspaper site.

    The recipe is purely declarative apart from ``print_version``: it sets
    class attributes consumed by ``BasicNewsRecipe`` (feed list, tag
    filters, regex pre-processing) and rewrites each article URL to the
    site's ``print=tak`` variant.
    """

    # NOTE(review): the surname here differs from the one in the file's
    # __copyright__ line -- confirm which spelling is correct.
    __author__ = u'Łukasz Grączewski'
    title = u'Życie Warszawy'
    description = u'Wiadomości z Warszawy'
    language = 'pl'
    publisher = 'Presspublica'
    publication_type = 'newspaper'
    masthead_url = 'http://www.zw.com.pl/static/img/logo_zw.gif'

    no_stylesheets = True
    remove_javascript = True
    use_embedded_content = False
    remove_empty_feeds = True

    oldest_article = 1.5  # in days, i.e. the last 36 hours
    max_articles_per_feed = 100

    feeds = [(u'Najnowsze', u'http://www.zw.com.pl/rss/1.html')]

    # The only element kept from each page: <div id="storyp">.
    keep_only_tags = [
        dict(name='div', attrs={'id': 'storyp'}),
    ]

    # Elements stripped from the kept content, matched by class or id.
    remove_tags = [
        dict(attrs={'class': 'author'}),
        dict(attrs={'class': 'more'}),
        dict(attrs={'class': 'clr'}),
        dict(attrs={'id': 'adk_0'}),
        dict(attrs={'id': 'adsense_0'}),
        dict(attrs={'id': 'share_bottom'}),
        dict(attrs={'id': 'copyright_law'}),
    ]

    # Rewrite image references ending in ",3.jpg" to ",2.jpg"
    # (presumably a different size variant of the same picture -- confirm).
    # The dot is escaped so that only a literal ".jpg" suffix matches; an
    # unescaped "." would also match any other character in that position.
    preprocess_regexps = [
        (re.compile(r',3\.jpg'), lambda m: ',2.jpg'),
    ]

    def print_version(self, url):
        """Return ``url`` with the ``print=tak`` query parameter added.

        The parameter is joined with ``&`` when ``url`` already contains a
        query string and with ``?`` otherwise, so the result is a
        well-formed URL in both cases.

        :param url: address of the article page
        :return: address of the article's ``print=tak`` variant
        """
        separator = '&' if '?' in url else '?'
        return url + separator + 'print=tak'