Various Polish recipes by Artur Stachecki

This commit is contained in:
Kovid Goyal 2012-11-18 23:30:47 +05:30
commit 6712594a3e
12 changed files with 251 additions and 4 deletions

48
recipes/antyweb.recipe Normal file
View File

@ -0,0 +1,48 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AntywebRecipe(BasicNewsRecipe):
encoding = 'utf-8'
__license__ = 'GPL v3'
__author__ = u'Artur Stachecki <artur.stachecki@gmail.com>'
language = 'pl'
version = 1
title = u'Antyweb'
category = u'News'
description = u'Blog o internecie i nowych technologiach'
cover_url=''
remove_empty_feeds= True
auto_cleanup = False
no_stylesheets=True
use_embedded_content = False
oldest_article = 1
max_articles_per_feed = 100
remove_javascript = True
simultaneous_downloads = 3
keep_only_tags =[]
keep_only_tags.append(dict(name = 'h1', attrs = { 'class' : 'mm-article-title'}))
keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'mm-article-content'}))
remove_tags =[]
remove_tags.append(dict(name = 'h2', attrs = {'class' : 'widgettitle'}))
remove_tags.append(dict(name = 'img', attrs = {'class' : 'alignleft'}))
remove_tags.append(dict(name = 'div', attrs = {'class' : 'float: right;margin-left:1em;margin-bottom: 0.5em;padding-bottom: 3px; width: 72px;'}))
remove_tags.append(dict(name = 'img', attrs = {'src' : 'http://antyweb.pl/wp-content/uploads/2011/09/HOSTERSI_testy_pasek600x30.gif'}))
remove_tags.append(dict(name = 'div', attrs = {'class' : 'podwpisowe'}))
extra_css = '''
body {font-family: verdana, arial, helvetica, geneva, sans-serif ;}
'''
feeds = [
(u'Artykuly', u'feed://feeds.feedburner.com/Antyweb?format=xml'),
]
def preprocess_html(self, soup):
for alink in soup.findAll('a'):
if alink.string is not None:
tstr = alink.string
alink.replaceWith(tstr)
return soup

50
recipes/bankier_pl.recipe Normal file
View File

@ -0,0 +1,50 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__author__ = 'teepel <teepel44@gmail.com>'
'''
bankier.pl
'''
from calibre.web.feeds.news import BasicNewsRecipe
class bankier(BasicNewsRecipe):
title = u'Bankier.pl'
__author__ = 'teepel <teepel44@gmail.com>'
language = 'pl'
description ='Polski portal finansowy. Informacje o: gospodarka, inwestowanie, finanse osobiste, prowadzenie firmy, kursy walut, notowania akcji, fundusze.'
masthead_url='http://www.bankier.pl/gfx/hd-mid-02.gif'
INDEX='http://bankier.pl/'
remove_empty_feeds= True
oldest_article = 1
max_articles_per_feed = 100
remove_javascript=True
no_stylesheets=True
simultaneous_downloads = 5
keep_only_tags =[]
keep_only_tags.append(dict(name = 'div', attrs = {'align' : 'left'}))
remove_tags =[]
remove_tags.append(dict(name = 'table', attrs = {'cellspacing' : '2'}))
remove_tags.append(dict(name = 'div', attrs = {'align' : 'center'}))
remove_tags.append(dict(name = 'img', attrs = {'src' : '/gfx/hd-mid-02.gif'}))
#remove_tags.append(dict(name = 'a', attrs = {'target' : '_blank'}))
#remove_tags.append(dict(name = 'br', attrs = {'clear' : 'all'}))
feeds = [
(u'Wiadomości dnia', u'http://feeds.feedburner.com/bankier-wiadomosci-dnia'),
(u'Finanse osobiste', u'http://feeds.feedburner.com/bankier-finanse-osobiste'),
(u'Firma', u'http://feeds.feedburner.com/bankier-firma'),
(u'Giełda', u'http://feeds.feedburner.com/bankier-gielda'),
(u'Rynek walutowy', u'http://feeds.feedburner.com/bankier-rynek-walutowy'),
(u'Komunikaty ze spółek', u'http://feeds.feedburner.com/bankier-espi'),
]
def print_version(self, url):
segment = url.split('.')
urlPart = segment[2]
segments = urlPart.split('-')
urlPart2 = segments[-1]
return 'http://www.bankier.pl/wiadomosci/print.html?article_id=' + urlPart2

35
recipes/f1_ultra.recipe Normal file
View File

@ -0,0 +1,35 @@
from calibre.web.feeds.news import BasicNewsRecipe
import re
class f1ultra(BasicNewsRecipe):
title = u'Formuła 1 - F1 ultra'
__license__ = 'GPL v3'
__author__ = 'MrStefan <mrstefaan@gmail.com>, Artur Stachecki <artur.stachecki@gmail.com>'
language = 'pl'
description =u'Formuła 1, Robert Kubica, F3, GP2 oraz inne serie wyścigowe.'
masthead_url='http://www.f1ultra.pl/templates/f1ultra/images/logo.gif'
remove_empty_feeds= True
oldest_article = 1
max_articles_per_feed = 100
remove_javascript=True
no_stylesheets=True
keep_only_tags =[(dict(name = 'div', attrs = {'id' : 'main'}))]
remove_tags_after =[dict(attrs = {'style' : 'margin-top:5px;margin-bottom:5px;display: inline;'})]
remove_tags =[(dict(attrs = {'class' : ['buttonheading', 'avPlayerContainer', 'createdate']}))]
remove_tags.append(dict(attrs = {'title' : ['PDF', 'Drukuj', 'Email']}))
remove_tags.append(dict(name = 'form', attrs = {'method' : 'post'}))
remove_tags.append(dict(name = 'hr', attrs = {'size' : '2'}))
preprocess_regexps = [(re.compile(r'align="left"'), lambda match: ''),
(re.compile(r'align="right"'), lambda match: ''),
(re.compile(r'width=\"*\"'), lambda match: ''),
(re.compile(r'\<table .*?\>'), lambda match: '')]
extra_css = '''.contentheading { font-size: 1.4em; font-weight: bold; }
img { display: block; clear: both;}
'''
remove_attributes = ['width','height','position','float','padding-left','padding-right','padding','text-align']
feeds = [(u'F1 Ultra', u'http://www.f1ultra.pl/index.php?option=com_rd_rss&id=1&Itemid=245')]

View File

@ -8,7 +8,6 @@ krakow.gazeta.pl
'''
from calibre.web.feeds.news import BasicNewsRecipe
import re
class gw_krakow(BasicNewsRecipe):
title = u'Gazeta.pl Kraków'

View File

@ -8,7 +8,6 @@ warszawa.gazeta.pl
'''
from calibre.web.feeds.news import BasicNewsRecipe
import re
class gw_wawa(BasicNewsRecipe):
title = u'Gazeta.pl Warszawa'

BIN
recipes/icons/antyweb.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 668 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 190 B

BIN
recipes/icons/f1_ultra.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 490 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.1 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.2 KiB

49
recipes/myapple_pl.recipe Normal file
View File

@ -0,0 +1,49 @@
from calibre.web.feeds.news import BasicNewsRecipe
class MyAppleRecipe(BasicNewsRecipe):
__license__ = 'GPL v3'
__author__ = u'Artur Stachecki <artur.stachecki@gmail.com>'
language = 'pl'
version = 1
title = u'MyApple.pl'
category = u'News'
description = u' Największy w Polsce serwis zajmujący się tematyką związaną z Apple i wszelkimi produktami tej firmy.'
cover_url=''
remove_empty_feeds= True
no_stylesheets=True
oldest_article = 7
max_articles_per_feed = 100000
recursions = 0
no_stylesheets = True
remove_javascript = True
simultaneous_downloads = 3
keep_only_tags =[]
keep_only_tags.append(dict(name = 'div', attrs = {'id' : 'article_content'}))
remove_tags =[]
remove_tags.append(dict(name = 'div', attrs = {'class' : 'article_author_date_comment_container'}))
remove_tags.append(dict(name = 'div', attrs = {'class' : 'fullwidth'}))
remove_tags.append(dict(name = 'div', attrs = {'class' : 'cmslinks'}))
remove_tags.append(dict(name = 'div', attrs = {'class' : 'googleads-468'}))
remove_tags.append(dict(name = 'div', attrs = {'id' : 'comments'}))
extra_css = '''
body {font-family: verdana, arial, helvetica, geneva, sans-serif ;}
td.contentheading{font-size: large; font-weight: bold;}
'''
feeds = [
('News', 'feed://myapple.pl/external.php?do=rss&type=newcontent&sectionid=1&days=120&count=10'),
]
def preprocess_html(self, soup):
for alink in soup.findAll('a'):
if alink.string is not None:
tstr = alink.string
alink.replaceWith(tstr)
return soup

View File

@ -0,0 +1,67 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
from calibre.web.feeds.news import BasicNewsRecipe
import re
class telepolis(BasicNewsRecipe):
title = u'Telepolis.pl'
__author__ = 'Artur Stachecki <artur.stachecki@gmail.com>'
language = 'pl'
description = u'Twój telekomunikacyjny serwis informacyjny.\
Codzienne informacje, testy i artykuły,\
promocje, baza telefonów oraz centrum rozrywki'
oldest_article = 7
masthead_url = 'http://telepolis.pl/i/telepolis-logo2.gif'
max_articles_per_feed = 100
simultaneous_downloads = 5
remove_javascript = True
no_stylesheets = True
use_embedded_content = False
remove_tags = []
remove_tags.append(dict(attrs={'alt': 'TELEPOLIS.pl'}))
preprocess_regexps = [(re.compile(r'<: .*? :>'),
lambda match: ''),
(re.compile(r'<b>Zobacz:</b>.*?</a>', re.DOTALL),
lambda match: ''),
(re.compile(r'<-ankieta.*?>'),
lambda match: ''),
(re.compile(r'\(Q\!\)'),
lambda match: ''),
(re.compile(r'\(plik.*?\)'),
lambda match: ''),
(re.compile(r'<br.*?><br.*?>', re.DOTALL),
lambda match: '')
]
extra_css = '''.tb { font-weight: bold; font-size: 20px;}'''
feeds = [
(u'Wiadomości', u'http://www.telepolis.pl/rss/news.php'),
(u'Artykuły', u'http://www.telepolis.pl/rss/artykuly.php')
]
def print_version(self, url):
if 'news.php' in url:
print_url = url.replace('news.php', 'news_print.php')
else:
print_url = url.replace('artykuly.php', 'art_print.php')
return print_url
def preprocess_html(self, soup):
for image in soup.findAll('img'):
if 'm.jpg' in image['src']:
image_big = image['src']
image_big = image_big.replace('m.jpg', '.jpg')
image['src'] = image_big
logo = soup.find('tr')
logo.extract()
for tag in soup.findAll('tr'):
for strings in ['Wiadomość wydrukowana', 'copyright']:
if strings in self.tag_to_string(tag):
tag.extract()
return self.adeify_images(soup)