Various Polish news sources by fenuks
17
recipes/badania_net.recipe
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
class BadaniaNet(BasicNewsRecipe):
    """Calibre news recipe for badania.net (category 'science', Polish)."""

    title = u'badania.net'
    __author__ = 'fenuks'
    description = u'chcesz wiedzieć więcej?'
    category = 'science'
    language = 'pl'
    cover_url = 'http://badania.net/wp-content/badanianet_green_transparent.png'

    # Download policy.
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_empty_feeds = True
    use_embedded_content = False

    # Keep only the article container, cut everything that follows the tag
    # list, and drop the category / comment-count / tag-list widgets.
    keep_only_tags = [dict(id='omc-full-article')]
    remove_tags_after = dict(attrs={'class':'omc-single-tags'})
    remove_tags = [dict(attrs={'class':['omc-flex-category', 'omc-comment-count', 'omc-single-tags']})]

    # One (section title, RSS URL) pair per category.  Every entry must
    # point at the category's ``/feed/`` endpoint; the 'Zdrowie' entry used
    # to point at the HTML category page instead.
    feeds = [
        (u'Psychologia', u'http://badania.net/category/psychologia/feed/'),
        (u'Technologie', u'http://badania.net/category/technologie/feed/'),
        (u'Biologia', u'http://badania.net/category/biologia/feed/'),
        (u'Chemia', u'http://badania.net/category/chemia/feed/'),
        (u'Zdrowie', u'http://badania.net/category/zdrowie/feed/'),
        (u'Seks', u'http://badania.net/category/psychologia-ewolucyjna-tematyka-seks/feed/'),
    ]
|
23
recipes/eso_pl.recipe
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class ESO(BasicNewsRecipe):
    """Calibre news recipe for the Polish pages of eso.org."""

    title = u'ESO PL'
    __author__ = 'fenuks'
    description = u'ESO, Europejskie Obserwatorium Południowe, buduje i obsługuje najbardziej zaawansowane naziemne teleskopy astronomiczne na świecie'
    category = 'astronomy'
    language = 'pl'
    cover_url = 'https://twimg0-a.akamaihd.net/profile_images/1922519424/eso-twitter-logo.png'

    # Download policy.
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_empty_feeds = True
    use_embedded_content = False

    # Page clean-up: keep the 'subcl' element and drop the listed blocks.
    keep_only_tags = [dict(attrs={'class':'subcl'})]
    remove_tags = [
        dict(id='lang_row'),
        dict(attrs={'class':['pr_typeid', 'pr_news_feature_link', 'outreach_usage', 'hidden']}),
    ]

    # (section title, feed URL) pairs.
    feeds = [
        (u'Wiadomo\u015bci', u'http://www.eso.org/public/poland/news/feed/'),
        (u'Og\u0142oszenia', u'http://www.eso.org/public/poland/announcements/feed/'),
        (u'Zdj\u0119cie tygodnia', u'http://www.eso.org/public/poland/images/potw/feed/'),
    ]

    def preprocess_html(self, soup):
        """Prefix every link target that starts with '/' with the eso.org host.

        Returns the same soup object after rewriting the links in place.
        """
        site_root = 'http://www.eso.org'
        for link in soup.findAll('a', href=True):
            target = link['href']
            if target.startswith('/'):
                link['href'] = site_root + target
        return soup
|
BIN
recipes/icons/badania_net.png
Normal file
After Width: | Height: | Size: 968 B |
BIN
recipes/icons/eso_pl.png
Normal file
After Width: | Height: | Size: 3.6 KiB |
BIN
recipes/icons/kurier_galicyjski.png
Normal file
After Width: | Height: | Size: 726 B |
BIN
recipes/icons/nauka_w_polsce.png
Normal file
After Width: | Height: | Size: 744 B |
BIN
recipes/icons/osworld_pl.png
Normal file
After Width: | Height: | Size: 1.1 KiB |
BIN
recipes/icons/ubuntu_pomoc_org.png
Normal file
After Width: | Height: | Size: 757 B |
BIN
recipes/icons/wprost_rss.png
Normal file
After Width: | Height: | Size: 1.7 KiB |
14
recipes/kdefamily_pl.recipe
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class KDEFamilyPl(BasicNewsRecipe):
    """Calibre news recipe for KDEFamily.pl; uses the content embedded in the feed."""

    title = u'KDEFamily.pl'
    __author__ = 'fenuks'
    description = u'KDE w Polsce'
    category = 'open source, KDE'
    language = 'pl'
    cover_url = 'http://www.mykde.home.pl/kdefamily/wp-content/uploads/2012/07/logotype-e1341585198616.jpg'

    # Download policy: articles come straight from the feed body
    # (use_embedded_content), so no page clean-up rules are declared.
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = True

    # Single (section title, feed URL) pair.
    feeds = [
        (u'Wszystko', u'http://kdefamily.pl/feed/'),
    ]
|
56
recipes/kurier_galicyjski.recipe
Normal file
@ -0,0 +1,56 @@
|
|||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
from calibre.ebooks.BeautifulSoup import BeautifulSoup as bs
|
||||||
|
class KurierGalicyjski(BasicNewsRecipe):
    """Calibre news recipe for Kurier Galicyjski (kuriergalicyjski.com).

    Multi-page articles are stitched together in ``append_page`` before the
    usual link and style clean-up in ``preprocess_html``.
    """

    title = u'Kurier Galicyjski'
    __author__ = 'fenuks'
    # No description is set for this recipe.
    #description = u''
    category = 'news'
    language = 'pl'
    cover_url = 'http://www.duszki.pl/Kurier_galicyjski_bis2_small.gif'

    # Download policy.
    oldest_article = 7
    max_articles_per_feed = 100
    remove_empty_feeds = True
    no_stylesheets = True

    # Exact inline style of a block that is stripped from every page.  It is
    # matched verbatim, so it is kept in one place and shared by
    # ``remove_tags`` and ``append_page``.
    _strip_style = 'border-top-width: thin; border-top-style: dashed; border-top-color: #CCC; border-bottom-width: thin; border-bottom-style: dashed; border-bottom-color: #CCC; padding-top:5px; padding-bottom:5px; text-align:right; margin-top:10px; height:20px;'

    keep_only_tags = [dict(attrs={'class':'item-page'})]
    remove_tags = [dict(attrs={'class':'pagenav'}), dict(attrs={'style':_strip_style})]

    # (section title, Atom feed URL) pairs.
    # NOTE(review): 'Publicystyka' and 'Niezwyk\u0142e historie' use the same
    # URL (niezwykle-historie), which looks like a copy/paste slip -- confirm
    # the intended 'Publicystyka' feed address with the site.
    feeds = [
        (u'Wydarzenia', u'http://kuriergalicyjski.com/index.php/wydarzenia?format=feed&type=atom'),
        (u'Publicystyka', u'http://kuriergalicyjski.com/index.php/niezwykle-historie?format=feed&type=atom'),
        (u'Reporta\u017ce', u'http://kuriergalicyjski.com/index.php/report?format=feed&type=atom'),
        (u'Rozmowy Kuriera', u'http://kuriergalicyjski.com/index.php/kuriera?format=feed&type=atom'),
        (u'Przegl\u0105d prasy', u'http://kuriergalicyjski.com/index.php/2012-01-05-14-08-55?format=feed&type=atom'),
        (u'Kultura', u'http://kuriergalicyjski.com/index.php/2011-12-02-14-26-39?format=feed&type=atom'),
        (u'Zabytki', u'http://kuriergalicyjski.com/index.php/2011-12-02-14-27-32?format=feed&type=atom'),
        (u'Polska-Ukraina', u'http://kuriergalicyjski.com/index.php/pol-ua?format=feed&type=atom'),
        (u'Polacy i Ukrai\u0144cy', u'http://kuriergalicyjski.com/index.php/polacy-i-ukr?format=feed&type=atom'),
        (u'Niezwyk\u0142e historie', u'http://kuriergalicyjski.com/index.php/niezwykle-historie?format=feed&type=atom'),
        (u'Polemiki', u'http://kuriergalicyjski.com/index.php/polemiki?format=feed&type=atom'),
    ]

    def append_page(self, soup, appendtag):
        """Append the follow-up pages of a multi-page article to *appendtag*.

        soup      -- parsed first page; searched for the 'article-index' element.
        appendtag -- tag that receives the content of the following pages.

        Does nothing when the page has no article index.  A follow-up page
        that lacks an 'item-page' element is skipped instead of raising.
        Pager widgets are removed from *appendtag* once all pages are in.
        """
        index = soup.find(id='article-index')
        if index is None:
            return

        # The first link of the index is skipped (presumably it points at
        # the page already loaded -- confirm against the site markup).
        for a in index.findAll('a')[1:]:
            nexturl = 'http://www.kuriergalicyjski.com' + a['href']
            soup2 = self.index_to_soup(nexturl)
            pagetext = soup2.find(attrs={'class':'item-page'})
            if pagetext is None:
                # Nothing usable on this page; keep what was gathered so far.
                continue
            # Drop the repeated heading and article-info block of the page.
            if pagetext.h2:
                pagetext.h2.extract()
            info = pagetext.find(attrs={'class':'article-info'})
            if info:
                info.extract()
            appendtag.insert(len(appendtag.contents), pagetext)

        # Remove navigation widgets, including those pulled in with the
        # appended pages (those were fetched raw, without remove_tags).
        for r in appendtag.findAll(id='article-index'):
            r.extract()
        for css_class in ('pagenavcounter', 'pagination', 'pagenav'):
            for r in appendtag.findAll(attrs={'class':css_class}):
                r.extract()
        for r in appendtag.findAll(attrs={'style':self._strip_style}):
            r.extract()

    def preprocess_html(self, soup):
        """Merge article pages, then normalise styles, captions and links.

        Returns the same soup object, modified in place.
        """
        # Must run before the style attributes are deleted below, because
        # append_page identifies one block by its exact inline style.
        self.append_page(soup, soup.body)
        for r in soup.findAll(style=True):
            del r['style']
        # Put line breaks around the last child of each image-caption block.
        for img in soup.findAll(attrs={'class':'easy_img_caption smartresize'}):
            img.insert(len(img.contents)-1, bs('<br />'))
            img.insert(len(img.contents), bs('<br /><br />'))
        # Make links that start with '/' absolute.
        for a in soup.findAll('a', href=True):
            if a['href'].startswith('/'):
                a['href'] = 'http://kuriergalicyjski.com' + a['href']
        return soup
|
47
recipes/nauka_w_polsce.recipe
Normal file
@ -0,0 +1,47 @@
|
|||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
import re
|
||||||
|
class NaukawPolsce(BasicNewsRecipe):
    """Calibre news recipe for Nauka w Polsce (naukawpolsce.pl).

    The site is scraped section by section (``parse_index``) instead of
    being read from RSS feeds.
    """

    title = u'Nauka w Polsce'
    __author__ = 'fenuks'
    description = u'Serwis Nauka w Polsce ma za zadanie popularyzację polskiej nauki. Można na nim znaleźć wiadomości takie jak: osiągnięcia polskich naukowców, wydarzenia na polskich uczelniach, osiągnięcia studentów, konkursy dla badaczy, staże i stypendia naukowe, wydarzenia w polskiej nauce, kalendarium wydarzeń w nauce, materiały wideo o nauce.'
    category = 'science'
    language = 'pl'
    cover_url = 'http://www.naukawpolsce.pap.pl/Themes/Pap/images/logo-pl.gif'

    # Download policy.
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_empty_feeds = True

    # Host prefix prepended to the article links found on listing pages.
    index = 'http://www.naukawpolsce.pl'

    keep_only_tags = [dict(name='div', attrs={'class':'margines wiadomosc'})]
    remove_tags = [dict(name='div', attrs={'class':'tagi'})]

    def find_articles(self, url):
        """Return the article dicts found on the section listing at *url*.

        Each dict has the keys 'title', 'url', 'date' and 'description';
        'date' and 'description' are always empty strings.  Listing entries
        without an ``h1 > a`` link carrying an href are skipped.
        """
        articles = []
        soup = self.index_to_soup(url)
        for entry in soup.findAll(name='div', attrs={'class':'aktualnosci-margines lista-depesz information-content'}):
            heading = entry.h1
            link = heading.a if heading is not None else None
            if link is None or not link.get('href'):
                continue
            # ``.string`` is None when the anchor holds nested markup; fall
            # back to the concatenated text so the title is never None.
            title = link.string or u''.join(link.findAll(text=True))
            articles.append({'title' : title,
                             'url' : self.index + link['href'],
                             'date' : '',
                             'description' : ''
                             })
        return articles

    def parse_index(self):
        """Build the list of (section title, articles) pairs for the recipe."""
        sections = (
            (u"Historia i kultura", 'http://www.naukawpolsce.pl/historia-i-kultura/'),
            (u"Kosmos", 'http://www.naukawpolsce.pl/kosmos/'),
            (u"Przyroda", 'http://www.naukawpolsce.pl/przyroda/'),
            (u"Społeczeństwo", 'http://www.naukawpolsce.pl/spoleczenstwo/'),
            (u"Technologie", 'http://www.naukawpolsce.pl/technologie/'),
            (u"Uczelnie", 'http://www.naukawpolsce.pl/uczelnie/'),
            (u"Nauki medyczne", 'http://www.naukawpolsce.pl/zdrowie/'),
        )
        return [(name, self.find_articles(listing)) for name, listing in sections]

    def preprocess_html(self, soup):
        """Remove spacer paragraphs (``<p>`` with no child tags and no text).

        The previous implementation called
        ``findAll(name='p', text=re.compile(' '))``.  That pattern matches
        any text containing a space, and a ``text=`` search appears to
        return text nodes rather than ``<p>`` tags (BeautifulSoup 3
        behaviour -- confirm for the bundled version), so ordinary article
        text was at risk.  Presumably the pattern was meant to be
        ``&nbsp;``.  Returns the same soup object.
        """
        for p in soup.findAll('p'):
            if p.find(True) is not None:
                # Paragraph wraps other tags (images, links, ...): keep it.
                continue
            text = u''.join(p.findAll(text=True))
            # strip() also removes a decoded non-breaking space (U+00A0).
            if not text.replace(u'&nbsp;', u'').strip():
                p.extract()
        return soup
|
33
recipes/osworld_pl.recipe
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
class OSWorld(BasicNewsRecipe):
    """Calibre news recipe for OSWorld.pl; stitches paginated articles together."""

    title = u'OSWorld.pl'
    __author__ = 'fenuks'
    description = u'OSWorld.pl to serwis internetowy, dzięki któremu poznasz czym naprawdę jest Open Source. Serwis poświęcony jest wolnemu oprogramowaniu jak linux mint, centos czy ubunty. Znajdziecie u nasz artykuły, unity oraz informacje o certyfikatach CACert. OSWorld to mały świat wielkich systemów!'
    category = 'OS, IT, open source, Linux'
    language = 'pl'
    cover_url = 'http://osworld.pl/wp-content/uploads/osworld-kwadrat-128x111.png'

    # Download policy.
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_empty_feeds = True
    use_embedded_content = False

    # Page clean-up rules.
    keep_only_tags = [dict(id=['dzial', 'posts'])]
    remove_tags = [dict(attrs={'class':'post-comments'})]
    remove_tags_after = dict(attrs={'class':'entry clr'})

    # (section title, feed URL) pairs.
    feeds = [
        (u'Artyku\u0142y', u'http://osworld.pl/category/artykuly/feed/'),
        (u'Nowe wersje', u'http://osworld.pl/category/nowe-wersje/feed/'),
    ]

    def append_page(self, soup, appendtag):
        """Append the pages linked from the 'paginacja' pager to *appendtag*.

        soup      -- parsed first page (not inspected here).
        appendtag -- tag searched for the pager and extended with the
                     'entry clr' element of every linked page.

        Does nothing when there is no pager.  Pager anchors without an href
        and pages without an 'entry clr' element are skipped instead of
        raising.  All pager elements are removed afterwards.
        """
        pager = appendtag.find(attrs={'id':'paginacja'})
        if pager is None:
            return
        for link in pager.findAll('a', href=True):
            soup2 = self.index_to_soup(link['href'])
            pagetext = soup2.find(attrs={'class':'entry clr'})
            if pagetext is None:
                # Inserting None into the tree would fail; skip this page.
                continue
            appendtag.insert(len(appendtag.contents), pagetext)
        for r in appendtag.findAll(attrs={'id':'paginacja'}):
            r.extract()

    def preprocess_html(self, soup):
        """Merge the article's extra pages into the body and return the soup."""
        self.append_page(soup, soup.body)
        return soup
|
22
recipes/ubuntu_pomoc_org.recipe
Normal file
@ -0,0 +1,22 @@
|
|||||||
|
import re
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
class UbuntuPomoc(BasicNewsRecipe):
    """Calibre news recipe for Ubuntu-pomoc.org."""

    title = u'Ubuntu-pomoc.org'
    __author__ = 'fenuks'
    description = u'Strona poświęcona systemowi Ubuntu Linux. Znajdziesz tutaj przydatne i sprawdzone poradniki oraz sposoby rozwiązywania wielu popularnych problemów. Ten blog rozwiąże każdy Twój problem - jeśli nie teraz, to wkrótce! :)'
    category = 'Linux, Ubuntu, open source'
    language = 'pl'
    cover_url = 'http://www.ubuntu-pomoc.org/grafika/ubuntupomoc.png'

    # Cut the raw HTML from the first 'ciekawostka' div to the end of the
    # document (DOTALL lets '.+' run across line breaks).
    preprocess_regexps = [(re.compile(r'<div class="ciekawostka">.+', re.IGNORECASE|re.DOTALL), lambda m: '')]

    # Download policy.
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_javascript = True
    remove_empty_feeds = True
    use_embedded_content = False

    # BasicNewsRecipe reads ``remove_attributes``; the former name
    # ``remove_attrs`` is not a recipe option, so inline styles were never
    # actually stripped.
    remove_attributes = ['style']

    # Page clean-up rules.
    keep_only_tags = [dict(attrs={'class':'post'})]
    remove_tags_after = dict(attrs={'class':'underEntry'})
    remove_tags = [
        dict(attrs={'class':['underPostTitle', 'yarpp-related', 'underEntry', 'social', 'tags', 'commentlist', 'youtube_sc']}),
        dict(id=['wp_rp_first', 'commentReply']),
    ]

    # (section title, feed URL) pairs.
    feeds = [
        (u'Ca\u0142o\u015b\u0107', u'http://feeds.feedburner.com/Ubuntu-Pomoc'),
        (u'Gry', u'http://feeds.feedburner.com/GryUbuntu-pomoc'),
    ]
|
71
recipes/wprost_rss.recipe
Normal file
@ -0,0 +1,71 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
# All three notices are kept in one value.  Previously __copyright__ was
# assigned three times in a row, so only the last notice survived at runtime.
__copyright__ = ('2010, matek09, matek09@gmail.com\n'
                 'Modified 2011, Mariusz Wolek <mariusz_dot_wolek @ gmail dot com>\n'
                 'Modified 2012, Artur Stachecki <artur.stachecki@gmail.com>')
|
||||||
|
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
import re
|
||||||
|
|
||||||
|
class Wprost(BasicNewsRecipe):
    """Calibre news recipe for the RSS feeds of wprost.pl."""

    title = u'Wprost (RSS)'
    __author__ = 'matek09'
    description = 'Weekly magazine'
    encoding = 'ISO-8859-2'
    no_stylesheets = True
    language = 'pl'
    remove_javascript = True
    recursions = 0
    use_embedded_content = False
    remove_empty_feeds = True

    # Keep only the 'print-layer' div: drop everything before and after it.
    remove_tags_before = dict(name = 'div', attrs = {'id' : 'print-layer'})
    remove_tags_after = dict(name = 'div', attrs = {'id' : 'print-layer'})

    # Disabled alternative selection, kept for reference:
    # keep_only_tags =[]
    # keep_only_tags.append(dict(name = 'table', attrs = {'id' : 'title-table'}))
    # keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'div-header'}))
    # keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'div-content'}))
    # keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'def element-autor'}))

    # Raw-HTML substitutions applied before parsing: un-hide hidden content,
    # strip table scaffolding and remove the footer.
    preprocess_regexps = [(re.compile(r'style="display: none;"'), lambda match: ''),
    (re.compile(r'display: block;'), lambda match: ''),
    (re.compile(r'\<td\>\<tr\>\<\/table\>'), lambda match: ''),
    (re.compile(r'\<table .*?\>'), lambda match: ''),
    (re.compile(r'\<tr>'), lambda match: ''),
    (re.compile(r'\<td .*?\>'), lambda match: ''),
    (re.compile(r'\<div id="footer"\>.*?\</footer\>'), lambda match: '')]

    remove_tags = [
        dict(name = 'div', attrs = {'class' : 'def element-date'}),
        dict(name = 'div', attrs = {'class' : 'def silver'}),
        dict(name = 'div', attrs = {'id' : 'content-main-column-right'}),
    ]

    extra_css = '''.div-header {font-size: x-small; font-weight: bold}'''
    #h2 {font-size: x-large; font-weight: bold}

    # (section title, feed URL) pairs.
    feeds = [(u'Tylko u nas', u'http://www.wprost.pl/rss/rss_wprostextra.php'),
    (u'Wydarzenia', u'http://www.wprost.pl/rss/rss.php'),
    (u'Komentarze', u'http://www.wprost.pl/rss/rss_komentarze.php'),
    (u'Wydarzenia: Kraj', u'http://www.wprost.pl/rss/rss_kraj.php'),
    (u'Komentarze: Kraj', u'http://www.wprost.pl/rss/rss_komentarze_kraj.php'),
    (u'Wydarzenia: Świat', u'http://www.wprost.pl/rss/rss_swiat.php'),
    (u'Komentarze: Świat', u'http://www.wprost.pl/rss/rss_komentarze_swiat.php'),
    (u'Wydarzenia: Gospodarka', u'http://www.wprost.pl/rss/rss_gospodarka.php'),
    (u'Komentarze: Gospodarka', u'http://www.wprost.pl/rss/rss_komentarze_gospodarka.php'),
    (u'Wydarzenia: Życie', u'http://www.wprost.pl/rss/rss_zycie.php'),
    (u'Komentarze: Życie', u'http://www.wprost.pl/rss/rss_komentarze_zycie.php'),
    (u'Wydarzenia: Sport', u'http://www.wprost.pl/rss/rss_sport.php'),
    (u'Komentarze: Sport', u'http://www.wprost.pl/rss/rss_komentarze_sport.php'),
    (u'Przegląd prasy', u'http://www.wprost.pl/rss/rss_prasa.php')
    ]

    def get_cover_url(self):
        """Return the cover image URL taken from the weekly's landing page.

        Looks for the element with class 'wprost-cover' on
        http://www.wprost.pl/tygodnik and stores its ``src`` in
        ``self.cover_url``.  When the element or its ``src`` is missing, the
        existing ``cover_url`` value (None if there is none) is returned.
        """
        soup = self.index_to_soup('http://www.wprost.pl/tygodnik')
        cover = soup.find(attrs={'class':'wprost-cover'})
        # .get() avoids a KeyError when the matched element has no src.
        src = cover.get('src') if cover is not None else None
        if src:
            self.cover_url = src
        return getattr(self, 'cover_url', None)
|