diff --git a/recipes/badania_net.recipe b/recipes/badania_net.recipe
new file mode 100644
index 0000000000..01499f6369
--- /dev/null
+++ b/recipes/badania_net.recipe
@@ -0,0 +1,31 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+
+class BadaniaNet(BasicNewsRecipe):
+    """Calibre news recipe for badania.net (language 'pl', category 'science')."""
+
+    title = u'badania.net'
+    __author__ = 'fenuks'
+    description = u'chcesz wiedzieć więcej?'
+    category = 'science'
+    language = 'pl'
+    cover_url = 'http://badania.net/wp-content/badanianet_green_transparent.png'
+    oldest_article = 7
+    max_articles_per_feed = 100
+    no_stylesheets = True
+    remove_empty_feeds = True
+    use_embedded_content = False
+    remove_tags = [dict(attrs={'class':['omc-flex-category', 'omc-comment-count', 'omc-single-tags']})]
+    remove_tags_after = dict(attrs={'class':'omc-single-tags'})
+    keep_only_tags = [dict(id='omc-full-article')]
+    # One (section title, feed URL) pair per site category. Every URL ends in
+    # '/feed/'; the bare category URL is presumably the HTML listing page and
+    # not a feed, so 'Zdrowie' carries the suffix like its siblings.
+    feeds = [
+        (u'Psychologia', u'http://badania.net/category/psychologia/feed/'),
+        (u'Technologie', u'http://badania.net/category/technologie/feed/'),
+        (u'Biologia', u'http://badania.net/category/biologia/feed/'),
+        (u'Chemia', u'http://badania.net/category/chemia/feed/'),
+        (u'Zdrowie', u'http://badania.net/category/zdrowie/feed/'),
+        (u'Seks', u'http://badania.net/category/psychologia-ewolucyjna-tematyka-seks/feed/'),
+    ]
diff --git a/recipes/eso_pl.recipe b/recipes/eso_pl.recipe
new file mode 100644
index 0000000000..5ebb420396
--- /dev/null
+++ b/recipes/eso_pl.recipe
@@ -0,0 +1,37 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+
+class ESO(BasicNewsRecipe):
+    """Calibre news recipe for the ESO feeds under www.eso.org/public/poland."""
+
+    title = u'ESO PL'
+    __author__ = 'fenuks'
+    description = u'ESO, Europejskie Obserwatorium Południowe, buduje i obsługuje najbardziej zaawansowane naziemne teleskopy astronomiczne na świecie'
+    category = 'astronomy'
+    language = 'pl'
+    oldest_article = 7
+    max_articles_per_feed = 100
+    no_stylesheets = True
+    remove_empty_feeds = True
+    use_embedded_content = False
+    cover_url = 'https://twimg0-a.akamaihd.net/profile_images/1922519424/eso-twitter-logo.png'
+    keep_only_tags = [dict(attrs={'class':'subcl'})]
+    remove_tags = [dict(id='lang_row'), dict(attrs={'class':['pr_typeid', 'pr_news_feature_link', 'outreach_usage', 'hidden']})]
+    feeds = [
+        (u'Wiadomo\u015bci', u'http://www.eso.org/public/poland/news/feed/'),
+        (u'Og\u0142oszenia', u'http://www.eso.org/public/poland/announcements/feed/'),
+        (u'Zdj\u0119cie tygodnia', u'http://www.eso.org/public/poland/images/potw/feed/'),
+    ]
+
+    def preprocess_html(self, soup):
+        """Rewrite site-relative links in ``soup`` to absolute eso.org URLs.
+
+        Only hrefs that start with a single '/' are prefixed. Protocol-relative
+        links ('//host/...') already name a host, so they are left untouched.
+        The soup is modified in place and returned.
+        """
+        for a in soup.findAll('a', href=True):
+            href = a['href']
+            if href.startswith('/') and not href.startswith('//'):
+                a['href'] = 'http://www.eso.org' + href
+        return soup
diff --git a/recipes/icons/badania_net.png b/recipes/icons/badania_net.png
new file mode 100644
index 0000000000..de915de8d1
Binary files /dev/null and b/recipes/icons/badania_net.png differ
diff --git a/recipes/icons/eso_pl.png b/recipes/icons/eso_pl.png
new file mode 100644
index 0000000000..4f3319fece
Binary files /dev/null and b/recipes/icons/eso_pl.png differ
diff --git a/recipes/icons/kurier_galicyjski.png b/recipes/icons/kurier_galicyjski.png
new file mode 100644
index 0000000000..4d66a15122
Binary files /dev/null and b/recipes/icons/kurier_galicyjski.png differ
diff --git a/recipes/icons/nauka_w_polsce.png b/recipes/icons/nauka_w_polsce.png
new file mode 100644
index 0000000000..0d872ce682
Binary files /dev/null and b/recipes/icons/nauka_w_polsce.png differ
diff --git a/recipes/icons/osworld_pl.png b/recipes/icons/osworld_pl.png
new file mode 100644
index 0000000000..97a7d0dd55
Binary files /dev/null and b/recipes/icons/osworld_pl.png differ
diff --git a/recipes/icons/ubuntu_pomoc_org.png b/recipes/icons/ubuntu_pomoc_org.png
new file mode 100644
index 0000000000..a143846630
Binary files /dev/null and b/recipes/icons/ubuntu_pomoc_org.png differ
diff --git a/recipes/icons/wprost_rss.png b/recipes/icons/wprost_rss.png
new file mode 100644
index 0000000000..5ce1b5563d
Binary files /dev/null and b/recipes/icons/wprost_rss.png differ
diff --git a/recipes/kdefamily_pl.recipe 
b/recipes/kdefamily_pl.recipe new file mode 100644 index 0000000000..75f88b0f3d --- /dev/null +++ b/recipes/kdefamily_pl.recipe @@ -0,0 +1,14 @@ +from calibre.web.feeds.news import BasicNewsRecipe + +class KDEFamilyPl(BasicNewsRecipe): + title = u'KDEFamily.pl' + __author__ = 'fenuks' + description = u'KDE w Polsce' + category = 'open source, KDE' + language = 'pl' + cover_url = 'http://www.mykde.home.pl/kdefamily/wp-content/uploads/2012/07/logotype-e1341585198616.jpg' + oldest_article = 7 + max_articles_per_feed = 100 + no_stylesheets = True + use_embedded_content = True + feeds = [(u'Wszystko', u'http://kdefamily.pl/feed/')] diff --git a/recipes/kurier_galicyjski.recipe b/recipes/kurier_galicyjski.recipe new file mode 100644 index 0000000000..01a36bf724 --- /dev/null +++ b/recipes/kurier_galicyjski.recipe @@ -0,0 +1,56 @@ +from calibre.web.feeds.news import BasicNewsRecipe +from calibre.ebooks.BeautifulSoup import BeautifulSoup as bs +class KurierGalicyjski(BasicNewsRecipe): + title = u'Kurier Galicyjski' + __author__ = 'fenuks' + #description = u'' + category = 'news' + language = 'pl' + cover_url = 'http://www.duszki.pl/Kurier_galicyjski_bis2_small.gif' + oldest_article = 7 + max_articles_per_feed = 100 + remove_empty_feeds = True + no_stylesheets = True + keep_only_tags = [dict(attrs={'class':'item-page'})] + remove_tags = [dict(attrs={'class':'pagenav'}), dict(attrs={'style':'border-top-width: thin; border-top-style: dashed; border-top-color: #CCC; border-bottom-width: thin; border-bottom-style: dashed; border-bottom-color: #CCC; padding-top:5px; padding-bottom:5px; text-align:right; margin-top:10px; height:20px;'})] + feeds = [(u'Wydarzenia', u'http://kuriergalicyjski.com/index.php/wydarzenia?format=feed&type=atom'), (u'Publicystyka', u'http://kuriergalicyjski.com/index.php/niezwykle-historie?format=feed&type=atom'), (u'Reporta\u017ce', u'http://kuriergalicyjski.com/index.php/report?format=feed&type=atom'), (u'Rozmowy Kuriera', 
u'http://kuriergalicyjski.com/index.php/kuriera?format=feed&type=atom'), (u'Przegl\u0105d prasy', u'http://kuriergalicyjski.com/index.php/2012-01-05-14-08-55?format=feed&type=atom'), (u'Kultura', u'http://kuriergalicyjski.com/index.php/2011-12-02-14-26-39?format=feed&type=atom'), (u'Zabytki', u'http://kuriergalicyjski.com/index.php/2011-12-02-14-27-32?format=feed&type=atom'), (u'Polska-Ukraina', u'http://kuriergalicyjski.com/index.php/pol-ua?format=feed&type=atom'), (u'Polacy i Ukrai\u0144cy', u'http://kuriergalicyjski.com/index.php/polacy-i-ukr?format=feed&type=atom'), (u'Niezwyk\u0142e historie', u'http://kuriergalicyjski.com/index.php/niezwykle-historie?format=feed&type=atom'), (u'Polemiki', u'http://kuriergalicyjski.com/index.php/polemiki?format=feed&type=atom')] + + def append_page(self, soup, appendtag): + pager = soup.find(id='article-index') + if pager: + pager = pager.findAll('a')[1:] + if pager: + for a in pager: + nexturl = 'http://www.kuriergalicyjski.com' + a['href'] + soup2 = self.index_to_soup(nexturl) + pagetext = soup2.find(attrs={'class':'item-page'}) + if pagetext.h2: + pagetext.h2.extract() + r = pagetext.find(attrs={'class':'article-info'}) + if r: + r.extract() + pos = len(appendtag.contents) + appendtag.insert(pos, pagetext) + pos = len(appendtag.contents) + for r in appendtag.findAll(id='article-index'): + r.extract() + for r in appendtag.findAll(attrs={'class':'pagenavcounter'}): + r.extract() + for r in appendtag.findAll(attrs={'class':'pagination'}): + r.extract() + for r in appendtag.findAll(attrs={'class':'pagenav'}): + r.extract() + for r in appendtag.findAll(attrs={'style':'border-top-width: thin; border-top-style: dashed; border-top-color: #CCC; border-bottom-width: thin; border-bottom-style: dashed; border-bottom-color: #CCC; padding-top:5px; padding-bottom:5px; text-align:right; margin-top:10px; height:20px;'}): + r.extract() + + def preprocess_html(self, soup): + self.append_page(soup, soup.body) + for r in 
soup.findAll(style=True): + del r['style'] + for img in soup.findAll(attrs={'class':'easy_img_caption smartresize'}): + img.insert(len(img.contents)-1, bs('
')) + img.insert(len(img.contents), bs('

')) + for a in soup.findAll('a', href=True): + if a['href'].startswith('/'): + a['href'] = 'http://kuriergalicyjski.com' + a['href'] + return soup diff --git a/recipes/nauka_w_polsce.recipe b/recipes/nauka_w_polsce.recipe new file mode 100644 index 0000000000..c524c18b26 --- /dev/null +++ b/recipes/nauka_w_polsce.recipe @@ -0,0 +1,47 @@ +from calibre.web.feeds.news import BasicNewsRecipe +import re +class NaukawPolsce(BasicNewsRecipe): + title = u'Nauka w Polsce' + __author__ = 'fenuks' + description = u'Serwis Nauka w Polsce ma za zadanie popularyzację polskiej nauki. Można na nim znaleźć wiadomości takie jak: osiągnięcia polskich naukowców, wydarzenia na polskich uczelniach, osiągnięcia studentów, konkursy dla badaczy, staże i stypendia naukowe, wydarzenia w polskiej nauce, kalendarium wydarzeń w nauce, materiały wideo o nauce.' + category = 'science' + language = 'pl' + cover_url = 'http://www.naukawpolsce.pap.pl/Themes/Pap/images/logo-pl.gif' + oldest_article = 7 + max_articles_per_feed = 100 + no_stylesheets = True + remove_empty_feeds = True + index = 'http://www.naukawpolsce.pl' + keep_only_tags = [dict(name='div', attrs={'class':'margines wiadomosc'})] + remove_tags = [dict(name='div', attrs={'class':'tagi'})] + + def find_articles(self, url): + articles = [] + soup=self.index_to_soup(url) + for i in soup.findAll(name='div', attrs={'class':'aktualnosci-margines lista-depesz information-content'}): + title = i.h1.a.string + url = self.index + i.h1.a['href'] + date = '' #i.span.string + articles.append({'title' : title, + 'url' : url, + 'date' : date, + 'description' : '' + }) + return articles + + def parse_index(self): + feeds = [] + feeds.append((u"Historia i kultura", self.find_articles('http://www.naukawpolsce.pl/historia-i-kultura/'))) + feeds.append((u"Kosmos", self.find_articles('http://www.naukawpolsce.pl/kosmos/'))) + feeds.append((u"Przyroda", self.find_articles('http://www.naukawpolsce.pl/przyroda/'))) + feeds.append((u"Społeczeństwo", 
self.find_articles('http://www.naukawpolsce.pl/spoleczenstwo/'))) + feeds.append((u"Technologie", self.find_articles('http://www.naukawpolsce.pl/technologie/'))) + feeds.append((u"Uczelnie", self.find_articles('http://www.naukawpolsce.pl/uczelnie/'))) + feeds.append((u"Nauki medyczne", self.find_articles('http://www.naukawpolsce.pl/zdrowie/'))) + + return feeds + + def preprocess_html(self, soup): + for p in soup.findAll(name='p', text=re.compile(' ')): + p.extract() + return soup diff --git a/recipes/osworld_pl.recipe b/recipes/osworld_pl.recipe new file mode 100644 index 0000000000..7784a271e0 --- /dev/null +++ b/recipes/osworld_pl.recipe @@ -0,0 +1,33 @@ +from calibre.web.feeds.news import BasicNewsRecipe +class OSWorld(BasicNewsRecipe): + title = u'OSWorld.pl' + __author__ = 'fenuks' + description = u'OSWorld.pl to serwis internetowy, dzięki któremu poznasz czym naprawdę jest Open Source. Serwis poświęcony jest wolnemu oprogramowaniu jak linux mint, centos czy ubunty. Znajdziecie u nasz artykuły, unity oraz informacje o certyfikatach CACert. OSWorld to mały świat wielkich systemów!' 
+ category = 'OS, IT, open source, Linux' + language = 'pl' + cover_url = 'http://osworld.pl/wp-content/uploads/osworld-kwadrat-128x111.png' + oldest_article = 7 + max_articles_per_feed = 100 + no_stylesheets = True + remove_empty_feeds = True + use_embedded_content = False + keep_only_tags = [dict(id=['dzial', 'posts'])] + remove_tags = [dict(attrs={'class':'post-comments'})] + remove_tags_after = dict(attrs={'class':'entry clr'}) + feeds = [(u'Artyku\u0142y', u'http://osworld.pl/category/artykuly/feed/'), (u'Nowe wersje', u'http://osworld.pl/category/nowe-wersje/feed/')] + + def append_page(self, soup, appendtag): + tag = appendtag.find(attrs={'id':'paginacja'}) + if tag: + for nexturl in tag.findAll('a'): + soup2 = self.index_to_soup(nexturl['href']) + pagetext = soup2.find(attrs={'class':'entry clr'}) + pos = len(appendtag.contents) + appendtag.insert(pos, pagetext) + for r in appendtag.findAll(attrs={'id':'paginacja'}): + r.extract() + + + def preprocess_html(self, soup): + self.append_page(soup, soup.body) + return soup diff --git a/recipes/ubuntu_pomoc_org.recipe b/recipes/ubuntu_pomoc_org.recipe new file mode 100644 index 0000000000..1a78649dfc --- /dev/null +++ b/recipes/ubuntu_pomoc_org.recipe @@ -0,0 +1,22 @@ +import re +from calibre.web.feeds.news import BasicNewsRecipe +class UbuntuPomoc(BasicNewsRecipe): + title = u'Ubuntu-pomoc.org' + __author__ = 'fenuks' + description = u'Strona poświęcona systemowi Ubuntu Linux. Znajdziesz tutaj przydatne i sprawdzone poradniki oraz sposoby rozwiązywania wielu popularnych problemów. Ten blog rozwiąże każdy Twój problem - jeśli nie teraz, to wkrótce! :)' + category = 'Linux, Ubuntu, open source' + language = 'pl' + cover_url = 'http://www.ubuntu-pomoc.org/grafika/ubuntupomoc.png' + preprocess_regexps = [(re.compile(r'
.+', re.IGNORECASE|re.DOTALL), lambda m: '')] + oldest_article = 7 + max_articles_per_feed = 100 + no_stylesheets = True + remove_javascript = True + remove_empty_feeds = True + use_embedded_content = False + remove_attrs = ['style'] + keep_only_tags = [dict(attrs={'class':'post'})] + remove_tags_after = dict(attrs={'class':'underEntry'}) + remove_tags = [dict(attrs={'class':['underPostTitle', 'yarpp-related', 'underEntry', 'social', 'tags', 'commentlist', 'youtube_sc']}), dict(id=['wp_rp_first', 'commentReply'])] + feeds = [(u'Ca\u0142o\u015b\u0107', u'http://feeds.feedburner.com/Ubuntu-Pomoc'), + (u'Gry', u'http://feeds.feedburner.com/GryUbuntu-pomoc')] diff --git a/recipes/wprost_rss.recipe b/recipes/wprost_rss.recipe new file mode 100644 index 0000000000..bffbacc474 --- /dev/null +++ b/recipes/wprost_rss.recipe @@ -0,0 +1,71 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = '2010, matek09, matek09@gmail.com' +__copyright__ = 'Modified 2011, Mariusz Wolek ' +__copyright__ = 'Modified 2012, Artur Stachecki ' + + +from calibre.web.feeds.news import BasicNewsRecipe +import re + +class Wprost(BasicNewsRecipe): + title = u'Wprost (RSS)' + __author__ = 'matek09' + description = 'Weekly magazine' + encoding = 'ISO-8859-2' + no_stylesheets = True + language = 'pl' + remove_javascript = True + recursions = 0 + use_embedded_content = False + remove_empty_feeds = True + remove_tags_before = dict(dict(name = 'div', attrs = {'id' : 'print-layer'})) + remove_tags_after = dict(dict(name = 'div', attrs = {'id' : 'print-layer'})) + ''' + keep_only_tags =[] + keep_only_tags.append(dict(name = 'table', attrs = {'id' : 'title-table'})) + keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'div-header'})) + keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'div-content'})) + keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'def element-autor'})) + ''' + + preprocess_regexps = [(re.compile(r'style="display: none;"'), lambda match: ''), 
+ (re.compile(r'display: block;'), lambda match: ''), + (re.compile(r'\\\<\/table\>'), lambda match: ''), + (re.compile(r'\'), lambda match: ''), + (re.compile(r'\'), lambda match: ''), + (re.compile(r'\
'), lambda match: ''), + (re.compile(r'\