diff --git a/recipes/adventure_zone_pl.recipe b/recipes/adventure_zone_pl.recipe index 00b4a8753e..50a980dc92 100644 --- a/recipes/adventure_zone_pl.recipe +++ b/recipes/adventure_zone_pl.recipe @@ -66,4 +66,3 @@ class Adventure_zone(BasicNewsRecipe): if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']: a['href']=self.index + a['href'] return soup - diff --git a/recipes/dzial_zagraniczny.recipe b/recipes/dzial_zagraniczny.recipe new file mode 100644 index 0000000000..1b8453dd40 --- /dev/null +++ b/recipes/dzial_zagraniczny.recipe @@ -0,0 +1,27 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__author__ = 'teepel ' + +''' +dzialzagraniczny.pl +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class dzial_zagraniczny(BasicNewsRecipe): + title = u'Dział Zagraniczny' + __author__ = 'teepel ' + language = 'pl' + description = u'Polskiego czytelnika to nie interesuje' + INDEX = 'http://dzialzagraniczny.pl' + extra_css = 'img {display: block;}' + oldest_article = 7 + cover_url = 'https://fbcdn-profile-a.akamaihd.net/hprofile-ak-prn1/c145.5.160.160/559442_415653975115959_2126205128_n.jpg' + max_articles_per_feed = 100 + remove_empty_feeds = True + remove_javascript = True + no_stylesheets = True + use_embedded_content = True + + feeds = [(u'Dział zagraniczny', u'http://feeds.feedburner.com/dyndns/UOfz')] diff --git a/recipes/el_diplo.recipe b/recipes/el_diplo.recipe index b9ef8268e1..7827cbbdd7 100644 --- a/recipes/el_diplo.recipe +++ b/recipes/el_diplo.recipe @@ -26,7 +26,7 @@ class ElDiplo_Recipe(BasicNewsRecipe): title = u'El Diplo' __author__ = 'Tomas Di Domenico' description = 'Publicacion mensual de Le Monde Diplomatique, edicion Argentina' - langauge = 'es_AR' + language = 'es_AR' needs_subscription = True auto_cleanup = True diff --git a/recipes/equipped.recipe b/recipes/equipped.recipe new file mode 100644 index 0000000000..af74c10523 --- /dev/null +++ b/recipes/equipped.recipe @@ -0,0 +1,29 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__author__ = 'teepel , Artur Stachecki ' + +''' +equipped.pl +''' + +from calibre.web.feeds.news import BasicNewsRecipe +class equipped(BasicNewsRecipe): + title = u'Equipped' + __author__ = 'teepel ' + language = 'pl' + description = u'Wiadomości z equipped.pl' + INDEX = 'http://equipped.pl' + extra_css = '.alignleft {float:left; margin-right:5px;}' + oldest_article = 7 + max_articles_per_feed = 100 + remove_empty_feeds = True + simultaneous_downloads = 5 + remove_javascript = True + no_stylesheets = True + use_embedded_content = False + #keep_only_tags = [dict(name='article')] + #remove_tags = [dict(id='disqus_thread')] + #remove_tags_after = [dict(id='disqus_thread')] + + feeds = [(u'Equipped', u'http://feeds.feedburner.com/Equippedpl?format=xml')] diff --git a/recipes/focus_pl.recipe b/recipes/focus_pl.recipe index 66864b8561..bac16ebbd5 100644 --- a/recipes/focus_pl.recipe +++ b/recipes/focus_pl.recipe @@ -1,12 +1,12 @@ +#!/usr/bin/env python +__license__ = 'GPL v3' + import re - from calibre.web.feeds.news import BasicNewsRecipe - class FocusRecipe(BasicNewsRecipe): - __license__ = 'GPL v3' - __author__ = u'intromatyk ' + __author__ = u'Artur Stachecki ' language = 'pl' version = 1 diff --git a/recipes/gazeta-prawna-calibre-v1.recipe b/recipes/gazeta-prawna-calibre-v1.recipe index 293aa05b0d..f7d2c4935b 100644 --- a/recipes/gazeta-prawna-calibre-v1.recipe +++ b/recipes/gazeta-prawna-calibre-v1.recipe @@ -14,13 +14,14 @@ class gazetaprawna(BasicNewsRecipe): title = u'Gazeta Prawna' __author__ = u'Vroo' publisher = u'Infor Biznes' - oldest_article = 7 + oldest_article = 1 max_articles_per_feed = 20 no_stylesheets = True remove_javascript = True description = 'Polski dziennik gospodarczy' language = 'pl' encoding = 'utf-8' + ignore_duplicate_articles = {'title', 'url'} remove_tags_after = [ dict(name='div', attrs={'class':['data-art']}) @@ -30,7 +31,7 @@ class gazetaprawna(BasicNewsRecipe): ] feeds = [ - (u'Wiadomo\u015bci - najwa\u017cniejsze', u'http://www.gazetaprawna.pl/wiadomosci/najwazniejsze/rss.xml'), + (u'Z ostatniej chwili', u'http://rss.gazetaprawna.pl/GazetaPrawna'), (u'Biznes i prawo gospodarcze', u'http://biznes.gazetaprawna.pl/rss.xml'), (u'Prawo i wymiar sprawiedliwo\u015bci', u'http://prawo.gazetaprawna.pl/rss.xml'), (u'Praca i ubezpieczenia', u'http://praca.gazetaprawna.pl/rss.xml'), @@ -51,3 +52,8 @@ class gazetaprawna(BasicNewsRecipe): url = url.replace('prawo.gazetaprawna', 'www.gazetaprawna') url = url.replace('praca.gazetaprawna', 'www.gazetaprawna') return url + + def get_cover_url(self): + soup = self.index_to_soup('http://www.egazety.pl/infor/e-wydanie-dziennik-gazeta-prawna.html') + self.cover_url = soup.find('p', attrs={'class':'covr'}).a['href'] + return getattr(self, 'cover_url', self.cover_url) diff --git a/recipes/icons/dzial_zagraniczny.png b/recipes/icons/dzial_zagraniczny.png new file mode 100644 index 0000000000..1982db0462 Binary files /dev/null and b/recipes/icons/dzial_zagraniczny.png differ diff --git a/recipes/icons/equipped.png b/recipes/icons/equipped.png new file mode 100644 index 0000000000..a532b6f6ac Binary files /dev/null and b/recipes/icons/equipped.png differ diff --git a/recipes/icons/gazeta-prawna-calibre-v1.png b/recipes/icons/gazeta-prawna-calibre-v1.png new file mode 100644 index 0000000000..e5c7ae965c Binary files /dev/null and b/recipes/icons/gazeta-prawna-calibre-v1.png differ diff --git a/recipes/icons/ittechblog.png b/recipes/icons/ittechblog.png new file mode 100644 index 0000000000..825e025510 Binary files /dev/null and b/recipes/icons/ittechblog.png differ diff --git a/recipes/icons/magazyn_consido.png b/recipes/icons/magazyn_consido.png new file mode 100644 index 0000000000..5d54a337de Binary files /dev/null and b/recipes/icons/magazyn_consido.png differ diff --git a/recipes/icons/media2.png b/recipes/icons/media2.png new file mode 100644 index 0000000000..8e98c4df4e Binary files /dev/null and b/recipes/icons/media2.png differ diff --git a/recipes/icons/mobilna.png b/recipes/icons/mobilna.png new file mode 100644 index 0000000000..30db9287be Binary files /dev/null and b/recipes/icons/mobilna.png differ diff --git a/recipes/icons/mojegotowanie.png b/recipes/icons/mojegotowanie.png new file mode 100644 index 0000000000..b9df6dc6d0 Binary files /dev/null and b/recipes/icons/mojegotowanie.png differ diff --git a/recipes/icons/najwyzszy_czas.png b/recipes/icons/najwyzszy_czas.png new file mode 100644 index 0000000000..bc6812ce0b Binary files /dev/null and b/recipes/icons/najwyzszy_czas.png differ diff --git a/recipes/icons/nowiny_rybnik.png b/recipes/icons/nowiny_rybnik.png new file mode 100644 index 0000000000..6f4b11c1f3 Binary files /dev/null and b/recipes/icons/nowiny_rybnik.png differ diff --git a/recipes/icons/osw.png b/recipes/icons/osw.png new file mode 100644 index 0000000000..0693aee762 Binary files /dev/null and b/recipes/icons/osw.png differ diff --git a/recipes/icons/ppe_pl.png b/recipes/icons/ppe_pl.png new file mode 100644 index 0000000000..42c9b42fa5 Binary files /dev/null and b/recipes/icons/ppe_pl.png differ diff --git a/recipes/icons/presseurop.png b/recipes/icons/presseurop.png new file mode 100644 index 0000000000..9967aac1fb Binary files /dev/null and b/recipes/icons/presseurop.png differ diff --git a/recipes/icons/res_publica.png b/recipes/icons/res_publica.png new file mode 100644 index 0000000000..7c21e9d96e Binary files /dev/null and b/recipes/icons/res_publica.png differ diff --git a/recipes/icons/wolne_media.png b/recipes/icons/wolne_media.png new file mode 100644 index 0000000000..78d72713ab Binary files /dev/null and b/recipes/icons/wolne_media.png differ diff --git a/recipes/ittechblog.recipe b/recipes/ittechblog.recipe new file mode 100644 index 0000000000..3fa557d11e --- /dev/null +++ b/recipes/ittechblog.recipe @@ -0,0 +1,26 @@ +__license__ = 'GPL v3' +__copyright__ = 'MrStefan' + +''' +www.ittechblog.pl +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class ittechblog(BasicNewsRecipe): + title = u'IT techblog' + __author__ = 'MrStefan ' + language = 'pl' + description =u'Na naszym blogu technologicznym znajdziesz między innymi: testy sprzętu, najnowsze startupy, technologiczne nowinki, felietony tematyczne.' + extra_css = '.cover > img {display:block;}' + remove_empty_feeds = True + oldest_article = 7 + max_articles_per_feed = 100 + remove_javascript = True + no_stylesheets = True + use_embedded_content = False + + keep_only_tags =[dict(attrs={'class':'box'})] + remove_tags =[dict(name='aside'), dict(attrs={'class':['tags', 'counter', 'twitter-share-button']})] + + feeds = [(u'Artykuły', u'http://feeds.feedburner.com/ITTechBlog?format=xml')] diff --git a/recipes/kp.recipe b/recipes/kp.recipe index 85bf356b4d..3a2bc62eb0 100644 --- a/recipes/kp.recipe +++ b/recipes/kp.recipe @@ -2,8 +2,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class KrytykaPolitycznaRecipe(BasicNewsRecipe): - __license__ = 'GPL v3' - __author__ = u'intromatyk ' + __author__ = u'Artur Stachecki ' language = 'pl' version = 1 diff --git a/recipes/magazyn_consido.recipe b/recipes/magazyn_consido.recipe new file mode 100644 index 0000000000..d24c66d6a4 --- /dev/null +++ b/recipes/magazyn_consido.recipe @@ -0,0 +1,88 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' + +''' +magazynconsido.pl/ +''' + +from calibre.web.feeds.news import BasicNewsRecipe +from calibre.utils.magick import Image + +class magazynconsido(BasicNewsRecipe): + title = u'Magazyn Consido' + __author__ = 'Artur Stachecki ,teepel ' + language = 'pl' + description =u'Portal dla architektów i projektantów' + masthead_url='http://qualitypixels.pl/wp-content/themes/airlock/advance/inc/timthumb.php?src=http://qualitypixels.pl/wp-content/uploads/2012/01/logotyp-magazynconsido-11.png&w=455&zc=1' + oldest_article = 7 + max_articles_per_feed = 100 + remove_javascript=True + no_stylesheets = True + use_embedded_content = False + + keep_only_tags =[] + keep_only_tags.append(dict(name = 'h1')) + keep_only_tags.append(dict(name = 'p')) + keep_only_tags.append(dict(attrs = {'class' : 'navigation'})) + remove_tags =[dict(attrs = {'style' : 'font-size: x-small;' })] + + remove_tags_after =[dict(attrs = {'class' : 'navigation' })] + + extra_css=''' img {max-width:30%; max-height:30%; display: block; margin-left: auto; margin-right: auto;} + h1 {text-align: center;}''' + + def parse_index(self): #(kk) + soup = self.index_to_soup('http://feeds.feedburner.com/magazynconsido?format=xml') + feeds = [] + articles = {} + sections = [] + section = '' + + for item in soup.findAll('item') : + section = self.tag_to_string(item.category) + if not articles.has_key(section) : + sections.append(section) + articles[section] = [] + article_url = self.tag_to_string(item.guid) + article_title = self.tag_to_string(item.title) + article_date = self.tag_to_string(item.pubDate) + article_description = self.tag_to_string(item.description) + articles[section].append( { 'title' : article_title, 'url' : article_url, 'date' : article_date, 'description' : article_description }) + + for section in sections : + if section == 'Video': + feeds.append((section, articles[section])) + feeds.pop() + else: + feeds.append((section, articles[section])) + return feeds + + def append_page(self, soup, appendtag): + apage = soup.find('div', attrs={'class':'wp-pagenavi'}) + if apage is not None: + nexturl = soup.find('a', attrs={'class':'nextpostslink'}) + soup2 = self.index_to_soup(nexturl['href']) + pagetext = soup2.findAll('p') + for tag in pagetext: + pos = len(appendtag.contents) + appendtag.insert(pos, tag) + + while appendtag.find('div', attrs={'class': ['height: 35px;', 'post-meta', 'addthis_toolbox addthis_default_style addthis_', 'post-meta-bottom', 'block_recently_post', 'fbcomments', 'pin-it-button', 'pages', 'navigation']}) is not None: + appendtag.find('div', attrs={'class': ['height: 35px;', 'post-meta', 'addthis_toolbox addthis_default_style addthis_', 'post-meta-bottom', 'block_recently_post', 'fbcomments', 'pin-it-button', 'pages', 'navigation']}).replaceWith('') + + def preprocess_html(self, soup): #(kk) + self.append_page(soup, soup.body) + return self.adeify_images(soup) + + def postprocess_html(self, soup, first): + #process all the images + for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')): + iurl = tag['src'] + img = Image() + img.open(iurl) + if img < 0: + raise RuntimeError('Out of memory') + img.type = "GrayscaleType" + img.save(iurl) + return soup diff --git a/recipes/media2.recipe b/recipes/media2.recipe new file mode 100644 index 0000000000..135740a62e --- /dev/null +++ b/recipes/media2.recipe @@ -0,0 +1,35 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = 'teepel' + +''' +media2.pl +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class media2_pl(BasicNewsRecipe): + title = u'Media2' + __author__ = 'teepel ' + language = 'pl' + description =u'Media2.pl to jeden z najczęściej odwiedzanych serwisów dla profesjonalistów z branży medialnej, telekomunikacyjnej, public relations oraz nowych technologii.' + masthead_url='http://media2.pl/res/logo/www.png' + remove_empty_feeds= True + oldest_article = 1 + max_articles_per_feed = 100 + remove_javascript=True + no_stylesheets=True + simultaneous_downloads = 5 + + extra_css = '''.news-lead{font-weight: bold; }''' + + keep_only_tags =[] + keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'news-item tpl-big'})) + + remove_tags =[] + remove_tags.append(dict(name = 'span', attrs = {'class' : 'news-comments'})) + remove_tags.append(dict(name = 'div', attrs = {'class' : 'item-sidebar'})) + remove_tags.append(dict(name = 'div', attrs = {'class' : 'news-tags'})) + + feeds = [(u'Media2', u'http://feeds.feedburner.com/media2')] diff --git a/recipes/mobilna.recipe b/recipes/mobilna.recipe new file mode 100644 index 0000000000..68ae011438 --- /dev/null +++ b/recipes/mobilna.recipe @@ -0,0 +1,26 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = 'MrStefan' + +''' +www.mobilna.pl +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class mobilna(BasicNewsRecipe): + title = u'Mobilna.pl' + __author__ = 'MrStefan ' + language = 'pl' + description =u'twoja mobilna strona' + #masthead_url='' + remove_empty_feeds= True + oldest_article = 7 + max_articles_per_feed = 100 + remove_javascript=True + no_stylesheets=True + use_embedded_content = True + #keep_only_tags =[dict(attrs={'class':'Post'})] + + feeds = [(u'Artykuły', u'http://mobilna.pl/feed/')] diff --git a/recipes/mojegotowanie.recipe b/recipes/mojegotowanie.recipe new file mode 100644 index 0000000000..4b0de4a0e1 --- /dev/null +++ b/recipes/mojegotowanie.recipe @@ -0,0 +1,50 @@ +#!usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = 'MrStefan, teepel' + +''' +www.mojegotowanie.pl +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class mojegotowanie(BasicNewsRecipe): + title = u'Moje Gotowanie' + __author__ = 'MrStefan , teepel ' + language = 'pl' + description =u'Gotowanie to Twoja pasja? Uwielbiasz sałatki? Lubisz grillować? Przepisy kulinarne doskonałe na wszystkie okazje znajdziesz na www.mojegotowanie.pl.' + masthead_url='http://www.mojegotowanie.pl/extension/selfstart/design/self/images/top_c2.gif' + cover_url = 'http://www.mojegotowanie.pl/extension/selfstart/design/self/images/mgpl/mojegotowanie.gif' + remove_empty_feeds= True + oldest_article = 7 + max_articles_per_feed = 100 + remove_javascript=True + no_stylesheets=True + + keep_only_tags =[] + keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'content'})) + + feeds = [(u'Artykuły', u'http://mojegotowanie.pl/rss/feed/artykuly'), + (u'Przepisy', u'http://mojegotowanie.pl/rss/feed/przepisy')] + + def parse_feeds(self): + feeds = BasicNewsRecipe.parse_feeds(self) + for feed in feeds: + for article in feed.articles[:]: + if 'film' in article.title: + feed.articles.remove(article) + return feeds + + def get_article_url(self, article): + link = article.get('link') + if 'Clayout0Cset0Cprint0' in link: + return link + + def print_version(self, url): + segment = url.split('/') + URLPart = segment[-2] + URLPart = URLPart.replace('0L0Smojegotowanie0Bpl0Clayout0Cset0Cprint0C', '/') + URLPart = URLPart.replace('0I', '_') + URLPart = URLPart.replace('0C', '/') + return 'http://www.mojegotowanie.pl/layout/set/print' + URLPart diff --git a/recipes/najwyzszy_czas.recipe b/recipes/najwyzszy_czas.recipe new file mode 100644 index 0000000000..9c4a82c4ea --- /dev/null +++ b/recipes/najwyzszy_czas.recipe @@ -0,0 +1,27 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__author__ = 'teepel ' + +''' +nczas.com +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class nczas(BasicNewsRecipe): + title = u'Najwy\u017cszy Czas' + __author__ = 'teepel ' + language = 'pl' + description ='Wiadomości z nczas.com' + INDEX='http://nczas.com' + oldest_article = 7 + max_articles_per_feed = 100 + use_embedded_content = True + remove_empty_feeds= True + simultaneous_downloads = 5 + remove_javascript=True + remove_attributes = ['style'] + no_stylesheets=True + + feeds = [(u'Najwyższy Czas', u'http://nczas.com/feed/')] diff --git a/recipes/nowiny_rybnik.recipe b/recipes/nowiny_rybnik.recipe new file mode 100644 index 0000000000..e00a72e09b --- /dev/null +++ b/recipes/nowiny_rybnik.recipe @@ -0,0 +1,31 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' + +from calibre.web.feeds.news import BasicNewsRecipe + +class NowinyRybnik(BasicNewsRecipe): + title = u'Nowiny - Rybnik' + __author__ = 'Artur Stachecki ' + language = 'pl' + description = u'Tygodnik Regionalny NOWINY. Ogłoszenia drobne, wiadomości i wydarzenia z regionu Rybnika i okolic' + oldest_article = 7 + masthead_url = 'http://www.nowiny.rybnik.pl/logo/logo.jpg' + max_articles_per_feed = 100 + simultaneous_downloads = 5 + remove_javascript = True + no_stylesheets = True + + keep_only_tags = [(dict(name='div', attrs={'id': 'drukuj'}))] + + remove_tags = [] + remove_tags.append(dict(name='div', attrs={'id': 'footer'})) + + feeds = [(u'Wszystkie artykuły', u'http://www.nowiny.rybnik.pl/rss,artykuly,dzial,0,miasto,0,ile,25.xml')] + + def preprocess_html(self, soup): + for alink in soup.findAll('a'): + if alink.string is not None: + tstr = alink.string + alink.replaceWith(tstr) + return soup diff --git a/recipes/osw.recipe b/recipes/osw.recipe new file mode 100644 index 0000000000..8022f3e346 --- /dev/null +++ b/recipes/osw.recipe @@ -0,0 +1,41 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__author__ = 'teepel ' + +''' +http://www.osw.waw.pl - Osrodek studiow wschodnich +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class OSW_Recipe(BasicNewsRecipe): + + language = 'pl' + title = u'Ośrodek Studiów Wschodnich' + __author__ = 'teepel ' + INDEX='http://www.osw.waw.pl' + description = u'Ośrodek Studiów Wschodnich im. Marka Karpia. Centre for Eastern Studies.' + category = u'News' + oldest_article = 7 + max_articles_per_feed = 100 + cover_url='' + remove_empty_feeds= True + no_stylesheets=True + remove_javascript = True + simultaneous_downloads = 5 + + keep_only_tags =[] + #this line should show title of the article, but it doesnt work + keep_only_tags.append(dict(name = 'h1', attrs = {'class' : 'print-title'})) + keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'print-submitted'})) + keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'print-content'})) + + remove_tags =[] + remove_tags.append(dict(name = 'table', attrs = {'id' : 'attachments'})) + remove_tags.append(dict(name = 'div', attrs = {'class' : 'print-submitted'})) + + feeds = [(u'OSW', u'http://www.osw.waw.pl/pl/rss.xml')] + + def print_version(self, url): + return url.replace('http://www.osw.waw.pl/pl/', 'http://www.osw.waw.pl/pl/print/') diff --git a/recipes/ppe_pl.recipe b/recipes/ppe_pl.recipe new file mode 100644 index 0000000000..2edc611ad7 --- /dev/null +++ b/recipes/ppe_pl.recipe @@ -0,0 +1,41 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' + +from calibre.web.feeds.news import BasicNewsRecipe + +class ppeRecipe(BasicNewsRecipe): + __author__ = u'Artur Stachecki ' + language = 'pl' + + title = u'ppe.pl' + category = u'News' + description = u'Portal o konsolach i grach wideo.' + cover_url='' + remove_empty_feeds= True + no_stylesheets=True + oldest_article = 1 + max_articles_per_feed = 100000 + recursions = 0 + no_stylesheets = True + remove_javascript = True + simultaneous_downloads = 2 + + keep_only_tags =[] + keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'news-heading'})) + keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'tresc-poziom'})) + + remove_tags =[] + remove_tags.append(dict(name = 'div', attrs = {'class' : 'bateria1'})) + remove_tags.append(dict(name = 'div', attrs = {'class' : 'bateria2'})) + remove_tags.append(dict(name = 'div', attrs = {'class' : 'bateria3'})) + remove_tags.append(dict(name = 'div', attrs = {'class' : 'news-photo'})) + remove_tags.append(dict(name = 'div', attrs = {'class' : 'fbl'})) + remove_tags.append(dict(name = 'div', attrs = {'class' : 'info'})) + remove_tags.append(dict(name = 'div', attrs = {'class' : 'links'})) + + remove_tags.append(dict(name = 'div', attrs = {'style' : 'padding: 4px'})) + + feeds = [ + ('Newsy', 'feed://ppe.pl/rss/rss.xml'), + ] diff --git a/recipes/presseurop.recipe b/recipes/presseurop.recipe new file mode 100644 index 0000000000..ea06eb0c32 --- /dev/null +++ b/recipes/presseurop.recipe @@ -0,0 +1,33 @@ +#!/usr/bin/env python + +''' +www.presseurop.eu/pl +''' + +__license__ = 'GPL v3' +__author__ = 'teepel ' + +from calibre.web.feeds.news import BasicNewsRecipe +import re + +class presseurop(BasicNewsRecipe): + title = u'Presseurop' + description = u'Najlepsze artykuły z prasy europejskiej' + language = 'pl' + oldest_article = 7 + max_articles_per_feed = 100 + auto_cleanup = True + + feeds = [ + (u'Polityka', u'http://www.presseurop.eu/pl/taxonomy/term/1/%2A/feed'), + (u'Społeczeństwo', u'http://www.presseurop.eu/pl/taxonomy/term/2/%2A/feed'), + (u'Gospodarka', u'http://www.presseurop.eu/pl/taxonomy/term/3/%2A/feed'), + (u'Kultura i debaty', u'http://www.presseurop.eu/pl/taxonomy/term/4/%2A/feed'), + (u'UE i Świat', u'http://www.presseurop.eu/pl/taxonomy/term/5/%2A/feed') + ] + + + preprocess_regexps = [ + (re.compile(r'\|.*', re.DOTALL|re.IGNORECASE), + lambda match: ''), +] diff --git a/recipes/res_publica.recipe b/recipes/res_publica.recipe new file mode 100644 index 0000000000..e0d9ebbb56 --- /dev/null +++ b/recipes/res_publica.recipe @@ -0,0 +1,35 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' + +from calibre.web.feeds.news import BasicNewsRecipe + +class ResPublicaNowaRecipe(BasicNewsRecipe): + __license__ = 'GPL v3' + __author__ = u'Artur Stachecki ' + language = 'pl' + version = 1 + + title = u'Res Publica Nowa' + category = u'News' + description = u'Portal kulturalno-społecznego kwartalnika o profilu liberalnym, wydawany przez Fundację Res Publica' + cover_url='' + remove_empty_feeds= True + no_stylesheets=True + oldest_article = 7 + max_articles_per_feed = 100000 + recursions = 0 + no_stylesheets = True + remove_javascript = True + simultaneous_downloads = 5 + + feeds = [ + ('Artykuly', 'feed://publica.pl/feed'), + ] + + def preprocess_html(self, soup): + for alink in soup.findAll('a'): + if alink.string is not None: + tstr = alink.string + alink.replaceWith(tstr) + return soup diff --git a/recipes/sport_pl.recipe b/recipes/sport_pl.recipe index 711fa44126..dd7faccdb0 100644 --- a/recipes/sport_pl.recipe +++ b/recipes/sport_pl.recipe @@ -20,7 +20,7 @@ class sport_pl(BasicNewsRecipe): remove_javascript=True no_stylesheets=True remove_empty_feeds = True - + ignore_duplicate_articles = {'title', 'url'} keep_only_tags =[] keep_only_tags.append(dict(name = 'div', attrs = {'id' : 'article'})) diff --git a/recipes/wirtualnemedia_pl.recipe b/recipes/wirtualnemedia_pl.recipe index 28278c2e24..ed3b3787f8 100644 --- a/recipes/wirtualnemedia_pl.recipe +++ b/recipes/wirtualnemedia_pl.recipe @@ -1,7 +1,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class WirtualneMedia(BasicNewsRecipe): - title = u'wirtualnemedia.pl' + title = u'Wirtualnemedia.pl' oldest_article = 7 max_articles_per_feed = 100 no_stylesheets = True diff --git a/recipes/wolne_media.recipe b/recipes/wolne_media.recipe new file mode 100644 index 0000000000..5f8c87a607 --- /dev/null +++ b/recipes/wolne_media.recipe @@ -0,0 +1,26 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__author__ = 'teepel ' + +''' +wolnemedia.net +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class wolne_media(BasicNewsRecipe): + title = u'Wolne Media' + __author__ = 'teepel ' + language = 'pl' + description ='Wiadomości z wolnemedia.net' + INDEX='http://wolnemedia.net' + oldest_article = 1 + max_articles_per_feed = 100 + remove_empty_feeds= True + simultaneous_downloads = 5 + remove_javascript=True + no_stylesheets=True + auto_cleanup = True + + feeds = [(u'Wiadomości z kraju', u'http://wolnemedia.net/category/wiadomosci-z-kraju/feed/'),(u'Wiadomości ze świata', u'http://wolnemedia.net/category/wiadomosci-ze-swiata/feed/'),(u'Edukacja', u'http://wolnemedia.net/category/edukacja/feed/'),(u'Ekologia', u'http://wolnemedia.net/category/ekologia/feed/'),(u'Gospodarka', u'http://wolnemedia.net/category/gospodarka/feed/'),(u'Historia', u'http://wolnemedia.net/category/historia/feed/'),(u'Kultura', u'http://wolnemedia.net/category/kultura/feed/'),(u'Kulturoznawstwo', u'http://wolnemedia.net/category/kulturoznawstwo/feed/'),(u'Media', u'http://wolnemedia.net/category/media/feed/'),(u'Nauka', u'http://wolnemedia.net/category/nauka/feed/'),(u'Opowiadania', u'http://wolnemedia.net/category/opowiadania/feed/'),(u'Paranauka i ezoteryka', u'http://wolnemedia.net/category/ezoteryka/feed/'),(u'Polityka', u'http://wolnemedia.net/category/polityka/feed/'),(u'Prawo', u'http://wolnemedia.net/category/prawo/feed/'),(u'Publicystyka', u'http://wolnemedia.net/category/publicystyka/feed/'),(u'Reportaż', u'http://wolnemedia.net/category/reportaz/feed/'),(u'Seks', u'http://wolnemedia.net/category/seks/feed/'),(u'Społeczeństwo', u'http://wolnemedia.net/category/spoleczenstwo/feed/'),(u'Świat komputerów', u'http://wolnemedia.net/category/swiat-komputerow/feed/'),(u'Wierzenia', u'http://wolnemedia.net/category/wierzenia/feed/'),(u'Zdrowie', u'http://wolnemedia.net/category/zdrowie/feed/')] diff --git a/recipes/wprost.recipe b/recipes/wprost.recipe index 90dde251ca..d923f64a3f 100644 --- a/recipes/wprost.recipe +++ b/recipes/wprost.recipe @@ -1,10 +1,9 @@ #!/usr/bin/env python __license__ = 'GPL v3' -__copyright__ = '2010, matek09, matek09@gmail.com' -__copyright__ = 'Modified 2011, Mariusz Wolek ' -__copyright__ = 'Modified 2012, Artur Stachecki ' - +__copyright__ = '''2010, matek09, matek09@gmail.com + Modified 2011, Mariusz Wolek + Modified 2012, Artur Stachecki ''' from calibre.web.feeds.news import BasicNewsRecipe import re @@ -16,12 +15,12 @@ class Wprost(BasicNewsRecipe): ICO_BLOCKED = 'http://www.wprost.pl/G/layout2/ico_blocked.png' title = u'Wprost' __author__ = 'matek09' - description = 'Weekly magazine' + description = u'Popularny tygodnik ogólnopolski - Wprost. Najlepszy wśród polskich tygodników - opiniotwórczy - społeczno-informacyjny - społeczno-kulturalny.' encoding = 'ISO-8859-2' no_stylesheets = True language = 'pl' remove_javascript = True - recursions = 0 + recursions = 0 remove_tags_before = dict(dict(name = 'div', attrs = {'id' : 'print-layer'})) remove_tags_after = dict(dict(name = 'div', attrs = {'id' : 'print-layer'})) ''' @@ -94,5 +93,3 @@ class Wprost(BasicNewsRecipe): 'description' : '' }) return articles - - diff --git a/recipes/wprost_rss.recipe b/recipes/wprost_rss.recipe index bffbacc474..59c130fc75 100644 --- a/recipes/wprost_rss.recipe +++ b/recipes/wprost_rss.recipe @@ -1,10 +1,9 @@ -#!/usr/bin/env python - -__license__ = 'GPL v3' -__copyright__ = '2010, matek09, matek09@gmail.com' -__copyright__ = 'Modified 2011, Mariusz Wolek ' -__copyright__ = 'Modified 2012, Artur Stachecki ' +#!/usr/bin/env python +__license__ = 'GPL v3' +__copyright__ = '''2010, matek09, matek09@gmail.com + Modified 2011, Mariusz Wolek + Modified 2012, Artur Stachecki ''' from calibre.web.feeds.news import BasicNewsRecipe import re @@ -12,13 +11,14 @@ import re class Wprost(BasicNewsRecipe): title = u'Wprost (RSS)' __author__ = 'matek09' - description = 'Weekly magazine' + description = u'Portal informacyjny. Najświeższe wiadomości, najciekawsze komentarze i opinie. Blogi najlepszych publicystów.' encoding = 'ISO-8859-2' no_stylesheets = True language = 'pl' remove_javascript = True recursions = 0 use_embedded_content = False + ignore_duplicate_articles = {'title', 'url'} remove_empty_feeds = True remove_tags_before = dict(dict(name = 'div', attrs = {'id' : 'print-layer'})) remove_tags_after = dict(dict(name = 'div', attrs = {'id' : 'print-layer'})) @@ -48,20 +48,20 @@ class Wprost(BasicNewsRecipe): #h2 {font-size: x-large; font-weight: bold} feeds = [(u'Tylko u nas', u'http://www.wprost.pl/rss/rss_wprostextra.php'), - (u'Wydarzenia', u'http://www.wprost.pl/rss/rss.php'), - (u'Komentarze', u'http://www.wprost.pl/rss/rss_komentarze.php'), - (u'Wydarzenia: Kraj', u'http://www.wprost.pl/rss/rss_kraj.php'), - (u'Komentarze: Kraj', u'http://www.wprost.pl/rss/rss_komentarze_kraj.php'), - (u'Wydarzenia: Świat', u'http://www.wprost.pl/rss/rss_swiat.php'), - (u'Komentarze: Świat', u'http://www.wprost.pl/rss/rss_komentarze_swiat.php'), - (u'Wydarzenia: Gospodarka', u'http://www.wprost.pl/rss/rss_gospodarka.php'), - (u'Komentarze: Gospodarka', u'http://www.wprost.pl/rss/rss_komentarze_gospodarka.php'), - (u'Wydarzenia: Życie', u'http://www.wprost.pl/rss/rss_zycie.php'), - (u'Komentarze: Życie', u'http://www.wprost.pl/rss/rss_komentarze_zycie.php'), - (u'Wydarzenia: Sport', u'http://www.wprost.pl/rss/rss_sport.php'), - (u'Komentarze: Sport', u'http://www.wprost.pl/rss/rss_komentarze_sport.php'), - (u'Przegląd prasy', u'http://www.wprost.pl/rss/rss_prasa.php') - ] + (u'Wydarzenia', u'http://www.wprost.pl/rss/rss.php'), + (u'Komentarze', u'http://www.wprost.pl/rss/rss_komentarze.php'), + (u'Wydarzenia: Kraj', u'http://www.wprost.pl/rss/rss_kraj.php'), + (u'Komentarze: Kraj', u'http://www.wprost.pl/rss/rss_komentarze_kraj.php'), + (u'Wydarzenia: Świat', u'http://www.wprost.pl/rss/rss_swiat.php'), + (u'Komentarze: Świat', u'http://www.wprost.pl/rss/rss_komentarze_swiat.php'), + (u'Wydarzenia: Gospodarka', u'http://www.wprost.pl/rss/rss_gospodarka.php'), + (u'Komentarze: Gospodarka', u'http://www.wprost.pl/rss/rss_komentarze_gospodarka.php'), + (u'Wydarzenia: Życie', u'http://www.wprost.pl/rss/rss_zycie.php'), + (u'Komentarze: Życie', u'http://www.wprost.pl/rss/rss_komentarze_zycie.php'), + (u'Wydarzenia: Sport', u'http://www.wprost.pl/rss/rss_sport.php'), + (u'Komentarze: Sport', u'http://www.wprost.pl/rss/rss_komentarze_sport.php'), + (u'Przegląd prasy', u'http://www.wprost.pl/rss/rss_prasa.php') + ] def get_cover_url(self): soup = self.index_to_soup('http://www.wprost.pl/tygodnik')