diff --git a/recipes/archeowiesci.recipe b/recipes/archeowiesci.recipe index 57647d7469..c0fc576c9f 100644 --- a/recipes/archeowiesci.recipe +++ b/recipes/archeowiesci.recipe @@ -1,7 +1,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class Archeowiesci(BasicNewsRecipe): - title = u'Archeowiesci' + title = u'Archeowieści' __author__ = 'fenuks' category = 'archeology' language = 'pl' diff --git a/recipes/bachormagazyn.recipe b/recipes/bachormagazyn.recipe new file mode 100644 index 0000000000..fb34552beb --- /dev/null +++ b/recipes/bachormagazyn.recipe @@ -0,0 +1,43 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +__license__ = 'GPL v3' +__copyright__ = u'Łukasz Grąbczewski 2013' +__version__ = '1.0' + +''' +bachormagazyn.pl +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class bachormagazyn(BasicNewsRecipe): + __author__ = u'Łukasz Grąbczewski' # same spelling as __copyright__ + title = u'Bachor Magazyn' + description = u'Alternatywny magazyn o alternatywach rodzicielstwa' + language = 'pl' + publisher = 'Bachor Mag.' 
+ publication_type = 'magazine' + masthead_url = 'http://bachormagazyn.pl/wp-content/uploads/2011/10/bachor_header1.gif' + no_stylesheets = True + remove_javascript = True + use_embedded_content = False + remove_empty_feeds = True + + oldest_article = 32 #monthly +1 + max_articles_per_feed = 100 + + feeds = [ + (u'Bezradnik dla nieudacznych rodziców', u'http://bachormagazyn.pl/feed/') + ] + + keep_only_tags = [] + keep_only_tags.append(dict(name = 'div', attrs = {'id' : 'content'})) + + remove_tags = [] + remove_tags.append(dict(attrs = {'id' : 'nav-above'})) + remove_tags.append(dict(attrs = {'id' : 'nav-below'})) + remove_tags.append(dict(attrs = {'id' : 'comments'})) + remove_tags.append(dict(attrs = {'class' : 'entry-info'})) + remove_tags.append(dict(attrs = {'class' : 'comments-link'})) + remove_tags.append(dict(attrs = {'class' : 'sharedaddy sd-sharing-enabled'})) diff --git a/recipes/frazpc.recipe b/recipes/frazpc.recipe index 2c12a58b55..7d1cb329f8 100644 --- a/recipes/frazpc.recipe +++ b/recipes/frazpc.recipe @@ -18,6 +18,7 @@ class FrazPC(BasicNewsRecipe): max_articles_per_feed = 100 use_embedded_content = False no_stylesheets = True + remove_empty_feeds = True cover_url='http://www.frazpc.pl/images/logo.png' feeds = [ (u'Aktualno\u015bci', u'http://www.frazpc.pl/feed/aktualnosci'), diff --git a/recipes/icons/gildia_pl.png b/recipes/icons/gildia_pl.png new file mode 100644 index 0000000000..47fec1bbe8 Binary files /dev/null and b/recipes/icons/gildia_pl.png differ diff --git a/recipes/icons/nowy_obywatel.png b/recipes/icons/nowy_obywatel.png new file mode 100755 index 0000000000..f41e35365f Binary files /dev/null and b/recipes/icons/nowy_obywatel.png differ diff --git a/recipes/kopalniawiedzy.recipe b/recipes/kopalniawiedzy.recipe index 619185ed34..13703f02ef 100644 --- a/recipes/kopalniawiedzy.recipe +++ b/recipes/kopalniawiedzy.recipe @@ -16,6 +16,7 @@ class KopalniaWiedzy(BasicNewsRecipe): max_articles_per_feed = 100 INDEX = 
u'http://kopalniawiedzy.pl/' remove_javascript = True + remove_empty_feeds = True no_stylesheets = True remove_tags = [{'name':'p', 'attrs': {'class': 'keywords'}}, {'name':'div', 'attrs': {'class':'sexy-bookmarks sexy-bookmarks-bg-caring'}}, {'name':'div', 'attrs': {'class':'article-time-and-cat'}}, {'name':'p', 'attrs': {'class':'tags'}}] diff --git a/recipes/kurier_galicyjski.recipe b/recipes/kurier_galicyjski.recipe index 01a36bf724..17391d73dc 100644 --- a/recipes/kurier_galicyjski.recipe +++ b/recipes/kurier_galicyjski.recipe @@ -3,7 +3,7 @@ from calibre.ebooks.BeautifulSoup import BeautifulSoup as bs class KurierGalicyjski(BasicNewsRecipe): title = u'Kurier Galicyjski' __author__ = 'fenuks' - #description = u'' + description = u'Kurier Galicyjski - największa gazeta dla Polaków na Ukrainie. Bieżące wydarzenia z życia polskiej mniejszości, historia, kultura, polityka, reportaże.' category = 'news' language = 'pl' cover_url = 'http://www.duszki.pl/Kurier_galicyjski_bis2_small.gif' diff --git a/recipes/kurier_szczecinski.recipe b/recipes/kurier_szczecinski.recipe index b19e5ecd79..a4f9b115e8 100644 --- a/recipes/kurier_szczecinski.recipe +++ b/recipes/kurier_szczecinski.recipe @@ -3,7 +3,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class KurierSzczecinski(BasicNewsRecipe): title = u'Kurier Szczeci\u0144ski' __author__ = 'fenuks' - description = u'24Kurier jest portalem Kuriera Szczecińskiego. Zawiera aktualności ze Szczecina oraz wiadomości regionalne z województwa zachodniopomorskiego. ' + description = u'24Kurier jest portalem Kuriera Szczecińskiego. Zawiera aktualności ze Szczecina oraz wiadomości regionalne z województwa zachodniopomorskiego.' 
category = 'newspaper' #publication_type = '' language = 'pl' diff --git a/recipes/niebezpiecznik.recipe b/recipes/niebezpiecznik.recipe index a582a85aef..2228ef5122 100644 --- a/recipes/niebezpiecznik.recipe +++ b/recipes/niebezpiecznik.recipe @@ -3,7 +3,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class Niebezpiecznik_pl(BasicNewsRecipe): title = u'Niebezpiecznik.pl' __author__ = 'fenuks' - description = 'Niebezpiecznik.pl' + description = u'Niebezpiecznik.pl – o bezpieczeństwie i nie...' category = 'hacking, IT' language = 'pl' oldest_article = 8 diff --git a/recipes/nowy_obywatel.recipe b/recipes/nowy_obywatel.recipe new file mode 100644 index 0000000000..66bdfed390 --- /dev/null +++ b/recipes/nowy_obywatel.recipe @@ -0,0 +1,42 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +__license__ = 'GPL v3' +__copyright__ = u'Łukasz Grąbczewski 2013' +__version__ = '1.0' + +''' +nowyobywatel.pl +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class nowyobywatel(BasicNewsRecipe): + __author__ = u'Łukasz Grąbczewski' # same spelling as __copyright__ + title = u'Nowy Obywatel' + description = u'Pismo na rzecz sprawiedliwości społecznej' + language = 'pl' + publisher = u'Stowarzyszenie „Obywatele Obywatelom”' # unicode literal, like the other non-ASCII strings + publication_type = 'magazine' + masthead_url = 'http://lewicowo.pl/wp-content/uploads/2011/11/nowy-obywatel1.png' + no_stylesheets = True + remove_javascript = True + use_embedded_content = True + remove_empty_feeds = True + + oldest_article = 32 #monthly +1 + max_articles_per_feed = 100 + simultaneous_downloads = 20 + + feeds = [ + (u'Aktualności', u'http://nowyobywatel.pl/category/aktualnosci/feed/'), + (u'Opinie', u'http://nowyobywatel.pl/category/opinie/feed/'), + (u'Nasze rozmowy', u'http://nowyobywatel.pl/category/nasze-rozmowy/feed/'), + (u'Inspiracje', u'http://nowyobywatel.pl/category/inspiracje/feed/') + ] + + + remove_tags = [] + remove_tags.append(dict(attrs = {'class' : 'post-date'})) + remove_tags.append(dict(attrs = {'class' : 'printfriendly'})) + 
remove_tags.append(dict(attrs = {'class' : 'social4i'})) diff --git a/recipes/swiat_obrazu.recipe b/recipes/swiat_obrazu.recipe index 91748fb139..f088ff5e80 100644 --- a/recipes/swiat_obrazu.recipe +++ b/recipes/swiat_obrazu.recipe @@ -1,7 +1,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class Swiat_Obrazu(BasicNewsRecipe): - title = u'Swiat Obrazu' + title = u'Świat Obrazu' __author__ = 'fenuks' description = u'Internetowy Dziennik o Fotografii i Wideo www.SwiatObrazu.pl to źródło informacji o technice fotografii i wideo, o sprzęcie najbardziej znanych i uznanych firm: Canon, Nikon, Sony, Hasselblad i wielu innych. Znajdziecie tu programy do obróbki zdjęć, forum foto i forum wideo i galerie zdjęć. Codziennie najświeższe informacje: aktualności, testy, poradniki, wywiady, felietony. Swiatobrazu.pl stale organizuje konkursy oraz warsztaty fotograficzne i wideo.' category = 'photography' diff --git a/recipes/swiatkindle.recipe b/recipes/swiatkindle.recipe index d8e0e3f403..c589d1b6e1 100644 --- a/recipes/swiatkindle.recipe +++ b/recipes/swiatkindle.recipe @@ -19,6 +19,7 @@ class swiatczytnikow(BasicNewsRecipe): feeds = [(u'Świat Czytników - wpisy', u'http://swiatczytnikow.pl/feed')] - remove_tags = [dict(name = 'ul', attrs = {'class' : 'similar-posts'})] + remove_tags = [dict(name = 'ul', attrs = {'class' : 'similar-posts'}), + dict(name = 'div', attrs = {'class' : 'feedflare'})] preprocess_regexps = [(re.compile(u'

Czytaj dalej:

'), lambda match: '')] diff --git a/recipes/telepolis_pl.recipe b/recipes/telepolis_pl.recipe index 9ea878bc77..1aa7734c2c 100644 --- a/recipes/telepolis_pl.recipe +++ b/recipes/telepolis_pl.recipe @@ -16,11 +16,31 @@ class telepolis(BasicNewsRecipe): use_embedded_content = False feeds = [ - (u'Wiadomości', u'http://www.telepolis.pl/rss/news.php')#, - #(u'Artykuły', u'http://www.telepolis.pl/rss/artykuly.php') + (u'Wiadomości', u'http://www.telepolis.pl/rss,2,5,0.html') ] keep_only_tags = [ dict(name='div', attrs={'class':'flol w510'}), + dict(name='div', attrs={'class':'main_tresc'}), dict(name='div', attrs={'class':'main_tresc_news'}) ] + + def append_page(self, soup, appendtag): + chpage= appendtag.find(attrs={'class':'str'}) + if chpage: + for page in chpage.findAll('a'): + if page.renderContents() == 'Następna ›': + break + soup2 = self.index_to_soup(page['href']) + pagetext = soup2.find(attrs={'class':'main_tresc'}) + if pagetext is not None: # a follow-up page may lack the 'main_tresc' container + appendtag.insert(len(appendtag.contents), pagetext) + for r in appendtag.findAll(attrs={'class':'str'}): + r.extract() + + def preprocess_html(self, soup): + self.append_page(soup, soup.body) + for image in soup.findAll('img'): + if 'm.jpg' in image.get('src', ''): # img tags without a src attribute must not raise KeyError + image['src'] = image['src'].replace('m.jpg', '.jpg') + return soup diff --git a/recipes/zycie_warszawy.recipe b/recipes/zycie_warszawy.recipe index 536529f144..2878d0c99d 100644 --- a/recipes/zycie_warszawy.recipe +++ b/recipes/zycie_warszawy.recipe @@ -3,12 +3,13 @@ __license__ = 'GPL v3' __copyright__ = u'Łukasz Grąbczewski 2012-2013' -__version__ = '1.1' +__version__ = '1.2' ''' zw.com.pl ''' +import re from calibre.web.feeds.news import BasicNewsRecipe class zyciewarszawy(BasicNewsRecipe): @@ -17,12 +18,14 @@ class zyciewarszawy(BasicNewsRecipe): description = u'Wiadomości z Warszawy' language = 'pl' publisher = 'Presspublica' - publication_type = 'newspapper' + publication_type = 'newspaper' masthead_url = 'http://www.zw.com.pl/static/img/logo_zw.gif' no_stylesheets = True 
remove_javascript = True + use_embedded_content = False + remove_empty_feeds = True - oldest_article = 1 #daily news only + oldest_article = 1.5 #last 36h max_articles_per_feed = 100 feeds = [(u'Najnowsze', u'http://www.zw.com.pl/rss/1.html')] @@ -31,15 +34,15 @@ class zyciewarszawy(BasicNewsRecipe): keep_only_tags.append(dict(name = 'div', attrs = {'id' : 'storyp'})) remove_tags = [] - remove_tags.append(dict(name = 'div', attrs = {'class' : 'authordate'})) - remove_tags.append(dict(name = 'div', attrs = {'class' : 'author'})) - '''remove_tags.append(dict(name = 'div', attrs = {'class' : 'seealso'}))''' - remove_tags.append(dict(name = 'div', attrs = {'class' : 'more'})) - remove_tags.append(dict(name = 'div', attrs = {'class' : 'clr'})) - remove_tags.append(dict(name = 'div', attrs = {'id' : 'adk_0'})) - remove_tags.append(dict(name = 'div', attrs = {'id' : 'adsense_0'})) - remove_tags.append(dict(name = 'div', attrs = {'id' : 'share_bottom'})) - remove_tags.append(dict(name = 'div', attrs = {'id' : 'copyright_law'})) + remove_tags.append(dict(attrs = {'class' : 'author'})) + remove_tags.append(dict(attrs = {'class' : 'more'})) + remove_tags.append(dict(attrs = {'class' : 'clr'})) + remove_tags.append(dict(attrs = {'id' : 'adk_0'})) + remove_tags.append(dict(attrs = {'id' : 'adsense_0'})) + remove_tags.append(dict(attrs = {'id' : 'share_bottom'})) + remove_tags.append(dict(attrs = {'id' : 'copyright_law'})) + + preprocess_regexps = [(re.compile(r',3\.jpg'), lambda m: ',2.jpg')] # dot escaped so it matches a literal '.' only def print_version(self, url): url += "?print=tak"