Various Polish news sources by fenuks

Kovid Goyal 2012-02-20 11:11:32 +05:30
parent 35d15d0eb5
commit ac2cc2834c
32 changed files with 548 additions and 0 deletions

@@ -0,0 +1,48 @@
from calibre.web.feeds.news import BasicNewsRecipe
import re


class Ciekawostki_Historyczne(BasicNewsRecipe):
    title = u'Ciekawostki Historyczne'
    oldest_article = 7
    __author__ = 'fenuks'
    description = u'Serwis popularnonaukowy - odkrycia, kontrowersje, historia, ciekawostki, badania, ciekawostki z przeszłości.'
    category = 'history'
    language = 'pl'
    masthead_url = 'http://ciekawostkihistoryczne.pl/wp-content/themes/Wordpress_Magazine/images/logo-ciekawostki-historyczne-male.jpg'
    cover_url = 'http://ciekawostkihistoryczne.pl/wp-content/themes/Wordpress_Magazine/images/logo-ciekawostki-historyczne-male.jpg'
    max_articles_per_feed = 100
    # Strip the multi-page notice and the "Zobacz też:" link list from the raw HTML
    preprocess_regexps = [(re.compile(ur'Ten artykuł ma kilka stron.*?</fb:like>', re.DOTALL), lambda match: ''),
                          (re.compile(ur'<h2>Zobacz też:</h2>.*?</ol>', re.DOTALL), lambda match: '')]
    no_stylesheets = True
    remove_empty_feeds = True
    keep_only_tags = [dict(name='div', attrs={'class':'post'})]
    remove_tags = [dict(id='singlepostinfo')]
    feeds = [(u'Staro\u017cytno\u015b\u0107', u'http://ciekawostkihistoryczne.pl/tag/starozytnosc/feed/'),
             (u'\u015aredniowiecze', u'http://ciekawostkihistoryczne.pl/tag/sredniowiecze/feed/'),
             (u'Nowo\u017cytno\u015b\u0107', u'http://ciekawostkihistoryczne.pl/tag/nowozytnosc/feed/'),
             (u'XIX wiek', u'http://ciekawostkihistoryczne.pl/tag/xix-wiek/feed/'),
             (u'1914-1939', u'http://ciekawostkihistoryczne.pl/tag/1914-1939/feed/'),
             (u'1939-1945', u'http://ciekawostkihistoryczne.pl/tag/1939-1945/feed/'),
             (u'Powojnie (od 1945)', u'http://ciekawostkihistoryczne.pl/tag/powojnie/feed/'),
             (u'Recenzje', u'http://ciekawostkihistoryczne.pl/category/recenzje/feed/')]

    def append_page(self, soup, appendtag):
        # Page links of multi-page articles sit under an <h7> tag (or in the
        # <p> that follows it); fetch every linked page, strip its chrome and
        # graft the remaining body onto the first page.
        tag = soup.find(name='h7')
        if tag:
            if tag.br:
                pass
            elif tag.nextSibling.name == 'p':
                tag = tag.nextSibling
            nexturl = tag.findAll('a')
            for nextpage in nexturl:
                tag.extract()
                nextpage = nextpage['href']
                soup2 = self.index_to_soup(nextpage)
                pagetext = soup2.find(name='div', attrs={'class':'post'})
                for r in pagetext.findAll('div', attrs={'id':'singlepostinfo'}):
                    r.extract()
                for r in pagetext.findAll('div', attrs={'class':'wp-caption alignright'}):
                    r.extract()
                for r in pagetext.findAll('h1'):
                    r.extract()
                pagetext.find('h6').nextSibling.extract()
                pagetext.find('h7').nextSibling.extract()
                pos = len(appendtag.contents)
                appendtag.insert(pos, pagetext)

    def preprocess_html(self, soup):
        self.append_page(soup, soup.body)
        return soup
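All the multi-page recipes in this commit share one stitching pattern: locate the site's "next page" control, fetch each page with index_to_soup, strip the per-page chrome, and append what remains to the first page's body. A generic sketch of the pattern (not part of the commit; fetch, next_selector and body_selector are hypothetical stand-ins, and it uses bs4 instead of the BeautifulSoup 3 bundled with calibre):

from bs4 import BeautifulSoup

def stitch_pages(fetch, soup, next_selector, body_selector):
    # Follow "next page" links, appending each page's article body to the
    # first page's soup; the seen set guards against pager loops.
    seen = set()
    link = soup.select_one(next_selector)
    while link is not None and link.get('href') and link['href'] not in seen:
        seen.add(link['href'])
        page = BeautifulSoup(fetch(link['href']), 'html.parser')
        body = page.select_one(body_selector)
        if body is None:
            break
        soup.body.append(body)  # same effect as appendtag.insert(pos, pagetext)
        link = page.select_one(next_selector)
    return soup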

@@ -0,0 +1,21 @@
from calibre.web.feeds.news import BasicNewsRecipe


class Gameplay_pl(BasicNewsRecipe):
    title = u'Gameplay.pl'
    oldest_article = 7
    __author__ = 'fenuks'
    description = u'gameplay.pl - serwis o naszych zainteresowaniach, grach, filmach, książkach, muzyce, fotografii i konsolach.'
    category = 'games, movies, books, music'
    language = 'pl'
    masthead_url = 'http://gameplay.pl/img/gpy_top_logo.png'
    cover_url = 'http://gameplay.pl/img/gpy_top_logo.png'
    max_articles_per_feed = 100
    no_stylesheets = True
    keep_only_tags = [dict(name='div', attrs={'class':['news_endpage_tit', 'news']})]
    remove_tags = [dict(name='div', attrs={'class':['galeria', 'noedit center im']})]
    feeds = [(u'Wiadomo\u015bci', u'http://gameplay.pl/rss/')]

    def image_url_processor(self, baseurl, url):
        # Image URLs without a scheme lose their first two characters (the
        # leading '//') and get the site root prepended.
        if 'http' not in url:
            return 'http://gameplay.pl' + url[2:]
        return url
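image_url_processor above absolutizes image URLs that arrive without a scheme. A standalone sketch of the same arithmetic (not part of the commit; the exact shape of the site's relative URLs is not visible in this diff, so treat it as a mirror of the code, not a spec):

def fix_image_url(url):
    # Mirrors Gameplay_pl.image_url_processor: a URL without a scheme loses
    # its first two characters (a leading '//') and gains the site root.
    if 'http' not in url:
        return 'http://gameplay.pl' + url[2:]
    return url

# Absolute URLs pass through untouched:
assert fix_image_url('http://gameplay.pl/img/gpy_top_logo.png') == 'http://gameplay.pl/img/gpy_top_logo.png'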

BIN Normal file (icon, filename not shown, 994 B)

BIN Normal file (icon, filename not shown, 991 B)

BIN recipes/icons/in4_pl.png Normal file (357 B)

BIN Normal file (icon, filename not shown, 808 B)

BIN recipes/icons/kresy_pl.png Normal file (4.0 KiB)

BIN recipes/icons/oclab_pl.png Normal file (881 B)

BIN Normal file (icon, filename not shown, 817 B)

BIN Normal file (icon, filename not shown, 366 B)

BIN recipes/icons/pc_arena.png Normal file (1.1 KiB)

BIN Normal file (icon, filename not shown, 2.8 KiB)

BIN recipes/icons/pc_foster.png Normal file (694 B)

BIN Normal file (icon, filename not shown, 322 B)

BIN recipes/icons/pure_pc.png Normal file (386 B)

BIN recipes/icons/tanuki.png Normal file (1017 B)

BIN recipes/icons/tvn24.png Normal file (5.1 KiB)

BIN Normal file (icon, filename not shown, 1.4 KiB)

recipes/in4_pl.recipe Normal file (44 lines)

@@ -0,0 +1,44 @@
from calibre.web.feeds.news import BasicNewsRecipe
import re


class in4(BasicNewsRecipe):
    title = u'IN4.pl'
    oldest_article = 7
    max_articles_per_feed = 100
    __author__ = 'fenuks'
    description = u'Serwis Informacyjny - Aktualnosci, recenzje'
    category = 'IT'
    language = 'pl'
    #cover_url = 'http://www.in4.pl/recenzje/337/in4pl.jpg'
    no_stylesheets = True
    remove_empty_feeds = True
    # Drop the "translate into..." links injected into articles
    preprocess_regexps = [(re.compile(ur'<a title="translate into.*?</a>', re.DOTALL), lambda match: '')]
    keep_only_tags = [dict(name='div', attrs={'class':'left_alone'})]
    remove_tags_after = dict(name='img', attrs={'title':'komentarze'})
    remove_tags = [dict(name='img', attrs={'title':'komentarze'})]
    feeds = [(u'Wiadomo\u015bci', u'http://www.in4.pl/rss.php'),
             (u'Recenzje', u'http://www.in4.pl/rss_recenzje.php'),
             (u'Mini recenzje', u'http://www.in4.pl/rss_mini.php')]

    def append_page(self, soup, appendtag):
        # Follow the 'następna str' (next page) link until it disappears,
        # appending each page's #news block to the first page.
        a = soup.findAll('a')
        nexturl = None
        for i in a:
            if i.string and u'następna str' in i.string:
                nexturl = 'http://www.in4.pl/' + i['href']
                i.extract()
        while nexturl:
            soup2 = self.index_to_soup(nexturl)
            pagetext = soup2.find(id='news')
            pos = len(appendtag.contents)
            appendtag.insert(pos, pagetext)
            nexturl = None
            tag = soup2.findAll('a')
            for z in tag:
                if z.string and u'następna str' in z.string:
                    nexturl = 'http://www.in4.pl/' + z['href']
                    break

    def preprocess_html(self, soup):
        self.append_page(soup, soup.body)
        return soup

@@ -0,0 +1,18 @@
from calibre.web.feeds.news import BasicNewsRecipe
import re


class Informacje_USA(BasicNewsRecipe):
    title = u'Informacje USA'
    oldest_article = 7
    max_articles_per_feed = 100
    __author__ = 'fenuks'
    description = u'portal wiadomości amerykańskich'
    category = 'news'
    language = 'pl'
    masthead_url = 'http://www.informacjeusa.com/wp-content/add_images/top_logo_5_2010.jpg'
    cover_url = 'http://www.informacjeusa.com/wp-content/add_images/top_logo_5_2010.jpg'
    no_stylesheets = True
    # Strip the "Zobacz:" / "Zobacz także:" / "Zobacz też:" cross-link paragraphs
    preprocess_regexps = [(re.compile(ur'<p>Zobacz:.*?</p>', re.DOTALL), lambda match: ''),
                          (re.compile(ur'<p><a href=".*?Zobacz także:.*?</a></p>', re.DOTALL), lambda match: ''),
                          (re.compile(ur'<p><p>Zobacz też:.*?</a></p>', re.DOTALL), lambda match: '')]
    keep_only_tags = [dict(name='div', attrs={'class':'box box-single'})]
    remove_tags_after = dict(attrs={'class':'tags'})
    remove_tags = [dict(attrs={'class':['postmetadata', 'tags', 'banner']}),
                   dict(name='a', attrs={'title':['Drukuj', u'Wyślij']})]
    feeds = [(u'Informacje', u'http://www.informacjeusa.com/feed/')]
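calibre applies each preprocess_regexps pair to the raw page HTML with pattern.sub before parsing, so entries like the ones above delete whole cross-link paragraphs. A tiny standalone demo of the first entry (not part of the commit; the sample HTML is made up):

import re

html = u'<p>Tekst.</p><p>Zobacz: <a href="#">inny artykuł</a></p>'
pattern = re.compile(u'<p>Zobacz:.*?</p>', re.DOTALL)
print(pattern.sub(lambda match: '', html))  # prints: <p>Tekst.</p>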

recipes/kresy_pl.recipe Normal file (14 lines)

@@ -0,0 +1,14 @@
from calibre.web.feeds.news import BasicNewsRecipe


class Kresy(BasicNewsRecipe):
    title = u'Kresy'
    __author__ = 'fenuks'
    description = u'portal społeczności kresowej'
    language = 'pl'
    masthead_url = 'http://www.kresy.pl/public/img/logo.png'
    cover_url = 'http://www.kresy.pl/public/img/logo.png'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    keep_only_tags = [dict(id='artykul')]
    remove_tags = [dict(attrs={'class':['twitter-share-button', 'likefbborder', 'tagi']})]
    feeds = [(u'Wszystkie', u'http://www.kresy.pl/rss')]

recipes/oclab_pl.recipe Normal file (31 lines)

@@ -0,0 +1,31 @@
from calibre.web.feeds.news import BasicNewsRecipe


class OCLab(BasicNewsRecipe):
    title = u'OCLab.pl'
    oldest_article = 7
    max_articles_per_feed = 100
    __author__ = 'fenuks'
    description = u'Portal OCLab.pl jest miejscem przyjaznym pasjonatom sprzętu komputerowego, w szczególności overclockerom, które będzie służyć im za aktualną bazę wiedzy o podkręcaniu komputera, źródło aktualnych informacji z rynku oraz opinii na temat sprzętu komputerowego.'
    category = 'IT'
    language = 'pl'
    cover_url = 'http://www.idealforum.ru/attachment.php?attachmentid=7963&d=1316008118'
    no_stylesheets = True
    keep_only_tags = [dict(id='main')]
    remove_tags_after = dict(attrs={'class':'single-postmetadata'})
    remove_tags = [dict(attrs={'class':['single-postmetadata', 'pagebar']})]
    feeds = [(u'Wpisy', u'http://oclab.pl/feed/')]

    def append_page(self, soup, appendtag):
        # The pager is a jump <select>; every <option value=...> holds a page
        # URL. Skip the first entry (the page already loaded) and the last,
        # then append each page's single-entry block.
        tag = soup.find(attrs={'class':'contentjumpddl'})
        if tag:
            nexturl = tag.findAll('option')
            for nextpage in nexturl[1:-1]:
                soup2 = self.index_to_soup(nextpage['value'])
                pagetext = soup2.find(attrs={'class':'single-entry'})
                pos = len(appendtag.contents)
                appendtag.insert(pos, pagetext)
            for r in appendtag.findAll(attrs={'class':'post-nav-bottom-list'}):
                r.extract()

    def preprocess_html(self, soup):
        self.append_page(soup, soup.body)
        return soup
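OCLab's pager is a jump <select>, so append_page reads page URLs from the <option> values instead of following "next" links. A standalone sketch of that extraction (not part of the commit; assumes bs4 and a made-up snippet):

from bs4 import BeautifulSoup

html = '''<div class="contentjumpddl"><select>
<option value="http://oclab.pl/artykul/1">1</option>
<option value="http://oclab.pl/artykul/2">2</option>
<option value="http://oclab.pl/artykul/3">3</option>
</select></div>'''

ddl = BeautifulSoup(html, 'html.parser').find(attrs={'class': 'contentjumpddl'})
# nexturl[1:-1] in the recipe: skip the first option (the loaded page)
# and the last one.
urls = [o['value'] for o in ddl.find_all('option')[1:-1]]
print(urls)  # ['http://oclab.pl/artykul/2']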

@@ -0,0 +1,37 @@
import re
from calibre.web.feeds.news import BasicNewsRecipe


class Overclock_pl(BasicNewsRecipe):
    title = u'Overclock.pl'
    oldest_article = 7
    max_articles_per_feed = 100
    __author__ = 'fenuks'
    description = u'Vortal poświęcony tematyce hardware, kładący największy nacisk na podkręcanie / overclocking (włącznie z extreme) i chłodzenie / cooling (air cooling, water cooling, freon cooling, dry ice, liquid nitrogen).'
    category = 'IT'
    language = 'pl'
    masthead_url = 'http://www.overclock.pl/gfx/logo_m.png'
    cover_url = 'http://www.overclock.pl/gfx/logo_m.png'
    no_stylesheets = True
    remove_empty_feeds = True
    preprocess_regexps = [(re.compile(ur'<b>Komentarze do aktualności:.*?</a>', re.DOTALL), lambda match: ''),
                          (re.compile(ur'<h3>Nawigacja</h3>', re.DOTALL), lambda match: '')]
    keep_only_tags = [dict(name='div', attrs={'class':'news'}), dict(id='articleContent')]
    remove_tags = [dict(name='span', attrs={'class':'info'}), dict(attrs={'class':'shareit'})]
    feeds = [(u'Aktualno\u015bci', u'http://www.overclock.pl/rss.news.xml'),
             (u'Testy i recenzje', u'http://www.overclock.pl/rss.articles.xml')]

    def append_page(self, soup, appendtag):
        # Articles paginate through a #navigation <select>; skip the first
        # two <option> entries and fetch the rest, then remove the pager
        # ('Pierwsza' is its 'first page' arrow).
        tag = soup.find(id='navigation')
        if tag:
            nexturl = tag.findAll('option')
            tag.extract()
            for nextpage in nexturl[2:]:
                soup2 = self.index_to_soup(nextpage['value'])
                pagetext = soup2.find(id='content')
                pos = len(appendtag.contents)
                appendtag.insert(pos, pagetext)
            rem = appendtag.find(attrs={'alt':'Pierwsza'})
            if rem:
                rem.parent.extract()

    def preprocess_html(self, soup):
        self.append_page(soup, soup.body)
        return soup

recipes/palmtop_pl.recipe Normal file (14 lines)

@@ -0,0 +1,14 @@
from calibre.web.feeds.news import BasicNewsRecipe


class palmtop_pl(BasicNewsRecipe):
    title = u'Palmtop.pl'
    __author__ = 'fenuks'
    description = 'wortal technologii mobilnych'
    category = 'mobile'
    language = 'pl'
    cover_url = 'http://cdn.g-point.biz/wp-content/themes/palmtop-new/images/header_palmtop_logo.png'
    masthead_url = 'http://cdn.g-point.biz/wp-content/themes/palmtop-new/images/header_palmtop_logo.png'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    feeds = [(u'Newsy', u'http://palmtop.pl/feed/atom/')]

recipes/pc_arena.recipe Normal file (31 lines)

@@ -0,0 +1,31 @@
from calibre.web.feeds.news import BasicNewsRecipe


class PC_Arena(BasicNewsRecipe):
    title = u'PCArena'
    oldest_article = 18300  # effectively no age limit (about 50 years)
    max_articles_per_feed = 100
    __author__ = 'fenuks'
    description = u'Najnowsze informacje z branży IT - testy, recenzje, aktualności, rankingi, wywiady. Twoje źródło informacji o sprzęcie komputerowym.'
    category = 'IT'
    language = 'pl'
    masthead_url = 'http://pcarena.pl/public/design/frontend/images/logo.gif'
    cover_url = 'http://pcarena.pl/public/design/frontend/images/logo.gif'
    no_stylesheets = True
    keep_only_tags = [dict(attrs={'class':['artHeader', 'art']})]
    remove_tags = [dict(attrs={'class':'pages'})]
    feeds = [(u'Newsy', u'http://pcarena.pl/misc/rss/news'),
             (u'Artyku\u0142y', u'http://pcarena.pl/misc/rss/articles')]

    def append_page(self, soup, appendtag):
        # Walk the numbered pager links, skipping the first (already-loaded)
        # page, and append each page's article body.
        tag = soup.find(name='div', attrs={'class':'pagNum'})
        if tag:
            nexturl = tag.findAll('a')
            tag.extract()
            for nextpage in nexturl[1:]:
                nextpage = 'http://pcarena.pl' + nextpage['href']
                soup2 = self.index_to_soup(nextpage)
                pagetext = soup2.find(attrs={'class':'artBody'})
                pos = len(appendtag.contents)
                appendtag.insert(pos, pagetext)

    def preprocess_html(self, soup):
        self.append_page(soup, soup.body)
        return soup

@@ -0,0 +1,41 @@
from calibre.web.feeds.news import BasicNewsRecipe


class PC_Centre(BasicNewsRecipe):
    title = u'PC Centre'
    oldest_article = 7
    max_articles_per_feed = 100
    __author__ = 'fenuks'
    description = u'Portal komputerowy, a w nim: testy sprzętu komputerowego, recenzje gier i oprogramowania, a także opisy produktów związanych z komputerami.'
    category = 'IT'
    language = 'pl'
    masthead_url = 'http://pccentre.pl/views/images/logo.gif'
    cover_url = 'http://pccentre.pl/views/images/logo.gif'
    no_stylesheets = True
    keep_only_tags = [dict(id='content')]
    remove_tags = [dict(attrs={'class':['ikony r', 'list_of_content', 'dot accordion']}), dict(id='comments')]
    feeds = [(u'Publikacje', u'http://pccentre.pl/backend.php?mode=a'),
             (u'Aktualno\u015bci', u'http://pccentre.pl/backend.php'),
             (u'Sprz\u0119t komputerowy', u'http://pccentre.pl/backend.php?mode=n&section=2'),
             (u'Oprogramowanie', u'http://pccentre.pl/backend.php?mode=n&section=3'),
             (u'Gry komputerowe i konsole', u'http://pccentre.pl/backend.php?mode=n&section=4'),
             (u'Internet', u'http://pccentre.pl/backend.php?mode=n&section=7'),
             (u'Bezpiecze\u0144stwo', u'http://pccentre.pl/backend.php?mode=n&section=5'),
             (u'Multimedia', u'http://pccentre.pl/backend.php?mode=n&section=6'),
             (u'Biznes', u'http://pccentre.pl/backend.php?mode=n&section=9')]

    def append_page(self, soup, appendtag):
        # Walk the pager links (all but the last entry) and append each
        # page's #content block, stripped of the repeated page chrome.
        tag = soup.find(name='div', attrs={'class':'pages'})
        if tag:
            nexturl = tag.findAll('a')
            tag.extract()
            for nextpage in nexturl[:-1]:
                nextpage = 'http://pccentre.pl' + nextpage['href']
                soup2 = self.index_to_soup(nextpage)
                pagetext = soup2.find(id='content')
                for r in pagetext.findAll(attrs={'class':['subtitle', 'content_info', 'list_of_content', 'pages', 'social2', 'pcc_acc', 'pcc_acc_na']}):
                    r.extract()
                for r in pagetext.findAll(id='comments'):
                    r.extract()
                for r in pagetext.findAll('h1'):
                    r.extract()
                pos = len(appendtag.contents)
                appendtag.insert(pos, pagetext)

    def preprocess_html(self, soup):
        self.append_page(soup, soup.body)
        return soup

recipes/pc_foster.recipe Normal file (35 lines)

@@ -0,0 +1,35 @@
from calibre.web.feeds.news import BasicNewsRecipe


class PC_Foster(BasicNewsRecipe):
    title = u'PC Foster'
    oldest_article = 7
    max_articles_per_feed = 100
    __author__ = 'fenuks'
    description = u'Vortal technologiczny: testy, recenzje sprzętu komputerowego i telefonów, nowinki hardware, programy i gry dla Windows. Podkręcanie, modding i Overclocking.'
    category = 'IT'
    language = 'pl'
    masthead_url = 'http://pcfoster.pl/public/images/logo.png'
    cover_url = 'http://pcfoster.pl/public/images/logo.png'
    no_stylesheets = True
    remove_empty_feeds = True
    keep_only_tags = [dict(id=['news_details', 'review_details']), dict(attrs={'class':'pager more_top'})]
    remove_tags = [dict(name='p', attrs={'class':'right'})]
    feeds = [(u'G\u0142\xf3wny', u'http://pcfoster.pl/public/rss/main.xml')]

    def append_page(self, soup, appendtag):
        # The next-page control is an element with alt="Następna strona"
        # inside a link; follow it until it disappears, then drop the
        # leftover review chrome.
        nexturl = appendtag.find(attrs={'alt':u'Następna strona'})
        if nexturl:
            appendtag.find(attrs={'class':'pager more_top'}).extract()
        while nexturl:
            nexturl = 'http://pcfoster.pl' + nexturl.parent['href']
            soup2 = self.index_to_soup(nexturl)
            nexturl = soup2.find(attrs={'alt':u'Następna strona'})
            pagetext = soup2.find(attrs={'class':'content'})
            pos = len(appendtag.contents)
            appendtag.insert(pos, pagetext)
        for r in appendtag.findAll(attrs={'class':'review_content double'}):
            r.extract()

    def preprocess_html(self, soup):
        self.append_page(soup, soup.body)
        return soup

@@ -0,0 +1,81 @@
from calibre.web.feeds.news import BasicNewsRecipe
import re


class Polska_times(BasicNewsRecipe):
    title = u'Polska Times'
    __author__ = 'fenuks'
    description = u'Internetowe wydanie dziennika ogólnopolskiego Polska The Times. Najświeższe informacje: wydarzenia w kraju i na świecie, reportaże, poradniki, opinie.'
    category = 'newspaper'
    language = 'pl'
    masthead_url = 'http://s.polskatimes.pl/g/logo_naglowek/polska.gif?17'
    oldest_article = 7
    max_articles_per_feed = 100
    remove_empty_feeds = True
    no_stylesheets = True
    # Strip the assorted "Czytaj także/też" and "Zobacz także" cross-link
    # blocks; cut everything from "CZYTAJ KONIECZNIE" / "Nasze serwisy" on.
    preprocess_regexps = [(re.compile(ur'<b>Czytaj także:.*?</b>', re.DOTALL), lambda match: ''),
                          (re.compile(ur',<b>Czytaj też:.*?</b>', re.DOTALL), lambda match: ''),
                          (re.compile(ur'<b>Zobacz także:.*?</b>', re.DOTALL), lambda match: ''),
                          (re.compile(ur'<center><h4><a.*?</a></h4></center>', re.DOTALL), lambda match: ''),
                          (re.compile(ur'<b>CZYTAJ TEŻ:.*?</b>', re.DOTALL), lambda match: ''),
                          (re.compile(ur'<b>CZYTAJ WIĘCEJ:.*?</b>', re.DOTALL), lambda match: ''),
                          (re.compile(ur'<b>CZYTAJ TAKŻE:.*?</b>', re.DOTALL), lambda match: ''),
                          (re.compile(ur'<b>\* CZYTAJ KONIECZNIE:.*', re.DOTALL), lambda match: '</body>'),
                          (re.compile(ur'<b>Nasze serwisy:</b>.*', re.DOTALL), lambda match: '</body>')]
    keep_only_tags = [dict(id=['tytul-artykulu', 'kontent'])]
    remove_tags_after = dict(id='material-tagi')
    remove_tags = [dict(attrs={'id':'reklama_srodtekst_0'}),
                   dict(attrs={'id':'material-tagi'}),
                   dict(name='div', attrs={'class':'zakladki'}),
                   dict(attrs={'title':u'CZYTAJ TAKŻE'}),
                   dict(attrs={'id':'podobne'}),
                   dict(name='a', attrs={'href':'http://www.dzienniklodzki.pl/newsletter'})]
    feeds = [(u'Fakty', u'http://polskatimes.feedsportal.com/c/32980/f/533648/index.rss'),
             (u'Opinie', u'http://www.polskatimes.pl/rss/opinie.xml'),
             (u'Sport', u'http://polskatimes.feedsportal.com/c/32980/f/533649/index.rss'),
             (u'Pieni\u0105dze', u'http://polskatimes.feedsportal.com/c/32980/f/533657/index.rss'),
             (u'Twoje finanse', u'http://www.polskatimes.pl/rss/twojefinanse.xml'),
             (u'Kultura', u'http://polskatimes.feedsportal.com/c/32980/f/533650/index.rss'),
             (u'Dodatki', u'http://www.polskatimes.pl/rss/dodatki.xml')]

    def skip_ad_pages(self, soup):
        # Interstitial ad pages carry 'Advertisement' in their title;
        # follow the first link to reach the real article.
        if 'Advertisement' in soup.title:
            nexturl = soup.find('a')['href']
            return self.index_to_soup(nexturl, raw=True)

    def append_page(self, soup, appendtag):
        # Text articles: follow the 'nastepna_strona' link page by page.
        nexturl = soup.find(id='nastepna_strona')
        while nexturl:
            soup2 = self.index_to_soup(nexturl['href'])
            nexturl = soup2.find(id='nastepna_strona')
            pagetext = soup2.find(id='tresc')
            # Re-apply the recipe's remove_tags filters to each fetched page
            for dictionary in self.remove_tags:
                for delete in pagetext.findAll(attrs=dictionary['attrs']):
                    delete.extract()
            for b in pagetext.findAll(name='b'):
                if b.string and any(phrase in b.string for phrase in
                        (u'CZYTAJ TEŻ', u'Czytaj także', u'Czytaj też', u'Zobacz także')):
                    b.extract()
            for center in pagetext.findAll(name='center'):
                if center.h4 and center.h4.a:
                    center.extract()
            pos = len(appendtag.contents)
            appendtag.insert(pos, pagetext)
        for paginator in appendtag.findAll(attrs={'class':'stronicowanie'}):
            paginator.extract()

    def image_article(self, soup, appendtag):
        # Photo galleries: walk the 'nastepna' links, remembering visited
        # ones so a circular pager cannot loop forever.
        nexturl = soup.find('a', attrs={'class':'nastepna'})
        urls = []
        while nexturl:
            if nexturl not in urls:
                urls.append(nexturl)
            else:
                break
            soup2 = self.index_to_soup('http://www.polskatimes.pl/artykul/' + nexturl['href'])
            nexturl = soup2.find('a', attrs={'class':'nastepna'})
            if nexturl in urls:
                break
            pagetext = soup2.find(id='galeria-material')
            pos = len(appendtag.contents)
            appendtag.insert(pos, '<br />')
            pos = len(appendtag.contents)
            appendtag.insert(pos, pagetext)
        for rem in appendtag.findAll(attrs={'class':['galeriaNawigator', 'miniaturyPojemnik']}):
            rem.extract()
        for paginator in appendtag.findAll(attrs={'class':'stronicowanie'}):
            paginator.extract()

    def preprocess_html(self, soup):
        if soup.find('a', attrs={'class':'nastepna'}):
            self.image_article(soup, soup.body)
        elif soup.find(id='nastepna_strona'):
            self.append_page(soup, soup.body)
        return soup

    def get_cover_url(self):
        soup = self.index_to_soup('http://www.prasa24.pl/gazeta/metropolia-warszawska/')
        self.cover_url = soup.find(id='pojemnik').img['src']
        return self.cover_url
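skip_ad_pages is the hook that lets a recipe swap out a fetched document before any other processing runs: returning None keeps the page, while returning raw markup makes calibre process that markup instead. A minimal sketch of the contract as used above (not part of the commit; AdSkippingRecipe is a hypothetical name):

from calibre.web.feeds.news import BasicNewsRecipe

class AdSkippingRecipe(BasicNewsRecipe):
    title = 'Ad skipping sketch'

    def skip_ad_pages(self, soup):
        # If the fetched page is an interstitial ad, follow its first
        # link and hand calibre the target page's raw HTML instead.
        if soup.title and 'Advertisement' in soup.title:
            first_link = soup.find('a')
            if first_link is not None:
                return self.index_to_soup(first_link['href'], raw=True)
        return None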

recipes/pure_pc.recipe Normal file (33 lines)

@@ -0,0 +1,33 @@
from calibre.web.feeds.news import BasicNewsRecipe


class PurePC(BasicNewsRecipe):
    title = u'PurePC'
    oldest_article = 7
    max_articles_per_feed = 100
    __author__ = 'fenuks'
    description = u'Artykuły, aktualności, sprzęt, forum, chłodzenie, modding, urządzenia mobilne - wszystko w jednym miejscu.'
    category = 'IT'
    language = 'pl'
    masthead_url = 'http://www.purepc.pl/themes/new/images/purepc.jpg'
    cover_url = 'http://www.purepc.pl/themes/new/images/purepc.jpg'
    no_stylesheets = True
    keep_only_tags = [dict(id='content')]
    remove_tags_after = dict(attrs={'class':'fivestar-widget'})
    remove_tags = [dict(id='navigator'), dict(attrs={'class':['box-tools', 'fivestar-widget', 'PageMenuList']})]
    feeds = [(u'Wiadomo\u015bci', u'http://www.purepc.pl/node/feed')]

    def append_page(self, soup, appendtag):
        # Follow the 'pager-next' link until the last page, then remove
        # the pager widgets from the stitched article.
        nexturl = appendtag.find(attrs={'class':'pager-next'})
        if nexturl:
            while nexturl:
                soup2 = self.index_to_soup('http://www.purepc.pl' + nexturl.a['href'])
                nexturl = soup2.find(attrs={'class':'pager-next'})
                pagetext = soup2.find(attrs={'class':'article'})
                pos = len(appendtag.contents)
                appendtag.insert(pos, pagetext)
            for r in appendtag.findAll(attrs={'class':['PageMenuList', 'pager', 'fivestar-widget']}):
                r.extract()

    def preprocess_html(self, soup):
        self.append_page(soup, soup.body)
        return soup

recipes/tanuki.recipe Normal file (37 lines)

@@ -0,0 +1,37 @@
from calibre.web.feeds.news import BasicNewsRecipe
import re


class tanuki(BasicNewsRecipe):
    title = u'Tanuki'
    oldest_article = 7
    __author__ = 'fenuks'
    category = 'anime, manga'
    language = 'pl'
    max_articles_per_feed = 100
    encoding = 'utf-8'
    extra_css = 'ul {list-style: none; padding: 0; margin: 0;} .kadr{float: left;} .dwazdania {float: right;}'
    preprocess_regexps = [(re.compile(ur'<h3><a class="screen".*?</h3>', re.DOTALL), lambda match: ''),
                          (re.compile(ur'<div><a href="/strony/((manga)|(anime))/[0-9]+?/oceny(\-redakcji){0,1}">Zobacz jak ocenili</a></div>', re.DOTALL), lambda match: '')]
    remove_empty_feeds = True
    no_stylesheets = True
    keep_only_tags = [dict(attrs={'class':['animename', 'storyname', 'nextarrow', 'sideinfov', 'sidelinfov', 'sideinfo', 'sidelinfo']}),
                      dict(name='table', attrs={'summary':'Technikalia'}),
                      dict(attrs={'class':['chaptername', 'copycat']}),
                      dict(id='rightcolumn'),
                      dict(attrs={'class':['headn_tt', 'subtable']})]
    remove_tags = [dict(name='div', attrs={'class':'screen'}), dict(id='randomtoplist'), dict(attrs={'class':'note'})]
    feeds = [(u'Anime', u'http://anime.tanuki.pl/rss_anime.xml'),
             (u'Manga', u'http://manga.tanuki.pl/rss_manga.xml'),
             (u'Tomiki', u'http://manga.tanuki.pl/rss_mangabooks.xml'),
             (u'Artyku\u0142y', u'http://czytelnia.tanuki.pl/rss_czytelnia_artykuly.xml'),
             (u'Opowiadania', u'http://czytelnia.tanuki.pl/rss_czytelnia.xml')]

    def append_page(self, soup, appendtag):
        # Multi-page stories use a 'nextarrow' link; append each page's
        # chapter text and its 'copycat' footer, then drop the arrows.
        nexturl = appendtag.find(attrs={'class':'nextarrow'})
        if nexturl:
            while nexturl:
                soup2 = self.index_to_soup('http://czytelnia.tanuki.pl' + nexturl['href'])
                nexturl = soup2.find(attrs={'class':'nextarrow'})
                pagetext = soup2.find(attrs={'class':['chaptername', 'copycat']})
                pos = len(appendtag.contents)
                appendtag.insert(pos, pagetext)
                pagetext = soup2.find(attrs={'class':'copycat'})
                pos = len(appendtag.contents)
                appendtag.insert(pos, pagetext)
            for r in appendtag.findAll(attrs={'class':'nextarrow'}):
                r.extract()

    def preprocess_html(self, soup):
        self.append_page(soup, soup.body)
        return soup

recipes/tvn24.recipe Normal file (24 lines)

@@ -0,0 +1,24 @@
from calibre.web.feeds.news import BasicNewsRecipe


class tvn24(BasicNewsRecipe):
    title = u'TVN24'
    oldest_article = 7
    max_articles_per_feed = 100
    __author__ = 'fenuks'
    description = u'Sport, Biznes, Gospodarka, Informacje, Wiadomości. Zawsze aktualne wiadomości z Polski i ze świata'
    category = 'news'
    language = 'pl'
    masthead_url = 'http://www.tvn24.pl/_d/topmenu/logo2.gif'
    cover_url = 'http://www.tvn24.pl/_d/topmenu/logo2.gif'
    extra_css = 'ul {list-style: none; padding: 0; margin: 0;} li {float: left; margin: 0 0.15em;}'
    remove_empty_feeds = True
    remove_javascript = True
    no_stylesheets = True
    keep_only_tags = [dict(id='tvn24_wiadomosci_detal'),
                      dict(name='h1', attrs={'class':'standardHeader1'}),
                      dict(attrs={'class':['date60m rd5', 'imageBackground fl rd7', 'contentFromCMS']})]
    remove_tags_after = dict(name='div', attrs={'class':'socialBoxesBottom'})
    remove_tags = [dict(attrs={'class':['tagi_detal', 'socialBoxesBottom', 'twitterBox', 'commentsInfo', 'textSize', 'obj_ukrytydruk obj_ramka1_r', 'related newsNews align-right', 'box', 'newsUserList', 'watchMaterial text']})]
    feeds = [(u'Najnowsze', u'http://www.tvn24.pl/najnowsze.xml'),
             (u'Polska', u'http://www.tvn24.pl/polska.xml'),
             (u'\u015awiat', u'http://www.tvn24.pl/swiat.xml'),
             (u'Sport', u'http://www.tvn24.pl/sport.xml'),
             (u'Biznes', u'http://www.tvn24.pl/biznes.xml'),
             (u'Meteo', u'http://www.tvn24.pl/meteo.xml'),
             (u'Micha\u0142ki', u'http://www.tvn24.pl/michalki.xml'),
             (u'Kultura', u'http://www.tvn24.pl/kultura.xml')]

    def preprocess_html(self, soup):
        # Drop inline styles so extra_css controls the layout
        for item in soup.findAll(style=True):
            del item['style']
        return soup
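The preprocess_html above deletes inline style attributes so the recipe's extra_css wins. The same idea as a standalone snippet (not part of the commit; uses bs4 and made-up HTML):

from bs4 import BeautifulSoup

soup = BeautifulSoup('<p style="color:red">tekst</p>', 'html.parser')
for item in soup.findAll(style=True):
    del item['style']
print(soup)  # <p>tekst</p>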

@@ -0,0 +1,39 @@
from calibre.web.feeds.news import BasicNewsRecipe


class webhosting_pl(BasicNewsRecipe):
    title = u'Webhosting.pl'
    __author__ = 'fenuks'
    description = 'Webhosting.pl to pierwszy na polskim rynku serwis poruszający w szerokim aspekcie tematy związane z hostingiem, globalną Siecią i usługami internetowymi. Głównym celem przedsięwzięcia jest dostarczanie przydatnej i bogatej merytorycznie wiedzy osobom, które chcą tworzyć i efektywnie wykorzystywać współczesny Internet.'
    category = 'web'
    language = 'pl'
    cover_url = 'http://webhosting.pl/images/logo.png'
    masthead_url = 'http://webhosting.pl/images/logo.png'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_empty_feeds = True
    #keep_only_tags = [dict(name='div', attrs={'class':'content_article'}), dict(attrs={'class':'paging'})]
    #remove_tags = [dict(attrs={'class':['tags', 'wykop', 'facebook_button_count', 'article_bottom']})]
    feeds = [(u'Newsy', u'http://webhosting.pl/feed/rss/an'),
             (u'Artyku\u0142y', u'http://webhosting.pl/feed/rss/aa'),
             (u'Software', u'http://webhosting.pl/feed/rss/n/12'),
             (u'Internet', u'http://webhosting.pl/feed/rss/n/9'),
             (u'Biznes', u'http://webhosting.pl/feed/rss/n/13'),
             (u'Bezpiecze\u0144stwo', u'http://webhosting.pl/feed/rss/n/10'),
             (u'Blogi', u'http://webhosting.pl/feed/rss/ab'),
             (u'Programowanie', u'http://webhosting.pl/feed/rss/n/8'),
             (u'Kursy', u'http://webhosting.pl/feed/rss/n/11'),
             (u'Tips&Tricks', u'http://webhosting.pl/feed/rss/n/15'),
             (u'Imprezy', u'http://webhosting.pl/feed/rss/n/22'),
             (u'Wywiady', u'http://webhosting.pl/feed/rss/n/24'),
             (u'Porady', u'http://webhosting.pl/feed/rss/n/3027'),
             (u'Znalezione w sieci', u'http://webhosting.pl/feed/rss/n/6804'),
             (u'Dev area', u'http://webhosting.pl/feed/rss/n/24504'),
             (u"Webmaster's blog", u'http://webhosting.pl/feed/rss/n/29195'),
             (u'Domeny', u'http://webhosting.pl/feed/rss/n/11513'),
             (u'Praktyka', u'http://webhosting.pl/feed/rss/n/2'),
             (u'Serwery', u'http://webhosting.pl/feed/rss/n/11514'),
             (u'Inne', u'http://webhosting.pl/feed/rss/n/24811'),
             (u'Marketing', u'http://webhosting.pl/feed/rss/n/11535')]

    def print_version(self, url):
        return url.replace('webhosting.pl', 'webhosting.pl/print')
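print_version rewrites each article URL to the site's printer-friendly mirror before download, so no per-page scraping is needed. For example (not part of the commit; the article path is made up):

url = 'http://webhosting.pl/Przykladowy.artykul'
print(url.replace('webhosting.pl', 'webhosting.pl/print'))
# prints: http://webhosting.pl/print/Przykladowy.artykul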