Various new and updated Polish news sources

This commit is contained in:
Tomasz Długosz 2013-05-29 08:02:44 +05:30 committed by Kovid Goyal
parent 826d5ab725
commit 88b7fd9e90
20 changed files with 590 additions and 294 deletions


@@ -0,0 +1,37 @@
__license__ = 'GPL v3'
import re
from calibre.web.feeds.news import BasicNewsRecipe
class AlejaKomiksu(BasicNewsRecipe):
title = u'Aleja Komiksu'
__author__ = 'fenuks'
description = u'Serwis poświęcony komiksom. Najnowsze wieści, recenzje, artykuły, wywiady, galerie, komiksy online, konkursy, linki, baza komiksów online.'
category = 'comics'
#publication_type = ''
language = 'pl'
#encoding = ''
extra_css = 'ul {list-style-type: none;} .gfx_news {float: right;}'
preprocess_regexps = [(re.compile(ur'((<li class="no_img_b">(Do poczytania)|(Nowości):</li>)|(<p class="head2">Komentarze</p>)).*</body>', re.DOTALL|re.IGNORECASE), lambda match: '</body>')]
cover_url = 'http://www.alejakomiksu.com/gfx/build/logo.png'
masthead_url = 'http://www.alejakomiksu.com/gfx/build/logo.png'
use_embedded_content = False
oldest_article = 7
max_articles_per_feed = 100
no_stylesheets = True
remove_empty_feeds = True
remove_javascript = True
remove_attributes = ['style', 'font']
ignore_duplicate_articles = {'title', 'url'}
keep_only_tags = [dict(attrs={'class':'cont_tresc'})]
#remove_tags = [dict()]
#remove_tags_before = dict()
feeds = [(u'Wiadomości', 'http://www.alejakomiksu.com/rss.php5')]
def skip_ad_pages(self, soup):
tag = soup.find(attrs={'class':'rodzaj'})
if tag and tag.a.string.lower().strip() == 'recenzje':
link = soup.find(text=re.compile('recenzuje'))
if link:
return self.index_to_soup(link.parent['href'], raw=True)

49 recipes/fdb_pl.recipe Normal file

@@ -0,0 +1,49 @@
__license__ = 'GPL v3'
from calibre.web.feeds.news import BasicNewsRecipe
class FDBPl(BasicNewsRecipe):
title = u'Fdb.pl'
__author__ = 'fenuks'
description = u'Wiadomości ze świata filmu, baza danych filmowych, recenzje, zwiastuny, boxoffice.'
category = 'film'
#publication_type = ''
language = 'pl'
#encoding = ''
extra_css = '.options-left > li {display: inline;} em {display: block;}'
cover_url = 'http://fdb.pl/assets/fdb2/logo.png'
#masthead_url = ''
use_embedded_content = False
oldest_article = 7
max_articles_per_feed = 100
no_stylesheets = True
remove_empty_feeds = True
remove_javascript = True
remove_attributes = ['style', 'font']
ignore_duplicate_articles = {'title', 'url'}
keep_only_tags = [dict(attrs={'class':'news-item news-first'})]
remove_tags = [dict(attrs={'class':['dig dig-first', 'ads clearfix', 'comments']})]
#remove_tags_after = dict()
#remove_tags_before = dict()
feeds = []
def parse_index(self):
feeds = []
feeds.append((u'Wiadomości', self.get_articles('http://fdb.pl/wiadomosci?page={0}', 2)))
return feeds
def get_articles(self, url, pages=1):
articles = []
for nr in range(1, pages+1):
soup = self.index_to_soup(url.format(nr))
for tag in soup.findAll(attrs={'class':'news-item clearfix'}):
node = tag.find('h2')
title = node.a.string
url = 'http://fdb.pl' + node.a['href']
date = ''
articles.append({'title' : title,
'url' : url,
'date' : date,
'description' : ''
})
return articles

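fdb_pl.recipe above builds its feed programmatically instead of reading RSS: parse_index() delegates to get_articles(), which pages through a numbered listing URL and collects one dict per article. A minimal standalone sketch of the same pattern follows; the example.com URLs and the news-item class name are placeholders, not part of the committed recipe.

from calibre.web.feeds.news import BasicNewsRecipe

class PagedListingSketch(BasicNewsRecipe):
    title = 'Paged listing sketch'

    def parse_index(self):
        # parse_index() returns [(feed_title, [article_dict, ...]), ...]
        return [('News', self.get_articles('http://example.com/news?page={0}', pages=2))]

    def get_articles(self, url, pages=1):
        articles = []
        for nr in range(1, pages + 1):
            soup = self.index_to_soup(url.format(nr))               # fetch listing page nr
            for tag in soup.findAll(attrs={'class': 'news-item'}):  # placeholder item class
                node = tag.find('h2')
                articles.append({'title': node.a.string,
                                 'url': 'http://example.com' + node.a['href'],
                                 'date': '',                        # an empty date is accepted
                                 'description': ''})
        return articles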

@@ -8,94 +8,87 @@ krakow.gazeta.pl
'''
from calibre.web.feeds.news import BasicNewsRecipe
import re
from calibre.ebooks.BeautifulSoup import Comment
class gw_krakow(BasicNewsRecipe):
title = u'Gazeta Wyborcza Kraków'
__author__ = 'teepel <teepel44@gmail.com> based on GW from fenuks'
language = 'pl'
description =u'Wiadomości z Krakowa na portalu Gazeta.pl.'
category='newspaper'
description = u'Wiadomości z Krakowa na portalu Gazeta.pl.'
category = 'newspaper'
publication_type = 'newspaper'
masthead_url='http://bi.gazeta.pl/im/5/8528/m8528105.gif'
INDEX='http://krakow.gazeta.pl/'
remove_empty_feeds= True
oldest_article = 1
# encoding = 'iso-8859-2'
masthead_url = 'http://bi.gazeta.pl/im/5/8528/m8528105.gif'
INDEX = 'http://krakow.gazeta.pl'
cover_url = 'http://bi.gazeta.pl/i/hp/hp2009/logo.gif'
remove_empty_feeds = True
oldest_article = 3
max_articles_per_feed = 100
remove_javascript=True
no_stylesheets=True
remove_javascript = True
no_stylesheets = True
use_embedded_content = False
ignore_duplicate_articles = {'title', 'url'}
keep_only_tags =[]
keep_only_tags.append(dict(name = 'div', attrs = {'id' : 'gazeta_article'}))
remove_tags =[]
remove_tags.append(dict(name = 'div', attrs = {'id' : 'gazeta_article_likes'}))
remove_tags.append(dict(name = 'div', attrs = {'id' : 'gazeta_article_tools'}))
remove_tags.append(dict(name = 'div', attrs = {'id' : 'rel'}))
remove_tags.append(dict(name = 'div', attrs = {'id' : 'gazeta_article_share'}))
remove_tags.append(dict(name = 'u1', attrs = {'id' : 'articleToolbar'}))
remove_tags.append(dict(name = 'li', attrs = {'class' : 'atComments'}))
remove_tags.append(dict(name = 'li', attrs = {'class' : 'atLicense'}))
remove_tags.append(dict(name = 'div', attrs = {'id' : 'banP4'}))
remove_tags.append(dict(name = 'div', attrs = {'id' : 'article_toolbar'}))
remove_tags.append(dict(name = 'div', attrs = {'id' : 'gazeta_article_tags'}))
remove_tags.append(dict(name = 'p', attrs = {'class' : 'txt_upl'}))
remove_tags.append(dict(name = 'div', attrs = {'class' : 'gazeta_article_related_new'}))
remove_tags.append(dict(name = 'div', attrs = {'class' : 'gazetaVideoPlayer'}))
remove_tags.append(dict(name = 'div', attrs = {'id' : 'gazeta_article_miniatures'}))
remove_tags.append(dict(name = 'div', attrs = {'id' : 'gazeta_article_buttons'}))
remove_tags_after = [dict(name = 'div', attrs = {'id' : 'gazeta_article_share'})]
# rules for gazeta.pl
preprocess_regexps = [(re.compile(u'<b>Czytaj więcej</b>.*', re.DOTALL), lambda m: '</body>')]
keep_only_tags = [dict(id='gazeta_article')]
remove_tags = [dict(id=['gazeta_article_tools', 'gazeta_article_miniatures']), dict(attrs={'class':['mod mod_sociallist', 'c0', 'fb', 'voteNeedLogin']})]
remove_tags_after = dict(id='gazeta_article_body')
feeds = [(u'Wiadomości', u'http://rss.gazeta.pl/pub/rss/krakow.xml')]
def skip_ad_pages(self, soup):
tag=soup.find(name='a', attrs={'class':'btn'})
if tag:
new_soup=self.index_to_soup(tag['href'], raw=True)
return new_soup
def append_page(self, soup, appendtag):
loop=False
tag = soup.find('div', attrs={'id':'Str'})
if appendtag.find('div', attrs={'id':'Str'}):
nexturl=tag.findAll('a')
appendtag.find('div', attrs={'id':'Str'}).extract()
loop=True
if appendtag.find(id='source'):
appendtag.find(id='source').extract()
while loop:
loop=False
for link in nexturl:
if u'następne' in link.string:
url= self.INDEX + link['href']
soup2 = self.index_to_soup(url)
pagetext = soup2.find(id='artykul')
pos = len(appendtag.contents)
appendtag.insert(pos, pagetext)
tag = soup2.find('div', attrs={'id':'Str'})
nexturl=tag.findAll('a')
loop=True
def gallery_article(self, appendtag):
tag=appendtag.find(id='container_gal')
if tag:
nexturl=appendtag.find(id='gal_btn_next').a['href']
appendtag.find(id='gal_navi').extract()
while nexturl:
soup2=self.index_to_soup(nexturl)
pagetext=soup2.find(id='container_gal')
nexturl=pagetext.find(id='gal_btn_next')
if nexturl:
nexturl=nexturl.a['href']
pos = len(appendtag.contents)
appendtag.insert(pos, pagetext)
rem=appendtag.find(id='gal_navi')
if rem:
rem.extract()
def print_version(self, url):
if 'feedsportal.com' in url:
s = url.rpartition('gazeta0Bpl')
u = s[2]
if not s[0]:
u = url.rpartition('wyborcza0Bpl')[2]
u = u.replace('/l/', '/')
u = u.replace('/ia1.htm', '')
u = u.replace('/story01.htm', '')
u = u.replace('0C', '/')
u = u.replace('A', '')
u = u.replace('0E', '-')
u = u.replace('0H', ',')
u = u.replace('0I', '_')
u = u.replace('0B', '.')
u = self.INDEX + u
return u
else:
return url
def preprocess_html(self, soup):
self.append_page(soup, soup.body)
if soup.find(id='container_gal'):
self.gallery_article(soup.body)
tag = soup.find(id='Str')
if soup.find(attrs={'class': 'piano_btn_1'}):
return None
elif tag and tag.findAll('a'):
self.append_page(soup, soup.body)
return soup
def append_page(self, soup, appendtag):
tag = soup.find('div', attrs={'id': 'Str'})
try:
baseurl = soup.find(name='meta', attrs={'property':'og:url'})['content']
except:
return 1
link = tag.findAll('a')[-1]
while link:
soup2 = self.index_to_soup(baseurl + link['href'])
link = soup2.find('div', attrs={'id': 'Str'}).findAll('a')[-1]
if not u'następne' in link.string:
link = ''
pagetext = soup2.find(id='artykul')
comments = pagetext.findAll(text=lambda text:isinstance(text, Comment))
for comment in comments:
comment.extract()
pos = len(appendtag.contents)
appendtag.insert(pos, pagetext)
tag.extract()
def image_url_processor(self, baseurl, url):
if url.startswith(' '):
return url.strip()
else:
return url

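All four Gazeta.pl recipes in this commit (Kraków above, plus Szczecin, Warszawa and Gazeta Wyborcza below) share the same print_version() trick: feedsportal redirect URLs encode the real article path with two-character escapes (0B = '.', 0C = '/', 0E = '-', 0H = ',', 0I = '_', with stray 'A' padding dropped). A standalone sketch of that decoding; the sample URL in the comment is invented purely for illustration.

def decode_feedsportal(url, index='http://krakow.gazeta.pl'):
    # Mirrors the print_version() logic above; 'index' stands in for the recipe's INDEX value.
    if 'feedsportal.com' not in url:
        return url
    head, _, tail = url.rpartition('gazeta0Bpl')
    if not head:                                  # URL uses the wyborcza0Bpl form instead
        tail = url.rpartition('wyborcza0Bpl')[2]
    tail = tail.replace('/l/', '/').replace('/ia1.htm', '').replace('/story01.htm', '')
    for enc, dec in [('0C', '/'), ('A', ''), ('0E', '-'),
                     ('0H', ','), ('0I', '_'), ('0B', '.')]:
        tail = tail.replace(enc, dec)
    return index + tail

# decode_feedsportal('http://rss.feedsportal.com/c/32739/f/530000/l/0L0Sgazeta0Bpl0Ckrakow0C10H139033610Bhtml/story01.htm')
#   -> 'http://krakow.gazeta.pl/krakow/1,13903361.html'   (invented example)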

@@ -1,8 +1,8 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
import re
import string
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Comment
class GazetaPlSzczecin(BasicNewsRecipe):
title = u'Gazeta Wyborcza Szczecin'
@@ -12,24 +12,74 @@ class GazetaPlSzczecin(BasicNewsRecipe):
language = 'pl'
publisher = 'Agora S.A.'
category = 'news, szczecin'
oldest_article = 2
INDEX = 'http://szczecin.gazeta.pl'
cover_url = 'http://bi.gazeta.pl/i/hp/hp2009/logo.gif'
remove_empty_feeds = True
oldest_article = 3
max_articles_per_feed = 100
auto_cleanup = True
remove_tags = [ { "name" : "a", "attrs" : { "href" : "http://szczecin.gazeta.pl/szczecin/www.gazeta.pl" }}]
cover_url = "http://bi.gazeta.pl/i/hp/hp2009/logo.gif"
remove_javascript = True
no_stylesheets = True
use_embedded_content = False
ignore_duplicate_articles = {'title', 'url'}
# rules for gazeta.pl
preprocess_regexps = [(re.compile(u'<b>Czytaj więcej</b>.*', re.DOTALL), lambda m: '</body>')]
keep_only_tags = [dict(id='gazeta_article')]
remove_tags = [dict(id=['gazeta_article_tools', 'gazeta_article_miniatures']), dict(attrs={'class':['mod mod_sociallist', 'c0', 'fb', 'voteNeedLogin']})]
remove_tags_after = dict(id='gazeta_article_body')
feeds = [(u'Wszystkie', u'http://rss.feedsportal.com/c/32739/f/530434/index.rss')]
def get_article_url(self, article):
s = re.search("""/0L(szczecin.*)/story01.htm""", article.link)
s = s.group(1)
replacements = { "0B" : ".", "0C" : "/", "0H" : ",", "0I" : "_"}
for (a, b) in replacements.iteritems():
s = string.replace(s, a, b)
s = string.replace(s, "0A", "0")
return "http://"+s
def print_version(self, url):
s = re.search("""/(\d*),(\d*),(\d*),.*\.html""", url)
no1 = s.group(2)
no2 = s.group(3)
return """http://szczecin.gazeta.pl/szczecin/2029020,%s,%s.html""" % (no1, no2)
if 'feedsportal.com' in url:
s = url.rpartition('gazeta0Bpl')
u = s[2]
if not s[0]:
u = url.rpartition('wyborcza0Bpl')[2]
u = u.replace('/l/', '/')
u = u.replace('/ia1.htm', '')
u = u.replace('/story01.htm', '')
u = u.replace('0C', '/')
u = u.replace('A', '')
u = u.replace('0E', '-')
u = u.replace('0H', ',')
u = u.replace('0I', '_')
u = u.replace('0B', '.')
u = self.INDEX + u
return u
else:
return url
def preprocess_html(self, soup):
tag = soup.find(id='Str')
if soup.find(attrs={'class': 'piano_btn_1'}):
return None
elif tag and tag.findAll('a'):
self.append_page(soup, soup.body)
return soup
def append_page(self, soup, appendtag):
tag = soup.find('div', attrs={'id': 'Str'})
try:
baseurl = soup.find(name='meta', attrs={'property':'og:url'})['content']
except:
return 1
link = tag.findAll('a')[-1]
while link:
soup2 = self.index_to_soup(baseurl + link['href'])
link = soup2.find('div', attrs={'id': 'Str'}).findAll('a')[-1]
if not u'następne' in link.string:
link = ''
pagetext = soup2.find(id='artykul')
comments = pagetext.findAll(text=lambda text:isinstance(text, Comment))
for comment in comments:
comment.extract()
pos = len(appendtag.contents)
appendtag.insert(pos, pagetext)
tag.extract()
def image_url_processor(self, baseurl, url):
if url.startswith(' '):
return url.strip()
else:
return url

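The updated Gazeta.pl recipes also share the multi-page handling seen above: preprocess_html() skips paywalled pages (piano_btn_1) and otherwise calls append_page(), which reads relative page links from the 'Str' pager div, resolves them against the og:url meta tag, and appends pages until the last pager link no longer says 'następne' ('next'). A commented sketch of that flow under the same assumptions; the class name is made up and the structure is illustrative, not the committed code.

from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Comment

class GazetaPagerSketch(BasicNewsRecipe):
    title = 'Gazeta pager sketch'

    def preprocess_html(self, soup):
        if soup.find(attrs={'class': 'piano_btn_1'}):
            return None                           # paywalled article: drop it
        self.append_page(soup, soup.body)
        return soup

    def append_page(self, soup, appendtag):
        pager = soup.find('div', attrs={'id': 'Str'})
        meta = soup.find(name='meta', attrs={'property': 'og:url'})
        if pager is None or meta is None or not pager.findAll('a'):
            return                                # single-page article, nothing to stitch
        baseurl = meta['content']
        link = pager.findAll('a')[-1]             # last pager link leads to the next page
        while link:
            soup2 = self.index_to_soup(baseurl + link['href'])
            link = soup2.find('div', attrs={'id': 'Str'}).findAll('a')[-1]
            if u'następne' not in link.string:
                link = ''                         # final page reached; append it, then stop
            pagetext = soup2.find(id='artykul')
            for comment in pagetext.findAll(text=lambda t: isinstance(t, Comment)):
                comment.extract()                 # strip HTML comments from the fetched page
            appendtag.insert(len(appendtag.contents), pagetext)
        pager.extract()                           # remove the pager from the output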

@@ -7,7 +7,9 @@ __author__ = 'teepel <teepel44@gmail.com> based on GW from fenuks'
warszawa.gazeta.pl
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Comment
class gw_wawa(BasicNewsRecipe):
title = u'Gazeta Wyborcza Warszawa'
@@ -17,82 +19,75 @@ class gw_wawa(BasicNewsRecipe):
category='newspaper'
publication_type = 'newspaper'
masthead_url='http://bi.gazeta.pl/im/3/4089/m4089863.gif'
INDEX='http://warszawa.gazeta.pl/'
remove_empty_feeds= True
oldest_article = 1
INDEX = 'http://warszawa.gazeta.pl'
cover_url = 'http://bi.gazeta.pl/i/hp/hp2009/logo.gif'
remove_empty_feeds = True
oldest_article = 3
max_articles_per_feed = 100
remove_javascript=True
no_stylesheets=True
remove_javascript = True
no_stylesheets = True
use_embedded_content = False
ignore_duplicate_articles = {'title', 'url'}
keep_only_tags =[]
keep_only_tags.append(dict(name = 'div', attrs = {'id' : 'gazeta_article'}))
remove_tags =[]
remove_tags.append(dict(name = 'div', attrs = {'id' : 'gazeta_article_likes'}))
remove_tags.append(dict(name = 'div', attrs = {'id' : 'gazeta_article_tools'}))
remove_tags.append(dict(name = 'div', attrs = {'id' : 'rel'}))
remove_tags.append(dict(name = 'div', attrs = {'id' : 'gazeta_article_share'}))
remove_tags.append(dict(name = 'u1', attrs = {'id' : 'articleToolbar'}))
remove_tags.append(dict(name = 'li', attrs = {'class' : 'atComments'}))
remove_tags.append(dict(name = 'li', attrs = {'class' : 'atLicense'}))
remove_tags.append(dict(name = 'div', attrs = {'id' : 'banP4'}))
remove_tags.append(dict(name = 'div', attrs = {'id' : 'article_toolbar'}))
remove_tags.append(dict(name = 'div', attrs = {'id' : 'gazeta_article_tags'}))
remove_tags.append(dict(name = 'p', attrs = {'class' : 'txt_upl'}))
remove_tags.append(dict(name = 'div', attrs = {'class' : 'gazeta_article_related_new'}))
remove_tags.append(dict(name = 'div', attrs = {'class' : 'gazetaVideoPlayer'}))
remove_tags.append(dict(name = 'div', attrs = {'id' : 'gazeta_article_miniatures'}))
# rules for gazeta.pl
preprocess_regexps = [(re.compile(u'<b>Czytaj więcej</b>.*', re.DOTALL), lambda m: '</body>')]
keep_only_tags = [dict(id='gazeta_article')]
remove_tags = [dict(id=['gazeta_article_tools', 'gazeta_article_miniatures']), dict(attrs={'class':['mod mod_sociallist', 'c0', 'fb', 'voteNeedLogin']})]
remove_tags_after = dict(id='gazeta_article_body')
feeds = [(u'Wiadomości', u'http://rss.gazeta.pl/pub/rss/warszawa.xml')]
def skip_ad_pages(self, soup):
tag=soup.find(name='a', attrs={'class':'btn'})
if tag:
new_soup=self.index_to_soup(tag['href'], raw=True)
return new_soup
def append_page(self, soup, appendtag):
loop=False
tag = soup.find('div', attrs={'id':'Str'})
if appendtag.find('div', attrs={'id':'Str'}):
nexturl=tag.findAll('a')
appendtag.find('div', attrs={'id':'Str'}).extract()
loop=True
if appendtag.find(id='source'):
appendtag.find(id='source').extract()
while loop:
loop=False
for link in nexturl:
if u'następne' in link.string:
url= self.INDEX + link['href']
soup2 = self.index_to_soup(url)
pagetext = soup2.find(id='artykul')
pos = len(appendtag.contents)
appendtag.insert(pos, pagetext)
tag = soup2.find('div', attrs={'id':'Str'})
nexturl=tag.findAll('a')
loop=True
def gallery_article(self, appendtag):
tag=appendtag.find(id='container_gal')
if tag:
nexturl=appendtag.find(id='gal_btn_next').a['href']
appendtag.find(id='gal_navi').extract()
while nexturl:
soup2=self.index_to_soup(nexturl)
pagetext=soup2.find(id='container_gal')
nexturl=pagetext.find(id='gal_btn_next')
if nexturl:
nexturl=nexturl.a['href']
pos = len(appendtag.contents)
appendtag.insert(pos, pagetext)
rem=appendtag.find(id='gal_navi')
if rem:
rem.extract()
def print_version(self, url):
if 'feedsportal.com' in url:
s = url.rpartition('gazeta0Bpl')
u = s[2]
if not s[0]:
u = url.rpartition('wyborcza0Bpl')[2]
u = u.replace('/l/', '/')
u = u.replace('/ia1.htm', '')
u = u.replace('/story01.htm', '')
u = u.replace('0C', '/')
u = u.replace('A', '')
u = u.replace('0E', '-')
u = u.replace('0H', ',')
u = u.replace('0I', '_')
u = u.replace('0B', '.')
u = self.INDEX + u
return u
else:
return url
def preprocess_html(self, soup):
self.append_page(soup, soup.body)
if soup.find(id='container_gal'):
self.gallery_article(soup.body)
tag = soup.find(id='Str')
if soup.find(attrs={'class': 'piano_btn_1'}):
return None
elif tag and tag.findAll('a'):
self.append_page(soup, soup.body)
return soup
def append_page(self, soup, appendtag):
tag = soup.find('div', attrs={'id': 'Str'})
try:
baseurl = soup.find(name='meta', attrs={'property':'og:url'})['content']
except:
return 1
link = tag.findAll('a')[-1]
while link:
soup2 = self.index_to_soup(baseurl + link['href'])
link = soup2.find('div', attrs={'id': 'Str'}).findAll('a')[-1]
if not u'następne' in link.string:
link = ''
pagetext = soup2.find(id='artykul')
comments = pagetext.findAll(text=lambda text:isinstance(text, Comment))
for comment in comments:
comment.extract()
pos = len(appendtag.contents)
appendtag.insert(pos, pagetext)
tag.extract()
def image_url_processor(self, baseurl, url):
if url.startswith(' '):
return url.strip()
else:
return url


@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Comment
import re
class Gazeta_Wyborcza(BasicNewsRecipe):
title = u'Gazeta Wyborcza'
__author__ = 'fenuks, Artur Stachecki'
@@ -9,7 +9,7 @@ class Gazeta_Wyborcza(BasicNewsRecipe):
description = 'Wiadomości z Polski i ze świata. Serwisy tematyczne i lokalne w 20 miastach.'
category = 'newspaper'
publication_type = 'newspaper'
#encoding = 'iso-8859-2'
# encoding = 'iso-8859-2'
masthead_url = 'http://bi.gazeta.pl/im/5/10285/z10285445AA.jpg'
INDEX = 'http://wyborcza.pl'
remove_empty_feeds = True
@@ -19,10 +19,18 @@ class Gazeta_Wyborcza(BasicNewsRecipe):
no_stylesheets = True
use_embedded_content = False
ignore_duplicate_articles = {'title', 'url'}
remove_tags_before = dict(id='k0')
remove_tags_after = dict(id='banP4')
remove_tags = [dict(name='div', attrs={'class':'rel_box'}), dict(attrs={'class':['date', 'zdjP', 'zdjM', 'pollCont', 'rel_video', 'brand', 'txt_upl']}), dict(name='div', attrs={'id':'footer'})]
feeds = [(u'Kraj', u'http://rss.feedsportal.com/c/32739/f/530266/index.rss'), (u'\u015awiat', u'http://rss.feedsportal.com/c/32739/f/530270/index.rss'),
# rules for gazeta.pl
preprocess_regexps = [(re.compile(u'<b>Czytaj więcej</b>.*', re.DOTALL), lambda m: '</body>')]
keep_only_tags = [dict(id='gazeta_article')]
remove_tags = [dict(id=['gazeta_article_tools', 'gazeta_article_miniatures']), dict(attrs={'class':['mod mod_sociallist', 'c0', 'fb', 'voteNeedLogin']})]
remove_tags_after = dict(id='gazeta_article_body')
# rules for wyborcza.biz
preprocess_regexps.append((re.compile(u'(<br>)?(<br>)? Czytaj (także|też):.*?</a>\.?<br>', re.DOTALL), lambda m: ''))
feeds = [(u'Kraj', u'http://rss.feedsportal.com/c/32739/f/530266/index.rss'),
(u'\u015awiat', u'http://rss.feedsportal.com/c/32739/f/530270/index.rss'),
(u'Wyborcza.biz', u'http://wyborcza.biz/pub/rss/wyborcza_biz_wiadomosci.htm'),
(u'Komentarze', u'http://rss.feedsportal.com/c/32739/f/530312/index.rss'),
(u'Kultura', u'http://rss.gazeta.pl/pub/rss/gazetawyborcza_kultura.xml'),
@@ -39,86 +47,55 @@ class Gazeta_Wyborcza(BasicNewsRecipe):
(u'\u015aroda w \u015brod\u0119', u'http://rss.feedsportal.com/c/32739/f/530428/index.rss'),
(u'W pi\u0105tek - Olejnik', u'http://rss.feedsportal.com/c/32739/f/530364/index.rss'),
(u'Nekrologi', u'http://rss.feedsportal.com/c/32739/f/530358/index.rss')
]
def skip_ad_pages(self, soup):
tag = soup.find(name='a', attrs={'class': 'btn'})
if tag:
new_soup = self.index_to_soup(tag['href'], raw=True)
return new_soup
def append_page(self, soup, appendtag):
loop = False
tag = soup.find('div', attrs={'id': 'Str'})
if appendtag.find('div', attrs={'id': 'Str'}):
nexturl = tag.findAll('a')
appendtag.find('div', attrs={'id': 'Str'}).extract()
loop = True
if appendtag.find(id='source'):
appendtag.find(id='source').extract()
while loop:
loop = False
for link in nexturl:
if u'następne' in link.string:
url = self.INDEX + link['href']
soup2 = self.index_to_soup(url)
pagetext = soup2.find(id='artykul')
comments = pagetext.findAll(text=lambda text:isinstance(text, Comment))
for comment in comments:
comment.extract()
pos = len(appendtag.contents)
appendtag.insert(pos, pagetext)
tag = soup2.find('div', attrs={'id': 'Str'})
nexturl = tag.findAll('a')
loop = True
def gallery_article(self, appendtag):
tag = appendtag.find(id='container_gal')
if tag:
nexturl = appendtag.find(id='gal_btn_next').a['href']
appendtag.find(id='gal_navi').extract()
while nexturl:
soup2 = self.index_to_soup(nexturl)
pagetext = soup2.find(id='container_gal')
nexturl = pagetext.find(id='gal_btn_next')
if nexturl:
nexturl = nexturl.a['href']
comments = pagetext.findAll(text=lambda text:isinstance(text, Comment))
for comment in comments:
comment.extract()
pos = len(appendtag.contents)
appendtag.insert(pos, pagetext)
rem = appendtag.find(id='gal_navi')
if rem:
rem.extract()
def preprocess_html(self, soup):
if soup.find(attrs={'class': 'piano_btn_1'}):
return None
else:
self.append_page(soup, soup.body)
if soup.find(id='container_gal'):
self.gallery_article(soup.body)
return soup
]
def print_version(self, url):
if url.count('rss.feedsportal.com'):
u = url.find('wyborcza0Bpl')
u = 'http://www.wyborcza.pl/' + url[u + 11:]
if 'feedsportal.com' in url:
s = url.rpartition('wyborcza0Bpl')
u = s[2]
if not s[0]:
u = url.rpartition('gazeta0Bpl')[2]
u = u.replace('/l/', '/')
u = u.replace('/ia1.htm', '')
u = u.replace('/story01.htm', '')
u = u.replace('0C', '/')
u = u.replace('A', '')
u = u.replace('0E', '-')
u = u.replace('0H', ',')
u = u.replace('0I', '_')
u = u.replace('0B', '.')
u = u.replace('/1,', '/2029020,')
u = u.replace('/story01.htm', '')
print(u)
u = self.INDEX + u
return u
elif 'http://wyborcza.pl/1' in url:
return url.replace('http://wyborcza.pl/1', 'http://wyborcza.pl/2029020')
else:
return url.replace('http://wyborcza.biz/biznes/1', 'http://wyborcza.biz/biznes/2029020')
return url
def preprocess_html(self, soup):
tag = soup.find(id='Str')
if soup.find(attrs={'class': 'piano_btn_1'}):
return None
elif tag and tag.findAll('a'):
self.append_page(soup, soup.body)
return soup
def append_page(self, soup, appendtag):
tag = soup.find('div', attrs={'id': 'Str'})
try:
baseurl = soup.find(name='meta', attrs={'property':'og:url'})['content']
except:
return 1
link = tag.findAll('a')[-1]
while link:
soup2 = self.index_to_soup(baseurl + link['href'])
link = soup2.find('div', attrs={'id': 'Str'}).findAll('a')[-1]
if not u'następne' in link.string:
link = ''
pagetext = soup2.find(id='artykul')
comments = pagetext.findAll(text=lambda text:isinstance(text, Comment))
for comment in comments:
comment.extract()
pos = len(appendtag.contents)
appendtag.insert(pos, pagetext)
tag.extract()
def get_cover_url(self):
soup = self.index_to_soup('http://wyborcza.pl/0,76762,3751429.html')
@@ -127,6 +104,9 @@ class Gazeta_Wyborcza(BasicNewsRecipe):
self.cover_url = 'http://wyborcza.pl' + soup.img['src']
return getattr(self, 'cover_url', self.cover_url)
'''def image_url_processor(self, baseurl, url):
print "@@@@@@@@", url
return url.replace('http://wyborcza.pl/ ', '')'''
def image_url_processor(self, baseurl, url):
if url.startswith(' '):
return url.strip()
else:
return url


@@ -13,7 +13,7 @@ class GN(BasicNewsRecipe):
__author__ = 'Piotr Kontek, Tomasz Długosz'
title = u'Gość Niedzielny'
description = 'Ogólnopolski tygodnik katolicki'
description = 'Ogólnopolski tygodnik katolicki - fragmenty artykułów z poprzedniego numeru'
encoding = 'utf-8'
no_stylesheets = True
language = 'pl'
@@ -33,7 +33,7 @@ class GN(BasicNewsRecipe):
a = soup.find('div',attrs={'class':'release-wp-b'}).find('a')
articles = [
{'title' : self.tag_to_string(a),
'url' : 'http://www.gosc.pl' + a['href'].replace('/doc/','/doc_pr/')
'url' : 'http://www.gosc.pl' + a['href']
}]
feeds.append((u'Wstępniak',articles))
#kategorie
@@ -71,12 +71,11 @@ class GN(BasicNewsRecipe):
def preprocess_html(self, soup):
self.append_page(soup, soup.body)
'''
for image_div in soup.findAll(attrs={'class':'doc_image'}):
link =
if 'm.jpg' in image['src']:
image['src'] = image['src'].replace('m.jpg', '.jpg')
'''
return soup
def postprocess_html(self, soup, first_fetch):
for r in soup.findAll(attrs={'class':'pgr'}):
r.extract()
return soup
keep_only_tags = [
@@ -85,12 +84,14 @@ class GN(BasicNewsRecipe):
remove_tags = [
dict(name='p', attrs={'class':['r tr', 'l l-2', 'wykop']}),
dict(name='div', attrs={'class':['doc_actions', 'pgr', 'fr1_cl']}),
dict(name='div', attrs={'id':'vote'})
dict(name='div', attrs={'class':['doc_actions', 'cf', 'fr1_cl']}),
dict(name='div', attrs={'id':'vote'}),
dict(name='a', attrs={'class':'img_enlarge'})
]
extra_css = '''
h1 {font-size:150%}
div#doc_image {font-style:italic; font-size:70%}
p.limiter {font-size:150%; font-weight: bold}
span.cm-i-a {text-transform:uppercase;}
span.cm-i-p {font-style:italic; font-size:70%}
'''

Binary files not shown: six new recipe icons (575 B, 1.4 KiB, 469 B, 766 B, 956 B and 1.1 KiB; the 766 B icon is recipes/icons/polter_pl.png) and one updated icon (850 B before, 476 B after).


@@ -4,10 +4,9 @@ from calibre.web.feeds.recipes import BasicNewsRecipe
class IHNed(BasicNewsRecipe):
stahnout_vsechny = True
#True = stahuje vsechny z homepage
#False = stahuje pouze dnesni clanky (ze dne, kdy je skript spusten)
# True = stahuje vsechny z homepage
# False = stahuje pouze dnesni clanky (ze dne, kdy je skript spusten)
title = 'iHNed'
__author__ = 'Karel Bílek'
@@ -28,38 +27,33 @@ class IHNed(BasicNewsRecipe):
'linearize_tables' : True,
}
def preprocess_html(self, soup):
def makeurl(wat):
return "http://ihned.cz"+wat;
return "http://ihned.cz"+wat
for h1 in soup.findAll('h1'):
a = h1.find('a')
if a:
string = a.string
if string:
soup.a.replaceWith(string)
a = h1.find('a')
if a:
string = a.string
if string:
soup.a.replaceWith(string)
for a in soup.findAll('a', href=True) :
cil = str(a['href'])
if cil.startswith("/") or cil.startswith("index"):
if cil.startswith("/") or cil.startswith("index"):
a['href'] = makeurl(cil)
return soup
def parse_index(self):
def makeurl(wat):
if wat.startswith("/") or wat.startswith("index"):
return "http://ihned.cz"+wat;
if wat.startswith("/") or wat.startswith("index"):
return "http://ihned.cz"+wat
else:
return wat
articles = {} #vysledek, asi
key = None #soucasna sekce
ans = [] #vsechny sekce
articles = {} # vysledek, asi
ans = [] # vsechny sekce
articles["Hlavní"] = []
ans.append("Hlavní")
@@ -70,12 +64,11 @@ class IHNed(BasicNewsRecipe):
articles[name] = []
ans.append(name)
soup = self.index_to_soup(url)
otvirak = soup.find(True, attrs={'class':['otv']})
if otvirak:
#the code is copypasted here because I don't know python. simple as that.
# the code is copypasted here because I don't know python. simple as that.
a = otvirak.find('a', href=True)
title = self.tag_to_string(a, use_alt=True).strip()
txt = otvirak.find(True, attrs={'class':['txt']})
@@ -98,13 +91,13 @@ class IHNed(BasicNewsRecipe):
a = ow.find('a', href=True)
title = self.tag_to_string(a, use_alt=True).strip()
description=''
prx = ow.find(True, attrs={'class':['prx']});
prx = ow.find(True, attrs={'class':['prx']})
if prx:
description = str(prx.string)
nfo = ow.find(True, attrs={'class':['nfo']});
nfo = ow.find(True, attrs={'class':['nfo']})
pubdate = ''
if nfo:
dtime = time.localtime();
dtime = time.localtime()
day = dtime[2]
month = dtime[1]
@@ -119,11 +112,6 @@ class IHNed(BasicNewsRecipe):
description=description,
content=''))
soup = self.index_to_soup('http://ihned.cz/')
otvirak = soup.find(True, attrs={'class':['otv']})
if otvirak:
@@ -150,7 +138,7 @@ class IHNed(BasicNewsRecipe):
a = otv2.find('a', attrs={'class':['tit2']}, href=True)
title = self.tag_to_string(a, use_alt=True).strip()
description=''
span = otv2.find('span');
span = otv2.find('span')
if span:
match = re.match(r'<span>\s*([^<]*)\s*<a', str(span), re.L)
if match:
@@ -163,20 +151,19 @@ class IHNed(BasicNewsRecipe):
content=''))
was[title]=1
parse_subpage("http://komentare.ihned.cz/", "Komentáře")
parse_subpage("http://domaci.ihned.cz", "Domácí")
parse_subpage("http://ekonomika.ihned.cz", "Ekonomika")
parse_subpage("http://zahranicni.ihned.cz/", "Zahraničí");
parse_subpage("http://finweb.ihned.cz/", "Finance");
parse_subpage("http://digiweb.ihned.cz/", "DigiWeb");
parse_subpage("http://zahranicni.ihned.cz/", "Zahraničí")
parse_subpage("http://finweb.ihned.cz/", "Finance")
parse_subpage("http://digiweb.ihned.cz/", "DigiWeb")
parse_subpage("http://kultura.ihned.cz/", "Kultura")
parse_subpage("http://sport.ihned.cz/", "Sport");
parse_subpage("http://sport.ihned.cz/", "Sport")
#seradi kategorie
# seradi kategorie
ans = self.sort_index_by(ans, {'Hlavni':1, 'Domácí':2, 'Ekonomika':5, 'Zahraničí':3, 'Finance':6, 'DigiWeb':7, 'Kultura':8, 'Sport':9, 'Komentáře':4})
#vrati, ale pouze, kdyz je v kategoriich...
ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
# vrati, ale pouze, kdyz je v kategoriich...
ans = [(key, articles[key]) for key in ans if key in articles]
return ans


@@ -0,0 +1,62 @@
__license__ = 'GPL v3'
from calibre.web.feeds.news import BasicNewsRecipe
class LinuxPortal(BasicNewsRecipe):
title = u'LinuxPortal'
__author__ = 'fenuks'
description = u'Na LinuxPortal.pl znajdziesz wiadomości o systemie Linux, open source oraz Androidzie.'
category = 'it'
#publication_type = ''
language = 'pl'
#encoding = ''
#extra_css = ''
cover_url = 'http://www.linuxportal.pl/templates/css/loga/Linuxportal.gif'
masthead_url = 'http://www.linuxportal.pl/templates/css/loga/Linuxportal.gif'
use_embedded_content = False
oldest_article = 7
max_articles_per_feed = 20
no_stylesheets = True
remove_empty_feeds = True
remove_javascript = True
remove_attributes = ['style', 'font']
ignore_duplicate_articles = {'title', 'url'}
auto_cleanup = True
#keep_only_tags = [dict()]
#remove_tags = [dict()]
#remove_tags_after = dict()
#remove_tags_before = dict()
def parse_index(self):
feeds = []
feeds.append((u'Wszystkie wiadomości', self.get_articles('http://www.linuxportal.pl/news/wszystkie')))
return feeds
def get_articles(self, url):
articles = []
blacklist = {'dobreprogramy.pl', 'osworld.pl', 'osnews.pl',}
nexturl = url
counter = 0
skip = False
while counter < self.max_articles_per_feed:
soup = self.index_to_soup(nexturl)
nexturl = soup.find(attrs={'title':'Starsze wyniki'})['href']
for tag in soup.findAll(attrs={'class':'lista_wizyt_kol_tytul_news'}):
title = tag.h2.a.string
url = tag.find(attrs={'class':'linkzrodlo'})['href']
date = ''
for item in blacklist:
if item in url:
counter -= 1
skip = True
break
if skip:
skip = False
continue
articles.append({'title' : title,
'url' : url,
'date' : date,
'description' : ''
})
counter += 1
return articles

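linuxportal.recipe above aggregates links to external sources, so get_articles() skips items whose target URL sits on a small domain blacklist before they count toward max_articles_per_feed. A simplified standalone variant of that filtering (not the recipe's exact counter logic); the items argument and the cap value are placeholders.

BLACKLIST = {'dobreprogramy.pl', 'osworld.pl', 'osnews.pl'}   # domains from the recipe above
CAP = 20                                                      # stand-in for max_articles_per_feed

def collect(items):
    # items: iterable of (title, url) pairs scraped from the listing pages
    articles = []
    for title, url in items:
        if any(domain in url for domain in BLACKLIST):
            continue                              # blacklisted source: skip, do not count it
        articles.append({'title': title, 'url': url, 'date': '', 'description': ''})
        if len(articles) >= CAP:
            break
    return articles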

@@ -0,0 +1,33 @@
from calibre.web.feeds.news import BasicNewsRecipe
class Pikoboard(BasicNewsRecipe):
title = u'Pikoboard.pl'
__author__ = 'fenuks'
description = u'Portal poświęcony takim urządzeniom jak: Raspberry Pi, XBMC, ODROID-X, BeagleBoard czy CuBox. Systemy operacyjne, modyfikacje oraz obudowy i innego rodzaju dodatki.'
category = 'IT, open source, Linux, Raspberry Pi'
language = 'pl'
cover_url = 'http://picoboard.pl/wp-content/themes/portal/img/logo.jpg'
extra_css = 'img.alignleft {float: left; margin-right: 5px;}'
oldest_article = 7
max_articles_per_feed = 100
no_stylesheets = True
remove_empty_feeds = True
use_embedded_content = False
keep_only_tags = [dict(id=['dzial', 'posts'])]
remove_tags = [dict(attrs={'class':'post-comments'})]
remove_tags_after = dict(attrs={'class':'entry clr'})
feeds = [(u'Newsy', u'http://picoboard.pl/feed/atom/'), (u'Artyku\u0142y', u'http://picoboard.pl/category/artykuly/feed/')]
def append_page(self, soup, appendtag):
tag = appendtag.find(attrs={'id':'paginacja'})
if tag:
for nexturl in tag.findAll('a'):
soup2 = self.index_to_soup(nexturl['href'])
pagetext = soup2.find(attrs={'class':'entry clr'})
pos = len(appendtag.contents)
appendtag.insert(pos, pagetext)
for r in appendtag.findAll(attrs={'id':'paginacja'}):
r.extract()
def preprocess_html(self, soup):
self.append_page(soup, soup.body)
return soup

43 recipes/polter_pl.recipe Normal file

@@ -0,0 +1,43 @@
__license__ = 'GPL v3'
import re
from calibre.web.feeds.news import BasicNewsRecipe
class Poltergeist(BasicNewsRecipe):
title = u'Poltergeist'
__author__ = 'fenuks'
description = u'Największy polski serwis poświęcony ogólno pojętej fantastyce - grom fabularnym (RPG), książkom, filmowi, komiksowi, grom planszowym, karcianym i bitewnym.'
category = 'fantasy, books, rpg, games'
#publication_type = ''
language = 'pl'
#encoding = ''
extra_css = '.image, .floatright {float: right; margin-left: 10px;} .floatleft {float: left; margin-right: 10px;}'
cover_url = 'http://static.polter.pl/sub/promo/bpromo2524.jpg'
#masthead_url = ''
use_embedded_content = False
oldest_article = 7
preprocess_regexps = [(re.compile(ur'<div[^>]*?id="pol_lista"[^>]*?>.*</body>', re.DOTALL|re.IGNORECASE), lambda match: '</body>'), (re.compile(ur'<a[^>]*?>wersja do druku</a>', re.DOTALL|re.IGNORECASE), lambda match: '')]
max_articles_per_feed = 100
no_stylesheets = True
remove_empty_feeds = True
remove_javascript = True
remove_attributes = ['style', 'font']
ignore_duplicate_articles = {'title', 'url'}
keep_only_tags = [dict(attrs={'class':'boxcontent'})]
remove_tags = [dict(attrs={'class':'fb-like'}), dict(attrs={'alt':'Wersja do druku'}), dict(id='pol_liczba'), dict(attrs={'scr':'http://static.polter.pl/tplimg/buttons/ceneo_140_40.gif'})]
remove_tags_after = dict(attrs={'class':'fb-like'})
#remove_tags_before = dict()
feeds = [(u'Wieści', 'http://polter.pl/wiesci,rss.html'), (u'RPG', 'http://rpg.polter.pl/wiesci,rss.html'), (u'Książki', 'http://ksiazki.polter.pl/wiesci,rss.html'), (u'Film', 'http://film.polter.pl/wiesci,rss.html'), (u'Komiks', 'http://komiks.polter.pl/wiesci,rss.html'), (u'Gry bitewne', 'http://bitewniaki.polter.pl/wiesci,rss.html'), (u'Gry karciane', 'http://karcianki.polter.pl/wiesci,rss.html'), (u'Gry planszowe', 'http://planszowki.polter.pl/wiesci,rss.html'), (u'Gry PC', 'http://gry.polter.pl/wiesci,rss.html'), (u'Gry konsolowe', 'http://konsole.polter.pl/wiesci,rss.html'), (u'Konwenty', 'http://konwenty.polter.pl/wiesci,rss.html'), (u'Blogi', 'http://polter.pl/blogi,rss.html')]
def preprocess_html(self, soup):
for s in soup.findAll(attrs={'style':re.compile('float: ?left')}):
s['class'] = 'floatleft'
for s in soup.findAll(attrs={'style':re.compile('float: ?right')}):
s['class'] = 'floatright'
tag = soup.find(id='twoja_ocena')
if tag:
tag.parent.extract()
for tag in soup.findAll(id='lista_chce_ile'):
tag.parent.parent.extract()
return soup

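polter_pl.recipe above (like several other recipes in this commit) trims everything from a marker element to the end of the document before parsing, by mapping a DOTALL regex to a bare '</body>' in preprocess_regexps. A tiny self-contained illustration of that technique on made-up HTML:

import re

html = u'<body><p>article text</p><div id="pol_lista">related links and comments</div></body>'
trimmed = re.sub(r'<div[^>]*?id="pol_lista"[^>]*?>.*</body>', '</body>',
                 html, flags=re.DOTALL | re.IGNORECASE)
print(trimmed)   # -> <body><p>article text</p></body>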
28 recipes/sekurak_pl.recipe Normal file

@@ -0,0 +1,28 @@
from calibre.web.feeds.news import BasicNewsRecipe
class Sekurak(BasicNewsRecipe):
title = u'Sekurak'
__author__ = 'fenuks'
description = u'Wiadomości z dziedziny bezpieczeństwa'
category = 'it, security'
#publication_type = ''
language = 'pl'
#encoding = ''
#extra_css = ''
cover_url = 'http://www.securitum.pl/aktualnosci/sekurak.pl/image'
masthead_url = ''
use_embedded_content = False
oldest_article = 7
max_articles_per_feed = 100
no_stylesheets = True
remove_empty_feeds = True
remove_javascript = True
remove_attributes = ['style', 'font']
ignore_duplicate_articles = {'title', 'url'}
keep_only_tags = [dict(id='articleContent')]
#remove_tags = []
#remove_tags_after = dict()
#remove_tags_before = dict()
feeds = [(u'Wpisy', u'http://feeds.feedburner.com/sekurak')]


@@ -0,0 +1,38 @@
__license__ = 'GPL v3'
import re
from calibre.web.feeds.news import BasicNewsRecipe
class TawernaRPG(BasicNewsRecipe):
title = u'Tawerna RPG'
__author__ = 'fenuks'
description = u'Tawerna RPG to ogólnopolski serwis zajmujący się fantastyką i grami fantastycznymi. Znajdziesz u nas zarówno gry fabularne, karciane, planszowe i komputerowe, a także recenzje, opowiadania i sporą dawkę humoru.'
category = 'fantasy, rpg, board games'
#publication_type = ''
language = 'pl'
#encoding = ''
extra_css = '.slajd {list-style-type: none; padding-left: 0px; margin-left: 0px;} .lewanc {float: left; margin-right: 5px;} .srodek {display: block; margin-left: auto; margin-right: auto;}'
cover_url = 'http://www.tawerna.rpg.pl/img/logo.png'
#masthead_url = ''
preprocess_regexps = [(re.compile(ur'<h2>Dodaj komentarz</h2>.*</body>', re.DOTALL|re.IGNORECASE), lambda match: '</body>')]
use_embedded_content = False
oldest_article = 7
max_articles_per_feed = 100
no_stylesheets = True
remove_empty_feeds = True
remove_javascript = True
remove_attributes = ['style', 'font']
ignore_duplicate_articles = {'title', 'url'}
keep_only_tags = [dict(id='site')]
remove_tags = [dict(id=['player', 'komentarz'])]
remove_tags_after = dict(id='komentarz')
#remove_tags_before = dict()
feeds = [(u'Artykuły', 'http://www.tawerna.rpg.pl/css/rss.rss')]
def preprocess_html(self, soup):
for r in soup.findAll(attrs={'class':'powi'}):
r.parent.extract()
for c in soup.findAll(name=['li', 'ol', 'ul']):
c.name = 'div'
return soup