some fixes and a new recipe

2026-01-04 11:10:20 -05:00 · 2013-03-28 21:47:00 +01:00 · 2013-03-28 21:47:00 +01:00 · 084b8bd3dd
commit 084b8bd3dd
parent af8584474b
19 changed files with 302 additions and 164 deletions
--- a/recipes/astroflesz.recipe
+++ b/recipes/astroflesz.recipe
@ -2,12 +2,12 @@
 from calibre.web.feeds.news import BasicNewsRecipe

 class Astroflesz(BasicNewsRecipe):
-    title          = u'Astroflesz'
+    title = u'Astroflesz'
    oldest_article = 7
-    __author__        = 'fenuks'
-    description   = u'astroflesz.pl - to portal poświęcony astronomii. Informuje zarówno o aktualnych wydarzeniach i odkryciach naukowych, jak również zapowiada ciekawe zjawiska astronomiczne'
-    category       = 'astronomy'
-    language       = 'pl'
+    __author__ = 'fenuks'
+    description = u'astroflesz.pl - to portal poświęcony astronomii. Informuje zarówno o aktualnych wydarzeniach i odkryciach naukowych, jak również zapowiada ciekawe zjawiska astronomiczne'
+    category = 'astronomy'
+    language = 'pl'
    cover_url = 'http://www.astroflesz.pl/templates/astroflesz/images/logo/logo.png'
    ignore_duplicate_articles = {'title', 'url'}
    max_articles_per_feed = 100
@ -17,7 +17,7 @@ class Astroflesz(BasicNewsRecipe):
    keep_only_tags = [dict(id="k2Container")]
    remove_tags_after = dict(name='div', attrs={'class':'itemLinks'})
    remove_tags = [dict(name='div', attrs={'class':['itemLinks', 'itemToolbar', 'itemRatingBlock']})]
-    feeds          = [(u'Wszystkie', u'http://astroflesz.pl/?format=feed')]
+    feeds = [(u'Wszystkie', u'http://astroflesz.pl/?format=feed')]

    def postprocess_html(self, soup, first_fetch):
        t = soup.find(attrs={'class':'itemIntroText'})
--- a/recipes/badania_net.recipe
+++ b/recipes/badania_net.recipe
@ -1,17 +1,20 @@
 from calibre.web.feeds.news import BasicNewsRecipe
+import re
 class BadaniaNet(BasicNewsRecipe):
-    title          = u'badania.net'
+    title = u'badania.net'
    __author__ = 'fenuks'
-    description   = u'chcesz wiedzieć więcej?'
-    category       = 'science'
-    language       = 'pl'
+    description = u'chcesz wiedzieć więcej?'
+    category = 'science'
+    language = 'pl'
    cover_url = 'http://badania.net/wp-content/badanianet_green_transparent.png'
+    extra_css = '.alignleft {float:left; margin-right:5px;} .alignright {float:right; margin-left:5px;}'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
+    preprocess_regexps = [(re.compile(r"<h4>Tekst sponsoruje</h4>", re.IGNORECASE), lambda m: ''),]
    remove_empty_feeds = True
    use_embedded_content = False
    remove_tags = [dict(attrs={'class':['omc-flex-category', 'omc-comment-count', 'omc-single-tags']})]
    remove_tags_after = dict(attrs={'class':'omc-single-tags'})
    keep_only_tags = [dict(id='omc-full-article')]
-    feeds          = [(u'Psychologia', u'http://badania.net/category/psychologia/feed/'), (u'Technologie', u'http://badania.net/category/technologie/feed/'), (u'Biologia', u'http://badania.net/category/biologia/feed/'), (u'Chemia', u'http://badania.net/category/chemia/feed/'), (u'Zdrowie', u'http://badania.net/category/zdrowie/'), (u'Seks', u'http://badania.net/category/psychologia-ewolucyjna-tematyka-seks/feed/')]
+    # Each entry must point at the category's RSS endpoint (.../feed/), not at the HTML category listing.
+    feeds = [(u'Psychologia', u'http://badania.net/category/psychologia/feed/'), (u'Technologie', u'http://badania.net/category/technologie/feed/'), (u'Biologia', u'http://badania.net/category/biologia/feed/'), (u'Chemia', u'http://badania.net/category/chemia/feed/'), (u'Zdrowie', u'http://badania.net/category/zdrowie/feed/'), (u'Seks', u'http://badania.net/category/psychologia-ewolucyjna-tematyka-seks/feed/')]
--- a/recipes/film_org_pl.recipe
+++ b/recipes/film_org_pl.recipe
@ -1,20 +1,54 @@
 # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
 from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.ebooks.BeautifulSoup import Comment
 import re
 class FilmOrgPl(BasicNewsRecipe):
-    title          = u'Film.org.pl'
-    __author__        = 'fenuks'
-    description   = u"Recenzje, analizy, artykuły, rankingi - wszystko o filmie dla miłośników kina. Opisy efektów specjalnych, wersji reżyserskich, remake'ów, sequeli. No i forum filmowe. Jedne z największych w Polsce."
-    category       = 'film'
-    language       = 'pl'
-    extra_css = '.alignright {float:right; margin-left:5px;} .alignleft {float:left; margin-right:5px;}'
+    title = u'Film.org.pl'
+    __author__ = 'fenuks'
+    description = u"Recenzje, analizy, artykuły, rankingi - wszystko o filmie dla miłośników kina. Opisy efektów specjalnych, wersji reżyserskich, remake'ów, sequeli. No i forum filmowe. Jedne z największych w Polsce."
+    category = 'film'
+    language = 'pl'
+    extra_css = '.alignright {float:right; margin-left:5px;} .alignleft {float:left; margin-right:5px;} .recenzja-title {font-size: 150%; margin-top: 5px; margin-bottom: 5px;}'
    cover_url = 'http://film.org.pl/wp-content/themes/KMF/images/logo_kmf10.png'
    ignore_duplicate_articles = {'title', 'url'}
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
+    remove_javascript = True
    remove_empty_feeds = True
-    use_embedded_content = True
-    preprocess_regexps     = [(re.compile(ur'<h3>Przeczytaj także:</h3>.*', re.IGNORECASE|re.DOTALL), lambda m: '</body>'), (re.compile(ur'<div>Artykuł</div>', re.IGNORECASE), lambda m: ''), (re.compile(ur'<div>Ludzie filmu</div>', re.IGNORECASE), lambda m: '')]
-    remove_tags = [dict(name='img', attrs={'alt':['Ludzie filmu', u'Artykuł']})]
-    feeds          = [(u'Recenzje', u'http://film.org.pl/r/recenzje/feed/'), (u'Artyku\u0142', u'http://film.org.pl/a/artykul/feed/'), (u'Analiza', u'http://film.org.pl/a/analiza/feed/'), (u'Ranking', u'http://film.org.pl/a/ranking/feed/'), (u'Blog', u'http://film.org.pl/kmf/blog/feed/'), (u'Ludzie', u'http://film.org.pl/a/ludzie/feed/'), (u'Seriale', u'http://film.org.pl/a/seriale/feed/'), (u'Oceanarium', u'http://film.org.pl/a/ocenarium/feed/'), (u'VHS', u'http://film.org.pl/a/vhs-a/feed/')]
+    use_embedded_content = False
+    remove_attributes = ['style']
+    preprocess_regexps = [(re.compile(ur'<h3>Przeczytaj także:</h3>.*', re.IGNORECASE|re.DOTALL), lambda m: '</body>'), (re.compile(ur'</?center>', re.IGNORECASE|re.DOTALL), lambda m: ''), (re.compile(ur'<div>Artykuł</div>', re.IGNORECASE), lambda m: ''), (re.compile(ur'<div>Ludzie filmu</div>', re.IGNORECASE), lambda m: ''), (re.compile(ur'(<br ?/?>\s*?){2,}', re.IGNORECASE|re.DOTALL), lambda m: '')]
+    keep_only_tags = [dict(name=['h11', 'h16', 'h17']), dict(attrs={'class':'editor'})]
+    remove_tags_after = dict(id='comments')
+    remove_tags = [dict(name=['link', 'meta', 'style']), dict(name='img', attrs={'alt':['Ludzie filmu', u'Artykuł']}), dict(id='comments'), dict(attrs={'style':'border: 0pt none ; margin: 0pt; padding: 0pt;'}), dict(name='p', attrs={'class':'rating'}), dict(attrs={'layout':'button_count'})]
+    feeds = [(u'Recenzje', u'http://film.org.pl/r/recenzje/feed/'), (u'Artyku\u0142', u'http://film.org.pl/a/artykul/feed/'), (u'Analiza', u'http://film.org.pl/a/analiza/feed/'), (u'Ranking', u'http://film.org.pl/a/ranking/feed/'), (u'Blog', u'http://film.org.pl/kmf/blog/feed/'), (u'Ludzie', u'http://film.org.pl/a/ludzie/feed/'), (u'Seriale', u'http://film.org.pl/a/seriale/feed/'), (u'Oceanarium', u'http://film.org.pl/a/ocenarium/feed/'), (u'VHS', u'http://film.org.pl/a/vhs-a/feed/')]
+
+    def append_page(self, soup, appendtag):
+        '''Append the remaining pages of a paginated article to appendtag.
+
+        Every link inside the div with class "pagelink" in soup is fetched
+        with index_to_soup. The element with class "editor" on each fetched
+        page has its HTML comments stripped and is inserted at the end of
+        appendtag. Afterwards the pager and the other elements listed in
+        ``leftovers`` are extracted from appendtag. Nothing happens when
+        soup has no "pagelink" div.
+        '''
+        tag = soup.find('div', attrs={'class': 'pagelink'})
+        if not tag:
+            return
+        for nexturl in tag.findAll('a'):
+            url = nexturl.get('href')
+            if not url:
+                # Anchor without a target; there is nothing to download.
+                continue
+            soup2 = self.index_to_soup(url)
+            pagetext = soup2.find(attrs={'class': 'editor'})
+            if pagetext is None:
+                # find() returns None when the page has no "editor" element;
+                # skip it rather than fail with AttributeError on findAll.
+                continue
+            comments = pagetext.findAll(text=lambda text:isinstance(text, Comment))
+            for comment in comments:
+                comment.extract()
+            appendtag.insert(len(appendtag.contents), pagetext)
+        # Attribute filters for elements dropped from the merged article,
+        # processed in this order.
+        leftovers = [
+            {'class': 'pagelink'},
+            {'id': 'comments'},
+            {'style':'border: 0pt none ; margin: 0pt; padding: 0pt;'},
+            {'layout':'button_count'},
+        ]
+        for attrs in leftovers:
+            for r in appendtag.findAll(attrs=attrs):
+                r.extract()
+                
+    def preprocess_html(self, soup):
+        '''Rename h11 headings to h1, merge follow-up pages, drop all <br> tags.
+
+        Returns the same soup object after modifying it in place.
+        '''
+        for heading in soup.findAll('h11'):
+            heading.name = 'h1'
+        # Pull the remaining pages of the article into the body first, so the
+        # <br> removal below also covers the appended content.
+        self.append_page(soup, soup.body)
+        for line_break in soup.findAll('br'):
+            line_break.extract()
+        return soup
--- a/recipes/gram_pl.recipe
+++ b/recipes/gram_pl.recipe
@ -2,22 +2,22 @@ from calibre.web.feeds.news import BasicNewsRecipe
 from calibre.ebooks.BeautifulSoup import BeautifulSoup

 class Gram_pl(BasicNewsRecipe):
-    title          = u'Gram.pl'
-    __author__        = 'fenuks'
-    description   = u'Serwis społecznościowy o grach: recenzje, newsy, zapowiedzi, encyklopedia gier, forum. Gry PC, PS3, X360, PS Vita, sprzęt dla graczy.'
-    category       = 'games'
-    language       = 'pl'
+    title = u'Gram.pl'
+    __author__ = 'fenuks'
+    description = u'Serwis społecznościowy o grach: recenzje, newsy, zapowiedzi, encyklopedia gier, forum. Gry PC, PS3, X360, PS Vita, sprzęt dla graczy.'
+    category = 'games'
+    language = 'pl'
    oldest_article = 8
    index='http://www.gram.pl'
    max_articles_per_feed = 100
    ignore_duplicate_articles = {'title', 'url'}
    no_stylesheets= True
    remove_empty_feeds = True
-    #extra_css = 'h2 {font-style: italic;  font-size:20px;} .picbox div {float: left;}'
+    #extra_css = 'h2 {font-style: italic; font-size:20px;} .picbox div {float: left;}'
    cover_url=u'http://www.gram.pl/www/01/img/grampl_zima.png'
    keep_only_tags= [dict(id='articleModule')]
-    remove_tags = [dict(attrs={'class':['breadCrump', 'dymek', 'articleFooter', 'twitter-share-button']})]
-    feeds          = [(u'Informacje', u'http://www.gram.pl/feed_news.asp'),
+    remove_tags = [dict(attrs={'class':['breadCrump', 'dymek', 'articleFooter', 'twitter-share-button']}), dict(name='aside')]
+    feeds = [(u'Informacje', u'http://www.gram.pl/feed_news.asp'),
                        (u'Publikacje', u'http://www.gram.pl/feed_news.asp?type=articles')
                        ]

@ -46,4 +46,4 @@ class Gram_pl(BasicNewsRecipe):
        tag=soup.find(name='span', attrs={'class':'platforma'})
        if tag:
           tag.name = 'p'
-        return soup
+        return soup
--- a/recipes/historia_pl.recipe
+++ b/recipes/historia_pl.recipe
@ -1,27 +1,22 @@
 from calibre.web.feeds.news import BasicNewsRecipe

 class Historia_org_pl(BasicNewsRecipe):
-    title          = u'Historia.org.pl'
-    __author__        = 'fenuks'
-    description   = u'Artykuły dotyczące historii w układzie epok i tematów, forum. Najlepsza strona historii. Matura z historii i egzamin gimnazjalny z historii.'
-    cover_url      = 'http://lh3.googleusercontent.com/_QeRQus12wGg/TOvHsZ2GN7I/AAAAAAAAD_o/LY1JZDnq7ro/logo5.jpg'
-    category       = 'history'
-    language       = 'pl'
+    title = u'Historia.org.pl'
+    __author__ = 'fenuks'
+    description = u'Artykuły dotyczące historii w układzie epok i tematów, forum. Najlepsza strona historii. Matura z historii i egzamin gimnazjalny z historii.'
+    cover_url = 'http://lh3.googleusercontent.com/_QeRQus12wGg/TOvHsZ2GN7I/AAAAAAAAD_o/LY1JZDnq7ro/logo5.jpg'
+    category = 'history'
+    language = 'pl'
    oldest_article = 8
+    extra_css = 'img {float: left; margin-right: 10px;} .alignleft {float: left; margin-right: 10px;}'
    remove_empty_feeds= True
    no_stylesheets = True
    use_embedded_content = True
    max_articles_per_feed = 100
    ignore_duplicate_articles = {'title', 'url'}
-
-
-    feeds          = [(u'Wszystkie', u'http://historia.org.pl/feed/'),
-		(u'Wiadomości', u'http://historia.org.pl/Kategoria/wiadomosci/feed/'),
-		(u'Publikacje', u'http://historia.org.pl/Kategoria/artykuly/feed/'),
-		(u'Publicystyka', u'http://historia.org.pl/Kategoria/publicystyka/feed/'),
-		(u'Recenzje', u'http://historia.org.pl/Kategoria/recenzje/feed/'),
-		(u'Projekty', u'http://historia.org.pl/Kategoria/projekty/feed/'),]
-
-
-    def print_version(self, url):
-        return url + '?tmpl=component&print=1&layout=default&page='
+    feeds = [(u'Wszystkie', u'http://historia.org.pl/feed/'),
+        (u'Wiadomości', u'http://historia.org.pl/Kategoria/wiadomosci/feed/'),
+        (u'Publikacje', u'http://historia.org.pl/Kategoria/artykuly/feed/'),
+        (u'Publicystyka', u'http://historia.org.pl/Kategoria/publicystyka/feed/'),
+        (u'Recenzje', u'http://historia.org.pl/Kategoria/recenzje/feed/'),
+        (u'Projekty', u'http://historia.org.pl/Kategoria/projekty/feed/'),]
--- a/recipes/icons/sport_pl.png
+++ b/recipes/icons/sport_pl.png
--- a/recipes/infra_pl.recipe
+++ b/recipes/infra_pl.recipe
@ -1,21 +1,20 @@
 from calibre.web.feeds.news import BasicNewsRecipe

 class INFRA(BasicNewsRecipe):
-    title          = u'INFRA'
+    title = u'INFRA'
    oldest_article = 7
    max_articles_per_feed = 100
-    __author__        = 'fenuks'
-    description   = u'Serwis Informacyjny INFRA - UFO, Zjawiska Paranormalne, Duchy, Tajemnice świata.'
-    cover_url      = 'http://npn.nazwa.pl/templates/ja_teline_ii/images/logo.jpg'
-    category       = 'UFO'
+    __author__ = 'fenuks'
+    description = u'Serwis Informacyjny INFRA - UFO, Zjawiska Paranormalne, Duchy, Tajemnice świata.'
+    cover_url = 'http://i.imgur.com/j7hJT.jpg'
+    category = 'UFO'
    index='http://infra.org.pl'
-    language       = 'pl'
+    language = 'pl'
    max_articles_per_feed = 100
-    no_stylesheers=True
-    remove_tags_before=dict(name='h2', attrs={'class':'contentheading'})
-    remove_tags_after=dict(attrs={'class':'pagenav'})
-    remove_tags=[dict(attrs={'class':'pagenav'})]
-    feeds          = [(u'Najnowsze wiadomo\u015bci', u'http://www.infra.org.pl/rss')]
+    # BasicNewsRecipe reads remove_attributes; the misspelled remove_attrs was silently ignored.
+    remove_attributes = ['style']
+    no_stylesheets = True
+    keep_only_tags = [dict(id='ja-current-content')]
+    feeds = [(u'Najnowsze wiadomo\u015bci', u'http://www.infra.org.pl/rss')]

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
@ -23,4 +22,4 @@ class INFRA(BasicNewsRecipe):
        for a in soup('a'):
            if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
                a['href']=self.index + a['href']
-        return soup
+        return soup
--- a/recipes/kdefamily_pl.recipe
+++ b/recipes/kdefamily_pl.recipe
@ -1,14 +1,16 @@
+import re
 from calibre.web.feeds.news import BasicNewsRecipe

 class KDEFamilyPl(BasicNewsRecipe):
-    title          = u'KDEFamily.pl'
-    __author__        = 'fenuks'
-    description   = u'KDE w Polsce'
-    category       = 'open source, KDE'
-    language       = 'pl'
+    title = u'KDEFamily.pl'
+    __author__ = 'fenuks'
+    description = u'KDE w Polsce'
+    category = 'open source, KDE'
+    language = 'pl'
    cover_url = 'http://www.mykde.home.pl/kdefamily/wp-content/uploads/2012/07/logotype-e1341585198616.jpg'
    oldest_article = 7
    max_articles_per_feed = 100
+    preprocess_regexps = [(re.compile(r"Podobne wpisy.*", re.IGNORECASE|re.DOTALL), lambda m: '')]
    no_stylesheets = True
    use_embedded_content = True
-    feeds          = [(u'Wszystko', u'http://kdefamily.pl/feed/')]
+    feeds = [(u'Wszystko', u'http://kdefamily.pl/feed/')]
--- a/recipes/konflikty_zbrojne.recipe
+++ b/recipes/konflikty_zbrojne.recipe
@ -3,10 +3,10 @@ from calibre.web.feeds.news import BasicNewsRecipe
 from calibre.ebooks.BeautifulSoup import BeautifulSoup

 class Konflikty(BasicNewsRecipe):
-    title          = u'Konflikty Zbrojne'
-    __author__        = 'fenuks'
-    cover_url      = 'http://www.konflikty.pl/images/tapety_logo.jpg'
-    language       = 'pl'
+    title = u'Konflikty Zbrojne'
+    __author__ = 'fenuks'
+    cover_url = 'http://www.konflikty.pl/images/tapety_logo.jpg'
+    language = 'pl'
    description = u'Zbiór ciekawych artykułów historycznych, militarnych oraz recenzji książek, gier i filmów. Najświeższe informacje o lotnictwie, wojskach lądowych i polityce.'
    category='military, history'
    oldest_article = 7
@ -14,19 +14,20 @@ class Konflikty(BasicNewsRecipe):
    no_stylesheets = True
    keep_only_tags=[dict(attrs={'class':['title1', 'image']}), dict(id='body')]

-    feeds          = [(u'Aktualności', u'http://www.konflikty.pl/rss_aktualnosci_10.xml'),
-		(u'Historia', u'http://www.konflikty.pl/rss_historia_10.xml'),
-		(u'Militaria', u'http://www.konflikty.pl/rss_militaria_10.xml'),
-		(u'Relacje', u'http://www.konflikty.pl/rss_relacje_10.xml'),
-		(u'Recenzje', u'http://www.konflikty.pl/rss_recenzje_10.xml'),
-		(u'Teksty źródłowe', u'http://www.konflikty.pl/rss_tekstyzrodlowe_10.xml')]
+    feeds = [(u'Aktualności', u'http://www.konflikty.pl/rss_aktualnosci_10.xml'),
+        (u'Historia', u'http://www.konflikty.pl/rss_historia_10.xml'),
+        (u'Militaria', u'http://www.konflikty.pl/rss_militaria_10.xml'),
+        (u'Relacje', u'http://www.konflikty.pl/rss_relacje_10.xml'),
+        (u'Recenzje', u'http://www.konflikty.pl/rss_recenzje_10.xml'),
+        (u'Teksty źródłowe', u'http://www.konflikty.pl/rss_tekstyzrodlowe_10.xml')]

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        for image in soup.findAll(name='a', attrs={'class':'image'}):
+            image['style'] = 'width: 210px; float: left; margin-right:5px;'
            if image.img and image.img.has_key('alt'):
                image.name='div'
                pos = len(image.contents)
                image.insert(pos, BeautifulSoup('<p style="font-style:italic;">'+image.img['alt']+'</p>'))
-        return soup
+        return soup
--- a/recipes/kosmonauta_pl.recipe
+++ b/recipes/kosmonauta_pl.recipe
@ -2,12 +2,13 @@

 from calibre.web.feeds.news import BasicNewsRecipe
 class Kosmonauta(BasicNewsRecipe):
-    title          = u'Kosmonauta.net'
-    __author__        = 'fenuks'
-    description   = u'polskojęzyczny portal w całości dedykowany misjom kosmicznym i badaniom kosmosu.'
-    category       = 'astronomy'
-    language       = 'pl'
+    title = u'Kosmonauta.net'
+    __author__ = 'fenuks'
+    description = u'polskojęzyczny portal w całości dedykowany misjom kosmicznym i badaniom kosmosu.'
+    category = 'astronomy'
+    language = 'pl'
    cover_url = 'http://bi.gazeta.pl/im/4/10393/z10393414X,Kosmonauta-net.jpg'
+    extra_css = '.thumbnail {float:left;margin-right:5px;}'
    no_stylesheets = True
    INDEX = 'http://www.kosmonauta.net'
    oldest_article = 7
@ -16,9 +17,12 @@ class Kosmonauta(BasicNewsRecipe):
    remove_attributes = ['style']
    max_articles_per_feed = 100
    keep_only_tags = [dict(name='div', attrs={'class':'item-page'})]
-    remove_tags = [dict(attrs={'class':['article-tools clearfix', 'cedtag', 'nav clearfix', 'jwDisqusForm']})]
+    remove_tags = [dict(attrs={'class':['article-tools clearfix', 'cedtag', 'nav clearfix', 'jwDisqusForm']}), dict(attrs={'alt':['Poprzednia strona', 'Następna strona']})]
    remove_tags_after = dict(name='div', attrs={'class':'cedtag'})
-    feeds          = [(u'Kosmonauta.net', u'http://www.kosmonauta.net/?format=feed&type=atom')]
+    feeds = [(u'Kosmonauta.net', u'http://www.kosmonauta.net/?format=feed&type=atom')]
+    
+    def print_version(self, url):
+        '''Return url with the print-view query string appended.'''
+        print_query = '?tmpl=component&print=1&layout=default&page='
+        return url + print_query

    def preprocess_html(self, soup):
        for a in soup.findAll(name='a'):
@ -26,5 +30,4 @@ class Kosmonauta(BasicNewsRecipe):
               href = a['href']
               if not href.startswith('http'):
                   a['href'] = self.INDEX + href
-        return soup
-            
+        return soup
--- a/recipes/mlody_technik_pl.recipe
+++ b/recipes/mlody_technik_pl.recipe
@ -2,13 +2,14 @@
 import re
 from calibre.web.feeds.news import BasicNewsRecipe
 class Mlody_technik(BasicNewsRecipe):
-    title          = u'Młody technik'
-    __author__        = 'fenuks'
-    description   = u'Młody technik'
-    category       = 'science'
-    language       = 'pl'
+    title = u'Młody technik'
+    __author__ = 'fenuks'
+    description = u'Młody technik'
+    category = 'science'
+    language = 'pl'
    #cover_url = 'http://science-everywhere.pl/wp-content/uploads/2011/10/mt12.jpg'
    no_stylesheets = True
+    extra_css = 'img.alignleft {float: left; margin-right: 5px;}'
    preprocess_regexps = [(re.compile(r"<h4>Podobne</h4>", re.IGNORECASE), lambda m: '')]
    oldest_article = 7
    max_articles_per_feed = 100
@ -17,18 +18,18 @@ class Mlody_technik(BasicNewsRecipe):
    keep_only_tags = [dict(id='content')]
    remove_tags = [dict(attrs={'class':'st-related-posts'})]
    remove_tags_after = dict(attrs={'class':'entry-content clearfix'})
-    feeds          = [(u'Wszystko', u'http://www.mt.com.pl/feed'), 
-		#(u'MT NEWS 24/7', u'http://www.mt.com.pl/kategoria/mt-newsy-24-7/feed'),
-		(u'Info zoom', u'http://www.mt.com.pl/kategoria/info-zoom/feed'),
-		(u'm.technik', u'http://www.mt.com.pl/kategoria/m-technik/feed'),
-		(u'Szkoła', u'http://www.mt.com.pl/kategoria/szkola-2/feed'),
-		(u'Na Warsztacie', u'http://www.mt.com.pl/kategoria/na-warsztacie/feed'),
-		(u'Z pasji do...', u'http://www.mt.com.pl/kategoria/z-pasji-do/feed'),
-		(u'MT testuje', u'http://www.mt.com.pl/kategoria/mt-testuje/feed')]
+    feeds = [(u'Wszystko', u'http://www.mt.com.pl/feed'),
+        #(u'MT NEWS 24/7', u'http://www.mt.com.pl/kategoria/mt-newsy-24-7/feed'),
+        (u'Info zoom', u'http://www.mt.com.pl/kategoria/info-zoom/feed'),
+        (u'm.technik', u'http://www.mt.com.pl/kategoria/m-technik/feed'),
+        (u'Szkoła', u'http://www.mt.com.pl/kategoria/szkola-2/feed'),
+        (u'Na Warsztacie', u'http://www.mt.com.pl/kategoria/na-warsztacie/feed'),
+        (u'Z pasji do...', u'http://www.mt.com.pl/kategoria/z-pasji-do/feed'),
+        (u'MT testuje', u'http://www.mt.com.pl/kategoria/mt-testuje/feed')]

    def get_cover_url(self):
        soup = self.index_to_soup('http://www.mt.com.pl/')
        tag = soup.find(attrs={'class':'xoxo'})
        if tag:
            self.cover_url = tag.find('img')['src']
-        return getattr(self, 'cover_url', self.cover_url)
+        return getattr(self, 'cover_url', self.cover_url)
--- a/recipes/nauka_w_polsce.recipe
+++ b/recipes/nauka_w_polsce.recipe
@ -1,16 +1,18 @@
 from calibre.web.feeds.news import BasicNewsRecipe
 import re
 class NaukawPolsce(BasicNewsRecipe):
-    title          = u'Nauka w Polsce'
-    __author__        = 'fenuks'
-    description   = u'Serwis Nauka w Polsce ma za zadanie popularyzację polskiej nauki. Można na nim znaleźć wiadomości takie jak:  osiągnięcia polskich naukowców, wydarzenia na polskich uczelniach, osiągnięcia studentów, konkursy dla badaczy, staże i stypendia naukowe, wydarzenia w polskiej nauce, kalendarium wydarzeń w nauce, materiały wideo o nauce.'
-    category       = 'science'
-    language       = 'pl'
+    title = u'Nauka w Polsce'
+    __author__ = 'fenuks'
+    description = u'Serwis Nauka w Polsce ma za zadanie popularyzację polskiej nauki. Można na nim znaleźć wiadomości takie jak: osiągnięcia polskich naukowców, wydarzenia na polskich uczelniach, osiągnięcia studentów, konkursy dla badaczy, staże i stypendia naukowe, wydarzenia w polskiej nauce, kalendarium wydarzeń w nauce, materiały wideo o nauce.'
+    category = 'science'
+    language = 'pl'
    cover_url = 'http://www.naukawpolsce.pap.pl/Themes/Pap/images/logo-pl.gif'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_empty_feeds = True
+    extra_css = '.miniaturka {float: left; margin-right: 5px; max-width: 350px;} .miniaturka-dol-strony {display: inline-block; margin: 0 15px; width: 120px;}'
+    ignore_duplicate_articles = {'title', 'url'}
    index = 'http://www.naukawpolsce.pl'
    keep_only_tags = [dict(name='div', attrs={'class':'margines wiadomosc'})]
    remove_tags = [dict(name='div', attrs={'class':'tagi'})]
@ -23,8 +25,8 @@ class NaukawPolsce(BasicNewsRecipe):
            url = self.index + i.h1.a['href']
            date = '' #i.span.string
            articles.append({'title' : title,
-                   'url'   : url,
-                   'date'  : date,
+                   'url' : url,
+                   'date' : date,
                   'description' : ''
                    })
        return articles
@ -44,4 +46,4 @@ class NaukawPolsce(BasicNewsRecipe):
    def preprocess_html(self, soup):
        for p in soup.findAll(name='p', text=re.compile('&nbsp;')):
            p.extract()
-        return soup
+        return soup
--- a/recipes/niebezpiecznik.recipe
+++ b/recipes/niebezpiecznik.recipe
@ -1,17 +1,19 @@
 from calibre.web.feeds.news import BasicNewsRecipe

 class Niebezpiecznik_pl(BasicNewsRecipe):
-    title          = u'Niebezpiecznik.pl'
-    __author__        = 'fenuks'
-    description   = u'Niebezpiecznik.pl – o bezpieczeństwie i nie...'
-    category       = 'hacking, IT'
-    language       = 'pl'
+    title = u'Niebezpiecznik.pl'
+    __author__ = 'fenuks'
+    description = u'Niebezpiecznik.pl – o bezpieczeństwie i nie...'
+    category = 'hacking, IT'
+    language = 'pl'
    oldest_article = 8
+    extra_css = '.entry {margin-top: 25px;}'
+    # BasicNewsRecipe reads remove_attributes; the misspelled remove_attrs was silently ignored.
+    remove_attributes = ['style']
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_empty_feeds = True
    cover_url = u'http://userlogos.org/files/logos/Karmody/niebezpiecznik_01.png'
    remove_tags = [dict(name='div', attrs={'class':['sociable']}), dict(name='h4'), dict(attrs={'class':'similar-posts'})]
    keep_only_tags = [dict(name='div', attrs={'class':['title', 'entry']})]
-    feeds          = [(u'Wiadomości', u'http://feeds.feedburner.com/niebezpiecznik/'),
-              ('Blog', 'http://feeds.feedburner.com/niebezpiecznik/linkblog/')]
+    feeds = [(u'Wiadomości', u'http://feeds.feedburner.com/niebezpiecznik/'),
+              ('Blog', 'http://feeds.feedburner.com/niebezpiecznik/linkblog/')]
--- a/recipes/osworld_pl.recipe
+++ b/recipes/osworld_pl.recipe
@ -1,11 +1,12 @@
 from calibre.web.feeds.news import BasicNewsRecipe
 class OSWorld(BasicNewsRecipe):
-    title          = u'OSWorld.pl'
-    __author__        = 'fenuks'
-    description   = u'OSWorld.pl to serwis internetowy, dzięki któremu poznasz czym naprawdę jest Open Source. Serwis poświęcony jest wolnemu oprogramowaniu jak linux mint, centos czy ubunty. Znajdziecie u nasz artykuły, unity oraz informacje o certyfikatach CACert. OSWorld to mały świat wielkich systemów!'
-    category       = 'OS, IT, open source, Linux'
-    language       = 'pl'
+    title = u'OSWorld.pl'
+    __author__ = 'fenuks'
+    description = u'OSWorld.pl to serwis internetowy, dzięki któremu poznasz czym naprawdę jest Open Source. Serwis poświęcony jest wolnemu oprogramowaniu jak linux mint, centos czy ubunty. Znajdziecie u nasz artykuły, unity oraz informacje o certyfikatach CACert. OSWorld to mały świat wielkich systemów!'
+    category = 'OS, IT, open source, Linux'
+    language = 'pl'
    cover_url = 'http://osworld.pl/wp-content/uploads/osworld-kwadrat-128x111.png'
+    extra_css = 'img.alignleft {float: left; margin-right: 5px;}'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
@ -14,7 +15,7 @@ class OSWorld(BasicNewsRecipe):
    keep_only_tags = [dict(id=['dzial', 'posts'])]
    remove_tags = [dict(attrs={'class':'post-comments'})]
    remove_tags_after = dict(attrs={'class':'entry clr'})
-    feeds          = [(u'Artyku\u0142y', u'http://osworld.pl/category/artykuly/feed/'), (u'Nowe wersje', u'http://osworld.pl/category/nowe-wersje/feed/')]
+    feeds = [(u'Artyku\u0142y', u'http://osworld.pl/category/artykuly/feed/'), (u'Nowe wersje', u'http://osworld.pl/category/nowe-wersje/feed/')]

    def append_page(self, soup, appendtag):
        tag = appendtag.find(attrs={'id':'paginacja'})
@ -30,4 +31,4 @@ class OSWorld(BasicNewsRecipe):

    def preprocess_html(self, soup):
         self.append_page(soup, soup.body)
-         return soup
+         return soup
--- a/recipes/pc_centre_pl.recipe
+++ b/recipes/pc_centre_pl.recipe
@ -1,20 +1,21 @@
 from calibre.web.feeds.news import BasicNewsRecipe
 class PC_Centre(BasicNewsRecipe):
-    title          = u'PC Centre'
+    title = u'PC Centre'
    oldest_article = 7
    max_articles_per_feed = 100
-    __author__        = 'fenuks'
-    description   = u'Portal komputerowy, a w nim: testy sprzętu komputerowego, recenzje gier i oprogramowania. a także opisy produktów związanych z komputerami.'
-    category       = 'IT'
-    language       = 'pl'
+    __author__ = 'fenuks'
+    description = u'Portal komputerowy, a w nim: testy sprzętu komputerowego, recenzje gier i oprogramowania. a także opisy produktów związanych z komputerami.'
+    category = 'IT'
+    language = 'pl'
    masthead_url= 'http://pccentre.pl/views/images/logo.gif'
    cover_url= 'http://pccentre.pl/views/images/logo.gif'
    no_stylesheets = True
    remove_empty_feeds = True
+    ignore_duplicate_articles = {'title', 'url'}
    #keep_only_tags= [dict(id='content')]
    #remove_tags=[dict(attrs={'class':['ikony r', 'list_of_content', 'dot accordion']}), dict(id='comments')]
    remove_tags=[dict(attrs={'class':'logo_print'})]
-    feeds          = [(u'Aktualno\u015bci', u'http://pccentre.pl/backend.php'), (u'Publikacje', u'http://pccentre.pl/backend.php?mode=a'), (u'Sprz\u0119t komputerowy', u'http://pccentre.pl/backend.php?mode=n&section=2'), (u'Oprogramowanie', u'http://pccentre.pl/backend.php?mode=n&section=3'), (u'Gry komputerowe i konsole', u'http://pccentre.pl/backend.php?mode=n&section=4'), (u'Internet', u'http://pccentre.pl/backend.php?mode=n&section=7'), (u'Bezpiecze\u0144stwo', u'http://pccentre.pl/backend.php?mode=n&section=5'), (u'Multimedia', u'http://pccentre.pl/backend.php?mode=n&section=6'), (u'Biznes', u'http://pccentre.pl/backend.php?mode=n&section=9')]
+    feeds = [(u'Aktualno\u015bci', u'http://pccentre.pl/backend.php'), (u'Publikacje', u'http://pccentre.pl/backend.php?mode=a'), (u'Sprz\u0119t komputerowy', u'http://pccentre.pl/backend.php?mode=n&section=2'), (u'Oprogramowanie', u'http://pccentre.pl/backend.php?mode=n&section=3'), (u'Gry komputerowe i konsole', u'http://pccentre.pl/backend.php?mode=n&section=4'), (u'Internet', u'http://pccentre.pl/backend.php?mode=n&section=7'), (u'Bezpiecze\u0144stwo', u'http://pccentre.pl/backend.php?mode=n&section=5'), (u'Multimedia', u'http://pccentre.pl/backend.php?mode=n&section=6'), (u'Biznes', u'http://pccentre.pl/backend.php?mode=n&section=9')]

    def print_version(self, url):
        return url.replace('show', 'print')
--- a/recipes/sport_pl.recipe
+++ b/recipes/sport_pl.recipe
@ -0,0 +1,72 @@
+#!/usr/bin/env  python
+
+__license__ = 'GPL v3'
+__copyright__ = 'teepel 2012'
+
+'''
+sport.pl
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+import re
+
+class sport_pl(BasicNewsRecipe):
+    title          = 'Sport.pl'
+    __author__ = 'teepel <teepel44@gmail.com>'
+    language       = 'pl'
+    description =u'Największy portal sportowy w Polsce. Wiadomości sportowe z najważniejszych wydarzeń, relacje i wyniki meczów na żywo.'
+    masthead_url='http://press.gazeta.pl/file/mediakit/154509/c8/sportpl.jpg'
+    oldest_article = 1
+    max_articles_per_feed = 100
+    remove_javascript=True
+    no_stylesheets=True
+    remove_empty_feeds = True
+
+    keep_only_tags =[]
+    keep_only_tags.append(dict(name = 'div', attrs = {'id' : 'article'}))
+
+    remove_tags =[]
+    remove_tags.append(dict(name = 'a', attrs = {'href' : 'www.gazeta.pl'}))
+
+    feeds          = [
+                            (u'Wszystkie wiadomości', u'http://rss.gazeta.pl/pub/rss/sport.xml'),
+                            (u'Piłka nożna', u'http://www.sport.pl/pub/rss/sport/pilka_nozna.htm'),
+                            (u'F1', u'http://www.sport.pl/pub/rss/sportf1.htm'),
+                            (u'Tenis', u'http://serwisy.gazeta.pl/pub/rss/tenis.htm'),
+                            (u'Siatkówka', u'http://gazeta.pl.feedsportal.com/c/32739/f/611628/index.rss'),
+                            (u'Koszykówka', u'http://gazeta.pl.feedsportal.com/c/32739/f/611647/index.rss'),
+                            (u'Piłka ręczna', u'http://gazeta.pl.feedsportal.com/c/32739/f/611635/index.rss'),
+                            (u'Inne sporty', u'http://gazeta.pl.feedsportal.com/c/32739/f/611649/index.rss'),
+                         ]
+    def parse_feeds(self):
+        feeds = BasicNewsRecipe.parse_feeds(self)
+        for feed in feeds:
+            for article in feed.articles[:]:
+                if '[ZDJĘCIA]' in article.title:
+                    article.title = article.title.replace('[ZDJĘCIA]','')
+                elif '[WIDEO]' in article.title:
+                    article.title = article.title.replace('[WIDEO]','')
+        return feeds
+
+    def print_version(self, url):
+        if 'feedsportal' in url:
+            segment = url.split('/')
+            urlPart = segment[-2]
+            urlPart = urlPart.replace('0L0Ssport0Bpl0C','')
+            urlPart = urlPart.replace('0C10H','/')
+            urlPart = urlPart.replace('0H',',')
+            urlPart = urlPart.replace('0I','_')
+            urlPart = urlPart.replace('A','')
+            segment1 = urlPart.split('/')
+            seg1 = segment1[0]
+            seg2 = segment1[1]
+            segment2 = seg2.split(',')
+            part = segment2[0] + ',' + segment2[1]
+            return 'http://www.sport.pl/' + seg1 +  '/2029020,' + part + '.html'
+        else:
+            segment = url.split('/')
+            part2 = segment[-2]
+            part1 = segment[-1]
+            segment2 = part1.split(',')
+            part = segment2[1] + ',' + segment2[2]
+            return 'http://www.sport.pl/' + part2 + '/2029020,' + part + '.html'
--- a/recipes/stopklatka.recipe
+++ b/recipes/stopklatka.recipe
--- a/recipes/tablety_pl.recipe
+++ b/recipes/tablety_pl.recipe
@ -1,18 +1,20 @@
 from calibre.web.feeds.news import BasicNewsRecipe
 import re
 class Tablety_pl(BasicNewsRecipe):
-    title          = u'Tablety.pl'
-    __author__        = 'fenuks'
-    description   = u'Tablety, gry i aplikacje na tablety.'
+    title = u'Tablety.pl'
+    __author__ = 'fenuks'
+    description = u'Tablety, gry i aplikacje na tablety.'
    masthead_url= 'http://www.tablety.pl/wp-content/themes/kolektyw/img/logo.png'
-    cover_url      = 'http://www.tablety.pl/wp-content/themes/kolektyw/img/logo.png'
-    category       = 'IT'
-    language       = 'pl'
-    use_embedded_content=True
+    cover_url = 'http://www.tablety.pl/wp-content/themes/kolektyw/img/logo.png'
+    category = 'IT'
+    language = 'pl'
+    use_embedded_content = False
+    no_stylesheets = True
    oldest_article = 8
    max_articles_per_feed = 100
    preprocess_regexps = [(re.compile(ur'<p><strong>Przeczytaj także.*?</a></strong></p>', re.DOTALL), lambda match: ''), (re.compile(ur'<p><strong>Przeczytaj koniecznie.*?</a></strong></p>', re.DOTALL), lambda match: '')]
+    keep_only_tags = [dict(id='news_block')]
    #remove_tags_before=dict(name="h1", attrs={'class':'entry-title'})
    #remove_tags_after=dict(name="footer", attrs={'class':'entry-footer clearfix'})
-    #remove_tags=[dict(name='footer', attrs={'class':'entry-footer clearfix'}), dict(name='div', attrs={'class':'entry-comment-counter'})]
-    feeds          = [(u'Najnowsze posty', u'http://www.tablety.pl/feed/')]
+    remove_tags=[dict(attrs={'class':['comments_icon', 'wp-polls', 'entry-comments']})]
+    feeds = [(u'Najnowsze posty', u'http://www.tablety.pl/feed/')]
--- a/recipes/wirtualnemedia_pl.recipe
+++ b/recipes/wirtualnemedia_pl.recipe
@ -1,21 +1,22 @@
 from calibre.web.feeds.news import BasicNewsRecipe

 class WirtualneMedia(BasicNewsRecipe):
-    title          = u'wirtualnemedia.pl'
+    title = u'wirtualnemedia.pl'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    remove_empty_feeds = True
-    __author__        = 'fenuks'
+    __author__ = 'fenuks'
    extra_css = '.thumbnail {float:left; max-width:150px; margin-right:5px;}'
-    description   = u'Portal o mediach, reklamie, internecie, PR, telekomunikacji - nr 1 w Polsce - WirtualneMedia.pl - wiadomości z pierwszej ręki.'
-    category       = 'internet'
-    language       = 'pl'
+    description = u'Portal o mediach, reklamie, internecie, PR, telekomunikacji - nr 1 w Polsce - WirtualneMedia.pl - wiadomości z pierwszej ręki.'
+    category = 'internet'
+    language = 'pl'
+    ignore_duplicate_articles = {'title', 'url'}
    masthead_url= 'http://i.wp.pl/a/f/jpeg/8654/wirtualnemedia.jpeg'
    cover_url= 'http://static.wirtualnemedia.pl/img/logo_wirtualnemedia_newsletter.gif'
    remove_tags=[dict(id=['header', 'footer'])]
-    feeds          = [(u'Gospodarka', u'http://www.wirtualnemedia.pl/rss/wm_gospodarka.xml'),
+    feeds = [(u'Gospodarka', u'http://www.wirtualnemedia.pl/rss/wm_gospodarka.xml'),
        (u'Internet', u'http://www.wirtualnemedia.pl/rss/wm_internet.xml'),
        (u'Kultura', u'http://www.wirtualnemedia.pl/rss/wm_kulturarozrywka.xml'),
        (u'Badania', u'http://www.wirtualnemedia.pl/rss/wm_marketing.xml'),
@ -24,8 +25,6 @@ class WirtualneMedia(BasicNewsRecipe):
        (u'Reklama', u'http://www.wirtualnemedia.pl/rss/wm_reklama.xml'),
        (u'PR', u'http://www.wirtualnemedia.pl/rss/wm_relations.xml'),
        (u'Technologie', u'http://www.wirtualnemedia.pl/rss/wm_telekomunikacja.xml'),
-        (u'Telewizja', u'http://www.wirtualnemedia.pl/rss/wm_telewizja_rss.xml')
-        ]
-
+        (u'Telewizja', u'http://www.wirtualnemedia.pl/rss/wm_telewizja_rss.xml')]
    def print_version(self, url):
        """Return the print-view address derived from ``url``.

        Every occurrence of the substring 'artykul' in the article URL is
        swapped for 'print'; a URL without 'artykul' comes back unchanged.
        """
        # Splitting on the marker and re-joining with the replacement is
        # equivalent to replacing every occurrence of it.
        pieces = url.split('artykul')
        return 'print'.join(pieces)