changes from kalibrator project

2025-11-27 08:45:00 -05:00 · 2013-05-05 21:24:49 +02:00 · 2013-05-05 21:24:49 +02:00 · a3bbcf2bfa
commit a3bbcf2bfa
parent 840f41017a
17 changed files with 52 additions and 49 deletions
--- a/recipes/benchmark_pl.recipe
+++ b/recipes/benchmark_pl.recipe
@ -12,12 +12,15 @@ class BenchmarkPl(BasicNewsRecipe):
    language       = 'pl'
    oldest_article = 8
    max_articles_per_feed = 100
    extra_css = 'ul {list-style-type: none;}'
    no_stylesheets = True
-    remove_attributes = ['style']
+    #remove_attributes = ['style']
    preprocess_regexps = [(re.compile(ur'<h3><span style="font-size: small;">&nbsp;Zobacz poprzednie <a href="http://www.benchmark.pl/news/zestawienie/grupa_id/135">Opinie dnia:</a></span>.*</body>', re.DOTALL|re.IGNORECASE), lambda match: '</body>'), (re.compile(ur'Więcej o .*?</ul>', re.DOTALL|re.IGNORECASE), lambda match: '')]
-    keep_only_tags = [dict(name='div', attrs={'class':['m_zwykly', 'gallery']}), dict(id='article')]
+
    keep_only_tags = [dict(id=['articleHeader', 'articleGallery']), dict(name='div', attrs={'class':['m_zwykly', 'gallery']}), dict(id='article')]
    remove_tags_after = dict(id='article')
    remove_tags = [dict(name='div', attrs={'class':['comments', 'body', 'kategoria', 'socialize', 'thumb', 'panelOcenaObserwowane', 'categoryNextToSocializeGallery', 'breadcrumb', 'footer', 'moreTopics']}), dict(name='table', attrs = {'background':'http://www.benchmark.pl/uploads/backend_img/a/fotki_newsy/opinie_dnia/bg.png'}), dict(name='table', attrs={'width':'210', 'cellspacing':'1', 'cellpadding':'4', 'border':'0', 'align':'right'})]
    INDEX = 'http://www.benchmark.pl'
    feeds          = [(u'Aktualności', u'http://www.benchmark.pl/rss/aktualnosci-pliki.xml'),
                          (u'Testy i recenzje', u'http://www.benchmark.pl/rss/testy-recenzje-minirecenzje.xml')]
@ -42,46 +45,16 @@ class BenchmarkPl(BasicNewsRecipe):
        for r in appendtag.findAll(attrs={'class':'changePage'}):
            r.extract()
    def image_article(self, soup, appendtag):
        nexturl = soup.find('div', attrs={'class':'preview'})
        if nexturl:
            nexturl = nexturl.find('a', attrs={'class':'move_next'})
            image = appendtag.find('div', attrs={'class':'preview'}).div['style'][16:]
            image = self.INDEX + image[:image.find("')")]
            appendtag.find(attrs={'class':'preview'}).name='img'
            appendtag.find(attrs={'class':'preview'})['src']=image
            appendtag.find('a', attrs={'class':'move_next'}).extract()
        while nexturl:
            nexturl = self.INDEX + nexturl['href']
            soup2 = self.index_to_soup(nexturl)
            nexturl = soup2.find('a', attrs={'class':'move_next'})
            image = soup2.find('div', attrs={'class':'preview'}).div['style'][16:]
            image = self.INDEX + image[:image.find("')")]
            soup2.find(attrs={'class':'preview'}).name='img'
            soup2.find(attrs={'class':'preview'})['src']=image
            pagetext = soup2.find('div', attrs={'class':'gallery'})
            pagetext.find('div', attrs={'class':'title'}).extract()
            pagetext.find('div', attrs={'class':'thumb'}).extract()
            pagetext.find('div', attrs={'class':'panelOcenaObserwowane'}).extract()       
            if nexturl:
                pagetext.find('a', attrs={'class':'move_next'}).extract()
            pagetext.find('a', attrs={'class':'move_back'}).extract()
            comments = pagetext.findAll(text=lambda text:isinstance(text, Comment))
            for comment in comments:
                comment.extract()
            pos = len(appendtag.contents)
            appendtag.insert(pos, pagetext)
    def preprocess_html(self, soup):
-        if soup.find('div', attrs={'class':'preview'}):
+        self.append_page(soup, soup.body)
            self.image_article(soup, soup.body)
        else:
            self.append_page(soup, soup.body)
        for a in soup('a'):
            if a.has_key('href') and not a['href'].startswith('http'):
                a['href'] = self.INDEX + a['href']
        for r in soup.findAll(attrs={'class':['comments', 'body']}):
            r.extract()
        tag1 = soup.find(attrs={'class':'inlineGallery'})
        if tag1:
            for tag in tag1.findAll('li'):
                tag['style'] = 'float: left; margin-right: 10px;'
            tag1.findNext('p')['style'] = 'clear: both;'
        return soup
--- a/recipes/computerworld_pl.recipe
+++ b/recipes/computerworld_pl.recipe
@ -16,7 +16,7 @@ class Computerworld_pl(BasicNewsRecipe):
    preprocess_regexps = [(re.compile(u'Zobacz również:', re.IGNORECASE), lambda m: ''), (re.compile(ur'[*]+reklama[*]+', re.IGNORECASE), lambda m: ''),]
    keep_only_tags = [dict(id=['szpaltaL', 's2011'])]
    remove_tags_after = dict(name='div', attrs={'class':'tresc'})
-    remove_tags = [dict(attrs={'class':['nnav', 'rMobi', 'tagi', 'rec']}),]
+    remove_tags = [dict(attrs={'class':['nnav', 'rMobi', 'tagi', 'rec']}), dict(name='a', attrs={'target':'_blank'})]
    feeds          = [(u'Wiadomo\u015bci', u'http://rssout.idg.pl/cw/news_iso.xml')]
    def skip_ad_pages(self, soup):
--- a/recipes/conowego_pl.recipe
+++ b/recipes/conowego_pl.recipe
@ -15,6 +15,7 @@ class CoNowegoPl(BasicNewsRecipe):
    no_stylesheets = True
    remove_empty_feeds = True
    use_embedded_content = False
    ignore_duplicate_articles = {'title', 'url'}
    keep_only_tags = [dict(name='div', attrs={'class':'news_list single_view'})]
    remove_tags = [dict(name='div', attrs={'class':['ni_bottom', 'ni_rank', 'ni_date']})]
    feeds          = [(u'Aktualno\u015bci', u'http://www.conowego.pl/rss/aktualnosci-5/?type=100'), (u'Gaming', u'http://www.conowego.pl/rss/gaming-6/?type=100'), (u'Porady', u'http://www.conowego.pl/rss/porady-3/?type=100'), (u'Testy', u'http://www.conowego.pl/rss/testy-2/?type=100')]
--- a/recipes/di.recipe
+++ b/recipes/di.recipe
@ -14,7 +14,7 @@ class DziennikInternautowRecipe(BasicNewsRecipe):
    __author__ = 'Mori'
    language = 'pl'
-    title = u'Dziennik Internautow'
+    title = u'Dziennik Internautów'
    publisher = u'Dziennik Internaut\u00f3w Sp. z o.o.'
    description = u'Internet w \u017cyciu i biznesie. Porady, wywiady, interwencje, bezpiecze\u0144stwo w Sieci, technologia.'
--- a/recipes/dziennik_lodzki.recipe
+++ b/recipes/dziennik_lodzki.recipe
@ -16,7 +16,7 @@ class DziennikLodzki(BasicNewsRecipe):
    ignore_duplicate_articles = {'title', 'url'}
    #preprocess_regexps = [(re.compile(ur'<b>Czytaj także:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur',<b>Czytaj też:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>Zobacz także:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<center><h4><a.*?</a></h4></center>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>CZYTAJ TEŻ:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>CZYTAJ WIĘCEJ:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>CZYTAJ TAKŻE:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>\* CZYTAJ KONIECZNIE:.*', re.DOTALL), lambda match: '</body>'), (re.compile(ur'<b>Nasze serwisy:</b>.*', re.DOTALL), lambda match: '</body>') ]
    remove_tags_after= dict(attrs={'src':'http://nm.dz.com.pl/dz.png'})
-    remove_tags=[dict(id='mat-podobne'), dict(name='a', attrs={'class':'czytajDalej'}), dict(attrs={'src':'http://nm.dz.com.pl/dz.png'})]
+    remove_tags=[dict(id='mat-podobne'), dict(name='a', attrs={'class':'czytajDalej'}), dict(attrs={'src':'http://nm.dz.com.pl/dz.png'}), dict(name='a', attrs={'href':'http://www.dzienniklodzki.pl/piano'})]
    feeds          = [(u'Na sygnale', u'http://www.dzienniklodzki.pl/rss/dzienniklodzki_nasygnale.xml?201302'), (u'\u0141\xf3d\u017a', u'http://www.dzienniklodzki.pl/rss/dzienniklodzki_lodz.xml?201302'), (u'Opinie', u'http://www.dzienniklodzki.pl/rss/dzienniklodzki_opinie.xml?201302'), (u'Pieni\u0105dze', u'http://dzienniklodzki.feedsportal.com/c/32980/f/533763/index.rss?201302'), (u'Kultura', u'http://dzienniklodzki.feedsportal.com/c/32980/f/533762/index.rss?201302'), (u'Sport', u'http://dzienniklodzki.feedsportal.com/c/32980/f/533761/index.rss?201302'), (u'Akcje', u'http://www.dzienniklodzki.pl/rss/dzienniklodzki_akcje.xml?201302'), (u'M\xf3j Reporter', u'http://www.dzienniklodzki.pl/rss/dzienniklodzki_mojreporter.xml?201302'), (u'Studni\xf3wki', u'http://www.dzienniklodzki.pl/rss/dzienniklodzki_studniowki.xml?201302'), (u'Kraj', u'http://www.dzienniklodzki.pl/rss/dzienniklodzki_kraj.xml?201302'), (u'Zdrowie', u'http://www.dzienniklodzki.pl/rss/dzienniklodzki_zdrowie.xml?201302')]
--- a/recipes/dziennik_zachodni.recipe
+++ b/recipes/dziennik_zachodni.recipe
@ -16,7 +16,7 @@ class DziennikZachodni(BasicNewsRecipe):
    ignore_duplicate_articles = {'title', 'url'}
    #preprocess_regexps = [(re.compile(ur'<b>Czytaj także:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur',<b>Czytaj też:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>Zobacz także:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<center><h4><a.*?</a></h4></center>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>CZYTAJ TEŻ:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>CZYTAJ WIĘCEJ:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>CZYTAJ TAKŻE:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>\* CZYTAJ KONIECZNIE:.*', re.DOTALL), lambda match: '</body>'), (re.compile(ur'<b>Nasze serwisy:</b>.*', re.DOTALL), lambda match: '</body>') ]
    remove_tags_after= dict(attrs={'src':'http://nm.dz.com.pl/dz.png'})
-    remove_tags=[dict(id='mat-podobne'), dict(name='a', attrs={'class':'czytajDalej'}), dict(attrs={'src':'http://nm.dz.com.pl/dz.png'}), dict(attrs={'href':'http://www.dziennikzachodni.pl/piano'})]
+    remove_tags=[dict(id='mat-podobne'), dict(name='a', attrs={'class':'czytajDalej'}), dict(attrs={'src':'http://nm.dz.com.pl/dz.png'}), dict(attrs={'href':'http://www.dziennikzachodni.pl/piano'}), dict(name='aside')]
    feeds          = [(u'Wszystkie', u'http://dziennikzachodni.feedsportal.com/c/32980/f/533764/index.rss?201302'), (u'Wiadomo\u015bci', u'http://dziennikzachodni.feedsportal.com/c/32980/f/533765/index.rss?201302'), (u'Regiony', u'http://www.dziennikzachodni.pl/rss/dziennikzachodni_regiony.xml?201302'), (u'Opinie', u'http://www.dziennikzachodni.pl/rss/dziennikzachodni_regiony.xml?201302'), (u'Blogi', u'http://www.dziennikzachodni.pl/rss/dziennikzachodni_blogi.xml?201302'), (u'Serwisy', u'http://www.dziennikzachodni.pl/rss/dziennikzachodni_serwisy.xml?201302'), (u'Sport', u'http://dziennikzachodni.feedsportal.com/c/32980/f/533766/index.rss?201302'), (u'M\xf3j Reporter', u'http://www.dziennikzachodni.pl/rss/dziennikzachodni_mojreporter.xml?201302'), (u'Na narty', u'http://www.dziennikzachodni.pl/rss/dziennikzachodni_nanarty.xml?201302'), (u'Drogi', u'http://www.dziennikzachodni.pl/rss/dziennikzachodni_drogi.xml?201302'), (u'Pieni\u0105dze', u'http://dziennikzachodni.feedsportal.com/c/32980/f/533768/index.rss?201302')]
--- a/recipes/echo_dnia.recipe
+++ b/recipes/echo_dnia.recipe
@ -16,6 +16,7 @@ class EchoDnia(BasicNewsRecipe):
    max_articles_per_feed = 100
    remove_empty_feeds = True
    no_stylesheets = True
    use_embedded_content = False
    ignore_duplicate_articles = {'title', 'url'}
    preprocess_regexps = [(re.compile(ur'Czytaj:.*?</a>', re.DOTALL), lambda match: ''), (re.compile(ur'Przeczytaj także:.*?</a>', re.DOTALL|re.IGNORECASE), lambda match: ''), 
--- a/recipes/ekundelek_pl.recipe
+++ b/recipes/ekundelek_pl.recipe
@ -12,7 +12,7 @@ class swiatczytnikow(BasicNewsRecipe):
    __author__ = u'Artur Stachecki'
    oldest_article = 7
    max_articles_per_feed = 100
-
+    remove_empty_feeds = True
    remove_tags = [dict(name = 'div', attrs = {'class' : 'feedflare'})]
    feeds = [(u'Wpisy', u'http://feeds.feedburner.com/Ekundelekpl?format=xml')]
--- a/recipes/emuzica_pl.recipe
+++ b/recipes/emuzica_pl.recipe
@ -11,6 +11,7 @@ class eMuzyka(BasicNewsRecipe):
    cover_url='http://s.emuzyka.pl/img/emuzyka_invert_small.jpg'
    no_stylesheets = True
    oldest_article = 7
    remove_empty_feeds = True
    max_articles_per_feed = 100
    remove_attributes = ['style']
    keep_only_tags=[dict(name='div', attrs={'id':'news_container'}), dict(name='h3'), dict(name='div', attrs={'class':'review_text'})]
--- a/recipes/gazeta_wyborcza.recipe
+++ b/recipes/gazeta_wyborcza.recipe
@ -9,6 +9,7 @@ class Gazeta_Wyborcza(BasicNewsRecipe):
    description = 'Wiadomości z Polski i ze świata. Serwisy tematyczne i lokalne w 20 miastach.'
    category = 'newspaper'
    publication_type = 'newspaper'
    #encoding = 'iso-8859-2'
    masthead_url = 'http://bi.gazeta.pl/im/5/10285/z10285445AA.jpg'
    INDEX = 'http://wyborcza.pl'
    remove_empty_feeds = True
@ -16,6 +17,7 @@ class Gazeta_Wyborcza(BasicNewsRecipe):
    max_articles_per_feed = 100
    remove_javascript = True
    no_stylesheets = True
    use_embedded_content = False
    ignore_duplicate_articles = {'title', 'url'}
    remove_tags_before = dict(id='k0')
    remove_tags_after = dict(id='banP4')
@ -24,7 +26,19 @@ class Gazeta_Wyborcza(BasicNewsRecipe):
             (u'Wyborcza.biz', u'http://wyborcza.biz/pub/rss/wyborcza_biz_wiadomosci.htm'),
             (u'Komentarze', u'http://rss.feedsportal.com/c/32739/f/530312/index.rss'),
             (u'Kultura', u'http://rss.gazeta.pl/pub/rss/gazetawyborcza_kultura.xml'),
-             (u'Nauka', u'http://rss.feedsportal.com/c/32739/f/530269/index.rss'), (u'Opinie', u'http://rss.gazeta.pl/pub/rss/opinie.xml'), (u'Gazeta \u015awi\u0105teczna', u'http://rss.feedsportal.com/c/32739/f/530431/index.rss'), (u'Du\u017cy Format', u'http://rss.feedsportal.com/c/32739/f/530265/index.rss'), (u'Witamy w Polsce', u'http://rss.feedsportal.com/c/32739/f/530476/index.rss'), (u'M\u0119ska Muzyka', u'http://rss.feedsportal.com/c/32739/f/530337/index.rss'), (u'Lata Lec\u0105', u'http://rss.feedsportal.com/c/32739/f/530326/index.rss'), (u'Solidarni z Tybetem', u'http://rss.feedsportal.com/c/32739/f/530461/index.rss'), (u'W pon. - \u017bakowski', u'http://rss.feedsportal.com/c/32739/f/530491/index.rss'), (u'We wt. - Kolenda-Zalewska', u'http://rss.feedsportal.com/c/32739/f/530310/index.rss'), (u'\u015aroda w \u015brod\u0119', u'http://rss.feedsportal.com/c/32739/f/530428/index.rss'), (u'W pi\u0105tek - Olejnik', u'http://rss.feedsportal.com/c/32739/f/530364/index.rss'), (u'Nekrologi', u'http://rss.feedsportal.com/c/32739/f/530358/index.rss')
+             (u'Nauka', u'http://rss.feedsportal.com/c/32739/f/530269/index.rss'),
             (u'Opinie', u'http://rss.gazeta.pl/pub/rss/opinie.xml'),
             (u'Gazeta \u015awi\u0105teczna', u'http://rss.feedsportal.com/c/32739/f/530431/index.rss'),
             (u'Du\u017cy Format', u'http://rss.feedsportal.com/c/32739/f/530265/index.rss'),
             (u'Witamy w Polsce', u'http://rss.feedsportal.com/c/32739/f/530476/index.rss'),
             (u'M\u0119ska Muzyka', u'http://rss.feedsportal.com/c/32739/f/530337/index.rss'),
             (u'Lata Lec\u0105', u'http://rss.feedsportal.com/c/32739/f/530326/index.rss'),
             (u'Solidarni z Tybetem', u'http://rss.feedsportal.com/c/32739/f/530461/index.rss'),
             (u'W pon. - \u017bakowski', u'http://rss.feedsportal.com/c/32739/f/530491/index.rss'),
             (u'We wt. - Kolenda-Zalewska', u'http://rss.feedsportal.com/c/32739/f/530310/index.rss'),
             (u'\u015aroda w \u015brod\u0119', u'http://rss.feedsportal.com/c/32739/f/530428/index.rss'),
             (u'W pi\u0105tek - Olejnik', u'http://rss.feedsportal.com/c/32739/f/530364/index.rss'),
             (u'Nekrologi', u'http://rss.feedsportal.com/c/32739/f/530358/index.rss')
             ]
    def skip_ad_pages(self, soup):
--- a/recipes/gildia_pl.recipe
+++ b/recipes/gildia_pl.recipe
@ -31,6 +31,14 @@ class Gildia(BasicNewsRecipe):
            for link in content.findAll(name='a'):
                if 'fragment' in link['href']:
                    return self.index_to_soup(link['href'], raw=True)
        if 'relacj' in soup.title.string.lower():
            for link in content.findAll(name='a'):
                if 'relacj' in link['href']:
                    return self.index_to_soup(link['href'], raw=True)
        if 'wywiad' in soup.title.string.lower():
            for link in content.findAll(name='a'):
                if 'wywiad' in link['href']:
                    return self.index_to_soup(link['href'], raw=True)
    def preprocess_html(self, soup):
--- a/recipes/glos_wielkopolski.recipe
+++ b/recipes/glos_wielkopolski.recipe
@ -16,7 +16,7 @@ class GlosWielkopolski(BasicNewsRecipe):
    ignore_duplicate_articles = {'title', 'url'}
    #preprocess_regexps = [(re.compile(ur'<b>Czytaj także:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur',<b>Czytaj też:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>Zobacz także:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<center><h4><a.*?</a></h4></center>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>CZYTAJ TEŻ:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>CZYTAJ WIĘCEJ:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>CZYTAJ TAKŻE:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>\* CZYTAJ KONIECZNIE:.*', re.DOTALL), lambda match: '</body>'), (re.compile(ur'<b>Nasze serwisy:</b>.*', re.DOTALL), lambda match: '</body>') ]
    remove_tags_after= dict(attrs={'src':'http://nm.dz.com.pl/dz.png'})
-    remove_tags=[dict(id='mat-podobne'), dict(name='a', attrs={'class':'czytajDalej'}), dict(attrs={'src':'http://nm.dz.com.pl/dz.png'})]
+    remove_tags=[dict(id='mat-podobne'), dict(name='a', attrs={'class':'czytajDalej'}), dict(attrs={'src':'http://nm.dz.com.pl/dz.png'}), dict(name='a', attrs={'href':'http://www.gloswielkopolski.pl/newsletter/'})]
    feeds          = [(u'Wszystkie', u'http://gloswielkopolski.feedsportal.com/c/32980/f/533779/index.rss?201302'), (u'Wiadomo\u015bci', u'http://gloswielkopolski.feedsportal.com/c/32980/f/533780/index.rss?201302'), (u'Sport', u'http://gloswielkopolski.feedsportal.com/c/32980/f/533781/index.rss?201302'), (u'Kultura', u'http://gloswielkopolski.feedsportal.com/c/32980/f/533782/index.rss?201302'), (u'Porady', u'http://www.gloswielkopolski.pl/rss/gloswielkopolski_porady.xml?201302'), (u'Blogi', u'http://www.gloswielkopolski.pl/rss/gloswielkopolski_blogi.xml?201302'), (u'Nasze akcje', u'http://www.gloswielkopolski.pl/rss/gloswielkopolski_naszeakcje.xml?201302'), (u'Opinie', u'http://www.gloswielkopolski.pl/rss/gloswielkopolski_opinie.xml?201302'), (u'Magazyn', u'http://www.gloswielkopolski.pl/rss/gloswielkopolski_magazyn.xml?201302')]
--- a/recipes/kdefamily_pl.recipe
+++ b/recipes/kdefamily_pl.recipe
@ -12,5 +12,6 @@ class KDEFamilyPl(BasicNewsRecipe):
    max_articles_per_feed = 100
    preprocess_regexps = [(re.compile(r"Podobne wpisy.*", re.IGNORECASE|re.DOTALL), lambda m: '')]
    no_stylesheets = True
    remove_empty_feeds = True
    use_embedded_content = True
    feeds = [(u'Wszystko', u'http://kdefamily.pl/feed/')]
--- a/recipes/legeartis.recipe
+++ b/recipes/legeartis.recipe
@ -21,7 +21,7 @@ class LegeArtisRecipe(BasicNewsRecipe):
    no_stylesheets = True
    remove_javascript = True
-
+    remove_empty_feeds = True
    extra_css = '''
            img{clear: both;}
    '''
--- a/recipes/lomza.recipe
+++ b/recipes/lomza.recipe
@ -8,6 +8,7 @@ class Lomza(BasicNewsRecipe):
    language       = 'pl'
    oldest_article = 15
    no_stylesheets = True
    extra_css = '#foto {float: right; max-width: 200px; margin-left: 10px;} #fotogaleria > div {float:left;} .br {clear: both;}'
    max_articles_per_feed = 100
    remove_tags=[dict(name='div', attrs={'class':['bxbanner', 'drukuj', 'wyslijznajomemu']})]
    keep_only_tags=[dict(name='div', attrs={'class':'wiadomosc'})]
--- a/recipes/pc_lab.recipe
+++ b/recipes/pc_lab.recipe
@ -1,6 +1,6 @@
 #!/usr/bin/env  python
 from calibre.web.feeds.recipes import BasicNewsRecipe
-
+from calibre.ebooks.BeautifulSoup import Comment
 class PCLab(BasicNewsRecipe):
    cover_url             = 'http://pclab.pl/img/logo.png'
    title                 = u"PC Lab"
@ -52,6 +52,9 @@ class PCLab(BasicNewsRecipe):
            pager = soup2.find('div', attrs={'class':'next'})
            pagetext = soup2.find('div', attrs={'class':'substance'})
            pagetext = pagetext.find('div', attrs={'class':'data'})
            comments = pagetext.findAll(text=lambda text:isinstance(text, Comment))
            for comment in comments:
                comment.extract()
            pos = len(appendtag.contents)
            appendtag.insert(pos, pagetext)
--- a/recipes/swiatkindle.recipe
+++ b/recipes/swiatkindle.recipe
@ -10,7 +10,7 @@ import re
 from calibre.web.feeds.news import BasicNewsRecipe
 class swiatczytnikow(BasicNewsRecipe):
-    title          = u'Swiat Czytnikow'
+    title          = u'Świat Czytników'
    description    = u'Czytniki e-książek w Polsce. Jak wybrać, kupić i korzystać z Amazon Kindle i innych'
    language = 'pl'
    __author__ = u'Tomasz D\u0142ugosz'