Update various Polish recipes

2025-12-08 06:05:04 -05:00 · 2012-02-20 10:04:40 +05:30 · 2012-02-20 10:04:40 +05:30 · 35d15d0eb5
commit 35d15d0eb5
parent 6c224d75cf
17 changed files with 137 additions and 58 deletions
--- a/recipes/archeowiesci.recipe
+++ b/recipes/archeowiesci.recipe
@ -7,6 +7,7 @@ class Archeowiesci(BasicNewsRecipe):
    language       = 'pl'
    cover_url='http://archeowiesci.pl/wp-content/uploads/2011/05/Archeowiesci2-115x115.jpg'
    oldest_article = 7
+    needs_subscription='optional'
    max_articles_per_feed = 100
    auto_cleanup = True
    remove_tags=[dict(name='span', attrs={'class':['post-ratings', 'post-ratings-loading']})]
@ -16,6 +17,16 @@ class Archeowiesci(BasicNewsRecipe):
      feeds = BasicNewsRecipe.parse_feeds(self)
      for feed in feeds:
        for article in feed.articles[:]:
-          if 'subskrypcja' in article.title:
+          if self.username is None and 'subskrypcja' in article.title:
            feed.articles.remove(article)
      return feeds
+
+    def get_browser(self):  # Return the browser used for fetching, submitting the site's login form first when credentials are set.
+        br = BasicNewsRecipe.get_browser()  # NOTE(review): called without `self` - only valid if the base get_browser is a classmethod; confirm for the calibre version in use
+        if self.username is not None and self.password is not None:  # the login step is skipped unless both values are provided
+            br.open('http://archeowiesci.pl/wp-login.php')
+            br.select_form(name='loginform')
+            br['log']   = self.username
+            br['pwd'] = self.password
+            br.submit()
+        return br  # returned whether or not the login form was submitted
--- a/recipes/astronomia_pl.recipe
+++ b/recipes/astronomia_pl.recipe
@ -1,15 +1,18 @@
 from calibre.web.feeds.news import BasicNewsRecipe
-
+import re
 class Astronomia_pl(BasicNewsRecipe):
    title          = u'Astronomia.pl'
    __author__        = 'fenuks'
    description   = 'Astronomia - polish astronomy site'
+    masthead_url      = 'http://www.astronomia.pl/grafika/logo.gif'
    cover_url      = 'http://www.astronomia.pl/grafika/logo.gif'
    category       = 'astronomy, science'
    language       = 'pl'
    oldest_article = 8
    max_articles_per_feed = 100
-    #no_stylesheets=True
+    extra_css='#h2 {font-size: 18px;}'
+    no_stylesheets=True
+    preprocess_regexps = [(re.compile(ur'<b>Przeczytaj także:.*?</BODY>', re.DOTALL), lambda match: '</BODY>') ]
    remove_tags_before=dict(name='div', attrs={'id':'a1'})
    keep_only_tags=[dict(name='div', attrs={'id':['a1', 'h2']})]
    feeds          = [(u'Wiadomości z astronomii i astronautyki', u'http://www.astronomia.pl/rss/')]
--- a/recipes/benchmark_pl.recipe
+++ b/recipes/benchmark_pl.recipe
@ -4,16 +4,17 @@ class Benchmark_pl(BasicNewsRecipe):
    title          = u'Benchmark.pl'
    __author__        = 'fenuks'
    description   = u'benchmark.pl -IT site'
+    masthead_url = 'http://www.benchmark.pl/i/logo-footer.png'
    cover_url      = 'http://www.ieaddons.pl/benchmark/logo_benchmark_new.gif'
    category       = 'IT'
    language       = 'pl'
    oldest_article = 8
    max_articles_per_feed = 100
    no_stylesheets=True
-    preprocess_regexps = [(re.compile(ur'\bWięcej o .*</body>', re.DOTALL|re.IGNORECASE), lambda match: '</body>')]
+    preprocess_regexps = [(re.compile(ur'<h3><span style="font-size: small;">&nbsp;Zobacz poprzednie <a href="http://www.benchmark.pl/news/zestawienie/grupa_id/135">Opinie dnia:</a></span>.*</body>', re.DOTALL|re.IGNORECASE), lambda match: '</body>'), (re.compile(ur'Więcej o .*?</ul>', re.DOTALL|re.IGNORECASE), lambda match: '')]
    keep_only_tags=[dict(name='div', attrs={'class':['m_zwykly', 'gallery']})]
    remove_tags_after=dict(name='div', attrs={'class':'body'})
-    remove_tags=[dict(name='div', attrs={'class':['kategoria', 'socialize', 'thumb', 'panelOcenaObserwowane', 'categoryNextToSocializeGallery']})]
+    remove_tags=[dict(name='div', attrs={'class':['kategoria', 'socialize', 'thumb', 'panelOcenaObserwowane', 'categoryNextToSocializeGallery']}), dict(name='table', attrs={'background':'http://www.benchmark.pl/uploads/backend_img/a/fotki_newsy/opinie_dnia/bg.png'}), dict(name='table', attrs={'width':'210', 'cellspacing':'1', 'cellpadding':'4', 'border':'0', 'align':'right'})]
    INDEX= 'http://www.benchmark.pl'
    feeds          = [(u'Aktualności', u'http://www.benchmark.pl/rss/aktualnosci-pliki.xml'), 
                          (u'Testy i recenzje', u'http://www.benchmark.pl/rss/testy-recenzje-minirecenzje.xml')]
--- a/recipes/biolog_pl.recipe
+++ b/recipes/biolog_pl.recipe
@ -10,10 +10,11 @@ class Biolog_pl(BasicNewsRecipe):
    description   = u'Przyrodnicze aktualności ze świata nauki (codziennie aktualizowane), kurs biologii, testy i sprawdziany, forum dyskusyjne.'
    category       = 'biology'
    language       = 'pl'
+    masthead_url= 'http://www.biolog.pl/naukowy,portal,biolog.png'
    cover_url='http://www.biolog.pl/naukowy,portal,biolog.png'
    no_stylesheets = True
    #keeps_only_tags=[dict(id='main')]
    remove_tags_before=dict(id='main')
    remove_tags_after=dict(name='a', attrs={'name':'komentarze'})
-    remove_tags=[dict(name='img', attrs={'alt':'Komentarze'})]
+    remove_tags=[dict(name='img', attrs={'alt':'Komentarze'}), dict(name='span', attrs={'class':'menu_odsylacze'})]
    feeds          = [(u'Wszystkie', u'http://www.biolog.pl/backend.php'), (u'Medycyna', u'http://www.biolog.pl/medycyna-rss.php'), (u'Ekologia', u'http://www.biolog.pl/rss-ekologia.php'), (u'Genetyka i biotechnologia', u'http://www.biolog.pl/rss-biotechnologia.php'), (u'Botanika', u'http://www.biolog.pl/rss-botanika.php'), (u'Le\u015bnictwo', u'http://www.biolog.pl/rss-lesnictwo.php'), (u'Zoologia', u'http://www.biolog.pl/rss-zoologia.php')]
--- a/recipes/cd_action.recipe
+++ b/recipes/cd_action.recipe
@ -1,16 +1,20 @@
 from calibre.web.feeds.news import BasicNewsRecipe

-
 class CD_Action(BasicNewsRecipe):
    title          = u'CD-Action'
    __author__        = 'fenuks'
-    description   = 'cdaction.pl - polish magazine about games site'
+    description   = 'cdaction.pl - polish games magazine site'
    category       = 'games'
    language       = 'pl'
    oldest_article = 8
    max_articles_per_feed = 100
    no_stylesheets= True
-    cover_url =u'http://s.cdaction.pl/obrazki/logo-CD-Action_172k9.JPG'
    keep_only_tags= dict(id='news_content')
    remove_tags_after= dict(name='div', attrs={'class':'tresc'})
    feeds          = [(u'Newsy', u'http://www.cdaction.pl/rss_newsy.xml')]
+
+
+    def get_cover_url(self):  # Build cover_url from the first link inside the element with id 'wspolnik' on the magazine page.
+        soup = self.index_to_soup('http://www.cdaction.pl/magazyn/')
+        self.cover_url='http://www.cdaction.pl'+ soup.find(id='wspolnik').div.a['href']  # NOTE(review): no guard for a missing 'wspolnik' element or missing div/a children
+        return getattr(self, 'cover_url', self.cover_url)  # cover_url was assigned on the previous line, so this returns that value
--- a/recipes/cgm_pl.recipe
+++ b/recipes/cgm_pl.recipe
@ -5,6 +5,7 @@ class CGM(BasicNewsRecipe):
    oldest_article = 7
    __author__        = 'fenuks'
    description   = u'Codzienna Gazeta Muzyczna'
+    masthead_url='http://www.cgm.pl/img/header/logo.gif'
    cover_url      = 'http://www.krafcy.com/foto/tinymce/Image/cgm%281%29.jpg'
    category       = 'music'
    language       = 'pl'
@ -23,21 +24,19 @@ class CGM(BasicNewsRecipe):


    def preprocess_html(self, soup):  # Convert the 'galleryFlash' block to a plain image, strip inline styles, and drop hustla/ebilet links.
+        gallery=soup.find('div', attrs={'class':'galleryFlash'})  # handled before the style attributes are deleted below, because the image URL is read from a style value
+        if gallery:
+            img=gallery.div  # first <div> inside the gallery; its style attribute is read below
+            gallery.img.extract()  # NOTE(review): assumes the gallery contains an <img>; no guard if it does not
+            if img:
+                img=img['style']
+                img='http://www.cgm.pl'+img[img.find('url(')+4:img.find(')')]  # text between 'url(' and the first ')' of the style value, prefixed with the site root
+                gallery.contents[1].name='img'  # the gallery's second child node is renamed to <img> ...
+                gallery.contents[1]['src']=img  # ... and pointed at the URL built above
        for item in soup.findAll(style=True):
            del item['style']
        ad=soup.findAll('a')
        for r in ad:
-            if 'http://www.hustla.pl' in r['href'] or 'http://www.ebilet.pl' in r['href']:                
+            if 'www.hustla.pl' in r['href'] or 'www.ebilet.pl' in r['href']:  # substring match without the scheme; NOTE(review): assumes every <a> has an href
                 r.extract()
-        gallery=soup.find('div', attrs={'class':'galleryFlash'})
-        if gallery:
-            img=gallery.find('embed')
-            if img:
-                img=img['src'][35:]
-                img='http://www.cgm.pl/_vault/_gallery/_photo/'+img
-                param=gallery.findAll(name='param')
-                for i in param:
-                    i.extract()
-                gallery.contents[1].name='img'
-                gallery.contents[1]['src']=img
        return soup
--- a/recipes/computerworld_pl.recipe
+++ b/recipes/computerworld_pl.recipe
@ -7,10 +7,11 @@ class Computerworld_pl(BasicNewsRecipe):
    description   = u'Serwis o IT w przemyśle, finansach, handlu, administracji oraz rynku IT i telekomunikacyjnym - wiadomości, opinie, analizy, porady prawne'
    category       = 'IT'
    language       = 'pl'
+    masthead_url= 'http://g1.computerworld.pl/cw/beta_gfx/cw2.gif'
    no_stylesheets=True
    oldest_article = 7
    max_articles_per_feed = 100
-    keep_only_tags=[dict(name='div', attrs={'id':'s'})]
+    keep_only_tags=[dict(attrs={'class':['tyt_news', 'prawo', 'autor', 'tresc']})]
    remove_tags_after=dict(name='div', attrs={'class':'rMobi'})
    remove_tags=[dict(name='div', attrs={'class':['nnav', 'rMobi']}), dict(name='table', attrs={'class':'ramka_slx'})]
    feeds          = [(u'Wiadomo\u015bci', u'http://rssout.idg.pl/cw/news_iso.xml')]
--- a/recipes/dobreprogamy.recipe
+++ b/recipes/dobreprogamy.recipe
@ -7,6 +7,7 @@ class Dobreprogramy_pl(BasicNewsRecipe):
    __licence__ ='GPL v3'
    category       = 'IT'
    language       = 'pl'
+    masthead_url='http://static.dpcdn.pl/css/Black/Images/header_logo_napis_fullVersion.png'
    cover_url = 'http://userlogos.org/files/logos/Karmody/dobreprogramy_01.png'
    description = u'Aktualności i blogi z dobreprogramy.pl'
    encoding = 'utf-8'
@ -16,7 +17,8 @@ class Dobreprogramy_pl(BasicNewsRecipe):
    oldest_article = 8
    max_articles_per_feed = 100
    preprocess_regexps = [(re.compile(ur'<div id="\S+360pmp4">Twoja przeglądarka nie obsługuje Flasha i HTML5 lub wyłączono obsługę JavaScript...</div>'), lambda match: '') ]
-    remove_tags = [dict(name='div', attrs={'class':['komentarze', 'block', 'portalInfo', 'menuBar', 'topBar']})]
-    keep_only_tags = [dict(name='div', attrs={'class':['mainBar', 'newsContent', 'postTitle title', 'postInfo', 'contentText', 'content']})]
+    keep_only_tags=[dict(attrs={'class':['news', 'entry single']})]
+    remove_tags = [dict(name='div', attrs={'class':['newsOptions', 'noPrint', 'komentarze', 'tags  font-heading-master']})]
+    #remove_tags = [dict(name='div', attrs={'class':['komentarze', 'block', 'portalInfo', 'menuBar', 'topBar']})]
    feeds = [(u'Aktualności', 'http://feeds.feedburner.com/dobreprogramy/Aktualnosci'),
                 ('Blogi', 'http://feeds.feedburner.com/dobreprogramy/BlogCzytelnikow')]
--- a/recipes/dziennik_pl.recipe
+++ b/recipes/dziennik_pl.recipe
@ -8,15 +8,17 @@ class Dziennik_pl(BasicNewsRecipe):
    description   = u'Wiadomości z kraju i ze świata. Wiadomości gospodarcze. Znajdziesz u nas informacje, wydarzenia, komentarze, opinie.'
    category       = 'newspaper'
    language       = 'pl'
-    cover_url='http://6.s.dziennik.pl/images/og_dziennik.jpg'
+    masthead_url= 'http://5.s.dziennik.pl/images/logos.png'
+    cover_url= 'http://5.s.dziennik.pl/images/logos.png'
    no_stylesheets = True
    oldest_article = 7
    max_articles_per_feed = 100
    remove_javascript=True
    remove_empty_feeds=True
-    preprocess_regexps     = [(re.compile("Komentarze:"), lambda m: '')]
+    extra_css= 'ul {list-style: none; padding: 0; margin: 0;} li {float: left;margin: 0 0.15em;}'
+    preprocess_regexps     = [(re.compile("Komentarze:"), lambda m: ''), (re.compile('<p><strong><a href=".*?">&gt;&gt;&gt; CZYTAJ TAKŻE: ".*?"</a></strong></p>'), lambda m: '')]
    keep_only_tags=[dict(id='article')]
-    remove_tags=[dict(name='div', attrs={'class':['art_box_dodatki', 'new_facebook_icons2', 'leftArt', 'article_print', 'quiz-widget']}), dict(name='a', attrs={'class':'komentarz'})]
+    remove_tags=[dict(name='div', attrs={'class':['art_box_dodatki', 'new_facebook_icons2', 'leftArt', 'article_print', 'quiz-widget', 'belka-spol', 'belka-spol belka-spol-bottom', 'art_data_tags', 'cl_right', 'boxRounded gal_inside']}), dict(name='a', attrs={'class':['komentarz', 'article_icon_addcommnent']})]
    feeds          = [(u'Wszystko', u'http://rss.dziennik.pl/Dziennik-PL/'),
 		(u'Wiadomości', u'http://rss.dziennik.pl/Dziennik-Wiadomosci'),
 		(u'Gospodarka', u'http://rss.dziennik.pl/Dziennik-Gospodarka'),
@ -30,6 +32,12 @@ class Dziennik_pl(BasicNewsRecipe):
 		(u'Podróże', u'http://rss.dziennik.pl/Dziennik-Podroze/'),
 		(u'Nieruchomości', u'http://rss.dziennik.pl/Dziennik-Nieruchomosci')]

+    def skip_ad_pages(self, soup):  # When the page carries a link titled 'CZYTAJ DALEJ', fetch that link's target and return it.
+          tag=soup.find(name='a', attrs={'title':'CZYTAJ DALEJ'})
+          if tag:  # without such a link the method falls through and returns None
+            new_soup=self.index_to_soup(tag['href'], raw=True)  # fetched with raw=True
+            return new_soup
+
    def append_page(self, soup, appendtag):
        tag=soup.find('a', attrs={'class':'page_next'})
        if tag:
@ -56,3 +64,4 @@ class Dziennik_pl(BasicNewsRecipe):
    def preprocess_html(self, soup):  # Run append_page against the document body, then return the same soup object.
         self.append_page(soup, soup.body)
         return soup
+
--- a/recipes/film_web.recipe
+++ b/recipes/film_web.recipe
@ -10,7 +10,8 @@ class Filmweb_pl(BasicNewsRecipe):
    oldest_article = 8
    max_articles_per_feed = 100
    no_stylesheets= True
-    extra_css      = '.hdrBig {font-size:22px;}'
+    remove_empty_feeds=True
+    extra_css      = '.hdrBig {font-size:22px;} ul {list-style-type:none; padding: 0; margin: 0;}'
    remove_tags= [dict(name='div', attrs={'class':['recommendOthers']}), dict(name='ul', attrs={'class':'fontSizeSet'})]
    keep_only_tags= [dict(name='h1', attrs={'class':'hdrBig'}), dict(name='div', attrs={'class':['newsInfo', 'reviewContent fontSizeCont description']})]
    feeds          = [(u'Wszystkie newsy', u'http://www.filmweb.pl/feed/news/latest'),
--- a/recipes/gazeta_wyborcza.recipe
+++ b/recipes/gazeta_wyborcza.recipe
@ -4,10 +4,11 @@ from calibre.web.feeds.news import BasicNewsRecipe
 class Gazeta_Wyborcza(BasicNewsRecipe):
    title          = u'Gazeta Wyborcza'
    __author__        = 'fenuks'
-    cover_url      = 'http://bi.gazeta.pl/im/5/10285/z10285445AA.jpg'
    language       = 'pl'
    description ='news from gazeta.pl'
    category='newspaper'
+    publication_type = 'newspaper'
+    masthead_url='http://bi.gazeta.pl/im/5/10285/z10285445AA.jpg'
    INDEX='http://wyborcza.pl'
    remove_empty_feeds= True
    oldest_article = 3
@ -81,3 +82,10 @@ class Gazeta_Wyborcza(BasicNewsRecipe):
            return url
        else:
             return url.replace('http://wyborcza.biz/biznes/1', 'http://wyborcza.biz/biznes/2029020')
+
+    def get_cover_url(self):  # Follow a link found under the 'GWmini2' element and take the first <img> of the linked page as cover_url.
+        soup = self.index_to_soup('http://wyborcza.pl/0,76762,3751429.html')
+        cover=soup.find(id='GWmini2')  # NOTE(review): no guard for the element being absent
+        soup = self.index_to_soup('http://wyborcza.pl/'+ cover.contents[3].a['href'])  # positional lookup of the element's fourth child node - NOTE(review): depends on the exact child layout
+        self.cover_url='http://wyborcza.pl' + soup.img['src']  # first <img> in the second page
+        return getattr(self, 'cover_url', self.cover_url)  # cover_url was assigned on the previous line, so this returns that value
--- a/recipes/gry_online_pl.recipe
+++ b/recipes/gry_online_pl.recipe
@ -8,29 +8,31 @@ class Gry_online_pl(BasicNewsRecipe):
    language       = 'pl'
    oldest_article = 13
    INDEX= 'http://www.gry-online.pl/'
-    cover_url='http://www.gry-online.pl/img/1st_10/1st-gol-logo.png'
+    masthead_url='http://www.gry-online.pl/im/gry-online-logo.png'
+    cover_url='http://www.gry-online.pl/im/gry-online-logo.png'
    max_articles_per_feed = 100
    no_stylesheets= True
-    extra_css              = 'p.wn1{font-size:22px;}'
-    remove_tags_after= [dict(name='div', attrs={'class':['tresc-newsa']})]
-    keep_only_tags = [dict(name='div', attrs={'class':['txthead']}), dict(name='p', attrs={'class':['wtx1', 'wn1', 'wob']}), dict(name='a', attrs={'class':['num_str_nex']})]
-    #remove_tags= [dict(name='div', attrs={'class':['news_plat']})]
+    keep_only_tags=[dict(name='div', attrs={'class':'gc660'})]
+    remove_tags=[dict({'class':['nav-social', 'add-info', 'smlb', 'lista lista3 lista-gry', 'S013po', 'zm_gfx_cnt_bottom', 'ocen-txt', 'wiecej-txt', 'wiecej-txt2']})]
    feeds          = [(u'Newsy', 'http://www.gry-online.pl/rss/news.xml'), ('Teksty', u'http://www.gry-online.pl/rss/teksty.xml')]


    def append_page(self, soup, appendtag):  # Fetch the pages linked from the 'n5p' block of appendtag and append their 'gc660' content to it.
-        nexturl = soup.find('a', attrs={'class':'num_str_nex'})
-        if appendtag.find('a', attrs={'class':'num_str_nex'}) is not None:
-            appendtag.find('a', attrs={'class':'num_str_nex'}).replaceWith('\n')
-        if nexturl is not None:
-            if 'strona' in nexturl.div.string:
-                nexturl= self.INDEX + nexturl['href']
-                soup2 = self.index_to_soup(nexturl)
-                pagetext = soup2.findAll(name='p', attrs={'class':['wtx1', 'wn1', 'wob']})
-                for tag in pagetext:
-                    pos = len(appendtag.contents)
-                    appendtag.insert(pos, tag)
-                self.append_page(soup2, appendtag)
+        tag = appendtag.find('div', attrs={'class':'n5p'})  # only appendtag is searched; the `soup` argument is not used by the added lines
+        if tag:
+            nexturls=tag.findAll('a')
+            for nexturl in nexturls[1:]:  # the first link in the block is skipped
+                try:
+                    soup2 = self.index_to_soup('http://www.gry-online.pl/S020.asp'+ nexturl['href'])
+                except:  # any failure with the S020 URL retries with the S022 URL; NOTE(review): the bare except also hides unrelated errors
+                    soup2 = self.index_to_soup('http://www.gry-online.pl/S022.asp'+ nexturl['href'])
+                pagetext = soup2.find(attrs={'class':'gc660'})  # NOTE(review): no guard if the fetched page has no 'gc660' element
+                for r in pagetext.findAll(name='header'):  # <header> elements are dropped from each fetched page before it is appended
+                    r.extract()
+                pos = len(appendtag.contents)
+                appendtag.insert(pos, pagetext)  # inserted after the current last child of appendtag
+            for r in appendtag.findAll(attrs={'class':['n5p', 'add-info', 'twitter-share-button']}):  # afterwards remove the 'n5p' block itself plus 'add-info' / 'twitter-share-button' elements
+                r.extract()


    def preprocess_html(self, soup):
--- a/recipes/marketing_magazine.recipe
+++ b/recipes/marketing_magazine.recipe
@ -1,4 +1,5 @@
 __license__   = 'GPL v3'
+
 from calibre.web.feeds.news import BasicNewsRecipe

 class AdvancedUserRecipe1327062445(BasicNewsRecipe):
@ -7,10 +8,13 @@ class AdvancedUserRecipe1327062445(BasicNewsRecipe):
    max_articles_per_feed = 100
    auto_cleanup = True
    remove_javascript = True
-    masthead_url            = 'http://www.simrendeogun.com/wp-content/uploads/2011/06/New-Marketing-Magazine-Logo.jpg'
-    feeds          = [(u'My Marketing', u'http://feed43.com/0537744466058428.xml'), (u'My Marketing_', u'http://feed43.com/8126723074604845.xml'), (u'Venturini', u'http://robertoventurini.blogspot.com/feeds/posts/default?alt=rss'), (u'Ninja Marketing', u'http://feeds.feedburner.com/NinjaMarketing'), (u'Comunitàzione', u'http://www.comunitazione.it/feed/novita.asp'), (u'Brandforum news', u'http://www.brandforum.it/rss/news'), (u'Brandforum papers', u'http://www.brandforum.it/rss/papers'), (u'Disambiguando', u'http://giovannacosenza.wordpress.com/feed/')]
+    no_stylesheets = True
    __author__    = 'faber1971'
-    description   = 'Collection of Italian marketing websites - v1.00 (28, January 2012)'
+    description   = 'Collection of Italian marketing websites - v1.01 (19, February 2012)'
    language = 'it'

-
+    remove_tags = [
+                     dict(name='ul', attrs={'id':'ads0'})
+                  ]
+    masthead_url            = 'http://www.simrendeogun.com/wp-content/uploads/2011/06/New-Marketing-Magazine-Logo.jpg'
+    feeds          = [(u'My Marketing', u'http://feed43.com/0537744466058428.xml'), (u'My Marketing_', u'http://feed43.com/8126723074604845.xml'), (u'Venturini', u'http://robertoventurini.blogspot.com/feeds/posts/default?alt=rss'), (u'Ninja Marketing', u'http://feeds.feedburner.com/NinjaMarketing'), (u'Comunitàzione', u'http://www.comunitazione.it/feed/novita.asp'), (u'Brandforum news', u'http://www.brandforum.it/rss/news'), (u'Brandforum papers', u'http://www.brandforum.it/rss/papers'), (u'Disambiguando', u'http://giovannacosenza.wordpress.com/feed/')]
--- a/recipes/naczytniki.recipe
+++ b/recipes/naczytniki.recipe
@ -1,8 +1,9 @@
 from calibre.web.feeds.news import BasicNewsRecipe
-
+import re
 class naczytniki(BasicNewsRecipe):
    title          = u'naczytniki.pl'
    __author__        = 'fenuks'
+    masthead_url= 'http://naczytniki.pl/wp-content/uploads/2010/08/logo_nc28.png'
    cover_url      = 'http://naczytniki.pl/wp-content/uploads/2010/08/logo_nc28.png'
    language       = 'pl'
    description ='everything about e-readers'
@ -10,6 +11,7 @@ class naczytniki(BasicNewsRecipe):
    no_stylesheets=True
    oldest_article = 7
    max_articles_per_feed = 100
+    preprocess_regexps = [(re.compile(ur'<p><br><b>Zobacz także:</b></p>.*?</body>', re.DOTALL), lambda match: '</body>') ]
    remove_tags_after= dict(name='div', attrs={'class':'sociable'})
    keep_only_tags=[dict(name='div', attrs={'class':'post'})]
    remove_tags=[dict(name='span', attrs={'class':'comments'}), dict(name='div', attrs={'class':'sociable'})]
--- a/recipes/nowa_fantastyka.recipe
+++ b/recipes/nowa_fantastyka.recipe
@ -1,21 +1,33 @@
 # -*- coding: utf-8 -*-
 from calibre.web.feeds.news import BasicNewsRecipe
+import re
+
 class Nowa_Fantastyka(BasicNewsRecipe):
    title          = u'Nowa Fantastyka'
    oldest_article = 7
    __author__        = 'fenuks'
+    __modified_by__   = 'zaslav'
    language       = 'pl'
    encoding='latin2'
    description ='site for fantasy readers'
    category='fantasy'
+    masthead_url='http://farm5.static.flickr.com/4133/4956658792_7ba7fbf562.jpg'
+    #extra_css='.tytul {font-size: 20px;}' #not working
    max_articles_per_feed = 100
    INDEX='http://www.fantastyka.pl/'
    no_stylesheets=True
    needs_subscription = 'optional'
-    remove_tags_before=dict(attrs={'class':'belka1-tlo-md'})
+    remove_tags_before=dict(attrs={'class':'naglowek2'})
    #remove_tags_after=dict(name='span', attrs={'class':'naglowek-oceny'})
-    remove_tags_after=dict(name='td', attrs={'class':'belka1-bot'})
-    remove_tags=[dict(attrs={'class':'avatar2'}), dict(name='span', attrs={'class':'alert-oceny'}), dict(name='img', attrs={'src':['obrazki/sledz1.png', 'obrazki/print.gif', 'obrazki/mlnf.gif']}), dict(name='b', text='Dodaj komentarz'),dict(name='a', attrs={'href':'http://www.fantastyka.pl/10,1727.html'})]
+    remove_tags_after=dict(name='form', attrs={'name':'form1'})
+    remove_tags=[dict(attrs={'class':['avatar2', 'belka-margin', 'naglowek2']}), dict(name='span', attrs={'class':'alert-oceny'}), dict(name='img', attrs={'src':['obrazki/sledz1.png', 'obrazki/print.gif', 'obrazki/mlnf.gif']}), dict(name='b', text='Dodaj komentarz'),dict(name='a', attrs={'href':'http://www.fantastyka.pl/10,1727.html'}), dict(name='form')]
+    preprocess_regexps = [
+    (re.compile(r'\<table .*?\>'), lambda match: ''),
+    (re.compile(r'\<td.*?\>'), lambda match: ''),
+    (re.compile(r'\<center\>'), lambda match: '')]
+
+
+

    def find_articles(self, url):
        articles = []
@ -41,10 +53,10 @@ class Nowa_Fantastyka(BasicNewsRecipe):

         return feeds

+
    def get_cover_url(self):  # Build cover_url from the href of the first <a class="img"> on the e-kiosk page.
-        soup = self.index_to_soup('http://www.fantastyka.pl/1.html')
-        cover=soup.find(name='img', attrs={'class':'okladka'})
-        self.cover_url=self.INDEX+ cover['src']
+        soup = self.index_to_soup('http://www.e-kiosk.pl/nowa_fantastyka')
+        self.cover_url='http://www.e-kiosk.pl' + soup.find(name='a', attrs={'class':'img'})['href']  # NOTE(review): no guard if no such link is found
        return getattr(self, 'cover_url', self.cover_url)

    def get_browser(self):
@ -56,3 +68,18 @@ class Nowa_Fantastyka(BasicNewsRecipe):
            br['pass'] = self.password
            br.submit()
        return br
+
+    def preprocess_html(self, soup):  # Strip style/font/align attributes, rename <tr> to <div>, and give the 'tytul' element an inline style.
+        for item in soup.findAll(style=True):
+            del item['style']
+        for item in soup.findAll(font=True):  # matches tags that carry a `font` attribute - NOTE(review): <font> tags themselves are not matched; confirm which was intended
+            del item['font']
+        for item in soup.findAll(align=True):
+            del item['align']
+        for item in soup.findAll(name='tr'):  # table rows are renamed to <div>
+            item.name='div'
+        title=soup.find(attrs={'class':'tytul'})
+        if title:
+            title['style']='font-size: 20px; font-weight: bold;'  # set after the style stripping above, so this inline style is kept
+        self.log.warn(soup)  # NOTE(review): logs the whole document at warning level on every call - looks like leftover debugging; confirm and remove
+        return soup
--- a/recipes/tablety_pl.recipe
+++ b/recipes/tablety_pl.recipe
@ -1,14 +1,16 @@
 from calibre.web.feeds.news import BasicNewsRecipe
-
+import re
 class Tablety_pl(BasicNewsRecipe):
    title          = u'Tablety.pl'
    __author__        = 'fenuks'
    description   = u'tablety.pl - latest tablet news'
+    masthead_url= 'http://www.tablety.pl/wp-content/themes/kolektyw/img/logo.png'
    cover_url      = 'http://www.tablety.pl/wp-content/themes/kolektyw/img/logo.png'
    category       = 'IT'
    language       = 'pl'
    oldest_article = 8
    max_articles_per_feed = 100
+    preprocess_regexps = [(re.compile(ur'<p><strong>Przeczytaj także.*?</a></strong></p>', re.DOTALL), lambda match: ''), (re.compile(ur'<p><strong>Przeczytaj koniecznie.*?</a></strong></p>', re.DOTALL), lambda match: '')]
    remove_tags_before=dict(name="h1", attrs={'class':'entry-title'})
    remove_tags_after=dict(name="div", attrs={'class':'snap_nopreview sharing robots-nocontent'})
    remove_tags=[dict(name='div', attrs={'class':'snap_nopreview sharing robots-nocontent'})]
--- a/recipes/ubuntu_pl.recipe
+++ b/recipes/ubuntu_pl.recipe
@ -4,10 +4,12 @@ class Ubuntu_pl(BasicNewsRecipe):
    title          = u'UBUNTU.pl'
    __author__        = 'fenuks'
    description   = 'UBUNTU.pl - polish ubuntu community site'
+    masthead_url= 'http://ubuntu.pl/img/logo.jpg'
    cover_url      = 'http://ubuntu.pl/img/logo.jpg'
    category       = 'linux, IT'
    language       = 'pl'
    no_stylesheets = True
+    remove_empty_feeds = True
    oldest_article = 8
    max_articles_per_feed = 100
    extra_css      = '#main {text-align:left;}'