From c8fe13195ceffcc4bc3aff8fcf1f68b0b7e2b091 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tomasz=20D=C5=82ugosz?= <tomek3d@gmail.com>
Date: Mon, 4 Mar 2013 23:44:07 +0100
Subject: [PATCH] partial fix for telepolis_pl

---
 recipes/telepolis_pl.recipe | 58 ++++++-------------------------------
 1 file changed, 9 insertions(+), 49 deletions(-)
diff --git a/recipes/telepolis_pl.recipe b/recipes/telepolis_pl.recipe
index ff4803697f..06739fe31d 100644
--- a/recipes/telepolis_pl.recipe
+++ b/recipes/telepolis_pl.recipe
@@ -8,60 +8,20 @@ import re
 
 class telepolis(BasicNewsRecipe):
     title = u'Telepolis.pl'
-    __author__ = 'Artur Stachecki <artur.stachecki@gmail.com>'
+    __author__ = 'Artur Stachecki <artur.stachecki@gmail.com>, Tomasz Długosz <tomek3d@gmail.com>'
+
     language = 'pl'
-    description = u'Twój telekomunikacyjny serwis informacyjny.\
-                  Codzienne informacje, testy i artykuły,\
-                  promocje, baza telefonów oraz centrum rozrywki'
-    oldest_article = 7
+    description = u'Twój telekomunikacyjny serwis informacyjny.'
     masthead_url = 'http://telepolis.pl/i/telepolis-logo2.gif'
-    max_articles_per_feed = 100
-    simultaneous_downloads = 5
-    remove_javascript = True
     no_stylesheets = True
     use_embedded_content = False
 
-    remove_tags = []
-    remove_tags.append(dict(attrs={'alt': 'TELEPOLIS.pl'}))
-
-    preprocess_regexps = [(re.compile(r'<: .*? :>'),
-                           lambda match: ''),
-                          (re.compile(r'<b>Zobacz:</b>.*?</a>', re.DOTALL),
-                           lambda match: ''),
-                          (re.compile(r'<-ankieta.*?>'),
-                           lambda match: ''),
-                          (re.compile(r'\(Q\!\)'),
-                           lambda match: ''),
-                          (re.compile(r'\(plik.*?\)'),
-                           lambda match: ''),
-                          (re.compile(r'<br.*?><br.*?>', re.DOTALL),
-                           lambda match: '')
-                          ]
-
-    extra_css = '''.tb { font-weight: bold; font-size: 20px;}'''
-
     feeds = [
-        (u'Wiadomości', u'http://www.telepolis.pl/rss/news.php'),
-        (u'Artykuły', u'http://www.telepolis.pl/rss/artykuly.php')
+        (u'Wiadomości', u'http://www.telepolis.pl/rss/news.php')#,
+        #(u'Artykuły', u'http://www.telepolis.pl/rss/artykuly.php')
     ]
 
-    def print_version(self, url):
-        if 'news.php' in url:
-            print_url = url.replace('news.php', 'news_print.php')
-        else:
-            print_url = url.replace('artykuly.php', 'art_print.php')
-        return print_url
-
-    def preprocess_html(self, soup):
-        for image in soup.findAll('img'):
-            if 'm.jpg' in image['src']:
-                image_big = image['src']
-                image_big = image_big.replace('m.jpg', '.jpg')
-                image['src'] = image_big
-        logo = soup.find('tr')
-        logo.extract()
-        for tag in soup.findAll('tr'):
-            for strings in ['Wiadomość wydrukowana', 'copyright']:
-                if strings in self.tag_to_string(tag):
-                    tag.extract()
-        return self.adeify_images(soup)
+    keep_only_tags = [
+        dict(name='div', attrs={'class':'flol w510'}),
+        dict(name='div', attrs={'class':'main_tresc_news'})
+    ]