Add Lifehacker.pl recipe by MrStefan and Hatalska recipe by teepel; update telepolis.pl recipe

2025-07-31 14:33:54 -04:00 · 2013-03-05 09:15:33 +05:30 · 2013-03-05 09:15:33 +05:30 · 6fc800d6c6
commit 6fc800d6c6
parent 85a2d515ca 83007ffdfa
26 changed files with 95 additions and 98 deletions
--- a/recipes/antyweb.recipe
+++ b/recipes/antyweb.recipe
@ -43,6 +43,6 @@ class AntywebRecipe(BasicNewsRecipe):
    def preprocess_html(self, soup):
        for alink in soup.findAll('a'):
            if alink.string is not None:
-               tstr = alink.string
-               alink.replaceWith(tstr)
-	return soup
+                tstr = alink.string
+                alink.replaceWith(tstr)
+        return soup
--- a/recipes/app_funds.recipe
+++ b/recipes/app_funds.recipe
@ -24,4 +24,3 @@ class app_funds(BasicNewsRecipe):
    auto_cleanup = True

    feeds          = [(u'blog', u'http://feeds.feedburner.com/blogspot/etVI')]
-    
--- a/recipes/bankier_pl.recipe
+++ b/recipes/bankier_pl.recipe
@ -47,4 +47,3 @@ class bankier(BasicNewsRecipe):
        segments = urlPart.split('-')
        urlPart2 = segments[-1]
        return 'http://www.bankier.pl/wiadomosci/print.html?article_id=' + urlPart2
-
--- a/recipes/gazeta_pl_krakow.recipe
+++ b/recipes/gazeta_pl_krakow.recipe
@ -49,8 +49,8 @@ class gw_krakow(BasicNewsRecipe):
    feeds          = [(u'Wiadomości', u'http://rss.gazeta.pl/pub/rss/krakow.xml')]

    def skip_ad_pages(self, soup):
-          tag=soup.find(name='a', attrs={'class':'btn'})
-          if tag:
+        tag=soup.find(name='a', attrs={'class':'btn'})
+        if tag:
            new_soup=self.index_to_soup(tag['href'], raw=True)
            return new_soup

@ -95,8 +95,7 @@ class gw_krakow(BasicNewsRecipe):
                rem.extract()

    def preprocess_html(self, soup):
-         self.append_page(soup, soup.body)
-         if soup.find(id='container_gal'):
-             self.gallery_article(soup.body)
-         return soup
-
+        self.append_page(soup, soup.body)
+        if soup.find(id='container_gal'):
+            self.gallery_article(soup.body)
+        return soup
--- a/recipes/gazeta_pl_warszawa.recipe
+++ b/recipes/gazeta_pl_warszawa.recipe
@ -46,8 +46,8 @@ class gw_wawa(BasicNewsRecipe):
    feeds          = [(u'Wiadomości', u'http://rss.gazeta.pl/pub/rss/warszawa.xml')]

    def skip_ad_pages(self, soup):
-          tag=soup.find(name='a', attrs={'class':'btn'})
-          if tag:
+        tag=soup.find(name='a', attrs={'class':'btn'})
+        if tag:
            new_soup=self.index_to_soup(tag['href'], raw=True)
            return new_soup

@ -92,8 +92,7 @@ class gw_wawa(BasicNewsRecipe):
                rem.extract()

    def preprocess_html(self, soup):
-         self.append_page(soup, soup.body)
-         if soup.find(id='container_gal'):
-             self.gallery_article(soup.body)
-         return soup
-
+        self.append_page(soup, soup.body)
+        if soup.find(id='container_gal'):
+            self.gallery_article(soup.body)
+        return soup
--- a/recipes/gazeta_pomorska.recipe
+++ b/recipes/gazeta_pomorska.recipe
@ -100,5 +100,3 @@ class GazetaPomorska(BasicNewsRecipe):

    extra_css = '''h1 { font-size: 1.4em; }
                        h2 { font-size: 1.0em; }'''
-
-
--- a/recipes/hatalska.recipe
+++ b/recipes/hatalska.recipe
@ -0,0 +1,28 @@
+#!/usr/bin/env  python
+
+__license__ = 'GPL v3'
+__copyright__ = 'teepel 2012'
+
+'''
+hatalska.com
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+import re
+
+class hatalska(BasicNewsRecipe):
+    # News recipe for the hatalska.com blog (Polish-language, see `language`).
+    # The class is purely declarative: it only sets BasicNewsRecipe attributes
+    # and defines no methods of its own.
+    title          = u'Hatalska'
+    __author__ = 'teepel <teepel44@gmail.com>'
+    language       = 'pl'
+    description    = u'Blog specjalistki z branży mediowo-reklamowej - Natalii Hatalskiej'
+    oldest_article = 7
+    masthead_url='http://hatalska.com/wp-content/themes/jamel/images/logo.png'
+    max_articles_per_feed = 100
+    simultaneous_downloads = 5
+    remove_javascript=True
+    no_stylesheets=True
+
+    # Tag specs to drop from article HTML: every <div class="feedflare">.
+    # NOTE(review): presumably the FeedBurner share-link footer - confirm.
+    remove_tags =[]
+    remove_tags.append(dict(name = 'div', attrs = {'class' : 'feedflare'}))
+
+    # Single feed, given as a (title, URL) pair and served through FeedBurner.
+    feeds          = [(u'Blog', u'http://feeds.feedburner.com/hatalskacom')]
--- a/recipes/icons/eksiazki.png
+++ b/recipes/icons/eksiazki.png
--- a/recipes/icons/hatalska.png
+++ b/recipes/icons/hatalska.png
--- a/recipes/icons/lifehacker_pl.png
+++ b/recipes/icons/lifehacker_pl.png
--- a/recipes/icons/money_pl.png
+++ b/recipes/icons/money_pl.png
--- a/recipes/icons/tvn24.png
+++ b/recipes/icons/tvn24.png
--- a/recipes/icons/wprost.png
+++ b/recipes/icons/wprost.png
--- a/recipes/icons/wprost_rss.png
+++ b/recipes/icons/wprost_rss.png
--- a/recipes/kp.recipe
+++ b/recipes/kp.recipe
@ -47,6 +47,6 @@ class KrytykaPolitycznaRecipe(BasicNewsRecipe):
    def preprocess_html(self, soup):
        for alink in soup.findAll('a'):
            if alink.string is not None:
-               tstr = alink.string
-               alink.replaceWith(tstr)
+                tstr = alink.string
+                alink.replaceWith(tstr)
        return soup
--- a/recipes/lifehacker_pl.recipe
+++ b/recipes/lifehacker_pl.recipe
@ -0,0 +1,25 @@
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__copyright__ = 'MrStefan'
+
+'''
+www.lifehacking.pl
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+import re
+
+class lifehacking(BasicNewsRecipe):
+    # News recipe for lifehacking.pl, published under the title
+    # 'Lifehacker Polska' (Polish-language, see `language`).
+    # The class is purely declarative: it only sets BasicNewsRecipe attributes
+    # and defines no methods of its own.
+    # NOTE(review): the class is named `lifehacking` while the recipe file is
+    # lifehacker_pl.recipe - confirm the mismatch is intentional.
+    title = u'Lifehacker Polska'
+    __author__ = 'MrStefan <mrstefaan@gmail.com>'
+    language = 'pl'
+    description =u'Lifehacking - sposoby na zwiększanie własnej wydajności. Ułatwiaj sobie życie, wykorzystując wiedzę, metody, technologie, przydatne strony ...'
+    masthead_url='http://lifehacking.pl/wp-content/themes/lifehacking/images/lifehackerlogo.png'
+    remove_empty_feeds= True
+    oldest_article = 7
+    max_articles_per_feed = 100
+    remove_javascript=True
+    no_stylesheets=True
+
+    # Single feed, given as a (title, URL) pair and served through FeedBurner.
+    feeds = [(u'Lifehacker polska', u'http://feeds.feedburner.com/pl_lh')]
--- a/recipes/michalkiewicz.recipe
+++ b/recipes/michalkiewicz.recipe
@ -8,8 +8,6 @@ michalkiewicz.pl

 from calibre.web.feeds.news import BasicNewsRecipe

-# 
-
 class michalkiewicz(BasicNewsRecipe):
    title          = u'Stanis\u0142aw Michalkiewicz'
    description    = u'Strona autorska * felietony * artyku\u0142y * komentarze'
@ -23,4 +21,3 @@ class michalkiewicz(BasicNewsRecipe):
    remove_tags = [dict(name='ul', attrs={'class':'menu'})]

    feeds          = [(u'Teksty', u'http://www.michalkiewicz.pl/rss.xml')]
-
--- a/recipes/money_pl.recipe
+++ b/recipes/money_pl.recipe
@ -60,7 +60,7 @@ class FocusRecipe(BasicNewsRecipe):
                           ]

    def print_version(self, url):
-     if url.count ('money.pl.feedsportal.com'):
+        if url.count ('money.pl.feedsportal.com'):
            u = url.find('0Cartykul0C')
            u = 'http://www.m.money.pl/wiadomosci/artykul/' + url[u + 21:]
            u = u.replace('0C', '/')
@ -71,6 +71,6 @@ class FocusRecipe(BasicNewsRecipe):
            u = u.replace ('0B','.')
            u = u.replace (',0,',',-1,')
            u = u.replace('0Tutm0Isource0Frss0Gutm0Imedium0Frss0Gutm0Icampaign0Frss/story01.htm', '')
-     else:
+        else:
            u = url.replace('/nc/1','/do-druku/1')
-     return u
+        return u
--- a/recipes/myapple_pl.recipe
+++ b/recipes/myapple_pl.recipe
@ -44,6 +44,6 @@ class MyAppleRecipe(BasicNewsRecipe):
    def preprocess_html(self, soup):
        for alink in soup.findAll('a'):
            if alink.string is not None:
-               tstr = alink.string
-               alink.replaceWith(tstr)
+                tstr = alink.string
+                alink.replaceWith(tstr)
        return soup
--- a/recipes/rynek_kolejowy.recipe
+++ b/recipes/rynek_kolejowy.recipe
@ -37,4 +37,3 @@ class rynek_kolejowy(BasicNewsRecipe):
        segment = url.split('/')
        urlPart = segment[3]
        return 'http://www.rynek-kolejowy.pl/drukuj.php?id=' + urlPart
-
--- a/recipes/rzeczpospolita.recipe
+++ b/recipes/rzeczpospolita.recipe
@ -70,5 +70,3 @@ class RzeczpospolitaRecipe(BasicNewsRecipe):
        forget, sep, index = rest.rpartition(',')

        return start + '/' + index + '?print=tak'
-
-
--- a/recipes/satkurier.recipe
+++ b/recipes/satkurier.recipe
@ -8,10 +8,7 @@ class SATKurier(BasicNewsRecipe):
    title = u'SATKurier.pl'
    __author__ = 'Artur Stachecki <artur.stachecki@gmail.com>'
    language = 'pl'
-    description = u'Największy i najstarszy serwis poświęcony\
-                  telewizji cyfrowej, przygotowywany przez wydawcę\
-                  miesięcznika SAT Kurier. Bieżące wydarzenia\
-                  z rynku mediów i nowych technologii.'
+    description = u'Serwis poświęcony telewizji cyfrowej'
    oldest_article = 7
    masthead_url = 'http://satkurier.pl/img/header_sk_logo.gif'
    max_articles_per_feed = 100
--- a/recipes/swiatkindle.recipe
+++ b/recipes/swiatkindle.recipe
@ -22,4 +22,3 @@ class swiatczytnikow(BasicNewsRecipe):
    remove_tags = [dict(name = 'ul', attrs = {'class' : 'similar-posts'})]

    preprocess_regexps = [(re.compile(u'<h3>Czytaj dalej:</h3>'), lambda match: '')]
-
--- a/recipes/telepolis_pl.recipe
+++ b/recipes/telepolis_pl.recipe
@ -8,60 +8,20 @@ import re

 class telepolis(BasicNewsRecipe):
    title = u'Telepolis.pl'
-    __author__ = 'Artur Stachecki <artur.stachecki@gmail.com>'
+    __author__ = 'Artur Stachecki <artur.stachecki@gmail.com>, Tomasz Długosz <tomek3d@gmail.com>'
+
    language = 'pl'
-    description = u'Twój telekomunikacyjny serwis informacyjny.\
-                  Codzienne informacje, testy i artykuły,\
-                  promocje, baza telefonów oraz centrum rozrywki'
-    oldest_article = 7
+    description = u'Twój telekomunikacyjny serwis informacyjny.'
    masthead_url = 'http://telepolis.pl/i/telepolis-logo2.gif'
-    max_articles_per_feed = 100
-    simultaneous_downloads = 5
-    remove_javascript = True
    no_stylesheets = True
    use_embedded_content = False

-    remove_tags = []
-    remove_tags.append(dict(attrs={'alt': 'TELEPOLIS.pl'}))
-
-    preprocess_regexps = [(re.compile(r'<: .*? :>'),
-                           lambda match: ''),
-                          (re.compile(r'<b>Zobacz:</b>.*?</a>', re.DOTALL),
-                           lambda match: ''),
-                          (re.compile(r'<-ankieta.*?>'),
-                           lambda match: ''),
-                          (re.compile(r'\(Q\!\)'),
-                           lambda match: ''),
-                          (re.compile(r'\(plik.*?\)'),
-                           lambda match: ''),
-                          (re.compile(r'<br.*?><br.*?>', re.DOTALL),
-                           lambda match: '')
-                          ]
-
-    extra_css = '''.tb { font-weight: bold; font-size: 20px;}'''
-
    feeds = [
-        (u'Wiadomości', u'http://www.telepolis.pl/rss/news.php'),
-        (u'Artykuły', u'http://www.telepolis.pl/rss/artykuly.php')
+        (u'Wiadomości', u'http://www.telepolis.pl/rss/news.php')#,
+        #(u'Artykuły', u'http://www.telepolis.pl/rss/artykuly.php')
    ]

-    def print_version(self, url):
-        if 'news.php' in url:
-            print_url = url.replace('news.php', 'news_print.php')
-        else:
-            print_url = url.replace('artykuly.php', 'art_print.php')
-        return print_url
-
-    def preprocess_html(self, soup):
-        for image in soup.findAll('img'):
-            if 'm.jpg' in image['src']:
-                image_big = image['src']
-                image_big = image_big.replace('m.jpg', '.jpg')
-                image['src'] = image_big
-        logo = soup.find('tr')
-        logo.extract()
-        for tag in soup.findAll('tr'):
-            for strings in ['Wiadomość wydrukowana', 'copyright']:
-                if strings in self.tag_to_string(tag):
-                    tag.extract()
-        return self.adeify_images(soup)
+    keep_only_tags = [
+        dict(name='div', attrs={'class':'flol w510'}),
+        dict(name='div', attrs={'class':'main_tresc_news'})
+    ]