Update various Polish news sources

2025-07-09 03:04:10 -04:00 · 2013-03-06 08:19:22 +05:30 · 2013-03-06 08:19:22 +05:30 · 218de3d35c
commit 218de3d35c
parent ac5d96f7f3 71529d52d5
28 changed files with 209 additions and 213 deletions
--- a/recipes/di.recipe
+++ b/recipes/di.recipe
@@ -1,6 +1,6 @@
 #!/usr/bin/env  python

-__license__	= 'GPL v3'
+__license__     = 'GPL v3'
 __author__ = 'Mori'
 __version__ = 'v. 0.5'
 '''
@@ -11,56 +11,56 @@ from calibre.web.feeds.news import BasicNewsRecipe
 import re

 class DziennikInternautowRecipe(BasicNewsRecipe):
-	__author__ = 'Mori'
-	language = 'pl'
+    __author__ = 'Mori'
+    language = 'pl'

-	title = u'Dziennik Internautow'
-	publisher = u'Dziennik Internaut\u00f3w Sp. z o.o.'
-	description = u'Internet w \u017cyciu i biznesie. Porady, wywiady, interwencje, bezpiecze\u0144stwo w Sieci, technologia.'
+    title = u'Dziennik Internautow'
+    publisher = u'Dziennik Internaut\u00f3w Sp. z o.o.'
+    description = u'Internet w \u017cyciu i biznesie. Porady, wywiady, interwencje, bezpiecze\u0144stwo w Sieci, technologia.'

-	max_articles_per_feed = 100
-	oldest_article = 7
-	cover_url = 'http://di.com.pl/pic/logo_di_norm.gif'
+    max_articles_per_feed = 100
+    oldest_article = 7
+    cover_url = 'http://di.com.pl/pic/logo_di_norm.gif'

-	no_stylesheets = True
-	remove_javascript = True
-	encoding = 'utf-8'
+    no_stylesheets = True
+    remove_javascript = True
+    encoding = 'utf-8'

-	extra_css = '''
-		.fotodesc{font-size: 75%;}
-		.pub_data{font-size: 75%;}
-		.fotonews{clear: both; padding-top: 10px; padding-bottom: 10px;}
-		#pub_foto{font-size: 75%; float: left; padding-right: 10px;}
-	'''
+    extra_css = '''
+            .fotodesc{font-size: 75%;}
+            .pub_data{font-size: 75%;}
+            .fotonews{clear: both; padding-top: 10px; padding-bottom: 10px;}
+            #pub_foto{font-size: 75%; float: left; padding-right: 10px;}
+    '''

-	feeds = [
-		(u'Dziennik Internaut\u00f3w', u'http://feeds.feedburner.com/glowny-di')
-	]
+    feeds = [
+            (u'Dziennik Internaut\u00f3w', u'http://feeds.feedburner.com/glowny-di')
+    ]

-	keep_only_tags = [
-		dict(name = 'div', attrs = {'id' : 'pub_head'}),
-		dict(name = 'div', attrs = {'id' : 'pub_content'})
-	]
+    keep_only_tags = [
+            dict(name = 'div', attrs = {'id' : 'pub_head'}),
+            dict(name = 'div', attrs = {'id' : 'pub_content'})
+    ]

-	remove_tags = [
-		dict(name = 'div', attrs = {'class' : 'poradniki_context'}),
-		dict(name = 'div', attrs = {'class' : 'uniBox'}),
-		dict(name = 'object', attrs = {}),
-		dict(name = 'h3', attrs = {}),
-		dict(attrs={'class':'twitter-share-button'})
-	]
+    remove_tags = [
+            dict(name = 'div', attrs = {'class' : 'poradniki_context'}),
+            dict(name = 'div', attrs = {'class' : 'uniBox'}),
+            dict(name = 'object', attrs = {}),
+            dict(name = 'h3', attrs = {}),
+            dict(attrs={'class':'twitter-share-button'})
+    ]

-	preprocess_regexps = [
-		(re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in 
-		[
-			(r', <a href="http://di.com.pl/komentarze,.*?</div>', lambda match: '</div>'),
-			(r'<div class="fotonews".*?">', lambda match: '<div class="fotonews">'),
-			(r'http://di.com.pl/pic/photo/mini/', lambda match: 'http://di.com.pl/pic/photo/oryginal/'),
-			(r'\s*</', lambda match: '</'),
-		]
-	]
+    preprocess_regexps = [
+            (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
+            [
+                    (r', <a href="http://di.com.pl/komentarze,.*?</div>', lambda match: '</div>'),
+                    (r'<div class="fotonews".*?">', lambda match: '<div class="fotonews">'),
+                    (r'http://di.com.pl/pic/photo/mini/', lambda match: 'http://di.com.pl/pic/photo/oryginal/'),
+                    (r'\s*</', lambda match: '</'),
+            ]
+    ]

-	def skip_ad_pages(self, soup):
-		if 'Advertisement' in soup.title:
-			nexturl=soup.find('a')['href']
-			return self.index_to_soup(nexturl, raw=True)
+    def skip_ad_pages(self, soup):
+        if 'Advertisement' in soup.title:
+            nexturl=soup.find('a')['href']
+            return self.index_to_soup(nexturl, raw=True)
--- a/recipes/eclicto.recipe
+++ b/recipes/eclicto.recipe
@@ -1,8 +1,6 @@
 #!/usr/bin/env  python

-__license__	= 'GPL v3'
-__author__ = 'Mori'
-__version__ = 'v. 0.1'
+__license__     = 'GPL v3'
 '''
 blog.eclicto.pl
 '''
@@ -11,39 +9,39 @@ from calibre.web.feeds.news import BasicNewsRecipe
 import re

 class BlogeClictoRecipe(BasicNewsRecipe):
-	__author__ = 'Mori'
-	language = 'pl'
+    __author__ = 'Mori, Tomasz Długosz'
+    language = 'pl'

-	title = u'Blog eClicto'
-	publisher = u'Blog eClicto'
-	description = u'Blog o e-papierze i e-bookach'
+    title = u'Blog eClicto'
+    publisher = u'Blog eClicto'
+    description = u'Blog o e-papierze i e-bookach'

-	max_articles_per_feed = 100
-	cover_url = 'http://blog.eclicto.pl/wordpress/wp-content/themes/blog_eclicto/g/logo.gif'
+    max_articles_per_feed = 100
+    cover_url = 'http://blog.eclicto.pl/wordpress/wp-content/themes/blog_eclicto/g/logo.gif'

-	no_stylesheets = True
-	remove_javascript = True
-	encoding = 'utf-8'
+    no_stylesheets = True
+    remove_javascript = True
+    encoding = 'utf-8'

-	extra_css = '''
-		img{float: left; padding-right: 10px; padding-bottom: 5px;}
-	'''
+    extra_css = '''
+            img{float: left; padding-right: 10px; padding-bottom: 5px;}
+    '''

-	feeds = [
-		(u'Blog eClicto', u'http://blog.eclicto.pl/feed/')
-	]
+    feeds = [
+            (u'Blog eClicto', u'http://blog.eclicto.pl/feed/')
+    ]

-	remove_tags = [
-		dict(name = 'span', attrs = {'id' : 'tags'})
-	]
+    remove_tags = [
+            dict(name = 'div', attrs = {'class' : 'social_bookmark'}),
+    ]

-	remove_tags_after = [
-		dict(name = 'div', attrs = {'class' : 'post'})
-	]
+    keep_only_tags = [
+            dict(name = 'div', attrs = {'class' : 'post'})
+    ]

-	preprocess_regexps = [
-		(re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in 
-		[
-			(r'\s*</', lambda match: '</'),
-		]
-	]
+    preprocess_regexps = [
+            (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
+            [
+                    (r'\s*</', lambda match: '</'),
+            ]
+    ]
--- a/recipes/f1_ultra.recipe
+++ b/recipes/f1_ultra.recipe
@@ -22,14 +22,14 @@ class f1ultra(BasicNewsRecipe):
    remove_tags.append(dict(name = 'hr', attrs = {'size' : '2'}))

    preprocess_regexps = [(re.compile(r'align="left"'), lambda match: ''),
-		          (re.compile(r'align="right"'), lambda match: ''),
-		          (re.compile(r'width=\"*\"'), lambda match: ''),
-        		  (re.compile(r'\<table .*?\>'), lambda match: '')]
+                          (re.compile(r'align="right"'), lambda match: ''),
+                          (re.compile(r'width=\"*\"'), lambda match: ''),
+                          (re.compile(r'\<table .*?\>'), lambda match: '')]


    extra_css = '''.contentheading { font-size: 1.4em; font-weight: bold; }
-	           img { display: block; clear: both;}
-	        '''
+                   img { display: block; clear: both;}
+                '''
    remove_attributes = ['width','height','position','float','padding-left','padding-right','padding','text-align']

    feeds = [(u'F1 Ultra', u'http://www.f1ultra.pl/index.php?option=com_rd_rss&id=1&Itemid=245')]
--- a/recipes/icons/astronomia_pl.png
+++ b/recipes/icons/astronomia_pl.png
--- a/recipes/icons/bash_org_pl.png
+++ b/recipes/icons/bash_org_pl.png
--- a/recipes/icons/cgm_pl.png
+++ b/recipes/icons/cgm_pl.png
--- a/recipes/icons/czas_gentlemanow.png
+++ b/recipes/icons/czas_gentlemanow.png
--- a/recipes/icons/dziennik_pl.png
+++ b/recipes/icons/dziennik_pl.png
--- a/recipes/icons/eso_pl.png
+++ b/recipes/icons/eso_pl.png
--- a/recipes/icons/film_web.png
+++ b/recipes/icons/film_web.png
--- a/recipes/icons/gameplay_pl.png
+++ b/recipes/icons/gameplay_pl.png
--- a/recipes/icons/in4_pl.png
+++ b/recipes/icons/in4_pl.png
--- a/recipes/icons/kresy_pl.png
+++ b/recipes/icons/kresy_pl.png
--- a/recipes/icons/lomza.png
+++ b/recipes/icons/lomza.png
--- a/recipes/icons/mlody_technik_pl.png
+++ b/recipes/icons/mlody_technik_pl.png
--- a/recipes/icons/national_geographic_pl.png
+++ b/recipes/icons/national_geographic_pl.png
--- a/recipes/icons/oclab_pl.png
+++ b/recipes/icons/oclab_pl.png
--- a/recipes/icons/pc_centre_pl.png
+++ b/recipes/icons/pc_centre_pl.png
--- a/recipes/icons/spiders_web_pl.png
+++ b/recipes/icons/spiders_web_pl.png
--- a/recipes/interia_fakty.recipe
+++ b/recipes/interia_fakty.recipe
@@ -35,5 +35,5 @@ class InteriaFakty(BasicNewsRecipe):
        dict(name='span', attrs={'class':'keywords'})]

    extra_css = '''
-	    h2 { font-size: 1.2em; }
-	'''
+            h2 { font-size: 1.2em; }
+        '''
--- a/recipes/kopalniawiedzy.recipe
+++ b/recipes/kopalniawiedzy.recipe
@@ -6,74 +6,74 @@ import re
 from calibre.web.feeds.recipes import BasicNewsRecipe

 class KopalniaWiedzy(BasicNewsRecipe):
-        title          = u'Kopalnia Wiedzy'
-        publisher      = u'Kopalnia Wiedzy'
-        description    = u'Ciekawostki ze świata nauki i techniki'
-        encoding       = 'utf-8'
-        __author__     = 'Attis & Tomasz Długosz'
-        language       = 'pl'
-        oldest_article = 7
-        max_articles_per_feed = 100
-        INDEX          = u'http://kopalniawiedzy.pl/'
-        remove_javascript     = True
-        no_stylesheets        = True
+    title          = u'Kopalnia Wiedzy'
+    publisher      = u'Kopalnia Wiedzy'
+    description    = u'Ciekawostki ze świata nauki i techniki'
+    encoding       = 'utf-8'
+    __author__     = 'Attis & Tomasz Długosz'
+    language       = 'pl'
+    oldest_article = 7
+    max_articles_per_feed = 100
+    INDEX          = u'http://kopalniawiedzy.pl/'
+    remove_javascript     = True
+    no_stylesheets        = True

-        remove_tags    = [{'name':'p', 'attrs': {'class': 'keywords'}}, {'name':'div', 'attrs': {'class':'sexy-bookmarks sexy-bookmarks-bg-caring'}}, {'name':'div', 'attrs': {'class':'article-time-and-cat'}}, {'name':'p', 'attrs': {'class':'tags'}}]
-        remove_tags_after = dict(attrs={'class':'ad-square'})
-        keep_only_tags    = [dict(name="div", attrs={'class':'article-text text-small'})]
-        extra_css      = '.topimage {margin-top: 30px}'
+    remove_tags    = [{'name':'p', 'attrs': {'class': 'keywords'}}, {'name':'div', 'attrs': {'class':'sexy-bookmarks sexy-bookmarks-bg-caring'}}, {'name':'div', 'attrs': {'class':'article-time-and-cat'}}, {'name':'p', 'attrs': {'class':'tags'}}]
+    remove_tags_after = dict(attrs={'class':'ad-square'})
+    keep_only_tags    = [dict(name="div", attrs={'class':'article-text text-small'})]
+    extra_css      = '.topimage {margin-top: 30px}'

-        preprocess_regexps = [
-                (re.compile(u'<a .* rel="lightboxText" .*><img (.*)></a>'),
-                lambda match: '<img class="topimage" ' + match.group(1) + '>' ),
-                (re.compile(u'<br  /><br  />'),
-                lambda match: '<br\/>')
-            ]
-
-        feeds = [
-            (u'Biologia', u'http://kopalniawiedzy.pl/wiadomosci_biologia.rss'),
-            (u'Medycyna', u'http://kopalniawiedzy.pl/wiadomosci_medycyna.rss'),
-            (u'Psychologia', u'http://kopalniawiedzy.pl/wiadomosci_psychologia.rss'),
-            (u'Technologie', u'http://kopalniawiedzy.pl/wiadomosci_technologie.rss'),
-            (u'Ciekawostki', u'http://kopalniawiedzy.pl/wiadomosci_ciekawostki.rss'),
-            (u'Artykuły', u'http://kopalniawiedzy.pl/artykuly.rss')
+    preprocess_regexps = [
+            (re.compile(u'<a .* rel="lightboxText" .*><img (.*)></a>'),
+            lambda match: '<img class="topimage" ' + match.group(1) + '>' ),
+            (re.compile(u'<br  /><br  />'),
+            lambda match: '<br\/>')
        ]

-        def is_link_wanted(self, url, tag):
-            return tag['class'] == 'next'
+    feeds = [
+        (u'Biologia', u'http://kopalniawiedzy.pl/wiadomosci_biologia.rss'),
+        (u'Medycyna', u'http://kopalniawiedzy.pl/wiadomosci_medycyna.rss'),
+        (u'Psychologia', u'http://kopalniawiedzy.pl/wiadomosci_psychologia.rss'),
+        (u'Technologie', u'http://kopalniawiedzy.pl/wiadomosci_technologie.rss'),
+        (u'Ciekawostki', u'http://kopalniawiedzy.pl/wiadomosci_ciekawostki.rss'),
+        (u'Artykuły', u'http://kopalniawiedzy.pl/artykuly.rss')
+    ]

-        def remove_beyond(self, tag, next):
-                while tag is not None and getattr(tag, 'name', None) != 'body':
-                        after = getattr(tag, next)
-                        while after is not None:
-                                ns = getattr(tag, next)
-                                after.extract()
-                                after = ns
-                        tag = tag.parent
+    def is_link_wanted(self, url, tag):
+        return tag['class'] == 'next'

-        def append_page(self, soup, appendtag, position):
-                pager = soup.find('a',attrs={'class':'next'})
-                if pager:
-                    nexturl = self.INDEX + pager['href']
-                    soup2 = self.index_to_soup(nexturl)
-                    texttag = soup2.find('div', attrs={'id':'articleContent'})
+    def remove_beyond(self, tag, next):
+        while tag is not None and getattr(tag, 'name', None) != 'body':
+            after = getattr(tag, next)
+            while after is not None:
+                ns = getattr(tag, next)
+                after.extract()
+                after = ns
+            tag = tag.parent

-                    tag = texttag.find(attrs={'class':'pages'})
-                    self.remove_beyond(tag, 'nextSibling')
+    def append_page(self, soup, appendtag, position):
+        pager = soup.find('a',attrs={'class':'next'})
+        if pager:
+            nexturl = self.INDEX + pager['href']
+            soup2 = self.index_to_soup(nexturl)
+            texttag = soup2.find('div', attrs={'id':'articleContent'})

-                    newpos = len(texttag.contents)
-                    self.append_page(soup2,texttag,newpos)
+            tag = texttag.find(attrs={'class':'pages'})
+            self.remove_beyond(tag, 'nextSibling')

-                    appendtag.insert(position,texttag)
+            newpos = len(texttag.contents)
+            self.append_page(soup2,texttag,newpos)
+
+            appendtag.insert(position,texttag)


-        def preprocess_html(self, soup):
-                self.append_page(soup, soup.body, 3)
+    def preprocess_html(self, soup):
+        self.append_page(soup, soup.body, 3)

-                for item in soup.findAll('div',attrs={'class':'pages'}):
-                    item.extract()
+        for item in soup.findAll('div',attrs={'class':'pages'}):
+            item.extract()

-                for item in soup.findAll('p', attrs={'class':'wykop'}):
-                    item.extract()
+        for item in soup.findAll('p', attrs={'class':'wykop'}):
+            item.extract()

-                return soup
+        return soup
--- a/recipes/korespondent.recipe
+++ b/recipes/korespondent.recipe
@@ -24,17 +24,16 @@ class KorespondentPL(BasicNewsRecipe):
    extra_css      = '.naglowek {font-size: small}\n .tytul {font-size: x-large; padding-bottom: 10px; padding-top: 30px} \n .external {font-size: small}'

    preprocess_regexps = [
-			(re.compile(u'<a href="index\.php.*>(.*)</a>'),
-			lambda match: match.group(1) ),
-			(re.compile(u'<i>'),
-			lambda match:'<i class="external">' ),
-			(re.compile(u'<p></p>Więcej'),
-			lambda match:'Więcej' ),
-			(re.compile(u'target="_blank"'),
-			lambda match:'target="_blank" class="external"' ),
-			(re.compile(u'<p align="center">\nPoczytaj inne teksty w <a href="http://www.korespondent.pl">Serwisie wolnorynkowym Korespondent.pl</a>.*</body>', re.DOTALL|re.IGNORECASE),
-			lambda match: '</div></body>'),
-			]
+                        (re.compile(u'<a href="index\.php.*>(.*)</a>'),
+                        lambda match: match.group(1) ),
+                        (re.compile(u'<i>'),
+                        lambda match:'<i class="external">' ),
+                        (re.compile(u'<p></p>Więcej'),
+                        lambda match:'Więcej' ),
+                        (re.compile(u'target="_blank"'),
+                        lambda match:'target="_blank" class="external"' ),
+                        (re.compile(u'<p align="center">\nPoczytaj inne teksty w <a href="http://www.korespondent.pl">Serwisie wolnorynkowym Korespondent.pl</a>.*</body>', re.DOTALL|re.IGNORECASE),
+                        lambda match: '</div></body>'),
+                        ]

    feeds = [(u'Serwis informacyjny', u'http://korespondent.pl/rss.xml')]
-
--- a/recipes/legeartis.recipe
+++ b/recipes/legeartis.recipe
@@ -1,6 +1,6 @@
 #!/usr/bin/env  python

-__license__	= 'GPL v3'
+__license__     = 'GPL v3'
 __author__ = 'Mori'
 __version__ = 'v. 0.1'
 '''
@@ -10,34 +10,34 @@ olgierd.bblog.pl
 from calibre.web.feeds.news import BasicNewsRecipe

 class LegeArtisRecipe(BasicNewsRecipe):
-	__author__ = 'Mori'
-	language = 'pl'
+    __author__ = 'Mori'
+    language = 'pl'

-	title = u'Lege Artis'
-	publisher = u'Olgierd Rudak'
-	description = u'Wszystko, co chcieliby\u015bcie wiedzie\u0107 o prawie, ale wstydzicie si\u0119 zapyta\u0107'
+    title = u'Lege Artis'
+    publisher = u'Olgierd Rudak'
+    description = u'Wszystko, co chcieliby\u015bcie wiedzie\u0107 o prawie, ale wstydzicie si\u0119 zapyta\u0107'

-	max_articles_per_feed = 100
+    max_articles_per_feed = 100

-	no_stylesheets = True
-	remove_javascript = True
+    no_stylesheets = True
+    remove_javascript = True

-	extra_css = '''
-		img{clear: both;}
-	'''
+    extra_css = '''
+            img{clear: both;}
+    '''

-	feeds = [
-		(u'Lege Artis', u'http://olgierd.bblog.pl/rss/rss20.xml')
-	]
+    feeds = [
+            (u'Lege Artis', u'http://olgierd.bblog.pl/rss/rss20.xml')
+    ]

-	keep_only_tags = [
-		dict(name = 'div', attrs = {'class' : 'post_title'}),
-		dict(name = 'div', attrs = {'class' : 'post_date'}),
-		dict(name = 'div', attrs = {'class' : 'post_content'})
-	]
+    keep_only_tags = [
+            dict(name = 'div', attrs = {'class' : 'post_title'}),
+            dict(name = 'div', attrs = {'class' : 'post_date'}),
+            dict(name = 'div', attrs = {'class' : 'post_content'})
+    ]

-	remove_tags = [
-		dict(name = 'div', attrs = {'id' : 'bb_tools'}),
-		dict(name = 'div', attrs = {'class' : 'post_comments'}),
-		dict(name = 'object', attrs = {})
-	]
+    remove_tags = [
+            dict(name = 'div', attrs = {'id' : 'bb_tools'}),
+            dict(name = 'div', attrs = {'class' : 'post_comments'}),
+            dict(name = 'object', attrs = {})
+    ]
--- a/recipes/legitymizm.recipe
+++ b/recipes/legitymizm.recipe
@@ -47,4 +47,3 @@ class Legitymizm(BasicNewsRecipe):
        #cytat p.sentencja:first-letter { font-size: 44px; line-height: 33px; margin: 0 2px 0 0; font-style: normal; float: left; display: block; }
        p.autor { text-transform: uppercase; color: #898981; font-style: normal; text-align: left; }
    '''
-
--- a/recipes/spiders_web_pl.recipe
+++ b/recipes/spiders_web_pl.recipe
@@ -4,7 +4,7 @@ class SpidersWeb(BasicNewsRecipe):
    title          = u"Spider's Web"
    oldest_article = 7
    __author__        = 'fenuks'
-    description   = u''
+    description   = u'Opinie i analizy na temat technologii'
    cover_url      = 'http://www.spidersweb.pl/wp-content/themes/new_sw/images/spidersweb.png'
    category       = 'IT, WEB'
    language       = 'pl'