Various Polish recipes by Artur Stachecki

2025-07-09 03:04:10 -04:00 · 2012-11-18 23:30:47 +05:30 · 2012-11-18 23:30:47 +05:30 · 6712594a3e
commit 6712594a3e
parent 5e4f2aa6ac 1b637e7f15
12 changed files with 251 additions and 4 deletions
--- a/recipes/antyweb.recipe
+++ b/recipes/antyweb.recipe
@@ -0,0 +1,48 @@
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AntywebRecipe(BasicNewsRecipe):
+    """Calibre news recipe for the Antyweb blog (Polish-language)."""
+    encoding = 'utf-8'
+    __license__ = 'GPL v3'
+    __author__ = u'Artur Stachecki <artur.stachecki@gmail.com>'
+    language = 'pl'
+    version = 1
+    title = u'Antyweb'
+    category = u'News'
+    description = u'Blog o internecie i nowych technologiach'
+    cover_url = ''
+    remove_empty_feeds = True
+    auto_cleanup = False
+    no_stylesheets = True
+    use_embedded_content = False
+    oldest_article = 1
+    max_articles_per_feed = 100
+    remove_javascript = True
+    simultaneous_downloads = 3
+
+    keep_only_tags = [
+        dict(name='h1', attrs={'class': 'mm-article-title'}),
+        dict(name='div', attrs={'class': 'mm-article-content'}),
+    ]
+    remove_tags = [
+        dict(name='h2', attrs={'class': 'widgettitle'}),
+        dict(name='img', attrs={'class': 'alignleft'}),
+        # NOTE(review): inline CSS matched against `class` -- presumably `style` was meant; confirm.
+        dict(name='div', attrs={'class': 'float: right;margin-left:1em;margin-bottom: 0.5em;padding-bottom: 3px; width: 72px;'}),
+        dict(name='img', attrs={'src': 'http://antyweb.pl/wp-content/uploads/2011/09/HOSTERSI_testy_pasek600x30.gif'}),
+        dict(name='div', attrs={'class': 'podwpisowe'}),
+    ]
+
+    extra_css = '''
+                    body {font-family: verdana, arial, helvetica, geneva, sans-serif ;}
+                       '''
+
+    feeds = [(u'Artykuly', u'feed://feeds.feedburner.com/Antyweb?format=xml')]
+
+    def preprocess_html(self, soup):
+        """Replace each ``<a>`` whose ``.string`` is not None by that string; return ``soup``."""
+        for alink in soup.findAll('a'):
+            if alink.string is not None:
+                alink.replaceWith(alink.string)
+        return soup
--- a/recipes/bankier_pl.recipe
+++ b/recipes/bankier_pl.recipe
@@ -0,0 +1,50 @@
+#!/usr/bin/env  python
+
+__license__ = 'GPL v3'
+__author__ = 'teepel <teepel44@gmail.com>'
+
+'''
+bankier.pl
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class bankier(BasicNewsRecipe):
+    """Calibre news recipe for the Polish financial portal Bankier.pl."""
+    title = u'Bankier.pl'
+    __author__ = 'teepel <teepel44@gmail.com>'
+    language = 'pl'
+    description = 'Polski portal finansowy. Informacje o: gospodarka, inwestowanie, finanse osobiste, prowadzenie firmy, kursy walut, notowania akcji, fundusze.'
+    masthead_url = 'http://www.bankier.pl/gfx/hd-mid-02.gif'
+    INDEX = 'http://bankier.pl/'
+    remove_empty_feeds = True
+    oldest_article = 1
+    max_articles_per_feed = 100
+    remove_javascript = True
+    no_stylesheets = True
+    simultaneous_downloads = 5
+
+    keep_only_tags = [dict(name='div', attrs={'align': 'left'})]
+    remove_tags = [
+        dict(name='table', attrs={'cellspacing': '2'}),
+        dict(name='div', attrs={'align': 'center'}),
+        dict(name='img', attrs={'src': '/gfx/hd-mid-02.gif'}),
+    ]
+
+    feeds = [
+        (u'Wiadomości dnia', u'http://feeds.feedburner.com/bankier-wiadomosci-dnia'),
+        (u'Finanse osobiste', u'http://feeds.feedburner.com/bankier-finanse-osobiste'),
+        (u'Firma', u'http://feeds.feedburner.com/bankier-firma'),
+        (u'Giełda', u'http://feeds.feedburner.com/bankier-gielda'),
+        (u'Rynek walutowy', u'http://feeds.feedburner.com/bankier-rynek-walutowy'),
+        (u'Komunikaty ze spółek', u'http://feeds.feedburner.com/bankier-espi'),
+    ]
+
+    def print_version(self, url):
+        """Return the print-view URL built from the article id found in ``url``."""
+        # The id follows the last '-' of the final path component; the query
+        # string, fragment and file extension are dropped first.
+        page = url.split('#', 1)[0].split('?', 1)[0].rstrip('/').rsplit('/', 1)[-1]
+        article_id = page.rsplit('.', 1)[0].rsplit('-', 1)[-1]
+        return 'http://www.bankier.pl/wiadomosci/print.html?article_id=' + article_id
+
--- a/recipes/f1_ultra.recipe
+++ b/recipes/f1_ultra.recipe
@@ -0,0 +1,35 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+import re
+
+class f1ultra(BasicNewsRecipe):
+    """Calibre news recipe for the Polish Formula 1 site f1ultra.pl."""
+    title = u'Formuła 1 - F1 ultra'
+    __license__ = 'GPL v3'
+    __author__ = 'MrStefan <mrstefaan@gmail.com>, Artur Stachecki <artur.stachecki@gmail.com>'
+    language = 'pl'
+    description = u'Formuła 1, Robert Kubica, F3, GP2 oraz inne serie wyścigowe.'
+    masthead_url = 'http://www.f1ultra.pl/templates/f1ultra/images/logo.gif'
+    remove_empty_feeds = True
+    oldest_article = 1
+    max_articles_per_feed = 100
+    remove_javascript = True
+    no_stylesheets = True
+
+    keep_only_tags = [dict(name='div', attrs={'id': 'main'})]
+    remove_tags_after = [dict(attrs={'style': 'margin-top:5px;margin-bottom:5px;display: inline;'})]
+    remove_tags = [dict(attrs={'class': ['buttonheading', 'avPlayerContainer', 'createdate']}),
+                   dict(attrs={'title': ['PDF', 'Drukuj', 'Email']}),
+                   dict(name='form', attrs={'method': 'post'}),
+                   dict(name='hr', attrs={'size': '2'})]
+
+    # Each match is replaced by ''; the width rule removes the entire quoted attribute.
+    preprocess_regexps = [(re.compile(r'align="left"'), lambda match: ''),
+                          (re.compile(r'align="right"'), lambda match: ''),
+                          (re.compile(r'width="[^"]*"'), lambda match: ''),
+                          (re.compile(r'\<table .*?\>'), lambda match: '')]
+    extra_css = '''.contentheading { font-size: 1.4em; font-weight: bold; }
+	           img { display: block; clear: both;}
+	        '''
+    remove_attributes = ['width', 'height', 'position', 'float', 'padding-left', 'padding-right', 'padding', 'text-align']
+
+    feeds = [(u'F1 Ultra', u'http://www.f1ultra.pl/index.php?option=com_rd_rss&id=1&Itemid=245')]
--- a/recipes/gazeta_pl_krakow.recipe
+++ b/recipes/gazeta_pl_krakow.recipe
@@ -8,7 +8,6 @@ krakow.gazeta.pl
 '''

 from calibre.web.feeds.news import BasicNewsRecipe
-import re

 class gw_krakow(BasicNewsRecipe):
    title          = u'Gazeta.pl Kraków'
--- a/recipes/gazeta_pl_warszawa.recipe
+++ b/recipes/gazeta_pl_warszawa.recipe
@@ -8,7 +8,6 @@ warszawa.gazeta.pl
 '''

 from calibre.web.feeds.news import BasicNewsRecipe
-import re

 class gw_wawa(BasicNewsRecipe):
    title          = u'Gazeta.pl Warszawa'
--- a/recipes/icons/antyweb.png
+++ b/recipes/icons/antyweb.png
--- a/recipes/icons/bankier_pl.png
+++ b/recipes/icons/bankier_pl.png
--- a/recipes/icons/f1_ultra.png
+++ b/recipes/icons/f1_ultra.png
--- a/recipes/icons/myapple_pl.png
+++ b/recipes/icons/myapple_pl.png
--- a/recipes/icons/telepolis_pl.png
+++ b/recipes/icons/telepolis_pl.png
--- a/recipes/myapple_pl.recipe
+++ b/recipes/myapple_pl.recipe
@@ -0,0 +1,49 @@
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class MyAppleRecipe(BasicNewsRecipe):
+    """Calibre news recipe for MyApple.pl (Polish-language)."""
+
+    __license__ = 'GPL v3'
+    __author__ = u'Artur Stachecki <artur.stachecki@gmail.com>'
+    language = 'pl'
+    version = 1
+
+    title = u'MyApple.pl'
+    category = u'News'
+    description = u' Największy w Polsce serwis zajmujący się tematyką związaną z Apple i wszelkimi produktami tej firmy.'
+    cover_url = ''
+
+    remove_empty_feeds = True
+    no_stylesheets = True
+    remove_javascript = True
+    oldest_article = 7
+    max_articles_per_feed = 100000
+    recursions = 0
+    simultaneous_downloads = 3
+
+    keep_only_tags = [dict(name='div', attrs={'id': 'article_content'})]
+    # Elements stripped from each page, matched by class or id.
+    remove_tags = [
+        dict(name='div', attrs={'class': 'article_author_date_comment_container'}),
+        dict(name='div', attrs={'class': 'fullwidth'}),
+        dict(name='div', attrs={'class': 'cmslinks'}),
+        dict(name='div', attrs={'class': 'googleads-468'}),
+        dict(name='div', attrs={'id': 'comments'}),
+    ]
+
+    extra_css = '''
+                    body {font-family: verdana, arial, helvetica, geneva, sans-serif ;}
+                    td.contentheading{font-size: large; font-weight: bold;}
+                    '''
+
+    feeds = [('News', 'feed://myapple.pl/external.php?do=rss&type=newcontent&sectionid=1&days=120&count=10')]
+
+    def preprocess_html(self, soup):
+        """Replace each ``<a>`` whose ``.string`` is not None by that string; return ``soup``."""
+        for anchor in soup.findAll('a'):
+            text = anchor.string
+            if text is None:
+                continue
+            anchor.replaceWith(text)
+        return soup
--- a/recipes/telepolis_pl.recipe
+++ b/recipes/telepolis_pl.recipe
@@ -0,0 +1,67 @@
+#!/usr/bin/env  python
+
+__license__ = 'GPL v3'
+
+from calibre.web.feeds.news import BasicNewsRecipe
+import re
+
+
+class telepolis(BasicNewsRecipe):
+    """Calibre news recipe for Telepolis.pl; articles are fetched via print URLs."""
+    title = u'Telepolis.pl'
+    __author__ = 'Artur Stachecki <artur.stachecki@gmail.com>'
+    language = 'pl'
+    description = u'Twój telekomunikacyjny serwis informacyjny.\
+                  Codzienne informacje, testy i artykuły,\
+                  promocje, baza telefonów oraz centrum rozrywki'
+    oldest_article = 7
+    masthead_url = 'http://telepolis.pl/i/telepolis-logo2.gif'
+    max_articles_per_feed = 100
+    simultaneous_downloads = 5
+    remove_javascript = True
+    no_stylesheets = True
+    use_embedded_content = False
+
+    remove_tags = [dict(attrs={'alt': 'TELEPOLIS.pl'})]
+
+    # Every match of these patterns is replaced by the empty string.
+    preprocess_regexps = [
+        (re.compile(r'<: .*? :>'), lambda match: ''),
+        (re.compile(r'<b>Zobacz:</b>.*?</a>', re.DOTALL), lambda match: ''),
+        (re.compile(r'<-ankieta.*?>'), lambda match: ''),
+        (re.compile(r'\(Q\!\)'), lambda match: ''),
+        (re.compile(r'\(plik.*?\)'), lambda match: ''),
+        (re.compile(r'<br.*?><br.*?>', re.DOTALL), lambda match: ''),
+    ]
+
+    extra_css = '''.tb { font-weight: bold; font-size: 20px;}'''
+
+    feeds = [
+        (u'Wiadomości', u'http://www.telepolis.pl/rss/news.php'),
+        (u'Artykuły', u'http://www.telepolis.pl/rss/artykuly.php'),
+    ]
+
+    def print_version(self, url):
+        """Return ``url`` rewritten to the matching ``*_print.php`` page."""
+        if 'news.php' in url:
+            return url.replace('news.php', 'news_print.php')
+        return url.replace('artykuly.php', 'art_print.php')
+
+    def preprocess_html(self, soup):
+        """Fix image sources and drop boilerplate table rows from ``soup``.
+
+        Images lacking ``src`` and pages without any ``<tr>`` are tolerated.
+        """
+        for image in soup.findAll('img'):
+            src = image.get('src', '')
+            if 'm.jpg' in src:
+                # presumably 'm.jpg' marks a thumbnail of the '.jpg' image
+                image['src'] = src.replace('m.jpg', '.jpg')
+        logo = soup.find('tr')  # first table row; assumed to hold the logo
+        if logo is not None:
+            logo.extract()
+        for tag in soup.findAll('tr'):
+            text = self.tag_to_string(tag)
+            if 'Wiadomość wydrukowana' in text or 'copyright' in text:
+                tag.extract()
+        return self.adeify_images(soup)