Various Polish recipes by Artur Stachecki

2025-07-09 03:04:10 -04:00 · 2012-11-18 23:30:47 +05:30 · 2012-11-18 23:30:47 +05:30 · 6712594a3e
commit 6712594a3e
parent 5e4f2aa6ac 1b637e7f15
12 changed files with 251 additions and 4 deletions
--- a/recipes/antyweb.recipe
+++ b/recipes/antyweb.recipe
@ -0,0 +1,48 @@
 from calibre.web.feeds.news import BasicNewsRecipe
 class AntywebRecipe(BasicNewsRecipe):
    """Calibre news recipe for Antyweb (antyweb.pl).

    Articles come from the site's FeedBurner feed. Each page is filtered
    with ``keep_only_tags`` / ``remove_tags`` and every anchor that wraps
    a single string is replaced by that string.
    """

    __license__ = 'GPL v3'
    __author__ = u'Artur Stachecki <artur.stachecki@gmail.com>'
    version = 1

    # Publication metadata.
    title = u'Antyweb'
    description = u'Blog o internecie i nowych technologiach'
    category = u'News'
    language = 'pl'
    encoding = 'utf-8'
    cover_url = ''

    # Download and clean-up options.
    oldest_article = 1
    max_articles_per_feed = 100
    simultaneous_downloads = 3
    remove_empty_feeds = True
    auto_cleanup = False
    no_stylesheets = True
    use_embedded_content = False
    remove_javascript = True

    # Only the article headline and body are kept.
    keep_only_tags = [
        dict(name='h1', attrs={'class': 'mm-article-title'}),
        dict(name='div', attrs={'class': 'mm-article-content'}),
    ]

    remove_tags = [
        dict(name='h2', attrs={'class': 'widgettitle'}),
        dict(name='img', attrs={'class': 'alignleft'}),
        # NOTE(review): the value below is a CSS declaration list, so this
        # filter was probably meant to match the 'style' attribute rather
        # than 'class' -- confirm against the live markup before changing.
        dict(name='div', attrs={'class': 'float: right;margin-left:1em;margin-bottom: 0.5em;padding-bottom: 3px; width: 72px;'}),
        dict(name='img', attrs={'src': 'http://antyweb.pl/wp-content/uploads/2011/09/HOSTERSI_testy_pasek600x30.gif'}),
        dict(name='div', attrs={'class': 'podwpisowe'}),
    ]

    extra_css = '''
                    body {font-family: verdana, arial, helvetica, geneva, sans-serif ;}
                       '''

    feeds = [
        (u'Artykuly', u'feed://feeds.feedburner.com/Antyweb?format=xml'),
    ]

    def preprocess_html(self, soup):
        """Flatten simple hyperlinks to plain text.

        Every ``<a>`` whose ``.string`` is not None is replaced in the tree
        by that string; other anchors are left untouched.

        :param soup: parsed article page.
        :return: the same ``soup`` object, modified in place.
        """
        for alink in soup.findAll('a'):
            link_text = alink.string
            if link_text is not None:
                alink.replaceWith(link_text)
        # Indented with spaces: the original line used a tab, which Python 3
        # rejects as inconsistent indentation.
        return soup
--- a/recipes/bankier_pl.recipe
+++ b/recipes/bankier_pl.recipe
@ -0,0 +1,50 @@
 #!/usr/bin/env  python
 __license__ = 'GPL v3'
 __author__ = 'teepel <teepel44@gmail.com>'
 '''
 bankier.pl
 '''
 from calibre.web.feeds.news import BasicNewsRecipe
 class bankier(BasicNewsRecipe):
    """Calibre news recipe for the Polish finance portal Bankier.pl.

    Articles are listed from six FeedBurner feeds and downloaded through
    the site's print view (see ``print_version``).
    """

    title = u'Bankier.pl'
    __author__ = 'teepel <teepel44@gmail.com>'
    language = 'pl'
    description ='Polski portal finansowy. Informacje o: gospodarka, inwestowanie, finanse osobiste, prowadzenie firmy, kursy walut, notowania akcji, fundusze.'
    masthead_url = 'http://www.bankier.pl/gfx/hd-mid-02.gif'
    INDEX = 'http://bankier.pl/'

    # Download and clean-up options.
    remove_empty_feeds = True
    oldest_article = 1
    max_articles_per_feed = 100
    remove_javascript = True
    no_stylesheets = True
    simultaneous_downloads = 5

    keep_only_tags = [
        dict(name='div', attrs={'align': 'left'}),
    ]

    remove_tags = [
        dict(name='table', attrs={'cellspacing': '2'}),
        dict(name='div', attrs={'align': 'center'}),
        dict(name='img', attrs={'src': '/gfx/hd-mid-02.gif'}),
    ]
    # Disabled filters, kept from the original recipe:
    #remove_tags.append(dict(name = 'a', attrs = {'target' : '_blank'}))
    #remove_tags.append(dict(name = 'br', attrs = {'clear' : 'all'}))

    feeds = [
        (u'Wiadomości dnia', u'http://feeds.feedburner.com/bankier-wiadomosci-dnia'),
        (u'Finanse osobiste', u'http://feeds.feedburner.com/bankier-finanse-osobiste'),
        (u'Firma', u'http://feeds.feedburner.com/bankier-firma'),
        (u'Giełda', u'http://feeds.feedburner.com/bankier-gielda'),
        (u'Rynek walutowy', u'http://feeds.feedburner.com/bankier-rynek-walutowy'),
        (u'Komunikaty ze spółek', u'http://feeds.feedburner.com/bankier-espi'),
    ]

    def print_version(self, url):
        """Return the print-view URL for the article at ``url``.

        The article id is the text after the last ``-`` in the final path
        component, with the file extension, query string and fragment
        ignored. For ``http://www.bankier.pl/<path>/<slug>-<id>.html`` this
        gives the same result as the previous implementation. That version
        indexed ``url.split('.')[2]``, so it returned ``'html'`` for hosts
        without the ``www.`` prefix and a wrong id when the slug itself
        contained a dot.

        :param url: article URL taken from the feed.
        :return: ``print.html`` URL carrying the extracted ``article_id``.
        """
        # Drop query string / fragment so tracking parameters cannot leak
        # into the id.
        path = url.split('?', 1)[0].split('#', 1)[0]
        page = path.rstrip('/').rsplit('/', 1)[-1]
        # Strip a trailing extension such as '.html' when present.
        stem = page.rsplit('.', 1)[0]
        article_id = stem.rsplit('-', 1)[-1]
        return 'http://www.bankier.pl/wiadomosci/print.html?article_id=' + article_id
--- a/recipes/f1_ultra.recipe
+++ b/recipes/f1_ultra.recipe
@ -0,0 +1,35 @@
 from calibre.web.feeds.news import BasicNewsRecipe
 import re
 class f1ultra(BasicNewsRecipe):
    """Calibre news recipe for the Polish Formula 1 site F1 Ultra.

    Articles come from the site's single RSS feed. The raw HTML is run
    through ``preprocess_regexps`` before parsing, then filtered with the
    tag rules below.
    """

    title = u'Formuła 1 - F1 ultra'
    __license__ = 'GPL v3'
    __author__ = 'MrStefan <mrstefaan@gmail.com>, Artur Stachecki <artur.stachecki@gmail.com>'
    language = 'pl'
    description =u'Formuła 1, Robert Kubica, F3, GP2 oraz inne serie wyścigowe.'
    masthead_url = 'http://www.f1ultra.pl/templates/f1ultra/images/logo.gif'

    # Download and clean-up options.
    remove_empty_feeds = True
    oldest_article = 1
    max_articles_per_feed = 100
    remove_javascript = True
    no_stylesheets = True

    keep_only_tags = [
        dict(name='div', attrs={'id': 'main'}),
    ]
    remove_tags_after = [
        dict(attrs={'style': 'margin-top:5px;margin-bottom:5px;display: inline;'}),
    ]
    remove_tags = [
        dict(attrs={'class': ['buttonheading', 'avPlayerContainer', 'createdate']}),
        dict(attrs={'title': ['PDF', 'Drukuj', 'Email']}),
        dict(name='form', attrs={'method': 'post'}),
        dict(name='hr', attrs={'size': '2'}),
    ]

    # (pattern, replacement) pairs; every match is replaced by ''.
    preprocess_regexps = [
        (re.compile(r'align="left"'), lambda match: ''),
        (re.compile(r'align="right"'), lambda match: ''),
        # The previous pattern r'width=\"*\"' matched only 'width="' and
        # left the value plus closing quote behind (width="100" became
        # 100"), corrupting the tag. Match the whole attribute instead.
        (re.compile(r'width="[^"]*"'), lambda match: ''),
        (re.compile(r'\<table .*?\>'), lambda match: ''),
    ]

    extra_css = '''.contentheading { font-size: 1.4em; font-weight: bold; }
 	           img { display: block; clear: both;}
 	        '''

    remove_attributes = ['width','height','position','float','padding-left','padding-right','padding','text-align']

    feeds = [(u'F1 Ultra', u'http://www.f1ultra.pl/index.php?option=com_rd_rss&id=1&Itemid=245')]
--- a/recipes/gazeta_pl_krakow.recipe
+++ b/recipes/gazeta_pl_krakow.recipe
@ -8,7 +8,6 @@ krakow.gazeta.pl
 '''
 from calibre.web.feeds.news import BasicNewsRecipe
 import re
 class gw_krakow(BasicNewsRecipe):
    title          = u'Gazeta.pl Kraków'
@ -46,7 +45,7 @@ class gw_krakow(BasicNewsRecipe):
    remove_tags.append(dict(name = 'div', attrs = {'id' : 'gazeta_article_buttons'}))
    remove_tags_after = [dict(name = 'div', attrs = {'id' : 'gazeta_article_share'})]
-       
+
    feeds          = [(u'Wiadomości', u'http://rss.gazeta.pl/pub/rss/krakow.xml')]
    def skip_ad_pages(self, soup):
--- a/recipes/gazeta_pl_warszawa.recipe
+++ b/recipes/gazeta_pl_warszawa.recipe
@ -8,7 +8,6 @@ warszawa.gazeta.pl
 '''
 from calibre.web.feeds.news import BasicNewsRecipe
 import re
 class gw_wawa(BasicNewsRecipe):
    title          = u'Gazeta.pl Warszawa'
@ -43,7 +42,7 @@ class gw_wawa(BasicNewsRecipe):
    remove_tags.append(dict(name = 'div', attrs = {'class' : 'gazeta_article_related_new'}))
    remove_tags.append(dict(name = 'div', attrs = {'class' : 'gazetaVideoPlayer'}))
    remove_tags.append(dict(name = 'div', attrs = {'id' : 'gazeta_article_miniatures'}))
-       
+
    feeds          = [(u'Wiadomości', u'http://rss.gazeta.pl/pub/rss/warszawa.xml')]
    def skip_ad_pages(self, soup):
--- a/recipes/icons/antyweb.png
+++ b/recipes/icons/antyweb.png
--- a/recipes/icons/bankier_pl.png
+++ b/recipes/icons/bankier_pl.png
--- a/recipes/icons/f1_ultra.png
+++ b/recipes/icons/f1_ultra.png
--- a/recipes/icons/myapple_pl.png
+++ b/recipes/icons/myapple_pl.png
--- a/recipes/icons/telepolis_pl.png
+++ b/recipes/icons/telepolis_pl.png
--- a/recipes/myapple_pl.recipe
+++ b/recipes/myapple_pl.recipe
@ -0,0 +1,49 @@
 from calibre.web.feeds.news import BasicNewsRecipe
 class MyAppleRecipe(BasicNewsRecipe):
    """Calibre news recipe for MyApple.pl.

    Articles come from the site's RSS export. Only the ``article_content``
    container is kept, and every anchor that wraps a single string is
    replaced by that string.
    """

    __license__ = 'GPL v3'
    __author__ = u'Artur Stachecki <artur.stachecki@gmail.com>'
    version = 1

    # Publication metadata.
    title = u'MyApple.pl'
    description = u' Największy w Polsce serwis zajmujący się tematyką związaną z Apple i wszelkimi produktami tej firmy.'
    category = u'News'
    language = 'pl'
    cover_url = ''

    # Download and clean-up options.
    oldest_article = 7
    max_articles_per_feed = 100000
    simultaneous_downloads = 3
    recursions = 0
    remove_empty_feeds = True
    no_stylesheets = True
    remove_javascript = True

    keep_only_tags = [
        dict(name='div', attrs={'id': 'article_content'}),
    ]

    remove_tags = [
        dict(name='div', attrs={'class': 'article_author_date_comment_container'}),
        dict(name='div', attrs={'class': 'fullwidth'}),
        dict(name='div', attrs={'class': 'cmslinks'}),
        dict(name='div', attrs={'class': 'googleads-468'}),
        dict(name='div', attrs={'id': 'comments'}),
    ]

    extra_css = '''
                    body {font-family: verdana, arial, helvetica, geneva, sans-serif ;}
                    td.contentheading{font-size: large; font-weight: bold;}
                    '''

    feeds = [
        ('News', 'feed://myapple.pl/external.php?do=rss&type=newcontent&sectionid=1&days=120&count=10'),
    ]

    def preprocess_html(self, soup):
        """Flatten simple hyperlinks to plain text.

        Every ``<a>`` whose ``.string`` is not None is replaced in the tree
        by that string; other anchors are left untouched.

        :param soup: parsed article page.
        :return: the same ``soup`` object, modified in place.
        """
        for anchor in soup.findAll('a'):
            text = anchor.string
            if text is None:
                continue
            anchor.replaceWith(text)
        return soup
--- a/recipes/telepolis_pl.recipe
+++ b/recipes/telepolis_pl.recipe
@ -0,0 +1,67 @@
 #!/usr/bin/env  python
 __license__ = 'GPL v3'
 from calibre.web.feeds.news import BasicNewsRecipe
 import re
 class telepolis(BasicNewsRecipe):
    """Calibre news recipe for the Polish telecom portal Telepolis.pl.

    Articles are listed from the news and articles RSS feeds and fetched
    through the site's print pages (see ``print_version``). The print
    pages are then cleaned up in ``preprocess_html``.
    """

    title = u'Telepolis.pl'
    __author__ = 'Artur Stachecki <artur.stachecki@gmail.com>'
    language = 'pl'
    description = u'Twój telekomunikacyjny serwis informacyjny.\
                  Codzienne informacje, testy i artykuły,\
                  promocje, baza telefonów oraz centrum rozrywki'
    oldest_article = 7
    masthead_url = 'http://telepolis.pl/i/telepolis-logo2.gif'
    max_articles_per_feed = 100
    simultaneous_downloads = 5
    remove_javascript = True
    no_stylesheets = True
    use_embedded_content = False

    remove_tags = [
        dict(attrs={'alt': 'TELEPOLIS.pl'}),
    ]

    # (pattern, replacement) pairs; every match is replaced by ''.
    preprocess_regexps = [
        (re.compile(r'<: .*? :>'), lambda match: ''),
        (re.compile(r'<b>Zobacz:</b>.*?</a>', re.DOTALL), lambda match: ''),
        (re.compile(r'<-ankieta.*?>'), lambda match: ''),
        (re.compile(r'\(Q\!\)'), lambda match: ''),
        (re.compile(r'\(plik.*?\)'), lambda match: ''),
        (re.compile(r'<br.*?><br.*?>', re.DOTALL), lambda match: ''),
    ]

    extra_css = '''.tb { font-weight: bold; font-size: 20px;}'''

    feeds = [
        (u'Wiadomości', u'http://www.telepolis.pl/rss/news.php'),
        (u'Artykuły', u'http://www.telepolis.pl/rss/artykuly.php')
    ]

    def print_version(self, url):
        """Return the print-page URL for ``url``.

        ``news.php`` is rewritten to ``news_print.php``; any other URL has
        ``artykuly.php`` rewritten to ``art_print.php``. A URL containing
        neither is returned unchanged.
        """
        if 'news.php' in url:
            return url.replace('news.php', 'news_print.php')
        return url.replace('artykuly.php', 'art_print.php')

    def preprocess_html(self, soup):
        """Clean up a print page before conversion.

        * Image sources containing ``m.jpg`` are rewritten to ``.jpg``
          (presumably thumbnail -> full-size; not verifiable from here).
        * The first ``<tr>`` of the page is removed, when there is one.
        * Every ``<tr>`` whose text contains one of the footer markers is
          removed.

        :param soup: parsed article page.
        :return: result of ``self.adeify_images(soup)``.
        """
        for image in soup.findAll('img'):
            # .get() instead of ['src']: an <img> without a src attribute
            # must not abort processing of the whole article.
            src = image.get('src')
            if src and 'm.jpg' in src:
                image['src'] = src.replace('m.jpg', '.jpg')

        # find() returns None when the page has no table rows at all.
        logo = soup.find('tr')
        if logo is not None:
            logo.extract()

        # Unicode literals, like the other non-ASCII strings in this class,
        # so the containment test does not depend on implicit byte/unicode
        # coercion under Python 2.
        markers = (u'Wiadomość wydrukowana', u'copyright')
        for tag in soup.findAll('tr'):
            text = self.tag_to_string(tag)
            if any(marker in text for marker in markers):
                tag.extract()
        return self.adeify_images(soup)