add some recipes and icons from kalibrator, part 1

2025-07-09 03:04:10 -04:00 · 2012-11-09 00:05:46 +01:00 · 2012-11-09 00:05:46 +01:00 · 98b7cd3e4b
commit 98b7cd3e4b
parent 5d9b34cd17
13 changed files with 341 additions and 0 deletions
--- a/recipes/icons/mateusz_czytania.png
+++ b/recipes/icons/mateusz_czytania.png
--- a/recipes/icons/rushisaband.png
+++ b/recipes/icons/rushisaband.png
--- a/recipes/icons/rynek_infrastruktury.png
+++ b/recipes/icons/rynek_infrastruktury.png
--- a/recipes/icons/rynek_kolejowy.png
+++ b/recipes/icons/rynek_kolejowy.png
--- a/recipes/icons/satkurier.png
+++ b/recipes/icons/satkurier.png
--- a/recipes/kerrang.recipe
+++ b/recipes/kerrang.recipe
@ -0,0 +1,36 @@
+#!/usr/bin/env  python
+
+__license__ = 'GPL v3'
+
+from calibre.web.feeds.news import BasicNewsRecipe
+import re
+
+
+class kerrang(BasicNewsRecipe):
+    """Calibre news recipe for Kerrang!, fed by the kerrang.com blog RSS feed."""
+
+    title = u'Kerrang!'
+    __author__ = 'Artur Stachecki <artur.stachecki@gmail.com>'
+    language = 'en'
+    description = u'UK-based magazine devoted to rock music published by Bauer Media Group'
+    oldest_article = 7
+    masthead_url = 'http://images.kerrang.com/design/kerrang/kerrangsite/logo.gif'
+    max_articles_per_feed = 100
+    simultaneous_downloads = 5
+    remove_javascript = True
+    no_stylesheets = True
+    use_embedded_content = False
+    recursions = 0
+
+    # Tag filter: elements whose class is 'headz' or 'blktxt'.
+    keep_only_tags = [dict(attrs={'class': ['headz', 'blktxt']})]
+
+    extra_css = '''       img { display: block; margin-right: auto;}
+                        h1 {text-align: left; font-size: 22px;}'''
+
+    feeds = [(u'News', u'http://www.kerrang.com/blog/rss.xml')]
+
+    def preprocess_html(self, soup):
+        """Replace text-only links in *soup* with their plain text.
+
+        Every <a> tag whose ``.string`` is not None is swapped for that
+        string; links whose ``.string`` is None are left untouched.
+
+        Returns the same, modified, *soup* object.
+        """
+        # The loop body used to be indented one column deeper than the
+        # ``return`` below, which made the whole recipe fail to compile
+        # with an IndentationError.
+        for alink in soup.findAll('a'):
+            if alink.string is not None:
+                alink.replaceWith(alink.string)
+        return soup
--- a/recipes/lequipe.recipe
+++ b/recipes/lequipe.recipe
@ -0,0 +1,46 @@
+#!/usr/bin/env  python
+
+__license__ = 'GPL v3'
+
+from calibre.web.feeds.news import BasicNewsRecipe
+import re
+
+
+class leequipe(BasicNewsRecipe):
+    """Calibre news recipe for the per-sport RSS feeds of lequipe.fr."""
+
+    title = u'l\'equipe'
+    description = u'Retrouvez tout le sport en direct sur le site de L\'EQUIPE et suivez l\'actualité du football, rugby, basket, cyclisme, f1, volley, hand, tous les résultats sportifs'
+    __author__ = 'Artur Stachecki <artur.stachecki@gmail.com>'
+    language = 'fr'
+    masthead_url = 'http://static.lequipe.fr/v6/img/logo-lequipe.png'
+
+    oldest_article = 1
+    max_articles_per_feed = 100
+    simultaneous_downloads = 5
+    recursions = 0
+    use_embedded_content = False
+    no_stylesheets = True
+    remove_javascript = True
+
+    # Tag filters: the element with id 'article' is the one to keep; the
+    # ids and classes listed under remove_tags are the ones to drop.
+    keep_only_tags = [{'attrs': {'id': ['article']}}]
+    remove_tags = [
+        {'attrs': {'id': ['partage', 'ensavoirplus', 'bloc_bas_breve', 'commentaires', 'tools']}},
+        {'attrs': {'class': ['partage_bis', 'date']}},
+    ]
+
+    feeds = [
+        (u'Football', u'http://www.lequipe.fr/rss/actu_rss_Football.xml'),
+        (u'Auto-Moto', u'http://www.lequipe.fr/rss/actu_rss_Auto-Moto.xml'),
+        (u'Tennis', u'http://www.lequipe.fr/rss/actu_rss_Tennis.xml'),
+        (u'Golf', u'http://www.lequipe.fr/rss/actu_rss_Golf.xml'),
+        (u'Rugby', u'http://www.lequipe.fr/rss/actu_rss_Rugby.xml'),
+        (u'Basket', u'http://www.lequipe.fr/rss/actu_rss_Basket.xml'),
+        (u'Hand', u'http://www.lequipe.fr/rss/actu_rss_Hand.xml'),
+        (u'Cyclisme', u'http://www.lequipe.fr/rss/actu_rss_Cyclisme.xml'),
+        (u'Autres Sports', u'http://pipes.yahoo.com/pipes/pipe.run?_id=2039f7f4f350c70c5e4e8633aa1b37cd&_render=rss'),
+    ]
+
+    def preprocess_html(self, soup):
+        """Flatten text-only links: swap each <a> for its ``.string``.
+
+        Anchors whose ``.string`` is None are skipped. Returns *soup*.
+        """
+        for anchor in soup.findAll('a'):
+            text = anchor.string
+            if text is None:
+                continue
+            anchor.replaceWith(text)
+        return soup
--- a/recipes/mateusz_czytania.recipe
+++ b/recipes/mateusz_czytania.recipe
@ -0,0 +1,37 @@
+#!/usr/bin/env  python
+
+__license__ = 'GPL v3'
+__author__ = 'teepel <teepel44@gmail.com>'
+
+'''
+http://www.mateusz.pl/czytania
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+import re
+
+class czytania_mateusz(BasicNewsRecipe):
+    """Calibre news recipe for the daily readings RSS feed of mateusz.pl."""
+
+    title = u'Czytania na ka\u017cdy dzie\u0144'
+    __author__ = 'teepel <teepel44@gmail.com>'
+    description = u'Codzienne czytania z jednego z najstarszych polskich serwisów katolickich.'
+    language = 'pl'
+    INDEX = 'http://www.mateusz.pl/czytania'
+    oldest_article = 1
+    remove_empty_feeds = True
+    no_stylesheets = True
+    # Previously assigned twice in this class body; one assignment suffices.
+    auto_cleanup = True
+    remove_javascript = True
+    simultaneous_downloads = 2
+    max_articles_per_feed = 100
+
+    feeds = [(u'Czytania', u'http://mateusz.pl/rss/czytania/')]
+
+    remove_tags = [dict(name='p', attrs={'class': 'top'})]
+
+    # thanks t3d
+    def get_article_url(self, article):
+        """Return the link of a feed entry, or None for entries to skip.
+
+        *article* is the feed entry; only its ``get('link')`` is used.
+        Entries without a link, and entries whose link contains 'kmt.pl',
+        yield None.
+        """
+        link = article.get('link')
+        # A missing link used to raise TypeError on the substring test;
+        # treat it like any other entry that has no usable URL.
+        if not link or 'kmt.pl' in link:
+            return None
+        return link
--- a/recipes/naszdziennik.recipe
+++ b/recipes/naszdziennik.recipe
@ -0,0 +1,61 @@
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class naszdziennik(BasicNewsRecipe):
+    """Calibre news recipe for Nasz Dziennik, built by scraping its news index page."""
+
+    title = u'Nasz Dziennik'
+    __author__ = 'Artur Stachecki <artur.stachecki@gmail.com>'
+    language = 'pl'
+    description = u'Nasz Dziennik - Ogólnopolska gazeta codzienna. Podejmuje tematykę dotyczącą życia społecznego, kulturalnego, politycznego i religijnego. Propaguje wartości chrześcijańskie oraz tradycję i kulturę polską.'
+    masthead_url = 'http://www.naszdziennik.pl/images/logo-male.png'
+    max_articles_per_feed = 100
+    remove_javascript = True
+    no_stylesheets = True
+
+    keep_only_tags = [dict(attrs={'id': 'article'})]
+
+    def parse_index(self):
+        """Build the list of feeds from http://www.naszdziennik.pl/news.
+
+        Every element with class 'news-article' contributes one article.
+        Articles are grouped by the text of the item's first <h4>, which
+        serves as the section title.
+
+        Returns a list of ``(section_title, articles)`` tuples in order of
+        first appearance; each article is a dict with the keys 'title',
+        'url' and 'date'.
+        """
+        soup = self.index_to_soup('http://www.naszdziennik.pl/news')
+        # Section title -> list of article dicts.
+        articles = {}
+        # Section titles in the order they were first seen.
+        sections = []
+
+        for item in soup.findAll(attrs={'class': 'news-article'}):
+            # The first <h4> inside the item names the section.
+            section = self.tag_to_string(item.find('h4'))
+
+            # The 'title-datetime' element holds both the link and the date.
+            article_title_datetime = item.find(attrs={'class': 'title-datetime'})
+            article_a = None
+            if article_title_datetime is not None:
+                article_a = article_title_datetime.find('a')
+            if article_a is None:
+                # An item without a title link used to abort the whole
+                # index with AttributeError; skip just that item instead.
+                continue
+
+            # ``in`` replaces dict.has_key(), which exists only on Python 2.
+            if section not in articles:
+                sections.append(section)
+                articles[section] = []
+
+            # The href is joined to the site root to form an absolute URL.
+            article_url = 'http://naszdziennik.pl' + article_a['href']
+            # The title is the text between the <a> tags.
+            article_title = self.tag_to_string(article_a)
+            # The date is the text of the first <h4> inside 'title-datetime'.
+            article_date = self.tag_to_string(article_title_datetime.find('h4'))
+            articles[section].append({'title': article_title, 'url': article_url, 'date': article_date})
+
+        # Emit the sections in first-seen order; calibre consumes this list.
+        return [(section, articles[section]) for section in sections]
--- a/recipes/rushisaband.recipe
+++ b/recipes/rushisaband.recipe
@ -0,0 +1,29 @@
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__author__ = 'MrStefan <mrstefaan@gmail.com>'
+
+'''
+www.rushisaband.com
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+import re
+
+class rushisaband(BasicNewsRecipe):
+    """Calibre news recipe for the Rush is a Band blog feed."""
+
+    title = u'Rushisaband'
+    description = u'A blog devoted to the band RUSH and its members, Neil Peart, Geddy Lee and Alex Lifeson'
+    __author__ = 'MrStefan <mrstefaan@gmail.com>'
+    language = 'en'
+
+    feeds = [(u'Rush is a Band', u'http://feeds2.feedburner.com/rushisaband/blog')]
+    oldest_article = 7
+    max_articles_per_feed = 100
+    remove_empty_feeds = True
+
+    no_stylesheets = True
+    remove_javascript = True
+
+    # Tag filter: h4 and h5 headings plus paragraphs.
+    keep_only_tags = [{'name': 'h4'}, {'name': 'h5'}, {'name': 'p'}]
--- a/recipes/rynek_infrastruktury.recipe
+++ b/recipes/rynek_infrastruktury.recipe
@ -0,0 +1,42 @@
+#!/usr/bin/env  python
+
+__license__ = 'GPL v3'
+__author__ = 'teepel <teepel44@gmail.com>'
+
+'''
+http://www.rynekinfrastruktury.pl
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+import re
+
+class prawica_recipe(BasicNewsRecipe):
+    """Calibre news recipe for the rynekinfrastruktury.pl RSS feeds.
+
+    NOTE(review): the class name does not match the site this recipe
+    covers -- presumably a leftover from another recipe; confirm before
+    renaming.
+    """
+
+    title = u'Rynek Infrastruktury'
+    description = u'Portal "Rynek Infrastruktury" to źródło informacji o kluczowych elementach polskiej gospodarki: drogach, kolei, lotniskach, portach, telekomunikacji, energetyce, prawie i polityce, wzmocnione eksperckimi komentarzami kluczowych analityków.'
+    __author__ = 'teepel <teepel44@gmail.com>'
+    language = 'pl'
+
+    oldest_article = 1
+    max_articles_per_feed = 100
+    remove_empty_feeds = True
+    no_stylesheets = True
+    remove_javascript = True
+
+    feeds = [
+        (u'Drogi', u'http://www.rynekinfrastruktury.pl/rss/41'),
+        (u'Lotniska', u'http://www.rynekinfrastruktury.pl/rss/42'),
+        (u'Kolej', u'http://www.rynekinfrastruktury.pl/rss/37'),
+        (u'Energetyka', u'http://www.rynekinfrastruktury.pl/rss/30'),
+        (u'Telekomunikacja', u'http://www.rynekinfrastruktury.pl/rss/31'),
+        (u'Porty', u'http://www.rynekinfrastruktury.pl/rss/32'),
+        (u'Prawo i polityka', u'http://www.rynekinfrastruktury.pl/rss/47'),
+        (u'Komentarze', u'http://www.rynekinfrastruktury.pl/rss/38'),
+    ]
+
+    # Tag filters: the 'articleContent' div is the one to keep; the
+    # 'date' span is the one to drop.
+    keep_only_tags = [{'name': 'div', 'attrs': {'class': 'articleContent'}}]
+    remove_tags = [{'name': 'span', 'attrs': {'class': 'date'}}]
+
+    def print_version(self, url):
+        """Return *url* with its '/artykul/' prefix replaced by '/artykul/drukuj/'.
+
+        URLs that do not contain the article prefix come back unchanged.
+        """
+        article_prefix = 'http://www.rynekinfrastruktury.pl/artykul/'
+        print_prefix = 'http://www.rynekinfrastruktury.pl/artykul/drukuj/'
+        return url.replace(article_prefix, print_prefix)
--- a/recipes/rynek_kolejowy.recipe
+++ b/recipes/rynek_kolejowy.recipe
@ -0,0 +1,41 @@
+#!/usr/bin/env  python
+
+__license__ = 'GPL v3'
+__author__ = 'teepel <teepel44@gmail.com>'
+
+'''
+rynek-kolejowy.pl
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+import re
+
+class rynek_kolejowy(BasicNewsRecipe):
+    """Calibre news recipe for the rynek-kolejowy.pl RSS feed."""
+
+    title = u'Rynek Kolejowy'
+    description = u'Rynek Kolejowy - kalendarium wydarzeń branży kolejowej, konferencje, sympozja, targi kolejowe, krajowe i zagraniczne.'
+    __author__ = 'teepel <teepel44@gmail.com>'
+    language = 'pl'
+    masthead_url = 'http://p.wnp.pl/images/i/partners/rynek_kolejowy.gif'
+
+    feeds = [(u'Wiadomości', u'http://www.rynek-kolejowy.pl/rss/rss.php')]
+    oldest_article = 1
+    max_articles_per_feed = 100
+    remove_empty_feeds = True
+    no_stylesheets = True
+    remove_javascript = True
+
+    extra_css = '''.wiadomosc_title{ font-size: 1.4em; font-weight: bold; }'''
+
+    # Tag filters: the 'mainContent' div is the one to keep; the divs
+    # listed under remove_tags are the ones to drop.
+    keep_only_tags = [{'name': 'div', 'attrs': {'id': 'mainContent'}}]
+    remove_tags = [
+        {'name': 'div', 'attrs': {'class': 'right no-print'}},
+        {'name': 'div', 'attrs': {'id': 'font-size'}},
+        {'name': 'div', 'attrs': {'class': 'no-print'}},
+    ]
+
+    def print_version(self, url):
+        """Return the drukuj.php URL built from the fourth '/'-separated piece of *url*.
+
+        Raises IndexError when *url* has fewer than four such pieces.
+        """
+        # presumably the piece right after the host is the article id -- TODO confirm
+        article_id = url.split('/')[3]
+        return 'http://www.rynek-kolejowy.pl/drukuj.php?id=' + article_id
+        
--- a/recipes/satkurier.recipe
+++ b/recipes/satkurier.recipe
@ -0,0 +1,49 @@
+#!/usr/bin/env  python
+
+__license__ = 'GPL v3'
+
+from calibre.web.feeds.news import BasicNewsRecipe
+import re
+
+
+class SATKurier(BasicNewsRecipe):
+    """Calibre news recipe for the satkurier.pl feedburner feeds."""
+
+    title = u'SATKurier.pl'
+    __author__ = 'Artur Stachecki <artur.stachecki@gmail.com>'
+    language = 'pl'
+    # Built from adjacent literals: the former backslash continuations
+    # inside one literal embedded each line's indentation in the text.
+    description = (u'Największy i najstarszy serwis poświęcony '
+                   u'telewizji cyfrowej, przygotowywany przez wydawcę '
+                   u'miesięcznika SAT Kurier. Bieżące wydarzenia '
+                   u'z rynku mediów i nowych technologii.')
+    oldest_article = 7
+    masthead_url = 'http://satkurier.pl/img/header_sk_logo.gif'
+    max_articles_per_feed = 100
+    simultaneous_downloads = 5
+    remove_javascript = True
+    no_stylesheets = True
+
+    keep_only_tags = [dict(name='div', attrs={'id': ['single_news', 'content']})]
+
+    remove_tags = [
+        dict(attrs={'id': ['news_info', 'comments']}),
+        dict(attrs={'href': '#czytaj'}),
+        dict(attrs={'align': 'center'}),
+        dict(attrs={'class': ['date', 'category', 'right mini-add-comment', 'socialLinks', 'commentlist']}),
+    ]
+
+    remove_tags_after = [dict(id='entry')]
+
+    feeds = [(u'Najnowsze wiadomości', u'http://feeds.feedburner.com/satkurierpl?format=xml'),
+             (u'Sport w telewizji', u'http://feeds.feedburner.com/satkurier/sport?format=xml'),
+             (u'Blog', u'http://feeds.feedburner.com/satkurier/blog?format=xml')]
+
+    def preprocess_html(self, soup):
+        """Move the news photo after the first <h1> and flatten text-only links.
+
+        The element with id 'news_mini_photo', if present, is detached and
+        its markup is appended to that of the first <h1>. Afterwards every
+        <a> whose ``.string`` is not None is replaced by that string.
+
+        Returns the same, modified, *soup* object.
+        """
+        image = soup.find(attrs={'id': 'news_mini_photo'})
+        if image:
+            image.extract()
+            header = soup.find('h1')
+            # A page without <h1> used to raise AttributeError here; in
+            # that case the photo is now simply left detached.
+            if header is not None:
+                # NOTE(review): the replacement is a markup string, not
+                # parsed tags -- confirm it is rendered as intended.
+                header.replaceWith(header.prettify() + image.prettify())
+        for alink in soup.findAll('a'):
+            if alink.string is not None:
+                alink.replaceWith(alink.string)
+        return soup