Various Polish news sources by fenuks

2025-07-09 03:04:10 -04:00 · 2011-12-17 22:39:34 +05:30 · 2011-12-17 22:39:34 +05:30 · b833605f57
commit b833605f57
parent 2de126e308
11 changed files with 144 additions and 0 deletions
--- a/recipes/biolog_pl.recipe
+++ b/recipes/biolog_pl.recipe
@ -0,0 +1,19 @@
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+
+from calibre.web.feeds.news import BasicNewsRecipe
+class Biolog_pl(BasicNewsRecipe):
+    # Feed recipe for Biolog.pl, built on BasicNewsRecipe.
+
+    # --- metadata ---
+    title = u'Biolog.pl'
+    __author__ = 'fenuks'
+    description = u'Przyrodnicze aktualności ze świata nauki (codziennie aktualizowane), kurs biologii, testy i sprawdziany, forum dyskusyjne.'
+    category = 'biology'
+    language = 'pl'
+    cover_url = 'http://www.biolog.pl/naukowy,portal,biolog.png'
+
+    # --- fetch limits ---
+    oldest_article = 7
+    max_articles_per_feed = 100
+    remove_empty_feeds = True
+
+    # --- page clean-up ---
+    no_stylesheets = True
+    # Boundaries: the element with id "main" and the <a name="komentarze"> anchor.
+    remove_tags_before = dict(id='main')
+    remove_tags_after = dict(name='a', attrs={'name': 'komentarze'})
+    remove_tags = [dict(name='img', attrs={'alt': 'Komentarze'})]
+
+    # --- feeds, as (section title, feed URL) pairs ---
+    feeds = [
+        (u'Wszystkie', u'http://www.biolog.pl/backend.php'),
+        (u'Medycyna', u'http://www.biolog.pl/medycyna-rss.php'),
+        (u'Ekologia', u'http://www.biolog.pl/rss-ekologia.php'),
+        (u'Genetyka i biotechnologia', u'http://www.biolog.pl/rss-biotechnologia.php'),
+        (u'Botanika', u'http://www.biolog.pl/rss-botanika.php'),
+        (u'Le\u015bnictwo', u'http://www.biolog.pl/rss-lesnictwo.php'),
+        (u'Zoologia', u'http://www.biolog.pl/rss-zoologia.php'),
+    ]
--- a/recipes/computerworld_pl.recipe
+++ b/recipes/computerworld_pl.recipe
@ -0,0 +1,22 @@
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+
+from calibre.web.feeds.news import BasicNewsRecipe
+class Computerworld_pl(BasicNewsRecipe):
+    # Feed recipe for Computerworld.pl, built on BasicNewsRecipe.
+    title = u'Computerworld.pl'
+    __author__ = 'fenuks'
+    description = u'Serwis o IT w przemyśle, finansach, handlu, administracji oraz rynku IT i telekomunikacyjnym - wiadomości, opinie, analizy, porady prawne'
+    category = 'IT'
+    language = 'pl'
+    no_stylesheets = True
+    oldest_article = 7
+    max_articles_per_feed = 100
+    keep_only_tags = [dict(name='div', attrs={'id': 's'})]
+    remove_tags_after = dict(name='div', attrs={'class': 'rMobi'})
+    remove_tags = [
+        dict(name='div', attrs={'class': ['nnav', 'rMobi']}),
+        dict(name='table', attrs={'class': 'ramka_slx'}),
+    ]
+    # Feeds as (section title, feed URL) pairs.
+    feeds = [(u'Wiadomo\u015bci', u'http://rssout.idg.pl/cw/news_iso.xml')]
+
+    def get_cover_url(self):
+        """Return the cover image URL taken from the Computerworld.pl front page.
+
+        Fetches the home page, looks for an ``<img>`` with class ``prawo``
+        and stores its ``src`` in ``self.cover_url``. When no such image (or
+        no ``src`` attribute) is found, ``self.cover_url`` is left untouched
+        and its current value is returned, instead of failing on ``None``.
+        """
+        soup = self.index_to_soup('http://www.computerworld.pl/')
+        cover = soup.find(name='img', attrs={'class': 'prawo'})
+        # find() yields None when the page layout changes; do not subscript it.
+        src = cover.get('src') if cover is not None else None
+        if src:
+            self.cover_url = src
+        return getattr(self, 'cover_url', None)
--- a/recipes/dziennik_pl.recipe
+++ b/recipes/dziennik_pl.recipe
@ -0,0 +1,58 @@
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+
+from calibre.web.feeds.news import BasicNewsRecipe
+import re
+class Dziennik_pl(BasicNewsRecipe):
+    # Feed recipe for Dziennik.pl, built on BasicNewsRecipe. Multi-page
+    # articles are stitched together by preprocess_html() / append_page().
+    title = u'Dziennik.pl'
+    __author__ = 'fenuks'
+    description = u'Wiadomości z kraju i ze świata. Wiadomości gospodarcze. Znajdziesz u nas informacje, wydarzenia, komentarze, opinie.'
+    category = 'newspaper'
+    language = 'pl'
+    cover_url = 'http://6.s.dziennik.pl/images/og_dziennik.jpg'
+    no_stylesheets = True
+    oldest_article = 7
+    max_articles_per_feed = 100
+    remove_javascript = True
+    remove_empty_feeds = True
+    # Replace every literal "Komentarze:" with an empty string.
+    preprocess_regexps = [(re.compile("Komentarze:"), lambda m: '')]
+    keep_only_tags = [dict(id='article')]
+    remove_tags = [
+        dict(name='div', attrs={'class': ['art_box_dodatki', 'new_facebook_icons2', 'leftArt', 'article_print', 'quiz-widget']}),
+        dict(name='a', attrs={'class': 'komentarz'}),
+    ]
+    # Feeds as (section title, feed URL) pairs.
+    feeds = [
+        (u'Wszystko', u'http://rss.dziennik.pl/Dziennik-PL/'),
+        (u'Wiadomości', u'http://rss.dziennik.pl/Dziennik-Wiadomosci'),
+        (u'Gospodarka', u'http://rss.dziennik.pl/Dziennik-Gospodarka'),
+        (u'Kobieta', u'http://rss.dziennik.pl/Dziennik-Kobieta'),
+        (u'Auto', u'http://rss.dziennik.pl/Dziennik-Auto'),
+        (u'Rozrywka', u'http://rss.dziennik.pl/Dziennik-Rozrywka'),
+        (u'Film', u'http://rss.dziennik.pl/Dziennik-Film'),
+        (u'Muzyka', u'http://rss.dziennik.pl/Dziennik-Muzyka'),
+        (u'Kultura', u'http://rss.dziennik.pl/Dziennik-Kultura'),
+        (u'Nauka', u'http://rss.dziennik.pl/Dziennik-Nauka'),
+        (u'Podróże', u'http://rss.dziennik.pl/Dziennik-Podroze/'),
+        (u'Nieruchomości', u'http://rss.dziennik.pl/Dziennik-Nieruchomosci'),
+    ]
+
+    def _drop_paginator(self, container):
+        """Remove the first ``div.article_paginator`` inside ``container``, if any."""
+        paginator = container.find('div', attrs={'class': 'article_paginator'})
+        if paginator is not None:
+            paginator.extract()
+
+    def append_page(self, soup, appendtag):
+        """Append the follow-up pages of a multi-page article to ``appendtag``.
+
+        Starting from ``soup``, follows the ``a.page_next`` links, fetches each
+        linked page with ``index_to_soup`` and inserts its ``div.article_body``
+        at the end of ``appendtag``. Elements matching ``self.remove_tags`` are
+        stripped from every appended body, and paginator blocks are removed.
+        A page without an ``article_body`` div is skipped instead of raising.
+
+        :param soup: parsed first page of the article
+        :param appendtag: tag (normally ``soup.body``) that receives the pages
+        """
+        tag = soup.find('a', attrs={'class': 'page_next'})
+        if tag:
+            self._drop_paginator(appendtag)
+        while tag:
+            soup2 = self.index_to_soup(tag['href'])
+            tag = soup2.find('a', attrs={'class': 'page_next'})
+            if not tag:
+                # Last page reached: drop the 'art_src' blocks collected so
+                # far; the one on the final page is appended below.
+                for r in appendtag.findAll('div', attrs={'class': 'art_src'}):
+                    r.extract()
+            pagetext = soup2.find(name='div', attrs={'class': 'article_body'})
+            if pagetext is None:
+                # Nothing to append from this page; move on to the next link.
+                continue
+            # Every remove_tags entry of this recipe has both 'name' and 'attrs'.
+            for dictionary in self.remove_tags:
+                for delete in pagetext.findAll(name=dictionary['name'], attrs=dictionary['attrs']):
+                    delete.extract()
+            appendtag.insert(len(appendtag.contents), pagetext)
+            self._drop_paginator(appendtag)
+
+    def preprocess_html(self, soup):
+        """Merge all pages of the article into ``soup`` and return it."""
+        body = soup.body
+        if body is not None:
+            self.append_page(soup, body)
+        return soup
--- a/recipes/emuzica_pl.recipe
+++ b/recipes/emuzica_pl.recipe
@ -0,0 +1,16 @@
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+
+from calibre.web.feeds.news import BasicNewsRecipe
+class eMuzyka(BasicNewsRecipe):
+    # Feed recipe for eMuzyka, built on BasicNewsRecipe.
+
+    # --- metadata ---
+    title = u'eMuzyka'
+    __author__ = 'fenuks'
+    description = u'Emuzyka to największa i najpopularniejsza strona o muzyce w Polsce'
+    category = 'music'
+    language = 'pl'
+    cover_url = 'http://s.emuzyka.pl/img/emuzyka_invert_small.jpg'
+
+    # --- fetch limits ---
+    oldest_article = 7
+    max_articles_per_feed = 100
+
+    # --- page clean-up ---
+    no_stylesheets = True
+    keep_only_tags = [
+        dict(name='div', attrs={'id': 'news_container'}),
+        dict(name='h3'),
+        dict(name='div', attrs={'class': 'review_text'}),
+    ]
+    remove_tags = [dict(name='span', attrs={'id': 'date'})]
+
+    # --- feeds, as (section title, feed URL) pairs ---
+    feeds = [
+        (u'Aktualno\u015bci', u'http://www.emuzyka.pl/rss.php?f=1'),
+        (u'Recenzje', u'http://www.emuzyka.pl/rss.php?f=2'),
+    ]
--- a/recipes/icons/biolog_pl.png
+++ b/recipes/icons/biolog_pl.png
--- a/recipes/icons/computerworld_pl.png
+++ b/recipes/icons/computerworld_pl.png
--- a/recipes/icons/dziennik_pl.png
+++ b/recipes/icons/dziennik_pl.png
--- a/recipes/icons/kosmonauta_pl.png
+++ b/recipes/icons/kosmonauta_pl.png
--- a/recipes/icons/mlody_technik_pl.recipe
+++ b/recipes/icons/mlody_technik_pl.recipe
--- a/recipes/kosmonauta_pl.recipe
+++ b/recipes/kosmonauta_pl.recipe
@ -0,0 +1,14 @@
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+
+from calibre.web.feeds.news import BasicNewsRecipe
+class Kosmonauta(BasicNewsRecipe):
+    # Feed recipe for Kosmonauta.net, built on BasicNewsRecipe.
+
+    # --- metadata ---
+    title = u'Kosmonauta.net'
+    __author__ = 'fenuks'
+    description = u'polskojęzyczny portal w całości dedykowany misjom kosmicznym i badaniom kosmosu.'
+    category = 'astronomy'
+    language = 'pl'
+    cover_url = 'http://bi.gazeta.pl/im/4/10393/z10393414X,Kosmonauta-net.jpg'
+
+    # --- fetch limits and clean-up ---
+    oldest_article = 7
+    max_articles_per_feed = 100
+    no_stylesheets = True
+
+    # --- feeds, as (section title, feed URL) pairs ---
+    feeds = [
+        (u'Kosmonauta.net', u'http://www.kosmonauta.net/index.php/feed/rss.html'),
+    ]
--- a/recipes/mlody_technik_pl.recipe
+++ b/recipes/mlody_technik_pl.recipe
@ -0,0 +1,15 @@
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+
+from calibre.web.feeds.news import BasicNewsRecipe
+class Mlody_technik(BasicNewsRecipe):
+    # Feed recipe for the "Młody technik" feed, built on BasicNewsRecipe.
+
+    # --- metadata ---
+    title = u'Mlody technik'
+    __author__ = 'fenuks'
+    description = u'Młody technik'
+    category = 'science'
+    language = 'pl'
+    cover_url = 'http://science-everywhere.pl/wp-content/uploads/2011/10/mt12.jpg'
+
+    # --- fetch limits and clean-up ---
+    oldest_article = 7
+    max_articles_per_feed = 100
+    no_stylesheets = True
+    # NOTE: a keep_only_tags filter on id='container' was left disabled here:
+    # keep_only_tags = [dict(id='container')]
+
+    # --- feeds, as (section title, feed URL) pairs ---
+    feeds = [
+        (u'Artyku\u0142y', u'http://www.mt.com.pl/feed'),
+    ]