Various Polish news sources by fenuks

2025-07-07 10:14:46 -04:00 · 2011-09-08 10:37:47 -06:00 · 2011-09-08 10:37:47 -06:00 · ab79b30dd3
commit ab79b30dd3
parent 2d3cdfedf3
13 changed files with 166 additions and 0 deletions
--- a/recipes/adventure_zone_pl.recipe
+++ b/recipes/adventure_zone_pl.recipe
@ -0,0 +1,38 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Adventure_zone(BasicNewsRecipe):
+    """Recipe for the Adventure Zone (adventure-zone.info) news feed."""
+
+    # Recipe metadata.
+    title          = u'Adventure Zone'
+    __author__        = 'fenuks'
+    description   = 'Adventure zone - adventure games from A to Z'
+    category       = 'games'
+    language       = 'pl'
+
+    # Fetch and clean-up settings.
+    oldest_article = 15
+    max_articles_per_feed = 100
+    no_stylesheets = True
+    remove_tags_before= dict(name='td', attrs={'class':'main-bg'})
+    remove_tags_after= dict(name='td', attrs={'class':'main-body middle-border'})
+    extra_css              = '.main-bg{text-align: left;}  td.capmain{ font-size: 22px; }'
+    feeds          = [(u'Nowinki', u'http://www.adventure-zone.info/fusion/feeds/news.php')]
+
+    def get_cover_url(self):
+        """Return the cover image URL scraped from the site's news page.
+
+        The image is looked up inside the element with id
+        ``box_OstatninumerAZ``.  When that element (or the nested
+        ``center > a > img[src]`` chain) is missing, the method returns
+        whatever ``cover_url`` is already set on the recipe, or ``None``,
+        instead of raising.
+        """
+        soup = self.index_to_soup('http://www.adventure-zone.info/fusion/news.php')
+        cover = soup.find(id='box_OstatninumerAZ')
+        try:
+            src = cover.center.a.img['src']
+        except (AttributeError, KeyError, TypeError):
+            # Box missing or laid out differently: keep any cover already set.
+            return getattr(self, 'cover_url', None)
+        self.cover_url = 'http://www.adventure-zone.info/fusion/' + src
+        return self.cover_url
+
+    def skip_ad_pages(self, soup):
+        """Follow a news item through to the article it links to.
+
+        Scans every ``<a>`` in the page body for a link to ``articles.php?``
+        whose ``<strong>`` label contains ``zapowied`` or ``recenzj`` (Polish
+        stems for "preview" and "review").  For the first such link the raw
+        print version of the article is fetched and returned; otherwise
+        ``None`` is returned and the page is used as is.
+        """
+        for link in soup.body.findAll(name='a'):
+            # Anchors without an href cannot point at an article.
+            href = link.get('href') or ''
+            if 'articles.php?' not in href:
+                continue
+            if link.strong is None:
+                continue
+            word = link.strong.string
+            if word is None:
+                # <strong> holds nested markup rather than a single string.
+                continue
+            # NOTE: ('zapowied' or 'recenzj') in word would only test the
+            # first stem, so each stem is checked explicitly.
+            if 'zapowied' in word or 'recenzj' in word:
+                # Everything after '_id' in the link (i.e. '=<number>') is
+                # appended to the print URL's 'item_id' parameter.
+                item = href[href.find('_id') + 3:]
+                return self.index_to_soup('http://www.adventure-zone.info/fusion/print.php?type=A&item_id' + item, raw=True)
+        return None
+
+    def print_version(self, url):
+        """Map a ``news.php?readmore`` URL to its ``print.php`` equivalent."""
+        return url.replace('news.php?readmore', 'print.php?type=N&item_id')
+
--- a/recipes/astro_news_pl.recipe
+++ b/recipes/astro_news_pl.recipe
@ -0,0 +1,18 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AstroNEWS(BasicNewsRecipe):
+    """Recipe for the AstroNEWS feed served from news.astronet.pl."""
+
+    # Recipe metadata.
+    title = u'AstroNEWS'
+    __author__ = 'fenuks'
+    description = 'AstroNEWS- astronomy every day'
+    category = 'astronomy, science'
+    language = 'pl'
+    cover_url = 'http://news.astronet.pl/img/logo_news.jpg'
+
+    # Fetch and clean-up settings.
+    oldest_article = 8
+    max_articles_per_feed = 100
+    auto_cleanup = True
+
+    feeds = [
+        (u'Wiadomości', u'http://news.astronet.pl/rss.cgi'),
+    ]
+
+    def print_version(self, url):
+        """Return *url* with every ``astronet.pl/`` followed by ``print.cgi?``."""
+        site = 'astronet.pl/'
+        return url.replace(site, site + 'print.cgi?')
+
--- a/recipes/astronomia_pl.recipe
+++ b/recipes/astronomia_pl.recipe
@ -0,0 +1,15 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Astronomia_pl(BasicNewsRecipe):
+    """Recipe for the Astronomia.pl RSS feed."""
+
+    # Recipe metadata.
+    title = u'Astronomia.pl'
+    __author__ = 'fenuks'
+    description = 'Astronomia - polish astronomy site'
+    category = 'astronomy, science'
+    language = 'pl'
+    cover_url = 'http://www.astronomia.pl/grafika/logo.gif'
+
+    # Fetch settings.
+    oldest_article = 8
+    max_articles_per_feed = 100
+
+    # Tag specs: both refer to <div> elements selected by id.
+    remove_tags_before = {'name': 'div', 'attrs': {'id': 'a1'}}
+    keep_only_tags = [
+        {'name': 'div', 'attrs': {'id': ['a1', 'h2']}},
+    ]
+
+    feeds = [
+        (u'Wiadomości z astronomii i astronautyki', u'http://www.astronomia.pl/rss/'),
+    ]
--- a/recipes/elektroda_pl.recipe
+++ b/recipes/elektroda_pl.recipe
@ -0,0 +1,15 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Elektroda(BasicNewsRecipe):
+    """Recipe for the Elektroda.pl forum RSS feed."""
+
+    # Recipe metadata.
+    title = u'Elektroda'
+    __author__ = 'fenuks'
+    description = 'Elektroda.pl'
+    category = 'electronics'
+    language = 'pl'
+    cover_url = 'http://demotywatory.elektroda.pl/Thunderpic/logo.gif'
+
+    # Fetch settings.
+    oldest_article = 8
+    max_articles_per_feed = 100
+
+    # Tag specs: trim around the post body and drop the '#top' anchors.
+    remove_tags_before = {'name': 'span', 'attrs': {'class': 'postbody'}}
+    remove_tags_after = {'name': 'td', 'attrs': {'class': 'spaceRow'}}
+    remove_tags = [
+        {'name': 'a', 'attrs': {'href': '#top'}},
+    ]
+
+    feeds = [
+        (u'Elektroda', u'http://www.elektroda.pl/rtvforum/rss.php'),
+    ]
--- a/recipes/gildia_pl.recipe
+++ b/recipes/gildia_pl.recipe
@ -0,0 +1,26 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Gildia(BasicNewsRecipe):
+    """Recipe aggregating the RSS feeds of the Gildia.pl sub-sites."""
+
+    # Recipe metadata.
+    title          = u'Gildia.pl'
+    __author__        = 'fenuks'
+    description   = 'Gildia - cultural site'
+    cover_url      = 'http://www.film.gildia.pl/_n_/portal/redakcja/logo/logo-gildia.pl-500.jpg'
+    category       = 'culture'
+    language       = 'pl'
+
+    # Fetch and clean-up settings.
+    oldest_article = 8
+    max_articles_per_feed = 100
+    no_stylesheets=True
+    remove_tags=[
+        dict(name='div', attrs={'class':'backlink'}),
+        dict(name='div', attrs={'class':'im_img'}),
+        dict(name='div', attrs={'class':'addthis_toolbox addthis_default_style'}),
+    ]
+    # A list of tag specs, matching the form used by the other recipes.
+    keep_only_tags=[dict(name='div', attrs={'class':'widetext'})]
+    feeds          = [
+        (u'Gry', u'http://www.gry.gildia.pl/rss'),
+        (u'Literatura', u'http://www.literatura.gildia.pl/rss'),
+        (u'Film', u'http://www.film.gildia.pl/rss'),
+        (u'Horror', u'http://www.horror.gildia.pl/rss'),
+        (u'Konwenty', u'http://www.konwenty.gildia.pl/rss'),
+        (u'Plansz\xf3wki', u'http://www.planszowki.gildia.pl/rss'),
+        (u'Manga i anime', u'http://www.manga.gildia.pl/rss'),
+        (u'Star Wars', u'http://www.starwars.gildia.pl/rss'),
+        (u'Techno', u'http://www.techno.gildia.pl/rss'),
+        (u'Historia', u'http://www.historia.gildia.pl/rss'),
+        (u'Magia', u'http://www.magia.gildia.pl/rss'),
+        (u'Bitewniaki', u'http://www.bitewniaki.gildia.pl/rss'),
+        (u'RPG', u'http://www.rpg.gildia.pl/rss'),
+        (u'LARP', u'http://www.larp.gildia.pl/rss'),
+        (u'Muzyka', u'http://www.muzyka.gildia.pl/rss'),
+        (u'Nauka', u'http://www.nauka.gildia.pl/rss'),
+    ]
+
+    def skip_ad_pages(self, soup):
+        """Follow a news item through to the review it links to.
+
+        Looks inside the first ``<div class="news">`` for an ``<a>`` whose
+        href contains ``recenzja``.  The first such link is logged and its
+        target is fetched and returned raw.  Returns ``None`` when the page
+        has no such div or no matching link.
+        """
+        content = soup.find('div', attrs={'class':'news'})
+        if content is None:
+            # No news container on this page: nothing to follow.
+            return None
+        for link in content.findAll(name='a'):
+            # Anchors without an href cannot point at a review.
+            href = link.get('href') or ''
+            if 'recenzja' in href:
+                self.log.warn('odnosnik')
+                self.log.warn(href)
+                return self.index_to_soup(href, raw=True)
+        return None
--- a/recipes/gry_online_pl.recipe
+++ b/recipes/gry_online_pl.recipe
@ -0,0 +1,38 @@
+from calibre.web.feeds.recipes import BasicNewsRecipe
+
+class Gry_online_pl(BasicNewsRecipe):
+    """Recipe for Gry-Online.pl that stitches multi-page articles together."""
+
+    # Recipe metadata.
+    title          = u'Gry-Online.pl'
+    __author__        = 'fenuks'
+    description   = 'Gry-Online.pl - computer games'
+    category       = 'games'
+    language       = 'pl'
+    cover_url='http://www.gry-online.pl/img/1st_10/1st-gol-logo.png'
+
+    # Prefix prepended to the pager link's href in append_page().
+    INDEX= 'http://www.gry-online.pl/'
+
+    # Fetch and clean-up settings.
+    oldest_article = 13
+    max_articles_per_feed = 100
+    no_stylesheets= True
+    extra_css              = 'p.wn1{font-size:22px;}'
+    remove_tags_after= [dict(name='div', attrs={'class':['tresc-newsa']})]
+    # The 'num_str_nex' pager link is kept so append_page() can find it.
+    keep_only_tags = [
+        dict(name='div', attrs={'class':['txthead']}),
+        dict(name='p', attrs={'class':['wtx1', 'wn1', 'wob']}),
+        dict(name='a', attrs={'class':['num_str_nex']}),
+    ]
+    feeds          = [(u'Newsy', 'http://www.gry-online.pl/rss/news.xml'), ('Teksty', u'http://www.gry-online.pl/rss/teksty.xml')]
+
+    def append_page(self, soup, appendtag):
+        """Append the paragraphs of all following pages to *appendtag*.
+
+        soup      -- parsed page whose ``a.num_str_nex`` pager link is followed
+        appendtag -- tag that receives the ``<p>`` elements of later pages
+
+        Any pager link inside *appendtag* is replaced by a newline.  The next
+        page is fetched only when the pager link has a ``<div>`` whose text
+        contains ``strona`` and an href; the method then recurses on that page.
+        """
+        nexturl = soup.find('a', attrs={'class':'num_str_nex'})
+        pager = appendtag.find('a', attrs={'class':'num_str_nex'})
+        if pager is not None:
+            pager.replaceWith('\n')
+        if nexturl is None or nexturl.div is None:
+            return
+        label = nexturl.div.string
+        if label is None or 'strona' not in label:
+            # Either the div has mixed content or this is not a page link.
+            return
+        href = nexturl.get('href')
+        if not href:
+            return
+        soup2 = self.index_to_soup(self.INDEX + href)
+        for tag in soup2.findAll(name='p', attrs={'class':['wtx1', 'wn1', 'wob']}):
+            # Insert at the end so pages stay in reading order.
+            appendtag.insert(len(appendtag.contents), tag)
+        self.append_page(soup2, appendtag)
+
+    def preprocess_html(self, soup):
+        """Merge follow-up pages into the body and return the same soup."""
+        self.append_page(soup, soup.body)
+        return soup
--- a/recipes/icons/adventure_zone_pl.png
+++ b/recipes/icons/adventure_zone_pl.png
--- a/recipes/icons/astro_news_pl.png
+++ b/recipes/icons/astro_news_pl.png
--- a/recipes/icons/astronomia_pl.png
+++ b/recipes/icons/astronomia_pl.png
--- a/recipes/icons/elektroda_pl.png
+++ b/recipes/icons/elektroda_pl.png
--- a/recipes/icons/gry_online_pl.png
+++ b/recipes/icons/gry_online_pl.png
--- a/recipes/icons/ubuntu_pl.png
+++ b/recipes/icons/ubuntu_pl.png
--- a/recipes/ubuntu_pl.recipe
+++ b/recipes/ubuntu_pl.recipe
@ -0,0 +1,16 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Ubuntu_pl(BasicNewsRecipe):
+    """Recipe for the UBUNTU.pl feeds published through FeedBurner."""
+
+    # Recipe metadata.
+    title = u'UBUNTU.pl'
+    __author__ = 'fenuks'
+    description = 'UBUNTU.pl - polish ubuntu community site'
+    category = 'linux, IT'
+    language = 'pl'
+    cover_url = 'http://ubuntu.pl/img/logo.jpg'
+
+    # Fetch and styling settings.
+    oldest_article = 8
+    max_articles_per_feed = 100
+    no_stylesheets = True
+    extra_css = '#main {text-align:left;}'
+
+    # Tag specs selecting the article content.
+    keep_only_tags = [
+        {'name': 'td', 'attrs': {'class': 'teaser-node-mc'}},
+        {'name': 'h3', 'attrs': {'class': 'entry-title'}},
+        {'name': 'div', 'attrs': {'class': 'entry-content'}},
+    ]
+    remove_tags_after = [
+        {'name': 'div', 'attrs': {'class': 'content'}},
+    ]
+
+    feeds = [
+        ('Czytelnia Ubuntu', 'http://feeds.feedburner.com/ubuntu-czytelnia'),
+        (u'WikiGames', u'http://feeds.feedburner.com/WikiGames'),
+    ]