Updated various Polish recipes

2025-07-09 03:04:10 -04:00 · 2012-04-18 09:33:44 +05:30 · 2012-04-18 09:33:44 +05:30 · 015d45a06b
commit 015d45a06b
parent 18cd783913
19 changed files with 188 additions and 18 deletions
--- a/recipes/adventure_zone_pl.recipe
+++ b/recipes/adventure_zone_pl.recipe
@ -9,6 +9,7 @@ class Adventure_zone(BasicNewsRecipe):
    no_stylesheets = True
    oldest_article = 20
    max_articles_per_feed = 100
+    index='http://www.adventure-zone.info/fusion/'
    use_embedded_content=False
    preprocess_regexps     = [(re.compile(r"<td class='capmain'>Komentarze</td>", re.IGNORECASE), lambda m: '')]
    remove_tags_before= dict(name='td', attrs={'class':'main-bg'})
@ -45,6 +46,19 @@ class Adventure_zone(BasicNewsRecipe):
        skip_tag = skip_tag.findAll(name='a')
        for r in skip_tag:
           if r.strong:
-                 word=r.strong.string
-                 if word and (('zapowied' in word) or ('recenzj' in word)  or ('solucj' in word)):
+                 word=r.strong.string.lower()
+                 if word and (('zapowied' in word) or ('recenzj' in word)  or ('solucj' in word) or ('poradnik' in word)):
                   return self.index_to_soup('http://www.adventure-zone.info/fusion/print.php?type=A&item'+r['href'][r['href'].find('article_id')+7:], raw=True)
+
+    def preprocess_html(self, soup):
+        """Clean up a downloaded page and make its links absolute.
+
+        Removes the second link inside the ``news-footer middle-border``
+        element (when it holds at least two links), strips every inline
+        ``style`` attribute and resolves link targets against ``self.index``.
+
+        :param soup: parsed page, modified in place.
+        :return: the same ``soup`` object.
+        """
+        from urlparse import urljoin
+        footer=soup.find(attrs={'class':'news-footer middle-border'})
+        if footer:
+            footer_links=footer('a')
+            if len(footer_links)>=2:
+                footer_links[1].extract()
+        for item in soup.findAll(style=True):
+            del item['style']
+        for a in soup('a'):
+            if a.has_key('href'):
+                # urljoin returns absolute URLs and other schemes (mailto:,
+                # javascript:) as they are and resolves everything else
+                # against the index without doubling or dropping slashes.
+                a['href']=urljoin(self.index, a['href'])
+        return soup
+           
+            
--- a/recipes/benchmark_pl.recipe
+++ b/recipes/benchmark_pl.recipe
@ -68,4 +68,7 @@ class Benchmark_pl(BasicNewsRecipe):
            self.image_article(soup, soup.body)
        else:
            self.append_page(soup, soup.body)
+        for a in soup('a'):
+            if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
+                a['href']=self.INDEX + a['href']
        return soup
--- a/recipes/cd_action.recipe
+++ b/recipes/cd_action.recipe
@ -6,6 +6,7 @@ class CD_Action(BasicNewsRecipe):
    description   = 'cdaction.pl - polish games magazine site'
    category       = 'games'
    language       = 'pl'
+    index='http://www.cdaction.pl'
    oldest_article = 8
    max_articles_per_feed = 100
    no_stylesheets= True
@ -18,3 +19,9 @@ class CD_Action(BasicNewsRecipe):
        soup = self.index_to_soup('http://www.cdaction.pl/magazyn/')
        self.cover_url='http://www.cdaction.pl'+ soup.find(id='wspolnik').div.a['href']
        return getattr(self, 'cover_url', self.cover_url)
+
+    def preprocess_html(self, soup):
+        """Resolve the page's link targets against ``self.index``.
+
+        :param soup: parsed page, modified in place.
+        :return: the same ``soup`` object.
+        """
+        from urlparse import urljoin
+        for a in soup('a'):
+            if a.has_key('href'):
+                # urljoin returns absolute URLs and other schemes (mailto:,
+                # javascript:) as they are; plain concatenation would glue
+                # the site address in front of them and would also produce
+                # "cdaction.plfoo" for targets without a leading slash.
+                a['href']=urljoin(self.index, a['href'])
+        return soup
--- a/recipes/dobreprogamy.recipe
+++ b/recipes/dobreprogamy.recipe
@ -11,6 +11,7 @@ class Dobreprogramy_pl(BasicNewsRecipe):
    cover_url = 'http://userlogos.org/files/logos/Karmody/dobreprogramy_01.png'
    description = u'Aktualności i blogi z dobreprogramy.pl'
    encoding = 'utf-8'
+    index='http://www.dobreprogramy.pl/'
    no_stylesheets = True
    language       = 'pl'
    extra_css      = '.title {font-size:22px;}'
@ -22,3 +23,10 @@ class Dobreprogramy_pl(BasicNewsRecipe):
    #remove_tags = [dict(name='div', attrs={'class':['komentarze', 'block', 'portalInfo', 'menuBar', 'topBar']})]
    feeds = [(u'Aktualności', 'http://feeds.feedburner.com/dobreprogramy/Aktualnosci'),
                 ('Blogi', 'http://feeds.feedburner.com/dobreprogramy/BlogCzytelnikow')]
+
+
+    def preprocess_html(self, soup):
+        """Resolve the page's link targets against ``self.index``.
+
+        :param soup: parsed page, modified in place.
+        :return: the same ``soup`` object.
+        """
+        from urlparse import urljoin
+        for a in soup('a'):
+            if a.has_key('href'):
+                # urljoin returns absolute URLs and other schemes (mailto:,
+                # javascript:) as they are. The index ends with "/", so
+                # plain concatenation would turn "/x" into "//x".
+                a['href']=urljoin(self.index, a['href'])
+        return soup
--- a/recipes/dzieje_pl.recipe
+++ b/recipes/dzieje_pl.recipe
@ -7,6 +7,7 @@ class Dzieje(BasicNewsRecipe):
    cover_url      = 'http://www.dzieje.pl/sites/default/files/dzieje_logo.png'
    category       = 'history'
    language       = 'pl'
+    index='http://dzieje.pl'
    oldest_article = 8
    max_articles_per_feed = 100
    remove_javascript=True
@ -15,3 +16,10 @@ class Dzieje(BasicNewsRecipe):
    remove_tags_after= dict(id='dogory')
    remove_tags=[dict(id='dogory')]
    feeds          = [(u'Dzieje', u'http://dzieje.pl/rss.xml')]
+
+
+    def preprocess_html(self, soup):
+        """Resolve the page's link targets against ``self.index``.
+
+        :param soup: parsed page, modified in place.
+        :return: the same ``soup`` object.
+        """
+        from urlparse import urljoin
+        for a in soup('a'):
+            if a.has_key('href'):
+                # urljoin returns absolute URLs and other schemes (mailto:,
+                # javascript:) as they are and inserts the "/" that plain
+                # concatenation would omit for targets without one.
+                a['href']=urljoin(self.index, a['href'])
+        return soup
--- a/recipes/eioba.recipe
+++ b/recipes/eioba.recipe
@ -21,3 +21,8 @@ class eioba(BasicNewsRecipe):
 	(u'Rozrywka', u'http://www.eioba.pl/feed/categories/10.xml'),
 	(u'Rożne', u'http://www.eioba.pl/feed/categories/9.xml')
 	]
+
+    def preprocess_html(self, soup):
+        """Remove every inline ``style`` attribute and return the soup."""
+        styled_tags=soup.findAll(style=True)
+        for tag in styled_tags:
+            del tag['style']
+        return soup
--- a/recipes/emuzica_pl.recipe
+++ b/recipes/emuzica_pl.recipe
@ -7,6 +7,7 @@ class eMuzyka(BasicNewsRecipe):
    description   = u'Emuzyka to największa i najpopularniejsza strona o muzyce w Polsce'
    category       = 'music'
    language       = 'pl'
+    index='http://www.emuzyka.pl'
    cover_url='http://s.emuzyka.pl/img/emuzyka_invert_small.jpg'
    no_stylesheets = True
    oldest_article = 7
@ -14,3 +15,9 @@ class eMuzyka(BasicNewsRecipe):
    keep_only_tags=[dict(name='div', attrs={'id':'news_container'}), dict(name='h3'), dict(name='div', attrs={'class':'review_text'})]
    remove_tags=[dict(name='span', attrs={'id':'date'})]
    feeds          = [(u'Aktualno\u015bci', u'http://www.emuzyka.pl/rss.php?f=1'), (u'Recenzje', u'http://www.emuzyka.pl/rss.php?f=2')]
+
+    def preprocess_html(self, soup):
+        """Resolve the page's link targets against ``self.index``.
+
+        :param soup: parsed page, modified in place.
+        :return: the same ``soup`` object.
+        """
+        from urlparse import urljoin
+        for a in soup('a'):
+            if a.has_key('href'):
+                # urljoin returns absolute URLs and other schemes (mailto:,
+                # javascript:) as they are and inserts the "/" that plain
+                # concatenation would omit for targets without one.
+                a['href']=urljoin(self.index, a['href'])
+        return soup
--- a/recipes/film_web.recipe
+++ b/recipes/film_web.recipe
@ -7,6 +7,7 @@ class Filmweb_pl(BasicNewsRecipe):
    cover_url      = 'http://userlogos.org/files/logos/crudus/filmweb.png'
    category       = 'movies'
    language       = 'pl'
+    index='http://www.filmweb.pl'
    oldest_article = 8
    max_articles_per_feed = 100
    no_stylesheets= True
@ -39,3 +40,9 @@ class Filmweb_pl(BasicNewsRecipe):
            self.log.warn(skip_tag)
            return self.index_to_soup(skip_tag['href'], raw=True)
        
+
+    def preprocess_html(self, soup):
+        """Resolve the page's link targets against ``self.index``.
+
+        :param soup: parsed page, modified in place.
+        :return: the same ``soup`` object.
+        """
+        from urlparse import urljoin
+        for a in soup('a'):
+            if a.has_key('href'):
+                # urljoin returns absolute URLs and other schemes (mailto:,
+                # javascript:) as they are and inserts the "/" that plain
+                # concatenation would omit for targets without one.
+                a['href']=urljoin(self.index, a['href'])
+        return soup
--- a/recipes/gameplay_pl.recipe
+++ b/recipes/gameplay_pl.recipe
@ -6,12 +6,14 @@ class Gameplay_pl(BasicNewsRecipe):
    description   = u'gameplay.pl - serwis o naszych zainteresowaniach, grach, filmach, książkach, muzyce, fotografii i konsolach.'
    category       = 'games, movies, books, music'
    language       = 'pl'
+    index='http://gameplay.pl'
    masthead_url= 'http://gameplay.pl/img/gpy_top_logo.png'
    cover_url= 'http://gameplay.pl/img/gpy_top_logo.png'
    max_articles_per_feed = 100
+    remove_javascript= True
    no_stylesheets= True
    keep_only_tags=[dict(name='div', attrs={'class':['news_endpage_tit', 'news']})]
-    remove_tags=[dict(name='div', attrs={'class':['galeria', 'noedit center im']})]
+    remove_tags=[dict(name='div', attrs={'class':['galeria', 'noedit center im', 'news_list', 'news_list_autor', 'stop_bot', 'tagi']}), dict(attrs={'usemap':'#map'})]
    feeds          = [(u'Wiadomo\u015bci', u'http://gameplay.pl/rss/')]

    def image_url_processor(self, baseurl, url):
@ -19,3 +21,9 @@ class Gameplay_pl(BasicNewsRecipe):
            return 'http://gameplay.pl'+ url[2:]
        else:
 	  return url
+
+    def preprocess_html(self, soup):
+        """Point links that start with ``../`` at ``self.index``.
+
+        :param soup: parsed page, modified in place.
+        :return: the same ``soup`` object.
+        """
+        for a in soup('a'):
+            # The slice below drops the first two characters, which is only
+            # right when they are the leading "..", so test the prefix rather
+            # than looking for "../" anywhere in the target.
+            if a.has_key('href') and a['href'].startswith('../'):
+                a['href']=self.index + a['href'][2:]
+        return soup
--- a/recipes/gildia_pl.recipe
+++ b/recipes/gildia_pl.recipe
@ -9,6 +9,7 @@ class Gildia(BasicNewsRecipe):
    language       = 'pl'
    oldest_article = 8
    max_articles_per_feed = 100
+    remove_empty_feeds=True
    no_stylesheets=True
    remove_tags=[dict(name='div', attrs={'class':'backlink'}), dict(name='div', attrs={'class':'im_img'}), dict(name='div', attrs={'class':'addthis_toolbox addthis_default_style'})]
    keep_only_tags=dict(name='div', attrs={'class':'widetext'})
@ -24,3 +25,16 @@ class Gildia(BasicNewsRecipe):
                    self.log.warn('odnosnik')
                    self.log.warn(link['href'])
                    return self.index_to_soup(link['href'], raw=True)
+
+    def preprocess_html(self, soup):
+        """Resolve link targets against the matching gildia.pl sub-site.
+
+        Targets containing ``/gry/`` are resolved against gry.gildia.pl.
+        All other targets use literatura.gildia.pl when the page title
+        mentions books or comics, and www.gildia.pl otherwise.
+
+        :param soup: parsed page, modified in place.
+        :return: the same ``soup`` object.
+        """
+        from urlparse import urljoin
+        # Read the title once; it may be missing or have no plain string.
+        title=soup.title.string if soup.title is not None else None
+        title=title.lower() if title else u''
+        # NOTE(review): comics pages are sent to the literatura sub-site,
+        # exactly as the first matching branch always did - confirm whether
+        # a dedicated comics host was intended.
+        if u'książk' in title or u'komiks' in title:
+            section='http://www.literatura.gildia.pl'
+        else:
+            section='http://www.gildia.pl'
+        for a in soup('a'):
+            if a.has_key('href'):
+                if '/gry/' in a['href']:
+                    base='http://www.gry.gildia.pl'
+                else:
+                    base=section
+                # urljoin returns absolute URLs and other schemes (mailto:,
+                # javascript:) as they are, so the base only affects
+                # relative targets.
+                a['href']=urljoin(base, a['href'])
+        return soup
--- a/recipes/gram_pl.recipe
+++ b/recipes/gram_pl.recipe
@ -7,6 +7,7 @@ class Gram_pl(BasicNewsRecipe):
    category       = 'games'
    language       = 'pl'
    oldest_article = 8
+    index='http://www.gram.pl'
    max_articles_per_feed = 100
    no_stylesheets= True
    extra_css = 'h2 {font-style: italic;  font-size:20px;} .picbox div {float: left;}'
@ -52,4 +53,7 @@ class Gram_pl(BasicNewsRecipe):
        tag=soup.findAll(name='div', attrs={'class':'picbox'})
        for t in tag:
            t['style']='float: left;'
+        for a in soup('a'):
+            if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
+                a['href']=self.index + a['href']
        return soup
--- a/recipes/in4_pl.recipe
+++ b/recipes/in4_pl.recipe
@ -8,6 +8,7 @@ class in4(BasicNewsRecipe):
    description   = u'Serwis Informacyjny - Aktualnosci, recenzje'
    category       = 'IT'
    language       = 'pl'
+    index='http://www.in4.pl/'
    #cover_url= 'http://www.in4.pl/recenzje/337/in4pl.jpg'
    no_stylesheets = True
    remove_empty_feeds = True
@ -39,6 +40,7 @@ class in4(BasicNewsRecipe):

    def preprocess_html(self, soup):
+        """Run ``self.append_page`` on the page, then make its links absolute.
+
+        :param soup: parsed page, modified in place.
+        :return: the same ``soup`` object.
+        """
        self.append_page(soup, soup.body)
+        from urlparse import urljoin
+        for a in soup('a'):
+            if a.has_key('href'):
+                # urljoin returns absolute URLs and other schemes (mailto:,
+                # javascript:) as they are. The index ends with "/", so
+                # plain concatenation would turn "/x" into "//x".
+                a['href']=urljoin(self.index, a['href'])
        return soup
-   
-        
--- a/recipes/infra_pl.recipe
+++ b/recipes/infra_pl.recipe
@ -8,6 +8,7 @@ class INFRA(BasicNewsRecipe):
    description   = u'Serwis Informacyjny INFRA - UFO, Zjawiska Paranormalne, Duchy, Tajemnice świata.'
    cover_url      = 'http://npn.nazwa.pl/templates/ja_teline_ii/images/logo.jpg'
    category       = 'UFO'
+    index='http://infra.org.pl'
    language       = 'pl'
    max_articles_per_feed = 100
    no_stylesheers=True
@ -15,3 +16,11 @@ class INFRA(BasicNewsRecipe):
    remove_tags_after=dict(attrs={'class':'pagenav'})
    remove_tags=[dict(attrs={'class':'pagenav'})]
    feeds          = [(u'Najnowsze wiadomo\u015bci', u'http://www.infra.org.pl/index.php?option=com_rd_rss&id=1')]
+
+    def preprocess_html(self, soup):
+        """Strip inline styles and resolve links against ``self.index``.
+
+        :param soup: parsed page, modified in place.
+        :return: the same ``soup`` object.
+        """
+        from urlparse import urljoin
+        for item in soup.findAll(style=True):
+            del item['style']
+        for a in soup('a'):
+            if a.has_key('href'):
+                # urljoin returns absolute URLs and other schemes (mailto:,
+                # javascript:) as they are and inserts the "/" that plain
+                # concatenation would omit for targets without one.
+                a['href']=urljoin(self.index, a['href'])
+        return soup
--- a/recipes/konflikty_zbrojne.recipe
+++ b/recipes/konflikty_zbrojne.recipe
@ -10,6 +10,23 @@ class Konflikty(BasicNewsRecipe):
    category='military, history'
    oldest_article = 7
    max_articles_per_feed = 100
-    auto_cleanup = True
+    no_stylesheets = True
+    keep_only_tags=[dict(attrs={'class':['title1', 'image']}), dict(id='body')]

-    feeds          = [(u'Aktualności', u'http://www.konflikty.pl/rss_aktualnosci_10.xml'), (u'Artyku\u0142y', u'http://www.konflikty.pl/rss_artykuly_10.xml'), (u'Relacje', u'http://www.konflikty.pl/rss_relacje_10.xml'), (u'Recenzje', u'http://www.konflikty.pl/rss_recenzje_10.xml')]
+    feeds          = [(u'Aktualności', u'http://www.konflikty.pl/rss_aktualnosci_10.xml'), 
+		(u'Artyku\u0142y', u'http://www.konflikty.pl/rss_artykuly_10.xml'), 
+		(u'Historia', u'http://www.konflikty.pl/rss_historia_10.xml'), 
+		(u'Militaria', u'http://www.konflikty.pl/rss_militaria_10.xml'), 
+		(u'Relacje', u'http://www.konflikty.pl/rss_relacje_10.xml'), 
+		(u'Recenzje', u'http://www.konflikty.pl/rss_recenzje_10.xml'),
+		(u'Teksty źródłowe', u'http://www.konflikty.pl/rss_tekstyzrodlowe_10.xml')]
+
+    def preprocess_html(self, soup):
+        """Strip inline styles and caption linked images with their alt text.
+
+        Every ``<a class="image">`` whose image carries an ``alt`` attribute
+        is renamed to ``<div>`` and receives an italic paragraph holding that
+        text as its last child.
+
+        :param soup: parsed page, modified in place.
+        :return: the same ``soup`` object.
+        """
+        for styled in soup.findAll(style=True):
+            del styled['style']
+        for link in soup.findAll(name='a', attrs={'class':'image'}):
+            picture=link.img
+            if not (picture and picture.has_key('alt')):
+                continue
+            link.name='div'
+            caption=BeautifulSoup('<p style="font-style:italic;">'+picture['alt']+'</p>')
+            # Inserting at len(contents) places the caption after the image.
+            link.insert(len(link.contents), caption)
+        return soup
--- a/recipes/national_geographic_pl.recipe
+++ b/recipes/national_geographic_pl.recipe
@ -9,8 +9,9 @@ from calibre.web.feeds.recipes import BasicNewsRecipe
 class recipeMagic(BasicNewsRecipe):
    title                  = 'National Geographic PL'
    __author__             = 'Marcin Urban 2011'
+    __modified_by__        = 'fenuks'
    description            = 'legenda wśród magazynów z historią sięgającą 120 lat'
-    cover_url      	       = 'http://www.guj.pl/var/guj/storage/images/media/nasze_magazyny/national_geographic/logo/ng_logo/2606-1-pol-PL/ng_logo.jpg'
+    #cover_url      	       = 'http://www.guj.pl/var/guj/storage/images/media/nasze_magazyny/national_geographic/logo/ng_logo/2606-1-pol-PL/ng_logo.jpg'
    oldest_article         = 7
    max_articles_per_feed  = 100
    no_stylesheets         = True
@ -42,11 +43,43 @@ class recipeMagic(BasicNewsRecipe):
                        ]

    remove_attributes = ['width','height']
+    feeds=[]

-    feeds          = [
-                      ('National Geographic PL', 'http://www.national-geographic.pl/rss/'),
-                    ]
+    def find_articles(self, url):
+        """Collect the articles listed on the section page at *url*.
+
+        Reads the ``<li>`` items of the ``<ul>`` inside the element with
+        class ``arl``. Items without a titled link are skipped, and a page
+        without that list yields an empty result instead of an exception.
+
+        :param url: address of the section page to scan.
+        :return: list of dicts with the keys ``title``, ``url``, ``date``
+                 (always empty) and ``description``.
+        """
+        articles = []
+        soup=self.index_to_soup(url)
+        listing=soup.find(attrs={'class':'arl'})
+        if listing is None or listing.ul is None:
+            self.log.warn('No article list found at ' + url)
+            return articles
+        for entry in listing.ul.findAll('li'):
+            link=entry.a
+            if link is None or not (link.has_key('title') and link.has_key('href')):
+                continue
+            #date=soup.find(id='footer').ul.li.string[41:-1]
+            desc=''
+            if entry.div is not None and entry.div.p is not None:
+                # .string is None when the paragraph has nested markup.
+                desc=entry.div.p.string or ''
+            articles.append({'title' : link['title'],
+                   'url'   : link['href'],
+                   'date'  : '',
+                   'description' : desc
+                    })
+        return articles
+
+    def parse_index(self):
+        """Return one ``(title, articles)`` pair per scanned section page.
+
+        The article lists come from ``self.find_articles``.
+        """
+        sections = (
+            (u"Aktualności", 'http://www.national-geographic.pl/aktualnosci/'),
+            (u"Artykuły", 'http://www.national-geographic.pl/artykuly/'),
+        )
+        return [(name, self.find_articles(address)) for name, address in sections]

    def print_version(self, url):
-        return url.replace('artykuly0Cpokaz', 'drukuj-artykul')
+        """Return the printer-friendly address for an article *url*.
+
+        The first section name found in *url* decides which ``<section>/pokaz``
+        fragment is swapped for ``drukuj-artykul``; other addresses are
+        returned unchanged.
+        """
+        for section in ('artykuly', 'aktualnosci'):
+            if section in url:
+                return url.replace(section + '/pokaz', 'drukuj-artykul')
+        return url
+
+    def get_cover_url(self):
+        """Return the cover image address from the current-issues page.
+
+        The address is the ``src`` of the image inside the element with
+        class ``txt jus``. When that image cannot be found, ``cover_url``
+        is left as it was.
+
+        :return: the value of ``self.cover_url``, or ``None`` if unset.
+        """
+        soup = self.index_to_soup('http://www.national-geographic.pl/biezace-wydania/')
+        tag=soup.find(attrs={'class':'txt jus'})
+        # A changed page layout should cost the cover, not the download.
+        if tag is not None and tag.img is not None and tag.img.has_key('src'):
+            self.cover_url=tag.img['src']
+        return getattr(self, 'cover_url', None)

--- a/recipes/nowa_fantastyka.recipe
+++ b/recipes/nowa_fantastyka.recipe
@ -81,5 +81,7 @@ class Nowa_Fantastyka(BasicNewsRecipe):
        title=soup.find(attrs={'class':'tytul'})
        if title:
            title['style']='font-size: 20px; font-weight: bold;'
-        self.log.warn(soup)
+        for a in soup('a'):
+            if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
+                a['href']=self.INDEX + a['href']
        return soup
--- a/recipes/pc_arena.recipe
+++ b/recipes/pc_arena.recipe
@ -7,6 +7,7 @@ class PC_Arena(BasicNewsRecipe):
    description   = u'Najnowsze informacje z branży IT - testy, recenzje, aktualności, rankingi, wywiady. Twoje źródło informacji o sprzęcie komputerowym.'
    category       = 'IT'
    language       = 'pl'
+    index='http://pcarena.pl'
    masthead_url='http://pcarena.pl/pcarena/img/logo.png'
    cover_url= 'http://pcarena.pl/pcarena/img/logo.png'
    no_stylesheets = True
@ -23,3 +24,9 @@ class PC_Arena(BasicNewsRecipe):
            return 'http://pcarena.pl' + url
        else:
            return url
+
+    def preprocess_html(self, soup):
+        """Resolve the page's link targets against ``self.index``.
+
+        :param soup: parsed page, modified in place.
+        :return: the same ``soup`` object.
+        """
+        from urlparse import urljoin
+        for a in soup('a'):
+            if a.has_key('href'):
+                # urljoin returns absolute URLs and other schemes (mailto:,
+                # javascript:) as they are and inserts the "/" that plain
+                # concatenation would omit for targets without one.
+                a['href']=urljoin(self.index, a['href'])
+        return soup
--- a/recipes/tanuki.recipe
+++ b/recipes/tanuki.recipe
@ -34,4 +34,12 @@ class tanuki(BasicNewsRecipe):

    def preprocess_html(self, soup):
+        """Run ``self.append_page``, then make links absolute.
+
+        The page title decides which tanuki.pl sub-site the links are
+        resolved against. When it names none of the three known sections,
+        the links are left untouched.
+
+        :param soup: parsed page, modified in place.
+        :return: the same ``soup`` object.
+        """
        self.append_page(soup, soup.body)
+        from urlparse import urljoin
+        # Read the title once; it may be missing or have no plain string.
+        title=soup.title.string if soup.title is not None else None
+        title=title.lower() if title else ''
+        base=None
+        for marker, site in (('tanuki-anime', 'http://anime.tanuki.pl'),
+                             ('tanuki-manga', 'http://manga.tanuki.pl'),
+                             ('tanuki-czytelnia', 'http://czytelnia.tanuki.pl')):
+            if marker in title:
+                base=site
+                break
+        if base is not None:
+            for a in soup('a'):
+                if a.has_key('href'):
+                    # urljoin returns absolute URLs and other schemes
+                    # (mailto:, javascript:) as they are.
+                    a['href']=urljoin(base, a['href'])
        return soup
--- a/recipes/webhosting_pl.recipe
+++ b/recipes/webhosting_pl.recipe
@ -8,6 +8,7 @@ class webhosting_pl(BasicNewsRecipe):
    cover_url='http://webhosting.pl/images/logo.png'
    masthead_url='http://webhosting.pl/images/logo.png'
    oldest_article = 7
+    index='http://webhosting.pl'
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_empty_feeds = True
@ -37,3 +38,9 @@ class webhosting_pl(BasicNewsRecipe):

    def print_version(self, url):
+        # Builds the print address by inserting "/print" after every
+        # occurrence of "webhosting.pl" in the article URL.
        return url.replace('webhosting.pl', 'webhosting.pl/print')
+
+    def preprocess_html(self, soup):
+        """Resolve the page's link targets against ``self.index``.
+
+        :param soup: parsed page, modified in place.
+        :return: the same ``soup`` object.
+        """
+        from urlparse import urljoin
+        for a in soup('a'):
+            if a.has_key('href'):
+                # urljoin returns absolute URLs and other schemes (mailto:,
+                # javascript:) as they are and inserts the "/" that plain
+                # concatenation would omit for targets without one.
+                a['href']=urljoin(self.index, a['href'])
+        return soup