diff --git a/recipes/archeowiesci.recipe b/recipes/archeowiesci.recipe
new file mode 100644
index 0000000000..3c93d3644f
--- /dev/null
+++ b/recipes/archeowiesci.recipe
@@ -0,0 +1,27 @@
+# -*- coding: utf-8 -*-
+from calibre.web.feeds.news import BasicNewsRecipe
+
+
+class Archeowiesci(BasicNewsRecipe):
+    """Recipe for the archeowiesci.pl news feed."""
+
+    title = u'Archeowiesci'
+    __author__ = 'fenuks'
+    category = 'archeology'
+    language = 'pl'
+    cover_url = 'http://archeowiesci.pl/wp-content/uploads/2011/05/Archeowiesci2-115x115.jpg'
+    oldest_article = 7
+    max_articles_per_feed = 100
+    auto_cleanup = True
+    remove_tags = [dict(name='span', attrs={'class': ['post-ratings', 'post-ratings-loading']})]
+    feeds = [(u'Archeowieści', u'http://archeowiesci.pl/feed/')]
+
+    def parse_feeds(self):
+        """Return the parsed feeds without articles whose title contains 'subskrypcja'."""
+        feeds = BasicNewsRecipe.parse_feeds(self)
+        for feed in feeds:
+            # Iterate over a copy: matching articles are removed from the list in place.
+            for article in feed.articles[:]:
+                if 'subskrypcja' in article.title:
+                    feed.articles.remove(article)
+        return feeds
diff --git a/recipes/eioba.recipe b/recipes/eioba.recipe
new file mode 100644
index 0000000000..14256c5811
--- /dev/null
+++ b/recipes/eioba.recipe
@@ -0,0 +1,27 @@
+# -*- coding: utf-8 -*-
+from calibre.web.feeds.news import BasicNewsRecipe
+
+
+class eioba(BasicNewsRecipe):
+    """Recipe for the eioba.pl feeds."""
+
+    title = u'eioba'
+    __author__ = 'fenuks'
+    cover_url = 'http://www.eioba.org/lay/logo_pl_v3.png'
+    language = 'pl'
+    oldest_article = 7
+    remove_empty_feeds = True
+    max_articles_per_feed = 100
+    extra_css = '#ctl0_body_Topic {font-weight: bold; font-size:30px;}'
+    keep_only_tags = [dict(id=['ctl0_body_Topic', 'articleContent'])]
+    feeds = [
+        (u'Wszystkie kategorie', u'http://feeds.eioba.pl/eioba-pl-top'),
+        (u'Technologia', u'http://www.eioba.pl/feed/categories/1.xml'),
+        (u'Nauka', u'http://www.eioba.pl/feed/categories/12.xml'),
+        (u'Finanse', u'http://www.eioba.pl/feed/categories/7.xml'),
+        (u'Życie', u'http://www.eioba.pl/feed/categories/5.xml'),
+        (u'Zainteresowania', u'http://www.eioba.pl/feed/categories/420.xml'),
+        (u'Społeczeństwo', u'http://www.eioba.pl/feed/categories/8.xml'),
+        (u'Rozrywka', u'http://www.eioba.pl/feed/categories/10.xml'),
+        (u'Różne', u'http://www.eioba.pl/feed/categories/9.xml'),
+    ]
diff --git a/recipes/focus_pl.recipe b/recipes/focus_pl.recipe
new file mode 100644
index 0000000000..d63af135bc
--- /dev/null
+++ b/recipes/focus_pl.recipe
@@ -0,0 +1,85 @@
+# -*- coding: utf-8 -*-
+from calibre.web.feeds.news import BasicNewsRecipe
+
+
+class Focus_pl(BasicNewsRecipe):
+    """Recipe for focus.pl that fetches print pages and joins multi-page articles."""
+
+    title = u'Focus.pl'
+    oldest_article = 15
+    max_articles_per_feed = 100
+    __author__ = 'fenuks'
+    language = 'pl'
+    description = 'polish scientific monthly magazine'
+    category = 'magazine'
+    cover_url = ''
+    remove_empty_feeds = True
+    no_stylesheets = True
+    remove_tags_before = dict(name='div', attrs={'class': 'h2 h2f'})
+    remove_tags_after = dict(name='div', attrs={'class': 'clear'})
+    feeds = [
+        (u'Wszystkie kategorie', u'http://focus.pl.feedsportal.com/c/32992/f/532692/index.rss'),
+        (u'Nauka', u'http://focus.pl.feedsportal.com/c/32992/f/532693/index.rss'),
+        (u'Historia', u'http://focus.pl.feedsportal.com/c/32992/f/532694/index.rss'),
+        (u'Cywilizacja', u'http://focus.pl.feedsportal.com/c/32992/f/532695/index.rss'),
+        (u'Sport', u'http://focus.pl.feedsportal.com/c/32992/f/532696/index.rss'),
+        (u'Technika', u'http://focus.pl.feedsportal.com/c/32992/f/532697/index.rss'),
+        (u'Przyroda', u'http://focus.pl.feedsportal.com/c/32992/f/532698/index.rss'),
+        (u'Technologie', u'http://focus.pl.feedsportal.com/c/32992/f/532699/index.rss'),
+        (u'Warto wiedzieć', u'http://focus.pl.feedsportal.com/c/32992/f/532700/index.rss'),
+    ]
+
+    def skip_ad_pages(self, soup):
+        """Return the raw 'do-druku/1/' page linked from the first anchor of soup.
+
+        Returns None when soup has no anchor with an href.
+        """
+        tag = soup.find(name='a')
+        if tag is None or not tag.get('href'):
+            return None
+        return self.index_to_soup(tag['href'] + 'do-druku/1/', raw=True)
+
+    def append_page(self, appendtag):
+        """Append the text of every following page of an article to appendtag.
+
+        Paging links are read from the div of class 'arrows'; the next page is the
+        link whose text contains 'Następne'.
+        """
+        tag = appendtag.find(name='div', attrs={'class': 'arrows'})
+        if tag is None or tag.a is None:
+            return
+        nexturl = 'http://www.focus.pl/' + tag.a['href']
+        for rem in appendtag.findAll(name='div', attrs={'class': 'klik-nav'}):
+            rem.extract()
+        while nexturl:
+            soup2 = self.index_to_soup(nexturl)
+            nexturl = None
+            pagetext = soup2.find(name='div', attrs={'class': 'txt'})
+            if pagetext is None:
+                # Unexpected layout: keep the pages collected so far.
+                break
+            tag = pagetext.find(name='div', attrs={'class': 'arrows'})
+            # A page without the paging block is treated as the last one.
+            for r in (tag.findAll(name='a') if tag is not None else []):
+                # r.string is None when the anchor does not wrap a single string.
+                if r.string and u'Następne' in r.string:
+                    nexturl = 'http://www.focus.pl/' + r['href']
+            for rem in pagetext.findAll(name='div', attrs={'class': 'klik-nav'}):
+                rem.extract()
+            appendtag.insert(len(appendtag.contents), pagetext)
+
+    def get_cover_url(self):
+        """Return the cover URL read from http://www.focus.pl/magazyn/.
+
+        Falls back to the cover_url attribute when the page has no 'clr fl' div with a link.
+        """
+        soup = self.index_to_soup('http://www.focus.pl/magazyn/')
+        tag = soup.find(name='div', attrs={'class': 'clr fl'})
+        if tag is not None and tag.a is not None:
+            self.cover_url = 'http://www.focus.pl/' + tag.a['href']
+        return self.cover_url
+
+    def preprocess_html(self, soup):
+        """Merge the following pages of the article into soup and return it."""
+        self.append_page(soup.body)
+        return soup
diff --git a/recipes/gazeta_wyborcza.recipe b/recipes/gazeta_wyborcza.recipe
new file mode 100644
index 0000000000..0959ff80a3
--- /dev/null
+++ b/recipes/gazeta_wyborcza.recipe
@@ -0,0 +1,128 @@
+# -*- coding: utf-8 -*-
+from calibre.web.feeds.news import BasicNewsRecipe
+
+
+class Gazeta_Wyborcza(BasicNewsRecipe):
+    """Recipe for wyborcza.pl that joins multi-page articles and photo galleries."""
+
+    title = u'Gazeta Wyborcza'
+    __author__ = 'fenuks'
+    cover_url = 'http://bi.gazeta.pl/im/5/10285/z10285445AA.jpg'
+    language = 'pl'
+    description = 'news from gazeta.pl'
+    category = 'newspaper'
+    INDEX = 'http://wyborcza.pl'
+    remove_empty_feeds = True
+    oldest_article = 3
+    max_articles_per_feed = 100
+    remove_javascript = True
+    no_stylesheets = True
+    remove_tags_before = dict(id='k0')
+    remove_tags_after = dict(id='banP4')
+    remove_tags = [
+        dict(name='div', attrs={'class': 'rel_box'}),
+        dict(attrs={'class': ['date', 'zdjP', 'zdjM', 'pollCont', 'rel_video', 'brand', 'txt_upl']}),
+        dict(name='div', attrs={'id': 'footer'}),
+    ]
+    feeds = [
+        (u'Kraj', u'http://rss.feedsportal.com/c/32739/f/530266/index.rss'),
+        (u'\u015awiat', u'http://rss.feedsportal.com/c/32739/f/530270/index.rss'),
+        (u'Wyborcza.biz', u'http://wyborcza.biz/pub/rss/wyborcza_biz_wiadomosci.htm'),
+        (u'Komentarze', u'http://rss.feedsportal.com/c/32739/f/530312/index.rss'),
+        (u'Kultura', u'http://rss.gazeta.pl/pub/rss/gazetawyborcza_kultura.xml'),
+        (u'Nauka', u'http://rss.feedsportal.com/c/32739/f/530269/index.rss'),
+        (u'Opinie', u'http://rss.gazeta.pl/pub/rss/opinie.xml'),
+        (u'Gazeta \u015awi\u0105teczna', u'http://rss.feedsportal.com/c/32739/f/530431/index.rss'),
+        (u'Du\u017cy Format', u'http://rss.feedsportal.com/c/32739/f/530265/index.rss'),
+        (u'Witamy w Polsce', u'http://rss.feedsportal.com/c/32739/f/530476/index.rss'),
+        (u'M\u0119ska Muzyka', u'http://rss.feedsportal.com/c/32739/f/530337/index.rss'),
+        (u'Lata Lec\u0105', u'http://rss.feedsportal.com/c/32739/f/530326/index.rss'),
+        (u'Solidarni z Tybetem', u'http://rss.feedsportal.com/c/32739/f/530461/index.rss'),
+        (u'W pon. - \u017bakowski', u'http://rss.feedsportal.com/c/32739/f/530491/index.rss'),
+        (u'We wt. - Kolenda-Zalewska', u'http://rss.feedsportal.com/c/32739/f/530310/index.rss'),
+        (u'\u015aroda w \u015brod\u0119', u'http://rss.feedsportal.com/c/32739/f/530428/index.rss'),
+        (u'W pi\u0105tek - Olejnik', u'http://rss.feedsportal.com/c/32739/f/530364/index.rss'),
+        (u'Nekrologi', u'http://rss.feedsportal.com/c/32739/f/530358/index.rss'),
+    ]
+
+    def skip_ad_pages(self, soup):
+        """Return the raw page behind the link of class 'btn', or None if soup has none."""
+        tag = soup.find(name='a', attrs={'class': 'btn'})
+        if tag is None or not tag.get('href'):
+            return None
+        return self.index_to_soup(tag['href'], raw=True)
+
+    def append_page(self, soup, appendtag):
+        """Append every following page of a multi-page article to appendtag.
+
+        The pager is the div with id 'Str'; the next page is its link whose text
+        contains 'następne'.
+        """
+        tag = soup.find('div', attrs={'id': 'Str'})
+        pager = appendtag.find('div', attrs={'id': 'Str'})
+        links = []
+        if pager is not None:
+            if tag is not None:
+                links = tag.findAll('a')
+            pager.extract()
+        source = appendtag.find(id='source')
+        if source is not None:
+            source.extract()
+        while links:
+            candidates, links = links, []
+            for link in candidates:
+                # link.string is None when the anchor does not wrap a single string.
+                if not link.string or u'następne' not in link.string:
+                    continue
+                soup2 = self.index_to_soup(self.INDEX + link['href'])
+                pagetext = soup2.find(id='artykul')
+                if pagetext is not None:
+                    appendtag.insert(len(appendtag.contents), pagetext)
+                tag = soup2.find('div', attrs={'id': 'Str'})
+                if tag is not None:
+                    links = tag.findAll('a')
+                # One "next" link per page is enough; following a second one
+                # would append the same page twice.
+                break
+
+    def gallery_article(self, appendtag):
+        """Append every following page of a photo gallery to appendtag."""
+        if appendtag.find(id='container_gal') is None:
+            return
+        nexturl = self._gallery_next_url(appendtag)
+        navi = appendtag.find(id='gal_navi')
+        if navi is not None:
+            navi.extract()
+        while nexturl:
+            soup2 = self.index_to_soup(nexturl)
+            pagetext = soup2.find(id='container_gal')
+            if pagetext is None:
+                # Unexpected layout: keep the pages collected so far.
+                break
+            nexturl = self._gallery_next_url(pagetext)
+            appendtag.insert(len(appendtag.contents), pagetext)
+            navi = appendtag.find(id='gal_navi')
+            if navi is not None:
+                navi.extract()
+
+    @staticmethod
+    def _gallery_next_url(tag):
+        """Return the href of the link in the 'gal_btn_next' element of tag, or None."""
+        button = tag.find(id='gal_btn_next')
+        if button is None or button.a is None:
+            return None
+        return button.a['href']
+
+    def preprocess_html(self, soup):
+        """Merge following article pages and gallery pages into soup and return it."""
+        self.append_page(soup, soup.body)
+        if soup.find(id='container_gal'):
+            self.gallery_article(soup.body)
+        return soup
+
+    def print_version(self, url):
+        """Map wyborcza.biz article URLs to their '2029020' variant.
+
+        URLs that do not contain 'http://wyborcza.biz/biznes/1' are returned unchanged.
+        """
+        return url.replace('http://wyborcza.biz/biznes/1', 'http://wyborcza.biz/biznes/2029020')
diff --git a/recipes/icons/archeowiesci.png b/recipes/icons/archeowiesci.png
new file mode 100644
index 0000000000..7cf0ee1ff6
Binary files /dev/null and b/recipes/icons/archeowiesci.png differ
diff --git a/recipes/icons/eioba.png b/recipes/icons/eioba.png
new file mode 100644
index 0000000000..9004d28d25
Binary files /dev/null and b/recipes/icons/eioba.png differ
diff --git a/recipes/icons/focus_pl.png b/recipes/icons/focus_pl.png
new file mode 100644
index 0000000000..4dfd72200c
Binary files /dev/null and b/recipes/icons/focus_pl.png differ
diff --git a/recipes/icons/gazeta_wyborcza.png b/recipes/icons/gazeta_wyborcza.png
new file mode 100644
index 0000000000..9e480cc41d
Binary files /dev/null and b/recipes/icons/gazeta_wyborcza.png differ
diff --git a/recipes/icons/konflikty_zbrojne.png b/recipes/icons/konflikty_zbrojne.png
new file mode 100644
index 0000000000..e8dcceba7c
Binary files /dev/null and b/recipes/icons/konflikty_zbrojne.png differ
diff --git a/recipes/konflikty_zbrojne.recipe b/recipes/konflikty_zbrojne.recipe
new file mode 100644
index 0000000000..7921e98f48
--- /dev/null
+++ b/recipes/konflikty_zbrojne.recipe
@@ -0,0 +1,23 @@
+# -*- coding: utf-8 -*-
+from calibre.web.feeds.news import BasicNewsRecipe
+
+
+class Konflikty(BasicNewsRecipe):
+    """Recipe for the konflikty.pl feeds."""
+
+    title = u'Konflikty Zbrojne'
+    __author__ = 'fenuks'
+    cover_url = 'http://www.konflikty.pl/images/tapety_logo.jpg'
+    language = 'pl'
+    description = 'military news'
+    category = 'military, history'
+    oldest_article = 7
+    max_articles_per_feed = 100
+    auto_cleanup = True
+
+    feeds = [
+        (u'Aktualności', u'http://www.konflikty.pl/rss_aktualnosci_10.xml'),
+        (u'Artyku\u0142y', u'http://www.konflikty.pl/rss_artykuly_10.xml'),
+        (u'Relacje', u'http://www.konflikty.pl/rss_relacje_10.xml'),
+        (u'Recenzje', u'http://www.konflikty.pl/rss_recenzje_10.xml'),
+    ]
diff --git a/recipes/naczytniki.recipe b/recipes/naczytniki.recipe
new file mode 100644
index 0000000000..374c6dd0cb
--- /dev/null
+++ b/recipes/naczytniki.recipe
@@ -0,0 +1,21 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+
+class naczytniki(BasicNewsRecipe):
+    """Recipe for the naczytniki.pl feed."""
+
+    title = u'naczytniki.pl'
+    __author__ = 'fenuks'
+    cover_url = 'http://naczytniki.pl/wp-content/uploads/2010/08/logo_nc28.png'
+    language = 'pl'
+    description = 'everything about e-readers'
+    category = 'readers'
+    oldest_article = 7
+    max_articles_per_feed = 100
+    remove_tags_after = dict(name='div', attrs={'class': 'sociable'})
+    keep_only_tags = [dict(name='div', attrs={'class': 'post'})]
+    remove_tags = [
+        dict(name='span', attrs={'class': 'comments'}),
+        dict(name='div', attrs={'class': 'sociable'}),
+    ]
+    feeds = [(u'Wpisy', u'http://naczytniki.pl/?feed=rss2')]
diff --git a/recipes/nowa_fantastyka.recipe b/recipes/nowa_fantastyka.recipe
new file mode 100644
index 0000000000..d8015105f8
--- /dev/null
+++ b/recipes/nowa_fantastyka.recipe
@@ -0,0 +1,57 @@
+# -*- coding: utf-8 -*-
+from calibre.web.feeds.news import BasicNewsRecipe
+
+
+class Nowa_Fantastyka(BasicNewsRecipe):
+    """Recipe that builds its feeds by scraping index pages of fantastyka.pl."""
+
+    title = u'Nowa Fantastyka'
+    oldest_article = 7
+    __author__ = 'fenuks'
+    language = 'pl'
+    description = 'site for fantasy readers'
+    category = 'fantasy'
+    max_articles_per_feed = 100
+    INDEX = 'http://www.fantastyka.pl/'
+    remove_tags_before = dict(attrs={'class': 'belka1-tlo-md'})
+    remove_tags_after = dict(name='td', attrs={'class': 'belka1-bot'})
+    remove_tags = [dict(attrs={'class': 'avatar2'})]
+    feeds = []
+
+    def find_articles(self, url):
+        """Return article dicts for the 'a-box' links found on the index page at url.
+
+        An empty list is returned when the page has no 'belka1-tlo-m' element.
+        """
+        soup = self.index_to_soup(url)
+        tag = soup.find(attrs={'class': 'belka1-tlo-m'})
+        if tag is None:
+            return []
+        return [
+            {
+                'title': link.string,
+                'url': self.INDEX + link['href'],
+                'date': '',
+                'description': '',
+            }
+            for link in tag.findAll(name='a', attrs={'class': 'a-box'})
+        ]
+
+    def parse_index(self):
+        """Return the (feed title, articles) pairs for the three scraped sections."""
+        return [
+            (u"Opowiadania", self.find_articles('http://www.fantastyka.pl/3.html')),
+            (u"Publicystyka", self.find_articles('http://www.fantastyka.pl/6.html')),
+            (u"Hype Park", self.find_articles('http://www.fantastyka.pl/9.html')),
+        ]
+
+    def get_cover_url(self):
+        """Return the URL of the 'okladka' image on http://www.fantastyka.pl/1.html.
+
+        Falls back to the existing cover_url attribute (or None) when no such image is found.
+        """
+        soup = self.index_to_soup('http://www.fantastyka.pl/1.html')
+        cover = soup.find(name='img', attrs={'class': 'okladka'})
+        if cover is not None and cover.get('src'):
+            self.cover_url = self.INDEX + cover['src']
+        return getattr(self, 'cover_url', None)