From 802597744715a120fade8b7ace18dddc4442a7d2 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Sun, 2 Oct 2011 08:50:47 -0600
Subject: [PATCH] Various polish news sources by fenuks

---
 recipes/archeowiesci.recipe         |  25 ++++++
 recipes/eioba.recipe                |  25 ++++++
 recipes/focus_pl.recipe             |  79 +++++++++++++++++
 recipes/gazeta_wyborcza.recipe      | 128 ++++++++++++++++++++++++++++
 recipes/icons/archeowiesci.png      | Bin 0 -> 718 bytes
 recipes/icons/eioba.png             | Bin 0 -> 908 bytes
 recipes/icons/focus_pl.png          | Bin 0 -> 695 bytes
 recipes/icons/gazeta_wyborcza.png   | Bin 0 -> 221 bytes
 recipes/icons/konflikty_zbrojne.png | Bin 0 -> 320 bytes
 recipes/konflikty_zbrojne.recipe    |  21 +++++
 recipes/naczytniki.recipe           |  16 ++++
 recipes/nowa_fantastyka.recipe      |  55 ++++++++++++
 12 files changed, 349 insertions(+)
 create mode 100644 recipes/archeowiesci.recipe
 create mode 100644 recipes/eioba.recipe
 create mode 100644 recipes/focus_pl.recipe
 create mode 100644 recipes/gazeta_wyborcza.recipe
 create mode 100644 recipes/icons/archeowiesci.png
 create mode 100644 recipes/icons/eioba.png
 create mode 100644 recipes/icons/focus_pl.png
 create mode 100644 recipes/icons/gazeta_wyborcza.png
 create mode 100644 recipes/icons/konflikty_zbrojne.png
 create mode 100644 recipes/konflikty_zbrojne.recipe
 create mode 100644 recipes/naczytniki.recipe
 create mode 100644 recipes/nowa_fantastyka.recipe

diff --git a/recipes/archeowiesci.recipe b/recipes/archeowiesci.recipe
new file mode 100644
index 0000000000..3c93d3644f
--- /dev/null
+++ b/recipes/archeowiesci.recipe
@@ -0,0 +1,25 @@
+# -*- coding: utf-8 -*-
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Archeowiesci(BasicNewsRecipe):
+    """Calibre news recipe for the archeowiesci.pl RSS feed."""
+    title = u'Archeowiesci'
+    __author__ = 'fenuks'
+    category = 'archeology'
+    language = 'pl'
+    cover_url = 'http://archeowiesci.pl/wp-content/uploads/2011/05/Archeowiesci2-115x115.jpg'
+    oldest_article = 7
+    max_articles_per_feed = 100
+    auto_cleanup = True
+    remove_tags = [dict(name='span', attrs={'class': ['post-ratings', 'post-ratings-loading']})]
+    feeds = [(u'Archeowieści', u'http://archeowiesci.pl/feed/')]
+
+    def parse_feeds(self):
+        """Return the parsed feeds minus articles with 'subskrypcja' in the title."""
+        feeds = BasicNewsRecipe.parse_feeds(self)
+        for feed in feeds:
+            # Iterate over a copy because matching articles are removed in place.
+            for article in feed.articles[:]:
+                if 'subskrypcja' in article.title:
+                    feed.articles.remove(article)
+        return feeds
diff --git a/recipes/eioba.recipe b/recipes/eioba.recipe
new file mode 100644
index 0000000000..14256c5811
--- /dev/null
+++ b/recipes/eioba.recipe
@@ -0,0 +1,25 @@
+# -*- coding: utf-8 -*-
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class eioba(BasicNewsRecipe):
+    """Calibre news recipe for the eioba.pl category feeds."""
+    title = u'eioba'
+    __author__ = 'fenuks'
+    cover_url = 'http://www.eioba.org/lay/logo_pl_v3.png'
+    language = 'pl'
+    oldest_article = 7
+    remove_empty_feeds = True
+    max_articles_per_feed = 100
+    extra_css = '#ctl0_body_Topic {font-weight: bold; font-size:30px;}'
+    keep_only_tags = [dict(id=['ctl0_body_Topic', 'articleContent'])]
+    feeds = [
+        (u'Wszystkie kategorie', u'http://feeds.eioba.pl/eioba-pl-top'),
+        (u'Technologia', u'http://www.eioba.pl/feed/categories/1.xml'),
+        (u'Nauka', u'http://www.eioba.pl/feed/categories/12.xml'),
+        (u'Finanse', u'http://www.eioba.pl/feed/categories/7.xml'),
+        (u'Życie', u'http://www.eioba.pl/feed/categories/5.xml'),
+        (u'Zainteresowania', u'http://www.eioba.pl/feed/categories/420.xml'),
+        (u'Społeczeństwo', u'http://www.eioba.pl/feed/categories/8.xml'),
+        (u'Rozrywka', u'http://www.eioba.pl/feed/categories/10.xml'),
+        (u'Różne', u'http://www.eioba.pl/feed/categories/9.xml'),
+    ]
diff --git a/recipes/focus_pl.recipe b/recipes/focus_pl.recipe
new file mode 100644
index 0000000000..d63af135bc
--- /dev/null
+++ b/recipes/focus_pl.recipe
@@ -0,0 +1,79 @@
+# -*- coding: utf-8 -*-
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Focus_pl(BasicNewsRecipe):
+    """Calibre news recipe for focus.pl that merges multi-page articles."""
+    title = u'Focus.pl'
+    oldest_article = 15
+    max_articles_per_feed = 100
+    __author__ = 'fenuks'
+    language = 'pl'
+    description = 'polish scientific monthly magazine'
+    category = 'magazine'
+    INDEX = 'http://www.focus.pl/'
+    # Fallback value; get_cover_url() overwrites it when a cover link is found.
+    cover_url = ''
+    remove_empty_feeds = True
+    no_stylesheets = True
+    remove_tags_before = dict(name='div', attrs={'class': 'h2 h2f'})
+    remove_tags_after = dict(name='div', attrs={'class': 'clear'})
+    feeds = [
+        (u'Wszystkie kategorie', u'http://focus.pl.feedsportal.com/c/32992/f/532692/index.rss'),
+        (u'Nauka', u'http://focus.pl.feedsportal.com/c/32992/f/532693/index.rss'),
+        (u'Historia', u'http://focus.pl.feedsportal.com/c/32992/f/532694/index.rss'),
+        (u'Cywilizacja', u'http://focus.pl.feedsportal.com/c/32992/f/532695/index.rss'),
+        (u'Sport', u'http://focus.pl.feedsportal.com/c/32992/f/532696/index.rss'),
+        (u'Technika', u'http://focus.pl.feedsportal.com/c/32992/f/532697/index.rss'),
+        (u'Przyroda', u'http://focus.pl.feedsportal.com/c/32992/f/532698/index.rss'),
+        (u'Technologie', u'http://focus.pl.feedsportal.com/c/32992/f/532699/index.rss'),
+        (u'Warto wiedzieć', u'http://focus.pl.feedsportal.com/c/32992/f/532700/index.rss'),
+    ]
+
+    def skip_ad_pages(self, soup):
+        """Return the raw page at <href of the first anchor> + 'do-druku/1/'.
+
+        Returns None when soup has no anchor or the first anchor has no href.
+        """
+        tag = soup.find(name='a')
+        # tag.get() avoids a KeyError on anchors that carry no href attribute.
+        if tag is not None and tag.get('href'):
+            return self.index_to_soup(tag['href'] + 'do-druku/1/', raw=True)
+        return None
+
+    def append_page(self, appendtag):
+        """Append the div.txt of every following article page to appendtag."""
+        tag = appendtag.find(name='div', attrs={'class': 'arrows'})
+        if tag is None or tag.a is None:
+            return
+        nexturl = self.INDEX + tag.a['href']
+        for rem in appendtag.findAll(name='div', attrs={'class': 'klik-nav'}):
+            rem.extract()
+        while nexturl:
+            soup2 = self.index_to_soup(nexturl)
+            nexturl = None
+            pagetext = soup2.find(name='div', attrs={'class': 'txt'})
+            if pagetext is None:
+                # No div.txt on the fetched page: stop instead of raising.
+                break
+            tag = pagetext.find(name='div', attrs={'class': 'arrows'})
+            if tag is not None:
+                for r in tag.findAll(name='a'):
+                    # r.string is None unless the anchor holds a single string.
+                    if r.string and u'Następne' in r.string:
+                        nexturl = self.INDEX + r['href']
+            for rem in pagetext.findAll(name='div', attrs={'class': 'klik-nav'}):
+                rem.extract()
+            appendtag.insert(len(appendtag.contents), pagetext)
+
+    def get_cover_url(self):
+        """Return INDEX + the first link in div.'clr fl' of magazyn/, else cover_url."""
+        soup = self.index_to_soup(self.INDEX + 'magazyn/')
+        tag = soup.find(name='div', attrs={'class': 'clr fl'})
+        if tag is not None and tag.a is not None:
+            self.cover_url = self.INDEX + tag.a['href']
+        return self.cover_url
+
+    def preprocess_html(self, soup):
+        """Merge the following pages into soup.body and return soup."""
+        self.append_page(soup.body)
+        return soup
diff --git a/recipes/gazeta_wyborcza.recipe b/recipes/gazeta_wyborcza.recipe
new file mode 100644
index 0000000000..0959ff80a3
--- /dev/null
+++ b/recipes/gazeta_wyborcza.recipe
@@ -0,0 +1,128 @@
+# -*- coding: utf-8 -*-
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Gazeta_Wyborcza(BasicNewsRecipe):
+    """Calibre news recipe for wyborcza.pl; merges paginated articles and galleries."""
+    title = u'Gazeta Wyborcza'
+    __author__ = 'fenuks'
+    cover_url = 'http://bi.gazeta.pl/im/5/10285/z10285445AA.jpg'
+    language = 'pl'
+    description = 'news from gazeta.pl'
+    category = 'newspaper'
+    INDEX = 'http://wyborcza.pl'
+    remove_empty_feeds = True
+    oldest_article = 3
+    max_articles_per_feed = 100
+    remove_javascript = True
+    no_stylesheets = True
+    remove_tags_before = dict(id='k0')
+    remove_tags_after = dict(id='banP4')
+    remove_tags = [
+        dict(name='div', attrs={'class': 'rel_box'}),
+        dict(attrs={'class': ['date', 'zdjP', 'zdjM', 'pollCont', 'rel_video', 'brand', 'txt_upl']}),
+        dict(name='div', attrs={'id': 'footer'}),
+    ]
+    feeds = [
+        (u'Kraj', u'http://rss.feedsportal.com/c/32739/f/530266/index.rss'),
+        (u'\u015awiat', u'http://rss.feedsportal.com/c/32739/f/530270/index.rss'),
+        (u'Wyborcza.biz', u'http://wyborcza.biz/pub/rss/wyborcza_biz_wiadomosci.htm'),
+        (u'Komentarze', u'http://rss.feedsportal.com/c/32739/f/530312/index.rss'),
+        (u'Kultura', u'http://rss.gazeta.pl/pub/rss/gazetawyborcza_kultura.xml'),
+        (u'Nauka', u'http://rss.feedsportal.com/c/32739/f/530269/index.rss'),
+        (u'Opinie', u'http://rss.gazeta.pl/pub/rss/opinie.xml'),
+        (u'Gazeta \u015awi\u0105teczna', u'http://rss.feedsportal.com/c/32739/f/530431/index.rss'),
+        (u'Du\u017cy Format', u'http://rss.feedsportal.com/c/32739/f/530265/index.rss'),
+        (u'Witamy w Polsce', u'http://rss.feedsportal.com/c/32739/f/530476/index.rss'),
+        (u'M\u0119ska Muzyka', u'http://rss.feedsportal.com/c/32739/f/530337/index.rss'),
+        (u'Lata Lec\u0105', u'http://rss.feedsportal.com/c/32739/f/530326/index.rss'),
+        (u'Solidarni z Tybetem', u'http://rss.feedsportal.com/c/32739/f/530461/index.rss'),
+        (u'W pon. - \u017bakowski', u'http://rss.feedsportal.com/c/32739/f/530491/index.rss'),
+        (u'We wt. - Kolenda-Zalewska', u'http://rss.feedsportal.com/c/32739/f/530310/index.rss'),
+        (u'\u015aroda w \u015brod\u0119', u'http://rss.feedsportal.com/c/32739/f/530428/index.rss'),
+        (u'W pi\u0105tek - Olejnik', u'http://rss.feedsportal.com/c/32739/f/530364/index.rss'),
+        (u'Nekrologi', u'http://rss.feedsportal.com/c/32739/f/530358/index.rss'),
+    ]
+
+    def skip_ad_pages(self, soup):
+        """Return the raw page linked from the first a.btn anchor of soup.
+
+        Returns None when there is no such anchor or it has no href.
+        """
+        tag = soup.find(name='a', attrs={'class': 'btn'})
+        # tag.get() avoids a KeyError on anchors that carry no href attribute.
+        if tag is not None and tag.get('href'):
+            return self.index_to_soup(tag['href'], raw=True)
+        return None
+
+    def append_page(self, soup, appendtag):
+        """Append the #artykul element of every following page to appendtag.
+
+        Links to the next page are read from the div#Str pager; that pager and
+        the #source element are removed from appendtag.
+        """
+        links = []
+        pager = appendtag.find('div', attrs={'id': 'Str'})
+        if pager is not None:
+            tag = soup.find('div', attrs={'id': 'Str'})
+            if tag is not None:
+                links = tag.findAll('a')
+            pager.extract()
+        source = appendtag.find(id='source')
+        if source is not None:
+            source.extract()
+        while links:
+            next_link = None
+            for link in links:
+                # link.string is None unless the anchor holds a single string.
+                if link.string and u'następne' in link.string:
+                    next_link = link
+                    # Stop at the first match: following a second "next" link
+                    # from the same pager would append the same page twice.
+                    break
+            if next_link is None:
+                break
+            soup2 = self.index_to_soup(self.INDEX + next_link['href'])
+            pagetext = soup2.find(id='artykul')
+            if pagetext is not None:
+                appendtag.insert(len(appendtag.contents), pagetext)
+            tag = soup2.find('div', attrs={'id': 'Str'})
+            links = tag.findAll('a') if tag is not None else []
+
+    def _gallery_next_url(self, container):
+        """Return the href of the link inside #gal_btn_next of container, or None."""
+        button = container.find(id='gal_btn_next')
+        if button is None or button.a is None:
+            return None
+        return button.a.get('href')
+
+    def gallery_article(self, appendtag):
+        """Append the #container_gal element of every following gallery page."""
+        if appendtag.find(id='container_gal') is None:
+            return
+        nexturl = self._gallery_next_url(appendtag)
+        navi = appendtag.find(id='gal_navi')
+        if navi is not None:
+            navi.extract()
+        while nexturl:
+            soup2 = self.index_to_soup(nexturl)
+            pagetext = soup2.find(id='container_gal')
+            if pagetext is None:
+                # No #container_gal on the fetched page: stop instead of raising.
+                break
+            nexturl = self._gallery_next_url(pagetext)
+            appendtag.insert(len(appendtag.contents), pagetext)
+            rem = appendtag.find(id='gal_navi')
+            if rem is not None:
+                rem.extract()
+
+    def preprocess_html(self, soup):
+        """Merge following article pages and gallery pages into soup.body."""
+        self.append_page(soup, soup.body)
+        if soup.find(id='container_gal'):
+            self.gallery_article(soup.body)
+        return soup
+
+    def print_version(self, url):
+        """Return url with 'http://wyborcza.biz/biznes/1' swapped for '.../biznes/2029020'."""
+        # str.replace() returns URLs that lack the prefix unchanged.
+        return url.replace('http://wyborcza.biz/biznes/1', 'http://wyborcza.biz/biznes/2029020')
diff --git a/recipes/icons/archeowiesci.png b/recipes/icons/archeowiesci.png
new file mode 100644
index 0000000000000000000000000000000000000000..7cf0ee1ff6294a22822a6ad12a0642c27faa6d51
GIT binary patch
literal 718
zcmeAS@N?(olHy`uVBq!ia0vp^0w65F1|;@pOMYr#O{`p~vu;L_uX`ZEf-7JEb*yCtX_~mTeNXwj)1(y4O*q
z1D4O@)#u!Du(|)bw&qKfg8+-8z!&LPyVkw7N|<%{@Uols^Okw%><);CK7Eh*?z`XB
z+qv)Dx3{se{o22NjgGFa4#Tc>nY;KT+7cz)rfE5J9j&P=HQmi<`q_ZT`PibG)uq+d
z<>DW|esxlu&+c>CL|1z^V=qwd@E4|IOBP1$`usU+ZCwAc+WX&Y_y6Ck6U!>q7G$BT
z_4x7Q$&)V!ANXu@{xiezl_95|cC218Vd?5c%j7ma0J?2g^Kc=<}T*>%!U7=m%LnNW?*b=X=y0<^!D57)30AY$XmSg<;#^T->`L<{Q8-#
zI`?by740MMd)8MbMJg*#duq{gV$;Wu8#i9OxPSk&`}eHo`Yn(8%d$ah`L8m|pg+I_
zqFUk_QIe8al4_M)lnSI6j0_A7bqx)44U9qzEUb);t&B}|4a}?z3_6bPKY*ekH$Npa
YtrE9}pEKBNfEpM)UHx3vIVCg!00hQ4zW@LL

literal 0
HcmV?d00001

diff --git a/recipes/icons/eioba.png b/recipes/icons/eioba.png
new file mode 100644
index 0000000000000000000000000000000000000000..9004d28d2547e7fe66b3199bf92d5ff602ee5ece
GIT binary patch
literal 908
zcmeAS@N?(olHy`uVBq!ia0vp^0wB!63?wyl`GXl47}o~)gt!8^eOum?EP7rFM2nx7
zEq*?8_uIk+Pm6$P;nU)UPfHd(&71qAdcn8GMgN-?|8HLMzh%k))}{Yz7JV+5|DOF0kc7@A?OsQ?7c~pRJnvt!U<3pN6wtYwo2^y5d%Ss(Z=Z*9Sb`9Ppg6
z7U(;dH~Za-roVBoIn}oEPE!9Rr-~C@i*CKyZFX~`%QT>;cAMVV;FLf0wNvGZrscP7
zOO8gi-^rTvB6~6rJ~=ES
z4#@cE>Eak7aXC35At8l{F-6%vYr|C1y*^o;A}r
zA|k5stF(0VY>C@9ZW-RNwrKidZ8>|#i5`ZnRHba*7mZq=m
zGiUseYG2~;A)(1BlNx^o{|UN$$=GPK#HQk+&!5(EJM7r7V#ksx
zjSJW8S+r@@u1(8U?b^0(ALEPZ$Cf>NHtpKBZ_}>zom-bTpR

literal 0
HcmV?d00001

diff --git a/recipes/icons/focus_pl.png b/recipes/icons/focus_pl.png
new file mode 100644
index 0000000000000000000000000000000000000000..4dfd72200ce04adceb4c5aac894fedc3bf65b418
GIT binary patch
literal 695
zcmZ`#ZAepL6n^gRz3_?WGlRE{GNuoBt@B=e{WEz<{LoJe8MNtg`wJDl&
zew2Z&w8ZJIZhoXIO<`0*C?;(`M)aXHgHXZ*G26Q9Uq6C#p68syc@F0sZEjAQzuz7|
z0R9>2$~=JsyzWB_7$)0(44`)9X62_$QsN1aDM$tZ6EFtK>NREzOh3bJIqV*f$L;ZK
zto;}UV+A`3avyMe+^2SLnUWZOaOA?gLqgXjTamk!XwB8f>vc(DB-
zCXhy;!yB`GAUfayMT{UuigXJ_k#7%3uQWn04Xc(OsFNOS-W%~K_>cvX9*B(;YovS(
zU^~G)q(~>&F0fC9eh_zq{iEa8MMUUqp!MQix(>+jo6Y0Y1LX|}oEXjWc!x+Z7Wi^Rp-!>a;X
zv`vwH2&m^
z<;(9@W2xjj2~oFh)M*mqq9$f1Nw<`0jF&dN)~|k>JGZj$(|G-pRnMlTZ>08v6YajC
zA%}mOox+Ks>XhT^($i{{;><;rK#1n#a+#cyanbo)oFXPx!Ntlr0lE45^38t)=P#6&
XmtFh6z+j(G6$Hpg$x-sj#kc&7I;J!18EO1b~~AE2W0Shx;Tb#Tu+|x|DXdG6T|H9f`#Y4-JSuIQ7v(e
zC`m~yNwrEYN(E93Mh1q4x`qb221X$U7FNacptHiD0
Tsn!Q4paup{S3j3^P6^{_0Rav`v3p`i!(p2Y5TtkD3qFT
z7fQ`Om)V|9}6$BHP3N>lgjszyB_O$e;fWp}ZGZ6dw0G*tdMV-23y7
zmq7BT->1*~bU5a}`~QD#i-sOg2kRK-Cz5qH8*}d3%ICbS@6h95U?>VouF^9Ic>#2d
zYKdz^NlIc#s#S7PDv)9@GB7mMH8jvQFbXlSurju^GBMRPFtai+n5v`(@-9L{ZhlH;
YS|x4`PwMBb0cv3IboFyt=akR{081x$D*ylh

literal 0
HcmV?d00001

diff --git a/recipes/konflikty_zbrojne.recipe b/recipes/konflikty_zbrojne.recipe
new file mode 100644
index 0000000000..7921e98f48
--- /dev/null
+++ b/recipes/konflikty_zbrojne.recipe
@@ -0,0 +1,21 @@
+# -*- coding: utf-8 -*-
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Konflikty(BasicNewsRecipe):
+    """Calibre news recipe for the konflikty.pl RSS feeds."""
+    title = u'Konflikty Zbrojne'
+    __author__ = 'fenuks'
+    cover_url = 'http://www.konflikty.pl/images/tapety_logo.jpg'
+    language = 'pl'
+    description = 'military news'
+    category = 'military, history'
+    oldest_article = 7
+    max_articles_per_feed = 100
+    auto_cleanup = True
+
+    feeds = [
+        (u'Aktualności', u'http://www.konflikty.pl/rss_aktualnosci_10.xml'),
+        (u'Artyku\u0142y', u'http://www.konflikty.pl/rss_artykuly_10.xml'),
+        (u'Relacje', u'http://www.konflikty.pl/rss_relacje_10.xml'),
+        (u'Recenzje', u'http://www.konflikty.pl/rss_recenzje_10.xml'),
+    ]
diff --git a/recipes/naczytniki.recipe b/recipes/naczytniki.recipe
new file mode 100644
index 0000000000..374c6dd0cb
--- /dev/null
+++ b/recipes/naczytniki.recipe
@@ -0,0 +1,16 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class naczytniki(BasicNewsRecipe):
+    """Calibre news recipe for the naczytniki.pl RSS feed."""
+    title = u'naczytniki.pl'
+    __author__ = 'fenuks'
+    cover_url = 'http://naczytniki.pl/wp-content/uploads/2010/08/logo_nc28.png'
+    language = 'pl'
+    description = 'everything about e-readers'
+    category = 'readers'
+    oldest_article = 7
+    max_articles_per_feed = 100
+    remove_tags_after = dict(name='div', attrs={'class': 'sociable'})
+    keep_only_tags = [dict(name='div', attrs={'class': 'post'})]
+    remove_tags = [dict(name='span', attrs={'class': 'comments'}), dict(name='div', attrs={'class': 'sociable'})]
+    feeds = [(u'Wpisy', u'http://naczytniki.pl/?feed=rss2')]
diff --git a/recipes/nowa_fantastyka.recipe b/recipes/nowa_fantastyka.recipe
new file mode 100644
index 0000000000..d8015105f8
--- /dev/null
+++ b/recipes/nowa_fantastyka.recipe
@@ -0,0 +1,55 @@
+# -*- coding: utf-8 -*-
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Nowa_Fantastyka(BasicNewsRecipe):
+    """Calibre news recipe that builds its feeds from fantastyka.pl index pages."""
+    title = u'Nowa Fantastyka'
+    oldest_article = 7
+    __author__ = 'fenuks'
+    language = 'pl'
+    description = 'site for fantasy readers'
+    category = 'fantasy'
+    max_articles_per_feed = 100
+    INDEX = 'http://www.fantastyka.pl/'
+    remove_tags_before = dict(attrs={'class': 'belka1-tlo-md'})
+    #remove_tags_after=dict(name='span', attrs={'class':'naglowek-oceny'})
+    remove_tags_after = dict(name='td', attrs={'class': 'belka1-bot'})
+    remove_tags = [dict(attrs={'class': 'avatar2'})]
+    feeds = []
+
+    def find_articles(self, url):
+        """Return one article dict per a.a-box link on the index page at url.
+
+        Returns an empty list when the page has no 'belka1-tlo-m' element.
+        """
+        articles = []
+        soup = self.index_to_soup(url)
+        tag = soup.find(attrs={'class': 'belka1-tlo-m'})
+        if tag is None:
+            return articles
+        for link in tag.findAll(name='a', attrs={'class': 'a-box'}):
+            #date=soup.find(id='footer').ul.li.string[41:-1]
+            articles.append({'title': link.string,
+                             'url': self.INDEX + link['href'],
+                             'date': '',
+                             'description': ''
+                             })
+        return articles
+
+    def parse_index(self):
+        """Return (section title, article list) pairs for the three index pages."""
+        sections = [(u"Opowiadania", '3.html'),
+                    (u"Publicystyka", '6.html'),
+                    (u"Hype Park", '9.html')]
+        return [(name, self.find_articles(self.INDEX + page)) for name, page in sections]
+
+    def get_cover_url(self):
+        """Return INDEX + the src of the img.okladka found on 1.html.
+
+        Returns the current cover_url (None if unset) when no such image exists.
+        """
+        soup = self.index_to_soup(self.INDEX + '1.html')
+        cover = soup.find(name='img', attrs={'class': 'okladka'})
+        if cover is not None and cover.get('src'):
+            self.cover_url = self.INDEX + cover['src']
+        return getattr(self, 'cover_url', None)