From ab79b30dd3a84f3dcc33ec3e71383810c35bbfbb Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Thu, 8 Sep 2011 10:37:47 -0600
Subject: [PATCH] Various Polish news sources by fenuks

---
 recipes/adventure_zone_pl.recipe    | 38 ++++++++++++++++++++++++++++
 recipes/astro_news_pl.recipe        | 18 +++++++++++++
 recipes/astronomia_pl.recipe        | 15 +++++++++++
 recipes/elektroda_pl.recipe         | 15 +++++++++++
 recipes/gildia_pl.recipe            | 26 +++++++++++++++++++
 recipes/gry_online_pl.recipe        | 38 ++++++++++++++++++++++++++++
 recipes/icons/adventure_zone_pl.png | Bin 0 -> 1603 bytes
 recipes/icons/astro_news_pl.png     | Bin 0 -> 625 bytes
 recipes/icons/astronomia_pl.png     | Bin 0 -> 389 bytes
 recipes/icons/elektroda_pl.png      | Bin 0 -> 1023 bytes
 recipes/icons/gry_online_pl.png     | Bin 0 -> 249 bytes
 recipes/icons/ubuntu_pl.png         | Bin 0 -> 508 bytes
 recipes/ubuntu_pl.recipe            | 16 ++++++++++++
 13 files changed, 166 insertions(+)
 create mode 100644 recipes/adventure_zone_pl.recipe
 create mode 100644 recipes/astro_news_pl.recipe
 create mode 100644 recipes/astronomia_pl.recipe
 create mode 100644 recipes/elektroda_pl.recipe
 create mode 100644 recipes/gildia_pl.recipe
 create mode 100644 recipes/gry_online_pl.recipe
 create mode 100644 recipes/icons/adventure_zone_pl.png
 create mode 100644 recipes/icons/astro_news_pl.png
 create mode 100644 recipes/icons/astronomia_pl.png
 create mode 100644 recipes/icons/elektroda_pl.png
 create mode 100644 recipes/icons/gry_online_pl.png
 create mode 100644 recipes/icons/ubuntu_pl.png
 create mode 100644 recipes/ubuntu_pl.recipe

diff --git a/recipes/adventure_zone_pl.recipe b/recipes/adventure_zone_pl.recipe
new file mode 100644
index 0000000000..366b1ccf5a
--- /dev/null
+++ b/recipes/adventure_zone_pl.recipe
@@ -0,0 +1,38 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Adventure_zone(BasicNewsRecipe):
+    title = u'Adventure Zone'
+    __author__ = 'fenuks'
+    description = 'Adventure zone - adventure games from A to Z'
+    category = 'games'
+    language = 'pl'
+    oldest_article = 15
+    max_articles_per_feed = 100
+    no_stylesheets = True
+    remove_tags_before = dict(name='td', attrs={'class':'main-bg'})
+    remove_tags_after = dict(name='td', attrs={'class':'main-body middle-border'})
+    extra_css = '.main-bg{text-align: left;} td.capmain{ font-size: 22px; }'
+    feeds = [(u'Nowinki', u'http://www.adventure-zone.info/fusion/feeds/news.php')]
+
+    def get_cover_url(self):
+        # Build the cover URL from the image inside the 'box_OstatninumerAZ' element of the news page.
+        soup = self.index_to_soup('http://www.adventure-zone.info/fusion/news.php')
+        cover = soup.find(id='box_OstatninumerAZ')
+        if cover is not None:  # element missing: fall back to any cover_url already set instead of crashing
+            self.cover_url = 'http://www.adventure-zone.info/fusion/' + cover.center.a.img['src']
+        return getattr(self, 'cover_url', None)
+
+    def skip_ad_pages(self, soup):
+        # For the first 'articles.php?' link whose <strong> label contains 'zapowied' or
+        # 'recenzj', return the matching print.php page fetched raw; otherwise return None.
+        for r in soup.body.findAll(name='a', href=True):  # href=True: r['href'] would raise KeyError on bare anchors
+            if 'articles.php?' in r['href'] and r.strong is not None:
+                word = r.strong.string or ''  # .string may be None when <strong> wraps nested markup
+                # Test both stems explicitly: ('zapowied' or 'recenzj') evaluates to just 'zapowied'.
+                if 'zapowied' in word or 'recenzj' in word:
+                    return self.index_to_soup('http://www.adventure-zone.info/fusion/print.php?type=A&item_id'+r['href'][r['href'].find('_id')+3:], raw=True)
+
+    def print_version(self, url):
+        # Rewrite the 'news.php?readmore' URL form into the 'print.php?type=N&item_id' form.
+        return url.replace('news.php?readmore', 'print.php?type=N&item_id')
+
diff --git a/recipes/astro_news_pl.recipe b/recipes/astro_news_pl.recipe
new file mode 100644
index 0000000000..e5561fc98d
--- /dev/null
+++ b/recipes/astro_news_pl.recipe
@@ -0,0 +1,18 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AstroNEWS(BasicNewsRecipe):
+    title = u'AstroNEWS'
+    __author__ = 'fenuks'
+    description = 'AstroNEWS- astronomy every day'
+    category = 'astronomy, science'
+    language = 'pl'
+    oldest_article = 8
+    max_articles_per_feed = 100
+    auto_cleanup = True
+    cover_url = 'http://news.astronet.pl/img/logo_news.jpg'
+    feeds = [(u'Wiadomości', u'http://news.astronet.pl/rss.cgi')]
+
+    def print_version(self, url):
+        # Insert 'print.cgi?' directly after the 'astronet.pl/' part of the article URL.
+        return url.replace('astronet.pl/', 'astronet.pl/print.cgi?')
+
diff --git a/recipes/astronomia_pl.recipe b/recipes/astronomia_pl.recipe
new file
mode 100644
index 0000000000..a142520ec5
--- /dev/null
+++ b/recipes/astronomia_pl.recipe
@@ -0,0 +1,15 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Astronomia_pl(BasicNewsRecipe):
+    title = u'Astronomia.pl'
+    __author__ = 'fenuks'
+    description = 'Astronomia - polish astronomy site'
+    cover_url = 'http://www.astronomia.pl/grafika/logo.gif'
+    category = 'astronomy, science'
+    language = 'pl'
+    oldest_article = 8
+    max_articles_per_feed = 100
+    # Content selection: cut everything before div#a1, keep only the divs with id 'a1' or 'h2'.
+    remove_tags_before = dict(name='div', attrs={'id':'a1'})
+    keep_only_tags = [dict(name='div', attrs={'id':['a1', 'h2']})]
+    feeds = [(u'Wiadomości z astronomii i astronautyki', u'http://www.astronomia.pl/rss/')]
diff --git a/recipes/elektroda_pl.recipe b/recipes/elektroda_pl.recipe
new file mode 100644
index 0000000000..c2123cb8cf
--- /dev/null
+++ b/recipes/elektroda_pl.recipe
@@ -0,0 +1,15 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Elektroda(BasicNewsRecipe):
+    title = u'Elektroda'
+    oldest_article = 8
+    __author__ = 'fenuks'
+    description = 'Elektroda.pl'
+    cover_url = 'http://demotywatory.elektroda.pl/Thunderpic/logo.gif'
+    category = 'electronics'
+    language = 'pl'
+    max_articles_per_feed = 100
+    remove_tags_before = dict(name='span', attrs={'class':'postbody'})  # content starts at span.postbody
+    remove_tags_after = dict(name='td', attrs={'class':'spaceRow'})  # and ends at td.spaceRow
+    remove_tags = [dict(name='a', attrs={'href':'#top'})]  # drop the '#top' anchors
+    feeds = [(u'Elektroda', u'http://www.elektroda.pl/rtvforum/rss.php')]
diff --git a/recipes/gildia_pl.recipe b/recipes/gildia_pl.recipe
new file mode 100644
index 0000000000..042902b5fc
--- /dev/null
+++ b/recipes/gildia_pl.recipe
@@ -0,0 +1,26 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Gildia(BasicNewsRecipe):
+    title = u'Gildia.pl'
+    __author__ = 'fenuks'
+    description = 'Gildia - cultural site'
+    cover_url = 'http://www.film.gildia.pl/_n_/portal/redakcja/logo/logo-gildia.pl-500.jpg'
+    category = 'culture'
+    language = 'pl'
+    oldest_article = 8
+    max_articles_per_feed = 100
+    no_stylesheets = True
+    remove_tags = [dict(name='div', attrs={'class':'backlink'}), dict(name='div', attrs={'class':'im_img'}), dict(name='div', attrs={'class':'addthis_toolbox addthis_default_style'})]
+    keep_only_tags = [dict(name='div', attrs={'class':'widetext'})]  # list of specs, same form as the other recipes
+    feeds = [(u'Gry', u'http://www.gry.gildia.pl/rss'), (u'Literatura', u'http://www.literatura.gildia.pl/rss'), (u'Film', u'http://www.film.gildia.pl/rss'), (u'Horror', u'http://www.horror.gildia.pl/rss'), (u'Konwenty', u'http://www.konwenty.gildia.pl/rss'), (u'Plansz\xf3wki', u'http://www.planszowki.gildia.pl/rss'), (u'Manga i anime', u'http://www.manga.gildia.pl/rss'), (u'Star Wars', u'http://www.starwars.gildia.pl/rss'), (u'Techno', u'http://www.techno.gildia.pl/rss'), (u'Historia', u'http://www.historia.gildia.pl/rss'), (u'Magia', u'http://www.magia.gildia.pl/rss'), (u'Bitewniaki', u'http://www.bitewniaki.gildia.pl/rss'), (u'RPG', u'http://www.rpg.gildia.pl/rss'), (u'LARP', u'http://www.larp.gildia.pl/rss'), (u'Muzyka', u'http://www.muzyka.gildia.pl/rss'), (u'Nauka', u'http://www.nauka.gildia.pl/rss')]
+
+    def skip_ad_pages(self, soup):
+        # If div.news links to a 'recenzja' URL, log it and return that page fetched raw; else return None.
+        content = soup.find('div', attrs={'class':'news'})
+        if content is None:  # page without a div.news block: nothing to follow
+            return None
+        for link in content.findAll(name='a', href=True):  # href=True: link['href'] would raise KeyError on bare anchors
+            if 'recenzja' in link['href']:
+                self.log.warn('odnosnik')
+                self.log.warn(link['href'])
+                return self.index_to_soup(link['href'], raw=True)
diff --git a/recipes/gry_online_pl.recipe b/recipes/gry_online_pl.recipe
new file mode 100644
index 0000000000..d9c461dc63
--- /dev/null
+++ b/recipes/gry_online_pl.recipe
@@ -0,0 +1,38 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Gry_online_pl(BasicNewsRecipe):
+    title = u'Gry-Online.pl'
+    __author__ = 'fenuks'
+    description = 'Gry-Online.pl - computer games'
+    category = 'games'
+    language = 'pl'
+    oldest_article = 13
+    INDEX = 'http://www.gry-online.pl/'  # prefix prepended to the next-page hrefs in append_page
+    cover_url = 'http://www.gry-online.pl/img/1st_10/1st-gol-logo.png'
+    max_articles_per_feed = 100
+    no_stylesheets = True
+    extra_css = 'p.wn1{font-size:22px;}'
+    remove_tags_after = [dict(name='div', attrs={'class':['tresc-newsa']})]
+    keep_only_tags = [dict(name='div', attrs={'class':['txthead']}), dict(name='p', attrs={'class':['wtx1', 'wn1', 'wob']}), dict(name='a', attrs={'class':['num_str_nex']})]
+    feeds = [(u'Newsy', 'http://www.gry-online.pl/rss/news.xml'), ('Teksty', u'http://www.gry-online.pl/rss/teksty.xml')]
+
+    def append_page(self, soup, appendtag):
+        # Multi-page articles: while the 'num_str_nex' link has a <div> label containing
+        # 'strona', fetch the next page and move its text paragraphs to the end of appendtag.
+        nexturl = soup.find('a', attrs={'class':'num_str_nex'})
+        pager = appendtag.find('a', attrs={'class':'num_str_nex'})
+        if pager is not None:  # remove the pager link itself from the merged article
+            pager.replaceWith('\n')
+        if nexturl is None or nexturl.div is None:  # no next link, or one without a <div> label: stop here
+            return
+        label = nexturl.div.string or ''  # .string may be None when the <div> wraps nested markup
+        if 'strona' in label:
+            soup2 = self.index_to_soup(self.INDEX + nexturl['href'])
+            for tag in soup2.findAll(name='p', attrs={'class':['wtx1', 'wn1', 'wob']}):
+                appendtag.insert(len(appendtag.contents), tag)
+            self.append_page(soup2, appendtag)  # recurse: one level per continuation page
+
+    def preprocess_html(self, soup):
+        # Pull all continuation pages into the article body, then hand the soup back.
+        self.append_page(soup, soup.body)
+        return soup
diff --git a/recipes/icons/adventure_zone_pl.png b/recipes/icons/adventure_zone_pl.png
new file mode 100644
index 0000000000000000000000000000000000000000..bfa597c39c26c79d5daa7a8eb42a611a367db1b5
GIT binary patch
literal 1603
zcmV-J2E6%+P)u^n%+H_MlH8$zV}tA52P;tTa?G?LuV|S`lhOL3A@H
zGfFqJln9~Af6vC)ht8Mtd*8k1yw7=_bKa3&9~sHv5PkW=eKE
zN_d)^tE`NSgx6?zJfiE@Yio;(k%+-yGLg)jxU^IxN=qY>NI0A=TZoiO4#&rbsJc2U
z3rxAVuvpH{W{`+eDn+7{6r%2Kk;va4OsG@vYgK
zAi&RW`*v%^LSa%;R1{Hmw!god8<9?zoD2XrZb&2n0Yn1>G8rzYsuGL&d}~~4YD^3m
zZEWQ8asJh-@^VP=?VDDMix(EOS`=*jIC&BugM%kd$mK*14)*r8wnWR9vsg|}s9Y!n
z;<`GiG${$x?%xk;ySl2X&~$x$)zy3V5~h(3Qle%u6Le!jUGh-zx&^2|&$l8p^0jEUj#fLN(aNB|>_MzgoKYHV#kYZIau
zEb$hzoU(YeIyW~pm1uZaqXBb92`*kl6UN1HxflNKC?`iCh>ix6QmL1h
zX$(6%L4~)sm)FLPR;(&jULNNA^l6F2)fEyQJGOp3QBTjILm0=VrozItYl+UDEht#G
zj_AmdqN11>>v)D3ZO0CxsHlhtFuBxZL<)sO0tru@%F9DMO-x+7h9+BXG9p`BdwV-O zqOvlH1i&s`QmI^Ah~B-saKXa^+O@X>fMq=~(cX^Ai;J_fah}O!Fd&6xJiWa)Yyj}G zvcf`)w?e_^5kpK1TwzickjRCiB zpFa;um`q?Ma(8!g1LYcxRJwI5#!aV#cC)kh@2_4Bz#Sdv`YlGoJ7Cd#g~=eDOonO6 z&reOo6g4!IlsGsL-MQ1$1ZJnEdV8_|^XG#HI8UEG4Gtnve}8i`Ubg!B@^W8a1gcal z7SW?eSFT`RtJP>QcCM~&ZqVFvF^9uu<2Hw;&=4;C z^=o9r%;eY@@*BtS*)wSI=FPo(sBn1r@@4pk4?8;C-HBpjqoYBwjmgYX;c_mQ!+}Jl zr3D4S!I+AY5_EM-OMN{ao{^E8HzC1`7Xt&3cyzSCAI&&A+SiA;`T6njHEWCje*d1F zME5T)zJI@I6VZFMXsQMt3p%;Rb4aG0fOp*SdIFzoCA%+_e;cK|QLzj2

RC3HntbYx+4 zWjbwdWNBu305UK!F)c7TEigD#F)%taF*-ChEig7ZFfdNr>S+J~002ovPDHLkV1i+3 B=0yMi literal 0 HcmV?d00001 diff --git a/recipes/icons/astro_news_pl.png b/recipes/icons/astro_news_pl.png new file mode 100644 index 0000000000000000000000000000000000000000..7a93ce657a06be9d52fe1905c2a69abe57c3d94a GIT binary patch literal 625 zcmeAS@N?(olHy`uVBq!ia0vp^0wB!63?wyl`GbMf!vLQUS0D`p92|-g5{8C`Q4$iG zK|xbmT8=GU`hD%%|AvN6OP4;scJ2S(z5lmv{eSMNYAMii0Rc@33Cp0MwNX)9lau!r7hm)8nxd&0#K9p@Tzs#(<-Mh4 zwv$t}lT%A_^0}6l_lk-@#XVk5EnZ%8dU~$Sne)85`dUxV^QBAQFI{>sJA19AWwoYe zvY}zJmsbyaU)?pJ4;f2>{DK)Ap4~_TaySb-B8!2v2N=7Z%(eqEj(fT|hDcm??Z4jC zWFX>F?D?i^-J%Yc9$mH#4I|#=`XZqafFTQQDd(7-PW7^** z4>^J-_p2u#pIc?;WX9Ofo@md*Y^JdD8yJ2Bc5X`cBCz4 zyS%b~al7dBk|}J~K!>W9xJHzuB$lLFB^RXvDF!10LqlByOI-tt5JLkiV^b>=BV7YC lD+7Z=O4s(FXvob^$xN%ntzmP09#B04gQu&X%Q~loCIEsZi8BBI literal 0 HcmV?d00001 diff --git a/recipes/icons/elektroda_pl.png b/recipes/icons/elektroda_pl.png new file mode 100644 index 0000000000000000000000000000000000000000..2a4fcd7e4ebba3559bb4b706447b6d84efff953c GIT binary patch literal 1023 zcmeAS@N?(olHy`uVBq!ia0vp^0wB!63?wyl`GXl47&`-eLR^8|nVX)5HlDJ}-S1p< zAbaw)(9lqzpuN3)R^J)N!h_zG$E>pUxfGvBtX!g|tRNvF!O6)fEH0s_sHm)}YGh>Q zUv)IJ?nH9e`LN~-fw>F0xj4AEc?=8;>g($RtB(3r9gpui?_YJyukNCE#xzb2Hfd>T zK_Ovl8=JBjS8NN8+2@@Iu05h_=EB9rX>MhemX_uf6r`u8Us_t3)x5?wqRSzwP21QO z=p;8ckI9oKb8&IoIXHN^JF&Aea&ofT**RurW+x>j0X4?N#sW>2mzUQwFc20NwzIPn z6PFMX5f_z^6crT}0b)^6K@m|QAyJ^qLn9*m{QQ89l$4Z`lT)w+df47cQc6l*K}kVD z0mzAnh~VYp2TI8+DDd&|PntBz%gc*}l}$)UEH*aQ!No;gT}wbvn3oR-1ax)v{rncqS|1} z;tgN#y`7$T=iByEUxT^iH`>P@-*xF++Rscx)qeRsI!e))wx`}gnLyYJu< z??Qn)cQg_XtZBE2dAT^*yNVxnN+goYUw%`Ho&l+-Z_oH%jH z(BOo&hL+~)6=JJaii?U*%P=s=$jY4l>cy*<(h^dlBGacz8yn1+F>7XY1e0lWcMX7H*ury?_1u?ez`` z0?f_@frW*&9Fu~b4|jEOA5xZ(P+GhwH94tKsgSqzD(gqi&ZDfQTRl%Im};*4>G1dy z_v@}pmlSsy8ExJqJ-djd=(B_YBZG&Y8RHY{`?G)^RV{IiC`m3#O)N>(O#u=NMh1q4 zx(24Y1|}hf##Sb#R)&_^1}0Vp1}dSs9q?8URUy t|LH!DP&DM`r(~v8;?~e-^(79b0iu?GlcZS-mIE~~c)I$ztaD0e0sys}K_LJD literal 0 HcmV?d00001 diff 
--git a/recipes/icons/gry_online_pl.png b/recipes/icons/gry_online_pl.png
new file mode 100644
index 0000000000000000000000000000000000000000..f9f7a738b1ad6f8c2f5cfb56531abc06b5f18b2e
GIT binary patch
literal 249
zcmeAS@N?(olHy`uVBq!ia0vp^0w65F1SAhIZYc#)Y)RhkF8{%RGvfOlpa^GyM`SUO
z_5fqIli7AahPkJUV~EA+t+On6wEsu!?8lhU^8c~vxSdwa$T$Bo=7>o=I4RsAHbqy>+
v3=OP|&8&=oY%?nZgSWK?hfp-+=BH$)RpQn#N1>w*sDZ)L)z4*}Q$iB}JC#a{

literal 0
HcmV?d00001

diff --git a/recipes/icons/ubuntu_pl.png b/recipes/icons/ubuntu_pl.png
new file mode 100644
index 0000000000000000000000000000000000000000..84fa18f6f864b29a1b953e2979a9fbfe8d5f31a0
GIT binary patch
literal 508
zcmeAS@N?(olHy`uVBq!ia0vp^0w65F1|WlpX)+
z)EPeiG$Jl9$aTKn`rB4VB#BFJ>Vn4?4xaBjZYY{z_P4on_0^nfuSL6W>b&{>FokJ(
zu!YPaKlY*+2gYfw)1JmVgc@$R+O$sk-0fZwZjGST2fs?m@daFe{`sQEBqK?lTY2}j
z9db-;xF4{3Htm%0S(>nv$$a+trs>`uSJ(#HWYnOStY#4BYJI}?T`3*?7W+P-Fp_mcN!R+swJ)wB`Jv|saDBF
zsX&Us$iUE0*T7QOz#_!Zz{=Rn%G5~Lz|6|P;OYKcMHCIW`6-!cmAExbd31FyPy>Uf
LtDnm{r-UW|a!|?a

literal 0
HcmV?d00001

diff --git a/recipes/ubuntu_pl.recipe b/recipes/ubuntu_pl.recipe
new file mode 100644
index 0000000000..24212e8608
--- /dev/null
+++ b/recipes/ubuntu_pl.recipe
@@ -0,0 +1,16 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Ubuntu_pl(BasicNewsRecipe):  # declarative recipe: class attributes only, no method overrides
+    title = u'UBUNTU.pl'
+    __author__ = 'fenuks'
+    description = 'UBUNTU.pl - polish ubuntu community site'
+    cover_url = 'http://ubuntu.pl/img/logo.jpg'
+    category = 'linux, IT'
+    language = 'pl'
+    no_stylesheets = True
+    oldest_article = 8
+    max_articles_per_feed = 100
+    extra_css = '#main {text-align:left;}'
+    keep_only_tags= [dict(name='td', attrs={'class':'teaser-node-mc'}), dict(name='h3', attrs={'class':'entry-title'}), dict(name='div', attrs={'class':'entry-content'})]  # td.teaser-node-mc, h3.entry-title, div.entry-content
+    remove_tags_after= [dict(name='div' , attrs={'class':'content'})]  # spec targets div.content
+    feeds = [('Czytelnia Ubuntu', 'http://feeds.feedburner.com/ubuntu-czytelnia'), (u'WikiGames', u'http://feeds.feedburner.com/WikiGames')]  # two FeedBurner feeds as (title, url) pairs