From 41b9342a2a1cf4e9013b4c6e83c30de8fa7820d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20D=C5=82ugosz?= Date: Sat, 9 Mar 2013 10:17:39 +0100 Subject: [PATCH 1/2] fix telepolis_pl and improve swiatkindle --- recipes/swiatkindle.recipe | 3 ++- recipes/telepolis_pl.recipe | 24 ++++++++++++++++++++++-- 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/recipes/swiatkindle.recipe b/recipes/swiatkindle.recipe index d8e0e3f403..c589d1b6e1 100644 --- a/recipes/swiatkindle.recipe +++ b/recipes/swiatkindle.recipe @@ -19,6 +19,7 @@ class swiatczytnikow(BasicNewsRecipe): feeds = [(u'Świat Czytników - wpisy', u'http://swiatczytnikow.pl/feed')] - remove_tags = [dict(name = 'ul', attrs = {'class' : 'similar-posts'})] + remove_tags = [dict(name = 'ul', attrs = {'class' : 'similar-posts'}), + dict(name = 'div', attrs = {'class' : 'feedflare'})] preprocess_regexps = [(re.compile(u'

Czytaj dalej:

'), lambda match: '')] diff --git a/recipes/telepolis_pl.recipe b/recipes/telepolis_pl.recipe index 9ea878bc77..1aa7734c2c 100644 --- a/recipes/telepolis_pl.recipe +++ b/recipes/telepolis_pl.recipe @@ -16,11 +16,31 @@ class telepolis(BasicNewsRecipe): use_embedded_content = False feeds = [ - (u'Wiadomości', u'http://www.telepolis.pl/rss/news.php')#, - #(u'Artykuły', u'http://www.telepolis.pl/rss/artykuly.php') + (u'Wiadomości', u'http://www.telepolis.pl/rss,2,5,0.html') ] keep_only_tags = [ dict(name='div', attrs={'class':'flol w510'}), + dict(name='div', attrs={'class':'main_tresc'}), dict(name='div', attrs={'class':'main_tresc_news'}) ] + + def append_page(self, soup, appendtag): + chpage= appendtag.find(attrs={'class':'str'}) + if chpage: + for page in chpage.findAll('a'): + if page.renderContents() == 'Następna ›': + break + soup2 = self.index_to_soup(page['href']) + pagetext = soup2.find(attrs={'class':'main_tresc'}) + pos = len(appendtag.contents) + appendtag.insert(pos, pagetext) + for r in appendtag.findAll(attrs={'class':'str'}): + r.extract() + + def preprocess_html(self, soup): + self.append_page(soup, soup.body) + for image in soup.findAll('img'): + if 'm.jpg' in image['src']: + image['src'] = image['src'].replace('m.jpg', '.jpg') + return soup From a16669710cc090af08570e4c864baaadb120c560 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20D=C5=82ugosz?= Date: Sun, 10 Mar 2013 12:53:24 +0100 Subject: [PATCH 2/2] 2 new recipes, 2 new icons and a bunch of minor fixes --- recipes/archeowiesci.recipe | 2 +- recipes/bachormagazyn.recipe | 43 ++++++++++++++++++++++++++++++ recipes/frazpc.recipe | 1 + recipes/icons/gildia_pl.png | Bin 0 -> 3582 bytes recipes/icons/nowy_obywatel.png | Bin 0 -> 480 bytes recipes/kopalniawiedzy.recipe | 1 + recipes/kurier_galicyjski.recipe | 2 +- recipes/kurier_szczecinski.recipe | 2 +- recipes/niebezpiecznik.recipe | 2 +- recipes/nowy_obywatel.recipe | 42 +++++++++++++++++++++++++++++ recipes/swiat_obrazu.recipe | 2 +- recipes/zycie_warszawy.recipe | 26 +++++++++--------- 12 files changed, 106 insertions(+), 17 deletions(-) create mode 100644 recipes/bachormagazyn.recipe create mode 100644 recipes/icons/gildia_pl.png create mode 100755 recipes/icons/nowy_obywatel.png create mode 100644 recipes/nowy_obywatel.recipe diff --git a/recipes/archeowiesci.recipe b/recipes/archeowiesci.recipe index 57647d7469..c0fc576c9f 100644 --- a/recipes/archeowiesci.recipe +++ b/recipes/archeowiesci.recipe @@ -1,7 +1,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class Archeowiesci(BasicNewsRecipe): - title = u'Archeowiesci' + title = u'Archeowieści' __author__ = 'fenuks' category = 'archeology' language = 'pl' diff --git a/recipes/bachormagazyn.recipe b/recipes/bachormagazyn.recipe new file mode 100644 index 0000000000..fb34552beb --- /dev/null +++ b/recipes/bachormagazyn.recipe @@ -0,0 +1,43 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +__license__ = 'GPL v3' +__copyright__ = u'Łukasz Grąbczewski 2013' +__version__ = '1.0' + +''' +bachormagazyn.pl +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class bachormagazyn(BasicNewsRecipe): + __author__ = u'Łukasz Grączewski' + title = u'Bachor Magazyn' + description = u'Alternatywny magazyn o alternatywach rodzicielstwa' + language = 'pl' + publisher = 'Bachor Mag.' + publication_type = 'magazine' + masthead_url = 'http://bachormagazyn.pl/wp-content/uploads/2011/10/bachor_header1.gif' + no_stylesheets = True + remove_javascript = True + use_embedded_content = False + remove_empty_feeds = True + + oldest_article = 32 #monthly +1 + max_articles_per_feed = 100 + + feeds = [ + (u'Bezradnik dla nieudacznych rodziców', u'http://bachormagazyn.pl/feed/') + ] + + keep_only_tags = [] + keep_only_tags.append(dict(name = 'div', attrs = {'id' : 'content'})) + + remove_tags = [] + remove_tags.append(dict(attrs = {'id' : 'nav-above'})) + remove_tags.append(dict(attrs = {'id' : 'nav-below'})) + remove_tags.append(dict(attrs = {'id' : 'comments'})) + remove_tags.append(dict(attrs = {'class' : 'entry-info'})) + remove_tags.append(dict(attrs = {'class' : 'comments-link'})) + remove_tags.append(dict(attrs = {'class' : 'sharedaddy sd-sharing-enabled'})) diff --git a/recipes/frazpc.recipe b/recipes/frazpc.recipe index 2c12a58b55..7d1cb329f8 100644 --- a/recipes/frazpc.recipe +++ b/recipes/frazpc.recipe @@ -18,6 +18,7 @@ class FrazPC(BasicNewsRecipe): max_articles_per_feed = 100 use_embedded_content = False no_stylesheets = True + remove_empty_feeds = True cover_url='http://www.frazpc.pl/images/logo.png' feeds = [ (u'Aktualno\u015bci', u'http://www.frazpc.pl/feed/aktualnosci'), diff --git a/recipes/icons/gildia_pl.png b/recipes/icons/gildia_pl.png new file mode 100644 index 0000000000000000000000000000000000000000..47fec1bbe854b3369692d6c6994f60cd2c3a7af0 GIT binary patch literal 3582 zcmV|D^_ww@lRz|vCuzLs)$;-`! zo*{AqUjza0dRV*yaMRE;fKCVhpQKsoe1Yhg01=zBIT!& zC1$=TK@rP|Ibo3vKKm@PqnO#LJhq6%Ij6Hz*<$V$@wQAMN5qJ)hzm2hoGcOF60t^# zFqJFfH{#e-4l@G)6iI9sa9D{VHW4w29}?su;^hF~NC{tY+*d5%WDCTXa!E_i;d2ub z1#}&jF5T4HnnCyEWTkKf0>c0%E1Ah>(_PY1)0w;+02c53Su*0<(nUqKG_|(0G&D0Z z{i;y^b@OjZ+}lNZ8Th$p5Uu}MTtq^NHl z*T1?CO*}7&0ztZsv2j*bmJyf3G7=Z`5B*PvzoDiKdLpOAxi2$L0#SX*@cY_n(^h55xYX z#km%V()bZjV~l{*bt*u9?FT3d5g^g~#a;iSZ@&02Abxq_DwB(I|L-^bXThc7C4-yr zInE_0gw7K3GZ**7&k~>k0Z0NWkO#^@9q0fwx1%qjZ=)yBuQ3=5 z4Wo^*!gyjLF-e%Um=erBOdIALW)L%unZshS@>qSW9o8Sq#0s#5*edK%>{;v(b^`kb zN5rY%%y90wC>#%$kE_5P!JWYk;U;klcqzOl-UjcFXXA75rT9jCH~u<)0>40zCTJ7v z2qAyk54cquI@7b&LHdZ`+zlTss6bJ7%PQ)z$cROu4wBhpu-r)01)S~6}jY?%U? zgEALn#wiFzo#H}aQ8rT=DHkadR18&{>P1bW7E`~Y4p3)hWn`DhhRJ5j*2tcg9i<^O zEt(fCg;q*CP8+7ZTcWhYX$fb^_9d-LhL+6BEtPYWVlfK zTBusSTASKKb%HuWJzl+By+?gkLq)?+BTu761jmyXF)a;mc z^>(B7bo*HQ1NNg1st!zt28YLv>W*y3CdWx9U8f|cqfXDAO`Q48?auQqHZJR2&bcD4 z9Ip>EY~kKEPV6Wm+eXFV)D)_R=tM0@&p?(!V*Qu1PXHG9o^TY0bZ?)4%0 z1p8F`JoeS|<@=<@RE7GY07EYX@lwd>4oW|Yi!o+Su@M`;WuSK8LKk71XR(_ zRKHM1xJ5XYX`fk>`6eqY>qNG6HZQwBM=xi4&Sb88?zd}EYguc1@>KIS<&CX#T35dw zS|7K*XM_5Nf(;WJJvJWRMA($P>8E^?{IdL4o5MGE7bq2MEEwP7v8AO@qL5!WvekBL z-8R%V?zVyL=G&{be=K4bT`e{#t|)$A!YaA?jp;X)-+bB;zhj`(vULAW%ue3U;av{9 z4wp%n<(7@__S@Z2PA@Mif3+uO&y|X06?J#o zSi8M;ejj_^(0<4Lt#wLu#dYrva1Y$6_o(k^&}yhSh&h;f@JVA>W8b%oZ=0JGnu?n~ z9O4}sJsfnnx7n(>`H13?(iXTy*fM=I`sj`CT)*pTHEgYKqqP+u1IL8No_-(u{qS+0 z<2@%BCt82d{Gqm;(q7a7b>wu+b|!X?c13m#p7cK1({0<`{-e>4hfb-UsyQuty7Ua; zOu?B?XLHZaol8GAb3Wnxcu!2v{R_`T4=x`(GvqLI{-*2AOSimkUAw*F_TX^n z@STz9kDQ$NC=!KfXWC z8h`dn#xL(D3Z9UkR7|Q&Hcy#Notk!^zVUSB(}`#4&lYA1f0h2V_PNgUAAWQEt$#LR zcH#y9#i!p(Udq2b^lI6wp1FXzN3T;~FU%Lck$-deE#qz9yYP3D3t8{6?<+s(e(3(_ z^YOu_)K8!O1p}D#{JO;G(*OVf32;bRa{vGf6951U69E94oEQKA00(qQO+^RY0|^%! zCOVF5nE(I-SV=@dR5;5`j$KS!bpXYG|9fwH+j6_MKqnh$rA*oQs)iMLfDL3@+_DE) zm?n$v!3U$+BGDI@_#j3TjYgR>;z#0=nHeO@$Z!T(rVeo-(WwL&Sdg;8WQ_6+9sRiN zy*^CL&f7UT=a(Z|RMq4@SvP5TOAEX> zxh1Nq%5W!iY~g9lrBw#|N9q3bJhH>aoj*sh3xOhQcAQ&WSR{ksp(-TFKy#}23v;o! zH1u=|n-J7B?k1hJi9{la&8_i7brJIe6G(=tkYTrRwzU&gks+1FB1;rk?4a#~b_TEX zW65;#_g|9?{rCrakJVFkq>f*@e#5t~9J$u1HV5kSk%=-fGRoFGL3DYB%N=~r# z%u&)72zOs+X_7AYZRr~SCxy;@4;#@QLQG5oF*&FLiOP?u!81w zpbEiyXpX`Ik6@5w^1Bc-BQr?GT7rK@rqN?-w7%WV#K<&eGKy0(kx206dzZP^8)hN8 zj=wICz^+_=?+Q^+l8GmtLf$XS%{S&3sN5~%f7r@LZ}d@In!>HFkX(zfW4nsxNZ>26 z64;wbXkdcg^I?k1T`Ui;A@5h@=7mUtSxhP_-2@(U^7<>+F)iSG$cC(#XqtqOOwOIV z&DX6rc>bk)zCS&MBW)sDRMpfm8BH6M)oPkC$(jaYs5Rgs@E_JT(^G~bf=&)4?#QgvO z-87joO>UbeQ3XoNR3rg+mksWObuxelBtbR;NeBS`1)fYxS@*(OMgRZ+07*qoM6N<$ Eg2yzRJ%EZ86q|G)mXrsJCW zb?2NjGiT1sT>mBExsnCyK^#cEKE=LN6Q;|x%s#jT+5SxIFO1v8*ZXZH$9 ziAD4tN*a8dS%`$phi7bmYe=J%7R8`Y`W2W~3)9NxTxa>9RQfXm9B9Ep_L*D8)%Qv; z0KQBoqSBAqMh?!YQsiyzVC|nawt#hYfX}UC1{az_N(NX3%RJOgq*ndkk~3uvocQKv zqEm-+OE6lKs&$5;7)JQ)#gJhjksz+;36F=m4&IOJ7or~1wJ8%Sc!J{?)5nur0 WOHl({Tx{q70000