From 2822623f7e154eaa17f386157d3f5f69b4a6fa98 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20D=C5=82ugosz?= Date: Sun, 18 Nov 2012 18:47:45 +0100 Subject: [PATCH 1/5] telepolis --- recipes/icons/telepolis_pl.png | Bin 0 -> 1179 bytes recipes/telepolis_pl.recipe | 67 +++++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+) create mode 100644 recipes/icons/telepolis_pl.png create mode 100644 recipes/telepolis_pl.recipe diff --git a/recipes/icons/telepolis_pl.png b/recipes/icons/telepolis_pl.png new file mode 100644 index 0000000000000000000000000000000000000000..0b94658d947f4785ef1618f478fcad335ceeb748 GIT binary patch literal 1179 zcmeAS@N?(olHy`uVBq!ia0vp^0wB!61|;P_|4#%`jKx9jP7LeL$-D$|*pj^6T^Rm@ z;DWu&Cj&(|3p^r=85p>QL70(Y)*K0-AbW|YuPgf<4mn|Y=H#{UQy3Unq&;06Lo5Ut z1z6ty2CAS8Fp{EumWE0@Q2h_69*(pdcPd%1830ts0F=ZgRe>gm-2jkgpdtT}6$8bv zx)$sLY)(Vg3}d4kvZkb%5#|L9rvWu}05ROZAVc;aIP{-^5#bO=ds{{(kOa{Hr1?LJ z&lp%h&Isx0MHq`P4ZEut8Btut0@VC}|G~p3#)6!NFb6*i76urq&;tyr0K-)<126)N zq-eyj9)|$B*MMGUfVlv}X+SeDT!n5f&_K8W=yIT>T!|rwO#tK!bbw6^T@BUvA8$Gt T'), + lambda match: ''), + (re.compile(r'Zobacz:.*?', re.DOTALL), + lambda match: ''), + (re.compile(r'<-ankieta.*?>'), + lambda match: ''), + (re.compile(r'\(Q\!\)'), + lambda match: ''), + (re.compile(r'\(plik.*?\)'), + lambda match: ''), + (re.compile(r'', re.DOTALL), + lambda match: '') + ] + + extra_css = '''.tb { font-weight: bold; font-size: 20px;}''' + + feeds = [ + (u'Wiadomości', u'http://www.telepolis.pl/rss/news.php'), + (u'Artykuły', u'http://www.telepolis.pl/rss/artykuly.php') + ] + + def print_version(self, url): + if 'news.php' in url: + print_url = url.replace('news.php', 'news_print.php') + else: + print_url = url.replace('artykuly.php', 'art_print.php') + return print_url + + def preprocess_html(self, soup): + for image in soup.findAll('img'): + if 'm.jpg' in image['src']: + image_big = image['src'] + image_big = image_big.replace('m.jpg', '.jpg') + image['src'] = image_big + logo = soup.find('tr') + logo.extract() + for tag in soup.findAll('tr'): + for strings in ['Wiadomość wydrukowana', 'copyright']: + if strings in self.tag_to_string(tag): + tag.extract() + return self.adeify_images(soup) From fc2023911f9e885ddaf1ec1cbd9b39c8c396267b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20D=C5=82ugosz?= Date: Sun, 18 Nov 2012 18:48:30 +0100 Subject: [PATCH 2/5] antyweb --- recipes/antyweb.recipe | 49 ++++++++++++++++++++++++++++++++++++++ recipes/icons/antyweb.png | Bin 0 -> 668 bytes 2 files changed, 49 insertions(+) create mode 100644 recipes/antyweb.recipe create mode 100644 recipes/icons/antyweb.png diff --git a/recipes/antyweb.recipe b/recipes/antyweb.recipe new file mode 100644 index 0000000000..0b8c5af9f4 --- /dev/null +++ b/recipes/antyweb.recipe @@ -0,0 +1,49 @@ +import re + +from calibre.web.feeds.news import BasicNewsRecipe + +class AntywebRecipe(BasicNewsRecipe): + encoding = 'utf-8' + __license__ = 'GPL v3' + __author__ = u'Artur Stachecki ' + language = 'pl' + version = 1 + title = u'Antyweb' + category = u'News' + description = u'Blog o internecie i nowych technologiach' + cover_url='' + remove_empty_feeds= True + auto_cleanup = False + no_stylesheets=True + use_embedded_content = False + oldest_article = 1 + max_articles_per_feed = 100 + remove_javascript = True + simultaneous_downloads = 3 + + keep_only_tags =[] + keep_only_tags.append(dict(name = 'h1', attrs = { 'class' : 'mm-article-title'})) + keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'mm-article-content'})) + + + remove_tags =[] + remove_tags.append(dict(name = 'h2', attrs = {'class' : 'widgettitle'})) + remove_tags.append(dict(name = 'img', attrs = {'class' : 'alignleft'})) + remove_tags.append(dict(name = 'div', attrs = {'class' : 'float: right;margin-left:1em;margin-bottom: 0.5em;padding-bottom: 3px; width: 72px;'})) + remove_tags.append(dict(name = 'img', attrs = {'src' : 'http://antyweb.pl/wp-content/uploads/2011/09/HOSTERSI_testy_pasek600x30.gif'})) + remove_tags.append(dict(name = 'div', attrs = {'class' : 'podwpisowe'})) + + + extra_css = ''' + body {font-family: verdana, arial, helvetica, geneva, sans-serif ;} + ''' + + feeds = [ + (u'Artykuly', u'feed://feeds.feedburner.com/Antyweb?format=xml'), + ] + def preprocess_html(self, soup): + for alink in soup.findAll('a'): + if alink.string is not None: + tstr = alink.string + alink.replaceWith(tstr) + return soup diff --git a/recipes/icons/antyweb.png b/recipes/icons/antyweb.png new file mode 100644 index 0000000000000000000000000000000000000000..8ca9870f60072171a8822aef03f2c67ddeddc594 GIT binary patch literal 668 zcmV;N0%QG&P) zsc2A?peR)nb0v-DntK!94mXx=9em;N42SdZp7T79kW%uNr@Uzz&;TBek7dQS@rp$% zwvBUlhm*@uxxc4k+dSvJ`0Io6_ddA05glSTBU1YtX)CZ#RINix4&9NBN$r5vOq*hjFO(ZB?U;8&`h{cdV zLsu7HHa7UWv4Lt?bk59>+T0`%i%|n=fEkZtB@*0UUQ#O-{Tl?MQ54{JF2~c;6I~-C zbdQemV|khO!9l9FO=5PIYNbNy@DNp1sX7i`spJ!wk%%va>!Os)eE;^1_~ay~=`>f{ z+w@FL5$^BDYHQ^%(gJ`H2=FeMer)eJ^rsg;l zvRUHO(-e=6P=MOwqfcPAwjzLVZ!cBP<7{`AtDPM_EiF;b=c&3bLf1K4T?NbH@!|pj z)Ji38Z*HV!G&Nx~Hu7;{0mrr}Y;Ex?805pm1Wg?s>@O~&s4C9+Iq6giY1{q}N+mqU zK}acC@92<^&(BEL#VD8Yq{IWvx_qyeeU(}QLf08OIPjgQ1%q&NgIYJxt9cLtp=k)q z@+%ZVX>3F^G$5i;yq6cBAUZb2^T`Qnb2C~bf*KB^hC+W)4~Nl1A#}?^4+PKy0W{Mj z(bp%$YZ`xMGEy^548y=M3^Y}xGa42D3zK+joc{n$!0;i>Ta9r50000 Date: Sun, 18 Nov 2012 18:51:39 +0100 Subject: [PATCH 3/5] f1 ultra --- recipes/f1_ultra.recipe | 35 +++++++++++++++++++++++++++++++++++ recipes/icons/f1_ultra.png | Bin 0 -> 490 bytes 2 files changed, 35 insertions(+) create mode 100644 recipes/f1_ultra.recipe create mode 100644 recipes/icons/f1_ultra.png diff --git a/recipes/f1_ultra.recipe b/recipes/f1_ultra.recipe new file mode 100644 index 0000000000..ada82542fc --- /dev/null +++ b/recipes/f1_ultra.recipe @@ -0,0 +1,35 @@ +from calibre.web.feeds.news import BasicNewsRecipe +import re + +class f1ultra(BasicNewsRecipe): + title = u'Formuła 1 - F1 ultra' + __license__ = 'GPL v3' + __author__ = 'MrStefan , Artur Stachecki ' + language = 'pl' + description =u'Formuła 1, Robert Kubica, F3, GP2 oraz inne serie wyścigowe.' + masthead_url='http://www.f1ultra.pl/templates/f1ultra/images/logo.gif' + remove_empty_feeds= True + oldest_article = 1 + max_articles_per_feed = 100 + remove_javascript=True + no_stylesheets=True + + keep_only_tags =[(dict(name = 'div', attrs = {'id' : 'main'}))] + remove_tags_after =[dict(attrs = {'style' : 'margin-top:5px;margin-bottom:5px;display: inline;'})] + remove_tags =[(dict(attrs = {'class' : ['buttonheading', 'avPlayerContainer', 'createdate']}))] + remove_tags.append(dict(attrs = {'title' : ['PDF', 'Drukuj', 'Email']})) + remove_tags.append(dict(name = 'form', attrs = {'method' : 'post'})) + remove_tags.append(dict(name = 'hr', attrs = {'size' : '2'})) + + preprocess_regexps = [(re.compile(r'align="left"'), lambda match: ''), + (re.compile(r'align="right"'), lambda match: ''), + (re.compile(r'width=\"*\"'), lambda match: ''), + (re.compile(r'\'), lambda match: '')] + + + extra_css = '''.contentheading { font-size: 1.4em; font-weight: bold; } + img { display: block; clear: both;} + ''' + remove_attributes = ['width','height','position','float','padding-left','padding-right','padding','text-align'] + + feeds = [(u'F1 Ultra', u'http://www.f1ultra.pl/index.php?option=com_rd_rss&id=1&Itemid=245')] diff --git a/recipes/icons/f1_ultra.png b/recipes/icons/f1_ultra.png new file mode 100644 index 0000000000000000000000000000000000000000..f45a94f53adf1189058039c877c0735420896e5f GIT binary patch literal 490 zcmV8|3!g$Vuv>%y+2U5YC| zA`@DiXqjoac-n6931U&*nNUrJ}D%ss^pm9PV<3 z)XpbbN|96z>a`k)=!bufk4PULa9*v(3h0K#V&xs4eE~OIH!KQEi}3=T(G3d#TWT~* zQ(&5;BYe1Aju*HzO@7vD;o-T}6$YQb0HE*51lq?B)NeSzgM1ztJh+bIXHO18XiJUU zt0nrUXGQzeY^jlcHreh#`~3?|HSk;)fU$obV!R;k;sD;@P`kk2W|LH8oZ{n$ zTrIrx0o>(EjO&KQKg1<}ce%jI@~PJzJ&=C`+ayT5m<;9xtkH3ougsWI~U4T(%f1Q$d(T(33k;Og`{ zN!1A9K(QDezK_dJfx2OZg2K*va2W`UCJ+>)7ncCJrt11dqv^$k>o|UWro`^q^^A4b g<-Oy?1oq Date: Sun, 18 Nov 2012 18:52:01 +0100 Subject: [PATCH 4/5] bankier.pl --- recipes/bankier_pl.recipe | 51 +++++++++++++++++++++++++++++++++++ recipes/icons/bankier_pl.png | Bin 0 -> 190 bytes 2 files changed, 51 insertions(+) create mode 100644 recipes/bankier_pl.recipe create mode 100644 recipes/icons/bankier_pl.png diff --git a/recipes/bankier_pl.recipe b/recipes/bankier_pl.recipe new file mode 100644 index 0000000000..d65b0c17ed --- /dev/null +++ b/recipes/bankier_pl.recipe @@ -0,0 +1,51 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__author__ = 'teepel ' + +''' +bankier.pl +''' + +from calibre.web.feeds.news import BasicNewsRecipe +import re + +class bankier(BasicNewsRecipe): + title = u'Bankier.pl' + __author__ = 'teepel ' + language = 'pl' + description ='Polski portal finansowy. Informacje o: gospodarka, inwestowanie, finanse osobiste, prowadzenie firmy, kursy walut, notowania akcji, fundusze.' + masthead_url='http://www.bankier.pl/gfx/hd-mid-02.gif' + INDEX='http://bankier.pl/' + remove_empty_feeds= True + oldest_article = 1 + max_articles_per_feed = 100 + remove_javascript=True + no_stylesheets=True + simultaneous_downloads = 5 + + keep_only_tags =[] + keep_only_tags.append(dict(name = 'div', attrs = {'align' : 'left'})) + + remove_tags =[] + remove_tags.append(dict(name = 'table', attrs = {'cellspacing' : '2'})) + remove_tags.append(dict(name = 'div', attrs = {'align' : 'center'})) + remove_tags.append(dict(name = 'img', attrs = {'src' : '/gfx/hd-mid-02.gif'})) + #remove_tags.append(dict(name = 'a', attrs = {'target' : '_blank'})) + #remove_tags.append(dict(name = 'br', attrs = {'clear' : 'all'})) + + feeds = [ + (u'Wiadomości dnia', u'http://feeds.feedburner.com/bankier-wiadomosci-dnia'), + (u'Finanse osobiste', u'http://feeds.feedburner.com/bankier-finanse-osobiste'), + (u'Firma', u'http://feeds.feedburner.com/bankier-firma'), + (u'Giełda', u'http://feeds.feedburner.com/bankier-gielda'), + (u'Rynek walutowy', u'http://feeds.feedburner.com/bankier-rynek-walutowy'), + (u'Komunikaty ze spółek', u'http://feeds.feedburner.com/bankier-espi'), + ] + def print_version(self, url): + segment = url.split('.') + urlPart = segment[2] + segments = urlPart.split('-') + urlPart2 = segments[-1] + return 'http://www.bankier.pl/wiadomosci/print.html?article_id=' + urlPart2 + \ No newline at end of file diff --git a/recipes/icons/bankier_pl.png b/recipes/icons/bankier_pl.png new file mode 100644 index 0000000000000000000000000000000000000000..c26f006a57cf1532311ef04ae4a2c42cbfeff33f GIT binary patch literal 190 zcmeAS@N?(olHy`uVBq!ia0vp^0wB!73?$#)eFPGa4)6(a1=3&QSW?_tNldzopr0Nr>tNdN!< literal 0 HcmV?d00001 From 1b637e7f15e6220d3d3ad4ff5a283e518758e954 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20D=C5=82ugosz?= Date: Sun, 18 Nov 2012 18:52:19 +0100 Subject: [PATCH 5/5] myapple --- recipes/icons/myapple_pl.png | Bin 0 -> 1176 bytes recipes/myapple_pl.recipe | 50 +++++++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+) create mode 100644 recipes/icons/myapple_pl.png create mode 100644 recipes/myapple_pl.recipe diff --git a/recipes/icons/myapple_pl.png b/recipes/icons/myapple_pl.png new file mode 100644 index 0000000000000000000000000000000000000000..a68cf4e7efc3c395730cf255545eb990e9899993 GIT binary patch literal 1176 zcma))YfO_@7{?DH5$Ri6CeQ|uAp}^4Y;EPXru0T{FO*s;g`yCy1qv1zb~1=^al@b{ z(*-t3#JKeeEw`6a!E(P8Au1s1bdyChV73k06!@?i27B2)ZJ&0M=lst9g@o0LIS{<0|2!Z076BZHH8Miz5`-mDi=+-v2^i^yNayNh&&`l zHOQ8A9gXYt0uE3$mdd_?&ycZ^7iv{c-0tq~q;sUi9RGUmVTm;%r{@FZfZ*dnfk5Y{ zuyC?FKfGcqPVNZ)da*Xk607Wusk7w?G=XJ9aM=K?7~nK7)tsBiJZlwCIR_ru=emBp znc4Mz&8Y0Nse)^>CB;b8sV<7Fmz3B+Wa)4cQ%I4S)_6O*dMr(7Jj~YMlMKYn9tvzE z!X5q$EhIJtr*wv;bwxDTa@cB*crC=&5em$-`mvMw{nXMy!QA?UK<~@^7UCHQsm9Qa zjo+GVCH!_`dQW84on*d&02>43{q*HGGfFF;r}sHGeX7h%MT_$czS8dSsv)VcBT#G# zY8=lM>fdE(arynUsWl6;4VR$vDe8&se0=qUDV*C*%<79}-NdogUd8=%r{$HS;#>0UJj0}@nUIk3=KA^Js?5vP1j)Adc43$p*;oy5fz6vM0aE+6V|x!iRx0sPeonm$ z51SmRJRC;6o{@h1#rWcEtJ|bJnjC%XR-B@*G1R|xVLn$LZ=3n9%sAp2-n3x*#UAzG zd)pJGb?5-@+QPk8j_9f9cb_iQ@qjBs%nTwGmsB+66hFKF+WkmSb{wT7O$FT&oL*l4 zeSgX~^|HLBwpH&pkNfY;JKkZA7Vv%^?mXSmw)_WXV?)=lkK)#dxpFxv@t*Hj>9ZHs zTTGe5TBEhl(f0$jx>w`vyuOf)R7w-_n}esHBt9@bJ*rSBOVcmDP1I@3rXfSR&#$GH z(Vc@%$Q^)|X4=b#J*E)!wV^7mOjVGpl2h|5A-IO?&Oe+=EshCb8)V6KSM&Ca>-7Y2LPCjbBd literal 0 HcmV?d00001 diff --git a/recipes/myapple_pl.recipe b/recipes/myapple_pl.recipe new file mode 100644 index 0000000000..eee333012c --- /dev/null +++ b/recipes/myapple_pl.recipe @@ -0,0 +1,50 @@ +import re + +from calibre.web.feeds.news import BasicNewsRecipe + +class MyAppleRecipe(BasicNewsRecipe): + __license__ = 'GPL v3' + __author__ = u'Artur Stachecki ' + language = 'pl' + version = 1 + + title = u'MyApple.pl' + category = u'News' + description = u' Największy w Polsce serwis zajmujący się tematyką związaną z Apple i wszelkimi produktami tej firmy.' + cover_url='' + remove_empty_feeds= True + no_stylesheets=True + oldest_article = 7 + max_articles_per_feed = 100000 + recursions = 0 + + no_stylesheets = True + remove_javascript = True + simultaneous_downloads = 3 + + keep_only_tags =[] + keep_only_tags.append(dict(name = 'div', attrs = {'id' : 'article_content'})) + + remove_tags =[] + remove_tags.append(dict(name = 'div', attrs = {'class' : 'article_author_date_comment_container'})) + remove_tags.append(dict(name = 'div', attrs = {'class' : 'fullwidth'})) + remove_tags.append(dict(name = 'div', attrs = {'class' : 'cmslinks'})) + remove_tags.append(dict(name = 'div', attrs = {'class' : 'googleads-468'})) + remove_tags.append(dict(name = 'div', attrs = {'id' : 'comments'})) + + + extra_css = ''' + body {font-family: verdana, arial, helvetica, geneva, sans-serif ;} + td.contentheading{font-size: large; font-weight: bold;} + ''' + + feeds = [ + ('News', 'feed://myapple.pl/external.php?do=rss&type=newcontent§ionid=1&days=120&count=10'), + ] + + def preprocess_html(self, soup): + for alink in soup.findAll('a'): + if alink.string is not None: + tstr = alink.string + alink.replaceWith(tstr) + return soup \ No newline at end of file