From 98b7cd3e4b6e86152a18762b1654f79915d5c67e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20D=C5=82ugosz?= Date: Fri, 9 Nov 2012 00:05:46 +0100 Subject: [PATCH] add some recipes and icons from kalibrator, part 1 --- recipes/icons/mateusz_czytania.png | Bin 0 -> 1087 bytes recipes/icons/rushisaband.png | Bin 0 -> 965 bytes recipes/icons/rynek_infrastruktury.png | Bin 0 -> 820 bytes recipes/icons/rynek_kolejowy.png | Bin 0 -> 330 bytes recipes/icons/satkurier.png | Bin 0 -> 1179 bytes recipes/kerrang.recipe | 36 +++++++++++++++ recipes/lequipe.recipe | 46 +++++++++++++++++++ recipes/mateusz_czytania.recipe | 37 +++++++++++++++ recipes/naszdziennik.recipe | 61 +++++++++++++++++++++++++ recipes/rushisaband.recipe | 29 ++++++++++++ recipes/rynek_infrastruktury.recipe | 42 +++++++++++++++++ recipes/rynek_kolejowy.recipe | 41 +++++++++++++++++ recipes/satkurier.recipe | 49 ++++++++++++++++++++ 13 files changed, 341 insertions(+) create mode 100644 recipes/icons/mateusz_czytania.png create mode 100644 recipes/icons/rushisaband.png create mode 100644 recipes/icons/rynek_infrastruktury.png create mode 100644 recipes/icons/rynek_kolejowy.png create mode 100644 recipes/icons/satkurier.png create mode 100644 recipes/kerrang.recipe create mode 100644 recipes/lequipe.recipe create mode 100644 recipes/mateusz_czytania.recipe create mode 100644 recipes/naszdziennik.recipe create mode 100644 recipes/rushisaband.recipe create mode 100644 recipes/rynek_infrastruktury.recipe create mode 100644 recipes/rynek_kolejowy.recipe create mode 100644 recipes/satkurier.recipe diff --git a/recipes/icons/mateusz_czytania.png b/recipes/icons/mateusz_czytania.png new file mode 100644 index 0000000000000000000000000000000000000000..75681394336c51c8bf08410c6b7bbc545ad8a1d7 GIT binary patch literal 1087 zcmV-F1i<@=P)(_`g8%^e{{R4h=>PzAFaQARU;qF*m;eA5Z<1fd zMgRZ<08mU+MF0Q*{LVc5=A8WAeEjy@{Kh2w`t1Dbto+hU{Kgdg{`~yzy!_Tw{K5?U z+HCyFEd27!{NjxKz6AWvH2mh2{PMi~-gNx-)cnRA{MKCj`sDoTr2O7&{PxWJ(nS2i z6#V-1{L)nX$~64ql>Gkm{O+{;+I0NVJpA(1{OX+i!W{h8O#H$G{PyJh=A``EWcK0ZG`KR`f0K|w)6 zLPA4BLqtSGMMXtMMn*?RM@UFWNl8gcN=i#hOH52mO-)TsPEJoxPf$=$QBhG+Qc_b> zQ&dz`RaI41R#sP6S6EnBSy@?HT3TCMTU=aRU0q#XUS3~cUtnNhVPRonVq#-sV`OAx zWo2b%W@cw+XJ}|>X=!O{YHDk1Yiw+6ZEbCCZf7mzbEC znVFfInwp!No1C1Sot>SYo}QndpP-Ll?si~=| zs;aB2tE{Z7t*x!DuCA}IuduMNv9YnTva++Yv$V9dwY9ajwzjvox45{txw*Nzy1Ki& zyS%)-y}iA@zP`V|zreu2!NI}8!otJD!^FhI#l^+O#>U6T$H>UY$;rve%F4^j%goHo z&CSiu&d$%z&(P4&(b3V;($dq@)6~?|)z#J3*4Ee8*Vx$D+1c6J+S=RO+uYpT-QC^Z z-rnEe-{9cj;o;%p;^O1ulq(=H}<;=jiC@>FMd}>gwz3>+J08?d|RE?(XmJ z@9^;O@$vEU^78ZZ^Yrxe_4W1k_V)Mp_xSku`T6@-$B5b6iLau2OGb>SafM zr=>iO6FUFBEVtK&rAnb|?8D=PDwfhr6Q4`;wNxy1%9Ec-$DD&p+f9`nA*F~URWgm-l5?h@{^<%B{KD)#7ioHV?O*+wWqBz>$*0O!gO# z$k#M5=kIs?{I5N6#v{WGnwqgMU%a`u`-#JB$I!P~3(8C{?CiQKe{DtbMK{%JMlaeA zvrp00^_+BP!+HZI7M8Fa#>1U#+vi$!b_*-L=gY=cQdYE6>&c6#Xt+ zJ@d-+mYlhCMXBfl!GHVZrb!Cvp$ntdz7w|@A?=@}H5QWUwWAj4UNQ`{l4 zYEcyL+a0rgpL>K^zu^;N+m`V11kdI7dycMicGoQCINadGCOyU3CsE?Pv-bu=yK`4& z`p@|F%jD+k^#}dTO>lSG z^B;c?fB*1QV7j==PX=q29^tin63j20k@4$uWNJOVQCWG*y_$LVeU8mr;a2m?cwOyx z-N4N!FNoh@QsD0};&15SG!${zD`xybwcPF(OVu|^vF@IgA#3LRnNsvqf5XFVju9IS z!k!;ra_W)CTp`AUr<{-261N`x7v6ud@`>WJ6dKW%;pyCP42C5~l z5hck*sfi`2x+y?{!N|bSP}k5%*T^u$(89{t(#p_S+rZMwz`#AmtO%wdH$Npat&)HS TiMI}JKn)C@u6{1-oD!M(_`g8%^e{{R4h=>PzAFaQARU;qF*m;eA5Z<1fd zMgRZ;x=BPqRCwBSlU-=lbr{D#&+qp?|DCfu&1B@;az^Qd=8d94FJLzkUD`#^O~o)H zx{&Tdgvy&Px+sVeMiQYeh@i4jSxRlS=BPF;AsL~~R))uU*xGsdy**u=oKEezd2XKP z!}odlimDgbgT%NZKF*s*r#L`tTAm{y%mC&!Th#D}jFC;*CY-7LC0q zCy2U+pf3~j3xW=i_zZXoSzIitiX5KlFi7GEiJ^rN zOfCml}C%mJA?yZ2-^EZu{(|*`4c$ zPSTa9=bf=5^YG;AbLU@QTB%HoZQasQyDh(ilpfHa8y5gXEbtXZCS$!hIB{-f>W^~U zPFPuhj$Ux?-72dP+TRmjq$cqzdlwPJ>6FL ztMaGj*%@!Gxn4mD_>`o5gme+-enGX9%=YI)KS=4jpH^p=tB+_QdnIIcp9X)7;D>>M z79iLT)-WZ40^ y!9F*;u|+;gN<+l>vY5XSM}iDj@;#6qks@<{|cQ&{8L9H2pZK0+_1WL1l}wBM$xhH(n5voDF4hAQ@5*u8Jh?f86~Y`YNS;Rk`BXK$m-rX3Lv%!P zIq()n$oFuI)p39>-f&oS7CH!xtb!Whs|2twn|+fw&l}l6I3@Im>P1n%KnK^DCDcm* z(0U?jW1}S2Be};7(fR}cBoEj_vt$e;cQ_#4DgZ)bMDd3 z5rK(56!b&V4-^r){*lp-87pa}<*142s@wfe-DNt%z2`gs_ndp@{Go~xS4?sY5xM-O zMwQ??R)9r}iOjwLb8K3gm+`{r z2y^(^^>wsterK6CBnWie3a>0K@_Ku-ISk#v=@SthaQEJ=5!vdirqms$%<^jbIr5I) zx3|*Ln;uTQlD98X1sGLfmo#vey5N=MAD#3~fTK+OhHn)q9YIp1WDsk-PS(B!oK43fAvyvT0M@ICxYX0CJ z;ea3zdd9Kwv%!u+AQ`9N*&>0Qh5Re$KYw6Hw1%00q)xVgnE!5pz5Z>of>kH6)pj*0 T%hpuidjU3oafQ)YbmZ(GTYFKj literal 0 HcmV?d00001 diff --git a/recipes/kerrang.recipe b/recipes/kerrang.recipe new file mode 100644 index 0000000000..e9db9d886e --- /dev/null +++ b/recipes/kerrang.recipe @@ -0,0 +1,36 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' + +from calibre.web.feeds.news import BasicNewsRecipe +import re + + +class kerrang(BasicNewsRecipe): + title = u'Kerrang!' + __author__ = 'Artur Stachecki ' + language = 'en' + description = u'UK-based magazine devoted to rock music published by Bauer Media Group' + oldest_article = 7 + masthead_url = 'http://images.kerrang.com/design/kerrang/kerrangsite/logo.gif' + max_articles_per_feed = 100 + simultaneous_downloads = 5 + remove_javascript = True + no_stylesheets = True + use_embedded_content = False + recursions = 0 + + keep_only_tags = [] + keep_only_tags.append(dict(attrs = {'class' : ['headz', 'blktxt']})) + + extra_css = ''' img { display: block; margin-right: auto;} + h1 {text-align: left; font-size: 22px;}''' + + feeds = [(u'News', u'http://www.kerrang.com/blog/rss.xml')] + + def preprocess_html(self, soup): + for alink in soup.findAll('a'): + if alink.string is not None: + tstr = alink.string + alink.replaceWith(tstr) + return soup \ No newline at end of file diff --git a/recipes/lequipe.recipe b/recipes/lequipe.recipe new file mode 100644 index 0000000000..edbc81441b --- /dev/null +++ b/recipes/lequipe.recipe @@ -0,0 +1,46 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' + +from calibre.web.feeds.news import BasicNewsRecipe +import re + + +class leequipe(BasicNewsRecipe): + title = u'l\'equipe' + __author__ = 'Artur Stachecki ' + language = 'fr' + description = u'Retrouvez tout le sport en direct sur le site de L\'EQUIPE et suivez l\'actualité du football, rugby, basket, cyclisme, f1, volley, hand, tous les résultats sportifs' + oldest_article = 1 + masthead_url = 'http://static.lequipe.fr/v6/img/logo-lequipe.png' + max_articles_per_feed = 100 + simultaneous_downloads = 5 + remove_javascript = True + no_stylesheets = True + use_embedded_content = False + recursions = 0 + + keep_only_tags = [] + keep_only_tags.append(dict(attrs={'id': ['article']})) + + remove_tags = [] + remove_tags.append(dict(attrs={'id': ['partage', 'ensavoirplus', 'bloc_bas_breve', 'commentaires', 'tools']})) + remove_tags.append(dict(attrs={'class': ['partage_bis', 'date']})) + + feeds = [(u'Football', u'http://www.lequipe.fr/rss/actu_rss_Football.xml'), + (u'Auto-Moto', u'http://www.lequipe.fr/rss/actu_rss_Auto-Moto.xml'), + (u'Tennis', u'http://www.lequipe.fr/rss/actu_rss_Tennis.xml'), + (u'Golf', u'http://www.lequipe.fr/rss/actu_rss_Golf.xml'), + (u'Rugby', u'http://www.lequipe.fr/rss/actu_rss_Rugby.xml'), + (u'Basket', u'http://www.lequipe.fr/rss/actu_rss_Basket.xml'), + (u'Hand', u'http://www.lequipe.fr/rss/actu_rss_Hand.xml'), + (u'Cyclisme', u'http://www.lequipe.fr/rss/actu_rss_Cyclisme.xml'), + (u'Autres Sports', u'http://pipes.yahoo.com/pipes/pipe.run?_id=2039f7f4f350c70c5e4e8633aa1b37cd&_render=rss') + ] + + def preprocess_html(self, soup): + for alink in soup.findAll('a'): + if alink.string is not None: + tstr = alink.string + alink.replaceWith(tstr) + return soup diff --git a/recipes/mateusz_czytania.recipe b/recipes/mateusz_czytania.recipe new file mode 100644 index 0000000000..86702c2107 --- /dev/null +++ b/recipes/mateusz_czytania.recipe @@ -0,0 +1,37 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__author__ = 'teepel ' + +''' +http://www.mateusz.pl/czytania +''' + +from calibre.web.feeds.news import BasicNewsRecipe +import re + +class czytania_mateusz(BasicNewsRecipe): + title = u'Czytania na ka\u017cdy dzie\u0144' + __author__ = 'teepel ' + description = u'Codzienne czytania z jednego z najstarszych polskich serwisów katolickich.' + language = 'pl' + INDEX='http://www.mateusz.pl/czytania' + oldest_article = 1 + remove_empty_feeds= True + no_stylesheets=True + auto_cleanup = True + remove_javascript = True + simultaneous_downloads = 2 + max_articles_per_feed = 100 + auto_cleanup = True + + feeds = [(u'Czytania', u'http://mateusz.pl/rss/czytania/')] + + remove_tags =[] + remove_tags.append(dict(name = 'p', attrs = {'class' : 'top'})) + + #thanks t3d + def get_article_url(self, article): + link = article.get('link') + if 'kmt.pl' not in link: + return link diff --git a/recipes/naszdziennik.recipe b/recipes/naszdziennik.recipe new file mode 100644 index 0000000000..4c7b78c199 --- /dev/null +++ b/recipes/naszdziennik.recipe @@ -0,0 +1,61 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' + +from calibre.web.feeds.news import BasicNewsRecipe + +class naszdziennik(BasicNewsRecipe): + title = u'Nasz Dziennik' + __author__ = 'Artur Stachecki ' + language = 'pl' + description =u'Nasz Dziennik - Ogólnopolska gazeta codzienna. Podejmuje tematykę dotyczącą życia społecznego, kulturalnego, politycznego i religijnego. Propaguje wartości chrześcijańskie oraz tradycję i kulturę polską.' + masthead_url='http://www.naszdziennik.pl/images/logo-male.png' + max_articles_per_feed = 100 + remove_javascript=True + no_stylesheets = True + + keep_only_tags =[dict(attrs = {'id' : 'article'})] + + #definiujemy nową funkcje; musi zwracać listę feedów wraz z artykułami + def parse_index(self): + #adres do parsowania artykułów + soup = self.index_to_soup('http://www.naszdziennik.pl/news') + #deklaracja pustej listy feedów + feeds = [] + #deklaracja pustego słownika artykułów + articles = {} + #deklaracja pustej listy sekcji + sections = [] + #deklaracja pierwszej sekcji jako pusty string + section = '' + + #pętla for, która analizuje po kolei każdy tag "news-article" + for item in soup.findAll(attrs = {'class' : 'news-article'}) : + #w tagu "news-article szukamy pierwszego taga h4" + section = item.find('h4') + #zmiennej sekcja przypisujemy zawartość tekstową taga + section = self.tag_to_string(section) + #sprawdzamy czy w słowniku artykułów istnieje klucz dotyczący sekcji + #jeśli nie istnieje to : + if not articles.has_key(section) : + #do listy sekcji dodajemy nową sekcje + sections.append(section) + #deklarujemy nową sekcje w słowniku artykułów przypisując jej klucz odpowiadający nowej sekcji, którego wartością jest pusta lista + articles[section] = [] + #przeszukujemy kolejny tag "title-datetime" + article_title_datetime = item.find(attrs = {'class' : 'title-datetime'}) + #w tagu title-datetime znajdujemy pierwszy link + article_a = article_title_datetime.find('a') + #i tworzymy z niego link absolutny do właściwego artykułu + article_url = 'http://naszdziennik.pl' + article_a['href'] + #jako tytuł użyty będzie tekst pomiędzy tagami + article_title = self.tag_to_string(article_a) + #a data będzie tekstem z pierwszego taga h4 znalezionego w tagu title-datetime + article_date = self.tag_to_string(article_title_datetime.find('h4')) + #zebrane elementy dodajemy do listy zadeklarowanej w linijce 44 + articles[section].append( { 'title' : article_title, 'url' : article_url, 'date' : article_date }) + #po dodaniu wszystkich artykułów dodajemy sekcje do listy feedów, korzystając z list sekcji znajdujących się w słowniku + for section in sections: + feeds.append((section, articles[section])) + #zwracamy listę feedów, której parsowaniem zajmie się calibre + return feeds \ No newline at end of file diff --git a/recipes/rushisaband.recipe b/recipes/rushisaband.recipe new file mode 100644 index 0000000000..2e18903389 --- /dev/null +++ b/recipes/rushisaband.recipe @@ -0,0 +1,29 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__author__ = 'MrStefan ' + +''' +www.rushisaband.com +''' + +from calibre.web.feeds.news import BasicNewsRecipe +import re + +class rushisaband(BasicNewsRecipe): + title = u'Rushisaband' + __author__ = 'MrStefan ' + language = 'en' + description =u'A blog devoted to the band RUSH and its members, Neil Peart, Geddy Lee and Alex Lifeson' + remove_empty_feeds= True + oldest_article = 7 + max_articles_per_feed = 100 + remove_javascript=True + no_stylesheets=True + + keep_only_tags =[] + keep_only_tags.append(dict(name = 'h4')) + keep_only_tags.append(dict(name = 'h5')) + keep_only_tags.append(dict(name = 'p')) + + feeds = [(u'Rush is a Band', u'http://feeds2.feedburner.com/rushisaband/blog')] \ No newline at end of file diff --git a/recipes/rynek_infrastruktury.recipe b/recipes/rynek_infrastruktury.recipe new file mode 100644 index 0000000000..f84f225579 --- /dev/null +++ b/recipes/rynek_infrastruktury.recipe @@ -0,0 +1,42 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__author__ = 'teepel ' + +''' +http://www.rynekinfrastruktury.pl +''' + +from calibre.web.feeds.news import BasicNewsRecipe +import re + +class prawica_recipe(BasicNewsRecipe): + title = u'Rynek Infrastruktury' + __author__ = 'teepel ' + language = 'pl' + description =u'Portal "Rynek Infrastruktury" to źródło informacji o kluczowych elementach polskiej gospodarki: drogach, kolei, lotniskach, portach, telekomunikacji, energetyce, prawie i polityce, wzmocnione eksperckimi komentarzami kluczowych analityków.' + remove_empty_feeds= True + oldest_article = 1 + max_articles_per_feed = 100 + remove_javascript=True + no_stylesheets=True + + feeds = [ + (u'Drogi', u'http://www.rynekinfrastruktury.pl/rss/41'), + (u'Lotniska', u'http://www.rynekinfrastruktury.pl/rss/42'), + (u'Kolej', u'http://www.rynekinfrastruktury.pl/rss/37'), + (u'Energetyka', u'http://www.rynekinfrastruktury.pl/rss/30'), + (u'Telekomunikacja', u'http://www.rynekinfrastruktury.pl/rss/31'), + (u'Porty', u'http://www.rynekinfrastruktury.pl/rss/32'), + (u'Prawo i polityka', u'http://www.rynekinfrastruktury.pl/rss/47'), + (u'Komentarze', u'http://www.rynekinfrastruktury.pl/rss/38'), + ] + + keep_only_tags =[] + keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'articleContent'})) + + remove_tags =[] + remove_tags.append(dict(name = 'span', attrs = {'class' : 'date'})) + + def print_version(self, url): + return url.replace('http://www.rynekinfrastruktury.pl/artykul/', 'http://www.rynekinfrastruktury.pl/artykul/drukuj/') diff --git a/recipes/rynek_kolejowy.recipe b/recipes/rynek_kolejowy.recipe new file mode 100644 index 0000000000..5a3e9218c8 --- /dev/null +++ b/recipes/rynek_kolejowy.recipe @@ -0,0 +1,41 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__author__ = 'teepel ' + +''' +rynek-kolejowy.pl +''' + +from calibre.web.feeds.news import BasicNewsRecipe +import re + +class rynek_kolejowy(BasicNewsRecipe): + title = u'Rynek Kolejowy' + __author__ = 'teepel ' + language = 'pl' + description =u'Rynek Kolejowy - kalendarium wydarzeń branży kolejowej, konferencje, sympozja, targi kolejowe, krajowe i zagraniczne.' + masthead_url='http://p.wnp.pl/images/i/partners/rynek_kolejowy.gif' + remove_empty_feeds= True + oldest_article = 1 + max_articles_per_feed = 100 + remove_javascript=True + no_stylesheets=True + + keep_only_tags =[] + keep_only_tags.append(dict(name = 'div', attrs = {'id' : 'mainContent'})) + + remove_tags =[] + remove_tags.append(dict(name = 'div', attrs = {'class' : 'right no-print'})) + remove_tags.append(dict(name = 'div', attrs = {'id' : 'font-size'})) + remove_tags.append(dict(name = 'div', attrs = {'class' : 'no-print'})) + + extra_css = '''.wiadomosc_title{ font-size: 1.4em; font-weight: bold; }''' + + feeds = [(u'Wiadomości', u'http://www.rynek-kolejowy.pl/rss/rss.php')] + + def print_version(self, url): + segment = url.split('/') + urlPart = segment[3] + return 'http://www.rynek-kolejowy.pl/drukuj.php?id=' + urlPart + \ No newline at end of file diff --git a/recipes/satkurier.recipe b/recipes/satkurier.recipe new file mode 100644 index 0000000000..a4b4e78eec --- /dev/null +++ b/recipes/satkurier.recipe @@ -0,0 +1,49 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' + +from calibre.web.feeds.news import BasicNewsRecipe +import re + + +class SATKurier(BasicNewsRecipe): + title = u'SATKurier.pl' + __author__ = 'Artur Stachecki ' + language = 'pl' + description = u'Największy i najstarszy serwis poświęcony\ + telewizji cyfrowej, przygotowywany przez wydawcę\ + miesięcznika SAT Kurier. Bieżące wydarzenia\ + z rynku mediów i nowych technologii.' + oldest_article = 7 + masthead_url = 'http://satkurier.pl/img/header_sk_logo.gif' + max_articles_per_feed = 100 + simultaneous_downloads = 5 + remove_javascript = True + no_stylesheets = True + + keep_only_tags = [] + keep_only_tags.append(dict(name='div', attrs={'id': ['single_news', 'content']})) + + remove_tags = [] + remove_tags.append(dict(attrs={'id': ['news_info', 'comments']})) + remove_tags.append(dict(attrs={'href': '#czytaj'})) + remove_tags.append(dict(attrs={'align': 'center'})) + remove_tags.append(dict(attrs={'class': ['date', 'category', 'right mini-add-comment', 'socialLinks', 'commentlist']})) + + remove_tags_after = [(dict(id='entry'))] + + feeds = [(u'Najnowsze wiadomości', u'http://feeds.feedburner.com/satkurierpl?format=xml'), + (u'Sport w telewizji', u'http://feeds.feedburner.com/satkurier/sport?format=xml'), + (u'Blog', u'http://feeds.feedburner.com/satkurier/blog?format=xml')] + + def preprocess_html(self, soup): + image = soup.find(attrs={'id': 'news_mini_photo'}) + if image: + image.extract() + header = soup.find('h1') + header.replaceWith(header.prettify() + image.prettify()) + for alink in soup.findAll('a'): + if alink.string is not None: + tstr = alink.string + alink.replaceWith(tstr) + return soup \ No newline at end of file