From 84f18e6db9e10dd6820d805b4e05c4e272c055cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20D=C5=82ugosz?= Date: Sat, 10 Nov 2012 11:51:21 +0100 Subject: [PATCH] one english recipe, two polish ones and some icons --- .bzrignore | 42 ++++++++++ recipes/autosport.recipe | 31 ++++++++ recipes/blognexto.recipe | 29 +++++++ recipes/brewiarz.recipe | 141 +++++++++++++++++++++++++++++++++ recipes/dobreprogamy.recipe | 3 +- recipes/icons/autosport.png | Bin 0 -> 415 bytes recipes/icons/blognexto.png | Bin 0 -> 699 bytes recipes/icons/brewiarz.png | Bin 0 -> 982 bytes recipes/icons/naszdziennik.png | Bin 0 -> 698 bytes recipes/icons/wprost.png | Bin 0 -> 1727 bytes 10 files changed, 244 insertions(+), 2 deletions(-) create mode 100644 recipes/autosport.recipe create mode 100644 recipes/blognexto.recipe create mode 100644 recipes/brewiarz.recipe create mode 100644 recipes/icons/autosport.png create mode 100644 recipes/icons/blognexto.png create mode 100644 recipes/icons/brewiarz.png create mode 100644 recipes/icons/naszdziennik.png create mode 100644 recipes/icons/wprost.png diff --git a/.bzrignore b/.bzrignore index b0b87a34e6..f14ff947f6 100644 --- a/.bzrignore +++ b/.bzrignore @@ -39,3 +39,45 @@ recipes/.git recipes/.gitignore recipes/README recipes/katalog_egazeciarz.recipe +recipes/tv_axnscifi.recipe +recipes/tv_comedycentral.recipe +recipes/tv_discoveryscience.recipe +recipes/tv_foxlife.recipe +recipes/tv_fox.recipe +recipes/tv_hbo.recipe +recipes/tv_kinopolska.recipe +recipes/tv_nationalgeographic.recipe +recipes/tv_polsat2.recipe +recipes/tv_polsat.recipe +recipes/tv_tv4.recipe +recipes/tv_tvn7.recipe +recipes/tv_tvn.recipe +recipes/tv_tvp1.recipe +recipes/tv_tvp2.recipe +recipes/tv_tvphd.recipe +recipes/tv_tvphistoria.recipe +recipes/tv_tvpkultura.recipe +recipes/tv_tvppolonia.recipe +recipes/tv_tvpuls.recipe +recipes/tv_viasathistory.recipe +recipes/icons/tv_axnscifi.png +recipes/icons/tv_comedycentral.png +recipes/icons/tv_discoveryscience.png +recipes/icons/tv_foxlife.png +recipes/icons/tv_fox.png +recipes/icons/tv_hbo.png +recipes/icons/tv_kinopolska.png +recipes/icons/tv_nationalgeographic.png +recipes/icons/tv_polsat2.png +recipes/icons/tv_polsat.png +recipes/icons/tv_tv4.png +recipes/icons/tv_tvn7.png +recipes/icons/tv_tvn.png +recipes/icons/tv_tvp1.png +recipes/icons/tv_tvp2.png +recipes/icons/tv_tvphd.png +recipes/icons/tv_tvphistoria.png +recipes/icons/tv_tvpkultura.png +recipes/icons/tv_tvppolonia.png +recipes/icons/tv_tvpuls.png +recipes/icons/tv_viasathistory.png diff --git a/recipes/autosport.recipe b/recipes/autosport.recipe new file mode 100644 index 0000000000..0b642abe8f --- /dev/null +++ b/recipes/autosport.recipe @@ -0,0 +1,31 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__author__ = 'MrStefan ' + +''' +www.autosport.com +''' + +from calibre.web.feeds.news import BasicNewsRecipe +import re + +class autosport(BasicNewsRecipe): + title = u'Autosport' + __author__ = 'MrStefan ' + language = 'en_GB' + description =u'Daily Formula 1 and motorsport news from the leading weekly motor racing magazine. The authority on Formula 1, F1, MotoGP, GP2, Champ Car, Le Mans...' + masthead_url='http://cdn.images.autosport.com/asdotcom.gif' + remove_empty_feeds= True + oldest_article = 1 + max_articles_per_feed = 100 + remove_javascript=True + no_stylesheets=True + + keep_only_tags =[] + keep_only_tags.append(dict(name = 'h1', attrs = {'class' : 'news_headline'})) + keep_only_tags.append(dict(name = 'td', attrs = {'class' : 'news_article_author'})) + keep_only_tags.append(dict(name = 'td', attrs = {'class' : 'news_article_date'})) + keep_only_tags.append(dict(name = 'p')) + + feeds = [(u'ALL NEWS', u'http://www.autosport.com/rss/allnews.xml')] \ No newline at end of file diff --git a/recipes/blognexto.recipe b/recipes/blognexto.recipe new file mode 100644 index 0000000000..76f413bb69 --- /dev/null +++ b/recipes/blognexto.recipe @@ -0,0 +1,29 @@ +from calibre.web.feeds.news import BasicNewsRecipe +import re + +class blognexto(BasicNewsRecipe): + title = 'BLOG.NEXTO.pl' + __author__ = 'MrStefan ' + language = 'pl' + description ='o e-publikacjach prawie wszystko' + masthead_url='http://blog.nexto.pl/wp-content/uploads/2012/04/logo-blog-nexto.pl_.jpg' + remove_empty_feeds= True + oldest_article = 7 + max_articles_per_feed = 100 + remove_javascript=True + no_stylesheets=True + + + keep_only_tags =[] + keep_only_tags.append(dict(name = 'div', attrs = {'id' : 'content'})) + + remove_tags =[] + remove_tags.append(dict(name = 'div', attrs = {'class' : 'comment-cloud'})) + remove_tags.append(dict(name = 'p', attrs = {'class' : 'post-date1'})) + remove_tags.append(dict(name = 'div', attrs = {'class' : 'fb-like'})) + remove_tags.append(dict(name = 'div', attrs = {'class' : 'tags'})) + remove_tags.append(dict(name = 'div', attrs = {'class' : 'postnavi'})) + remove_tags.append(dict(name = 'div', attrs = {'class' : 'commments-box'})) + remove_tags.append(dict(name = 'div', attrs = {'id' : 'respond'})) + + feeds = [('Artykuly', 'http://feeds.feedburner.com/blognexto')] diff --git a/recipes/brewiarz.recipe b/recipes/brewiarz.recipe new file mode 100644 index 0000000000..81f3dc93ae --- /dev/null +++ b/recipes/brewiarz.recipe @@ -0,0 +1,141 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' + +from calibre.web.feeds.news import BasicNewsRecipe +import datetime +from calibre.ebooks.BeautifulSoup import BeautifulSoup, NavigableString, Tag + + +class brewiarz(BasicNewsRecipe): + title = u'Brewiarz' + __author__ = 'Artur Stachecki ' + language = 'pl' + description = u'Serwis poświęcony Liturgii Godzin (brewiarzowi) - formie codziennej modlitwy Kościoła katolickiego.' + masthead_url = 'http://brewiarz.pl/images/logo2.gif' + max_articles_per_feed = 100 + remove_javascript = True + no_stylesheets = True + publication_type = 'newspaper' + next_days = 1 + + def parse_index(self): + dec2rom_dict = {"01": "i", "02": "ii", "03": "iii", "04": "iv", + "05": "v", "06": "vi", "07": "vii", "08": "viii", + "09": "ix", "10": "x", "11": "xi", "12": "xii"} + + weekday_dict = {"Sunday": "Niedziela", "Monday": "Poniedziałek", "Tuesday": "Wtorek", + "Wednesday": "Środa", "Thursday": "Czwartek", "Friday": "Piątek", "Saturday": "Sobota"} + + now = datetime.datetime.now() + + feeds = [] + for i in range(0, self.next_days): + url_date = now + datetime.timedelta(days=i) + url_date_month = url_date.strftime("%m") + url_date_month_roman = dec2rom_dict[url_date_month] + url_date_day = url_date.strftime("%d") + url_date_year = url_date.strftime("%Y")[2:] + url_date_weekday = url_date.strftime("%A") + url_date_weekday_pl = weekday_dict[url_date_weekday] + + url = "http://brewiarz.pl/" + url_date_month_roman + "_" + url_date_year + "/" + url_date_day + url_date_month + "/index.php3" + articles = self.parse_pages(url) + if articles: + title = url_date_weekday_pl + " " + url_date_day + "." + url_date_month + "." + url_date_year + feeds.append((title, articles)) + else: + sectors = self.get_sectors(url) + for subpage in sectors: + title = url_date_weekday_pl + " " + url_date_day + "." + url_date_month + "." + url_date_year + " - " + subpage.string + url = "http://brewiarz.pl/" + url_date_month_roman + "_" + url_date_year + "/" + url_date_day + url_date_month + "/" + subpage['href'] + print(url) + articles = self.parse_pages(url) + if articles: + feeds.append((title, articles)) + return feeds + + def get_sectors(self, url): + sectors = [] + soup = self.index_to_soup(url) + sectors_table = soup.find(name='table', attrs={'width': '490'}) + sector_links = sectors_table.findAll(name='a') + for sector_links_modified in sector_links: + link_parent_text = sector_links_modified.findParent(name='div').text + if link_parent_text: + sector_links_modified.text = link_parent_text.text + sectors.append(sector_links_modified) + return sectors + + def parse_pages(self, url): + current_articles = [] + soup = self.index_to_soup(url) + www = soup.find(attrs={'class': 'www'}) + if www: + box_title = www.find(text='Teksty LG') + article_box_parent = box_title.findParent('ul') + article_box_sibling = article_box_parent.findNextSibling('ul') + for li in article_box_sibling.findAll('li'): + link = li.find(name='a') + ol = link.findNextSibling(name='ol') + if ol: + sublinks = ol.findAll(name='a') + for sublink in sublinks: + link_title = self.tag_to_string(link) + " - " + self.tag_to_string(sublink) + link_url_print = re.sub('php3', 'php3?kr=_druk&wr=lg&', sublink['href']) + link_url = url[:-10] + link_url_print + current_articles.append({'title': link_title, + 'url': link_url, 'description': '', 'date': ''}) + else: + if link.findParent(name = 'ol'): + continue + else: + link_title = self.tag_to_string(link) + link_url_print = re.sub('php3', 'php3?kr=_druk&wr=lg&', link['href']) + link_url = url[:-10] + link_url_print + current_articles.append({'title': link_title, + 'url': link_url, 'description': '', 'date': ''}) + return current_articles + else: + return None + + def preprocess_html(self, soup): + footer = soup.find(name='a', attrs={'href': 'http://brewiarz.pl'}) + footer_parent = footer.findParent('div') + footer_parent.extract() + + header = soup.find(text='http://brewiarz.pl') + header_parent = header.findParent('div') + header_parent.extract() + + subheader = soup.find(text='Kolor szat:').findParent('div') + subheader.extract() + + color = soup.find('b') + color.extract() + + cleaned = self.strip_tags(soup) + + div = cleaned.findAll(name='div') + div[1].extract() + div[2].extract() + div[3].extract() + + return cleaned + + def strip_tags(self, soup_dirty): + VALID_TAGS = ['p', 'div', 'br', 'b', 'a', 'title', 'head', 'html', 'body'] + + for tag in soup_dirty.findAll(True): + if tag.name not in VALID_TAGS: + for i, x in enumerate(tag.parent.contents): + if x == tag: + break + else: + print "Can't find", tag, "in", tag.parent + continue + for r in reversed(tag.contents): + tag.parent.insert(i, r) + tag.extract() + + return soup_dirty diff --git a/recipes/dobreprogamy.recipe b/recipes/dobreprogamy.recipe index 5254694d24..a4e24ac61b 100644 --- a/recipes/dobreprogamy.recipe +++ b/recipes/dobreprogamy.recipe @@ -6,7 +6,6 @@ class Dobreprogramy_pl(BasicNewsRecipe): __author__ = 'fenuks' __licence__ ='GPL v3' category = 'IT' - language = 'pl' masthead_url='http://static.dpcdn.pl/css/Black/Images/header_logo_napis_fullVersion.png' cover_url = 'http://userlogos.org/files/logos/Karmody/dobreprogramy_01.png' description = u'Aktualności i blogi z dobreprogramy.pl' @@ -29,4 +28,4 @@ class Dobreprogramy_pl(BasicNewsRecipe): for a in soup('a'): if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']: a['href']=self.index + a['href'] - return soup \ No newline at end of file + return soup diff --git a/recipes/icons/autosport.png b/recipes/icons/autosport.png new file mode 100644 index 0000000000000000000000000000000000000000..0c84c96a0b04d0c58a38159dcce014012b75e3ec GIT binary patch literal 415 zcmex=_7<>0R|=}CPo%U zR+tAV3yoWDr#Re~W# zL+$kA6SCz`JrysSb+DTB*p#T03?dMz(K(4NX>2M>!^173{3np2tkmS6Z*3LfIy8DX&rLE^s z^WLU}(59t&qZcU8kV@C^2= z12EOpp`@W15L$+!Pdya!bd$cZmQ$Q7af%GPDR8lgqQh=7V&S|SYML?&GbFSONkl~2 z(l_jUoB(rF=0-OBzo2@*03s`zwY>+<8&CdCyYJ43h(H9@JB`@Odk~3e(f-*=3HB?u zV83?fK$z_%Ep6vfIrJKqwhl16qo|*iSHRc|6mtu|GiB1ymgk`@&Vnc7007zUg!$xI hn43>QDzzQ}`~|W(WW?|Uyuknf002ovPDHLkV1ms+JWc=r literal 0 HcmV?d00001 diff --git a/recipes/icons/brewiarz.png b/recipes/icons/brewiarz.png new file mode 100644 index 0000000000000000000000000000000000000000..b47dfc95f6d715fbd085bed8766c2117ef10d36b GIT binary patch literal 982 zcmWktO=w(I6u!UjzWd(1naNBi$s}objU;Wf#wb;^;38>4I>pu+x~RBOu%s!rR+1KV zsjG?$T`9#)aU%%bxDZM=N{b3oXpBlVtwfTx(h#z2iaod z=rmj5UiBI4yQ8m+=Fr6A^vT6T@18w->f9nYG&?!M{@mM7pE-YO&cFEV*toy% z!ovKyIlpma#y@g=dd9!_;`r!(|Gf)uy)(b~f&a4q((?{@V(d!*aDQWJ;>5lhEaU$q z{NkIR8DkJ*&N*X*QJjUHsIwhuok0BH;YJXIl-WJ??%rJ$7hzJXburtPnesdjAry*4 zx}16(w8QOYOF#gbWfxqCuu=Jf9GOf&V<@BBQ5dTX5S-^kk|kl9Wf@k6Xh01z24ujb zX+#Vnt?f3lnOiJ|N&szADYl{@Ov9)Xs7&VqH6T)_+kt?ZEZzI3UhgdtFN~9%=~T)z zNmQKYRhEi0kruIx(oqtLSnKuQ3AS8Ctjs75A<`f0rvQls6M~9TDk_j| z;kC_x6VeKFbE{(+BgLc6^3CqM*9N)n^Ym~P!_x=$lKqoUy*96m5>cpCvbJ)6s}lj5 zsehNMcdpmLMmLgvSqzkP*+KBb@m{Uu1H-(7C(*!jL6RmxkhLH2mFxXmfA{jRX2TIr z)?DQ9oP7e{KhVR01_lM%tVZ;4bqvVx$U2 zE-Z>JSbO+`W5FIKDi#_l>OMynNj{t2Xz?pQ1{EW5$z?^3$<8T}u?$T*HInt&X1D@( zIm)q!DL9vx@Yl=nAcMYxa5L8TV^y&kcZj7>5v!4c@{2O3v*cQL;=(64{D04qSZ3ljd@)R|#AGWSOQbD%MOZ0hRdn;-oT0l=89 literal 0 HcmV?d00001 diff --git a/recipes/icons/naszdziennik.png b/recipes/icons/naszdziennik.png new file mode 100644 index 0000000000000000000000000000000000000000..b557a7835ecbaf29fad4d7fd8106d3d2ffef61eb GIT binary patch literal 698 zcmV;r0!96aP)Px#1ZP1_K>z@;j|==^1poj532;bRa{vGi!~g&e!~vBn4jTXf0#8XqK~y+TrISHO z+Hn-ey?U525dwDzbt@H&u3kDcQ4^(sCe02)=MI)OT&W!bK@++LiXB>zr>S@_YT!ZI z#p;lfOU*JvSvuOF3EBMa_xm08$LsvTpZECjd!P5-_q{(410LRh@nC>Xr-QqC9kp5w ztyT-&ZWqmF6Qxp#S$cnady9*U3$)v9mb2IEG0On0`+NAjUWh~@=%=Ug=lL0)wKb^K zYQ*DljE;^%r_(~O(?O+DA-2Dd?$Z+opk6Gt4?dp{fj~g2xx=U3UD)k*)UL1ba%c!z ztqzSw10y3NcqJC&>+x|v;4{heZe|8jsT7Gs0!Lqt;B-3igOvP6CWCo)mIYWW7KjNT zl}hykqR}WmtgPVe)D(O^AA>j?4tysXye3ey*~|h=CKDtQ3DW6wKOh_qBNPfTh|y?7 zBocwwpdn4h1AP$-1WX2aRp83&+Qx3;$6cDr$KaKPx)r<0+juCK4d z@AqSIaS_2_kX2QwR5(DLtTA2FtE(%VpP!>#E`zpJC=`&(1C2E?LZpbRAv z>!`?Z5%3+VjiBS0l8(?A2C)9=GXqTnpeoxpF6kaUR-dRbkR*_Ff3zE=eTT^2KBBY?VCB{w zqZ>B^M*plEgtSTp`coq#luTyqiH`LrNm`>Zsu)JeU?WcuAOuU(aGqAH$-aL5-_O7& z)F*2JNe;fzKk6LQ4eIJT)&9c(>${)ppT9Iv6tSmrTmn{gq{^Vv0YL2B11ycnbuDe+ zz?4*~+)}LTcuWls>+bzQjmT(xRC}|9LNRUY9b+azXm8#o_8MCe+r+?vG+HhPKv`U3 zB**?vb>N5*m;DGVSQ^PivVTB#=QmwPC;6&ZT~iN8>f3vZzo0&RAe2leI1%xw^-X^4|M!t_8`p0FJQ_2Ix%T@gn&*p7*D z17$Xj2*X2WW{vTK(dn~MA2#CRg*Z<~z1gs}BW&Y{ddmV!@Mlceo`ol`m47aT zr@9~xZdlNw(X?zu;wsqI32}7C_yWXj2Ey{df)~RMuE;cB*wG!fal#gSj)g3dFO7w# zI^(O;h~gc@?lN4IEDsHb?VS~I2`Fbi>N^i%OhabOMQ8h=bNQHmup&mJNL&p&xS(wR z(VUI2Ekh9P2Bkk4Hw= zR{7o!45g#ycH-YR-_OUJ^f|bixa+k2faLmt@{q3!1IjM{B6gWHEWUX^Uv4MqG_jZA z&cEGACEO$8vE1pJguNe!j_5m2b)MY+vf_o=-X6_jlG|}~f#SG|B|jlsYVlV4-BRFlNP3%UOK#|*?eNrxpk@Mb_9Q;=mG~cNsktWn;1;lZ$My? z7n!md_{g!n^znvJ@Rn_(!ow1Ro@Asloy3RAPnugLzwTu*gMwOav6w|gDW2(nZX>Ns zS4d(fm9qHR7M2!I>q3R$1!x<~u2DPX0rLTa^JQD6wA;IqF>e*evnBo0`<0^ZORwgx zZF8E@yuQ||6xLb01m^aIY}X!_=Hr&D06sVK>W#D~xOFwax3=%)l~!ClrR!SLs$ReT z>l_(8cr&Mmo^ZQZcWUCKLL@Zv{JKo5d`Y=<$V>Q_*lIK*x{5QnZNRkSM##7M)yL-l z=^G$HLK$tr7d@K)KDq6f9@tzN7% zDCe%7Tj}Jk-tXrWJ}Q5>sWtB{f9M{{FOPa`n-uy*m?if7(J~uJ!=gLa*j_z?4gD3_ zuNU+uE_@mxI#^Xw)Lyx(Dw^ZK<2qbk{p$tIiyv~m8#>|>RmFso95 zSDIHnm32y;&NpUvqi<~8VPU=5vUw16q5D%$;-;M#xFg;L5y=IaX$4t=jQlL%ARgC` z$M*AMa|082Jc0iLfxrJ8uAhL*b+>Pv_`ibOO_@3CwvHEY1KC`Dg5P`rFG#=x1w2r2 Ts5ssL3ZSTnxbQ2BQ+NFbOJ2nZ literal 0 HcmV?d00001