From d6d4f9d444cb07df8ae6512560e4af041df56d05 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 5 Feb 2011 13:37:42 -0700 Subject: [PATCH] Korespondent and Kopalnia Wiedzy by Attis --- resources/images/news/kopalniawiedzy.png | Bin 0 -> 466 bytes resources/images/news/korespondent.png | Bin 0 -> 860 bytes resources/recipes/kopalniawiedzy.recipe | 80 +++++++++++++++++++++++ resources/recipes/korespondent.recipe | 40 ++++++++++++ 4 files changed, 120 insertions(+) create mode 100644 resources/images/news/kopalniawiedzy.png create mode 100644 resources/images/news/korespondent.png create mode 100644 resources/recipes/kopalniawiedzy.recipe create mode 100644 resources/recipes/korespondent.recipe diff --git a/resources/images/news/kopalniawiedzy.png b/resources/images/news/kopalniawiedzy.png new file mode 100644 index 0000000000000000000000000000000000000000..73a4a338aa25418b42a66c4aafced68e48e6f721 GIT binary patch literal 466 zcmeAS@N?(olHy`uVBq!ia0vp^0zk~q0VEhsNC=VVPs zNr_hLqX$JYcbH2G1-d48D?CzQk&#g;P?d4#aB|qd!tyd8hDF@`;WWLK&e!XI)jf!i zIQV>p!nVIH8=f-cyyFR9rz00yX7EiSL0Q0Ny2@_5Wj{of5-cxoSy8ql{qz-GI~D`M zuND)!Jii9=y=2@Ia8NQ+>1Oe+f+D8$k2BhnrN8!+-FzFwV9)=q>Dyyd-TlYU>oM`Y zK6E?(*2ek4nw7PT8fqq@@r$MzraqKpPN`}VFP_|;z+cXGYsm%oe3g&6?_cbCre9h2^#1PayJo5F1qOv`iEBhjN@7W> zRdP`(kYX@0Ff`OPFw!+J4KXyfGB&d^0kX}l3=I13?N3C}kei>9nN|tZU|^wZV5w_h g5&}|ZVr5{gYhVV{FjeJOGf)GAr>mdKI;Vst091CT&Hw-a literal 0 HcmV?d00001 diff --git a/resources/images/news/korespondent.png b/resources/images/news/korespondent.png new file mode 100644 index 0000000000000000000000000000000000000000..e2724b11b44a42e49a50f5e0dd327489ae2aa0c3 GIT binary patch literal 860 zcmV-i1Ec(jP)8rEZC~nV0Nhl$Vi#mYAR;$fVGOh#=5zGAJ}DBKqNv ziYQ7c5H0gZ(7#d$%L`00vGpTG(wdrsg=?E*ZJzCHo}FvICI_Qlc;MxIcplz|2a{zP z{x`uWo36qUA7&B;lE68HVN#qEsLV9&s!mPBu--A}rJm7+78QZ`ioKarKfLtrWq)MX z^^?U9n``#;TiOR_ec^ReFv6y*Rd@m+kYcQ$8@OR@s4vSML@Xz2Y7b<;UkS#tyGNY1 z#=|>apSKpgu+MpG1W_W)M#>wY*E1wpM(doQO{GYBi&hoYkT?qfKwLe(|A{Ec_@vWc zIkm7_QJSAVU6^B-2LP}=`_w%=<2j~`l%^xsQnac_O_A9S06+i$;0P0M)|cF`y*v8e zu^2cc17NNFj;`@JU$s(f(5u6Br8z@KEm>nEfw&C=0D$C6uO?k*`^T4>KQ9H!mEsH5 zg1*kCvNpX&#VGF^21&uhH(%A}CXVde_OZ96;xfW8X)}kCi_0I~xtUC%$Or(y7gzA$ zl|IK)hUNADG%y>r-sFy9Am2Gtdi!V3>D*hVO76uI0@Xd@v<*$K)X6enN&~#3qXPf{ zUVd0UH|q{r006ATX8Utn05UK! zF)c7MEig4yF*7ppnFgPtR mIV~_aR4_C;F*!OhH!UzWIxsMUSv7b70000'), + lambda match: '' ), + (re.compile(u'

'), + lambda match: '') + ] + + feeds = [ + (u'Biologia', u'http://kopalniawiedzy.pl/wiadomosci_biologia.rss'), + (u'Medycyna', u'http://kopalniawiedzy.pl/wiadomosci_medycyna.rss'), + (u'Psychologia', u'http://kopalniawiedzy.pl/wiadomosci_psychologia.rss'), + (u'Technologie', u'http://kopalniawiedzy.pl/wiadomosci_technologie.rss'), + (u'Ciekawostki', u'http://kopalniawiedzy.pl/wiadomosci_ciekawostki.rss'), + (u'Artykuły', u'http://kopalniawiedzy.pl/artykuly.rss') + ] + + def is_link_wanted(self, url, tag): + return tag['class'] == 'next' + + def remove_beyond(self, tag, next): + while tag is not None and getattr(tag, 'name', None) != 'body': + after = getattr(tag, next) + while after is not None: + ns = getattr(tag, next) + after.extract() + after = ns + tag = tag.parent + + def append_page(self, soup, appendtag, position): + pager = soup.find('a',attrs={'class':'next'}) + if pager: + nexturl = self.INDEX + pager['href'] + soup2 = self.index_to_soup(nexturl) + texttag = soup2.find('div', attrs={'id':'articleContent'}) + + tag = texttag.find(attrs={'class':'pages'}) + self.remove_beyond(tag, 'nextSibling') + + newpos = len(texttag.contents) + self.append_page(soup2,texttag,newpos) + + appendtag.insert(position,texttag) + + + def preprocess_html(self, soup): + self.append_page(soup, soup.body, 3) + + for item in soup.findAll('div',attrs={'class':'pages'}): + item.extract() + + for item in soup.findAll('p', attrs={'class':'wykop'}): + item.extract() + + return soup diff --git a/resources/recipes/korespondent.recipe b/resources/recipes/korespondent.recipe new file mode 100644 index 0000000000..aa9cf6e828 --- /dev/null +++ b/resources/recipes/korespondent.recipe @@ -0,0 +1,40 @@ +# -*- coding: utf-8 -*- + +__license__ = 'GPL v3' +__copyright__ = '2011, Attis ' +__version__ = 'v. 0.1' + +import re +from calibre.web.feeds.recipes import BasicNewsRecipe + +class KorespondentPL(BasicNewsRecipe): + title = u'Korespondent.pl' + publisher = u'Korespondent.pl' + description = u'Centrum wolnorynkowe - serwis ludzi wolnych' + encoding = 'utf-8' + __author__ = 'Attis' + language = 'pl' + oldest_article = 15 + max_articles_per_feed = 100 + remove_javascript = True + no_stylesheets = True + + keep_only_tags = [dict(name='div', attrs={'class':'publicystyka'})] + remove_tags = [{'name': 'meta'}, {'name':'div', 'attrs': {'class': 'zdjecie'} }] + extra_css = '.naglowek {font-size: small}\n .tytul {font-size: x-large; padding-bottom: 10px; padding-top: 30px} \n .external {font-size: small}' + + preprocess_regexps = [ + (re.compile(u'' ), + (re.compile(u'

Więcej'), + lambda match:'Więcej' ), + (re.compile(u'target="_blank"'), + lambda match:'target="_blank" class="external"' ), + (re.compile(u'

\nPoczytaj inne teksty w Serwisie wolnorynkowym Korespondent.pl.*', re.DOTALL|re.IGNORECASE), + lambda match: ''), + ] + + feeds = [(u'Serwis informacyjny', u'http://korespondent.pl/rss.xml')] +