From 702658aad09e31999210237b7aa2ae05b19d1950 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20D=C5=82ugosz?= Date: Thu, 4 Apr 2013 21:20:33 +0200 Subject: [PATCH] recipe for forbes.pl --- recipes/forbes_pl.recipe | 54 ++++++++++++++++++++++++++++++++++++ recipes/icons/forbes_pl.png | Bin 0 -> 1179 bytes 2 files changed, 54 insertions(+) create mode 100644 recipes/forbes_pl.recipe create mode 100644 recipes/icons/forbes_pl.png diff --git a/recipes/forbes_pl.recipe b/recipes/forbes_pl.recipe new file mode 100644 index 0000000000..ec65ecf279 --- /dev/null +++ b/recipes/forbes_pl.recipe @@ -0,0 +1,54 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' + +from calibre.web.feeds.news import BasicNewsRecipe +import datetime +import re +from calibre.ebooks.BeautifulSoup import Comment + +class forbes_pl(BasicNewsRecipe): + title = u'Forbes.pl' + __author__ = 'Artur Stachecki ' + language = 'pl' + description = u'Biznes, finanse, gospodarka, strategie, wiadomości gospodarcze, analizy finasowe i strategiczne.' + oldest_article = 1 + index = 'http://www.forbes.pl' + cover_url = 'http://www.forbes.pl/resources/front/images/logo.png' + max_articles_per_feed = 100 + extra_css = '.Block-Photo {float:left; max-width: 300px; margin-right: 5px;}' + preprocess_regexps = [(re.compile(ur'

()?(Czytaj|Zobacz) (też|także):.*?

', re.DOTALL), lambda match: ''), (re.compile(ur'Zobacz:.*?', re.DOTALL), lambda match: '')] + remove_javascript = True + no_stylesheets = True + now = datetime.datetime.now() + yesterday = now - datetime.timedelta(hours=24) + yesterday = yesterday.strftime("%d.%m.%Y %H:%M:%S") + pages_count = 4 + keep_only_tags = [dict(attrs={'class':['Block-Node Content-Article ', 'Block-Node Content-Article piano-closed']})] + remove_tags = [dict(attrs={'class':['Keywords Styled', 'twitter-share-button', 'Block-List-Related Block-List']})] + + feeds = [(u'Wszystkie', 'http://www.forbes.pl/rss')] + + '''def preprocess_html(self, soup): + self.append_page(soup, soup.body) + return soup + + + def append_page(self, soup, appendtag): + cleanup = False + nexturl = appendtag.find('a', attrs={'class':'next'}) + if nexturl: + cleanup = True + while nexturl: + soup2 = self.index_to_soup(self.index + nexturl['href']) + nexturl = soup2.find('a', attrs={'class':'next'}) + pagetext = soup2.findAll(id='article-body-wrapper') + if not pagetext: + pagetext = soup2.findAll(attrs={'class':'Article-Entry Styled'}) + for comment in pagetext.findAll(text=lambda text:isinstance(text, Comment)): + comment.extract() + pos = len(appendtag.contents) + appendtag.insert(pos, pagetext) + if cleanup: + for r in appendtag.findAll(attrs={'class':'paginator'}): + r.extract()''' \ No newline at end of file diff --git a/recipes/icons/forbes_pl.png b/recipes/icons/forbes_pl.png new file mode 100644 index 0000000000000000000000000000000000000000..feaa47487a5f0bf88df6de03eb3db4552191f7f5 GIT binary patch literal 1179 zcmeAS@N?(olHy`uVBq!ia0vp^0wB!61|;P_|4#%`jKx9jP7LeL$-D$|*pj^6T^Rm@ z;DWu&Cj&(|3p^r=85p>QL70(Y)*K0-AbW|YuPgf<4skwt!$UoVdl?v5q&;06Lo5Ut z1z6tyW?a91!~dT@KLAyd3jQ;kIdheP1*AD5;sI3mcOcD($E5#2IZmiN2axVzU_q$< z&9Gxf0E4@`1zhOcw|5L|Y-|kw{`~{ed|+`###$ha(8dH;&A`gw;jxop!Gb+-AuB68 z23A&A26lF5hQ`JUsQ7xgJcxlX3M%~I0YZqAlNBf$0L0%J8X6ul7#pv_(1K76H2@o> zuP?yx{{26OoSdydX;>IQR3do+Q|tHdpg{e_(BJRHASro`;o(CKAm0Z?Azl|8K75*i zgX2Fi9QYU*8Ser$nga136a(jj9aKR_4g0-1M!7}fb8c?=gQDg6V9g8?TeD7As2@&^!m0x@0#{xa0nSu(`M zIROn-K+1pI#5t^cX`UOaK@LpbP?XDhPo5 uIswc=B|uI`cRoZ9%jwfsfs#F_>Pg`FK8an=XpprEq{Y+K&t;ucLK6U@R)7@% literal 0 HcmV?d00001