From 5aabe909e7b9f7de873d83872f542ff495885751 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20D=C5=82ugosz?= Date: Mon, 19 Nov 2012 21:15:38 +0100 Subject: [PATCH 1/3] Rybinski blog --- recipes/rybinski.recipe | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 recipes/rybinski.recipe diff --git a/recipes/rybinski.recipe b/recipes/rybinski.recipe new file mode 100644 index 0000000000..aae73836fc --- /dev/null +++ b/recipes/rybinski.recipe @@ -0,0 +1,28 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = u'2012, Tomasz Dlugosz ' +''' +rybinski.eu +''' + + +class Rybinski(BasicNewsRecipe): + title = u'Rybinski.eu - economy of the XXI century' + description = u'Blog ekonomiczny dra hab. Krzysztofa Rybi\u0144skiego' + language = 'pl' + __author__ = u'Tomasz D\u0142ugosz' + oldest_article = 7 + max_articles_per_feed = 100 + no_stylesheets = True + + feeds = [(u'wpisy', u'http://www.rybinski.eu/?feed=rss2&lang=pl')] + + keep_only_tags = [dict(name='div', attrs={'class':'post'})] + + remove_tags = [ + dict(name = 'div', attrs = {'class' : 'post-meta-1'}), + dict(name = 'div', attrs = {'class' : 'post-meta-2'}), + dict(name = 'div', attrs = {'class' : 'post-comments'}) + ] + From 6d2aef58f8c34bd963f0e01a6b64b3d45cd527ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20D=C5=82ugosz?= Date: Thu, 22 Nov 2012 23:59:44 +0100 Subject: [PATCH 2/3] prawica.net --- recipes/icons/prawica_net.png | Bin 0 -> 609 bytes recipes/prawica_net.recipe | 41 ++++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+) create mode 100644 recipes/icons/prawica_net.png create mode 100644 recipes/prawica_net.recipe diff --git a/recipes/icons/prawica_net.png b/recipes/icons/prawica_net.png new file mode 100644 index 0000000000000000000000000000000000000000..76fb4a13e58a6157f51e3149acb155bccc2982e1 GIT binary patch literal 609 zcmeAS@N?(olHy`uVBq!ia0vp^{6H+g!3-q-bzC(AQY`6?zK#qG8~eHcB(ehejKx9j zP7LeL$-D$|6p}rHd>I(3)EF2VS{N990fib~Fff!FFfhDIU|_JC!N4G1FlSew4N&56 zfKQ04Nm!3rL~mf;9KW1d;l&GLDi$SHEsL*Q;-53yG`!a-amxSy|DU}4P|&gN&!4|3 zwacB8roR99^~sA5pT7J!bmop{#*Dvz|5-%!Sw!|7zi|K6yU+f)v%h}-X&TnEXw#|J z?>>hY&wum&i)B=QV)fFuAHE(rdoQkX@sk(tCoMgc*S0pJapm1duM?`5&Rut+xNH5j zyDy%-`uOep&lhh#?LKy+Zt}K^H=cIP-hKMY!^ukz9y@=3;l@*kPT$$M@AARZcQTt+ zHBQ};)3PQ^_WmTGcNmkr-CelluOB%8vL>2>S4={E+nQaGT^mw{BhHzXDJ$GEJ z$$-J-B4bF1$i{G&jmhut{`epNROPr(`sX5ZPlm%=bd1IPZ=4i)a_L<9%s2aG&E2a% zP0<%g^(vXUHY9NMX5L*Vx792-%U066A)9paSTczNcdn0?zHBucKk zHamC8zs-xkH}rq!`y#l1?u*UOxNEQ0EPg*r-l^kxxc+^8o(3V+EO(&0R7+eVN>UO_ zQmvAUQh^kMk%6I+uA!l>p?Qd*sg Date: Fri, 23 Nov 2012 00:00:06 +0100 Subject: [PATCH 3/3] fixed fronda.recipe --- recipes/fronda.recipe | 84 +++++++++++++++++++++++++++++++++---------- 1 file changed, 66 insertions(+), 18 deletions(-) diff --git a/recipes/fronda.recipe b/recipes/fronda.recipe index 452dca9068..06b86d83bb 100644 --- a/recipes/fronda.recipe +++ b/recipes/fronda.recipe @@ -1,39 +1,87 @@ #!/usr/bin/env python __license__ = 'GPL v3' -__copyright__ = u'2010, Tomasz Dlugosz ' +__copyright__ = u'2010-2012, Tomasz Dlugosz ' ''' fronda.pl ''' from calibre.web.feeds.news import BasicNewsRecipe -import re +from datetime import timedelta, date class Fronda(BasicNewsRecipe): title = u'Fronda.pl' publisher = u'Fronda.pl' - description = u'Portal po\u015bwi\u0119cony - Infformacje' + description = u'Portal po\u015bwi\u0119cony - Informacje' language = 'pl' __author__ = u'Tomasz D\u0142ugosz' oldest_article = 7 max_articles_per_feed = 100 use_embedded_content = False + no_stylesheets = True - feeds = [(u'Infformacje', u'http://fronda.pl/news/feed')] + extra_css = ''' + h1 {font-size:150%} + .body {text-align:left;} + ''' - keep_only_tags = [dict(name='h2', attrs={'class':'news_title'}), - dict(name='div', attrs={'class':'naglowek_tresc'}), - dict(name='div', attrs={'id':'czytaj'}) ] + earliest_date = date.today() - timedelta(days=oldest_article) - remove_tags = [dict(name='a', attrs={'class':'print'})] + def date_cut(self,datestr): + # eg. 5.11.2012, 12:07 + timestamp = datestr.split(',')[0] + parts = timestamp.split('.') + art_date = date(int(parts[2]),int(parts[1]),int(parts[0])) + return True if art_date < self.earliest_date else False - preprocess_regexps = [ - (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in - [ (r'

.*

', lambda match: ''), - (r'

.*

', lambda match: ''), - (r'

W.* lektury.*

', lambda match: ''), - (r'

Zobacz t.*?', lambda match: ''), - (r']*> 

', lambda match: ''), - (r'


', lambda match: ''), - (r'