From 249afa411dddc382d5bafc3d4bcc113109c4094a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20D=C5=82ugosz?= Date: Sat, 23 Mar 2013 13:14:38 +0100 Subject: [PATCH 1/5] icon for kdefamily_pl --- recipes/icons/kdefamily_pl.png | Bin 0 -> 857 bytes 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 recipes/icons/kdefamily_pl.png diff --git a/recipes/icons/kdefamily_pl.png b/recipes/icons/kdefamily_pl.png new file mode 100644 index 0000000000000000000000000000000000000000..22335bb2e4a736e00168203a08b4657640bab5fe GIT binary patch literal 857 zcmeAS@N?(olHy`uVBq!ia0vp^0wB!63?wyl`GbKJOS+@4BLl<6e(pbstUx|vage(c z!@6@aFM%9|WRD45bDP46hOx7_4S6Fo+k-*%fF5)K?hb z6XN>+|NqyoUw3qL%$zy%-@ku9fBq~hE1NcL+Pin}%FD~2J$p8L_U!ul`i6#v%*@QI zSFfHrb!y(cdH??Z?dj>BG=1LLa~FR7`gP>UkwuFZ{r&s*_wQdNH67-DdF3^ozkmOp zG-=ZI?c2Y9|NipTtJw=ySoq{bXVl)k_n^7Cxw5jdwsnHBdz!I#UT|W`moK0G{rlV9 z-Tmm%qn55I#=g1c{sr529XNC5%#R;G{`~n>Ue{;rp8D?nhdFcRJbwK6@BhCoy)#VQ zlD>TT^6&ruHEY(~ym|A_-{0Mn=bHKCoH}#y!Gi}yMMd}T-(S6Mi-mWVMLZgGY`VyCztAq?&u@E?Bze&!6A_{{B00;zV_Ib$WXGlc&#~KY#8YS7_>+ zol)GlZqxR?`wuN&v!SNBx2~@4-~WH_KYa9z$TtfrGY>4W2q>`(EH?Ab+Od0|b6|#< ze_mbdgx)E0dnV0JNzeZJ^-E)8DCQO|@ckbMRf`X^<(Z@ismgMd3!qCAg>jC6& z7I;J!Gca%qgD@k*tT_@u!E2r_jv*44lM{d-B`Ga2AvO7VLIabZUY=Z>T^*yNM!ZLa z%ap0EJ&rL>LTAz^PMS6`mC@+Q6VV7!NePQ7Q$j Date: Sat, 23 Mar 2013 13:15:06 +0100 Subject: [PATCH 2/5] rewritten interia recipes --- recipes/interia_fakty.recipe | 47 +++++++++++++++++------ recipes/interia_sport.recipe | 74 ++++++++++++++++-------------------- 2 files changed, 67 insertions(+), 54 deletions(-) diff --git a/recipes/interia_fakty.recipe b/recipes/interia_fakty.recipe index 74cf56b267..baedd35d0c 100644 --- a/recipes/interia_fakty.recipe +++ b/recipes/interia_fakty.recipe @@ -1,7 +1,7 @@ #!/usr/bin/env python __license__ = 'GPL v3' -__copyright__ = u'2010, Tomasz Dlugosz ' +__copyright__ = u'2010-2013, Tomasz Dlugosz ' ''' fakty.interia.pl ''' @@ -12,12 +12,13 @@ class InteriaFakty(BasicNewsRecipe): title = u'Interia.pl - Fakty' description = u'Fakty ze strony interia.pl' language = 'pl' - oldest_article = 7 + oldest_article = 1 __author__ = u'Tomasz D\u0142ugosz' - simultaneous_downloads = 2 no_stylesheets = True remove_javascript = True - max_articles_per_feed = 100 + remove_empty_feeds= True + use_embedded_content = False + ignore_duplicate_articles = {'title', 'url'} feeds = [(u'Kraj', u'http://kanaly.rss.interia.pl/kraj.xml'), (u'\u015awiat', u'http://kanaly.rss.interia.pl/swiat.xml'), @@ -26,14 +27,36 @@ class InteriaFakty(BasicNewsRecipe): (u'Wywiady', u'http://kanaly.rss.interia.pl/wywiady.xml'), (u'Ciekawostki', u'http://kanaly.rss.interia.pl/ciekawostki.xml')] - keep_only_tags = [dict(name='div', attrs={'id':'article'})] + keep_only_tags = [ + dict(name='h1'), + dict(name='div', attrs={'class': ['lead textContent', 'text textContent', 'source']})] - remove_tags = [ - dict(name='div', attrs={'class':'box fontSizeSwitch'}), - dict(name='div', attrs={'class':'clear'}), - dict(name='div', attrs={'class':'embed embedLeft articleEmbedArticleList articleEmbedArticleListTitle'}), - dict(name='span', attrs={'class':'keywords'})] + remove_tags = [dict(name='div', attrs={'class':['embed embedAd', 'REMOVE', 'boxHeader']})] + + preprocess_regexps = [ + (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in + [ + (r'embed embed(Left|Right|Center) articleEmbed(Audio|Wideo articleEmbedVideo|ArticleFull|ArticleTitle|ArticleListTitle|AlbumHorizontal)">', lambda match: 'REMOVE">'), + (r'
', lambda match: ''), + (r'

' +__copyright__ = u'2010-2013, Tomasz Dlugosz ' ''' sport.interia.pl ''' @@ -13,61 +13,51 @@ class InteriaSport(BasicNewsRecipe): title = u'Interia.pl - Sport' description = u'Sport ze strony interia.pl' language = 'pl' - oldest_article = 7 + oldest_article = 1 __author__ = u'Tomasz D\u0142ugosz' - simultaneous_downloads = 3 no_stylesheets = True remove_javascript = True - max_articles_per_feed = 100 + remove_empty_feeds= True + use_embedded_content = False + ignore_duplicate_articles = {'title', 'url'} feeds = [(u'Wydarzenia sportowe', u'http://kanaly.rss.interia.pl/sport.xml'), (u'Pi\u0142ka no\u017cna', u'http://kanaly.rss.interia.pl/pilka_nozna.xml'), - (u'Siatk\xf3wka', u'http://kanaly.rss.interia.pl/siatkowka.xml'), (u'Koszyk\xf3wka', u'http://kanaly.rss.interia.pl/koszykowka.xml'), - (u'NBA', u'http://kanaly.rss.interia.pl/nba.xml'), - (u'Kolarstwo', u'http://kanaly.rss.interia.pl/kolarstwo.xml'), - (u'\u017bu\u017cel', u'http://kanaly.rss.interia.pl/zuzel.xml'), (u'Tenis', u'http://kanaly.rss.interia.pl/tenis.xml')] - keep_only_tags = [dict(name='div', attrs={'id':'article'})] + keep_only_tags = [ + dict(name='h1'), + dict(name='div', attrs={'class': ['lead textContent', 'text textContent', 'source']})] - remove_tags = [dict(name='div', attrs={'class':'object gallery'}), - dict(name='div', attrs={'class':'box fontSizeSwitch'})] - - extra_css = ''' - .articleDate { - font-size: 0.5em; - color: black; - } - - .articleFoto { - display: block; - font-family: sans; - font-size: 0.5em; - text-indent: 0 - color: black; - } - - .articleText { - display: block; - margin-bottom: 1em; - margin-left: 0; - margin-right: 0; - margin-top: 1em - color: black; - } - - .articleLead { - font-size: 1.2em; - } - ''' + remove_tags = [dict(name='div', attrs={'class':['embed embedAd', 'REMOVE', 'boxHeader']})] preprocess_regexps = [ (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in [ (r'

', lambda match: ''), - # FIXME - #(r'(
)(.*?)()(.*?)()', lambda match: '\1\2\4'), - (r'

()?(ZOBACZ|CZYTAJ) T.*?

', lambda match: '
') + (r'

()?(ZOBACZ|CZYTAJ) T.*?', lambda match: ''), + (r'embed embed(Left|Right|Center) articleEmbed(Audio|Wideo articleEmbedVideo|ArticleFull|ArticleTitle|ArticleListTitle|AlbumHorizontal)">', lambda match: 'REMOVE">'), + (r'

', lambda match: ''), + (r'

Date: Sat, 23 Mar 2013 13:19:26 +0100 Subject: [PATCH 3/5] add km_blog - blog J. Korwin-Mikke --- recipes/icons/km_blog.png | Bin 0 -> 532 bytes recipes/km_blog.recipe | 37 +++++++++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+) create mode 100644 recipes/icons/km_blog.png create mode 100644 recipes/km_blog.recipe diff --git a/recipes/icons/km_blog.png b/recipes/icons/km_blog.png new file mode 100644 index 0000000000000000000000000000000000000000..22bb9350994da8415ae65895bc31d317028605c2 GIT binary patch literal 532 zcmeAS@N?(olHy`uVBq!ia0vp^0wB!63?wyl`GbKJOS+@4BLl<6e(pbstUx|vage(c z!@6@aFM%9|WRD45bDP46hOx7_4S6Fo+k-*%fF5l&B8y z332`Z|NozVf4+SB^5)Z*KfnHb`S#_@w?AvP?WyUTbLY{MJ*TehId*2rx;<~+z3G{? zWX`%REz_3FS+%8S-kK}7?yT8$WX-lCXD(fN^6X7X*PN2JDQB+TNh#}@vIr>qWXrxI zHC=O-Y}zwr*_J!^o@_aEre@NdC$HXIx%cGAxhqFbU5TA2AOds-V@Z%-FoVOh8)+a; zlDE5yG*iNrB|r{mfk$L9koEv$x0Bg+Kt`3Pi(`ny<+a_>LWdMYoSS0;jznqHF4%jE zYtg&<)<-*9=gm(!r*eR;W3GscX^WCW=8L}vWpv-HwA_B-T=>n_I*GeW9v_WKUnjrs z?(~cWcV5Zw<9c<-;P}RKmmhtO|B_v$_Vm>1h_V!0b*3v`8QiEBhjN@7W>RdP`(kYX@0Ff`ING}JXT4>2^cGBL6;HqkaPv@$T* xE?Tq}MMG|WN@iLmNCQwc5E+CRSXvpGS(#WuGz2Y^IRn(d;OXk;vd$@?2>^25;d%f7 literal 0 HcmV?d00001 diff --git a/recipes/km_blog.recipe b/recipes/km_blog.recipe new file mode 100644 index 0000000000..106e5f30f3 --- /dev/null +++ b/recipes/km_blog.recipe @@ -0,0 +1,37 @@ + +__license__ = 'GPL v3' +__author__ = 'teepel , Artur Stachecki ' + +''' +korwin-mikke.pl/blog +''' + +from calibre.web.feeds.news import BasicNewsRecipe +import re + +class km_blog(BasicNewsRecipe): + title = u'Korwin-Mikke Blog' + __author__ = 'teepel ' + language = 'pl' + description ='Wiadomości z bloga korwin-mikke.pl/blog' + INDEX='http://korwin-mikke.pl/blog' + remove_empty_feeds= True + oldest_article = 7 + max_articles_per_feed = 100 + remove_javascript=True + no_stylesheets=True + remove_empty_feeds = True + + feeds = [(u'blog', u'http://korwin-mikke.pl/blog/rss')] + + keep_only_tags =[] + #this line should show title of the article, but it doesnt work + keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'posts view'})) + keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'text'})) + keep_only_tags.append(dict(name = 'h1')) + + remove_tags =[] + remove_tags.append(dict(name = 'p', attrs = {'class' : 'float_right'})) + remove_tags.append(dict(name = 'p', attrs = {'class' : 'date'})) + + remove_tags_after=[(dict(name = 'div', attrs = {'class': 'text'}))] From 489416c4a21397d01ac96bb849324c176f4f2ff9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20D=C5=82ugosz?= Date: Sat, 23 Mar 2013 13:23:48 +0100 Subject: [PATCH 4/5] remove obsolete re --- recipes/km_blog.recipe | 1 - 1 file changed, 1 deletion(-) diff --git a/recipes/km_blog.recipe b/recipes/km_blog.recipe index 106e5f30f3..614dbc03e5 100644 --- a/recipes/km_blog.recipe +++ b/recipes/km_blog.recipe @@ -7,7 +7,6 @@ korwin-mikke.pl/blog ''' from calibre.web.feeds.news import BasicNewsRecipe -import re class km_blog(BasicNewsRecipe): title = u'Korwin-Mikke Blog' From 39058f2552c4059dea5fd73ec19eb64d762aa411 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20D=C5=82ugosz?= Date: Sat, 23 Mar 2013 13:51:02 +0100 Subject: [PATCH 5/5] add trystero --- recipes/trystero.recipe | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 recipes/trystero.recipe diff --git a/recipes/trystero.recipe b/recipes/trystero.recipe new file mode 100644 index 0000000000..13a3d4b9db --- /dev/null +++ b/recipes/trystero.recipe @@ -0,0 +1,26 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = u'2013, Tomasz Dlugosz ' + +''' +trystero.pl +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class trystero(BasicNewsRecipe): + title = 'Trystero' + __author__ = u'Tomasz D\u0142ugosz' + language = 'pl' + description =u'Trystero.pl jest niezależnym blogiem finansowym. Publikowane na nim teksty dotyczą rynku kapitałowego, ekonomii, gospodarki i życia społecznego – w takiej mniej więcej kolejności.' + oldest_article = 7 + remove_javascript=True + no_stylesheets=True + + feeds = [(u'Newsy', u'http://www.trystero.pl/feed')] + + keep_only_tags = [ + dict(name='h1'), + dict(name='div', attrs={'class': ['post-content']})] +