From 276b6d32d40f4d9db9f68265ace8fa4368421ebf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20D=C5=82ugosz?= Date: Fri, 7 Oct 2016 12:32:36 +0200 Subject: [PATCH 1/5] recipes: remove astroflesz - this site is no more --- recipes/astroflesz.recipe | 30 ------------------------------ recipes/icons/astroflesz.png | Bin 739 -> 0 bytes 2 files changed, 30 deletions(-) delete mode 100644 recipes/astroflesz.recipe delete mode 100644 recipes/icons/astroflesz.png diff --git a/recipes/astroflesz.recipe b/recipes/astroflesz.recipe deleted file mode 100644 index d44aa994e5..0000000000 --- a/recipes/astroflesz.recipe +++ /dev/null @@ -1,30 +0,0 @@ -# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai -from calibre.web.feeds.news import BasicNewsRecipe - - -class Astroflesz(BasicNewsRecipe): - title = u'Astroflesz' - oldest_article = 7 - __author__ = 'fenuks' - description = u'astroflesz.pl - to portal poświęcony astronomii. Informuje zarówno o aktualnych wydarzeniach i odkryciach naukowych, jak również zapowiada ciekawe zjawiska astronomiczne' # noqa - category = 'astronomy' - language = 'pl' - cover_url = 'http://www.astroflesz.pl/templates/astroflesz/images/logo/logo.png' - ignore_duplicate_articles = {'title', 'url'} - max_articles_per_feed = 100 - no_stylesheets = True - use_embedded_content = False - remove_empty_feeds = True - remove_attributes = ['style'] - keep_only_tags = [dict(id="k2Container")] - remove_tags_after = dict(name='div', attrs={'class': 'itemLinks'}) - remove_tags = [dict(name='div', attrs={ - 'class': ['itemLinks', 'itemToolbar', 'itemRatingBlock']})] - feeds = [(u'Wszystkie', u'http://astroflesz.pl/?format=feed')] - - def postprocess_html(self, soup, first_fetch): - t = soup.find(attrs={'class': 'itemIntroText'}) - if t: - for i in t.findAll('img'): - i['style'] = 'float: left; margin-right: 5px;' - return soup diff --git a/recipes/icons/astroflesz.png b/recipes/icons/astroflesz.png deleted file mode 100644 index 2a0f5c2d878d4112a14d2c82775bcd2196ba5d9d..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 739 zcmV<90v!E`P)76M=d5CVi-K}-Nk zZMl^~TPUrilx|Ni?6U1{x7~KH<7sP1oXo>HbLN}*=9@!=6B_mqryy40Rq#X;KWqo> zQ^uZo%Y@mv;iDX6ERxhBycWu3<^d|522O_lpBqK*n!62VI~hMX$@pU;cAKBxI;0V& zM0mg$BANumiR*5%Va#-+LO;Fc!hPtp_N1=8yrrMX04=}~Bklwk#L7_~2*ksp`Io6r zS5fnFX~)fLaFT`voA>;^^1{cJD|;lq;yjG8`Y23{*ml|dL)T&aLTvi>QIrXCS#DMB z!&hF^6%@bem@y_*AmcKX0#}&xM0vs!ila$gZ1l2ySm%x;@I#Q5N*cPy%#K1`<Z5o1&`CfSb| zBBeoxVfAixOGHp{G@*iiaJFe=p|)q@R#oHKqUZR)N`OTV#^pW4608h!o9+iSpS@Iy zPC+CoJ^IjU^+icmaiG|x(%NgU>M22@1x!^rWHLl{BfNV5earloI|J~sck*0OnZ+H( zooCkfFFva}bEl-NsRtz#Ka=)R0)cT@mM(*-w7DZG7c`; zzfAsK+uh&xQ<#k_mQ#y z#7X)ALv`2avNM2rV$#P+@KT~{_~DeYXn9a*s(_mC6u zW>0i=eR Date: Fri, 7 Oct 2016 21:16:58 +0200 Subject: [PATCH 2/5] recipes: remove websecurity.pl - website suspended --- recipes/icons/websecurity_pl.png | Bin 545 -> 0 bytes recipes/websecurity_pl.recipe | 27 --------------------------- 2 files changed, 27 deletions(-) delete mode 100644 recipes/icons/websecurity_pl.png delete mode 100644 recipes/websecurity_pl.recipe diff --git a/recipes/icons/websecurity_pl.png b/recipes/icons/websecurity_pl.png deleted file mode 100644 index 6db9045f33f7a554effe40ab9422ee12bdb81e82..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 545 zcmV++0^a?JP)R%vuiPf_j^(MX4T0{$|qNes% zPi-%H33~M>jp*)XlkCfG!|wY!L$Zrt>jpk#nEAf%n|V9i0Ay^{xL~Qq2TL)o+au;4 z@9MQ|`0-Q5$rZLY4-|$u7L^-WTN~X?+6rmk)Bi<28yj3EFyIRb4NK7{ZN;b&i^Qa@ znL5=z|E0UKm4R=odYx|Bh;x=gv7*(O zK-g%&pPT0#*HR2L6XNz=NKZgY2`L28k0Z~6xO0y+|2GI;*T6s7>A;)53HRhF`16ku z{rZjI%_4V^(n0k;WV;O+MTp!M9jc*q<{X+wj-!44Qc4GelP|`%()>UBMl{mK@;kJq z&ZfVC|6~Ed+aZ*>if?DYW31o+!HIHs>t<=W;WSgM()ZCjya5O?D2%fYKmgK@@k$;|`Jilv$3iGe>mm$l8E zJ7|4RQ1KPf{b%zyr|4WOvzE^!1~Jk19~o!w{$Uaoi3P$P@YGh#BG*Wh=o5p@a7*PO jYI~POxW~#vTj~A)kmY|6_5y-I00000NkvXXu0mjf*1!ZE diff --git a/recipes/websecurity_pl.recipe b/recipes/websecurity_pl.recipe deleted file mode 100644 index 9a404260b6..0000000000 --- a/recipes/websecurity_pl.recipe +++ /dev/null @@ -1,27 +0,0 @@ -__license__ = 'GPL v3' -from calibre.web.feeds.news import BasicNewsRecipe - - -class WebSecurity(BasicNewsRecipe): - title = u'WebSecurity' - __author__ = 'fenuks' - description = u'WebSecurity.pl to największy w Polsce portal o bezpieczeństwie sieciowym.' - category = '' - language = 'pl' - cover_url = 'http://websecurity.pl/images/websecurity-logo.png' - masthead_url = '' - use_embedded_content = False - oldest_article = 7 - max_articles_per_feed = 100 - no_stylesheets = True - remove_empty_feeds = True - remove_javascript = True - remove_attributes = ['style', 'font'] - ignore_duplicate_articles = {'title', 'url'} - - keep_only_tags = [ - dict(attrs={'class': 'article single'}), dict(id='content')] - remove_tags = [dict(attrs={'class': ['sociable', 'no-comments']})] - remove_tags_after = dict(attrs={'class': 'sociable'}) - feeds = [(u'Wszystkie', u'http://websecurity.pl/feed/'), (u'Aktualno\u015bci', u'http://websecurity.pl/aktualnosci/feed/'), - (u'Artyku\u0142y', u'http://websecurity.pl/artykuly/feed/'), (u'Blogosfera', u'http://websecurity.pl/blogosfera/wpisy/feed/')] From f7d82dc9a9ee1b1656a46a5a4da7e0e44c3e87c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20D=C5=82ugosz?= Date: Fri, 7 Oct 2016 21:18:13 +0200 Subject: [PATCH 3/5] recipes: remove biolog_pl - no new content since years --- recipes/biolog_pl.recipe | 31 ------------------------------- recipes/icons/biolog_pl.png | Bin 632 -> 0 bytes 2 files changed, 31 deletions(-) delete mode 100644 recipes/biolog_pl.recipe delete mode 100644 recipes/icons/biolog_pl.png diff --git a/recipes/biolog_pl.recipe b/recipes/biolog_pl.recipe deleted file mode 100644 index ae231d89a2..0000000000 --- a/recipes/biolog_pl.recipe +++ /dev/null @@ -1,31 +0,0 @@ -# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai - -from calibre.web.feeds.news import BasicNewsRecipe - - -class Biolog_pl(BasicNewsRecipe): - title = u'Biolog.pl' - oldest_article = 7 - max_articles_per_feed = 100 - remove_empty_feeds = True - __author__ = 'fenuks' - description = u'Przyrodnicze aktualności ze świata nauki (codziennie aktualizowane), kurs biologii, testy i sprawdziany, forum dyskusyjne.' - category = 'biology' - language = 'pl' - masthead_url = 'http://www.biolog.pl/naukowy,portal,biolog.png' - cover_url = 'http://www.biolog.pl/naukowy,portal,biolog.png' - no_stylesheets = True - ignore_duplicate_articles = {'title', 'url'} - # keeps_only_tags=[dict(id='main')] - remove_tags_before = dict(id='main') - remove_tags_after = dict(name='a', attrs={'name': 'komentarze'}) - remove_tags = [dict(name='img', attrs={'alt': 'Komentarze'}), dict( - name='span', attrs={'class': 'menu_odsylacze'})] - feeds = [ - (u'Wszystkie', u'http://www.biolog.pl/backend.php'), - (u'Medycyna', u'http://www.biolog.pl/medycyna-rss.php'), - (u'Ekologia', u'http://www.biolog.pl/rss-ekologia.php'), - (u'Genetyka i biotechnologia', u'http://www.biolog.pl/rss-biotechnologia.php'), - (u'Botanika', u'http://www.biolog.pl/rss-botanika.php'), - (u'Le\u015bnictwo', u'http://www.biolog.pl/rss-lesnictwo.php'), - (u'Zoologia', u'http://www.biolog.pl/rss-zoologia.php')] diff --git a/recipes/icons/biolog_pl.png b/recipes/icons/biolog_pl.png deleted file mode 100644 index 9fd50301e6e3376b62996212512ad180bd5539e0..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 632 zcmV-;0*C#HP)nap0&!EIb{CHAz1TY%;5MiIs8LaO^9g66%Pwl%5#j zXI#;*eNOmlXI+cs?x5Di!xYk{^HsASV=2wCM(vr4a~%K)a8h7i=`|D3V zh-!%$EfFP{XJBMuPF&>T^FhhhJq~Aq4R3%$|1$#IkRRVNM$78hrtU3JPf-N`Mua(W z!N0T;NaWv)v1Sfah11twE4d@xyIFfN4_?7+p1ODNA*5wA03t27uK8Fbt!bFDc=iE*Fh*4Rs7)4gUau7md>B SGxJ0M0000 Date: Fri, 7 Oct 2016 21:39:35 +0200 Subject: [PATCH 4/5] recipes: remove some garbage from znadplanszy --- recipes/znadplanszy_pl.recipe | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/recipes/znadplanszy_pl.recipe b/recipes/znadplanszy_pl.recipe index 7e20596824..7d28e62a0e 100644 --- a/recipes/znadplanszy_pl.recipe +++ b/recipes/znadplanszy_pl.recipe @@ -17,7 +17,5 @@ class ZnadPlanszy(BasicNewsRecipe): remove_attributes = ['style', 'font'] ignore_duplicate_articles = {'title', 'url'} - remove_tags = [dict(attrs={'class': 'rounded-container'})] - remove_tags_after = dict(attrs={'id': 'dotEPUBcontent'}) - remove_tags_before = dict(attrs={'class': 'content units nine alpha'}) + keep_only_tags = dict(name='article') feeds = [(u'Wszystkie', 'http://znadplanszy.pl/full-feed/posts/')] From d41b8fc707dafd092f31ae2d503a054f3a93b2d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20D=C5=82ugosz?= Date: Fri, 7 Oct 2016 21:45:22 +0200 Subject: [PATCH 5/5] recipes: remove some garbage from alejakomiksu --- recipes/alejakomiksu_com.recipe | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/recipes/alejakomiksu_com.recipe b/recipes/alejakomiksu_com.recipe index 34cdede724..e03ee25db9 100644 --- a/recipes/alejakomiksu_com.recipe +++ b/recipes/alejakomiksu_com.recipe @@ -10,8 +10,6 @@ class AlejaKomiksu(BasicNewsRecipe): category = 'comics' language = 'pl' extra_css = 'ul {list-style-type: none;} .gfx_news {float: right;}' - preprocess_regexps = [(re.compile(ur'((
  • (Do poczytania)|(Nowości):
  • )|(

    Komentarze

    )).*', - re.DOTALL | re.IGNORECASE), lambda match: '')] cover_url = 'http://www.alejakomiksu.com/gfx/build/logo.png' masthead_url = 'http://www.alejakomiksu.com/gfx/build/logo.png' use_embedded_content = False @@ -23,7 +21,7 @@ class AlejaKomiksu(BasicNewsRecipe): remove_attributes = ['style', 'font'] ignore_duplicate_articles = {'title', 'url'} - keep_only_tags = [dict(attrs={'class': 'cont_tresc'})] + keep_only_tags = dict(attrs={'class': ['akNews__header','akNews__body']}) feeds = [(u'Wiadomości', 'http://www.alejakomiksu.com/rss.php5')]