From 87f5420d850fb0b09129dd6f14e82dfde06411c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20D=C5=82ugosz?= Date: Mon, 14 Oct 2019 22:02:39 +0200 Subject: [PATCH 1/4] remove adventure zone for no new content --- recipes/adventure_zone_pl.recipe | 45 ---------------------------- recipes/icons/adventure_zone_pl.png | Bin 791 -> 0 bytes 2 files changed, 45 deletions(-) delete mode 100644 recipes/adventure_zone_pl.recipe delete mode 100644 recipes/icons/adventure_zone_pl.png diff --git a/recipes/adventure_zone_pl.recipe b/recipes/adventure_zone_pl.recipe deleted file mode 100644 index afb8e080d9..0000000000 --- a/recipes/adventure_zone_pl.recipe +++ /dev/null @@ -1,45 +0,0 @@ -from calibre.web.feeds.news import BasicNewsRecipe - - -class Adventure_zone(BasicNewsRecipe): - title = u'Adventure Zone' - __author__ = 'fenuks' - description = u'Czytaj więcej o przygodzie - codzienne nowinki. Szukaj u nas solucji i poradników, czytaj recenzje i zapowiedzi. Także galeria, pliki oraz forum dla wszystkich fanów gier przygodowych.' # noqa - category = 'games' - language = 'pl' - BASEURL = 'http://www.adventure-zone.info/fusion/' - no_stylesheets = True - extra_css = '.image {float: left; margin-right: 5px;}' - oldest_article = 20 - max_articles_per_feed = 100 - cover_url = 'http://www.adventure-zone.info/inne/logoaz_2012.png' - remove_attributes = ['style'] - use_embedded_content = False - keep_only_tags = [dict(attrs={'class': 'content'})] - remove_tags = [dict(attrs={'class': 'footer'})] - feeds = [(u'Nowinki', u'http://www.adventure-zone.info/fusion/rss/index.php')] - - _trigger_words = ('zapowied', 'recenzj', 'solucj', 'poradnik') - - @staticmethod - def _is_linked_text(title): - return 'zapowied' in title or 'recenz' in title or 'solucj' in title or 'poradnik' in title - - def skip_ad_pages(self, soup): - skip_tag = soup.body.find(attrs={'class':'subject'}) - skip_tag = skip_tag.findAll(name='a', href=True) - title = soup.title.renderContents().decode('utf-8').lower() - if self._is_linked_text(title): - for r in skip_tag: - word = r.renderContents().decode('utf-8') - if not word: - continue - word = word.lower() - if self._is_linked_text(word): - return self.index_to_soup(self.BASEURL+r['href'], raw=True) - - def preprocess_html(self, soup): - for link in soup.findAll('a', href=True): - if not link['href'].startswith('http'): - link['href'] = self.BASEURL + link['href'] - return soup diff --git a/recipes/icons/adventure_zone_pl.png b/recipes/icons/adventure_zone_pl.png deleted file mode 100644 index f00db43dab8ae3fac8acb2c99115c07788d60d02..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 791 zcmV+y1L*vTP)}8zSxS*EEO#RxfW}YN?PHg&0YG|6%8Kf*KNJGrCkoGPUo+1ycUckg47c<&mP0Ctk zm3M7e3J`oBsP#47Ax)tf=o}e|%C|xLr*DPWFl!_yBf2v&Q@U|3iSB+M3=j7Gs#fPs z&I#HbHIsfn)X!%R?c7W?wrc6Zy=!N29wC6?gb>g|KNR?X_itVQ_&|oKhk=yeS9$0b>MJaJl@GKQbL<0H{t zz4?jAU^LF2gL)->0$m~8{yM6Z|=X}kw`3YXs1|Rp# zrZXoA*6=w28jVINv?5%vHsfh|<&MKIKNr5(w!Ll3lEMB*6B1%hTFw|tI^qI<-kKC)~yYA&v7p^=h6@M-4Nza+4q4Oy-on=xg)v1{gO^^WV`QiO#ER_JMe&k~E z!K1uT!Q%Ou+3B#^%EU27cXu*sNXPX-HK%s|MA17bkQn%6$knH2`)aj8r9> Date: Mon, 14 Oct 2019 22:38:55 +0200 Subject: [PATCH 2/4] update fdb_pl recipe --- recipes/fdb_pl.recipe | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/recipes/fdb_pl.recipe b/recipes/fdb_pl.recipe index e281682f1c..3dbd074f1a 100644 --- a/recipes/fdb_pl.recipe +++ b/recipes/fdb_pl.recipe @@ -9,7 +9,7 @@ class FDBPl(BasicNewsRecipe): category = 'film' language = 'pl' extra_css = '.options-left > li {display: inline;} em {display: block;}' - cover_url = 'http://fdb.pl/assets/fdb2/logo.png' + cover_url = 'https://i1.fdbimg.pl/hygg2xp1/480x300_magq39.jpg' use_embedded_content = False oldest_article = 7 max_articles_per_feed = 100 @@ -19,25 +19,25 @@ class FDBPl(BasicNewsRecipe): remove_attributes = ['style', 'font'] ignore_duplicate_articles = {'title', 'url'} - keep_only_tags = [dict(attrs={'class': 'news-item news-first'})] + keep_only_tags = [dict(attrs={'class': ['row justify-content-center', 'figure']})] remove_tags = [ - dict(attrs={'class': ['dig dig-first', 'ads clearfix', 'comments']})] + dict(attrs={'class': ['news-footer infinite-scroll-breakepoit', 'list-inline text-muted m-0']})] feeds = [] def parse_index(self): feeds = [] feeds.append((u'Wiadomości', self.get_articles( - 'http://fdb.pl/wiadomosci?page={0}', 2))) + 'https://fdb.pl/wiadomosci?page={0}', 2))) return feeds def get_articles(self, url, pages=1): articles = [] for nr in range(1, pages + 1): soup = self.index_to_soup(url.format(nr)) - for tag in soup.findAll(attrs={'class': 'news-item clearfix'}): - node = tag.find('h2') + for tag in soup.findAll(attrs={'class': 'col-xs-6 col-sm-4 col-md-4 col-lg-3'}): + node = tag.find('h5') title = node.a.string - url = 'http://fdb.pl' + node.a['href'] + url = node.a['href'] date = '' articles.append({'title': title, 'url': url, From ed7476adb45e3646c914a958e8afb1f1c9cb6257 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20D=C5=82ugosz?= Date: Mon, 14 Oct 2019 22:43:26 +0200 Subject: [PATCH 3/4] frazpc.pl is no more --- recipes/frazpc.recipe | 43 --------------------------------------- recipes/icons/frazpc.png | Bin 271 -> 0 bytes 2 files changed, 43 deletions(-) delete mode 100644 recipes/frazpc.recipe delete mode 100644 recipes/icons/frazpc.png diff --git a/recipes/frazpc.recipe b/recipes/frazpc.recipe deleted file mode 100644 index 7158f101ac..0000000000 --- a/recipes/frazpc.recipe +++ /dev/null @@ -1,43 +0,0 @@ -#!/usr/bin/env python2 - -__license__ = 'GPL v3' -__copyright__ = u'2010-2011, Tomasz Dlugosz ' -''' -frazpc.pl -''' - -from calibre.web.feeds.news import BasicNewsRecipe -import re - - -class FrazPC(BasicNewsRecipe): - title = u'frazpc.pl' - publisher = u'frazpc.pl' - description = u'Tw\xf3j Vortal Technologiczny' - language = 'pl' - __author__ = u'Tomasz D\u0142ugosz' - oldest_article = 7 - max_articles_per_feed = 100 - use_embedded_content = False - no_stylesheets = True - remove_empty_feeds = True - cover_url = 'http://www.frazpc.pl/images/logo.png' - feeds = [ - (u'Aktualno\u015bci', u'http://www.frazpc.pl/feed/aktualnosci'), - (u'Artyku\u0142y', u'http://www.frazpc.pl/feed/artykuly') - ] - - keep_only_tags = [dict(name='div', attrs={'class': 'article'})] - - remove_tags = [ - dict(name='div', attrs={'class': 'title-wrapper'}), - dict(name='p', attrs={'class': 'tags'}), - dict(name='p', attrs={'class': 'article-links'}), - dict(name='div', attrs={'class': 'comments_box'}) - ] - - remove_tags_after = dict(name='div', attrs={'class': 'content'}) - preprocess_regexps = [(re.compile( - r'\| Komentarze \([0-9]*\)'), lambda match: '')] - - remove_attributes = ['width', 'height'] diff --git a/recipes/icons/frazpc.png b/recipes/icons/frazpc.png deleted file mode 100644 index aed3d9e856eca4c232ea39e03fcbe0018e34ff15..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 271 zcmV+q0r38bP)mcuw5o`bfo3}u1oCc-+fcOto{v^zR?N~HF z2VyxOW`o*%4C)G9ApQnblLj+@6NnF>8ITF{Bh-*BP=CLI%2&X{0EZz_XwhYiymdlJQ|)^Gp-002ovPDHLkV1i8OWX1ph From 2db371f7dedfd655c78d2dd580dfad0954ca804a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20D=C5=82ugosz?= Date: Mon, 14 Oct 2019 23:10:05 +0200 Subject: [PATCH 4/4] update filmweb --- recipes/film_web.recipe | 32 +++++++++++++++----------------- recipes/icons/film_web.png | Bin 1687 -> 454 bytes 2 files changed, 15 insertions(+), 17 deletions(-) diff --git a/recipes/film_web.recipe b/recipes/film_web.recipe index ef71e548d2..7ed105bc2b 100644 --- a/recipes/film_web.recipe +++ b/recipes/film_web.recipe @@ -6,10 +6,10 @@ class FilmWebPl(BasicNewsRecipe): title = 'FilmWeb' __author__ = 'fenuks' description = u'Filmweb.pl - Filmy takie jak Ty Filmweb to największy i najczęściej odwiedzany polski serwis filmowy.' - cover_url = 'http://1.fwcdn.pl/an/867323/63321_1.11.jpg' + cover_url = 'https://1.fwcdn.pl/an/np/49468/2018/15037.2.jpg' category = 'movies' language = 'pl' - index = 'http://www.filmweb.pl' + index = 'https://www.filmweb.pl' oldest_article = 8 max_articles_per_feed = 100 no_stylesheets = True @@ -28,25 +28,23 @@ class FilmWebPl(BasicNewsRecipe): remove_tags = [dict(attrs={'class':['infoParent', 'likeBar', 'droptions-box pull-right', 'photoDesc', 'imageLicense', 'play big', 'shadow embed__icon--svg']})] remove_attributes = ['style',] - keep_only_tags = [dict(attrs={'class': ['newsHdr hdrWithAuthor ', 'reviewHdr', 'newsContent newsPage', 'newsContent']})] - # remove_tags_before = dict(attrs={'class': 'hdr hdr-mega'}) - # remove_tags_after = dict(attrs={'class': 'newsContent'}) - feeds = [(u'Filmy', u'http://www.filmweb.pl/feed/news/category/film'), - (u'Seriale', u'http://www.filmweb.pl/feed/news/category/serial'), - (u'Box office', u'http://www.filmweb.pl/feed/news/category/boxoffice'), - (u'Telewizja', u'http://www.filmweb.pl/feed/news/category/tv'), - (u'Festiwale, nagrody i przeglądy', u'http://www.filmweb.pl/feed/news/category/festival'), - (u'Multimedia', u'http://www.filmweb.pl/feed/news/category/multimedia'), - (u'Dystrybucja dvd/blu-ray', u'http://www.filmweb.pl/feed/news/category/dvd'), - (u'Gry wideo', u'http://www.filmweb.pl/feed/news/category/game'), - (u'Różne', u'http://www.filmweb.pl/feed/news/category/other'), - (u'Recenzje redakcji', u'http://www.filmweb.pl/feed/reviews/latest'), - (u'Recenzje użytkowników', u'http://www.filmweb.pl/feed/user-reviews/latest') + keep_only_tags = [dict(attrs={'class': ['newsHdr hdrWithAuthor ', 'reviewHdr', 'newsContent newsPage', 'newsContent', 'hdr hdr-mega']})] + feeds = [(u'Filmy', u'https://www.filmweb.pl/feed/news/category/film'), + (u'Seriale', u'https://www.filmweb.pl/feed/news/category/serial'), + (u'Box office', u'https://www.filmweb.pl/feed/news/category/boxoffice'), + (u'Telewizja', u'https://www.filmweb.pl/feed/news/category/tv'), + (u'Festiwale, nagrody i przeglądy', u'https://www.filmweb.pl/feed/news/category/festival'), + (u'Multimedia', u'https://www.filmweb.pl/feed/news/category/multimedia'), + (u'Dystrybucja dvd/blu-ray', u'https://www.filmweb.pl/feed/news/category/dvd'), + (u'Gry wideo', u'https://www.filmweb.pl/feed/news/category/game'), + (u'Różne', u'https://www.filmweb.pl/feed/news/category/other'), + (u'Recenzje redakcji', u'https://www.filmweb.pl/feed/reviews/latest'), + (u'Recenzje użytkowników', u'https://www.filmweb.pl/feed/user-reviews/latest') ] def preprocess_html(self, soup): for a in soup('a', href=True): - if 'http://' not in a['href'] and 'https://' not in a['href']: + if 'https://' not in a['href']: a['href'] = self.index + a['href'] return soup diff --git a/recipes/icons/film_web.png b/recipes/icons/film_web.png index 9e3dc044262029c67fc195c5038422707e88b39b..1791a862ba01ba74f4d91c68753ea1982eaf9173 100644 GIT binary patch literal 454 zcmeAS@N?(olHy`uVBq!ia0vp^0wB!73?$#)eFPFdff`*(rm z&r#m@>t+9*;Qp{h^7kQuZ@Wc5ZIODjPWJmw@waQ`|NsA=usp04sEfBG$S;^VWbO{F zzmK9UjxGD%!jf|&4=BNy<3r0i(j-nwiVgXA(cpNVmf4x$Si^A;+txN6}-&DEk3 zGlXX_FJ+1qHz>$^aO=j-jn$0I=eOP5U7l}Q+t66}-R=BcrOl0Q=lmYre7aRQqwl-h z#@w614q<2R?bW@^nDcU#DX+lmNCy^%dJ`qjC4OPXK)0xtxJHzuB$lLFB^RXvDF!10 zLrYyl16@Ot5F;Zi6ALRN3vB}**`yv64Y~O#nQ4_+G?+j%tbMS-1*n0+)78&q Iol`;+0Qm%*{r~^~ delta 1683 zcmV;E25kAp1D6eu7=I830000_VmTv6#~GNX$ldjFjxp+4iG@0AayHg)iSlRC`cx?s#e>V$Kj4U$NTl) z=db4;^tbdFz$mM?74X>Z8UjRFoI#vsjw)}^qA}bMu2M1nW`Fyf{qv3#tV}=e(aFdE zWUni|21hu?`ZnziV#nLP>^KhnBl6FolLlDVj>-e^%Dy?^c&c>*nBUKJy;~Qzu0tRD z_To5j^m{)TB=VceDX>rIcffzOUryjT_EWDM&$CAy>@E9q2N>_oN+}y1HU72zUi#VR zQb{1;@Ye&(*ngnUa90x`GVUb;5tA5kpq6ZQ2PsE0&u5fo$a9J+2P^$6xmcqMZ|>2w z8lle| z4EHfF!0~C3uK>nVH-InfW&Od5ajCO_$t_z6dk;H$Mt|~Bc4&5(ZNo>|<=7i>kr|iA5kWYk_aL&PLsQPdW-JQ-=_7im@|TH{_6u>PJ$`n zU7NtP1bhr2MA!j6ixLdQD zp4B#0N?oC07c3?Y5PV{hvcz6&=PvT1ENY9R=18?Pkr?^-aw`0>x>XKoYk7&>N@!Nm)p#@mj09+${f;}h^sqJULSsW;BpZQqbn2SPk^YWx0TMtE4`lYkz;W4D>vL^8rF{+N*X~Z6~ra^RFjdS5VK} z24$ShtfD&8s2Wj=keo~SQjF1lgfy}A#YVCEN693di)FNSy%lJ(u0pSr)DUNJGGY=Q zC=Iz0mPlU!a>L0=&_0azoiv&p0A3u{zXf#NsX~DB&{jf2)Zt?m0-}UR8}sbGD;x@DVr9psT!V1N)3IT&PWGZ}u)!y=0!VEYZw*6MxdThDF|9 z*;-CNNO#P-O%%!jvD^152oP4y2Z=h9N5GIbBmk^Qk=cM${I>MH=>5)>sZ0wz5Q%x` zE;-Uinhd9`p*`;G)Vbl^5k%_3b&>f79m+nvQSBIWQ>ACt+G(~4lBk@$o%}M7Du>z5 z@7)%arcI;hC~`?8`WdegP=6!sE`V^K4vs+CoqhtmX6tf4IeDih$P}6#*!N2 zVn!&&)e{)v4j~vT=Xbnhw(#37Vf*!M*IZ2&__qhtJ(?PIl^@Dq=8brMf@;BC{Zr^~ zCEfrAO7{)GE0p~iT&Xd01vPJtjOctizopWKV`6I3QG>FUGG{WwG=KdHN3u*=Lz>jr z?zz^xg_e&@j0+QnWgHGz=M-HPoi|#{6AQKo0t`IS|E8b=a!BM{(0xC*9{A