From 4cd864cbc6efc3318feaac91aff457bf402f65c2 Mon Sep 17 00:00:00 2001
From: unkn0w7n <51942695+unkn0w7n@users.noreply.github.com>
Date: Tue, 22 Aug 2023 11:24:33 +0530
Subject: [PATCH] firstpost.recipe

---
 recipes/firstpost.recipe    | 93 ++++++++++++++++++++++++++++++++++++
 recipes/icons/firstpost.png | Bin 0 -> 1051 bytes
 2 files changed, 93 insertions(+)
 create mode 100644 recipes/firstpost.recipe
 create mode 100644 recipes/icons/firstpost.png

diff --git a/recipes/firstpost.recipe b/recipes/firstpost.recipe
new file mode 100644
index 0000000000..ef42c90e89
--- /dev/null
+++ b/recipes/firstpost.recipe
@@ -0,0 +1,93 @@
+from calibre.web.feeds.news import BasicNewsRecipe, classes
+from calibre.ptempfile import PersistentTemporaryFile
+
+# Firstpost feeds mix sections into other feeds, like explainers end up in opinion feed and opinions end up in India feed.
+# change google_feeds to True to fetch right sections.
+google_feeds = False
+
+class firstpost(BasicNewsRecipe):
+    title = 'Firstpost'
+    __author__ = 'unkn0wn'
+    description = (
+        'Firstpost.com will serve as a trusted guide to the crush of news and ideas around you.'
+        ' With thoughtful analysis and fearless views our team of editors and writers will track'
+        ' news in India and the world and provide a perspective that is reflective of a changing dynamic.'
+    )
+    no_stylesheets = True
+    use_embedded_content = False
+    encoding = 'utf-8'
+    language = 'en_IN'
+    remove_attributes = ['height', 'width', 'style']
+    masthead_url = 'https://images.firstpost.com/wp-content/uploads/2016/03/FP-Logo.png?impolicy=website&width=600&height=60'
+    max_articles_per_feed = 25
+    remove_empty_feeds = True
+    ignore_duplicate_articles = {'title', 'url'}
+    extra_css = '''
+        .category-name, .author-info { font-size:small; color:#202020; }
+        .wp-caption-text { font-size:small; text-align:center; }
+    '''
+
+    keep_only_tags = [
+        classes('article-sect')
+    ]
+
+    remove_tags = [
+        classes('art-rel-articles tags-wrap'),
+        dict(name='svg'),
+    ]
+
+    feeds = []
+
+    sections = [
+        'india', 'politics', 'opinion', 'explainers', 'business',
+        'world', 'web-stories', 'tech', 'artandculture', 'health', 'health-supplement',
+        # 'photos', 'entertainment', 'living', 'education', 'sports', 'firstcricket',
+    ]
+    if not google_feeds:
+        # One feed per section, taken from the site's own RSS endpoints.
+        for sec in sections:
+            a = 'https://www.firstpost.com/rss/{}.xml'
+            feeds.append((sec.capitalize(), a.format(sec)))
+    else:
+        # Google News search feeds; entries are resolved by get_obfuscated_article().
+        articles_are_obfuscated = True
+
+        def get_obfuscated_article(self, url):
+            # The feed URL is not the article itself: fetch it and follow
+            # the first link found on that page.
+            br = self.get_browser()
+            soup = self.index_to_soup(url)
+            link = soup.find('a', href=True)
+            if link is None:
+                # No link on the page: abort instead of failing on link['href'].
+                self.log('Aborting Article ', url)
+                self.abort_article('no article link found')
+            href = link['href']
+            skip_sections = [  # add sections you want to skip
+                '/video/', '/videos/', '/media/', '/vantage/'
+            ]
+            if any(x in href for x in skip_sections):
+                self.log('Aborting Article ', href)
+                self.abort_article('skipping video links')
+            self.log('Downloading ', href)
+            html = br.open(href).read()
+            pt = PersistentTemporaryFile('.html')
+            pt.write(html)
+            pt.close()
+            return pt.name
+
+        for sec in sections:
+            a = 'https://news.google.com/rss/search?q=when:27h+allinurl:firstpost.com{}&hl=en-IN&gl=IN&ceid=IN:en'
+            feeds.append((sec.capitalize(), a.format('%2F' + sec + '%2F')))
+        # feeds.append(('Others', a.format('')))
+
+    def preprocess_html(self, soup):
+        # Rename the first category-name / inner-copy <h2> to <p>.
+        if h2 := soup.find('h2', attrs={'class':'category-name'}):
+            h2.name = 'p'
+        if h := soup.find('h2', attrs={'class':'inner-copy'}):
+            h.name = 'p'
+        # Copy each image's data-src value into its src attribute.
+        for img in soup.findAll('img', attrs={'data-src':True}):
+            img['src'] = img['data-src']
+        return soup
diff --git a/recipes/icons/firstpost.png b/recipes/icons/firstpost.png
new file mode 100644
index 0000000000000000000000000000000000000000..693d6365d15604bfed512e088dbc6ac72ccc62df
GIT binary patch
literal 1051
zcmeAS@N?(olHy`uVBq!ia0vp^0wB!73?$#)eFPFvNcITwWnidMV_;}#VPNVDNrxVIzuCVXD8iWJ?e4
zT%n*SKP@vSRiUJ^AXT9vw}64cqIYU&;3g$Qjz7;us#|yjJo=1hoxC5m_k8)ET#2tQ
zSDcSpJIk=;!UdCw`~KHmod2W3c+zH{{J0#$jgv#e^^S#|U3*)txpMaM%b7-p@A)1M
zie1I0X|-p?f$fp%x%;g6!ajy>U)5&L)b&7Mv(L>PCvMJO({R*LnlX)gdc^IUX47vh
zc5pDuGX6B7^!|>W>1y_g3D!qB_s)F~G`laVLwV=R%t@zaOYlG0+~E}wW0li=fW`ax
zD;{m{#_F^ET!Pbr=>P-VjtG5SyiUdt7*P?&$BN5riF7}{(N}$
z;nC={H=TEkSD$G4$a7;Ovx2g?dwr8&z6LkP#{V-|gB(_0;Mg|Lr__L@|MfmymP5;$
zZ=A?d|F%!uaRc9$Uz*QZ?$q6KiD<}?)>y8%T!^(HZjWF=%+C(Ww{6b{;Shg;Sm$LI#oo@E^UjWg<k4v#X{@Yrs{LFhgJ7YDEEt%gjY59Gp1BUS3mwGB3HB
zC6CePe|Jq{-qi~stg2E)YrR&~-TIj+z`d?*rr)IfUT2n^$~|Z0#(sF#bpKN`%fjE8
z>)KC>2|mr1qw~SqCGg#rEw@_vew}GQA{$`(yDcafBqh6U-kC;rK?`&w}0IC^^rcK>f7o2KiAE^Y+tiE-#gIfWdJat`FOfG
zhHzX@_F!gaWnUlq>)
zj$;BLEd~-2vCa#6J_M<-o#1V9&}7h@C0Y2wvd152u4;*EL`h0wNvc(HQ7VvPFfuSQ
z)-|xuH8Kh@FtswVv@$i*HZZUBQ;SOy
zfjI%ECB{x#Ux9igL3)Do(@M${i&7aJQ}UBi6+Ckj(^G>|6H_V+Po{!$FoUP7pUXO@
GgeCwE{Fh_^

literal 0
HcmV?d00001
