From 9305014fd284fe98342c4baf3e021644f5d0503f Mon Sep 17 00:00:00 2001 From: unkn0w7n <51942695+unkn0w7n@users.noreply.github.com> Date: Sat, 2 Aug 2025 19:09:28 +0530 Subject: [PATCH] Update press_information_bureau.recipe --- recipes/press_information_bureau.recipe | 27 +++++++++++++------------ 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/recipes/press_information_bureau.recipe b/recipes/press_information_bureau.recipe index 1558fb82e1..c5667e7e5c 100644 --- a/recipes/press_information_bureau.recipe +++ b/recipes/press_information_bureau.recipe @@ -1,30 +1,31 @@ +#!/usr/bin/env python from calibre.web.feeds.news import BasicNewsRecipe, classes class PIB(BasicNewsRecipe): - title = u'Press Information Bureau' + title = 'Press Information Bureau' language = 'en_IN' __author__ = 'unkn0wn' no_stylesheets = True use_embedded_content = False - remove_attributes = ['style','height','width'] + remove_attributes = ['style', 'height', 'width'] ignore_duplicate_articles = {'url'} - description = ('The Press Information Bureau (PIB) is the nodal agency of the Government of India' - ' to disseminate information to the print and electronic media on government policies,' - ' programmes, initiatives and achievements. Best downloaded at the end of the day!') + description = ( + 'The Press Information Bureau (PIB) is the nodal agency of the Government of India' + ' to disseminate information to the print and electronic media on government policies,' + ' programmes, initiatives and achievements. Best downloaded at the end of the day!' + ) extra_css = ''' #ltrSubtitle{color:#404040;} blockquote{color:#404040;} .ReleaseDateSubHeaddateTime{font-style:italic; font-size:small;} ''' - masthead_url = 'https://tse3.mm.bing.net/th?id=OIP.4QE8KPl1dZ3_BoR3X92aqgHaIH' - keep_only_tags = [ - classes('innner-page-main-about-us-content-right-part') - ] - remove_tags = [ - classes('ReleaseLang log_oo') - ] + masthead_url = 'https://tse3.mm.bing.net/th?id=OIP.4QE8KPl1dZ3_BoR3X92aqgHaIH' + cover_url = 'https://static.pib.gov.in/WriteReadData/specificdocs/photo/2024/jun/ph2024624343601.jpg' + + keep_only_tags = [classes('innner-page-main-about-us-content-right-part')] + remove_tags = [classes('ReleaseLang log_oo')] def parse_index(self): soup = self.index_to_soup('https://pib.gov.in/Allrel.aspx') @@ -37,7 +38,7 @@ class PIB(BasicNewsRecipe): for a in div.findAll('a', href=True): url = a['href'] if url.startswith('/'): - url = 'https://pib.gov.in' + url + url = 'https://pib.gov.in' + url title = self.tag_to_string(a) self.log('\t', title, '\n\t\t', url) articles.append({'title': title, 'url': url})