diff --git a/recipes/hindu_business_line_print_edition.recipe b/recipes/hindu_business_line_print_edition.recipe new file mode 100644 index 0000000000..2f77a977db --- /dev/null +++ b/recipes/hindu_business_line_print_edition.recipe @@ -0,0 +1,75 @@ +from calibre.web.feeds.news import BasicNewsRecipe, classes +import string + + +class BusinessLine(BasicNewsRecipe): + title = 'The Hindu BusinessLine | Print Edition' + __author__ = 'unkn0wn' + description = ( + 'The Hindu BusinessLine is known for its credibility, accuracy, in-depth analysis of markets and sober coverage' + ' of business news. BusinessLine reduces the daily grind of business to relevant, readable, byte-sized stories.' + ' The newspaper is extensively followed by the decision makers and change leaders from the world of business.' + ) + no_stylesheets = True + use_embedded_content = False + encoding = 'utf-8' + language = 'en_IN' + remove_attributes = ['height', 'width', 'padding-bottom'] + masthead_url = 'https://www.thehindubusinessline.com/theme/images/bl-online/bllogo.png' + ignore_duplicate_articles = {'title', 'url'} + + def get_cover_url(self): + soup = self.index_to_soup( + 'https://www.magzter.com/IN/THG-publishing-pvt-ltd/The-Hindu-Business-Line/Newspaper/' + ) + for citem in soup.findAll( + 'meta', content=lambda s: s and s.endswith('view/3.jpg') + ): + return citem['content'] + + keep_only_tags = [ + classes( + 'tp-title-inf bi-line leadtext lead-img-caption slide-moadal img-container' + ), + dict( + name='div', attrs={'id': lambda x: x and x.startswith('content-body-')} + ) + ] + + remove_tags = [ + classes( + 'swiper-button-prev left-arrow swiper-button-next right-arrow close cursor tagsBtm share-topic comment-rules vuukle-div paywallbox ' + ) + ] + + def parse_index(self): + soup = self.index_to_soup( + 'https://www.thehindubusinessline.com/todays-paper/' + ) + ans = self.bl_parse_index(soup) + return ans + + def bl_parse_index(self, soup): + feeds = [] + for section in soup.findAll(**classes('section-container')): + div = section.find_previous_sibling('div', **classes('section-header')) + a = div.find('a', **classes('section-list-heading')) + secname = string.capwords(self.tag_to_string(a).strip()) + self.log(secname) + articles = [] + for a in section.findAll('a', href=True): + url = a['href'] + title = self.tag_to_string(a) + articles.append({'title': title, 'url': url}) + self.log('\t', title) + self.log('\t\t', url) + if articles: + feeds.append((secname, articles)) + return feeds + + def preprocess_html(self, soup): + for image in soup.findAll('source', attrs={'srcset': True}): + image['src'] = image['srcset'] + for img in soup.findAll('img', attrs={'data-src-template': True}): + img['src'] = img['data-src-template'] + return soup