From 37e21f78eeafada7ceb9fe448bde6fb9983f6a26 Mon Sep 17 00:00:00 2001 From: unkn0w7n <51942695+unkn0w7n@users.noreply.github.com> Date: Sat, 26 Aug 2023 13:15:29 +0530 Subject: [PATCH 1/2] Update theeconomictimes_india_print_edition.recipe --- .../theeconomictimes_india_print_edition.recipe | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/recipes/theeconomictimes_india_print_edition.recipe b/recipes/theeconomictimes_india_print_edition.recipe index 35e358b7e9..c610386a3e 100644 --- a/recipes/theeconomictimes_india_print_edition.recipe +++ b/recipes/theeconomictimes_india_print_edition.recipe @@ -22,8 +22,9 @@ class TheEconomicTimes(BasicNewsRecipe): masthead_url = 'https://upload.wikimedia.org/wikipedia/commons/9/98/The_Economic_Times_logo.svg' ignore_duplicate_articles = {'title', 'url'} extra_css = ''' - .summary {color:#404040; font-style:italic;} - time{font-size:small;} + .artByline {font-size:small;} + .artImg, .imgBox {font-size:small; color:#202020; text-align:center;} + img {display:block; margin:0 auto;} ''' def get_cover_url(self): @@ -45,13 +46,14 @@ class TheEconomicTimes(BasicNewsRecipe): keep_only_tags = [ dict(name='h1'), classes( - 'artByline artSyn artImg artText publisher publish_on slideshowPackage' + 'artByline pageContent' ), ] remove_tags = [ + dict(name='button'), classes( - 'story_title storyCollection shareBar sr_widget_free jsSrWidgetFree srwidgetfree_3' - ' sr_paid jsSrWidgetPaid ar_wrp arwd_ld_chk adBox custom_ad mgid orn_free_r bold' + 'story_title storyCollection shareBar sr_widget_free jsSrWidgetFree srwidgetfree_3 showWhatsMsg artSyn' + ' sr_paid jsSrWidgetPaid ar_wrp arwd_ld_chk adBox custom_ad mgid orn_free_r bold primeSWrapper external_widget' ), ] @@ -95,11 +97,8 @@ class TheEconomicTimes(BasicNewsRecipe): return feeds def preprocess_html(self, soup): - h2 = soup.find(**classes('summary')) - if h2: - h2.name = 'p' for image in soup.findAll('img', attrs={'src': True}): image['src'] = image['src'].replace("width-300", "width-640") for img in soup.findAll('img', attrs={'data-original': True}): - img['src'] = img['data-original'].replace('photo', 'thumb').replace('quality-100', 'quality-100,width-600,resizemode-4') + img['src'] = img['data-original'] return soup From 04638feeb9fa99a24e4d385d43e0f39d60d56113 Mon Sep 17 00:00:00 2001 From: unkn0w7n <51942695+unkn0w7n@users.noreply.github.com> Date: Sat, 26 Aug 2023 13:17:01 +0530 Subject: [PATCH 2/2] Bloomberg --- recipes/bloomberg-business-week.recipe | 7 ++++--- recipes/bloomberg.recipe | 7 ++++--- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/recipes/bloomberg-business-week.recipe b/recipes/bloomberg-business-week.recipe index 593a548bec..e9812e7157 100644 --- a/recipes/bloomberg-business-week.recipe +++ b/recipes/bloomberg-business-week.recipe @@ -176,15 +176,16 @@ class Bloomberg(BasicNewsRecipe): caption = '' + data['lede']['alt'] + '' if m: - time.sleep(10) + time.sleep(5) body = data['body'] else: body = '' body_data = data['body']['content'] for x in body_data: - pause = random.choice((0.25, 0.5, 0.75, 1)) - time.sleep(pause) body += get_contents(x) + pause = random.choice((4, 5, 6, 7, 8, 9)) + self.log('Delay: ', pause, ' seconds') + time.sleep(pause) return '' + cat + title + subhead + auth + lede + caption + '
' + body + '
' def preprocess_html(self, soup): diff --git a/recipes/bloomberg.recipe b/recipes/bloomberg.recipe index ac7e0f851e..b7ee45be05 100644 --- a/recipes/bloomberg.recipe +++ b/recipes/bloomberg.recipe @@ -170,15 +170,16 @@ class Bloomberg(BasicNewsRecipe): caption = '' + data['lede']['alt'] + '' if m: - time.sleep(3) + time.sleep(5) body = data['body'] elif m2: body = '' body_data = data['body']['content'] for x in body_data: - pause = random.choice((0.25, 0.5, 0.75, 1)) - time.sleep(pause) body += get_contents(x) + pause = random.choice((4, 5, 6, 7, 8, 9)) + self.log('Delay: ', pause, ' seconds') + time.sleep(pause) return '' + cat + title + subhead + auth + lede + caption + '
' + body + '
' def preprocess_html(self, soup):