This commit is contained in:
Kovid Goyal 2023-08-27 07:58:32 +05:30
commit fd25c117c5
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
3 changed files with 16 additions and 15 deletions

View File

@ -176,15 +176,16 @@ class Bloomberg(BasicNewsRecipe):
caption = '<span class="cap">' + data['lede']['alt'] + '</span>' caption = '<span class="cap">' + data['lede']['alt'] + '</span>'
if m: if m:
time.sleep(10) time.sleep(5)
body = data['body'] body = data['body']
else: else:
body = '' body = ''
body_data = data['body']['content'] body_data = data['body']['content']
for x in body_data: for x in body_data:
pause = random.choice((0.25, 0.5, 0.75, 1))
time.sleep(pause)
body += get_contents(x) body += get_contents(x)
pause = random.choice((4, 5, 6, 7, 8, 9))
self.log('Delay: ', pause, ' seconds')
time.sleep(pause)
return '<html><body>' + cat + title + subhead + auth + lede + caption + '<div>' + body + '</div></body></html>' return '<html><body>' + cat + title + subhead + auth + lede + caption + '<div>' + body + '</div></body></html>'
def preprocess_html(self, soup): def preprocess_html(self, soup):

View File

@ -170,15 +170,16 @@ class Bloomberg(BasicNewsRecipe):
caption = '<span class="cap">' + data['lede']['alt'] + '</span>' caption = '<span class="cap">' + data['lede']['alt'] + '</span>'
if m: if m:
time.sleep(3) time.sleep(5)
body = data['body'] body = data['body']
elif m2: elif m2:
body = '' body = ''
body_data = data['body']['content'] body_data = data['body']['content']
for x in body_data: for x in body_data:
pause = random.choice((0.25, 0.5, 0.75, 1))
time.sleep(pause)
body += get_contents(x) body += get_contents(x)
pause = random.choice((4, 5, 6, 7, 8, 9))
self.log('Delay: ', pause, ' seconds')
time.sleep(pause)
return '<html><body>' + cat + title + subhead + auth + lede + caption + '<div>' + body + '</div></body></html>' return '<html><body>' + cat + title + subhead + auth + lede + caption + '<div>' + body + '</div></body></html>'
def preprocess_html(self, soup): def preprocess_html(self, soup):

View File

@ -22,8 +22,9 @@ class TheEconomicTimes(BasicNewsRecipe):
masthead_url = 'https://upload.wikimedia.org/wikipedia/commons/9/98/The_Economic_Times_logo.svg' masthead_url = 'https://upload.wikimedia.org/wikipedia/commons/9/98/The_Economic_Times_logo.svg'
ignore_duplicate_articles = {'title', 'url'} ignore_duplicate_articles = {'title', 'url'}
extra_css = ''' extra_css = '''
.summary {color:#404040; font-style:italic;} .artByline {font-size:small;}
time{font-size:small;} .artImg, .imgBox {font-size:small; color:#202020; text-align:center;}
img {display:block; margin:0 auto;}
''' '''
def get_cover_url(self): def get_cover_url(self):
@ -45,13 +46,14 @@ class TheEconomicTimes(BasicNewsRecipe):
keep_only_tags = [ keep_only_tags = [
dict(name='h1'), dict(name='h1'),
classes( classes(
'artByline artSyn artImg artText publisher publish_on slideshowPackage' 'artByline pageContent'
), ),
] ]
remove_tags = [ remove_tags = [
dict(name='button'),
classes( classes(
'story_title storyCollection shareBar sr_widget_free jsSrWidgetFree srwidgetfree_3' 'story_title storyCollection shareBar sr_widget_free jsSrWidgetFree srwidgetfree_3 showWhatsMsg artSyn'
' sr_paid jsSrWidgetPaid ar_wrp arwd_ld_chk adBox custom_ad mgid orn_free_r bold' ' sr_paid jsSrWidgetPaid ar_wrp arwd_ld_chk adBox custom_ad mgid orn_free_r bold primeSWrapper external_widget'
), ),
] ]
@ -95,11 +97,8 @@ class TheEconomicTimes(BasicNewsRecipe):
return feeds return feeds
def preprocess_html(self, soup): def preprocess_html(self, soup):
h2 = soup.find(**classes('summary'))
if h2:
h2.name = 'p'
for image in soup.findAll('img', attrs={'src': True}): for image in soup.findAll('img', attrs={'src': True}):
image['src'] = image['src'].replace("width-300", "width-640") image['src'] = image['src'].replace("width-300", "width-640")
for img in soup.findAll('img', attrs={'data-original': True}): for img in soup.findAll('img', attrs={'data-original': True}):
img['src'] = img['data-original'].replace('photo', 'thumb').replace('quality-100', 'quality-100,width-600,resizemode-4') img['src'] = img['data-original']
return soup return soup