Update hindu.recipe

This commit is contained in:
unkn0w7n 2023-04-27 15:12:11 +05:30 committed by GitHub
parent 831fb0cf4d
commit 02e3ba7588
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -1,7 +1,7 @@
import json import json
import re import re
from collections import defaultdict from collections import defaultdict
from datetime import datetime from datetime import date
from calibre.web.feeds.news import BasicNewsRecipe, classes from calibre.web.feeds.news import BasicNewsRecipe, classes
@@ -26,9 +26,10 @@ class TheHindu(BasicNewsRecipe):
extra_css = ''' extra_css = '''
.caption {font-size:small; text-align:center;} .caption {font-size:small; text-align:center;}
.author {font-size:small; font-weight:bold;} .author, .dateLine {font-size:small; font-weight:bold;}
.subhead, .subhead_lead {font-weight:bold;} .subhead, .subhead_lead {font-weight:bold;}
img {display:block; margin:0 auto;} img {display:block; margin:0 auto;}
.italic {font-style:italic; color:#202020;}
''' '''
ignore_duplicate_articles = {'url'} ignore_duplicate_articles = {'url'}
@@ -52,20 +53,22 @@ class TheHindu(BasicNewsRecipe):
BasicNewsRecipe.__init__(self, *args, **kwargs) BasicNewsRecipe.__init__(self, *args, **kwargs)
if self.output_profile.short_name.startswith('kindle'): if self.output_profile.short_name.startswith('kindle'):
if not past_edition: if not past_edition:
self.title = 'The Hindu ' + datetime.today().strftime('%b %d, %Y') self.title = 'The Hindu ' + date.today().strftime('%b %d, %Y')
def parse_index(self): def parse_index(self):
global local_edition global local_edition
if local_edition or past_edition: if local_edition or past_edition:
if local_edition is None: if local_edition is None:
local_edition = 'th_chennai' local_edition = 'th_chennai'
today = datetime.today().strftime('%Y-%m-%d') today = date.today().strftime('%Y-%m-%d')
if past_edition: if past_edition:
today = past_edition today = past_edition
self.log('Downloading past edition of', local_edition + ' from ' + today) self.log('Downloading past edition of', local_edition + ' from ' + today)
url = absurl('/todays-paper/' + today + '/' + local_edition + '/') url = absurl('/todays-paper/' + today + '/' + local_edition + '/')
else: else:
url = 'https://www.thehindu.com/todays-paper/' url = 'https://www.thehindu.com/todays-paper/'
raw = self.index_to_soup(url, raw=True) raw = self.index_to_soup(url, raw=True)
soup = self.index_to_soup(raw) soup = self.index_to_soup(raw)
ans = self.hindu_parse_index(soup) ans = self.hindu_parse_index(soup)
@@ -83,8 +86,8 @@ class TheHindu(BasicNewsRecipe):
if not self.tag_to_string(script).strip().startswith('let grouped_articles = {}'): if not self.tag_to_string(script).strip().startswith('let grouped_articles = {}'):
continue continue
if script is not None: if script is not None:
art = re.search(r'grouped_articles = ({\"[^<]+?]})', self.tag_to_string(script)) art = re.search(r'grouped_articles = ({\".*)', self.tag_to_string(script))
data = json.loads(art.group(1)) data = json.JSONDecoder().raw_decode(art.group(1))[0]
feeds_dict = defaultdict(list) feeds_dict = defaultdict(list)