From 64359fbbe58e79fd8641bc9692f2d6af4c7fa709 Mon Sep 17 00:00:00 2001 From: unkn0w7n <51942695+unkn0w7n@users.noreply.github.com> Date: Tue, 19 Sep 2023 12:17:22 +0530 Subject: [PATCH] Times of India Print edition --- recipes/icons/toi.png | Bin 96 -> 305 bytes recipes/icons/toiprint.png | Bin 0 -> 305 bytes recipes/toiprint.recipe | 107 +++++++++++++++++++++++++++++++++++++ 3 files changed, 107 insertions(+) create mode 100644 recipes/icons/toiprint.png create mode 100644 recipes/toiprint.recipe diff --git a/recipes/icons/toi.png b/recipes/icons/toi.png index c7f427aafc67c5374439f207a641399f0c7b1024..26da33fff69fee0cccae7ec62f9b67947043a694 100644 GIT binary patch delta 289 zcmV++0p9*#u>z17e+UBr001a04^sdD0A^53R7K1I0L%aY%?1X_007AV0LuUX$N&KN z-rn}w+UA&-|NsBxl$6d74%J6T_SV+yudnEzpYFK0-EC~tK0fim!O$rw{`2$DA0FLj zX54Xc`sL-)BqY!l7U-g))2IQ6FrwN0001ne@R3^R2Y?wj@u4{FbqSJ zI!PJbC~UyElwJP+Cs*K!y=;~uM@LF*qJ!AcZ@|1d?Y;p(pbS2XvKZ5)ioTB4TyJfM zw!hP0p|GK@3}bJ>3n@;8*i=L3G6j$PBLkThXjk$IC=Q0J4{38RI7HC)U3Md1^ zBhT}-ov@3=tg?*nCTa93M?-Q2sDk(PkLm|Ga$ouYx&Q?`f&TcC00000NkvXXu0mjf DNJWX9 literal 0 HcmV?d00001 diff --git a/recipes/toiprint.recipe b/recipes/toiprint.recipe new file mode 100644 index 0000000000..35391dd02f --- /dev/null +++ b/recipes/toiprint.recipe @@ -0,0 +1,107 @@ +from calibre.web.feeds.news import BasicNewsRecipe +import json +from datetime import date + + +# default edition is Delhi i.e., 'cap' +# Hyderabad - 'toih'; Delhi - 'cap'; Mumbai - 'toim'; Banglore - 'toibgc'; +# There are others too, try to figure it out, visit toi epaper link. + +# for example, replace 'cap' with 'toih', if you want Hyderabad edition. +le = 'cap' # local edition; + + +date0 = date.today().strftime('%Y/%m/%d') +date_ = date.today().strftime('%d_%m_%Y') + +# for older edition change both date0 and date_ below. 
# date0 = '2023/09/15'
# date_ = '15_09_2023'

# Rebuild a date object from date0 (rather than calling date.today() again)
# so that a manually overridden date0 is also the date shown in the title.
year, month, day = (int(x) for x in date0.split('/'))
dt = date(year, month, day)

# Base URL of the asset tree for the selected edition and day.
index = 'https://asset.harnscloud.com/PublicationData/TOI/' + le + '/' + date0
# Base URL of the image-resizer endpoint for the same edition and day.
img_index = 'https://cmsimages.timesgroup.com/image-resizer?epaper_s3_path=PublicationData/TOI/' + le + '/' + date0


class toiprint(BasicNewsRecipe):
    """Calibre recipe that builds an e-book from the Times of India epaper.

    The edition and day are taken from the module-level ``le``, ``date0`` and
    ``date_`` settings; ``index`` and ``img_index`` are derived from them.
    """

    title = 'TOI Print Edition'
    language = 'en_IN'
    __author__ = 'unkn0wn'
    masthead_url = 'https://static.toiimg.com/photo/98333929.cms'
    timefmt = ' [' + dt.strftime('%b %d, %Y') + ']'
    description = 'Articles from the Times of India epaper, digital edition'

    extra_css = '''
        .sub { color:#5c5c5c; }
        .auth { font-size:small; }
        .cap { text-align:center; font-size:small; }
        img { display:block; margin:0 auto; }
    '''

    def __init__(self, *args, **kwargs):
        """Initialise the recipe and put the issue date in Kindle titles."""
        BasicNewsRecipe.__init__(self, *args, **kwargs)
        if self.output_profile.short_name.startswith('kindle'):
            self.title = 'TOI Print Edition ' + dt.strftime('%b %d, %Y')

    def get_cover_url(self):
        """Return the URL of the image of page 001, used as the cover."""
        # Same URL as before; reuse ``index`` instead of rebuilding its prefix.
        cover = index + '/Page/' + date_ + '_001_' + le + '.jpg'
        self.log('cover_url ', cover)
        return cover

    def parse_index(self):
        """Download the day index and return the feeds for this issue.

        Returns:
            A list of ``(section_title, articles)`` tuples, where each article
            is a dict with ``title``, ``description`` and ``url`` keys. The
            ``url`` is the synthetic ``'<page>-<ArticleName>'`` token that
            ``print_version`` turns into the real JSON address.

        Raises:
            ValueError: if the day index has no ``DigitalIndex`` entry.
        """
        url = index + '/DayIndex/' + date_ + '_' + le + '.json'
        raw = self.index_to_soup(url, raw=True)
        data = json.loads(raw)
        if 'DigitalIndex' not in data:
            raise ValueError(
                'The Times of India Newspaper is not published today.'
            )

        feeds = []
        for section in data['DigitalIndex']:
            sec_name = section['PageTitle']
            self.log(sec_name)
            articles = []
            # ``or ()`` also tolerates a key that is present but null/empty.
            for view in section.get('Views') or ():
                for art in view.get('Articles') or ():
                    if 'ArticleName' not in art:
                        continue
                    # Distinct name: the original rebound the outer loop
                    # variable to this string.
                    art_name = art['ArticleName']
                    parts = art_name.split('_')
                    if len(parts) < 3:
                        # No page field to extract; skip instead of raising
                        # IndexError for the whole download.
                        continue
                    page = parts[-3]
                    art_url = page + '-' + art_name
                    title = art.get('ArticleTitle', 'unknown')
                    desc = 'Page No.' + page + ' | ' + art.get('ColumnTitle', '')
                    self.log('\t', title, '\n\t', desc, '\n\t\t', art_url)
                    articles.append({'title': title, 'description': desc, 'url': art_url})
            if articles:
                feeds.append((sec_name, articles))
        return feeds

    def preprocess_raw_html(self, raw, *a):
        """Convert an article's zone JSON into a minimal HTML document.

        ``raw`` is parsed as a JSON list of zone dicts; each zone's
        ``TagName`` selects how its ``ZoneText`` (or, for photographs, its
        ``ZoneID``) is rendered. Zones with other tag names are ignored.

        NOTE(review): the HTML tag literals were missing from the text this
        block was recovered from. The markup below is reconstructed from the
        class names declared in ``extra_css`` (``sub``, ``auth``, ``cap``) -
        confirm against the upstream recipe before merging.
        """
        data = json.loads(raw)
        parts = []  # joined once at the end instead of repeated str +=
        for zone in data:
            tag = zone.get('TagName')
            text = zone.get('ZoneText') or ''
            if tag == 'ArticleTitle':
                parts.append('<h1>' + text + '</h1>')
            elif tag == 'ColumnTitle':
                parts.append('<p class="sub"><b>' + text + '</b></p>')
            elif tag == 'Author':
                parts.append('<p class="auth"><i>' + text.replace('<br>', '') + '</i></p>')
            elif tag in {'ArticleBody', 'Information'}:
                parts.append(text)
            elif tag == 'LinkTo':
                parts.append('<p class="auth"><i>' + text + '</i></p>')
            elif tag == 'Photographs':
                zone_id = zone.get('ZoneID') or ''
                fields = zone_id.split('_')
                if len(fields) < 4:
                    # Cannot derive the page folder; drop the image only.
                    continue
                pag = fields[-4]
                parts.append('<div><img src="{}"></div>'.format(
                    img_index + '/Photographs/' + pag + '/'
                    + zone_id + '.jpg&bucket=andre-toi-out&q=50'))
            elif tag == 'ImageCaption':
                parts.append('<div class="cap">' + text + '</div><p>')
        body = ''.join(parts)
        return '<html><body><div>' + body.replace('<br>', '<p>') + '</div></body></html>'

    def print_version(self, url):
        """Map a ``'<page>-<ArticleName>'`` token to its article JSON URL."""
        # Only the first '-' is the separator added by parse_index; any later
        # hyphen belongs to the article name and must be preserved.
        return index + '/ArticleZoneJson/' + url.replace('-', '/', 1) + '.json'