# calibre news recipe: Hindustan Times Print Edition.
#
# Recovered from the patch "Hindustan Times Print Edition recipe", which adds
# this code as recipes/hindustan_times_print.recipe. The patch also carried a
# binary icon (recipes/icons/hindustan_times_print.png); that payload is not
# reproducible from the text available here and is not included.
import json
from collections import defaultdict
from datetime import date

from calibre.web.feeds.news import BasicNewsRecipe

# figure out your local_edition from the fetch news log of this recipe
local_edition = 'Delhi'

today = date.today().strftime('%d/%m/%Y')

# for older edition, change today
# today = '22/12/2023'

# Parse the (possibly user-overridden) date string back into a date object so
# the title/timefmt always agree with the edition that is actually fetched.
day, month, year = (int(x) for x in today.split('/'))
dt = date(year, month, day)
# The epaper endpoints take the date as a URL-encoded dd/mm/yyyy string.
today = today.replace('/', '%2F')

index = 'https://epaper.hindustantimes.com'


class ht(BasicNewsRecipe):
    """Build an e-book from the Hindustan Times epaper JSON endpoints.

    The edition is selected by the module-level ``local_edition`` and the
    issue date by the module-level ``today``.
    """

    title = 'Hindustan Times Print Edition'
    language = 'en_IN'
    __author__ = 'unkn0wn'
    masthead_url = 'https://www.htmedia.in/wp-content/uploads/2020/08/HT-dot-com-logo-product.png'
    timefmt = ' [' + dt.strftime('%b %d, %Y') + ']'
    description = 'Articles from the Hindustan Times epaper, digital edition'
    encoding = 'utf-8'
    delay = 1
    ignore_duplicate_articles = {'title'}

    extra_css = '''
        .cap { text-align:center; font-size:small; }
        img { display:block; margin:0 auto; }
    '''

    def __init__(self, *args, **kwargs):
        """Initialise the recipe; use a shorter, dated title on Kindle profiles."""
        BasicNewsRecipe.__init__(self, *args, **kwargs)
        if self.output_profile.short_name.startswith('kindle'):
            self.title = 'HT Print Edition ' + dt.strftime('%b %d, %Y')

    def parse_index(self):
        """Return ``[(section_title, [article_dict, ...]), ...]`` for the edition.

        Each article dict has ``title``, ``description`` (the page number) and
        ``url`` (the story's ``OrgId``, expanded later by ``print_version``).

        Raises:
            ValueError: if ``local_edition`` matches none of the edition names
                returned by the site for the requested date.
        """
        self.log(
            '\n***\nif this recipe fails, report it on: '
            'https://www.mobileread.com/forums/forumdisplay.php?f=228\n***\n'
        )

        get_edition = index + '/Home/GetEditionSupplementHierarchy?EditionDate=' + today
        edi_data = json.loads(self.index_to_soup(get_edition, raw=True))
        cities = [edi['EditionName'] for edi in edi_data]
        self.log(
            '## For your local_edition, modify this recipe to match your city from the names below\n(',
            ', '.join(cities), ')\n'
        )

        edi_id = None
        for edi in edi_data:
            if edi['EditionName'] == local_edition:
                edi_id = str(edi['EditionId'])
                self.log('Downloading', edi['EditionName'], 'Edition')
        if edi_id is None:
            # Previously this fell through to an unbound-variable error; fail
            # with a message that tells the user what to change instead.
            raise ValueError(
                'local_edition {!r} is not available for this date; choose one of: {}'.format(
                    local_edition, ', '.join(cities)
                )
            )

        pages_url = index + '/Home/GetAllpages?editionid=' + edi_id + '&editiondate=' + today
        main_data = json.loads(self.index_to_soup(pages_url, raw=True))

        feeds_dict = defaultdict(list)

        for page in main_data:
            page_no = page['PageNumber']
            sec_name = page['NewsProPageTitle']
            if sec_name == 'Full Page Ad':
                continue
            if sec_name.startswith('Front'):
                # The front page's high-resolution scan doubles as the cover.
                self.cover_url = page['HighResolution']
            art = index + '/Home/getingRectangleObject?pageid=' + str(page['PageId'])
            self.log(sec_name, ' ', page_no)
            art_data = json.loads(self.index_to_soup(art, raw=True))
            for snaps in art_data:
                # Cap titles at 15 words; treat a null title like an empty one.
                title = ' '.join((snaps['StoryTitle'] or '').split()[:15])
                if not title:
                    continue
                self.log('\t', title, ' ', page_no)
                feeds_dict[sec_name].append({
                    'title': title,
                    'description': page_no,
                    'url': str(snaps['OrgId']),
                })
        return list(feeds_dict.items())

    def preprocess_raw_html(self, raw, *a):
        """Convert the article-view JSON payload into a minimal HTML document.

        NOTE(review): the HTML tag literals of this method were lost in the
        text this recipe was recovered from. The markup below is a
        reconstruction: the ``cap`` class and ``img`` element are grounded in
        ``extra_css``; the heading levels and wrapper elements are a best
        guess and should be confirmed against the upstream recipe.
        """
        data = json.loads(raw)
        parts = []

        # First headline of each story block is the main heading, the rest
        # are rendered as sub-headings.
        for story in data['StoryContent']:
            headlines = story['Headlines']
            if headlines:
                parts.append('<h1>' + headlines[0].replace('\n', ' ') + '</h1>')
                parts.extend(
                    '<h4>' + sub.replace('\n', ' ') + '</h4>' for sub in headlines[1:]
                )

        if data['LinkPicture']:
            for pics in data['LinkPicture']:
                if pics['fullpathlinkpic']:
                    parts.append('<div><img src="{}"></div>'.format(pics['fullpathlinkpic']))
                if pics['caption']:
                    parts.append('<div class="cap">' + pics['caption'] + '</div>')

        parts.extend(story['Body'] for story in data['StoryContent'] if story['Body'])

        # Optional: data['filepathstorypic'] gives a snap image of the article
        # as printed on the page (replace '\\' with '/' in the path before
        # using it as an <img> source). Intentionally left disabled.
        return '<html><body><div>' + ''.join(parts) + '</div></body></html>'

    def print_version(self, url):
        """Expand an ``OrgId`` (stored as the article url) into the article-view URL."""
        return index + '/User/ShowArticleView?OrgId=' + url

    def populate_article_metadata(self, article, soup, first):
        """Mask the article URL in the generated book.

        NOTE(review): presumably done because the fetch URL is a JSON
        endpoint rather than a reader-facing page — confirm.
        """
        article.url = '***'