From da9d8e996c6013815c40e3b0713d3afee6008588 Mon Sep 17 00:00:00 2001
From: unkn0w7n <51942695+unkn0w7n@users.noreply.github.com>
Date: Thu, 28 Dec 2023 13:57:11 +0530
Subject: [PATCH] Hindustan Times Print Edition recipe

---
 recipes/hindustan_times_print.recipe    | 113 ++++++++++++++++++++++++
 recipes/icons/hindustan_times_print.png | Bin 0 -> 1592 bytes
 2 files changed, 113 insertions(+)
 create mode 100644 recipes/hindustan_times_print.recipe
 create mode 100644 recipes/icons/hindustan_times_print.png

diff --git a/recipes/hindustan_times_print.recipe b/recipes/hindustan_times_print.recipe
new file mode 100644
index 0000000000..295aaf8660
--- /dev/null
+++ b/recipes/hindustan_times_print.recipe
@@ -0,0 +1,113 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+import json
+from datetime import date
+from collections import defaultdict
+
+# Set local_edition to one of the city names printed in this recipe's fetch news log.
+local_edition = 'Delhi'
+
+# Edition date as dd/mm/YYYY; defaults to the current day.
+today = date.today().strftime('%d/%m/%Y')
+# For an older edition, uncomment the next line and change the date.
+# today = '22/12/2023'
+
+# dt is the edition date as a date object; today becomes the URL-encoded query value.
+day, month, year = map(int, today.split('/'))
+dt = date(year, month, day)
+today = today.replace('/', '%2F')
+index = 'https://epaper.hindustantimes.com'  # e-paper site root used to build every request URL
+
+class ht(BasicNewsRecipe):
+    '''Calibre recipe for one city edition of the Hindustan Times e-paper.'''
+
+    title = 'Hindustan Times Print Edition'
+    language = 'en_IN'
+    __author__ = 'unkn0wn'
+    masthead_url = 'https://www.htmedia.in/wp-content/uploads/2020/08/HT-dot-com-logo-product.png'
+    timefmt = ' [' + dt.strftime('%b %d, %Y') + ']'
+    description = 'Articles from the Hindustan Times epaper, digital edition'
+    encoding = 'utf-8'
+    delay = 1
+    ignore_duplicate_articles = {'title'}
+    extra_css = '''
+        .cap { text-align:center; font-size:small; }
+        img { display:block; margin:0 auto; }
+    '''
+
+    def __init__(self, *args, **kwargs):
+        '''Set up the recipe; kindle output profiles get a title that includes the date.'''
+        BasicNewsRecipe.__init__(self, *args, **kwargs)
+        if self.output_profile.short_name.startswith('kindle'):
+            self.title = 'HT Print Edition ' + dt.strftime('%b %d, %Y')
+
+    def parse_index(self):
+        '''Return a list of (section, articles) pairs for the configured edition.
+
+        Sets cover_url from a page whose title starts with 'Front'.
+        Raises ValueError when local_edition matches none of the editions on offer.
+        '''
+        self.log('\n***\nif this recipe fails, report it on: '
+                 'https://www.mobileread.com/forums/forumdisplay.php?f=228\n***\n')
+        get_edition = index + '/Home/GetEditionSupplementHierarchy?EditionDate=' + today
+        edi_data = json.loads(self.index_to_soup(get_edition, raw=True))
+        cities = [edi['EditionName'] for edi in edi_data]
+        self.log('## For your local_edition, modify this recipe to match your city from the names below\n(', ', '.join(cities), ')\n')
+        # Fail with a readable message instead of an UnboundLocalError on an unknown city.
+        edition_ids = {edi['EditionName']: edi['EditionId'] for edi in edi_data}
+        if local_edition not in edition_ids:
+            raise ValueError('local_edition %r is not available; choose one of: %s' % (local_edition, ', '.join(cities)))
+        edi_id = str(edition_ids[local_edition])
+        self.log('Downloading', local_edition, 'Edition')
+
+        url = index + '/Home/GetAllpages?editionid=' + edi_id + '&editiondate=' + today
+        main_data = json.loads(self.index_to_soup(url, raw=True))
+        feeds_dict = defaultdict(list)
+        for page in main_data:
+            page_no = page['PageNumber']
+            sec_name = page['NewsProPageTitle']
+            if sec_name == 'Full Page Ad':
+                continue
+            if sec_name.startswith('Front'):
+                self.cover_url = page['HighResolution']
+            art = index + '/Home/getingRectangleObject?pageid=' + str(page['PageId'])
+            self.log(sec_name, ' ', page_no)
+            art_data = json.loads(self.index_to_soup(art, raw=True))
+            for snaps in art_data:
+                title = ' '.join((snaps['StoryTitle'] or '').split()[:15])  # at most 15 words; null title is skipped
+                if not title:
+                    continue
+                self.log('\t', title, ' ', page_no)
+                feeds_dict[sec_name].append({"title": title, "description": page_no, "url": str(snaps['OrgId'])})
+        return list(feeds_dict.items())
+
+    def preprocess_raw_html(self, raw, *a):
+        '''Convert the article JSON payload in raw into a minimal HTML page.
+
+        Output order: headlines (first as h1, the rest as h4), linked
+        pictures with their captions, then the Body of each StoryContent entry.
+        '''
+        data = json.loads(raw)
+        body = ''
+        for x in data['StoryContent']:
+            if x['Headlines']:  # truthiness already rules out None and an empty list
+                body += '<h1>' + x['Headlines'][0].replace('\n', ' ') + '</h1>'
+                for y in x['Headlines'][1:]:
+                    body += '<h4>' + y.replace('\n', ' ') + '</h4>'
+        for pics in data['LinkPicture'] or []:
+            if pics['fullpathlinkpic']:
+                body += '<div><img src="{}"></div>'.format(pics['fullpathlinkpic'])
+            if pics['caption']:
+                body += '<div class="cap">' + pics['caption'] + '</div><p>'
+        for x in data['StoryContent']:
+            if x['Body']:
+                body += x['Body']
+        # Optional: data['filepathstorypic'] (with '\\' replaced by '/') is a snap image of the article from the page.
+        return '<html><body><div>' + body + '</div></body></html>'
+
+    def print_version(self, url):
+        '''Return the article-view URL for the OrgId that parse_index stored as url.'''
+        return index + '/User/ShowArticleView?OrgId=' + url
+
+    def populate_article_metadata(self, article, soup, first):
+        '''Overwrite the article URL with the placeholder '***'.'''
+        article.url = '***'
diff --git a/recipes/icons/hindustan_times_print.png b/recipes/icons/hindustan_times_print.png
new file mode 100644
index 0000000000000000000000000000000000000000..01ab8981a575302fe28a2a6a8fd3d506052c5378
GIT binary patch
literal 1592
zcmV-82FLk{P)<h;3K|Lk000e1NJLTq001BW001Be0ssI2{21+{00006bW%=J00000
z003^L2ax~(1?@>hK~#9!rB(%O<46~NB1@4h<^PYFnVFfHnVFfHZ<(1H%T2NklsOCz
zr;u%$GV^VkdhfHx+uSmZM%H*{-uvD>racdpig>lM#^Q-r=-0WlLQmON%CgXMz4ZI|
z%f$1kJ0Lm0k-B>I&Gf}zv?zx*S5UT%aviCOUu-VSrqBPND{o2>9w_%quc3*{Nav>f
zFzJ1yb&<xo699g(i6dOly6NvFbkP+DfWgVfuY4lu!5B~^&9P&#=ZPcsGGmZ3%=F?1
zjLAM_|9&c&f{>0;9_YknkO1bJNqc#RikG=hqx{IerQY{zpfQNZ815$fD<AXuAwI{Z
z(l~gHlgo9ecGf%gIgPT$DekDX%ETH|g*zb64?)QaTLp^a=;Et6;Ahz=-9ni*%Ctfv
z{HEAxoe+@eO@Ya<Qkn%5Bv?yS*)n0aU7c%(n}DsvocTL9VJYeT&y@z=s){^O8ho`f
za%P_U>9XJv&@6j?n)#-D-|YoH&||7Kjt|YXv;Faf!KR5q>=R4vb7i4>i~J83`c{SF
zw^r2AgykGD=J;NH6Xm$p*S2ooF%o0qOBLa)t?d`+J&@}Rt!u8Yad0&Gg{=-q*BFIh
z&(QYWMp6{<jSY-&SvFOkAcYZzg!B2Id_g4EKUC7vql>mr5@H=A2><{R0C;{?Yab{<
z4H;>ZSsUxbZ`*JJFqo0vAqd1UdbW+eToI;pGhY~?uO^T*jyMVdsW$ka5b&an)%o#m
zT!o@G*0liu$_@4mj)~%6B6+n*fYKv<LmTSaR9QA0!E!UaF?&b<ude7O+?V~EgAEXa
zi*xCyV@OIVX%)=zkIA;bUKL@&0uKb283lE=&DTAE&7slc)kXnk1TV0@v%q(gNyv(}
z<7kdQexBYl$KMEe#ccSdRnil$68*jatP%Q0!B5s#{_5JwUi(~GaD82?<S-kmg-!+F
z3IM?1o6Qs4TN?&OF3>v7ZGECB&JDzG&i5@0#<>hDuQ<2tb-&LiT4jP4!ZC0#ONdpp
z_A-FSOM`~SuC)5LRpI9Lk%SB|c8#o}56m_Aog%~_khM)R4*;&u_bv#=DO~|j=JGrO
z4z{>VTW&)KGZX`Opg3S_2XShQB**XqcnJIiCb^}+$E8`!`v!oPIOH`sj=pz9zckmv
zdLUp%*2Ox1bVlE+2*aw_8BUI!vBiC7ksoWg?4<^Q()R#xgmz~DT4_*zGHkTI3gI(u
z<g{qpNjev7V3D~1-~^ouF(}G`a(kij0@z@-zYzfQgK-s@WZPk<l!{;nIRQA2(Yn-`
zw(spxrlQA6gPGBGD4ox+IXe247v#BN8}`I0UdaaR1xv!s09X=ghBWsS`OnsSfWd(+
z3Lic8l2n_(9hS(jer~G+L3Ci`B#kpK)+y`3%8UWEoU+AroYskGf(<%H64&TGp3VV(
z_uxJGUSyHKJq<S-1jNKCS|_J<fi0LTfrhw1dO;dk8ENSnNnEOTBVu(s0KDA;$O7Av
zA{2;7TpDf$9x!?mNqF7uChr*<=LhCmXJ>yx6c+`Xz(;NHOKicM<W#{GL@3j8uFlmw
zv<(;=A}vDK0Nw!`AiP)Cw2b@GE9D_R!&=_bJH9h2ivJ|X*t)d22MSj~Ibeu#t&A&g
z;I{~TmaVwCdz`E<k^J0J3q3-0z(7DCLH%Tpo`AeB*OocHIARUlS(>Zs8=l~4K!)d_
zeVAA0p(}168qcT`mht()=gR_5mxZ1x3&N66H@uLge$c>ZNZ{C^90$ppSi|MvOAi(Y
zZp!!K0=o}v)K2sajCrcy-~jlnbYOzuzagVUyEe7M){F{YwgW0Tty$^fE0uzis%@`+
zf@%l#p){)CQM*;$J?`M9JlT$1meDi3^ucbWpF#a_*hL?<{dNjtxR<u&yTfZ+zMafi
z?Yr^%_)r?JOf$XoA!Bmjt)u5)TE{CH`9U?mw<yhT+FqL9McD|_eK)@c(7vmWkSgCF
q3z2u__T5S=^+=I&`>x(^`~EMoK)BNty{63o0000<MNUMnLSTY-%?>62

literal 0
HcmV?d00001