From 64359fbbe58e79fd8641bc9692f2d6af4c7fa709 Mon Sep 17 00:00:00 2001
From: unkn0w7n <51942695+unkn0w7n@users.noreply.github.com>
Date: Tue, 19 Sep 2023 12:17:22 +0530
Subject: [PATCH] Times of India Print edition

---
 recipes/icons/toi.png      | Bin 96 -> 305 bytes
 recipes/icons/toiprint.png | Bin 0 -> 305 bytes
 recipes/toiprint.recipe    | 107 +++++++++++++++++++++++++++++++++++++
 3 files changed, 107 insertions(+)
 create mode 100644 recipes/icons/toiprint.png
 create mode 100644 recipes/toiprint.recipe
diff --git a/recipes/icons/toi.png b/recipes/icons/toi.png
index c7f427aafc67c5374439f207a641399f0c7b1024..26da33fff69fee0cccae7ec62f9b67947043a694 100644
GIT binary patch
delta 289
zcmV++0p9*#u>z17e+UBr001a04^sdD0A^53R7K1I0L%aY%?1X_007AV0LuUX$N&KN
z-rn}w+UA&-|NsBxl$6d74%J6T_SV+yudnEzpYFK0-EC~tK0fim!O$rw{`2$DA0FLj
zX54Xc`sL-)BqY!l7U-g)<BN;ldwcuq>)2IQ6FrwN0001ne@R3^R2Y?wj@u4{FbqSJ
zI!PJbC~UyElwJP+Cs*K!y=;~uM@LF*qJ!AcZ@|1d?Y;p(pbS2XvKZ5)ioTB4TyJfM
zw!hP0p|GK@3}bJ>3n@;8*i=L3G6j$PBLkThXjk$IC<DVI&-1mNu#3g4vW)L0Y4j;a
nLvjVEg7@{08|nu+a$ouYx&Q?`f&TcC00000NkvXXu0mjfSjLHS

delta 78
zcmdnUlrTZUgOQnmfkD+aR|!Zl2l#}z{{R1f`4Q<8KrUlRkY6x^!?PP{Kn}O3i(?3f
cY_ftNkYm8W$o1Sq1ju6WboFyt=akR{03hKLo&W#<

diff --git a/recipes/icons/toiprint.png b/recipes/icons/toiprint.png
new file mode 100644
index 0000000000000000000000000000000000000000..26da33fff69fee0cccae7ec62f9b67947043a694
GIT binary patch
literal 305
zcmV-10nYx3P)<h;3K|Lk000e1NJLTq000mG000mO0{{R3C@l|D0001HP)t-s%mDz*
z007Mf2Fd^c$p8S$0077U0QlbC_S)L!n3w<m|K*gF&JPaNM@RP7*6gpZ=%1hNxVYVI
zY}7tJ@xj5+DJlN*^Uxn2-DhUpadG<P<<cZ1&=wZxqN3xAi{5*C`|Io2RaFx`moESS
z0ES6KK~xx(jgH$6gD?z3lR8Nm-6(9pxRhP~|0h@AiM?!=B1cC`Y@&nM(Qm-KI_<sz
zK%fjhi?SHgrHa0e)?9CGhqk}dV4<*~t_)*u!3!x)h1gU>=Q0J4{38RI7HC)U3Md1^
zBhT}-ov@3=tg?*nCTa93M?-Q2sDk(PkLm|Ga$ouYx&Q?`f&TcC00000NkvXXu0mjf
DNJWX9

literal 0
HcmV?d00001

diff --git a/recipes/toiprint.recipe b/recipes/toiprint.recipe
new file mode 100644
index 0000000000..35391dd02f
--- /dev/null
+++ b/recipes/toiprint.recipe
@@ -0,0 +1,107 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+import json
+from datetime import date
+
+
+# The default edition is Delhi, i.e. 'cap'.
+# Hyderabad - 'toih'; Delhi - 'cap'; Mumbai - 'toim'; Bangalore - 'toibgc'.
+# There are other editions too; to find their codes, visit the TOI e-paper site.
+
+# For example, replace 'cap' with 'toih' if you want the Hyderabad edition.
+le = 'cap'  # local edition code; 'cap' is Delhi
+
+# Read the clock once so date0 and date_ always describe the same day.
+_today = date.today()
+date0 = _today.strftime('%Y/%m/%d')
+date_ = _today.strftime('%d_%m_%Y')
+# For an older edition, override both date0 and date_ below.
+# date0 = '2023/09/15'
+# date_ = '15_09_2023'
+
+year, month, day = (int(x) for x in date0.split('/'))
+dt = date(year, month, day)
+
+index = 'https://asset.harnscloud.com/PublicationData/TOI/' + le + '/' + date0
+img_index = 'https://cmsimages.timesgroup.com/image-resizer?epaper_s3_path=PublicationData/TOI/' + le + '/' + date0
+
+class toiprint(BasicNewsRecipe):
+    '''News recipe for the Times of India e-paper, read from its JSON index and zones.'''
+    title = 'TOI Print Edition'
+    language = 'en_IN'
+    __author__ = 'unkn0wn'
+    masthead_url = 'https://static.toiimg.com/photo/98333929.cms'
+    timefmt = ' [' + dt.strftime('%b %d, %Y') + ']'
+    description = 'Articles from the Times of India epaper, digital edition'
+    extra_css = '''
+        .sub { color:#5c5c5c; }
+        .auth { font-size:small; }
+        .cap { text-align:center; font-size:small; }
+        img { display:block; margin:0 auto; }
+    '''
+
+    def __init__(self, *args, **kwargs):
+        BasicNewsRecipe.__init__(self, *args, **kwargs)
+        if self.output_profile.short_name.startswith('kindle'):
+            self.title = 'TOI Print Edition ' + dt.strftime('%b %d, %Y')
+
+    def get_cover_url(self):
+        '''Return (and log) the URL of the edition's page '001' JPEG, used as the cover.'''
+        cover = index + '/Page/' + date_ + '_001_' + le + '.jpg'
+        self.log('cover_url ', cover)
+        return cover
+
+    def parse_index(self):
+        '''Return [(page title, [article dicts])] built from the DayIndex JSON.
+        Raises ValueError when that JSON has no 'DigitalIndex' key.'''
+        url = index + '/DayIndex/' + date_ + '_' + le + '.json'
+        data = json.loads(self.index_to_soup(url, raw=True))
+        if 'DigitalIndex' not in data:
+            raise ValueError('The Times of India Newspaper is not published today.')
+        feeds = []
+        for section in data['DigitalIndex']:
+            sec_name = section['PageTitle']
+            self.log(sec_name)
+            articles = []
+            for view in section.get('Views', []):
+                for art in view.get('Articles', []):
+                    if 'ArticleName' not in art:
+                        continue
+                    name = art['ArticleName']
+                    page = name.split('_')[-3]
+                    # Not a fetchable URL: print_version() turns 'page-name' into one.
+                    url = page + '-' + name
+                    title = art.get('ArticleTitle', 'unknown')
+                    desc = 'Page No.' + page + ' | ' + art.get('ColumnTitle', '')
+                    self.log('\t', title, '\n\t', desc, '\n\t\t', url)
+                    articles.append({'title': title, 'description': desc, 'url': url})
+            if articles:
+                feeds.append((sec_name, articles))
+        return feeds
+
+    def preprocess_raw_html(self, raw, *a):
+        '''Rebuild an article's JSON list of tagged zones as a small HTML page.'''
+        parts = []
+        for zone in json.loads(raw):
+            tag = zone['TagName']
+            if tag == 'ArticleTitle':
+                parts.append('<h1>' + zone['ZoneText'] + '</h1>')
+            elif tag == 'ColumnTitle':
+                parts.append('<p class="sub"><b>' + zone['ZoneText'] + '</b></p>')
+            elif tag == 'Author':
+                parts.append('<p class="auth"><i>' + zone['ZoneText'].replace('<br>', '') + '</i></p>')
+            elif tag in {'ArticleBody', 'Information'}:
+                parts.append(zone['ZoneText'])
+            elif tag == 'LinkTo':
+                parts.append('<p><i>' + zone['ZoneText'] + '</i></p>')
+            elif tag == 'Photographs':
+                pag = zone['ZoneID'].split('_')[-4]
+                src = img_index + '/Photographs/' + pag + '/' + zone['ZoneID'] + '.jpg&bucket=andre-toi-out&q=50'
+                parts.append('<div><img src="{}"></div>'.format(src))
+            elif tag == 'ImageCaption':
+                parts.append('<div class="cap">' + zone['ZoneText'] + '</div><p>')
+        return '<html><body><div>' + ''.join(parts).replace('<br>', '<p>') + '</div></body></html>'
+
+    def print_version(self, url):
+        '''Turn the 'page-name' key made by parse_index() into the article JSON URL.'''
+        # Replace only the first '-': it is the separator parse_index() inserted.
+        return index + '/ArticleZoneJson/' + url.replace('-', '/', 1) + '.json'