calibre/recipes/toiprint.recipe
Kovid Goyal f741b0a685
Update TOI Print Edition
Only gets article stubs; full articles need a TOI+ subscription
2026-02-24 19:55:35 +05:30

69 lines
2.4 KiB
Python

from datetime import date
from calibre.web.feeds.news import BasicNewsRecipe, classes
# Edition code used when building epaper image URLs. Known codes:
#   Delhi 'cap' (default), Hyderabad 'toih', Mumbai 'toim', Bangalore 'toibgc',
#   Chennai 'toich', Chandigarh 'toicgct', Jaipur 'toijc', Kolkata 'toikc'.
# Other editions exist; look them up on the TOI epaper site.
le = 'cap'

# Edition date as 'YYYY/MM/DD'; defaults to today.
date0 = date.today().strftime('%Y/%m/%d')
# To fetch an older edition, override date0 here, for example:
# date0 = '2023/09/15'

# Re-parse date0 so that a manual override above flows into every derived value.
year, month, day = map(int, date0.split('/'))
dt = date(year=year, month=month, day=day)
date_ = dt.strftime('%d_%m_%Y')

# Base URL of the image resizer for the chosen edition and date.
img_index = ''.join((
    'https://cmsimages.timesgroup.com/image-resizer?epaper_s3_path=PublicationData/TOI/',
    le,
    '/',
    date0,
))
class TOIPring(BasicNewsRecipe):
    '''
    calibre recipe that builds an index of the Times of India print edition
    from the epaper landing page.
    '''

    title = 'TOI Print Edition'
    language = 'en_IN'
    __author__ = 'Kovid Goyal'
    masthead_url = 'https://static.toiimg.com/photo/98333929.cms'
    # dt is the module-level edition date, so the title suffix follows any
    # manual date override made at the top of the file.
    timefmt = ' [' + dt.strftime('%b %d, %Y') + ']'
    description = 'Articles from the Times of India epaper, digital edition'
    encoding = 'utf-8'
    remove_empty_feeds = True

    # Keep only the article wrapper, then strip page chrome from inside it.
    keep_only_tags = [classes('printeditioncontentwrapper')]
    remove_tags = [
        classes('header-container popupWrapper footer_wrapper icon_share_wrap'),
        {'id': 'blocker'},
        {'name': 'style'},
    ]

    extra_css = '''
        .sub { color:#202020; }
        .auth { font-size:small; font-weight:bold; color:#202020; }
        .cap { text-align:center; font-size:small; }
        img { display:block; margin:0 auto; }
        .info { font-size:small; color:#404040; }
        .lead { color:#404040; }
    '''

    def parse_index(self):
        '''
        Build the feed index from the print-edition landing page.

        Every ``<section data-content=...>`` element is treated as a section
        header: its ``<h1>`` text is the section title and the articles are the
        ``<a href=...>`` links inside the following sibling element that has
        ``type="listing"``.

        Returns a list of ``(section_title, articles)`` tuples, where
        ``articles`` is a list of ``{'title': ..., 'url': ...}`` dicts.
        Sections without any articles are omitted.
        '''
        index_url = 'https://epaper.indiatimes.com/english-news-paper-today-toi-print-edition/'
        soup = self.index_to_soup(index_url)
        ans = []
        for sec in soup.find_all('section', attrs={'data-content': True}):
            section = self.tag_to_string(sec.find('h1'))
            self.log(section)
            listing = sec.find_next_sibling(type='listing')
            if listing is None:
                # A section header with no listing after it has no articles;
                # skip it instead of failing on None.find_all().
                continue
            articles = []
            for a in listing.find_all('a', href=True):
                title = self.tag_to_string(a.find('h2'))
                self.log(' ', title)
                articles.append({'title': title, 'url': a['href']})
            if articles:
                ans.append((section, articles))
        return ans