mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-09-29 15:31:08 -04:00
48 lines
1.5 KiB
Python
48 lines
1.5 KiB
Python
from calibre.web.feeds.news import BasicNewsRecipe, classes
|
|
|
|
|
|
class GKT(BasicNewsRecipe):
    """Recipe that builds its index from the GKToday current-affairs listing."""

    title = u'General Knowledge Today'
    language = 'en_IN'
    __author__ = 'Kovid Goyal'
    oldest_article = 7  # days
    max_articles_per_feed = 20

    no_stylesheets = True
    no_javascript = True
    auto_cleanup = True

    def parse_index(self):
        """Scrape the first five listing pages and group articles by category.

        Each headline is an ``<h1>`` containing a link; the ``<p>`` siblings
        that follow it (up to the next ``<h1>``) supply the description and
        the ``rel="tag"`` link used as the category name.

        Returns a list of ``(category, articles)`` tuples, where ``articles``
        is a list of dicts with the keys ``title``, ``url`` and
        ``description``. Headlines without a category link are grouped under
        ``'Unknown'``.
        """
        securl = 'https://www.gktoday.in/current-affairs/'
        ans = {}

        def p_tags(h1):
            """Yield the <p> siblings after ``h1``, stopping at the next <h1>."""
            for sib in h1.next_siblings:
                if sib.name == 'h1':
                    break
                if sib.name == 'p':
                    yield sib

        def find_cat(ps):
            """Return the text of the first rel="tag" link in ``ps``, or None."""
            for p in ps:
                for a in p.findAll('a', rel='tag'):
                    return self.tag_to_string(a)
            return None

        for i in range(1, 6):
            # The first page lives at the bare listing URL; later pages are
            # addressed as page/<n>.
            page = '' if i == 1 else 'page/' + str(i)
            self.log('Trying:', securl + page)
            soup = self.index_to_soup(securl + page)
            container = soup.find(**classes('left_middle_content'))
            if container is None:
                # Layout changed or an empty page: skip it instead of
                # failing the whole download with an AttributeError.
                self.log('\tNo article container found, skipping page')
                continue
            for h1 in container.findAll('h1'):
                title = self.tag_to_string(h1)
                a = h1.find('a')
                if a is None:
                    # Headings without a link are not articles.
                    continue
                url = a['href']
                ps = tuple(p_tags(h1))
                category = find_cat(ps) or 'Unknown'
                # A headline may have no following paragraph; fall back to
                # an empty description rather than raising IndexError.
                description = self.tag_to_string(ps[0]) if ps else ''
                ans.setdefault(category, []).append({
                    'title': title, 'url': url, 'description': description})
                self.log('\t' + title + ' ' + url)

        return list(ans.items())
|