mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Update General Knowledge Today
Fixes #1457724 [Enhancement Request](https://bugs.launchpad.net/calibre/+bug/1457724)
This commit is contained in:
parent
f22af669e8
commit
cdd376a4b4
@@ -4,7 +4,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
|||||||
class Politics(BasicNewsRecipe):
|
class Politics(BasicNewsRecipe):
|
||||||
title = u'General Knowledge Today'
|
title = u'General Knowledge Today'
|
||||||
language = 'en_IN'
|
language = 'en_IN'
|
||||||
__author__ = 'Kanika G'
|
__author__ = 'Kovid Goyal'
|
||||||
oldest_article = 7 # days
|
oldest_article = 7 # days
|
||||||
max_articles_per_feed = 20
|
max_articles_per_feed = 20
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
@@ -13,21 +13,26 @@ class Politics(BasicNewsRecipe):
|
|||||||
no_javascript = True
|
no_javascript = True
|
||||||
auto_cleanup = True
|
auto_cleanup = True
|
||||||
|
|
||||||
|
def parse_gkt_section(self, url):
|
||||||
|
root = self.index_to_soup(url, as_tree=True)
|
||||||
|
for a in root.xpath('//h1[@class="post-title"]/a[@href]'):
|
||||||
|
title = self.tag_to_string(a).strip()
|
||||||
|
url = a.get('href')
|
||||||
|
if title and url:
|
||||||
|
self.log('\tFound article:', title, 'at', url)
|
||||||
|
yield {'title':title, 'url':url}
|
||||||
|
|
||||||
def parse_index(self):
|
def parse_index(self):
|
||||||
soup = self.index_to_soup('http://www.gktoday.in/')
|
url = 'http://www.gktoday.in/'
|
||||||
|
root = self.index_to_soup(url, as_tree=True)
|
||||||
# Find TOC
|
ans = []
|
||||||
toc = soup.find('div', attrs={'class':'entry clearfix'})
|
for h3 in root.xpath('//h3[@class="widget-title" and contains(text(), "Current Affairs Category")]'):
|
||||||
articles = []
|
for a in h3.getparent().xpath('descendant::li/a[@href]'):
|
||||||
for li in toc.findAll('li'):
|
category = self.tag_to_string(a).strip()
|
||||||
a = li.find('a')
|
url = a.get('href')
|
||||||
info = self.tag_to_string(a)
|
self.log('Found section:', category)
|
||||||
url = a['href']
|
articles = list(self.parse_gkt_section(url)) + list(self.parse_gkt_section(url + '/page/2'))
|
||||||
desc = ''
|
if articles:
|
||||||
self.log('Found article:', info)
|
ans.append((category, articles))
|
||||||
self.log('\t', url)
|
break
|
||||||
self.log('\t', desc)
|
return ans
|
||||||
articles.append({'title':info, 'url':url, 'date':'',
|
|
||||||
'description':desc})
|
|
||||||
|
|
||||||
return [('Current Issue', articles)]
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user