Update The Friday Times

This commit is contained in:
Kovid Goyal 2014-11-25 09:05:32 +05:30
parent 9edef381e8
commit 9575a482a7

View File

@@ -1,26 +1,44 @@
from calibre.web.feeds.news import BasicNewsRecipe
class TheFridayTimes(BasicNewsRecipe):
    """Calibre recipe that downloads the current issue of The Friday Times.

    The article list is scraped from the left-hand sidebar of the site's
    front page; each article page is then trimmed down with the
    ``keep_only_tags`` / ``remove_tags`` rules declared below.
    """

    __author__ = 'Krittika Goyal, ireadtheinternet'
    language = 'en_PK'
    encoding = 'utf8'
    version = 1.1

    title = u'The Friday Times'
    category = u'news, Pakistan'
    description = u"Pakistan's First Independent Weekly Paper"

    no_stylesheets = True
    no_javascript = True
    # The same story can be linked more than once from the index, so
    # de-duplicate the collected articles by URL.
    ignore_duplicate_articles = {'url'}

    # Parts of an article page that are kept in the output.
    keep_only_tags = [
        dict(name='div', attrs={'class': 'sidebar_content'}),
        dict(name='div', attrs={'class': 'comment_inner'}),
    ]
    # Parts stripped out of whatever was kept above.
    remove_tags = [
        dict(name='p', attrs={'class': 'no-break'}),
        dict(name='div', attrs={'class': 'related_posts'}),
        dict(name='div', attrs={'id': 'respond'}),
    ]

    def parse_index(self):
        """Build the article list from the front-page sidebar.

        Returns a list holding a single ``('Current Issue', articles)``
        tuple, where ``articles`` is a list of dicts with the keys
        ``title``, ``url``, ``date`` and ``description``.
        """
        toc_page = self.index_to_soup('http://www.thefridaytimes.com/tft/')
        toc = toc_page.find('div', attrs={'class': 'sidebar_left_home_wrapper'})
        articles = []
        for story in toc.findAll('a'):
            # skip the links with an image, they are repeated further down
            if story.find('img') is not None:
                continue
            url = story.get('href')
            if not url:
                # An anchor without a target cannot be downloaded.
                continue
            # If no title, use url as title
            title = story.get('title', url)
            self.log('Found article:', story)
            self.log('\t', url)
            articles.append(
                {'title': title, 'url': url, 'date': '', 'description': ''}
            )
        return [('Current Issue', articles)]