mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
47 lines
1.4 KiB
Plaintext
47 lines
1.4 KiB
Plaintext
from calibre.web.feeds.news import BasicNewsRecipe
|
|
|
|
|
|
class TheFridayTimes(BasicNewsRecipe):
    """Calibre news recipe for The Friday Times.

    The article list is scraped from the anchors inside the
    'sidebar_left_home_wrapper' div of the site's index page and is
    returned as a single feed named 'Current Issue'.
    """

    __author__ = 'Krittika Goyal, ireadtheinternet'
    language = 'en_PK'
    encoding = 'utf8'
    version = 1.1

    title = u'The Friday Times'
    category = u'news, Pakistan'
    description = u"Pakistan's First Independent Weekly Paper"

    no_stylesheets = True
    no_javascript = True
    ignore_duplicate_articles = {'url'}

    # Article pages: the 'sidebar_content' div is the part to keep ...
    keep_only_tags = [
        dict(name='div', attrs={'class': 'sidebar_content'}),
    ]

    # ... and these blocks are stripped out of it.
    remove_tags = [
        dict(name='p', attrs={'class': 'no-break'}),
        dict(name='div', attrs={'class': 'related_posts'}),
        dict(name='div', attrs={'id': 'respond'})
    ]

    def parse_index(self):
        """Build the article index from the site's index page.

        Returns:
            A one-element list holding the ('Current Issue', articles)
            tuple, where every article is a dict with the keys 'title',
            'url', 'date' and 'description' (the last two are left empty).

        Raises:
            ValueError: if the index page contains no
                'sidebar_left_home_wrapper' div to read the links from.
        """
        toc_page = self.index_to_soup('http://www.thefridaytimes.com/tft/')
        toc = toc_page.find(
            'div', attrs={'class': 'sidebar_left_home_wrapper'})
        if toc is None:
            # Name the missing container explicitly rather than failing
            # later with an opaque AttributeError on None.
            raise ValueError(
                'Could not find the article list '
                '(div.sidebar_left_home_wrapper) on the index page')

        articles = []
        for story in toc.findAll('a'):
            # skip the links with an image, they are repeated further down
            if story.find('img') is not None:
                continue
            # An anchor without a target cannot be fetched; skip it instead
            # of aborting the whole index with a KeyError.
            url = story.get('href')
            if not url:
                continue
            # If no title, use url as title
            title = story.get('title', url)
            self.log('Found article:', title)
            self.log('\t', url)
            articles.append({'title': title, 'url': url,
                             'date': '', 'description': ''})

        return [('Current Issue', articles)]
|