Update the calibre recipe for The Friday Times.

Summary of what this patch changes (as visible in the hunk below):
  * the index is now fetched from http://www.thefridaytimes.com/tft/ and
    only links inside the 'sidebar_left_home_wrapper' div are scanned;
  * auto_cleanup is dropped in favour of explicit keep_only_tags /
    remove_tags lists, and ignore_duplicate_articles = {'url'} is added;
  * anchors that wrap an image are skipped, article URLs are taken
    verbatim from 'href', and the anchor's 'title' attribute (falling
    back to the URL) is used as the article title;
  * encoding, version, category and description metadata are added.

NOTE(review): this patch was recovered from a copy flattened onto a
single line. Line breaks and indentation were rebuilt to match the hunk
header (26 old / 44 new lines); original column alignment inside lines
could not be recovered, so whitespace-only changes (e.g. no_stylesheets,
no_javascript) show up as identical -/+ pairs. Verify against the
upstream blob before applying.

diff --git a/recipes/the_friday_times.recipe b/recipes/the_friday_times.recipe
index 2ca8443684..3114cebe4b 100644
--- a/recipes/the_friday_times.recipe
+++ b/recipes/the_friday_times.recipe
@@ -1,26 +1,44 @@
 from calibre.web.feeds.news import BasicNewsRecipe
 
 class TheFridayTimes(BasicNewsRecipe):
-    title = u'The Friday Times'
+    __author__ = 'Krittika Goyal, ireadtheinternet'
     language = 'en_PK'
-    __author__ = 'Krittika Goyal'
+    encoding = 'utf8'
+    version = 1.1
 
+    title = u'The Friday Times'
+    category = u'news, Pakistan'
+    description = u"Pakistan's First Independent Weekly Paper"
 
-    no_stylesheets = True
-    no_javascript = True
-    auto_cleanup = True
+    no_stylesheets = True
+    no_javascript = True
+    ignore_duplicate_articles = {'url'}
 
+    keep_only_tags = [
+        dict(name='div', attrs={'class':'sidebar_content'}),
+        dict(name='div', attrs={'class':'comment_inner'})
+    ]
+
+    remove_tags = [
+        dict(name='p', attrs={'class':'no-break'}),
+        dict(name='div', attrs={'class':'related_posts'}),
+        dict(name='div', attrs={'id':'respond'})
+    ]
 
     def parse_index(self):
-        toc = self.index_to_soup('http://www.thefridaytimes.com/beta3/tft/index.php')
+        toc_page = self.index_to_soup('http://www.thefridaytimes.com/tft/')
+        toc = toc_page.find('div', attrs={'class':'sidebar_left_home_wrapper'})
+
         articles = []
-        for story in toc.findAll('a', attrs={'class':'homemainlinks'}):
-            title = self.tag_to_string(story)
-            url = 'http://www.thefridaytimes.com/beta3/tft/' + story['href']
+        for story in toc.findAll('a'):
+            # skip the links with an image, they are repeated further down
+            if story.find('img') is not None:
+                continue
+            url = story['href']
+            # If no title, use url as title
+            title = story.get('title', url)
             self.log('Found article:', story)
             self.log('\t', url)
-            articles.append({'title':title, 'url':url, 'date':'',
-            'description':''})
+            articles.append({'title':title, 'url':url, 'date':'','description':''})
 
         return [('Current Issue', articles)]
-