from calibre.web.feeds.news import BasicNewsRecipe


class TheFridayTimes(BasicNewsRecipe):
    """Calibre recipe that builds an issue of The Friday Times.

    The article list is scraped from the left sidebar of the site's index
    page; every text-only link found there becomes one article in a single
    'Current Issue' feed.
    """

    __author__ = 'Krittika Goyal, ireadtheinternet'
    language = 'en_PK'
    encoding = 'utf8'
    version = 1.1

    title = u'The Friday Times'
    category = u'news, Pakistan'
    description = u"Pakistan's First Independent Weekly Paper"

    no_stylesheets = True
    no_javascript = True
    ignore_duplicate_articles = {'url'}

    # Tag filters applied to each downloaded article page.
    keep_only_tags = [
        dict(name='div', attrs={'class': 'sidebar_content'}),
    ]
    remove_tags = [
        dict(name='p', attrs={'class': 'no-break'}),
        dict(name='div', attrs={'class': 'related_posts'}),
        dict(name='div', attrs={'id': 'respond'}),
    ]

    def parse_index(self):
        """Scrape the index page and return the list of feeds.

        Returns:
            A one-element list ``[('Current Issue', articles)]`` where
            ``articles`` is a list of dicts with the keys ``title``,
            ``url``, ``date`` and ``description``.

        Raises:
            ValueError: if the index page has no
                ``div.sidebar_left_home_wrapper`` container, which most
                likely means the site layout has changed.
        """
        toc_page = self.index_to_soup('http://www.thefridaytimes.com/tft/')
        toc = toc_page.find(
            'div', attrs={'class': 'sidebar_left_home_wrapper'})
        if toc is None:
            # Fail with an explicit message instead of an AttributeError on
            # ``None.findAll`` further down.
            raise ValueError(
                'Could not find the article index '
                '(div.sidebar_left_home_wrapper) on the index page')

        articles = []
        for story in toc.findAll('a'):
            # skip the links with an image, they are repeated further down
            if story.find('img') is not None:
                continue

            # Anchors without an href (e.g. named anchors) are not articles.
            url = story.get('href')
            if not url:
                continue

            # If the title is missing or empty, use the url as title
            title = story.get('title') or url

            self.log('Found article:', title)
            self.log('\t', url)
            articles.append({
                'title': title,
                'url': url,
                'date': '',
                'description': '',
            })

        return [('Current Issue', articles)]