# calibre/recipes/the_friday_times.recipe

from calibre.web.feeds.news import BasicNewsRecipe


class TheFridayTimes(BasicNewsRecipe):
    """Recipe that builds an e-book from the front page of The Friday Times.

    The article list is scraped from the anchors inside the
    ``sidebar_left_home_wrapper`` div of the site's index page and is
    returned as a single feed named 'Current Issue'.
    """

    __author__ = 'Krittika Goyal, ireadtheinternet'
    language = 'en_PK'
    encoding = 'utf8'
    version = 1.1

    title = u'The Friday Times'
    category = u'news, Pakistan'
    description = u"Pakistan's First Independent Weekly Paper"

    no_stylesheets = True
    no_javascript = True
    # The index links the same story more than once; de-duplicate by URL.
    ignore_duplicate_articles = {'url'}

    # Tag specs selecting the part of each article page that is kept.
    keep_only_tags = [
        dict(name='div', attrs={'class': 'sidebar_content'}),
    ]

    # Tag specs stripped from the kept content.
    remove_tags = [
        dict(name='p', attrs={'class': 'no-break'}),
        dict(name='div', attrs={'class': 'related_posts'}),
        dict(name='div', attrs={'id': 'respond'})
    ]

    def parse_index(self):
        """Scrape the index page and return the list of articles.

        Returns:
            A one-element list ``[('Current Issue', articles)]`` where
            ``articles`` is a list of dicts with the keys ``title``,
            ``url``, ``date`` and ``description`` (the last two are
            always empty strings).

        Raises:
            ValueError: if the index page does not contain the
                ``sidebar_left_home_wrapper`` div the links are read from.
        """
        toc_page = self.index_to_soup('http://www.thefridaytimes.com/tft/')
        toc = toc_page.find(
            'div', attrs={'class': 'sidebar_left_home_wrapper'})
        if toc is None:
            # Fail with an explicit message instead of an AttributeError on
            # None when the site layout changes.
            raise ValueError(
                'The Friday Times: table of contents container '
                '(div.sidebar_left_home_wrapper) not found on index page')

        articles = []
        for story in toc.findAll('a'):
            # skip the links with an image, they are repeated further down
            if story.find('img') is not None:
                continue
            # Anchors without a target cannot be downloaded; skip them
            # rather than aborting the whole index with a KeyError.
            url = story.get('href')
            if not url:
                continue
            # If no title (missing or empty attribute), use url as title
            title = story.get('title') or url
            self.log('Found article:', title)
            self.log('\t', url)
            articles.append({'title': title, 'url': url,
                             'date': '', 'description': ''})

        return [('Current Issue', articles)]