from calibre.web.feeds.news import BasicNewsRecipe


class TheNewsRecipe(BasicNewsRecipe):
    """Recipe for The News, an English-language newspaper from Pakistan.

    Articles are collected from the site's RSS feeds. Each article link is
    rewritten to its print page by ``print_version`` and the downloaded
    markup is tidied by ``preprocess_html``.
    """

    # Recipe metadata.
    __license__ = 'GPL v3'
    __author__ = 'kwetal'
    language = 'en_PK'
    version = 1

    # Publication metadata (reused below in ``conversion_options``).
    title = u'The News'
    publisher = u'Jang Group'
    category = u'News, Pakistan'
    description = u'English Newspaper from Pakistan'

    # Feed handling.
    use_embedded_content = False
    remove_empty_feeds = True
    oldest_article = 2
    max_articles_per_feed = 100

    # Page fetching and clean-up.
    no_stylesheets = True
    remove_javascript = True
    encoding = 'iso-8859-1'

    # Images dropped from every article, matched on their ``src`` attribute.
    remove_tags = [
        dict(name='img', attrs={'src': 'images/thenews.gif'}),
        dict(name='img', attrs={'src': 'images/shim.gif'}),
    ]

    # (section title, feed URL) pairs.
    # Feeds from http://thenews.com.pk/rss.asp
    feeds = [
        (u'Latest Stories', u'http://www.thenews.com.pk/rss/thenews_updates.xml'),
        (u'Top Stories', u'http://www.thenews.com.pk/rss/thenews_topstories.xml'),
        (u'World News', u'http://www.thenews.com.pk/rss/thenews_world.xml'),
        (u'National News', u'http://www.thenews.com.pk/rss/thenews_national.xml'),
        (u'Business News', u'http://www.thenews.com.pk/rss/thenews_business.xml'),
        (u'Karachi News', u'http://www.thenews.com.pk/rss/thenews_karachi.xml'),
        (u'Lahore News', u'http://www.thenews.com.pk/rss/thenews_lahore.xml'),
        (u'Islamabad News', u'http://www.thenews.com.pk/rss/thenews_islamabad.xml'),
        (u'Peshawar News', u'http://www.thenews.com.pk/rss/thenews_peshawar.xml'),
        (u'Editorial', u'http://www.thenews.com.pk/rss/thenews_editorial.xml'),
        (u'Opinion', u'http://www.thenews.com.pk/rss/thenews_opinion.xml'),
        (u'Sports News', u'http://www.thenews.com.pk/rss/thenews_sports.xml'),
        (u'Newspost', u'http://www.thenews.com.pk/rss/thenews_newspost.xml'),
    ]

    # Conversion settings, filled in from the publication metadata above.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': 'en',
        'publisher': publisher,
        'linearize_tables': True,
    }

    # Extra styling; ``.dateline`` is the class that ``preprocess_html``
    # assigns to the dateline cell.
    extra_css = '''
                body{font-family:verdana,arial,helvetica,geneva,sans-serif;}
                .heading_txt {font-size: x-large; font-weight: bold; text-align: left;}
                .small_txt {text-align: left;}
                .dateline {font-size: x-small; color: #696969; margin-top: 1em; margin-bottom: 1em}
                '''

    def print_version(self, url):
        """Map an article URL to the URL of its print page.

        Only the part of ``url`` after the last ``/`` is inspected. When it
        starts with one of the known article page names, that name is
        swapped for the matching print page name.

        Returns the rewritten URL, or ``None`` when the page name is not
        recognised (presumably this makes the caller skip the article;
        verify against the calibre API).
        """
        # Ordered (article page, print page) pairs; the first match wins.
        page_map = (
            ('updates.asp', 'print.asp'),
            ('top_story_detail.asp', 'print3.asp'),
            ('daily_detail.asp', 'print1.asp'),
        )

        last_segment = url.rpartition('/')[2]
        for article_page, print_page in page_map:
            if last_segment.startswith(article_page):
                return url.replace(article_page, print_page)

        return None

    def preprocess_html(self, soup):
        """Tidy the parsed page in place and return it.

        Removes the ``bgcolor`` attribute from every table row that has
        one, then turns the first ``small_txt`` cell with ``height="20"``
        into a ``dateline`` cell so the matching ``extra_css`` rule applies.
        """
        for row in soup.findAll('tr', attrs={'bgcolor': True}):
            del row['bgcolor']

        dateline_cell = soup.find('td', attrs={'class': 'small_txt', 'height': '20'})
        if not dateline_cell:
            # No dateline cell on this page; nothing more to change.
            return soup

        del dateline_cell['height']
        dateline_cell['class'] = 'dateline'
        return soup