#!/usr/bin/env python2
# vim:fileencoding=utf-8
from calibre import random_user_agent
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag


def classes(classes):
    """Build a keyword dict that matches tags carrying any of the given classes.

    ``classes`` is a string of class names separated by single spaces.  The
    result has the shape ``{'attrs': {'class': predicate}}``.  The predicate
    receives a tag's ``class`` attribute value as a string (or a falsy value
    when absent) and returns the set of wanted names found in it; a falsy
    input is returned unchanged.
    """
    wanted = frozenset(classes.split(' '))

    def has_wanted_class(value):
        # Short-circuit on a missing/empty attribute so .split() is never
        # called on None; the falsy value itself is the result.
        return value and frozenset(value.split()).intersection(wanted)

    return {'attrs': {'class': has_wanted_class}}


def new_tag(soup, name, attrs=()):
    """Create a new tag called ``name`` belonging to ``soup``.

    ``attrs`` is an iterable of ``(key, value)`` pairs.  When ``soup``
    exposes a non-None ``new_tag`` attribute, that factory is called with
    the attributes converted to a dict.  Otherwise the tag is built directly
    through the ``Tag`` constructor, passing ``None`` when no attributes
    were supplied.
    """
    factory = getattr(soup, 'new_tag', None)
    if factory is None:
        # No factory on the soup object: construct the Tag ourselves.
        return Tag(soup, name, attrs=attrs or None)
    return factory(name, attrs=dict(attrs))


class TheIndependentNew(BasicNewsRecipe):
    """Recipe that builds an e-book from The Independent's RSS feeds."""

    title = u'The Independent'
    __author__ = 'Krittika Goyal'
    # Adjacent string literals are used instead of backslash continuations
    # so that the source indentation does not end up inside the text.
    description = (
        'The latest in UK News and World News from The '
        'Independent. Wide range of international and local news, sports '
        'news, commentary and opinion pieces.Independent News - Breaking news '
        'that matters. Your daily comprehensive news source - The '
        'Independent Newspaper'
    )
    publisher = 'The Independent'
    oldest_article = 2.0
    ignore_duplicate_articles = {'title', 'url'}
    remove_empty_feeds = True
    category = 'news, UK'
    no_stylesheets = True
    use_embedded_content = False
    language = 'en_GB'
    publication_type = 'newspaper'
    encoding = 'utf-8'
    compress_news_images = True

    # Only elements carrying one of these classes are kept from each page.
    keep_only_tags = [
        classes('headline sub-headline breadcrumb author publish-date hero-image body-content'),
    ]
    # Elements carrying one of these classes are dropped.
    remove_tags = [
        classes('inline-related inline-readmore ad-wrapper icon-gallery i-gallery')
    ]
    remove_attributes = ['style']

    def get_browser(self, *a, **kw):
        """Return a browser that identifies itself with a non-IE user agent.

        The user agent is chosen once and cached on the instance as
        ``non_ie_ua`` so every later call reuses the same value.  It is
        passed on to ``BasicNewsRecipe.get_browser`` as the ``user_agent``
        keyword, overriding any value supplied by the caller.
        """
        # This site returns images in JPEG-XR format if the user agent is IE
        if not hasattr(self, 'non_ie_ua'):
            try:
                self.non_ie_ua = random_user_agent(allow_ie=False)
            except TypeError:
                # Presumably random_user_agent() does not accept allow_ie in
                # some calibre versions -- fall back to a fixed Chrome UA.
                self.non_ie_ua = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.111 Safari/537.36'
        kw['user_agent'] = self.non_ie_ua
        return BasicNewsRecipe.get_browser(self, *a, **kw)

    def preprocess_html(self, soup):
        """Rewrite AMP images and inline gallery pictures, then return ``soup``.

        * Every ``amp-img`` tag is renamed to ``img`` and its ``srcset``
          attribute is set to the empty string.
        * Inside each element whose class is ``full-gallery``, ``li`` items
          that have both ``data-gallery-item`` and ``data-original`` are
          removed from the tree after recording their image URL.  Each ``li``
          with a ``data-gallery-legend`` whose value matches a recorded item
          then gets an ``img`` pointing at that URL appended to it.
        """
        for img in soup.findAll('amp-img'):
            img.name = 'img'
            img['srcset'] = ''

        for div in soup.findAll(attrs={'class': 'full-gallery'}):
            # Maps gallery item identifier -> original image URL.
            imgs = {}
            for li in div.findAll('li', attrs={'data-gallery-item': True, 'data-original': True}):
                imgs[li['data-gallery-item']] = li['data-original']
                li.extract()
            for li in div.findAll('li', attrs={'data-gallery-legend': True}):
                src = imgs.get(li['data-gallery-legend'])
                if src is not None:
                    img = new_tag(soup, 'img')
                    img['src'] = src
                    # NOTE(review): remove_attributes lists 'style'; confirm
                    # whether this inline style survives later processing.
                    img['style'] = 'display:block'
                    li.append(img)
        return soup

    # (section title, RSS feed URL) pairs.
    feeds = [
        (u'News - UK',
         u'http://www.independent.co.uk/news/uk/rss'),
        (u'News - World',
         u'http://www.independent.co.uk/news/world/rss'),
        (u'News - Business',
         u'http://www.independent.co.uk/news/business/rss'),
        (u'News - People',
         u'http://www.independent.co.uk/news/people/rss'),
        (u'News - Science',
         u'http://www.independent.co.uk/news/science/rss'),
        (u'News - Media',
         u'http://www.independent.co.uk/news/media/rss'),
        (u'News - Education',
         u'http://www.independent.co.uk/news/education/rss'),
        (u'News - Obituaries',
         u'http://www.independent.co.uk/news/obituaries/rss'),
        (u'News - Corrections',
         u'http://www.independent.co.uk/news/corrections/rss'),
        (u'Voices',
         u'http://www.independent.co.uk/voices/rss'),
        (u'Environment',
         u'http://www.independent.co.uk/environment/rss'),
        (u'Sport - Athletics',
         u'http://www.independent.co.uk/sport/general/athletics/rss'),
        (u'Sport - Cricket',
         u'http://www.independent.co.uk/sport/cricket/rss'),
        (u'Sport - Football',
         u'http://www.independent.co.uk/sport/football/rss'),
        (u'Sport - Golf',
         u'http://www.independent.co.uk/sport/golf/rss'),
        (u'Sport - Motor racing',
         u'http://www.independent.co.uk/sport/motor-racing/rss'),
        (u'Sport - Olympics',
         u'http://www.independent.co.uk/sport/olympics/rss'),
        (u'Sport - Racing',
         u'http://www.independent.co.uk/sport/racing/rss'),
        (u'Sport - Rugby League',
         u'http://www.independent.co.uk/sport/general/rugby-league/rss'),
        (u'Sport - Rugby Union',
         u'http://www.independent.co.uk/sport/rugby/rugby-union/rss'),
        (u'Sport - Sailing',
         u'http://www.independent.co.uk/sport/general/sailing/rss'),
        (u'Sport - Tennis',
         u'http://www.independent.co.uk/sport/tennis/rss'),
        (u'Sport - Others',
         u'http://www.independent.co.uk/sport/general/others/rss'),
        (u'Life & Style - Fashion',
         u'http://www.independent.co.uk/life-style/fashion/rss'),
        (u'Life & Style -Food & Drink',
         u'http://www.independent.co.uk/life-style/food-and-drink/rss'),
        (u'Life & Style - Health and Families',
         u'http://www.independent.co.uk/life-style/health-and-families/rss'),
        (u'Life & Style - History',
         u'http://www.independent.co.uk/life-style/history/rss'),
        (u'Life & Style - Gadgets & Tech',
         u'http://www.independent.co.uk/life-style/gadgets-and-tech/rss'),
        (u'Life & Style - Motoring',
         u'http://www.independent.co.uk/life-style/motoring/rss'),
        (u'Arts & Ents - Art',
         u'http://www.independent.co.uk/arts-entertainment/art/rss'),
        (u'Arts & Ents - Architecture',
         u'http://www.independent.co.uk/arts-entertainment/architecture/rss'),
        (u'Arts & Ents - Music',
         u'http://www.independent.co.uk/arts-entertainment/music/rss'),
        (u'Arts & Ents - Classical',
         u'http://www.independent.co.uk/arts-entertainment/classical/rss'),
        (u'Arts & Ents - Films',
         u'http://www.independent.co.uk/arts-entertainment/films/rss'),
        (u'Arts & Ents - TV',
         u'http://www.independent.co.uk/arts-entertainment/tv/rss'),
        (u'Arts & Ents - Theatre and Dance',
         u'http://www.independent.co.uk/arts-entertainment/theatre-dance/rss'),
        (u'Arts & Ents - Comedy',
         u'http://www.independent.co.uk/arts-entertainment/comedy/rss'),
        (u'Arts & Ents - Books',
         u'http://www.independent.co.uk/arts-entertainment/books/rss'),
        (u'Travel',
         u'http://www.independent.co.uk/travel/rss'),
        (u'Money',
         u'http://www.independent.co.uk/money/rss'),
        (u'IndyBest',
         u'http://www.independent.co.uk/extras/indybest/rss'),
    ]