#!/usr/bin/env  python2
from __future__ import unicode_literals, division, absolute_import, print_function
__license__ = 'GPL v3'
__copyright__ = '2018, PJ Paul'
'''
Recipe for Arts and Letters Daily website
'''

from calibre.web.feeds.news import BasicNewsRecipe
import re
from datetime import date as dt
from datetime import timedelta
from datetime import datetime
from itertools import compress


class ALD(BasicNewsRecipe):
    """Calibre recipe that builds feeds from the Arts and Letters Daily index.

    ``parse_index`` downloads the page at ``index``, keeps the links listed
    under day headers no older than ``oldest_article`` days, and groups them
    into three feeds according to the ``<h3>`` heading that precedes them.
    """

    title = 'Arts and Letters Daily'
    oldest_article = 7
    max_articles_per_feed = 100
    auto_cleanup = True
    recursions = 0
    ignore_duplicate_articles = {'url'}
    index = 'https://www.aldaily.com/alt/'
    cover_url = 'https://www.aldaily.com/static/images/header.gif'
    __author__ = 'https://github.com/pjpaulpj'
    language = 'en'
    encoding = 'utf-8'

    @staticmethod
    def _parse_day_header(text):
        """Return the ``date`` named by a day-header string, or None.

        Non-word characters (commas, periods, ...) are replaced by spaces and
        runs of whitespace are collapsed before parsing with ``'%b %d %Y'``,
        so surrounding or repeated whitespace cannot make ``strptime`` fail.
        """
        cleaned = ' '.join(re.sub(r'[^\w]', ' ', text).split())
        try:
            return datetime.strptime(cleaned, '%b %d %Y').date()
        except ValueError:
            return None

    def parse_index(self):
        """Collect recent links from the index page, grouped by section.

        Returns:
            A list of ``(feed title, articles)`` tuples, always in the order
            'Articles of Note', 'New Books', 'Essays and Opinions'. Each
            article is a dict with the keys ``title``, ``url``,
            ``description`` and ``date`` (the last two are empty strings).
        """
        articles_note = []
        new_books = []
        essays = []
        # Heading text -> destination list; any other heading goes to essays.
        buckets = {
            'Articles of Note': articles_note,
            'New Books': new_books,
        }

        soup = self.index_to_soup(self.index)
        oldest_date = dt.today() - timedelta(days=self.oldest_article)

        # Raw text of every day header that is recent enough. The raw text
        # (not the cleaned form) is stored because it is compared below with
        # the raw text of each paragraph's preceding <div>.
        recent_headers = set()
        for header in soup.findAll('div', attrs={'id': "dayheader"}):
            header_text = self.tag_to_string(header)
            published = self._parse_day_header(header_text)
            if published is None:
                # One malformed header must not abort the whole download.
                self.log.warn('Skipping unparseable day header: %r' % header_text)
                continue
            if published >= oldest_date:
                recent_headers.add(header_text)

        # Walk the paragraphs of each container in document order and stop
        # at the first one whose preceding <div> is not a recent day header.
        for container in soup.findAll('div', attrs={'class': "mobile-front"}):
            for para in container.findAll('p'):
                day_text = self.tag_to_string(para.findPreviousSibling('div'))
                if day_text not in recent_headers:
                    break

                anchor = para.find('a')
                if anchor is None:
                    continue
                url = anchor.get('href')
                if not url:
                    # An anchor without an href has nothing to download.
                    continue

                heading = self.tag_to_string(para.findPreviousSibling('h3'))
                buckets.get(heading, essays).append({
                    'title': self.tag_to_string(para),
                    'url': url,
                    'description': '',
                    'date': ''
                })

        return [
            ('Articles of Note', articles_note),
            ('New Books', new_books),
            ('Essays and Opinions', essays),
        ]