from datetime import date
import re
from calibre.web.feeds.news import BasicNewsRecipe

class VanityFair(BasicNewsRecipe):
	"""Calibre recipe that assembles a Vanity Fair (U.S.) e-book.

	The table of contents is scraped from the category landing pages listed
	in CATEGORIES (see parse_index); each article is then downloaded through
	the URL returned by print_version.
	"""

	title          = u"Vanity Fair"
	description    = 'Vanity Fair Magazine (U.S.)'
	language       = 'en'
	__author__     = 'Barty'
	max_articles_per_feed = 100
	no_stylesheets = False
	auto_cleanup   = False
	timefmt        = ' [%B %Y]'
	oldest_article = 365

	masthead_url   = 'http://www.vanityfair.com/etc/designs/vanityfair/images/shell/print-logo.png'

	# Site root; category suffixes and site-relative article links are
	# appended to it.
	INDEX          = 'http://www.vanityfair.com'
	CATEGORIES     = [
		# comment out categories you don't want
		# (user friendly name, url suffix, max number of articles to load)
		('Hollywood','hollywood',10),
		('Culture','culture',10),
		('Business','business',10),
		('Politics','politics',10),
		('Society','society',10),
		('Style','style',10),
		('VF Daily','online/daily',10),
		("James Wolcott's Blog",'online/wolcott',10),
		("The Oscars",'online/oscars',10),
		]
	# set this to False if you don't want to put the first article
	# that appears in each section to a "Featured" section
	FEATURED_CAT   = True


	# Page furniture to strip from downloaded articles: <nav> elements and
	# anything whose class matches the pattern below.
	remove_tags    = [
		{'name':['nav']},
		{'class':re.compile(r'_(header|rubric|share|subnav|leaderboard)|comments-count|ecom_placement')}
		]
	remove_tags_after = [{'class':'cn_blogpost'},{'id':'wrapper'}]

	def parse_index(self):
		"""Scrape every configured category page and build the feed list.

		Sets self.cover_url to the cover image URL derived from the current
		year and month.

		Returns a list of (section name, articles) tuples, where each article
		is a dict with 'title', 'url' and 'description' keys. When
		FEATURED_CAT is true, the first usable article of every category is
		moved to a leading 'Featured' section instead of its own category.
		Categories that end up with no articles are omitted. A URL is only
		ever listed once across all sections.
		"""
		self.cover_url = 'http://www.vanityfair.com/magazine/toc/contents-%s/_jcr_content/par/cn_contentwell/par-main/cn_pagination_contai/cn_image.size.cover_vanityfair_300.jpg' % (date.today().strftime('%Y%m'))
		feeds = []
		seen_urls = set()
		features = []

		for cat_name, tag, max_articles in self.CATEGORIES:
			self.log('Reading category:', cat_name)
			articles = []

			page = "%s/%s" % (self.INDEX, tag)
			soup = self.index_to_soup(page)
			# NOTE(review): the trailing space in 'headline ' (and in 'body '
			# below) presumably mirrors the site's markup -- confirm before
			# "cleaning it up".
			headers = soup.findAll(attrs={'class':'headline '})
			add_featured = self.FEATURED_CAT
			# The element searched for the teaser text is <article> on the
			# 'online/...' pages and <section> on the other category pages.
			container = 'article' if tag.startswith('online/') else 'section'

			for header in headers:
				self.log(self.tag_to_string(header))
				atags = header.findAll('a')
				# if there's more than one a tag, it's some kind of list, skip
				# (a headline without any link is skipped as well)
				if len(atags) != 1:
					continue
				atag = atags[0]
				# An anchor without href cannot be fetched; skip it rather
				# than abort the whole download with a KeyError.
				url = atag.get('href')
				if not url:
					continue
				if url.startswith('/'):
					url = self.INDEX + url
				if url in seen_urls:
					continue
				seen_urls.add(url)
				title = self.tag_to_string(atag)
				self.log('\tFound article:', title)
				self.log('\t', url)
				# Look the teaser up afresh for every headline so that a
				# missing container yields an empty description instead of an
				# unbound name or the previous article's text.
				par = header.findParent(container)
				desc_tag = par.find(attrs={'class':'body '}) if par is not None else None
				desc = self.tag_to_string(desc_tag) if desc_tag is not None else ''
				entry = {'title':title,'url':url,'description':desc}
				if add_featured:
					# Only the first usable article of a category is featured;
					# it does not count towards max_articles.
					features.append(entry)
					add_featured = False
				else:
					articles.append(entry)
					if len(articles) >= max_articles:
						break

			if articles:
				feeds.append((cat_name, articles))

		if features:
			feeds.insert(0,('Featured', features))

		return feeds

	def print_version(self, url):
		"""Return the article URL with every '.html' replaced by '.print'."""
		return url.replace('.html', '.print')