diff --git a/recipes/vanityfair.recipe b/recipes/vanityfair.recipe
deleted file mode 100644
index ac12310979..0000000000
--- a/recipes/vanityfair.recipe
+++ /dev/null
@@ -1,102 +0,0 @@
-from datetime import date
-import re
-from calibre.web.feeds.news import BasicNewsRecipe
-
-
-class VanityFair(BasicNewsRecipe):
-    title = u"Vanity Fair"
-    description = 'Vanity Fair Magazine (U.S.)'
-    language = 'en'
-    __author__ = 'Barty'
-    max_articles_per_feed = 100
-    no_stylesheets = False
-    auto_cleanup = False
-    timefmt = ' [%B %Y]'
-    oldest_article = 365
-
-    masthead_url = 'http://www.vanityfair.com/etc/designs/vanityfair/images/shell/print-logo.png'
-
-    INDEX = 'http://www.vanityfair.com'
-    CATEGORIES = [
-        # comment out categories you don't want
-        # (user friendly name, url suffix, max number of articles to load)
-        ('Hollywood', 'hollywood', 10),
-        ('Culture', 'culture', 10),
-        ('Business', 'business', 10),
-        ('Politics', 'politics', 10),
-        ('Society', 'society', 10),
-        ('Style', 'style', 10),
-        ('VF Daily', 'online/daily', 10),
-        ("James Wolcott's Blog", 'online/wolcott', 10),
-        ("The Oscars", 'online/oscars', 10),
-    ]
-    # set this to False if you don't want to put the first article
-    # that appears in each section to a "Featured" section
-    FEATURED_CAT = True
-
-    remove_tags = [
-        {'name': ['nav']},
-        {'class': re.compile(
-            r'_(header|rubric|share|subnav|leaderboard)|comments-count|ecom_placement')}
-    ]
-    remove_tags_after = [{'class': 'cn_blogpost'}, {'id': 'wrapper'}]
-
-    def parse_index(self):
-        self.cover_url = 'http://www.vanityfair.com/magazine/toc/contents-%s/_jcr_content/par/cn_contentwell/par-main/cn_pagination_contai/cn_image.size.cover_vanityfair_300.jpg' % (  # noqa
-            date.today().strftime('%Y%m'))
-        feeds = []
-        seen_urls = set()
-        features = []
-
-        for category in self.CATEGORIES:
-
-            (cat_name, tag, max_articles) = category
-            self.log('Reading category:', cat_name)
-            articles = []
-
-            page = "%s/%s" % (self.INDEX, tag)
-            soup = self.index_to_soup(page)
-            headers = soup.findAll(attrs={'class': 'headline '})
-            add_featured = self.FEATURED_CAT
-
-            for header in headers:
-                self.log(self.tag_to_string(header))
-                atags = header.findAll('a')
-                # if there's more than one a tag, it's some kind of list, skip
-                if not atags or len(atags) > 1:
-                    continue
-                atag = atags[0]
-                url = atag['href']
-                if url.startswith('/'):
-                    url = self.INDEX + url
-                if url in seen_urls:
-                    continue
-                seen_urls.add(url)
-                title = self.tag_to_string(atag)
-                self.log('\tFound article:', title)
-                self.log('\t', url)
-                par = header.findParent('article') if tag.startswith(
-                    'online/') else header.findParent('section')
-                if par is not None:
-                    desc = par.find(attrs={'class': 'body '})
-                    desc = self.tag_to_string(desc) if desc else ''
-                    if add_featured:
-                        features.append(
-                            {'title': title, 'url': url, 'description': desc})
-                        add_featured = False
-                    else:
-                        articles.append(
-                            {'title': title, 'url': url, 'description': desc})
-                if len(articles) >= max_articles:
-                    break
-
-            if articles:
-                feeds.append((cat_name, articles))
-
-        if features:
-            feeds.insert(0, ('Featured', features))
-
-        return feeds
-
-    def print_version(self, url):
-        return url.replace('.html', '.print')