Remove non-working recipe

This commit is contained in:
Kovid Goyal 2020-04-22 20:53:37 +05:30
parent 0872338ae7
commit 3d6c8dd286
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C

View File

@@ -1,102 +0,0 @@
from datetime import date
import re
from calibre.web.feeds.news import BasicNewsRecipe
class VanityFair(BasicNewsRecipe):
    """Recipe for Vanity Fair Magazine (U.S.).

    Builds its index by scraping the configured category landing pages
    rather than RSS; the first article found per category may be pulled
    into a separate "Featured" section (see FEATURED_CAT).
    """

    title = u"Vanity Fair"
    description = 'Vanity Fair Magazine (U.S.)'
    language = 'en'
    __author__ = 'Barty'
    max_articles_per_feed = 100
    no_stylesheets = False
    auto_cleanup = False
    timefmt = ' [%B %Y]'
    oldest_article = 365
    masthead_url = 'http://www.vanityfair.com/etc/designs/vanityfair/images/shell/print-logo.png'

    INDEX = 'http://www.vanityfair.com'
    CATEGORIES = [
        # comment out categories you don't want
        # (user friendly name, url suffix, max number of articles to load)
        ('Hollywood', 'hollywood', 10),
        ('Culture', 'culture', 10),
        ('Business', 'business', 10),
        ('Politics', 'politics', 10),
        ('Society', 'society', 10),
        ('Style', 'style', 10),
        ('VF Daily', 'online/daily', 10),
        ("James Wolcott's Blog", 'online/wolcott', 10),
        ("The Oscars", 'online/oscars', 10),
    ]
    # set this to False if you don't want to put the first article
    # that appears in each section to a "Featured" section
    FEATURED_CAT = True

    remove_tags = [
        {'name': ['nav']},
        {'class': re.compile(
            r'_(header|rubric|share|subnav|leaderboard)|comments-count|ecom_placement')}
    ]
    remove_tags_after = [{'class': 'cn_blogpost'}, {'id': 'wrapper'}]

    def parse_index(self):
        """Walk each category page and return the feed structure
        expected by calibre: a list of (section title, article dicts)."""
        # The cover image URL is keyed to the current year+month.
        self.cover_url = 'http://www.vanityfair.com/magazine/toc/contents-%s/_jcr_content/par/cn_contentwell/par-main/cn_pagination_contai/cn_image.size.cover_vanityfair_300.jpg' % (  # noqa
            date.today().strftime('%Y%m'))
        sections = []
        visited = set()          # dedupe URLs across all categories
        lead_articles = []       # collected "Featured" entries
        for cat_title, slug, limit in self.CATEGORIES:
            self.log('Reading category:', cat_title)
            entries = []
            listing = self.index_to_soup("%s/%s" % (self.INDEX, slug))
            want_lead = self.FEATURED_CAT
            for head in listing.findAll(attrs={'class': 'headline '}):
                self.log(self.tag_to_string(head))
                links = head.findAll('a')
                # if there's more than one a tag, it's some kind of list, skip
                if not links or len(links) > 1:
                    continue
                link = links[0]
                url = link['href']
                if url.startswith('/'):
                    url = self.INDEX + url
                if url in visited:
                    continue
                visited.add(url)
                name = self.tag_to_string(link)
                self.log('\tFound article:', name)
                self.log('\t', url)
                # Blog-style pages wrap headlines in <article>, the
                # magazine sections in <section>.
                if slug.startswith('online/'):
                    container = head.findParent('article')
                else:
                    container = head.findParent('section')
                if container is not None:
                    summary = container.find(attrs={'class': 'body '})
                    summary = self.tag_to_string(summary) if summary else ''
                    item = {'title': name, 'url': url, 'description': summary}
                    if want_lead:
                        # First usable article of the category goes to
                        # the Featured section (once per category).
                        lead_articles.append(item)
                        want_lead = False
                    else:
                        entries.append(item)
                    if len(entries) >= limit:
                        break
            if entries:
                sections.append((cat_title, entries))
        if lead_articles:
            sections.insert(0, ('Featured', lead_articles))
        return sections

    def print_version(self, url):
        """Swap the article URL for its printer-friendly variant."""
        return url.replace('.html', '.print')