mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Remove non-working recipe
This commit is contained in:
parent
0872338ae7
commit
3d6c8dd286
@ -1,102 +0,0 @@
|
|||||||
from datetime import date
|
|
||||||
import re
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class VanityFair(BasicNewsRecipe):
    """News recipe that assembles Vanity Fair (U.S.) from its section pages.

    Every entry of ``CATEGORIES`` is fetched relative to ``INDEX`` and scanned
    for headline links; the links found become one feed per category, with an
    optional leading "Featured" feed (see ``FEATURED_CAT``).
    """

    # --- recipe metadata -------------------------------------------------
    title = u"Vanity Fair"
    description = 'Vanity Fair Magazine (U.S.)'
    __author__ = 'Barty'
    language = 'en'
    timefmt = ' [%B %Y]'

    # --- download settings -----------------------------------------------
    oldest_article = 365
    max_articles_per_feed = 100
    no_stylesheets = False
    auto_cleanup = False

    masthead_url = 'http://www.vanityfair.com/etc/designs/vanityfair/images/shell/print-logo.png'

    # Base address that category suffixes and site-relative links hang off.
    INDEX = 'http://www.vanityfair.com'

    # One tuple per section to download:
    #   (display name, path below INDEX, cap on articles kept for the section)
    # Remove or comment out a tuple to leave that section out.
    CATEGORIES = [
        ('Hollywood', 'hollywood', 10),
        ('Culture', 'culture', 10),
        ('Business', 'business', 10),
        ('Politics', 'politics', 10),
        ('Society', 'society', 10),
        ('Style', 'style', 10),
        ('VF Daily', 'online/daily', 10),
        ("James Wolcott's Blog", 'online/wolcott', 10),
        ("The Oscars", 'online/oscars', 10),
    ]

    # When True, the first usable article of each section is moved into a
    # separate "Featured" feed instead of the section's own feed.
    FEATURED_CAT = True

    remove_tags = [
        {'name': ['nav']},
        {'class': re.compile(
            r'_(header|rubric|share|subnav|leaderboard)|comments-count|ecom_placement')},
    ]
    remove_tags_after = [{'class': 'cn_blogpost'}, {'id': 'wrapper'}]

    def parse_index(self):
        """Scrape all configured category pages and build the feed list.

        Also sets ``self.cover_url`` from the current year and month.

        Returns a list of ``(feed name, articles)`` tuples, where each article
        is a dict with ``title``, ``url`` and ``description`` keys. Categories
        that yield no articles are omitted. If any featured articles were
        collected, a ``'Featured'`` feed is placed first.
        """
        issue_stamp = date.today().strftime('%Y%m')
        self.cover_url = 'http://www.vanityfair.com/magazine/toc/contents-%s/_jcr_content/par/cn_contentwell/par-main/cn_pagination_contai/cn_image.size.cover_vanityfair_300.jpg' % issue_stamp  # noqa

        sections = []
        featured = []
        visited = set()  # URLs already taken, shared across all categories

        for section_name, suffix, limit in self.CATEGORIES:
            self.log('Reading category:', section_name)
            collected = []

            # Pages under 'online/' are searched for an enclosing <article>;
            # all other pages for an enclosing <section>.
            if suffix.startswith('online/'):
                container_name = 'article'
            else:
                container_name = 'section'

            soup = self.index_to_soup("%s/%s" % (self.INDEX, suffix))
            want_featured = self.FEATURED_CAT

            for headline in soup.findAll(attrs={'class': 'headline '}):
                self.log(self.tag_to_string(headline))

                # Only headlines holding exactly one link are articles;
                # zero or several links means some other kind of list.
                links = headline.findAll('a')
                if len(links) != 1:
                    continue
                link = links[0]

                url = link['href']
                if url.startswith('/'):
                    url = self.INDEX + url
                if url in visited:
                    continue
                visited.add(url)

                headline_text = self.tag_to_string(link)
                self.log('\tFound article:', headline_text)
                self.log('\t', url)

                container = headline.findParent(container_name)
                if container is None:
                    # No enclosing container: nothing is recorded for this link.
                    continue

                summary = container.find(attrs={'class': 'body '})
                entry = {
                    'title': headline_text,
                    'url': url,
                    'description': self.tag_to_string(summary) if summary else '',
                }

                if want_featured:
                    # The featured article does not count towards the cap.
                    featured.append(entry)
                    want_featured = False
                    continue

                collected.append(entry)
                if len(collected) >= limit:
                    break

            if collected:
                sections.append((section_name, collected))

        if featured:
            sections.insert(0, ('Featured', featured))

        return sections

    def print_version(self, url):
        """Return *url* with every ``.html`` substituted by ``.print``."""
        printable = url.replace('.html', '.print')
        return printable
|
|
Loading…
x
Reference in New Issue
Block a user