mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Vanity Fair by Barty
This commit is contained in:
parent
75be93397b
commit
23caca5f47
98
recipes/vanityfair.recipe
Normal file
98
recipes/vanityfair.recipe
Normal file
@ -0,0 +1,98 @@
|
|||||||
|
from datetime import date
|
||||||
|
import re
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class VanityFair(BasicNewsRecipe):
    """Recipe that builds an issue from the category pages of vanityfair.com.

    Each entry in ``CATEGORIES`` is fetched as ``INDEX/<url suffix>`` and the
    headline links found on that page become the articles of one section.
    """

    title = u"Vanity Fair"
    description = 'Vanity Fair Magazine (U.S.)'
    language = 'en'
    __author__ = 'Barty'
    max_articles_per_feed = 100
    no_stylesheets = False
    auto_cleanup = False
    timefmt = ' [%B %Y]'
    oldest_article = 365

    masthead_url = 'http://www.vanityfair.com/etc/designs/vanityfair/images/shell/print-logo.png'

    INDEX = 'http://www.vanityfair.com'
    CATEGORIES = [
        # comment out categories you don't want
        # (user friendly name, url suffix, max number of articles to load)
        ('Hollywood', 'hollywood', 10),
        ('Culture', 'culture', 10),
        ('Business', 'business', 10),
        ('Politics', 'politics', 10),
        ('Society', 'society', 10),
        ('Style', 'style', 10),
        ('VF Daily', 'online/daily', 10),
        ("James Wolcott's Blog", 'online/wolcott', 10),
        ("The Oscars", 'online/oscars', 10),
    ]
    # set this to False if you don't want to put the first article
    # that appears in each section to a "Featured" section
    FEATURED_CAT = True

    remove_tags = [
        {'name': ['nav']},
        {'class': re.compile(r'_(header|rubric|share|subnav|leaderboard)|comments-count|ecom_placement')}
    ]
    remove_tags_after = [{'class': 'cn_blogpost'}, {'id': 'wrapper'}]

    def parse_index(self):
        """Scrape every configured category page and return the feed list.

        Returns a list of ``(section title, articles)`` tuples, where each
        article is a dict with ``title``, ``url`` and ``description`` keys.
        When ``FEATURED_CAT`` is true, the first usable article of every
        category is collected into a leading ``'Featured'`` section instead
        of its own category.

        Side effect: sets ``self.cover_url`` from the current year and month.
        """
        self.cover_url = 'http://www.vanityfair.com/magazine/toc/contents-%s/_jcr_content/par/cn_contentwell/par-main/cn_pagination_contai/cn_image.size.cover_vanityfair_300.jpg' % (date.today().strftime('%Y%m'))
        feeds = []
        # Shared across categories so an article listed on several pages is
        # only downloaded once.
        seen_urls = set()
        features = []

        for cat_name, tag, max_articles in self.CATEGORIES:
            self.log('Reading category:', cat_name)
            articles = []

            page = "%s/%s" % (self.INDEX, tag)
            soup = self.index_to_soup(page)
            # NOTE(review): the trailing space in 'headline ' (and 'body '
            # below) is kept from the original selector; presumably it mirrors
            # the site's markup exactly -- confirm before "cleaning" it up.
            headers = soup.findAll(attrs={'class': 'headline '})
            add_featured = self.FEATURED_CAT

            for header in headers:
                self.log(self.tag_to_string(header))
                atags = header.findAll('a')
                # if there's more than one a tag, it's some kind of list, skip
                if len(atags) != 1:
                    continue
                atag = atags[0]
                url = atag.get('href')
                if not url:
                    # An anchor without a target cannot be downloaded.
                    continue
                if url.startswith('/'):
                    url = self.INDEX + url
                if url in seen_urls:
                    continue
                seen_urls.add(url)
                title = self.tag_to_string(atag)
                self.log('\tFound article:', title)
                self.log('\t', url)

                # Reset for every article: previously a headline with no
                # enclosing container either raised NameError or silently
                # reused the description of the preceding article.
                desc = ''
                container = 'article' if tag.startswith('online/') else 'section'
                par = header.findParent(container)
                if par is not None:
                    body = par.find(attrs={'class': 'body '})
                    if body:
                        desc = self.tag_to_string(body)

                entry = {'title': title, 'url': url, 'description': desc}
                if add_featured:
                    features.append(entry)
                    add_featured = False
                else:
                    articles.append(entry)
                    if len(articles) >= max_articles:
                        break

            if articles:
                feeds.append((cat_name, articles))

        if features:
            feeds.insert(0, ('Featured', features))

        return feeds

    def print_version(self, url):
        """Return *url* with every ``.html`` replaced by ``.print``."""
        return url.replace('.html', '.print')
|
Loading…
x
Reference in New Issue
Block a user