Update New York Magazine

Fixes #1673965 [New York Magazine downloads most articles as symbols; only a few are available and readable](https://bugs.launchpad.net/calibre/+bug/1673965)
This commit is contained in:
Kovid Goyal 2017-03-19 13:43:27 +05:30
parent 5bf14d6afc
commit ae92ec6efb

View File

@ -8,6 +8,12 @@ theatlantic.com
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
def classes(classes):
    """Build a tag spec that matches elements carrying any of the given classes.

    ``classes`` is a whitespace-separated string of CSS class names. The
    return value is ``dict(attrs={'class': predicate})``, where ``predicate``
    receives an element's ``class`` attribute value and returns:

    * the value itself when it is falsy (``None`` or ``''``), i.e. no match;
    * otherwise the frozenset of class names shared between the element and
      the requested set, which is empty (falsy) when nothing matches.

    NOTE(review): presumably this dict is consumed by the recipe's
    ``keep_only_tags`` / ``remove_tags`` tag matching -- confirm against
    the recipe framework.
    """
    # split() with no argument ignores repeated spaces, tabs and newlines, so
    # the spec is tokenised the same way as the element's attribute below.
    q = frozenset(classes.split())
    return dict(attrs={
        'class': lambda x: x and frozenset(x.split()).intersection(q)})
class NewYorkMagazine(BasicNewsRecipe): class NewYorkMagazine(BasicNewsRecipe):
title = 'New York Magazine' title = 'New York Magazine'
@ -16,14 +22,20 @@ class NewYorkMagazine(BasicNewsRecipe):
language = 'en' language = 'en'
no_stylesheets = True no_stylesheets = True
remove_javascript = True remove_javascript = True
encoding = 'iso-8859-1' encoding = 'utf-8'
recursions = 1 recursions = 1
match_regexps = [r'http://nymag.com/.+/index[0-9]{1,2}.html$'] match_regexps = [r'http://nymag.com/.+/index[0-9]{1,2}.html$']
keep_only_tags = [dict(id='main')] keep_only_tags = [
classes('lede-text headline-primary article-timestamp by-authors'),
dict(id='main'),
dict(itemprop='articleBody'),
]
remove_tags = [ remove_tags = [
dict(attrs={'class': ['start-discussion']}), classes('related-stories start-discussion'),
dict(id=['minibrowserbox', 'article-related', 'article-tools']) dict(id=['minibrowserbox', 'article-related', 'article-tools'])
] ]
remove_attributes = ['srcset']
handle_gzip = True
PREFIX = 'http://nymag.com' PREFIX = 'http://nymag.com'