Update New Yorker

Fixes #1570632 [byline does not appear in retrieved newyorker recipe content](https://bugs.launchpad.net/calibre/+bug/1570632)
This commit is contained in:
Kovid Goyal 2016-04-15 17:16:49 +05:30
parent 78bb3ce28c
commit d7d35b16e9

View File

@ -19,9 +19,8 @@ class NewYorker(BasicNewsRecipe):
url_list = [] url_list = []
language = 'en' language = 'en'
__author__ = 'Krittika Goyal' __author__ = 'Kovid Goyal'
no_stylesheets = True no_stylesheets = True
auto_cleanup = True
timefmt = ' [%b %d]' timefmt = ' [%b %d]'
encoding = 'utf-8' encoding = 'utf-8'
extra_css = ''' extra_css = '''
@ -30,6 +29,14 @@ class NewYorker(BasicNewsRecipe):
.caption { font-size: xx-small; font-style: italic; font-weight: normal; } .caption { font-size: xx-small; font-style: italic; font-weight: normal; }
''' '''
needs_subscription = 'optional' needs_subscription = 'optional'
# Tag specs for the parts of the page to keep, selected by itemprop value.
keep_only_tags = [
    {'itemprop': ['headline', 'alternativeHeadline', 'author', 'articleBody']},
]
# Tag specs for elements to strip: anything whose class attribute contains
# one of the listed class names, and the element with the newsletter id.
remove_tags = [
    {
        'attrs': {
            # Evaluates falsy for a missing/empty class attribute; otherwise
            # yields the set of listed class names present on the element.
            'class': lambda value: value and set(value.split()).intersection(
                {'content-ad-wrapper', 'social-hover'}),
        },
    },
    {'id': ['newsletter-signup']},
]
def parse_index(self): def parse_index(self):
soup = self.index_to_soup('http://www.newyorker.com/magazine?intcid=magazine') soup = self.index_to_soup('http://www.newyorker.com/magazine?intcid=magazine')
@ -70,6 +77,13 @@ class NewYorker(BasicNewsRecipe):
return feeds return feeds
def preprocess_html(self, soup):
    """Copy each image's ``data-src`` attribute into its ``src`` attribute.

    Every ``<img>`` found in ``soup`` that has a non-empty ``data-src``
    value gets that value assigned to ``src``. Images with no ``data-src``
    (or an empty one) are left untouched.

    :param soup: parsed document exposing ``findAll``.
    :return: the same ``soup`` object, modified in place.
    """
    for img in soup.findAll('img'):
        # Use .get() rather than indexing: subscripting a tag with a missing
        # attribute raises KeyError, which would abort processing of any page
        # that contains an <img> without data-src.
        ds = img.get('data-src')
        if ds:
            img['src'] = ds
    return soup
# The New Yorker changes the content it delivers based on cookies, so the # The New Yorker changes the content it delivers based on cookies, so the
# following ensures that we send no cookies # following ensures that we send no cookies
def get_browser(self, *args, **kwargs): def get_browser(self, *args, **kwargs):