mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-08 02:34:06 -04:00
Update New Yorker
Fixes #1570632 [byline does not appear in retrieved newyorker recipe content](https://bugs.launchpad.net/calibre/+bug/1570632)
This commit is contained in:
parent
78bb3ce28c
commit
d7d35b16e9
@ -19,9 +19,8 @@ class NewYorker(BasicNewsRecipe):
|
||||
|
||||
url_list = []
|
||||
language = 'en'
|
||||
__author__ = 'Krittika Goyal'
|
||||
__author__ = 'Kovid Goyal'
|
||||
no_stylesheets = True
|
||||
auto_cleanup = True
|
||||
timefmt = ' [%b %d]'
|
||||
encoding = 'utf-8'
|
||||
extra_css = '''
|
||||
@ -30,6 +29,14 @@ class NewYorker(BasicNewsRecipe):
|
||||
.caption { font-size: xx-small; font-style: italic; font-weight: normal; }
|
||||
'''
|
||||
needs_subscription = 'optional'
|
||||
keep_only_tags = [
|
||||
dict(itemprop=['headline', 'alternativeHeadline', 'author', 'articleBody']),
|
||||
]
|
||||
remove_tags = [
|
||||
dict(attrs={'class':lambda x: x and set(x.split()).intersection({'content-ad-wrapper', 'social-hover'})}),
|
||||
dict(id=['newsletter-signup']),
|
||||
|
||||
]
|
||||
|
||||
def parse_index(self):
|
||||
soup = self.index_to_soup('http://www.newyorker.com/magazine?intcid=magazine')
|
||||
@ -70,6 +77,13 @@ class NewYorker(BasicNewsRecipe):
|
||||
|
||||
return feeds
|
||||
|
||||
def preprocess_html(self, soup):
    """Copy each image's ``data-src`` attribute into ``src``.

    Every ``<img>`` in ``soup`` that carries a non-empty ``data-src``
    attribute gets its ``src`` overwritten with that value. Images with
    no ``data-src`` attribute, or an empty one, are left unchanged.

    :param soup: parsed document exposing ``findAll('img')``; each
        returned tag must support ``.get(name)`` and item assignment.
    :return: the same ``soup`` object, modified in place.
    """
    for img in soup.findAll('img'):
        # Use .get() rather than img['data-src']: subscripting raises
        # KeyError for an <img> that has no data-src attribute, which
        # would abort preprocessing of the whole page.
        ds = img.get('data-src')
        if ds:
            img['src'] = ds
    return soup
|
||||
|
||||
# The New Yorker changes the content it delivers based on cookies, so the
|
||||
# following ensures that we send no cookies
|
||||
def get_browser(self, *args, **kwargs):
|
||||
|
Loading…
x
Reference in New Issue
Block a user