mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-08 18:54:09 -04:00
Update New Yorker
Fixes #1570632 [byline does not appear in retrieved newyorker recipe content](https://bugs.launchpad.net/calibre/+bug/1570632)
This commit is contained in:
parent
78bb3ce28c
commit
d7d35b16e9
@ -19,9 +19,8 @@ class NewYorker(BasicNewsRecipe):
|
|||||||
|
|
||||||
url_list = []
|
url_list = []
|
||||||
language = 'en'
|
language = 'en'
|
||||||
__author__ = 'Krittika Goyal'
|
__author__ = 'Kovid Goyal'
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
auto_cleanup = True
|
|
||||||
timefmt = ' [%b %d]'
|
timefmt = ' [%b %d]'
|
||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
extra_css = '''
|
extra_css = '''
|
||||||
@ -30,6 +29,14 @@ class NewYorker(BasicNewsRecipe):
|
|||||||
.caption { font-size: xx-small; font-style: italic; font-weight: normal; }
|
.caption { font-size: xx-small; font-style: italic; font-weight: normal; }
|
||||||
'''
|
'''
|
||||||
needs_subscription = 'optional'
|
needs_subscription = 'optional'
|
||||||
|
keep_only_tags = [
|
||||||
|
dict(itemprop=['headline', 'alternativeHeadline', 'author', 'articleBody']),
|
||||||
|
]
|
||||||
|
remove_tags = [
|
||||||
|
dict(attrs={'class':lambda x: x and set(x.split()).intersection({'content-ad-wrapper', 'social-hover'})}),
|
||||||
|
dict(id=['newsletter-signup']),
|
||||||
|
|
||||||
|
]
|
||||||
|
|
||||||
def parse_index(self):
|
def parse_index(self):
|
||||||
soup = self.index_to_soup('http://www.newyorker.com/magazine?intcid=magazine')
|
soup = self.index_to_soup('http://www.newyorker.com/magazine?intcid=magazine')
|
||||||
@ -70,6 +77,13 @@ class NewYorker(BasicNewsRecipe):
|
|||||||
|
|
||||||
return feeds
|
return feeds
|
||||||
|
|
||||||
|
def preprocess_html(self, soup):
    """Copy each image's ``data-src`` attribute into ``src``.

    Every ``<img>`` found in *soup* that carries a non-empty ``data-src``
    attribute gets that value assigned to ``src``. Images that have no
    ``data-src`` (or an empty one) are left untouched.

    Returns the same *soup* object, modified in place.
    """
    for img in soup.findAll('img'):
        # Indexing a tag with a missing attribute raises KeyError, which
        # would abort processing of the whole page; .get() yields None
        # instead so plain images are simply skipped.
        ds = img.get('data-src')
        if ds:
            img['src'] = ds
    return soup
|
||||||
|
|
||||||
# The New Yorker changes the content it delivers based on cookies, so the
|
# The New Yorker changes the content it delivers based on cookies, so the
|
||||||
# following ensures that we send no cookies
|
# following ensures that we send no cookies
|
||||||
def get_browser(self, *args, **kwargs):
|
def get_browser(self, *args, **kwargs):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user