Update NYTimes

This commit is contained in:
Kovid Goyal 2017-01-28 09:25:31 +05:30
parent d025bc327d
commit 1af85ad42e
2 changed files with 8 additions and 0 deletions

View File

@@ -240,6 +240,7 @@ class NYTimes(BasicNewsRecipe):
dict(attrs={'class': lambda x: x and 'skip-to-text-link' in x.split()}), dict(attrs={'class': lambda x: x and 'skip-to-text-link' in x.split()}),
dict(attrs={'class': lambda x: x and 'sharetools' in x.split()}), dict(attrs={'class': lambda x: x and 'sharetools' in x.split()}),
dict(attrs={'class': lambda x: x and 'ad' in x.split()}), dict(attrs={'class': lambda x: x and 'ad' in x.split()}),
dict(attrs={'class': lambda x: x and 'visually-hidden' in x.split()}),
dict(name='div', attrs={'class': re.compile('toolsList')}), # bits dict(name='div', attrs={'class': re.compile('toolsList')}), # bits
dict(name='div', attrs={ dict(name='div', attrs={
'class': re.compile('postNavigation')}), # bits 'class': re.compile('postNavigation')}), # bits
@@ -1061,6 +1062,9 @@ class NYTimes(BasicNewsRecipe):
aside.extract() aside.extract()
soup = self.strip_anchors(soup, True) soup = self.strip_anchors(soup, True)
for t in soup.findAll('time', attrs={'class':'dateline'}):
t.name = 'div'
if soup.find('div', attrs={'id': 'blogcontent'}) is None: if soup.find('div', attrs={'id': 'blogcontent'}) is None:
if first_fetch: if first_fetch:
aside = soup.find('div', 'aside') aside = soup.find('div', 'aside')

View File

@@ -240,6 +240,7 @@ class NYTimes(BasicNewsRecipe):
dict(attrs={'class': lambda x: x and 'skip-to-text-link' in x.split()}), dict(attrs={'class': lambda x: x and 'skip-to-text-link' in x.split()}),
dict(attrs={'class': lambda x: x and 'sharetools' in x.split()}), dict(attrs={'class': lambda x: x and 'sharetools' in x.split()}),
dict(attrs={'class': lambda x: x and 'ad' in x.split()}), dict(attrs={'class': lambda x: x and 'ad' in x.split()}),
dict(attrs={'class': lambda x: x and 'visually-hidden' in x.split()}),
dict(name='div', attrs={'class': re.compile('toolsList')}), # bits dict(name='div', attrs={'class': re.compile('toolsList')}), # bits
dict(name='div', attrs={ dict(name='div', attrs={
'class': re.compile('postNavigation')}), # bits 'class': re.compile('postNavigation')}), # bits
@@ -1069,6 +1070,9 @@ class NYTimes(BasicNewsRecipe):
aside.extract() aside.extract()
soup = self.strip_anchors(soup, True) soup = self.strip_anchors(soup, True)
for t in soup.findAll('time', attrs={'class':'dateline'}):
t.name = 'div'
if soup.find('div', attrs={'id': 'blogcontent'}) is None: if soup.find('div', attrs={'id': 'blogcontent'}) is None:
if first_fetch: if first_fetch:
aside = soup.find('div', 'aside') aside = soup.find('div', 'aside')