Update The Onion

This commit is contained in:
Kovid Goyal 2015-01-21 22:27:33 +05:30
parent 589c719dc6
commit 8a62bc3860

View File

@@ -43,12 +43,12 @@ class TheOnion(BasicNewsRecipe):
remove_tags = [ remove_tags = [
dict(name=['object','link','iframe','base','meta']) dict(name=['object','link','iframe','base','meta'])
,dict(attrs={'class':lambda x: x and 'share-tools' in x.split()}) ,dict(attrs={'class':lambda x: x and 'share-tools' in x.split()})
,dict(name='div', attrs={'id':['topshare', 'bottomshare']})
] ]
feeds = [ feeds = [
(u'Daily' , u'http://feeds.theonion.com/theonion/daily' ) (u'Daily' , u'http://feeds.theonion.com/theonion/daily')
,(u'Sports' , u'http://feeds.theonion.com/theonion/sports' ) ,(u'Sports' , u'http://feeds.theonion.com/theonion/sports')
] ]
def get_browser(self): def get_browser(self):
@@ -65,7 +65,7 @@ class TheOnion(BasicNewsRecipe):
def get_article_url(self, article): def get_article_url(self, article):
artl = BasicNewsRecipe.get_article_url(self, article) artl = BasicNewsRecipe.get_article_url(self, article)
if artl.startswith('http://www.theonion.com/audio/'): if artl.startswith('http://www.theonion.com/audio/'):
artl = None artl = None
return artl return artl
def preprocess_html(self, soup): def preprocess_html(self, soup):
@@ -74,19 +74,18 @@ class TheOnion(BasicNewsRecipe):
for item in soup.findAll('a'): for item in soup.findAll('a'):
limg = item.find('img') limg = item.find('img')
if item.string is not None: if item.string is not None:
str = item.string str = item.string
item.replaceWith(str) item.replaceWith(str)
else: else:
if limg: if limg:
item.name = 'div' item.name = 'div'
item.attrs = [] item.attrs = []
if not limg.has_key('alt'): if not limg.has_key('alt'):
limg['alt'] = 'image' limg['alt'] = 'image'
else: else:
str = self.tag_to_string(item) str = self.tag_to_string(item)
item.replaceWith(str) item.replaceWith(str)
for item in soup.findAll('img'): for item in soup.findAll('img'):
if item.has_key('data-src'): if item.has_key('data-src'):
item['src'] = item['data-src'] item['src'] = item['data-src']
return soup return soup