Fix #760384 (WSJ recipe does not handle absolute URLs)

This commit is contained in:
Kovid Goyal 2011-04-13 20:34:34 -06:00
parent d80786c9bb
commit 14603c6858

View File

@@ -81,6 +81,11 @@ class WallStreetJournal(BasicNewsRecipe):
feeds.append((title, articles)) feeds.append((title, articles))
return feeds return feeds
def abs_wsj_url(self, href):
    """Return *href* as an absolute URL on online.wsj.com.

    Links that already carry an ``http://`` or ``https://`` scheme are
    returned untouched.  Protocol-relative links (``//host/path``) are
    given the ``http:`` scheme, and every other link is treated as a
    path on ``http://online.wsj.com``.
    """
    # Match the full scheme separator, not just the letters "http", so a
    # relative path such as "httpfoo/bar" is not mistaken for absolute.
    if href.startswith(('http://', 'https://')):
        return href
    # Protocol-relative link: it already names a host, only the scheme
    # is missing.
    if href.startswith('//'):
        return 'http:' + href
    # Relative path without a leading slash would otherwise be glued
    # directly onto the host name ("online.wsj.compage.html").
    if href and not href.startswith('/'):
        href = '/' + href
    return 'http://online.wsj.com' + href
def parse_index(self): def parse_index(self):
soup = self.wsj_get_index() soup = self.wsj_get_index()
@@ -99,14 +104,14 @@ class WallStreetJournal(BasicNewsRecipe):
pageone = a['href'].endswith('pageone') pageone = a['href'].endswith('pageone')
if pageone: if pageone:
title = 'Front Section' title = 'Front Section'
url = 'http://online.wsj.com' + a['href'] url = self.abs_wsj_url(a['href'])
feeds = self.wsj_add_feed(feeds,title,url) feeds = self.wsj_add_feed(feeds,title,url)
title = "What's News" title = "What's News"
url = url.replace('pageone','whatsnews') url = url.replace('pageone','whatsnews')
feeds = self.wsj_add_feed(feeds,title,url) feeds = self.wsj_add_feed(feeds,title,url)
else: else:
title = self.tag_to_string(a) title = self.tag_to_string(a)
url = 'http://online.wsj.com' + a['href'] url = self.abs_wsj_url(a['href'])
feeds = self.wsj_add_feed(feeds,title,url) feeds = self.wsj_add_feed(feeds,title,url)
return feeds return feeds
@@ -163,7 +168,7 @@ class WallStreetJournal(BasicNewsRecipe):
title = self.tag_to_string(a).strip() + ' [%s]'%meta title = self.tag_to_string(a).strip() + ' [%s]'%meta
else: else:
title = self.tag_to_string(a).strip() title = self.tag_to_string(a).strip()
url = 'http://online.wsj.com'+a['href'] url = self.abs_wsj_url(a['href'])
desc = '' desc = ''
for p in container.findAll('p'): for p in container.findAll('p'):
desc = self.tag_to_string(p) desc = self.tag_to_string(p)