From 14603c68582da19e037179af45f043b835bb0ecd Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Wed, 13 Apr 2011 20:34:34 -0600
Subject: [PATCH] Fix #760384 (wsj recipe does not handle absolute urls)

---
Summary of the change: the new abs_wsj_url() helper returns any href that
starts with 'http' unchanged and prefixes every other href with
'http://online.wsj.com'. The three places that previously concatenated the
host unconditionally now call the helper.

NOTE(review): the line breaks and leading indentation of this patch were
reconstructed from a copy that had been flattened onto one line. Verify the
context-line whitespace against recipes/wsj.recipe before applying, or apply
with `git apply --ignore-whitespace`.

 recipes/wsj.recipe | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/recipes/wsj.recipe b/recipes/wsj.recipe
index f2854e65ca..cf84722bac 100644
--- a/recipes/wsj.recipe
+++ b/recipes/wsj.recipe
@@ -81,6 +81,11 @@ class WallStreetJournal(BasicNewsRecipe):
             feeds.append((title, articles))
         return feeds
 
+    def abs_wsj_url(self, href):
+        if not href.startswith('http'):
+            href = 'http://online.wsj.com' + href
+        return href
+
     def parse_index(self):
         soup = self.wsj_get_index()
 
@@ -99,14 +104,14 @@ class WallStreetJournal(BasicNewsRecipe):
             pageone = a['href'].endswith('pageone')
             if pageone:
                 title = 'Front Section'
-                url = 'http://online.wsj.com' + a['href']
+                url = self.abs_wsj_url(a['href'])
                 feeds = self.wsj_add_feed(feeds,title,url)
                 title = "What's News"
                 url = url.replace('pageone','whatsnews')
                 feeds = self.wsj_add_feed(feeds,title,url)
             else:
                 title = self.tag_to_string(a)
-                url = 'http://online.wsj.com' + a['href']
+                url = self.abs_wsj_url(a['href'])
                 feeds = self.wsj_add_feed(feeds,title,url)
         return feeds
 
@@ -163,7 +168,7 @@ class WallStreetJournal(BasicNewsRecipe):
                 title = self.tag_to_string(a).strip() + ' [%s]'%meta
             else:
                 title = self.tag_to_string(a).strip()
-            url = 'http://online.wsj.com'+a['href']
+            url = self.abs_wsj_url(a['href'])
             desc = ''
             for p in container.findAll('p'):
                 desc = self.tag_to_string(p)