mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Fix #760384 (WSJ recipe does not handle absolute URLs)
This commit is contained in:
parent
d80786c9bb
commit
14603c6858
@ -81,6 +81,11 @@ class WallStreetJournal(BasicNewsRecipe):
|
|||||||
feeds.append((title, articles))
|
feeds.append((title, articles))
|
||||||
return feeds
|
return feeds
|
||||||
|
|
||||||
|
def abs_wsj_url(self, href):
    """Return *href* as an absolute URL on the online.wsj.com site.

    :param href: Value of a link's ``href`` attribute. May already be
        absolute (``http://`` / ``https://``), protocol-relative
        (``//host/path``), or relative to the site root with or without
        a leading slash.
    :return: An absolute URL string. Links that are already absolute are
        returned unchanged.
    """
    # Already fully qualified: leave untouched. Matching on the full
    # scheme prefix avoids misclassifying a relative path that merely
    # begins with the letters "http".
    if href.startswith(('http://', 'https://')):
        return href
    # Protocol-relative link: it names its own host, so only the scheme
    # is missing. Prefixing the site root here would duplicate the host.
    if href.startswith('//'):
        return 'http:' + href
    # Site-relative link: make sure exactly one slash separates the host
    # from the path, otherwise the path is fused onto the domain name.
    if not href.startswith('/'):
        href = '/' + href
    return 'http://online.wsj.com' + href
|
||||||
|
|
||||||
def parse_index(self):
|
def parse_index(self):
|
||||||
soup = self.wsj_get_index()
|
soup = self.wsj_get_index()
|
||||||
|
|
||||||
@ -99,14 +104,14 @@ class WallStreetJournal(BasicNewsRecipe):
|
|||||||
pageone = a['href'].endswith('pageone')
|
pageone = a['href'].endswith('pageone')
|
||||||
if pageone:
|
if pageone:
|
||||||
title = 'Front Section'
|
title = 'Front Section'
|
||||||
url = 'http://online.wsj.com' + a['href']
|
url = self.abs_wsj_url(a['href'])
|
||||||
feeds = self.wsj_add_feed(feeds,title,url)
|
feeds = self.wsj_add_feed(feeds,title,url)
|
||||||
title = "What's News"
|
title = "What's News"
|
||||||
url = url.replace('pageone','whatsnews')
|
url = url.replace('pageone','whatsnews')
|
||||||
feeds = self.wsj_add_feed(feeds,title,url)
|
feeds = self.wsj_add_feed(feeds,title,url)
|
||||||
else:
|
else:
|
||||||
title = self.tag_to_string(a)
|
title = self.tag_to_string(a)
|
||||||
url = 'http://online.wsj.com' + a['href']
|
url = self.abs_wsj_url(a['href'])
|
||||||
feeds = self.wsj_add_feed(feeds,title,url)
|
feeds = self.wsj_add_feed(feeds,title,url)
|
||||||
return feeds
|
return feeds
|
||||||
|
|
||||||
@ -163,7 +168,7 @@ class WallStreetJournal(BasicNewsRecipe):
|
|||||||
title = self.tag_to_string(a).strip() + ' [%s]'%meta
|
title = self.tag_to_string(a).strip() + ' [%s]'%meta
|
||||||
else:
|
else:
|
||||||
title = self.tag_to_string(a).strip()
|
title = self.tag_to_string(a).strip()
|
||||||
url = 'http://online.wsj.com'+a['href']
|
url = self.abs_wsj_url(a['href'])
|
||||||
desc = ''
|
desc = ''
|
||||||
for p in container.findAll('p'):
|
for p in container.findAll('p'):
|
||||||
desc = self.tag_to_string(p)
|
desc = self.tag_to_string(p)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user