mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Update Wall Street Journal
This commit is contained in:
parent
24ffd08b39
commit
568726db1f
@ -80,7 +80,7 @@ class WSJ(JavascriptRecipe):
|
||||
href = 'http://online.wsj.com' + href
|
||||
return href
|
||||
|
||||
def wsj_find_articles(self, url):
|
||||
def wsj_find_articles(self, url, ahed=False):
|
||||
root = self.index_to_soup(url)
|
||||
|
||||
for x in CSSSelect('div.whatsNews-simple')(root):
|
||||
@ -111,6 +111,20 @@ class WSJ(JavascriptRecipe):
|
||||
|
||||
self.log('\tFound article:', title)
|
||||
self.log('\t\t', desc)
|
||||
|
||||
if ahed:
|
||||
for h2 in root.xpath('//li[@class="ahed_listitem"]/h2'):
|
||||
a = h2.xpath('descendant::a')[0]
|
||||
title = self.tag_to_string(a)
|
||||
url = self.abs_wsj_url(a.get('href'))
|
||||
desc = ''
|
||||
p = h2.xpath('following-sibling::p')
|
||||
if p:
|
||||
desc = self.tag_to_string(p[0])
|
||||
articles.append({'title':title, 'url':url, 'description':desc, 'date':''})
|
||||
self.log('Found article:', title)
|
||||
self.log('\t\t', desc)
|
||||
|
||||
return articles
|
||||
|
||||
def wsj_find_wn_articles(self, url):
|
||||
@ -145,7 +159,7 @@ class WSJ(JavascriptRecipe):
|
||||
if url.endswith('whatsnews'):
|
||||
articles = self.wsj_find_wn_articles(url)
|
||||
else:
|
||||
articles = self.wsj_find_articles(url)
|
||||
articles = self.wsj_find_articles(url, ahed=title == 'Front Section')
|
||||
except:
|
||||
articles = []
|
||||
if articles:
|
||||
|
@ -75,7 +75,7 @@ class WSJ(JavascriptRecipe):
|
||||
href = 'http://online.wsj.com' + href
|
||||
return href
|
||||
|
||||
def wsj_find_articles(self, url):
|
||||
def wsj_find_articles(self, url, ahed=False):
|
||||
root = self.index_to_soup(url)
|
||||
|
||||
for x in CSSSelect('div.whatsNews-simple')(root):
|
||||
@ -106,6 +106,19 @@ class WSJ(JavascriptRecipe):
|
||||
|
||||
self.log('\tFound article:', title)
|
||||
self.log('\t\t', desc)
|
||||
if ahed:
|
||||
for h2 in root.xpath('//li[@class="ahed_listitem"]/h2'):
|
||||
a = h2.xpath('descendant::a')[0]
|
||||
title = self.tag_to_string(a)
|
||||
url = self.abs_wsj_url(a.get('href'))
|
||||
desc = ''
|
||||
p = h2.xpath('following-sibling::p')
|
||||
if p:
|
||||
desc = self.tag_to_string(p[0])
|
||||
articles.append({'title':title, 'url':url, 'description':desc, 'date':''})
|
||||
self.log('Found article:', title)
|
||||
self.log('\t\t', desc)
|
||||
|
||||
return articles
|
||||
|
||||
def wsj_find_wn_articles(self, url):
|
||||
@ -140,7 +153,7 @@ class WSJ(JavascriptRecipe):
|
||||
if url.endswith('whatsnews'):
|
||||
articles = self.wsj_find_wn_articles(url)
|
||||
else:
|
||||
articles = self.wsj_find_articles(url)
|
||||
articles = self.wsj_find_articles(url, ahed=title == 'Front Section')
|
||||
except:
|
||||
articles = []
|
||||
if articles:
|
||||
|
Loading…
x
Reference in New Issue
Block a user