mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Update Wall Street Journal
This commit is contained in:
parent
24ffd08b39
commit
568726db1f
@ -80,7 +80,7 @@ class WSJ(JavascriptRecipe):
|
|||||||
href = 'http://online.wsj.com' + href
|
href = 'http://online.wsj.com' + href
|
||||||
return href
|
return href
|
||||||
|
|
||||||
def wsj_find_articles(self, url):
|
def wsj_find_articles(self, url, ahed=False):
|
||||||
root = self.index_to_soup(url)
|
root = self.index_to_soup(url)
|
||||||
|
|
||||||
for x in CSSSelect('div.whatsNews-simple')(root):
|
for x in CSSSelect('div.whatsNews-simple')(root):
|
||||||
@ -111,6 +111,20 @@ class WSJ(JavascriptRecipe):
|
|||||||
|
|
||||||
self.log('\tFound article:', title)
|
self.log('\tFound article:', title)
|
||||||
self.log('\t\t', desc)
|
self.log('\t\t', desc)
|
||||||
|
|
||||||
|
if ahed:
|
||||||
|
for h2 in root.xpath('//li[@class="ahed_listitem"]/h2'):
|
||||||
|
a = h2.xpath('descendant::a')[0]
|
||||||
|
title = self.tag_to_string(a)
|
||||||
|
url = self.abs_wsj_url(a.get('href'))
|
||||||
|
desc = ''
|
||||||
|
p = h2.xpath('following-sibling::p')
|
||||||
|
if p:
|
||||||
|
desc = self.tag_to_string(p[0])
|
||||||
|
articles.append({'title':title, 'url':url, 'description':desc, 'date':''})
|
||||||
|
self.log('Found article:', title)
|
||||||
|
self.log('\t\t', desc)
|
||||||
|
|
||||||
return articles
|
return articles
|
||||||
|
|
||||||
def wsj_find_wn_articles(self, url):
|
def wsj_find_wn_articles(self, url):
|
||||||
@ -145,7 +159,7 @@ class WSJ(JavascriptRecipe):
|
|||||||
if url.endswith('whatsnews'):
|
if url.endswith('whatsnews'):
|
||||||
articles = self.wsj_find_wn_articles(url)
|
articles = self.wsj_find_wn_articles(url)
|
||||||
else:
|
else:
|
||||||
articles = self.wsj_find_articles(url)
|
articles = self.wsj_find_articles(url, ahed=title == 'Front Section')
|
||||||
except:
|
except:
|
||||||
articles = []
|
articles = []
|
||||||
if articles:
|
if articles:
|
||||||
|
@ -75,7 +75,7 @@ class WSJ(JavascriptRecipe):
|
|||||||
href = 'http://online.wsj.com' + href
|
href = 'http://online.wsj.com' + href
|
||||||
return href
|
return href
|
||||||
|
|
||||||
def wsj_find_articles(self, url):
|
def wsj_find_articles(self, url, ahed=False):
|
||||||
root = self.index_to_soup(url)
|
root = self.index_to_soup(url)
|
||||||
|
|
||||||
for x in CSSSelect('div.whatsNews-simple')(root):
|
for x in CSSSelect('div.whatsNews-simple')(root):
|
||||||
@ -106,6 +106,19 @@ class WSJ(JavascriptRecipe):
|
|||||||
|
|
||||||
self.log('\tFound article:', title)
|
self.log('\tFound article:', title)
|
||||||
self.log('\t\t', desc)
|
self.log('\t\t', desc)
|
||||||
|
if ahed:
|
||||||
|
for h2 in root.xpath('//li[@class="ahed_listitem"]/h2'):
|
||||||
|
a = h2.xpath('descendant::a')[0]
|
||||||
|
title = self.tag_to_string(a)
|
||||||
|
url = self.abs_wsj_url(a.get('href'))
|
||||||
|
desc = ''
|
||||||
|
p = h2.xpath('following-sibling::p')
|
||||||
|
if p:
|
||||||
|
desc = self.tag_to_string(p[0])
|
||||||
|
articles.append({'title':title, 'url':url, 'description':desc, 'date':''})
|
||||||
|
self.log('Found article:', title)
|
||||||
|
self.log('\t\t', desc)
|
||||||
|
|
||||||
return articles
|
return articles
|
||||||
|
|
||||||
def wsj_find_wn_articles(self, url):
|
def wsj_find_wn_articles(self, url):
|
||||||
@ -140,7 +153,7 @@ class WSJ(JavascriptRecipe):
|
|||||||
if url.endswith('whatsnews'):
|
if url.endswith('whatsnews'):
|
||||||
articles = self.wsj_find_wn_articles(url)
|
articles = self.wsj_find_wn_articles(url)
|
||||||
else:
|
else:
|
||||||
articles = self.wsj_find_articles(url)
|
articles = self.wsj_find_articles(url, ahed=title == 'Front Section')
|
||||||
except:
|
except:
|
||||||
articles = []
|
articles = []
|
||||||
if articles:
|
if articles:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user