mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Update Wall Street Journal
This commit is contained in:
parent
0e5279537f
commit
f030b414ea
@ -36,6 +36,7 @@ def classes(classes):
|
||||
return dict(attrs={
|
||||
'class': lambda x: x and frozenset(x.split()).intersection(q)})
|
||||
|
||||
|
||||
USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64; rv:45.0) Gecko/20100101 Firefox/45.0'
|
||||
|
||||
|
||||
@ -150,11 +151,12 @@ class WSJ(BasicNewsRecipe):
|
||||
|
||||
articles = []
|
||||
|
||||
for a in CSSSelect('a.mjLinkItem[href]')(root):
|
||||
container = a.xpath('ancestor::li')
|
||||
meta = CSSSelect('.meta_sectionName')(a)
|
||||
if meta:
|
||||
for container in root.xpath('//li[contains(@class, "mjItemMain")]'):
|
||||
meta = container.xpath('descendant::span[@class="meta_sectionName"]')
|
||||
if not meta:
|
||||
continue
|
||||
meta = meta[0]
|
||||
a = meta.xpath('ancestor::a')[0]
|
||||
meta.getparent().remove(meta)
|
||||
meta = self.tag_to_string(meta)
|
||||
title = self.tag_to_string(a)
|
||||
@ -163,9 +165,11 @@ class WSJ(BasicNewsRecipe):
|
||||
url = self.abs_wsj_url(a.get('href'))
|
||||
desc = ''
|
||||
if container:
|
||||
for p in CSSSelect('p')(container[0]):
|
||||
desc = self.tag_to_string(p)
|
||||
if 'Subscriber Content' not in desc:
|
||||
for p in container.xpath('descendant::p'):
|
||||
q = self.tag_to_string(p)
|
||||
if 'Subscriber Content' in q:
|
||||
continue
|
||||
desc += q
|
||||
break
|
||||
|
||||
articles.append({'title': title, 'url': url,
|
||||
@ -217,14 +221,15 @@ class WSJ(BasicNewsRecipe):
|
||||
return articles
|
||||
|
||||
def wsj_add_feed(self, feeds, title, url):
|
||||
self.log('Found section:', title)
|
||||
self.log('Found section:', title, '[' + url + ']')
|
||||
try:
|
||||
if url.endswith('whatsnews'):
|
||||
articles = self.wsj_find_wn_articles(url)
|
||||
else:
|
||||
articles = self.wsj_find_articles(
|
||||
url, ahed=title == 'Front Section')
|
||||
except:
|
||||
except Exception:
|
||||
self.log.exception('Failed to parse section:', title)
|
||||
articles = []
|
||||
if articles:
|
||||
feeds.append((title, articles))
|
||||
|
@ -109,11 +109,12 @@ class WSJ(BasicNewsRecipe):
|
||||
|
||||
articles = []
|
||||
|
||||
for a in CSSSelect('a.mjLinkItem[href]')(root):
|
||||
container = a.xpath('ancestor::li')
|
||||
meta = CSSSelect('.meta_sectionName')(a)
|
||||
if meta:
|
||||
for container in root.xpath('//li[contains(@class, "mjItemMain")]'):
|
||||
meta = container.xpath('descendant::span[@class="meta_sectionName"]')
|
||||
if not meta:
|
||||
continue
|
||||
meta = meta[0]
|
||||
a = meta.xpath('ancestor::a')[0]
|
||||
meta.getparent().remove(meta)
|
||||
meta = self.tag_to_string(meta)
|
||||
title = self.tag_to_string(a)
|
||||
@ -122,9 +123,11 @@ class WSJ(BasicNewsRecipe):
|
||||
url = self.abs_wsj_url(a.get('href'))
|
||||
desc = ''
|
||||
if container:
|
||||
for p in CSSSelect('p')(container[0]):
|
||||
desc = self.tag_to_string(p)
|
||||
if 'Subscriber Content' not in desc:
|
||||
for p in container.xpath('descendant::p'):
|
||||
q = self.tag_to_string(p)
|
||||
if 'Subscriber Content' in q:
|
||||
continue
|
||||
desc += q
|
||||
break
|
||||
|
||||
articles.append({'title': title, 'url': url,
|
||||
@ -176,14 +179,15 @@ class WSJ(BasicNewsRecipe):
|
||||
return articles
|
||||
|
||||
def wsj_add_feed(self, feeds, title, url):
|
||||
self.log('Found section:', title)
|
||||
self.log('Found section:', title, '[' + url + ']')
|
||||
try:
|
||||
if url.endswith('whatsnews'):
|
||||
articles = self.wsj_find_wn_articles(url)
|
||||
else:
|
||||
articles = self.wsj_find_articles(
|
||||
url, ahed=title == 'Front Section')
|
||||
except:
|
||||
except Exception:
|
||||
self.log.exception('Failed to parse section:', title)
|
||||
articles = []
|
||||
if articles:
|
||||
feeds.append((title, articles))
|
||||
|
Loading…
x
Reference in New Issue
Block a user