mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
WSJ changed its today's paper markup again
Fixes #1807945 [Wall Street Journal fetch does not work](https://bugs.launchpad.net/calibre/+bug/1807945)
This commit is contained in:
parent
25b8f19c01
commit
b9eeeaace3
@ -147,18 +147,13 @@ class WSJ(BasicNewsRecipe):
|
|||||||
root = self.index_to_soup(url, as_tree=True)
|
root = self.index_to_soup(url, as_tree=True)
|
||||||
CSSSelect = Select(root)
|
CSSSelect = Select(root)
|
||||||
articles = []
|
articles = []
|
||||||
for container in CSSSelect('.style__grid_3gzjbqouVfPMK84Adb3MFE .article'):
|
for container in root.xpath('descendant::div[contains(@class, "WSJTheme__list-item_")]'):
|
||||||
meta = next(CSSSelect('.type', container))
|
heading = next(CSSSelect('h2, h3', container))
|
||||||
parent = meta.getparent()
|
a = next(CSSSelect('a', heading))
|
||||||
meta = self.tag_to_string(meta)
|
|
||||||
title = next(CSSSelect('.title', parent))
|
|
||||||
a = next(CSSSelect('a', title))
|
|
||||||
title = self.tag_to_string(a)
|
title = self.tag_to_string(a)
|
||||||
if meta:
|
|
||||||
title += ' [%s]' % meta
|
|
||||||
url = self.abs_wsj_url(a.get('href'))
|
url = self.abs_wsj_url(a.get('href'))
|
||||||
desc = ''
|
desc = ''
|
||||||
for p in CSSSelect('p.description', container):
|
for p in container.xpath('descendant::p[contains(@class, "WSJTheme__description_")]'):
|
||||||
q = self.tag_to_string(p)
|
q = self.tag_to_string(p)
|
||||||
if 'Subscriber Content' in q:
|
if 'Subscriber Content' in q:
|
||||||
continue
|
continue
|
||||||
|
@ -147,18 +147,13 @@ class WSJ(BasicNewsRecipe):
|
|||||||
root = self.index_to_soup(url, as_tree=True)
|
root = self.index_to_soup(url, as_tree=True)
|
||||||
CSSSelect = Select(root)
|
CSSSelect = Select(root)
|
||||||
articles = []
|
articles = []
|
||||||
for container in CSSSelect('.style__grid_3gzjbqouVfPMK84Adb3MFE .article'):
|
for container in root.xpath('descendant::div[contains(@class, "WSJTheme__list-item_")]'):
|
||||||
meta = next(CSSSelect('.type', container))
|
heading = next(CSSSelect('h2, h3', container))
|
||||||
parent = meta.getparent()
|
a = next(CSSSelect('a', heading))
|
||||||
meta = self.tag_to_string(meta)
|
|
||||||
title = next(CSSSelect('.title', parent))
|
|
||||||
a = next(CSSSelect('a', title))
|
|
||||||
title = self.tag_to_string(a)
|
title = self.tag_to_string(a)
|
||||||
if meta:
|
|
||||||
title += ' [%s]' % meta
|
|
||||||
url = self.abs_wsj_url(a.get('href'))
|
url = self.abs_wsj_url(a.get('href'))
|
||||||
desc = ''
|
desc = ''
|
||||||
for p in CSSSelect('p.description', container):
|
for p in container.xpath('descendant::p[contains(@class, "WSJTheme__description_")]'):
|
||||||
q = self.tag_to_string(p)
|
q = self.tag_to_string(p)
|
||||||
if 'Subscriber Content' in q:
|
if 'Subscriber Content' in q:
|
||||||
continue
|
continue
|
||||||
|
Loading…
x
Reference in New Issue
Block a user