WSJ changed its today's paper markup again

Fixes #1807945 [Wall Street Journal fetch does not work](https://bugs.launchpad.net/calibre/+bug/1807945)
This commit is contained in:
Kovid Goyal 2018-12-11 18:25:08 +05:30
parent 25b8f19c01
commit b9eeeaace3
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
2 changed files with 8 additions and 18 deletions

View File

@@ -147,18 +147,13 @@ class WSJ(BasicNewsRecipe):
root = self.index_to_soup(url, as_tree=True)
CSSSelect = Select(root)
articles = []
for container in CSSSelect('.style__grid_3gzjbqouVfPMK84Adb3MFE .article'):
meta = next(CSSSelect('.type', container))
parent = meta.getparent()
meta = self.tag_to_string(meta)
title = next(CSSSelect('.title', parent))
a = next(CSSSelect('a', title))
for container in root.xpath('descendant::div[contains(@class, "WSJTheme__list-item_")]'):
heading = next(CSSSelect('h2, h3', container))
a = next(CSSSelect('a', heading))
title = self.tag_to_string(a)
if meta:
title += ' [%s]' % meta
url = self.abs_wsj_url(a.get('href'))
desc = ''
for p in CSSSelect('p.description', container):
for p in container.xpath('descendant::p[contains(@class, "WSJTheme__description_")]'):
q = self.tag_to_string(p)
if 'Subscriber Content' in q:
continue

View File

@@ -147,18 +147,13 @@ class WSJ(BasicNewsRecipe):
root = self.index_to_soup(url, as_tree=True)
CSSSelect = Select(root)
articles = []
for container in CSSSelect('.style__grid_3gzjbqouVfPMK84Adb3MFE .article'):
meta = next(CSSSelect('.type', container))
parent = meta.getparent()
meta = self.tag_to_string(meta)
title = next(CSSSelect('.title', parent))
a = next(CSSSelect('a', title))
for container in root.xpath('descendant::div[contains(@class, "WSJTheme__list-item_")]'):
heading = next(CSSSelect('h2, h3', container))
a = next(CSSSelect('a', heading))
title = self.tag_to_string(a)
if meta:
title += ' [%s]' % meta
url = self.abs_wsj_url(a.get('href'))
desc = ''
for p in CSSSelect('p.description', container):
for p in container.xpath('descendant::p[contains(@class, "WSJTheme__description_")]'):
q = self.tag_to_string(p)
if 'Subscriber Content' in q:
continue