From b9eeeaace3e1855174a516bb3226deadcabd179c Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 11 Dec 2018 18:25:08 +0530 Subject: [PATCH] WSJ changed its todays paper markup again Fixes #1807945 [Wall Street Journal fetch does not work](https://bugs.launchpad.net/calibre/+bug/1807945) --- recipes/wsj.recipe | 13 ++++--------- recipes/wsj_free.recipe | 13 ++++--------- 2 files changed, 8 insertions(+), 18 deletions(-) diff --git a/recipes/wsj.recipe b/recipes/wsj.recipe index 13ec7e607c..8cf03260b7 100644 --- a/recipes/wsj.recipe +++ b/recipes/wsj.recipe @@ -147,18 +147,13 @@ class WSJ(BasicNewsRecipe): root = self.index_to_soup(url, as_tree=True) CSSSelect = Select(root) articles = [] - for container in CSSSelect('.style__grid_3gzjbqouVfPMK84Adb3MFE .article'): - meta = next(CSSSelect('.type', container)) - parent = meta.getparent() - meta = self.tag_to_string(meta) - title = next(CSSSelect('.title', parent)) - a = next(CSSSelect('a', title)) + for container in root.xpath('descendant::div[contains(@class, "WSJTheme__list-item_")]'): + heading = next(CSSSelect('h2, h3', container)) + a = next(CSSSelect('a', heading)) title = self.tag_to_string(a) - if meta: - title += ' [%s]' % meta url = self.abs_wsj_url(a.get('href')) desc = '' - for p in CSSSelect('p.description', container): + for p in container.xpath('descendant::p[contains(@class, "WSJTheme__description_")]'): q = self.tag_to_string(p) if 'Subscriber Content' in q: continue diff --git a/recipes/wsj_free.recipe b/recipes/wsj_free.recipe index d521ebd7a7..831c483c41 100644 --- a/recipes/wsj_free.recipe +++ b/recipes/wsj_free.recipe @@ -147,18 +147,13 @@ class WSJ(BasicNewsRecipe): root = self.index_to_soup(url, as_tree=True) CSSSelect = Select(root) articles = [] - for container in CSSSelect('.style__grid_3gzjbqouVfPMK84Adb3MFE .article'): - meta = next(CSSSelect('.type', container)) - parent = meta.getparent() - meta = self.tag_to_string(meta) - title = next(CSSSelect('.title', parent)) - a = next(CSSSelect('a', title)) + for container in root.xpath('descendant::div[contains(@class, "WSJTheme__list-item_")]'): + heading = next(CSSSelect('h2, h3', container)) + a = next(CSSSelect('a', heading)) title = self.tag_to_string(a) - if meta: - title += ' [%s]' % meta url = self.abs_wsj_url(a.get('href')) desc = '' - for p in CSSSelect('p.description', container): + for p in container.xpath('descendant::p[contains(@class, "WSJTheme__description_")]'): q = self.tag_to_string(p) if 'Subscriber Content' in q: continue