diff --git a/recipes/wsj.recipe b/recipes/wsj.recipe index 3b2b9e503b..da28f081b3 100644 --- a/recipes/wsj.recipe +++ b/recipes/wsj.recipe @@ -5,11 +5,11 @@ from __future__ import absolute_import, division, print_function, unicode_literals import json - try: from urllib.parse import quote except ImportError: from urllib import quote + from mechanize import Request from calibre import random_user_agent @@ -150,13 +150,13 @@ class WSJ(BasicNewsRecipe): root = self.index_to_soup(url, as_tree=True) CSSSelect = Select(root) articles = [] - for container in root.xpath('descendant::div[contains(@class, "WSJTheme__list-item_")]'): + for container in root.xpath('descendant::div[contains(@class, "WSJTheme--list-item-")]'): heading = next(CSSSelect('h2, h3', container)) a = next(CSSSelect('a', heading)) title = self.tag_to_string(a) url = self.abs_wsj_url(a.get('href')) desc = '' - for p in container.xpath('descendant::p[contains(@class, "WSJTheme__description_")]'): + for p in container.xpath('descendant::p[contains(@class, "WSJTheme--description-")]'): q = self.tag_to_string(p) if 'Subscriber Content' in q: continue @@ -173,7 +173,7 @@ class WSJ(BasicNewsRecipe): def wsj_find_wn_articles(self, feeds, root, CSSSelect): articles = [] - for a in CSSSelect('.style__strap_2m6gCW_c_6WZKkU--eRUWv'): + for a in CSSSelect('.style--strap--3DsLojSy'): if 'WHAT\'S NEWS' in self.tag_to_string(a).upper(): whats_news = a.getparent() break @@ -196,7 +196,6 @@ class WSJ(BasicNewsRecipe): return articles def wsj_add_feed(self, feeds, title, url): - self.log('Found section:', title, '[' + url + ']') try: articles = self.wsj_find_articles(url) if not articles: @@ -212,18 +211,21 @@ class WSJ(BasicNewsRecipe): # return self.test_wsj_index() root = self.index_to_soup(self.wsj_itp_page, as_tree=True) CSSSelect = Select(root) + # from calibre.utils.ipython import ipython + # ipython({'root': root, 'CSSSelect': CSSSelect, 'raw': self.wsj_itp_page}) for inp in CSSSelect('.DayPickerInput > input'): if inp.get('placeholder'): self.timefmt = inp.get('placeholder') break feeds = [] - for a in CSSSelect('.WSJTheme__nav-container_sPVwT3FiPlWjFGtr5KH3d .WSJTheme__section-link_XGDsdx5qPlnC8BZPxQ63R'): + for a in CSSSelect('.WSJTheme--nav-container--sPVwT3Fi .WSJTheme--section-link--XGDsdx5q'): frontpage = a.get('href').endswith('frontpage') title = self.tag_to_string(a).capitalize().strip().replace('U.s.', 'U.S.') if not title: continue url = self.abs_wsj_url(a.get('href')) + self.log('Found section:', title, 'at', url) self.wsj_add_feed(feeds, title, url) if frontpage: self.wsj_find_wn_articles(feeds, root, CSSSelect) diff --git a/recipes/wsj_free.recipe b/recipes/wsj_free.recipe index b140dc072d..e04e210114 100644 --- a/recipes/wsj_free.recipe +++ b/recipes/wsj_free.recipe @@ -150,13 +150,13 @@ class WSJ(BasicNewsRecipe): root = self.index_to_soup(url, as_tree=True) CSSSelect = Select(root) articles = [] - for container in root.xpath('descendant::div[contains(@class, "WSJTheme__list-item_")]'): + for container in root.xpath('descendant::div[contains(@class, "WSJTheme--list-item-")]'): heading = next(CSSSelect('h2, h3', container)) a = next(CSSSelect('a', heading)) title = self.tag_to_string(a) url = self.abs_wsj_url(a.get('href')) desc = '' - for p in container.xpath('descendant::p[contains(@class, "WSJTheme__description_")]'): + for p in container.xpath('descendant::p[contains(@class, "WSJTheme--description-")]'): q = self.tag_to_string(p) if 'Subscriber Content' in q: continue @@ -173,7 +173,7 @@ class WSJ(BasicNewsRecipe): def wsj_find_wn_articles(self, feeds, root, CSSSelect): articles = [] - for a in CSSSelect('.style__strap_2m6gCW_c_6WZKkU--eRUWv'): + for a in CSSSelect('.style--strap--3DsLojSy'): if 'WHAT\'S NEWS' in self.tag_to_string(a).upper(): whats_news = a.getparent() break @@ -196,7 +196,6 @@ class WSJ(BasicNewsRecipe): return articles def wsj_add_feed(self, feeds, title, url): - self.log('Found section:', title, '[' + url + ']') try: articles = self.wsj_find_articles(url) if not articles: @@ -212,18 +211,21 @@ class WSJ(BasicNewsRecipe): # return self.test_wsj_index() root = self.index_to_soup(self.wsj_itp_page, as_tree=True) CSSSelect = Select(root) + # from calibre.utils.ipython import ipython + # ipython({'root': root, 'CSSSelect': CSSSelect, 'raw': self.wsj_itp_page}) for inp in CSSSelect('.DayPickerInput > input'): if inp.get('placeholder'): self.timefmt = inp.get('placeholder') break feeds = [] - for a in CSSSelect('.WSJTheme__nav-container_sPVwT3FiPlWjFGtr5KH3d .WSJTheme__section-link_XGDsdx5qPlnC8BZPxQ63R'): + for a in CSSSelect('.WSJTheme--nav-container--sPVwT3Fi .WSJTheme--section-link--XGDsdx5q'): frontpage = a.get('href').endswith('frontpage') title = self.tag_to_string(a).capitalize().strip().replace('U.s.', 'U.S.') if not title: continue url = self.abs_wsj_url(a.get('href')) + self.log('Found section:', title, 'at', url) self.wsj_add_feed(feeds, title, url) if frontpage: self.wsj_find_wn_articles(feeds, root, CSSSelect)