Update Wall Street Journal

Kovid Goyal 2017-02-05 21:21:40 +05:30
parent 0e5279537f
commit f030b414ea
2 changed files with 35 additions and 26 deletions

File 1

@@ -36,6 +36,7 @@ def classes(classes):
     return dict(attrs={
         'class': lambda x: x and frozenset(x.split()).intersection(q)})
 
 USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64; rv:45.0) Gecko/20100101 Firefox/45.0'
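
For context, the classes() helper excerpted above is the stock calibre utility for matching tags by CSS class; the q it closes over is presumably bound just above the excerpt, as something like q = frozenset(classes.split(' ')). A minimal sketch of the helper together with a hypothetical use in a recipe:

# A minimal sketch (not part of the diff) of a classes()-style helper as
# used in calibre recipes; the q binding is assumed to sit just above the
# excerpted lines.
def classes(class_names):
    q = frozenset(class_names.split(' '))
    return dict(attrs={
        'class': lambda x: x and frozenset(x.split()).intersection(q)})

# Hypothetical usage inside a BasicNewsRecipe subclass: keep only tags
# whose class attribute contains 'articleBody' or 'headline'.
keep_only_tags = [classes('articleBody headline')]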
@@ -150,23 +151,26 @@ class WSJ(BasicNewsRecipe):
         articles = []
 
-        for a in CSSSelect('a.mjLinkItem[href]')(root):
-            container = a.xpath('ancestor::li')
-            meta = CSSSelect('.meta_sectionName')(a)
-            if meta:
-                meta = meta[0]
-                meta.getparent().remove(meta)
-                meta = self.tag_to_string(meta)
+        for container in root.xpath('//li[contains(@class, "mjItemMain")]'):
+            meta = container.xpath('descendant::span[@class="meta_sectionName"]')
+            if not meta:
+                continue
+            meta = meta[0]
+            a = meta.xpath('ancestor::a')[0]
+            meta.getparent().remove(meta)
+            meta = self.tag_to_string(meta)
             title = self.tag_to_string(a)
             if meta:
                 title += ' [%s]' % meta
             url = self.abs_wsj_url(a.get('href'))
             desc = ''
             if container:
-                for p in CSSSelect('p')(container[0]):
-                    desc = self.tag_to_string(p)
-                    if 'Subscriber Content' not in desc:
-                        break
+                for p in container.xpath('descendant::p'):
+                    q = self.tag_to_string(p)
+                    if 'Subscriber Content' in q:
+                        continue
+                    desc += q
+                    break
 
             articles.append({'title': title, 'url': url,
                              'description': desc, 'date': ''})
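
This is the substance of the commit: article discovery now iterates over the li.mjItemMain containers and reaches the section label and link by XPath, rather than starting from a.mjLinkItem anchors, and the description is built from the first paragraph that is not the 'Subscriber Content' paywall marker (the old loop stopped at the first paragraph it saw and could leave the marker as the description). A standalone sketch of the new logic against fabricated HTML, with lxml's text_content() standing in for the recipe's self.tag_to_string() and abs_wsj_url() omitted:

# A standalone sketch of the new selection logic, run against fabricated
# HTML; text_content() stands in for self.tag_to_string().
import lxml.html

html = '''
<ul>
  <li class="mjItemMain">
    <a href="/articles/example">
      <span class="meta_sectionName">Politics</span>
      <span>Example headline</span>
    </a>
    <p>Subscriber Content</p>
    <p>A short summary of the article.</p>
  </li>
</ul>
'''
root = lxml.html.fromstring(html)

articles = []
for container in root.xpath('//li[contains(@class, "mjItemMain")]'):
    meta = container.xpath('descendant::span[@class="meta_sectionName"]')
    if not meta:
        continue  # skip list items without a section label
    meta = meta[0]
    a = meta.xpath('ancestor::a')[0]   # the link wrapping the label
    meta.getparent().remove(meta)      # strip the label out of the title
    title = a.text_content().strip()
    title += ' [%s]' % meta.text_content()
    desc = ''
    for p in container.xpath('descendant::p'):
        q = p.text_content()
        if 'Subscriber Content' in q:
            continue  # skip the paywall marker
        desc += q
        break         # keep only the first real paragraph
    articles.append({'title': title, 'url': a.get('href'),
                     'description': desc, 'date': ''})

print(articles)
# [{'title': 'Example headline [Politics]', 'url': '/articles/example',
#   'description': 'A short summary of the article.', 'date': ''}]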
@@ -217,14 +221,15 @@ class WSJ(BasicNewsRecipe):
         return articles
 
     def wsj_add_feed(self, feeds, title, url):
-        self.log('Found section:', title)
+        self.log('Found section:', title, '[' + url + ']')
         try:
             if url.endswith('whatsnews'):
                 articles = self.wsj_find_wn_articles(url)
             else:
                 articles = self.wsj_find_articles(
                     url, ahed=title == 'Front Section')
-        except:
+        except Exception:
+            self.log.exception('Failed to parse section:', title)
             articles = []
         if articles:
             feeds.append((title, articles))
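
The second change tightens error handling: the bare except becomes except Exception, so KeyboardInterrupt and SystemExit propagate, and a failed section is now logged with a traceback instead of being silently swallowed. A minimal sketch of the pattern, with the standard logging module standing in for calibre's recipe log object:

# A minimal sketch of the new error-handling pattern; the standard logging
# module stands in for calibre's self.log.
import logging

logging.basicConfig(level=logging.INFO)
log = logging.getLogger('wsj')

def add_feed(feeds, title, url, find_articles):
    log.info('Found section: %s [%s]', title, url)
    try:
        articles = find_articles(url)
    except Exception:
        # 'except Exception' (unlike a bare 'except:') lets
        # KeyboardInterrupt and SystemExit propagate; log.exception()
        # records the full traceback for debugging.
        log.exception('Failed to parse section: %s', title)
        articles = []
    if articles:
        feeds.append((title, articles))

def broken_finder(url):
    raise ValueError('simulated parse failure')

feeds = []
add_feed(feeds, 'Front Section', 'https://example.com/front', broken_finder)
print(feeds)  # [] -- the failed section is skipped, traceback logged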

File 2 (the same two changes applied to the second recipe)

@@ -109,23 +109,26 @@ class WSJ(BasicNewsRecipe):
         articles = []
 
-        for a in CSSSelect('a.mjLinkItem[href]')(root):
-            container = a.xpath('ancestor::li')
-            meta = CSSSelect('.meta_sectionName')(a)
-            if meta:
-                meta = meta[0]
-                meta.getparent().remove(meta)
-                meta = self.tag_to_string(meta)
+        for container in root.xpath('//li[contains(@class, "mjItemMain")]'):
+            meta = container.xpath('descendant::span[@class="meta_sectionName"]')
+            if not meta:
+                continue
+            meta = meta[0]
+            a = meta.xpath('ancestor::a')[0]
+            meta.getparent().remove(meta)
+            meta = self.tag_to_string(meta)
             title = self.tag_to_string(a)
             if meta:
                 title += ' [%s]' % meta
             url = self.abs_wsj_url(a.get('href'))
             desc = ''
             if container:
-                for p in CSSSelect('p')(container[0]):
-                    desc = self.tag_to_string(p)
-                    if 'Subscriber Content' not in desc:
-                        break
+                for p in container.xpath('descendant::p'):
+                    q = self.tag_to_string(p)
+                    if 'Subscriber Content' in q:
+                        continue
+                    desc += q
+                    break
 
             articles.append({'title': title, 'url': url,
                              'description': desc, 'date': ''})
@@ -176,14 +179,15 @@ class WSJ(BasicNewsRecipe):
         return articles
 
     def wsj_add_feed(self, feeds, title, url):
-        self.log('Found section:', title)
+        self.log('Found section:', title, '[' + url + ']')
         try:
             if url.endswith('whatsnews'):
                 articles = self.wsj_find_wn_articles(url)
             else:
                 articles = self.wsj_find_articles(
                     url, ahed=title == 'Front Section')
-        except:
+        except Exception:
+            self.log.exception('Failed to parse section:', title)
            articles = []
         if articles:
             feeds.append((title, articles))