From 81f8814f751d045581b08521ff717499b6a7b963 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 17 May 2016 09:41:17 +0530
Subject: [PATCH] Update Barrons

Gets rid of the use of javascript_login since that no longer works in
modern calibre
---
 recipes/barrons.recipe | 77 ++++++++++++++++++++++++++++++------------
 recipes/wsj.recipe     |  1 +
 2 files changed, 56 insertions(+), 22 deletions(-)
diff --git a/recipes/barrons.recipe b/recipes/barrons.recipe
index 06c8e500e4..22bf676b77 100644
--- a/recipes/barrons.recipe
+++ b/recipes/barrons.recipe
@@ -1,5 +1,18 @@
+#!/usr/bin/env python2
+# vim:fileencoding=utf-8
+# License: GPLv3 Copyright: 2016, Kovid Goyal <kovid at kovidgoyal.net>
+
+from __future__ import (unicode_literals, division, absolute_import,
+                        print_function)
+
+import json
+from mechanize import Request
+from urllib import quote
+
 from calibre.web.feeds.news import BasicNewsRecipe
 
+USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64; rv:45.0) Gecko/20100101 Firefox/45.0'
+
 class Barrons(BasicNewsRecipe):
 
     title = 'Barron\'s'
@@ -14,11 +27,9 @@ class Barrons(BasicNewsRecipe):
     no_stylesheets = True
     match_regexps = ['http://online.barrons.com/.*?html\?mod=.*?|file:.*']
     conversion_options = {'linearize_tables': True}
-    ##delay = 1
 
     # Don't grab articles more than 7 days old
     oldest_article = 7
-    use_javascript_to_login = True
     requires_version = (0, 9, 16)
 
     keep_only_tags = [dict(attrs={'class':lambda x: x and (x.startswith('sector one column') or x.startswith('sector two column'))})]
@@ -29,12 +40,46 @@ class Barrons(BasicNewsRecipe):
         dict(name='span', attrs={'data-country-code':True, 'data-ticker-code':True}),
     ]
 
-    def javascript_login(self, br, username, password):
-        br.visit('http://commerce.barrons.com/auth/login')
-        f = br.select_form(nr=0)
-        f['username'] = username
-        f['password'] = password
-        br.submit(timeout=120)
+    def get_browser(self):
+        # To understand the signin logic read signin.js from
+        # https://id.barrons.com/access/pages/barrons/us/login_standalone.html?mg=com-barrons
+        # This is the same login service as used by WSJ
+        br = BasicNewsRecipe.get_browser(self, user_agent=USER_AGENT)
+        url = 'https://id.barrons.com/access/pages/barrons/us/login_standalone.html?mg=com-barrons'
+        # br.set_debug_http(True)
+        br.open(url).read()
+        rurl = 'https://id.barrons.com/auth/submitlogin.json'
+        rq = Request(rurl, headers={
+            'Accept': 'application/json, text/javascript, */*; q=0.01',
+            'Accept-Language': 'en-US,en;q=0.8',
+            'Content-Type': 'application/json',
+            'Referer': url,
+            'X-HTTP-Method-Override': 'POST',
+            'X-Requested-With': 'XMLHttpRequest',
+        }, data=json.dumps({
+            'username': self.username,
+            'password': self.password,
+            'realm': 'default',
+            'savelogin': 'true',
+            'template': 'default',
+            'url': quote('http://online.barrons.com'),
+        }))
+        r = br.open(rq)
+        if r.code != 200:
+            raise ValueError('Failed to login, check username and password')
+        data = json.loads(r.read())
+        # from pprint import pprint
+        # pprint(data)
+        if data.get('result') != 'success':
+            raise ValueError(
+                'Failed to login (XHR failed), check username and password')
+        br.set_cookie('m', data['username'], '.barrons.com')
+        raw = br.open(data['url']).read()
+        # open('/t/raw.html', 'wb').write(raw)
+        if b'>Logout<' not in raw:
+            raise ValueError(
+                'Failed to login (auth URL failed), check username and password')
+        return br
 
     # Use the print version of a page when available.
     def print_version(self, url):
@@ -53,26 +98,14 @@ class Barrons(BasicNewsRecipe):
 # Comment out the feeds you don't want retrieved.
 # Because these feeds are sorted alphabetically when converted to LRF, you may want to number them to put them in the order you desire
 
-    def get_feeds(self):
-        return [
+    feeds = [
         ('This Week\'s Magazine', 'http://online.barrons.com/xml/rss/3_7510.xml'),
         ('Online Exclusives', 'http://online.barrons.com/xml/rss/3_7515.xml'),
         ('Companies', 'http://online.barrons.com/xml/rss/3_7516.xml'),
         ('Markets', 'http://online.barrons.com/xml/rss/3_7517.xml'),
         ('Technology', 'http://online.barrons.com/xml/rss/3_7518.xml'),
         ('Funds/Q&A', 'http://online.barrons.com/xml/rss/3_7519.xml'),
-        ]
+    ]
 
     def get_article_url(self, article):
         return article.get('link', None)
-
-    def get_cover_url(self):
-        cover_url = None
-        index = 'http://online.barrons.com/home-page'
-        soup = self.index_to_soup(index)
-        link_item = soup.find('ul',attrs={'class':'newsItem barronsMag'})
-        if link_item:
-            cover_url = link_item.img['src']
-        return cover_url
-
-
diff --git a/recipes/wsj.recipe b/recipes/wsj.recipe
index 29a4d4f4d6..755e5d85bc 100644
--- a/recipes/wsj.recipe
+++ b/recipes/wsj.recipe
@@ -93,6 +93,7 @@ class WSJ(BasicNewsRecipe):
     def get_browser(self):
         # To understand the signin logic read signin.js from
         # https://id.wsj.com/access/pages/wsj/us/signin.html
+        # This is the same login service as used by Barrons
         br = BasicNewsRecipe.get_browser(self, user_agent=USER_AGENT)
         # self.wsj_itp_page = open('/t/raw.html').read()
         # return br