Update Discover Magazine

Kovid Goyal 2016-01-31 00:45:22 +05:30
parent cf8fcfe82b
commit cb14e8f549
2 changed files with 83 additions and 34 deletions

View File

@@ -7,7 +7,7 @@ __docformat__ = 'restructuredtext en'
 discovermagazine.com
 '''
-import re
+import re, mechanize, json, cookielib
 from calibre.web.feeds.news import BasicNewsRecipe
 class DiscoverMagazine(BasicNewsRecipe):
@@ -35,29 +35,53 @@ class DiscoverMagazine(BasicNewsRecipe):
     # Login stuff
     needs_subscription = True
-    use_javascript_to_login = True
-    requires_version = (0, 9, 20)
-    def javascript_login(self, br, username, password):
-        br.visit('http://discovermagazine.com', timeout=120)
-        f = br.select_form('div.login.section div.form')
-        f['username'] = username
-        f['password'] = password
-        br.submit('input[id="signInButton"]', timeout=120)
-        br.run_for_a_time(20)
+    def get_browser(self):
+        br = BasicNewsRecipe.get_browser(self)
+        # Log in by POSTing the credentials as JSON to the Kalmbach
+        # authentication service instead of driving the site's javascript
+        # login form
+        rq = mechanize.Request(
+            'https://secure.kalmbach.com/kserv/api/authentication/login', headers={
+                'Content-Type': 'application/json; charset=UTF-8',
+                'Referer': 'http://discovermagazine.com',
+                'Accept': 'application/json, text/javascript, */*; q=0.01',
+                'Accept-Language': 'en-US,en;q=0.5',
+                'Origin': 'http://discovermagazine.com',
+            }, data=json.dumps(
+                {'appId': '2', 'email': self.username, 'password': self.password}))
+        br.set_debug_http(True)
+        data = json.loads(br.open(rq).read())
+        if not data.get('success'):
+            raise ValueError('Failed to login')
+        session_id = data['sessionId']
+        # Attach the returned session id as the KSERV cookie, constructing it
+        # manually when the browser has no set_cookie() helper
+        if hasattr(br, 'set_cookie'):
+            br.set_cookie('KSERV', session_id, 'discovermagazine.com')
+        else:
+            c = cookielib.Cookie(
+                None, 'KSERV', session_id,
+                None, False,
+                'discovermagazine.com', True, False,
+                '/', True,
+                False, None, False, None, None, None)
+            br.cookiejar.set_cookie(c)
+        res = br.open('http://discovermagazine.com')
+        br.set_debug_http(False)
+        raw = res.read()
+        if '>Logout<' not in raw:
+            raise ValueError('Failed to login')
+        return br
     # End login stuff
     def append_page(self, soup, appendtag, position):
         pager = soup.find('span', attrs={'class':'next'})
         if pager:
             nexturl = pager.a['href']
             soup2 = self.index_to_soup(nexturl)
             texttag = soup2.find('div', attrs={'class':'articlebody'})
             newpos = len(texttag.contents)
             self.append_page(soup2, texttag, newpos)
             texttag.extract()
             appendtag.insert(position, texttag)
     def preprocess_html(self, soup):
         mtag = '<meta http-equiv="Content-Language" content="en-US"/>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>'
@@ -65,7 +89,7 @@ class DiscoverMagazine(BasicNewsRecipe):
         self.append_page(soup, soup.body, 3)
         pager = soup.find('div', attrs={'class':'listingBar'})
         if pager:
             pager.extract()
         return soup
     def postprocess_html(self, soup, first_fetch):

View File

@@ -7,39 +7,64 @@ __copyright__ = '2015 Michael Marotta <mikefm at gmail.net>'
 '''
 discovermagazine.com
 '''
-import re
+import re, json, cookielib
+import mechanize
 from calibre.web.feeds.news import BasicNewsRecipe
 class DiscoverMagazine(BasicNewsRecipe):
     title = 'Discover Magazine Monthly'
-    __author__ = 'Michael Marotta'
+    __author__ = 'Kovid Goyal'
     description = 'Monthly magazine version of Discover Magazine (not rss feed).'
     language = 'en'
     encoding = 'utf-8'
     simultaneous_downloads = 20
     tags = 'news, technology, science'
-    INDEX = 'http://www.discovermagazine.com'
+    INDEX = 'http://discovermagazine.com'
     keep_only_tags = [
         {'attrs':{'class':['headline', 'deck', 'belowDeck', 'mediaContainer', 'segment', 'cover']}},
     ]
     remove_tags = [dict(name='div', attrs={'class': ['ladder', 'mobile', 'popular', 'open', 'scistarter']})]
-    # Login stuff
+    # Login {{{
     needs_subscription = True
-    use_javascript_to_login = True
-    requires_version = (0, 9, 20)
-    def javascript_login(self, br, username, password):
-        br.visit('http://discovermagazine.com', timeout=120)
-        f = br.select_form('div.login.section div.form')
-        f['username'] = username
-        f['password'] = password
-        br.submit('input[id="signInButton"]', timeout=120)
-        br.run_for_a_time(20)
-        # br.show_browser()
-    # End login stuff
+    def get_browser(self):
+        br = BasicNewsRecipe.get_browser(self)
+        # Log in by POSTing the credentials as JSON to the Kalmbach
+        # authentication service instead of driving the site's javascript
+        # login form
+        rq = mechanize.Request(
+            'https://secure.kalmbach.com/kserv/api/authentication/login', headers={
+                'Content-Type': 'application/json; charset=UTF-8',
+                'Referer': 'http://discovermagazine.com',
+                'Accept': 'application/json, text/javascript, */*; q=0.01',
+                'Accept-Language': 'en-US,en;q=0.5',
+                'Origin': 'http://discovermagazine.com',
+            }, data=json.dumps(
+                {'appId': '2', 'email': self.username, 'password': self.password}))
+        br.set_debug_http(True)
+        data = json.loads(br.open(rq).read())
+        if not data.get('success'):
+            raise ValueError('Failed to login')
+        session_id = data['sessionId']
+        # Attach the returned session id as the KSERV cookie, constructing it
+        # manually when the browser has no set_cookie() helper
+        if hasattr(br, 'set_cookie'):
+            br.set_cookie('KSERV', session_id, 'discovermagazine.com')
+        else:
+            c = cookielib.Cookie(
+                None, 'KSERV', session_id,
+                None, False,
+                'discovermagazine.com', True, False,
+                '/', True,
+                False, None, False, None, None, None)
+            br.cookiejar.set_cookie(c)
+        res = br.open('http://discovermagazine.com')
+        br.set_debug_http(False)
+        raw = res.read()
+        if '>Logout<' not in raw:
+            raise ValueError('Failed to login')
+        return br
+    # End login }}}
     no_stylesheets = True
     preprocess_regexps = [(re.compile(r'<br[ ]*/>', re.IGNORECASE), lambda m: ''),