From 8761f0cb60b87b0280c6665facf1d893ac4dc001 Mon Sep 17 00:00:00 2001 From: a10kiloham Date: Tue, 6 Aug 2019 15:51:38 +0100 Subject: [PATCH] Update login mechanism for Times Online Fixes #1025 (Update login mechanism) Fixes #1026 (Fix login mechanism) --- recipes/sunday_times_magazine.recipe | 47 ++++++++++++++++++---------- recipes/times_online.recipe | 45 ++++++++++++++++---------- 2 files changed, 60 insertions(+), 32 deletions(-) diff --git a/recipes/sunday_times_magazine.recipe b/recipes/sunday_times_magazine.recipe index b7bebff615..f59dd15422 100644 --- a/recipes/sunday_times_magazine.recipe +++ b/recipes/sunday_times_magazine.recipe @@ -1,13 +1,13 @@ __license__ = 'GPL v3' -__copyright__ = '2010-2013, Darko Miletic ' +__copyright__ = '2010-2019' ''' www.thetimes.co.uk/magazine/the-sunday-times-magazine/ ''' + +from mechanize import Request + +from calibre import random_user_agent from calibre.web.feeds.news import BasicNewsRecipe -try: - from urllib.parse import urlencode -except ImportError: - from urllib import urlencode def classes(classes): @@ -31,8 +31,9 @@ class TimesOnline(BasicNewsRecipe): delay = 1 needs_subscription = True publication_type = 'newspaper' - INDEX = 'http://www.thetimes.co.uk/' - PREFIX = u'http://www.thetimes.co.uk/' + INDEX = 'https://www.thetimes.co.uk' + LOGIN = 'https://login.thetimes.co.uk/' + PREFIX = u'https://www.thetimes.co.uk' extra_css = """ .author-name,.authorName{font-style: italic} .published-date,.multi-position-photo-text{ @@ -48,16 +49,30 @@ class TimesOnline(BasicNewsRecipe): 'publisher': publisher, 'language': language} - def get_browser(self): - br = BasicNewsRecipe.get_browser(self) - br.open('http://www.thetimes.co.uk/') - if self.username is not None and self.password is not None: - data = urlencode({ - 'gotoUrl': self.INDEX, - 'username': self.username, - 'password': self.password}) - br.open('https://login.thetimes.co.uk/', data) + def get_browser(self, *a, **kw): + start_url = self.INDEX + kw['user_agent'] = random_user_agent(allow_ie=False) + br = BasicNewsRecipe.get_browser(self, *a, **kw) + self.log('Starting login process...') + res = br.open(start_url) + sso_url = res.geturl() + self.log(sso_url) + request_query = { + 'username': self.username, + 'password': self.password, + 's': 1, + 'gotoUrl': self.INDEX, + } + rq = Request(self.LOGIN, headers={ + 'Accept': 'text/html', + 'Accept-Language': 'en-US,en;q=0.8', + 'X-HTTP-Method-Override': 'POST', + 'X-Requested-With': 'XMLHttpRequest', + }, data=request_query) + self.log('Sending login request...') + res = br.open(rq) return br + # }}} def get_cover_url(self): from datetime import date diff --git a/recipes/times_online.recipe b/recipes/times_online.recipe index 76bf09d467..ad3a0ce576 100644 --- a/recipes/times_online.recipe +++ b/recipes/times_online.recipe @@ -1,16 +1,15 @@ __license__ = 'GPL v3' -__copyright__ = '2010-2017, Bobby Steel , Darko Miletic' +__copyright__ = '2010-2019, Bobby Steel , Darko Miletic' ''' www.thetimes.co.uk ''' -import html5lib -try: - from urllib.parse import urlencode -except ImportError: - from urllib import urlencode -from lxml import html +from mechanize import Request +from calibre import random_user_agent from calibre.web.feeds.news import BasicNewsRecipe +import html5lib +from lxml import html + def classes(classes): q = frozenset(classes.split(' ')) @@ -35,6 +34,7 @@ class TimesOnline(BasicNewsRecipe): needs_subscription = True publication_type = 'newspaper' INDEX = 'http://www.thetimes.co.uk/' + LOGIN = 'https://login.thetimes.co.uk/' PREFIX = u'http://www.thetimes.co.uk' extra_css = """ .author-name,.authorName{font-style: italic} @@ -78,15 +78,28 @@ class TimesOnline(BasicNewsRecipe): br.open(cover) return cover - def get_browser(self): - br = BasicNewsRecipe.get_browser(self) - br.open('http://www.thetimes.co.uk/') - if self.username is not None and self.password is not None: - data = urlencode({ - 'gotoUrl': self.INDEX, - 'username': self.username, - 'password': self.password}) - br.open('https://login.thetimes.co.uk/', data) + def get_browser(self, *a, **kw): + start_url = self.INDEX + kw['user_agent'] = random_user_agent(allow_ie=False) + br = BasicNewsRecipe.get_browser(self, *a, **kw) + self.log('Starting login process...') + res = br.open(start_url) + sso_url = res.geturl() + self.log(sso_url) + request_query = { + 'username': self.username, + 'password': self.password, + 's': 1, + 'gotoUrl': self.INDEX, + } + rq = Request(self.LOGIN, headers={ + 'Accept': 'text/html', + 'Accept-Language': 'en-US,en;q=0.8', + 'X-HTTP-Method-Override': 'POST', + 'X-Requested-With': 'XMLHttpRequest', + }, data=request_query) + self.log('Sending login request...') + res = br.open(rq) return br remove_tags = [