mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Better workaround for wsj free index page getting stuck
This commit is contained in:
parent
1b6faaa3bc
commit
93a8e83b93
@ -6,6 +6,7 @@ from __future__ import absolute_import, division, print_function, unicode_litera
|
|||||||
|
|
||||||
import json, time, random
|
import json, time, random
|
||||||
from base64 import standard_b64encode
|
from base64 import standard_b64encode
|
||||||
|
from datetime import date, timedelta
|
||||||
|
|
||||||
from mechanize import Request
|
from mechanize import Request
|
||||||
|
|
||||||
@ -97,8 +98,7 @@ class WSJ(BasicNewsRecipe):
|
|||||||
|
|
||||||
def get_browser_for_wsj(self, *a, **kw):
|
def get_browser_for_wsj(self, *a, **kw):
|
||||||
br = BasicNewsRecipe.get_browser(self, *a, **kw)
|
br = BasicNewsRecipe.get_browser(self, *a, **kw)
|
||||||
if needs_subscription:
|
br.set_cookie('wsjregion', 'na,us', '.wsj.com')
|
||||||
br.set_cookie('wsjregion', 'na,us', '.wsj.com')
|
|
||||||
br.set_cookie('gdprApplies', 'false', '.wsj.com')
|
br.set_cookie('gdprApplies', 'false', '.wsj.com')
|
||||||
br.set_cookie('ccpaApplies', 'false', '.wsj.com')
|
br.set_cookie('ccpaApplies', 'false', '.wsj.com')
|
||||||
return br
|
return br
|
||||||
@ -167,6 +167,16 @@ class WSJ(BasicNewsRecipe):
|
|||||||
def get_browser(self, *a, **kw):
|
def get_browser(self, *a, **kw):
|
||||||
br = self.get_browser_for_wsj(*a, **kw)
|
br = self.get_browser_for_wsj(*a, **kw)
|
||||||
res = br.open(self.WSJ_ITP)
|
res = br.open(self.WSJ_ITP)
|
||||||
|
url = res.geturl()
|
||||||
|
if '/20210913/' in url:
|
||||||
|
today = date.today()
|
||||||
|
q = today.isoformat().replace('-', '')
|
||||||
|
try:
|
||||||
|
res = br.open(url.replace('/20210913/', '/' + q + '/'))
|
||||||
|
except Exception:
|
||||||
|
today -= timedelta(days=1)
|
||||||
|
q = today.isoformat().replace('-', '')
|
||||||
|
res = br.open(url.replace('/20210913/', '/' + q + '/'))
|
||||||
self.log('Print edition resolved url:', res.geturl())
|
self.log('Print edition resolved url:', res.geturl())
|
||||||
self.wsj_itp_page = res.read()
|
self.wsj_itp_page = res.read()
|
||||||
return br
|
return br
|
||||||
|
@ -6,6 +6,7 @@ from __future__ import absolute_import, division, print_function, unicode_litera
|
|||||||
|
|
||||||
import json, time, random
|
import json, time, random
|
||||||
from base64 import standard_b64encode
|
from base64 import standard_b64encode
|
||||||
|
from datetime import date, timedelta
|
||||||
|
|
||||||
from mechanize import Request
|
from mechanize import Request
|
||||||
|
|
||||||
@ -97,8 +98,7 @@ class WSJ(BasicNewsRecipe):
|
|||||||
|
|
||||||
def get_browser_for_wsj(self, *a, **kw):
|
def get_browser_for_wsj(self, *a, **kw):
|
||||||
br = BasicNewsRecipe.get_browser(self, *a, **kw)
|
br = BasicNewsRecipe.get_browser(self, *a, **kw)
|
||||||
if needs_subscription:
|
br.set_cookie('wsjregion', 'na,us', '.wsj.com')
|
||||||
br.set_cookie('wsjregion', 'na,us', '.wsj.com')
|
|
||||||
br.set_cookie('gdprApplies', 'false', '.wsj.com')
|
br.set_cookie('gdprApplies', 'false', '.wsj.com')
|
||||||
br.set_cookie('ccpaApplies', 'false', '.wsj.com')
|
br.set_cookie('ccpaApplies', 'false', '.wsj.com')
|
||||||
return br
|
return br
|
||||||
@ -167,6 +167,16 @@ class WSJ(BasicNewsRecipe):
|
|||||||
def get_browser(self, *a, **kw):
|
def get_browser(self, *a, **kw):
|
||||||
br = self.get_browser_for_wsj(*a, **kw)
|
br = self.get_browser_for_wsj(*a, **kw)
|
||||||
res = br.open(self.WSJ_ITP)
|
res = br.open(self.WSJ_ITP)
|
||||||
|
url = res.geturl()
|
||||||
|
if '/20210913/' in url:
|
||||||
|
today = date.today()
|
||||||
|
q = today.isoformat().replace('-', '')
|
||||||
|
try:
|
||||||
|
res = br.open(url.replace('/20210913/', '/' + q + '/'))
|
||||||
|
except Exception:
|
||||||
|
today -= timedelta(days=1)
|
||||||
|
q = today.isoformat().replace('-', '')
|
||||||
|
res = br.open(url.replace('/20210913/', '/' + q + '/'))
|
||||||
self.log('Print edition resolved url:', res.geturl())
|
self.log('Print edition resolved url:', res.geturl())
|
||||||
self.wsj_itp_page = res.read()
|
self.wsj_itp_page = res.read()
|
||||||
return br
|
return br
|
||||||
|
Loading…
x
Reference in New Issue
Block a user