This commit is contained in:
Kovid Goyal 2014-11-25 22:55:26 +05:30
parent 4cd960d9d9
commit 0c98870eff
2 changed files with 2 additions and 13 deletions

View File

@ -152,9 +152,8 @@ class WSJ(JavascriptRecipe):
break
for a in CSSSelect('div.itpSectionHeaderPdf a[href]')(root):
href = a.get('href')
if href:
break
ans['cover'] = browser.download_file(href)
break
feeds = ans['index'] = []
for a in CSSSelect('div.itpHeader ul.tab a[href]')(root):

View File

@ -27,7 +27,6 @@ class WSJ(JavascriptRecipe):
no_stylesheets = True
ignore_duplicate_articles = {'url'}
remove_attributes = ['style', 'data-scrim']
needs_subscription = True
keep_only_tags = (
'h1', # 'h2.subhead', 'h2.subHed.deck',
@ -46,14 +45,6 @@ class WSJ(JavascriptRecipe):
'div.nc-exp-artmeta',
)
def do_login(self, br, username, password):
    """Log in to WSJ by submitting the standalone login form.

    Visits the login page with ``br``, selects the first form on the page
    (``nr=0``), fills in its ``username`` and ``password`` fields with the
    supplied credentials and submits it. The page visit and the submit are
    both called with ``timeout=120``.
    """
    wait = 120
    login_url = 'https://id.wsj.com/access/pages/wsj/us/login_standalone.html?mg=com-wsj'  # noqa
    br.visit(login_url, timeout=wait)
    login_form = br.select_form(nr=0)
    # Fill the credential fields in the same order as before: username first.
    for field, value in (('username', username), ('password', password)):
        login_form[field] = value
    br.submit(timeout=wait)
def preprocess_stage2(self, article, browser, url, recursion_level):
# Slideshow and expandable images need to be processed here to
# set the src attribute correctly
@ -156,9 +147,8 @@ class WSJ(JavascriptRecipe):
break
for a in CSSSelect('div.itpSectionHeaderPdf a[href]')(root):
href = a.get('href')
if href:
break
ans['cover'] = browser.download_file(href)
break
feeds = ans['index'] = []
for a in CSSSelect('div.itpHeader ul.tab a[href]')(root):