mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
TIME recipe no longer uses Qt WebKit
This commit is contained in:
parent
d8457fe972
commit
9f4b6f9991
@ -1,73 +1,59 @@
|
|||||||
#!/usr/bin/env python2
|
#!/usr/bin/env python2
|
||||||
|
# vim:fileencoding=utf-8
|
||||||
|
# License: GPLv3 Copyright: 2016, Kovid Goyal <kovid at kovidgoyal.net>
|
||||||
|
|
||||||
from __future__ import (unicode_literals, division, absolute_import,
|
from __future__ import (unicode_literals, division, absolute_import,
|
||||||
print_function)
|
print_function)
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = '2013, Kovid Goyal <kovid@kovidgoyal.net>'
|
|
||||||
|
|
||||||
'''
|
|
||||||
time.com
|
|
||||||
'''
|
|
||||||
|
|
||||||
from calibre.web.feeds.jsnews import JavascriptRecipe
|
|
||||||
from calibre.ptempfile import PersistentTemporaryFile
|
|
||||||
from lxml import html
|
|
||||||
import json
|
import json
|
||||||
|
from calibre.constants import iswindows
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
from calibre.ptempfile import PersistentTemporaryFile
|
||||||
|
|
||||||
# Keep the login method as standalone, so it can be easily tested
|
|
||||||
def do_login(browser, username, password):
|
|
||||||
from calibre.web.jsbrowser.browser import Timeout
|
|
||||||
browser.visit(
|
|
||||||
'http://subscription-assets.time.com/prod/assets/themes/magazines/SUBS/templates/velocity/site/td-pcslogin/login.html')
|
|
||||||
form = browser.select_form('#sign-in-form')
|
|
||||||
form['username'] = username
|
|
||||||
form['password'] = password
|
|
||||||
browser.submit('#sign-in-button')
|
|
||||||
try:
|
|
||||||
browser.wait_for_element('body.is-signed-in', timeout=180)
|
|
||||||
except Timeout:
|
|
||||||
raise ValueError(
|
|
||||||
'Failed to login to time.com, check your username and password and try again in a little while.')
|
|
||||||
|
|
||||||
class Time(JavascriptRecipe):
|
class TimeMagazine(BasicNewsRecipe):
|
||||||
title = u'Time'
|
|
||||||
__author__ = 'Kovid Goyal'
|
|
||||||
description = 'Weekly US magazine.'
|
|
||||||
language = 'en'
|
|
||||||
needs_subscription = True
|
|
||||||
requires_version = (0, 9, 35)
|
|
||||||
|
|
||||||
no_stylesheets = True
|
title = 'TIME Magazine'
|
||||||
remove_javascript = True
|
__author__ = 'Kovid Goyal'
|
||||||
time_initial_phase = True
|
__author__ = 'Kovid Goyal'
|
||||||
|
description = 'Weekly US magazine.'
|
||||||
|
language = 'en'
|
||||||
|
needs_subscription = True
|
||||||
|
no_stylesheets = True
|
||||||
|
remove_javascript = True
|
||||||
|
|
||||||
def do_login(self, browser, username, password):
|
def get_browser(self):
|
||||||
do_login(browser, username, password)
|
br = BasicNewsRecipe.get_browser(self)
|
||||||
|
# self.time_magazine_page = open('/t/raw.html').read()
|
||||||
def print_version(self, url):
|
# return br
|
||||||
return self.turl_map[url]
|
base = 'http://subscription-assets.time.com/prod/assets/themes/magazines/SUBS/templates/velocity/site/td-pcslogin/'
|
||||||
|
url = base + 'login.html'
|
||||||
def get_publication_data(self, browser):
|
br.open(url)
|
||||||
# raw = open('/t/time.html', 'rb').read().decode('utf-8')
|
br.select_form(nr=0)
|
||||||
browser.visit('http://time.com/magazine')
|
br.form.action = 'https://auth.time.com/login.php?rurl={}&turl={}'.format(
|
||||||
raw = browser.html
|
url, 'http://time.com/magazine')
|
||||||
|
br['username'] = self.username
|
||||||
root = html.fromstring(raw)
|
br['password'] = self.password
|
||||||
self.time_initial_phase = False
|
r = br.submit()
|
||||||
dates = ''.join(root.xpath('//*[@class="rail-article-magazine-issue"]/date/text()'))
|
# print(111111, r.geturl())
|
||||||
if dates:
|
self.time_magazine_page = r.read()
|
||||||
self.timefmt = ' [%s]'%dates
|
return br
|
||||||
|
|
||||||
|
def parse_index(self):
|
||||||
|
import html5lib
|
||||||
|
root = html5lib.parse(
|
||||||
|
self.time_magazine_page, treebuilder='lxml', namespaceHTMLElements=False).getroot()
|
||||||
for script in root.iterdescendants('script'):
|
for script in root.iterdescendants('script'):
|
||||||
if script.text and script.text.startswith('Time.bootstrap ='):
|
if script.text and script.text.startswith('Time.bootstrap ='):
|
||||||
data = json.loads(script.text.partition('=')[2].lstrip())
|
data = json.loads(script.text.partition('=')[2].lstrip())
|
||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
raise ValueError('The time website has changed, this recipe needs to be rewritten')
|
raise ValueError(
|
||||||
|
'The TIME website has changed, this recipe needs to be rewritten')
|
||||||
data = data['magazine']['us'][0]
|
data = data['magazine']['us'][0]
|
||||||
self.turl_map = {}
|
self.timefmt = ' [%s]' % data['title'].split('|')[0].strip()
|
||||||
ans = {}
|
self.cover_url = data['hero']['src']['large']
|
||||||
articles = []
|
articles = []
|
||||||
|
self.turl_map = {}
|
||||||
for article in data['articles']:
|
for article in data['articles']:
|
||||||
title = article.get('friendly_title') or article.get('short_title')
|
title = article.get('friendly_title') or article.get('short_title')
|
||||||
if title == 'In the Latest Issue' or 'content' not in article:
|
if title == 'In the Latest Issue' or 'content' not in article:
|
||||||
@ -83,32 +69,21 @@ class Time(JavascriptRecipe):
|
|||||||
authors = ''
|
authors = ''
|
||||||
for aut in article.get('authors') or ():
|
for aut in article.get('authors') or ():
|
||||||
authors += '<p>' + aut.get('bio') + '</p>'
|
authors += '<p>' + aut.get('bio') + '</p>'
|
||||||
articles.append({'title':title, 'url':url, 'desc':desc})
|
articles.append({'title': title, 'url': url, 'desc': desc})
|
||||||
text = '<html><head><meta charset="utf-8"></head><body><h1>{}</h1>{}<div><img src={}></div><div>{}</div></body></html>'.format(
|
text = '<html><head><meta charset="utf-8"></head><body><h1>{}</h1>{}<div><img src="{}"></div><div>{}</div></body></html>'.format(
|
||||||
title, authors, cover_url, article['content'])
|
title, authors, cover_url, article['content'])
|
||||||
with PersistentTemporaryFile('-time-recipe.html') as f:
|
with PersistentTemporaryFile('-time-recipe.html') as f:
|
||||||
f.write(text.encode('utf-8'))
|
f.write(text.encode('utf-8'))
|
||||||
self.turl_map[url] = 'file:///' + f.name
|
name = ('/' if iswindows else '') + f.name
|
||||||
# from pprint import pprint
|
self.turl_map[url] = 'file://' + name
|
||||||
# pprint(data['hero'])
|
return [('Articles', articles)]
|
||||||
|
|
||||||
ans['cover'] = browser.get_resource(data['hero']['src']['large'])
|
def print_version(self, url):
|
||||||
ans['index'] = [('Articles', articles)]
|
return self.turl_map[url]
|
||||||
return ans
|
|
||||||
|
|
||||||
def preprocess_stage1(self, article, browser, url, recursion_level):
|
def preprocess_html(self, soup):
|
||||||
for img in browser.css_select('img[data-lazy-src]', all=True):
|
for img in soup.findAll('img', attrs={'data-lazy-src':True}):
|
||||||
img.setAttribute('src', img.attribute('data-lazy-src'))
|
img['src'] = img['data-lazy-src']
|
||||||
|
for img in soup.findAll('img', src=lambda x: not x):
|
||||||
def load_complete(self, browser, url, rl):
|
img.extract()
|
||||||
if self.time_initial_phase:
|
return soup
|
||||||
browser.wait_for_element('footer.article-footer')
|
|
||||||
return True
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
# Test the login
|
|
||||||
import sys
|
|
||||||
from calibre import jsbrowser
|
|
||||||
br = jsbrowser(default_timeout=120)
|
|
||||||
do_login(br, sys.argv[-2], sys.argv[-1])
|
|
||||||
br.show_browser()
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user