mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
88 lines
3.5 KiB
Python
88 lines
3.5 KiB
Python
#!/usr/bin/env python
|
|
# vim:fileencoding=utf-8
|
|
# License: GPLv3 Copyright: 2016, Kovid Goyal <kovid at kovidgoyal.net>
|
|
|
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
|
import json
|
|
from calibre.constants import iswindows
|
|
from calibre.web.feeds.news import BasicNewsRecipe
|
|
from calibre.ptempfile import PersistentTemporaryFile
|
|
|
|
|
|
class TimeMagazine(BasicNewsRecipe):
    """Recipe that downloads the current US issue of TIME Magazine.

    The recipe logs in with the subscriber's credentials, keeps the page
    returned by the login request, and reads the issue description from the
    ``Time.bootstrap = {...}`` script embedded in that page.  Each article's
    content comes from that data; it is written to a temporary HTML file and
    handed to the downloader through :meth:`print_version`.
    """

    title = 'TIME Magazine'
    __author__ = 'Kovid Goyal'
    description = 'Weekly US magazine.'
    language = 'en'
    needs_subscription = True
    no_stylesheets = True
    remove_javascript = True

    def get_browser(self):
        """Return a browser that has submitted the subscriber login form.

        Side effect: the body of the response to the login request is stored
        in ``self.time_magazine_page`` for later use by :meth:`parse_index`.
        """
        br = BasicNewsRecipe.get_browser(self)
        base = 'http://subscription-assets.time.com/prod/assets/themes/magazines/SUBS/templates/velocity/site/td-pcslogin/'
        url = base + 'login.html'
        br.open(url)
        br.select_form(nr=0)
        # The action of the first form on the page is replaced with the auth
        # endpoint, passing the login page as ``rurl`` and the magazine page
        # as ``turl``.
        br.form.action = 'https://auth.time.com/login.php?rurl={}&turl={}'.format(
            url, 'http://time.com/magazine')
        br['username'] = self.username
        br['password'] = self.password
        r = br.submit()
        self.time_magazine_page = r.read()
        return br

    def parse_index(self):
        """Build the article index from the data embedded in the stored page.

        Sets ``self.timefmt`` and ``self.cover_url`` from the issue data and
        fills ``self.turl_map`` with a mapping from each article's short link
        to a ``file://`` URL of a temporary file holding the article HTML.

        Returns:
            A one-element list ``[('Articles', articles)]`` where every
            article is a dict with ``title``, ``url`` and ``desc`` keys.

        Raises:
            ValueError: if no ``<script>`` starting with ``Time.bootstrap =``
                is present in the page.
        """
        import html5lib
        root = html5lib.parse(
            self.time_magazine_page, treebuilder='lxml', namespaceHTMLElements=False).getroot()
        for script in root.iterdescendants('script'):
            if script.text and script.text.startswith('Time.bootstrap ='):
                # Everything after the first '=' is parsed as JSON.
                data = json.loads(script.text.partition('=')[2].lstrip())
                break
        else:
            raise ValueError(
                'The TIME website has changed, this recipe needs to be rewritten')
        data = data['magazine']['us'][0]
        self.timefmt = ' [%s]' % data['title'].split('|')[0].strip()
        self.cover_url = data['hero']['src']['large']
        articles = []
        self.turl_map = {}
        for article in data['articles']:
            title = article.get('friendly_title') or article.get('short_title')
            # Skip the "In the Latest Issue" entry and entries without a body.
            if title == 'In the Latest Issue' or 'content' not in article:
                continue
            url = article['shortlink']
            desc = article.get('excerpt') or ''
            self.log(title, ' at ', url)
            self.log('\t', desc)
            # The hero image is optional: a missing or malformed entry must
            # not abort the download.  An empty src is removed again by
            # preprocess_html().
            try:
                cover_url = article['hero']['src']['large'] or ''
            except (LookupError, TypeError):
                cover_url = ''
            # Authors without a bio are skipped; concatenating None here would
            # raise TypeError and abort the whole recipe.
            bios = [aut.get('bio') for aut in article.get('authors') or ()]
            authors = ''.join('<p>' + bio + '</p>' for bio in bios if bio)
            articles.append({'title': title, 'url': url, 'desc': desc})
            text = '<html><head><meta charset="utf-8"></head><body><h1>{}</h1>{}<div><img src="{}"></div><div>{}</div></body></html>'.format(
                title, authors, cover_url, article['content'])
            with PersistentTemporaryFile('-time-recipe.html') as f:
                f.write(text.encode('utf-8'))
            # On Windows an extra leading '/' is added before the path.
            name = ('/' if iswindows else '') + f.name
            self.turl_map[url] = 'file://' + name
        return [('Articles', articles)]

    def print_version(self, url):
        """Return the ``file://`` URL recorded for *url* by :meth:`parse_index`.

        Raises:
            KeyError: if *url* was not recorded in ``self.turl_map``.
        """
        return self.turl_map[url]

    def preprocess_html(self, soup):
        """Resolve lazily loaded images and drop images without a source.

        ``<img>`` tags carrying ``data-lazy-src`` get that value copied to
        ``src``; afterwards every ``<img>`` whose ``src`` is missing or empty
        is removed from the tree.  Returns the modified *soup*.
        """
        for img in soup.findAll('img', attrs={'data-lazy-src': True}):
            img['src'] = img['data-lazy-src']
        for img in soup.findAll('img', src=lambda x: not x):
            img.extract()
        return soup
|