#!/usr/bin/env python # vim:fileencoding=utf-8 # License: GPLv3 Copyright: 2016, Kovid Goyal from __future__ import absolute_import, division, print_function, unicode_literals import json from calibre.constants import iswindows from calibre.web.feeds.news import BasicNewsRecipe from calibre.ptempfile import PersistentTemporaryFile class TimeMagazine(BasicNewsRecipe): title = 'TIME Magazine' __author__ = 'Kovid Goyal' description = 'Weekly US magazine.' language = 'en' needs_subscription = True no_stylesheets = True remove_javascript = True def get_browser(self): br = BasicNewsRecipe.get_browser(self) # self.time_magazine_page = open('/t/raw.html').read() # return br base = 'http://subscription-assets.time.com/prod/assets/themes/magazines/SUBS/templates/velocity/site/td-pcslogin/' url = base + 'login.html' br.open(url) br.select_form(nr=0) br.form.action = 'https://auth.time.com/login.php?rurl={}&turl={}'.format( url, 'http://time.com/magazine') br['username'] = self.username br['password'] = self.password r = br.submit() # print(111111, r.geturl()) self.time_magazine_page = r.read() return br def parse_index(self): import html5lib root = html5lib.parse( self.time_magazine_page, treebuilder='lxml', namespaceHTMLElements=False).getroot() for script in root.iterdescendants('script'): if script.text and script.text.startswith('Time.bootstrap ='): data = json.loads(script.text.partition('=')[2].lstrip()) break else: raise ValueError( 'The TIME website has changed, this recipe needs to be rewritten') data = data['magazine']['us'][0] self.timefmt = ' [%s]' % data['title'].split('|')[0].strip() self.cover_url = data['hero']['src']['large'] articles = [] self.turl_map = {} for article in data['articles']: title = article.get('friendly_title') or article.get('short_title') if title == 'In the Latest Issue' or 'content' not in article: continue url = article['shortlink'] desc = article.get('excerpt') or '' self.log(title, ' at ', url) self.log('\t', desc) try: cover_url = article['hero']['src']['large'] except Exception: cover_url = '' authors = '' for aut in article.get('authors') or (): authors += '

' + aut.get('bio') + '

' articles.append({'title': title, 'url': url, 'desc': desc}) text = '

{}

'.format( title, authors, cover_url, article['content']) with PersistentTemporaryFile('-time-recipe.html') as f: f.write(text.encode('utf-8')) name = ('/' if iswindows else '') + f.name self.turl_map[url] = 'file://' + name return [('Articles', articles)] def print_version(self, url): return self.turl_map[url] def preprocess_html(self, soup): for img in soup.findAll('img', attrs={'data-lazy-src': True}): img['src'] = img['data-lazy-src'] for img in soup.findAll('img', src=lambda x: not x): img.extract() return soup