#!/usr/bin/env python
# vim:fileencoding=utf-8
from calibre.web.feeds.recipes import BasicNewsRecipe


def absurl(url):
    """Return *url* as an absolute URL.

    Protocol-relative URLs (``//host/path``) get an ``https:`` scheme,
    site-relative URLs (``/path``) are prefixed with the Psychology Today
    origin, and anything else is returned unchanged.
    """
    if url.startswith("//"):
        return "https:" + url
    if url.startswith("/"):
        return "https://www.psychologytoday.com" + url
    return url


def classes(classes):
    """Build ``find()`` keyword arguments matching any of the given classes.

    *classes* is a space-separated string of class names. The returned dict
    has the form ``{'attrs': {'class': predicate}}`` where the predicate is
    truthy for a class attribute value that shares at least one name with
    *classes*.
    """
    q = frozenset(classes.split(' '))
    return dict(attrs={
        'class': lambda x: x and frozenset(x.split()).intersection(q)})


class PsychologyToday(BasicNewsRecipe):
    """Recipe that downloads the current issue of Psychology Today."""

    title = 'Psychology Today'
    __author__ = 'Kovid Goyal'

    description = ('This magazine takes information from the latest research'
                   ' in the field of psychology and makes it useful to people in their everyday'
                   ' lives. Its coverage encompasses self-improvement, relationships, the mind-body'
                   ' connection, health, family, the workplace and culture.')
    language = 'en'
    encoding = 'UTF-8'
    no_stylesheets = True
    publication_type = 'magazine'

    # Keep only the main content block and drop the social-media widgets.
    keep_only_tags = [dict(attrs={'id': 'block-pt-content'})]
    remove_tags = [classes('pt-social-media')]

    def parse_index(self):
        """Return the article list for the newest issue in the archive.

        The first ``magazine-thumbnail`` link on the archive page supplies
        the issue title (used for ``timefmt``), the cover image and the URL
        of the issue page. Every ``<article>`` on the issue page that has a
        linked ``h2``/``h3`` heading becomes one entry.

        Returns a one-element list ``[('Current Issue', articles)]`` where
        each article is a dict with ``title``, ``url``, ``description`` and
        ``author`` keys.
        """
        soup = self.index_to_soup('https://www.psychologytoday.com/us/magazine/archive')
        a = soup.find(**classes('magazine-thumbnail')).a
        self.timefmt = ' [%s]' % a['title']
        self.cover_url = absurl(a.img['src'])
        soup = self.index_to_soup(absurl(a['href']))

        articles = []
        for article in soup.find('div', role='article').findAll('article'):
            # Look the heading up once; entries without a linked heading
            # cannot be fetched, so skip them instead of aborting the issue.
            heading = article.find(['h2', 'h3'])
            link = heading.a if heading is not None else None
            if link is None or not link.get('href'):
                continue
            title = self.tag_to_string(heading).strip()
            url = absurl(link['href'])
            self.log('\n', title, 'at', url)

            # Description and byline are optional: fall back to '' rather
            # than dereferencing a missing tag.
            desc_tag = article.find('p', **classes('description'))
            desc = self.tag_to_string(desc_tag).strip() if desc_tag is not None else ''
            byline = article.find('p', **classes('byline'))
            byline_link = byline.a if byline is not None else None
            author = self.tag_to_string(byline_link).strip() if byline_link is not None else ''

            if desc:
                self.log(desc)
            articles.append({'title': title, 'url': url, 'description': desc, 'author': author})
        return [('Current Issue', articles)]