mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Update Psychology Today
Fixes #1475142 [Cannot download "Psychology Today" from "Fetch News".](https://bugs.launchpad.net/calibre/+bug/1475142)
This commit is contained in:
parent
6daf4d61b9
commit
31fb52fcd2
@ -1,63 +1,45 @@
|
|||||||
import re
|
|
||||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
class PsychologyToday(BasicNewsRecipe):
|
class PsychologyToday(BasicNewsRecipe):
|
||||||
|
|
||||||
title = 'Psychology Today'
|
title = 'Psychology Today'
|
||||||
__author__ = 'Rick Shang'
|
__author__ = 'Kovid Goyal'
|
||||||
|
|
||||||
description = 'This magazine takes information from the latest research in the field of psychology and makes it useful to people in their everyday lives. Its coverage encompasses self-improvement, relationships, the mind-body connection, health, family, the workplace and culture.'
|
description = ('This magazine takes information from the latest research'
|
||||||
|
' in the field of psychology and makes it useful to people in their everyday'
|
||||||
|
' lives. Its coverage encompasses self-improvement, relationships, the mind-body'
|
||||||
|
' connection, health, family, the workplace and culture.')
|
||||||
language = 'en'
|
language = 'en'
|
||||||
category = 'news'
|
|
||||||
encoding = 'UTF-8'
|
encoding = 'UTF-8'
|
||||||
auto_cleanup = True
|
|
||||||
#keep_only_tags = [dict(attrs={'class':['print-title', 'print-submitted', 'print-content', 'print-footer', 'print-source_url', 'print-links']})]
|
|
||||||
no_javascript = True
|
no_javascript = True
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
|
|
||||||
|
keep_only_tags = [
|
||||||
|
dict(role='main'),
|
||||||
|
]
|
||||||
|
remove_tags = [
|
||||||
|
dict(attrs={'class':['pt-social-media', 'fb-like-button']}),
|
||||||
|
]
|
||||||
|
|
||||||
def parse_index(self):
|
def parse_index(self):
|
||||||
articles = []
|
|
||||||
soup = self.index_to_soup('http://www.psychologytoday.com/magazine')
|
soup = self.index_to_soup('http://www.psychologytoday.com/magazine')
|
||||||
|
div = soup.find(id='block-views-magazine-issues-block')
|
||||||
|
a = div.findAll('h3', attrs={'class':'magazine-published-date'})[1].find('a')
|
||||||
#Go to the main body
|
self.timefmt = ' [%s]' % self.tag_to_string(a).capitalize()
|
||||||
div = soup.find('div',attrs={'id':'content-content'})
|
soup = self.index_to_soup('http://www.psychologytoday.com' + a['href'])
|
||||||
#Find cover & date
|
self.cover_url = soup.find(role='main').find('img', src=lambda x:x and '/field_magazine_cover/' in x)['src'].partition('?')[0]
|
||||||
cover_item = div.find('div', attrs={'class':'collections-header-image'})
|
div = soup.find(id='block-system-main')
|
||||||
cover = cover_item.find('img',src=True)
|
|
||||||
self.cover_url = cover['src']
|
|
||||||
date = self.tag_to_string(cover['title'])
|
|
||||||
self.timefmt = u' [%s]'%date
|
|
||||||
|
|
||||||
articles = []
|
articles = []
|
||||||
for post in div.findAll('div', attrs={'class':'collections-node-feature collection-node-even'}):
|
for x in div.findAll(attrs={'class':'field__item'}):
|
||||||
title = self.tag_to_string(post.find('h2'))
|
h2 = x.find('h2')
|
||||||
author_item=post.find('div', attrs={'class':'collection-node-byline'})
|
title = self.tag_to_string(h2)
|
||||||
author = re.sub(r'.*by\s',"",self.tag_to_string(author_item).strip())
|
url = 'http://www.psychologytoday.com' + h2.find('a')['href']
|
||||||
title = title + u' (%s)'%author
|
self.log('\n', title, 'at', url)
|
||||||
url= 'http://www.psychologytoday.com'+post.find('a', href=True)['href']
|
desc = ''
|
||||||
#print_page=article_page.find('li', attrs={'class':'print_html first'})
|
for y in x.findAll(attrs={'class':['subtext', 'collection__subtitle']}):
|
||||||
#url='http://www.psychologytoday.com'+print_page.find('a',href=True)['href']
|
desc += self.tag_to_string(y) + ' '
|
||||||
desc = self.tag_to_string(post.find('div', attrs={'class':'collection-node-description'})).strip()
|
if desc:
|
||||||
self.log('Found article:', title)
|
self.log(desc)
|
||||||
self.log('\t', url)
|
articles.append({'title':title, 'url':url, 'description':desc})
|
||||||
self.log('\t', desc)
|
|
||||||
articles.append({'title':title, 'url':url, 'date':'','description':desc})
|
|
||||||
for post in div.findAll('div', attrs={'class':'collections-node-feature collection-node-odd'}):
|
|
||||||
title = self.tag_to_string(post.find('h2'))
|
|
||||||
author_item=post.find('div', attrs={'class':'collection-node-byline'})
|
|
||||||
author = re.sub(r'.*by\s',"",self.tag_to_string(author_item).strip())
|
|
||||||
title = title + u' (%s)'%author
|
|
||||||
url= 'http://www.psychologytoday.com'+post.find('a', href=True)['href']
|
|
||||||
#print_page=article_page.find('li', attrs={'class':'print_html first'})
|
|
||||||
#url='http://www.psychologytoday.com'+print_page.find('a',href=True)['href']
|
|
||||||
desc = self.tag_to_string(post.find('div', attrs={'class':'collection-node-description'})).strip()
|
|
||||||
self.log('Found article:', title)
|
|
||||||
self.log('\t', url)
|
|
||||||
self.log('\t', desc)
|
|
||||||
articles.append({'title':title, 'url':url, 'date':'','description':desc})
|
|
||||||
|
|
||||||
|
|
||||||
return [('Current Issue', articles)]
|
return [('Current Issue', articles)]
|
||||||
|
Loading…
x
Reference in New Issue
Block a user