mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Update Reason Magazine
This commit is contained in:
parent
2e2fcaaf28
commit
71f6d8b162
@ -1,6 +1,7 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
# vim:fileencoding=utf-8
|
# vim:fileencoding=utf-8
|
||||||
# License: GPLv3 Copyright: 2015, Kovid Goyal <kovid at kovidgoyal.net>
|
# License: GPLv3 Copyright: 2015, Kovid Goyal <kovid at kovidgoyal.net>
|
||||||
|
from __future__ import unicode_literals
|
||||||
import json
|
import json
|
||||||
|
|
||||||
from calibre import prepare_string_for_xml
|
from calibre import prepare_string_for_xml
|
||||||
@ -64,7 +65,7 @@ def extract_html(soup):
|
|||||||
|
|
||||||
class Reason(BasicNewsRecipe):
|
class Reason(BasicNewsRecipe):
|
||||||
|
|
||||||
title = 'Reason Magazine'
|
title = 'Reason'
|
||||||
description = 'Free minds and free markets'
|
description = 'Free minds and free markets'
|
||||||
INDEX = 'https://reason.com/magazine/'
|
INDEX = 'https://reason.com/magazine/'
|
||||||
__author__ = 'Howard Cornett'
|
__author__ = 'Howard Cornett'
|
||||||
@ -74,8 +75,8 @@ class Reason(BasicNewsRecipe):
|
|||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
classes(
|
classes(
|
||||||
'next-post-link the-tags tag rcom-social tools comments-header-show logo-header'
|
'next-post-link the-tags tag rcom-social-tools most-read-container comments-header-show'
|
||||||
' navbar navbar-expanded-lg primary content-info sidebar magicSidebar advertisement logo entry-subtitle'
|
' logo-header navbar navbar-expanded-lg primary content-info sidebar magicSidebar advertisement logo entry-subtitle'
|
||||||
),
|
),
|
||||||
]
|
]
|
||||||
|
|
||||||
@ -128,42 +129,57 @@ class Reason(BasicNewsRecipe):
|
|||||||
self.cover_url = cover['src']
|
self.cover_url = cover['src']
|
||||||
current_section, current_articles = 'Cover Story', []
|
current_section, current_articles = 'Cover Story', []
|
||||||
feeds = []
|
feeds = []
|
||||||
for div in soup.findAll('div', **classes('issue-header-right toc-category-list')):
|
for div in soup.findAll('div', attrs={'class': lambda x: x and set(x.split()).intersection({'issue-header-right', 'toc-category-list'})}):
|
||||||
for h3 in div.findAll('h3', **classes('toc-department')):
|
for h3 in div.findAll('h3', attrs={'class': True}):
|
||||||
if current_articles:
|
cls = h3['class']
|
||||||
feeds.append((current_section, current_articles))
|
if hasattr(cls, 'split'):
|
||||||
current_articles = []
|
cls = cls.split()
|
||||||
current_section = self.tag_to_string(h3)
|
if 'toc-department' in cls:
|
||||||
self.log('\nFound section:', current_section)
|
if current_articles:
|
||||||
title = h3.find_next_sibling().a.text
|
feeds.append((current_section, current_articles))
|
||||||
url = h3.find_next_sibling().a['href']
|
current_articles = []
|
||||||
desc = h3.find_next_sibling().p.text
|
current_section = self.tag_to_string(h3)
|
||||||
current_articles.append({
|
self.log('\nFound section:', current_section)
|
||||||
'title': title,
|
title = h3.find_next_sibling().a.text
|
||||||
'url': url,
|
url = h3.find_next_sibling().a['href']
|
||||||
'description': desc
|
desc = h3.find_next_sibling().p.text
|
||||||
})
|
current_articles.append({
|
||||||
for h2 in div.findAll('h2', **classes('toc-department')):
|
'title': title,
|
||||||
if current_articles:
|
'url': url,
|
||||||
feeds.append((current_section, current_articles))
|
'description': desc
|
||||||
current_articles = []
|
})
|
||||||
current_section = self.tag_to_string(h2)
|
for h2 in div.findAll('h2', attrs={'class': True}):
|
||||||
self.log('\nFound section:', current_section)
|
cls = h2['class']
|
||||||
for article in div.findAll('article', attrs={'class': True}):
|
if hasattr(cls, 'split'):
|
||||||
h4 = article.find('h4')
|
cls = cls.split()
|
||||||
if h4.a is not None:
|
if 'toc-department' in cls:
|
||||||
title = h4.a.text
|
if current_articles:
|
||||||
url = h4.a['href']
|
feeds.append((current_section, current_articles))
|
||||||
else:
|
current_articles = []
|
||||||
title = ''
|
current_section = self.tag_to_string(h2)
|
||||||
url = ''
|
self.log('\nFound section:', current_section)
|
||||||
desc = h4.find_next_sibling().text
|
for article in div.findAll('article', attrs={'class': True}):
|
||||||
current_articles.append({
|
h4 = article.find('h4')
|
||||||
'title': title,
|
if h4.a is not None:
|
||||||
'url': url,
|
title = h4.a.text
|
||||||
'description': desc
|
url = h4.a['href']
|
||||||
})
|
else:
|
||||||
|
title = ''
|
||||||
|
url = ''
|
||||||
|
desc = h4.find_next_sibling().text
|
||||||
|
current_articles.append({
|
||||||
|
'title': title,
|
||||||
|
'url': url,
|
||||||
|
'description': desc
|
||||||
|
})
|
||||||
|
|
||||||
if current_articles:
|
if current_articles:
|
||||||
feeds.append((current_section, current_articles))
|
feeds.append((current_section, current_articles))
|
||||||
return feeds
|
return feeds
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
import sys
|
||||||
|
|
||||||
|
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||||
|
print(extract_html(BeautifulSoup(open(sys.argv[-1]).read())))
|
||||||
|
Loading…
x
Reference in New Issue
Block a user