Added two recipes for the Slate Star Codex blog: a periodic recipe and one for the complete archive

Ned Letcher 2015-06-20 02:15:16 +10:00
parent 810f9ac2f9
commit e4e3dd04ff
2 changed files with 106 additions and 0 deletions

View File

@@ -0,0 +1,52 @@
#!/usr/bin/env python2
from calibre.web.feeds.news import BasicNewsRecipe
import re

__license__ = 'GPL v3'
__copyright__ = '2015, Ned Letcher - ned@nedned.net'

"""A calibre custom news recipe for creating eBooks of periodic
updates from the blog Slate Star Codex.

If calibre is installed on your system you can generate an epub
of recent SSC posts from your command line like so:

    $ ebook-convert slate_star_codex.recipe .epub

Author: Ned Letcher
"""

class SlateStarCodex(BasicNewsRecipe):
    title = u'Slate Star Codex'
    description = 'IN A MAD WORLD, ALL BLOGGING IS PSYCHIATRY BLOGGING'
    max_articles_per_feed = 20
    language = 'en'
    no_stylesheets = True

    # alternative candidate for keep_only_tags:
    # [{'attrs':{'class':['pjgm-posttitle', 'pjgm-postmeta', 'pjgm-postcontent']}}]
    keep_only_tags = [dict(name='div', attrs={'class':re.compile(r'\bpost\b')})]
    # strip the sharedaddy social-sharing widgets appended to each post
    remove_tags = [dict(name='div', attrs={'class':re.compile(r'\bsharedaddy\b')})]

    def get_archived_posts(self):
        # Every post is listed on the archives page; each entry is a div
        # with class 'sya_postcontent' wrapping a link to the post.
        soup = self.index_to_soup('http://slatestarcodex.com/archives/')
        entries = soup.findAll(attrs={'class':'sya_postcontent'})
        posts = []
        for entry in entries:
            atag = entry.find('a')
            url = atag['href']
            post = {
                'title': atag.contents[0],
                'url': url,
                # post URLs end in /YYYY/MM/DD/slug/, so the date is the
                # three path components before the slug
                'date': "-".join(url.strip('/').split('/')[-4:-1]),
            }
            posts.append(post)
        return posts

    def parse_index(self):
        # calibre expects a list of (feed title, article list) pairs;
        # all posts go into a single feed named after the recipe
        posts = self.get_archived_posts()
        return [[self.title, posts]]

View File

@@ -0,0 +1,54 @@
#!/usr/bin/env python2
from calibre.web.feeds.news import BasicNewsRecipe
import re

__license__ = 'GPL v3'
__copyright__ = '2015, Ned Letcher - ned@nedned.net'

"""A calibre custom news recipe for creating eBooks of *all* posts
from the blog Slate Star Codex.

If calibre is installed on your system you can generate an epub of the
entire SSC archive from your command line like so:

    $ ebook-convert slate_star_codex.recipe .epub

Author: Ned Letcher
"""

class SlateStarCodexArchive(BasicNewsRecipe):
    title = u'Slate Star Codex (Complete Archive)'
    description = 'IN A MAD WORLD, ALL BLOGGING IS PSYCHIATRY BLOGGING'
    simultaneous_downloads = 10
    max_articles_per_feed = 1000
    reverse_article_order = True
    language = 'en'
    no_stylesheets = True

    # alternative candidate for keep_only_tags:
    # [{'attrs':{'class':['pjgm-posttitle', 'pjgm-postmeta', 'pjgm-postcontent']}}]
    keep_only_tags = [dict(name='div', attrs={'class':re.compile(r'\bpost\b')})]
    # strip the sharedaddy social-sharing widgets appended to each post
    remove_tags = [dict(name='div', attrs={'class':re.compile(r'\bsharedaddy\b')})]

    def get_archived_posts(self):
        # Every post is listed on the archives page; each entry is a div
        # with class 'sya_postcontent' wrapping a link to the post. The
        # date is recovered from the /YYYY/MM/DD/slug/ URL structure.
        soup = self.index_to_soup('http://slatestarcodex.com/archives/')
        entries = soup.findAll(attrs={'class':'sya_postcontent'})
        posts = []
        for entry in entries:
            atag = entry.find('a')
            url = atag['href']
            post = {
                'title': atag.contents[0],
                'url': url,
                'date': "-".join(url.strip('/').split('/')[-4:-1]),
            }
            posts.append(post)
        return posts

    def parse_index(self):
        # a single feed containing every archived post
        posts = self.get_archived_posts()
        return [[self.title, posts]]