From e4e3dd04ff3c2f03fa7d6ac31b26823d5355a364 Mon Sep 17 00:00:00 2001 From: Ned Letcher Date: Sat, 20 Jun 2015 02:15:16 +1000 Subject: [PATCH 1/2] Added two recipes for the Slate Star Codex blog, a periodic recipe and one for the complete archives --- recipes/slate_star_codex.recipe | 52 ++++++++++++++++++++++++ recipes/slate_star_codex_archive.recipe | 54 +++++++++++++++++++++++++ 2 files changed, 106 insertions(+) create mode 100644 recipes/slate_star_codex.recipe create mode 100644 recipes/slate_star_codex_archive.recipe diff --git a/recipes/slate_star_codex.recipe b/recipes/slate_star_codex.recipe new file mode 100644 index 0000000000..c4ca9c5338 --- /dev/null +++ b/recipes/slate_star_codex.recipe @@ -0,0 +1,52 @@ +#!/usr/bin/env python2 + +from calibre.web.feeds.news import BasicNewsRecipe +import re + +__license__ = 'GPL v3' +__copyright__ = '2015, Ned Letcher - ned@nedned.net' + + +"""A calibre custom news recipe for creating eBooks of periodic +updates from the blog Slate Star Codex. + +If calibre is installed on your system you can generate an epub +of recent SSC posts from your command line like so: + +$ ebook-convert slate_star_codex.recipe .epub + +Author: Ned Letcher + +""" + + +class SlateStarCodex(BasicNewsRecipe): + title = u'Slate Star Codex' + description = 'IN A MAD WORLD, ALL BLOGGING IS PSYCHIATRY BLOGGING' + max_articles_per_feed = 20 + language = 'en' + no_stylesheets = True + # alternative candidate for keep_only_tags: + # [{'attrs':{'class':['pjgm-posttitle', 'pjgm-postmeta', 'pjgm-postcontent']}}] + keep_only_tags = [dict(name='div', attrs={'class':re.compile(r'\bpost\b')})] + remove_tags = [dict(name='div', attrs={'class':re.compile(r'\bsharedaddy\b')})] + + def get_archived_posts(self): + soup = self.index_to_soup('http://slatestarcodex.com/archives/') + entries = soup.findAll(attrs={'class':'sya_postcontent'}) + + posts = [] + for entry in entries: + atag = entry.find('a') + url = atag['href'] + post = { + 'title' : atag.contents[0], + 'url' : url, + 'date' : "-".join(url.strip('/').split('/')[-4:-1]), + } + posts.append(post) + return posts + + def parse_index(self): + posts = self.get_archived_posts() + return [[self.title, posts]] diff --git a/recipes/slate_star_codex_archive.recipe b/recipes/slate_star_codex_archive.recipe new file mode 100644 index 0000000000..3bd397a865 --- /dev/null +++ b/recipes/slate_star_codex_archive.recipe @@ -0,0 +1,54 @@ +#!/usr/bin/env python2 + +from calibre.web.feeds.news import BasicNewsRecipe +import re + +__license__ = 'GPL v3' +__copyright__ = '2015, Ned Letcher - ned@nedned.net' + + +"""A calibre custom news recipe for creating eBooks of *all* posts +from the blog Slate Star Codex. + +If calibre is installed on your system you can generate an epub of the +entire SSC archive from your command line like so: + +$ ebook-convert slate_star_codex.recipe .epub + +Author: Ned Letcher + +""" + + +class SlateStarCodexArchive(BasicNewsRecipe): + title = u'Slate Star Codex (Complete Archive)' + description = 'IN A MAD WORLD, ALL BLOGGING IS PSYCHIATRY BLOGGING' + simultaneous_downloads = 10 + max_articles_per_feed = 1000 + reverse_article_order = True + language = 'en' + no_stylesheets = True + # alternative candidate for keep_only_tags: + # [{'attrs':{'class':['pjgm-posttitle', 'pjgm-postmeta', 'pjgm-postcontent']}}] + keep_only_tags = [dict(name='div', attrs={'class':re.compile(r'\bpost\b')})] + remove_tags = [dict(name='div', attrs={'class':re.compile(r'\bsharedaddy\b')})] + + def get_archived_posts(self): + soup = self.index_to_soup('http://slatestarcodex.com/archives/') + entries = soup.findAll(attrs={'class':'sya_postcontent'}) + + posts = [] + for entry in entries: + atag = entry.find('a') + url = atag['href'] + post = { + 'title' : atag.contents[0], + 'url' : url, + 'date' : "-".join(url.strip('/').split('/')[-4:-1]), + } + posts.append(post) + return posts + + def parse_index(self): + posts = self.get_archived_posts() + return [[self.title, posts]] From ab65bd18c0512926caad7ce23de4d08b949242aa Mon Sep 17 00:00:00 2001 From: Ned Letcher Date: Sun, 21 Jun 2015 17:48:25 +1000 Subject: [PATCH 2/2] Changes based on pull request feedback * removed inappropriate archive recipe * added __author__ and encoding attributes to remaining recipe --- recipes/slate_star_codex.recipe | 16 +++----- recipes/slate_star_codex_archive.recipe | 54 ------------------------- 2 files changed, 5 insertions(+), 65 deletions(-) delete mode 100644 recipes/slate_star_codex_archive.recipe diff --git a/recipes/slate_star_codex.recipe b/recipes/slate_star_codex.recipe index c4ca9c5338..694900ed73 100644 --- a/recipes/slate_star_codex.recipe +++ b/recipes/slate_star_codex.recipe @@ -4,27 +4,21 @@ from calibre.web.feeds.news import BasicNewsRecipe import re __license__ = 'GPL v3' -__copyright__ = '2015, Ned Letcher - ned@nedned.net' +__copyright__ = '2015, Ned Letcher ' -"""A calibre custom news recipe for creating eBooks of periodic -updates from the blog Slate Star Codex. - -If calibre is installed on your system you can generate an epub -of recent SSC posts from your command line like so: - -$ ebook-convert slate_star_codex.recipe .epub - -Author: Ned Letcher - +""" +calibre recipe for Slate Star Codex. """ class SlateStarCodex(BasicNewsRecipe): title = u'Slate Star Codex' description = 'IN A MAD WORLD, ALL BLOGGING IS PSYCHIATRY BLOGGING' + __author__ = 'Ned Letcher' max_articles_per_feed = 20 language = 'en' + encoding = 'utf-8' no_stylesheets = True # alternative candidate for keep_only_tags: # [{'attrs':{'class':['pjgm-posttitle', 'pjgm-postmeta', 'pjgm-postcontent']}}] diff --git a/recipes/slate_star_codex_archive.recipe b/recipes/slate_star_codex_archive.recipe deleted file mode 100644 index 3bd397a865..0000000000 --- a/recipes/slate_star_codex_archive.recipe +++ /dev/null @@ -1,54 +0,0 @@ -#!/usr/bin/env python2 - -from calibre.web.feeds.news import BasicNewsRecipe -import re - -__license__ = 'GPL v3' -__copyright__ = '2015, Ned Letcher - ned@nedned.net' - - -"""A calibre custom news recipe for creating eBooks of *all* posts -from the blog Slate Star Codex. - -If calibre is installed on your system you can generate an epub of the -entire SSC archive from your command line like so: - -$ ebook-convert slate_star_codex.recipe .epub - -Author: Ned Letcher - -""" - - -class SlateStarCodexArchive(BasicNewsRecipe): - title = u'Slate Star Codex (Complete Archive)' - description = 'IN A MAD WORLD, ALL BLOGGING IS PSYCHIATRY BLOGGING' - simultaneous_downloads = 10 - max_articles_per_feed = 1000 - reverse_article_order = True - language = 'en' - no_stylesheets = True - # alternative candidate for keep_only_tags: - # [{'attrs':{'class':['pjgm-posttitle', 'pjgm-postmeta', 'pjgm-postcontent']}}] - keep_only_tags = [dict(name='div', attrs={'class':re.compile(r'\bpost\b')})] - remove_tags = [dict(name='div', attrs={'class':re.compile(r'\bsharedaddy\b')})] - - def get_archived_posts(self): - soup = self.index_to_soup('http://slatestarcodex.com/archives/') - entries = soup.findAll(attrs={'class':'sya_postcontent'}) - - posts = [] - for entry in entries: - atag = entry.find('a') - url = atag['href'] - post = { - 'title' : atag.contents[0], - 'url' : url, - 'date' : "-".join(url.strip('/').split('/')[-4:-1]), - } - posts.append(post) - return posts - - def parse_index(self): - posts = self.get_archived_posts() - return [[self.title, posts]]