#!/usr/bin/env python2
# -*- coding: utf-8 -*-
# calibre news recipe: recipes/thecodelesscode.recipe
'''
http://www.thecodelesscode.com/
'''

from datetime import date

from lxml import etree

from calibre.ebooks.BeautifulSoup import BeautifulSoup as bs
from calibre.web.feeds.news import BasicNewsRecipe

__copyright__ = '2015, April King '
__license__ = 'GPL v3'
__version__ = '1.2'


class CodelessCode(BasicNewsRecipe):
    '''
    Recipe for The Codeless Code.

    Builds a single feed from the site's contents page (oldest koan first),
    reduces every downloaded page to a chapter heading plus the story body,
    and, once the book is assembled, rewrites links between koans so they
    point at the downloaded copies and replaces the generated feed listing
    on index.html with a credits block.
    '''

    __author__ = 'April King'
    title = u'The Codeless Code'
    category = 'fiction, programming, technology'
    # Filled by parse_index(); read by preprocess_html().
    chapters = {}  # ie, Mousetrap -> 182
    compress_news_images = True
    compress_news_images_max_size = 100
    cover_url = 'http://www.thecodelesscode.com/pages/case-9/Lotus-050.jpg'
    # Each entry is parsed with etree.fromstring() in postprocess_book(), so
    # it must be one well-formed element.
    # NOTE(review): the markup of these entries was not recoverable from the
    # text under review; the element names below are minimal well-formed
    # stand-ins -- confirm against the intended layout.
    credits = [
        u'<h2>{0}</h2>'.format(title),
        u'<h3>By Qi</h3>',
        u'<p>An illustrated collection of (sometimes violent) fables '
        u'concerning the Art and Philosophy of software development, '
        u'written in the spirit of Zen kōans</p>',
        u'<p>eBook conversion courtesy of {0}</p>'.format(__author__),
    ]
    description = (u'The Art and Philosophy of software development, '
                   u'written in the spirit of Zen kōans')
    extra_css = (
        '.article_date { display: none; float: right; } '
        '.chapter_title { font-size: 1.75em; margin-top: 0; } '
        '.chapter_title::first-letter { font-size: 1.35em; font-weight: 500; '
        'letter-spacing: -.05em; } '
        'h2 { margin-top: 0; } '
        '.image_wrapper { text-align: center; }'
    )
    index = 'http://www.thecodelesscode.com/contents'
    language = 'en'
    max_articles_per_feed = 1000  # I can only wish
    # Filled by create_opf(); read by postprocess_book().
    path_remappings = {}  # IE, /case/182 -> articles_72/index.html
    publication_type = 'blog'
    publisher = 'Qi'
    scale_news_images = (600, 400)
    simultaneous_downloads = 1
    url = 'http://www.thecodelesscode.com'

    def create_opf(self, feeds, dir=None):
        '''
        Generate a mapping of the original URL, ie,
        http://thecodelesscode.com/case/100 to the internal Calibre file
        system, eg, ../article_7/index_u39.html

        The mapping is stored in self.path_remappings; the real work is then
        delegated to BasicNewsRecipe.create_opf().
        '''
        for feed in feeds:
            for article in feed:
                # http://www.thecodelesscode.com/case/100 -> /case/100
                orig_path = article.orig_url.split(self.url, 2)[-1]
                # article_X/index_uNN.html -> article_X/index.html
                article_path = article.url.split('index')[0] + 'index.html'

                self.path_remappings[orig_path] = article_path

        BasicNewsRecipe.create_opf(self, feeds, dir=dir)

    def parse_index(self):
        '''
        Scrape the contents page and return one feed holding every koan.

        Returns a list with a single (feed title, articles) tuple; each
        article is a dict with 'content', 'date', 'description', 'title' and
        'url' keys. As a side effect self.chapters maps each title to the
        last path component of its URL.
        '''
        koans = []
        today = date.today().isoformat()

        # Retrieve the contents page, containing the ToC
        soup = self.index_to_soup(self.index)

        for row in soup.findAll('tr'):
            # BS has some trouble with the weird layout
            tag = row.find('a')
            if tag is None:
                continue

            # Anchors without an href cannot be turned into an article URL
            href = tag.get('href')
            if not href or 'random' in href:
                continue

            # Minor coding error causes calibre to glitch; use the current
            # date for the most recent title
            date_cell = row.find('td', attrs={'class': 'toc-date'})
            koan_date = date_cell.string if date_cell is not None else None
            if not koan_date:
                koan_date = today

            # .string is None when the anchor wraps nested markup; fall back
            # to the concatenated text nodes in that case
            title = tag.string
            if title is None:
                title = u''.join(tag.findAll(text=True)).strip()
            if not title:
                continue

            if u'The Applicant' in title:
                continue  # Only the main story

            url = self.url + href
            koans.append({
                'content': '',
                'date': koan_date,
                'description': '',
                'title': title,
                'url': url,
            })

            # ie, Mousetrap -> 182
            self.chapters[title] = url.split('/')[-1]

        # Oldest koans first
        koans.reverse()

        # Log and then get out of here
        self.log("Found {0} koans".format(len(koans)))
        return [(self.title, koans)]

    def preprocess_html(self, soup):
        '''
        Reduce a downloaded page to a chapter heading followed by the story.

        Links that do not point at another koan (/case/) are replaced by
        their contents, footnote links ('note' in the href) are dropped, and
        top-level images are wrapped in an image_wrapper div. Pages that do
        not have the expected title/story structure are returned unmodified.
        '''
        heading = soup.find('h1', attrs={'class': 'title'})
        link = None
        if heading is not None:
            link = heading.find('a', attrs={'class': 'subtle'})
        story = soup.find('div', attrs={'class': 'story koan'})

        if link is None or story is None:
            self.log("Unexpected page layout; leaving page unmodified")
            return soup

        title = link.string or u''

        # Add a title at the beginning of each chapter
        # NOTE(review): the heading element was not recoverable from the text
        # under review; the class matches the .chapter_title rule in
        # extra_css -- confirm the element name.
        if title in self.chapters:
            # Unicode literal: a byte-string .format() would choke on
            # non-ASCII titles under Python 2
            title = u'<h1 class="chapter_title">{0}</h1>'.format(title)

        # Move the story's children behind the heading, keeping their order.
        # Appending a node detaches it from its old parent, so walk a copy.
        koan = bs(title)
        for node in list(story.contents):
            koan.append(node)

        # Remove all anchors that don't contain /case/, leaving them as just
        # their text. Note that we'll come back and clean up /case/ links
        # when the URLs are remapped during postprocess_book()
        for anchor in koan.findAll('a'):
            href = anchor.get('href') or ''
            if '/case/' in href:
                continue
            if 'note' in href:
                anchor.replaceWith('')
                continue

            # Re-home the anchor's children, in order, in an empty soup and
            # swap that in for the anchor itself
            linktext = bs()
            for node in list(anchor.contents):
                linktext.append(node)
            anchor.replaceWith(linktext)

        # Find all the top-level images, and wrap them up in an image_wrapper
        # div. Every swap keeps the child count unchanged, so the indices of
        # the snapshot stay valid.
        for position, node in enumerate(list(koan.contents)):
            # Text nodes (carriage returns etc.) have no tag name
            if getattr(node, 'name', None) != u'img':
                continue
            div = bs('<div class="image_wrapper"></div>')
            div.div.insert(0, node)
            koan.insert(position, div)

        return koan

    def postprocess_book(self, oeb, opts, log):
        '''
        Fix up the assembled book: remap links between koans, replace the
        feed listing on index.html with the credits, and set the metadata.
        '''
        # Go through each internal representation of each HTML file, and fix
        # all the broken hrefs, if possible
        for item in oeb.manifest.items:
            if item.media_type != 'text/html':
                continue

            for node in item.data.xpath('//*[@href]'):
                naughty_href = node.get('href')
                remapped = self.path_remappings.get(naughty_href)
                if remapped is None:
                    continue

                href = '../' + remapped
                node.set('href', href)
                self.log("Remapped href {0} --> {1}".format(naughty_href, href))

        # Remove the superfluous extra feed page at the beginning of the
        # book, replacing it with the proper credits
        index_root = oeb.manifest.hrefs['index.html'].data

        for tag_name in ('ul', 'p'):
            query = '//*[local-name()="{0}"]'.format(tag_name)
            for element in index_root.xpath(query):
                element.getparent().remove(element)

        for container in index_root.xpath('//*[local-name()="div"]'):
            # Inserting at 0 in reverse order leaves the credits in their
            # declared order; parse afresh so each div gets its own elements
            for credit in reversed(self.credits):
                container.insert(0, etree.fromstring(credit))

        # Change the creator from "calibre" to the actual author
        # Also, we don't need the date in the ebook's title
        metadata = oeb.metadata.items
        metadata['creator'][0].value = self.publisher
        summary = metadata['description'][0]
        summary.value = summary.value.split('\n\nArticles in this issue')[0]
        metadata['publication_type'][0].value = self.title
        metadata['publisher'][0].value = self.publisher
        metadata['title'][0].value = self.title