mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Revert "Adding The Codeless Code (thecodelesscode.com)"
This reverts commit 7ff5ed6b62561c9920caccca5c98200943759066.
This commit is contained in:
parent
7ff5ed6b62
commit
fb3f9d7898
@ -1,179 +0,0 @@
|
|||||||
#!/usr/bin/env python2
|
|
||||||
|
|
||||||
from datetime import date
|
|
||||||
from lxml import etree
|
|
||||||
|
|
||||||
__copyright__ = '2015, April King <april@twoevils.org>'
|
|
||||||
__license__ = 'GPL v3'
|
|
||||||
__version__ = '1.2'
|
|
||||||
|
|
||||||
'''
|
|
||||||
http://www.thecodelesscode.com/
|
|
||||||
'''
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup as bs
|
|
||||||
|
|
||||||
class CodelessCode(BasicNewsRecipe):
    """Recipe that collects the koans of The Codeless Code into one ebook.

    The table of contents page (``index``) is scraped for koan links, each
    koan page is reduced to its story text, and after conversion the
    cross-references between koans (``/case/N`` links) are rewritten to point
    at the files generated inside the book.
    """

    __author__ = 'April King'
    title = u'The Codeless Code'
    category = 'fiction, programming, technology'
    # Filled by parse_index(): koan title -> case number, ie, Mousetrap -> 182.
    # NOTE: class-level dict, so it is shared by every instance of the recipe.
    chapters = {}
    compress_news_images = True
    compress_news_images_max_size = 100
    cover_url = 'http://www.thecodelesscode.com/pages/case-9/Lotus-050.jpg'
    # XML fragments inserted into the book's front page by postprocess_book()
    credits = [
        u'<h2 class="chapter_title">{0}</h2>'.format(title),
        u'<p>By <em>Qi</em></p>',
        u'<p>An illustrated collection of (sometimes violent) fables concerning the Art and Philosophy of software development, written in the spirit of Zen kōans</p>',
        u'<p>eBook conversion courtesy of <em>{0}</em></p>'.format(__author__),
    ]
    description = u'The Art and Philosophy of software development, written in the spirit of Zen kōans'
    extra_css = (
        '.article_date { display: none; float: right; } '
        '.chapter_title { font-size: 1.75em; margin-top: 0; } '
        '.chapter_title::first-letter { font-size: 1.35em; font-weight: 500; letter-spacing: -.05em; } '
        'h2 { margin-top: 0; } '
        '.image_wrapper { text-align: center; }'
    )
    index = 'http://www.thecodelesscode.com/contents'
    language = 'en'
    max_articles_per_feed = 1000  # I can only wish
    # Filled by create_opf(): site path -> book path,
    # IE, /case/182 -> articles_72/index.html
    # NOTE: class-level dict, so it is shared by every instance of the recipe.
    path_remappings = {}
    publication_type = 'blog'
    publisher = 'Qi'
    scale_news_images = (600, 400)
    simultaneous_downloads = 1
    url = 'http://www.thecodelesscode.com'

    def create_opf(self, feeds, dir=None):
        '''
        Generate a mapping of the original URL, ie, http://thecodelesscode.com/case/100 to the
        internal Calibre file system, eg, ../article_7/index_u39.html

        The mapping is stored in self.path_remappings (and consumed later by
        postprocess_book()); the actual OPF creation is delegated to
        BasicNewsRecipe.create_opf() with the same arguments.
        '''
        for feed in feeds:
            for article in feed:
                # Everything after the site root:
                # http://thecodelesscode.com/case/100 -> /case/100
                orig_path = article.orig_url.split(self.url, 2)[-1]
                # Keep the part before 'index' and point at that directory's
                # index.html: article_X/index_uNN.html -> article_X/index.html
                article_path = article.url.split('index')[0] + 'index.html'

                self.path_remappings[orig_path] = article_path

        BasicNewsRecipe.create_opf(self, feeds, dir=dir)

    def parse_index(self):
        '''
        Scrape the contents page and return a single feed holding every koan.

        Returns a one-element list of (feed title, list of article dicts),
        oldest koan first. As a side effect, self.chapters is filled with
        koan title -> last URL path segment (the case number).
        '''
        koans = []

        # Retrieve the contents page, containing the ToC
        soup = self.index_to_soup(self.index)

        for row in soup.findAll('tr'):
            # BS has some trouble with the weird layout
            tag = row.find('a')
            if tag is None:
                continue

            # Anchors without an href cannot be turned into an article URL;
            # the "random koan" link is not a koan either
            href = tag.get('href', '')
            if not href or 'random' in href:
                continue

            # Minor coding error causes calibre to glitch; use the current date for the most recent title
            date_cell = row.find('td', attrs={'class': 'toc-date'})
            if date_cell is None:
                koan_date = date.today().isoformat()
            else:
                koan_date = date_cell.string

            title = tag.string
            url = self.url + href

            if u'The Applicant' in title:
                continue  # Only the main story

            koans.append({
                'content': '',
                'date': koan_date,
                'description': '',
                'title': title,
                'url': url,
            })

            # ie, Mousetrap -> 182
            self.chapters[title] = url.split('/')[-1]

        # Oldest koans first
        koans.reverse()

        # Log and then get out of here
        self.log("Found {0} koans".format(len(koans)))
        return [(self.title, koans)]

    def preprocess_html(self, soup):
        '''
        Reduce a downloaded koan page to its title plus the story itself.

        Returns a new soup holding the chapter title followed by the children
        of the page's 'story koan' div, with non-koan links flattened to
        text, footnote links removed, and top-level images wrapped in an
        image_wrapper div.
        '''
        title = soup.find('h1', attrs={'class': 'title'}).find('a', attrs={'class': 'subtle'}).string

        # Add a title at the beginning of each chapter.
        # A unicode template keeps non-ASCII titles from failing on Python 2.
        if title in self.chapters:
            title = u'<div class="chapter_title">{0}</div>'.format(title)

        # Load up the actual story
        story = soup.find('div', attrs={'class': 'story koan'})

        # Kind of a hack-y way to get .children in BS3 <a><b><c></c></b></a> -> <b><c></c></b>
        # Snapshot the children first: insert() detaches each node from 'story'.
        story_nodes = list(story.contents)
        koan = bs(title)

        # Inserting in reverse at position 1 keeps the title first and the
        # story nodes in their original order after it
        for node in reversed(story_nodes):
            koan.insert(1, node)

        # Remove all anchors that don't contain /case/, leaving them as just their text
        # Note that we'll come back and clean up /case/ links when the URLs are remapped
        # during postprocess_book()
        for anchor in koan.findAll('a'):
            # Named anchors carry no href; treat them like any other non-koan link
            href = anchor.get('href', '')

            if '/case/' in href:
                continue

            if 'note' in href:
                anchor.replaceWith('')
                continue

            # Again, a hacky way to get the contents of the tag, thanks to BS3.
            # The holder starts empty, so insert at position 0: reversed
            # children then end up in their original order (position 1 would
            # scramble anchors with more than one child).
            linktext = bs()
            for child in reversed(list(anchor.contents)):
                linktext.insert(0, child)
            anchor.replaceWith(linktext)

        # Find all the images, and wrap them up in an image_wrapper div.
        # Each image is swapped for its wrapper at the same index, so the
        # positions in the snapshot stay valid while the tree is modified.
        for position, node in enumerate(list(koan.contents)):
            # text nodes (carriage returns) are not <img> tags
            if getattr(node, 'name', None) != u'img':
                continue
            wrapper = bs('<div class="image_wrapper"></div>')
            wrapper.div.insert(0, node)
            koan.insert(position, wrapper)

        return koan

    def postprocess_book(self, oeb, opts, log):
        '''
        Fix up the converted book: remap koan cross-links, replace the
        generated feed page with the credits, and correct the metadata.
        '''
        # Go through each internal representation of each HTML file, and fix all the broken hrefs, if possible
        for item in oeb.manifest.items:
            if item.media_type != 'text/html':
                continue

            for node in item.data.xpath('//*[@href]'):
                naughty_href = node.get('href')
                if naughty_href not in self.path_remappings:
                    continue

                node.set('href', '../' + self.path_remappings[naughty_href])
                href = node.get('href')
                self.log("Remapped href {0} --> {1}".format(naughty_href, href))

        # Remove the superfluous extra feed page at the beginning of the book, replacing it
        # with the proper credits
        front_page = oeb.manifest.hrefs['index.html'].data

        for element in front_page.xpath('//*[local-name()="ul"]'):
            element.getparent().remove(element)

        for element in front_page.xpath('//*[local-name()="p"]'):
            element.getparent().remove(element)

        for element in front_page.xpath('//*[local-name()="div"]'):
            # Insert last-to-first at position 0 so the credits read in order
            for credit in reversed(self.credits):
                element.insert(0, etree.fromstring(credit))

        # Change the creator from "calibre" to the actual author
        # Also, we don't need the date in the ebook's title
        metadata = oeb.metadata.items
        metadata['creator'][0].value = self.publisher
        metadata['description'][0].value = metadata['description'][0].value.split('\n\nArticles in this issue')[0]
        metadata['publication_type'][0].value = self.title
        metadata['publisher'][0].value = self.publisher
        metadata['title'][0].value = self.title
|
|
Loading…
x
Reference in New Issue
Block a user