#!/usr/bin/env python
# vim:fileencoding=utf-8
import os
import tempfile
from calibre.web.feeds.recipes import BasicNewsRecipe
class Dilbert(BasicNewsRecipe):
    title = u'Dilbert'
    __author__ = 'TechnoCat'
    description = 'Dilbert, by Scott Adams. Includes the last three or so comics and blog entries.'
    cover_url = 'http://dilbert.com/assets/dilbert-logo-4152bd0c31f7de7443b4bc90abd818da.png'
    auto_cleanup = True
    encoding = 'utf8'
    language = 'en'
    needs_subscription = False
    no_stylesheets = True
    oldest_article = 7
    remove_javascript = True
    recursions = 0
    max_articles_per_feed = 20
    debugMessages = True
    BASE_URL = 'http://dilbert.com'  # Note: no www.
    COMIC_DIV_TAG = 'img-comic-container'
    BLOG_DIV_TAG = 'media'
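    # Holds the temp files created by writeImage() so cleanUpTempFiles()
    # can delete them after calibre has finished downloading.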
    tempfiles = []
    # Wraps the comic <img> tag in a minimal HTML page and writes it to a
    # temp file, so the comic can be fed to calibre as a file:// article URL.
    def writeImage(self, title, imgTag):
        # Text mode with an .html suffix; delete=False because calibre reads
        # the file later, so removal is deferred to cleanup().
        tempFile = tempfile.NamedTemporaryFile(mode='w', suffix='.html', delete=False)
        self.tempfiles.append(tempFile)
        tempFile.write('<html><head><title>' + title + '</title></head><body>')
        tempFile.write(imgTag.prettify())
        tempFile.write('</body></html>')
        tempFile.flush()
        tempFile.close()
        return tempFile.name
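    # Remove the wrapper files created by writeImage().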
    def cleanUpTempFiles(self):
        for tempFile in self.tempfiles:
            tempFile.close()
            os.unlink(tempFile.name)
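    # calibre calls cleanup() after all articles have been downloaded, so
    # this is the right place to drop the temp files.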
    def cleanup(self):
        self.cleanUpTempFiles()
    # Extract comic links from the soup.
    # Returns a list of comics (articles) as:
    # {
    #   'title'       : article title,
    #   'url'         : URL of print version,
    #   'date'        : the publication date of the article as a string,
    #   'description' : a summary of the article,
    #   'content'     : the full article (can be an empty string); used by FullContentProfile
    # }
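    # For example, one entry might look like this (illustrative values only):
    #   {'title': 'Dilbert - Comic Strip', 'url': 'file:///tmp/tmpa1b2c3.html',
    #    'description': 'Dilbert - Comic Strip', 'content': ''}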
    def comicFeed(self, soup):
        feedset = []
        for comicContainer in soup.findAll('div', {'class': self.COMIC_DIV_TAG}):
            comic = comicContainer.find('img')
            if comic is not None:
                filelink = self.writeImage(comic['alt'], comic)
                feedset.append(
                    dict(title=comic['alt'], url='file://' + filelink, description=comic['alt'], content=''))
        return feedset
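    # Extract blog entry links from the soup; returns articles in the same
    # dict format as comicFeed, but pointing at the blog post URLs.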
    def blogFeed(self, soup):
        feedset = []
        for blogContainer in soup.findAll('div', {'class': self.BLOG_DIV_TAG}):
            blog = blogContainer.find('a', {'class': 'link-blended'})
            if blog is not None:
                feedset.append(
                    dict(title=blog['title'], url=blog['href'], description=blog['title'], content=''))
        return feedset
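    # parse_index() is the hook calibre calls for recipes that build their
    # own index instead of parsing an RSS feed; it must return a list of
    # (feed title, list of articles) tuples.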
    def parse_index(self):
        root = self.index_to_soup(self.BASE_URL)
        comics = self.comicFeed(root)
        blogs = self.blogFeed(root)
        return [('Comics', comics), ('Blog Entries', blogs)]
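
# To try the recipe locally, the usual calibre workflow (assuming the
# command-line tools are installed) is something like:
#   ebook-convert dilbert.recipe dilbert.epub --test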