#!/usr/bin/env python
# vim:fileencoding=utf-8

import os
import tempfile

from calibre.web.feeds.recipes import BasicNewsRecipe


class Dilbert(BasicNewsRecipe):
    title = u'Dilbert'
    __author__ = 'TechnoCat'
    description = 'Dilbert, by Scott Adams. Includes the last three or so comics and blog entries.'
    cover_url = 'http://dilbert.com/assets/dilbert-logo-4152bd0c31f7de7443b4bc90abd818da.png'
    auto_cleanup = True
    encoding = 'utf8'
    language = 'en'
    needs_subscription = False
    no_stylesheets = True
    oldest_article = 7
    remove_javascript = True
    recursions = 0
    max_articles_per_feed = 20
    debugMessages = True
    BASE_URL = 'http://dilbert.com'  # Note: no www.
    COMIC_DIV_TAG = 'img-comic-container'
    BLOG_DIV_TAG = 'media'
    tempfiles = []

    # Creates a temp file containing a minimal HTML page that wraps the
    # comic <img> tag, so calibre can fetch it through a file:// URL.
    def writeImage(self, title, imageURL):
        # Open in text mode so str can be written under Python 3.
        tempFile = tempfile.NamedTemporaryFile(mode='w', delete=False)
        self.tempfiles.append(tempFile)
        tempFile.write('<html><head><title>' + title + '</title></head><body>')
        tempFile.write(imageURL.prettify())
        tempFile.write('</body></html>')
        tempFile.flush()
        tempFile.close()
        return tempFile.name

    # Close and remove every temporary file created by writeImage.
    def cleanUpTempFiles(self):
        for tempFile in self.tempfiles:
            tempFile.close()
            os.unlink(tempFile.name)

    # Called by calibre after the download has finished.
    def cleanup(self):
        self.cleanUpTempFiles()

    # Extract comic links from the soup.
    # Returns a list of comics (articles) as:
    # {
    #   'title': article title,
    #   'url': URL of print version,
    #   'date': the publication date of the article as a string,
    #   'description': a summary of the article,
    #   'content': the full article (can be an empty string). This is used by FullContentProfile.
    # }
    def comicFeed(self, soup):
        feedset = []
        for comicContainer in soup.findAll('div', {'class': self.COMIC_DIV_TAG}):
            comic = comicContainer.find('img')
            if comic is not None:
                filelink = self.writeImage(comic['alt'], comic)
                feedset.append(
                    dict(title=comic['alt'], url='file://' + filelink, description=comic['alt'], content=''))
        return feedset

    # Extract blog entry links from the soup, using the same article dict
    # format as comicFeed.
    def blogFeed(self, soup):
        feedset = []
        for blogContainer in soup.findAll('div', {'class': self.BLOG_DIV_TAG}):
            blog = blogContainer.find('a', {'class': 'link-blended'})
            if blog is not None:
                feedset.append(
                    dict(title=blog['title'], url=blog['href'], description=blog['title'], content=''))
        return feedset

    # parse_index is calibre's entry point for recipes that scrape a site
    # instead of parsing RSS feeds; it returns (section title, article list) pairs.
    def parse_index(self):
        root = self.index_to_soup(self.BASE_URL)
        comics = self.comicFeed(root)
        blogs = self.blogFeed(root)
        return [('Comics', comics), ('Blog Entries', blogs)]
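
# A quick way to test a recipe like this outside the calibre GUI is calibre's
# ebook-convert tool; the --test flag limits the number of feeds and articles
# fetched. The filename dilbert.recipe below is just an example, so adjust it
# to wherever this file is saved:
#
#   ebook-convert dilbert.recipe dilbert.epub --test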