Update Dilbert

This commit is contained in:
Kovid Goyal 2016-11-25 08:04:15 +05:30
parent 9392c1972b
commit 1d618a5627

View File

@ -1,42 +1,80 @@
__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
http://www.dilbert.com
DrMerry added cover Image 2011-11-12
'''
#!/usr/bin/env python2
# vim:fileencoding=utf-8
import os
import tempfile
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup
import re
# Legacy feed-driven Dilbert recipe (superseded by the Dilbert class below).
class DilbertBig(BasicNewsRecipe):
    title = 'Dilbert'
    __author__ = 'Darko Miletic and Starson17 contribution of DrMerry'
    description = 'Dilbert'
    reverse_article_order = True
    oldest_article = 15
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'
    publisher = 'UNITED FEATURE SYNDICATE, INC.'
    category = 'comic'
# Current Dilbert recipe (2016 site redesign): scrapes the site front page
# directly instead of consuming the RSS feed.
class Dilbert(BasicNewsRecipe):
title = u'Dilbert'
__author__ = 'TechnoCat'
description = 'Dilbert, by Scott Adams. Includes last three or so comics and blog entries.'
# NOTE(review): cover_url is assigned twice in this view (here and a few
# lines below); the later assignment wins.
cover_url = 'http://dilbert.com/assets/dilbert-logo-4152bd0c31f7de7443b4bc90abd818da.png'
auto_cleanup = True
encoding = 'utf8'
language = 'en'
cover_url = 'http://dilbert.com/mobile/mobile/dilbert.app.icon.png'
needs_subscription = False
no_stylesheets = True
oldest_article = 7
remove_javascript = True
recursions = 0
max_articles_per_feed = 20
debugMessages = True
BASE_URL = 'http://dilbert.com' # Note no www.
# CSS class of the <div> wrapping each comic image on the front page.
COMIC_DIV_TAG = 'img-comic-container'
# CSS class of the <div> wrapping each blog entry on the front page.
BLOG_DIV_TAG = 'media'
# Temp HTML wrappers created by writeImage(); removed again in cleanup().
tempfiles = []
# NOTE(review): 'category' and 'publisher' are not defined on this class
# (they appear on DilbertBig above); this dict may belong to the old
# recipe -- this view interleaves removed and added diff lines. Verify.
conversion_options = {
'comments': description, 'tags': category, 'language': language, 'publisher': publisher
}
# Creates a temp file for the wrapped image url
def writeImage(self, title, imageURL):
    """Write a minimal HTML page wrapping *imageURL* to a temp file.

    Returns the temp file's path so it can be used as a local file://
    article URL; the file object is tracked in self.tempfiles so
    cleanUpTempFiles() can remove it after the build.
    """
    # mode='w' accepts str in Python 3 (the default 'w+b' would raise
    # TypeError on the str writes below); delete=False keeps the file
    # alive after close() so the feed machinery can read it later.
    tempFile = tempfile.NamedTemporaryFile(mode='w', suffix='.html', delete=False)
    self.tempfiles.append(tempFile)
    tempFile.write('<html><head><title>' + title + '</title></head><body>')
    tempFile.write(imageURL.prettify())
    tempFile.write('</body></html>')
    tempFile.close()  # close() flushes; the name stays valid (delete=False)
    return tempFile.name
feeds = [(u'Dilbert', u'http://feed.dilbert.com/dilbert/daily_strip')]
def cleanUpTempFiles(self):
    """Close and delete every temp file created by writeImage()."""
    for tempFile in self.tempfiles:
        tempFile.close()  # no-op if writeImage() already closed it
        # Best-effort delete: the file may already have been removed.
        try:
            os.unlink(tempFile.name)
        except OSError:
            pass
    # Reset the list so a second call cannot double-unlink.
    self.tempfiles = []
# Rewrite strip image URLs to the higher-resolution "zoom" variant.
# Raw string for the pattern: 'strip\..*\.gif' as a plain literal is an
# invalid escape sequence and warns on modern Python.
preprocess_regexps = [
    (re.compile(r'strip\..*\.gif', re.DOTALL | re.IGNORECASE),
     lambda match: 'strip.zoom.gif')
]
def cleanup(self):
    """Recipe hook run after the build: remove the temp HTML wrappers."""
    self.cleanUpTempFiles()
def preprocess_html(self, soup):
    """Reduce the page to the strip image found in hidden <input> fields.

    Returns a new document containing just the <img>, or the original
    soup unchanged when no <input> tag is present (the original code
    raised UnboundLocalError in that case).
    """
    image = None
    for tag in soup.findAll(name='input'):
        # Quote the src attribute so URLs survive HTML parsing intact.
        image = BeautifulSoup('<img src="' + tag['value'] + '"></img>')
    return image if image is not None else soup
# Extract comic links from the front-page soup.
def comicFeed(self, soup):
    """Return a list of article dicts for every comic on the page.

    Each entry has the keys 'title', 'url', 'date' (omitted here),
    'description' and 'content' expected by the feed machinery; the
    comic image is wrapped in a local temp HTML file and referenced
    through a file:// URL.
    """
    articles = []
    for container in soup.findAll('div', {'class': self.COMIC_DIV_TAG}):
        image = container.find('img')
        if image is None:
            continue
        caption = image['alt']
        page = self.writeImage(caption, image)
        articles.append({
            'title': caption,
            'url': 'file://' + page,
            'description': caption,
            'content': '',
        })
    return articles
def blogFeed(self, soup):
    """Return a list of article dicts for the blog entries on the page."""
    entries = []
    for container in soup.findAll('div', {'class': self.BLOG_DIV_TAG}):
        link = container.find('a', {'class': 'link-blended'})
        if link is None:
            continue
        entries.append({
            'title': link['title'],
            'url': link['href'],
            'description': link['title'],
            'content': '',
        })
    return entries
def parse_index(self):
    """Fetch the Dilbert front page and build the two feed sections."""
    soup = self.index_to_soup(self.BASE_URL)
    return [
        ('Comics', self.comicFeed(soup)),
        ('Blog Entries', self.blogFeed(soup)),
    ]