diff --git a/recipes/dilbert.recipe b/recipes/dilbert.recipe index eaf27aa885..52774253c5 100644 --- a/recipes/dilbert.recipe +++ b/recipes/dilbert.recipe @@ -1,42 +1,80 @@ -__license__ = 'GPL v3' -__copyright__ = '2009, Darko Miletic ' -''' -http://www.dilbert.com -DrMerry added cover Image 2011-11-12 -''' +#!/usr/bin/env python2 +# vim:fileencoding=utf-8 +import os +import tempfile from calibre.web.feeds.recipes import BasicNewsRecipe -from calibre.ebooks.BeautifulSoup import BeautifulSoup -import re -class DilbertBig(BasicNewsRecipe): - title = 'Dilbert' - __author__ = 'Darko Miletic and Starson17 contribution of DrMerry' - description = 'Dilbert' - reverse_article_order = True - oldest_article = 15 - max_articles_per_feed = 100 - no_stylesheets = True - use_embedded_content = False - encoding = 'utf-8' - publisher = 'UNITED FEATURE SYNDICATE, INC.' - category = 'comic' +class Dilbert(BasicNewsRecipe): + title = u'Dilbert' + __author__ = 'TechnoCat' + description = 'Dilbert, by Scott Adams. Includes last three or so comics and blog entries.' + cover_url = 'http://dilbert.com/assets/dilbert-logo-4152bd0c31f7de7443b4bc90abd818da.png' + auto_cleanup = True + encoding = 'utf8' language = 'en' - cover_url = 'http://dilbert.com/mobile/mobile/dilbert.app.icon.png' + needs_subscription = False + no_stylesheets = True + oldest_article = 7 + remove_javascript = True + recursions = 0 + max_articles_per_feed = 20 + debugMessages = True + BASE_URL = 'http://dilbert.com' # Note no www. 
+ COMIC_DIV_TAG = 'img-comic-container' + BLOG_DIV_TAG = 'media' + tempfiles = [] - conversion_options = { - 'comments': description, 'tags': category, 'language': language, 'publisher': publisher - } + # Creates a temp file for the wrapped image url + def writeImage(self, title, imageURL) : + tempFile = tempfile.NamedTemporaryFile(delete=False) + self.tempfiles.append(tempFile) + tempFile.write('<html><head><title>'+title+'</title></head><body>') + tempFile.write(imageURL.prettify()) + tempFile.write('</body></html>') + tempFile.flush() + tempFile.close() + return tempFile.name - feeds = [(u'Dilbert', u'http://feed.dilbert.com/dilbert/daily_strip')] + def cleanUpTempFiles(self): + for tempFile in self.tempfiles: + tempFile.close() + os.unlink(tempFile.name) - preprocess_regexps = [ - (re.compile('strip\..*\.gif', re.DOTALL | re.IGNORECASE), - lambda match: 'strip.zoom.gif') - ] + def cleanup(self): + self.cleanUpTempFiles() - def preprocess_html(self, soup): - for tag in soup.findAll(name='input'): - image = BeautifulSoup('') - return image + # Extract comic links from the soup + # Returns a list of comics (articles) as: + # { + # 'title' : article title, + # 'url' : URL of print version, + # 'date' : The publication date of the article as a string, + # 'description' : A summary of the article + # 'content' : The full article (can be an empty string). 
This is used by FullContentProfile + # } + def comicFeed(self, soup) : + feedset = [] + for comicContainer in soup.findAll('div', {'class': self.COMIC_DIV_TAG}) : + comic = comicContainer.find('img') + if comic is not None: + filelink = self.writeImage(comic['alt'], comic) + feedset.append( + dict(title=comic['alt'], url='file://'+filelink, description=comic['alt'], content='')) + return feedset + + def blogFeed(self, soup) : + feedset = [] + for blogContainer in soup.findAll('div', {'class': self.BLOG_DIV_TAG}) : + blog = blogContainer.find('a', {'class':'link-blended'}) + if blog is not None: + feedset.append( + dict(title=blog['title'], url=blog['href'], description=blog['title'], content='')) + return feedset + + def parse_index(self): + root = self.index_to_soup(self.BASE_URL) + comics = self.comicFeed(root) + blogs = self.blogFeed(root) + return [('Comics', comics), ('Blog Entries', blogs)]