Update Dilbert

parent 9392c1972b
commit 1d618a5627
@@ -1,42 +1,80 @@
-__license__ = 'GPL v3'
-__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
-'''
-http://www.dilbert.com
-DrMerry added cover Image 2011-11-12
-'''
+#!/usr/bin/env python2
+# vim:fileencoding=utf-8
 
+import os
+import tempfile
 from calibre.web.feeds.recipes import BasicNewsRecipe
 from calibre.ebooks.BeautifulSoup import BeautifulSoup
 import re
 
 
-class DilbertBig(BasicNewsRecipe):
-    title = 'Dilbert'
-    __author__ = 'Darko Miletic and Starson17 contribution of DrMerry'
-    description = 'Dilbert'
-    reverse_article_order = True
-    oldest_article = 15
-    max_articles_per_feed = 100
-    no_stylesheets = True
-    use_embedded_content = False
-    encoding = 'utf-8'
-    publisher = 'UNITED FEATURE SYNDICATE, INC.'
-    category = 'comic'
+class Dilbert(BasicNewsRecipe):
+    title = u'Dilbert'
+    __author__ = 'TechnoCat'
+    description = 'Dilbert, by Scott Adams. Includes last three or so comics and blog entries.'
+    cover_url = 'http://dilbert.com/assets/dilbert-logo-4152bd0c31f7de7443b4bc90abd818da.png'
+    auto_cleanup = True
+    encoding = 'utf8'
     language = 'en'
-    cover_url = 'http://dilbert.com/mobile/mobile/dilbert.app.icon.png'
+    needs_subscription = False
+    no_stylesheets = True
+    oldest_article = 7
+    remove_javascript = True
+    recursions = 0
+    max_articles_per_feed = 20
+    debugMessages = True
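+    # Scraping hooks (editor's note): the class names of the front-page
+    # containers that hold the comic strips and the blended blog links.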
+    BASE_URL = 'http://dilbert.com'  # Note no www.
+    COMIC_DIV_TAG = 'img-comic-container'
+    BLOG_DIV_TAG = 'media'
+    tempfiles = []
 
-    conversion_options = {
-        'comments': description, 'tags': category, 'language': language, 'publisher': publisher
-    }
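+    # The strips are plain <img> tags on the front page rather than separate
+    # articles, so each one is wrapped in a tiny local HTML file that the
+    # feed can then point to as its article URL.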
+    # Creates a temp file for the wrapped image url
+    def writeImage(self, title, imageURL) :
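+        # delete=False keeps the file on disk after close(); the recipe
+        # unlinks it itself in cleanup() once the download run is over.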
+        tempFile = tempfile.NamedTemporaryFile(delete=False)
+        self.tempfiles.append(tempFile)
+        tempFile.write('<html><head><title>'+title+'</title></head><body>')
+        tempFile.write(imageURL.prettify())
+        tempFile.write('</body></html>')
+        tempFile.flush()
+        tempFile.close()
+        return tempFile.name
 
-    feeds = [(u'Dilbert', u'http://feed.dilbert.com/dilbert/daily_strip')]
+    def cleanUpTempFiles(self):
+        for tempFile in self.tempfiles:
+            tempFile.close()
+            os.unlink(tempFile.name)
 
-    preprocess_regexps = [
-        (re.compile('strip\..*\.gif', re.DOTALL | re.IGNORECASE),
-         lambda match: 'strip.zoom.gif')
-    ]
+    def cleanup(self):
+        self.cleanUpTempFiles()
 
-    def preprocess_html(self, soup):
-        for tag in soup.findAll(name='input'):
-            image = BeautifulSoup('<img src=' + tag['value'] + '></img>')
-            return image
+    # Extract comic links from the soup
+    # Returns a list of comics (articles) as:
+    # {
+    # 'title' : article title,
+    # 'url' : URL of print version,
+    # 'date' : The publication date of the article as a string,
+    # 'description' : A summary of the article
+    # 'content' : The full article (can be an empty string). This is used by FullContentProfile
+    # }
+    def comicFeed(self, soup) :
+        feedset = []
+        for comicContainer in soup.findAll('div', {'class': self.COMIC_DIV_TAG}) :
+            comic = comicContainer.find('img')
+            if comic is not None:
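+                # Wrap the strip <img> in a local HTML page and hand the
+                # feed a file:// URL pointing at it.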
+                filelink = self.writeImage(comic['alt'], comic)
+                feedset.append(
+                    dict(title=comic['alt'], url='file://'+filelink, description=comic['alt'], content=''))
+        return feedset
+
+    def blogFeed(self, soup) :
+        feedset = []
+        for blogContainer in soup.findAll('div', {'class': self.BLOG_DIV_TAG}) :
+            blog = blogContainer.find('a', {'class':'link-blended'})
+            if blog is not None:
+                feedset.append(
+                    dict(title=blog['title'], url=blog['href'], description=blog['title'], content=''))
+        return feedset
+
+    def parse_index(self):
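+        # Everything comes from the front page: the strips become one
+        # section and the blended blog links another.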
+        root = self.index_to_soup(self.BASE_URL)
+        comics = self.comicFeed(root)
+        blogs = self.blogFeed(root)
+        return [('Comics', comics), ('Blog Entries', blogs)]
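A minimal stand-alone sketch (not part of the commit) of the scraping pattern the new recipe uses. Assumptions: BeautifulSoup 4 from bs4 stands in for calibre's bundled parser, and the markup is invented; only the img-comic-container and media/link-blended class names come from the recipe itself.

from bs4 import BeautifulSoup

# Invented stand-in for the dilbert.com front page.
HTML = '''
<div class="img-comic-container">
  <img alt="Dilbert - sample strip" src="http://assets.example.com/strip.gif"/>
</div>
<div class="media">
  <a class="link-blended" href="http://dilbert.com/blog/post/sample" title="Sample blog entry">Sample blog entry</a>
</div>
'''

soup = BeautifulSoup(HTML, 'html.parser')

# comicFeed() pattern: one <img> per comic container becomes one article.
for container in soup.find_all('div', class_='img-comic-container'):
    comic = container.find('img')
    if comic is not None:
        print(comic['alt'], comic['src'])

# blogFeed() pattern: one blended link per media block becomes one article.
for container in soup.find_all('div', class_='media'):
    blog = container.find('a', class_='link-blended')
    if blog is not None:
        print(blog['title'], blog['href'])

To exercise the recipe itself, calibre's usual test loop applies: ebook-convert dilbert.recipe output.epub --test -vv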