Update Dilbert

parent 9392c1972b
commit 1d618a5627
@@ -1,42 +1,80 @@
-__license__ = 'GPL v3'
-__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
-'''
-http://www.dilbert.com
-DrMerry added cover Image 2011-11-12
-'''
+#!/usr/bin/env python2
+# vim:fileencoding=utf-8
 
+import os
+import tempfile
 from calibre.web.feeds.recipes import BasicNewsRecipe
 from calibre.ebooks.BeautifulSoup import BeautifulSoup
 import re
 
 
-class DilbertBig(BasicNewsRecipe):
-    title = 'Dilbert'
-    __author__ = 'Darko Miletic and Starson17 contribution of DrMerry'
-    description = 'Dilbert'
-    reverse_article_order = True
-    oldest_article = 15
-    max_articles_per_feed = 100
-    no_stylesheets = True
-    use_embedded_content = False
-    encoding = 'utf-8'
-    publisher = 'UNITED FEATURE SYNDICATE, INC.'
-    category = 'comic'
+class Dilbert(BasicNewsRecipe):
+    title = u'Dilbert'
+    __author__ = 'TechnoCat'
+    description = 'Dilbert, by Scott Adams. Includes last three or so comics and blog entries.'
+    cover_url = 'http://dilbert.com/assets/dilbert-logo-4152bd0c31f7de7443b4bc90abd818da.png'
+    auto_cleanup = True
+    encoding = 'utf8'
     language = 'en'
-    cover_url = 'http://dilbert.com/mobile/mobile/dilbert.app.icon.png'
+    needs_subscription = False
+    no_stylesheets = True
+    oldest_article = 7
+    remove_javascript = True
+    recursions = 0
+    max_articles_per_feed = 20
+    debugMessages = True
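+    # Scraping hooks (editor's note): the class names of the front-page
+    # containers that hold the comic strips and the blended blog links.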
+    BASE_URL = 'http://dilbert.com'  # Note no www.
+    COMIC_DIV_TAG = 'img-comic-container'
+    BLOG_DIV_TAG = 'media'
+    tempfiles = []
 
-    conversion_options = {
-        'comments': description, 'tags': category, 'language': language, 'publisher': publisher
-    }
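+    # The strips are plain <img> tags on the front page rather than separate
+    # articles, so each one is wrapped in a tiny local HTML file that the
+    # feed can then point to as its article URL.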
+    # Creates a temp file for the wrapped image url
+    def writeImage(self, title, imageURL) :
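+        # delete=False keeps the file on disk after close(); the recipe
+        # unlinks it itself in cleanup() once the download run is over.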
+        tempFile = tempfile.NamedTemporaryFile(delete=False)
+        self.tempfiles.append(tempFile)
+        tempFile.write('<html><head><title>'+title+'</title></head><body>')
+        tempFile.write(imageURL.prettify())
+        tempFile.write('</body></html>')
+        tempFile.flush()
+        tempFile.close()
+        return tempFile.name
 
-    feeds = [(u'Dilbert', u'http://feed.dilbert.com/dilbert/daily_strip')]
+    def cleanUpTempFiles(self):
+        for tempFile in self.tempfiles:
+            tempFile.close()
+            os.unlink(tempFile.name)
 
-    preprocess_regexps = [
-        (re.compile('strip\..*\.gif', re.DOTALL | re.IGNORECASE),
-         lambda match: 'strip.zoom.gif')
-    ]
+    def cleanup(self):
+        self.cleanUpTempFiles()
 
-    def preprocess_html(self, soup):
-        for tag in soup.findAll(name='input'):
-            image = BeautifulSoup('<img src=' + tag['value'] + '></img>')
-            return image
+    # Extract comic links from the soup
+    # Returns a list of comics (articles) as:
+    # {
+    # 'title' : article title,
+    # 'url' : URL of print version,
+    # 'date' : The publication date of the article as a string,
+    # 'description' : A summary of the article
+    # 'content' : The full article (can be an empty string). This is used by FullContentProfile
+    # }
+    def comicFeed(self, soup) :
+        feedset = []
+        for comicContainer in soup.findAll('div', {'class': self.COMIC_DIV_TAG}) :
+            comic = comicContainer.find('img')
+            if comic is not None:
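+                # Wrap the strip <img> in a local HTML page and hand the
+                # feed a file:// URL pointing at it.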
+                filelink = self.writeImage(comic['alt'], comic)
+                feedset.append(
+                    dict(title=comic['alt'], url='file://'+filelink, description=comic['alt'], content=''))
+        return feedset
+
+    def blogFeed(self, soup) :
+        feedset = []
+        for blogContainer in soup.findAll('div', {'class': self.BLOG_DIV_TAG}) :
+            blog = blogContainer.find('a', {'class':'link-blended'})
+            if blog is not None:
+                feedset.append(
+                    dict(title=blog['title'], url=blog['href'], description=blog['title'], content=''))
+        return feedset
+
+    def parse_index(self):
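+        # Everything comes from the front page: the strips become one
+        # section and the blended blog links another.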
+        root = self.index_to_soup(self.BASE_URL)
+        comics = self.comicFeed(root)
+        blogs = self.blogFeed(root)
+        return [('Comics', comics), ('Blog Entries', blogs)]
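A minimal stand-alone sketch (not part of the commit) of the scraping pattern the new recipe uses. Assumptions: BeautifulSoup 4 from bs4 stands in for calibre's bundled parser, and the markup is invented; only the img-comic-container and media/link-blended class names come from the recipe itself.

from bs4 import BeautifulSoup

# Invented stand-in for the dilbert.com front page.
HTML = '''
<div class="img-comic-container">
  <img alt="Dilbert - sample strip" src="http://assets.example.com/strip.gif"/>
</div>
<div class="media">
  <a class="link-blended" href="http://dilbert.com/blog/post/sample" title="Sample blog entry">Sample blog entry</a>
</div>
'''

soup = BeautifulSoup(HTML, 'html.parser')

# comicFeed() pattern: one <img> per comic container becomes one article.
for container in soup.find_all('div', class_='img-comic-container'):
    comic = container.find('img')
    if comic is not None:
        print(comic['alt'], comic['src'])

# blogFeed() pattern: one blended link per media block becomes one article.
for container in soup.find_all('div', class_='media'):
    blog = container.find('a', class_='link-blended')
    if blog is not None:
        print(blog['title'], blog['href'])

To exercise the recipe itself, calibre's usual test loop applies: ebook-convert dilbert.recipe output.epub --test -vv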