Fix Dilbert feed for new Dilbert website

Kovid Goyal 2008-01-29 04:43:22 +00:00
parent 178936b977
commit 997367ed56
2 changed files with 48 additions and 19 deletions

View File

@ -15,7 +15,7 @@
'''
'''
import tempfile, time, calendar, re, operator
import tempfile, time, calendar, re, operator, atexit, shutil, os
from htmlentitydefs import name2codepoint
from libprs500 import __appname__, iswindows, browser
@ -100,15 +100,10 @@ class DefaultProfile(object):
self.url = 'file:'+ ('' if iswindows else '//') + self.build_index()
except NotImplementedError:
self.url = None
def __del__(self):
import os, shutil
if os.path.isdir(self.temp_dir):
shutil.rmtree(self.temp_dir)
atexit.register(cleanup, self.temp_dir)
def build_index(self):
'''Build an RSS based index.html'''
import os
articles = self.parse_feeds()
@ -168,6 +163,8 @@ class DefaultProfile(object):
'''
if not tag:
return ''
if isinstance(tag, basestring):
return tag
strings = []
for item in tag.contents:
if isinstance(item, (NavigableString, CData)):
@ -180,6 +177,19 @@ class DefaultProfile(object):
strings.append(item['alt'])
return u''.join(strings)
def get_article_url(self, item):
'''
Return the article URL given an item Tag from a feed, or None if no valid URL is found.
@param item: A BeautifulSoup Tag instance corresponding to the <item> tag from a feed.
'''
url = None
for element in self.url_search_order:
url = item.find(element)
if url:
break
return url
def parse_feeds(self, require_url=True):
'''
Create list of articles from a list of feeds.
@ -220,15 +230,14 @@ class DefaultProfile(object):
continue
pubdate = self.tag_to_string(pubdate)
pubdate = pubdate.replace('+0000', 'GMT')
for element in self.url_search_order:
url = item.find(element)
if url:
break
if require_url and (not url or not url.string):
url = self.get_article_url(item)
url = self.tag_to_string(url)
if require_url and not url:
self.logger.debug('Skipping article as it does not have a link url')
continue
url = self.tag_to_string(url)
content = item.find('content:encoded')
if not content:
@ -362,7 +371,6 @@ class FullContentProfile(DefaultProfile):
def build_index(self):
'''Build an RSS based index.html'''
import os
articles = self.parse_feeds(require_url=False)
def build_sub_index(title, items):
@ -449,3 +457,10 @@ def create_class(src):
if item.__name__ not in ['DefaultProfile', 'FullContentProfile']:
return item
def cleanup(tdir):
try:
if os.path.isdir(tdir):
shutil.rmtree(tdir)
except:
# ignore errors: the directory may already be gone or the interpreter may be shutting down
pass
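
Read together, the profiles.py hunks make two changes: temp-directory removal moves out of __del__ (which is not guaranteed to run, and may find os and shutil already torn down at interpreter exit) into a module-level cleanup() registered with atexit, and the per-item URL lookup is factored into an overridable get_article_url() hook so that profiles such as Dilbert below can supply their own. A minimal, self-contained sketch of the atexit pattern follows; the variable name temp_dir and the mkdtemp prefix are illustrative, not taken from the commit.

import atexit, os, shutil, tempfile

def cleanup(tdir):
    # Remove the temp dir if it still exists; swallow errors, since this may
    # run while the interpreter is shutting down.
    try:
        if os.path.isdir(tdir):
            shutil.rmtree(tdir)
    except:
        pass

temp_dir = tempfile.mkdtemp(prefix='web_profile_')
atexit.register(cleanup, temp_dir)   # runs at exit even when __del__ never would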

View File

@ -19,7 +19,7 @@
'''
Fetch Dilbert.
'''
import os
from libprs500.ebooks.lrf.web.profiles import DefaultProfile
@ -27,7 +27,7 @@ class Dilbert(DefaultProfile):
title = 'Dilbert'
timefmt = ' [%d %b %Y]'
max_recursions = 1
max_recursions = 2
max_articles_per_feed = 6
html_description = True
no_stylesheets = True
@ -35,3 +35,17 @@ class Dilbert(DefaultProfile):
def get_feeds(self):
return [ ('Dilbert', 'http://feeds.feedburner.com/tapestrydilbert') ]
def get_article_url(self, item):
return item.find('enclosure')['url']
def build_index(self):
index = os.path.join(self.temp_dir, 'index.html')
articles = list(self.parse_feeds(require_url=False).values())[0]
res = ''
for item in articles:
res += '<h3>%s</h3><img style="page-break-after:always" src="%s" />\n'%(item['title'], item['url'])
res = '<html><body><h1>Dilbert</h1>%s</body></html>'%res
open(index, 'wb').write(res)
return index
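
The feedburner Dilbert feed carries each strip as an <enclosure url="..."/> on the item rather than in <link>, which is why the profile overrides get_article_url() to return enclosure['url'] and why build_index() only has to emit one <img> per article. Below is a stand-alone sketch of that lookup, using the standard library's ElementTree instead of the bundled BeautifulSoup, with a made-up sample item (the real feed's markup will differ).

import xml.etree.ElementTree as ET

# Illustrative item only; not copied from the real feedburner feed.
SAMPLE_ITEM = '''
<item>
  <title>Dilbert strip for 2008-01-28</title>
  <enclosure url="http://example.com/strips/2008-01-28.gif" type="image/gif"/>
</item>
'''

def get_article_url(item):
    # Mirrors the profile's override: the strip URL lives in the enclosure's
    # url attribute; return None if the item has no enclosure.
    enc = item.find('enclosure')
    return enc.get('url') if enc is not None else None

item = ET.fromstring(SAMPLE_ITEM)
print(get_article_url(item))   # http://example.com/strips/2008-01-28.gif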