mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Fix Dilbert feed for new Dilbert website
This commit is contained in:
parent
178936b977
commit
997367ed56
@ -15,7 +15,7 @@
|
|||||||
'''
|
'''
|
||||||
'''
|
'''
|
||||||
|
|
||||||
import tempfile, time, calendar, re, operator
|
import tempfile, time, calendar, re, operator, atexit, shutil, os
|
||||||
from htmlentitydefs import name2codepoint
|
from htmlentitydefs import name2codepoint
|
||||||
|
|
||||||
from libprs500 import __appname__, iswindows, browser
|
from libprs500 import __appname__, iswindows, browser
|
||||||
@ -100,15 +100,10 @@ class DefaultProfile(object):
|
|||||||
self.url = 'file:'+ ('' if iswindows else '//') + self.build_index()
|
self.url = 'file:'+ ('' if iswindows else '//') + self.build_index()
|
||||||
except NotImplementedError:
|
except NotImplementedError:
|
||||||
self.url = None
|
self.url = None
|
||||||
|
atexit.register(cleanup, self.temp_dir)
|
||||||
def __del__(self):
|
|
||||||
import os, shutil
|
|
||||||
if os.path.isdir(self.temp_dir):
|
|
||||||
shutil.rmtree(self.temp_dir)
|
|
||||||
|
|
||||||
def build_index(self):
|
def build_index(self):
|
||||||
'''Build an RSS based index.html'''
|
'''Build an RSS based index.html'''
|
||||||
import os
|
|
||||||
articles = self.parse_feeds()
|
articles = self.parse_feeds()
|
||||||
|
|
||||||
|
|
||||||
@ -168,6 +163,8 @@ class DefaultProfile(object):
|
|||||||
'''
|
'''
|
||||||
if not tag:
|
if not tag:
|
||||||
return ''
|
return ''
|
||||||
|
if isinstance(tag, basestring):
|
||||||
|
return tag
|
||||||
strings = []
|
strings = []
|
||||||
for item in tag.contents:
|
for item in tag.contents:
|
||||||
if isinstance(item, (NavigableString, CData)):
|
if isinstance(item, (NavigableString, CData)):
|
||||||
@ -180,6 +177,19 @@ class DefaultProfile(object):
|
|||||||
strings.append(item['alt'])
|
strings.append(item['alt'])
|
||||||
return u''.join(strings)
|
return u''.join(strings)
|
||||||
|
|
||||||
|
def get_article_url(self, item):
    '''
    Locate the element of a feed item that holds the article URL.

    The names in self.url_search_order are tried in order and the first
    truthy result of item.find() is returned as-is (the caller is
    responsible for turning it into a string).

    @param item: A BeautifulSoup Tag instance corresponding to the <item> tag from a feed.
    @return: The first truthy match; otherwise the result of the last
             lookup, or None when self.url_search_order is empty.
    '''
    match = None
    for tag_name in self.url_search_order:
        match = item.find(tag_name)
        if match:
            # First hit wins; later names are never queried.
            return match
    return match
|
||||||
|
|
||||||
|
|
||||||
def parse_feeds(self, require_url=True):
|
def parse_feeds(self, require_url=True):
|
||||||
'''
|
'''
|
||||||
Create list of articles from a list of feeds.
|
Create list of articles from a list of feeds.
|
||||||
@ -220,15 +230,14 @@ class DefaultProfile(object):
|
|||||||
continue
|
continue
|
||||||
pubdate = self.tag_to_string(pubdate)
|
pubdate = self.tag_to_string(pubdate)
|
||||||
pubdate = pubdate.replace('+0000', 'GMT')
|
pubdate = pubdate.replace('+0000', 'GMT')
|
||||||
for element in self.url_search_order:
|
|
||||||
url = item.find(element)
|
url = self.get_article_url(item)
|
||||||
if url:
|
|
||||||
break
|
|
||||||
|
url = self.tag_to_string(url)
|
||||||
if require_url and (not url or not url.string):
|
if require_url and not url:
|
||||||
self.logger.debug('Skipping article as it does not have a link url')
|
self.logger.debug('Skipping article as it does not have a link url')
|
||||||
continue
|
continue
|
||||||
url = self.tag_to_string(url)
|
|
||||||
|
|
||||||
content = item.find('content:encoded')
|
content = item.find('content:encoded')
|
||||||
if not content:
|
if not content:
|
||||||
@ -362,7 +371,6 @@ class FullContentProfile(DefaultProfile):
|
|||||||
|
|
||||||
def build_index(self):
|
def build_index(self):
|
||||||
'''Build an RSS based index.html'''
|
'''Build an RSS based index.html'''
|
||||||
import os
|
|
||||||
articles = self.parse_feeds(require_url=False)
|
articles = self.parse_feeds(require_url=False)
|
||||||
|
|
||||||
def build_sub_index(title, items):
|
def build_sub_index(title, items):
|
||||||
@ -448,4 +456,11 @@ def create_class(src):
|
|||||||
if hasattr(item, 'build_index'):
|
if hasattr(item, 'build_index'):
|
||||||
if item.__name__ not in ['DefaultProfile', 'FullContentProfile']:
|
if item.__name__ not in ['DefaultProfile', 'FullContentProfile']:
|
||||||
return item
|
return item
|
||||||
|
|
||||||
|
def cleanup(tdir):
    '''
    Best-effort removal of a temporary directory tree.

    Never raises for filesystem problems: a directory that is missing,
    locked or only partially removable is silently left behind.

    @param tdir: Path of the directory to delete. A falsy value is ignored.
    '''
    if not tdir:
        return
    try:
        if os.path.isdir(tdir):
            shutil.rmtree(tdir)
    except EnvironmentError:
        # Only filesystem errors (OSError/IOError) are swallowed, so that
        # KeyboardInterrupt, SystemExit and genuine programming errors are
        # no longer hidden the way a bare ``except:`` hid them.
        pass
|
@ -19,7 +19,7 @@
|
|||||||
'''
|
'''
|
||||||
Fetch Dilbert.
|
Fetch Dilbert.
|
||||||
'''
|
'''
|
||||||
|
import os
|
||||||
|
|
||||||
from libprs500.ebooks.lrf.web.profiles import DefaultProfile
|
from libprs500.ebooks.lrf.web.profiles import DefaultProfile
|
||||||
|
|
||||||
@ -27,11 +27,25 @@ class Dilbert(DefaultProfile):
|
|||||||
|
|
||||||
title = 'Dilbert'
|
title = 'Dilbert'
|
||||||
timefmt = ' [%d %b %Y]'
|
timefmt = ' [%d %b %Y]'
|
||||||
max_recursions = 1
|
max_recursions = 2
|
||||||
max_articles_per_feed = 6
|
max_articles_per_feed = 6
|
||||||
html_description = True
|
html_description = True
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
|
|
||||||
def get_feeds(self):
|
def get_feeds(self):
|
||||||
return [ ('Dilbert', 'http://feeds.feedburner.com/tapestrydilbert') ]
|
return [ ('Dilbert', 'http://feeds.feedburner.com/tapestrydilbert') ]
|
||||||
|
|
||||||
|
def get_article_url(self, item):
    '''
    Return the value of the 'url' attribute of the item's <enclosure> element.

    @param item: Feed item object providing find(); the result of
                 item.find('enclosure') is indexed with 'url'.
    @return: The 'url' attribute value, or None when the item has no
             <enclosure> element or the element has no 'url' attribute.
    '''
    enclosure = item.find('enclosure')
    if enclosure is None:
        # Previously this fell through to None['url'] and raised TypeError.
        return None
    try:
        return enclosure['url']
    except KeyError:
        return None
|
||||||
|
|
||||||
|
def build_index(self):
    '''
    Write a one-page index.html with a heading and an image for every article.

    Articles are taken from the first value of the mapping returned by
    self.parse_feeds(require_url=False); each article's 'title' and 'url'
    are interpolated into an <h3>/<img> pair. An empty mapping produces a
    page that contains only the main heading.

    @return: Path of the index.html file created inside self.temp_dir.
    '''
    index = os.path.join(self.temp_dir, 'index.html')
    feeds = list(self.parse_feeds(require_url=False).values())
    # Guard against an empty result instead of failing with IndexError.
    articles = feeds[0] if feeds else []
    entries = [
        '<h3>%s</h3><img style="page-break-after:always" src="%s" />\n'
        % (item['title'], item['url'])
        for item in articles
    ]
    # The closing tag used to be emitted as '</html' (missing '>').
    res = '<html><body><h1>Dilbert</h1>%s</body></html>' % ''.join(entries)
    if isinstance(res, type(u'')):
        # The file is opened in binary mode; encode text explicitly so that
        # non-ASCII titles do not fail on an implicit ASCII conversion.
        res = res.encode('utf-8')
    f = open(index, 'wb')
    try:
        f.write(res)
    finally:
        # Close deterministically rather than relying on garbage collection.
        f.close()
    return index
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user