Mirror of https://github.com/kovidgoyal/calibre.git (synced 2025-07-09 03:04:10 -04:00)

Commit 9884e55a3b: Update xkcd and add doghousediaries by NiLuJe

New file: recipes/doghousediaries.recipe (52 lines)

@@ -0,0 +1,52 @@
__license__ = 'GPL v3'
__copyright__ = '2010-2012, NiLuJe <niluje at ak-team.com>'

'''
Fetch DoghouseDiaries.
'''

import re

from calibre.web.feeds.news import BasicNewsRecipe


class DoghouseDiaries(BasicNewsRecipe):
    title = 'Doghouse Diaries'
    description = 'A webcomic.'
    __author__ = 'NiLuJe'
    language = 'en'

    use_embedded_content = False
    # 14 comics per fetch (not really days, but we can't easily get the date of an individual comic short of parsing each one)
    oldest_article = 14

    cover_url = 'http://www.thedoghousediaries.com/logos/logo3.png'
    masthead_url = 'http://www.thedoghousediaries.com/logos/logo3.png'

    keep_only_tags = [dict(name='img', attrs={'class': re.compile("comic-item*")}), dict(name='h1'), dict(name='div', attrs={'class': 'entry'}), dict(name='p', id='alttext')]
    remove_tags = [dict(name='div', attrs={'class': 'pin-it-btn-wrapper'}), dict(name='span'), dict(name='div', id='wp_fb_like_button')]
    remove_attributes = ['width', 'height']
    no_stylesheets = True

    # Turn the image bubblehelp (title attribute) into a paragraph (NOTE: this runs before the remove_tags cleanup, so make sure only the comic-item img is matched, not the Pinterest one pulled in by the entry div)
    preprocess_regexps = [
        (re.compile(r'(<img.*src="http://thedoghousediaries.com/comics/.*title=")([^"]+)(".*>)'),
         lambda m: '%s%s<p id="alttext"><strong>%s</strong></p>' % (m.group(1), m.group(3), m.group(2)))
    ]

    def parse_index(self):
        INDEX = 'http://www.thedoghousediaries.com/'

        soup = self.index_to_soup(INDEX)
        articles = []
        # The feed is unreliable and there is no real archive, so walk the 'Quick Archive' drop-down instead; it carries no dates, so stop after oldest_article comics.
        for item in soup.findAll('option', {}, True, None, self.oldest_article + 1):
            # Skip the quick archive entry itself
            if item['value'] != '0':
                articles.append({
                    'title': self.tag_to_string(item).encode('UTF-8'),
                    'url': item['value'],
                    'description': '',
                    'content': '',
                })

        return [('Doghouse Diaries', articles)]
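For context, a minimal sketch of what the preprocess_regexps entry in the recipe above does to a fetched comic page. The img tag below is hypothetical (the URL, class and attribute values are made up to fit the pattern, not taken from the site): the title text is lifted out of the tag into a <p id="alttext"> paragraph, which keep_only_tags then retains.

import re

# Pattern and replacement copied from the recipe above
pattern = re.compile(r'(<img.*src="http://thedoghousediaries.com/comics/.*title=")([^"]+)(".*>)')
repl = lambda m: '%s%s<p id="alttext"><strong>%s</strong></p>' % (m.group(1), m.group(3), m.group(2))

# Hypothetical markup shaped like a comic page; not real site content
html = '<img class="comic-item" src="http://thedoghousediaries.com/comics/some-comic.png" title="hover text" alt="comic">'
print(pattern.sub(repl, html))
# <img class="comic-item" src="http://thedoghousediaries.com/comics/some-comic.png" title="" alt="comic"><p id="alttext"><strong>hover text</strong></p>

To try the recipe locally, the usual calibre workflow should apply, e.g. ebook-convert doghousediaries.recipe .epub --test from a shell with calibre installed.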
Modified file: recipes/xkcd.recipe

@@ -2,6 +2,8 @@ __license__ = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
 '''
 Changelog:
+2012-04-06
+Fixed empty articles, added masthead img (NiLuJe)
 2011-09-24
 Changed cover (drMerry)
 '''
@@ -13,7 +15,8 @@ import time, re
 from calibre.web.feeds.news import BasicNewsRecipe

 class XkcdCom(BasicNewsRecipe):
-    cover_url = 'http://imgs.xkcd.com/s/9be30a7.png'
+    cover_url = 'http://imgs.xkcd.com/static/terrible_small_logo.png'
+    masthead_url = 'http://imgs.xkcd.com/static/terrible_small_logo.png'
     title = 'xkcd'
     description = 'A webcomic of romance and math humor.'
     __author__ = 'Martin Pitt updated by DrMerry.'
@@ -21,13 +24,14 @@ class XkcdCom(BasicNewsRecipe):

     use_embedded_content = False
     oldest_article = 60
-    keep_only_tags = [dict(id='middleContainer')]
-    remove_tags = [dict(name='ul'), dict(name='h3'), dict(name='br')]
+    #keep_only_tags = [dict(id='middleContainer')]
+    #remove_tags = [dict(name='ul'), dict(name='h3'), dict(name='br')]
+    keep_only_tags = [dict(id='comic')]
     no_stylesheets = True
-    # turn image bubblehelp into a paragraph
+    # turn image bubblehelp into a paragraph, and put alt in a heading
     preprocess_regexps = [
-        (re.compile(r'(<img.*title=")([^"]+)(".*>)'),
-         lambda m: '%s%s<p>%s</p>' % (m.group(1), m.group(3), m.group(2)))
+        (re.compile(r'(<img.*title=")([^"]+)(".alt=")([^"]+)(".*>)'),
+         lambda m: '<h1>%s</h1>%s%s%s<p>%s</p>' % (m.group(4), m.group(1), m.group(3), m.group(5), m.group(2)))
     ]

     def parse_index(self):
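Below is a minimal sketch of what the updated xkcd regexp does, using a hypothetical img tag in the title-then-alt attribute order the new pattern assumes (the URL and text are made up): the alt text becomes an <h1> above the image and the title text becomes a trailing paragraph. Together with the switch from id='middleContainer' to id='comic', this appears to be what the "Fixed empty articles" changelog entry refers to.

import re

# Updated pattern and replacement from the diff above
pattern = re.compile(r'(<img.*title=")([^"]+)(".alt=")([^"]+)(".*>)')
repl = lambda m: '<h1>%s</h1>%s%s%s<p>%s</p>' % (m.group(4), m.group(1), m.group(3), m.group(5), m.group(2))

# Hypothetical markup; not a real comic page
html = '<img src="http://imgs.xkcd.com/comics/example.png" title="mouseover joke" alt="Example">'
print(pattern.sub(repl, html))
# <h1>Example</h1><img src="http://imgs.xkcd.com/comics/example.png" title="" alt=""><p>mouseover joke</p>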