From a7cab66b6f851d27e23d1799eecc85c6769aeca7 Mon Sep 17 00:00:00 2001
From: NiLuJe <ninuje@gmail.com>
Date: Fri, 14 Sep 2012 17:47:38 +0200
Subject: [PATCH 1/2] Tweak XKCD recipe: Add a masthead image, put the strip
 title in an h1 tag

---
 recipes/xkcd.recipe | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)
diff --git a/recipes/xkcd.recipe b/recipes/xkcd.recipe
index 42dceda65b..2aa704992e 100644
--- a/recipes/xkcd.recipe
+++ b/recipes/xkcd.recipe
@@ -2,6 +2,8 @@ __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
 '''
 Changelog:
+2012-04-06
+Fixed empty articles, added masthead img (NiLuJe)
 2011-09-24
 Changed cover (drMerry)
 '''
@@ -13,7 +15,8 @@ import time, re
 from calibre.web.feeds.news import BasicNewsRecipe
 
 class XkcdCom(BasicNewsRecipe):
-    cover_url = 'http://imgs.xkcd.com/s/9be30a7.png'
+    cover_url = 'http://imgs.xkcd.com/static/terrible_small_logo.png'
+    masthead_url = 'http://imgs.xkcd.com/static/terrible_small_logo.png'
     title = 'xkcd'
     description = 'A webcomic of romance and math humor.'
     __author__ = 'Martin Pitt updated by DrMerry.'
@@ -21,13 +24,14 @@ class XkcdCom(BasicNewsRecipe):
 
     use_embedded_content   = False
     oldest_article = 60
-    keep_only_tags = [dict(id='middleContainer')]
-    remove_tags = [dict(name='ul'), dict(name='h3'), dict(name='br')]
+    #keep_only_tags = [dict(id='middleContainer')]
+    #remove_tags = [dict(name='ul'), dict(name='h3'), dict(name='br')]
+    keep_only_tags = [dict(id='comic')]
     no_stylesheets = True
-    # turn image bubblehelp into a paragraph
+    # turn image bubblehelp into a paragraph, and put alt in a heading
     preprocess_regexps = [
-        (re.compile(r'(<img.*title=")([^"]+)(".*>)'),
-         lambda m: '%s%s<p>%s</p>' % (m.group(1), m.group(3), m.group(2)))
+        (re.compile(r'(<img.*title=")([^"]+)(".alt=")([^"]+)(".*>)'),
+         lambda m: '<h1>%s</h1>%s%s%s<p>%s</p>' % (m.group(4), m.group(1), m.group(3), m.group(5), m.group(2)))
     ]
 
     def parse_index(self):

From ad0123a2b03c4e7cf375dd45cb6f66f93615ac93 Mon Sep 17 00:00:00 2001
From: NiLuJe <ninuje@gmail.com>
Date: Fri, 14 Sep 2012 17:49:03 +0200
Subject: [PATCH 2/2] Add a recipe for DogHouse Diaries, an online comic

---
 recipes/doghousediaries.recipe | 52 ++++++++++++++++++++++++++++++++++
 1 file changed, 52 insertions(+)
 create mode 100644 recipes/doghousediaries.recipe

diff --git a/recipes/doghousediaries.recipe b/recipes/doghousediaries.recipe
new file mode 100644
index 0000000000..e52db094b1
--- /dev/null
+++ b/recipes/doghousediaries.recipe
@@ -0,0 +1,52 @@
+__license__   = 'GPL v3'
+__copyright__ = '2010-2012, NiLuJe <niluje at ak-team.com>'
+
+'''
+Fetch DoghouseDiaries.
+'''
+
+import re
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class DoghouseDiaries(BasicNewsRecipe):
+    '''Fetch the most recent Doghouse Diaries comics via the site's 'Quick Archive' menu.'''
+
+    title = 'Doghouse Diaries'
+    description = 'A webcomic.'
+    __author__ = 'NiLuJe'
+    language = 'en'
+    use_embedded_content = False
+
+    # Used as a comic count rather than a number of days: the quick archive carries no per-comic dates.
+    oldest_article = 14
+
+    cover_url = 'http://www.thedoghousediaries.com/logos/logo3.png'
+    masthead_url = cover_url
+
+    keep_only_tags = [dict(name='img', attrs={'class': re.compile(r'comic-item')}), dict(name='h1'), dict(name='div', attrs={'class':'entry'}), dict(name='p', id='alttext')]
+    remove_tags = [dict(name='div', attrs={'class':'pin-it-btn-wrapper'}), dict(name='span'), dict(name='div', id='wp_fb_like_button')]
+    remove_attributes = ['width', 'height']
+    no_stylesheets = True
+
+    # Turn image bubblehelp into a paragraph placed right after the image. NOTE: this runs before the remove_tags cleanup, so only the
+    # comic img (src under /comics/) may match, not the pinterest one pulled by the entry div; [^>]* keeps each match inside one tag.
+    preprocess_regexps = [
+        (re.compile(r'(<img[^>]*src="http://(?:www\.)?thedoghousediaries\.com/comics/[^>]*title=")([^"]+)("[^>]*>)'),
+         lambda m: '%s%s<p id="alttext"><strong>%s</strong></p>' % (m.group(1), m.group(3), m.group(2)))
+    ]
+
+    def parse_index(self):
+        '''Return a single 'Doghouse Diaries' feed listing at most oldest_article comics from the front page.'''
+        soup = self.index_to_soup('http://www.thedoghousediaries.com/')
+        # Since the feed sucks, and there's no real archive, we use the 'Quick Archive' menu. We can't get dates from it, so the
+        # limit is the comic count plus one, leaving room for the menu's own placeholder entry.
+        options = soup.findAll('option', limit=self.oldest_article + 1)
+        # Skip the quick archive placeholder itself (value '0') and any option that has no value attribute.
+        articles = [
+            {'title': self.tag_to_string(item), 'url': item['value'], 'description': '', 'content': ''}
+            for item in options
+            if item.get('value') not in (None, '0')
+        ]
+
+        return [('Doghouse Diaries', articles)]
+