Explosm by Andromeda Rabbit

2026-01-06 12:10:18 -05:00 · 2011-01-30 08:49:51 -07:00 · 2011-01-30 08:49:51 -07:00 · 0a32d53528
commit 0a32d53528
parent 8eaa9137f7 76837bbd7e
2 changed files with 56 additions and 0 deletions
--- a/resources/recipes/explosm.recipe
+++ b/resources/recipes/explosm.recipe
@@ -0,0 +1,54 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+import re
+
+class Explosm(BasicNewsRecipe):
+    '''Recipe for the Cyanide and Happiness comics in the Explosm feed.'''
+    title              = u'Explosm Rotated'
+    __author__        = 'Andromeda Rabbit'
+    description      = 'Explosm'
+    language            = 'en'
+    use_embedded_content = False
+    no_stylesheets    = True
+    oldest_article    = 24
+    remove_javascript   = True
+    remove_empty_feeds  = True
+    max_articles_per_feed = 10
+
+    feeds = [
+             (u'Explosm Feed', u'http://feeds.feedburner.com/Explosm')
+             ]
+
+    #match_regexps = [r'http://www.explosm.net/comics/.*']
+
+    keep_only_tags   = [dict(name='img', attrs={'alt':'Cyanide and Happiness, a daily webcomic'})]
+    remove_tags = [dict(name='div'), dict(name='span'), dict(name='table'), dict(name='br'), dict(name='nobr'), dict(name='a'), dict(name='b')]
+
+    extra_css = '''
+                    h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
+                    h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
+                    p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+                    body{font-family:Helvetica,Arial,sans-serif;font-size:small;}'''
+
+    def get_cover_url(self):
+        '''Return the fixed URL of the cover image.'''
+        return 'http://cdn.shopify.com/s/files/1/0059/1872/products/cyanidetitle_large.jpg?1295846286'
+
+    def parse_feeds(self):
+        '''Return the parsed feeds, keeping only articles with a comic URL.'''
+        feeds = BasicNewsRecipe.parse_feeds(self)
+        is_comic = re.compile(r'http://www.explosm.net/comics').search
+        for curfeed in feeds:
+            # Slice-assign so each feed keeps its original list object; an
+            # article without a URL cannot be a comic page, so it is dropped.
+            curfeed.articles[:] = [
+                art for art in curfeed.articles
+                if art.url and is_comic(art.url) is not None]
+        return feeds
+
+    def skip_ad_pages(self, soup):
+        '''Return soup when the comic image is absent from it, else None.'''
+        # Skip ad pages served before actual article
+        skip_tag = soup.find(name='img', attrs={'alt':'Cyanide and Happiness, a daily webcomic'})
+        if skip_tag is None:
+            return soup
+        return None
--- a/src/calibre/ebooks/txt/txtml.py
+++ b/src/calibre/ebooks/txt/txtml.py
@@ -222,6 +222,8 @@ class TXTMLizer(object):
        # Scene breaks.
        if tag == 'hr':
            text.append('\n\n* * *\n\n')
+        elif style['margin-top']:
+            text.append('\n\n' + '\n' * round(style['margin-top']))

        # Process tags that contain text.
        if hasattr(elem, 'text') and elem.text: