Explosm by Andromeda Rabbit

2025-07-09 03:04:10 -04:00 · 2011-01-30 08:49:51 -07:00 · 2011-01-30 08:49:51 -07:00 · 0a32d53528
commit 0a32d53528
parent 8eaa9137f7 76837bbd7e
2 changed files with 56 additions and 0 deletions
--- a/resources/recipes/explosm.recipe
+++ b/resources/recipes/explosm.recipe
@@ -0,0 +1,54 @@
 from calibre.web.feeds.news import BasicNewsRecipe
 import re
 class Explosm(BasicNewsRecipe):
    """Calibre news recipe for the Explosm feed.

    Articles are read from the FeedBurner feed listed in ``feeds``; entries
    whose URL does not point at an explosm.net comic page are dropped in
    ``parse_feeds``, and only the comic image itself is kept from each
    downloaded page (see ``keep_only_tags``).
    """

    title = u'Explosm Rotated'
    __author__ = 'Andromeda Rabbit'
    description = 'Explosm'
    language = 'en'
    use_embedded_content = False
    no_stylesheets = True
    oldest_article = 24
    remove_javascript = True
    remove_empty_feeds = True
    max_articles_per_feed = 10
    feeds = [
        (u'Explosm Feed', u'http://feeds.feedburner.com/Explosm'),
    ]
    #match_regexps = [r'http://www.explosm.net/comics/.*']
    # Keep only the comic image, identified by its alt text.
    keep_only_tags = [
        dict(name='img', attrs={'alt': 'Cyanide and Happiness, a daily webcomic'}),
    ]
    remove_tags = [
        dict(name='div'),
        dict(name='span'),
        dict(name='table'),
        dict(name='br'),
        dict(name='nobr'),
        dict(name='a'),
        dict(name='b'),
    ]
    extra_css = '''
                    h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
                    h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
                    p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
                    body{font-family:Helvetica,Arial,sans-serif;font-size:small;}'''

    def get_cover_url(self):
        """Return the fixed URL of the image used as the cover."""
        return 'http://cdn.shopify.com/s/files/1/0059/1872/products/cyanidetitle_large.jpg?1295846286'

    def parse_feeds(self):
        """Parse the feeds and drop every article that is not a comic page.

        Returns the feed list produced by ``BasicNewsRecipe.parse_feeds``,
        with each feed's ``articles`` list filtered in place so that only
        articles whose URL contains the explosm.net comics address remain.
        """
        # Plain substring test: the address is a literal, so the dots must
        # not act as regex wildcards.
        comic_marker = 'http://www.explosm.net/comics'
        feeds = BasicNewsRecipe.parse_feeds(self)
        for feed in feeds:
            # Slice-assign so the existing list object is updated rather
            # than replaced, and the filtering is done in a single pass.
            feed.articles[:] = [
                article for article in feed.articles
                if comic_marker in article.url
            ]
        return feeds

    def skip_ad_pages(self, soup):
        """Return ``soup`` when the comic image is absent, otherwise ``None``.

        The comic image is looked up by the same alt text that
        ``keep_only_tags`` uses.
        """
        # NOTE(review): the original comment says this skips ad pages served
        # before the actual article; confirm against the BasicNewsRecipe
        # documentation that returning the soup object is the intended
        # return value for pages without the comic image.
        comic_img = soup.find(
            name='img',
            attrs={'alt': 'Cyanide and Happiness, a daily webcomic'})
        if comic_img is None:
            return soup
        return None
--- a/src/calibre/ebooks/txt/txtml.py
+++ b/src/calibre/ebooks/txt/txtml.py
@@ -222,6 +222,8 @@ class TXTMLizer(object):
        # Scene breaks.
        if tag == 'hr':
            text.append('\n\n* * *\n\n')
        elif style['margin-top']:
            text.append('\n\n' + '\n' * round(style['margin-top']))
        # Process tags that contain text.
        if hasattr(elem, 'text') and elem.text: