from calibre.web.feeds.news import BasicNewsRecipe
import re


class Explosm(BasicNewsRecipe):
    """Recipe for the Explosm feed that keeps only the comic image.

    Articles are read from the FeedBurner feed, entries whose URL does not
    point at ``http://www.explosm.net/comics`` are dropped in
    :meth:`parse_feeds`, and each downloaded page is reduced to the
    ``<img>`` whose alt text is 'Cyanide and Happiness, a daily webcomic'.
    """

    title = u'Explosm Rotated'
    __author__ = 'Andromeda Rabbit'
    description = 'Explosm'
    language = 'en'
    use_embedded_content = False
    no_stylesheets = True
    oldest_article = 24
    remove_javascript = True
    remove_empty_feeds = True
    max_articles_per_feed = 10

    feeds = [
        (u'Explosm Feed', u'http://feeds.feedburner.com/Explosm')
    ]

    # Compiled once at class creation; the dots are escaped so that only the
    # literal host name matches (an unescaped '.' would match any character).
    _COMIC_URL_RE = re.compile(r'http://www\.explosm\.net/comics')

    # Keep just the comic image itself ...
    keep_only_tags = [
        dict(name='img', attrs={'alt': 'Cyanide and Happiness, a daily webcomic'})
    ]
    # ... and strip every one of these tags from the page.
    remove_tags = [
        dict(name='div'),
        dict(name='span'),
        dict(name='table'),
        dict(name='br'),
        dict(name='nobr'),
        dict(name='a'),
        dict(name='b'),
    ]

    extra_css = '''
    h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
    h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
    p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
    body{font-family:Helvetica,Arial,sans-serif;font-size:small;}'''

    def get_cover_url(self):
        """Return the fixed URL of the image used as the cover."""
        return 'http://cdn.shopify.com/s/files/1/0059/1872/products/cyanidetitle_large.jpg?1295846286'

    def parse_feeds(self):
        """Return the parsed feeds with every non-comic article removed.

        The feeds are obtained from ``BasicNewsRecipe.parse_feeds``; an
        article is kept only when its ``url`` contains
        ``http://www.explosm.net/comics``. Articles without a URL are
        treated as non-comics and dropped instead of raising ``TypeError``
        inside the regex search.
        """
        feeds = BasicNewsRecipe.parse_feeds(self)
        is_comic = self._COMIC_URL_RE.search

        for curfeed in feeds:
            # Slice assignment filters in place, so the feed keeps the same
            # list object it had before (as the original deletion loop did),
            # in a single pass instead of an index()/delete per article.
            curfeed.articles[:] = [
                article for article in curfeed.articles
                if is_comic(article.url or '')
            ]

        return feeds

    def skip_ad_pages(self, soup):
        """Return ``soup`` when the page lacks the comic image, else ``None``.

        NOTE(review): calibre documents ``skip_ad_pages`` as returning the
        HTML of the real page for an ad page and ``None`` otherwise. This
        method hands back the same soup it was given when the comic image is
        missing - confirm that this is the intended behaviour.
        """
        # Skip ad pages served before actual article
        skip_tag = soup.find(
            name='img',
            attrs={'alt': 'Cyanide and Happiness, a daily webcomic'})
        if skip_tag is None:
            return soup
        return None