mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Explosm by Andromeda Rabbit
This commit is contained in:
commit
0a32d53528
54
resources/recipes/explosm.recipe
Normal file
54
resources/recipes/explosm.recipe
Normal file
@ -0,0 +1,54 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
import re
|
||||
|
||||
class Explosm(BasicNewsRecipe):
    """Recipe that downloads the Explosm feed and keeps only the comic pages.

    The feed is filtered in :meth:`parse_feeds` so that only entries whose
    URL points at ``http://www.explosm.net/comics`` survive, and each
    downloaded page is reduced to the comic image via ``keep_only_tags``.
    """

    title = u'Explosm Rotated'
    __author__ = 'Andromeda Rabbit'
    description = 'Explosm'
    language = 'en'
    use_embedded_content = False
    no_stylesheets = True
    oldest_article = 24
    remove_javascript = True
    remove_empty_feeds = True
    max_articles_per_feed = 10

    feeds = [
        (u'Explosm Feed', u'http://feeds.feedburner.com/Explosm')
    ]

    #match_regexps = [r'http://www.explosm.net/comics/.*']

    # Keep only the comic image itself; everything listed in remove_tags is
    # stripped from the page.
    keep_only_tags = [
        dict(name='img', attrs={'alt': 'Cyanide and Happiness, a daily webcomic'}),
    ]
    remove_tags = [
        dict(name='div'),
        dict(name='span'),
        dict(name='table'),
        dict(name='br'),
        dict(name='nobr'),
        dict(name='a'),
        dict(name='b'),
    ]

    extra_css = (
        '\n'
        'h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}\n'
        'h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}\n'
        'p{font-family:Arial,Helvetica,sans-serif;font-size:small;}\n'
        'body{font-family:Helvetica,Arial,sans-serif;font-size:small;}'
    )

    def get_cover_url(self):
        """Return the fixed URL of the image used as the cover."""
        return 'http://cdn.shopify.com/s/files/1/0059/1872/products/cyanidetitle_large.jpg?1295846286'

    def parse_feeds(self):
        """Parse the feeds and drop every article that is not a comic page.

        Calls ``BasicNewsRecipe.parse_feeds`` and then removes, in place,
        each article whose ``url`` does not contain
        ``http://www.explosm.net/comics``.

        Returns the list of feeds produced by the base class, with the
        ``articles`` list of each feed filtered.
        """
        feeds = BasicNewsRecipe.parse_feeds(self)

        # Dots are escaped so they match a literal '.', and the pattern is
        # compiled once rather than on every article.
        comic_url = re.compile(r'http://www\.explosm\.net/comics')

        for curfeed in feeds:
            # Slice assignment mutates the existing list object, exactly as
            # the element-by-element deletion did, but in a single pass.
            # An article with an empty/missing URL cannot be a comic page and
            # is dropped instead of making re raise a TypeError.
            curfeed.articles[:] = [
                article for article in curfeed.articles
                if article.url and comic_url.search(article.url) is not None
            ]

        return feeds

    def skip_ad_pages(self, soup):
        """Skip ad pages served before the actual article.

        Looks for the comic image in ``soup``. Returns ``None`` when the
        image is present, and returns ``soup`` itself when it is absent.
        """
        # NOTE(review): calibre's documentation describes the non-None return
        # value as the HTML of the real page; here the parsed soup of the
        # current page is returned instead - confirm this is intended.
        comic_img = soup.find(
            name='img',
            attrs={'alt': 'Cyanide and Happiness, a daily webcomic'},
        )
        if comic_img is None:
            return soup
        return None
|
@ -222,6 +222,8 @@ class TXTMLizer(object):
|
||||
# Scene breaks.
|
||||
if tag == 'hr':
|
||||
text.append('\n\n* * *\n\n')
|
||||
elif style['margin-top']:
|
||||
text.append('\n\n' + '\n' * round(style['margin-top']))
|
||||
|
||||
# Process tags that contain text.
|
||||
if hasattr(elem, 'text') and elem.text:
|
||||
|
Loading…
x
Reference in New Issue
Block a user