mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Explosm by Andromeda Rabbit
This commit is contained in:
commit
0a32d53528
54
resources/recipes/explosm.recipe
Normal file
54
resources/recipes/explosm.recipe
Normal file
@ -0,0 +1,54 @@
|
|||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
import re
|
||||||
|
|
||||||
|
class Explosm(BasicNewsRecipe):
    """Recipe that collects the comic pages listed in the Explosm feed.

    Articles are taken from the FeedBurner feed, filtered down to entries
    whose URL points at an explosm.net comic page, and each downloaded page
    is reduced to the comic image identified by its ``alt`` text.
    """

    title = u'Explosm Rotated'
    __author__ = 'Andromeda Rabbit'
    description = 'Explosm'
    language = 'en'
    use_embedded_content = False
    no_stylesheets = True
    oldest_article = 24
    remove_javascript = True
    remove_empty_feeds = True
    max_articles_per_feed = 10

    feeds = [
        (u'Explosm Feed', u'http://feeds.feedburner.com/Explosm')
    ]

    # Link filtering is done in parse_feeds() instead of through:
    # match_regexps = [r'http://www.explosm.net/comics/.*']

    # ``alt`` text of the comic image; shared by keep_only_tags and
    # skip_ad_pages so the two can never drift apart.
    _comic_alt = 'Cyanide and Happiness, a daily webcomic'

    keep_only_tags = [dict(name='img', attrs={'alt': _comic_alt})]
    remove_tags = [
        dict(name='div'),
        dict(name='span'),
        dict(name='table'),
        dict(name='br'),
        dict(name='nobr'),
        dict(name='a'),
        dict(name='b'),
    ]

    extra_css = '''
        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
        h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}'''

    def get_cover_url(self):
        """Return the fixed URL of the image used as the cover."""
        return 'http://cdn.shopify.com/s/files/1/0059/1872/products/cyanidetitle_large.jpg?1295846286'

    def parse_feeds(self):
        """Return the parsed feeds with every non-comic article removed.

        The feeds come from ``BasicNewsRecipe.parse_feeds``; an article is
        kept only when its URL contains an explosm.net ``/comics`` address
        (``http`` or ``https``). Articles with an empty URL are dropped.
        Each feed's ``articles`` list is filtered in place.
        """
        feeds = BasicNewsRecipe.parse_feeds(self)

        # Dots are escaped so that only the literal host name matches.
        is_comic = re.compile(r'https?://www\.explosm\.net/comics').search

        for feed in feeds:
            # Slice assignment keeps the same list object, in case the feed
            # or the framework holds another reference to it.
            feed.articles[:] = [
                article for article in feed.articles
                if article.url and is_comic(article.url) is not None
            ]

        return feeds

    def skip_ad_pages(self, soup):
        """Return ``soup`` when the comic image is absent, otherwise ``None``.

        A page is treated as the real article when it contains an ``img``
        whose ``alt`` text is the comic's; in that case ``None`` is returned.
        When no such image is found the soup is handed back unchanged.
        """
        # Skip ad pages served before actual article
        # NOTE(review): calibre's documentation describes this hook as
        # returning the HTML of the real page (or None); here the parsed soup
        # itself is returned for pages without the comic -- confirm that the
        # caller copes with that before changing it.
        comic_img = soup.find(name='img', attrs={'alt': self._comic_alt})
        if comic_img is None:
            return soup
        return None
|
@ -222,6 +222,8 @@ class TXTMLizer(object):
|
|||||||
# Scene breaks.
|
# Scene breaks.
|
||||||
if tag == 'hr':
|
if tag == 'hr':
|
||||||
text.append('\n\n* * *\n\n')
|
text.append('\n\n* * *\n\n')
|
||||||
|
elif style['margin-top']:
|
||||||
|
text.append('\n\n' + '\n' * round(style['margin-top']))
|
||||||
|
|
||||||
# Process tags that contain text.
|
# Process tags that contain text.
|
||||||
if hasattr(elem, 'text') and elem.text:
|
if hasattr(elem, 'text') and elem.text:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user