calibre/resources/recipes/gawker.recipe
2010-08-23 09:54:42 -06:00

45 lines
1.5 KiB
Plaintext

__license__ = 'GPL v3'
__copyright__ = '2010, NA'
'''
gawker.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Gawker(BasicNewsRecipe):
    """calibre news recipe for gawker.com.

    The recipe is purely declarative apart from ``preprocess_html``: the
    class attributes below configure the ``BasicNewsRecipe`` machinery
    (feed list, article age/count limits, HTML clean-up rules and output
    metadata).
    """

    # --- Publication metadata -------------------------------------------
    title = 'Gawker'
    __author__ = 'NA'
    description = "Gawker, Gossip from Manhattan and the Beltway to Hollywood and the Valley."
    publisher = 'gawker.com'
    category = 'news, gossip'
    language = 'en'
    masthead_url = 'http://cache.gawkerassets.com/assets/gawker.com/img/logo.png'

    # --- Download limits and fetch behaviour ----------------------------
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    encoding = 'utf-8'
    # False: article content is not taken from the feed entries themselves.
    use_embedded_content = False

    # Extra CSS applied to the generated output. The string content is kept
    # exactly as in the original recipe.
    extra_css = '''
body{font-family: "Lucida Grande",Helvetica,Arial,sans-serif}
img{margin-bottom: 1em}
h1{font-family :Arial,Helvetica,sans-serif; font-size:large}
'''

    # Metadata handed to the conversion pipeline; values reuse the class
    # attributes defined above so they cannot drift apart.
    # NOTE(review): calibre's metadata option appears to be spelled
    # 'comments'; the 'comment' key may be silently ignored -- confirm
    # against the calibre conversion options before changing it.
    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    # --- HTML clean-up rules --------------------------------------------
    remove_attributes = ['width', 'height']
    keep_only_tags = [dict(attrs={'class': 'content permalink'})]
    remove_tags_before = dict(name='h1')
    # The 'contactinfo' element is used both as the cut-off point and as a
    # tag to remove.
    remove_tags = [dict(attrs={'class': 'contactinfo'})]
    remove_tags_after = dict(attrs={'class': 'contactinfo'})

    feeds = [(u'Articles', u'http://feeds.gawker.com/gawker/full')]

    def preprocess_html(self, soup):
        """Return ``soup`` after passing it through ``adeify_images``.

        ``adeify_images`` is inherited from ``BasicNewsRecipe``; its result
        is returned unchanged.
        """
        return self.adeify_images(soup)