calibre/recipes/statesman.recipe

41 lines
1.7 KiB
Plaintext

from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1278049615(BasicNewsRecipe):
title = u'Statesman'
pubisher = 'http://www.statesman.com/'
description = 'Austin Texas Daily Newspaper'
category = 'News, Austin, Texas'
__author__ = 'rty'
oldest_article = 3
max_articles_per_feed = 100
feeds = [(u'News',
u'http://www.statesman.com/section-rss.do?source=news&includeSubSections=true'),
(u'Local', u'http://www.statesman.com/section-rss.do?source=local&includeSubSections=true'),
(u'Business', u'http://www.statesman.com/section-rss.do?source=business&includeSubSections=true'),
(u'Life', u'http://www.statesman.com/section-rss.do?source=life&includesubsection=true'),
(u'Editorial', u'http://www.statesman.com/section-rss.do?source=opinion&includesubsections=true'),
(u'Sports', u'http://www.statesman.com/section-rss.do?source=sports&includeSubSections=true')
]
masthead_url = "http://www.statesman.com/images/cmg-logo.gif"
#temp_files = []
#articles_are_obfuscated = True
remove_javascript = True
use_embedded_content = False
no_stylesheets = True
language = 'en'
encoding = 'utf-8'
conversion_options = {'linearize_tables':True}
remove_tags = [
dict(name='div', attrs={'id':'cxArticleOptions'}),
{'class':['perma', 'comments', 'trail', 'share-buttons',
'toggle_show_on']},
]
keep_only_tags = [
dict(name='div', attrs={'class':'cxArticleHeader'}),
dict(name='div', attrs={'id':['cxArticleBodyText',
'content']}),
]