__license__ = 'GPL v3'
__copyright__ = '2010, Larry Chan '
'''
Singtao STNN
'''
from calibre.web.feeds.recipes import BasicNewsRecipe


class SingtaoSTNN(BasicNewsRecipe):
    """calibre recipe for the Singtao STNN news portal (stnn.cc).

    The recipe is purely declarative: it sets class attributes that the
    ``BasicNewsRecipe`` base class reads, and defines no methods of its own.
    See calibre's ``BasicNewsRecipe`` API documentation for the exact
    semantics of each attribute.
    """

    # --- Publication metadata -------------------------------------------
    title = 'Singtao STNN'
    __author__ = 'Larry Chan, larry1chan'
    description = 'Chinese News'
    publisher = 'Singtao STNN'
    category = 'news, China, world'
    language = 'zh'
    publication_type = 'newsportal'
    masthead_url = 'http://www.stnn.cc/images/0806/logo_080728.gif'

    # --- Download settings ----------------------------------------------
    oldest_article = 2
    max_articles_per_feed = 100
    simultaneous_downloads = 5
    # delay = 1  (left disabled by the original author)
    use_embedded_content = False
    # Character encoding declared for the downloaded pages.
    encoding = 'gb2312'

    # --- Styling ----------------------------------------------------------
    no_stylesheets = True
    # Split across lines only for readability; the concatenated value is
    # identical to the original single-line string.
    extra_css = (
        ' body{ font-family: Verdana,Helvetica,Arial,sans-serif }'
        ' .introduction{font-weight: bold}'
        ' .story-feature{display: block; padding: 0; border: 1px solid;'
        ' width: 40%; font-size: small}'
        ' .story-feature h2{text-align: center; text-transform: uppercase} '
    )

    # --- Conversion options ---------------------------------------------
    # Reuses the metadata defined above so the values stay in sync.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
        'linearize_tables': True,
    }

    # --- Article clean-up -------------------------------------------------
    remove_tags_before = dict(name='div', attrs={'class': ['page_box']})
    remove_tags_after = dict(name='div', attrs={'class': ['pagelist']})
    keep_only_tags = [
        dict(name='div', attrs={'class': ['font_title clearfix']}),
        dict(name='div', attrs={'id': ['content_zoom']}),
    ]
    # NOTE(review): 'href' is stripped along with the sizing attributes,
    # presumably to drop in-article links -- confirm this is intended.
    remove_attributes = ['width', 'height', 'href']

    # --- Feeds --------------------------------------------------------------
    # For a full list of RSS feeds see http://www.stnn.cc/rss/
    feeds = [
        (u'Headline News', u'http://www.stnn.cc/rss/news/index.xml'),
        (u'Breaking News', u'http://www.stnn.cc/rss/tufa/index.xml'),
        (u'Finance', u'http://www.stnn.cc/rss/fin/index.xml'),
        (u'Entertainment', u'http://www.stnn.cc/rss/ent/index.xml'),
        (u'International', u'http://www.stnn.cc/rss/guoji/index.xml'),
        (u'China', u'http://www.stnn.cc/rss/china/index.xml'),
        (u'Opinion', u'http://www.stnn.cc/rss/fin_op/index.xml'),
        (u'Blog', u'http://blog.stnn.cc/uploadfile/rssblogtypehotlog.xml'),
        (u'Hong Kong', u'http://www.stnn.cc/rss/hongkong/index.xml'),
    ]