Improved LA Times recipe

This commit is contained in:
Kovid Goyal 2009-09-04 10:16:24 -06:00
parent bfb4199452
commit a8c7424a30

View File

@ -10,8 +10,8 @@ from calibre.web.feeds.news import BasicNewsRecipe
class LATimes(BasicNewsRecipe):
title = u'The Los Angeles Times'
__author__ = u'Darko Miletic'
description = u'News from Los Angeles'
__author__ = u'Darko Miletic and Sujata Raman'
description = u'News from Los Angeles'
oldest_article = 7
max_articles_per_feed = 100
language = _('English')
@ -24,12 +24,42 @@ class LATimes(BasicNewsRecipe):
'comment' : description
, 'language' : lang
}
keep_only_tags = [dict(name='div', attrs={'class':'story' })]
remove_tags_after = [dict(name='div', attrs={'id':'story-body' })]
remove_tags = [dict(name='div', attrs={'class':['thumbnail','articlerail','tools']})]
feeds = [(u'News', u'http://feeds.latimes.com/latimes/news')]
extra_css = '''
h1{font-family :Georgia,"Times New Roman",Times,serif; font-size:large; }
h2{font-family :Georgia,"Times New Roman",Times,serif; font-size:x-small;}
.story{font-family :Georgia,"Times New Roman",Times,serif; font-size: x-small;}
.entry-body{font-family :Georgia,"Times New Roman",Times,serif; font-size: x-small;}
.entry-more{font-family :Georgia,"Times New Roman",Times,serif; font-size: x-small;}
.credit{color:#666666; font-family :Georgia,"Times New Roman",Times,serif; font-size: xx-small;}
.small{color:#666666; font-family :Georgia,"Times New Roman",Times,serif; font-size: xx-small;}
.byline{font-family :Georgia,"Times New Roman",Times,serif; font-size: xx-small;}
.date{font-family :Georgia,"Times New Roman",Times,serif; font-size: xx-small;color:#930000; font-style:italic;}
.time{font-family :Georgia,"Times New Roman",Times,serif; font-size: xx-small;color:#930000; font-style:italic;}
.copyright{font-family :Georgia,"Times New Roman",Times,serif; font-size: xx-small;color:#930000; }
.subhead{font-family :Georgia,"Times New Roman",Times,serif; font-size:x-small;}
'''
keep_only_tags = [dict(name='div', attrs={'class':["story" ,"entry"] })]
remove_tags = [ dict(name='div', attrs={'class':['articlerail',"sphereTools","tools","toppaginate","entry-footer-left","entry-footer-right"]}),
dict(name='div', attrs={'id':["moduleArticleToolsContainer",]}),
dict(name='ul', attrs={'class':["article-nav clearfix",]}),
dict(name='p', attrs={'class':["entry-footer",]}),
dict(name=['iframe'])
]
feeds = [(u'News', u'http://feeds.latimes.com/latimes/news')
,(u'Local','http://feeds.latimes.com/latimes/news/local')
,(u'Most Emailed','http://feeds.latimes.com/MostEmailed')
,(u'California Politics','http://feeds.latimes.com/latimes/news/local/politics/cal/')
,('OrangeCounty','http://feeds.latimes.com/latimes/news/local/orange/')
,('National','http://feeds.latimes.com/latimes/news/nationworld/nation')
,('Politics','http://feeds.latimes.com/latimes/news/politics/')
,('Business','http://feeds.latimes.com/latimes/business')
,('Sports','http://feeds.latimes.com/latimes/sports/')
,('Entertainment','http://feeds.latimes.com/latimes/entertainment/')
]
def get_article_url(self, article):
return article.get('feedburner_origlink')