From 7b4c73c1112bfef2a6f29c9e378ce1f713d8047c Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 8 Sep 2011 09:00:00 -0600 Subject: [PATCH] Update Honolulu Star Advertiser --- recipes/staradvertiser.recipe | 55 +++++++++++++++++++++-------------- 1 file changed, 33 insertions(+), 22 deletions(-) diff --git a/recipes/staradvertiser.recipe b/recipes/staradvertiser.recipe index cce450f1ce..c991649b45 100644 --- a/recipes/staradvertiser.recipe +++ b/recipes/staradvertiser.recipe @@ -1,5 +1,5 @@ __license__ = 'GPL v3' -__copyright__ = '2009-2011, Darko Miletic ' +__copyright__ = '2011, M. Ching modified from work 2009-2011 Darko Miletic ' ''' staradvertiser.com ''' @@ -7,12 +7,13 @@ staradvertiser.com from calibre.web.feeds.news import BasicNewsRecipe class Starbulletin(BasicNewsRecipe): - title = 'Honolulu Star Advertiser' + title = 'Honolulu Star-Advertiser' __author__ = 'Darko Miletic' description = 'Latest national and local Hawaii sports news' publisher = 'Honolulu Star-Advertiser' category = 'news, Honolulu, Hawaii' oldest_article = 2 + needs_subscription = True max_articles_per_feed = 100 language = 'en' no_stylesheets = True @@ -20,12 +21,12 @@ class Starbulletin(BasicNewsRecipe): encoding = 'utf8' publication_type = 'newspaper' masthead_url = 'http://media.staradvertiser.com/designimages/star-advertiser-logo-small.gif' - extra_css = """ - body{font-family: Verdana,Arial,Helvetica,sans-serif} - h1,.brown,.postCredit{color: #663300} - .storyDeck{font-size: 1.2em; font-weight: bold} - img{display: block} - """ +# extra_css = """ +# body{font-family: Verdana,Arial,Helvetica,sans-serif} +# h1,.brown,.hsa_postCredit{color: #663300} +# .storyDeck{font-size: 1.2em; font-weight: bold} +# img{display: block} +# """ conversion_options = { 'comment' : description @@ -35,26 +36,36 @@ class Starbulletin(BasicNewsRecipe): , 'linearize_tables' : True } keep_only_tags = [ - dict(attrs={'id':'storyTitle'}) - ,dict(attrs={'class':['storyDeck','postCredit']}) - 
,dict(name='span',attrs={'class':'brown'}) + dict(attrs={'id':'hsa_storyTitle'}) + ,dict(attrs={'class':['hsa_dateStamp','hsa_postCredit','storyDeck']}) + ,dict(name='span',attrs={'class':['hsa_dateStamp','hsa_postCredit']}) + ,dict(name='div',attrs={'class':'storytext article-important'}) ,dict(name='div',attrs={'class':'storytext'}) ] remove_tags = [ - dict(name=['object','link','script','span','meta','base','iframe']) + dict(name=['object','link','script','meta','base','iframe']) +# removed 'span' from preceding list to permit keeping of author and timestamp ,dict(attrs={'class':['insideStoryImage','insideStoryAd']}) ,dict(attrs={'name':'fb_share'}) ] - feeds = [ - (u'Headlines' , u'http://www.staradvertiser.com/staradvertiser_headlines.rss' ) - ,(u'News' , u'http://www.staradvertiser.com/news/index.rss' ) - ,(u'Sports' , u'http://www.staradvertiser.com/sports/index.rss' ) - ,(u'Features' , u'http://www.staradvertiser.com/features/index.rss' ) - ,(u'Editorials', u'http://www.staradvertiser.com/editorials/index.rss' ) - ,(u'Business' , u'http://www.staradvertiser.com/business/index.rss' ) - ,(u'Travel' , u'http://www.staradvertiser.com/travel/index.rss' ) - ] + def get_browser(self): + br = BasicNewsRecipe.get_browser(self) + if self.username is not None and self.password is not None: + br.open('http://www.staradvertiser.com/manage/Login/') + br.select_form(name='loginForm') + br['email'] = self.username + br['password'] = self.password + br.submit() + return br + + feeds = [ + (u'Breaking News', u'http://www.staradvertiser.com/news/breaking/index.rss') + ,(u'News', u'http://www.staradvertiser.com/newspremium/index.rss') + ,(u'Business', u'http://www.staradvertiser.com/businesspremium/index.rss') + ,(u'Sports', u'http://www.staradvertiser.com/sportspremium/index.rss') + ,(u'Features', u'http://www.staradvertiser.com/featurespremium/index.rss') + ] def preprocess_html(self, soup): for item in soup.findAll(style=True): @@ -75,4 +86,4 @@ class 
Starbulletin(BasicNewsRecipe): if not item.has_key('alt'): item['alt'] = 'image' return soup - \ No newline at end of file +