diff --git a/recipes/the_scotsman.recipe b/recipes/the_scotsman.recipe index 6db64cfcaa..ebffdf2698 100644 --- a/recipes/the_scotsman.recipe +++ b/recipes/the_scotsman.recipe @@ -1,4 +1,3 @@ - __license__ = 'GPL v3' __copyright__ = '2008 - 2011, Darko Miletic ' ''' @@ -12,54 +11,39 @@ class TheScotsman(BasicNewsRecipe): __author__ = 'Darko Miletic' description = 'News from Scotland' publisher = 'Johnston Publishing Ltd.' - category = 'news, politics, Scotland, UK' + category = 'news, politics, Scotland, UK' oldest_article = 2 max_articles_per_feed = 100 no_stylesheets = True use_embedded_content = False - auto_cleanup = True language = 'en_GB' encoding = 'utf-8' publication_type = 'newspaper' - remove_empty_feeds = True + remove_empty_feeds = True masthead_url = 'http://www.scotsman.com/webimage/swts_thescotsman_image_e_7_25526!image/3142543874.png_gen/derivatives/default/3142543874.png' extra_css = 'body{font-family: Arial,Helvetica,sans-serif}' - - - #keep_only_tags = [dict(attrs={'class':'editorialSection'})] - #remove_tags_after = dict(attrs={'class':'socialBookmarkPanel'}) - #remove_tags = [ - #dict(name=['meta','iframe','object','embed','link']), - #dict(attrs={'class':['secondaryArticlesNav','socialBookmarkPanel']}), - #dict(attrs={'id':'relatedArticles'}) - #] + + keep_only_tags = [dict(name='div', attrs={'class':'article'})] remove_attributes = ['lang'] - + conversion_options = { 'comment' : description , 'tags' : category , 'publisher' : publisher , 'language' : language } - + feeds = [ - ('Latest News' , 'http://www.scotsman.com/cmlink/1.957140' ), - ('UK' , 'http://www.scotsman.com/cmlink/1.957142' ), - ('Scotland' , 'http://www.scotsman.com/cmlink/1.957141' ), - ('International', 'http://www.scotsman.com/cmlink/1.957143' ), - ('Politics' , 'http://www.scotsman.com/cmlink/1.957044' ), + ('Latest News' , 'http://www.scotsman.com/cmlink/1.957140'), + ('UK' , 'http://www.scotsman.com/cmlink/1.957142'), + ('Scotland' , 'http://www.scotsman.com/cmlink/1.957141'), + ('International', 'http://www.scotsman.com/cmlink/1.957143'), + ('Politics' , 'http://www.scotsman.com/cmlink/1.957044'), ('Arts' , 'http://www.scotsman.com/cmlink/1.1804825'), - ('Entertainment', 'http://www.scotsman.com/cmlink/1.957053' ), - ('Sports' , 'http://www.scotsman.com/cmlink/1.957151' ), - ('Business' , 'http://www.scotsman.com/cmlink/1.957156' ), - ('Features' , 'http://www.scotsman.com/cmlink/1.957149' ), - ('Opinion' , 'http://www.scotsman.com/cmlink/1.957054' ) + ('Entertainment', 'http://www.scotsman.com/cmlink/1.957053'), + ('Sports' , 'http://www.scotsman.com/cmlink/1.957151'), + ('Business' , 'http://www.scotsman.com/cmlink/1.957156'), + ('Features' , 'http://www.scotsman.com/cmlink/1.957149'), + ('Opinion' , 'http://www.scotsman.com/cmlink/1.957054') ] - - #def preprocess_html(self, soup): - #for item in soup.findAll(style=True): - #del item['style'] - #for item in soup.findAll('img'): - #if not item.has_key('alt'): - #item['alt'] = 'image' - #return soup \ No newline at end of file +