__license__ = 'GPL v3'
__copyright__ = '2008 - 2011, Darko Miletic '
'''
www.scotsman.com/the-scotsman
'''

from calibre.web.feeds.news import BasicNewsRecipe


class TheScotsman(BasicNewsRecipe):
    """Recipe settings and HTML clean-up for The Scotsman (www.scotsman.com)."""

    # Publication metadata; several of these values are reused in
    # conversion_options below.
    title = 'The Scotsman'
    __author__ = 'Darko Miletic'
    description = 'News from Scotland'
    publisher = 'Johnston Publishing Ltd.'
    category = 'news, politics, Scotland, UK'
    language = 'en_GB'
    publication_type = 'newspaper'

    # Download settings.
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'
    remove_empty_feeds = True

    # Presentation.
    masthead_url = 'http://www.scotsman.com/webimage/swts_thescotsman_image_e_7_25526!image/3142543874.png_gen/derivatives/default/3142543874.png'
    extra_css = 'body{font-family: Arial,Helvetica,sans-serif}'

    # Page clean-up rules, expressed as tag/attribute match specifications.
    keep_only_tags = [dict(attrs={'class': 'editorialSection'})]
    remove_tags_after = dict(attrs={'class': 'socialBookmarkPanel'})
    remove_tags = [
        dict(name=['meta', 'iframe', 'object', 'embed', 'link']),
        dict(attrs={'class': ['secondaryArticlesNav', 'socialBookmarkPanel']}),
        dict(attrs={'id': 'relatedArticles'}),
    ]
    remove_attributes = ['lang']

    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    # (section title, feed URL) pairs.
    feeds = [
        ('Latest News', 'http://www.scotsman.com/cmlink/1.957140'),
        ('UK', 'http://www.scotsman.com/cmlink/1.957142'),
        ('Scotland', 'http://www.scotsman.com/cmlink/1.957141'),
        ('International', 'http://www.scotsman.com/cmlink/1.957143'),
        ('Politics', 'http://www.scotsman.com/cmlink/1.957044'),
        ('Arts', 'http://www.scotsman.com/cmlink/1.1804825'),
        ('Entertainment', 'http://www.scotsman.com/cmlink/1.957053'),
        ('Sports', 'http://www.scotsman.com/cmlink/1.957151'),
        ('Business', 'http://www.scotsman.com/cmlink/1.957156'),
        ('Features', 'http://www.scotsman.com/cmlink/1.957149'),
        ('Opinion', 'http://www.scotsman.com/cmlink/1.957054'),
    ]

    def preprocess_html(self, soup):
        """Strip inline styles and give every image an ``alt`` attribute.

        Every element carrying a ``style`` attribute has that attribute
        deleted, and every ``img`` element lacking ``alt`` receives the
        placeholder text ``'image'``.

        :param soup: parsed document tree exposing ``findAll`` and
            dict-style attribute access on its elements.
        :return: the same ``soup`` object, modified in place.
        """
        for item in soup.findAll(style=True):
            del item['style']
        for item in soup.findAll('img'):
            # has_key() is a Python 2 era API that newer Beautiful Soup
            # deprecates; get() is available on old and new tag objects.
            if item.get('alt') is None:
                item['alt'] = 'image'
        return soup