diff --git a/resources/recipes/gulfnews.recipe b/resources/recipes/gulfnews.recipe new file mode 100644 index 0000000000..4c40aa3f37 --- /dev/null +++ b/resources/recipes/gulfnews.recipe @@ -0,0 +1,64 @@ +__license__ = 'GPL v3' +__copyright__ = '2011, Darko Miletic ' +''' +gulfnews.com +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class GulfNews(BasicNewsRecipe): + title = 'Gulf News' + __author__ = 'Darko Miletic' + description = 'News from United Arab Emirrates, persian gulf and rest of the world' + publisher = 'Al Nisr Publishing LLC' + category = 'news, politics, UAE, world' + oldest_article = 2 + max_articles_per_feed = 200 + no_stylesheets = True + encoding = 'utf8' + use_embedded_content = False + language = 'en' + remove_empty_feeds = True + publication_type = 'newsportal' + masthead_url = 'http://gulfnews.com/media/img/gulf_news_logo.jpg' + extra_css = """ + body{font-family: Arial,Helvetica,sans-serif } + img{margin-bottom: 0.4em; display:block} + h1{font-family: Georgia, 'Times New Roman', Times, serif} + ol,ul{list-style: none} + .synopsis{font-size: small} + .details{font-size: x-small} + .image{font-size: xx-small} + """ + + conversion_options = { + 'comment' : description + , 'tags' : category + , 'publisher' : publisher + , 'language' : language + } + + remove_tags = [ + dict(name=['meta','link','object','embed']) + ,dict(attrs={'class':['quickLinks','ratings']}) + ,dict(attrs={'id':'imageSelector'}) + ] + remove_attributes=['lang'] + keep_only_tags=[ + dict(name='h1') + ,dict(attrs={'class':['synopsis','details','image','article']}) + ] + + + feeds = [ + (u'UAE News' , u'http://gulfnews.com/cmlink/1.446094') + ,(u'Business' , u'http://gulfnews.com/cmlink/1.446098') + ,(u'Entertainment' , u'http://gulfnews.com/cmlink/1.446095') + ,(u'Sport' , u'http://gulfnews.com/cmlink/1.446096') + ,(u'Life' , u'http://gulfnews.com/cmlink/1.446097') + ] + + def preprocess_html(self, soup): + for item in soup.findAll(style=True): + del item['style'] + return soup