__license__ = 'GPL v3'
__copyright__ = '2011, Darko Miletic '
'''
www.arabianbusiness.com
'''

from calibre.web.feeds.news import BasicNewsRecipe


class Arabian_Business(BasicNewsRecipe):
    """Recipe settings and HTML clean-up hooks for www.arabianbusiness.com.

    The class body is declarative: metadata, CSS, tag filters and the list
    of RSS feeds. Two hooks are overridden: ``print_version`` rewrites an
    article URL and ``preprocess_html`` strips inline styles and flattens
    text-only links.
    """

    # --- Publication metadata -------------------------------------------
    title = 'Arabian Business'
    __author__ = 'Darko Miletic'
    # Built from adjacent literals purely to keep source lines short; the
    # resulting value is one unbroken string.
    description = (
        'Comprehensive Guide to Middle East Business & Gulf Industry News '
        'including,Banking & Finance,Construction,Energy,Media & Marketing,'
        'Real Estate,Transportation,Travel,Technology,Politics,Healthcare,'
        'Lifestyle,Jobs & UAE guide.Top Gulf & Dubai Business News.'
    )
    publisher = 'Arabian Business Publishing Ltd.'

    # --- Fetch / parse settings -----------------------------------------
    oldest_article = 2
    max_articles_per_feed = 200
    no_stylesheets = True
    encoding = 'utf8'
    use_embedded_content = False
    language = 'en'
    remove_empty_feeds = True
    publication_type = 'newsportal'
    masthead_url = 'http://www.arabianbusiness.com/skins/ab.main/gfx/arabianbusiness_logo_sm.gif'

    # Stylesheet text for the generated book. The leading and trailing
    # single spaces are part of the value as it appears in the source.
    extra_css = (
        ' body{font-family: Georgia,serif } '
        'img{margin-bottom: 0.4em; margin-top: 0.4em; display:block} '
        '.byline,.dateline{font-size: small; display: inline; font-weight: bold} '
        'ul{list-style: none outside none;} '
    )

    # Reuses the metadata defined above, so it must come after them.
    conversion_options = {
        'comment': description,
        'publisher': publisher,
        'language': language
    }

    # --- Markup filters --------------------------------------------------
    remove_tags_before = dict(attrs={'id': 'article-title'})
    remove_tags = [
        dict(name=['meta', 'link', 'base', 'iframe', 'embed', 'object']),
        dict(attrs={'class': 'printfooter'})
    ]
    remove_attributes = ['lang']

    # --- Feeds: (section title, RSS URL) pairs ---------------------------
    feeds = [
        # World regions
        (u'Africa', u'http://www.arabianbusiness.com/world/Africa/?service=rss'),
        (u'Americas', u'http://www.arabianbusiness.com/world/americas/?service=rss'),
        (u'Asia Pacific', u'http://www.arabianbusiness.com/world/asia-pacific/?service=rss'),
        (u'Europe', u'http://www.arabianbusiness.com/world/europe/?service=rss'),
        (u'Middle East', u'http://www.arabianbusiness.com/world/middle-east/?service=rss'),
        (u'South Asia', u'http://www.arabianbusiness.com/world/south-asia/?service=rss'),
        # Industries
        (u'Banking & Finance', u'http://www.arabianbusiness.com/industries/banking-finance/?service=rss'),
        (u'Construction', u'http://www.arabianbusiness.com/industries/construction/?service=rss'),
        (u'Education', u'http://www.arabianbusiness.com/industries/education/?service=rss'),
        (u'Energy', u'http://www.arabianbusiness.com/industries/energy/?service=rss'),
        (u'Healthcare', u'http://www.arabianbusiness.com/industries/healthcare/?service=rss'),
        (u'Media', u'http://www.arabianbusiness.com/industries/media/?service=rss'),
        (u'Real Estate', u'http://www.arabianbusiness.com/industries/real-estate/?service=rss'),
        (u'Retail', u'http://www.arabianbusiness.com/industries/retail/?service=rss'),
        (u'Technology', u'http://www.arabianbusiness.com/industries/technology/?service=rss'),
        (u'Transport', u'http://www.arabianbusiness.com/industries/transport/?service=rss'),
        (u'Travel', u'http://www.arabianbusiness.com/industries/travel-hospitality/?service=rss'),
        # Markets
        (u'Equities', u'http://www.arabianbusiness.com/markets/equities/?service=rss'),
        (u'Commodities', u'http://www.arabianbusiness.com/markets/commodities/?service=rss'),
        (u'Currencies', u'http://www.arabianbusiness.com/markets/currencies/?service=rss'),
        (u'Market Data', u'http://www.arabianbusiness.com/markets/market-data/?service=rss'),
        # Opinion
        (u'Comment', u'http://www.arabianbusiness.com/opinion/comment/?service=rss'),
        (u'Think Tank', u'http://www.arabianbusiness.com/opinion/think-tank/?service=rss'),
        # Lifestyle
        (u'Arts', u'http://www.arabianbusiness.com/lifestyle/arts/?service=rss'),
        (u'Cars', u'http://www.arabianbusiness.com/lifestyle/cars/?service=rss'),
        (u'Food', u'http://www.arabianbusiness.com/lifestyle/food/?service=rss'),
        (u'Sport', u'http://www.arabianbusiness.com/lifestyle/sport/?service=rss')
    ]

    def print_version(self, url):
        """Return ``url`` with the printer-service query string appended.

        The suffix is appended verbatim; ``url`` is not inspected for an
        existing query string.
        """
        printer_query = '?service=printer&page='
        return url + printer_query

    def preprocess_html(self, soup):
        """Strip inline styles and flatten text-only links, then return ``soup``.

        Every element carrying a ``style`` attribute loses that attribute.
        Every ``<a>`` element whose ``.string`` is not ``None`` is replaced
        in the tree by that string; anchors whose ``.string`` is ``None``
        are left untouched.
        """
        for styled in soup.findAll(style=True):
            del styled['style']

        for anchor in soup.findAll('a'):
            label = anchor.string
            if label is None:
                # Leave anchors that do not expose a single string as-is.
                continue
            anchor.replaceWith(label)

        return soup