from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag


def new_tag(soup, name, attrs=()):
    """Create a new tag called *name* that belongs to *soup*.

    If *soup* exposes a ``new_tag`` attribute, it is called with *attrs*
    converted to a dict. Otherwise ``Tag`` is instantiated directly with
    *attrs* passed through (or ``None`` when *attrs* is empty).
    NOTE(review): presumably this bridges two BeautifulSoup API
    generations -- confirm against the bundled BeautifulSoup version.
    """
    factory = getattr(soup, 'new_tag', None)
    if factory is None:
        return Tag(soup, name, attrs=attrs or None)
    return factory(name, attrs=dict(attrs))


class MoneyControlRecipe(BasicNewsRecipe):
    """Recipe that fetches the moneycontrol.com RSS feeds."""

    __license__ = 'GPL v3'
    __author__ = 'kwetal'
    language = 'en_IN'
    locale = 'en_IN'
    encoding = 'iso-8859-1'
    version = 1

    title = u'Money Control'
    publisher = u'moneycontrol.com'
    category = u'News, Financial, India'
    description = u'Financial news from India'

    oldest_article = 7
    max_articles_per_feed = 100
    use_embedded_content = False

    no_stylesheets = True
    remove_javascript = True

    # (feed title, feed URL) pairs.
    feeds = [
        (u'Latest News', u'http://www.moneycontrol.com/rss/latestnews.xml'),
        (u'All Stories', u'http://www.moneycontrol.com/rss/allstories.xml'),
    ]

    def print_version(self, url):
        """Return the print-view URL for the article at *url*.

        The ``stocksnews.php`` endpoint is swapped for ``news_print.php``
        and ``&sr_no=0`` is appended to the result.
        """
        printable = url.replace('/stocksnews.php?', '/news_print.php?')
        return printable + '&sr_no=0'

    # The articles contain really horrible html. More than one <html> and
    # <head> section, not properly closed tags, lots and lots of <font> tags
    # and some weird