diff --git a/recipes/hindustan_times.recipe b/recipes/hindustan_times.recipe index 610b4286e8..ab7a276aa0 100644 --- a/recipes/hindustan_times.recipe +++ b/recipes/hindustan_times.recipe @@ -1,11 +1,10 @@ from calibre.web.feeds.news import BasicNewsRecipe -import urllib, re class HindustanTimes(BasicNewsRecipe): title = u'Hindustan Times' language = 'en_IN' __author__ = 'Krittika Goyal' - oldest_article = 1 #days + oldest_article = 1 # days max_articles_per_feed = 25 use_embedded_content = False @@ -13,38 +12,40 @@ class HindustanTimes(BasicNewsRecipe): auto_cleanup = True feeds = [ - ('News', - 'http://feeds.hindustantimes.com/HT-NewsSectionPage-Topstories'), - ('Views', - 'http://feeds.hindustantimes.com/HT-ViewsSectionpage-Topstories'), - ('Cricket', - 'http://feeds.hindustantimes.com/HT-Cricket-TopStories'), - ('Business', - 'http://feeds.hindustantimes.com/HT-BusinessSectionpage-TopStories'), - ('Entertainment', - 'http://feeds.hindustantimes.com/HT-HomePage-Entertainment'), - ('Lifestyle', - 'http://feeds.hindustantimes.com/HT-Homepage-LifestyleNews'), + ('News', + 'http://feeds.hindustantimes.com/HT-HomePage-TopStories'), + ('India', + 'http://feeds.hindustantimes.com/HT-India'), + ('World', + 'http://feeds.hindustantimes.com/HT-World'), + ('Business', + 'http://feeds.hindustantimes.com/HT-Business'), + ('Fashion', + 'http://feeds.hindustantimes.com/HT-Fashion'), + ('Sex & Relationships', + 'http://feeds.hindustantimes.com/HT-Sexandrelationships'), + ('Travel', + 'http://feeds.hindustantimes.com/HT-Travel'), + ('Books', + 'http://feeds.hindustantimes.com/HT-Books'), ] def get_article_url(self, article): ''' HT uses a variant of the feedportal RSS ad display mechanism ''' - try: - s = article.summary - return urllib.unquote( - re.search(r'href=".+?bookmark.cfm.+?link=(.+?)"', s).group(1)) - except: - pass - url = BasicNewsRecipe.get_article_url(self, article) - res = self.browser.open_novisit(url) - url = res.geturl().split('/')[-2] - encoding = {'0B': '.', '0C': '/', '0A': '0', '0F': '=', '0G': '&', - '0D': '?', '0E': '-', '0N': '.com', '0L': 'http://', '0S': - 'www.'} - for k, v in encoding.iteritems(): - url = url.replace(k, v) - return url + url = article.get('feedburner_origlink', None) + if url is not None: + idx = url.find('0L0S') + url = url[idx:] + encoding = {'0B': '.', '0C': '/', '0A': '0', '0F': '=', '0G': '&', + '0D': '?', '0E': '-', '0N': '.com', '0L': 'http://', '0S': + 'www.'} + for k, v in encoding.iteritems(): + url = url.replace(k, v) + if url.endswith('/story01.htm'): + url = url.rpartition('/')[0] + return url + return article.get('link', None)