# Extracted from the calibre patch "Fix Hindustan Times" (commit b1a9283b2e,
# Kovid Goyal, 2011-12-21), which adds ``get_article_url`` to the
# ``HindustanTimes(BasicNewsRecipe)`` class in recipes/hindustan_times.recipe.
# The function below is that method; inside the recipe it is indented one
# level and lives in the class body.

import re

try:  # Python 3
    from urllib.parse import unquote
except ImportError:  # Python 2, which the original patch targeted
    from urllib import unquote

# Link embedded in the article summary: the real article URL is carried,
# percent-encoded, in the ``link=`` parameter of a ``bookmark.cfm`` href.
_BOOKMARK_LINK_RE = re.compile(r'href=".+?bookmark.cfm.+?link=(.+?)"')

# Two-character escape codes found in the redirected URL's path segment and
# the text each one stands for.
_FEEDPORTAL_CODES = {
    '0B': '.',
    '0C': '/',
    '0A': '0',
    '0F': '=',
    '0G': '&',
    '0D': '?',
    '0E': '-',
    '0N': '.com',
    '0L': 'http://',
    '0S': 'www.',
}
_FEEDPORTAL_CODE_RE = re.compile('|'.join(map(re.escape, _FEEDPORTAL_CODES)))


def _decode_feedportal(encoded):
    """Expand every escape code in *encoded* in one left-to-right pass."""
    # A single pass matters: chained ``str.replace`` calls let the output of
    # one substitution (``0A`` -> ``0``) pair up with the following character
    # and be mistaken for another code, making the result order-dependent.
    return _FEEDPORTAL_CODE_RE.sub(
        lambda match: _FEEDPORTAL_CODES[match.group(0)], encoded)


def get_article_url(self, article):
    '''
    HT uses a variant of the feedportal RSS ad display mechanism.

    Return the real article URL for *article*.

    First look in ``article.summary`` for a ``bookmark.cfm`` href and return
    its percent-decoded ``link=`` value.  If the summary is missing, is not
    text, or has no such link, fall back to the URL chosen by
    ``BasicNewsRecipe.get_article_url``, open it with
    ``self.browser.open_novisit`` and decode the second-to-last path segment
    of the final URL using the escape-code table.
    '''
    try:
        match = _BOOKMARK_LINK_RE.search(article.summary)
    except (AttributeError, TypeError):
        # No summary attribute, or a summary that is not searchable text:
        # best-effort, so fall through to the redirect-based lookup.
        match = None
    if match is not None:
        return unquote(match.group(1))

    # Imported here so this module can be loaded without calibre installed;
    # in the recipe file itself the name is already imported at module level.
    from calibre.web.feeds.news import BasicNewsRecipe

    url = BasicNewsRecipe.get_article_url(self, article)
    res = self.browser.open_novisit(url)
    # The encoded target is the second-to-last '/'-separated piece of the
    # URL reported by the response.
    return _decode_feedportal(res.geturl().split('/')[-2])