Update Hindustan Times

2025-08-11 09:13:57 -04:00 · 2014-04-20 09:13:53 +05:30 · 2014-04-20 09:13:53 +05:30 · 1ac8f8147f
commit 1ac8f8147f
parent 6a5833ddc7
1 changed file with 30 additions and 29 deletions
--- a/recipes/hindustan_times.recipe
+++ b/recipes/hindustan_times.recipe
@@ -1,5 +1,4 @@
 from calibre.web.feeds.news import BasicNewsRecipe
 import urllib, re
 class HindustanTimes(BasicNewsRecipe):
    title          = u'Hindustan Times'
@@ -14,37 +13,39 @@ class HindustanTimes(BasicNewsRecipe):
    feeds          = [
        ('News',
-            'http://feeds.hindustantimes.com/HT-NewsSectionPage-Topstories'),
+         'http://feeds.hindustantimes.com/HT-HomePage-TopStories'),
-            ('Views',
+        ('India',
-            'http://feeds.hindustantimes.com/HT-ViewsSectionpage-Topstories'),
+         'http://feeds.hindustantimes.com/HT-India'),
-            ('Cricket',
+        ('World',
-            'http://feeds.hindustantimes.com/HT-Cricket-TopStories'),
+         'http://feeds.hindustantimes.com/HT-World'),
        ('Business',
-            'http://feeds.hindustantimes.com/HT-BusinessSectionpage-TopStories'),
+         'http://feeds.hindustantimes.com/HT-Business'),
-            ('Entertainment',
+        ('Fashion',
-            'http://feeds.hindustantimes.com/HT-HomePage-Entertainment'),
+         'http://feeds.hindustantimes.com/HT-Fashion'),
-            ('Lifestyle',
+        ('Sex & Relationships',
-            'http://feeds.hindustantimes.com/HT-Homepage-LifestyleNews'),
+         'http://feeds.hindustantimes.com/HT-Sexandrelationships'),
        ('Travel',
         'http://feeds.hindustantimes.com/HT-Travel'),
        ('Books',
         'http://feeds.hindustantimes.com/HT-Books'),
 ]
    def get_article_url(self, article):
        '''
        HT uses a variant of the feedportal RSS ad display mechanism
        '''
-        try:
+        url = article.get('feedburner_origlink', None)
-            s = article.summary
+        if url is not None:
-            return urllib.unquote(
+            idx = url.find('0L0S')
-                re.search(r'href=".+?bookmark.cfm.+?link=(.+?)"', s).group(1))
+            url = url[idx:]
        except:
            pass
        url = BasicNewsRecipe.get_article_url(self, article)
        res = self.browser.open_novisit(url)
        url = res.geturl().split('/')[-2]
            encoding = {'0B': '.', '0C': '/', '0A': '0', '0F': '=', '0G': '&',
                    '0D': '?', '0E': '-', '0N': '.com', '0L': 'http://', '0S':
                    'www.'}
            for k, v in encoding.iteritems():
                url = url.replace(k, v)
            if url.endswith('/story01.htm'):
                url = url.rpartition('/')[0]
            return url
        return article.get('link', None)