Update Hindustan Times

2025-08-11 09:13:57 -04:00 · 2014-04-20 09:13:53 +05:30 · 2014-04-20 09:13:53 +05:30 · 1ac8f8147f
commit 1ac8f8147f
parent 6a5833ddc7
1 changed file with 30 additions and 29 deletions
--- a/recipes/hindustan_times.recipe
+++ b/recipes/hindustan_times.recipe
@@ -1,11 +1,10 @@
 from calibre.web.feeds.news import BasicNewsRecipe
 import urllib, re
 class HindustanTimes(BasicNewsRecipe):
    title          = u'Hindustan Times'
    language       = 'en_IN'
    __author__     = 'Krittika Goyal'
-    oldest_article = 1 #days
+    oldest_article = 1  # days
    max_articles_per_feed = 25
    use_embedded_content = False
@@ -13,38 +12,40 @@ class HindustanTimes(BasicNewsRecipe):
    auto_cleanup = True
    feeds          = [
-            ('News',
+        ('News',
-            'http://feeds.hindustantimes.com/HT-NewsSectionPage-Topstories'),
+         'http://feeds.hindustantimes.com/HT-HomePage-TopStories'),
-            ('Views',
+        ('India',
-            'http://feeds.hindustantimes.com/HT-ViewsSectionpage-Topstories'),
+         'http://feeds.hindustantimes.com/HT-India'),
-            ('Cricket',
+        ('World',
-            'http://feeds.hindustantimes.com/HT-Cricket-TopStories'),
+         'http://feeds.hindustantimes.com/HT-World'),
-            ('Business',
+        ('Business',
-            'http://feeds.hindustantimes.com/HT-BusinessSectionpage-TopStories'),
+         'http://feeds.hindustantimes.com/HT-Business'),
-            ('Entertainment',
+        ('Fashion',
-            'http://feeds.hindustantimes.com/HT-HomePage-Entertainment'),
+         'http://feeds.hindustantimes.com/HT-Fashion'),
-            ('Lifestyle',
+        ('Sex & Relationships',
-            'http://feeds.hindustantimes.com/HT-Homepage-LifestyleNews'),
+         'http://feeds.hindustantimes.com/HT-Sexandrelationships'),
        ('Travel',
         'http://feeds.hindustantimes.com/HT-Travel'),
        ('Books',
         'http://feeds.hindustantimes.com/HT-Books'),
 ]
    def get_article_url(self, article):
        '''
        HT uses a variant of the feedportal RSS ad display mechanism
        '''
-        try:
+        url = article.get('feedburner_origlink', None)
-            s = article.summary
+        if url is not None:
-            return urllib.unquote(
+            idx = url.find('0L0S')
-                re.search(r'href=".+?bookmark.cfm.+?link=(.+?)"', s).group(1))
+            url = url[idx:]
-        except:
+            encoding = {'0B': '.', '0C': '/', '0A': '0', '0F': '=', '0G': '&',
-            pass
+                    '0D': '?', '0E': '-', '0N': '.com', '0L': 'http://', '0S':
-        url = BasicNewsRecipe.get_article_url(self, article)
+                    'www.'}
-        res = self.browser.open_novisit(url)
+            for k, v in encoding.iteritems():
-        url = res.geturl().split('/')[-2]
+                url = url.replace(k, v)
-        encoding = {'0B': '.', '0C': '/', '0A': '0', '0F': '=', '0G': '&',
+            if url.endswith('/story01.htm'):
-                '0D': '?', '0E': '-', '0N': '.com', '0L': 'http://', '0S':
+                url = url.rpartition('/')[0]
-                'www.'}
+            return url
-        for k, v in encoding.iteritems():
+        return article.get('link', None)
            url = url.replace(k, v)
        return url