Update Hindustan Times

2025-11-25 07:45:01 -05:00 · 2014-04-20 09:13:53 +05:30 · 2014-04-20 09:13:53 +05:30 · 1ac8f8147f
commit 1ac8f8147f
parent 6a5833ddc7
1 changed file with 30 additions and 29 deletions
--- a/recipes/hindustan_times.recipe
+++ b/recipes/hindustan_times.recipe
@@ -1,11 +1,10 @@
 from calibre.web.feeds.news import BasicNewsRecipe
-import urllib, re

 class HindustanTimes(BasicNewsRecipe):
    title          = u'Hindustan Times'
    language       = 'en_IN'
    __author__     = 'Krittika Goyal'
-    oldest_article = 1 #days
+    oldest_article = 1  # days
    max_articles_per_feed = 25
    use_embedded_content = False

@@ -13,38 +12,40 @@ class HindustanTimes(BasicNewsRecipe):
    auto_cleanup = True

    feeds          = [
-            ('News',
-            'http://feeds.hindustantimes.com/HT-NewsSectionPage-Topstories'),
-            ('Views',
-            'http://feeds.hindustantimes.com/HT-ViewsSectionpage-Topstories'),
-            ('Cricket',
-            'http://feeds.hindustantimes.com/HT-Cricket-TopStories'),
-            ('Business',
-            'http://feeds.hindustantimes.com/HT-BusinessSectionpage-TopStories'),
-            ('Entertainment',
-            'http://feeds.hindustantimes.com/HT-HomePage-Entertainment'),
-            ('Lifestyle',
-            'http://feeds.hindustantimes.com/HT-Homepage-LifestyleNews'),
+        ('News',
+         'http://feeds.hindustantimes.com/HT-HomePage-TopStories'),
+        ('India',
+         'http://feeds.hindustantimes.com/HT-India'),
+        ('World',
+         'http://feeds.hindustantimes.com/HT-World'),
+        ('Business',
+         'http://feeds.hindustantimes.com/HT-Business'),
+        ('Fashion',
+         'http://feeds.hindustantimes.com/HT-Fashion'),
+        ('Sex & Relationships',
+         'http://feeds.hindustantimes.com/HT-Sexandrelationships'),
+        ('Travel',
+         'http://feeds.hindustantimes.com/HT-Travel'),
+        ('Books',
+         'http://feeds.hindustantimes.com/HT-Books'),
 ]

    def get_article_url(self, article):
        '''
        HT uses a variant of the feedportal RSS ad display mechanism
        '''
-        try:
-            s = article.summary
-            return urllib.unquote(
-                re.search(r'href=".+?bookmark.cfm.+?link=(.+?)"', s).group(1))
-        except:
-            pass
-        url = BasicNewsRecipe.get_article_url(self, article)
-        res = self.browser.open_novisit(url)
-        url = res.geturl().split('/')[-2]
-        encoding = {'0B': '.', '0C': '/', '0A': '0', '0F': '=', '0G': '&',
-                '0D': '?', '0E': '-', '0N': '.com', '0L': 'http://', '0S':
-                'www.'}
-        for k, v in encoding.iteritems():
-            url = url.replace(k, v)
-        return url
+        url = article.get('feedburner_origlink', None)
+        if url is not None:
+            idx = url.find('0L0S')
+            url = url[idx:]
+            encoding = {'0B': '.', '0C': '/', '0A': '0', '0F': '=', '0G': '&',
+                    '0D': '?', '0E': '-', '0N': '.com', '0L': 'http://', '0S':
+                    'www.'}
+            for k, v in encoding.iteritems():
+                url = url.replace(k, v)
+            if url.endswith('/story01.htm'):
+                url = url.rpartition('/')[0]
+            return url
+        return article.get('link', None)