Fix #1039285 (Updated recipe for The Times UK)

2025-07-08 02:34:06 -04:00 · 2012-08-21 09:23:42 +05:30 · 2012-08-21 09:23:42 +05:30 · db26e18d7c
commit db26e18d7c
parent 4131aa18da
1 changed files with 8 additions and 10 deletions
--- a/recipes/times_online.recipe
+++ b/recipes/times_online.recipe
@@ -1,6 +1,6 @@

 __license__   = 'GPL v3'
-__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2009-2012, Darko Miletic <darko.miletic at gmail.com>'
 '''
 www.thetimes.co.uk
 '''
@@ -21,6 +21,7 @@ class TimesOnline(BasicNewsRecipe):
    encoding              = 'utf-8'
    delay                 = 1
    needs_subscription    = True
+    auto_cleanup          = False    
    publication_type      = 'newspaper'
    masthead_url          = 'http://www.thetimes.co.uk/tto/public/img/the_times_460.gif'
    INDEX                 = 'http://www.thetimes.co.uk'
@@ -41,13 +42,14 @@ class TimesOnline(BasicNewsRecipe):

    def get_browser(self):
        br = BasicNewsRecipe.get_browser()
-        br.open('http://www.timesplus.co.uk/tto/news/?login=false&url=http://www.thetimes.co.uk/tto/news/?lightbox=false')
+        br.open('http://www.thetimes.co.uk/tto/news/')
        if self.username is not None and self.password is not None:
-            data = urllib.urlencode({ 'userName':self.username
+            data = urllib.urlencode({ 
+                                      'gotoUrl' :self.INDEX
+                                     ,'username':self.username
                                     ,'password':self.password
-                                     ,'keepMeLoggedIn':'false'
                                   })
-            br.open('https://www.timesplus.co.uk/iam/app/authenticate',data)
+            br.open('https://acs.thetimes.co.uk/user/login',data)
        return br

    remove_tags      = [
@@ -58,6 +60,7 @@ class TimesOnline(BasicNewsRecipe):
    keep_only_tags   = [
                          dict(attrs={'class':'heading' })
                         ,dict(attrs={'class':'f-author'})
+                         ,dict(attrs={'class':['media','byline-timestamp']})
                         ,dict(attrs={'id':'bodycopy'})
                       ]

@@ -79,11 +82,6 @@ class TimesOnline(BasicNewsRecipe):
               ,(u'Arts'        , PREFIX + u'arts/?view=list'         )
            ]

-    def preprocess_html(self, soup):
-        for item in soup.findAll(style=True):
-            del item['style']
-        return self.adeify_images(soup)
-
    def parse_index(self):
        totalfeeds = []
        lfeeds = self.get_feeds()