From d41cabce250a0dde970709e1dce09401bc341627 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 22 Dec 2009 20:03:58 -0700
Subject: [PATCH] Fix #4269 (The Straits Times feed - error in parser?)

---
 resources/recipes/straitstimes.recipe | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)
diff --git a/resources/recipes/straitstimes.recipe b/resources/recipes/straitstimes.recipe
index 64e50e2f60..5faf616774 100644
--- a/resources/recipes/straitstimes.recipe
+++ b/resources/recipes/straitstimes.recipe
@@ -1,4 +1,3 @@
-#!/usr/bin/env  python
 
 __license__   = 'GPL v3'
 __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
@@ -6,6 +5,7 @@ __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
 www.straitstimes.com
 '''
 
+import re
 from calibre.web.feeds.recipes import BasicNewsRecipe
 
 class StraitsTimes(BasicNewsRecipe):
@@ -29,9 +29,21 @@ class StraitsTimes(BasicNewsRecipe):
                             ,'publisher' : publisher
                          }
 
-    remove_tags = [dict(name=['object','link','map'])]
+    preprocess_regexps = [
+                           (re.compile(
+                            r'<meta name="description" content="[^"]+"\s*/?>',
+                            re.IGNORECASE|re.DOTALL),
+                            lambda m:''),
+                           (re.compile(r'<!--.+?-->', re.IGNORECASE|re.DOTALL),
+                               lambda m: ''),
+                         ]
+    remove_tags = [
+                     dict(name=['object','link','map'])
+                    ,dict(name='div',attrs={'align':'left'})
+                  ]
 
-    keep_only_tags = [dict(name='div', attrs={'class':['top_headline','story_text']})]
+    keep_only_tags = [dict(name='div', attrs={'class':'stleft'})]
+    remove_tags_after=dict(name='div',attrs={'class':'hr_thin'})
 
     feeds = [
                (u'Singapore'       , u'http://www.straitstimes.com/STI/STIFILES/rss/break_singapore.xml' )
@@ -47,4 +59,3 @@ class StraitsTimes(BasicNewsRecipe):
         for item in soup.findAll(style=True):
             del item['style']
         return soup
-