Fix Times of India

2025-07-09 03:04:10 -04:00 · 2011-09-24 10:52:22 -06:00 · 2011-09-24 10:52:22 -06:00 · 0f138cd43c
commit 0f138cd43c
parent 6bf872b5be
1 changed file with 7 additions and 7 deletions
--- a/recipes/toi.recipe
+++ b/recipes/toi.recipe
@@ -9,11 +9,12 @@ class TimesOfIndia(BasicNewsRecipe):
    max_articles_per_feed = 25
    no_stylesheets = True
-    keep_only_tags = [{'class':['maintable12', 'prttabl']}]
+    remove_attributes = ['style']
    keep_only_tags = [{'class':re.compile(r'maintable12|prttabl')}]
    remove_tags = [
-            dict(style=lambda x: x and 'float' in x),
+            {'class':re.compile('tabsintbgshow|prvnxtbg')},
-            {'class':['prvnxtbg', 'footbdrin', 'bcclftr']},
+            {'id':['fbrecommend', 'relmaindiv']}
-    ]
+            ]
    feeds          = [
 ('Top Stories',
@@ -41,6 +42,8 @@ class TimesOfIndia(BasicNewsRecipe):
 ]
    def get_article_url(self, article):
        # Times of India sometimes serves an ad page instead of the article,
        # this code, detects and circumvents that
        url = BasicNewsRecipe.get_article_url(self, article)
        if '/0Ltimesofindia' in url:
            url = url.partition('/0L')[-1]
@@ -61,6 +64,3 @@ class TimesOfIndia(BasicNewsRecipe):
        return url
    def preprocess_html(self, soup):
        return soup