From 3d455a02eadf71f9d57ed2c451c35d6033bb9ef9 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Mon, 16 Feb 2009 13:48:58 -0800
Subject: [PATCH] New recipe for The Hindu by Kovid Goyal

---
 src/calibre/web/feeds/news.py                 |  2 +-
 src/calibre/web/feeds/recipes/__init__.py     |  1 +
 src/calibre/web/feeds/recipes/recipe_hindu.py | 47 +++++++++++++++++++
 3 files changed, 49 insertions(+), 1 deletion(-)
 create mode 100644 src/calibre/web/feeds/recipes/recipe_hindu.py
diff --git a/src/calibre/web/feeds/news.py b/src/calibre/web/feeds/news.py
index edcf315d62..4773d551c3 100644
--- a/src/calibre/web/feeds/news.py
+++ b/src/calibre/web/feeds/news.py
@@ -190,7 +190,7 @@ class BasicNewsRecipe(object, LoggingInterface):
     #: For the format for specifying a tag see :attr:`BasicNewsRecipe.remove_tags`.
     #: For example::
     #:
-    #:     remove_tags_before = [dict(id='content')]
+    #:     remove_tags_before = dict(id='content')
     #:
     #: will remove all
     #: tags before the first element with `id="content"`.
diff --git a/src/calibre/web/feeds/recipes/__init__.py b/src/calibre/web/feeds/recipes/__init__.py
index dd2b81c3a8..3723483fb7 100644
--- a/src/calibre/web/feeds/recipes/__init__.py
+++ b/src/calibre/web/feeds/recipes/__init__.py
@@ -29,6 +29,7 @@ recipe_modules = ['recipe_' + r for r in (
            'jb_online', 'estadao', 'o_globo', 'vijesti', 'elmundo', 'the_oz',
            'honoluluadvertiser', 'starbulletin', 'exiled', 'indy_star', 'dna',
            'pobjeda', 'chicago_breaking_news', 'glasgow_herald', 'linuxdevices',
+           'hindu'
           )]
 
 import re, imp, inspect, time, os
diff --git a/src/calibre/web/feeds/recipes/recipe_hindu.py b/src/calibre/web/feeds/recipes/recipe_hindu.py
new file mode 100644
index 0000000000..073eb7afd1
--- /dev/null
+++ b/src/calibre/web/feeds/recipes/recipe_hindu.py
@@ -0,0 +1,47 @@
+from __future__ import with_statement
+__license__ = 'GPL 3'
+__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
+
+import re
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class TheHindu(BasicNewsRecipe):
+    '''Recipe for The Hindu, built from the hindu.com RSS feeds listed in feeds.'''
+
+    title                 = u'The Hindu'
+    language              = _('English')
+    oldest_article        = 7
+    __author__            = _('Kovid Goyal')
+    max_articles_per_feed = 100
+
+    # Discard everything that precedes the first <font class="storyhead"> tag.
+    remove_tags_before = {'name':'font', 'class':'storyhead'}
+    # Cut the page at the "story ends" marker and close the document there.
+    preprocess_regexps = [
+        (re.compile(r'<!-- story ends -->.*', re.DOTALL),
+         lambda match: '</body></html>'),
+    ]
+
+    # (section title, feed URL) pairs.
+    feeds = [
+        (u'Main - Front Page', u'http://www.hindu.com/rss/01hdline.xml'),
+        (u'Main - National', u'http://www.hindu.com/rss/02hdline.xml'),
+        (u'Main - International', u'http://www.hindu.com/rss/03hdline.xml'),
+        (u'Main - Opinion', u'http://www.hindu.com/rss/05hdline.xml'),
+        (u'Main - Business', u'http://www.hindu.com/rss/06hdline.xml'),
+        (u'Main - Sport', u'http://www.hindu.com/rss/07hdline.xml'),
+        (u'Main - Weather / Religion / Crossword / Cartoon', u'http://www.hindu.com/rss/10hdline.xml'),
+        (u'Main - Engagements', u'http://www.hindu.com/rss/26hdline.xml'),
+        (u'Supplement - Literary Review', u'http://www.hindu.com/rss/lrhdline.xml'),
+        (u'Supplement - Sunday Magazine', u'http://www.hindu.com/rss/maghdline.xml'),
+        (u'Supplement - Open Page', u'http://www.hindu.com/rss/ophdline.xml'),
+        (u'Supplement - Business Review', u'http://www.hindu.com/rss/bizhdline.xml'),
+        (u'Supplement - Book Review', u'http://www.hindu.com/rss/brhdline.xml'),
+        (u'Supplement - Science & Technology', u'http://www.hindu.com/rss/setahdline.xml'),
+    ]
+
+    def postprocess_html(self, soup, first_fetch):
+        '''Turn table, tr and td tags into divs (flattens table layout); return soup.'''
+        for t in soup.findAll(['table', 'tr', 'td']):
+            t.name = 'div'
+        return soup
\ No newline at end of file