From 54a01f796138e28e8453a205ff44955f5c4b3f0d Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Fri, 13 Aug 2010 13:24:51 -0600
Subject: [PATCH] Fix #6486 (Economic Times of India -  epub in Nook)

---
 .../recipes/theeconomictimes_india.recipe     | 41 ++++++++-----------
 1 file changed, 18 insertions(+), 23 deletions(-)

diff --git a/resources/recipes/theeconomictimes_india.recipe b/resources/recipes/theeconomictimes_india.recipe
index 8756e08e66..5fef377f6e 100644
--- a/resources/recipes/theeconomictimes_india.recipe
+++ b/resources/recipes/theeconomictimes_india.recipe
@@ -1,13 +1,10 @@
-#!/usr/bin/env  python
-
 __license__   = 'GPL v3'
-__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
 '''
 economictimes.indiatimes.com
 '''
 
 from calibre.web.feeds.news import BasicNewsRecipe
-from calibre.ebooks.BeautifulSoup import Tag
 
 class TheEconomicTimes(BasicNewsRecipe):
     title                  = 'The Economic Times India'
@@ -21,18 +18,21 @@ class TheEconomicTimes(BasicNewsRecipe):
     use_embedded_content   = False
     simultaneous_downloads = 1
     encoding               = 'utf-8'
-    lang                   = 'en-IN'
-    language = 'en_IN'
-
-
-    html2lrf_options = [
-                          '--comment', description
-                        , '--category', category
-                        , '--publisher', publisher
-                        , '--ignore-tables'
-                        ]
-
-    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
+    language               = 'en_IN'
+    publication_type       = 'newspaper'
+    masthead_url           = 'http://economictimes.indiatimes.com/photo/2676871.cms'
+    extra_css              = """ body{font-family: Arial,Helvetica,sans-serif} 
+                                .heading1{font-size: xx-large; font-weight: bold} """
+
+    conversion_options = {
+                          'comment'          : description
+                        , 'tags'             : category
+                        , 'publisher'        : publisher
+                        , 'language'         : language
+                        }
+
+    keep_only_tags = [dict(attrs={'class':['heading1','headingnext','Normal']})]
+    remove_tags    = [dict(name=['object','link','embed','iframe','base','table','meta'])]
 
     feeds          = [(u'All articles', u'http://economictimes.indiatimes.com/rssfeedsdefault.cms')]
 
@@ -47,11 +47,6 @@ class TheEconomicTimes(BasicNewsRecipe):
         return rurl
 
     def preprocess_html(self, soup):
-        soup.html['xml:lang'] = self.lang
-        soup.html['lang']     = self.lang
-        mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
-        mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
-        soup.head.insert(0,mlang)
-        soup.head.insert(1,mcharset)
+        for item in soup.findAll(style=True):
+            del item['style']
         return self.adeify_images(soup)
-