From 54a01f796138e28e8453a205ff44955f5c4b3f0d Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Fri, 13 Aug 2010 13:24:51 -0600 Subject: [PATCH] Fix #6486 (Economic Times of India - epub in Nook) --- .../recipes/theeconomictimes_india.recipe | 41 ++++++++----------- 1 file changed, 18 insertions(+), 23 deletions(-) diff --git a/resources/recipes/theeconomictimes_india.recipe b/resources/recipes/theeconomictimes_india.recipe index 8756e08e66..5fef377f6e 100644 --- a/resources/recipes/theeconomictimes_india.recipe +++ b/resources/recipes/theeconomictimes_india.recipe @@ -1,13 +1,10 @@ -#!/usr/bin/env python - __license__ = 'GPL v3' -__copyright__ = '2008-2009, Darko Miletic ' +__copyright__ = '2008-2010, Darko Miletic ' ''' economictimes.indiatimes.com ''' from calibre.web.feeds.news import BasicNewsRecipe -from calibre.ebooks.BeautifulSoup import Tag class TheEconomicTimes(BasicNewsRecipe): title = 'The Economic Times India' @@ -21,18 +18,21 @@ class TheEconomicTimes(BasicNewsRecipe): use_embedded_content = False simultaneous_downloads = 1 encoding = 'utf-8' - lang = 'en-IN' - language = 'en_IN' - - - html2lrf_options = [ - '--comment', description - , '--category', category - , '--publisher', publisher - , '--ignore-tables' - ] - - html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True' + language = 'en_IN' + publication_type = 'newspaper' + masthead_url = 'http://economictimes.indiatimes.com/photo/2676871.cms' + extra_css = """ body{font-family: Arial,Helvetica,sans-serif} + .heading1{font-size: xx-large; font-weight: bold} """ + + conversion_options = { + 'comment' : description + , 'tags' : category + , 'publisher' : publisher + , 'language' : language + } + + keep_only_tags = [dict(attrs={'class':['heading1','headingnext','Normal']})] + remove_tags = [dict(name=['object','link','embed','iframe','base','table','meta'])] feeds = [(u'All articles', u'http://economictimes.indiatimes.com/rssfeedsdefault.cms')] @@ -47,11 +47,6 @@ class TheEconomicTimes(BasicNewsRecipe): return rurl def preprocess_html(self, soup): - soup.html['xml:lang'] = self.lang - soup.html['lang'] = self.lang - mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)]) - mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")]) - soup.head.insert(0,mlang) - soup.head.insert(1,mcharset) + for item in soup.findAll(style=True): + del item['style'] return self.adeify_images(soup) -