diff --git a/resources/recipes/businessworldin.recipe b/resources/recipes/businessworldin.recipe index 99d56e850f..e44682d7e1 100644 --- a/resources/recipes/businessworldin.recipe +++ b/resources/recipes/businessworldin.recipe @@ -1,7 +1,5 @@ -#!/usr/bin/env python - __license__ = 'GPL v3' -__copyright__ = '2009, Darko Miletic ' +__copyright__ = '2009-2010, Darko Miletic ' ''' www.businessworld.in ''' @@ -22,7 +20,11 @@ class BusinessWorldMagazine(BasicNewsRecipe): use_embedded_content = False encoding = 'utf-8' language = 'en_IN' - + extra_css = """ + img{display: block; margin-bottom: 0.5em} + body{font-family: Arial,Helvetica,sans-serif} + h2{color: gray; display: block} + """ conversion_options = { 'comment' : description @@ -42,7 +44,26 @@ class BusinessWorldMagazine(BasicNewsRecipe): articles = [] linklist = [] soup = self.index_to_soup(self.INDEX) - + + tough = soup.find('div', attrs={'id':'tough'}) + if tough: + for item in tough.findAll('h1'): + description = '' + title_prefix = '' + feed_link = item.find('a') + if feed_link and feed_link.has_key('href'): + url = self.ROOT + feed_link['href'] + if not self.is_in_list(linklist,url): + title = title_prefix + self.tag_to_string(feed_link) + date = strftime(self.timefmt) + articles.append({ + 'title' :title + ,'date' :date + ,'url' :url + ,'description':description + }) + linklist.append(url) + for item in soup.findAll('div', attrs={'class':'nametitle'}): description = '' title_prefix = '' @@ -62,8 +83,8 @@ class BusinessWorldMagazine(BasicNewsRecipe): return [(soup.head.title.string, articles)] - keep_only_tags = [dict(name='div', attrs={'id':['register-panel','printwrapper']})] - remove_tags = [dict(name=['object','link'])] + keep_only_tags = [dict(name='div', attrs={'id':'printwrapper'})] + remove_tags = [dict(name=['object','link','meta','base','iframe','link','table'])] def print_version(self, url): return url.replace('/bw/','/bw/storyContent/')