From ffee7f8da1f250fb7b51d6eb1e080385c6618f6e Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 8 Feb 2009 22:29:39 -0800 Subject: [PATCH] New recipe for DNA: India by Kovid Goyal. Also updated Outlook India recipe to work with EPUB output --- src/calibre/web/feeds/recipes/__init__.py | 2 +- src/calibre/web/feeds/recipes/recipe_dna.py | 41 +++++++++++++++++++ .../web/feeds/recipes/recipe_outlook_india.py | 6 ++- 3 files changed, 46 insertions(+), 3 deletions(-) create mode 100644 src/calibre/web/feeds/recipes/recipe_dna.py diff --git a/src/calibre/web/feeds/recipes/__init__.py b/src/calibre/web/feeds/recipes/__init__.py index 623d9df6b7..60ae0761cf 100644 --- a/src/calibre/web/feeds/recipes/__init__.py +++ b/src/calibre/web/feeds/recipes/__init__.py @@ -27,7 +27,7 @@ recipe_modules = ['recipe_' + r for r in ( 'shacknews', 'teleread', 'granma', 'juventudrebelde', 'juventudrebelde_english', 'la_tercera', 'el_mercurio_chile', 'la_cuarta', 'lanacion_chile', 'la_segunda', 'jb_online', 'estadao', 'o_globo', 'vijesti', 'elmundo', 'the_oz', - 'honoluluadvertiser', 'starbulletin', 'exiled', 'indy_star', + 'honoluluadvertiser', 'starbulletin', 'exiled', 'indy_star', 'dna', )] import re, imp, inspect, time, os diff --git a/src/calibre/web/feeds/recipes/recipe_dna.py b/src/calibre/web/feeds/recipes/recipe_dna.py new file mode 100644 index 0000000000..6ec9ba4665 --- /dev/null +++ b/src/calibre/web/feeds/recipes/recipe_dna.py @@ -0,0 +1,41 @@ +''' +dnaindia.com +''' +import re +from calibre.web.feeds.news import BasicNewsRecipe + +class DNAIndia(BasicNewsRecipe): + + title = 'DNA India' + description = 'Mumbai news, India news, World news, breaking news' + __author__ = 'Kovid Goyal' + language = _('English') + + feeds = [ + ('Top News', 'http://www.dnaindia.com/syndication/rss_topnews.xml'), + ('Popular News', 'http://www.dnaindia.com/syndication/rss_popular.xml'), + ('Recent Columns', 'http://www.dnaindia.com/syndication/rss_column.xml'), + ('Mumbai', 'http://www.dnaindia.com/syndication/rss,catid-1.xml'), + ('India', 'http://www.dnaindia.com/syndication/rss,catid-2.xml'), + ('World', 'http://www.dnaindia.com/syndication/rss,catid-9.xml'), + ('Money', 'http://www.dnaindia.com/syndication/rss,catid-4.xml'), + ('Sports', 'http://www.dnaindia.com/syndication/rss,catid-6.xml'), + ('After Hours', 'http://www.dnaindia.com/syndication/rss,catid-7.xml'), + ('Digital Life', 'http://www.dnaindia.com/syndication/rss,catid-1089741.xml'), + ] + remove_tags = [{'id':'footer'}, {'class':['bottom', 'categoryHead']}] + + def print_version(self, url): + match = re.search(r'newsid=(\d+)', url) + if not match: + return url + return 'http://www.dnaindia.com/dnaprint.asp?newsid='+match.group(1) + + def postprocess_html(self, soup, first_fetch): + for t in soup.findAll(['table', 'tr', 'td']): + t.name = 'div' + + a = soup.find(href='http://www.3dsyndication.com/') + if a is not None: + a.parent.extract() + return soup \ No newline at end of file diff --git a/src/calibre/web/feeds/recipes/recipe_outlook_india.py b/src/calibre/web/feeds/recipes/recipe_outlook_india.py index e84a53f04c..8576b41f39 100644 --- a/src/calibre/web/feeds/recipes/recipe_outlook_india.py +++ b/src/calibre/web/feeds/recipes/recipe_outlook_india.py @@ -13,11 +13,10 @@ class OutlookIndia(BasicNewsRecipe): title = 'Outlook India' __author__ = 'Kovid Goyal' - description = 'Weekly news magazine focussed on India.' + description = 'Weekly news magazine focused on India.' language = _('English') recursions = 1 match_regexp = r'full.asp.*&pn=\d+' - html2lrf_options = ['--ignore-tables'] remove_tags = [ dict(name='img', src="images/space.gif"), @@ -81,5 +80,8 @@ class OutlookIndia(BasicNewsRecipe): bad.append(table) for b in bad: b.extract() + soup = soup.findAll('html')[0] + for t in soup.findAll(['table', 'tr', 'td']): + t.name = 'div' return soup