diff --git a/src/libprs500/web/feeds/recipes/__init__.py b/src/libprs500/web/feeds/recipes/__init__.py index 44f9e8f3f6..650c1bd158 100644 --- a/src/libprs500/web/feeds/recipes/__init__.py +++ b/src/libprs500/web/feeds/recipes/__init__.py @@ -18,7 +18,7 @@ Builtin recipes. ''' recipes = ['newsweek', 'atlantic', 'economist', 'dilbert', 'portfolio', - 'nytimes', 'usatoday'] + 'nytimes', 'usatoday', 'outlook_india'] import re, time from libprs500.web.feeds.news import BasicNewsRecipe, CustomIndexRecipe diff --git a/src/libprs500/web/feeds/recipes/outlook_india.py b/src/libprs500/web/feeds/recipes/outlook_india.py new file mode 100644 index 0000000000..01e509fdf6 --- /dev/null +++ b/src/libprs500/web/feeds/recipes/outlook_india.py @@ -0,0 +1,94 @@ +#!/usr/bin/env python + +## Copyright (C) 2008 Kovid Goyal kovid@kovidgoyal.net +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2 of the License, or +## (at your option) any later version. +## +## This program is distributed in the hope that it will be useful, +## but WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +## GNU General Public License for more details. +## +## You should have received a copy of the GNU General Public License along +## with this program; if not, write to the Free Software Foundation, Inc., +## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +''' +outlookindia.com +''' + +from libprs500.web.feeds.news import BasicNewsRecipe +import re + +class OutlookIndia(BasicNewsRecipe): + + title = 'Outlook India' + recursions = 1 + match_regexp = r'full.asp.*&pn=\d+' + html2lrf_options = ['--ignore-tables'] + + remove_tags = [ + dict(name='img', src="images/space.gif"), + dict(name=lambda tag: tag.name == 'tr' and tag.find('img', src="image/tl.gif") is not None ), + dict(name=lambda tag: tag.name == 'table' and tag.find('font', attrs={'class':'fontemailfeed'}) is not None), + ] + + preprocess_regexps = [ + (re.compile(r'