From 6a516d57039c19d505111a87eca3e73a180bb8c8 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 29 Apr 2010 08:16:59 -0600 Subject: [PATCH] Improved Reuters --- resources/recipes/reuters.recipe | 33 +++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/resources/recipes/reuters.recipe b/resources/recipes/reuters.recipe index d64099ce8e..8cc638a760 100644 --- a/resources/recipes/reuters.recipe +++ b/resources/recipes/reuters.recipe @@ -7,14 +7,31 @@ class Reuters(BasicNewsRecipe): title = 'Reuters' description = 'Global news' - __author__ = 'Kovid Goyal' + __author__ = 'Kovid Goyal and Sujata Raman' use_embedded_content = False language = 'en' max_articles_per_feed = 10 + no_stylesheets = True + remove_javascript = True - - preprocess_regexps = [ (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in + extra_css = ''' + body{font-family:arial,helvetica,sans;} + h1{ font-size:larger ; font-weight:bold; } + .byline{color:#006E97;font-size:x-small; font-weight:bold;} + .location{font-size:x-small; font-weight:bold;} + .timestamp{font-size:x-small; } + ''' + + keep_only_tags = [dict(name='div', attrs={'class':'column2 gridPanel grid8'})] + + + remove_tags = [dict(name='div', attrs={'id':['recommendedArticles','relatedNews','relatedVideo','relatedFactboxes']}), + dict(name='p', attrs={'class':['relatedTopics']}), + dict(name='a', attrs={'id':['fullSizeLink']}), + dict(name='div', attrs={'class':['photoNav','relatedTopicButtons','articleComments','gridPanel grid8','footerHalf gridPanel grid1','gridPanel grid2','gridPanel grid3']}),] + + preprocess_regexps = [ (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in [ ##(r'.*?' , lambda match : ''), (r'
.*?', lambda match : ''), @@ -24,10 +41,10 @@ class Reuters(BasicNewsRecipe): (r'

Share:

.*?', lambda match : ''), (r'
.*?
', lambda match : '
'), ] - ] - + ] + + - feeds = [ ('Top Stories', 'http://feeds.reuters.com/reuters/topNews?format=xml'), ('US News', 'http://feeds.reuters.com/reuters/domesticNews?format=xml'), ('World News', 'http://feeds.reuters.com/reuters/worldNews?format=xml'), @@ -37,6 +54,4 @@ class Reuters(BasicNewsRecipe): ('Technology News', 'http://feeds.reuters.com/reuters/technologyNews?format=xml'), ('Oddly Enough News', 'http://feeds.reuters.com/reuters/oddlyEnoughNews?format=xml') ] - - def print_version(self, url): - return ('http://www.reuters.com/article/id' + url + '?sp=true') +