From 2dea2852a73dd6075e0227bfca43bf579f9d8daa Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Thu, 25 Aug 2011 22:03:29 -0600
Subject: [PATCH] Improve Reuters
---
recipes/reuters.recipe | 35 +++++++++++++++++------------------
1 file changed, 17 insertions(+), 18 deletions(-)
diff --git a/recipes/reuters.recipe b/recipes/reuters.recipe
index 8cc638a760..313e6b8607 100644
--- a/recipes/reuters.recipe
+++ b/recipes/reuters.recipe
@@ -16,23 +16,25 @@ class Reuters(BasicNewsRecipe):
remove_javascript = True
extra_css = '''
- body{font-family:arial,helvetica,sans;}
+ body{font-family:arial,helvetica,sans;}
h1{ font-size:larger ; font-weight:bold; }
.byline{color:#006E97;font-size:x-small; font-weight:bold;}
.location{font-size:x-small; font-weight:bold;}
.timestamp{font-size:x-small; }
- '''
+ '''
keep_only_tags = [dict(name='div', attrs={'class':'column2 gridPanel grid8'})]
-
remove_tags = [dict(name='div', attrs={'id':['recommendedArticles','relatedNews','relatedVideo','relatedFactboxes']}),
dict(name='p', attrs={'class':['relatedTopics']}),
- dict(name='a', attrs={'id':['fullSizeLink']}),
- dict(name='div', attrs={'class':['photoNav','relatedTopicButtons','articleComments','gridPanel grid8','footerHalf gridPanel grid1','gridPanel grid2','gridPanel grid3']}),]
+ dict(name='a', attrs={'id':['fullSizeLink']}),
+ dict(name='div', attrs={'class':['photoNav','relatedTopicButtons','articleComments','gridPanel grid8','footerHalf gridPanel grid1','gridPanel grid2','gridPanel grid3']}),
# Also remove the Tweet, Share this, Email and Print links below the article title.
+ dict(name='div', attrs={'class':['columnRight']}),
+ ]
preprocess_regexps = [ (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
-[
+ [
##(r'.*?' , lambda match : ''),
(r'
.*?', lambda match : ''),
(r'
Share:
.*?', lambda match : '