From 08887c9a2713a08f6ea4e9b539d1f89d4211a19f Mon Sep 17 00:00:00 2001 From: Tom Scholl Date: Mon, 28 Mar 2011 21:17:32 +0000 Subject: [PATCH 1/5] Minor fixes to news recipes idnes and smith --- recipes/idnes.recipe | 2 +- recipes/smith.recipe | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/recipes/idnes.recipe b/recipes/idnes.recipe index 0bd4de2327..238c90694c 100644 --- a/recipes/idnes.recipe +++ b/recipes/idnes.recipe @@ -34,7 +34,7 @@ class iHeuteRecipe(BasicNewsRecipe): dict(name='table', attrs={'class':['video-16ku9']})] remove_tags_after = [dict(name='div',attrs={'id':['related','related2']})] - keep_only_tags = [dict(name='div', attrs={'class':['art-full adwords-text','dil-day']}) + keep_only_tags = [dict(name='div', attrs={'class':['art-full adwords-text','dil-day','art-full']}) ,dict(name='table',attrs={'class':['kemel-box']})] def print_version(self, url): diff --git a/recipes/smith.recipe b/recipes/smith.recipe index 98f7d98517..06075b8d1b 100644 --- a/recipes/smith.recipe +++ b/recipes/smith.recipe @@ -7,6 +7,7 @@ class SmithsonianMagazine(BasicNewsRecipe): __author__ = 'Krittika Goyal' oldest_article = 31#days max_articles_per_feed = 50 + use_embedded_content = False #encoding = 'latin1' recursions = 1 match_regexps = ['&page=[2-9]$'] From f3a522ff5594ba9d04bf103dd838f9651868caba Mon Sep 17 00:00:00 2001 From: Tom Scholl Date: Fri, 1 Apr 2011 21:51:59 +0000 Subject: [PATCH 2/5] Removed spammy ticket adverts from guardian recipe --- recipes/guardian.recipe | 1 + 1 file changed, 1 insertion(+) diff --git a/recipes/guardian.recipe b/recipes/guardian.recipe index a4329d279e..6211997b06 100644 --- a/recipes/guardian.recipe +++ b/recipes/guardian.recipe @@ -36,6 +36,7 @@ class Guardian(BasicNewsRecipe): remove_tags = [ dict(name='div', attrs={'class':["video-content","videos-third-column"]}), dict(name='div', attrs={'id':["article-toolbox","subscribe-feeds",]}), + dict(name='div', attrs={'class':["guardian-tickets promo-component",]}), dict(name='ul', attrs={'class':["pagination"]}), dict(name='ul', attrs={'id':["content-actions"]}), #dict(name='img'), From 778ed36afaca1a687e7918ab2293988dc0047860 Mon Sep 17 00:00:00 2001 From: Tom Scholl Date: Sat, 2 Apr 2011 16:23:40 +0000 Subject: [PATCH 3/5] Fixes for International Herald Tribune recipe --- recipes/iht.recipe | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/recipes/iht.recipe b/recipes/iht.recipe index 040ff83927..85a87ccba4 100644 --- a/recipes/iht.recipe +++ b/recipes/iht.recipe @@ -15,10 +15,10 @@ class InternationalHeraldTribune(BasicNewsRecipe): language = 'en' oldest_article = 1 - max_articles_per_feed = 10 + max_articles_per_feed = 30 no_stylesheets = True - remove_tags = [dict(name='div', attrs={'class':'footer'}), + remove_tags = [dict(name='div', attrs={'class':['footer','header']}), dict(name=['form'])] preprocess_regexps = [ (re.compile(r'