From 1572ca42a754d7b94f82e1111842faad5635923a Mon Sep 17 00:00:00 2001
From: bobbysteel
Date: Wed, 5 Jul 2017 22:42:39 +0100
Subject: [PATCH 1/4] Fix for no articles returned

May need further cleaning but at least returns articles now. Fixes https://bugs.launchpad.net/calibre/+bug/1674042
---
 recipes/politico.recipe | 11 -----------
 1 file changed, 11 deletions(-)

diff --git a/recipes/politico.recipe b/recipes/politico.recipe
index db7ebfc3cd..024021a4ae 100644
--- a/recipes/politico.recipe
+++ b/recipes/politico.recipe
@@ -70,19 +70,8 @@ class Politico(BasicNewsRecipe):
             del item['style']
         return soup
 
-    url_pat = re.compile(r'
Date: Wed, 5 Jul 2017 22:51:00 +0100
Subject: [PATCH 2/4] Remove site boilerplate

---
 recipes/politico.recipe | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/recipes/politico.recipe b/recipes/politico.recipe
index 024021a4ae..de54e419b0 100644
--- a/recipes/politico.recipe
+++ b/recipes/politico.recipe
@@ -35,6 +35,10 @@ class Politico(BasicNewsRecipe):
     html2epub_options = 'publisher="' + publisher + '"\ncomments="' + \
         description + '"\ntags="' + category + '"\nlinearize_tables=True'
 
+    keep_only_tags = [
+        dict(name=['article']),
+    ]
+
     remove_tags = [
         dict(name=['notags', 'embed', 'object', 'link', 'img']),
     ]

From 69a53b6638b1cfce4a3e299794505d89cb704622 Mon Sep 17 00:00:00 2001
From: bobbysteel
Date: Wed, 5 Jul 2017 23:03:32 +0100
Subject: [PATCH 3/4] Cleaning further

---
 recipes/politico.recipe | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/recipes/politico.recipe b/recipes/politico.recipe
index de54e419b0..106f3d0ef9 100644
--- a/recipes/politico.recipe
+++ b/recipes/politico.recipe
@@ -40,7 +40,19 @@ class Politico(BasicNewsRecipe):
     ]
 
     remove_tags = [
-        dict(name=['notags', 'embed', 'object', 'link', 'img']),
+        dict(name=['notags', 'embed', 'aside', 'object', 'link', 'img', 'figure']),
+        dict(
+            attrs={'class': lambda x: x and 'story-tools' in x.split()}),
+        dict(
+            attrs={'class': lambda x: x and 'story-continued' in x.split()}),
+        dict(
+            attrs={'class': lambda x: x and 'story-supplement' in x.split()}),
+        dict(
+            attrs={'class': lambda x: x and 'story-share' in x.split()}),
+        dict(
+            attrs={'class': lambda x: x and 'suggested' in x.split()}),
+        dict(
+            attrs={'class': lambda x: x and 'summary' in x.split()}),
     ]
 
     extra_css = '''

From 7801665cc18fcf6f94963397915fea9bb5492f27 Mon Sep 17 00:00:00 2001
From: bobbysteel
Date: Wed, 5 Jul 2017 23:04:46 +0100
Subject: [PATCH 4/4] Update politico.recipe

---
 recipes/politico.recipe | 2 --
 1 file changed, 2 deletions(-)

diff --git a/recipes/politico.recipe b/recipes/politico.recipe
index 106f3d0ef9..b0cd4dc43a 100644
--- a/recipes/politico.recipe
+++ b/recipes/politico.recipe
@@ -51,8 +51,6 @@ class Politico(BasicNewsRecipe):
             attrs={'class': lambda x: x and 'story-share' in x.split()}),
         dict(
             attrs={'class': lambda x: x and 'suggested' in x.split()}),
-        dict(
-            attrs={'class': lambda x: x and 'summary' in x.split()}),
     ]
 
     extra_css = '''