From 986ccd6a30486a42308cc267816c04c76613910c Mon Sep 17 00:00:00 2001 From: Sergiy Kibrik Date: Fri, 25 May 2018 23:24:41 +0300 Subject: [PATCH] lwn_weekly: fix security section articles parsing As the security section has no URLs in article titles, findNext() boldly returns whatever next link is encountered after the anchor. This leads to heavy CVE reports being downloaded and included in the generated document, as links to them are usually placed after the article title. Instead we'd better search under the anchor tag only; this way we'll keep only the useful articles' links. --- recipes/lwn_weekly.recipe | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/recipes/lwn_weekly.recipe b/recipes/lwn_weekly.recipe index 441dc0ec0d..73acd60a36 100644 --- a/recipes/lwn_weekly.recipe +++ b/recipes/lwn_weekly.recipe @@ -114,7 +114,7 @@ class WeeklyLWN(BasicNewsRecipe): # Most articles have anchors in their titles, *except* the # security vulnerabilities - article_anchor = curr.findNext( + article_anchor = curr.find( name='a', attrs={'href': re.compile('^/Articles/')}) if article_anchor: