From 9699bcbb1da404c60c915b2e463d286ee12b8747 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 25 Apr 2019 09:06:26 +0530 Subject: [PATCH] Update Global Times --- recipes/globaltimes.recipe | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/recipes/globaltimes.recipe b/recipes/globaltimes.recipe index 2c1025267d..5b8d9466d8 100644 --- a/recipes/globaltimes.recipe +++ b/recipes/globaltimes.recipe @@ -18,14 +18,25 @@ class GlobalTimes(BasicNewsRecipe): no_stylesheets = True keep_only_tags = [classes('article-title article-source row-content')] + preprocess_regexps = [( + re.compile( + r'(?:<(?:br(?:\s*/)?|/br\s*)>(?:\s|' + '\xA0' + r'|&nbsp;)*){2,9}', re.U | re.I + ), lambda match: '<p>' + )] + extra_css = ''' + :root { + font-family: Arial, Helvetica, sans-serif; + } + .article-title { - font-family:Arial,Helvetica,sans-serif; - font-weight:bold;font-size:large; + font-weight: bold; + font-size: large; } .article-source, .row-content { - font-family:Arial,Helvetica,sans-serif; font-size:small; } ''' @@ -51,8 +62,8 @@ class GlobalTimes(BasicNewsRecipe): ) } ): - url = a['href'].strip( - ) # Typical url http://www.globaltimes.cn/content/5555555.shtml + # Typical url http://www.globaltimes.cn/content/5555555.shtml + url = a['href'].strip() title = self.tag_to_string(a).strip() if not title: continue @@ -68,3 +79,8 @@ class GlobalTimes(BasicNewsRecipe): if articles: feeds.append((catnames[cat], articles)) return feeds + + def postprocess_html(self, soup, first_fetch): + for p in [p for p in soup('p') if len(p) == 0]: + p.extract() + return soup