Update Global Times

This commit is contained in:
Kovid Goyal 2019-04-25 09:06:26 +05:30
parent b42963af61
commit 9699bcbb1d
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C

View File

@ -18,14 +18,25 @@ class GlobalTimes(BasicNewsRecipe):
no_stylesheets = True no_stylesheets = True
keep_only_tags = [classes('article-title article-source row-content')] keep_only_tags = [classes('article-title article-source row-content')]
# List of (compiled pattern, replacement callable) pairs.
# NOTE(review): presumably applied by BasicNewsRecipe to the downloaded HTML
# before parsing -- confirm against the calibre recipe API.
#
# The single rule here collapses a run of 2 to 9 consecutive line-break tags
# (<br>, <br/>, <br /> or </br>, matched case-insensitively), each optionally
# followed by whitespace, U+00A0 characters or "&nbsp;" entities, into one
# opening <p> tag.
preprocess_regexps = [(
re.compile(
r'(?:<(?:br(?:\s*/)?|/br\s*)>(?:\s|'
# Non-raw literal on purpose: embeds the actual U+00A0 character in the pattern.
'\xA0'
r'|&nbsp;)*){2,9}', re.U | re.I
# The replacement ignores the match contents and always emits '<p>'.
), lambda match: '<p>'
)]
extra_css = ''' extra_css = '''
:root {
font-family: Arial, Helvetica, sans-serif;
}
.article-title { .article-title {
font-family:Arial,Helvetica,sans-serif; font-weight: bold;
font-weight:bold;font-size:large; font-size: large;
} }
.article-source, .row-content { .article-source, .row-content {
font-family:Arial,Helvetica,sans-serif;
font-size:small; font-size:small;
} }
''' '''
@ -51,8 +62,8 @@ class GlobalTimes(BasicNewsRecipe):
) )
} }
): ):
url = a['href'].strip( # Typical url http://www.globaltimes.cn/content/5555555.shtml
) # Typical url http://www.globaltimes.cn/content/5555555.shtml url = a['href'].strip()
title = self.tag_to_string(a).strip() title = self.tag_to_string(a).strip()
if not title: if not title:
continue continue
@ -68,3 +79,8 @@ class GlobalTimes(BasicNewsRecipe):
if articles: if articles:
feeds.append((catnames[cat], articles)) feeds.append((catnames[cat], articles))
return feeds return feeds
def postprocess_html(self, soup, first_fetch):
    """Strip childless <p> tags from the parsed page and return it.

    ``soup`` is called with ``'p'`` to obtain the paragraph tags; every tag
    whose ``len()`` is zero is detached via ``extract()``. ``first_fetch``
    is accepted but not used here.
    """
    # Collect the candidates first so tags are not detached while the
    # result of soup('p') is still being walked.
    childless = []
    for paragraph in soup('p'):
        if len(paragraph) == 0:
            childless.append(paragraph)
    for paragraph in childless:
        paragraph.extract()
    return soup