mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Update Global Times
This commit is contained in:
parent
b42963af61
commit
9699bcbb1d
@ -18,14 +18,25 @@ class GlobalTimes(BasicNewsRecipe):
|
|||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
keep_only_tags = [classes('article-title article-source row-content')]
|
keep_only_tags = [classes('article-title article-source row-content')]
|
||||||
|
|
||||||
|
preprocess_regexps = [(
|
||||||
|
re.compile(
|
||||||
|
r'(?:<(?:br(?:\s*/)?|/br\s*)>(?:\s|'
|
||||||
|
'\xA0'
|
||||||
|
r'| )*){2,9}', re.U | re.I
|
||||||
|
), lambda match: '<p>'
|
||||||
|
)]
|
||||||
|
|
||||||
extra_css = '''
|
extra_css = '''
|
||||||
|
:root {
|
||||||
|
font-family: Arial, Helvetica, sans-serif;
|
||||||
|
}
|
||||||
|
|
||||||
.article-title {
|
.article-title {
|
||||||
font-family:Arial,Helvetica,sans-serif;
|
font-weight: bold;
|
||||||
font-weight:bold;font-size:large;
|
font-size: large;
|
||||||
}
|
}
|
||||||
|
|
||||||
.article-source, .row-content {
|
.article-source, .row-content {
|
||||||
font-family:Arial,Helvetica,sans-serif;
|
|
||||||
font-size:small;
|
font-size:small;
|
||||||
}
|
}
|
||||||
'''
|
'''
|
||||||
@ -51,8 +62,8 @@ class GlobalTimes(BasicNewsRecipe):
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
):
|
):
|
||||||
url = a['href'].strip(
|
# Typical url http://www.globaltimes.cn/content/5555555.shtml
|
||||||
) # Typical url http://www.globaltimes.cn/content/5555555.shtml
|
url = a['href'].strip()
|
||||||
title = self.tag_to_string(a).strip()
|
title = self.tag_to_string(a).strip()
|
||||||
if not title:
|
if not title:
|
||||||
continue
|
continue
|
||||||
@ -68,3 +79,8 @@ class GlobalTimes(BasicNewsRecipe):
|
|||||||
if articles:
|
if articles:
|
||||||
feeds.append((catnames[cat], articles))
|
feeds.append((catnames[cat], articles))
|
||||||
return feeds
|
return feeds
|
||||||
|
|
||||||
|
def postprocess_html(self, soup, first_fetch):
|
||||||
|
for p in [p for p in soup('p') if len(p) == 0]:
|
||||||
|
p.extract()
|
||||||
|
return soup
|
||||||
|
Loading…
x
Reference in New Issue
Block a user