Fix CNN recipe

This commit is contained in:
Kovid Goyal 2009-08-27 10:39:08 -06:00
parent fbde0cdb08
commit 5e159653d5

View File

@@ -20,7 +20,7 @@ class CNN(BasicNewsRecipe):
preprocess_regexps = [(re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in [
(r'<head>.*?<title', lambda match : '<head><title'),
(r'</title>.*?</head>', lambda match : '</title></head>'),
-(r'<body.*?<\!\-\-Article.*?>', lambda match : ''),
+(r'<body.*?<\!\-\-Article.*?>', lambda match : '<body>'),
(r'<\!\-\-Article End\-\->.*?</body>', lambda match : '</body>'),
(r'(</h\d>)<ul>.*?</ul>', lambda match : match.group(1)), # drop story highlights
(r'<h2>(.*?)</h2><h1>(.*?)</h1>', lambda match : '<h1>' + match.group(1) + '</h1><h2>' + match.group(2) + '</h2>'), # sports uses h2 for main title and h1 for subtitle (???) switch these around