Some performance improvements for The Atlantic recipe

This commit is contained in:
Kovid Goyal 2014-08-17 09:28:09 +05:30
parent af642adeb9
commit 1027cf8a1e

View File

@@ -1,5 +1,5 @@
#!/usr/bin/env python #!/usr/bin/env python
from __future__ import unicode_literals
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>' __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
''' '''
@@ -21,14 +21,15 @@ class TheAtlantic(BasicNewsRecipe):
{'attrs':{'class':['article-header', 'article-body', 'article-magazine']}}, {'attrs':{'class':['article-header', 'article-body', 'article-magazine']}},
] ]
remove_tags = [ remove_tags = [
{'name': ['meta', 'link']}, {'name': ['meta', 'link', 'noscript']},
{'attrs':{'class':['offset-wrapper']}}, {'attrs':{'class':['offset-wrapper', 'ad-boxfeatures-wrapper']}},
{'attrs':{'class':lambda x: x and 'article-tools' in x}}, {'attrs':{'class':lambda x: x and 'article-tools' in x}},
{'src':lambda x:x and 'spotxchange.com' in x},
] ]
no_stylesheets = True no_stylesheets = True
preprocess_regexps = [ preprocess_regexps = [
(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: ''), (re.compile(r'<script\b.+?</script>', re.DOTALL), lambda m: ''),
(re.compile(r'.*<html', re.DOTALL|re.IGNORECASE), lambda m: '<html'), (re.compile(r'^.*<html', re.DOTALL|re.IGNORECASE), lambda m: '<html'),
] ]
def print_version(self, url): def print_version(self, url):