Resolve internal links for The Economist

This commit is contained in:
Kovid Goyal 2015-04-11 18:46:16 +05:30
parent 495293b3f5
commit 1a2814494e
2 changed files with 12 additions and 0 deletions

View File

@ -41,6 +41,7 @@ class Economist(BasicNewsRecipe):
}
'''
oldest_article = 7.0
resolve_internal_links = True
remove_tags = [
dict(name=['script', 'noscript', 'title', 'iframe', 'cf_floatingcontent']),
dict(attrs={'class':['dblClkTrk', 'ec-article-info',
@ -150,3 +151,8 @@ class Economist(BasicNewsRecipe):
div.insert(2, img)
table.replaceWith(div)
return soup
def canonicalize_internal_url(self, url, is_link=True):
    """Canonicalize ``url`` after dropping a trailing ``/print`` segment.

    When ``url`` ends with ``/print`` that final path segment is removed;
    the (possibly shortened) URL is then passed, together with ``is_link``,
    to ``BasicNewsRecipe.canonicalize_internal_url`` and that result is
    returned unchanged.
    """
    # NOTE(review): presumably '/print' is the printer-friendly variant of
    # an article page, so both forms should map to one article -- confirm.
    print_suffix = '/print'
    if url.endswith(print_suffix):
        # 'print' contains no '/', so slicing the suffix off is the same as
        # keeping everything before the last '/'.
        url = url[:-len(print_suffix)]
    return BasicNewsRecipe.canonicalize_internal_url(
        self, url, is_link=is_link)

View File

@ -41,6 +41,7 @@ class Economist(BasicNewsRecipe):
}
'''
oldest_article = 7.0
resolve_internal_links = True
remove_tags = [
dict(name=['script', 'noscript', 'title', 'iframe', 'cf_floatingcontent']),
dict(attrs={'class':['dblClkTrk', 'ec-article-info',
@ -149,3 +150,8 @@ class Economist(BasicNewsRecipe):
div.insert(2, img)
table.replaceWith(div)
return soup
def canonicalize_internal_url(self, url, is_link=True):
    """Strip a trailing ``/print`` from ``url``, then defer to the base class.

    The URL is shortened only when it ends with ``/print``; in every case
    the return value is whatever
    ``BasicNewsRecipe.canonicalize_internal_url`` produces for the
    resulting URL and the given ``is_link`` flag.
    """
    # NOTE(review): '/print' looks like the print-view suffix of an article
    # URL -- verify against the URLs the recipe actually fetches.
    suffix = '/print'
    stripped = url[:-len(suffix)] if url.endswith(suffix) else url
    return BasicNewsRecipe.canonicalize_internal_url(
        self, stripped, is_link=is_link)