Update The Atlantic

This commit is contained in:
Kovid Goyal 2014-03-25 21:51:43 +05:30
parent 4836eb97ba
commit 5c03567d8e

View File

@@ -18,13 +18,14 @@ class TheAtlantic(BasicNewsRecipe):
INDEX = 'http://www.theatlantic.com/magazine/toc/0/' INDEX = 'http://www.theatlantic.com/magazine/toc/0/'
language = 'en' language = 'en'
remove_tags_before = dict(name='div', id='articleHead') keep_only_tags = [{'attrs':{'class':['article', 'articleHead', 'articleText']}}]
remove_tags_after = dict(id='copyright') remove_tags = [dict(attrs={'class':'footer'})]
remove_tags = [dict(id=['header', 'printAds', 'pageControls'])]
no_stylesheets = True no_stylesheets = True
preprocess_regexps = [(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: '')] preprocess_regexps = [
(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: ''),
(re.compile(r'.*<html', re.DOTALL|re.IGNORECASE), lambda m: '<html'),
]
def print_version(self, url): def print_version(self, url):
return url.replace('/archive/', '/print/') return url.replace('/archive/', '/print/')
@@ -40,7 +41,7 @@ class TheAtlantic(BasicNewsRecipe):
cover = soup.find('img', src=True, attrs={'class':'cover'}) cover = soup.find('img', src=True, attrs={'class':'cover'})
if cover is not None: if cover is not None:
self.cover_url = re.sub('\s','%20',re.sub('jpg.*','jpg',cover['src'])) self.cover_url = 'http:' + cover['src']
self.log(self.cover_url) self.log(self.cover_url)
feeds = [] feeds = []
@@ -69,7 +70,7 @@ class TheAtlantic(BasicNewsRecipe):
if articles: if articles:
feeds.append((section_title, articles)) feeds.append((section_title, articles))
rightContent=soup.find('div', attrs = {'class':'rightContent'}) rightContent=soup.find('div', attrs={'class':'rightContent'})
for module in rightContent.findAll('div', attrs={'class':'module'}): for module in rightContent.findAll('div', attrs={'class':'module'}):
section_title = self.tag_to_string(module.find('h2')) section_title = self.tag_to_string(module.find('h2'))
articles = [] articles = []
@@ -92,7 +93,6 @@ class TheAtlantic(BasicNewsRecipe):
if articles: if articles:
feeds.append((section_title, articles)) feeds.append((section_title, articles))
return feeds return feeds
def postprocess_html(self, soup, first): def postprocess_html(self, soup, first):