Fix #8078 (smithsonian recipe not working)

This commit is contained in:
Kovid Goyal 2010-12-31 19:34:42 -07:00
parent 883ede20e8
commit de06b05355

View File

@@ -17,8 +17,8 @@ class SmithsonianMagazine(BasicNewsRecipe):
remove_tags = [ remove_tags = [
dict(name='iframe'), dict(name='iframe'),
dict(name='div', attrs={'class':'article_sidebar_border'}), dict(name='div', attrs={'class':'article_sidebar_border'}),
dict(name='div', attrs={'id':['article_sidebar_border', 'most-popular_large']}), dict(name='div', attrs={'id':['article_sidebar_border', 'most-popular_large', 'most-popular-body_large']}),
#dict(name='ul', attrs={'class':'article-tools'}), ##dict(name='ul', attrs={'class':'article-tools'}),
dict(name='ul', attrs={'class':'cat-breadcrumb col three last'}), dict(name='ul', attrs={'class':'cat-breadcrumb col three last'}),
] ]
@@ -37,16 +37,16 @@ class SmithsonianMagazine(BasicNewsRecipe):
] ]
def preprocess_html(self, soup): def preprocess_html(self, soup):
story = soup.find(name='div', attrs={'id':'article-left'}) story = soup.find(name='div', attrs={'id':'article-body'})
#td = heading.findParent(name='td') ##td = heading.findParent(name='td')
#td.extract() ##td.extract()
soup = BeautifulSoup('<html><head><title>t</title></head><body></body></html>') soup = BeautifulSoup('<html><head><title>t</title></head><body></body></html>')
body = soup.find(name='body') body = soup.find(name='body')
body.insert(0, story) body.insert(0, story)
return soup return soup
def postprocess_html(self, soup, first): #def postprocess_html(self, soup, first):
for p in soup.findAll(id='articlePaginationWrapper'): p.extract() #for p in soup.findAll(id='articlePaginationWrapper'): p.extract()
if not first: #if not first:
for div in soup.findAll(id='article-head'): div.extract() #for div in soup.findAll(id='article-head'): div.extract()
return soup #return soup