Fix #8102 (Updated recipe for Wired Magazine)

This commit is contained in:
Kovid Goyal 2010-12-29 11:17:04 -07:00
parent 9ad49466f7
commit e24150ade3

View File

@ -38,12 +38,12 @@ class Wired(BasicNewsRecipe):
keep_only_tags = [dict(name='div', attrs={'class':'post'})]
remove_tags_after = dict(name='div', attrs={'class':'tweetmeme_button'})
remove_tags = [
dict(name=['object','embed','iframe','link'])
dict(name=['object','embed','iframe','link','meta','base'])
,dict(name='div', attrs={'class':['podcast_storyboard','tweetmeme_button']})
,dict(attrs={'id':'ff_bottom_nav'})
,dict(name='a',attrs={'href':'http://www.wired.com/app'})
]
remove_attributes = ['height','width']
remove_attributes = ['height','width','lang','border','clear']
def parse_index(self):
@ -78,6 +78,8 @@ class Wired(BasicNewsRecipe):
divurl = item.find('div',attrs={'class':'feature-header'})
if divurl:
divdesc = item.find('div',attrs={'class':'feature-text'})
url = divurl.a['href']
if not divurl.a['href'].startswith('http://www.wired.com'):
url = 'http://www.wired.com' + divurl.a['href']
title = self.tag_to_string(divurl.a)
description = self.tag_to_string(divdesc)
@ -127,5 +129,17 @@ class Wired(BasicNewsRecipe):
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
for item in soup.findAll('a'):
if item.string is not None:
tstr = item.string
item.replaceWith(tstr)
else:
item.name='span'
for atrs in ['href','target','alt','title','name','id']:
if item.has_key(atrs):
del item[atrs]
for item in soup.findAll('img'):
if not item.has_key('alt'):
item['alt'] = 'image'
return soup