mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Fix #8102 (Updated recipe for Wired Magazine)
This commit is contained in:
parent
9ad49466f7
commit
e24150ade3
@ -38,12 +38,12 @@ class Wired(BasicNewsRecipe):
|
||||
keep_only_tags = [dict(name='div', attrs={'class':'post'})]
|
||||
remove_tags_after = dict(name='div', attrs={'class':'tweetmeme_button'})
|
||||
remove_tags = [
|
||||
dict(name=['object','embed','iframe','link'])
|
||||
dict(name=['object','embed','iframe','link','meta','base'])
|
||||
,dict(name='div', attrs={'class':['podcast_storyboard','tweetmeme_button']})
|
||||
,dict(attrs={'id':'ff_bottom_nav'})
|
||||
,dict(name='a',attrs={'href':'http://www.wired.com/app'})
|
||||
]
|
||||
remove_attributes = ['height','width']
|
||||
remove_attributes = ['height','width','lang','border','clear']
|
||||
|
||||
|
||||
def parse_index(self):
|
||||
@ -78,6 +78,8 @@ class Wired(BasicNewsRecipe):
|
||||
divurl = item.find('div',attrs={'class':'feature-header'})
|
||||
if divurl:
|
||||
divdesc = item.find('div',attrs={'class':'feature-text'})
|
||||
url = divurl.a['href']
|
||||
if not divurl.a['href'].startswith('http://www.wired.com'):
|
||||
url = 'http://www.wired.com' + divurl.a['href']
|
||||
title = self.tag_to_string(divurl.a)
|
||||
description = self.tag_to_string(divdesc)
|
||||
@ -127,5 +129,17 @@ class Wired(BasicNewsRecipe):
|
||||
def preprocess_html(self, soup):
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
for item in soup.findAll('a'):
|
||||
if item.string is not None:
|
||||
tstr = item.string
|
||||
item.replaceWith(tstr)
|
||||
else:
|
||||
item.name='span'
|
||||
for atrs in ['href','target','alt','title','name','id']:
|
||||
if item.has_key(atrs):
|
||||
del item[atrs]
|
||||
for item in soup.findAll('img'):
|
||||
if not item.has_key('alt'):
|
||||
item['alt'] = 'image'
|
||||
return soup
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user