News download: Convert various HTML 5 tags into <div>

This commit is contained in:
Kovid Goyal 2011-01-12 18:59:25 -07:00
parent d51bd60c9c
commit f0881c3d26

View File

@ -700,10 +700,17 @@ class BasicNewsRecipe(Recipe):
for attr in self.remove_attributes: for attr in self.remove_attributes:
for x in soup.findAll(attrs={attr:True}): for x in soup.findAll(attrs={attr:True}):
del x[attr] del x[attr]
for base in list(soup.findAll(['base', 'iframe'])): for base in list(soup.findAll(['base', 'iframe', 'canvas', 'embed',
'command', 'datalist', 'video', 'audio'])):
base.extract() base.extract()
ans = self.postprocess_html(soup, first_fetch) ans = self.postprocess_html(soup, first_fetch)
# Rename HTML5 structural tags to <div>; the elements and their content are kept, not removed
for x in ans.findAll(['article', 'aside', 'header', 'footer', 'nav',
'figcaption', 'figure', 'section']):
x.name = 'div'
if job_info: if job_info:
url, f, a, feed_len = job_info url, f, a, feed_len = job_info
try: try: