mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
News download: Convert various HTML 5 tags into <div>
This commit is contained in:
parent
d51bd60c9c
commit
f0881c3d26
@ -700,10 +700,17 @@ class BasicNewsRecipe(Recipe):
|
|||||||
for attr in self.remove_attributes:
|
for attr in self.remove_attributes:
|
||||||
for x in soup.findAll(attrs={attr:True}):
|
for x in soup.findAll(attrs={attr:True}):
|
||||||
del x[attr]
|
del x[attr]
|
||||||
for base in list(soup.findAll(['base', 'iframe'])):
|
for base in list(soup.findAll(['base', 'iframe', 'canvas', 'embed',
|
||||||
|
'command', 'datalist', 'video', 'audio'])):
|
||||||
base.extract()
|
base.extract()
|
||||||
|
|
||||||
ans = self.postprocess_html(soup, first_fetch)
|
ans = self.postprocess_html(soup, first_fetch)
|
||||||
|
|
||||||
|
# Rename HTML5 structural tags to <div> (the elements and their contents are kept)
|
||||||
|
for x in ans.findAll(['article', 'aside', 'header', 'footer', 'nav',
|
||||||
|
'figcaption', 'figure', 'section']):
|
||||||
|
x.name = 'div'
|
||||||
|
|
||||||
if job_info:
|
if job_info:
|
||||||
url, f, a, feed_len = job_info
|
url, f, a, feed_len = job_info
|
||||||
try:
|
try:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user