mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-08 02:34:06 -04:00
Update The Guardian
This commit is contained in:
parent
2db8cac86a
commit
c82f04c3b1
@ -40,15 +40,22 @@ class Guardian(BasicNewsRecipe):
|
|||||||
]
|
]
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(attrs={'class': lambda x:x and '--twitter' in x}),
|
dict(attrs={'class': lambda x:x and '--twitter' in x}),
|
||||||
|
dict(attrs={'class': lambda x:x and 'submeta' in x.split()}),
|
||||||
dict(attrs={'data-component': ['share', 'social']}),
|
dict(attrs={'data-component': ['share', 'social']}),
|
||||||
dict(attrs={'data-link-name': 'block share'}),
|
dict(attrs={'data-link-name': 'block share'}),
|
||||||
dict(attrs={'class': lambda x:x and 'inline-expand-image' in x}),
|
dict(attrs={'class': lambda x:x and 'inline-expand-image' in x}),
|
||||||
dict(attrs={'class': lambda x:x and 'modern-visible' in x.split()}),
|
dict(attrs={'class': lambda x:x and 'modern-visible' in x.split()}),
|
||||||
|
dict(name=['link', 'meta', 'style']),
|
||||||
]
|
]
|
||||||
remove_tags_after = [
|
remove_tags_after = [
|
||||||
dict(attrs={'class': lambda x: x and 'content__article-body' in x.split()}),
|
dict(attrs={'class': lambda x: x and 'content__article-body' in x.split()}),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
def preprocess_raw_html(self, raw, url):
|
||||||
|
import html5lib
|
||||||
|
from lxml import html
|
||||||
|
return html.tostring(html5lib.parse(raw, namespaceHTMLElements=False, treebuilder='lxml'), encoding=unicode)
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
for img in soup.findAll('img', srcset=True):
|
for img in soup.findAll('img', srcset=True):
|
||||||
img['src'] = img['srcset'].partition(' ')[0]
|
img['src'] = img['srcset'].partition(' ')[0]
|
||||||
|
Loading…
x
Reference in New Issue
Block a user