mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Cleanup tags for removal
This commit is contained in:
parent
889f07dcdc
commit
b25718deca
@ -8,13 +8,11 @@ import html5lib
|
|||||||
from lxml import html
|
from lxml import html
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
def classes(classes):
|
def classes(classes):
|
||||||
q = frozenset(classes.split(' '))
|
q = frozenset(classes.split(' '))
|
||||||
return dict(
|
return dict(
|
||||||
attrs={'class': lambda x: x and frozenset(x.split()).intersection(q)})
|
attrs={'class': lambda x: x and frozenset(x.split()).intersection(q)})
|
||||||
|
|
||||||
|
|
||||||
class TimesOnline(BasicNewsRecipe):
|
class TimesOnline(BasicNewsRecipe):
|
||||||
title = 'The Times & Sunday Times (UK)'
|
title = 'The Times & Sunday Times (UK)'
|
||||||
__author__ = 'Bobby Steel'
|
__author__ = 'Bobby Steel'
|
||||||
@ -87,12 +85,9 @@ class TimesOnline(BasicNewsRecipe):
|
|||||||
return br
|
return br
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
{'attrs': {'class': lambda x: x and 'is-hidden' in x}},
|
classes('is-hidden Toolbar Tooltip Topics Comments u-hide RelatedLinks ArticlePager Media-caption'),
|
||||||
{'attrs': {'class': lambda x: x and 'Toolbar' in x}},
|
|
||||||
{'attrs': {'class': lambda x: x and 'Tooltip' in x}},
|
|
||||||
{'attrs': {'class': lambda x: x and 'Topics' in x}},
|
|
||||||
{'name': ['object', 'link', 'iframe', 'base', 'meta', 'script']},
|
{'name': ['object', 'link', 'iframe', 'base', 'meta', 'script']},
|
||||||
{'attrs': {'class': ['u-hide', 'RelatedLinks', 'Comments Article-container','ArticlePager','Media-caption']}}]
|
]
|
||||||
|
|
||||||
remove_attributes = ['lang']
|
remove_attributes = ['lang']
|
||||||
keep_only_tags = [{
|
keep_only_tags = [{
|
||||||
|
Loading…
x
Reference in New Issue
Block a user