mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-08 10:44:09 -04:00
Update WSJ
Site appears to be in the process of transitioning to a new React-based architecture. Roll eyes.
This commit is contained in:
parent
f0c948eee9
commit
07480ba07c
@ -32,6 +32,19 @@ def classes(classes):
|
|||||||
'class': lambda x: x and frozenset(x.split()).intersection(q)})
|
'class': lambda x: x and frozenset(x.split()).intersection(q)})
|
||||||
|
|
||||||
|
|
||||||
|
def prefixed_classes(classes):
    """Build a tag selector that matches class names by prefix.

    ``classes`` is a whitespace-separated string of class-name prefixes,
    e.g. ``'Headline__StyledHeadline- ArticleBody__Container-'``.

    Returns a dict of the form ``{'attrs': {'class': matcher}}`` where
    ``matcher(value)`` is true when any whitespace-separated token of
    ``value`` starts with one of the prefixes, and false otherwise
    (including when ``value`` is empty or ``None``).
    """
    # split() with no argument drops empty tokens. With split(' '), a doubled,
    # leading or trailing space (or an empty argument) yields '' as a prefix,
    # and every string starts with '', so the matcher would accept any class.
    prefixes = tuple(classes.split())

    def matcher(x):
        if not x:
            return False
        # str.startswith accepts a tuple of alternatives; an empty tuple
        # never matches, so no prefixes means no matches.
        return any(candidate.startswith(prefixes) for candidate in x.split())

    return {'attrs': {'class': matcher}}
|
||||||
|
|
||||||
|
|
||||||
class WSJ(BasicNewsRecipe):
|
class WSJ(BasicNewsRecipe):
|
||||||
|
|
||||||
if needs_subscription:
|
if needs_subscription:
|
||||||
@ -57,8 +70,9 @@ class WSJ(BasicNewsRecipe):
|
|||||||
dict(name='span', itemprop='author', rel='author'),
|
dict(name='span', itemprop='author', rel='author'),
|
||||||
dict(name='article', id='article-contents articleBody'.split()),
|
dict(name='article', id='article-contents articleBody'.split()),
|
||||||
dict(name='div', id='article_story_body ncTitleArea snipper-ad-login'.split()),
|
dict(name='div', id='article_story_body ncTitleArea snipper-ad-login'.split()),
|
||||||
dict(classes('nc-exp-artbody errorNotFound')),
|
classes('nc-exp-artbody errorNotFound'),
|
||||||
dict(attrs={'data-module-zone': 'article_snippet'}),
|
dict(attrs={'data-module-zone': 'article_snippet'}),
|
||||||
|
prefixed_classes('Headline__StyledHeadline- MediaLayout__Layout- ArticleByline__Container- ArticleTimestamp__Timestamp- ArticleBody__Container-'),
|
||||||
]
|
]
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
@ -288,6 +302,6 @@ class WSJ(BasicNewsRecipe):
|
|||||||
return [
|
return [
|
||||||
('Testing', [
|
('Testing', [
|
||||||
{'title': 'Article One',
|
{'title': 'Article One',
|
||||||
'url': 'https://www.wsj.com/articles/gms-plan-to-drop-chevy-cruze-hits-ohio-town-hard-1543314600'}, # noqa
|
'url': 'https://www.wsj.com/articles/egg-prices-jump-as-bird-flu-hits-poultry-flocks-11648900800'}, # noqa
|
||||||
]),
|
]),
|
||||||
]
|
]
|
||||||
|
@ -32,6 +32,19 @@ def classes(classes):
|
|||||||
'class': lambda x: x and frozenset(x.split()).intersection(q)})
|
'class': lambda x: x and frozenset(x.split()).intersection(q)})
|
||||||
|
|
||||||
|
|
||||||
|
def prefixed_classes(classes):
    """Build a tag selector that matches class names by prefix.

    ``classes`` is a whitespace-separated string of class-name prefixes,
    e.g. ``'Headline__StyledHeadline- ArticleBody__Container-'``.

    Returns a dict of the form ``{'attrs': {'class': matcher}}`` where
    ``matcher(value)`` is true when any whitespace-separated token of
    ``value`` starts with one of the prefixes, and false otherwise
    (including when ``value`` is empty or ``None``).
    """
    # split() with no argument drops empty tokens. With split(' '), a doubled,
    # leading or trailing space (or an empty argument) yields '' as a prefix,
    # and every string starts with '', so the matcher would accept any class.
    prefixes = tuple(classes.split())

    def matcher(x):
        if not x:
            return False
        # str.startswith accepts a tuple of alternatives; an empty tuple
        # never matches, so no prefixes means no matches.
        return any(candidate.startswith(prefixes) for candidate in x.split())

    return {'attrs': {'class': matcher}}
|
||||||
|
|
||||||
|
|
||||||
class WSJ(BasicNewsRecipe):
|
class WSJ(BasicNewsRecipe):
|
||||||
|
|
||||||
if needs_subscription:
|
if needs_subscription:
|
||||||
@ -57,8 +70,9 @@ class WSJ(BasicNewsRecipe):
|
|||||||
dict(name='span', itemprop='author', rel='author'),
|
dict(name='span', itemprop='author', rel='author'),
|
||||||
dict(name='article', id='article-contents articleBody'.split()),
|
dict(name='article', id='article-contents articleBody'.split()),
|
||||||
dict(name='div', id='article_story_body ncTitleArea snipper-ad-login'.split()),
|
dict(name='div', id='article_story_body ncTitleArea snipper-ad-login'.split()),
|
||||||
dict(classes('nc-exp-artbody errorNotFound')),
|
classes('nc-exp-artbody errorNotFound'),
|
||||||
dict(attrs={'data-module-zone': 'article_snippet'}),
|
dict(attrs={'data-module-zone': 'article_snippet'}),
|
||||||
|
prefixed_classes('Headline__StyledHeadline- MediaLayout__Layout- ArticleByline__Container- ArticleTimestamp__Timestamp- ArticleBody__Container-'),
|
||||||
]
|
]
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
@ -288,6 +302,6 @@ class WSJ(BasicNewsRecipe):
|
|||||||
return [
|
return [
|
||||||
('Testing', [
|
('Testing', [
|
||||||
{'title': 'Article One',
|
{'title': 'Article One',
|
||||||
'url': 'https://www.wsj.com/articles/gms-plan-to-drop-chevy-cruze-hits-ohio-town-hard-1543314600'}, # noqa
|
'url': 'https://www.wsj.com/articles/egg-prices-jump-as-bird-flu-hits-poultry-flocks-11648900800'}, # noqa
|
||||||
]),
|
]),
|
||||||
]
|
]
|
||||||
|
Loading…
x
Reference in New Issue
Block a user