mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
update science journals
fix paragraph tags
This commit is contained in:
parent
f37f73d78d
commit
1c3957c486
@ -21,26 +21,30 @@ class scienceadv(BasicNewsRecipe):
|
|||||||
no_javascript = True
|
no_javascript = True
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
remove_attributes = ['style', 'height', 'width']
|
remove_attributes = ['style', 'height', 'width']
|
||||||
masthead_url = 'https://www.science.org/pb-assets/images/logos/sciadv-logo-1620488349693.svg'
|
masthead_url = (
|
||||||
|
'https://www.science.org/pb-assets/images/logos/sciadv-logo-1620488349693.svg'
|
||||||
|
)
|
||||||
language = 'en'
|
language = 'en'
|
||||||
simultaneous_downloads = 1
|
simultaneous_downloads = 1
|
||||||
browser_type = 'webengine'
|
browser_type = 'webengine'
|
||||||
|
|
||||||
extra_css = '''
|
extra_css = """
|
||||||
.news-article__figure__caption, .calibre-nuked-tag-figcaption, .card-related {font-size:small;}
|
.news-article__figure__caption, .calibre-nuked-tag-figcaption, .card-related {font-size:small;}
|
||||||
.core-self-citation, .meta-panel__left-content, .news-article__hero__top-meta {font-size:small;}
|
.core-self-citation, .meta-panel__left-content, .news-article__hero__top-meta {font-size:small;}
|
||||||
.contributors, .news-article__hero__bottom-meta, #bibliography, #elettersSection {font-size:small;}
|
.contributors, .news-article__hero__bottom-meta, #bibliography, #elettersSection {font-size:small;}
|
||||||
img {display:block; margin:0 auto;}
|
img {display:block; margin:0 auto;}
|
||||||
.core-lede {font-style:italic; color:#202020;}
|
.core-lede {font-style:italic; color:#202020;}
|
||||||
'''
|
"""
|
||||||
|
|
||||||
ignore_duplicate_articles = {'url'}
|
ignore_duplicate_articles = {'url'}
|
||||||
|
|
||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
classes('meta-panel__left-content news-article__hero__info news-article__hero__figure bodySection'),
|
classes(
|
||||||
|
'meta-panel__left-content news-article__hero__info news-article__hero__figure bodySection'
|
||||||
|
),
|
||||||
dict(name='h1', attrs={'property': 'name'}),
|
dict(name='h1', attrs={'property': 'name'}),
|
||||||
dict(name='div', **classes('core-lede contributors core-self-citation')),
|
dict(name='div', **classes('core-lede contributors core-self-citation')),
|
||||||
dict(attrs={'data-core-wrapper':'content'})
|
dict(attrs={'data-core-wrapper': 'content'}),
|
||||||
]
|
]
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
@ -51,13 +55,13 @@ class scienceadv(BasicNewsRecipe):
|
|||||||
'issue': {
|
'issue': {
|
||||||
'short': 'Enter the Issue Number you want to download\n(Vol/Issue format)',
|
'short': 'Enter the Issue Number you want to download\n(Vol/Issue format)',
|
||||||
'long': 'For example, 385/6710',
|
'long': 'For example, 385/6710',
|
||||||
'default': 'current'
|
'default': 'current',
|
||||||
},
|
},
|
||||||
'res': {
|
'res': {
|
||||||
'short': 'For hi-res images, select a resolution from the\nfollowing options: 800, 1000, 1200 or 1500',
|
'short': 'For hi-res images, select a resolution from the\nfollowing options: 800, 1000, 1200 or 1500',
|
||||||
'long': 'This is useful for non e-ink devices, and for a lower file size\nthan the default, use 400 or 300.',
|
'long': 'This is useful for non e-ink devices, and for a lower file size\nthan the default, use 400 or 300.',
|
||||||
'default': '600'
|
'default': '600',
|
||||||
}
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
@ -68,6 +72,8 @@ class scienceadv(BasicNewsRecipe):
|
|||||||
if w and isinstance(w, str):
|
if w and isinstance(w, str):
|
||||||
res = '/cdn-cgi/image/width=' + w
|
res = '/cdn-cgi/image/width=' + w
|
||||||
img['src'] = absurl(res + img['src'])
|
img['src'] = absurl(res + img['src'])
|
||||||
|
for div in soup.findAll('div', attrs={'role': 'paragraph'}):
|
||||||
|
div.name = 'p'
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
def postprocess_html(self, soup, first_fetch):
|
def postprocess_html(self, soup, first_fetch):
|
||||||
|
@ -19,26 +19,30 @@ class science(BasicNewsRecipe):
|
|||||||
no_javascript = True
|
no_javascript = True
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
remove_attributes = ['style', 'height', 'width']
|
remove_attributes = ['style', 'height', 'width']
|
||||||
masthead_url = 'https://www.science.org/pb-assets/images/styleguide/logo-1672180580750.svg'
|
masthead_url = (
|
||||||
|
'https://www.science.org/pb-assets/images/styleguide/logo-1672180580750.svg'
|
||||||
|
)
|
||||||
language = 'en'
|
language = 'en'
|
||||||
simultaneous_downloads = 1
|
simultaneous_downloads = 1
|
||||||
browser_type = 'webengine'
|
browser_type = 'webengine'
|
||||||
|
|
||||||
extra_css = '''
|
extra_css = """
|
||||||
.news-article__figure__caption, .calibre-nuked-tag-figcaption, .card-related {font-size:small;}
|
.news-article__figure__caption, .calibre-nuked-tag-figcaption, .card-related {font-size:small;}
|
||||||
.core-self-citation, .meta-panel__left-content, .news-article__hero__top-meta {font-size:small;}
|
.core-self-citation, .meta-panel__left-content, .news-article__hero__top-meta {font-size:small;}
|
||||||
.contributors, .news-article__hero__bottom-meta, #bibliography, #elettersSection {font-size:small;}
|
.contributors, .news-article__hero__bottom-meta, #bibliography, #elettersSection {font-size:small;}
|
||||||
img {display:block; margin:0 auto;}
|
img {display:block; margin:0 auto;}
|
||||||
.core-lede {font-style:italic; color:#202020;}
|
.core-lede {font-style:italic; color:#202020;}
|
||||||
'''
|
"""
|
||||||
|
|
||||||
ignore_duplicate_articles = {'url'}
|
ignore_duplicate_articles = {'url'}
|
||||||
|
|
||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
classes('meta-panel__left-content news-article__hero__info news-article__hero__figure bodySection'),
|
classes(
|
||||||
|
'meta-panel__left-content news-article__hero__info news-article__hero__figure bodySection'
|
||||||
|
),
|
||||||
dict(name='h1', attrs={'property': 'name'}),
|
dict(name='h1', attrs={'property': 'name'}),
|
||||||
dict(name='div', **classes('core-lede contributors core-self-citation')),
|
dict(name='div', **classes('core-lede contributors core-self-citation')),
|
||||||
dict(attrs={'data-core-wrapper':'content'})
|
dict(attrs={'data-core-wrapper': 'content'}),
|
||||||
]
|
]
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
@ -49,13 +53,13 @@ class science(BasicNewsRecipe):
|
|||||||
'issue': {
|
'issue': {
|
||||||
'short': 'Enter the Issue Number you want to download\n(Vol/Issue format)',
|
'short': 'Enter the Issue Number you want to download\n(Vol/Issue format)',
|
||||||
'long': 'For example, 385/6710',
|
'long': 'For example, 385/6710',
|
||||||
'default': 'current'
|
'default': 'current',
|
||||||
},
|
},
|
||||||
'res': {
|
'res': {
|
||||||
'short': 'For hi-res images, select a resolution from the\nfollowing options: 800, 1000, 1200 or 1500',
|
'short': 'For hi-res images, select a resolution from the\nfollowing options: 800, 1000, 1200 or 1500',
|
||||||
'long': 'This is useful for non e-ink devices, and for a lower file size\nthan the default, use 400 or 300.',
|
'long': 'This is useful for non e-ink devices, and for a lower file size\nthan the default, use 400 or 300.',
|
||||||
'default': '600'
|
'default': '600',
|
||||||
}
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
@ -66,6 +70,8 @@ class science(BasicNewsRecipe):
|
|||||||
if w and isinstance(w, str):
|
if w and isinstance(w, str):
|
||||||
res = '/cdn-cgi/image/width=' + w
|
res = '/cdn-cgi/image/width=' + w
|
||||||
img['src'] = absurl(res + img['src'])
|
img['src'] = absurl(res + img['src'])
|
||||||
|
for div in soup.findAll('div', attrs={'role': 'paragraph'}):
|
||||||
|
div.name = 'p'
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
def postprocess_html(self, soup, first_fetch):
|
def postprocess_html(self, soup, first_fetch):
|
||||||
|
@ -21,26 +21,30 @@ class scienceadv(BasicNewsRecipe):
|
|||||||
no_javascript = True
|
no_javascript = True
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
remove_attributes = ['style', 'height', 'width']
|
remove_attributes = ['style', 'height', 'width']
|
||||||
masthead_url = 'https://www.science.org/pb-assets/images/logos/sciimmunol-logo-1620488349717.svg'
|
masthead_url = (
|
||||||
|
'https://www.science.org/pb-assets/images/logos/sciimmunol-logo-1620488349717.svg'
|
||||||
|
)
|
||||||
language = 'en'
|
language = 'en'
|
||||||
simultaneous_downloads = 1
|
simultaneous_downloads = 1
|
||||||
browser_type = 'webengine'
|
browser_type = 'webengine'
|
||||||
|
|
||||||
extra_css = '''
|
extra_css = """
|
||||||
.news-article__figure__caption, .calibre-nuked-tag-figcaption, .card-related {font-size:small;}
|
.news-article__figure__caption, .calibre-nuked-tag-figcaption, .card-related {font-size:small;}
|
||||||
.core-self-citation, .meta-panel__left-content, .news-article__hero__top-meta {font-size:small;}
|
.core-self-citation, .meta-panel__left-content, .news-article__hero__top-meta {font-size:small;}
|
||||||
.contributors, .news-article__hero__bottom-meta, #bibliography, #elettersSection {font-size:small;}
|
.contributors, .news-article__hero__bottom-meta, #bibliography, #elettersSection {font-size:small;}
|
||||||
img {display:block; margin:0 auto;}
|
img {display:block; margin:0 auto;}
|
||||||
.core-lede {font-style:italic; color:#202020;}
|
.core-lede {font-style:italic; color:#202020;}
|
||||||
'''
|
"""
|
||||||
|
|
||||||
ignore_duplicate_articles = {'url'}
|
ignore_duplicate_articles = {'url'}
|
||||||
|
|
||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
classes('meta-panel__left-content news-article__hero__info news-article__hero__figure bodySection'),
|
classes(
|
||||||
|
'meta-panel__left-content news-article__hero__info news-article__hero__figure bodySection'
|
||||||
|
),
|
||||||
dict(name='h1', attrs={'property': 'name'}),
|
dict(name='h1', attrs={'property': 'name'}),
|
||||||
dict(name='div', **classes('core-lede contributors core-self-citation')),
|
dict(name='div', **classes('core-lede contributors core-self-citation')),
|
||||||
dict(attrs={'data-core-wrapper':'content'})
|
dict(attrs={'data-core-wrapper': 'content'}),
|
||||||
]
|
]
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
@ -51,13 +55,13 @@ class scienceadv(BasicNewsRecipe):
|
|||||||
'issue': {
|
'issue': {
|
||||||
'short': 'Enter the Issue Number you want to download\n(Vol/Issue format)',
|
'short': 'Enter the Issue Number you want to download\n(Vol/Issue format)',
|
||||||
'long': 'For example, 385/6710',
|
'long': 'For example, 385/6710',
|
||||||
'default': 'current'
|
'default': 'current',
|
||||||
},
|
},
|
||||||
'res': {
|
'res': {
|
||||||
'short': 'For hi-res images, select a resolution from the\nfollowing options: 800, 1000, 1200 or 1500',
|
'short': 'For hi-res images, select a resolution from the\nfollowing options: 800, 1000, 1200 or 1500',
|
||||||
'long': 'This is useful for non e-ink devices, and for a lower file size\nthan the default, use 400 or 300.',
|
'long': 'This is useful for non e-ink devices, and for a lower file size\nthan the default, use 400 or 300.',
|
||||||
'default': '600'
|
'default': '600',
|
||||||
}
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
@ -68,6 +72,8 @@ class scienceadv(BasicNewsRecipe):
|
|||||||
if w and isinstance(w, str):
|
if w and isinstance(w, str):
|
||||||
res = '/cdn-cgi/image/width=' + w
|
res = '/cdn-cgi/image/width=' + w
|
||||||
img['src'] = absurl(res + img['src'])
|
img['src'] = absurl(res + img['src'])
|
||||||
|
for div in soup.findAll('div', attrs={'role': 'paragraph'}):
|
||||||
|
div.name = 'p'
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
def postprocess_html(self, soup, first_fetch):
|
def postprocess_html(self, soup, first_fetch):
|
||||||
|
@ -26,21 +26,23 @@ class scienceadv(BasicNewsRecipe):
|
|||||||
simultaneous_downloads = 1
|
simultaneous_downloads = 1
|
||||||
browser_type = 'webengine'
|
browser_type = 'webengine'
|
||||||
|
|
||||||
extra_css = '''
|
extra_css = """
|
||||||
.news-article__figure__caption, .calibre-nuked-tag-figcaption, .card-related {font-size:small;}
|
.news-article__figure__caption, .calibre-nuked-tag-figcaption, .card-related {font-size:small;}
|
||||||
.core-self-citation, .meta-panel__left-content, .news-article__hero__top-meta {font-size:small;}
|
.core-self-citation, .meta-panel__left-content, .news-article__hero__top-meta {font-size:small;}
|
||||||
.contributors, .news-article__hero__bottom-meta, #bibliography, #elettersSection {font-size:small;}
|
.contributors, .news-article__hero__bottom-meta, #bibliography, #elettersSection {font-size:small;}
|
||||||
img {display:block; margin:0 auto;}
|
img {display:block; margin:0 auto;}
|
||||||
.core-lede {font-style:italic; color:#202020;}
|
.core-lede {font-style:italic; color:#202020;}
|
||||||
'''
|
"""
|
||||||
|
|
||||||
ignore_duplicate_articles = {'url'}
|
ignore_duplicate_articles = {'url'}
|
||||||
|
|
||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
classes('meta-panel__left-content news-article__hero__info news-article__hero__figure bodySection'),
|
classes(
|
||||||
|
'meta-panel__left-content news-article__hero__info news-article__hero__figure bodySection'
|
||||||
|
),
|
||||||
dict(name='h1', attrs={'property': 'name'}),
|
dict(name='h1', attrs={'property': 'name'}),
|
||||||
dict(name='div', **classes('core-lede contributors core-self-citation')),
|
dict(name='div', **classes('core-lede contributors core-self-citation')),
|
||||||
dict(attrs={'data-core-wrapper':'content'})
|
dict(attrs={'data-core-wrapper': 'content'}),
|
||||||
]
|
]
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
@ -51,13 +53,13 @@ class scienceadv(BasicNewsRecipe):
|
|||||||
'issue': {
|
'issue': {
|
||||||
'short': 'Enter the Issue Number you want to download\n(Vol/Issue format)',
|
'short': 'Enter the Issue Number you want to download\n(Vol/Issue format)',
|
||||||
'long': 'For example, 385/6710',
|
'long': 'For example, 385/6710',
|
||||||
'default': 'current'
|
'default': 'current',
|
||||||
},
|
},
|
||||||
'res': {
|
'res': {
|
||||||
'short': 'For hi-res images, select a resolution from the\nfollowing options: 800, 1000, 1200 or 1500',
|
'short': 'For hi-res images, select a resolution from the\nfollowing options: 800, 1000, 1200 or 1500',
|
||||||
'long': 'This is useful for non e-ink devices, and for a lower file size\nthan the default, use 400 or 300.',
|
'long': 'This is useful for non e-ink devices, and for a lower file size\nthan the default, use 400 or 300.',
|
||||||
'default': '600'
|
'default': '600',
|
||||||
}
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
@ -68,6 +70,8 @@ class scienceadv(BasicNewsRecipe):
|
|||||||
if w and isinstance(w, str):
|
if w and isinstance(w, str):
|
||||||
res = '/cdn-cgi/image/width=' + w
|
res = '/cdn-cgi/image/width=' + w
|
||||||
img['src'] = absurl(res + img['src'])
|
img['src'] = absurl(res + img['src'])
|
||||||
|
for div in soup.findAll('div', attrs={'role': 'paragraph'}):
|
||||||
|
div.name = 'p'
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
def postprocess_html(self, soup, first_fetch):
|
def postprocess_html(self, soup, first_fetch):
|
||||||
|
@ -20,26 +20,30 @@ class scienceadv(BasicNewsRecipe):
|
|||||||
no_javascript = True
|
no_javascript = True
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
remove_attributes = ['style', 'height', 'width']
|
remove_attributes = ['style', 'height', 'width']
|
||||||
masthead_url = 'https://www.science.org/pb-assets/images/logos/signaling-logo-1620488350150.svg'
|
masthead_url = (
|
||||||
|
'https://www.science.org/pb-assets/images/logos/signaling-logo-1620488350150.svg'
|
||||||
|
)
|
||||||
language = 'en'
|
language = 'en'
|
||||||
simultaneous_downloads = 1
|
simultaneous_downloads = 1
|
||||||
browser_type = 'webengine'
|
browser_type = 'webengine'
|
||||||
|
|
||||||
extra_css = '''
|
extra_css = """
|
||||||
.news-article__figure__caption, .calibre-nuked-tag-figcaption, .card-related {font-size:small;}
|
.news-article__figure__caption, .calibre-nuked-tag-figcaption, .card-related {font-size:small;}
|
||||||
.core-self-citation, .meta-panel__left-content, .news-article__hero__top-meta {font-size:small;}
|
.core-self-citation, .meta-panel__left-content, .news-article__hero__top-meta {font-size:small;}
|
||||||
.contributors, .news-article__hero__bottom-meta, #bibliography, #elettersSection {font-size:small;}
|
.contributors, .news-article__hero__bottom-meta, #bibliography, #elettersSection {font-size:small;}
|
||||||
img {display:block; margin:0 auto;}
|
img {display:block; margin:0 auto;}
|
||||||
.core-lede {font-style:italic; color:#202020;}
|
.core-lede {font-style:italic; color:#202020;}
|
||||||
'''
|
"""
|
||||||
|
|
||||||
ignore_duplicate_articles = {'url'}
|
ignore_duplicate_articles = {'url'}
|
||||||
|
|
||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
classes('meta-panel__left-content news-article__hero__info news-article__hero__figure bodySection'),
|
classes(
|
||||||
|
'meta-panel__left-content news-article__hero__info news-article__hero__figure bodySection'
|
||||||
|
),
|
||||||
dict(name='h1', attrs={'property': 'name'}),
|
dict(name='h1', attrs={'property': 'name'}),
|
||||||
dict(name='div', **classes('core-lede contributors core-self-citation')),
|
dict(name='div', **classes('core-lede contributors core-self-citation')),
|
||||||
dict(attrs={'data-core-wrapper':'content'})
|
dict(attrs={'data-core-wrapper': 'content'}),
|
||||||
]
|
]
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
@ -50,13 +54,13 @@ class scienceadv(BasicNewsRecipe):
|
|||||||
'issue': {
|
'issue': {
|
||||||
'short': 'Enter the Issue Number you want to download\n(Vol/Issue format)',
|
'short': 'Enter the Issue Number you want to download\n(Vol/Issue format)',
|
||||||
'long': 'For example, 385/6710',
|
'long': 'For example, 385/6710',
|
||||||
'default': 'current'
|
'default': 'current',
|
||||||
},
|
},
|
||||||
'res': {
|
'res': {
|
||||||
'short': 'For hi-res images, select a resolution from the\nfollowing options: 800, 1000, 1200 or 1500',
|
'short': 'For hi-res images, select a resolution from the\nfollowing options: 800, 1000, 1200 or 1500',
|
||||||
'long': 'This is useful for non e-ink devices, and for a lower file size\nthan the default, use 400 or 300.',
|
'long': 'This is useful for non e-ink devices, and for a lower file size\nthan the default, use 400 or 300.',
|
||||||
'default': '600'
|
'default': '600',
|
||||||
}
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
@ -67,6 +71,8 @@ class scienceadv(BasicNewsRecipe):
|
|||||||
if w and isinstance(w, str):
|
if w and isinstance(w, str):
|
||||||
res = '/cdn-cgi/image/width=' + w
|
res = '/cdn-cgi/image/width=' + w
|
||||||
img['src'] = absurl(res + img['src'])
|
img['src'] = absurl(res + img['src'])
|
||||||
|
for div in soup.findAll('div', attrs={'role': 'paragraph'}):
|
||||||
|
div.name = 'p'
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
def postprocess_html(self, soup, first_fetch):
|
def postprocess_html(self, soup, first_fetch):
|
||||||
|
@ -21,26 +21,30 @@ class scienceadv(BasicNewsRecipe):
|
|||||||
no_javascript = True
|
no_javascript = True
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
remove_attributes = ['style', 'height', 'width']
|
remove_attributes = ['style', 'height', 'width']
|
||||||
masthead_url = 'https://www.science.org/pb-assets/images/logos/stm-logo-1620488350153.svg'
|
masthead_url = (
|
||||||
|
'https://www.science.org/pb-assets/images/logos/stm-logo-1620488350153.svg'
|
||||||
|
)
|
||||||
language = 'en'
|
language = 'en'
|
||||||
simultaneous_downloads = 1
|
simultaneous_downloads = 1
|
||||||
browser_type = 'webengine'
|
browser_type = 'webengine'
|
||||||
|
|
||||||
extra_css = '''
|
extra_css = """
|
||||||
.news-article__figure__caption, .calibre-nuked-tag-figcaption, .card-related {font-size:small;}
|
.news-article__figure__caption, .calibre-nuked-tag-figcaption, .card-related {font-size:small;}
|
||||||
.core-self-citation, .meta-panel__left-content, .news-article__hero__top-meta {font-size:small;}
|
.core-self-citation, .meta-panel__left-content, .news-article__hero__top-meta {font-size:small;}
|
||||||
.contributors, .news-article__hero__bottom-meta, #bibliography, #elettersSection {font-size:small;}
|
.contributors, .news-article__hero__bottom-meta, #bibliography, #elettersSection {font-size:small;}
|
||||||
img {display:block; margin:0 auto;}
|
img {display:block; margin:0 auto;}
|
||||||
.core-lede {font-style:italic; color:#202020;}
|
.core-lede {font-style:italic; color:#202020;}
|
||||||
'''
|
"""
|
||||||
|
|
||||||
ignore_duplicate_articles = {'url'}
|
ignore_duplicate_articles = {'url'}
|
||||||
|
|
||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
classes('meta-panel__left-content news-article__hero__info news-article__hero__figure bodySection'),
|
classes(
|
||||||
|
'meta-panel__left-content news-article__hero__info news-article__hero__figure bodySection'
|
||||||
|
),
|
||||||
dict(name='h1', attrs={'property': 'name'}),
|
dict(name='h1', attrs={'property': 'name'}),
|
||||||
dict(name='div', **classes('core-lede contributors core-self-citation')),
|
dict(name='div', **classes('core-lede contributors core-self-citation')),
|
||||||
dict(attrs={'data-core-wrapper':'content'})
|
dict(attrs={'data-core-wrapper': 'content'}),
|
||||||
]
|
]
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
@ -51,13 +55,13 @@ class scienceadv(BasicNewsRecipe):
|
|||||||
'issue': {
|
'issue': {
|
||||||
'short': 'Enter the Issue Number you want to download\n(Vol/Issue format)',
|
'short': 'Enter the Issue Number you want to download\n(Vol/Issue format)',
|
||||||
'long': 'For example, 385/6710',
|
'long': 'For example, 385/6710',
|
||||||
'default': 'current'
|
'default': 'current',
|
||||||
},
|
},
|
||||||
'res': {
|
'res': {
|
||||||
'short': 'For hi-res images, select a resolution from the\nfollowing options: 800, 1000, 1200 or 1500',
|
'short': 'For hi-res images, select a resolution from the\nfollowing options: 800, 1000, 1200 or 1500',
|
||||||
'long': 'This is useful for non e-ink devices, and for a lower file size\nthan the default, use 400 or 300.',
|
'long': 'This is useful for non e-ink devices, and for a lower file size\nthan the default, use 400 or 300.',
|
||||||
'default': '600'
|
'default': '600',
|
||||||
}
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
@ -68,6 +72,8 @@ class scienceadv(BasicNewsRecipe):
|
|||||||
if w and isinstance(w, str):
|
if w and isinstance(w, str):
|
||||||
res = '/cdn-cgi/image/width=' + w
|
res = '/cdn-cgi/image/width=' + w
|
||||||
img['src'] = absurl(res + img['src'])
|
img['src'] = absurl(res + img['src'])
|
||||||
|
for div in soup.findAll('div', attrs={'role': 'paragraph'}):
|
||||||
|
div.name = 'p'
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
def postprocess_html(self, soup, first_fetch):
|
def postprocess_html(self, soup, first_fetch):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user