diff --git a/recipes/icons/sciimmunol.png b/recipes/icons/sciimmunol.png new file mode 100644 index 0000000000..a7cc08250f Binary files /dev/null and b/recipes/icons/sciimmunol.png differ diff --git a/recipes/icons/scirobotics.png b/recipes/icons/scirobotics.png new file mode 100644 index 0000000000..a7cc08250f Binary files /dev/null and b/recipes/icons/scirobotics.png differ diff --git a/recipes/icons/scisignaling.png b/recipes/icons/scisignaling.png new file mode 100644 index 0000000000..a7cc08250f Binary files /dev/null and b/recipes/icons/scisignaling.png differ diff --git a/recipes/icons/scistm.png b/recipes/icons/scistm.png new file mode 100644 index 0000000000..a7cc08250f Binary files /dev/null and b/recipes/icons/scistm.png differ diff --git a/recipes/science_advances.recipe b/recipes/science_advances.recipe index 293a91defa..509d37a9b5 100644 --- a/recipes/science_advances.recipe +++ b/recipes/science_advances.recipe @@ -1,50 +1,101 @@ #!/usr/bin/env python -# vim:fileencoding=utf-8 -from __future__ import absolute_import, division, print_function, unicode_literals +from calibre.web.feeds.news import BasicNewsRecipe, classes -from calibre.web.feeds.news import BasicNewsRecipe +def absurl(url): + if url.startswith('/'): + url = 'https://www.science.org' + url + return url -def check_words(words): - return lambda x: x and frozenset(words.split()).intersection(x.split()) - - -class ScienceAdvances(BasicNewsRecipe): +class scienceadv(BasicNewsRecipe): title = 'Science Advances' - __author__ = 'Jose Ortiz' + __author__ = 'unkn0wn' description = ( - 'Science Advances is a peer-reviewed multidisciplinary open-access' - ' scientific journal established in early 2015. The journal\'s scope' - ' includes all areas of science, including the life sciences, physical' - ' sciences, social sciences, computer sciences, and environmental' - ' sciences.' 
+ 'Science Advances is the American Association for the Advancement of Science’s (AAAS) open access ' + 'multidisciplinary journal, publishing impactful research papers and reviews in any area of science, in ' + 'both disciplinary-specific and broad, interdisciplinary areas. The mission of Science Advances is to provide ' + 'fair, fast, and expert peer review to authors and a vetted selection of research, freely available to readers.' ) + encoding = 'utf-8' + no_javascript = True + no_stylesheets = True + remove_attributes = ['style', 'height', 'width'] + masthead_url = 'https://www.science.org/pb-assets/images/logos/sciadv-logo-1620488349693.svg' language = 'en' - encoding = 'UTF-8' - max_articles_per_feed = 100 - publication_type = 'magazine' - keep_only_tags = [dict(name='article', attrs={'class': check_words('primary')})] - feeds = [ - ( - 'Science Advances: Current Issue', - 'http://advances.sciencemag.org/rss/current.xml' - ), + simultaneous_downloads = 1 + browser_type = 'qt' + + extra_css = ''' + .news-article__figure__caption {font-size:small; text-align:center;} + .core-self-citation, .meta-panel__left-content, .news-article__hero__top-meta {font-size:small;} + .contributors, .news-article__hero__bottom-meta, #bibliography, #elettersSection {font-size:small;} + img {display:block; margin:0 auto;} + .core-lede {font-style:italic; color:#202020;} + ''' + + ignore_duplicate_articles = {'url'} + + keep_only_tags = [ + classes('meta-panel__left-content news-article__hero__info news-article__hero__figure bodySection'), + dict(name='h1', attrs={'property':'name'}), + dict(name='div', **classes('core-lede contributors core-self-citation')), + dict(attrs={'data-core-wrapper':'content'}) ] - def get_cover_url(self): - soup = self.index_to_soup('http://advances.sciencemag.org/') - img = soup.find(id='content-block').find( - 'img', attrs={'class': check_words('cover-img')} - ) - return img['src'] + remove_tags = [ + classes('pb-ad news-article__hero__scroller 
news-article__version-of-story') + ] + + recipe_specific_options = { + 'issue': { + 'short': 'Enter the Issue Number you want to download\n(Vol/Issue format)', + 'long': 'For example, 385/6710', + 'default': 'current' + } + } def preprocess_html(self, soup): - for img in soup.findAll('img', attrs={'data-src': True}): - if img['data-src'].endswith('medium.gif'): - img['src'] = img['data-src'][:-10] + 'large.jpg' - a = img.findParent(attrs={'href': True}) - if a is not None and a['href'].startswith(img['src']): - del a['href'] - else: - img['src'] = img['data-src'] + for p in soup.findAll(attrs={'role':'paragraph'}): + p.name = 'p' + p.attrs = {} return soup + + def parse_index(self): + issue_url = 'https://www.science.org/toc/sciadv/current' + d = self.recipe_specific_options.get('issue') + if d and isinstance(d, str): + issue_url = 'https://www.science.org/toc/sciadv/' + d + + soup = self.index_to_soup(issue_url) + tme = soup.find(**classes('journal-issue__vol')) + if tme: + self.timefmt = ' [%s]' % self.tag_to_string(tme).strip().replace('|', ' | ') + det = soup.find(attrs={'id':'journal-issue-details'}) + if det: + self.description = self.tag_to_string(det).strip() + cov = soup.find(**classes('cover-image__image')) + if cov: + self.cover_url = absurl(cov.img['src']) + + feeds = [] + + div = soup.find('div', attrs={'class':'toc__body'}) + for sec in div.findAll('section', **classes('toc__section')): + name = sec.find(**classes('sidebar-article-title--decorated')) + section = self.tag_to_string(name).strip() + self.log(section) + + articles = [] + + for card in sec.findAll(**classes('card-header')): + ti = card.find(**classes('article-title')) + url = absurl(ti.a['href']) + title = self.tag_to_string(ti).strip() + desc = '' + meta = card.find(**classes('card-meta')) + if meta: + desc = self.tag_to_string(meta).strip() + self.log(' ', title, '\n\t', desc, '\n\t\t', url) + articles.append({'title': title, 'description':desc, 'url': url}) + feeds.append((section, 
articles)) + return feeds diff --git a/recipes/sciimmunol.recipe b/recipes/sciimmunol.recipe new file mode 100644 index 0000000000..972106860c --- /dev/null +++ b/recipes/sciimmunol.recipe @@ -0,0 +1,101 @@ +#!/usr/bin/env python +from calibre.web.feeds.news import BasicNewsRecipe, classes + +def absurl(url): + if url.startswith('/'): + url = 'https://www.science.org' + url + return url + + +class scienceadv(BasicNewsRecipe): + title = 'Science Immunology' + __author__ = 'unkn0wn' + description = ( + 'Immunology is in a period of unprecedented expansion and progress. The broad influence of the immune system on diverse ' + 'aspects of health and disease is beginning to be appreciated, and innovative immunological interventions are proving successful ' + 'in the clinic. New tools are revealing the extent of this influence with exceptional precision and reach. Science Immunology ' + 'provides a platform for the most exciting findings in this growing field.' + ) + encoding = 'utf-8' + no_javascript = True + no_stylesheets = True + remove_attributes = ['style', 'height', 'width'] + masthead_url = 'https://www.science.org/pb-assets/images/logos/sciimmunol-logo-1620488349717.svg' + language = 'en' + simultaneous_downloads = 1 + browser_type = 'qt' + + extra_css = ''' + .news-article__figure__caption {font-size:small; text-align:center;} + .core-self-citation, .meta-panel__left-content, .news-article__hero__top-meta {font-size:small;} + .contributors, .news-article__hero__bottom-meta, #bibliography, #elettersSection {font-size:small;} + img {display:block; margin:0 auto;} + .core-lede {font-style:italic; color:#202020;} + ''' + + ignore_duplicate_articles = {'url'} + + keep_only_tags = [ + classes('meta-panel__left-content news-article__hero__info news-article__hero__figure bodySection'), + dict(name='h1', attrs={'property':'name'}), + dict(name='div', **classes('core-lede contributors core-self-citation')), + dict(attrs={'data-core-wrapper':'content'}) + ] + + remove_tags 
= [ + classes('pb-ad news-article__hero__scroller news-article__version-of-story') + ] + + recipe_specific_options = { + 'issue': { + 'short': 'Enter the Issue Number you want to download\n(Vol/Issue format)', + 'long': 'For example, 385/6710', + 'default': 'current' + } + } + + def preprocess_html(self, soup): + for p in soup.findAll(attrs={'role':'paragraph'}): + p.name = 'p' + p.attrs = {} + return soup + + def parse_index(self): + issue_url = 'https://www.science.org/toc/sciimmunol/current' + d = self.recipe_specific_options.get('issue') + if d and isinstance(d, str): + issue_url = 'https://www.science.org/toc/sciimmunol/' + d + + soup = self.index_to_soup(issue_url) + tme = soup.find(**classes('journal-issue__vol')) + if tme: + self.timefmt = ' [%s]' % self.tag_to_string(tme).strip().replace('|', ' | ') + det = soup.find(attrs={'id':'journal-issue-details'}) + if det: + self.description = self.tag_to_string(det).strip() + cov = soup.find(**classes('cover-image__image')) + if cov: + self.cover_url = absurl(cov.img['src']) + + feeds = [] + + div = soup.find('div', attrs={'class':'toc__body'}) + for sec in div.findAll('section', **classes('toc__section')): + name = sec.find(**classes('sidebar-article-title--decorated')) + section = self.tag_to_string(name).strip() + self.log(section) + + articles = [] + + for card in sec.findAll(**classes('card-header')): + ti = card.find(**classes('article-title')) + url = absurl(ti.a['href']) + title = self.tag_to_string(ti).strip() + desc = '' + meta = card.find(**classes('card-meta')) + if meta: + desc = self.tag_to_string(meta).strip() + self.log(' ', title, '\n\t', desc, '\n\t\t', url) + articles.append({'title': title, 'description':desc, 'url': url}) + feeds.append((section, articles)) + return feeds diff --git a/recipes/scirobotics.recipe b/recipes/scirobotics.recipe new file mode 100644 index 0000000000..7032396347 --- /dev/null +++ b/recipes/scirobotics.recipe @@ -0,0 +1,101 @@ +#!/usr/bin/env python +from 
calibre.web.feeds.news import BasicNewsRecipe, classes + +def absurl(url): + if url.startswith('/'): + url = 'https://www.science.org' + url + return url + + +class scienceadv(BasicNewsRecipe): + title = 'Science Robotics' + __author__ = 'unkn0wn' + description = ( + 'Science Robotics provides a much-needed forum for the latest technological advances and for the critical social, ethical ' + 'and policy issues surrounding robotics. Science Robotics caters to both researchers and general stakeholders. It is multidisciplinary, ' + 'covering the traditional disciplines of robotics, as well as emerging trends such as advanced materials and bio-inspired designs; it covers ' + 'all scales, from very large systems to micro/nano robots; its scope is broad, addressing both theoretical advances and practical applications.' + ) + encoding = 'utf-8' + no_javascript = True + no_stylesheets = True + remove_attributes = ['style', 'height', 'width'] + masthead_url = 'https://www.science.org/pb-assets/images/logos/scirobotics-logo-1620488350107.svg' + language = 'en' + simultaneous_downloads = 1 + browser_type = 'qt' + + extra_css = ''' + .news-article__figure__caption {font-size:small; text-align:center;} + .core-self-citation, .meta-panel__left-content, .news-article__hero__top-meta {font-size:small;} + .contributors, .news-article__hero__bottom-meta, #bibliography, #elettersSection {font-size:small;} + img {display:block; margin:0 auto;} + .core-lede {font-style:italic; color:#202020;} + ''' + + ignore_duplicate_articles = {'url'} + + keep_only_tags = [ + classes('meta-panel__left-content news-article__hero__info news-article__hero__figure bodySection'), + dict(name='h1', attrs={'property':'name'}), + dict(name='div', **classes('core-lede contributors core-self-citation')), + dict(attrs={'data-core-wrapper':'content'}) + ] + + remove_tags = [ + classes('pb-ad news-article__hero__scroller news-article__version-of-story') + ] + + recipe_specific_options = { + 'issue': { + 'short': 
'Enter the Issue Number you want to download\n(Vol/Issue format)', + 'long': 'For example, 385/6710', + 'default': 'current' + } + } + + def preprocess_html(self, soup): + for p in soup.findAll(attrs={'role':'paragraph'}): + p.name = 'p' + p.attrs = {} + return soup + + def parse_index(self): + issue_url = 'https://www.science.org/toc/scirobotics/current' + d = self.recipe_specific_options.get('issue') + if d and isinstance(d, str): + issue_url = 'https://www.science.org/toc/scirobotics/' + d + + soup = self.index_to_soup(issue_url) + tme = soup.find(**classes('journal-issue__vol')) + if tme: + self.timefmt = ' [%s]' % self.tag_to_string(tme).strip().replace('|', ' | ') + det = soup.find(attrs={'id':'journal-issue-details'}) + if det: + self.description = self.tag_to_string(det).strip() + cov = soup.find(**classes('cover-image__image')) + if cov: + self.cover_url = absurl(cov.img['src']) + + feeds = [] + + div = soup.find('div', attrs={'class':'toc__body'}) + for sec in div.findAll('section', **classes('toc__section')): + name = sec.find(**classes('sidebar-article-title--decorated')) + section = self.tag_to_string(name).strip() + self.log(section) + + articles = [] + + for card in sec.findAll(**classes('card-header')): + ti = card.find(**classes('article-title')) + url = absurl(ti.a['href']) + title = self.tag_to_string(ti).strip() + desc = '' + meta = card.find(**classes('card-meta')) + if meta: + desc = self.tag_to_string(meta).strip() + self.log(' ', title, '\n\t', desc, '\n\t\t', url) + articles.append({'title': title, 'description':desc, 'url': url}) + feeds.append((section, articles)) + return feeds diff --git a/recipes/scisignaling.recipe b/recipes/scisignaling.recipe new file mode 100644 index 0000000000..e50c816410 --- /dev/null +++ b/recipes/scisignaling.recipe @@ -0,0 +1,100 @@ +#!/usr/bin/env python +from calibre.web.feeds.news import BasicNewsRecipe, classes + +def absurl(url): + if url.startswith('/'): + url = 'https://www.science.org' + url + return 
url + + +class scienceadv(BasicNewsRecipe): + title = 'Science Signaling' + __author__ = 'unkn0wn' + description = ( + 'Science Signaling offers researchers the most up-to-date resource for groundbreaking research and commentary ' + 'in the dynamic field of cellular signaling. From basic science to design of therapeutics and from molecules to ' + 'networks and systems design, this weekly e-resource keeps your researchers, faculty, and students ahead of the curve.' + ) + encoding = 'utf-8' + no_javascript = True + no_stylesheets = True + remove_attributes = ['style', 'height', 'width'] + masthead_url = 'https://www.science.org/pb-assets/images/logos/signaling-logo-1620488350150.svg' + language = 'en' + simultaneous_downloads = 1 + browser_type = 'qt' + + extra_css = ''' + .news-article__figure__caption {font-size:small; text-align:center;} + .core-self-citation, .meta-panel__left-content, .news-article__hero__top-meta {font-size:small;} + .contributors, .news-article__hero__bottom-meta, #bibliography, #elettersSection {font-size:small;} + img {display:block; margin:0 auto;} + .core-lede {font-style:italic; color:#202020;} + ''' + + ignore_duplicate_articles = {'url'} + + keep_only_tags = [ + classes('meta-panel__left-content news-article__hero__info news-article__hero__figure bodySection'), + dict(name='h1', attrs={'property':'name'}), + dict(name='div', **classes('core-lede contributors core-self-citation')), + dict(attrs={'data-core-wrapper':'content'}) + ] + + remove_tags = [ + classes('pb-ad news-article__hero__scroller news-article__version-of-story') + ] + + recipe_specific_options = { + 'issue': { + 'short': 'Enter the Issue Number you want to download\n(Vol/Issue format)', + 'long': 'For example, 385/6710', + 'default': 'current' + } + } + + def preprocess_html(self, soup): + for p in soup.findAll(attrs={'role':'paragraph'}): + p.name = 'p' + p.attrs = {} + return soup + + def parse_index(self): + issue_url = 'https://www.science.org/toc/signaling/current' + 
d = self.recipe_specific_options.get('issue') + if d and isinstance(d, str): + issue_url = 'https://www.science.org/toc/signaling/' + d + + soup = self.index_to_soup(issue_url) + tme = soup.find(**classes('journal-issue__vol')) + if tme: + self.timefmt = ' [%s]' % self.tag_to_string(tme).strip().replace('|', ' | ') + det = soup.find(attrs={'id':'journal-issue-details'}) + if det: + self.description = self.tag_to_string(det).strip() + cov = soup.find(**classes('cover-image__image')) + if cov: + self.cover_url = absurl(cov.img['src']) + + feeds = [] + + div = soup.find('div', attrs={'class':'toc__body'}) + for sec in div.findAll('section', **classes('toc__section')): + name = sec.find(**classes('sidebar-article-title--decorated')) + section = self.tag_to_string(name).strip() + self.log(section) + + articles = [] + + for card in sec.findAll(**classes('card-header')): + ti = card.find(**classes('article-title')) + url = absurl(ti.a['href']) + title = self.tag_to_string(ti).strip() + desc = '' + meta = card.find(**classes('card-meta')) + if meta: + desc = self.tag_to_string(meta).strip() + self.log(' ', title, '\n\t', desc, '\n\t\t', url) + articles.append({'title': title, 'description':desc, 'url': url}) + feeds.append((section, articles)) + return feeds diff --git a/recipes/scistm.recipe b/recipes/scistm.recipe new file mode 100644 index 0000000000..9838fd6311 --- /dev/null +++ b/recipes/scistm.recipe @@ -0,0 +1,101 @@ +#!/usr/bin/env python +from calibre.web.feeds.news import BasicNewsRecipe, classes + +def absurl(url): + if url.startswith('/'): + url = 'https://www.science.org' + url + return url + + +class scienceadv(BasicNewsRecipe): + title = 'Science Translational Medicine' + __author__ = 'unkn0wn' + description = ( + 'Science Translational Medicine is the leading weekly online journal publishing translational research at the intersection of science, ' + 'engineering and medicine. 
The goal of Science Translational Medicine is to promote human health by providing a forum for communicating ' + 'the latest research advances from biomedical, translational, and clinical researchers from all established and emerging disciplines relevant ' + 'to medicine. In addition to original research, Science Translational Medicine also publishes Reviews, Editorials, Focus articles, and Viewpoints.' + ) + encoding = 'utf-8' + no_javascript = True + no_stylesheets = True + remove_attributes = ['style', 'height', 'width'] + masthead_url = 'https://www.science.org/pb-assets/images/logos/stm-logo-1620488350153.svg' + language = 'en' + simultaneous_downloads = 1 + browser_type = 'qt' + + extra_css = ''' + .news-article__figure__caption {font-size:small; text-align:center;} + .core-self-citation, .meta-panel__left-content, .news-article__hero__top-meta {font-size:small;} + .contributors, .news-article__hero__bottom-meta, #bibliography, #elettersSection {font-size:small;} + img {display:block; margin:0 auto;} + .core-lede {font-style:italic; color:#202020;} + ''' + + ignore_duplicate_articles = {'url'} + + keep_only_tags = [ + classes('meta-panel__left-content news-article__hero__info news-article__hero__figure bodySection'), + dict(name='h1', attrs={'property':'name'}), + dict(name='div', **classes('core-lede contributors core-self-citation')), + dict(attrs={'data-core-wrapper':'content'}) + ] + + remove_tags = [ + classes('pb-ad news-article__hero__scroller news-article__version-of-story') + ] + + recipe_specific_options = { + 'issue': { + 'short': 'Enter the Issue Number you want to download\n(Vol/Issue format)', + 'long': 'For example, 385/6710', + 'default': 'current' + } + } + + def preprocess_html(self, soup): + for p in soup.findAll(attrs={'role':'paragraph'}): + p.name = 'p' + p.attrs = {} + return soup + + def parse_index(self): + issue_url = 'https://www.science.org/toc/stm/current' + d = self.recipe_specific_options.get('issue') + if d and isinstance(d, 
str): + issue_url = 'https://www.science.org/toc/stm/' + d + + soup = self.index_to_soup(issue_url) + tme = soup.find(**classes('journal-issue__vol')) + if tme: + self.timefmt = ' [%s]' % self.tag_to_string(tme).strip().replace('|', ' | ') + det = soup.find(attrs={'id':'journal-issue-details'}) + if det: + self.description = self.tag_to_string(det).strip() + cov = soup.find(**classes('cover-image__image')) + if cov: + self.cover_url = absurl(cov.img['src']) + + feeds = [] + + div = soup.find('div', attrs={'class':'toc__body'}) + for sec in div.findAll('section', **classes('toc__section')): + name = sec.find(**classes('sidebar-article-title--decorated')) + section = self.tag_to_string(name).strip() + self.log(section) + + articles = [] + + for card in sec.findAll(**classes('card-header')): + ti = card.find(**classes('article-title')) + url = absurl(ti.a['href']) + title = self.tag_to_string(ti).strip() + desc = '' + meta = card.find(**classes('card-meta')) + if meta: + desc = self.tag_to_string(meta).strip() + self.log(' ', title, '\n\t', desc, '\n\t\t', url) + articles.append({'title': title, 'description':desc, 'url': url}) + feeds.append((section, articles)) + return feeds