From c9a05cefd6998db7eb6eb25a2d0b500a2cf26617 Mon Sep 17 00:00:00 2001 From: unkn0w7n <51942695+unkn0w7n@users.noreply.github.com> Date: Thu, 13 Feb 2025 09:08:30 +0530 Subject: [PATCH 1/4] Update science_journal.recipe --- recipes/science_journal.recipe | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/recipes/science_journal.recipe b/recipes/science_journal.recipe index 0317146c76..622e1a9405 100644 --- a/recipes/science_journal.recipe +++ b/recipes/science_journal.recipe @@ -1,5 +1,5 @@ #!/usr/bin/env python -from calibre.web.feeds.news import BasicNewsRecipe, classes +from calibre.web.feeds.news import BasicNewsRecipe, classes, prefixed_classes def absurl(url): @@ -25,7 +25,7 @@ class science(BasicNewsRecipe): browser_type = 'webengine' extra_css = ''' - .news-article__figure__caption, .figc {font-size:small;} + .news-article__figure__caption, .calibre-nuked-tag-figcaption, .card-related {font-size:small;} .core-self-citation, .meta-panel__left-content, .news-article__hero__top-meta {font-size:small;} .contributors, .news-article__hero__bottom-meta, #bibliography, #elettersSection {font-size:small;} img {display:block; margin:0 auto;} @@ -59,9 +59,6 @@ class science(BasicNewsRecipe): } def preprocess_html(self, soup): - for p in soup.findAll(attrs={'role':'paragraph'}): - p.name = 'p' - p.attrs = {} for img in soup.findAll('img', attrs={'src':True}): if img['src'].endswith('.jpg'): res = '/cdn-cgi/image/width=600' @@ -69,8 +66,12 @@ class science(BasicNewsRecipe): if w and isinstance(w, str): res = '/cdn-cgi/image/width=' + w img['src'] = absurl(res + img['src']) - for figc in soup.findAll('figcaption'): - figc['class'] = 'figc' + return soup + + def postprocess_html(self, soup, first_fetch): + bd = soup.find('body') + if bd: + p.attrs = {} return soup def parse_index(self): @@ -92,7 +93,7 @@ class science(BasicNewsRecipe): feeds = [] - for sec in soup.findAll('section', **classes('toc__section')): + for sec in soup.findAll('section', **prefixed_classes('toc__section')): name = sec.find(**classes('sidebar-article-title--decorated')) section = self.tag_to_string(name).strip() self.log(section) From 8cf9531f73eca25e4055fbb00e0b58021245c778 Mon Sep 17 00:00:00 2001 From: unkn0w7n <51942695+unkn0w7n@users.noreply.github.com> Date: Thu, 13 Feb 2025 09:10:45 +0530 Subject: [PATCH 2/4] Update science_advances.recipe --- recipes/science_advances.recipe | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/recipes/science_advances.recipe b/recipes/science_advances.recipe index cd2a6818f0..bf7cc0e5a9 100644 --- a/recipes/science_advances.recipe +++ b/recipes/science_advances.recipe @@ -1,5 +1,5 @@ -#!/usr/bin/env python -from calibre.web.feeds.news import BasicNewsRecipe, classes + #!/usr/bin/env python +from calibre.web.feeds.news import BasicNewsRecipe, classes, prefixed_classes def absurl(url): @@ -27,7 +27,7 @@ class scienceadv(BasicNewsRecipe): browser_type = 'webengine' extra_css = ''' - .news-article__figure__caption, .figc {font-size:small;} + .news-article__figure__caption, .calibre-nuked-tag-figcaption, .card-related {font-size:small;} .core-self-citation, .meta-panel__left-content, .news-article__hero__top-meta {font-size:small;} .contributors, .news-article__hero__bottom-meta, #bibliography, #elettersSection {font-size:small;} img {display:block; margin:0 auto;} @@ -61,9 +61,6 @@ class scienceadv(BasicNewsRecipe): } def preprocess_html(self, soup): - for p in soup.findAll(attrs={'role':'paragraph'}): - p.name = 'p' - p.attrs = {} for img in soup.findAll('img', attrs={'src':True}): if img['src'].endswith('.jpg'): res = '/cdn-cgi/image/width=600' @@ -71,8 +68,12 @@ class scienceadv(BasicNewsRecipe): if w and isinstance(w, str): res = '/cdn-cgi/image/width=' + w img['src'] = absurl(res + img['src']) - for figc in soup.findAll('figcaption'): - figc['class'] = 'figc' + return soup + + def postprocess_html(self, soup, first_fetch): + bd = soup.find('body') + if bd: + p.attrs = {} return soup def parse_index(self): @@ -94,7 +95,7 @@ class scienceadv(BasicNewsRecipe): feeds = [] - for sec in soup.findAll('section', **classes('toc__section')): + for sec in soup.findAll('section', **prefixed_classes('toc__section')): name = sec.find(**classes('sidebar-article-title--decorated')) section = self.tag_to_string(name).strip() self.log(section) From fcd921e0b58452be8b8002d8f951c1732095a81c Mon Sep 17 00:00:00 2001 From: unkn0w7n <51942695+unkn0w7n@users.noreply.github.com> Date: Thu, 13 Feb 2025 09:13:18 +0530 Subject: [PATCH 3/4] ... --- recipes/sciimmunol.recipe | 17 +++++++++-------- recipes/scirobotics.recipe | 17 +++++++++-------- recipes/scisignaling.recipe | 17 +++++++++-------- recipes/scistm.recipe | 17 +++++++++-------- 4 files changed, 36 insertions(+), 32 deletions(-) diff --git a/recipes/sciimmunol.recipe b/recipes/sciimmunol.recipe index 21d79fd2d8..d4646ae41e 100644 --- a/recipes/sciimmunol.recipe +++ b/recipes/sciimmunol.recipe @@ -1,5 +1,5 @@ #!/usr/bin/env python -from calibre.web.feeds.news import BasicNewsRecipe, classes +from calibre.web.feeds.news import BasicNewsRecipe, classes, prefixed_classes def absurl(url): @@ -27,7 +27,7 @@ class scienceadv(BasicNewsRecipe): browser_type = 'webengine' extra_css = ''' - .news-article__figure__caption, .figc {font-size:small;} + .news-article__figure__caption, .calibre-nuked-tag-figcaption, .card-related {font-size:small;} .core-self-citation, .meta-panel__left-content, .news-article__hero__top-meta {font-size:small;} .contributors, .news-article__hero__bottom-meta, #bibliography, #elettersSection {font-size:small;} img {display:block; margin:0 auto;} @@ -61,9 +61,6 @@ class scienceadv(BasicNewsRecipe): } def preprocess_html(self, soup): - for p in soup.findAll(attrs={'role':'paragraph'}): - p.name = 'p' - p.attrs = {} for img in soup.findAll('img', attrs={'src':True}): if img['src'].endswith('.jpg'): res = '/cdn-cgi/image/width=600' @@ -71,8 +68,12 @@ class scienceadv(BasicNewsRecipe): if w and isinstance(w, str): res = '/cdn-cgi/image/width=' + w img['src'] = absurl(res + img['src']) - for figc in soup.findAll('figcaption'): - figc['class'] = 'figc' + return soup + + def postprocess_html(self, soup, first_fetch): + bd = soup.find('body') + if bd: + p.attrs = {} return soup def parse_index(self): @@ -94,7 +95,7 @@ class scienceadv(BasicNewsRecipe): feeds = [] - for sec in soup.findAll('section', **classes('toc__section')): + for sec in soup.findAll('section', **prefixed_classes('toc__section')): name = sec.find(**classes('sidebar-article-title--decorated')) section = self.tag_to_string(name).strip() self.log(section) diff --git a/recipes/scirobotics.recipe b/recipes/scirobotics.recipe index 45a4df16da..c4d88c6e84 100644 --- a/recipes/scirobotics.recipe +++ b/recipes/scirobotics.recipe @@ -1,5 +1,5 @@ #!/usr/bin/env python -from calibre.web.feeds.news import BasicNewsRecipe, classes +from calibre.web.feeds.news import BasicNewsRecipe, classes, prefixed_classes def absurl(url): @@ -27,7 +27,7 @@ class scienceadv(BasicNewsRecipe): browser_type = 'webengine' extra_css = ''' - .news-article__figure__caption, .figc {font-size:small;} + .news-article__figure__caption, .calibre-nuked-tag-figcaption, .card-related {font-size:small;} .core-self-citation, .meta-panel__left-content, .news-article__hero__top-meta {font-size:small;} .contributors, .news-article__hero__bottom-meta, #bibliography, #elettersSection {font-size:small;} img {display:block; margin:0 auto;} @@ -61,9 +61,6 @@ class scienceadv(BasicNewsRecipe): } def preprocess_html(self, soup): - for p in soup.findAll(attrs={'role':'paragraph'}): - p.name = 'p' - p.attrs = {} for img in soup.findAll('img', attrs={'src':True}): if img['src'].endswith('.jpg'): res = '/cdn-cgi/image/width=600' @@ -71,8 +68,12 @@ class scienceadv(BasicNewsRecipe): if w and isinstance(w, str): res = '/cdn-cgi/image/width=' + w img['src'] = absurl(res + img['src']) - for figc in soup.findAll('figcaption'): - figc['class'] = 'figc' + return soup + + def postprocess_html(self, soup, first_fetch): + bd = soup.find('body') + if bd: + p.attrs = {} return soup def parse_index(self): @@ -94,7 +95,7 @@ class scienceadv(BasicNewsRecipe): feeds = [] - for sec in soup.findAll('section', **classes('toc__section')): + for sec in soup.findAll('section', **prefixed_classes('toc__section')): name = sec.find(**classes('sidebar-article-title--decorated')) section = self.tag_to_string(name).strip() self.log(section) diff --git a/recipes/scisignaling.recipe b/recipes/scisignaling.recipe index 18769af889..fabdbcdbd5 100644 --- a/recipes/scisignaling.recipe +++ b/recipes/scisignaling.recipe @@ -1,5 +1,5 @@ #!/usr/bin/env python -from calibre.web.feeds.news import BasicNewsRecipe, classes +from calibre.web.feeds.news import BasicNewsRecipe, classes, prefixed_classes def absurl(url): @@ -26,7 +26,7 @@ class scienceadv(BasicNewsRecipe): browser_type = 'webengine' extra_css = ''' - .news-article__figure__caption, .figc {font-size:small;} + .news-article__figure__caption, .calibre-nuked-tag-figcaption, .card-related {font-size:small;} .core-self-citation, .meta-panel__left-content, .news-article__hero__top-meta {font-size:small;} .contributors, .news-article__hero__bottom-meta, #bibliography, #elettersSection {font-size:small;} img {display:block; margin:0 auto;} @@ -60,9 +60,6 @@ class scienceadv(BasicNewsRecipe): } def preprocess_html(self, soup): - for p in soup.findAll(attrs={'role':'paragraph'}): - p.name = 'p' - p.attrs = {} for img in soup.findAll('img', attrs={'src':True}): if img['src'].endswith('.jpg'): res = '/cdn-cgi/image/width=600' @@ -70,8 +67,12 @@ class scienceadv(BasicNewsRecipe): if w and isinstance(w, str): res = '/cdn-cgi/image/width=' + w img['src'] = absurl(res + img['src']) - for figc in soup.findAll('figcaption'): - figc['class'] = 'figc' + return soup + + def postprocess_html(self, soup, first_fetch): + bd = soup.find('body') + if bd: + p.attrs = {} return soup def parse_index(self): @@ -93,7 +94,7 @@ class scienceadv(BasicNewsRecipe): feeds = [] - for sec in soup.findAll('section', **classes('toc__section')): + for sec in soup.findAll('section', **prefixed_classes('toc__section')): name = sec.find(**classes('sidebar-article-title--decorated')) section = self.tag_to_string(name).strip() self.log(section) diff --git a/recipes/scistm.recipe b/recipes/scistm.recipe index 101b60d7c4..901996de95 100644 --- a/recipes/scistm.recipe +++ b/recipes/scistm.recipe @@ -1,5 +1,5 @@ #!/usr/bin/env python -from calibre.web.feeds.news import BasicNewsRecipe, classes +from calibre.web.feeds.news import BasicNewsRecipe, classes, prefixed_classes def absurl(url): @@ -27,7 +27,7 @@ class scienceadv(BasicNewsRecipe): browser_type = 'webengine' extra_css = ''' - .news-article__figure__caption, .figc {font-size:small;} + .news-article__figure__caption, .calibre-nuked-tag-figcaption, .card-related {font-size:small;} .core-self-citation, .meta-panel__left-content, .news-article__hero__top-meta {font-size:small;} .contributors, .news-article__hero__bottom-meta, #bibliography, #elettersSection {font-size:small;} img {display:block; margin:0 auto;} @@ -61,9 +61,6 @@ class scienceadv(BasicNewsRecipe): } def preprocess_html(self, soup): - for p in soup.findAll(attrs={'role':'paragraph'}): - p.name = 'p' - p.attrs = {} for img in soup.findAll('img', attrs={'src':True}): if img['src'].endswith('.jpg'): res = '/cdn-cgi/image/width=600' @@ -71,8 +68,12 @@ class scienceadv(BasicNewsRecipe): if w and isinstance(w, str): res = '/cdn-cgi/image/width=' + w img['src'] = absurl(res + img['src']) - for figc in soup.findAll('figcaption'): - figc['class'] = 'figc' + return soup + + def postprocess_html(self, soup, first_fetch): + bd = soup.find('body') + if bd: + p.attrs = {} return soup def parse_index(self): @@ -94,7 +95,7 @@ class scienceadv(BasicNewsRecipe): feeds = [] - for sec in soup.findAll('section', **classes('toc__section')): + for sec in soup.findAll('section', **prefixed_classes('toc__section')): name = sec.find(**classes('sidebar-article-title--decorated')) section = self.tag_to_string(name).strip() self.log(section) From c0d9a2da4eef4b11d9feb9e5d0a09d516365790f Mon Sep 17 00:00:00 2001 From: unkn0w7n <51942695+unkn0w7n@users.noreply.github.com> Date: Thu, 13 Feb 2025 09:14:31 +0530 Subject: [PATCH 4/4] ... --- recipes/science_advances.recipe | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/recipes/science_advances.recipe b/recipes/science_advances.recipe index bf7cc0e5a9..0ee895a019 100644 --- a/recipes/science_advances.recipe +++ b/recipes/science_advances.recipe @@ -1,4 +1,4 @@ - #!/usr/bin/env python +#!/usr/bin/env python from calibre.web.feeds.news import BasicNewsRecipe, classes, prefixed_classes