pep8

2025-07-09 03:04:10 -04:00 · 2022-06-30 21:36:14 +05:30 · 2022-06-30 21:36:14 +05:30 · 65c55a6d44
commit 65c55a6d44
parent 248fbd3192
2 changed files with 86 additions and 46 deletions
--- a/recipes/nrc.nl.recipe
+++ b/recipes/nrc.nl.recipe
@@ -2,11 +2,11 @@
 from calibre.web.feeds.recipes import BasicNewsRecipe
 import datetime
 import json
 from time import sleep
 from mechanize import Request
 from contextlib import closing
 import re
 class NRC(BasicNewsRecipe):
    title = 'NRC'
    __author__ = 'Cristi Ghera'
@@ -17,18 +17,24 @@ class NRC(BasicNewsRecipe):
    country = 'NL'
    category = 'news, politics, Netherlands'
    resolve_internal_links = True
-    remove_tags_before = {'class':'article__header-and-content'}
+    remove_tags_before = {'class': 'article__header-and-content'}
-    remove_tags_after  = {'class':'article__header-and-content'}
+    remove_tags_after = {'class': 'article__header-and-content'}
    remove_tags = [
-        dict(attrs={'class':['article__footer',
+        dict(
-                             'lees-ook',
+            attrs={
-                             'luister-naar',
+                'class': [
-                             'print-layout-warning',
+                    'article__footer',
-                             'newslettersignup',
+                    'lees-ook',
-                             'article__byline',
+                    'luister-naar',
-                             'article__published-in',
+                    'print-layout-warning',
-                             'article__featured-image__caption__producer',
+                    'newslettersignup',
-                             'metabox',]}),
+                    'article__byline',
+                    'article__published-in',
+                    'article__featured-image__caption__producer',
+                    'metabox',
+                ]
+            }
+        ),
        dict(name=['script', 'noscript', 'style']),
    ]
    remove_attributes = ["class", "id", "name", "style"]
@@ -36,24 +42,26 @@ class NRC(BasicNewsRecipe):
    no_stylesheets = True
    ignore_duplicate_articles = {'url'}
    delay = 0.3
-    
+
    touchscreen = True
-    
+
    frontpage = None
-    
+
    title_regexp = None
-    
+
    @staticmethod
    def _monthly_list_url(date, fmt="%Y/%m/"):
        return "https://www.nrc.nl/de/data/NH/" + date.strftime(fmt)
-    
+
    def _clean_article_title(self, title):
        if not title:
            return title
        if self.title_regexp is None:
-            self.title_regexp = re.compile(r'<span class="keyword">([^<]+)</span>\s*')
+            self.title_regexp = re.compile(
+                r'<span class="keyword">([^<]+)</span>\s*'
+            )
        return self.title_regexp.sub(r"\1 ", title)
-    
+
    def parse_index(self):
        sections = []
        today = datetime.date.today()
@@ -64,15 +72,22 @@ class NRC(BasicNewsRecipe):
        }
        monthly_list_urls = [
            self._monthly_list_url(today),
-            self._monthly_list_url(datetime.date(today.year, today.month, 1) - datetime.timedelta(days=1))
+            self._monthly_list_url(
+                datetime.date(today.year, today.month, 1) -
+                datetime.timedelta(days=1)
+            )
        ]
        issue_url = None
        issue_date = None
        for monthly_list_url in monthly_list_urls:
-            with closing(self.browser.open(Request(monthly_list_url, None, headers))) as r:
+            with closing(
+                self.browser.open(Request(monthly_list_url, None, headers))
+            ) as r:
                issues = json.loads(r.read())
                if len(issues) > 0:
-                    issue_date = datetime.datetime.strptime(issues[0]["published_at"], "%Y-%m-%dT%H:%M:%SZ")
+                    issue_date = datetime.datetime.strptime(
+                        issues[0]["published_at"], "%Y-%m-%dT%H:%M:%SZ"
+                    )
                    issue_url = self._monthly_list_url(issue_date, "%Y/%m/%d/")
                    self.frontpage = issues[0]["frontpage"]
                    break
@@ -93,14 +108,12 @@ class NRC(BasicNewsRecipe):
                if doc not in documents:
                    self.log.warn('Document not found:', doc)
                    continue
-                articles.append(dict(
+                articles.append(
-                    title=documents[doc]["headline"],
+                    dict(
-                    url=documents[doc]["url"]
+                        title=documents[doc]["headline"], url=documents[doc]["url"]
-                ))
+                    )
-            sections.append((
+                )
-                section["name"],
+            sections.append((section["name"], articles))
-                articles
-            ))
        return sections
    def preprocess_html(self, soup):
@@ -119,4 +132,4 @@ class NRC(BasicNewsRecipe):
        return soup
    def get_cover_url(self):
-        return self.frontpage
+        return self.frontpage
--- a/recipes/volksrant.recipe
+++ b/recipes/volksrant.recipe
@@ -2,6 +2,7 @@
 from calibre.web.feeds.recipes import BasicNewsRecipe
 import uuid
 class Volkskrant(BasicNewsRecipe):
    title = 'Volkskrant'
    __author__ = 'Cristi Ghera'
@@ -10,9 +11,20 @@ class Volkskrant(BasicNewsRecipe):
    needs_subscription = False
    resolve_internal_links = True
    remove_tags_before = dict(id='main-content')
-    remove_tags_after  = dict(id='main-content')
+    remove_tags_after = dict(id='main-content')
    remove_tags = [
-        dict(attrs={'class':['article-footer__sharing', 'artstyle__editorial-tips', 'artstyle__advertisement','artstyle__container__icon','artstyle__disabled-embed','container__title__icon',]}),
+        dict(
+            attrs={
+                'class': [
+                    'article-footer__sharing',
+                    'artstyle__editorial-tips',
+                    'artstyle__advertisement',
+                    'artstyle__container__icon',
+                    'artstyle__disabled-embed',
+                    'container__title__icon',
+                ]
+            }
+        ),
        dict(attrs={'data-element-id': ['article-element-authors']}),
        dict(name=['script', 'noscript', 'style']),
    ]
@@ -20,15 +32,17 @@ class Volkskrant(BasicNewsRecipe):
    encoding = 'utf-8'
    no_stylesheets = True
    ignore_duplicate_articles = {'url'}
-    
+
    def parse_index(self):
-        soup = self.index_to_soup('https://www.volkskrant.nl/privacy-wall/accept?redirectUri=%2Feditie%2Fvandaag%2F&authId=' + str(uuid.uuid4()))
+        soup = self.index_to_soup(
+            'https://www.volkskrant.nl/privacy-wall/accept?redirectUri=%2Feditie%2Fvandaag%2F&authId=' + str(uuid.uuid4())
+        )
        containers = soup.findAll('section', attrs={'class': 'section--horizontal'})
        sections = []
        for container in containers:
            section_title = self.tag_to_string(container.find('h2')).strip()
            articles = []
-            
+
            for art in container.findAll('article'):
                a = art.find('a')
                url = a['href']
@@ -37,9 +51,18 @@ class Volkskrant(BasicNewsRecipe):
                if '/editie/' not in url:
                    continue
                header = a.find('header')
-                teaser_label = self.tag_to_string(header.find('h4').find('span', attrs={'class': 'teaser__label'})).strip()
+                teaser_label = self.tag_to_string(
-                teaser_sublabel = self.tag_to_string(header.find('h4').find('span', attrs={'class': 'teaser__sublabel'})).strip()
+                    header.find('h4').find('span', attrs={'class': 'teaser__label'})
-                teaser_title = self.tag_to_string(header.find('h3').find('span', attrs={'class': 'teaser__title__value--short'})).strip()
+                ).strip()
+                teaser_sublabel = self.tag_to_string(
+                    header.find('h4'
+                                ).find('span', attrs={'class': 'teaser__sublabel'})
+                ).strip()
+                teaser_title = self.tag_to_string(
+                    header.find('h3').find(
+                        'span', attrs={'class': 'teaser__title__value--short'}
+                    )
+                ).strip()
                if teaser_label.lower() == "podcast":
                    continue
                parts = []
@@ -52,12 +75,16 @@ class Volkskrant(BasicNewsRecipe):
                article_title = ' \u2022 '.join(parts)
                pubdate = ''
                description = ''
-                articles.append(dict(title=article_title,
+                articles.append(
-                                    url=url,
+                    dict(
-                                    date=pubdate,
+                        title=article_title,
-                                    description=description,
+                        url=url,
-                                    content=''))
+                        date=pubdate,
-            
+                        description=description,
+                        content=''
+                    )
+                )
            sections.append((section_title, articles))
        return sections
@@ -66,4 +93,4 @@ class Volkskrant(BasicNewsRecipe):
            if tag.name == 'img':
                if tag['src'][0] == '/':
                    tag['src'] = 'https://www.volkskrant.nl' + tag['src']
-        return soup
+        return soup