From 19956792a119b706e6b73a4b2743e73cd5b4b42d Mon Sep 17 00:00:00 2001
From: Henrik Holm
Date: Sat, 5 Oct 2024 19:57:52 +0200
Subject: [PATCH 1/9] Change language: `se` -> `sv`

---
 recipes/fokus.recipe | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/recipes/fokus.recipe b/recipes/fokus.recipe
index 505c3c3d45..726104e2e8 100644
--- a/recipes/fokus.recipe
+++ b/recipes/fokus.recipe
@@ -38,7 +38,7 @@ class Fokus(BasicNewsRecipe):
     description = "The last 7 days of news and articles from the Swedish current-affairs magazine 'Fokus'"
     encoding = 'utf-8'
     __author__ = 'Henrik Holm (https://github.com/h-holm)'
-    language = 'se'
+    language = 'sv'
     ignore_duplicate_articles = {'title', 'url'}
     masthead_url = 'https://cdn.fokus.se/app/uploads/fokus/2022/05/12214931/fokus-logo.svg'
     no_stylesheets = True

From ee5b24a93798d0f26dd679434bc3aeebe9920853 Mon Sep 17 00:00:00 2001
From: Henrik Holm
Date: Sat, 5 Oct 2024 23:34:06 +0200
Subject: [PATCH 2/9] Prefix inner function with "_"

---
 recipes/fokus.recipe | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/recipes/fokus.recipe b/recipes/fokus.recipe
index 726104e2e8..7475ece9ca 100644
--- a/recipes/fokus.recipe
+++ b/recipes/fokus.recipe
@@ -121,11 +121,12 @@ class Fokus(BasicNewsRecipe):
         return {'title': title, 'url': url, 'description': desc, 'date': swedish_date_str}
 
     def parse_web_section(self, soup, slug):
-        def log(article):
+        def _log(article):
             log_message = f"\t{article['title']} : {article['date']} : {article['url']}"
             if article.get('description'):
                 log_message += f" : {article['description']}"
             self.log(log_message)
+
         try:
             article_blurbs = soup.find_all('article', {'class': 'Blurb'})
         except AttributeError:

From 6db1fe5af45a88a22217577e9a8f4009ab2c5cf0 Mon Sep 17 00:00:00 2001
From: Henrik Holm
Date: Sun, 6 Oct 2024 00:06:05 +0200
Subject: [PATCH 3/9] Dynamically identify all unique web sections

---
 recipes/fokus.recipe | 81 +++++++++++++++++++++++++-------------------
 1 file changed, 47 insertions(+), 34 deletions(-)

diff --git a/recipes/fokus.recipe b/recipes/fokus.recipe
index 7475ece9ca..bfe187da99 100644
--- a/recipes/fokus.recipe
+++ b/recipes/fokus.recipe
@@ -2,30 +2,9 @@
 # vim:fileencoding=utf-8
 from datetime import datetime, timezone
 
-from calibre.web.feeds.news import BasicNewsRecipe
+from bs4 import BeautifulSoup
 
-WEB_SECTIONS = [
-    ('Inrikes', 'inrikes'),
-    ('Utrikes', 'utrikes'),
-    ('Aktuellt', 'aktuellt'),
-    ('Politik', 'politik'),
-    ('Ekonomi', 'ekonomi'),
-    ('Kultur', 'kultur'),
-    ('Analys', 'analys'),
-    ('Vetenskap', 'vetenskap'),
-    ('Krönikor', 'kronika'),
-    ('Opinion', 'opinion'),
-    ('Veckans Fokus', 'veckans-fokus'),
-    ('Synvinkel', 'synvinkel'),
-    ('Minnesord', 'minnesord'),
-    ('Debatt', 'debatt'),
-    ('Andra kammaren', 'andra-kammaren'),
-    ('Skuggkabinettet', 'skuggkabinettet'),
-    ('Intervju', 'intervju'),
-    ('Mötet', 'motet'),
-    ('Veckans bråk', 'veckans-brak'),
-    ('Johans Blogg', 'johans-blogg'),
-]
+from calibre.web.feeds.news import BasicNewsRecipe
 
 
 class NoArticles(Exception):
@@ -95,6 +74,41 @@ class Fokus(BasicNewsRecipe):
         br.submit()
         return br
 
+    def get_web_sections(self, main_url: str) -> list[tuple[str, str]]:
+        """Return a list of tuples of (1) the URL and (2) the name of each section found on the Fokus website.
+
+        For example, if the Fokus website currently includes an 'Aktuellt' section, create a
+        `('https://www.fokus.se/aktuellt', 'Aktuellt')` tuple.
+
+        Args:
+            main_url (str): The entrypoint URL of the Fokus website.
+
+        Returns:
+            list[tuple[str, str]]: Pairs of (1) the URL and (2) the human-readable name of each Fokus section.
+        """
+        soup = self.index_to_soup(main_url)
+
+        # Identify all unique <li> tags of class 'menu-item-type-taxonomy'. The class subsetting excludes sections that
+        # are not suited for reading, e.g., the "Podcast" and "Läs E-Tidningen" sections.
+        unique_urls = set()
+        urls_and_section_names = list()
+        for li_tag in soup.find_all('li', class_='menu-item-type-taxonomy'):
+            # The <li> tag contains (should contain) an anchor that in turn contains the URL and link name.
+            a_tag = li_tag.find('a')
+            url = a_tag.get('href').rstrip('/')
+            link_name = a_tag.text.strip()
+
+            # Skip this <li> tag as we have already extracted its URL and link name from another <li> tag.
+            if url in unique_urls:
+                continue
+            unique_urls.add(url)
+
+            self.log(f"Identified section '{link_name}' at URL '{url}'")
+            urls_and_section_names.append((url, link_name))
+
+        self.log(f'Identified a total of {len(urls_and_section_names)} unique sections.')
+        return urls_and_section_names
+
     def parse_article_blurb(self, article_blurb):
         desc = ''
         if a_tag := article_blurb.find('a', href=True):
@@ -120,7 +134,7 @@ class Fokus(BasicNewsRecipe):
             desc += f' ({self.tag_to_string(in_cooperation_with_tag)})'
         return {'title': title, 'url': url, 'description': desc, 'date': swedish_date_str}
 
-    def parse_web_section(self, soup, slug):
+    def parse_web_section(self, section_soup: BeautifulSoup):
         def _log(article):
             log_message = f"\t{article['title']} : {article['date']} : {article['url']}"
             if article.get('description'):
@@ -128,27 +142,26 @@ class Fokus(BasicNewsRecipe):
             self.log(log_message)
 
         try:
-            article_blurbs = soup.find_all('article', {'class': 'Blurb'})
+            article_blurbs = section_soup.find_all('article', {'class': 'Blurb'})
         except AttributeError:
             article_blurbs = []
         if not article_blurbs:
-            raise ValueError(f'Failed to find article blurbs for slug: {slug}')
+            raise ValueError('Failed to find article blurbs.')
         for article_blurb in article_blurbs:
-            if (article := self.parse_article_blurb(article_blurb)):
-                log(article)
+            if article := self.parse_article_blurb(article_blurb):
+                _log(article)
                 yield article
 
     def parse_index(self):
         feeds = []
-        for section_title, slug in WEB_SECTIONS:
-            url = f'{self.main_url}/{slug}'
+        for section_url, section_title in self.get_web_sections(self.main_url):
             try:
-                soup = self.index_to_soup(url)
+                soup = self.index_to_soup(section_url)
             except Exception:
-                self.log.error(f'Failed to download section: {url}')
+                self.log.error(f"Failed to download section '{section_title}' via URL '{section_url}'")
                 continue
-            self.log(f'Found section: {section_title}')
-            articles = list(self.parse_web_section(soup, slug))
+            breakpoint()
+            articles = list(self.parse_web_section(soup))
             if articles:
                 feeds.append((section_title, articles))
         if not feeds:

From 6630793a75e0d83cae0269ad6a6ae81dce9d9647 Mon Sep 17 00:00:00 2001
From: Henrik Holm
Date: Sun, 6 Oct 2024 00:06:37 +0200
Subject: [PATCH 4/9] Format using `ruff` rules specified in `pyproject.toml`

---
 recipes/fokus.recipe | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/recipes/fokus.recipe b/recipes/fokus.recipe
index bfe187da99..b484e6aace 100644
--- a/recipes/fokus.recipe
+++ b/recipes/fokus.recipe
@@ -120,13 +120,11 @@ class Fokus(BasicNewsRecipe):
             if time_tag := a_tag.find('time', {'class': 'Blurb__date'}):
                 swedish_date_str = self.tag_to_string(time_tag)
                 datetime_str = time_tag['datetime']
-                datetime_time = datetime.strptime(
-                    datetime_str, '%Y-%m-%dT%H:%M:%S%z')
+                datetime_time = datetime.strptime(datetime_str, '%Y-%m-%dT%H:%M:%S%z')
                 now = datetime.now(timezone.utc)
                 delta = now - datetime_time
                 if delta.days > self.max_age:
-                    self.log.debug(
-                        f"\tSkipping article '{title}' as it is too old")
+                    self.log.debug(f"\tSkipping article '{title}' as it is too old")
                 else:
                     if desc_tag := a_tag.find('div', {'class': 'Blurb__summary'}):
                         desc = self.tag_to_string(desc_tag)
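For reference, the dynamic section discovery introduced in PATCH 3/9 can be exercised outside of calibre with nothing more than BeautifulSoup. Below is a minimal sketch of the same scraping logic; the sample HTML is an invented stand-in for the fokus.se menu, not markup captured from the live site:

    from bs4 import BeautifulSoup

    html = '''
    <ul id="menu">
      <li class="menu-item-type-taxonomy"><a href="https://www.fokus.se/inrikes/">Inrikes</a></li>
      <li class="menu-item-type-taxonomy"><a href="https://www.fokus.se/inrikes/">Inrikes</a></li>
      <li class="menu-item-type-custom"><a href="https://www.fokus.se/las-e-tidningen/">Läs E-Tidningen</a></li>
    </ul>
    '''

    soup = BeautifulSoup(html, 'html.parser')
    unique_urls = set()
    for li_tag in soup.find_all('li', class_='menu-item-type-taxonomy'):
        a_tag = li_tag.find('a')
        url = a_tag.get('href').rstrip('/')  # Normalize away any trailing slash.
        if url in unique_urls:               # Skip URLs already seen in an earlier <li> tag.
            continue
        unique_urls.add(url)
        print(url, '->', a_tag.text.strip())

Running the sketch prints a single 'https://www.fokus.se/inrikes -> Inrikes' line: the duplicate <li> is dropped, and the 'menu-item-type-custom' e-paper entry is never matched, which is exactly how the recipe filters out non-article sections.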
From 4ebc9ba1102a81df458e6624da92b24422ba99b8 Mon Sep 17 00:00:00 2001
From: Henrik Holm
Date: Sun, 6 Oct 2024 00:27:40 +0200
Subject: [PATCH 5/9] Use `[]` instead of `list()`

---
 recipes/fokus.recipe | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/recipes/fokus.recipe b/recipes/fokus.recipe
index b484e6aace..0a59eacc5f 100644
--- a/recipes/fokus.recipe
+++ b/recipes/fokus.recipe
@@ -91,7 +91,7 @@ class Fokus(BasicNewsRecipe):
         # Identify all unique <li> tags of class 'menu-item-type-taxonomy'. The class subsetting excludes sections that
         # are not suited for reading, e.g., the "Podcast" and "Läs E-Tidningen" sections.
         unique_urls = set()
-        urls_and_section_names = list()
+        section_urls_and_names = []
         for li_tag in soup.find_all('li', class_='menu-item-type-taxonomy'):
             # The <li> tag contains (should contain) an anchor that in turn contains the URL and link name.
             a_tag = li_tag.find('a')
@@ -104,10 +104,10 @@ class Fokus(BasicNewsRecipe):
             unique_urls.add(url)
 
             self.log(f"Identified section '{link_name}' at URL '{url}'")
-            urls_and_section_names.append((url, link_name))
+            section_urls_and_names.append((url, link_name))
 
-        self.log(f'Identified a total of {len(urls_and_section_names)} unique sections.')
-        return urls_and_section_names
+        self.log(f'Identified a total of {len(section_urls_and_names)} unique sections.')
+        return section_urls_and_names
 
     def parse_article_blurb(self, article_blurb):
         desc = ''
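PATCH 5/9 is primarily a style cleanup, but the literal also sidesteps a global name lookup and a call: in CPython, `[]` compiles to a single BUILD_LIST instruction, whereas `list()` must look up the name `list` and invoke it. The difference is measurable, if irrelevant in practice; a rough check (absolute numbers vary by machine and Python version):

    import timeit

    # Each call reports total seconds for 1,000,000 iterations.
    print(timeit.timeit('[]'))      # e.g. ~0.02
    print(timeit.timeit('list()'))  # e.g. ~0.05, consistently the slower of the two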
From 896a9d65610a56f1771410d95759e41ac4cc6046 Mon Sep 17 00:00:00 2001
From: Henrik Holm
Date: Sun, 6 Oct 2024 00:40:05 +0200
Subject: [PATCH 6/9] Use a dict instead of a list of tuples

---
 recipes/fokus.recipe | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/recipes/fokus.recipe b/recipes/fokus.recipe
index 0a59eacc5f..f453b37e02 100644
--- a/recipes/fokus.recipe
+++ b/recipes/fokus.recipe
@@ -74,37 +74,37 @@ class Fokus(BasicNewsRecipe):
         br.submit()
         return br
 
-    def get_web_sections(self, main_url: str) -> list[tuple[str, str]]:
-        """Return a list of tuples of (1) the URL and (2) the name of each section found on the Fokus website.
+    def get_web_sections(self, main_url: str) -> dict[str, str]:
+        """Return a dict of (1) section URL and (2) section name key-value pairs found at `main_url`.
 
-        For example, if the Fokus website currently includes an 'Aktuellt' section, create a
-        `('https://www.fokus.se/aktuellt', 'Aktuellt')` tuple.
+        For example, if the Fokus website currently includes an 'Aktuellt' section, the dict should include an entry
+        of the form `{'https://www.fokus.se/aktuellt': 'Aktuellt'}`.
 
         Args:
             main_url (str): The entrypoint URL of the Fokus website.
 
         Returns:
-            list[tuple[str, str]]: Pairs of (1) the URL and (2) the human-readable name of each Fokus section.
+            dict[str, str]: (1) URLs and (2) human-readable names of Fokus sections.
         """
         soup = self.index_to_soup(main_url)
 
         # Identify all unique <li> tags of class 'menu-item-type-taxonomy'. The class subsetting excludes sections that
         # are not suited for reading, e.g., the "Podcast" and "Läs E-Tidningen" sections.
-        unique_urls = set()
-        section_urls_and_names = []
+        section_urls_and_names = {}
         for li_tag in soup.find_all('li', class_='menu-item-type-taxonomy'):
             # The <li> tag contains (should contain) an anchor that in turn contains the URL and link name.
             a_tag = li_tag.find('a')
             url = a_tag.get('href').rstrip('/')
-            link_name = a_tag.text.strip()
+            section_name = a_tag.text.strip()
 
-            # Skip this <li> tag as we have already extracted its URL and link name from another <li> tag.
-            if url in unique_urls:
-                continue
-            unique_urls.add(url)
+            if url in section_urls_and_names:
+                # If this section URL has already been extracted from another <li> tag, it can be the case that the
+                # section name differs within this duplicate pair. In this case, use whichever section name is longer.
+                if len(section_name) >= len(section_urls_and_names[url]):
+                    section_urls_and_names[url] = section_name
 
-            self.log(f"Identified section '{link_name}' at URL '{url}'")
-            section_urls_and_names.append((url, link_name))
+            self.log(f"Identified section '{section_name}' at URL '{url}'")
+            section_urls_and_names[url] = section_name
 
         self.log(f'Identified a total of {len(section_urls_and_names)} unique sections.')
         return section_urls_and_names
@@ -152,7 +152,7 @@ class Fokus(BasicNewsRecipe):
 
     def parse_index(self):
         feeds = []
-        for section_url, section_title in self.get_web_sections(self.main_url):
+        for section_url, section_title in self.get_web_sections(self.main_url).items():
             try:
                 soup = self.index_to_soup(section_url)
             except Exception:
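The dict introduced in PATCH 6/9 also rewrites the duplicate handling: when the same section URL appears under two different labels, the intent is to keep the longer label. A minimal sketch of that keep-the-longer-name rule, with invented URLs and labels; note the explicit `continue` on the duplicate path, which the patch itself omits, so that in the patch execution falls through to the unconditional assignment and the last-seen label effectively wins:

    menu_entries = [
        ('https://www.fokus.se/kronika', 'Krönika'),
        ('https://www.fokus.se/kronika', 'Krönikor'),  # Same URL, longer label.
        ('https://www.fokus.se/inrikes', 'Inrikes'),
    ]

    section_urls_and_names = {}
    for url, name in menu_entries:
        if url in section_urls_and_names:
            # Duplicate URL: keep the longer of the two labels.
            if len(name) >= len(section_urls_and_names[url]):
                section_urls_and_names[url] = name
            continue
        section_urls_and_names[url] = name

    print(section_urls_and_names)
    # {'https://www.fokus.se/kronika': 'Krönikor', 'https://www.fokus.se/inrikes': 'Inrikes'}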
From fb191f6e7a182f2a971f03323ea138ab9f768046 Mon Sep 17 00:00:00 2001
From: Henrik Holm
Date: Sun, 6 Oct 2024 03:22:57 +0200
Subject: [PATCH 7/9] Refactor to skip empty sections and avoid duplicate
 articles

---
 recipes/fokus.recipe | 176 +++++++++++++++++++++++++++++++++++--------
 1 file changed, 145 insertions(+), 31 deletions(-)

diff --git a/recipes/fokus.recipe b/recipes/fokus.recipe
index f453b37e02..2074c69994 100644
--- a/recipes/fokus.recipe
+++ b/recipes/fokus.recipe
@@ -3,6 +3,7 @@
 from datetime import datetime, timezone
 
 from bs4 import BeautifulSoup
+from bs4.element import Tag
 
 from calibre.web.feeds.news import BasicNewsRecipe
 
@@ -86,6 +87,7 @@ class Fokus(BasicNewsRecipe):
         Returns:
             dict[str, str]: (1) URLs and (2) human-readable names of Fokus sections.
         """
+        self.log(f"Identifying all sections under '{main_url}'...")
         soup = self.index_to_soup(main_url)
 
         # Identify all unique <li> tags of class 'menu-item-type-taxonomy'. The class subsetting excludes sections that
         # are not suited for reading, e.g., the "Podcast" and "Läs E-Tidningen" sections.
@@ -103,69 +105,181 @@ class Fokus(BasicNewsRecipe):
                 if len(section_name) >= len(section_urls_and_names[url]):
                     section_urls_and_names[url] = section_name
 
-            self.log(f"Identified section '{section_name}' at URL '{url}'")
+            self.log(f"Identified section '{section_name}' at URL '{url}'.")
             section_urls_and_names[url] = section_name
 
         self.log(f'Identified a total of {len(section_urls_and_names)} unique sections.')
         return section_urls_and_names
 
-    def parse_article_blurb(self, article_blurb):
-        desc = ''
+    def parse_article_blurb(self, article_blurb: Tag) -> dict[str, str, str, str] | None:
+        """Given an <article> tag of class 'Blurb', parse it into a dict.
+
+        Args:
+            article_blurb (Tag): An <article> tag hosting metadata and the URL of an article.
+
+        Returns:
+            dict[str, str, str, str]: A dict of the form `{'url': str, 'title': str, 'description': str, 'date': str}`.
+        """
         if a_tag := article_blurb.find('a', href=True):
-            url = a_tag['href']
+            url = a_tag['href'].strip().rstrip('/')
             if url.startswith('/'):
                 url = f'{self.main_url}{url}'
+
             if title_tag := a_tag.find('h2', {'class': 'Blurb__title'}):
-                title = self.tag_to_string(title_tag)
+                title = self.tag_to_string(title_tag).strip()
             if time_tag := a_tag.find('time', {'class': 'Blurb__date'}):
-                swedish_date_str = self.tag_to_string(time_tag)
+                swedish_date_str = self.tag_to_string(time_tag).rstrip()
+
+                # Skip articles older than `self.max_age`.
                 datetime_str = time_tag['datetime']
                 datetime_time = datetime.strptime(datetime_str, '%Y-%m-%dT%H:%M:%S%z')
                 now = datetime.now(timezone.utc)
                 delta = now - datetime_time
                 if delta.days > self.max_age:
-                    self.log.debug(f"\tSkipping article '{title}' as it is too old")
-                else:
-                    if desc_tag := a_tag.find('div', {'class': 'Blurb__summary'}):
-                        desc = self.tag_to_string(desc_tag)
-                    if in_cooperation_with_tag := a_tag.find('p', {'class': 'Blurb__meta'}):
-                        desc += f' ({self.tag_to_string(in_cooperation_with_tag)})'
-        return {'title': title, 'url': url, 'description': desc, 'date': swedish_date_str}
+                    self.log.debug(f"\tSkipping article as it is too old: '{title}'")
+                    return
 
-    def parse_web_section(self, section_soup: BeautifulSoup):
-        def _log(article):
+                desc = ''
+                if desc_tag := a_tag.find('div', {'class': 'Blurb__summary'}):
+                    desc = self.tag_to_string(desc_tag).strip()
+                if in_cooperation_with_tag := a_tag.find('p', {'class': 'Blurb__meta'}):
+                    desc += f' ({self.tag_to_string(in_cooperation_with_tag).strip()})'
+
+                return {'url': url, 'title': title, 'description': desc, 'date': swedish_date_str}
+        return
+
+    def _get_article_blurbs(self, soup: BeautifulSoup) -> dict[str, dict[str, str, str, str]]:
+        """Given a Fokus webpage `soup`, return a dict of unique article entries found on the page.
+
+        The key of a given entry in the output dictionary is the article URL. The corresponding value is a dictionary
+        of the form `{'url': str, 'title': str, 'description': str, 'date': str}`.
+
+        Args:
+            soup (BeautifulSoup): The `bs4.BeautifulSoup` soup of a Fokus webpage.
+
+        Returns:
+            dict[str, dict[str, str, str, str]]: A dict with article URLs as keys and 'article dicts' as values.
+        """
+
+        def _log(article) -> None:
+            """Log a digestible summary of the input `article` blurb."""
             log_message = f"\t{article['title']} : {article['date']} : {article['url']}"
             if article.get('description'):
                 log_message += f" : {article['description']}"
-            self.log(log_message)
+            self.log.debug(log_message)
 
         try:
-            article_blurbs = section_soup.find_all('article', {'class': 'Blurb'})
+            article_blurbs = soup.find_all('article', {'class': 'Blurb'})
         except AttributeError:
             article_blurbs = []
+
         if not article_blurbs:
-            raise ValueError('Failed to find article blurbs.')
+            raise ValueError('Failed to identify any article blurbs.')
+
+        parsed_blurbs = {}
         for article_blurb in article_blurbs:
             if article := self.parse_article_blurb(article_blurb):
                 _log(article)
-                yield article
+                # If an entry with the same URL already exists, keep whichever entry has the longer description.
+                if article['url'] in parsed_blurbs:
+                    if len(article['description']) <= len(parsed_blurbs[article['url']]['description']):
+                        continue
+                parsed_blurbs[article['url']] = article
 
-    def parse_index(self):
-        feeds = []
-        for section_url, section_title in self.get_web_sections(self.main_url).items():
+        return parsed_blurbs
+
+    def get_article_blurbs(self, sections: dict[str, str]) -> dict[str, dict[str, str, str, str]]:
+        """Create and return a dict of all unique article blurbs found in all `sections`.
+
+        The key of a given entry in the output dictionary is the article URL. The corresponding value is a dictionary
+        of the form `{'url': str, 'title': str, 'description': str, 'date': str}`.
+
+        Args:
+            sections (dict[str, str]): A dict of `{section_url: section_name}` key-value pairs.
+
+        Returns:
+            dict[str, dict[str, str, str, str]]: A dict with article URLs as keys and 'article dicts' as values.
+        """
+        self.log(f'Identifying all articles under all {len(sections)} sections...')
+
+        article_blurbs = {}
+        for section_url, section_title in sections.items():
             try:
-                soup = self.index_to_soup(section_url)
+                section_soup = self.index_to_soup(section_url)
             except Exception:
                 self.log.error(f"Failed to download section '{section_title}' via URL '{section_url}'")
                 continue
-            breakpoint()
-            articles = list(self.parse_web_section(soup))
-            if articles:
-                feeds.append((section_title, articles))
-        if not feeds:
+            self.log(f"Identifying all articles under '{section_url}'...")
+            for article_url, article_blurb in self._get_article_blurbs(section_soup).items():
+                # If the article URL has already been encountered, keep only the article blurb with the longer
+                # description string.
+                if article_url not in article_blurbs:
+                    article_blurbs[article_url] = article_blurb
+                elif len(article_blurb['description']) > len(article_blurbs[article_url]['description']):
+                    article_blurbs[article_url] = article_blurb
+
+        self.log(f'A total of {len(article_blurbs)} articles were identified in the {len(sections)} sections.')
+        return article_blurbs
+
+    def assign_articles_to_sections(
+        self,
+        sections: dict[str, str],
+        articles: dict[str, dict[str, str, str, str]],
+    ) -> dict[str, list[dict[str, str, str, str]]]:
+        """Assign each article in `articles` to a section in `sections`.
+
+        Args:
+            sections (dict[str, str]): A dict of section URLs as keys and section titles as values.
+            articles (dict[str, dict[str, str, str, str]]): A dict of article URLs as keys and article dicts as values.
+
+        Returns:
+            dict[str, list[dict[str, str, str, str]]]: A dict of the form `{section_title: list[article_dict]}`.
+        """
+        self.log(f'Assigning each of the {len(articles)} articles to one of the {len(sections)} sections...')
+        section_to_articles = {}
+        for article_url, article_dict in articles.items():
+            last_url = article_url
+            while article_url not in sections and len(article_url) > len(self.main_url):
+                article_url = article_url.rsplit('/', 1)[0]
+
+                # Prevent an infinite loop.
+                if article_url == last_url:
+                    break
+                last_url = article_url
+
+            # If no section corresponding to the URL exists, default to the 'Home Page' section.
+            section_title = sections[article_url] if article_url in sections else sections[self.main_url]
+            if section_title not in section_to_articles:
+                section_to_articles[section_title] = []
+            section_to_articles[section_title].append(article_dict)
+
+        # Log how many sections contained no articles younger than `self.max_age`.
+        if diff := len(sections) - len(section_to_articles):
+            self.log(f'{diff} sections contained no articles younger than {self.max_age} days.')
+
+        return section_to_articles
+
+    def parse_index(self):
+        # Identify all sections in the web version of Fokus.
+        sections = self.get_web_sections(self.main_url)
+
+        # Add an entry for the start page.
+        sections[self.main_url] = 'Home Page'
+
+        # From the section URLs and the main URL, identify all unique articles.
+        articles = self.get_article_blurbs(sections)
+        if not articles:
             raise NoArticles(
-                'Could not find any articles. Either the fokus.se server is having issues and '
-                'you should try later or the website format has changed and the recipe needs '
-                'to be updated.'
+                f"Could not find any articles. Either the '{self.main_url}' server is experiencing issues, in which "
+                'case you should try again later, or the website format has changed and the recipe needs updating.'
             )
+
+        # Assign each identified article to a section based on its URL.
+        section_to_articles = self.assign_articles_to_sections(sections, articles)
+
+        # Convert to the expected `list[tuple[str, list[dict[str, str, str, str]]]]` format.
+        feeds = [(section_title, article_dicts) for section_title, article_dicts in section_to_articles.items()]
+        num_articles = sum(len(article_dicts) for article_dicts in section_to_articles.values())
+        self.log(f'A total of {num_articles} articles belonging to {len(section_to_articles)} sections were kept.')
+        return feeds

From 2d36740a904ac71201f12432fef7c0923bc82604 Mon Sep 17 00:00:00 2001
From: Henrik Holm
Date: Sun, 6 Oct 2024 03:50:08 +0200
Subject: [PATCH 8/9] Reduce image width: 100% -> 75%

---
 recipes/fokus.recipe | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/recipes/fokus.recipe b/recipes/fokus.recipe
index 2074c69994..2cd978ae52 100644
--- a/recipes/fokus.recipe
+++ b/recipes/fokus.recipe
@@ -26,7 +26,7 @@ class Fokus(BasicNewsRecipe):
     needs_subscription = 'optional'
     max_age = 7  # days
     remove_empty_feeds = True
-    extra_css = 'img { display: block; width: 100%; height: auto }'
+    extra_css = 'img { display: block; width: 75%; height: auto }'
 
     remove_tags = [
         dict(name='div', attrs={'class': 'External-ad'}),

From 4bceaea751d142cb751ee2078229f38fea1772a6 Mon Sep 17 00:00:00 2001
From: Henrik Holm
Date: Sun, 6 Oct 2024 03:50:36 +0200
Subject: [PATCH 9/9] Update divs used to identify article content

---
 recipes/fokus.recipe | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/recipes/fokus.recipe b/recipes/fokus.recipe
index 2cd978ae52..ed306dff0a 100644
--- a/recipes/fokus.recipe
+++ b/recipes/fokus.recipe
@@ -62,7 +62,8 @@ class Fokus(BasicNewsRecipe):
         # dict(name='p', class_='Meta__author'),  # Author.
         # dict(name='time', class_='Meta__updated'),  # Last updated.
         # Main article.
-        dict(name='div', class_='mediaconnect-protected-content'),
+        dict(name='div', class_='sesamy-protected-content'),
+        dict(name='div', class_='wp-block-core-paragraph'),
     ]
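Taken together, PATCH 7/9 replaces the old per-slug loop with a three-step pipeline: discover the sections, collect unique article blurbs across all section pages, then map every article onto a section by repeatedly stripping the last path segment of its URL. That mapping step can be sketched standalone; the section table below is an invented fixture rather than live fokus.se data:

    MAIN_URL = 'https://www.fokus.se'
    sections = {
        MAIN_URL: 'Home Page',
        f'{MAIN_URL}/politik': 'Politik',
    }

    def find_section(article_url: str) -> str:
        """Strip trailing path segments until a known section URL remains."""
        url = article_url.rstrip('/')
        last_url = None
        while url not in sections and len(url) > len(MAIN_URL):
            if url == last_url:
                break  # Guard against URLs that stop shrinking.
            last_url = url
            url = url.rsplit('/', 1)[0]
        # Fall back to the start page when no section prefix matches.
        return sections.get(url, sections[MAIN_URL])

    print(find_section(f'{MAIN_URL}/politik/en-lang-artikel'))  # -> 'Politik'
    print(find_section(f'{MAIN_URL}/okand-sektion/artikel'))    # -> 'Home Page'

Because every article URL either shares a prefix with a discovered section or bottoms out at the main URL, the 'Home Page' entry added in the new `parse_index` guarantees that no article is dropped during assignment.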