fix: remove network calls from tests (#2055)

* abstracted scraper get_html method

* applied mock to all scrapers

* fixed incorrect var reference
Michael Genson 2023-02-05 12:14:57 -06:00, committed by GitHub
parent 20160346d7
commit 4fc4ba934d
3 changed files with 34 additions and 26 deletions
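The commit follows a common seam-injection pattern: the network fetch is isolated behind a single abstract async method, so tests can swap it out without touching any parsing logic. A minimal, self-contained sketch of that pattern (the names ScraperStrategy, LiveStrategy, and html_override are illustrative, not part of this commit; the pytest monkeypatch fixture is used as in the test diff below). Note the override must itself be async, since callers await it:

from abc import ABC, abstractmethod
import asyncio


class ScraperStrategy(ABC):
    # The only method allowed to touch the network.
    @abstractmethod
    async def get_html(self, url: str) -> str:
        ...


class LiveStrategy(ScraperStrategy):
    async def get_html(self, url: str) -> str:
        raise RuntimeError("network access not allowed in tests")


def html_override(html: str):
    # The replacement must be async too, because callers await get_html.
    async def get_html(self, url: str) -> str:
        return html

    return get_html


def test_no_network(monkeypatch):
    monkeypatch.setattr(LiveStrategy, "get_html", html_override("<html></html>"))
    result = asyncio.run(LiveStrategy().get_html("https://example.com"))
    assert result == "<html></html>"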

mealie/services/scraper/recipe_scraper.py

@@ -3,6 +3,8 @@ from mealie.services.scraper.scraped_extras import ScrapedExtras
 from .scraper_strategies import ABCScraperStrategy, RecipeScraperOpenGraph, RecipeScraperPackage
 
+DEFAULT_SCRAPER_STRATEGIES: list[type[ABCScraperStrategy]] = [RecipeScraperPackage, RecipeScraperOpenGraph]
+
 
 class RecipeScraper:
     """
@@ -14,10 +16,7 @@ class RecipeScraper:
     def __init__(self, scrapers: list[type[ABCScraperStrategy]] | None = None) -> None:
         if scrapers is None:
-            scrapers = [
-                RecipeScraperPackage,
-                RecipeScraperOpenGraph,
-            ]
+            scrapers = DEFAULT_SCRAPER_STRATEGIES
 
         self.scrapers = scrapers
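With the strategy list hoisted to module level, tests and callers can reference or replace it in one place instead of relying on the hard-coded list inside __init__. A usage sketch, assuming the module paths match the imports shown in this commit:

from mealie.services.scraper.recipe_scraper import DEFAULT_SCRAPER_STRATEGIES, RecipeScraper
from mealie.services.scraper.scraper_strategies import RecipeScraperOpenGraph

# Default construction picks up every registered strategy.
scraper = RecipeScraper()
assert scraper.scrapers == DEFAULT_SCRAPER_STRATEGIES

# Injecting an explicit subset narrows the strategies tried.
og_only = RecipeScraper(scrapers=[RecipeScraperOpenGraph])
assert og_only.scrapers == [RecipeScraperOpenGraph]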

mealie/services/scraper/scraper_strategies.py

@@ -81,6 +81,10 @@ class ABCScraperStrategy(ABC):
         self.logger = get_logger()
         self.url = url
 
+    @abstractmethod
+    async def get_html(self, url: str) -> str:
+        ...
+
     @abstractmethod
     async def parse(self) -> tuple[Recipe, ScrapedExtras] | tuple[None, None]:
         """Parse a recipe from a web URL.
@@ -95,6 +99,9 @@
 
 class RecipeScraperPackage(ABCScraperStrategy):
+    async def get_html(self, url: str) -> str:
+        return await safe_scrape_html(url)
+
     def clean_scraper(self, scraped_data: SchemaScraperFactory.SchemaScraper, url: str) -> tuple[Recipe, ScrapedExtras]:
         def try_get_default(func_call: Callable | None, get_attr: str, default: Any, clean_func=None):
             value = default
@@ -160,7 +167,8 @@ class RecipeScraperPackage(ABCScraperStrategy):
         return recipe, extras
 
     async def scrape_url(self) -> SchemaScraperFactory.SchemaScraper | Any | None:
-        recipe_html = await safe_scrape_html(self.url)
+        recipe_html = await self.get_html(self.url)
+
         try:
             scraped_schema = scrape_html(recipe_html, org_url=self.url)
         except (NoSchemaFoundInWildMode, AttributeError):
@@ -204,8 +212,8 @@ class RecipeScraperOpenGraph(ABCScraperStrategy):
     Abstract class for all recipe parsers.
     """
 
-    async def get_html(self) -> str:
-        return await safe_scrape_html(self.url)
+    async def get_html(self, url: str) -> str:
+        return await safe_scrape_html(url)
 
     def get_recipe_fields(self, html) -> dict | None:
         """
@@ -245,7 +253,7 @@ class RecipeScraperOpenGraph(ABCScraperStrategy):
         """
         Parse a recipe from a given url.
        """
-        html = await self.get_html()
+        html = await self.get_html(self.url)
 
         og_data = self.get_recipe_fields(html)
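Because every concrete strategy now routes its fetches through its own get_html(url), keeping a strategy off the network only requires overriding that one method. A sketch of a hypothetical fixture-backed subclass (FixtureScraperStrategy and FIXTURE_HTML are illustrative names, not part of this commit):

from mealie.services.scraper.scraper_strategies import RecipeScraperOpenGraph

FIXTURE_HTML = "<html><head><meta property='og:title' content='Test Soup'/></head></html>"


class FixtureScraperStrategy(RecipeScraperOpenGraph):
    async def get_html(self, url: str) -> str:
        # parse() calls self.get_html(self.url), so overriding this one
        # method keeps the whole strategy off the network.
        return FIXTURE_HTML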

tests/… (recipe create-by-URL and CRUD test module)

@@ -1,6 +1,5 @@
 import json
 from pathlib import Path
-from typing import Optional, Union
 
 import pytest
 from bs4 import BeautifulSoup
@@ -12,7 +11,7 @@ from slugify import slugify
 from mealie.schema.recipe.recipe import RecipeCategory
 from mealie.services.recipe.recipe_data_service import RecipeDataService
-from mealie.services.scraper.scraper_strategies import RecipeScraperOpenGraph
+from mealie.services.scraper.recipe_scraper import DEFAULT_SCRAPER_STRATEGIES
 from tests import data, utils
 from tests.utils import api_routes
 from tests.utils.factories import random_string
@@ -31,9 +30,9 @@ def get_init(html_path: Path):
     def init_override(
         self,
         url,
-        proxies: Optional[str] = None,
-        timeout: Optional[Union[float, tuple, None]] = None,
-        wild_mode: Optional[bool] = False,
+        proxies: str | None = None,
+        timeout: float | tuple | None = None,
+        wild_mode: bool | None = False,
         **_,
     ):
         page_data = html_path.read_bytes()
@@ -48,7 +47,7 @@ def get_init(html_path: Path):
 
 def open_graph_override(html: str):
-    def get_html(self) -> str:
+    async def get_html(self, url: str) -> str:
         return html
 
     return get_html
@@ -68,11 +67,12 @@ def test_create_by_url(
         get_init(recipe_data.html_file),
     )
 
     # Override the get_html method of the RecipeScraperOpenGraph to return the test html
-    monkeypatch.setattr(
-        RecipeScraperOpenGraph,
-        "get_html",
-        open_graph_override(recipe_data.html_file.read_text()),
-    )
+    for scraper_cls in DEFAULT_SCRAPER_STRATEGIES:
+        monkeypatch.setattr(
+            scraper_cls,
+            "get_html",
+            open_graph_override(recipe_data.html_file.read_text()),
+        )
 
     # Skip image downloader
     monkeypatch.setattr(
         RecipeDataService,
@@ -113,12 +113,13 @@
         "__init__",
         get_init(html_file),
     )
 
-    # Override the get_html method of the RecipeScraperOpenGraph to return the test html
-    monkeypatch.setattr(
-        RecipeScraperOpenGraph,
-        "get_html",
-        open_graph_override(html_file.read_text()),
-    )
+    # Override the get_html method of all scraper strategies to return the test html
+    for scraper_cls in DEFAULT_SCRAPER_STRATEGIES:
+        monkeypatch.setattr(
+            scraper_cls,
+            "get_html",
+            open_graph_override(html_file.read_text()),
+        )
 
     # Skip image downloader
     monkeypatch.setattr(
         RecipeDataService,
@@ -198,7 +199,7 @@ def test_read_update(
     assert len(recipe["recipeCategory"]) == len(recipe_categories)
 
     test_name = [x.name for x in recipe_categories]
-    for cats in zip(recipe["recipeCategory"], recipe_categories):
+    for cats in zip(recipe["recipeCategory"], recipe_categories, strict=False):
         assert cats[0]["name"] in test_name
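The two test hunks above repeat the same patch loop; it could be hoisted into a shared helper. A sketch of that, self-contained under the same assumptions as the diff (patch_all_scrapers is a hypothetical name, and the override must stay async because the strategies await it):

from mealie.services.scraper.recipe_scraper import DEFAULT_SCRAPER_STRATEGIES


def open_graph_override(html: str):
    async def get_html(self, url: str) -> str:
        return html

    return get_html


def patch_all_scrapers(monkeypatch, html: str):
    # Patch get_html on every default strategy so no code path can reach
    # the network, regardless of which strategy ends up handling the URL.
    for scraper_cls in DEFAULT_SCRAPER_STRATEGIES:
        monkeypatch.setattr(scraper_cls, "get_html", open_graph_override(html))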