fix: remove network calls from tests (#2055)

* abstracted scraper get_html method

* applied mock to all scrapers

* fixed incorrect var reference
This commit is contained in:
Michael Genson 2023-02-05 12:14:57 -06:00 committed by GitHub
parent 20160346d7
commit 4fc4ba934d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 34 additions and 26 deletions

View File

@@ -3,6 +3,8 @@ from mealie.services.scraper.scraped_extras import ScrapedExtras
from .scraper_strategies import ABCScraperStrategy, RecipeScraperOpenGraph, RecipeScraperPackage
DEFAULT_SCRAPER_STRATEGIES: list[type[ABCScraperStrategy]] = [RecipeScraperPackage, RecipeScraperOpenGraph]
class RecipeScraper:
"""
@@ -14,10 +16,7 @@ class RecipeScraper:
def __init__(self, scrapers: list[type[ABCScraperStrategy]] | None = None) -> None:
if scrapers is None:
scrapers = [
RecipeScraperPackage,
RecipeScraperOpenGraph,
]
scrapers = DEFAULT_SCRAPER_STRATEGIES
self.scrapers = scrapers

View File

@@ -81,6 +81,10 @@ class ABCScraperStrategy(ABC):
self.logger = get_logger()
self.url = url
@abstractmethod
async def get_html(self, url: str) -> str:
...
@abstractmethod
async def parse(self) -> tuple[Recipe, ScrapedExtras] | tuple[None, None]:
"""Parse a recipe from a web URL.
@@ -95,6 +99,9 @@
class RecipeScraperPackage(ABCScraperStrategy):
async def get_html(self, url: str) -> str:
return await safe_scrape_html(url)
def clean_scraper(self, scraped_data: SchemaScraperFactory.SchemaScraper, url: str) -> tuple[Recipe, ScrapedExtras]:
def try_get_default(func_call: Callable | None, get_attr: str, default: Any, clean_func=None):
value = default
@@ -160,7 +167,8 @@ class RecipeScraperPackage(ABCScraperStrategy):
return recipe, extras
async def scrape_url(self) -> SchemaScraperFactory.SchemaScraper | Any | None:
recipe_html = await safe_scrape_html(self.url)
recipe_html = await self.get_html(self.url)
try:
scraped_schema = scrape_html(recipe_html, org_url=self.url)
except (NoSchemaFoundInWildMode, AttributeError):
@@ -204,8 +212,8 @@ class RecipeScraperOpenGraph(ABCScraperStrategy):
Abstract class for all recipe parsers.
"""
async def get_html(self) -> str:
return await safe_scrape_html(self.url)
async def get_html(self, url: str) -> str:
return await safe_scrape_html(url)
def get_recipe_fields(self, html) -> dict | None:
"""
@@ -245,7 +253,7 @@ class RecipeScraperOpenGraph(ABCScraperStrategy):
"""
Parse a recipe from a given url.
"""
html = await self.get_html()
html = await self.get_html(self.url)
og_data = self.get_recipe_fields(html)

View File

@@ -1,6 +1,5 @@
import json
from pathlib import Path
from typing import Optional, Union
import pytest
from bs4 import BeautifulSoup
@@ -12,7 +11,7 @@ from slugify import slugify
from mealie.schema.recipe.recipe import RecipeCategory
from mealie.services.recipe.recipe_data_service import RecipeDataService
from mealie.services.scraper.scraper_strategies import RecipeScraperOpenGraph
from mealie.services.scraper.recipe_scraper import DEFAULT_SCRAPER_STRATEGIES
from tests import data, utils
from tests.utils import api_routes
from tests.utils.factories import random_string
@@ -31,9 +30,9 @@ def get_init(html_path: Path):
def init_override(
self,
url,
proxies: Optional[str] = None,
timeout: Optional[Union[float, tuple, None]] = None,
wild_mode: Optional[bool] = False,
proxies: str | None = None,
timeout: float | tuple | None = None,
wild_mode: bool | None = False,
**_,
):
page_data = html_path.read_bytes()
@@ -48,7 +47,7 @@ def get_init(html_path: Path):
def open_graph_override(html: str):
def get_html(self) -> str:
async def get_html(self, url: str) -> str:
return html
return get_html
@@ -68,11 +67,12 @@ def test_create_by_url(
get_init(recipe_data.html_file),
)
# Override the get_html method of the RecipeScraperOpenGraph to return the test html
monkeypatch.setattr(
RecipeScraperOpenGraph,
"get_html",
open_graph_override(recipe_data.html_file.read_text()),
)
for scraper_cls in DEFAULT_SCRAPER_STRATEGIES:
monkeypatch.setattr(
scraper_cls,
"get_html",
open_graph_override(recipe_data.html_file.read_text()),
)
# Skip image downloader
monkeypatch.setattr(
RecipeDataService,
@@ -113,12 +113,13 @@ def test_create_by_url_with_tags(
"__init__",
get_init(html_file),
)
# Override the get_html method of the RecipeScraperOpenGraph to return the test html
monkeypatch.setattr(
RecipeScraperOpenGraph,
"get_html",
open_graph_override(html_file.read_text()),
)
# Override the get_html method of all scraper strategies to return the test html
for scraper_cls in DEFAULT_SCRAPER_STRATEGIES:
monkeypatch.setattr(
scraper_cls,
"get_html",
open_graph_override(html_file.read_text()),
)
# Skip image downloader
monkeypatch.setattr(
RecipeDataService,
@@ -198,7 +199,7 @@ def test_read_update(
assert len(recipe["recipeCategory"]) == len(recipe_categories)
test_name = [x.name for x in recipe_categories]
for cats in zip(recipe["recipeCategory"], recipe_categories):
for cats in zip(recipe["recipeCategory"], recipe_categories, strict=False):
assert cats[0]["name"] in test_name