mirror of https://github.com/mealie-recipes/mealie.git
fix: remove network calls from tests (#2055)
* abstracted scraper get_html method
* applied mock to all scrapers
* fixed incorrect var reference
parent 20160346d7
commit 4fc4ba934d
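The change is one pattern applied three times: every scraper strategy now fetches HTML through a single overridable get_html(url) method instead of calling safe_scrape_html directly, the default strategy list is hoisted to a module-level constant, and the tests patch get_html on every strategy in that list so no test touches the network. A minimal sketch of the seam, using names from the diff below but with the real fetching and parsing logic elided:

    import asyncio
    from abc import ABC, abstractmethod


    class ABCScraperStrategy(ABC):
        """Simplified stand-in for mealie's strategy base class."""

        def __init__(self, url: str) -> None:
            self.url = url

        @abstractmethod
        async def get_html(self, url: str) -> str:
            """The only place a strategy is allowed to touch the network."""
            ...

        async def parse(self) -> str:
            # Strategies fetch through the seam, never through a raw HTTP call.
            return await self.get_html(self.url)


    class FakeScraper(ABCScraperStrategy):
        # What a test override looks like: canned HTML, no request made.
        async def get_html(self, url: str) -> str:
            return "<html><title>canned</title></html>"


    print(asyncio.run(FakeScraper("https://example.com").parse()))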
mealie/services/scraper/recipe_scraper.py

@@ -3,6 +3,8 @@ from mealie.services.scraper.scraped_extras import ScrapedExtras
 
 from .scraper_strategies import ABCScraperStrategy, RecipeScraperOpenGraph, RecipeScraperPackage
 
+DEFAULT_SCRAPER_STRATEGIES: list[type[ABCScraperStrategy]] = [RecipeScraperPackage, RecipeScraperOpenGraph]
+
 
 class RecipeScraper:
     """
@@ -14,10 +16,7 @@ class RecipeScraper:
 
     def __init__(self, scrapers: list[type[ABCScraperStrategy]] | None = None) -> None:
         if scrapers is None:
-            scrapers = [
-                RecipeScraperPackage,
-                RecipeScraperOpenGraph,
-            ]
+            scrapers = DEFAULT_SCRAPER_STRATEGIES
 
         self.scrapers = scrapers
 
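Hoisting the default list to module level is what makes the test-side change possible: tests can import DEFAULT_SCRAPER_STRATEGIES and iterate over it when patching, while the constructor's None sentinel keeps custom strategy injection working. A small reproduction of the constructor contract as shown in this hunk (the list is left empty here; mealie populates it with the two strategy classes):

    DEFAULT_SCRAPER_STRATEGIES: list[type] = []  # mealie fills this with strategy classes


    class RecipeScraper:
        def __init__(self, scrapers: list[type] | None = None) -> None:
            if scrapers is None:
                # None means "use the shared defaults", not "no scrapers".
                scrapers = DEFAULT_SCRAPER_STRATEGIES
            self.scrapers = scrapers


    assert RecipeScraper().scrapers is DEFAULT_SCRAPER_STRATEGIES
    assert RecipeScraper(scrapers=[]).scrapers == []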
mealie/services/scraper/scraper_strategies.py

@@ -81,6 +81,10 @@ class ABCScraperStrategy(ABC):
         self.logger = get_logger()
         self.url = url
 
+    @abstractmethod
+    async def get_html(self, url: str) -> str:
+        ...
+
     @abstractmethod
     async def parse(self) -> tuple[Recipe, ScrapedExtras] | tuple[None, None]:
         """Parse a recipe from a web URL.
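Declaring get_html as an @abstractmethod makes the network seam part of the strategy contract: a concrete strategy that forgets to implement it fails at instantiation rather than midway through a scrape, and tests get a uniform attribute to patch on every subclass. A quick illustration with generic names:

    from abc import ABC, abstractmethod


    class Base(ABC):
        @abstractmethod
        async def get_html(self, url: str) -> str:
            ...


    class Incomplete(Base):
        pass


    try:
        Incomplete()
    except TypeError as exc:
        # raises: abstract class with an unimplemented abstract method get_html
        print(exc)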
@@ -95,6 +99,9 @@ class ABCScraperStrategy(ABC):
 
 
 class RecipeScraperPackage(ABCScraperStrategy):
+    async def get_html(self, url: str) -> str:
+        return await safe_scrape_html(url)
+
     def clean_scraper(self, scraped_data: SchemaScraperFactory.SchemaScraper, url: str) -> tuple[Recipe, ScrapedExtras]:
         def try_get_default(func_call: Callable | None, get_attr: str, default: Any, clean_func=None):
             value = default
@@ -160,7 +167,8 @@ class RecipeScraperPackage(ABCScraperStrategy):
         return recipe, extras
 
     async def scrape_url(self) -> SchemaScraperFactory.SchemaScraper | Any | None:
-        recipe_html = await safe_scrape_html(self.url)
+        recipe_html = await self.get_html(self.url)
+
         try:
             scraped_schema = scrape_html(recipe_html, org_url=self.url)
         except (NoSchemaFoundInWildMode, AttributeError):
@@ -204,8 +212,8 @@ class RecipeScraperOpenGraph(ABCScraperStrategy):
     Abstract class for all recipe parsers.
     """
 
-    async def get_html(self) -> str:
-        return await safe_scrape_html(self.url)
+    async def get_html(self, url: str) -> str:
+        return await safe_scrape_html(url)
 
     def get_recipe_fields(self, html) -> dict | None:
         """
@@ -245,7 +253,7 @@ class RecipeScraperOpenGraph(ABCScraperStrategy):
         """
         Parse a recipe from a given url.
         """
-        html = await self.get_html()
+        html = await self.get_html(self.url)
 
         og_data = self.get_recipe_fields(html)
 
tests (recipe CRUD integration tests)

@@ -1,6 +1,5 @@
 import json
 from pathlib import Path
-from typing import Optional, Union
 
 import pytest
 from bs4 import BeautifulSoup
@@ -12,7 +11,7 @@ from slugify import slugify
 
 from mealie.schema.recipe.recipe import RecipeCategory
 from mealie.services.recipe.recipe_data_service import RecipeDataService
-from mealie.services.scraper.scraper_strategies import RecipeScraperOpenGraph
+from mealie.services.scraper.recipe_scraper import DEFAULT_SCRAPER_STRATEGIES
 from tests import data, utils
 from tests.utils import api_routes
 from tests.utils.factories import random_string
@@ -31,9 +30,9 @@ def get_init(html_path: Path):
     def init_override(
         self,
         url,
-        proxies: Optional[str] = None,
-        timeout: Optional[Union[float, tuple, None]] = None,
-        wild_mode: Optional[bool] = False,
+        proxies: str | None = None,
+        timeout: float | tuple | None = None,
+        wild_mode: bool | None = False,
         **_,
     ):
         page_data = html_path.read_bytes()
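The signature rewrite is a pure typing modernization: PEP 604 union syntax (Python 3.10+) replaces Optional/Union, and the redundant None inside Optional[Union[float, tuple, None]] disappears because unions flatten. The old and new spellings denote the same types:

    from typing import Optional, Union

    assert Optional[str] == (str | None)
    assert Optional[Union[float, tuple, None]] == (float | tuple | None)
    assert Optional[bool] == (bool | None)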
@@ -48,7 +47,7 @@ def get_init(html_path: Path):
 
 
 def open_graph_override(html: str):
-    def get_html(self) -> str:
+    async def get_html(self, url: str) -> str:
         return html
 
     return get_html
@@ -68,11 +67,12 @@ def test_create_by_url(
         get_init(recipe_data.html_file),
     )
-    # Override the get_html method of the RecipeScraperOpenGraph to return the test html
-    monkeypatch.setattr(
-        RecipeScraperOpenGraph,
-        "get_html",
-        open_graph_override(recipe_data.html_file.read_text()),
-    )
+    for scraper_cls in DEFAULT_SCRAPER_STRATEGIES:
+        monkeypatch.setattr(
+            scraper_cls,
+            "get_html",
+            open_graph_override(recipe_data.html_file.read_text()),
+        )
+
     # Skip image downloader
     monkeypatch.setattr(
         RecipeDataService,
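Because every strategy class now exposes the same async get_html(self, url) method, the test no longer special-cases RecipeScraperOpenGraph: one loop patches the whole strategy list. A self-contained sketch of the mechanism with pytest's monkeypatch, using hypothetical stub classes in place of the real strategies:

    import asyncio


    class StrategyA:  # stand-ins for RecipeScraperPackage / RecipeScraperOpenGraph
        async def get_html(self, url: str) -> str:
            raise RuntimeError("real network call")


    class StrategyB:
        async def get_html(self, url: str) -> str:
            raise RuntimeError("real network call")


    DEFAULT_SCRAPER_STRATEGIES = [StrategyA, StrategyB]


    def open_graph_override(html: str):
        # Factory producing an async function with get_html's signature;
        # setattr on the class turns it into a bound method.
        async def get_html(self, url: str) -> str:
            return html

        return get_html


    def test_no_network(monkeypatch):
        for scraper_cls in DEFAULT_SCRAPER_STRATEGIES:
            monkeypatch.setattr(scraper_cls, "get_html", open_graph_override("<html/>"))

        for scraper_cls in DEFAULT_SCRAPER_STRATEGIES:
            assert asyncio.run(scraper_cls().get_html("https://x.test")) == "<html/>"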
@@ -113,12 +113,13 @@ def test_create_by_url_with_tags(
         "__init__",
         get_init(html_file),
     )
-    # Override the get_html method of the RecipeScraperOpenGraph to return the test html
-    monkeypatch.setattr(
-        RecipeScraperOpenGraph,
-        "get_html",
-        open_graph_override(html_file.read_text()),
-    )
+    # Override the get_html method of all scraper strategies to return the test html
+    for scraper_cls in DEFAULT_SCRAPER_STRATEGIES:
+        monkeypatch.setattr(
+            scraper_cls,
+            "get_html",
+            open_graph_override(html_file.read_text()),
+        )
     # Skip image downloader
     monkeypatch.setattr(
         RecipeDataService,
@@ -198,7 +199,7 @@ def test_read_update(
     assert len(recipe["recipeCategory"]) == len(recipe_categories)
 
     test_name = [x.name for x in recipe_categories]
-    for cats in zip(recipe["recipeCategory"], recipe_categories):
+    for cats in zip(recipe["recipeCategory"], recipe_categories, strict=False):
         assert cats[0]["name"] in test_name
 
 
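The last hunk touches an unrelated line: zip() gained a strict keyword in Python 3.10 (PEP 618). With strict=True it raises ValueError when the iterables differ in length; writing strict=False makes the old silent-truncation behavior explicit, which is what linters such as ruff/flake8-bugbear (rule B905) ask for. For example:

    pairs = list(zip([1, 2, 3], ["a", "b"], strict=False))
    assert pairs == [(1, "a"), (2, "b")]  # silently truncated to the shorter input

    try:
        list(zip([1, 2, 3], ["a", "b"], strict=True))
    except ValueError:
        pass  # lengths differ, so strict mode raises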