mirror of
https://github.com/mealie-recipes/mealie.git
synced 2025-06-03 13:44:55 -04:00
fix: remove network calls from tests (#2055)
* abstracted scraper get_html method
* applied mock to all scrapers
* fixed incorrect var reference
This commit is contained in:
parent
20160346d7
commit
4fc4ba934d
@ -3,6 +3,8 @@ from mealie.services.scraper.scraped_extras import ScrapedExtras
|
|||||||
|
|
||||||
from .scraper_strategies import ABCScraperStrategy, RecipeScraperOpenGraph, RecipeScraperPackage
|
from .scraper_strategies import ABCScraperStrategy, RecipeScraperOpenGraph, RecipeScraperPackage
|
||||||
|
|
||||||
|
DEFAULT_SCRAPER_STRATEGIES: list[type[ABCScraperStrategy]] = [RecipeScraperPackage, RecipeScraperOpenGraph]
|
||||||
|
|
||||||
|
|
||||||
class RecipeScraper:
|
class RecipeScraper:
|
||||||
"""
|
"""
|
||||||
@ -14,10 +16,7 @@ class RecipeScraper:
|
|||||||
|
|
||||||
def __init__(self, scrapers: list[type[ABCScraperStrategy]] | None = None) -> None:
|
def __init__(self, scrapers: list[type[ABCScraperStrategy]] | None = None) -> None:
|
||||||
if scrapers is None:
|
if scrapers is None:
|
||||||
scrapers = [
|
scrapers = DEFAULT_SCRAPER_STRATEGIES
|
||||||
RecipeScraperPackage,
|
|
||||||
RecipeScraperOpenGraph,
|
|
||||||
]
|
|
||||||
|
|
||||||
self.scrapers = scrapers
|
self.scrapers = scrapers
|
||||||
|
|
||||||
|
@ -81,6 +81,10 @@ class ABCScraperStrategy(ABC):
|
|||||||
self.logger = get_logger()
|
self.logger = get_logger()
|
||||||
self.url = url
|
self.url = url
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
async def get_html(self, url: str) -> str:
|
||||||
|
...
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
async def parse(self) -> tuple[Recipe, ScrapedExtras] | tuple[None, None]:
|
async def parse(self) -> tuple[Recipe, ScrapedExtras] | tuple[None, None]:
|
||||||
"""Parse a recipe from a web URL.
|
"""Parse a recipe from a web URL.
|
||||||
@ -95,6 +99,9 @@ class ABCScraperStrategy(ABC):
|
|||||||
|
|
||||||
|
|
||||||
class RecipeScraperPackage(ABCScraperStrategy):
|
class RecipeScraperPackage(ABCScraperStrategy):
|
||||||
|
async def get_html(self, url: str) -> str:
|
||||||
|
return await safe_scrape_html(url)
|
||||||
|
|
||||||
def clean_scraper(self, scraped_data: SchemaScraperFactory.SchemaScraper, url: str) -> tuple[Recipe, ScrapedExtras]:
|
def clean_scraper(self, scraped_data: SchemaScraperFactory.SchemaScraper, url: str) -> tuple[Recipe, ScrapedExtras]:
|
||||||
def try_get_default(func_call: Callable | None, get_attr: str, default: Any, clean_func=None):
|
def try_get_default(func_call: Callable | None, get_attr: str, default: Any, clean_func=None):
|
||||||
value = default
|
value = default
|
||||||
@ -160,7 +167,8 @@ class RecipeScraperPackage(ABCScraperStrategy):
|
|||||||
return recipe, extras
|
return recipe, extras
|
||||||
|
|
||||||
async def scrape_url(self) -> SchemaScraperFactory.SchemaScraper | Any | None:
|
async def scrape_url(self) -> SchemaScraperFactory.SchemaScraper | Any | None:
|
||||||
recipe_html = await safe_scrape_html(self.url)
|
recipe_html = await self.get_html(self.url)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
scraped_schema = scrape_html(recipe_html, org_url=self.url)
|
scraped_schema = scrape_html(recipe_html, org_url=self.url)
|
||||||
except (NoSchemaFoundInWildMode, AttributeError):
|
except (NoSchemaFoundInWildMode, AttributeError):
|
||||||
@ -204,8 +212,8 @@ class RecipeScraperOpenGraph(ABCScraperStrategy):
|
|||||||
Abstract class for all recipe parsers.
|
Abstract class for all recipe parsers.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
async def get_html(self) -> str:
|
async def get_html(self, url: str) -> str:
|
||||||
return await safe_scrape_html(self.url)
|
return await safe_scrape_html(url)
|
||||||
|
|
||||||
def get_recipe_fields(self, html) -> dict | None:
|
def get_recipe_fields(self, html) -> dict | None:
|
||||||
"""
|
"""
|
||||||
@ -245,7 +253,7 @@ class RecipeScraperOpenGraph(ABCScraperStrategy):
|
|||||||
"""
|
"""
|
||||||
Parse a recipe from a given url.
|
Parse a recipe from a given url.
|
||||||
"""
|
"""
|
||||||
html = await self.get_html()
|
html = await self.get_html(self.url)
|
||||||
|
|
||||||
og_data = self.get_recipe_fields(html)
|
og_data = self.get_recipe_fields(html)
|
||||||
|
|
||||||
|
@ -1,6 +1,5 @@
|
|||||||
import json
|
import json
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Optional, Union
|
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
@ -12,7 +11,7 @@ from slugify import slugify
|
|||||||
|
|
||||||
from mealie.schema.recipe.recipe import RecipeCategory
|
from mealie.schema.recipe.recipe import RecipeCategory
|
||||||
from mealie.services.recipe.recipe_data_service import RecipeDataService
|
from mealie.services.recipe.recipe_data_service import RecipeDataService
|
||||||
from mealie.services.scraper.scraper_strategies import RecipeScraperOpenGraph
|
from mealie.services.scraper.recipe_scraper import DEFAULT_SCRAPER_STRATEGIES
|
||||||
from tests import data, utils
|
from tests import data, utils
|
||||||
from tests.utils import api_routes
|
from tests.utils import api_routes
|
||||||
from tests.utils.factories import random_string
|
from tests.utils.factories import random_string
|
||||||
@ -31,9 +30,9 @@ def get_init(html_path: Path):
|
|||||||
def init_override(
|
def init_override(
|
||||||
self,
|
self,
|
||||||
url,
|
url,
|
||||||
proxies: Optional[str] = None,
|
proxies: str | None = None,
|
||||||
timeout: Optional[Union[float, tuple, None]] = None,
|
timeout: float | tuple | None = None,
|
||||||
wild_mode: Optional[bool] = False,
|
wild_mode: bool | None = False,
|
||||||
**_,
|
**_,
|
||||||
):
|
):
|
||||||
page_data = html_path.read_bytes()
|
page_data = html_path.read_bytes()
|
||||||
@ -48,7 +47,7 @@ def get_init(html_path: Path):
|
|||||||
|
|
||||||
|
|
||||||
def open_graph_override(html: str):
|
def open_graph_override(html: str):
|
||||||
def get_html(self) -> str:
|
async def get_html(self, url: str) -> str:
|
||||||
return html
|
return html
|
||||||
|
|
||||||
return get_html
|
return get_html
|
||||||
@ -68,11 +67,12 @@ def test_create_by_url(
|
|||||||
get_init(recipe_data.html_file),
|
get_init(recipe_data.html_file),
|
||||||
)
|
)
|
||||||
# Override the get_html method of the RecipeScraperOpenGraph to return the test html
|
# Override the get_html method of the RecipeScraperOpenGraph to return the test html
|
||||||
monkeypatch.setattr(
|
for scraper_cls in DEFAULT_SCRAPER_STRATEGIES:
|
||||||
RecipeScraperOpenGraph,
|
monkeypatch.setattr(
|
||||||
"get_html",
|
scraper_cls,
|
||||||
open_graph_override(recipe_data.html_file.read_text()),
|
"get_html",
|
||||||
)
|
open_graph_override(recipe_data.html_file.read_text()),
|
||||||
|
)
|
||||||
# Skip image downloader
|
# Skip image downloader
|
||||||
monkeypatch.setattr(
|
monkeypatch.setattr(
|
||||||
RecipeDataService,
|
RecipeDataService,
|
||||||
@ -113,12 +113,13 @@ def test_create_by_url_with_tags(
|
|||||||
"__init__",
|
"__init__",
|
||||||
get_init(html_file),
|
get_init(html_file),
|
||||||
)
|
)
|
||||||
# Override the get_html method of the RecipeScraperOpenGraph to return the test html
|
# Override the get_html method of all scraper strategies to return the test html
|
||||||
monkeypatch.setattr(
|
for scraper_cls in DEFAULT_SCRAPER_STRATEGIES:
|
||||||
RecipeScraperOpenGraph,
|
monkeypatch.setattr(
|
||||||
"get_html",
|
scraper_cls,
|
||||||
open_graph_override(html_file.read_text()),
|
"get_html",
|
||||||
)
|
open_graph_override(html_file.read_text()),
|
||||||
|
)
|
||||||
# Skip image downloader
|
# Skip image downloader
|
||||||
monkeypatch.setattr(
|
monkeypatch.setattr(
|
||||||
RecipeDataService,
|
RecipeDataService,
|
||||||
@ -198,7 +199,7 @@ def test_read_update(
|
|||||||
assert len(recipe["recipeCategory"]) == len(recipe_categories)
|
assert len(recipe["recipeCategory"]) == len(recipe_categories)
|
||||||
|
|
||||||
test_name = [x.name for x in recipe_categories]
|
test_name = [x.name for x in recipe_categories]
|
||||||
for cats in zip(recipe["recipeCategory"], recipe_categories):
|
for cats in zip(recipe["recipeCategory"], recipe_categories, strict=False):
|
||||||
assert cats[0]["name"] in test_name
|
assert cats[0]["name"] in test_name
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user