Fix bug in instruction normalization

This commit is contained in:
Richard Mitic 2021-01-06 18:37:35 +01:00
parent de17085e04
commit 8ab1bdeb4a
2 changed files with 29 additions and 44 deletions

View File

@@ -28,7 +28,7 @@ def normalize_image_url(image) -> str:
def normalize_instructions(instructions) -> List[dict]: def normalize_instructions(instructions) -> List[dict]:
# One long string split by (possibly multiple) new lines # One long string split by (possibly multiple) new lines
if type(instructions) == str: if type(instructions) == str:
return [{"text": line.strip()} for line in filter(None, instructions.split("\n"))] return [{"text": line.strip()} for line in filter(None, instructions.splitlines())]
# Plain strings in a list # Plain strings in a list
elif type(instructions) == list and type(instructions[0]) == str: elif type(instructions) == list and type(instructions[0]) == str:

View File

@@ -7,48 +7,33 @@ from services.scrape_services import normalize_data, normalize_instructions
CWD = Path(__file__).parent CWD = Path(__file__).parent
RAW_RECIPE_DIR = CWD.joinpath("data", "recipes-raw") RAW_RECIPE_DIR = CWD.joinpath("data", "recipes-raw")
def pytest_generate_tests(metafunc):
# called once per each test function
funcarglist = metafunc.cls.params[metafunc.function.__name__]
argnames = sorted(funcarglist[0])
metafunc.parametrize(
argnames, [[funcargs[name] for name in argnames] for funcargs in funcarglist]
)
@pytest.mark.parametrize("json_file,num_steps", [
def raw_recipe_info(file_name: str, num_steps: int) -> dict: ("best-homemade-salsa-recipe.json", 2),
return {"json_file": RAW_RECIPE_DIR.joinpath(file_name), "num_steps": num_steps} ("blue-cheese-stuffed-turkey-meatballs-with-raspberry-balsamic-glaze-2.json", 3),
("bon_appetit.json", 8),
("chunky-apple-cake.json", 4),
class TestScraper: ("dairy-free-impossible-pumpkin-pie.json", 7),
# a map specifying multiple argument sets for a test method ("how-to-make-instant-pot-spaghetti.json", 8),
params = { ("instant-pot-chicken-and-potatoes.json", 4),
"test_normalize_instructions": [ ("instant-pot-kerala-vegetable-stew.json", 13),
dict(instructions="A\n\nB\n\nC\n\n"), ("jalapeno-popper-dip.json", 4),
dict(instructions=["A","B","C"]), ("microwave_sweet_potatoes_04783.json", 4),
dict(instructions=[{"@type": "HowToStep", "text": "A"}, ("moroccan-skirt-steak-with-roasted-pepper-couscous.json", 4),
{"@type": "HowToStep", "text": "B"}, ("Pizza-Knoblauch-Champignon-Paprika-vegan.html.json", 3),
{"@type": "HowToStep", "text": "C"}]), ])
], def test_normalize_data(json_file, num_steps):
"test_normalize_data": [ recipe_data = normalize_data(json.load(open(RAW_RECIPE_DIR.joinpath(json_file))))
raw_recipe_info("best-homemade-salsa-recipe.json", 2),
raw_recipe_info("blue-cheese-stuffed-turkey-meatballs-with-raspberry-balsamic-glaze-2.json", 3),
raw_recipe_info("bon_appetit.json", 8),
raw_recipe_info("chunky-apple-cake.json", 4),
raw_recipe_info("dairy-free-impossible-pumpkin-pie.json", 7),
raw_recipe_info("how-to-make-instant-pot-spaghetti.json", 8),
raw_recipe_info("instant-pot-chicken-and-potatoes.json", 4),
raw_recipe_info("instant-pot-kerala-vegetable-stew.json", 13),
raw_recipe_info("jalapeno-popper-dip.json", 4),
raw_recipe_info("microwave_sweet_potatoes_04783.json", 4),
raw_recipe_info("moroccan-skirt-steak-with-roasted-pepper-couscous.json", 4),
raw_recipe_info("Pizza-Knoblauch-Champignon-Paprika-vegan.html.json", 5),
]
}
def test_normalize_data(self, json_file, num_steps):
recipe_data = normalize_data(json.load(open(json_file)))
assert len(recipe_data["recipeInstructions"]) == num_steps assert len(recipe_data["recipeInstructions"]) == num_steps
def test_normalize_instructions(self, instructions):
@pytest.mark.parametrize("instructions", [
"A\n\nB\n\nC\n\n",
"A\nB\nC\n",
"A\r\n\r\nB\r\n\r\nC\r\n\r\n",
"A\r\nB\r\nC\r\n",
["A","B","C"],
[{"@type": "HowToStep", "text": x} for x in ["A","B","C"]]
])
def test_normalize_instructions(instructions):
assert normalize_instructions(instructions) == [{"text": "A"}, {"text": "B"}, {"text": "C"}] assert normalize_instructions(instructions) == [{"text": "A"}, {"text": "B"}, {"text": "C"}]