diff --git a/mealie/services/parser_services/brute/process.py b/mealie/services/parser_services/brute/process.py index ade777cf8a1f..30ac690bf65d 100644 --- a/mealie/services/parser_services/brute/process.py +++ b/mealie/services/parser_services/brute/process.py @@ -132,7 +132,7 @@ def parse_ingredient(tokens) -> tuple[str, str]: return ingredient, note -def parse(ing_str) -> BruteParsedIngredient: +def parse(ing_str, parser) -> BruteParsedIngredient: amount = 0.0 unit = "" ingredient = "" @@ -192,12 +192,20 @@ def parse(ing_str) -> BruteParsedIngredient: # which means this is the ingredient ingredient = tokens[1] except ValueError: - try: - # can't parse first argument as amount - # -> no unit -> parse everything as ingredient - ingredient, note = parse_ingredient(tokens) - except ValueError: - ingredient = " ".join(tokens[1:]) + # can't parse first argument as amount + # try to parse as unit and ingredient (e.g. "a tblsp salt"), with unit in first three tokens + # won't work for units that have spaces + for index, token in enumerate(tokens[:3]): + if parser.find_unit_match(token): + unit = token + ingredient, note = parse_ingredient(tokens[index + 1 :]) + break + if not unit: + try: + # no unit -> parse everything as ingredient + ingredient, note = parse_ingredient(tokens) + except ValueError: + ingredient = " ".join(tokens[1:]) if unit_note not in note: note += " " + unit_note diff --git a/mealie/services/parser_services/ingredient_parser.py b/mealie/services/parser_services/ingredient_parser.py index b487c4c85e3c..26508310353b 100644 --- a/mealie/services/parser_services/ingredient_parser.py +++ b/mealie/services/parser_services/ingredient_parser.py @@ -126,22 +126,24 @@ class ABCIngredientParser(ABC): return store_map[fuzz_result[0]] - def find_food_match(self, food: IngredientFood | CreateIngredientFood) -> IngredientFood | None: + def find_food_match(self, food: IngredientFood | CreateIngredientFood | str) -> IngredientFood | None: if isinstance(food, 
IngredientFood): return food - match_value = IngredientFoodModel.normalize(food.name) + food_name = food if isinstance(food, str) else food.name + match_value = IngredientFoodModel.normalize(food_name) return self.find_match( match_value, store_map=self.foods_by_alias, fuzzy_match_threshold=self.food_fuzzy_match_threshold, ) - def find_unit_match(self, unit: IngredientUnit | CreateIngredientUnit) -> IngredientUnit | None: + def find_unit_match(self, unit: IngredientUnit | CreateIngredientUnit | str) -> IngredientUnit | None: if isinstance(unit, IngredientUnit): return unit - match_value = IngredientUnitModel.normalize(unit.name) + unit_name = unit if isinstance(unit, str) else unit.name + match_value = IngredientUnitModel.normalize(unit_name) return self.find_match( match_value, store_map=self.units_by_alias, @@ -155,6 +157,16 @@ class ABCIngredientParser(ABC): if ingredient.ingredient.unit and (unit_match := self.find_unit_match(ingredient.ingredient.unit)): ingredient.ingredient.unit = unit_match + # Parser might have wrongly split a food into a unit and food. 
+ if isinstance(ingredient.ingredient.food, CreateIngredientFood) and isinstance( + ingredient.ingredient.unit, CreateIngredientUnit + ): + if food_match := self.find_food_match( + f"{ingredient.ingredient.unit.name} {ingredient.ingredient.food.name}" + ): + ingredient.ingredient.food = food_match + ingredient.ingredient.unit = None + return ingredient @@ -164,7 +176,7 @@ class BruteForceParser(ABCIngredientParser): """ def parse_one(self, ingredient: str) -> ParsedIngredient: - bfi = brute.parse(ingredient) + bfi = brute.parse(ingredient, self) parsed_ingredient = ParsedIngredient( input=ingredient, diff --git a/tests/unit_tests/test_ingredient_parser.py b/tests/unit_tests/test_ingredient_parser.py index df7a336af714..dde303762731 100644 --- a/tests/unit_tests/test_ingredient_parser.py +++ b/tests/unit_tests/test_ingredient_parser.py @@ -135,7 +135,7 @@ test_ingredients = [ @pytest.mark.skipif(not crf_exists(), reason="CRF++ not installed") -def test_nlp_parser(): +def test_nlp_parser() -> None: models: list[CRFIngredient] = convert_list_to_crf_model([x.input for x in test_ingredients]) # Iterate over models and test_ingredients to gather @@ -147,37 +147,102 @@ def test_nlp_parser(): assert model.unit == test_ingredient.unit -def test_brute_parser(unique_user: TestUser): - # input: (quantity, unit, food, comments) - expectations = { - # Dutch - "1 theelepel koffie": (1, "theelepel", "koffie", ""), - "3 theelepels koffie": (3, "theelepels", "koffie", ""), - "1 eetlepel tarwe": (1, "eetlepel", "tarwe", ""), - "20 eetlepels bloem": (20, "eetlepels", "bloem", ""), - "1 mespunt kaneel": (1, "mespunt", "kaneel", ""), - "1 snuf(je) zout": (1, "snuf(je)", "zout", ""), - "2 tbsp minced cilantro, leaves and stems": (2, "tbsp", "minced cilantro", "leaves and stems"), - "1 large yellow onion, coarsely chopped": (1, "large", "yellow onion", "coarsely chopped"), - "1 1/2 tsp garam masala": (1.5, "tsp", "garam masala", ""), - "2 cups mango chunks, (2 large mangoes) (fresh or 
frozen)": ( +@pytest.mark.parametrize( + "input, quantity, unit, food, comment", + [ + pytest.param("1 theelepel koffie", 1, "theelepel", "koffie", "", id="1 theelepel koffie"), + pytest.param("3 theelepels koffie", 3, "theelepels", "koffie", "", id="3 theelepels koffie"), + pytest.param("1 eetlepel tarwe", 1, "eetlepel", "tarwe", "", id="1 eetlepel tarwe"), + pytest.param("20 eetlepels bloem", 20, "eetlepels", "bloem", "", id="20 eetlepels bloem"), + pytest.param("1 mespunt kaneel", 1, "mespunt", "kaneel", "", id="1 mespunt kaneel"), + pytest.param("1 snuf(je) zout", 1, "snuf(je)", "zout", "", id="1 snuf(je) zout"), + pytest.param( + "2 tbsp minced cilantro, leaves and stems", 2, - "cups", + "tbsp", + "minced cilantro", + "leaves and stems", + id="2 tbsp minced cilantro, leaves and stems", + ), + pytest.param( + "1 large yellow onion, coarsely chopped", + 1, + "large", + "yellow onion", + "coarsely chopped", + id="1 large yellow onion, coarsely chopped", + ), + pytest.param("1 1/2 tsp garam masala", 1.5, "tsp", "garam masala", "", id="1 1/2 tsp garam masala"), + pytest.param( + "2 cups mango chunks, (2 large mangoes) (fresh or frozen)", + 2, + "Cups", "mango chunks, (2 large mangoes)", "fresh or frozen", + id="2 cups mango chunks, (2 large mangoes) (fresh or frozen)", ), - } - + pytest.param("stalk onion", 0, "Stalk", "onion", "", id="stalk onion"), + pytest.param("a stalk bell peppers", 0, "Stalk", "bell peppers", "", id="a stalk bell peppers"), + pytest.param("a tablespoon unknownFood", 0, "Tablespoon", "unknownFood", "", id="a tablespoon unknownFood"), + pytest.param( + "stalk bell peppers, cut in pieces", + 0, + "Stalk", + "bell peppers", + "cut in pieces", + id="stalk bell peppers, cut in pieces", + ), + pytest.param( + "a stalk bell peppers, cut in pieces", + 0, + "Stalk", + "bell peppers", + "cut in pieces", + id="a stalk bell peppers, cut in pieces", + ), + pytest.param("red pepper flakes", 0, "", "red pepper flakes", "", id="red pepper flakes"), + 
pytest.param("1 red pepper flakes", 1, "", "red pepper flakes", "", id="1 red pepper flakes"), + pytest.param("1 bell peppers", 1, "", "bell peppers", "", id="1 bell peppers"), + pytest.param("1 stalk bell peppers", 1, "Stalk", "bell peppers", "", id="1 stalk bell peppers"), + pytest.param("a big stalk bell peppers", 0, "Stalk", "bell peppers", "", id="a big stalk bell peppers"), + pytest.param( + "1 bell peppers, cut in pieces", 1, "", "bell peppers", "cut in pieces", id="1 bell peppers, cut in pieces" + ), + pytest.param( + "bell peppers, cut in pieces", 0, "", "bell peppers", "cut in pieces", id="bell peppers, cut in pieces" + ), + ], +) +def test_brute_parser( + unique_local_group_id: UUID4, + parsed_ingredient_data: tuple[list[IngredientFood], list[IngredientUnit]], # required so database is populated + input: str, + quantity: int | float, + unit: str, + food: str, + comment: str, +): with session_context() as session: - parser = get_parser(RegisteredParser.brute, unique_user.group_id, session) + parser = get_parser(RegisteredParser.brute, unique_local_group_id, session) + parsed = parser.parse_one(input) + ing = parsed.ingredient - for key, val in expectations.items(): - parsed = parser.parse_one(key) - - assert parsed.ingredient.quantity == val[0] - assert parsed.ingredient.unit.name == val[1] - assert parsed.ingredient.food.name == val[2] - assert parsed.ingredient.note in {val[3], None} + if ing.quantity: + assert ing.quantity == quantity + else: + assert not quantity + if ing.unit: + assert ing.unit.name == unit + else: + assert not unit + if ing.food: + assert ing.food.name == food + else: + assert not food + if ing.note: + assert ing.note == comment + else: + assert not comment @pytest.mark.parametrize(