unicode fraction processing

2025-07-09 03:04:54 -04:00 · 2021-08-28 14:18:56 -08:00 · 2021-08-28 14:18:56 -08:00 · 2c80980453
commit 2c80980453
parent 1c11f6a3d7
2 changed files with 18 additions and 1 deletions
--- a/mealie/services/scraper/ingredient_nlp/processor.py
+++ b/mealie/services/scraper/ingredient_nlp/processor.py
@ -1,5 +1,6 @@
 import subprocess
 import tempfile
+import unicodedata
 from fractions import Fraction
 from pathlib import Path
 from typing import Optional
@ -41,8 +42,24 @@ def _exec_crf_test(input_text):
        )


def fraction_finder(string: str) -> str:
    """Replace Unicode vulgar-fraction characters with ASCII ``n/d`` text.

    Every character whose Unicode name starts with ``VULGAR FRACTION``
    (for example U+00BD, one half) is expanded via NFKC normalization and
    rewritten with a plain ``/`` separator. All such characters in the
    input are converted, not only the first one encountered.

    When a fraction directly follows a decimal digit, a single space is
    inserted so a mixed number keeps its meaning: ``"1\u00bd"`` becomes
    ``"1 1/2"`` rather than ``"11/2"``.

    Args:
        string: Text that may contain vulgar-fraction characters.

    Returns:
        The text with each vulgar fraction replaced; input without any
        vulgar fraction is returned with identical content.
    """
    pieces: list[str] = []
    for char in string:
        try:
            name = unicodedata.name(char)
        except ValueError:
            # Characters without a Unicode name (e.g. control characters)
            # cannot be fractions; keep them untouched.
            pieces.append(char)
            continue

        if not name.startswith("VULGAR FRACTION"):
            pieces.append(char)
            continue

        # NFKC expands e.g. U+00BD into "1" + U+2044 (FRACTION SLASH) + "2".
        normalized = unicodedata.normalize("NFKC", char)
        numerator, _slash, denominator = normalized.partition("\u2044")
        text = f"{numerator}/{denominator}"

        # Keep a preceding whole number separate from the numerator.
        if pieces and pieces[-1][-1:].isdecimal():
            text = f" {text}"
        pieces.append(text)

    return "".join(pieces)
+
+
 def convert_list_to_crf_model(list_of_ingrdeint_text: list[str]):
-    crf_output = _exec_crf_test(list_of_ingrdeint_text)
+    crf_output = _exec_crf_test([fraction_finder(x) for x in list_of_ingrdeint_text])

    crf_models = [CRFIngredient(**ingredient) for ingredient in utils.import_data(crf_output.split("\n"))]

--- a/mealie/services/scraper/ingredient_nlp/unicode_fraction_dict.py
+++ b/mealie/services/scraper/ingredient_nlp/unicode_fraction_dict.py