diff --git a/mealie/services/scraper/cleaner.py b/mealie/services/scraper/cleaner.py index b844ca545df0..3132d4ed6206 100644 --- a/mealie/services/scraper/cleaner.py +++ b/mealie/services/scraper/cleaner.py @@ -5,6 +5,9 @@ from datetime import datetime, timedelta from typing import List, Optional from slugify import slugify +from mealie.core.root_logger import get_logger + +logger = get_logger() def clean(recipe_data: dict, url=None) -> dict: @@ -201,16 +204,15 @@ def clean_time(time_entry): elif isinstance(time_entry, datetime): print(time_entry) elif isinstance(time_entry, str): - if re.match("PT.*H.*M", time_entry): + try: time_delta_object = parse_duration(time_entry) return pretty_print_timedelta(time_delta_object) + except ValueError: + logger.error(f"Could not parse time_entry `{time_entry}`") else: return str(time_entry) -# ! TODO: Cleanup Code Below - - def parse_duration(iso_duration): """Parses an ISO 8601 duration string into a datetime.timedelta instance. Args: @@ -218,48 +220,35 @@ def parse_duration(iso_duration): Returns: a datetime.timedelta instance """ - m = re.match(r"^P(?:(\d+)Y)?(?:(\d+)M)?(?:(\d+)D)?T(?:(\d+)H)?(?:(\d+)M)?(?:(\d+(?:.\d+)?)S)?$", iso_duration) + m = re.match( + r"^P((\d+)Y)?((\d+)M)?((?P\d+)D)?" + r"T((?P\d+)H)?((?P\d+)M)?((?P\d+(?:\.\d+)?)S)?$", + iso_duration, + ) if m is None: raise ValueError("invalid ISO 8601 duration string") - days = 0 - hours = 0 - minutes = 0 - seconds = 0.0 - # Years and months are not being utilized here, as there is not enough # information provided to determine which year and which month. # Python's time_delta class stores durations as days, seconds and # microseconds internally, and therefore we'd have to # convert parsed years and months to specific number of days. - if m[3]: - days = int(m[3]) - if m[4]: - hours = int(m[4]) - if m[5]: - minutes = int(m[5]) - if m[6]: - seconds = float(m[6]) + times = {"days": 0, "hours": 0, "minutes": 0, "seconds": 0} + for unit, value in times.items(): + if m.group(unit): + times[unit] = int(float(m.group(unit))) - return timedelta(days=days, hours=hours, minutes=minutes, seconds=seconds) + return timedelta(**times) def pretty_print_timedelta(t, max_components=None, max_decimal_places=2): """ Print a pretty string for a timedelta. - For example datetime.timedelta(days=2, seconds=17280) will be printed as '2 days, 4 hours, 48 minutes'. Setting max_components to e.g. 1 will change this to '2.2 days', where the + For example datetime.timedelta(days=2, seconds=17280) will be printed as '2 days 4 Hours 48 Minutes'. Setting max_components to e.g. 1 will change this to '2.2 days', where the number of decimal points can also be set. """ - time_scales = [ - timedelta(days=365), - timedelta(days=1), - timedelta(hours=1), - timedelta(minutes=1), - timedelta(seconds=1), - timedelta(microseconds=1000), - timedelta(microseconds=1), - ] + time_scale_names_dict = { timedelta(days=365): "year", timedelta(days=1): "day", @@ -270,9 +259,8 @@ def pretty_print_timedelta(t, max_components=None, max_decimal_places=2): timedelta(microseconds=1): "microsecond", } count = 0 - txt = "" - first = True - for scale in time_scales: + out_list = [] + for scale, scale_name in time_scale_names_dict.items(): if t >= scale: count += 1 n = t / scale if count == max_components else int(t / scale) @@ -281,15 +269,9 @@ def pretty_print_timedelta(t, max_components=None, max_decimal_places=2): n_txt = str(round(n, max_decimal_places)) if n_txt[-2:] == ".0": n_txt = n_txt[:-2] - txt += "{}{} {}{}".format( - "" if first else " ", - n_txt, - time_scale_names_dict[scale], - "s" if n > 1 else "", - ) - if first: - first = False - if len(txt) == 0: - txt = "none" - return txt + out_list.append(f"{n_txt} {scale_name}{'s' if n > 1 else ''}") + + if out_list == []: + return "none" + return " ".join(out_list) diff --git a/mealie/services/scraper/scraper.py b/mealie/services/scraper/scraper.py index 8d65ce604381..c066893ee2da 100644 --- a/mealie/services/scraper/scraper.py +++ b/mealie/services/scraper/scraper.py @@ -132,6 +132,10 @@ def clean_scraper(scraped_data: SchemaScraperFactory.SchemaScraper, url: str) -> except TypeError: return [] + cook_time = try_get_default(None, "performTime", None, cleaner.clean_time) or try_get_default( + None, "cookTime", None, cleaner.clean_time + ) + return Recipe( name=try_get_default(scraped_data.title, "name", "No Name Found", cleaner.clean_string), slug="", @@ -143,7 +147,7 @@ def clean_scraper(scraped_data: SchemaScraperFactory.SchemaScraper, url: str) -> recipe_instructions=get_instructions(), total_time=try_get_default(None, "totalTime", None, cleaner.clean_time), prep_time=try_get_default(None, "prepTime", None, cleaner.clean_time), - perform_time=try_get_default(None, "performTime", None, cleaner.clean_time), + perform_time=cook_time, org_url=url, ) diff --git a/tests/unit_tests/test_cleaner.py b/tests/unit_tests/test_cleaner.py index e9ef2d8a6740..e4d57d268668 100644 --- a/tests/unit_tests/test_cleaner.py +++ b/tests/unit_tests/test_cleaner.py @@ -2,6 +2,7 @@ import json import re import pytest +from datetime import timedelta from mealie.services.scraper import cleaner from mealie.services.scraper.scraper import open_graph from tests.test_config import TEST_RAW_HTML, TEST_RAW_RECIPES @@ -107,9 +108,29 @@ def test_html_with_recipe_data(): assert url_validation_regex.match(recipe_data["image"]) -def test_time_cleaner(): +@pytest.mark.parametrize( + "time_delta,expected", + [ + ("PT2H30M", "2 Hours 30 Minutes"), + ("PT30M", "30 Minutes"), + ("PT3H", "3 Hours"), + ("P1DT1H1M1S", "1 day 1 Hour 1 Minute 1 Second"), + ("P1DT1H1M1.53S", "1 day 1 Hour 1 Minute 1 Second"), + ("PT-3H", None), + ("PT", "none"), + ], +) +def test_time_cleaner(time_delta, expected): + assert cleaner.clean_time(time_delta) == expected - my_time_delta = "PT2H30M" - return_delta = cleaner.clean_time(my_time_delta) - assert return_delta == "2 Hours 30 Minutes" +@pytest.mark.parametrize( + "t,max_components,max_decimal_places,expected", + [ + (timedelta(days=2, seconds=17280), None, 2, "2 days 4 Hours 48 Minutes"), + (timedelta(days=2, seconds=17280), 1, 2, "2.2 days"), + (timedelta(days=365), None, 2, "1 year"), + ], +) +def test_pretty_print_timedelta(t, max_components, max_decimal_places, expected): + assert cleaner.pretty_print_timedelta(t, max_components, max_decimal_places) == expected