mirror of
https://github.com/mealie-recipes/mealie.git
synced 2025-07-09 03:04:54 -04:00
Fix issues parsing times not in "PT.*H.*M" format (#733)
* Add more tests to the time cleaner
* Parse more time entries
* Formatting
* Refactor parse_duration
* Refactor pretty_print_timedelta
* Add some tests for pretty_print_timedelta
* Add option for cook_time from schema
This commit is contained in:
parent
b81f88dc18
commit
75113cc2c7
@ -5,6 +5,9 @@ from datetime import datetime, timedelta
|
||||
from typing import List, Optional
|
||||
|
||||
from slugify import slugify
|
||||
from mealie.core.root_logger import get_logger
|
||||
|
||||
logger = get_logger()
|
||||
|
||||
|
||||
def clean(recipe_data: dict, url=None) -> dict:
|
||||
@ -201,16 +204,15 @@ def clean_time(time_entry):
|
||||
elif isinstance(time_entry, datetime):
|
||||
print(time_entry)
|
||||
elif isinstance(time_entry, str):
|
||||
if re.match("PT.*H.*M", time_entry):
|
||||
try:
|
||||
time_delta_object = parse_duration(time_entry)
|
||||
return pretty_print_timedelta(time_delta_object)
|
||||
except ValueError:
|
||||
logger.error(f"Could not parse time_entry `{time_entry}`")
|
||||
else:
|
||||
return str(time_entry)
|
||||
|
||||
|
||||
# ! TODO: Cleanup Code Below
|
||||
|
||||
|
||||
def parse_duration(iso_duration):
|
||||
"""Parses an ISO 8601 duration string into a datetime.timedelta instance.
|
||||
Args:
|
||||
@ -218,48 +220,35 @@ def parse_duration(iso_duration):
|
||||
Returns:
|
||||
a datetime.timedelta instance
|
||||
"""
|
||||
m = re.match(r"^P(?:(\d+)Y)?(?:(\d+)M)?(?:(\d+)D)?T(?:(\d+)H)?(?:(\d+)M)?(?:(\d+(?:.\d+)?)S)?$", iso_duration)
|
||||
m = re.match(
|
||||
r"^P((\d+)Y)?((\d+)M)?((?P<days>\d+)D)?"
|
||||
r"T((?P<hours>\d+)H)?((?P<minutes>\d+)M)?((?P<seconds>\d+(?:\.\d+)?)S)?$",
|
||||
iso_duration,
|
||||
)
|
||||
if m is None:
|
||||
raise ValueError("invalid ISO 8601 duration string")
|
||||
|
||||
days = 0
|
||||
hours = 0
|
||||
minutes = 0
|
||||
seconds = 0.0
|
||||
|
||||
# Years and months are not being utilized here, as there is not enough
|
||||
# information provided to determine which year and which month.
|
||||
# Python's time_delta class stores durations as days, seconds and
|
||||
# microseconds internally, and therefore we'd have to
|
||||
# convert parsed years and months to specific number of days.
|
||||
|
||||
if m[3]:
|
||||
days = int(m[3])
|
||||
if m[4]:
|
||||
hours = int(m[4])
|
||||
if m[5]:
|
||||
minutes = int(m[5])
|
||||
if m[6]:
|
||||
seconds = float(m[6])
|
||||
times = {"days": 0, "hours": 0, "minutes": 0, "seconds": 0}
|
||||
for unit, value in times.items():
|
||||
if m.group(unit):
|
||||
times[unit] = int(float(m.group(unit)))
|
||||
|
||||
return timedelta(days=days, hours=hours, minutes=minutes, seconds=seconds)
|
||||
return timedelta(**times)
|
||||
|
||||
|
||||
def pretty_print_timedelta(t, max_components=None, max_decimal_places=2):
|
||||
"""
|
||||
Print a pretty string for a timedelta.
|
||||
For example datetime.timedelta(days=2, seconds=17280) will be printed as '2 days, 4 hours, 48 minutes'. Setting max_components to e.g. 1 will change this to '2.2 days', where the
|
||||
For example datetime.timedelta(days=2, seconds=17280) will be printed as '2 days 4 Hours 48 Minutes'. Setting max_components to e.g. 1 will change this to '2.2 days', where the
|
||||
number of decimal points can also be set.
|
||||
"""
|
||||
time_scales = [
|
||||
timedelta(days=365),
|
||||
timedelta(days=1),
|
||||
timedelta(hours=1),
|
||||
timedelta(minutes=1),
|
||||
timedelta(seconds=1),
|
||||
timedelta(microseconds=1000),
|
||||
timedelta(microseconds=1),
|
||||
]
|
||||
|
||||
time_scale_names_dict = {
|
||||
timedelta(days=365): "year",
|
||||
timedelta(days=1): "day",
|
||||
@ -270,9 +259,8 @@ def pretty_print_timedelta(t, max_components=None, max_decimal_places=2):
|
||||
timedelta(microseconds=1): "microsecond",
|
||||
}
|
||||
count = 0
|
||||
txt = ""
|
||||
first = True
|
||||
for scale in time_scales:
|
||||
out_list = []
|
||||
for scale, scale_name in time_scale_names_dict.items():
|
||||
if t >= scale:
|
||||
count += 1
|
||||
n = t / scale if count == max_components else int(t / scale)
|
||||
@ -281,15 +269,9 @@ def pretty_print_timedelta(t, max_components=None, max_decimal_places=2):
|
||||
n_txt = str(round(n, max_decimal_places))
|
||||
if n_txt[-2:] == ".0":
|
||||
n_txt = n_txt[:-2]
|
||||
txt += "{}{} {}{}".format(
|
||||
"" if first else " ",
|
||||
n_txt,
|
||||
time_scale_names_dict[scale],
|
||||
"s" if n > 1 else "",
|
||||
)
|
||||
if first:
|
||||
first = False
|
||||
|
||||
if len(txt) == 0:
|
||||
txt = "none"
|
||||
return txt
|
||||
out_list.append(f"{n_txt} {scale_name}{'s' if n > 1 else ''}")
|
||||
|
||||
if out_list == []:
|
||||
return "none"
|
||||
return " ".join(out_list)
|
||||
|
@ -132,6 +132,10 @@ def clean_scraper(scraped_data: SchemaScraperFactory.SchemaScraper, url: str) ->
|
||||
except TypeError:
|
||||
return []
|
||||
|
||||
cook_time = try_get_default(None, "performTime", None, cleaner.clean_time) or try_get_default(
|
||||
None, "cookTime", None, cleaner.clean_time
|
||||
)
|
||||
|
||||
return Recipe(
|
||||
name=try_get_default(scraped_data.title, "name", "No Name Found", cleaner.clean_string),
|
||||
slug="",
|
||||
@ -143,7 +147,7 @@ def clean_scraper(scraped_data: SchemaScraperFactory.SchemaScraper, url: str) ->
|
||||
recipe_instructions=get_instructions(),
|
||||
total_time=try_get_default(None, "totalTime", None, cleaner.clean_time),
|
||||
prep_time=try_get_default(None, "prepTime", None, cleaner.clean_time),
|
||||
perform_time=try_get_default(None, "performTime", None, cleaner.clean_time),
|
||||
perform_time=cook_time,
|
||||
org_url=url,
|
||||
)
|
||||
|
||||
|
@ -2,6 +2,7 @@ import json
|
||||
import re
|
||||
|
||||
import pytest
|
||||
from datetime import timedelta
|
||||
from mealie.services.scraper import cleaner
|
||||
from mealie.services.scraper.scraper import open_graph
|
||||
from tests.test_config import TEST_RAW_HTML, TEST_RAW_RECIPES
|
||||
@ -107,9 +108,29 @@ def test_html_with_recipe_data():
|
||||
assert url_validation_regex.match(recipe_data["image"])
|
||||
|
||||
|
||||
def test_time_cleaner():
|
||||
@pytest.mark.parametrize(
|
||||
"time_delta,expected",
|
||||
[
|
||||
("PT2H30M", "2 Hours 30 Minutes"),
|
||||
("PT30M", "30 Minutes"),
|
||||
("PT3H", "3 Hours"),
|
||||
("P1DT1H1M1S", "1 day 1 Hour 1 Minute 1 Second"),
|
||||
("P1DT1H1M1.53S", "1 day 1 Hour 1 Minute 1 Second"),
|
||||
("PT-3H", None),
|
||||
("PT", "none"),
|
||||
],
|
||||
)
|
||||
def test_time_cleaner(time_delta, expected):
|
||||
assert cleaner.clean_time(time_delta) == expected
|
||||
|
||||
my_time_delta = "PT2H30M"
|
||||
return_delta = cleaner.clean_time(my_time_delta)
|
||||
|
||||
assert return_delta == "2 Hours 30 Minutes"
|
||||
@pytest.mark.parametrize(
|
||||
"t,max_components,max_decimal_places,expected",
|
||||
[
|
||||
(timedelta(days=2, seconds=17280), None, 2, "2 days 4 Hours 48 Minutes"),
|
||||
(timedelta(days=2, seconds=17280), 1, 2, "2.2 days"),
|
||||
(timedelta(days=365), None, 2, "1 year"),
|
||||
],
|
||||
)
|
||||
def test_pretty_print_timedelta(t, max_components, max_decimal_places, expected):
|
||||
assert cleaner.pretty_print_timedelta(t, max_components, max_decimal_places) == expected
|
||||
|
Loading…
x
Reference in New Issue
Block a user