mirror of
https://github.com/mealie-recipes/mealie.git
synced 2025-07-09 03:04:54 -04:00
Fix issues parsing times not in "PT.*H.*M" format (#733)
* Add more tests to the time cleaner
* Parse more time entries
* Formatting
* Refactor parse_duration
* Refactor pretty_print_timedelta
* Add some tests for pretty_print_timedelta
* Add option for cook_time from schema
This commit is contained in:
parent
b81f88dc18
commit
75113cc2c7
@ -5,6 +5,9 @@ from datetime import datetime, timedelta
|
|||||||
from typing import List, Optional
|
from typing import List, Optional
|
||||||
|
|
||||||
from slugify import slugify
|
from slugify import slugify
|
||||||
|
from mealie.core.root_logger import get_logger
|
||||||
|
|
||||||
|
logger = get_logger()
|
||||||
|
|
||||||
|
|
||||||
def clean(recipe_data: dict, url=None) -> dict:
|
def clean(recipe_data: dict, url=None) -> dict:
|
||||||
@ -201,16 +204,15 @@ def clean_time(time_entry):
|
|||||||
elif isinstance(time_entry, datetime):
|
elif isinstance(time_entry, datetime):
|
||||||
print(time_entry)
|
print(time_entry)
|
||||||
elif isinstance(time_entry, str):
|
elif isinstance(time_entry, str):
|
||||||
if re.match("PT.*H.*M", time_entry):
|
try:
|
||||||
time_delta_object = parse_duration(time_entry)
|
time_delta_object = parse_duration(time_entry)
|
||||||
return pretty_print_timedelta(time_delta_object)
|
return pretty_print_timedelta(time_delta_object)
|
||||||
|
except ValueError:
|
||||||
|
logger.error(f"Could not parse time_entry `{time_entry}`")
|
||||||
else:
|
else:
|
||||||
return str(time_entry)
|
return str(time_entry)
|
||||||
|
|
||||||
|
|
||||||
# ! TODO: Cleanup Code Below
|
|
||||||
|
|
||||||
|
|
||||||
def parse_duration(iso_duration):
|
def parse_duration(iso_duration):
|
||||||
"""Parses an ISO 8601 duration string into a datetime.timedelta instance.
|
"""Parses an ISO 8601 duration string into a datetime.timedelta instance.
|
||||||
Args:
|
Args:
|
||||||
@ -218,48 +220,35 @@ def parse_duration(iso_duration):
|
|||||||
Returns:
|
Returns:
|
||||||
a datetime.timedelta instance
|
a datetime.timedelta instance
|
||||||
"""
|
"""
|
||||||
m = re.match(r"^P(?:(\d+)Y)?(?:(\d+)M)?(?:(\d+)D)?T(?:(\d+)H)?(?:(\d+)M)?(?:(\d+(?:.\d+)?)S)?$", iso_duration)
|
m = re.match(
|
||||||
|
r"^P((\d+)Y)?((\d+)M)?((?P<days>\d+)D)?"
|
||||||
|
r"T((?P<hours>\d+)H)?((?P<minutes>\d+)M)?((?P<seconds>\d+(?:\.\d+)?)S)?$",
|
||||||
|
iso_duration,
|
||||||
|
)
|
||||||
if m is None:
|
if m is None:
|
||||||
raise ValueError("invalid ISO 8601 duration string")
|
raise ValueError("invalid ISO 8601 duration string")
|
||||||
|
|
||||||
days = 0
|
|
||||||
hours = 0
|
|
||||||
minutes = 0
|
|
||||||
seconds = 0.0
|
|
||||||
|
|
||||||
# Years and months are not being utilized here, as there is not enough
|
# Years and months are not being utilized here, as there is not enough
|
||||||
# information provided to determine which year and which month.
|
# information provided to determine which year and which month.
|
||||||
# Python's time_delta class stores durations as days, seconds and
|
# Python's time_delta class stores durations as days, seconds and
|
||||||
# microseconds internally, and therefore we'd have to
|
# microseconds internally, and therefore we'd have to
|
||||||
# convert parsed years and months to specific number of days.
|
# convert parsed years and months to specific number of days.
|
||||||
|
|
||||||
if m[3]:
|
times = {"days": 0, "hours": 0, "minutes": 0, "seconds": 0}
|
||||||
days = int(m[3])
|
for unit, value in times.items():
|
||||||
if m[4]:
|
if m.group(unit):
|
||||||
hours = int(m[4])
|
times[unit] = int(float(m.group(unit)))
|
||||||
if m[5]:
|
|
||||||
minutes = int(m[5])
|
|
||||||
if m[6]:
|
|
||||||
seconds = float(m[6])
|
|
||||||
|
|
||||||
return timedelta(days=days, hours=hours, minutes=minutes, seconds=seconds)
|
return timedelta(**times)
|
||||||
|
|
||||||
|
|
||||||
def pretty_print_timedelta(t, max_components=None, max_decimal_places=2):
|
def pretty_print_timedelta(t, max_components=None, max_decimal_places=2):
|
||||||
"""
|
"""
|
||||||
Print a pretty string for a timedelta.
|
Print a pretty string for a timedelta.
|
||||||
For example datetime.timedelta(days=2, seconds=17280) will be printed as '2 days, 4 hours, 48 minutes'. Setting max_components to e.g. 1 will change this to '2.2 days', where the
|
For example datetime.timedelta(days=2, seconds=17280) will be printed as '2 days 4 Hours 48 Minutes'. Setting max_components to e.g. 1 will change this to '2.2 days', where the
|
||||||
number of decimal points can also be set.
|
number of decimal points can also be set.
|
||||||
"""
|
"""
|
||||||
time_scales = [
|
|
||||||
timedelta(days=365),
|
|
||||||
timedelta(days=1),
|
|
||||||
timedelta(hours=1),
|
|
||||||
timedelta(minutes=1),
|
|
||||||
timedelta(seconds=1),
|
|
||||||
timedelta(microseconds=1000),
|
|
||||||
timedelta(microseconds=1),
|
|
||||||
]
|
|
||||||
time_scale_names_dict = {
|
time_scale_names_dict = {
|
||||||
timedelta(days=365): "year",
|
timedelta(days=365): "year",
|
||||||
timedelta(days=1): "day",
|
timedelta(days=1): "day",
|
||||||
@ -270,9 +259,8 @@ def pretty_print_timedelta(t, max_components=None, max_decimal_places=2):
|
|||||||
timedelta(microseconds=1): "microsecond",
|
timedelta(microseconds=1): "microsecond",
|
||||||
}
|
}
|
||||||
count = 0
|
count = 0
|
||||||
txt = ""
|
out_list = []
|
||||||
first = True
|
for scale, scale_name in time_scale_names_dict.items():
|
||||||
for scale in time_scales:
|
|
||||||
if t >= scale:
|
if t >= scale:
|
||||||
count += 1
|
count += 1
|
||||||
n = t / scale if count == max_components else int(t / scale)
|
n = t / scale if count == max_components else int(t / scale)
|
||||||
@ -281,15 +269,9 @@ def pretty_print_timedelta(t, max_components=None, max_decimal_places=2):
|
|||||||
n_txt = str(round(n, max_decimal_places))
|
n_txt = str(round(n, max_decimal_places))
|
||||||
if n_txt[-2:] == ".0":
|
if n_txt[-2:] == ".0":
|
||||||
n_txt = n_txt[:-2]
|
n_txt = n_txt[:-2]
|
||||||
txt += "{}{} {}{}".format(
|
|
||||||
"" if first else " ",
|
|
||||||
n_txt,
|
|
||||||
time_scale_names_dict[scale],
|
|
||||||
"s" if n > 1 else "",
|
|
||||||
)
|
|
||||||
if first:
|
|
||||||
first = False
|
|
||||||
|
|
||||||
if len(txt) == 0:
|
out_list.append(f"{n_txt} {scale_name}{'s' if n > 1 else ''}")
|
||||||
txt = "none"
|
|
||||||
return txt
|
if out_list == []:
|
||||||
|
return "none"
|
||||||
|
return " ".join(out_list)
|
||||||
|
@ -132,6 +132,10 @@ def clean_scraper(scraped_data: SchemaScraperFactory.SchemaScraper, url: str) ->
|
|||||||
except TypeError:
|
except TypeError:
|
||||||
return []
|
return []
|
||||||
|
|
||||||
|
cook_time = try_get_default(None, "performTime", None, cleaner.clean_time) or try_get_default(
|
||||||
|
None, "cookTime", None, cleaner.clean_time
|
||||||
|
)
|
||||||
|
|
||||||
return Recipe(
|
return Recipe(
|
||||||
name=try_get_default(scraped_data.title, "name", "No Name Found", cleaner.clean_string),
|
name=try_get_default(scraped_data.title, "name", "No Name Found", cleaner.clean_string),
|
||||||
slug="",
|
slug="",
|
||||||
@ -143,7 +147,7 @@ def clean_scraper(scraped_data: SchemaScraperFactory.SchemaScraper, url: str) ->
|
|||||||
recipe_instructions=get_instructions(),
|
recipe_instructions=get_instructions(),
|
||||||
total_time=try_get_default(None, "totalTime", None, cleaner.clean_time),
|
total_time=try_get_default(None, "totalTime", None, cleaner.clean_time),
|
||||||
prep_time=try_get_default(None, "prepTime", None, cleaner.clean_time),
|
prep_time=try_get_default(None, "prepTime", None, cleaner.clean_time),
|
||||||
perform_time=try_get_default(None, "performTime", None, cleaner.clean_time),
|
perform_time=cook_time,
|
||||||
org_url=url,
|
org_url=url,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -2,6 +2,7 @@ import json
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
from datetime import timedelta
|
||||||
from mealie.services.scraper import cleaner
|
from mealie.services.scraper import cleaner
|
||||||
from mealie.services.scraper.scraper import open_graph
|
from mealie.services.scraper.scraper import open_graph
|
||||||
from tests.test_config import TEST_RAW_HTML, TEST_RAW_RECIPES
|
from tests.test_config import TEST_RAW_HTML, TEST_RAW_RECIPES
|
||||||
@ -107,9 +108,29 @@ def test_html_with_recipe_data():
|
|||||||
assert url_validation_regex.match(recipe_data["image"])
|
assert url_validation_regex.match(recipe_data["image"])
|
||||||
|
|
||||||
|
|
||||||
def test_time_cleaner():
|
@pytest.mark.parametrize(
|
||||||
|
"time_delta,expected",
|
||||||
|
[
|
||||||
|
("PT2H30M", "2 Hours 30 Minutes"),
|
||||||
|
("PT30M", "30 Minutes"),
|
||||||
|
("PT3H", "3 Hours"),
|
||||||
|
("P1DT1H1M1S", "1 day 1 Hour 1 Minute 1 Second"),
|
||||||
|
("P1DT1H1M1.53S", "1 day 1 Hour 1 Minute 1 Second"),
|
||||||
|
("PT-3H", None),
|
||||||
|
("PT", "none"),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
def test_time_cleaner(time_delta, expected):
|
||||||
|
assert cleaner.clean_time(time_delta) == expected
|
||||||
|
|
||||||
my_time_delta = "PT2H30M"
|
|
||||||
return_delta = cleaner.clean_time(my_time_delta)
|
|
||||||
|
|
||||||
assert return_delta == "2 Hours 30 Minutes"
|
@pytest.mark.parametrize(
|
||||||
|
"t,max_components,max_decimal_places,expected",
|
||||||
|
[
|
||||||
|
(timedelta(days=2, seconds=17280), None, 2, "2 days 4 Hours 48 Minutes"),
|
||||||
|
(timedelta(days=2, seconds=17280), 1, 2, "2.2 days"),
|
||||||
|
(timedelta(days=365), None, 2, "1 year"),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
def test_pretty_print_timedelta(t, max_components, max_decimal_places, expected):
|
||||||
|
assert cleaner.pretty_print_timedelta(t, max_components, max_decimal_places) == expected
|
||||||
|
Loading…
x
Reference in New Issue
Block a user