Fix issues parsing times not in "PT.*H.*M" format (#733)

* Add more tests to the time cleaner

* Parse more time entries

* Formatting

* Refactor parse_duration

* Refactor pretty_print_timedelta

* Add some tests for pretty_print_timedelta

* Add option for cook_time from schema
This commit is contained in:
cadamswaite 2021-10-20 01:01:19 +01:00 committed by GitHub
parent b81f88dc18
commit 75113cc2c7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 55 additions and 48 deletions

View File

@ -5,6 +5,9 @@ from datetime import datetime, timedelta
from typing import List, Optional
from slugify import slugify
from mealie.core.root_logger import get_logger
logger = get_logger()
def clean(recipe_data: dict, url=None) -> dict:
@ -201,16 +204,15 @@ def clean_time(time_entry):
elif isinstance(time_entry, datetime):
print(time_entry)
elif isinstance(time_entry, str):
if re.match("PT.*H.*M", time_entry):
try:
time_delta_object = parse_duration(time_entry)
return pretty_print_timedelta(time_delta_object)
except ValueError:
logger.error(f"Could not parse time_entry `{time_entry}`")
else:
return str(time_entry)
# ! TODO: Cleanup Code Below
def parse_duration(iso_duration):
    """Parse an ISO 8601 duration string into a ``datetime.timedelta``.

    Only days, hours, minutes and seconds contribute to the result. Year and
    month components are accepted by the pattern but ignored: a ``timedelta``
    stores days, seconds and microseconds, and there is not enough
    information to convert a year or a month into a fixed number of days.
    Fractional seconds are truncated to whole seconds.

    The time part is optional, so date-only durations such as ``"P1D"`` are
    accepted alongside ``"PT30M"`` and ``"P1DT1H"``. A designator-only
    ``"PT"`` yields a zero ``timedelta``; a bare ``"P"`` is rejected.

    Args:
        iso_duration: an ISO 8601 duration string, e.g. ``"PT2H30M"``.

    Returns:
        a datetime.timedelta instance

    Raises:
        ValueError: if ``iso_duration`` does not match the supported
            ISO 8601 duration syntax (negative values, for example).
    """
    m = re.match(
        # A bare "P" carries no designator or component and stays invalid.
        r"^P(?!$)"
        r"(?:\d+Y)?(?:\d+M)?(?:(?P<days>\d+)D)?"
        # The whole time part is optional so that "P1D" parses.
        r"(?:T(?:(?P<hours>\d+)H)?(?:(?P<minutes>\d+)M)?"
        r"(?:(?P<seconds>\d+(?:\.\d+)?)S)?)?$",
        iso_duration,
    )
    if m is None:
        raise ValueError("invalid ISO 8601 duration string")

    # Named groups map one-to-one onto timedelta keyword arguments; groups
    # that did not take part in the match are None and are simply left out.
    # int(float(...)) drops any fractional part of the seconds component.
    times = {unit: int(float(value)) for unit, value in m.groupdict().items() if value}

    return timedelta(**times)
def pretty_print_timedelta(t, max_components=None, max_decimal_places=2):
"""
Print a pretty string for a timedelta.
For example datetime.timedelta(days=2, seconds=17280) will be printed as '2 days, 4 hours, 48 minutes'. Setting max_components to e.g. 1 will change this to '2.2 days', where the
For example datetime.timedelta(days=2, seconds=17280) will be printed as '2 days 4 Hours 48 Minutes'. Setting max_components to e.g. 1 will change this to '2.2 days', where the
number of decimal points can also be set.
"""
time_scales = [
timedelta(days=365),
timedelta(days=1),
timedelta(hours=1),
timedelta(minutes=1),
timedelta(seconds=1),
timedelta(microseconds=1000),
timedelta(microseconds=1),
]
time_scale_names_dict = {
timedelta(days=365): "year",
timedelta(days=1): "day",
@ -270,9 +259,8 @@ def pretty_print_timedelta(t, max_components=None, max_decimal_places=2):
timedelta(microseconds=1): "microsecond",
}
count = 0
txt = ""
first = True
for scale in time_scales:
out_list = []
for scale, scale_name in time_scale_names_dict.items():
if t >= scale:
count += 1
n = t / scale if count == max_components else int(t / scale)
@ -281,15 +269,9 @@ def pretty_print_timedelta(t, max_components=None, max_decimal_places=2):
n_txt = str(round(n, max_decimal_places))
if n_txt[-2:] == ".0":
n_txt = n_txt[:-2]
txt += "{}{} {}{}".format(
"" if first else " ",
n_txt,
time_scale_names_dict[scale],
"s" if n > 1 else "",
)
if first:
first = False
if len(txt) == 0:
txt = "none"
return txt
out_list.append(f"{n_txt} {scale_name}{'s' if n > 1 else ''}")
if out_list == []:
return "none"
return " ".join(out_list)

View File

@ -132,6 +132,10 @@ def clean_scraper(scraped_data: SchemaScraperFactory.SchemaScraper, url: str) ->
except TypeError:
return []
cook_time = try_get_default(None, "performTime", None, cleaner.clean_time) or try_get_default(
None, "cookTime", None, cleaner.clean_time
)
return Recipe(
name=try_get_default(scraped_data.title, "name", "No Name Found", cleaner.clean_string),
slug="",
@ -143,7 +147,7 @@ def clean_scraper(scraped_data: SchemaScraperFactory.SchemaScraper, url: str) ->
recipe_instructions=get_instructions(),
total_time=try_get_default(None, "totalTime", None, cleaner.clean_time),
prep_time=try_get_default(None, "prepTime", None, cleaner.clean_time),
perform_time=try_get_default(None, "performTime", None, cleaner.clean_time),
perform_time=cook_time,
org_url=url,
)

View File

@ -2,6 +2,7 @@ import json
import re
import pytest
from datetime import timedelta
from mealie.services.scraper import cleaner
from mealie.services.scraper.scraper import open_graph
from tests.test_config import TEST_RAW_HTML, TEST_RAW_RECIPES
@ -107,9 +108,29 @@ def test_html_with_recipe_data():
assert url_validation_regex.match(recipe_data["image"])
def test_time_cleaner():
@pytest.mark.parametrize(
    ("time_delta", "expected"),
    (
        # Time-only durations.
        ("PT2H30M", "2 Hours 30 Minutes"),
        ("PT30M", "30 Minutes"),
        ("PT3H", "3 Hours"),
        # Date and time parts combined.
        ("P1DT1H1M1S", "1 day 1 Hour 1 Minute 1 Second"),
        # Fractional seconds are expected to come out as whole seconds.
        ("P1DT1H1M1.53S", "1 day 1 Hour 1 Minute 1 Second"),
        # A negative component is not parseable; no cleaned value comes back.
        ("PT-3H", None),
        # Designators with no components describe an empty duration.
        ("PT", "none"),
    ),
)
def test_time_cleaner(time_delta, expected):
    """Check that clean_time renders ISO 8601 duration strings as readable text."""
    cleaned = cleaner.clean_time(time_delta)
    assert cleaned == expected
my_time_delta = "PT2H30M"
return_delta = cleaner.clean_time(my_time_delta)
assert return_delta == "2 Hours 30 Minutes"
@pytest.mark.parametrize(
    ("t", "max_components", "max_decimal_places", "expected"),
    (
        # No component limit: every non-zero unit is listed.
        (timedelta(days=2, seconds=17280), None, 2, "2 days 4 Hours 48 Minutes"),
        # Limited to one component: the remainder is folded into a decimal.
        (timedelta(days=2, seconds=17280), 1, 2, "2.2 days"),
        # Exactly 365 days is reported as a single year.
        (timedelta(days=365), None, 2, "1 year"),
    ),
)
def test_pretty_print_timedelta(t, max_components, max_decimal_places, expected):
    """Check pretty_print_timedelta output with and without a component limit."""
    rendered = cleaner.pretty_print_timedelta(t, max_components, max_decimal_places)
    assert rendered == expected