fix clean_string error (#481)

This commit is contained in:
wengtad 2021-06-11 00:08:10 +08:00 committed by GitHub
parent a78fbea711
commit c2ed4a39ac
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -36,13 +36,16 @@ def clean(recipe_data: dict, url=None) -> dict:
def clean_string(text: str) -> str: def clean_string(text: str) -> str:
cleaned_text = html.unescape(text) if text == "" or text is None:
cleaned_text = re.sub("<[^<]+?>", "", cleaned_text) return ""
cleaned_text = re.sub(" +", " ", cleaned_text) else:
cleaned_text = re.sub("</p>", "\n", cleaned_text) cleaned_text = html.unescape(text)
cleaned_text = re.sub(r"\n\s*\n", "\n\n", cleaned_text) cleaned_text = re.sub("<[^<]+?>", "", cleaned_text)
cleaned_text = cleaned_text.replace("\xa0", " ").replace("\t", " ").strip() cleaned_text = re.sub(" +", " ", cleaned_text)
return cleaned_text cleaned_text = re.sub("</p>", "\n", cleaned_text)
cleaned_text = re.sub(r"\n\s*\n", "\n\n", cleaned_text)
cleaned_text = cleaned_text.replace("\xa0", " ").replace("\t", " ").strip()
return cleaned_text
def category(category: str): def category(category: str):