Kavita/Kavita.Common/Helpers/HtmlHelper.cs
Fesaa c62b20f54b
BE Tech Debt (#4497)
Co-authored-by: Joseph Milazzo <joseph.v.milazzo@gmail.com>
Co-authored-by: Joe Milazzo <josephmajora@gmail.com>
2026-03-07 10:04:08 -08:00

54 lines
2.1 KiB
C#

using System.Linq;
using System.Text.RegularExpressions;
using HtmlAgilityPack;
namespace Kavita.Common.Helpers;
#nullable enable
/// <summary>
/// Helpers for reducing an HTML (and possibly Markdown-flavoured) body to a short plain-text preview.
/// </summary>
public static class HtmlHelper
{
    /// <summary>
    /// Maximum number of UTF-16 code units of cleaned text kept before the trailing ellipsis is appended.
    /// </summary>
    private const int BodyTextLimit = 175;

    /// <summary>
    /// Ordered clean-up passes applied to the extracted text. Order matters: the paired-marker passes
    /// must run before the pass that strips the bare marker characters, and the image pass runs twice
    /// because removals made by earlier passes can splice a new <c>img(...)</c> token together.
    /// </summary>
    /// <remarks>
    /// The instances are cached so the patterns are not re-resolved on every call.
    /// Passes for <c>~~~x~~~</c>, <c>~~x~~</c>, <c>__x__</c> and the bare <c>~~~</c>/<c>~~</c>/<c>__</c>
    /// strips are intentionally absent: once every <c>_ * [ ] ~</c> character has been removed they can
    /// never match, so dropping them does not change the output.
    /// </remarks>
    private static readonly (Regex Pattern, string Replacement)[] CleanupPasses =
    {
        (new Regex(@"\*\*(.*?)\*\*", RegexOptions.Compiled), "$1"),              // Bold with **
        (new Regex(@"_(.*?)_", RegexOptions.Compiled), "$1"),                    // Italic with _
        (new Regex(@"\[(.*?)\]\((.*?)\)", RegexOptions.Compiled), "$1"),         // Links [text](url) -> text
        (new Regex(@"[_*\[\]~]", RegexOptions.Compiled), string.Empty),          // Any leftover marker characters
        (new Regex(@"img\d*\((.*?)\)", RegexOptions.Compiled), string.Empty),    // Image tokens img(...) / img123(...)
        (new Regex(@"\+{3}(.*?)\+{3}", RegexOptions.Compiled), "$1"),            // +++text+++ -> text
        (new Regex(@"#\s", RegexOptions.Compiled), string.Empty),                // '#' followed by whitespace
        (new Regex(@"img\d*\((.*?)\)", RegexOptions.Compiled), string.Empty),    // Image tokens exposed by the passes above
        (new Regex(@"\+", RegexOptions.Compiled), string.Empty),                 // Any remaining '+'
    };

    /// <summary>
    /// Extracts the text content of <paramref name="body"/>, strips common Markdown markers, truncates the
    /// result to at most <see cref="BodyTextLimit"/> UTF-16 code units and appends an ellipsis.
    /// </summary>
    /// <param name="body">HTML fragment or document to summarise. May be <c>null</c> or empty.</param>
    /// <returns>
    /// <paramref name="body"/> itself when it is <c>null</c> or empty; <see cref="string.Empty"/> when the
    /// parsed document yields no text nodes; otherwise the cleaned, truncated text followed by "…".
    /// The ellipsis is appended even when the text was short enough not to be truncated.
    /// </returns>
    public static string? GetCharacters(string? body)
    {
        if (string.IsNullOrEmpty(body)) return body;

        var doc = new HtmlDocument();
        doc.LoadHtml(body);

        // Collect every text node except those that sit directly inside a <script> element.
        var textNodes = doc.DocumentNode.SelectNodes("//text()[not(parent::script)]");
        if (textNodes == null) return string.Empty;

        // Nodes consisting solely of a line feed are dropped so they do not add extra separators.
        var plainText = string.Join(" ", textNodes
            .Select(node => node.InnerText)
            .Where(text => !text.Equals("\n")));

        // Clean any leftover Markdown out
        foreach (var (pattern, replacement) in CleanupPasses)
        {
            plainText = pattern.Replace(plainText, replacement);
        }

        return Truncate(plainText) + "…";
    }

    /// <summary>
    /// Returns at most the first <see cref="BodyTextLimit"/> UTF-16 code units of <paramref name="text"/>,
    /// backing off by one when the cut would otherwise separate a surrogate pair.
    /// </summary>
    private static string Truncate(string text)
    {
        if (text.Length <= BodyTextLimit) return text;

        var length = BodyTextLimit;

        // text[length] is in range because text.Length > BodyTextLimit. Cutting between a high and a low
        // surrogate would leave an unpaired surrogate at the end of the preview.
        if (char.IsHighSurrogate(text[length - 1]) && char.IsLowSurrogate(text[length]))
        {
            length--;
        }

        return text.Substring(0, length);
    }
}