mirror of
https://github.com/Kareadita/Kavita.git
synced 2026-03-10 12:05:51 -04:00
Co-authored-by: Joseph Milazzo <joseph.v.milazzo@gmail.com> Co-authored-by: Joe Milazzo <josephmajora@gmail.com>
54 lines
2.1 KiB
C#
54 lines
2.1 KiB
C#
using System.Linq;
|
|
using System.Text.RegularExpressions;
|
|
using HtmlAgilityPack;
|
|
|
|
namespace Kavita.Common.Helpers;
|
|
|
|
#nullable enable
|
|
|
|
public static class HtmlHelper
|
|
{
|
|
private const int BodyTextLimit = 175;
|
|
|
|
public static string? GetCharacters(string? body)
|
|
{
|
|
if (string.IsNullOrEmpty(body)) return body;
|
|
|
|
var doc = new HtmlDocument();
|
|
doc.LoadHtml(body);
|
|
|
|
var textNodes = doc.DocumentNode.SelectNodes("//text()[not(parent::script)]");
|
|
if (textNodes == null) return string.Empty;
|
|
|
|
var plainText = string.Join(" ", textNodes
|
|
.Select(node => node.InnerText)
|
|
.Where(s => !s.Equals("\n")));
|
|
|
|
// Clean any leftover Markdown out
|
|
plainText = Regex.Replace(plainText, @"\*\*(.*?)\*\*", "$1"); // Bold with **
|
|
plainText = Regex.Replace(plainText, @"_(.*?)_", "$1"); // Italic with _
|
|
plainText = Regex.Replace(plainText, @"\[(.*?)\]\((.*?)\)", "$1"); // Links [text](url)
|
|
plainText = Regex.Replace(plainText, @"[_*\[\]~]", string.Empty);
|
|
plainText = Regex.Replace(plainText, @"img\d*\((.*?)\)", string.Empty);
|
|
plainText = Regex.Replace(plainText, @"~~~(.*?)~~~", "$1");
|
|
plainText = Regex.Replace(plainText, @"\+{3}(.*?)\+{3}", "$1");
|
|
plainText = Regex.Replace(plainText, @"~~(.*?)~~", "$1");
|
|
plainText = Regex.Replace(plainText, @"__(.*?)__", "$1");
|
|
plainText = Regex.Replace(plainText, @"#\s(.*?)", "$1");
|
|
|
|
|
|
// Just strip symbols
|
|
plainText = Regex.Replace(plainText, @"[_*\[\]~]", string.Empty);
|
|
plainText = Regex.Replace(plainText, @"img\d*\((.*?)\)", string.Empty);
|
|
plainText = Regex.Replace(plainText, @"~~~", string.Empty);
|
|
plainText = Regex.Replace(plainText, @"\+", string.Empty);
|
|
plainText = Regex.Replace(plainText, @"~~", string.Empty);
|
|
plainText = Regex.Replace(plainText, @"__", string.Empty);
|
|
|
|
// Take the first BodyTextLimit characters
|
|
plainText = plainText.Length > BodyTextLimit ? plainText.Substring(0, BodyTextLimit) : plainText;
|
|
|
|
return plainText + "…";
|
|
}
|
|
}
|