mirror of
https://github.com/Kareadita/Kavita.git
synced 2025-09-29 15:30:50 -04:00
Co-authored-by: Amelia <77553571+Fesaa@users.noreply.github.com> Co-authored-by: Robbie Davis <robbie@therobbiedavis.com> Co-authored-by: Fabian Pammer <fpammer@mantro.net> Co-authored-by: Vinícius Licz <vinilicz@gmail.com>
267 lines
9.6 KiB
C#
267 lines
9.6 KiB
C#
using System;
|
|
using System.Collections.Generic;
|
|
using System.Linq;
|
|
using System.Text.RegularExpressions;
|
|
using API.DTOs.Reader;
|
|
using HtmlAgilityPack;
|
|
|
|
namespace API.Helpers;
|
|
#nullable enable
|
|
|
|
/// <summary>
/// Helpers that inject <c>app-epub-highlight</c> elements into a parsed epub page so that stored
/// annotations can be rendered by the reader.
/// </summary>
/// <remarks>
/// Highlighting rebuilds the affected element from its <see cref="HtmlNode.InnerText"/>, which means any
/// child markup inside a highlighted element is replaced by plain text plus the highlight nodes.
/// </remarks>
public static partial class AnnotationHelper
{
    private const string UiXPathScope = "//BODY/DIV[1]"; // Div[1] is the div we inject reader contents into

    /// <summary>
    /// Separator placed between element texts when the combined text of a multi-element range is built.
    /// Offsets produced by <see cref="BuildElementTextMappings"/> have to account for its length.
    /// </summary>
    private const string ElementTextSeparator = "\n\n";

    [GeneratedRegex("""^id\("([^"]+)"\)$""")]
    private static partial Regex IdXPathRegex();

    /// <summary>
    /// Given an xpath that is scoped to the epub reader, transform it into a page-level xpath
    /// </summary>
    /// <param name="xpath">XPath as recorded by the reader UI</param>
    /// <returns>The xpath with the reader scope replaced by <c>//BODY</c>, lower-cased with the invariant culture</returns>
    public static string DescopeXpath(string xpath)
    {
        return xpath.Replace(UiXPathScope, "//BODY").ToLowerInvariant();
    }

    /// <summary>
    /// Injects highlights for annotations that start and end inside the same element.
    /// </summary>
    /// <remarks>
    /// Annotations are grouped by their XPath so each element is rebuilt once. A failure on one element is
    /// swallowed and processing continues with the next element.
    /// </remarks>
    /// <param name="doc">Parsed page the highlights are injected into (mutated in place)</param>
    /// <param name="annotations">Annotations to inject</param>
    public static void InjectSingleElementAnnotations(HtmlDocument doc, List<AnnotationDto> annotations)
    {
        // Iterate the groups directly: GroupBy tolerates a null key whereas ToDictionary would throw on it
        foreach (var elementAnnotations in annotations.GroupBy(a => a.XPath))
        {
            if (string.IsNullOrWhiteSpace(elementAnnotations.Key)) continue;

            try
            {
                // The stored xpath is scoped to the reader UI, it must be descoped before it is resolved against the page
                var elem = FindElementByXPath(doc, DescopeXpath(elementAnnotations.Key));
                if (elem == null) continue;

                var originalText = elem.InnerText;
                var normalizedOriginalText = NormalizeWhitespace(originalText);

                var ranges = new List<(int Start, int End, int AnnotationId)>();
                foreach (var annotation in elementAnnotations)
                {
                    if (string.IsNullOrWhiteSpace(annotation.SelectedText)) continue;

                    var normalizedSelectedText = NormalizeWhitespace(annotation.SelectedText);
                    var normalizedStart = normalizedOriginalText.IndexOf(normalizedSelectedText, StringComparison.Ordinal);
                    if (normalizedStart < 0) continue;

                    // The match was found in normalized text, so both ends are mapped back before slicing the original text
                    var start = MapNormalizedPositionToOriginal(originalText, normalizedStart);
                    var end = MapNormalizedPositionToOriginal(originalText, normalizedStart + normalizedSelectedText.Length);
                    ranges.Add((start, end, annotation.Id));
                }

                RebuildWithHighlights(elem, originalText, ranges);
            }
            catch (Exception)
            {
                /* Swallow: highlighting is best effort, one bad element must not prevent the remaining ones */
            }
        }
    }

    /// <summary>
    /// Injects highlights for annotations that span from one element to a later sibling element.
    /// </summary>
    /// <remarks>
    /// A failure on one annotation is swallowed and processing continues with the next annotation.
    /// </remarks>
    /// <param name="doc">Parsed page the highlights are injected into (mutated in place)</param>
    /// <param name="annotations">Annotations to inject</param>
    public static void InjectMultiElementAnnotations(HtmlDocument doc, List<AnnotationDto> annotations)
    {
        foreach (var annotation in annotations)
        {
            try
            {
                if (string.IsNullOrWhiteSpace(annotation.XPath) || string.IsNullOrWhiteSpace(annotation.EndingXPath)) continue;
                if (string.IsNullOrWhiteSpace(annotation.SelectedText)) continue;

                var startElement = FindElementByXPath(doc, DescopeXpath(annotation.XPath));
                var endElement = FindElementByXPath(doc, DescopeXpath(annotation.EndingXPath));

                if (startElement == null || endElement == null) continue;

                // Get all elements between start and end (including start and end)
                var elementsInRange = GetElementsInRange(startElement, endElement);
                if (elementsInRange.Count == 0) continue;

                // Build full text to find our selection
                var fullText = string.Join(ElementTextSeparator, elementsInRange.Select(e => e.InnerText));

                // Normalize both texts for comparison
                var normalizedFullText = NormalizeWhitespace(fullText);
                var normalizedSelectedText = NormalizeWhitespace(annotation.SelectedText);

                var selectionStartPos = normalizedFullText.IndexOf(normalizedSelectedText, StringComparison.Ordinal);
                if (selectionStartPos == -1) continue;

                var selectionEndPos = selectionStartPos + normalizedSelectedText.Length;

                // Map positions back to elements using the original (non-normalized) text
                var elementTextMappings = BuildElementTextMappings(elementsInRange);

                // Convert normalized positions back to original text positions
                var originalSelectionStart = MapNormalizedPositionToOriginal(fullText, selectionStartPos);
                var originalSelectionEnd = MapNormalizedPositionToOriginal(fullText, selectionEndPos);

                // Process each element in the range
                for (var i = 0; i < elementsInRange.Count; i++)
                {
                    var mapping = elementTextMappings[i];

                    // Determine what part of this element should be highlighted (offsets relative to the element's own text)
                    var highlightStart = Math.Max(originalSelectionStart - mapping.StartPos, 0);
                    var highlightEnd = Math.Min(originalSelectionEnd - mapping.StartPos, mapping.TextLength);

                    if (highlightEnd <= highlightStart) continue; // No highlight in this element

                    InjectHighlightInElement(elementsInRange[i], highlightStart, highlightEnd, annotation.Id);
                }
            }
            catch (Exception)
            {
                /* Swallow: highlighting is best effort, one bad annotation must not prevent the remaining ones */
            }
        }
    }

    /// <summary>
    /// Trims the text and collapses every run of whitespace into a single space.
    /// </summary>
    /// <param name="text">Text to normalize</param>
    /// <returns>The normalized text</returns>
    private static string NormalizeWhitespace(string text)
    {
        return WhitespaceRegex().Replace(text.Trim(), " ");
    }

    /// <summary>
    /// Maps an offset in the output of <see cref="NormalizeWhitespace"/> back to the matching offset in the text
    /// that was normalized.
    /// </summary>
    /// <param name="originalText">The text before normalization</param>
    /// <param name="normalizedPosition">Offset into the normalized form of <paramref name="originalText"/></param>
    /// <returns>
    /// Offset into <paramref name="originalText"/>. Offsets beyond the normalized text map to the end of the original text.
    /// </returns>
    private static int MapNormalizedPositionToOriginal(string originalText, int normalizedPosition)
    {
        var originalPos = 0;

        // Normalization trims the text, so leading whitespace has no counterpart in the normalized text
        while (originalPos < originalText.Length && char.IsWhiteSpace(originalText[originalPos]))
        {
            originalPos++;
        }

        // Walk the original text, counting how many normalized characters have been consumed
        var normalizedPos = 0;
        while (originalPos < originalText.Length && normalizedPos < normalizedPosition)
        {
            if (char.IsWhiteSpace(originalText[originalPos]))
            {
                // A whole run of whitespace corresponds to one space in normalized text
                while (originalPos < originalText.Length && char.IsWhiteSpace(originalText[originalPos]))
                {
                    originalPos++;
                }
            }
            else
            {
                originalPos++;
            }

            normalizedPos++;
        }

        return originalPos;
    }

    /// <summary>
    /// Resolves an xpath against the document. Xpaths of the form <c>id("...")</c> are resolved by element id,
    /// everything else is lower-cased and evaluated as a regular xpath.
    /// </summary>
    /// <param name="doc">Document to search</param>
    /// <param name="xpath">XPath to resolve</param>
    /// <returns>The matching node, or null when nothing matches</returns>
    private static HtmlNode? FindElementByXPath(HtmlDocument doc, string xpath)
    {
        var idMatch = IdXPathRegex().Match(xpath);
        if (!idMatch.Success) return doc.DocumentNode.SelectSingleNode(xpath.ToLowerInvariant());

        var id = idMatch.Groups[1].Value;
        return string.IsNullOrWhiteSpace(id) ? null : doc.GetElementbyId(id);
    }

    /// <summary>
    /// Collects the start element followed by every element sibling after it, up to and including the end element.
    /// </summary>
    /// <remarks>
    /// Only siblings are walked. When the end element is not a following sibling of the start element, the walk
    /// runs to the last sibling and the end element itself is not part of the result.
    /// </remarks>
    /// <param name="startElement">First element of the range</param>
    /// <param name="endElement">Last element of the range</param>
    /// <returns>The elements in document order, always containing at least the start element</returns>
    private static List<HtmlNode> GetElementsInRange(HtmlNode startElement, HtmlNode endElement)
    {
        var elements = new List<HtmlNode> { startElement };

        // If start and end are the same, return just that element
        if (startElement == endElement) return elements;

        // Traverse siblings until we reach the end element
        var current = startElement;
        while (current != null && current != endElement)
        {
            current = current.NextSibling;
            if (current is {NodeType: HtmlNodeType.Element}) // Only include element nodes (skip text nodes, comments, etc.)
            {
                elements.Add(current);
            }
        }

        return elements;
    }

    /// <summary>
    /// Computes, for every element, where its text sits inside the combined text that is produced by joining
    /// the elements' texts with <see cref="ElementTextSeparator"/>.
    /// </summary>
    /// <param name="elements">Elements in the order they were joined</param>
    /// <returns>Start offset, exclusive end offset and length of each element's text within the combined text</returns>
    private static List<(int StartPos, int EndPos, int TextLength)> BuildElementTextMappings(List<HtmlNode> elements)
    {
        var mappings = new List<(int StartPos, int EndPos, int TextLength)>(elements.Count);
        var currentPos = 0;

        foreach (var element in elements)
        {
            var textLength = element.InnerText.Length;
            mappings.Add((currentPos, currentPos + textLength, textLength));

            // The next element starts after this element's text and the separator that follows it
            currentPos += textLength + ElementTextSeparator.Length;
        }

        return mappings;
    }

    /// <summary>
    /// Replaces the element's children with its text, wrapping the given part of the text in a highlight node.
    /// </summary>
    /// <param name="element">Element to rebuild</param>
    /// <param name="startPos">Start offset of the highlight within the element's text</param>
    /// <param name="endPos">Exclusive end offset of the highlight within the element's text</param>
    /// <param name="annotationId">Id of the annotation, used to build the highlight node's id</param>
    private static void InjectHighlightInElement(HtmlNode element, int startPos, int endPos, int annotationId)
    {
        RebuildWithHighlights(element, element.InnerText,
            new (int Start, int End, int AnnotationId)[] { (startPos, endPos, annotationId) });
    }

    /// <summary>
    /// Replaces the element's children with text nodes and highlight nodes built from the given ranges.
    /// The element is left untouched when no range produces a highlight.
    /// </summary>
    /// <param name="element">Element to rebuild</param>
    /// <param name="originalText">The element's text, which all range offsets refer to</param>
    /// <param name="ranges">Highlight ranges (start, exclusive end, annotation id) in any order</param>
    private static void RebuildWithHighlights(HtmlNode element, string originalText,
        IEnumerable<(int Start, int End, int AnnotationId)> ranges)
    {
        // All nodes are created up front so the element is only modified once nothing can fail anymore
        var nodes = new List<HtmlNode>();
        var currentPos = 0;

        foreach (var (start, end, annotationId) in ranges.OrderBy(r => r.Start))
        {
            // Clamp so that overlapping ranges never emit text that has already been written
            var highlightStart = Math.Max(start, currentPos);
            var highlightEnd = Math.Min(end, originalText.Length);
            if (highlightEnd <= highlightStart) continue;

            // Add text before highlight
            if (highlightStart > currentPos)
            {
                nodes.Add(HtmlNode.CreateNode(originalText.Substring(currentPos, highlightStart - currentPos)));
            }

            // Add highlight, using the document's own text rather than the user supplied selection
            var highlightText = originalText.Substring(highlightStart, highlightEnd - highlightStart);
            nodes.Add(HtmlNode.CreateNode(
                $"<app-epub-highlight id=\"epub-highlight-{annotationId}\">{highlightText}</app-epub-highlight>"));

            currentPos = highlightEnd;
        }

        if (nodes.Count == 0) return;

        // Add remaining text
        if (currentPos < originalText.Length)
        {
            nodes.Add(HtmlNode.CreateNode(originalText.Substring(currentPos)));
        }

        element.RemoveAllChildren();
        foreach (var node in nodes)
        {
            element.AppendChild(node);
        }
    }

    // RegexOptions.Compiled is not passed: the regex source generator ignores that flag
    [GeneratedRegex(@"\s+")]
    private static partial Regex WhitespaceRegex();
}
|