Progress Overhaul + Profile Page and a LOT more! (#4262)

Co-authored-by: Amelia <77553571+Fesaa@users.noreply.github.com>
Co-authored-by: Robbie Davis <robbie@therobbiedavis.com>
This commit is contained in:
Joe Milazzo
2025-12-09 10:00:11 -07:00
committed by GitHub
parent 4ac13f1f25
commit 9f29fa593d
645 changed files with 25585 additions and 4805 deletions
+209 -15
View File
@@ -6,8 +6,6 @@ using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using System.Xml;
using System.Xml.XPath;
using API.Data.Metadata;
using API.DTOs.Reader;
using API.Entities;
@@ -52,7 +50,7 @@ public interface IBookService
/// <returns></returns>
Task<string> ScopeStyles(string stylesheetHtml, string apiBase, string filename, EpubBookRef book);
/// <summary>
/// Extracts a PDF file's pages as images to an target directory
/// Extracts a PDF file's pages as images to a target directory
/// </summary>
/// <remarks>This method relies on Docnet which has explicit patches from Kavita for ARM support. This should only be used with Tachiyomi</remarks>
/// <param name="fileFilePath"></param>
@@ -62,6 +60,7 @@ public interface IBookService
Task<string> GetBookPage(int page, int chapterId, string cachedEpubPath, string baseUrl, List<PersonalToCDto> ptocBookmarks, List<AnnotationDto> annotations);
Task<Dictionary<string, int>> CreateKeyToPageMappingAsync(EpubBookRef book);
Task<IDictionary<int, int>?> GetWordCountsPerPage(string bookFilePath);
Task<int> GetWordCountBetweenXPaths(string bookFilePath, string startXpath, string endXpath);
Task<string> CopyImageToTempFromBook(int chapterId, BookmarkDto bookmarkDto, string cachedBookPath);
Task<BookResourceResultDto> GetResourceAsync(string bookFilePath, string requestedKey);
@@ -950,20 +949,10 @@ public partial class BookService : IBookService
var content = await contentFileRef.ReadContentAsync();
doc.LoadHtml(content);
var body = doc.DocumentNode.SelectSingleNode("//body");
if (body == null)
{
_logger.LogError("{FilePath} has no body tag! Generating one for support. Book may be skewed", book.FilePath);
doc.DocumentNode.SelectSingleNode("/html").AppendChild(HtmlNode.CreateNode("<body></body>"));
body = doc.DocumentNode.SelectSingleNode("//html/body");
}
var body = GetBodyOrCreate(doc, book);
// Find all words in the html body
// TEMP: REfactor this to use WordCountAnalyzerService
var textNodes = body!.SelectNodes("//text()[not(parent::script)]");
ret.Add(page, textNodes?.Sum(node => node.InnerText.Count(char.IsLetter)) ?? 0);
ret.Add(page, CountLettersInBody(body));
}
}
@@ -976,6 +965,209 @@ public partial class BookService : IBookService
return ret;
}
/// <summary>
/// Returns the body element of the parsed page, generating and attaching an empty one when the markup has none.
/// </summary>
/// <param name="doc">Parsed page markup. Mutated when a body has to be generated.</param>
/// <param name="book">Book the page came from; only its file path is read, for the log message.</param>
/// <returns>The existing body node, or the newly attached empty body.</returns>
private HtmlNode GetBodyOrCreate(HtmlDocument doc, EpubBookRef book)
{
    var body = doc.DocumentNode.SelectSingleNode("//body");
    if (body != null) return body;

    _logger.LogError("{FilePath} has no body tag! Generating one for support. Book may be skewed", book.FilePath);

    // Markup without a body may lack the html element too; attach to the document root in that case
    // instead of dereferencing a null lookup result.
    var parent = doc.DocumentNode.SelectSingleNode("/html") ?? doc.DocumentNode;

    // Hand back the node we attached rather than re-querying, so the result is valid for either parent.
    var generatedBody = HtmlNode.CreateNode("<body></body>");
    parent.AppendChild(generatedBody);
    return generatedBody;
}
/// <summary>
/// Counts letters in document order from the top of the root until the target node is reached.
/// The target node and everything after it are excluded.
/// </summary>
/// <param name="root">Node whose subtree is walked.</param>
/// <param name="targetNode">Node at which the walk stops.</param>
/// <returns>Number of letter characters found in text nodes that are not direct children of a script element.</returns>
private static int CountLettersUpToNode(HtmlNode root, HtmlNode targetNode)
{
    var total = 0;

    // An explicit stack yields the same pre-order (document order) walk as a recursive descent.
    var pending = new Stack<HtmlNode>();
    pending.Push(root);

    while (pending.Count > 0)
    {
        var current = pending.Pop();

        // Reaching the target ends the walk; nothing at or after it is counted.
        if (ReferenceEquals(current, targetNode)) break;

        var isCountableText = current.NodeType == HtmlNodeType.Text && current.ParentNode?.Name != "script";
        if (isCountableText)
        {
            total += current.InnerText.Count(char.IsLetter);
        }

        // Push children last-to-first so the first child is the next node popped.
        var children = current.ChildNodes;
        for (var i = children.Count - 1; i >= 0; i--)
        {
            pending.Push(children[i]);
        }
    }

    return total;
}
#region Count Letters Between XPaths
/// <summary>
/// Counts the (estimated) words for a given book from a starting xpath (or the beginning of the book if null or empty) to an ending xpath.
/// May cross page boundaries.
/// </summary>
/// <remarks>
/// Pages are visited in reading order. If the end xpath is never matched after counting has started, every remaining page is counted.
/// NOTE(review): both xpaths are evaluated against each page, so a purely structural xpath can match on a page other than
/// the intended one - confirm that callers pass xpaths that are specific enough.
/// </remarks>
/// <param name="bookFilePath">Path of the epub to open</param>
/// <param name="startXpath">XPath of the node where counting starts; null or empty starts at the first page</param>
/// <param name="endXpath">XPath of the node where counting stops; null or empty yields 0</param>
/// <returns>Estimated word count, or 0 when no end xpath is given or an exception occurs while processing the book</returns>
public async Task<int> GetWordCountBetweenXPaths(string bookFilePath, string? startXpath, string endXpath)
{
    if (string.IsNullOrEmpty(endXpath)) return 0;

    var totalCharacters = 0;
    var foundStart = string.IsNullOrEmpty(startXpath); // If no start, begin counting immediately

    try
    {
        using var book = await EpubReader.OpenBookAsync(bookFilePath, LenientBookReaderOptions);
        var doc = new HtmlDocument { OptionFixNestedTags = true };

        var bookPages = await book.GetReadingOrderAsync();
        foreach (var contentFileRef in bookPages)
        {
            var content = await contentFileRef.ReadContentAsync();
            doc.LoadHtml(content);

            var body = GetBodyOrCreate(doc, book);

            // Only look for the start until it has been found. Re-matching the same xpath on later pages
            // would make those pages count from that node instead of from their top.
            var startNode = foundStart || string.IsNullOrEmpty(startXpath) ? null : body.SelectSingleNode(startXpath);
            var endNode = body.SelectSingleNode(endXpath);

            // Case 1: Both start and end are on the same page
            if (startNode != null && endNode != null)
            {
                totalCharacters += CountLettersBetweenNodes(body, startNode, endNode);
                break;
            }

            // Case 2: Found start node - begin counting from this point to end of page
            if (startNode != null)
            {
                foundStart = true;
                totalCharacters += CountLettersFromNode(body, startNode);
                continue;
            }

            // Pages before the start contribute nothing, even if the end xpath happens to match on them
            if (!foundStart) continue;

            // Case 3: Found end node - count from beginning of page up to this point and stop
            if (endNode != null)
            {
                totalCharacters += CountLettersUpToNode(body, endNode);
                break;
            }

            // Case 4: Between start and end - count entire page
            totalCharacters += CountLettersInBody(body);
        }
    }
    catch (Exception ex)
    {
        _logger.LogError(ex, "There was an issue calculating word counts between XPaths");
        return 0;
    }

    return WordCountAnalyzerService.GetWordCount(totalCharacters);
}
/// <summary>
/// Counts letters from a starting node (its own text included) through to the end of the container.
/// Returns 0 when the starting node is not part of the container.
/// </summary>
/// <param name="container">Node whose subtree is walked in document order.</param>
/// <param name="startNode">First node whose text contributes to the count.</param>
/// <returns>Number of letter characters found in text nodes that are not direct children of a script element.</returns>
private static int CountLettersFromNode(HtmlNode container, HtmlNode startNode)
{
    var total = 0;
    var reachedStart = false;

    // DescendantsAndSelf walks the container and its subtree in document order.
    foreach (var current in container.DescendantsAndSelf())
    {
        // Latch on at the start node and stay on for everything that follows it.
        reachedStart = reachedStart || ReferenceEquals(current, startNode);
        if (!reachedStart) continue;

        if (current.NodeType != HtmlNodeType.Text) continue;
        if (current.ParentNode?.Name == "script") continue;

        total += current.InnerText.Count(char.IsLetter);
    }

    return total;
}
/// <summary>
/// Counts letters between two nodes in the same container, walking the container in document order.
/// </summary>
/// <remarks>
/// The start node's own subtree is skipped unless the end node lies inside it, in which case only the text
/// that precedes the end node is counted. Returns 0 when both nodes are the same or the end node comes first.
/// </remarks>
/// <param name="container">Node whose subtree is walked.</param>
/// <param name="startNode">Node after which counting begins.</param>
/// <param name="endNode">Node at which counting stops; its own text is excluded.</param>
/// <returns>Number of letter characters found in text nodes that are not direct children of a script element.</returns>
private static int CountLettersBetweenNodes(HtmlNode container, HtmlNode startNode, HtmlNode endNode)
{
    var letterCount = 0;
    var countingStarted = false;
    var foundEnd = false;

    // If the end lives inside the start node, skipping the start's subtree would hide the end
    // and the walk would run on to the end of the container.
    var endInsideStart = false;
    for (var ancestor = endNode.ParentNode; ancestor != null; ancestor = ancestor.ParentNode)
    {
        if (ancestor != startNode) continue;
        endInsideStart = true;
        break;
    }

    TraverseNodes(container);
    return letterCount;

    void TraverseNodes(HtmlNode node)
    {
        if (foundEnd) return;

        // Checked before the start so that startNode == endNode produces an empty range
        if (node == endNode)
        {
            foundEnd = true;
            return;
        }

        if (node == startNode)
        {
            countingStarted = true;
            if (!endInsideStart) return; // Start counting after this node
        }

        if (countingStarted && node.NodeType == HtmlNodeType.Text && node.ParentNode?.Name != "script")
        {
            letterCount += node.InnerText.Count(char.IsLetter);
        }

        foreach (var child in node.ChildNodes)
        {
            TraverseNodes(child);
            if (foundEnd) return;
        }
    }
}
#endregion
/// <summary>
/// Counts the letters in every text node beneath the given body, ignoring text that sits directly inside a script element.
/// </summary>
/// <param name="body">Body node whose descendants are inspected.</param>
/// <returns>Number of letter characters, or 0 when the body has no matching text nodes.</returns>
private static int CountLettersInBody(HtmlNode body)
{
    // A leading "//" is resolved from the document root and would also pick up text outside the body
    // (e.g. the head's title); ".//" restricts the query to descendants of body.
    var textNodes = body.SelectNodes(".//text()[not(parent::script)]");
    return textNodes?.Sum(node => node.InnerText.Count(char.IsLetter)) ?? 0;
}
public async Task<string> CopyImageToTempFromBook(int chapterId, BookmarkDto bookmarkDto, string cachedBookPath)
{
using var book = await EpubReader.OpenBookAsync(cachedBookPath, LenientBookReaderOptions);
@@ -1496,6 +1688,8 @@ public partial class BookService : IBookService
content = EscapeTags(content);
doc.LoadHtml(content);
var body = doc.DocumentNode.SelectSingleNode("//body");
if (body == null)