using System;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Security.Cryptography;
using System.Text;
using System.Text.RegularExpressions;
using API.DTOs.Progress;
using API.Entities.Enums;
using API.Services;
using API.Services.Tasks.Scanner.Parser;

namespace API.Helpers;

/// <summary>
/// All things related to Koreader
/// </summary>
/// <remarks>Original developer: https://github.com/MFDeAngelo</remarks>
public static partial class KoreaderHelper
{
    /// <summary>
    /// Matches a DocFragment[N] path segment, number captured in Group 1
    /// </summary>
    [GeneratedRegex(@"DocFragment\[(\d+)\]")]
    private static partial Regex DocFragmentRegex();

    /// <summary>
    /// Matches a position that consists of digits only
    /// </summary>
    [GeneratedRegex(@"^\d+$")]
    private static partial Regex JustNumber();

    /// <summary>
    /// Matches #_doc_fragment_10, #_doc_fragment_10_ some_anchor, #_doc_fragment10, number captured in Group 1
    /// </summary>
    [GeneratedRegex(@"^#_doc_fragment_?(\d+)")]
    private static partial Regex DocFragmentHashRegex();

    /// <summary>
    /// Hashes the document according to a custom Koreader hashing algorithm
    /// (see the util.partialMD5 method in Koreader).
    /// Note: Only applies to epub files
    /// </summary>
    /// <remarks>
    /// The hashing algorithm is relatively quick as it only hashes ~10,000 bytes for the biggest of files:
    /// up to 1024 bytes are sampled at offsets 0, 1024, 4096, 16384, ... (1024 * 4^i) until a sample
    /// comes back empty.
    /// </remarks>
    /// <param name="filePath">The path to the file to hash</param>
    /// <returns>Upper-case hex MD5 of the sampled bytes, or null when the path is null/empty or the file does not exist</returns>
    public static string HashContents(string filePath)
    {
        if (string.IsNullOrEmpty(filePath) || !File.Exists(filePath))
        {
            return null;
        }

        const int step = 1024;
        const int size = 1024;

        using var file = File.OpenRead(filePath);
        using var md5 = IncrementalHash.CreateHash(HashAlgorithmName.MD5);
        var buffer = new byte[size];

        // NOTE(review): Koreader's util.partialMD5 appears to iterate i = -1..10 inclusive, while this
        // loop stops at i = 9. That would only matter for files of 1 GiB or more - confirm before changing.
        for (var i = -1; i < 10; i++)
        {
            // The first sample (i = -1) is taken at the very start of the file.
            file.Position = i < 0 ? 0L : (long) step << (2 * i);

            // A single Read may legally return fewer bytes than requested even when more are available,
            // which would silently change the digest. ReadAtLeast only stops short at end of stream.
            var bytesRead = file.ReadAtLeast(buffer, size, throwOnEndOfStream: false);
            if (bytesRead == 0)
            {
                break;
            }

            md5.AppendData(buffer, 0, bytesRead);
        }

        // Convert.ToHexString already produces upper-case digits.
        return Convert.ToHexString(md5.GetHashAndReset());
    }

    /// <summary>
    /// Koreader can identify documents based on contents or title.
    /// For now, we only support by contents.
    /// </summary>
    /// <param name="filePath">Path whose file name (last path segment) is hashed</param>
    /// <returns>Upper-case hex MD5 of the UTF-8 bytes of the file name</returns>
    /// <exception cref="ArgumentNullException"><paramref name="filePath"/> is null</exception>
    public static string HashTitle(string filePath)
    {
        ArgumentNullException.ThrowIfNull(filePath);

        var fileName = Path.GetFileName(filePath);

        // UTF-8 rather than ASCII: ASCII encoding replaces every non-ASCII character with '?',
        // which makes different titles hash to the same value. ASCII-only names hash identically.
        var fileNameBytes = Encoding.UTF8.GetBytes(fileName);
        return Convert.ToHexString(MD5.HashData(fileNameBytes));
    }

    /// <summary>
    /// Translates a position string sent by Koreader into <see cref="ProgressDto.PageNum"/> and
    /// <see cref="ProgressDto.BookScrollId"/> on the given DTO.
    /// </summary>
    /// <remarks>
    /// Handled shapes:
    /// <list type="bullet">
    /// <item>#_doc_fragment_26, #_doc_fragment26, #_doc_fragment_10_ some_anchor - page only, scroll id cleared</item>
    /// <item>digits only (indicates an Archive/PDF) - page only, scroll id left untouched</item>
    /// <item>/body/DocFragment[10].0 (fewer than 6 '/'-separated parts) - page only, scroll id left untouched</item>
    /// <item>/body/DocFragment[10]/body/div/p[3]/text()[1].42 - page plus a //body/... scroll id</item>
    /// </list>
    /// Blank input, or a number that does not fit in an int, leaves the DTO unchanged.
    /// </remarks>
    /// <param name="progress">The DTO to update in place</param>
    /// <param name="koreaderPosition">The raw position string from Koreader</param>
    public static void UpdateProgressDto(ProgressDto progress, string koreaderPosition)
    {
        if (string.IsNullOrWhiteSpace(koreaderPosition))
        {
            return;
        }

        // Handle: #_doc_fragment_26, #_doc_fragment26, #_doc_fragment_10_ some_anchor
        var hashMatch = DocFragmentHashRegex().Match(koreaderPosition);
        if (hashMatch.Success)
        {
            if (TryParsePageNumber(hashMatch.Groups[1].Value, out var fragmentPage))
            {
                progress.PageNum = fragmentPage;
                progress.BookScrollId = null;
            }

            return;
        }

        // Check if koreaderPosition is just a number, this indicates an Archive/PDF
        if (JustNumber().IsMatch(koreaderPosition))
        {
            if (TryParsePageNumber(koreaderPosition, out var page))
            {
                progress.PageNum = page;
            }

            return;
        }

        var path = koreaderPosition.Split('/');
        if (path.Length < 3)
        {
            return;
        }

        progress.PageNum = GetPageNumber(path);

        // Handle cases like: /body/DocFragment[10].0 - there is no element path to scroll to
        if (path.Length < 6)
        {
            return;
        }

        // Sixth '/'-separated part, e.g. "p[3]" in /body/DocFragment[10]/body/div/p[3]/text()[1].42
        var sixthSegment = path[5];
        var elementPath = StripPositionSuffix(koreaderPosition.Split("/body/")[^1]);

        // Skip anchor tags and id() selectors - can't reliably scroll to these.
        // Ordinal comparisons keep the checks independent of the current culture.
        var cannotScroll = string.Equals(sixthSegment, "a", StringComparison.OrdinalIgnoreCase)
                           || elementPath.Contains("id(", StringComparison.OrdinalIgnoreCase)
                           || sixthSegment.StartsWith("id(", StringComparison.OrdinalIgnoreCase);

        // The format that Kavita accepts as a progress string. It tells Kavita where Koreader last left off.
        progress.BookScrollId = cannotScroll ? null : $"//body/{elementPath}";
    }

    /// <summary>
    /// Removes trailing position indicators like .0 or /text()[1].42 from an element path.
    /// </summary>
    private static string StripPositionSuffix(string elementPath)
    {
        var trimmed = elementPath.Split("/text()")[0];

        // Strip a trailing .N marker: a dot that is not the first character, followed only by digits
        var dotIndex = trimmed.LastIndexOf('.');
        var hasSuffix = dotIndex > 0 && dotIndex < trimmed.Length - 1;
        if (hasSuffix && trimmed[(dotIndex + 1)..].All(char.IsDigit))
        {
            return trimmed[..dotIndex];
        }

        return trimmed;
    }

    /// <summary>
    /// Converts Koreader's 1-based index text into a 0-based page number, never below 0.
    /// </summary>
    /// <returns>false when the text is not a number that fits in an int</returns>
    private static bool TryParsePageNumber(string text, out int pageNumber)
    {
        // The regexes use \d, which also matches non-ASCII digits that int parsing rejects, and the
        // value may be arbitrarily long - so parsing must be allowed to fail without throwing.
        if (int.TryParse(text, NumberStyles.Integer, CultureInfo.InvariantCulture, out var oneBased))
        {
            pageNumber = Math.Max(0, oneBased - 1);
            return true;
        }

        pageNumber = 0;
        return false;
    }

    /// <summary>
    /// Reads the 0-based page number out of the DocFragment[N] segment at <paramref name="offset"/>.
    /// </summary>
    /// <returns>0 when the segment is missing, is not a DocFragment[N] segment, or N cannot be parsed</returns>
    private static int GetPageNumber(string[] path, int offset = 2)
    {
        if (offset >= path.Length)
        {
            return 0;
        }

        var match = DocFragmentRegex().Match(path[offset]);
        if (!match.Success)
        {
            return 0;
        }

        return TryParsePageNumber(match.Groups[1].Value, out var pageNumber) ? pageNumber : 0;
    }

    /// <summary>
    /// The format that Koreader accepts as a progress string. It tells Koreader where Kavita last left off.
    /// </summary>
    /// <remarks>
    /// Koreader stores the format as:
    /// /body/DocFragment[fragment_index]/body/[xpath_to_element]
    /// fragment_index is the page number for the xhtml files
    /// </remarks>
    /// <param name="progressDto">Progress whose PageNum and BookScrollId are translated</param>
    /// <returns>The Koreader position string</returns>
    public static string GetKoreaderPosition(ProgressDto progressDto)
    {
        // Add 1 back to match KOReader's 1-based indexing
        var fragmentIndex = progressDto.PageNum + 1;

        // .0 is the character offset (start of element)
        var fragmentStart = $"/body/DocFragment[{fragmentIndex}].0";

        // No scroll position - point to start of fragment
        if (string.IsNullOrEmpty(progressDto.BookScrollId))
        {
            return fragmentStart;
        }

        var targetPath = progressDto.BookScrollId
            .Replace("//body/", string.Empty, StringComparison.OrdinalIgnoreCase);

        // KOReader can't handle id() XPath selectors - just return the base path
        if (targetPath.StartsWith("id(", StringComparison.OrdinalIgnoreCase))
        {
            return fragmentStart;
        }

        // Append .0 offset if the path doesn't already have a position marker
        var fullPath = $"/body/DocFragment[{fragmentIndex}]/body/{targetPath}";
        var hasPositionMarker = fullPath.Contains("/text()", StringComparison.Ordinal)
                                || fullPath.EndsWith(".0", StringComparison.Ordinal);

        return hasPositionMarker ? fullPath : fullPath + ".0";
    }
}