mirror of
https://github.com/Kareadita/Kavita.git
synced 2026-01-16 00:50:24 -05:00
198 lines
6.5 KiB
C#
198 lines
6.5 KiB
C#
using API.DTOs.Progress;
|
|
using System;
|
|
using System.IO;
|
|
using System.Linq;
|
|
using System.Security.Cryptography;
|
|
using System.Text;
|
|
using System.Text.RegularExpressions;
|
|
|
|
namespace API.Helpers;
|
|
|
|
/// <summary>
/// All things related to Koreader
/// </summary>
/// <remarks>Original developer: https://github.com/MFDeAngelo</remarks>
public static partial class KoreaderHelper
{
    /// <summary>
    /// Matches a DocFragment[10] path segment, number captured in Group 1
    /// </summary>
    [GeneratedRegex(@"DocFragment\[(\d+)\]")]
    private static partial Regex DocFragmentRegex();

    /// <summary>
    /// Matches a position that consists of digits only
    /// </summary>
    [GeneratedRegex(@"^\d+$")]
    private static partial Regex JustNumber();

    /// <summary>
    /// Matches #_doc_fragment_10, #_doc_fragment_10_ some_anchor, #_doc_fragment10, number captured in Group 1
    /// </summary>
    [GeneratedRegex(@"^#_doc_fragment_?(\d+)")]
    private static partial Regex DocFragmentHashRegex();

    /// <summary>
    /// Hashes the document according to a custom Koreader hashing algorithm.
    /// Look at the util.partialMD5 method in the attached link.
    /// Note: Only applies to epub files
    /// </summary>
    /// <remarks>
    /// The hashing algorithm is relatively quick: it reads at most 11 samples of 1024 bytes each,
    /// taken at offsets 0, 1 KiB, 4 KiB, 16 KiB, ... and stops at the first sample that lies past the end of the file.
    /// <see href="https://github.com/koreader/koreader/blob/master/frontend/util.lua#L1040"/>
    /// </remarks>
    /// <param name="filePath">The path to the file to hash</param>
    /// <returns>
    /// The MD5 of the sampled bytes as an upper-case hex string, or null when <paramref name="filePath"/>
    /// is null/empty or the file does not exist
    /// </returns>
    public static string HashContents(string filePath)
    {
        if (string.IsNullOrEmpty(filePath) || !File.Exists(filePath))
        {
            return null;
        }

        const int step = 1024;
        const int size = 1024;

        using var file = File.OpenRead(filePath);
        using var md5 = IncrementalHash.CreateHash(HashAlgorithmName.MD5);
        var buffer = new byte[size];

        // NOTE(review): KOReader's util.partialMD5 appears to iterate i = -1..10 inclusive, while this loop
        // stops at i = 9. If so, files of 1 GiB or more would hash differently - confirm against upstream.
        for (var i = -1; i < 10; i++)
        {
            // The first sample (i = -1) starts at offset 0; afterwards the offset is step * 4^i.
            file.Position = i < 0 ? 0L : (long) step << (2 * i);

            // A plain Read may return fewer bytes than requested before EOF, which would truncate the sample.
            var bytesRead = file.ReadAtLeast(buffer, size, throwOnEndOfStream: false);
            if (bytesRead == 0)
            {
                // Sample offset is at or past the end of the file, nothing more to hash
                break;
            }

            md5.AppendData(buffer, 0, bytesRead);
        }

        // Convert.ToHexString already produces upper-case hex
        return Convert.ToHexString(md5.GetHashAndReset());
    }

    /// <summary>
    /// Koreader can identify documents based on contents or title.
    /// For now, we only support by contents.
    /// </summary>
    /// <remarks>
    /// The file name is hashed as UTF-8. ASCII encoding replaces every non-ASCII character with '?',
    /// which made different non-ASCII file names produce the same hash.
    /// </remarks>
    /// <param name="filePath">Path whose file name (last path segment) is hashed</param>
    /// <returns>The MD5 of the file name as an upper-case hex string</returns>
    /// <exception cref="ArgumentNullException"><paramref name="filePath"/> is null</exception>
    public static string HashTitle(string filePath)
    {
        ArgumentNullException.ThrowIfNull(filePath);

        var fileName = Path.GetFileName(filePath);
        var digest = MD5.HashData(Encoding.UTF8.GetBytes(fileName));

        return Convert.ToHexString(digest);
    }

    /// <summary>
    /// Translates a Koreader position string into the page number and scroll id of the given progress.
    /// </summary>
    /// <remarks>
    /// Handled formats:
    /// #_doc_fragment_26 (also #_doc_fragment26 and #_doc_fragment_10_ some_anchor): sets the page and clears the scroll id.
    /// A plain number (indicates an Archive/PDF): sets the page only.
    /// /body/DocFragment[10].0 (3 to 5 path segments): sets the page only.
    /// /body/DocFragment[10]/body/div/p[3]/text().0 (6 or more path segments): sets the page and the scroll id.
    /// Koreader numbers are 1-based, the stored page number is 0-based.
    /// A blank position, or a number that does not fit in an int, leaves the progress untouched.
    /// </remarks>
    /// <param name="progress">The progress to update in place</param>
    /// <param name="koreaderPosition">The position string sent by Koreader</param>
    public static void UpdateProgressDto(ProgressDto progress, string koreaderPosition)
    {
        if (string.IsNullOrWhiteSpace(koreaderPosition))
        {
            return;
        }

        // Handle: #_doc_fragment_26, #_doc_fragment26, #_doc_fragment_10_ some_anchor
        var hashMatch = DocFragmentHashRegex().Match(koreaderPosition);
        if (hashMatch.Success)
        {
            // The value comes from the client, so it may not fit in an int
            if (int.TryParse(hashMatch.Groups[1].Value, out var fragment))
            {
                progress.PageNum = fragment - 1;
                progress.BookScrollId = null;
            }

            return;
        }

        // Check if koreaderPosition is just a number, this indicates an Archive/PDF
        if (JustNumber().IsMatch(koreaderPosition))
        {
            if (int.TryParse(koreaderPosition, out var page))
            {
                progress.PageNum = page - 1;
            }

            return;
        }

        var path = koreaderPosition.Split('/');
        if (path.Length < 3)
        {
            return;
        }

        progress.PageNum = GetPageNumber(path);

        // Handle cases like: /body/DocFragment[10].0 - there is no element path to scroll to
        if (path.Length < 6)
        {
            return;
        }

        var lastTag = path[5];

        // Keep what follows the last /body/ and drop a trailing /text()... selector
        var lastPart = koreaderPosition.Split("/body/")[^1].Split("/text()")[0];

        // Also strip trailing .N position markers (a dot followed by one or more digits at the very end)
        var dotIndex = lastPart.LastIndexOf('.');
        if (dotIndex > 0 && dotIndex < lastPart.Length - 1 && lastPart[(dotIndex + 1)..].All(char.IsDigit))
        {
            lastPart = lastPart[..dotIndex];
        }

        // Skip anchor tags and id() selectors - can't reliably scroll to these
        var isAnchor = string.Equals(lastTag, "A", StringComparison.OrdinalIgnoreCase);
        var usesIdSelector = lastPart.Contains("id(", StringComparison.OrdinalIgnoreCase)
                             || lastTag.StartsWith("id(", StringComparison.OrdinalIgnoreCase);

        // The format that Kavita accepts as a progress string. It tells Kavita where Koreader last left off.
        progress.BookScrollId = isAnchor || usesIdSelector ? null : $"//body/{lastPart}";
    }

    /// <summary>
    /// Reads the 0-based page number from the DocFragment[N] segment of a split Koreader position.
    /// </summary>
    /// <param name="path">The Koreader position split on '/'</param>
    /// <param name="offset">Index of the segment expected to hold DocFragment[N]</param>
    /// <returns>N - 1, or 0 when the segment is missing, does not match or N does not fit in an int</returns>
    private static int GetPageNumber(string[] path, int offset = 2)
    {
        if (offset >= path.Length)
        {
            return 0;
        }

        var match = DocFragmentRegex().Match(path[offset]);

        return match.Success && int.TryParse(match.Groups[1].Value, out var fragment)
            ? fragment - 1
            : 0;
    }

    /// <summary>
    /// The format that Koreader accepts as a progress string. It tells Koreader where Kavita last left off.
    /// </summary>
    /// <remarks>
    /// Koreader stores the format as:
    /// /body/DocFragment[fragment_index]/body/[xpath_to_element]
    /// fragment_index is the page number for the xhtml files
    /// </remarks>
    /// <param name="progressDto">The Kavita progress, null is treated as the start of the first page</param>
    /// <returns>The Koreader position string</returns>
    public static string GetKoreaderPosition(ProgressDto? progressDto)
    {
        // Add 1 back to match KOReader's 1-based indexing
        var fragmentIndex = (progressDto?.PageNum ?? 0) + 1;

        // Points to the start of the fragment, .0 is the character offset (start of element)
        var fragmentStart = $"/body/DocFragment[{fragmentIndex}].0";

        var scrollId = progressDto?.BookScrollId;
        if (string.IsNullOrEmpty(scrollId))
        {
            // No scroll position
            return fragmentStart;
        }

        var targetPath = scrollId.Replace("//body/", string.Empty, StringComparison.OrdinalIgnoreCase);

        // KOReader can't handle id() XPath selectors - just return the base path
        if (targetPath.StartsWith("id(", StringComparison.OrdinalIgnoreCase))
        {
            return fragmentStart;
        }

        // Append .0 offset if the path doesn't already have a position marker
        var fullPath = $"/body/DocFragment[{fragmentIndex}]/body/{targetPath}";
        if (!fullPath.Contains("/text()") && !fullPath.EndsWith(".0", StringComparison.Ordinal))
        {
            fullPath += ".0";
        }

        return fullPath;
    }
}
|