* Fixed a bug in bookmark mode where the prefetcher could not find the correct image.
* Fixed up the edit series relationship modal on tablet viewports.
* In double page mode, only bookmark 1 page if only 1 page is rendered on screen.
* Added percentage read of a given library and average hours read per week to user stats.
* Fixed a bug in the reader with paging in bookmark mode.
* Added a "This Week" option to top readers history.
* Added date ranges for reading time. Dates with no data are included for now, but may be removed.
* On phone, when applying a metadata filter, collapse the filter automatically after clicking Apply.
* Disable the jump bar and resuming from the last spot when a custom sort is applied.
* Ensure all Regex.Replace and Regex.Matches calls have timeouts set (see the sketch after this list).
* Fixed a long-standing bug where fit-to-height on tablets wouldn't center the image.
* Streamlined URL parsing to be more reliable.
* Removed an extra db query in chapter info.
* Added a missing task to convert covers to WebP, and added messaging to help the user understand to run it after modifying the setting.
* Changed OPDS to be enabled by default for new installs. This should reduce confusion for users before it's enabled.
* When there are multiple files for a chapter, show a count card on the series detail to help the user understand duplicates exist. Made the unread badge smaller to avoid collision.
* Added Word Count to user stats and wired up average reading per week.
* Fixed word count failing on some epubs.
* Removed some debug code.
* Don't give more information than necessary about file paths for page dimensions.
* Fixed a bug where the pagination area would be too small when the book's content was shorter than the viewport height in default mode.
* Updated the default layout mode for books to Scroll.
* Added bytes in the UI and at the API layer for CDisplayEx.
* Don't log health checks at all.
* Changed Word Count to Length to match the way pages work.
* Made reading time clearer when min hours is 0.
* Apply more aggressive coalescing when remapping bad metadata keys for epubs.
* Changed the amount of padding between icon and text for side nav items.
* Fixed an NPE in the book reader (harmless).
* Fixed an ordering issue where Volume 1 was a single file also tagged as Chapter 1 while Volume 2 was Chapter 0, so Volume 2 was being selected as the continue point when Volume 1 should have been.
* When clicking on an activity stream header from the dashboard, show the title on the resulting page.
* Removed a property that can't be animated.
* Fixed a typeahead TypeScript issue.
* Added Size to Series Info, plus some tooltip and spacing changes to better explain some fields.
* Added size to volume drawers and cleaned up some date edge-case handling.
* Fixed an annoying bug where, on mobile, opening a view with a metadata filter would open the filter automatically.
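
One item above calls for timeouts on all Regex.Replace/Matches calls. As a rough illustration of that technique (not taken from the Kavita codebase; the class and pattern below are hypothetical), .NET's Regex APIs accept a matchTimeout so a pathological pattern or input cannot hang a scan:

// Illustrative only: a bounded regex call in .NET. SafeRegexExample and StripTags are
// hypothetical names, not Kavita code.
using System;
using System.Text.RegularExpressions;

public static class SafeRegexExample
{
    private static readonly TimeSpan MatchTimeout = TimeSpan.FromMilliseconds(500);

    public static string StripTags(string html)
    {
        try
        {
            // The overload taking a TimeSpan throws RegexMatchTimeoutException instead of running forever
            return Regex.Replace(html, "<[^>]+>", string.Empty, RegexOptions.None, MatchTimeout);
        }
        catch (RegexMatchTimeoutException)
        {
            // Fall back to the original input rather than blocking the caller
            return html;
        }
    }
}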
using System;
using System.Diagnostics;
using System.Linq;
using System.Threading.Tasks;
using API.Data;
using API.Data.Repositories;
using API.Entities;
using API.Entities.Enums;
using API.Helpers;
using API.SignalR;
using Hangfire;
using HtmlAgilityPack;
using Microsoft.Extensions.Logging;
using VersOne.Epub;

namespace API.Services.Tasks.Metadata;

public interface IWordCountAnalyzerService
{
    [DisableConcurrentExecution(timeoutInSeconds: 60 * 60 * 60)]
    [AutomaticRetry(Attempts = 2, OnAttemptsExceeded = AttemptsExceededAction.Delete)]
    Task ScanLibrary(int libraryId, bool forceUpdate = false);
    Task ScanSeries(int libraryId, int seriesId, bool forceUpdate = true);
}

/// <summary>
/// This service is a metadata task that generates information around time to read
/// </summary>
public class WordCountAnalyzerService : IWordCountAnalyzerService
{
    private readonly ILogger<WordCountAnalyzerService> _logger;
    private readonly IUnitOfWork _unitOfWork;
    private readonly IEventHub _eventHub;
    private readonly ICacheHelper _cacheHelper;
    private readonly IReaderService _readerService;

    public WordCountAnalyzerService(ILogger<WordCountAnalyzerService> logger, IUnitOfWork unitOfWork, IEventHub eventHub,
        ICacheHelper cacheHelper, IReaderService readerService)
    {
        _logger = logger;
        _unitOfWork = unitOfWork;
        _eventHub = eventHub;
        _cacheHelper = cacheHelper;
        _readerService = readerService;
    }

    [DisableConcurrentExecution(timeoutInSeconds: 60 * 60 * 60)]
    [AutomaticRetry(Attempts = 2, OnAttemptsExceeded = AttemptsExceededAction.Delete)]
    public async Task ScanLibrary(int libraryId, bool forceUpdate = false)
    {
        var sw = Stopwatch.StartNew();
        var library = await _unitOfWork.LibraryRepository.GetLibraryForIdAsync(libraryId, LibraryIncludes.None);

        await _eventHub.SendMessageAsync(MessageFactory.NotificationProgress,
            MessageFactory.WordCountAnalyzerProgressEvent(libraryId, 0F, ProgressEventType.Started, string.Empty));

        // Process the library in chunks so progress can be reported per series and memory use stays bounded
        var chunkInfo = await _unitOfWork.SeriesRepository.GetChunkInfo(library.Id);
        var stopwatch = Stopwatch.StartNew();
        _logger.LogInformation("[MetadataService] Refreshing Library {LibraryName}. Total Items: {TotalSize}. Total Chunks: {TotalChunks} with {ChunkSize} size", library.Name, chunkInfo.TotalSize, chunkInfo.TotalChunks, chunkInfo.ChunkSize);

        await _eventHub.SendMessageAsync(MessageFactory.NotificationProgress,
            MessageFactory.WordCountAnalyzerProgressEvent(library.Id, 0F, ProgressEventType.Started, $"Starting {library.Name}"));

        for (var chunk = 1; chunk <= chunkInfo.TotalChunks; chunk++)
        {
            if (chunkInfo.TotalChunks == 0) continue;
            stopwatch.Restart();

            _logger.LogInformation("[MetadataService] Processing chunk {ChunkNumber} / {TotalChunks} with size {ChunkSize}. Series ({SeriesStart} - {SeriesEnd})",
                chunk, chunkInfo.TotalChunks, chunkInfo.ChunkSize, chunk * chunkInfo.ChunkSize, (chunk + 1) * chunkInfo.ChunkSize);

            var nonLibrarySeries = await _unitOfWork.SeriesRepository.GetFullSeriesForLibraryIdAsync(library.Id,
                new UserParams()
                {
                    PageNumber = chunk,
                    PageSize = chunkInfo.ChunkSize
                });
            _logger.LogDebug("[MetadataService] Fetched {SeriesCount} series for refresh", nonLibrarySeries.Count);

            var seriesIndex = 0;
            foreach (var series in nonLibrarySeries)
            {
                var index = chunk * seriesIndex;
                var progress = Math.Max(0F, Math.Min(1F, index * 1F / chunkInfo.TotalSize));

                await _eventHub.SendMessageAsync(MessageFactory.NotificationProgress,
                    MessageFactory.WordCountAnalyzerProgressEvent(library.Id, progress, ProgressEventType.Updated, series.Name));

                try
                {
                    await ProcessSeries(series, forceUpdate, false);
                }
                catch (Exception ex)
                {
                    _logger.LogError(ex, "[MetadataService] There was an exception during metadata refresh for {SeriesName}", series.Name);
                }
                seriesIndex++;
            }

            // Commit once per chunk rather than per series
            if (_unitOfWork.HasChanges())
            {
                await _unitOfWork.CommitAsync();
            }

            _logger.LogInformation(
                "[MetadataService] Processed {SeriesStart} - {SeriesEnd} out of {TotalSeries} series in {ElapsedScanTime} milliseconds for {LibraryName}",
                chunk * chunkInfo.ChunkSize, (chunk * chunkInfo.ChunkSize) + nonLibrarySeries.Count, chunkInfo.TotalSize, stopwatch.ElapsedMilliseconds, library.Name);
        }

        await _eventHub.SendMessageAsync(MessageFactory.NotificationProgress,
            MessageFactory.WordCountAnalyzerProgressEvent(library.Id, 1F, ProgressEventType.Ended, "Complete"));

        _logger.LogInformation("[WordCountAnalyzerService] Updated metadata for {LibraryName} in {ElapsedMilliseconds} milliseconds", library.Name, sw.ElapsedMilliseconds);
    }

    public async Task ScanSeries(int libraryId, int seriesId, bool forceUpdate = true)
    {
        var sw = Stopwatch.StartNew();
        var series = await _unitOfWork.SeriesRepository.GetFullSeriesForSeriesIdAsync(seriesId);
        if (series == null)
        {
            _logger.LogError("[WordCountAnalyzerService] Series {SeriesId} was not found on Library {LibraryId}", seriesId, libraryId);
            return;
        }

        await _eventHub.SendMessageAsync(MessageFactory.NotificationProgress,
            MessageFactory.WordCountAnalyzerProgressEvent(libraryId, 0F, ProgressEventType.Started, series.Name));

        await ProcessSeries(series, forceUpdate);

        if (_unitOfWork.HasChanges())
        {
            await _unitOfWork.CommitAsync();
        }

        await _eventHub.SendMessageAsync(MessageFactory.NotificationProgress,
            MessageFactory.WordCountAnalyzerProgressEvent(libraryId, 1F, ProgressEventType.Ended, series.Name));

        _logger.LogInformation("[WordCountAnalyzerService] Updated metadata for {SeriesName} in {ElapsedMilliseconds} milliseconds", series.Name, sw.ElapsedMilliseconds);
    }

    public async Task ProcessSeries(Series series, bool forceUpdate = false, bool useFileName = true)
    {
        var isEpub = series.Format == MangaFormat.Epub;
        var existingWordCount = series.WordCount;
        series.WordCount = 0;
        foreach (var volume in series.Volumes)
        {
            volume.WordCount = 0;
            foreach (var chapter in volume.Chapters)
            {
                // This compares if it's changed since a file scan only
                var firstFile = chapter.Files.FirstOrDefault();
                if (firstFile == null) return;
                if (!_cacheHelper.HasFileChangedSinceLastScan(firstFile.LastFileAnalysis, forceUpdate,
                        firstFile))
                    continue;

                if (series.Format == MangaFormat.Epub)
                {
                    long sum = 0;
                    var fileCounter = 1;
                    foreach (var file in chapter.Files)
                    {
                        var filePath = file.FilePath;
                        var pageCounter = 1;
                        try
                        {
                            using var book = await EpubReader.OpenBookAsync(filePath, BookService.BookReaderOptions);

                            // Count words across every html document in the epub, reporting progress per page
                            var totalPages = book.Content.Html.Values;
                            foreach (var bookPage in totalPages)
                            {
                                var progress = Math.Max(0F,
                                    Math.Min(1F, (fileCounter * pageCounter) * 1F / (chapter.Files.Count * totalPages.Count)));

                                await _eventHub.SendMessageAsync(MessageFactory.NotificationProgress,
                                    MessageFactory.WordCountAnalyzerProgressEvent(series.LibraryId, progress,
                                        ProgressEventType.Updated, useFileName ? filePath : series.Name));
                                sum += await GetWordCountFromHtml(bookPage);
                                pageCounter++;
                            }

                            fileCounter++;
                        }
                        catch (Exception ex)
                        {
                            _logger.LogError(ex, "There was an error reading an epub file for word count, series skipped");
                            await _eventHub.SendMessageAsync(MessageFactory.Error,
                                MessageFactory.ErrorEvent("There was an issue counting words on an epub",
                                    $"{series.Name} - {file}"));
                            return;
                        }

                        UpdateFileAnalysis(file);
                    }

                    chapter.WordCount = sum;
                    series.WordCount += sum;
                    volume.WordCount += sum;
                }

                // Roll the word count up into min/max/avg hours-to-read estimates
                var est = _readerService.GetTimeEstimate(chapter.WordCount, chapter.Pages, isEpub);
                chapter.MinHoursToRead = est.MinHours;
                chapter.MaxHoursToRead = est.MaxHours;
                chapter.AvgHoursToRead = est.AvgHours;
                foreach (var file in chapter.Files)
                {
                    UpdateFileAnalysis(file);
                }
                _unitOfWork.ChapterRepository.Update(chapter);
            }

            var volumeEst = _readerService.GetTimeEstimate(volume.WordCount, volume.Pages, isEpub);
            volume.MinHoursToRead = volumeEst.MinHours;
            volume.MaxHoursToRead = volumeEst.MaxHours;
            volume.AvgHoursToRead = volumeEst.AvgHours;
            _unitOfWork.VolumeRepository.Update(volume);
        }

        if (series.WordCount == 0 && existingWordCount != 0) series.WordCount = existingWordCount; // Restore original word count if the files haven't changed
        var seriesEstimate = _readerService.GetTimeEstimate(series.WordCount, series.Pages, isEpub);
        series.MinHoursToRead = seriesEstimate.MinHours;
        series.MaxHoursToRead = seriesEstimate.MaxHours;
        series.AvgHoursToRead = seriesEstimate.AvgHours;
        _unitOfWork.SeriesRepository.Update(series);
    }

    private void UpdateFileAnalysis(MangaFile file)
    {
        file.LastFileAnalysis = DateTime.Now;
        _unitOfWork.MangaFileRepository.Update(file);
    }

    private static async Task<int> GetWordCountFromHtml(EpubContentFileRef bookFile)
    {
        var doc = new HtmlDocument();
        doc.LoadHtml(await bookFile.ReadContentAsTextAsync());

        // Select every text node under <body>, skipping script content, then count whitespace-separated
        // tokens that start with a letter so numbers and punctuation-only tokens are ignored
        var textNodes = doc.DocumentNode.SelectNodes("//body//text()[not(parent::script)]");
        if (textNodes == null) return 0;
        return textNodes
            .Select(node => node.InnerText.Split(' ', StringSplitOptions.RemoveEmptyEntries)
                .Where(s => char.IsLetter(s[0])))
            .Sum(words => words.Count());
    }
}
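
For context, the interface above carries Hangfire attributes, so the analyzer is meant to run as a background job. A minimal sketch of how a caller might enqueue it, assuming libraryId and seriesId are in scope (the call sites below are illustrative, not Kavita's actual code):

// Illustrative usage only; Kavita's real call sites and DI registration may differ.
// Hangfire resolves IWordCountAnalyzerService from the container and runs the job off-thread.
BackgroundJob.Enqueue<IWordCountAnalyzerService>(s => s.ScanSeries(libraryId, seriesId, false));

// Or analyze a whole library, forcing re-analysis even if files look unchanged:
BackgroundJob.Enqueue<IWordCountAnalyzerService>(s => s.ScanLibrary(libraryId, true));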