mirror of
https://github.com/Kareadita/Kavita.git
synced 2025-06-02 05:04:14 -04:00
* Added --card-list-item-bg-color for the card list items * Updated the card list item progress to match how cards render * Implemented the ability to configure how many backups are retained. * Fixed a bug where odd jump keys could cause a bad index error for jump bar * Commented out more code for the pagination route if we go with that. * Reverted a move of DisableConcurrentExecution to interface, as it seems to not work there. * Updated manga format utility code to pipes * Fixed bulk selection on series detail page * Fixed bulk selection on all other pages * Changed card item to OnPush * Updated image component to OnPush * Updated Series Card to OnPush * Updated Series Detail to OnPush * Lots of changes here. Integrated parentscroll support on card detail layout. Added jump bar (custom js implementation) on collection, reading list and all series pages. Updated UserParams to default to no pagination. Lots of cleanup all around * Updated some notes on a module use * Some code cleanup * Fixed up a broken test due to the mapper not being configured in the test. * Applied TabID pattern to edit collection tags * Applied css from series detail to collection detail page to remove double scrollbar * Implemented the ability to sort by Time To Read. * Throw an error to the UI when we extract an archive and it contains invalid characters in the filename for the Server OS. * Tweaked how the page scrolls for jumpbar on collection detail. We will have to polish another release * Cleaned up the styling on directory picker * Put some code in but it doesn't work for scroll to top on virtual scrolling. I'll do it later. * Fixed a container bug
247 lines
11 KiB
C#
247 lines
11 KiB
C#
using System;
|
|
using System.Diagnostics;
|
|
using System.Linq;
|
|
using System.Threading.Tasks;
|
|
using API.Data;
|
|
using API.Data.Repositories;
|
|
using API.Entities;
|
|
using API.Entities.Enums;
|
|
using API.Helpers;
|
|
using API.SignalR;
|
|
using Hangfire;
|
|
using HtmlAgilityPack;
|
|
using Microsoft.Extensions.Logging;
|
|
using VersOne.Epub;
|
|
|
|
namespace API.Services.Tasks.Metadata;
|
|
|
|
public interface IWordCountAnalyzerService
{
    /// <summary>
    /// Scans every series in the given library, recalculating word counts and time-to-read estimates.
    /// </summary>
    /// <param name="libraryId">Library to scan</param>
    /// <param name="forceUpdate">When true, re-analyze files even if they have not changed since the last analysis</param>
    // NOTE(review): Hangfire attributes on interface members may not be honored by the scheduler
    // (they are duplicated on the implementation) — confirm whether these can be removed here.
    [DisableConcurrentExecution(timeoutInSeconds: 60 * 60 * 60)]
    [AutomaticRetry(Attempts = 0, OnAttemptsExceeded = AttemptsExceededAction.Delete)]
    Task ScanLibrary(int libraryId, bool forceUpdate = false);
    /// <summary>
    /// Scans a single series, recalculating word counts and time-to-read estimates.
    /// </summary>
    /// <param name="libraryId">Library the series belongs to (used for progress events and logging)</param>
    /// <param name="seriesId">Series to scan</param>
    /// <param name="forceUpdate">When true, re-analyze files even if they have not changed since the last analysis</param>
    Task ScanSeries(int libraryId, int seriesId, bool forceUpdate = true);
}
|
|
|
|
/// <summary>
|
|
/// This service is a metadata task that generates information around time to read
|
|
/// </summary>
|
|
public class WordCountAnalyzerService : IWordCountAnalyzerService
{
    private readonly ILogger<WordCountAnalyzerService> _logger;
    private readonly IUnitOfWork _unitOfWork;
    private readonly IEventHub _eventHub;
    private readonly ICacheHelper _cacheHelper;
    private readonly IReaderService _readerService;

    public WordCountAnalyzerService(ILogger<WordCountAnalyzerService> logger, IUnitOfWork unitOfWork, IEventHub eventHub,
        ICacheHelper cacheHelper, IReaderService readerService)
    {
        _logger = logger;
        _unitOfWork = unitOfWork;
        _eventHub = eventHub;
        _cacheHelper = cacheHelper;
        _readerService = readerService;
    }


    /// <summary>
    /// Scans every series in the given library in chunks, recalculating word counts and
    /// time-to-read estimates, and broadcasts progress over SignalR as it goes.
    /// </summary>
    /// <param name="libraryId">Library to scan</param>
    /// <param name="forceUpdate">When true, re-analyze files even if they have not changed since the last analysis</param>
    [DisableConcurrentExecution(timeoutInSeconds: 60 * 60 * 60)]
    [AutomaticRetry(Attempts = 0, OnAttemptsExceeded = AttemptsExceededAction.Delete)]
    public async Task ScanLibrary(int libraryId, bool forceUpdate = false)
    {
        var sw = Stopwatch.StartNew();
        var library = await _unitOfWork.LibraryRepository.GetLibraryForIdAsync(libraryId, LibraryIncludes.None);

        await _eventHub.SendMessageAsync(MessageFactory.NotificationProgress,
            MessageFactory.WordCountAnalyzerProgressEvent(libraryId, 0F, ProgressEventType.Started, string.Empty));

        var chunkInfo = await _unitOfWork.SeriesRepository.GetChunkInfo(library.Id);
        var stopwatch = Stopwatch.StartNew();
        _logger.LogInformation("[MetadataService] Refreshing Library {LibraryName}. Total Items: {TotalSize}. Total Chunks: {TotalChunks} with {ChunkSize} size", library.Name, chunkInfo.TotalSize, chunkInfo.TotalChunks, chunkInfo.ChunkSize);

        await _eventHub.SendMessageAsync(MessageFactory.NotificationProgress,
            MessageFactory.WordCountAnalyzerProgressEvent(library.Id, 0F, ProgressEventType.Started, $"Starting {library.Name}"));

        // Chunks are 1-based pages of ChunkSize series each.
        for (var chunk = 1; chunk <= chunkInfo.TotalChunks; chunk++)
        {
            // Defensive guard; unreachable since the loop body only runs when TotalChunks >= 1.
            if (chunkInfo.TotalChunks == 0) continue;
            stopwatch.Restart();

            _logger.LogInformation("[MetadataService] Processing chunk {ChunkNumber} / {TotalChunks} with size {ChunkSize}. Series ({SeriesStart} - {SeriesEnd}",
                chunk, chunkInfo.TotalChunks, chunkInfo.ChunkSize, chunk * chunkInfo.ChunkSize, (chunk + 1) * chunkInfo.ChunkSize);

            var nonLibrarySeries = await _unitOfWork.SeriesRepository.GetFullSeriesForLibraryIdAsync(library.Id,
                new UserParams()
                {
                    PageNumber = chunk,
                    PageSize = chunkInfo.ChunkSize
                });
            _logger.LogDebug("[MetadataService] Fetched {SeriesCount} series for refresh", nonLibrarySeries.Count);

            var seriesIndex = 0;
            foreach (var series in nonLibrarySeries)
            {
                // BUG FIX: was `chunk * seriesIndex`, which only works for chunk 1 and produced
                // wrong, non-monotonic progress for later chunks. The overall 0-based position of
                // this series across all chunks is (chunk - 1) * ChunkSize + seriesIndex.
                var index = (chunk - 1) * chunkInfo.ChunkSize + seriesIndex;
                var progress = Math.Max(0F, Math.Min(1F, index * 1F / chunkInfo.TotalSize));

                await _eventHub.SendMessageAsync(MessageFactory.NotificationProgress,
                    MessageFactory.WordCountAnalyzerProgressEvent(library.Id, progress, ProgressEventType.Updated, series.Name));

                try
                {
                    await ProcessSeries(series, forceUpdate, false);
                }
                catch (Exception ex)
                {
                    // One bad series should not abort the rest of the chunk.
                    _logger.LogError(ex, "[MetadataService] There was an exception during metadata refresh for {SeriesName}", series.Name);
                }
                seriesIndex++;
            }

            if (_unitOfWork.HasChanges())
            {
                await _unitOfWork.CommitAsync();
            }

            _logger.LogInformation(
                "[MetadataService] Processed {SeriesStart} - {SeriesEnd} out of {TotalSeries} series in {ElapsedScanTime} milliseconds for {LibraryName}",
                chunk * chunkInfo.ChunkSize, (chunk * chunkInfo.ChunkSize) + nonLibrarySeries.Count, chunkInfo.TotalSize, stopwatch.ElapsedMilliseconds, library.Name);
        }

        await _eventHub.SendMessageAsync(MessageFactory.NotificationProgress,
            MessageFactory.WordCountAnalyzerProgressEvent(library.Id, 1F, ProgressEventType.Ended, "Complete"));


        _logger.LogInformation("[WordCountAnalyzerService] Updated metadata for {LibraryName} in {ElapsedMilliseconds} milliseconds", library.Name, sw.ElapsedMilliseconds);

    }

    /// <summary>
    /// Scans a single series, recalculating word counts and time-to-read estimates,
    /// and broadcasts start/end progress events over SignalR.
    /// </summary>
    /// <param name="libraryId">Library the series belongs to (used for progress events and logging)</param>
    /// <param name="seriesId">Series to scan</param>
    /// <param name="forceUpdate">When true, re-analyze files even if they have not changed since the last analysis</param>
    public async Task ScanSeries(int libraryId, int seriesId, bool forceUpdate = true)
    {
        var sw = Stopwatch.StartNew();
        var series = await _unitOfWork.SeriesRepository.GetFullSeriesForSeriesIdAsync(seriesId);
        if (series == null)
        {
            _logger.LogError("[WordCountAnalyzerService] Series {SeriesId} was not found on Library {LibraryId}", seriesId, libraryId);
            return;
        }

        await _eventHub.SendMessageAsync(MessageFactory.NotificationProgress,
            MessageFactory.WordCountAnalyzerProgressEvent(libraryId, 0F, ProgressEventType.Started, series.Name));

        await ProcessSeries(series, forceUpdate);

        if (_unitOfWork.HasChanges())
        {
            await _unitOfWork.CommitAsync();
        }

        await _eventHub.SendMessageAsync(MessageFactory.NotificationProgress,
            MessageFactory.WordCountAnalyzerProgressEvent(libraryId, 1F, ProgressEventType.Ended, series.Name));

        _logger.LogInformation("[WordCountAnalyzerService] Updated metadata for {SeriesName} in {ElapsedMilliseconds} milliseconds", series.Name, sw.ElapsedMilliseconds);
    }

    /// <summary>
    /// Recomputes word counts (epub only) and min/max/avg hours-to-read for every chapter and
    /// volume of the series, rolling the totals up to the series, and stages the updated
    /// entities on the unit of work (caller commits).
    /// </summary>
    /// <param name="series">Series with Volumes/Chapters/Files loaded</param>
    /// <param name="forceUpdate">When true, re-analyze files even if they have not changed since the last analysis</param>
    /// <param name="useFileName">When true, progress events show the file path; otherwise the series name</param>
    private async Task ProcessSeries(Series series, bool forceUpdate = false, bool useFileName = true)
    {
        var isEpub = series.Format == MangaFormat.Epub;
        series.WordCount = 0;
        foreach (var volume in series.Volumes)
        {
            volume.WordCount = 0;
            foreach (var chapter in volume.Chapters)
            {
                // This compares if it's changed since a file scan only
                var firstFile = chapter.Files.FirstOrDefault();
                // NOTE(review): returning here aborts analysis of the *entire* series when one
                // chapter has no files; `continue` looks like it may have been intended — confirm.
                if (firstFile == null) return;
                if (!_cacheHelper.HasFileChangedSinceLastScan(firstFile.LastFileAnalysis, forceUpdate,
                        firstFile))
                    continue;

                if (series.Format == MangaFormat.Epub)
                {
                    long sum = 0;
                    var fileCounter = 1;
                    foreach (var file in chapter.Files)
                    {
                        var filePath = file.FilePath;
                        var pageCounter = 1;
                        try
                        {
                            using var book = await EpubReader.OpenBookAsync(filePath, BookService.BookReaderOptions);

                            var totalPages = book.Content.Html.Values;
                            foreach (var bookPage in totalPages)
                            {
                                // NOTE(review): fileCounter * pageCounter is not a linear index over
                                // (files x pages), so progress is only approximate for multi-file chapters.
                                var progress = Math.Max(0F,
                                    Math.Min(1F, (fileCounter * pageCounter) * 1F / (chapter.Files.Count * totalPages.Count)));

                                await _eventHub.SendMessageAsync(MessageFactory.NotificationProgress,
                                    MessageFactory.WordCountAnalyzerProgressEvent(series.LibraryId, progress,
                                        ProgressEventType.Updated, useFileName ? filePath : series.Name));
                                sum += await GetWordCountFromHtml(bookPage);
                                pageCounter++;
                            }

                            fileCounter++;
                        }
                        catch (Exception ex)
                        {
                            // A single unreadable epub aborts the series; partial sums are discarded.
                            _logger.LogError(ex, "There was an error reading an epub file for word count, series skipped");
                            await _eventHub.SendMessageAsync(MessageFactory.Error,
                                MessageFactory.ErrorEvent("There was an issue counting words on an epub",
                                    $"{series.Name} - {file}"));
                            return;
                        }

                        // Stamp the analysis time so unchanged files are skipped on the next run.
                        file.LastFileAnalysis = DateTime.Now;
                        _unitOfWork.MangaFileRepository.Update(file);
                    }

                    chapter.WordCount = sum;
                    series.WordCount += sum;
                    volume.WordCount += sum;
                }

                var est = _readerService.GetTimeEstimate(chapter.WordCount, chapter.Pages, isEpub);
                chapter.MinHoursToRead = est.MinHours;
                chapter.MaxHoursToRead = est.MaxHours;
                chapter.AvgHoursToRead = est.AvgHours;
                _unitOfWork.ChapterRepository.Update(chapter);
            }

            var volumeEst = _readerService.GetTimeEstimate(volume.WordCount, volume.Pages, isEpub);
            volume.MinHoursToRead = volumeEst.MinHours;
            volume.MaxHoursToRead = volumeEst.MaxHours;
            volume.AvgHoursToRead = volumeEst.AvgHours;
            _unitOfWork.VolumeRepository.Update(volume);

        }

        var seriesEstimate = _readerService.GetTimeEstimate(series.WordCount, series.Pages, isEpub);
        series.MinHoursToRead = seriesEstimate.MinHours;
        series.MaxHoursToRead = seriesEstimate.MaxHours;
        series.AvgHoursToRead = seriesEstimate.AvgHours;
        _unitOfWork.SeriesRepository.Update(series);
    }


    /// <summary>
    /// Counts the words in one epub HTML page: loads the document, selects all body text nodes
    /// outside of script tags, and counts whitespace-separated tokens that start with a letter
    /// (filters out bare numbers/punctuation).
    /// </summary>
    /// <param name="bookFile">HTML content file from the epub</param>
    /// <returns>Word count for the page; 0 when the page has no body text</returns>
    private static async Task<int> GetWordCountFromHtml(EpubContentFileRef bookFile)
    {
        var doc = new HtmlDocument();
        doc.LoadHtml(await bookFile.ReadContentAsTextAsync());

        var textNodes = doc.DocumentNode.SelectNodes("//body//text()[not(parent::script)]");
        if (textNodes == null) return 0;

        // RemoveEmptyEntries guarantees s is non-empty, so s[0] is safe.
        return textNodes
            .Select(node => node.InnerText.Split(' ', StringSplitOptions.RemoveEmptyEntries)
                .Where(s => char.IsLetter(s[0])))
            .Select(words => words.Count())
            .Where(wordCount => wordCount > 0)
            .Sum();
    }


}
|