Kavita/API/Services/MetadataService.cs
Joseph Milazzo 81dfd63250
Feature/performance pdf (#426)
#  Added
- Added: Added series format information to the search typeahead to help identify duplicate series in libraries

# Fixed
- Fixed: Fixed accent color not looking good on light theme
- Fixed: Attempted to fix the memory issues with PDF reading on Docker. Uses a Memory Pool for streams and removes a bitmap operation for fixing books with transparent backgrounds (#424)

# Changed
- Changed: Refactored download logs to use the same download code as the rest of Kavita

# Dev stuff
- Added a timeout to Regex matching so that malicious filenames can't crash the user's system
- Refactored a missing GetCoverImage to use Series Format rather than old Library Type

==================================================
* Added Timeout for Regex matching to ensure malicious filenames don't crash system

* Refactored GetCoverImage to use series format rather than library type

* Refactored download logs to use the download service

* Fixed accent color not looking well on light theme

* Refactored series format into dedicated component and added to search results

* Switch to using MemoryManager for Streams to attempt to minimize GC pressure and reduced bitmap manipulation for transparency hack.
2021-07-24 16:17:13 -05:00

207 lines
8.4 KiB
C#

using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Threading.Tasks;
using API.Comparators;
using API.Entities;
using API.Entities.Enums;
using API.Extensions;
using API.Interfaces;
using API.Interfaces.Services;
using Microsoft.Extensions.Logging;
namespace API.Services
{
public class MetadataService : IMetadataService
{
/// <summary>Gives access to the library and series repositories and commits pending changes.</summary>
private readonly IUnitOfWork _unitOfWork;
/// <summary>Logger used for refresh progress and error messages.</summary>
private readonly ILogger<MetadataService> _logger;
/// <summary>Provides cover images and summary info for archive files.</summary>
private readonly IArchiveService _archiveService;
/// <summary>Provides cover images for PDF/EPUB files and summary info for EPUB files.</summary>
private readonly IBookService _bookService;
/// <summary>Locates the cover file of an image-based entry and loads its cover image.</summary>
private readonly IImageService _imageService;
/// <summary>Comparer supplied to OrderBy when chapters are sorted by their parsed number.</summary>
private readonly ChapterSortComparer _chapterSortComparer = new ChapterSortComparer();
/// <summary>Thumbnail width constant; it is not referenced by any code visible in this file.</summary>
// NOTE(review): the trailing "153w x 230h" remark does not match the value 320 — confirm which one is current.
public static readonly int ThumbnailWidth = 320; // 153w x 230h
/// <summary>
/// Creates the service and stores each supplied collaborator in its backing field.
/// </summary>
/// <param name="unitOfWork">Repository access and commit handling.</param>
/// <param name="logger">Logger for this service.</param>
/// <param name="archiveService">Cover/summary provider for archive files.</param>
/// <param name="bookService">Cover/summary provider for book files.</param>
/// <param name="imageService">Cover provider for image files.</param>
public MetadataService(IUnitOfWork unitOfWork, ILogger<MetadataService> logger,
    IArchiveService archiveService, IBookService bookService, IImageService imageService)
{
    // Single tuple assignment; each field receives the parameter of the same name.
    (_unitOfWork, _logger, _archiveService, _bookService, _imageService) =
        (unitOfWork, logger, archiveService, bookService, imageService);
}
/// <summary>
/// Decides whether a cover image has to be (re)generated.
/// </summary>
/// <param name="coverImage">Cover bytes currently stored; may be null.</param>
/// <param name="forceUpdate">When true, a cover is always requested.</param>
/// <returns>
/// True when the update is forced or when no cover data exists (null or zero-length array).
/// </returns>
private static bool ShouldFindCoverImage(byte[] coverImage, bool forceUpdate = false)
{
    if (forceUpdate)
    {
        return true;
    }

    return coverImage is null || coverImage.Length == 0;
}
/// <summary>
/// Produces the cover image bytes for a file by delegating to the service that handles its format.
/// </summary>
/// <param name="file">File whose <c>Format</c> selects the service to call.</param>
/// <param name="createThumbnail">Passed through unchanged to the selected service.</param>
/// <returns>
/// The bytes returned by the selected service, or an empty array for any format not listed here.
/// </returns>
private byte[] GetCoverImage(MangaFile file, bool createThumbnail = true)
{
    var format = file.Format;

    // PDF and EPUB are both handled by the book service.
    if (format == MangaFormat.Pdf || format == MangaFormat.Epub)
    {
        return _bookService.GetCoverImage(file.FilePath, createThumbnail);
    }

    // Image entries: the image service first picks the cover file, then loads it.
    if (format == MangaFormat.Image)
    {
        return _imageService.GetCoverImage(_imageService.GetCoverFile(file), createThumbnail);
    }

    if (format == MangaFormat.Archive)
    {
        return _archiveService.GetCoverImage(file.FilePath, createThumbnail);
    }

    return Array.Empty<byte>();
}
/// <summary>
/// Sets the chapter's cover image from its first file when a cover is needed.
/// </summary>
/// <param name="chapter">Chapter to update; a null chapter is ignored.</param>
/// <param name="forceUpdate">When true, a cover is requested even if one is already stored.</param>
public void UpdateMetadata(Chapter chapter, bool forceUpdate)
{
    if (chapter == null) return;

    // Default the collection BEFORE querying it, so a chapter without a file list
    // is treated as having no files instead of throwing a NullReferenceException.
    chapter.Files ??= new List<MangaFile>();

    if (!ShouldFindCoverImage(chapter.CoverImage, forceUpdate)) return;

    var firstFile = chapter.Files.OrderBy(x => x.Chapter).FirstOrDefault();
    if (firstFile == null) return;

    // NOTE(review): IsLastWriteLessThan is an extension defined elsewhere; presumably it reports that
    // the file on disk is older than the recorded timestamp — confirm. The cover is skipped when it is true.
    if (new FileInfo(firstFile.FilePath).IsLastWriteLessThan(firstFile.LastModified)) return;

    chapter.CoverImage = GetCoverImage(firstFile);
}
/// <summary>
/// Sets the volume's cover image, reusing the first chapter's cover when that chapter already has one
/// and otherwise reading it from the first chapter's first file.
/// </summary>
/// <param name="volume">Volume to update; a null volume is ignored.</param>
/// <param name="forceUpdate">When true, a cover is requested even if one is already stored.</param>
public void UpdateMetadata(Volume volume, bool forceUpdate)
{
    if (volume == null || !ShouldFindCoverImage(volume.CoverImage, forceUpdate)) return;

    volume.Chapters ??= new List<Chapter>();

    // Chapter numbers are machine-generated text, so parse them with the invariant culture;
    // a culture-sensitive parse would misread "1.5" on hosts whose decimal separator is a comma.
    var firstChapter = volume.Chapters
        .OrderBy(x => double.Parse(x.Number, System.Globalization.CultureInfo.InvariantCulture), _chapterSortComparer)
        .FirstOrDefault();
    if (firstChapter == null) return;

    // Skip calculating the cover image (I/O) if the chapter already has it set.
    if (!ShouldFindCoverImage(firstChapter.CoverImage, forceUpdate))
    {
        volume.CoverImage = firstChapter.CoverImage;
        return;
    }

    // Files may be unset on the chapter; treat that the same as "no files".
    var firstFile = firstChapter.Files?.OrderBy(x => x.Chapter).FirstOrDefault();
    if (firstFile != null && !new FileInfo(firstFile.FilePath).IsLastWriteLessThan(firstFile.LastModified))
    {
        volume.CoverImage = GetCoverImage(firstFile);
    }
}
/// <summary>
/// Sets the series' cover image when one is needed, then updates the series summary.
/// </summary>
/// <param name="series">Series to update; a null series is ignored.</param>
/// <param name="forceUpdate">
/// When true, a cover is requested even if one is already stored; also forwarded to the summary update.
/// </param>
public void UpdateMetadata(Series series, bool forceUpdate)
{
    if (series == null) return;

    // Default up front: both the cover lookup and the summary update below read Volumes,
    // and the summary update runs even when no cover is needed.
    series.Volumes ??= new List<Volume>();

    if (ShouldFindCoverImage(series.CoverImage, forceUpdate))
    {
        var firstCover = series.Volumes.GetCoverImage(series.Format);
        byte[] coverImage = null;

        if (firstCover == null && series.Volumes.Any())
        {
            // Sort once and reuse. Numbers are parsed with the invariant culture so that "1.5"
            // is read the same way regardless of the host's regional settings.
            var orderedChapters = (series.Volumes[0].Chapters ?? new List<Chapter>())
                .OrderBy(c => double.Parse(c.Number, System.Globalization.CultureInfo.InvariantCulture), _chapterSortComparer)
                .ToList();

            // If firstCover is null and there is one volume, the whole series is Chapters under Vol 0:
            // prefer the first non-special chapter.
            if (series.Volumes.Count == 1)
            {
                coverImage = orderedChapters.FirstOrDefault(c => !c.IsSpecial)?.CoverImage;
            }

            // Fall back to the very first chapter of the first volume.
            coverImage ??= orderedChapters.FirstOrDefault()?.CoverImage;
        }

        series.CoverImage = firstCover?.CoverImage ?? coverImage;
    }

    UpdateSeriesSummary(series, forceUpdate);
}
/// <summary>
/// Fills in the series summary from the first file of the first chapter, when applicable.
/// </summary>
/// <param name="series">Series whose summary may be populated.</param>
/// <param name="forceUpdate">
/// When true, the lookup runs even if a summary exists and even if the file is unmodified;
/// an existing non-empty summary is still left untouched.
/// </param>
private void UpdateSeriesSummary(Series series, bool forceUpdate)
{
    if (!forceUpdate && !string.IsNullOrEmpty(series.Summary))
    {
        return;
    }

    var bookLibrary = series.Library.Type == LibraryType.Book;

    // Walk volume -> chapter -> file; any missing link yields null.
    var sourceFile = series.Volumes
        .FirstWithChapters(bookLibrary)?
        .Chapters.GetFirstChapterWithFiles()?
        .Files.FirstOrDefault();

    if (sourceFile == null)
    {
        return;
    }

    if (!forceUpdate && !sourceFile.HasFileBeenModified())
    {
        return;
    }

    // PDFs are skipped entirely; no summary is read from them here.
    if (Parser.Parser.IsPdf(sourceFile.FilePath))
    {
        return;
    }

    string summary;
    if (Parser.Parser.IsEpub(sourceFile.FilePath))
    {
        summary = _bookService.GetSummaryInfo(sourceFile.FilePath);
    }
    else
    {
        summary = _archiveService.GetSummaryInfo(sourceFile.FilePath);
    }

    // Only an empty summary is overwritten.
    if (string.IsNullOrEmpty(series.Summary))
    {
        series.Summary = summary;
    }

    sourceFile.LastModified = DateTime.Now;
}
/// <summary>
/// Refreshes chapter, volume and series metadata for every series of a library and commits the changes.
/// </summary>
/// <param name="libraryId">Id of the library to load and refresh.</param>
/// <param name="forceUpdate">Forwarded to every per-entity update.</param>
public void RefreshMetadata(int libraryId, bool forceUpdate = false)
{
    var sw = Stopwatch.StartNew();
    var library = Task.Run(() => _unitOfWork.LibraryRepository.GetFullLibraryForIdAsync(libraryId)).GetAwaiter().GetResult();
    if (library == null)
    {
        // Nothing to refresh; report it instead of failing on the first member access below.
        _logger.LogError("Library {LibraryId} was not found, metadata refresh skipped", libraryId);
        return;
    }

    // TODO: See if we can break this up into multiple threads that process 20 series at a time then save so we can reduce amount of memory used
    _logger.LogInformation("Beginning metadata refresh of {LibraryName}", library.Name);
    foreach (var series in library.Series)
    {
        // Order matters: chapters first, then the volume, then the series, because each level
        // may reuse the cover image set on the level below it.
        foreach (var volume in series.Volumes)
        {
            foreach (var chapter in volume.Chapters)
            {
                UpdateMetadata(chapter, forceUpdate);
            }

            UpdateMetadata(volume, forceUpdate);
        }

        UpdateMetadata(series, forceUpdate);
        _unitOfWork.SeriesRepository.Update(series);
    }

    if (!_unitOfWork.HasChanges()) return;

    if (Task.Run(() => _unitOfWork.CommitAsync()).Result)
    {
        _logger.LogInformation("Updated metadata for {LibraryName} in {ElapsedMilliseconds} milliseconds", library.Name, sw.ElapsedMilliseconds);
    }
    else
    {
        // The commit reported failure; surface it rather than finishing silently.
        _logger.LogWarning("Metadata changes for {LibraryName} could not be saved", library.Name);
    }
}
/// <summary>
/// Forces a metadata refresh of a single series (its chapters, volumes and the series itself)
/// and commits the changes.
/// </summary>
/// <param name="libraryId">Id of the library that is loaded to locate the series.</param>
/// <param name="seriesId">Id of the series to refresh.</param>
public void RefreshMetadataForSeries(int libraryId, int seriesId)
{
    var sw = Stopwatch.StartNew();
    var library = Task.Run(() => _unitOfWork.LibraryRepository.GetFullLibraryForIdAsync(libraryId)).GetAwaiter().GetResult();
    if (library == null)
    {
        // Nothing to search; report it instead of failing on the first member access below.
        _logger.LogError("Library {LibraryId} was not found, metadata refresh skipped", libraryId);
        return;
    }

    var series = library.Series.SingleOrDefault(s => s.Id == seriesId);
    if (series == null)
    {
        // The placeholder is named LibraryName, so supply the name rather than the numeric id.
        _logger.LogError("Series {SeriesId} was not found on Library {LibraryName}", seriesId, library.Name);
        return;
    }

    _logger.LogInformation("Beginning metadata refresh of {SeriesName}", series.Name);
    // Every update below is forced (forceUpdate = true). Chapters go first, then the volume,
    // then the series, because each level may reuse the cover set on the level below it.
    foreach (var volume in series.Volumes)
    {
        foreach (var chapter in volume.Chapters)
        {
            UpdateMetadata(chapter, true);
        }

        UpdateMetadata(volume, true);
    }

    UpdateMetadata(series, true);
    _unitOfWork.SeriesRepository.Update(series);

    if (!_unitOfWork.HasChanges()) return;

    if (Task.Run(() => _unitOfWork.CommitAsync()).Result)
    {
        _logger.LogInformation("Updated metadata for {SeriesName} in {ElapsedMilliseconds} milliseconds", series.Name, sw.ElapsedMilliseconds);
    }
    else
    {
        // The commit reported failure; surface it rather than finishing silently.
        _logger.LogWarning("Metadata changes for {SeriesName} could not be saved", series.Name);
    }
}
}
}