Kavita/API/Services/MetadataService.cs
Joseph Milazzo a01613f80f
EPUB Support (#178)
* Added book filetype detection and reorganized tests due to size of file

* Added ability to get basic Parse Info from Book and Pages.

* We can now scan books and get them in a library with cover images.

* Take the first image in the epub if the cover isn't set.

* Implemented the ability to unzip the ebup to cache. Implemented a test api to load html files.

* Just some test code to figure out how to approach this.

* Fixed some merge conflicts

* Removed some dead code from merge

* Snapshot: I can now load everything properly into the UI by rewriting the urls before I send them back. I don't notice any lag from this method. It can be optimized further.

* Implemented a way to load the content in the browser not via an iframe.

* Added a note

* Anchor mappings is complete. New anchors are updated so references now resolve to javascript:void() for UI to take care of internally loading and the appropriate page is mapped to it. Anchors that are external have target="_blank" added so they don't force you out of the app and styles are of course inlined.

* Oops i need this

* Table of contents api implemented (rough) and some small enhancements to codebase for books.

* GetBookPageResources now only loads files from within the book. Nested chapter list support and images now use html parsing instead of string parsing.

* Fonts now are remapped to load from endpoint.

* book-resources now uses a key, ensuring the file is in proper format for lookup. Changed chapter list based on structure with one HEADER and nested chapters.

* Properly handle svg resource requests and when there are part anchors that are clickable, make sure we handle them in the UI by adding a kavita-page handler.

* Add Chapter group page even if one isn't set by using first page (without part) from nestedChildren.

* Added extra debug code for issue #163.

* Added new user preferences for books and updated the css so we scope it to our reading section.

* Cleaned up style code

* Implemented ability to save book preferences and some cleanup on existing apis.

* Added an api for checking if a user has read something in a library type before.

* Forgot to make sure the has reading progress is against a user lol.

* Remove cacheservice code for books, sine we use an in-memory method

* Handle svg images as well

* Enhanced cover image extraction to check for a "cover" image if the cover image wasn't set in OPF before falling back to the first image.

* Fixed an issue with special books not properly generating metadata due to not having filename set.

* Cleanup, removed warmup task code from statup/program and changed taskscheduler to schedule tasks on startup only (or if tasks are changed from UI).

* Code cleanup

* Code cleanup

* So much code. Lots of refactors to try to test scanner service. Moved a lot of the queries into Extensions to allow to easier test, even though it's hacky. Support @font-face src:url swaps with ' and ". Source summary information from epubs.

* Well...baseURL needs to come from BE and not from UI lol.

* Adjusted migrations so default values match Entity

* Removed comment

* I think I finally fixed #163! The issue was that when i checked if it had a parserInfo, i wasn't considering that the chapter range might have a - in it (0-6) and so when the code to check if range could parse out a number failed, it treated it like a special and checked range against info's filename.

* Some bugfixes

* Lots of testing, extracting code to make it easier to test. This code is buggy, but fixed a bug where 1) If we changed the normalization code, we would remove the whole db during a scan and 2) We weren't actually removing series properly.

Other than that, code is being extracted to remove duplication and centralize logic.

* More code cleanup and test cleanup to ensure scan loop is working as expected and matches expectaions from tests.

* Cleaned up the code and made it so if I change normalization, which I do in this branch, it wont break existing DBs.

* Some comic parser changes for partial chapter support.

* Added some code for directory service and scanner service along with python code to generate test files (not used yet). Fixed up all the tests.

* Code smells
2021-04-28 16:16:22 -05:00

164 lines
6.3 KiB
C#

using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Threading.Tasks;
using API.Entities;
using API.Entities.Enums;
using API.Entities.Interfaces;
using API.Extensions;
using API.Interfaces;
using API.Interfaces.Services;
using Microsoft.Extensions.Logging;
namespace API.Services
{
public class MetadataService : IMetadataService
{
private readonly IUnitOfWork _unitOfWork;
private readonly ILogger<MetadataService> _logger;
private readonly IArchiveService _archiveService;
private readonly IBookService _bookService;
public MetadataService(IUnitOfWork unitOfWork, ILogger<MetadataService> logger, IArchiveService archiveService, IBookService bookService)
{
_unitOfWork = unitOfWork;
_logger = logger;
_archiveService = archiveService;
_bookService = bookService;
}
private static bool ShouldFindCoverImage(byte[] coverImage, bool forceUpdate = false)
{
return forceUpdate || coverImage == null || !coverImage.Any();
}
private byte[] GetCoverImage(MangaFile file, bool createThumbnail = true)
{
if (file.Format == MangaFormat.Book)
{
return _bookService.GetCoverImage(file.FilePath, createThumbnail);
}
else
{
return _archiveService.GetCoverImage(file.FilePath, createThumbnail);
}
}
public void UpdateMetadata(Chapter chapter, bool forceUpdate)
{
var firstFile = chapter.Files.OrderBy(x => x.Chapter).FirstOrDefault();
if (ShouldFindCoverImage(chapter.CoverImage, forceUpdate) && firstFile != null && !new FileInfo(firstFile.FilePath).IsLastWriteLessThan(firstFile.LastModified))
{
chapter.Files ??= new List<MangaFile>();
chapter.CoverImage = GetCoverImage(firstFile);
}
}
public void UpdateMetadata(Volume volume, bool forceUpdate)
{
if (volume != null && ShouldFindCoverImage(volume.CoverImage, forceUpdate))
{
// TODO: Replace this with ChapterSortComparator
volume.Chapters ??= new List<Chapter>();
var firstChapter = volume.Chapters.OrderBy(x => double.Parse(x.Number)).FirstOrDefault();
// Skip calculating Cover Image (I/O) if the chapter already has it set
if (firstChapter == null || ShouldFindCoverImage(firstChapter.CoverImage))
{
var firstFile = firstChapter?.Files.OrderBy(x => x.Chapter).FirstOrDefault();
if (firstFile != null && !new FileInfo(firstFile.FilePath).IsLastWriteLessThan(firstFile.LastModified))
{
volume.CoverImage = GetCoverImage(firstFile);
}
}
else
{
volume.CoverImage = firstChapter.CoverImage;
}
}
}
public void UpdateMetadata(Series series, bool forceUpdate)
{
// TODO: Use new ChapterSortComparer() here instead
if (series == null) return;
if (ShouldFindCoverImage(series.CoverImage, forceUpdate))
{
series.Volumes ??= new List<Volume>();
var firstCover = series.Volumes.GetCoverImage(series.Library.Type);
byte[] coverImage = null;
if (firstCover == null && series.Volumes.Any())
{
// If firstCover is null and one volume, the whole series is Chapters under Vol 0.
if (series.Volumes.Count == 1)
{
coverImage = series.Volumes[0].Chapters.OrderBy(c => double.Parse(c.Number))
.FirstOrDefault(c => !c.IsSpecial)?.CoverImage;
}
if (coverImage == null)
{
coverImage = series.Volumes[0].Chapters.OrderBy(c => double.Parse(c.Number))
.FirstOrDefault()?.CoverImage;
}
}
series.CoverImage = firstCover?.CoverImage ?? coverImage;
}
UpdateSeriesSummary(series, forceUpdate);
}
private void UpdateSeriesSummary(Series series, bool forceUpdate)
{
if (!string.IsNullOrEmpty(series.Summary) && !forceUpdate) return;
var isBook = series.Library.Type == LibraryType.Book;
var firstVolume = series.Volumes.FirstWithChapters(isBook);
var firstChapter = firstVolume?.Chapters.GetFirstChapterWithFiles();
// NOTE: This suffers from code changes not taking effect due to stale data
var firstFile = firstChapter?.Files.FirstOrDefault();
if (firstFile != null &&
(forceUpdate || !firstFile.HasFileBeenModified()))
{
series.Summary = isBook ? _bookService.GetSummaryInfo(firstFile.FilePath) : _archiveService.GetSummaryInfo(firstFile.FilePath);
firstFile.LastModified = DateTime.Now;
}
}
public void RefreshMetadata(int libraryId, bool forceUpdate = false)
{
var sw = Stopwatch.StartNew();
var library = Task.Run(() => _unitOfWork.LibraryRepository.GetFullLibraryForIdAsync(libraryId)).GetAwaiter().GetResult();
// TODO: See if we can break this up into multiple threads that process 20 series at a time then save so we can reduce amount of memory used
_logger.LogInformation("Beginning metadata refresh of {LibraryName}", library.Name);
foreach (var series in library.Series)
{
foreach (var volume in series.Volumes)
{
foreach (var chapter in volume.Chapters)
{
UpdateMetadata(chapter, forceUpdate);
}
UpdateMetadata(volume, forceUpdate);
}
UpdateMetadata(series, forceUpdate);
_unitOfWork.SeriesRepository.Update(series);
}
if (_unitOfWork.HasChanges() && Task.Run(() => _unitOfWork.Complete()).Result)
{
_logger.LogInformation("Updated metadata for {LibraryName} in {ElapsedMilliseconds} milliseconds", library.Name, sw.ElapsedMilliseconds);
}
}
}
}