using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Threading.Tasks;
using API.Comparators;
using API.Data;
using API.Entities;
using API.Entities.Enums;
using API.Extensions;
using API.Interfaces;
using API.Interfaces.Services;
using API.Parser;
using Hangfire;
using Microsoft.Extensions.Logging;

namespace API.Services.Tasks
{
    public class ScannerService : IScannerService
    {
        private readonly IUnitOfWork _unitOfWork;
        private readonly ILogger<ScannerService> _logger;
        private readonly IArchiveService _archiveService;
        private readonly IMetadataService _metadataService;
        private readonly IBookService _bookService;
        private ConcurrentDictionary<string, List<ParserInfo>> _scannedSeries;
        private readonly NaturalSortComparer _naturalSort;

        public ScannerService(IUnitOfWork unitOfWork, ILogger<ScannerService> logger, IArchiveService archiveService,
            IMetadataService metadataService, IBookService bookService)
        {
            _unitOfWork = unitOfWork;
            _logger = logger;
            _archiveService = archiveService;
            _metadataService = metadataService;
            _bookService = bookService;
            _naturalSort = new NaturalSortComparer();
        }

        [DisableConcurrentExecution(timeoutInSeconds: 360)]
        [AutomaticRetry(Attempts = 0, OnAttemptsExceeded = AttemptsExceededAction.Delete)]
        public void ScanLibraries()
        {
            var libraries = Task.Run(() => _unitOfWork.LibraryRepository.GetLibrariesAsync()).Result.ToList();
            foreach (var lib in libraries)
            {
                ScanLibrary(lib.Id, false);
            }
        }

        private bool ShouldSkipFolderScan(FolderPath folder, ref int skippedFolders)
        {
            // NOTE: The only way to skip folders is if the directory hasn't been modified, we aren't doing a
            // forced update, and the version hasn't changed between scans.
            return false;

            // if (!_forceUpdate && Directory.GetLastWriteTime(folder.Path) < folder.LastScanned)
            // {
            //     _logger.LogDebug("{FolderPath} hasn't been modified since last scan. Skipping", folder.Path);
            //     skippedFolders += 1;
            //     return true;
            // }
            //
            // return false;
        }

        [DisableConcurrentExecution(360)]
        [AutomaticRetry(Attempts = 0, OnAttemptsExceeded = AttemptsExceededAction.Delete)]
        public void ScanLibrary(int libraryId, bool forceUpdate)
        {
            var sw = Stopwatch.StartNew();
            _scannedSeries = new ConcurrentDictionary<string, List<ParserInfo>>();
            Library library;
            try
            {
                library = Task.Run(() => _unitOfWork.LibraryRepository.GetFullLibraryForIdAsync(libraryId)).GetAwaiter().GetResult();
            }
            catch (Exception ex)
            {
                // This usually only fails if the user is not authenticated.
                _logger.LogError(ex, "There was an issue fetching Library {LibraryId}", libraryId);
                return;
            }

            var series = ScanLibrariesForSeries(forceUpdate, library, sw, out var totalFiles, out var scanElapsedTime);
            UpdateLibrary(library, series);

            _unitOfWork.LibraryRepository.Update(library);
            if (Task.Run(() => _unitOfWork.CommitAsync()).Result)
            {
                _logger.LogInformation("Processed {TotalFiles} files and {ParsedSeriesCount} series in {ElapsedScanTime} milliseconds for {LibraryName}",
                    totalFiles, series.Keys.Count, sw.ElapsedMilliseconds + scanElapsedTime, library.Name);
            }
            else
            {
                _logger.LogCritical("There was a critical error that resulted in a failed scan. Please check logs and rescan");
            }
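            // Progress rows can reference series/volumes/chapters the scan just deleted, so clean
            // those up before queueing the (potentially long-running) metadata refresh.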
            CleanupUserProgress();

            BackgroundJob.Enqueue(() => _metadataService.RefreshMetadata(libraryId, forceUpdate));
        }

        /// <summary>
        /// Remove any user progress rows that no longer exist since the library scan deleted series/volumes/chapters.
        /// </summary>
        private void CleanupUserProgress()
        {
            var cleanedUp = Task.Run(() => _unitOfWork.AppUserProgressRepository.CleanupAbandonedChapters()).Result;
            _logger.LogInformation("Removed {Count} abandoned progress rows", cleanedUp);
        }

        private Dictionary<string, List<ParserInfo>> ScanLibrariesForSeries(bool forceUpdate, Library library, Stopwatch sw,
            out int totalFiles, out long scanElapsedTime)
        {
            _logger.LogInformation("Beginning scan on {LibraryName}. Forcing metadata update: {ForceUpdate}", library.Name, forceUpdate);
            totalFiles = 0;
            var skippedFolders = 0;
            foreach (var folderPath in library.Folders)
            {
                if (ShouldSkipFolderScan(folderPath, ref skippedFolders)) continue;

                // NOTE: This could be refactored to search all file types and handle everything in ProcessFile,
                // which would allow mixed library types.
                var searchPattern = Parser.Parser.ArchiveFileExtensions;
                if (library.Type == LibraryType.Book)
                {
                    searchPattern = Parser.Parser.BookFileExtensions;
                }

                try
                {
                    totalFiles += DirectoryService.TraverseTreeParallelForEach(folderPath.Path, (f) =>
                    {
                        try
                        {
                            ProcessFile(f, folderPath.Path, library.Type);
                        }
                        catch (FileNotFoundException exception)
                        {
                            _logger.LogError(exception, "The file {Filename} could not be found", f);
                        }
                    }, searchPattern, _logger);
                }
                catch (ArgumentException ex)
                {
                    _logger.LogError(ex, "The directory '{FolderPath}' does not exist", folderPath.Path);
                }

                folderPath.LastScanned = DateTime.Now;
            }

            scanElapsedTime = sw.ElapsedMilliseconds;
            _logger.LogInformation("Scanned {TotalFiles} files in {ElapsedScanTime} milliseconds", totalFiles, scanElapsedTime);
            sw.Restart();
            if (skippedFolders == library.Folders.Count)
            {
                _logger.LogInformation("All folders were skipped due to no modifications to the directories");
                _unitOfWork.LibraryRepository.Update(library);
                _scannedSeries = null;
                _logger.LogInformation("Processed {TotalFiles} files in {ElapsedScanTime} milliseconds for {LibraryName}",
                    totalFiles, sw.ElapsedMilliseconds, library.Name);
                return new Dictionary<string, List<ParserInfo>>();
            }

            return SeriesWithInfos(_scannedSeries);
        }

        /// <summary>
        /// Returns any series where there were parsed infos.
        /// </summary>
        /// <param name="scannedSeries">All series collected during the scan</param>
        /// <returns>Series that have at least one <see cref="ParserInfo"/></returns>
        private static Dictionary<string, List<ParserInfo>> SeriesWithInfos(IDictionary<string, List<ParserInfo>> scannedSeries)
        {
            var filtered = scannedSeries.Where(kvp => kvp.Value.Count > 0);
            var series = filtered.ToDictionary(v => v.Key, v => v.Value);
            return series;
        }

        private void UpdateLibrary(Library library, Dictionary<string, List<ParserInfo>> parsedSeries)
        {
            if (parsedSeries == null) throw new ArgumentNullException(nameof(parsedSeries));

            // First, remove any series that are not in the parsedSeries list
            var missingSeries = FindSeriesNotOnDisk(library.Series, parsedSeries).ToList();
            library.Series = RemoveMissingSeries(library.Series, missingSeries, out var removeCount);
            if (removeCount > 0)
            {
                _logger.LogInformation("Removed {RemoveMissingSeries} series that are no longer on disk:", removeCount);
                foreach (var s in missingSeries)
                {
                    _logger.LogDebug("Removed {SeriesName}", s.Name);
                }
            }

            // Add new series that have parsedInfos
            foreach (var (key, infos) in parsedSeries)
            {
                // Key is normalized already
                Series existingSeries;
                try
                {
                    existingSeries = library.Series.SingleOrDefault(s =>
                        s.NormalizedName == key || Parser.Parser.Normalize(s.OriginalName) == key);
                }
                catch (Exception e)
                {
                    _logger.LogCritical(e, "There are multiple series that map to normalized key {Key}. You can manually delete the entity via UI and rescan to fix it", key);
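                    // SingleOrDefault throws when more than one series normalizes to the same key;
                    // log each colliding series so the user knows which entity to delete via the UI.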
                    var duplicateSeries = library.Series.Where(s =>
                        s.NormalizedName == key || Parser.Parser.Normalize(s.OriginalName) == key).ToList();
                    foreach (var series in duplicateSeries)
                    {
                        _logger.LogCritical("{Key} maps with {Series}", key, series.OriginalName);
                    }

                    continue;
                }

                if (existingSeries == null)
                {
                    existingSeries = DbFactory.Series(infos[0].Series);
                    library.Series.Add(existingSeries);
                }

                existingSeries.NormalizedName = Parser.Parser.Normalize(existingSeries.Name);
                existingSeries.OriginalName ??= infos[0].Series;
                existingSeries.Metadata ??= DbFactory.SeriesMetadata(new List<CollectionTag>());
            }

            // Now, we only have to deal with series that exist on disk. Let's recalculate the volumes for each series
            var librarySeries = library.Series.ToList();
            Parallel.ForEach(librarySeries, (series) =>
            {
                try
                {
                    _logger.LogInformation("Processing series {SeriesName}", series.OriginalName);
                    UpdateVolumes(series, parsedSeries[Parser.Parser.Normalize(series.OriginalName)].ToArray());
                    series.Pages = series.Volumes.Sum(v => v.Pages);
                }
                catch (Exception ex)
                {
                    _logger.LogError(ex, "There was an exception updating volumes for {SeriesName}", series.Name);
                }
            });
        }

        public IEnumerable<Series> FindSeriesNotOnDisk(ICollection<Series> existingSeries, Dictionary<string, List<ParserInfo>> parsedSeries)
        {
            var foundSeries = parsedSeries.Select(s => s.Key).ToList();
            return existingSeries.Where(es => !es.NameInList(foundSeries));
        }

        /// <summary>
        /// Removes all instances of missingSeries' Series from the existingSeries collection. Existing series is updated
        /// by reference and the removed element count is returned.
        /// </summary>
        /// <param name="existingSeries">Existing Series in DB</param>
        /// <param name="missingSeries">Series not found on disk or that can't be parsed</param>
        /// <param name="removeCount">Number of series removed</param>
        /// <returns>The updated existingSeries</returns>
        public static ICollection<Series> RemoveMissingSeries(ICollection<Series> existingSeries, IEnumerable<Series> missingSeries, out int removeCount)
        {
            var existingCount = existingSeries.Count;
            var missingList = missingSeries.ToList();
            existingSeries = existingSeries.Where(
                s => !missingList.Exists(
                    m => m.NormalizedName.Equals(s.NormalizedName))).ToList();
            removeCount = existingCount - existingSeries.Count;
            return existingSeries;
        }

        private void UpdateVolumes(Series series, ParserInfo[] parsedInfos)
        {
            var startingVolumeCount = series.Volumes.Count;
            // Add new volumes and update chapters per volume
            var distinctVolumes = parsedInfos.DistinctVolumes();
            _logger.LogDebug("Updating {DistinctVolumes} volumes on {SeriesName}", distinctVolumes.Count, series.Name);
            foreach (var volumeNumber in distinctVolumes)
            {
                var volume = series.Volumes.SingleOrDefault(s => s.Name == volumeNumber);
                if (volume == null)
                {
                    volume = DbFactory.Volume(volumeNumber);
                    series.Volumes.Add(volume);
                }

                // NOTE: Instead of creating and adding, why not merge a new volume into an existing one so that
                // new properties, etc. get propagated no matter what?
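                // Reconcile chapters for just the infos that parsed into this volume, then roll
                // the chapter page counts up to the volume.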
_logger.LogDebug("Parsing {SeriesName} - Volume {VolumeNumber}", series.Name, volume.Name); var infos = parsedInfos.Where(p => p.Volumes == volumeNumber).ToArray(); UpdateChapters(volume, infos); volume.Pages = volume.Chapters.Sum(c => c.Pages); } // Remove existing volumes that aren't in parsedInfos var nonDeletedVolumes = series.Volumes.Where(v => parsedInfos.Select(p => p.Volumes).Contains(v.Name)).ToList(); if (series.Volumes.Count != nonDeletedVolumes.Count) { _logger.LogDebug("Removed {Count} volumes from {SeriesName} where parsed infos were not mapping with volume name", (series.Volumes.Count - nonDeletedVolumes.Count), series.Name); var deletedVolumes = series.Volumes.Except(nonDeletedVolumes); foreach (var volume in deletedVolumes) { var file = volume.Chapters.FirstOrDefault()?.Files.FirstOrDefault()?.FilePath ?? "no files"; if (new FileInfo(file).Exists) { _logger.LogError("Volume cleanup code was trying to remove a volume with a file still existing on disk. File: {File}", file); } _logger.LogDebug("Removed {SeriesName} - Volume {Volume}: {File}", series.Name, volume.Name, file); } series.Volumes = nonDeletedVolumes; } _logger.LogDebug("Updated {SeriesName} volumes from {StartingVolumeCount} to {VolumeCount}", series.Name, startingVolumeCount, series.Volumes.Count); } /// /// /// /// /// private void UpdateChapters(Volume volume, ParserInfo[] parsedInfos) { // Add new chapters foreach (var info in parsedInfos) { // Specials go into their own chapters with Range being their filename and IsSpecial = True. Non-Specials with Vol and Chap as 0 // also are treated like specials for UI grouping. Chapter chapter; try { chapter = volume.Chapters.GetChapterByRange(info); } catch (Exception ex) { _logger.LogError(ex, "{FileName} mapped as '{Series} - Vol {Volume} Ch {Chapter}' is a duplicate, skipping", info.FullFilePath, info.Series, info.Volumes, info.Chapters); continue; } if (chapter == null) { _logger.LogDebug( "Adding new chapter, {Series} - Vol {Volume} Ch {Chapter}", info.Series, info.Volumes, info.Chapters); volume.Chapters.Add(DbFactory.Chapter(info)); } else { chapter.UpdateFrom(info); } } // Add files foreach (var info in parsedInfos) { var specialTreatment = info.IsSpecialInfo(); Chapter chapter; try { chapter = volume.Chapters.GetChapterByRange(info); } catch (Exception ex) { _logger.LogError(ex, "There was an exception parsing chapter. Skipping {SeriesName} Vol {VolumeNumber} Chapter {ChapterNumber} - Special treatment: {NeedsSpecialTreatment}", info.Series, volume.Name, info.Chapters, specialTreatment); continue; } if (chapter == null) continue; AddOrUpdateFileForChapter(chapter, info); chapter.Number = Parser.Parser.MinimumNumberFromRange(info.Chapters) + string.Empty; chapter.Range = specialTreatment ? 
                chapter.Number = Parser.Parser.MinimumNumberFromRange(info.Chapters).ToString();
                chapter.Range = specialTreatment ? info.Filename : info.Chapters;
            }

            // Remove chapters that aren't in parsedInfos or have no files linked
            var existingChapters = volume.Chapters.ToList();
            foreach (var existingChapter in existingChapters)
            {
                if (existingChapter.Files.Count == 0 || !parsedInfos.HasInfo(existingChapter))
                {
                    _logger.LogDebug("Removed chapter {Chapter} for Volume {VolumeNumber} on {SeriesName}",
                        existingChapter.Range, volume.Name, parsedInfos[0].Series);
                    volume.Chapters.Remove(existingChapter);
                }
                else
                {
                    // Ensure we remove any files that no longer exist AND order the rest naturally
                    existingChapter.Files = existingChapter.Files
                        .Where(f => parsedInfos.Any(p => p.FullFilePath == f.FilePath))
                        .OrderBy(f => f.FilePath, _naturalSort).ToList();
                    existingChapter.Pages = existingChapter.Files.Sum(f => f.Pages);
                }
            }
        }

        /// <summary>
        /// Attempts to either add a new series mapping to the _scannedSeries dictionary or append to an existing entry.
        /// </summary>
        /// <param name="info">Parsed info to track</param>
        private void TrackSeries(ParserInfo info)
        {
            if (info.Series == string.Empty) return;

            // Check if a normalized info.Series already exists and if so, update info to use that name instead
            info.Series = MergeName(_scannedSeries, info);

            _scannedSeries.AddOrUpdate(Parser.Parser.Normalize(info.Series), new List<ParserInfo>() {info}, (_, oldValue) =>
            {
                oldValue ??= new List<ParserInfo>();
                if (!oldValue.Contains(info))
                {
                    oldValue.Add(info);
                }

                return oldValue;
            });
        }

        public string MergeName(ConcurrentDictionary<string, List<ParserInfo>> collectedSeries, ParserInfo info)
        {
            var normalizedSeries = Parser.Parser.Normalize(info.Series);
            _logger.LogDebug("Checking if we can merge {NormalizedSeries}", normalizedSeries);
            var existingName = collectedSeries.SingleOrDefault(p => Parser.Parser.Normalize(p.Key) == normalizedSeries)
                .Key;
            // BUG: We are comparing info.Series against a normalized string. They should never match.
            // (This can cause series to not delete or parse correctly after a rename)
            if (!string.IsNullOrEmpty(existingName)) // && info.Series != existingName
            {
                _logger.LogDebug("Found duplicate parsed infos, merged {Original} into {Merged}", info.Series, existingName);
                return existingName;
            }

            return info.Series;
        }

        /// <summary>
        /// Processes files found during a library scan.
        /// Populates a collection of <see cref="ParserInfo"/> for DB updates later.
        /// </summary>
        /// <param name="path">Path of a file</param>
        /// <param name="rootPath">Root library folder the file was found under</param>
        /// <param name="type">Library type to determine parsing to perform</param>
        private void ProcessFile(string path, string rootPath, LibraryType type)
        {
            ParserInfo info;

            if (type == LibraryType.Book && Parser.Parser.IsEpub(path))
            {
                info = _bookService.ParseInfo(path);
            }
            else
            {
                info = Parser.Parser.Parse(path, rootPath, type);
            }

            if (info == null)
            {
                _logger.LogWarning("[Scanner] Could not parse series from {Path}", path);
                return;
            }

            // If the epub's metadata title itself parses to a real volume, prefer the filename-based
            // parse and merge the book metadata into it.
            if (type == LibraryType.Book && Parser.Parser.IsEpub(path) && Parser.Parser.ParseVolume(info.Series) != Parser.Parser.DefaultVolume)
            {
                info = Parser.Parser.Parse(path, rootPath, type);
                var info2 = _bookService.ParseInfo(path);
                info.Merge(info2);
            }

            TrackSeries(info);
        }

        private MangaFile CreateMangaFile(ParserInfo info)
        {
            switch (info.Format)
            {
                case MangaFormat.Archive:
                {
                    return new MangaFile()
                    {
                        FilePath = info.FullFilePath,
                        Format = info.Format,
                        Pages = _archiveService.GetNumberOfPagesFromArchive(info.FullFilePath)
                    };
                }
                case MangaFormat.Book:
                {
                    return new MangaFile()
                    {
                        FilePath = info.FullFilePath,
                        Format = info.Format,
                        Pages = _bookService.GetNumberOfPages(info.FullFilePath)
                    };
                }
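                // Only Archive and Book formats are supported; anything else is logged and skipped,
                // and the caller treats the null return as "no file to attach".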
                default:
                    _logger.LogWarning("[Scanner] Ignoring {Filename}. Non-archives are not supported", info.Filename);
                    break;
            }

            return null;
        }

        private void AddOrUpdateFileForChapter(Chapter chapter, ParserInfo info)
        {
            chapter.Files ??= new List<MangaFile>();
            var existingFile = chapter.Files.SingleOrDefault(f => f.FilePath == info.FullFilePath);
            if (existingFile != null)
            {
                existingFile.Format = info.Format;
                // Recount pages only when the file changed on disk or was never counted; an existing
                // count for an unmodified file is still valid.
                if (existingFile.HasFileBeenModified() || existingFile.Pages == 0)
                {
                    existingFile.Pages = existingFile.Format == MangaFormat.Book
                        ? _bookService.GetNumberOfPages(info.FullFilePath)
                        : _archiveService.GetNumberOfPagesFromArchive(info.FullFilePath);
                }
            }
            else
            {
                var file = CreateMangaFile(info);
                if (file != null)
                {
                    chapter.Files.Add(file);
                    existingFile = chapter.Files.Last();
                }
            }

            if (existingFile != null)
            {
                existingFile.LastModified = new FileInfo(existingFile.FilePath).LastWriteTime;
            }
        }
    }
}