// Kavita/API/Services/ScannerService.cs

using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Diagnostics;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Runtime.CompilerServices;
using System.Threading.Tasks;
using API.Entities;
using API.Entities.Enums;
using API.Interfaces;
using API.Interfaces.Services;
using API.Parser;
using Hangfire;
using Microsoft.Extensions.Logging;
[assembly: InternalsVisibleTo("API.Tests")]
namespace API.Services
{
    public class ScannerService : IScannerService
    {
        private readonly IUnitOfWork _unitOfWork;
        private readonly ILogger<ScannerService> _logger;
        private readonly IArchiveService _archiveService;
        private readonly IMetadataService _metadataService;
        private ConcurrentDictionary<string, List<ParserInfo>> _scannedSeries;
        private bool _forceUpdate;
        private readonly TextInfo _textInfo = new CultureInfo("en-US", false).TextInfo;

        public ScannerService(IUnitOfWork unitOfWork, ILogger<ScannerService> logger, IArchiveService archiveService,
            IMetadataService metadataService)
        {
            _unitOfWork = unitOfWork;
            _logger = logger;
            _archiveService = archiveService;
            _metadataService = metadataService;
        }
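
        /// <summary>
        /// Scans every library in the database. Hangfire's <see cref="DisableConcurrentExecutionAttribute"/>
        /// prevents overlapping runs.
        /// </summary>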
        [DisableConcurrentExecution(timeoutInSeconds: 120)]
        public void ScanLibraries()
        {
            // This job runs synchronously, so block on the async repository call.
            var libraries = Task.Run(() => _unitOfWork.LibraryRepository.GetLibrariesAsync()).Result.ToList();
            foreach (var lib in libraries)
            {
                ScanLibrary(lib.Id, false);
            }
        }
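
        /// <summary>
        /// Intended to skip folders whose contents have not changed since the last scan. Currently disabled:
        /// it always returns false until the remaining edge cases are handled (see NOTE below).
        /// </summary>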
        private bool ShouldSkipFolderScan(FolderPath folder, ref int skippedFolders)
        {
            // NOTE: This solution isn't the best, but it has potential. A few other cases need to be handled before it works well.
            return false;
            // if (/*_environment.IsProduction() && */!_forceUpdate && Directory.GetLastWriteTime(folder.Path) < folder.LastScanned)
            // {
            //     _logger.LogDebug($"{folder.Path} hasn't been updated since last scan. Skipping.");
            //     skippedFolders += 1;
            //     return true;
            // }
            //
            // return false;
        }
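
        /// <summary>
        /// Releases the scan state so the next scan starts fresh.
        /// </summary>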
        private void Cleanup()
        {
            _scannedSeries = null;
        }
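
        /// <summary>
        /// Scans a single library: walks every configured folder, parses each file found, and reconciles
        /// the results against the database.
        /// </summary>
        /// <param name="libraryId">Id of the library to scan</param>
        /// <param name="forceUpdate">When true, metadata is regenerated even for unchanged files</param>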
        [DisableConcurrentExecution(timeoutInSeconds: 120)]
        public void ScanLibrary(int libraryId, bool forceUpdate)
        {
            _forceUpdate = forceUpdate;
            var sw = Stopwatch.StartNew();
            Cleanup();

            Library library;
            try
            {
                library = Task.Run(() => _unitOfWork.LibraryRepository.GetLibraryForIdAsync(libraryId)).Result;
            }
            catch (Exception ex)
            {
                // This usually only fails if the user is not authenticated.
                _logger.LogError(ex, "There was an issue fetching Library {LibraryId}", libraryId);
                return;
            }

            _scannedSeries = new ConcurrentDictionary<string, List<ParserInfo>>();
            _logger.LogInformation($"Beginning scan on {library.Name}. Forcing metadata update: {forceUpdate}");

            var totalFiles = 0;
            var skippedFolders = 0;
            foreach (var folderPath in library.Folders)
            {
                if (ShouldSkipFolderScan(folderPath, ref skippedFolders)) continue;

                try
                {
                    totalFiles += DirectoryService.TraverseTreeParallelForEach(folderPath.Path, (f) =>
                    {
                        try
                        {
                            ProcessFile(f, folderPath.Path);
                        }
                        catch (FileNotFoundException exception)
                        {
                            _logger.LogError(exception, $"The file {f} could not be found");
                        }
                    });
                }
                catch (ArgumentException ex)
                {
                    _logger.LogError(ex, $"The directory '{folderPath.Path}' does not exist");
                }

                folderPath.LastScanned = DateTime.Now;
            }

            var scanElapsedTime = sw.ElapsedMilliseconds;
            _logger.LogInformation("Scanned {0} files in {1} milliseconds", totalFiles, scanElapsedTime);
            sw.Restart();

            if (skippedFolders == library.Folders.Count)
            {
                _logger.LogInformation("All folders were skipped due to no modifications to the directories.");
                _unitOfWork.LibraryRepository.Update(library);
                _logger.LogInformation("Processed {0} files in {1} milliseconds for {2}", totalFiles, sw.ElapsedMilliseconds, library.Name);
                Cleanup();
                return;
            }

            // Remove any series where there were no parsed infos
            var filtered = _scannedSeries.Where(kvp => kvp.Value.Count != 0);
            var series = filtered.ToDictionary(v => v.Key, v => v.Value);

            UpdateLibrary(libraryId, series, library);
            _unitOfWork.LibraryRepository.Update(library);

            if (Task.Run(() => _unitOfWork.Complete()).Result)
            {
                _logger.LogInformation($"Scan completed on {library.Name}. Parsed {series.Keys.Count} series in {sw.ElapsedMilliseconds} ms.");
            }
            else
            {
                _logger.LogError("There was a critical error that resulted in a failed scan. Please check logs and rescan.");
            }

            _logger.LogInformation("Processed {0} files in {1} milliseconds for {2}", totalFiles, sw.ElapsedMilliseconds + scanElapsedTime, library.Name);
        }
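
        /// <summary>
        /// Applies parsed scan results to the library: upserts series, then removes series no longer on disk.
        /// </summary>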
        private void UpdateLibrary(int libraryId, Dictionary<string, List<ParserInfo>> parsedSeries, Library library)
        {
            var allSeries = Task.Run(() => _unitOfWork.SeriesRepository.GetSeriesForLibraryIdAsync(libraryId)).Result.ToList();
            _logger.LogInformation($"Updating Library {library.Name}");

            // Perform DB activities
            UpsertSeries(library, parsedSeries, allSeries);

            // Remove series that are no longer on disk
            RemoveSeriesNotOnDisk(allSeries, parsedSeries, library);

            foreach (var folder in library.Folders) folder.LastScanned = DateTime.Now;
        }
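
        /// <summary>
        /// Adds a <see cref="Series"/> for each parsed series key that is not already tracked, and updates
        /// volumes, chapters, and metadata on every series found in the scan.
        /// </summary>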
        protected internal void UpsertSeries(Library library, Dictionary<string, List<ParserInfo>> parsedSeries,
            List<Series> allSeries)
        {
            // NOTE: This is a great point to break the parsing into threads and join back. Each thread can take X series.
            var foundSeries = parsedSeries.Keys.ToList();
            _logger.LogDebug($"Found {foundSeries.Count} series.");

            foreach (var seriesKey in parsedSeries.Keys)
            {
                try
                {
                    var mangaSeries = ExistingOrDefault(library, allSeries, seriesKey) ?? new Series
                    {
                        Name = seriesKey, // NOTE: Should title casing be applied here?
                        OriginalName = seriesKey,
                        NormalizedName = Parser.Parser.Normalize(seriesKey),
                        SortName = seriesKey,
                        Summary = ""
                    };
                    mangaSeries.NormalizedName = Parser.Parser.Normalize(mangaSeries.Name);

                    UpdateSeries(ref mangaSeries, parsedSeries[seriesKey].ToArray());
                    if (library.Series.Any(s => Parser.Parser.Normalize(s.Name) == mangaSeries.NormalizedName)) continue;
                    _logger.LogInformation($"Added series {mangaSeries.Name}");
                    library.Series.Add(mangaSeries);
                }
                catch (Exception ex)
                {
                    _logger.LogError(ex, $"There was an error during scanning of library. {seriesKey} will be skipped.");
                }
            }
        }
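
        /// <summary>
        /// Converts a string to title case using the en-US culture. Not currently called; likely related to
        /// the title-casing NOTE in <see cref="UpsertSeries"/>.
        /// </summary>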
        private string ToTitleCase(string str)
        {
            return _textInfo.ToTitleCase(str);
        }
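
        /// <summary>
        /// Removes from the library any series whose files were not found during this scan.
        /// </summary>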
        private void RemoveSeriesNotOnDisk(IEnumerable<Series> allSeries, Dictionary<string, List<ParserInfo>> series, Library library)
        {
            _logger.LogInformation("Removing any series that are no longer on disk.");
            var count = 0;
            // Compare on normalized names only; also matching on raw Name/OriginalName would remove
            // series whose folder casing differs from the stored name.
            var foundSeries = series.Select(s => Parser.Parser.Normalize(s.Key)).ToList();
            var missingSeries = allSeries.Where(existingSeries => !foundSeries.Contains(existingSeries.NormalizedName));

            foreach (var existingSeries in missingSeries)
            {
                // Delete the series; there is no longer a file backing it.
                library.Series?.Remove(existingSeries);
                count++;
            }
            _logger.LogInformation($"Removed {count} series that are no longer on disk");
        }

        /// <summary>
        /// Attempts to either add a new instance of a show mapping to the scannedSeries bag or appends to an existing one.
        /// </summary>
        /// <param name="info"></param>
        private void TrackSeries(ParserInfo info)
        {
            if (string.IsNullOrEmpty(info.Series)) return;

            _scannedSeries.AddOrUpdate(info.Series, new List<ParserInfo>() {info}, (_, oldValue) =>
            {
                oldValue ??= new List<ParserInfo>();
                if (!oldValue.Contains(info))
                {
                    oldValue.Add(info);
                }
                return oldValue;
            });
        }

        /// <summary>
        /// Processes files found during a library scan.
        /// Populates a collection of <see cref="ParserInfo"/> for DB updates later.
        /// </summary>
        /// <param name="path">Path of a file</param>
        /// <param name="rootPath">Root folder the file was found under</param>
        private void ProcessFile(string path, string rootPath)
        {
            var info = Parser.Parser.Parse(path, rootPath);
            if (info == null)
            {
                _logger.LogWarning($"Could not parse from {path}");
                return;
            }

            TrackSeries(info);
        }
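
        /// <summary>
        /// Updates a series' volumes, total page count, and metadata from the parsed files.
        /// </summary>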
        private void UpdateSeries(ref Series series, ParserInfo[] infos)
        {
            _logger.LogInformation($"Updating entries for {series.Name}. {infos.Length} related files.");

            UpdateVolumes(series, infos);
            series.Pages = series.Volumes.Sum(v => v.Pages);
            _metadataService.UpdateMetadata(series, _forceUpdate);

            _logger.LogDebug($"Created {series.Volumes.Count} volumes on {series.Name}");
        }
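
        /// <summary>
        /// Builds a <see cref="MangaFile"/> from a <see cref="ParserInfo"/>. Page counts are only computed
        /// for archives; all other formats default to a single page.
        /// </summary>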
        private MangaFile CreateMangaFile(ParserInfo info)
        {
            return new MangaFile()
            {
                FilePath = info.FullFilePath,
                Format = info.Format,
                NumberOfPages = info.Format == MangaFormat.Archive ? _archiveService.GetNumberOfPagesFromArchive(info.FullFilePath) : 1
            };
        }
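
        /// <summary>
        /// Rebuilds a volume's chapter list from the parsed infos, reusing existing chapters whose range
        /// matches, then refreshes each chapter's page count and metadata.
        /// </summary>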
        private void UpdateChapters(Volume volume, IList<Chapter> existingChapters, IEnumerable<ParserInfo> infos)
        {
            volume.Chapters = new List<Chapter>();
            var justVolumeInfos = infos.Where(pi => pi.Volumes == volume.Name).ToArray();
            foreach (var info in justVolumeInfos)
            {
                try
                {
                    var chapter = existingChapters.SingleOrDefault(c => c.Range == info.Chapters) ??
                        new Chapter()
                        {
                            Number = Parser.Parser.MinimumNumberFromRange(info.Chapters) + "",
                            Range = info.Chapters,
                        };

                    AddOrUpdateFileForChapter(chapter, info);
                    // Re-apply the parsed values so existing chapters pick up any changes.
                    chapter.Number = Parser.Parser.MinimumNumberFromRange(info.Chapters) + "";
                    chapter.Range = info.Chapters;

                    if (volume.Chapters.All(c => c.Range != info.Chapters))
                    {
                        volume.Chapters.Add(chapter);
                    }
                }
                catch (Exception ex)
                {
                    _logger.LogWarning(ex, $"There was an exception parsing {info.Series} - Volume {volume.Number}'s chapters. Skipping chapter.");
                }
            }

            foreach (var chapter in volume.Chapters)
            {
                chapter.Pages = chapter.Files.Sum(f => f.NumberOfPages);
                _metadataService.UpdateMetadata(chapter, _forceUpdate);
            }
        }
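
        /// <summary>
        /// Attaches the parsed file to a chapter, refreshing the page count when the file is already tracked.
        /// Only archives are supported; other formats are ignored.
        /// </summary>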
        private void AddOrUpdateFileForChapter(Chapter chapter, ParserInfo info)
        {
            chapter.Files ??= new List<MangaFile>();
            var existingFile = chapter.Files.SingleOrDefault(f => f.FilePath == info.FullFilePath);
            if (existingFile != null)
            {
                // Only archives are ever added (see below), so counting pages via the archive service is safe here.
                existingFile.Format = info.Format;
                existingFile.NumberOfPages = _archiveService.GetNumberOfPagesFromArchive(info.FullFilePath);
            }
            else
            {
                if (info.Format == MangaFormat.Archive)
                {
                    chapter.Files.Add(CreateMangaFile(info));
                }
                else
                {
                    _logger.LogDebug($"Ignoring {info.Filename}. Non-archives are not supported yet.");
                }
            }
        }
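
        /// <summary>
        /// Returns the volume named <paramref name="volumeName"/> from the in-progress collection, falling
        /// back to the already-persisted volumes, or null if neither contains it.
        /// </summary>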
        public static Volume ExistingOrDefault(IList<Volume> existingVolumes, ICollection<Volume> volumes, string volumeName)
        {
            return volumes.SingleOrDefault(v => v.Name == volumeName) ??
                   existingVolumes.SingleOrDefault(v => v.Name == volumeName);
        }
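
        /// <summary>
        /// Returns the series whose normalized name matches <paramref name="seriesName"/> from the library,
        /// falling back to the full series list, or null if neither contains it.
        /// </summary>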
        public static Series ExistingOrDefault(Library library, IEnumerable<Series> allSeries, string seriesName)
        {
            var name = Parser.Parser.Normalize(seriesName);

            library.Series ??= new List<Series>();
            return library.Series.SingleOrDefault(s => Parser.Parser.Normalize(s.Name) == name) ??
                   allSeries.SingleOrDefault(s => Parser.Parser.Normalize(s.Name) == name);
        }
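
        /// <summary>
        /// Ensures a <see cref="Volume"/> exists for every parsed volume name, then updates each volume's
        /// chapters, page count, and metadata.
        /// </summary>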
        private void UpdateVolumes(Series series, ParserInfo[] infos)
        {
            series.Volumes ??= new List<Volume>();
            _logger.LogDebug($"Updating Volumes for {series.Name}. {infos.Length} related files.");
            var existingVolumes = _unitOfWork.SeriesRepository.GetVolumes(series.Id).ToList();

            foreach (var info in infos)
            {
                try
                {
                    var volume = ExistingOrDefault(existingVolumes, series.Volumes, info.Volumes) ?? new Volume
                    {
                        Name = info.Volumes,
                        Number = (int) Parser.Parser.MinimumNumberFromRange(info.Volumes),
                        IsSpecial = false,
                        Chapters = new List<Chapter>()
                    };

                    if (series.Volumes.Any(v => v.Name == volume.Name)) continue;
                    series.Volumes.Add(volume);
                }
                catch (Exception ex)
                {
                    _logger.LogError(ex, $"There was an exception when creating volume {info.Volumes}. Skipping volume.");
                }
            }

            foreach (var volume in series.Volumes)
            {
                _logger.LogInformation($"Processing {series.Name} - Volume {volume.Name}");
                try
                {
                    UpdateChapters(volume, volume.Chapters, infos);
                    volume.Pages = volume.Chapters.Sum(c => c.Pages);
                    // BUG: This code does not remove chapters that no longer exist! This means leftover chapters exist when not on disk.
                    _logger.LogDebug($"Created {volume.Chapters.Count} chapters");
                }
                catch (Exception ex)
                {
                    _logger.LogError(ex, $"There was an exception when updating chapters for volume {volume.Name}. Skipping volume.");
                }
            }

            foreach (var volume in series.Volumes)
            {
                _metadataService.UpdateMetadata(volume, _forceUpdate);
            }
        }
    }
}