using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Linq;
using System.Threading.Tasks;
using API.Entities.Enums;
using API.Extensions;
using API.Parser;
using API.SignalR;
using Microsoft.Extensions.Logging;
namespace API.Services.Tasks.Scanner
{
public class ParsedSeries
{
/// <summary>
/// Name of the Series
/// </summary>
public string Name { get; init; }
/// <summary>
/// Normalized Name of the Series
/// </summary>
public string NormalizedName { get; init; }
/// <summary>
/// Format of the Series
/// </summary>
public MangaFormat Format { get; init; }
}
public enum Modified
{
Modified = 1,
NotModified = 2
}
public class SeriesModified
{
public string FolderPath { get; set; }
public string SeriesName { get; set; }
public DateTime LastScanned { get; set; }
public MangaFormat Format { get; set; }
}
public class ParseScannedFiles
{
private readonly ILogger _logger;
private readonly IDirectoryService _directoryService;
private readonly IReadingItemService _readingItemService;
private readonly IEventHub _eventHub;
/// <summary>
/// An instance of a pipeline for processing files and returning a Map of Series -> ParserInfos.
/// Each instance is separate from other threads, allowing for no cross over.
/// </summary>
/// <param name="logger">Logger of the parent class that invokes this</param>
/// <param name="directoryService">Directory Service</param>
/// <param name="readingItemService">ReadingItemService Service for extracting information on a number of formats</param>
/// <param name="eventHub">For firing off SignalR events</param>
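/// <example>
/// A minimal construction sketch; the services are assumed to be supplied by dependency injection
/// and the variable names are illustrative only, not part of this class:
/// <code>
/// var parseScannedFiles = new ParseScannedFiles(logger, directoryService, readingItemService, eventHub);
/// </code>
/// </example>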
public ParseScannedFiles(ILogger logger, IDirectoryService directoryService,
IReadingItemService readingItemService, IEventHub eventHub)
{
_logger = logger;
_directoryService = directoryService;
_readingItemService = readingItemService;
_eventHub = eventHub;
}
/// <summary>
/// This will scan all files in a folder path. For each folder within the folderPath, folderAction will be invoked for all files contained.
/// </summary>
/// <param name="folderPath">A library folder or series folder</param>
/// <param name="scanDirectoryByDirectory">Scan directory by directory and for each, call folderAction</param>
/// <param name="seriesPaths">Map of normalized folder path to series scan metadata, used to skip folders that haven't changed</param>
/// <param name="folderAction">A callback async Task to be called once all files for each folder path are found</param>
/// <param name="forceCheck">If we should bypass any folder last write time checks on the scan and force I/O</param>
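/// <example>
/// A sketch of a direct invocation, assuming a populated seriesPaths map; the folder path and the
/// lambda body are illustrative only (they simply log how many files each folder produced):
/// <code>
/// await ProcessFiles("/manga/Library", scanDirectoryByDirectory: true, seriesPaths,
///     (files, folder) =>
///     {
///         _logger.LogInformation("{Folder}: {Count} files", folder, files.Count);
///         return Task.CompletedTask;
///     });
/// </code>
/// </example>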
public async Task ProcessFiles(string folderPath, bool scanDirectoryByDirectory,
IDictionary<string, IList<SeriesModified>> seriesPaths, Func<IList<string>, string, Task> folderAction, bool forceCheck = false)
{
string normalizedPath;
if (scanDirectoryByDirectory)
{
var directories = _directoryService.GetDirectories(folderPath).ToList();
foreach (var directory in directories)
{
normalizedPath = Parser.Parser.NormalizePath(directory);
if (HasSeriesFolderNotChangedSinceLastScan(seriesPaths, normalizedPath, forceCheck))
{
await folderAction(new List<string>(), directory);
}
else
{
// For a scan, this is doing everything in the directory loop before the folder Action is called...which leads to no progress indication
await folderAction(_directoryService.ScanFiles(directory), directory);
}
}
return;
}
normalizedPath = Parser.Parser.NormalizePath(folderPath);
if (HasSeriesFolderNotChangedSinceLastScan(seriesPaths, normalizedPath, forceCheck))
{
await folderAction(new List<string>(), folderPath);
return;
}
await folderAction(_directoryService.ScanFiles(folderPath), folderPath);
}
/// <summary>
/// Attempts to either add a new instance of a show mapping to the scannedSeries bag or add to an existing one.
/// This will check if the name matches an existing series name (multiple fields).
/// </summary>
/// <param name="scannedSeries">A localized list of a series' parsed infos</param>
/// <param name="info">The ParserInfo to track</param>
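/// <example>
/// A sketch of the grouping behaviour (both infos are hypothetical): series names that normalize to
/// the same value land under one key.
/// <code>
/// TrackSeries(scannedSeries, infoA); // infoA.Series == "Accel World"
/// TrackSeries(scannedSeries, infoB); // infoB.Series == "accel world!" -> merged under the existing key
/// // scannedSeries now holds a single ParsedSeries key with both infos in its list
/// </code>
/// </example>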
private void TrackSeries(ConcurrentDictionary<ParsedSeries, List<ParserInfo>> scannedSeries, ParserInfo info)
{
if (info.Series == string.Empty) return;
// Check if normalized info.Series already exists and if so, update info to use that name instead
info.Series = MergeName(scannedSeries, info);
var normalizedSeries = Parser.Parser.Normalize(info.Series);
var normalizedSortSeries = Parser.Parser.Normalize(info.SeriesSort);
var normalizedLocalizedSeries = Parser.Parser.Normalize(info.LocalizedSeries);
try
{
var existingKey = scannedSeries.Keys.SingleOrDefault(ps =>
ps.Format == info.Format && (ps.NormalizedName.Equals(normalizedSeries)
|| ps.NormalizedName.Equals(normalizedLocalizedSeries)
|| ps.NormalizedName.Equals(normalizedSortSeries)));
existingKey ??= new ParsedSeries()
{
Format = info.Format,
Name = info.Series,
NormalizedName = normalizedSeries
};
scannedSeries.AddOrUpdate(existingKey, new List<ParserInfo>() {info}, (_, oldValue) =>
{
oldValue ??= new List<ParserInfo>();
if (!oldValue.Contains(info))
{
oldValue.Add(info);
}
return oldValue;
});
}
catch (Exception ex)
{
_logger.LogCritical(ex, "{SeriesName} matches against multiple series in the parsed series. This indicates a critical Kavita issue. Key will be skipped", info.Series);
foreach (var seriesKey in scannedSeries.Keys.Where(ps =>
ps.Format == info.Format && (ps.NormalizedName.Equals(normalizedSeries)
|| ps.NormalizedName.Equals(normalizedLocalizedSeries)
|| ps.NormalizedName.Equals(normalizedSortSeries))))
{
_logger.LogCritical("Matches: {SeriesName} matches on {SeriesKey}", info.Series, seriesKey.Name);
}
}
}
/// <summary>
/// Using a normalized name from the passed ParserInfo, this checks against all found series so far and if an existing one exists with
/// the same normalized name, it merges into the existing one. This is important as some manga may have a slight difference in punctuation or capitalization.
/// </summary>
/// <param name="scannedSeries">Series found so far in this scan</param>
/// <param name="info">The ParserInfo whose series name should be merged</param>
/// <returns>Series Name to group this info into</returns>
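/// <example>
/// A sketch, assuming scannedSeries already tracks a series named "Accel World" in Archive format
/// (the info values shown are hypothetical):
/// <code>
/// // info.Series == "accel world", info.Format == MangaFormat.Archive
/// var name = MergeName(scannedSeries, info); // returns "Accel World"
/// </code>
/// </example>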
private string MergeName(ConcurrentDictionary<ParsedSeries, List<ParserInfo>> scannedSeries, ParserInfo info)
{
var normalizedSeries = Parser.Parser.Normalize(info.Series);
var normalizedLocalSeries = Parser.Parser.Normalize(info.LocalizedSeries);
try
{
var existingName =
scannedSeries.SingleOrDefault(p =>
(Parser.Parser.Normalize(p.Key.NormalizedName).Equals(normalizedSeries) ||
Parser.Parser.Normalize(p.Key.NormalizedName).Equals(normalizedLocalSeries)) &&
p.Key.Format == info.Format)
.Key;
if (existingName != null && !string.IsNullOrEmpty(existingName.Name))
{
return existingName.Name;
}
}
catch (Exception ex)
{
_logger.LogCritical(ex, "Multiple series detected for {SeriesName} ({File})! This is critical to fix! There should only be 1", info.Series, info.FullFilePath);
var values = scannedSeries.Where(p =>
(Parser.Parser.Normalize(p.Key.NormalizedName) == normalizedSeries ||
Parser.Parser.Normalize(p.Key.NormalizedName) == normalizedLocalSeries) &&
p.Key.Format == info.Format);
foreach (var pair in values)
{
_logger.LogCritical("Duplicate Series in DB matches with {SeriesName}: {DuplicateName}", info.Series, pair.Key.Name);
}
}
return info.Series;
}
/// <summary>
/// This will process series by folder groups.
/// </summary>
/// <param name="libraryType">Type of the library being scanned</param>
/// <param name="folders">Library folders to scan</param>
/// <param name="libraryName">Name of the library, used for progress events</param>
/// <param name="isLibraryScan">Whether this is a full library scan (scan directory by directory)</param>
/// <param name="seriesPaths">Map of normalized folder path to series scan metadata, used to skip unchanged folders</param>
/// <param name="processSeriesInfos">Callback invoked with (skipped, infos) for each series found</param>
/// <param name="forceCheck">If we should bypass any folder last write time checks on the scan and force I/O</param>
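/// <example>
/// A sketch of a caller, assuming folders and seriesPaths were loaded elsewhere; Item1 on the tuple
/// signals whether the folder was skipped because it hadn't changed since the last scan.
/// ProcessParsedSeries is a hypothetical callback, not part of this class.
/// <code>
/// await ScanLibrariesForSeries(LibraryType.Manga, folders, "Manga", isLibraryScan: true, seriesPaths,
///     tuple => ProcessParsedSeries(tuple.Item1, tuple.Item2), forceCheck: false);
/// </code>
/// </example>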
public async Task ScanLibrariesForSeries(LibraryType libraryType,
IEnumerable<string> folders, string libraryName, bool isLibraryScan,
IDictionary<string, IList<SeriesModified>> seriesPaths, Action<Tuple<bool, IList<ParserInfo>>> processSeriesInfos, bool forceCheck = false)
{
await _eventHub.SendMessageAsync(MessageFactory.NotificationProgress, MessageFactory.FileScanProgressEvent("Starting file scan", libraryName, ProgressEventType.Started));
foreach (var folderPath in folders)
{
try
{
await ProcessFiles(folderPath, isLibraryScan, seriesPaths, async (files, folder) =>
{
var normalizedFolder = Parser.Parser.NormalizePath(folder);
if (HasSeriesFolderNotChangedSinceLastScan(seriesPaths, normalizedFolder, forceCheck))
{
var parsedInfos = seriesPaths[normalizedFolder].Select(fp => new ParserInfo()
{
Series = fp.SeriesName,
Format = fp.Format,
}).ToList();
processSeriesInfos.Invoke(new Tuple<bool, IList<ParserInfo>>(true, parsedInfos));
_logger.LogDebug("Skipped File Scan for {Folder} as it hasn't changed since last scan", folder);
return;
}
_logger.LogDebug("Found {Count} files for {Folder}", files.Count, folder);
await _eventHub.SendMessageAsync(MessageFactory.NotificationProgress, MessageFactory.FileScanProgressEvent(folderPath, libraryName, ProgressEventType.Updated));
if (files.Count == 0)
{
_logger.LogInformation("[ScannerService] {Folder} is empty", folder);
return;
}
var scannedSeries = new ConcurrentDictionary<ParsedSeries, List<ParserInfo>>();
var infos = files
.Select(file => _readingItemService.ParseFile(file, folderPath, libraryType))
.Where(info => info != null)
.ToList();
MergeLocalizedSeriesWithSeries(infos);
foreach (var info in infos)
{
try
{
TrackSeries(scannedSeries, info);
}
catch (Exception ex)
{
_logger.LogError(ex, "There was an exception that occurred during tracking {FilePath}. Skipping this file", info.FullFilePath);
}
}
// It would be really cool if we can emit an event when a folder hasn't been changed so we don't parse everything, but the first item to ensure we don't delete it
// Otherwise, we can do a last step in the DB where we validate all files on disk exist and if not, delete them. (easy but slow)
foreach (var series in scannedSeries.Keys)
{
if (scannedSeries[series].Count > 0 && processSeriesInfos != null)
{
processSeriesInfos.Invoke(new Tuple<bool, IList<ParserInfo>>(false, scannedSeries[series]));
}
}
}, forceCheck);
}
catch (ArgumentException ex)
{
_logger.LogError(ex, "The directory '{FolderPath}' does not exist", folderPath);
}
}
await _eventHub.SendMessageAsync(MessageFactory.NotificationProgress, MessageFactory.FileScanProgressEvent(string.Empty, libraryName, ProgressEventType.Ended));
}
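/// <summary>
/// Checks against the passed seriesPaths map whether the folder is known and every series within it was last scanned
/// at or after the folder's last write time (truncated to the minute). Always returns false when forceCheck is set.
/// </summary>
/// <param name="seriesPaths">Map of normalized folder path to series scan metadata</param>
/// <param name="normalizedFolder">Normalized folder path to check</param>
/// <param name="forceCheck">When true, always report the folder as changed so it gets scanned</param>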
private bool HasSeriesFolderNotChangedSinceLastScan(IDictionary<string, IList<SeriesModified>> seriesPaths, string normalizedFolder, bool forceCheck = false)
{
if (forceCheck) return false;
return seriesPaths.ContainsKey(normalizedFolder) && seriesPaths[normalizedFolder].All(f => f.LastScanned.Truncate(TimeSpan.TicksPerMinute) >=
_directoryService.GetLastWriteTime(normalizedFolder).Truncate(TimeSpan.TicksPerMinute));
}
/// <summary>
/// Checks if there are any ParserInfos that have a Series that matches the LocalizedSeries field in any other info. If so,
/// rewrites the infos with the series name instead of the localized name, so they stack.
/// </summary>
/// <example>
/// Accel World v01.cbz has Series "Accel World" and Localized Series "World of Acceleration"
/// World of Acceleration v02.cbz has Series "World of Acceleration"
/// After running this code, we'd have:
/// World of Acceleration v02.cbz having Series "Accel World" and Localized Series of "World of Acceleration"
/// </example>
/// <param name="infos">A collection of ParserInfos</param>
private void MergeLocalizedSeriesWithSeries(IReadOnlyCollection<ParserInfo> infos)
{
var hasLocalizedSeries = infos.Any(i => !string.IsNullOrEmpty(i.LocalizedSeries));
if (!hasLocalizedSeries) return;
var localizedSeries = infos
.Where(i => !i.IsSpecial)
.Select(i => i.LocalizedSeries)
.Distinct()
.FirstOrDefault(i => !string.IsNullOrEmpty(i));
if (string.IsNullOrEmpty(localizedSeries)) return;
// NOTE: If we have multiple series in a folder with a localized title, then this will fail. It will group into one series. User needs to fix this themselves.
string nonLocalizedSeries;
var nonLocalizedSeriesFound = infos.Where(i => !i.IsSpecial).Select(i => i.Series).Distinct().ToList();
if (nonLocalizedSeriesFound.Count == 1)
{
nonLocalizedSeries = nonLocalizedSeriesFound.First();
}
else
{
// There can be a case where there are multiple series in a folder that causes merging.
if (nonLocalizedSeriesFound.Count > 2)
{
_logger.LogError("[ScannerService] There are multiple series within one folder that contain localized series. This will cause them to group incorrectly. Please separate series into their own dedicated folder: {LocalizedSeries}", string.Join(", ", nonLocalizedSeriesFound));
}
nonLocalizedSeries = nonLocalizedSeriesFound.FirstOrDefault(s => !s.Equals(localizedSeries));
}
if (string.IsNullOrEmpty(nonLocalizedSeries)) return;
var normalizedNonLocalizedSeries = Parser.Parser.Normalize(nonLocalizedSeries);
foreach (var infoNeedingMapping in infos.Where(i =>
!Parser.Parser.Normalize(i.Series).Equals(normalizedNonLocalizedSeries)))
{
infoNeedingMapping.Series = nonLocalizedSeries;
infoNeedingMapping.LocalizedSeries = localizedSeries;
}
}
}
}