Kavita/API/Services/Tasks/Scanner/ParseScannedFiles.cs
Joseph Milazzo ebd4ec25bf
Shakeout Fixes (#422)
# Fixed
- Fixed: Clean the pdf extension from Series title for PDF types
- Fixed: Fixed a bug where a forced metadata refresh wouldn't trigger a volume to force a refresh of cover image
- Fixed: Fixed an issue where Removing series no longer on disk would not use the Series Format and thus after deleting files, they would not be removed.
- Fixed: Fixed an issue with reading a single image file, where the cache code would not properly move the file
- Fixed: For Specials, Get Next/Prev Chapter should use the filename instead of arbitrary Number (which is always 0). Use the same sorting logic when requesting volumes on series detail, so sorting can happen in the backend.

# Added
- Added: (Accessibility) Nearly every page has had a title set for it 

===============================================================================

* Clean the pdf extension from ParseSeries

* Fixed a bug where forced metadata refresh wouldn't trigger the volume to update it's image.

* Added titles to most pages to help distinguish back/forward history.

Fixed a bug in the scanner which didn't account for Format when calculating if we need to remove a series not on disk.

* For Specials, Get Next/Prev Chapter should use the filename instead of arbitrary Number (which is always 0). Use the same sorting logic when requesting volumes on series detail, so sorting can happen in the backend.

* Fixed unit tests
2021-07-23 18:02:14 -05:00

209 lines
8.0 KiB
C#

using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Linq;
using API.Entities;
using API.Entities.Enums;
using API.Interfaces.Services;
using API.Parser;
using Microsoft.Extensions.Logging;
namespace API.Services.Tasks.Scanner
{
public class ParsedSeries
{
public string Name { get; init; }
public string NormalizedName { get; init; }
public MangaFormat Format { get; init; }
}
public class ParseScannedFiles
{
private readonly ConcurrentDictionary<ParsedSeries, List<ParserInfo>> _scannedSeries;
private readonly IBookService _bookService;
private readonly ILogger _logger;
/// <summary>
/// An instance of a pipeline for processing files and returning a Map of Series -> ParserInfos.
/// Each instance is separate from other threads, allowing for no cross over.
/// </summary>
/// <param name="bookService"></param>
/// <param name="logger"></param>
public ParseScannedFiles(IBookService bookService, ILogger logger)
{
_bookService = bookService;
_logger = logger;
_scannedSeries = new ConcurrentDictionary<ParsedSeries, List<ParserInfo>>();
}
public static IList<ParserInfo> GetInfosByName(Dictionary<ParsedSeries, List<ParserInfo>> parsedSeries, Series series)
{
var existingKey = parsedSeries.Keys.FirstOrDefault(ps =>
ps.Format == series.Format && ps.NormalizedName == Parser.Parser.Normalize(series.OriginalName));
existingKey ??= new ParsedSeries()
{
Format = series.Format,
Name = series.OriginalName,
NormalizedName = Parser.Parser.Normalize(series.OriginalName)
};
return parsedSeries[existingKey];
}
/// <summary>
/// Processes files found during a library scan.
/// Populates a collection of <see cref="ParserInfo"/> for DB updates later.
/// </summary>
/// <param name="path">Path of a file</param>
/// <param name="rootPath"></param>
/// <param name="type">Library type to determine parsing to perform</param>
private void ProcessFile(string path, string rootPath, LibraryType type)
{
ParserInfo info;
if (Parser.Parser.IsEpub(path))
{
info = _bookService.ParseInfo(path);
}
else
{
info = Parser.Parser.Parse(path, rootPath, type);
}
if (info == null)
{
_logger.LogWarning("[Scanner] Could not parse series from {Path}", path);
return;
}
if (Parser.Parser.IsEpub(path) && Parser.Parser.ParseVolume(info.Series) != Parser.Parser.DefaultVolume)
{
info = Parser.Parser.Parse(path, rootPath, type);
var info2 = _bookService.ParseInfo(path);
info.Merge(info2);
}
TrackSeries(info);
}
/// <summary>
/// Attempts to either add a new instance of a show mapping to the _scannedSeries bag or adds to an existing.
/// This will check if the name matches an existing series name (multiple fields) <see cref="MergeName"/>
/// </summary>
/// <param name="info"></param>
private void TrackSeries(ParserInfo info)
{
if (info.Series == string.Empty) return;
// Check if normalized info.Series already exists and if so, update info to use that name instead
info.Series = MergeName(info);
var existingKey = _scannedSeries.Keys.FirstOrDefault(ps =>
ps.Format == info.Format && ps.NormalizedName == Parser.Parser.Normalize(info.Series));
existingKey ??= new ParsedSeries()
{
Format = info.Format,
Name = info.Series,
NormalizedName = Parser.Parser.Normalize(info.Series)
};
_scannedSeries.AddOrUpdate(existingKey, new List<ParserInfo>() {info}, (_, oldValue) =>
{
oldValue ??= new List<ParserInfo>();
if (!oldValue.Contains(info))
{
oldValue.Add(info);
}
return oldValue;
});
}
/// <summary>
/// Using a normalized name from the passed ParserInfo, this checks against all found series so far and if an existing one exists with
/// same normalized name, it merges into the existing one. This is important as some manga may have a slight difference with punctuation or capitalization.
/// </summary>
/// <param name="info"></param>
/// <returns></returns>
public string MergeName(ParserInfo info)
{
var normalizedSeries = Parser.Parser.Normalize(info.Series);
_logger.LogDebug("Checking if we can merge {NormalizedSeries}", normalizedSeries);
var existingName =
_scannedSeries.SingleOrDefault(p => Parser.Parser.Normalize(p.Key.NormalizedName) == normalizedSeries && p.Key.Format == info.Format)
.Key;
if (existingName != null && !string.IsNullOrEmpty(existingName.Name))
{
_logger.LogDebug("Found duplicate parsed infos, merged {Original} into {Merged}", info.Series, existingName.Name);
return existingName.Name;
}
return info.Series;
}
/// <summary>
///
/// </summary>
/// <param name="libraryType">Type of library. Used for selecting the correct file extensions to search for and parsing files</param>
/// <param name="folders">The folders to scan. By default, this should be library.Folders, however it can be overwritten to restrict folders</param>
/// <param name="totalFiles">Total files scanned</param>
/// <param name="scanElapsedTime">Time it took to scan and parse files</param>
/// <returns></returns>
public Dictionary<ParsedSeries, List<ParserInfo>> ScanLibrariesForSeries(LibraryType libraryType, IEnumerable<string> folders, out int totalFiles,
out long scanElapsedTime)
{
var sw = Stopwatch.StartNew();
totalFiles = 0;
var searchPattern = GetLibrarySearchPattern();
foreach (var folderPath in folders)
{
try
{
totalFiles += DirectoryService.TraverseTreeParallelForEach(folderPath, (f) =>
{
try
{
ProcessFile(f, folderPath, libraryType);
}
catch (FileNotFoundException exception)
{
_logger.LogError(exception, "The file {Filename} could not be found", f);
}
}, searchPattern, _logger);
}
catch (ArgumentException ex)
{
_logger.LogError(ex, "The directory '{FolderPath}' does not exist", folderPath);
}
}
scanElapsedTime = sw.ElapsedMilliseconds;
_logger.LogInformation("Scanned {TotalFiles} files in {ElapsedScanTime} milliseconds", totalFiles,
scanElapsedTime);
return SeriesWithInfos();
}
private static string GetLibrarySearchPattern()
{
return Parser.Parser.SupportedExtensions;
}
/// <summary>
/// Returns any series where there were parsed infos
/// </summary>
/// <returns></returns>
private Dictionary<ParsedSeries, List<ParserInfo>> SeriesWithInfos()
{
var filtered = _scannedSeries.Where(kvp => kvp.Value.Count > 0);
var series = filtered.ToDictionary(v => v.Key, v => v.Value);
return series;
}
}
}