There is a theme...more regex changes. Moved the logic around parsing and falling back into Parser.Parse() and set up testing for it.

This commit is contained in:
Joseph Milazzo 2021-01-24 10:05:53 -06:00
parent a315feb569
commit 8683c81361
7 changed files with 160 additions and 73 deletions

View File

@ -8,6 +8,7 @@
<ItemGroup> <ItemGroup>
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="16.7.1" /> <PackageReference Include="Microsoft.NET.Test.Sdk" Version="16.7.1" />
<PackageReference Include="NSubstitute" Version="4.2.2" />
<PackageReference Include="xunit" Version="2.4.1" /> <PackageReference Include="xunit" Version="2.4.1" />
<PackageReference Include="xunit.runner.visualstudio" Version="2.4.3"> <PackageReference Include="xunit.runner.visualstudio" Version="2.4.3">
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets> <IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>

View File

@ -1,3 +1,6 @@
using System.Collections.Generic;
using API.Entities;
using API.Parser;
using Xunit; using Xunit;
using static API.Parser.Parser; using static API.Parser.Parser;
@ -62,6 +65,7 @@ namespace API.Tests
[InlineData("Tonikaku Cawaii [Volume 11].cbz", "Tonikaku Cawaii")] [InlineData("Tonikaku Cawaii [Volume 11].cbz", "Tonikaku Cawaii")]
[InlineData("Mujaki no Rakuen Vol12 ch76", "Mujaki no Rakuen")] [InlineData("Mujaki no Rakuen Vol12 ch76", "Mujaki no Rakuen")]
[InlineData("Knights of Sidonia c000 (S2 LE BD Omake - BLAME!) [Habanero Scans]", "Knights of Sidonia")] [InlineData("Knights of Sidonia c000 (S2 LE BD Omake - BLAME!) [Habanero Scans]", "Knights of Sidonia")]
[InlineData("Vol 1.cbz", "")]
public void ParseSeriesTest(string filename, string expected) public void ParseSeriesTest(string filename, string expected)
{ {
Assert.Equal(expected, ParseSeries(filename)); Assert.Equal(expected, ParseSeries(filename));
@ -142,5 +146,46 @@ namespace API.Tests
{ {
Assert.Equal(expected, ParseEdition(input)); Assert.Equal(expected, ParseEdition(input));
} }
/// <summary>
/// Verifies that <see cref="Parse"/> fills in every compared <see cref="ParserInfo"/> field from a full file path,
/// including the case where the file name alone ("Vol 1.cbz") carries no series and the
/// parent directory name has to supply it.
/// </summary>
[Fact]
public void ParseInfoTest()
{
    const string namedFile = @"E:/Manga/Mujaki no Rakuen/Mujaki no Rakuen Vol12 ch76.cbz";
    const string directoryFallbackFile = @"E:/Manga/Shimoneta to Iu Gainen ga Sonzai Shinai Taikutsu na Sekai Man-hen/Vol 1.cbz";

    var expected = new Dictionary<string, ParserInfo>
    {
        [namedFile] = new ParserInfo
        {
            Series = "Mujaki no Rakuen", Volumes = "12",
            Chapters = "76", Filename = "Mujaki no Rakuen Vol12 ch76.cbz", Format = MangaFormat.Archive,
            FullFilePath = namedFile
        },
        [directoryFallbackFile] = new ParserInfo
        {
            Series = "Shimoneta to Iu Gainen ga Sonzai Shinai Taikutsu na Sekai Man-hen", Volumes = "1",
            Chapters = "0", Filename = "Vol 1.cbz", Format = MangaFormat.Archive,
            FullFilePath = directoryFallbackFile
        }
    };

    foreach (var pair in expected)
    {
        var expectedInfo = pair.Value;
        var actual = Parse(pair.Key);

        // Parse returns null when no series could be determined; assert that explicitly so a
        // parsing failure is reported as a failed assertion rather than a NullReferenceException.
        Assert.NotNull(actual);

        Assert.Equal(expectedInfo.Format, actual.Format);
        Assert.Equal(expectedInfo.Series, actual.Series);
        Assert.Equal(expectedInfo.Chapters, actual.Chapters);
        Assert.Equal(expectedInfo.Volumes, actual.Volumes);
        Assert.Equal(expectedInfo.Edition, actual.Edition);
        Assert.Equal(expectedInfo.Filename, actual.Filename);
        Assert.Equal(expectedInfo.FullFilePath, actual.FullFilePath);
    }
}
} }
} }

View File

@ -1,7 +1,20 @@
namespace API.Tests.Services using API.Interfaces;
using API.Services;
using Microsoft.Extensions.Logging;
using NSubstitute;
namespace API.Tests.Services
{ {
public class ScannerServiceTests public class ScannerServiceTests
{ {
private readonly ScannerService _scannerService;
private readonly ILogger<ScannerService> _logger;
private readonly IUnitOfWork _unitOfWork;

/// <summary>
/// Creates the service under test on top of NSubstitute stand-ins for its logger and unit of work.
/// </summary>
public ScannerServiceTests()
{
    _logger = Substitute.For<ILogger<ScannerService>>();
    _unitOfWork = Substitute.For<IUnitOfWork>();
    _scannerService = new ScannerService(_unitOfWork, _logger);
}
// TODO: Start adding tests for how scanner works so we can ensure fallbacks, etc work // TODO: Start adding tests for how scanner works so we can ensure fallbacks, etc work
} }
} }

View File

@ -1,4 +1,5 @@
using System; using System;
using System.Diagnostics;
using System.IO; using System.IO;
namespace API.Extensions namespace API.Extensions
@ -50,7 +51,6 @@ namespace API.Extensions
if (file.Directory == null) continue; if (file.Directory == null) continue;
var newName = $"{file.Directory.Name}_{file.Name}"; var newName = $"{file.Directory.Name}_{file.Name}";
var newPath = Path.Join(root.FullName, newName); var newPath = Path.Join(root.FullName, newName);
Console.WriteLine($"Renaming/Moving file to: {newPath}");
file.MoveTo(newPath); file.MoveTo(newPath);
} }

View File

@ -74,22 +74,16 @@ namespace API.Parser
// Black Bullet (This is very loose, keep towards bottom) // Black Bullet (This is very loose, keep towards bottom)
new Regex( new Regex(
@"(?<Series>.*)(_)(v|vo|c|volume)",
@"(?<Series>.*)(\b|_)(v|vo|c|volume)", RegexOptions.IgnoreCase | RegexOptions.Compiled),
// Akiiro Bousou Biyori - 01.jpg, Beelzebub_172_RHS.zip, Cynthia the Mission 29.rar
new Regex(
@"^(?!Vol)(?<Series>.*)( |_)(\d+)",
RegexOptions.IgnoreCase | RegexOptions.Compiled), RegexOptions.IgnoreCase | RegexOptions.Compiled),
// [BAA]_Darker_than_Black_c1 (This is very greedy, make sure it's close to last) // [BAA]_Darker_than_Black_c1 (This is very greedy, make sure it's close to last)
new Regex( new Regex(
@"(?<Series>.*)(\b|_)(c)", @"(?<Series>.*)( |_)(c)",
RegexOptions.IgnoreCase | RegexOptions.Compiled), RegexOptions.IgnoreCase | RegexOptions.Compiled),
// Akiiro Bousou Biyori - 01.jpg
new Regex(
@"(?<Series>.*)(\b|_)(\d+)",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
// Darker Than Black (This takes anything, we have to account for perfectly named folders)
// new Regex(
// @"(?<Series>.*)",
// RegexOptions.IgnoreCase | RegexOptions.Compiled),
}; };
private static readonly Regex[] ReleaseGroupRegex = new[] private static readonly Regex[] ReleaseGroupRegex = new[]
@ -136,22 +130,38 @@ namespace API.Parser
}; };
/// <summary>
/// Parses information out of a file path. Will fallback to using directory name if Series couldn't be parsed
/// from filename.
/// </summary>
/// <param name="filePath">Path of the file to parse. Series, Volumes and Chapters are read from the file name;
/// the name of the containing directory is the fallback source for Series.</param>
/// <returns><see cref="ParserInfo"/> or null if Series was empty</returns>
public static ParserInfo Parse(string filePath) public static ParserInfo Parse(string filePath)
{ {
var fileName = Path.GetFileName(filePath);
// NOTE(review): the null-conditional means directoryName may be null; the fallback below passes it on
// unchecked — confirm ParseSeries and CleanTitle accept a null argument.
var directoryName = (new FileInfo(filePath)).Directory?.Name;
var ret = new ParserInfo() var ret = new ParserInfo()
{ {
Chapters = ParseChapter(filePath), Chapters = ParseChapter(fileName),
Series = ParseSeries(filePath), Series = ParseSeries(fileName),
Volumes = ParseVolume(filePath), Volumes = ParseVolume(fileName),
Filename = filePath, Filename = fileName,
Format = ParseFormat(filePath) Format = ParseFormat(filePath),
FullFilePath = filePath
}; };
// Fallback chain for Series: file name, then the parsed directory name, then the cleaned directory name.
if (ret.Series == string.Empty)
{
ret.Series = ParseSeries(directoryName);
if (ret.Series == string.Empty) ret.Series = CleanTitle(directoryName);
}
// NOTE(review): the edition is parsed from the full path rather than fileName, so directory names
// take part in the match — confirm that is intended.
var edition = ParseEdition(filePath); var edition = ParseEdition(filePath);
if (edition != string.Empty) ret.Series = ret.Series.Replace(edition, ""); if (edition != string.Empty) ret.Series = ret.Series.Replace(edition, "");
ret.Edition = edition; ret.Edition = edition;
// A result without a series name is reported to the caller as null.
return ret; return ret.Series == string.Empty ? null : ret;
} }
public static MangaFormat ParseFormat(string filePath) public static MangaFormat ParseFormat(string filePath)

View File

@ -15,14 +15,15 @@ namespace API.Parser
public string Volumes { get; set; } public string Volumes { get; set; }
public string Filename { get; init; } public string Filename { get; init; }
public string FullFilePath { get; set; } public string FullFilePath { get; set; }
/// <summary> /// <summary>
/// <see cref="MangaFormat"/> that represents the type of the file (so caching service knows how to cache for reading) /// <see cref="MangaFormat"/> that represents the type of the file (so caching service knows how to cache for reading)
/// </summary> /// </summary>
public MangaFormat Format { get; set; } public MangaFormat Format { get; set; } = MangaFormat.Unknown;
/// <summary> /// <summary>
/// This can potentially store things like "Omnibus, Color, Full Contact Edition, Extra, Final, etc" /// This can potentially store things like "Omnibus, Color, Full Contact Edition, Extra, Final, etc"
/// </summary> /// </summary>
public string Edition { get; set; } public string Edition { get; set; } = "";
} }
} }

View File

@ -60,7 +60,7 @@ namespace API.Services
foreach (var folderPath in library.Folders) foreach (var folderPath in library.Folders)
{ {
try { try {
totalFiles = DirectoryService.TraverseTreeParallelForEach(folderPath.Path, (f) => totalFiles += DirectoryService.TraverseTreeParallelForEach(folderPath.Path, (f) =>
{ {
try try
{ {
@ -81,38 +81,10 @@ namespace API.Services
var series = filtered.ToImmutableDictionary(v => v.Key, v => v.Value); var series = filtered.ToImmutableDictionary(v => v.Key, v => v.Value);
// Perform DB activities // Perform DB activities
var allSeries = Task.Run(() => _unitOfWork.SeriesRepository.GetSeriesForLibraryIdAsync(libraryId)).Result.ToList(); var allSeries = UpsertSeries(libraryId, forceUpdate, series, library);
foreach (var seriesKey in series.Keys)
{
var mangaSeries = allSeries.SingleOrDefault(s => s.Name == seriesKey) ?? new Series
{
Name = seriesKey,
OriginalName = seriesKey,
SortName = seriesKey,
Summary = ""
};
try
{
mangaSeries = UpdateSeries(mangaSeries, series[seriesKey].ToArray(), forceUpdate);
_logger.LogInformation($"Created/Updated series {mangaSeries.Name} for {library.Name} library");
library.Series ??= new List<Series>();
library.Series.Add(mangaSeries);
}
catch (Exception ex)
{
_logger.LogError(ex, $"There was an error during scanning of library. {seriesKey} will be skipped.");
}
}
// Remove series that are no longer on disk // Remove series that are no longer on disk
foreach (var existingSeries in allSeries) RemoveSeriesNotOnDisk(allSeries, series, library);
{
if (!series.ContainsKey(existingSeries.Name) || !series.ContainsKey(existingSeries.OriginalName))
{
// Delete series, there is no file to backup any longer.
library.Series?.Remove(existingSeries);
}
}
_unitOfWork.LibraryRepository.Update(library); _unitOfWork.LibraryRepository.Update(library);
@ -128,28 +100,56 @@ namespace API.Services
_scannedSeries = null; _scannedSeries = null;
_logger.LogInformation("Processed {0} files in {1} milliseconds for {2}", totalFiles, sw.ElapsedMilliseconds, library.Name); _logger.LogInformation("Processed {0} files in {1} milliseconds for {2}", totalFiles, sw.ElapsedMilliseconds, library.Name);
} }
/// <summary>
/// Processes files found during a library scan. Generates a collection of <see cref="ParserInfo"/> for DB updates later.
/// </summary>
/// <param name="path">Path of a file</param>
private void ProcessFile(string path)
{
var fileName = Path.GetFileName(path);
//var directoryName = (new FileInfo(path)).Directory?.Name;
//TODO: Implement fallback for no series information here
_logger.LogDebug($"Parsing file {fileName}");
private List<Series> UpsertSeries(int libraryId, bool forceUpdate, ImmutableDictionary<string, ConcurrentBag<ParserInfo>> series, Library library)
var info = Parser.Parser.Parse(fileName); {
info.FullFilePath = path; var allSeries = Task.Run(() => _unitOfWork.SeriesRepository.GetSeriesForLibraryIdAsync(libraryId)).Result.ToList();
if (info.Series == string.Empty) foreach (var seriesKey in series.Keys)
{ {
_logger.LogInformation($"Could not parse series or volume from {fileName}"); var mangaSeries = allSeries.SingleOrDefault(s => s.Name == seriesKey) ?? new Series
return; {
Name = seriesKey,
OriginalName = seriesKey,
SortName = seriesKey,
Summary = ""
};
try
{
mangaSeries = UpdateSeries(mangaSeries, series[seriesKey].ToArray(), forceUpdate);
_logger.LogInformation($"Created/Updated series {mangaSeries.Name} for {library.Name} library");
library.Series ??= new List<Series>();
library.Series.Add(mangaSeries);
}
catch (Exception ex)
{
_logger.LogError(ex, $"There was an error during scanning of library. {seriesKey} will be skipped.");
}
} }
return allSeries;
}
/// <summary>
/// Detaches from the library every series in <paramref name="allSeries"/> whose name or original name
/// is missing from the keys of <paramref name="series"/>.
/// </summary>
/// <param name="allSeries">Series to check</param>
/// <param name="series">Parsed file information keyed by series name</param>
/// <param name="library">Library whose Series collection is pruned (left untouched when that collection is null)</param>
private static void RemoveSeriesNotOnDisk(List<Series> allSeries, ImmutableDictionary<string, ConcurrentBag<ParserInfo>> series, Library library)
{
    foreach (var candidate in allSeries)
    {
        // Kept only when both names are present as keys.
        var stillPresent = series.ContainsKey(candidate.Name) && series.ContainsKey(candidate.OriginalName);
        if (stillPresent) continue;

        // No file backs this series any longer, so drop it.
        library.Series?.Remove(candidate);
    }
}
/// <summary>
/// Attempts to either add a new instance of a show mapping to the scannedSeries bag or adds to an existing.
/// </summary>
/// <param name="info"></param>
public void TrackSeries(ParserInfo info)
{
if (info.Series == string.Empty) return;
ConcurrentBag<ParserInfo> newBag = new ConcurrentBag<ParserInfo>(); ConcurrentBag<ParserInfo> newBag = new ConcurrentBag<ParserInfo>();
// Use normalization for key lookup due to parsing disparities // Use normalization for key lookup due to parsing disparities
var existingKey = _scannedSeries.Keys.SingleOrDefault(k => k.ToLower() == info.Series.ToLower()); var existingKey = _scannedSeries.Keys.SingleOrDefault(k => k.ToLower() == info.Series.ToLower());
@ -175,6 +175,23 @@ namespace API.Services
} }
} }
/// <summary>
/// Processes files found during a library scan.
/// Populates a collection of <see cref="ParserInfo"/> for DB updates later.
/// </summary>
/// <param name="path">Path of a file</param>
private void ProcessFile(string path)
{
    var info = Parser.Parser.Parse(path);
    if (info == null)
    {
        // Message template instead of string interpolation: the rendered text is the same, but the
        // path is kept as a structured log property and the template is not re-allocated per call.
        _logger.LogInformation("Could not parse series from {FilePath}", path);
        return;
    }

    TrackSeries(info);
}
private Series UpdateSeries(Series series, ParserInfo[] infos, bool forceUpdate) private Series UpdateSeries(Series series, ParserInfo[] infos, bool forceUpdate)
{ {
var volumes = UpdateVolumes(series, infos, forceUpdate); var volumes = UpdateVolumes(series, infos, forceUpdate);