There is a theme...more regex changes. Moved the parsing and fallback logic into Parser.Parse() and set up testing for it.

This commit is contained in:
Joseph Milazzo 2021-01-24 10:05:53 -06:00
parent a315feb569
commit 8683c81361
7 changed files with 160 additions and 73 deletions

View File

@ -8,6 +8,7 @@
<ItemGroup>
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="16.7.1" />
<PackageReference Include="NSubstitute" Version="4.2.2" />
<PackageReference Include="xunit" Version="2.4.1" />
<PackageReference Include="xunit.runner.visualstudio" Version="2.4.3">
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>

View File

@ -1,3 +1,6 @@
using System.Collections.Generic;
using API.Entities;
using API.Parser;
using Xunit;
using static API.Parser.Parser;
@ -62,6 +65,7 @@ namespace API.Tests
[InlineData("Tonikaku Cawaii [Volume 11].cbz", "Tonikaku Cawaii")]
[InlineData("Mujaki no Rakuen Vol12 ch76", "Mujaki no Rakuen")]
[InlineData("Knights of Sidonia c000 (S2 LE BD Omake - BLAME!) [Habanero Scans]", "Knights of Sidonia")]
[InlineData("Vol 1.cbz", "")]
public void ParseSeriesTest(string filename, string expected)
{
Assert.Equal(expected, ParseSeries(filename));
@ -142,5 +146,46 @@ namespace API.Tests
{
Assert.Equal(expected, ParseEdition(input));
}
/// <summary>
/// Checks the <c>ParserInfo</c> produced by <c>Parse</c> for complete file paths, including the case where the
/// file name alone yields no series and the expected series is the name of the containing folder.
/// </summary>
[Fact]
public void ParseInfoTest()
{
    // Series, volume and chapter are all present in the file name itself.
    const string seriesInFilename = @"E:/Manga/Mujaki no Rakuen/Mujaki no Rakuen Vol12 ch76.cbz";
    // "Vol 1.cbz" parses to an empty series, so the expected series is the folder name.
    const string seriesInFolder = @"E:/Manga/Shimoneta to Iu Gainen ga Sonzai Shinai Taikutsu na Sekai Man-hen/Vol 1.cbz";

    var expected = new Dictionary<string, ParserInfo>
    {
        [seriesInFilename] = new ParserInfo
        {
            Series = "Mujaki no Rakuen", Volumes = "12",
            Chapters = "76", Filename = "Mujaki no Rakuen Vol12 ch76.cbz", Format = MangaFormat.Archive,
            FullFilePath = seriesInFilename
        },
        [seriesInFolder] = new ParserInfo
        {
            Series = "Shimoneta to Iu Gainen ga Sonzai Shinai Taikutsu na Sekai Man-hen", Volumes = "1",
            Chapters = "0", Filename = "Vol 1.cbz", Format = MangaFormat.Archive,
            FullFilePath = seriesInFolder
        }
    };

    foreach (var (file, expectedInfo) in expected)
    {
        var actual = Parse(file);

        // Parse returns null when no series could be determined; fail with a clear assertion
        // instead of a NullReferenceException on the property reads below.
        Assert.NotNull(actual);
        Assert.Equal(expectedInfo.Format, actual.Format);
        Assert.Equal(expectedInfo.Series, actual.Series);
        Assert.Equal(expectedInfo.Chapters, actual.Chapters);
        Assert.Equal(expectedInfo.Volumes, actual.Volumes);
        Assert.Equal(expectedInfo.Edition, actual.Edition);
        Assert.Equal(expectedInfo.Filename, actual.Filename);
        Assert.Equal(expectedInfo.FullFilePath, actual.FullFilePath);
    }
}
}
}

View File

@ -1,7 +1,20 @@
namespace API.Tests.Services
using API.Interfaces;
using API.Services;
using Microsoft.Extensions.Logging;
using NSubstitute;
namespace API.Tests.Services
{
/// <summary>
/// Test fixture for <see cref="ScannerService"/>. The service under test is constructed with NSubstitute
/// stand-ins for its unit-of-work and logger dependencies.
/// </summary>
public class ScannerServiceTests
{
    private readonly ILogger<ScannerService> _logger;
    private readonly IUnitOfWork _unitOfWork;
    private readonly ScannerService _scannerService;

    public ScannerServiceTests()
    {
        _logger = Substitute.For<ILogger<ScannerService>>();
        _unitOfWork = Substitute.For<IUnitOfWork>();
        _scannerService = new ScannerService(_unitOfWork, _logger);
    }

    // TODO: Start adding tests for how scanner works so we can ensure fallbacks, etc work
}
}

View File

@ -1,4 +1,5 @@
using System;
using System.Diagnostics;
using System.IO;
namespace API.Extensions
@ -50,7 +51,6 @@ namespace API.Extensions
if (file.Directory == null) continue;
var newName = $"{file.Directory.Name}_{file.Name}";
var newPath = Path.Join(root.FullName, newName);
Console.WriteLine($"Renaming/Moving file to: {newPath}");
file.MoveTo(newPath);
}

View File

@ -74,22 +74,16 @@ namespace API.Parser
// Black Bullet (This is very loose, keep towards bottom)
new Regex(
@"(?<Series>.*)(\b|_)(v|vo|c|volume)",
@"(?<Series>.*)(_)(v|vo|c|volume)",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
// Akiiro Bousou Biyori - 01.jpg, Beelzebub_172_RHS.zip, Cynthia the Mission 29.rar
new Regex(
@"^(?!Vol)(?<Series>.*)( |_)(\d+)",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
// [BAA]_Darker_than_Black_c1 (This is very greedy, make sure it's close to last)
new Regex(
@"(?<Series>.*)(\b|_)(c)",
@"(?<Series>.*)( |_)(c)",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
// Akiiro Bousou Biyori - 01.jpg
new Regex(
@"(?<Series>.*)(\b|_)(\d+)",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
// Darker Than Black (This takes anything, we have to account for perfectly named folders)
// new Regex(
// @"(?<Series>.*)",
// RegexOptions.IgnoreCase | RegexOptions.Compiled),
};
private static readonly Regex[] ReleaseGroupRegex = new[]
@ -136,22 +130,38 @@ namespace API.Parser
};
/// <summary>
/// Parses information out of a file path. Will fall back to using the directory name if Series couldn't be parsed
/// from the filename.
/// </summary>
/// <param name="filePath">Path of the file to parse; the file name and the name of its parent directory are both derived from it.</param>
/// <returns><see cref="ParserInfo"/> or null if Series was empty</returns>
public static ParserInfo Parse(string filePath)
{
var fileName = Path.GetFileName(filePath);
// Name of the immediate parent folder; null when FileInfo reports no parent directory.
var directoryName = (new FileInfo(filePath)).Directory?.Name;
var ret = new ParserInfo()
{
Chapters = ParseChapter(filePath),
Series = ParseSeries(filePath),
Volumes = ParseVolume(filePath),
Filename = filePath,
Format = ParseFormat(filePath)
Chapters = ParseChapter(fileName),
Series = ParseSeries(fileName),
Volumes = ParseVolume(fileName),
Filename = fileName,
Format = ParseFormat(filePath),
FullFilePath = filePath
};
// Fallback: nothing usable in the file name, so try the parent folder name, first through the
// series patterns and then as a cleaned-up title.
// NOTE(review): directoryName can be null here (see Directory?.Name above) - confirm that
// ParseSeries and CleanTitle tolerate a null argument.
if (ret.Series == string.Empty)
{
ret.Series = ParseSeries(directoryName);
if (ret.Series == string.Empty) ret.Series = CleanTitle(directoryName);
}
// The edition is parsed from the complete path, removed from the series text when found,
// and recorded separately on the result.
var edition = ParseEdition(filePath);
if (edition != string.Empty) ret.Series = ret.Series.Replace(edition, "");
ret.Edition = edition;
return ret;
// A result without a series name is reported to the caller as null.
return ret.Series == string.Empty ? null : ret;
}
public static MangaFormat ParseFormat(string filePath)

View File

@ -15,14 +15,15 @@ namespace API.Parser
public string Volumes { get; set; }
public string Filename { get; init; }
public string FullFilePath { get; set; }
/// <summary>
/// <see cref="MangaFormat"/> that represents the type of the file (so caching service knows how to cache for reading)
/// </summary>
public MangaFormat Format { get; set; }
public MangaFormat Format { get; set; } = MangaFormat.Unknown;
/// <summary>
/// This can potentially store things like "Omnibus, Color, Full Contact Edition, Extra, Final, etc"
/// </summary>
public string Edition { get; set; }
public string Edition { get; set; } = "";
}
}

View File

@ -60,7 +60,7 @@ namespace API.Services
foreach (var folderPath in library.Folders)
{
try {
totalFiles = DirectoryService.TraverseTreeParallelForEach(folderPath.Path, (f) =>
totalFiles += DirectoryService.TraverseTreeParallelForEach(folderPath.Path, (f) =>
{
try
{
@ -81,38 +81,10 @@ namespace API.Services
var series = filtered.ToImmutableDictionary(v => v.Key, v => v.Value);
// Perform DB activities
var allSeries = Task.Run(() => _unitOfWork.SeriesRepository.GetSeriesForLibraryIdAsync(libraryId)).Result.ToList();
foreach (var seriesKey in series.Keys)
{
var mangaSeries = allSeries.SingleOrDefault(s => s.Name == seriesKey) ?? new Series
{
Name = seriesKey,
OriginalName = seriesKey,
SortName = seriesKey,
Summary = ""
};
try
{
mangaSeries = UpdateSeries(mangaSeries, series[seriesKey].ToArray(), forceUpdate);
_logger.LogInformation($"Created/Updated series {mangaSeries.Name} for {library.Name} library");
library.Series ??= new List<Series>();
library.Series.Add(mangaSeries);
}
catch (Exception ex)
{
_logger.LogError(ex, $"There was an error during scanning of library. {seriesKey} will be skipped.");
}
}
var allSeries = UpsertSeries(libraryId, forceUpdate, series, library);
// Remove series that are no longer on disk
foreach (var existingSeries in allSeries)
{
if (!series.ContainsKey(existingSeries.Name) || !series.ContainsKey(existingSeries.OriginalName))
{
// Delete series, there is no file to backup any longer.
library.Series?.Remove(existingSeries);
}
}
RemoveSeriesNotOnDisk(allSeries, series, library);
_unitOfWork.LibraryRepository.Update(library);
@ -128,28 +100,56 @@ namespace API.Services
_scannedSeries = null;
_logger.LogInformation("Processed {0} files in {1} milliseconds for {2}", totalFiles, sw.ElapsedMilliseconds, library.Name);
}
/// <summary>
/// Processes files found during a library scan. Generates a collection of <see cref="ParserInfo"/> for DB updates later.
/// </summary>
/// <param name="path">Path of a file</param>
private void ProcessFile(string path)
{
var fileName = Path.GetFileName(path);
//var directoryName = (new FileInfo(path)).Directory?.Name;
//TODO: Implement fallback for no series information here
_logger.LogDebug($"Parsing file {fileName}");
var info = Parser.Parser.Parse(fileName);
info.FullFilePath = path;
if (info.Series == string.Empty)
/// <summary>
/// Creates or updates a <see cref="Series"/> for every key in <paramref name="series"/> and adds it to
/// <paramref name="library"/>'s Series collection.
/// </summary>
/// <param name="libraryId">Id passed to the series repository to load the library's stored series.</param>
/// <param name="forceUpdate">Forwarded unchanged to UpdateSeries.</param>
/// <param name="series">Parsed file infos grouped by series key.</param>
/// <param name="library">Library whose Series collection receives the created/updated entries.</param>
/// <returns>The series loaded from the repository for <paramref name="libraryId"/>.</returns>
private List<Series> UpsertSeries(int libraryId, bool forceUpdate, ImmutableDictionary<string, ConcurrentBag<ParserInfo>> series, Library library)
{
// Task.Run(...).Result blocks the calling thread until the repository query has completed.
var allSeries = Task.Run(() => _unitOfWork.SeriesRepository.GetSeriesForLibraryIdAsync(libraryId)).Result.ToList();
foreach (var seriesKey in series.Keys)
{
_logger.LogInformation($"Could not parse series or volume from {fileName}");
return;
// Reuse the stored series whose Name equals the key; otherwise start a new Series seeded with the key.
// SingleOrDefault throws when more than one stored series has that name, and this lookup sits
// outside the try block below.
var mangaSeries = allSeries.SingleOrDefault(s => s.Name == seriesKey) ?? new Series
{
Name = seriesKey,
OriginalName = seriesKey,
SortName = seriesKey,
Summary = ""
};
try
{
mangaSeries = UpdateSeries(mangaSeries, series[seriesKey].ToArray(), forceUpdate);
_logger.LogInformation($"Created/Updated series {mangaSeries.Name} for {library.Name} library");
library.Series ??= new List<Series>();
// NOTE(review): Add runs for series loaded from the repository as well as for new ones -
// confirm library.Series cannot end up holding the same series twice.
library.Series.Add(mangaSeries);
}
catch (Exception ex)
{
// A failure is logged and only this key is skipped; the loop continues with the remaining keys.
_logger.LogError(ex, $"There was an error during scanning of library. {seriesKey} will be skipped.");
}
}
return allSeries;
}
/// <summary>
/// Removes from <paramref name="library"/> every entry of <paramref name="allSeries"/> whose Name or
/// OriginalName is not a key of <paramref name="series"/>.
/// </summary>
/// <param name="allSeries">Series to check against the scan result.</param>
/// <param name="series">Scan result keyed by series name.</param>
/// <param name="library">Library whose Series collection is pruned; nothing happens when that collection is null.</param>
private static void RemoveSeriesNotOnDisk(List<Series> allSeries, ImmutableDictionary<string, ConcurrentBag<ParserInfo>> series, Library library)
{
    foreach (var candidate in allSeries)
    {
        // Kept only when both names are present in the scan result.
        var foundInScan = series.ContainsKey(candidate.Name) && series.ContainsKey(candidate.OriginalName);
        if (foundInScan) continue;

        // Delete series, there is no file to backup any longer.
        library.Series?.Remove(candidate);
    }
}
/// <summary>
/// Attempts to either add a new instance of a show mapping to the scannedSeries bag or adds to an existing.
/// </summary>
/// <param name="info"></param>
public void TrackSeries(ParserInfo info)
{
if (info.Series == string.Empty) return;
ConcurrentBag<ParserInfo> newBag = new ConcurrentBag<ParserInfo>();
// Use normalization for key lookup due to parsing disparities
var existingKey = _scannedSeries.Keys.SingleOrDefault(k => k.ToLower() == info.Series.ToLower());
@ -175,6 +175,23 @@ namespace API.Services
}
}
/// <summary>
/// Handles a single file found during a library scan: the path is parsed and, when parsing yields a result,
/// that result is handed to <see cref="TrackSeries"/> so it is available for DB updates later.
/// A path that cannot be parsed is logged and otherwise ignored.
/// </summary>
/// <param name="path">Path of a file</param>
private void ProcessFile(string path)
{
    var parsed = Parser.Parser.Parse(path);
    if (parsed != null)
    {
        TrackSeries(parsed);
        return;
    }

    _logger.LogInformation($"Could not parse series from {path}");
}
private Series UpdateSeries(Series series, ParserInfo[] infos, bool forceUpdate)
{
var volumes = UpdateVolumes(series, infos, forceUpdate);