Implemented a parallelized, rough, scanning loop which parses out information and leaves us with an immutable dictionary mapping series to parsed infos for each file.

Added some Entities to help translate this into DB.
This commit is contained in:
Joseph Milazzo 2020-12-29 10:47:10 -06:00
parent 49b4ee0022
commit 0a49b07570
7 changed files with 220 additions and 7 deletions

View File

@ -13,7 +13,9 @@ namespace API.Tests
[InlineData("BTOOOM! v01 (2013) (Digital) (Shadowcat-Empire)", "1")]
[InlineData("Gokukoku no Brynhildr - c001-008 (v01) [TrinityBAKumA]", "1")]
[InlineData("Dance in the Vampire Bund v16-17 (Digital) (NiceDragon)", "16-17")]
[InlineData("Akame ga KILL! ZERO v01 (2016) (Digital) (LuCaZ).cbz", "1")]
[InlineData("v001", "1")]
[InlineData("[Suihei Kiki]_Kasumi_Otoko_no_Ko_[Taruby]_v1.1.zip", "1")]
public void ParseVolumeTest(string filename, string expected)
{
var result = ParseVolume(filename);
@ -29,6 +31,7 @@ namespace API.Tests
[InlineData("Gokukoku no Brynhildr - c001-008 (v01) [TrinityBAKumA]", "Gokukoku no Brynhildr")]
[InlineData("Dance in the Vampire Bund v16-17 (Digital) (NiceDragon)", "Dance in the Vampire Bund")]
[InlineData("v001", "")]
[InlineData("Akame ga KILL! ZERO (2016-2019) (Digital) (LuCaZ)", "Akame ga KILL! ZERO")]
public void ParseSeriesTest(string filename, string expected)
{
var result = ParseSeries(filename);
@ -44,6 +47,7 @@ namespace API.Tests
[InlineData("Gokukoku no Brynhildr - c001-008 (v01) [TrinityBAKumA]", "1-8")]
[InlineData("Dance in the Vampire Bund v16-17 (Digital) (NiceDragon)", "")]
[InlineData("c001", "1")]
[InlineData("[Suihei Kiki]_Kasumi_Otoko_no_Ko_[Taruby]_v1.12.zip", "12")]
public void ParseChaptersTest(string filename, string expected)
{
var result = ParseChapter(filename);

View File

@ -28,10 +28,6 @@
<PackageReference Include="System.IdentityModel.Tokens.Jwt" Version="6.8.0" />
</ItemGroup>
<ItemGroup>
<Folder Include="Tasks" />
</ItemGroup>
<ItemGroup>
<None Remove="Hangfire-log.db" />
</ItemGroup>

28
API/Entities/Series.cs Normal file
View File

@ -0,0 +1,28 @@
using System.Collections.Generic;
namespace API.Entities
{
/// <summary>
/// Entity describing a series: its display, original and sort names, a summary,
/// and the collection of volumes that belong to it.
/// </summary>
// NOTE(review): no Id/key property is declared here although Volume.SeriesId refers to a Series — confirm where the key is defined.
public class Series
{
/// <summary>
/// The UI visible Name of the Series. This may or may not be the same as the OriginalName
/// </summary>
public string Name { get; set; }
/// <summary>
/// Original Japanese Name
/// </summary>
public string OriginalName { get; set; }
/// <summary>
/// The name used to sort the Series. Intended to default to Name; nothing in this class enforces that,
/// so whoever creates the entity has to assign it.
/// </summary>
public string SortName { get; set; }
/// <summary>
/// Summary information related to the Series
/// </summary>
public string Summary { get; set; }
/// <summary>
/// Volumes that belong to this Series. Each Volume points back to its Series through
/// Volume.Series and Volume.SeriesId.
/// </summary>
public ICollection<Volume> Volumes { get; set; }
}
}

14
API/Entities/Volume.cs Normal file
View File

@ -0,0 +1,14 @@
using System.Collections.Generic;
namespace API.Entities
{
/// <summary>
/// Entity describing one volume of a Series together with the files that make it up.
/// </summary>
public class Volume
{
/// <summary>
/// Volume number, stored as a string rather than a numeric type.
/// NOTE(review): presumably this can hold parsed ranges such as "16-17" — confirm against the parser output.
/// </summary>
public string Number { get; set; }
/// <summary>
/// Files associated with this volume, stored as strings.
/// NOTE(review): presumably file paths — confirm against the code that fills this collection.
/// </summary>
public ICollection<string> Files { get; set; }
// Relationship to the owning Series: a Volume references exactly one Series (reference + foreign key),
// while Series.Volumes holds the collection side, i.e. one Series to many Volumes.
public Series Series { get; set; }
public int SeriesId { get; set; }
}
}

View File

@ -1,4 +1,5 @@
using System;
using System.Collections.Generic;
using System.Text.RegularExpressions;
namespace API.Parser
@ -47,10 +48,20 @@ namespace API.Parser
@"(?<Series>.*)(\b|_)(v|vo|c)",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
// Akame ga KILL! ZERO (2016-2019) (Digital) (LuCaZ)
new Regex(
@"(?<Series>.*)\(\d",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
// [BAA]_Darker_than_Black_c1 (This is very greedy, keep it after every more specific pattern and directly before the catch-all below)
new Regex(
@"(?<Series>.*)(\b|_)(c)",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
// Darker Than Black (This takes anything, we have to account for perfectly named folders)
new Regex(
@"(?<Series>.*)",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
};
@ -72,6 +83,18 @@ namespace API.Parser
@"(c|ch)(\.? ?)(?<Chapter>\d+-?\d*)",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
};
/// <summary>
/// Runs the chapter, series and volume parsers over the given value and bundles the results,
/// together with the unmodified input, into a single <see cref="ParserInfo"/>.
/// </summary>
/// <param name="filePath">Value handed verbatim to each parser and stored in <c>ParserInfo.File</c>.</param>
/// <returns>A new <see cref="ParserInfo"/> holding the parsed chapters, series and volumes.</returns>
public static ParserInfo Parse(string filePath)
{
    // Parsers are invoked in the same order as the original initializer: chapter, series, volume.
    var parsedChapters = ParseChapter(filePath);
    var parsedSeries = ParseSeries(filePath);
    var parsedVolumes = ParseVolume(filePath);

    var parsed = new ParserInfo
    {
        File = filePath,
        Series = parsedSeries,
        Volumes = parsedVolumes,
        Chapters = parsedChapters
    };

    return parsed;
}
public static string ParseSeries(string filename)
{

View File

@ -2,13 +2,17 @@
namespace API.Parser
{
/// <summary>
/// This represents a single file: the series, volume and chapter information parsed for it,
/// plus the value it was parsed from.
/// </summary>
public class ParserInfo
{
// Parsed chapter information kept as one string; it may describe several chapters (e.g. a range such as "1-8").
public string Chapters { get; set; }
// Parsed series name; may be an empty string when no series could be parsed.
public string Series { get; set; }
// Singular counterpart of Volumes below.
// NOTE(review): Parser.Parse assigns Volumes, not this property — confirm whether it is still needed.
public string Volume { get; set; }
// NOTE(review): a per-file info already carries File below — confirm whether this collection is still needed.
public IEnumerable<string> Files { get; init; }
// Parsed volume information kept as one string; it may describe several volumes (e.g. a range such as "16-17").
public string Volumes { get; set; }
// The value the info was parsed from; init-only, so it can only be assigned while the object is being created.
public string File { get; init; }
//public IEnumerable<string> Files { get; init; }
}
}

View File

@ -1,4 +1,5 @@
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Collections.Immutable;
using System.Diagnostics;
@ -10,6 +11,8 @@ using System.Threading;
using System.Threading.Tasks;
using API.DTOs;
using API.Interfaces;
using API.Parser;
using Microsoft.EntityFrameworkCore.Metadata.Internal;
using Microsoft.Extensions.Logging;
namespace API.Services
@ -18,6 +21,7 @@ namespace API.Services
{
private readonly ILogger<DirectoryService> _logger;
private static readonly string MangaFileExtensions = @"\.cbz|\.cbr|\.png|\.jpeg|\.jpg|\.zip|\.rar";
private ConcurrentDictionary<string, ConcurrentBag<ParserInfo>> _scannedSeries;
public DirectoryService(ILogger<DirectoryService> logger)
{
@ -62,17 +66,131 @@ namespace API.Services
return dirs;
}
// TODO: Refactor API layer to use this
/// <summary>
/// Lists the immediate sub-directories of <paramref name="rootPath"/>, leaving out any that are
/// flagged as hidden or system.
/// </summary>
/// <param name="rootPath">Directory whose direct children are listed.</param>
/// <returns>
/// An immutable list of the visible child directories, or an empty list when
/// <paramref name="rootPath"/> does not exist.
/// </returns>
public IEnumerable<DirectoryInfo> ListDirectories(string rootPath)
{
    if (Directory.Exists(rootPath))
    {
        var root = new DirectoryInfo(rootPath);
        var children = root.GetDirectories();

        // Keep only directories carrying neither the Hidden nor the System attribute.
        var visible = children.Where(child =>
            !child.Attributes.HasFlag(FileAttributes.Hidden) &&
            !child.Attributes.HasFlag(FileAttributes.System));

        return visible.ToImmutableList();
    }

    return ImmutableList<DirectoryInfo>.Empty;
}
/// <summary>
/// Handles a single path found during a library scan and records the result in <c>_scannedSeries</c>.
/// </summary>
/// <remarks>
/// A directory registers its parsed series name with an empty bag (directories whose name yields no
/// series are ignored). A file is parsed by name and, when a volume was parsed from it, appended to
/// the bag of its series. This method is called from the <c>TraverseTreeParallelForEach</c> callback in
/// <c>ScanLibrary</c>, so every update goes through a single atomic <c>ConcurrentDictionary</c> operation
/// instead of a read-copy-replace sequence that could drop entries when two callers touch the same series.
/// </remarks>
/// <param name="path">Directory or file path to process.</param>
private void Process(string path)
{
    if (Directory.Exists(path))
    {
        var di = new DirectoryInfo(path);
        Console.WriteLine($"Parsing directory {di.Name}");

        var seriesName = Parser.Parser.ParseSeries(di.Name);
        if (string.IsNullOrEmpty(seriesName))
        {
            return;
        }

        // TryAdd is atomic and simply returns false when the key already exists,
        // so no ContainsKey pre-check (a check-then-act race) is needed.
        _scannedSeries.TryAdd(seriesName, new ConcurrentBag<ParserInfo>());
        return;
    }

    var fileName = Path.GetFileName(path);
    Console.WriteLine($"Parsing file {fileName}");

    var info = Parser.Parser.Parse(fileName);
    if (info.Volumes == string.Empty)
    {
        // Files without a parsed volume are not recorded.
        return;
    }

    // GetOrAdd returns the bag already stored for the series, or atomically stores a new one.
    // Adding to that shared ConcurrentBag is thread-safe, so no info is lost when several
    // callers process files of the same series at the same time.
    _scannedSeries
        .GetOrAdd(info.Series, _ => new ConcurrentBag<ParserInfo>())
        .Add(info);
}
public void ScanLibrary(LibraryDto library)
{
_scannedSeries = new ConcurrentDictionary<string, ConcurrentBag<ParserInfo>>();
//Dictionary<string, IList<ParserInfo>> series = new Dictionary<string, IList<ParserInfo>>();
_logger.LogInformation($"Beginning scan on {library.Name}");
foreach (var folderPath in library.Folders)
{
try {
// // Temporarily, let's build a simple scanner then optimize to parallelization
//
// // First, let's see if there are any files in rootPath
// var files = GetFiles(folderPath, MangaFileExtensions);
//
// foreach (var file in files)
// {
// // These do not have a folder, so we need to parse them directly
// var parserInfo = Parser.Parser.Parse(file);
// Console.WriteLine(parserInfo);
// }
//
// // Get Directories
// var directories = ListDirectories(folderPath);
// foreach (var directory in directories)
// {
// _logger.LogDebug($"Scanning {directory.Name}");
// var parsedSeries = Parser.Parser.ParseSeries(directory.Name);
//
// // For now, let's skip directories we can't parse information out of. (we are assuming one level deep root)
// if (string.IsNullOrEmpty(parsedSeries)) continue;
//
// _logger.LogDebug($"Parsed Series: {parsedSeries}");
//
// if (!series.ContainsKey(parsedSeries))
// {
// series[parsedSeries] = new List<ParserInfo>();
// }
//
// var foundFiles = GetFiles(directory.FullName, MangaFileExtensions);
// foreach (var foundFile in foundFiles)
// {
// var info = Parser.Parser.Parse(foundFile);
// if (info.Volumes != string.Empty)
// {
// series[parsedSeries].Add(info);
// }
// }
// }
TraverseTreeParallelForEach(folderPath, (f) =>
{
// Exceptions are no-ops.
try
{
ProcessManga(folderPath, f);
Process(f);
//ProcessManga(folderPath, f);
}
catch (FileNotFoundException) {}
catch (IOException) {}
@ -87,12 +205,38 @@ namespace API.Services
_logger.LogError($"The directory '{folderPath}' does not exist");
}
}
// var filtered = series.Where(kvp => kvp.Value.Count > 0);
// series = filtered.ToDictionary(v => v.Key, v => v.Value);
// Console.WriteLine(series);
// var filtered = _scannedSeries.Where(kvp => kvp.Value.Count > 0);
// series = filtered.ToDictionary(v => v.Key, v => v.Value);
// Console.WriteLine(series);
var filtered = _scannedSeries.Where(kvp => !kvp.Value.IsEmpty);
var series = filtered.ToImmutableDictionary(v => v.Key, v => v.Value);
Console.WriteLine(series);
// TODO: Perform DB activities on ImmutableDictionary
//_logger.LogInformation($"Scan completed on {library.Name}. Parsed {series.Keys.Count} series.");
_logger.LogInformation($"Scan completed on {library.Name}. Parsed {series.Keys.Count()} series.");
_scannedSeries = null;
}
private static void ProcessManga(string folderPath, string filename)
{
Console.WriteLine($"[ProcessManga] Folder: {folderPath}");
Console.WriteLine($"Found {filename}");
var series = Parser.Parser.ParseSeries(filename);
if (series == string.Empty)
{
series = Parser.Parser.ParseSeries(folderPath);
}
Console.WriteLine($"Series: {series}");
}