Added basic parsing and unit tests. Set up an intermediate object to hold information from the parser. Removed the Hangfire databases from git.

This commit is contained in:
Joseph Milazzo 2020-12-27 11:14:45 -06:00
parent 4fd9943b91
commit 8c80ed090d
13 changed files with 347 additions and 8 deletions

4
.gitignore vendored
View File

@ -445,4 +445,6 @@ $RECYCLE.BIN/
appsettings.json
/API/kavita.db
/API/kavita.db-shm
/API/kavita.db-wal
/API/kavita.db-wal
/API/Hangfire.db
/API/Hangfire-log.db

View File

@ -0,0 +1,26 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>net5.0</TargetFramework>
<IsPackable>false</IsPackable>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="16.7.1" />
<PackageReference Include="xunit" Version="2.4.1" />
<PackageReference Include="xunit.runner.visualstudio" Version="2.4.3">
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
<PrivateAssets>all</PrivateAssets>
</PackageReference>
<PackageReference Include="coverlet.collector" Version="1.3.0">
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
<PrivateAssets>all</PrivateAssets>
</PackageReference>
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\API\API.csproj" />
</ItemGroup>
</Project>

82
API.Tests/ParserTest.cs Normal file
View File

@ -0,0 +1,82 @@
using Xunit;
using static API.Parser.Parser;
namespace API.Tests
{
/// <summary>
/// Data-driven checks for the static parsing helpers brought into scope from <c>API.Parser.Parser</c>.
/// Each theory feeds a raw input string and compares the helper's output with the expected text.
/// </summary>
public class ParserTests
{
    /// <summary>
    /// The volume is reported without leading zeroes; a volume range such as "16-17" is expected intact.
    /// </summary>
    [Theory]
    [InlineData("Killing Bites Vol. 0001 Ch. 0001 - Galactica Scanlations (gb)", "1")]
    [InlineData("My Girlfriend Is Shobitch v01 - ch. 09 - pg. 008.png", "1")]
    [InlineData("Historys Strongest Disciple Kenichi_v11_c90-98.zip", "11")]
    [InlineData("B_Gata_H_Kei_v01[SlowManga&OverloadScans]", "1")]
    [InlineData("BTOOOM! v01 (2013) (Digital) (Shadowcat-Empire)", "1")]
    [InlineData("Gokukoku no Brynhildr - c001-008 (v01) [TrinityBAKumA]", "1")]
    [InlineData("Dance in the Vampire Bund v16-17 (Digital) (NiceDragon)", "16-17")]
    [InlineData("v001", "1")]
    public void ParseVolumeTest(string filename, string expected)
        => Assert.Equal(expected, ParseVolume(filename));

    /// <summary>
    /// The series name is everything in front of the volume/chapter marker, cleaned up for display;
    /// a file name that is only a volume marker yields an empty series.
    /// </summary>
    [Theory]
    [InlineData("Killing Bites Vol. 0001 Ch. 0001 - Galactica Scanlations (gb)", "Killing Bites")]
    [InlineData("My Girlfriend Is Shobitch v01 - ch. 09 - pg. 008.png", "My Girlfriend Is Shobitch")]
    [InlineData("Historys Strongest Disciple Kenichi_v11_c90-98.zip", "Historys Strongest Disciple Kenichi")]
    [InlineData("B_Gata_H_Kei_v01[SlowManga&OverloadScans]", "B Gata H Kei")]
    [InlineData("BTOOOM! v01 (2013) (Digital) (Shadowcat-Empire)", "BTOOOM!")]
    [InlineData("Gokukoku no Brynhildr - c001-008 (v01) [TrinityBAKumA]", "Gokukoku no Brynhildr")]
    [InlineData("Dance in the Vampire Bund v16-17 (Digital) (NiceDragon)", "Dance in the Vampire Bund")]
    [InlineData("v001", "")]
    public void ParseSeriesTest(string filename, string expected)
        => Assert.Equal(expected, ParseSeries(filename));

    /// <summary>
    /// The chapter (or chapter range) is reported without leading zeroes; file names without a
    /// chapter marker yield an empty string.
    /// </summary>
    [Theory]
    [InlineData("Killing Bites Vol. 0001 Ch. 0001 - Galactica Scanlations (gb)", "1")]
    [InlineData("My Girlfriend Is Shobitch v01 - ch. 09 - pg. 008.png", "9")]
    [InlineData("Historys Strongest Disciple Kenichi_v11_c90-98.zip", "90-98")]
    [InlineData("B_Gata_H_Kei_v01[SlowManga&OverloadScans]", "")]
    [InlineData("BTOOOM! v01 (2013) (Digital) (Shadowcat-Empire)", "")]
    [InlineData("Gokukoku no Brynhildr - c001-008 (v01) [TrinityBAKumA]", "1-8")]
    [InlineData("Dance in the Vampire Bund v16-17 (Digital) (NiceDragon)", "")]
    [InlineData("c001", "1")]
    public void ParseChaptersTest(string filename, string expected)
        => Assert.Equal(expected, ParseChapter(filename));

    /// <summary>Leading '0' characters are stripped from a numeric string.</summary>
    [Theory]
    [InlineData("0001", "1")]
    [InlineData("1", "1")]
    [InlineData("0013", "13")]
    public void RemoveLeadingZeroesTest(string input, string expected)
    {
        var actual = RemoveLeadingZeroes(input);

        Assert.Equal(expected, actual);
    }

    /// <summary>Numbers are left-padded with zeroes to three digits.</summary>
    [Theory]
    [InlineData("1", "001")]
    [InlineData("10", "010")]
    [InlineData("100", "100")]
    public void PadZerosTest(string input, string expected)
    {
        var actual = PadZeros(input);

        Assert.Equal(expected, actual);
    }

    /// <summary>
    /// Underscores become spaces, surrounding whitespace is trimmed and a bracketed release group
    /// is removed.
    /// </summary>
    [Theory]
    [InlineData("Hello_I_am_here", "Hello I am here")]
    [InlineData("Hello_I_am_here ", "Hello I am here")]
    [InlineData("[ReleaseGroup] The Title", "The Title")]
    [InlineData("[ReleaseGroup]_The_Title", "The Title")]
    public void CleanTitleTest(string input, string expected)
    {
        var actual = CleanTitle(input);

        Assert.Equal(expected, actual);
    }
}
}

View File

@ -32,4 +32,8 @@
<Folder Include="Tasks" />
</ItemGroup>
<ItemGroup>
<None Remove="Hangfire-log.db" />
</ItemGroup>
</Project>

View File

@ -8,12 +8,10 @@ namespace API.Controllers
{
public class AdminController : BaseApiController
{
private readonly IUserRepository _userRepository;
private readonly UserManager<AppUser> _userManager;
public AdminController(IUserRepository userRepository, UserManager<AppUser> userManager)
public AdminController(UserManager<AppUser> userManager)
{
_userRepository = userRepository;
_userManager = userManager;
}

View File

@ -38,7 +38,7 @@ namespace API.Controllers
return BadRequest("Library name already exists. Please choose a unique name to the server.");
}
// TODO: We probably need to clean the folders before we insert
// TODO: We probably need to normalize the folders before we insert
var library = new Library
{
Name = createLibraryDto.Name.ToLower(),

Binary file not shown.

Binary file not shown.

200
API/Parser/Parser.cs Normal file
View File

@ -0,0 +1,200 @@
using System;
using System.Text.RegularExpressions;
namespace API.Parser
{
/// <summary>
/// Extracts series, volume and chapter information from manga file names.
/// Every piece of information has an ordered list of regular expressions; the patterns are tried
/// in order and the first one that matches decides the result, so more specific patterns must be
/// listed before more general ones.
/// </summary>
public static class Parser
{
    private const RegexOptions Options = RegexOptions.IgnoreCase | RegexOptions.Compiled;

    // (?<Name>...) is a named capturing group and (?:...) is a non-capturing group;
    // a plain (...) still captures (as a numbered group).
    private static readonly Regex[] MangaVolumeRegex =
    {
        // Historys Strongest Disciple Kenichi_v11_c90-98.zip, Dance in the Vampire Bund v16-17
        // The optional "-NN" tail keeps volume ranges intact instead of stopping at the first number.
        new Regex(@"(?<Series>.*)(\b|_)v(?<Volume>\d+(?:-\d+)?)", Options),
        // Killing Bites Vol. 0001 Ch. 0001 - Galactica Scanlations (gb)
        // \D? accepts one separator character after "vol" but can never swallow a digit of the number.
        new Regex(@"vol\D? ?(?<Volume>\d+(?:-\d+)?)", Options),
        // Last resort: a "v" glued to the preceding word (no word boundary or underscore before it).
        new Regex(@"v(?<Volume>\d+(?:-\d+)?)", Options),
    };

    private static readonly Regex[] MangaSeriesRegex =
    {
        // Gokukoku no Brynhildr - c001-008 (v01) [TrinityBAKumA], Black Bullet - v4 c17 [batoto]
        new Regex(@"(?<Series>.*)( - )(?:v|vo|c)\d", Options),
        // Historys Strongest Disciple Kenichi_v11_c90-98.zip, Killing Bites Vol. 0001 Ch. 0001 - Galactica Scanlations (gb)
        new Regex(@"(?<Series>.*)(\b|_)v", Options),
        // Black Bullet
        new Regex(@"(?<Series>.*)(\b|_)(v|vo|c)", Options),
        // [BAA]_Darker_than_Black_c1 (This is very greedy, make sure it's always last)
        new Regex(@"(?<Series>.*)(\b|_)(c)", Options),
    };

    private static readonly Regex[] ReleaseGroupRegex =
    {
        // [TrinityBAKumA Finella&anon], [BAA]_, [SlowManga&OverloadScans], [batoto]
        new Regex(@"(?:\[(?<subgroup>(?!\s).+?(?<!\s))\](?:_|-|\s|\.)?)", Options),
        // TODO: parenthesised release groups such as (Shadowcat-Empire) are not handled yet.
    };

    private static readonly Regex[] MangaChapterRegex =
    {
        // "c001", "ch. 09", "c90-98"; the range tail requires digits so a dangling '-' is not captured.
        new Regex(@"(c|ch)(\.? ?)(?<Chapter>\d+(?:-\d+)?)", Options),
    };

    /// <summary>
    /// Parses the series name out of a file name.
    /// </summary>
    /// <param name="filename">File name to inspect; <c>null</c> is treated like an empty string.</param>
    /// <returns>The cleaned series name (see <see cref="CleanTitle"/>), or an empty string when no pattern matches.</returns>
    public static string ParseSeries(string filename)
    {
        if (TryMatchGroup(MangaSeriesRegex, filename, "Series", out var series))
        {
            return CleanTitle(series);
        }

        Console.WriteLine("Unable to parse {0}", filename);
        return string.Empty;
    }

    /// <summary>
    /// Parses the volume (or volume range) out of a file name.
    /// </summary>
    /// <param name="filename">File name to inspect; <c>null</c> is treated like an empty string.</param>
    /// <returns>
    /// The volume without leading zeroes, e.g. "1" or "16-17", or an empty string when no pattern matches.
    /// </returns>
    public static string ParseVolume(string filename)
    {
        if (TryMatchGroup(MangaVolumeRegex, filename, "Volume", out var volume))
        {
            return NormalizeNumberRange(volume);
        }

        Console.WriteLine("Unable to parse {0}", filename);
        return string.Empty;
    }

    /// <summary>
    /// Parses the chapter (or chapter range) out of a file name.
    /// </summary>
    /// <param name="filename">File name to inspect; <c>null</c> is treated like an empty string.</param>
    /// <returns>
    /// The chapter without leading zeroes, e.g. "9" or "90-98", or an empty string when the file name
    /// carries no chapter marker. Nothing is logged in that case.
    /// </returns>
    public static string ParseChapter(string filename)
    {
        return TryMatchGroup(MangaChapterRegex, filename, "Chapter", out var chapter)
            ? NormalizeNumberRange(chapter)
            : string.Empty;
    }

    /// <summary>
    /// Removes release groups (e.g. "[ReleaseGroup]"), turns underscores into spaces and trims
    /// whitespace from both ends of the title.
    /// </summary>
    /// <param name="title">Raw title text.</param>
    /// <returns>The cleaned title.</returns>
    public static string CleanTitle(string title)
    {
        foreach (var regex in ReleaseGroupRegex)
        {
            title = regex.Replace(title, string.Empty);
        }

        return title.Replace("_", " ").Trim();
    }

    /// <summary>
    /// Pads the start of a number string with 0's so ordering works fine if there are over 100 items.
    /// Handles ranges (ie 4-8) -> (004-008); every part of a range is padded.
    /// </summary>
    /// <param name="number">A number, or several numbers separated by '-'.</param>
    /// <returns>A zero padded number</returns>
    /// <exception cref="FormatException">A part of <paramref name="number"/> is not an integer.</exception>
    public static string PadZeros(string number)
    {
        return string.Join("-", Array.ConvertAll(number.Split('-'), PerformPadding));
    }

    /// <summary>
    /// Pads a single integer string to three digits. Values of 100 or more are returned exactly as given.
    /// </summary>
    private static string PerformPadding(string number)
    {
        var num = int.Parse(number);
        return num switch
        {
            < 10 => "00" + num,
            < 100 => "0" + num,
            _ => number
        };
    }

    /// <summary>
    /// Strips leading '0' characters. A non-empty input made up only of zeroes yields "0" rather
    /// than an empty string, so volume or chapter zero is not lost.
    /// </summary>
    /// <param name="title">Text to strip, typically a number with zero padding.</param>
    /// <returns>The text without leading zeroes.</returns>
    public static string RemoveLeadingZeroes(string title)
    {
        var trimmed = title.TrimStart('0');
        return trimmed.Length == 0 && title.Length > 0 ? "0" : trimmed;
    }

    /// <summary>
    /// Tries each pattern in order and reports the named group's text from the first pattern whose
    /// match actually captured that group.
    /// </summary>
    private static bool TryMatchGroup(Regex[] patterns, string input, string groupName, out string value)
    {
        input ??= string.Empty;
        foreach (var regex in patterns)
        {
            // Group.Success is false both when the pattern did not match and when the group took no part in the match.
            var group = regex.Match(input).Groups[groupName];
            if (group.Success)
            {
                value = group.Value;
                return true;
            }
        }

        value = string.Empty;
        return false;
    }

    /// <summary>
    /// Removes leading zeroes from a number or from every part of a '-' separated range.
    /// </summary>
    private static string NormalizeNumberRange(string value)
    {
        return string.Join("-", Array.ConvertAll(value.Split('-'), RemoveLeadingZeroes));
    }
}
}

14
API/Parser/ParserInfo.cs Normal file
View File

@ -0,0 +1,14 @@
using System.Collections.Generic;
namespace API.Parser
{
/// <summary>
/// Plain data holder for information taken from the parser: chapter, series and volume text plus
/// a collection of file names/paths.
/// NOTE(review): nothing visible here populates this type yet; confirm the intended producer.
/// </summary>
public class ParserInfo
{
/// <summary>
/// Chapter text. A single string that may describe more than one chapter
/// (presumably a range such as "1-8"; confirm against the code that fills this in).
/// </summary>
public string Chapters { get; set; }
/// <summary>Series name.</summary>
public string Series { get; set; }
/// <summary>
/// Volume text. A single string that may describe more than one volume
/// (presumably a range such as "16-17"; confirm against the code that fills this in).
/// </summary>
public string Volume { get; set; }
/// <summary>
/// Files associated with this entry. Init-only: it can be assigned during object
/// initialization but not afterwards.
/// </summary>
public IEnumerable<string> Files { get; init; }
}
}

View File

@ -62,7 +62,7 @@ namespace API.Services
});
}
catch (ArgumentException) {
Console.WriteLine(@"The directory 'C:\Program Files' does not exist.");
_logger.LogError($"The directory '{folderPath}' does not exist");
}
}
}
@ -79,7 +79,7 @@ namespace API.Services
var sw = Stopwatch.StartNew();
// Determine whether to parallelize file processing on each folder based on processor count.
int procCount = System.Environment.ProcessorCount;
int procCount = Environment.ProcessorCount;
// Data structure to hold names of subfolders to be examined for files.
Stack<string> dirs = new Stack<string>();

View File

@ -1,7 +1,6 @@
{
"ConnectionStrings": {
"DefaultConnection": "Data source=kavita.db",
"HangfireConnection": "Data source=hangfire.db"
},
"TokenKey": "super secret unguessable key",
"Logging": {

View File

@ -5,6 +5,8 @@ VisualStudioVersion = 15.0.26124.0
MinimumVisualStudioVersion = 15.0.26124.0
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "API", "API\API.csproj", "{1BC0273F-FEBE-4DA1-BC04-3A3167E4C86C}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "API.Tests", "API.Tests\API.Tests.csproj", "{6F7910F2-1B95-4570-A490-519C8935B9D1}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
@ -30,5 +32,17 @@ Global
{1BC0273F-FEBE-4DA1-BC04-3A3167E4C86C}.Release|x64.Build.0 = Release|Any CPU
{1BC0273F-FEBE-4DA1-BC04-3A3167E4C86C}.Release|x86.ActiveCfg = Release|Any CPU
{1BC0273F-FEBE-4DA1-BC04-3A3167E4C86C}.Release|x86.Build.0 = Release|Any CPU
{6F7910F2-1B95-4570-A490-519C8935B9D1}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{6F7910F2-1B95-4570-A490-519C8935B9D1}.Debug|Any CPU.Build.0 = Debug|Any CPU
{6F7910F2-1B95-4570-A490-519C8935B9D1}.Debug|x64.ActiveCfg = Debug|Any CPU
{6F7910F2-1B95-4570-A490-519C8935B9D1}.Debug|x64.Build.0 = Debug|Any CPU
{6F7910F2-1B95-4570-A490-519C8935B9D1}.Debug|x86.ActiveCfg = Debug|Any CPU
{6F7910F2-1B95-4570-A490-519C8935B9D1}.Debug|x86.Build.0 = Debug|Any CPU
{6F7910F2-1B95-4570-A490-519C8935B9D1}.Release|Any CPU.ActiveCfg = Release|Any CPU
{6F7910F2-1B95-4570-A490-519C8935B9D1}.Release|Any CPU.Build.0 = Release|Any CPU
{6F7910F2-1B95-4570-A490-519C8935B9D1}.Release|x64.ActiveCfg = Release|Any CPU
{6F7910F2-1B95-4570-A490-519C8935B9D1}.Release|x64.Build.0 = Release|Any CPU
{6F7910F2-1B95-4570-A490-519C8935B9D1}.Release|x86.ActiveCfg = Release|Any CPU
{6F7910F2-1B95-4570-A490-519C8935B9D1}.Release|x86.Build.0 = Release|Any CPU
EndGlobalSection
EndGlobal