From 39fa750d96e438f261422dbb4e201175753ea273 Mon Sep 17 00:00:00 2001 From: Joseph Milazzo Date: Mon, 8 Feb 2021 10:53:59 -0600 Subject: [PATCH] Enhanced the parser to handle more cases and implement some negative lookups when being greedy. --- API.Tests/ParserTest.cs | 51 +++++++++++++++++++++-- API.Tests/Services/ScannerServiceTests.cs | 7 ++++ API/Parser/Parser.cs | 18 +++++--- 3 files changed, 68 insertions(+), 8 deletions(-) create mode 100644 API.Tests/Services/ScannerServiceTests.cs diff --git a/API.Tests/ParserTest.cs b/API.Tests/ParserTest.cs index d91e82092..897bc99a8 100644 --- a/API.Tests/ParserTest.cs +++ b/API.Tests/ParserTest.cs @@ -1,13 +1,23 @@ +using System; using System.Collections.Generic; using API.Entities.Enums; using API.Parser; using Xunit; +using Xunit.Abstractions; using static API.Parser.Parser; namespace API.Tests { public class ParserTests { + private readonly ITestOutputHelper _testOutputHelper; + + + public ParserTests(ITestOutputHelper testOutputHelper) + { + _testOutputHelper = testOutputHelper; + } + [Theory] [InlineData("Killing Bites Vol. 0001 Ch. 0001 - Galactica Scanlations (gb)", "1")] [InlineData("My Girlfriend Is Shobitch v01 - ch. 09 - pg. 008.png", "1")] @@ -18,6 +28,7 @@ namespace API.Tests [InlineData("Dance in the Vampire Bund v16-17 (Digital) (NiceDragon)", "16-17")] [InlineData("Akame ga KILL! ZERO v01 (2016) (Digital) (LuCaZ).cbz", "1")] [InlineData("v001", "1")] + [InlineData("Vol 1", "1")] [InlineData("No Volume", "0")] [InlineData("U12 (Under 12) Vol. 0001 Ch. 0001 - Reiwa Scans (gb)", "1")] [InlineData("[Suihei Kiki]_Kasumi_Otoko_no_Ko_[Taruby]_v1.1.zip", "1")] @@ -35,6 +46,7 @@ namespace API.Tests [InlineData("Dorohedoro v12 (2013) (Digital) (LostNerevarine-Empire).cbz", "12")] [InlineData("Yumekui_Merry_v01_c01[Bakayarou-Kuu].rar", "1")] [InlineData("Yumekui-Merry_DKThias_Chapter11v2.zip", "0")] + [InlineData("Itoshi no Karin - c001-006x1 (v01) [Renzokusei Scans]", "1")] public void ParseVolumeTest(string filename, string expected) { @@ -79,7 +91,10 @@ namespace API.Tests [InlineData("Ichiban_Ushiro_no_Daimaou_v04_ch34_[VISCANS].zip", "Ichiban Ushiro no Daimaou")] [InlineData("Rent a Girlfriend v01.cbr", "Rent a Girlfriend")] [InlineData("Yumekui_Merry_v01_c01[Bakayarou-Kuu].rar", "Yumekui Merry")] - //[InlineData("[Tempus Edax Rerum] Epigraph of the Closed Curve - Chapter 6.zip", "Epigraph of the Closed Curve")] + [InlineData("Itoshi no Karin - c001-006x1 (v01) [Renzokusei Scans]", "Itoshi no Karin")] + [InlineData("Tonikaku Kawaii Vol-1 (Ch 01-08)", "Tonikaku Kawaii")] + [InlineData("Tonikaku Kawaii (Ch 59-67) (Ongoing)", "Tonikaku Kawaii")] + [InlineData("7thGARDEN v01 (2016) (Digital) (danke).cbz", "7thGARDEN")] public void ParseSeriesTest(string filename, string expected) { Assert.Equal(expected, ParseSeries(filename)); @@ -113,6 +128,9 @@ namespace API.Tests [InlineData("Goblin Slayer Side Story - Year One 017.5", "17.5")] [InlineData("Beelzebub_53[KSH].zip", "53")] [InlineData("Black Bullet - v4 c20.5 [batoto]", "20.5")] + [InlineData("Itoshi no Karin - c001-006x1 (v01) [Renzokusei Scans]", "1-6")] + [InlineData("APOSIMZ 040 (2020) (Digital) (danke-Empire).cbz", "40")] + [InlineData("Vol 1", "0")] //[InlineData("[Tempus Edax Rerum] Epigraph of the Closed Curve - Chapter 6.zip", "6")] public void ParseChaptersTest(string filename, string expected) { @@ -174,11 +192,22 @@ namespace API.Tests [InlineData("12-14", 12)] [InlineData("24", 24)] [InlineData("18-04", 4)] - public void MinimumNumberFromRangeTest(string input, int expected) + [InlineData("18-04.5", 4.5)] + [InlineData("40", 40)] + public void MinimumNumberFromRangeTest(string input, float expected) { Assert.Equal(expected, MinimumNumberFromRange(input)); } - + + [Theory] + [InlineData("Darker Than Black", "darkerthanblack")] + [InlineData("Darker Than Black - Something", "darkerthanblacksomething")] + [InlineData("", "")] + public void NormalizeTest(string input, string expected) + { + Assert.Equal(expected, Normalize(input)); + } + [Fact] public void ParseInfoTest() @@ -241,6 +270,14 @@ namespace API.Tests FullFilePath = filepath }); + filepath = @"E:\Manga\APOSIMZ\APOSIMZ 040 (2020) (Digital) (danke-Empire).cbz"; + expected.Add(filepath, new ParserInfo + { + Series = "APOSIMZ", Volumes = "0", Edition = "", + Chapters = "40", Filename = "APOSIMZ 040 (2020) (Digital) (danke-Empire).cbz", Format = MangaFormat.Archive, + FullFilePath = filepath + }); + @@ -255,12 +292,20 @@ namespace API.Tests return; } Assert.NotNull(actual); + _testOutputHelper.WriteLine($"Validating {file}"); + _testOutputHelper.WriteLine("Format"); Assert.Equal(expectedInfo.Format, actual.Format); + _testOutputHelper.WriteLine("Series"); Assert.Equal(expectedInfo.Series, actual.Series); + _testOutputHelper.WriteLine("Chapters"); Assert.Equal(expectedInfo.Chapters, actual.Chapters); + _testOutputHelper.WriteLine("Volumes"); Assert.Equal(expectedInfo.Volumes, actual.Volumes); + _testOutputHelper.WriteLine("Edition"); Assert.Equal(expectedInfo.Edition, actual.Edition); + _testOutputHelper.WriteLine("Filename"); Assert.Equal(expectedInfo.Filename, actual.Filename); + _testOutputHelper.WriteLine("FullFilePath"); Assert.Equal(expectedInfo.FullFilePath, actual.FullFilePath); } } diff --git a/API.Tests/Services/ScannerServiceTests.cs b/API.Tests/Services/ScannerServiceTests.cs new file mode 100644 index 000000000..73463be5c --- /dev/null +++ b/API.Tests/Services/ScannerServiceTests.cs @@ -0,0 +1,7 @@ +namespace API.Tests.Services +{ + public class ScannerService + { + + } +} \ No newline at end of file diff --git a/API/Parser/Parser.cs b/API/Parser/Parser.cs index 671699c7d..60706bcbc 100644 --- a/API/Parser/Parser.cs +++ b/API/Parser/Parser.cs @@ -83,7 +83,10 @@ namespace API.Parser new Regex( @"(?.*)\(\d", RegexOptions.IgnoreCase | RegexOptions.Compiled), - + // Tonikaku Kawaii (Ch 59-67) (Ongoing) + new Regex( + @"(?.*)( |_)\((c |ch |chapter )", + RegexOptions.IgnoreCase | RegexOptions.Compiled), // Black Bullet (This is very loose, keep towards bottom) (?.*)(_)(v|vo|c|volume) new Regex( @"(?.*)(_)(v|vo|c|volume)( |_)\d+", @@ -118,9 +121,9 @@ namespace API.Parser @"v\d+\.(?\d+(?:.\d+|-\d+)?)", RegexOptions.IgnoreCase | RegexOptions.Compiled), - // Hinowa ga CRUSH! 018 (2019) (Digital) (LuCaZ).cbz,Hinowa ga CRUSH! 018.5 (2019) (Digital) (LuCaZ).cbz + // Hinowa ga CRUSH! 018 (2019) (Digital) (LuCaZ).cbz, Hinowa ga CRUSH! 018.5 (2019) (Digital) (LuCaZ).cbz new Regex( - @"(?.*) (?\d+(?:.\d+|-\d+)?)(?: \(\d{4}\))?", + @"^(?!Vol)(?.*) (?\d+(?:.\d+|-\d+)?)(?: \(\d{4}\))?", RegexOptions.IgnoreCase | RegexOptions.Compiled), // Tower Of God S01 014 (CBT) (digital).cbz new Regex( @@ -399,10 +402,15 @@ namespace API.Parser return ImageRegex.IsMatch(fileInfo.Extension); } - public static int MinimumNumberFromRange(string range) + public static float MinimumNumberFromRange(string range) { var tokens = range.Split("-"); - return tokens.Min(Int32.Parse); + return tokens.Min(float.Parse); + } + + public static string Normalize(string name) + { + return name.ToLower().Replace("-", "").Replace(" ", ""); } } } \ No newline at end of file