From 324f3d2d4f9281c7a6aef0f6acc2ca4d5bf40038 Mon Sep 17 00:00:00 2001 From: DeltaLaboratory Date: Sun, 25 Sep 2022 05:40:13 +0900 Subject: [PATCH] Extended Korean Filename Parsing Support (#1556) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Added Some Korean Volume Matches * Fixed Typo And Added Test Cases * Restore Chapter Decimal Support * Added Decimal Volume Support to -권, -화, -회 and -장 Merged -권 Pattern to -화, -회, -장 Pattern Added Decimal Test to ParseVolumeTest * Grouped Korean Tests * Fixed Regexp Comment --- API.Tests/Parser/MangaParserTests.cs | 10 ++++++++-- API/Services/Tasks/Scanner/Parser/Parser.cs | 14 +++++++------- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/API.Tests/Parser/MangaParserTests.cs b/API.Tests/Parser/MangaParserTests.cs index f96620378..89b1112f5 100644 --- a/API.Tests/Parser/MangaParserTests.cs +++ b/API.Tests/Parser/MangaParserTests.cs @@ -1,3 +1,4 @@ +using System.Runtime.InteropServices; using API.Entities.Enums; using Xunit; using Xunit.Abstractions; @@ -68,8 +69,6 @@ public class MangaParserTests [InlineData("幽游白书完全版 第03卷 天下", "3")] [InlineData("阿衰online 第1册", "1")] [InlineData("【TFO汉化&Petit汉化】迷你偶像漫画卷2第25话", "2")] - [InlineData("63권#200", "63")] - [InlineData("시즌34삽화2", "34")] [InlineData("スライム倒して300年、知らないうちにレベルMAXになってました 1巻", "1")] [InlineData("スライム倒して300年、知らないうちにレベルMAXになってました 1-3巻", "1-3")] [InlineData("Dance in the Vampire Bund {Special Edition} v03.5 (2019) (Digital) (KG Manga)", "3.5")] @@ -77,6 +76,12 @@ public class MangaParserTests [InlineData("Манга Глава 2", "0")] [InlineData("Манга Тома 1-4", "1-4")] [InlineData("Манга Том 1-4", "1-4")] + [InlineData("조선왕조실톡 106화", "106")] + [InlineData("죽음 13회", "13")] + [InlineData("동의보감 13장", "13")] + [InlineData("몰?루 아카이브 7.5권", "7.5")] + [InlineData("63권#200", "63")] + [InlineData("시즌34삽화2", "34")] public void ParseVolumeTest(string filename, string expected) { Assert.Equal(expected, 
API.Services.Tasks.Scanner.Parser.Parser.ParseVolume(filename)); @@ -265,6 +270,7 @@ public class MangaParserTests [InlineData("Kaiju No. 8 036 (2021) (Digital)", "36")] [InlineData("Samurai Jack Vol. 01 - The threads of Time", "0")] [InlineData("【TFO汉化&Petit汉化】迷你偶像漫画第25话", "25")] + [InlineData("자유록 13회#2", "13")] [InlineData("이세계에서 고아원을 열었지만, 어째서인지 아무도 독립하려 하지 않는다 38-1화 ", "38")] [InlineData("[ハレム]ナナとカオル ~高校生のSMごっこ~ 第10話", "10")] [InlineData("Dance in the Vampire Bund {Special Edition} v03.5 (2019) (Digital) (KG Manga)", "0")] diff --git a/API/Services/Tasks/Scanner/Parser/Parser.cs b/API/Services/Tasks/Scanner/Parser/Parser.cs index cf292d66c..79d1f675a 100644 --- a/API/Services/Tasks/Scanner/Parser/Parser.cs +++ b/API/Services/Tasks/Scanner/Parser/Parser.cs @@ -119,9 +119,9 @@ public static class Parser new Regex( @"(卷|册)(?<Volume>\d+)", MatchOptions, RegexTimeout), - // Korean Volume: 제n권 -> Volume n, n권 -> Volume n, 63권#200.zip -> Volume 63 (no chapter, #200 is just files inside) + // Korean Volume: 제n화|권|회|장 -> Volume n, n화|권|회|장 -> Volume n, 63권#200.zip -> Volume 63 (no chapter, #200 is just files inside) new Regex( - @"제?(?<Volume>\d+)권", + @"제?(?<Volume>\d+(\.\d)?)(권|회|화|장)", MatchOptions, RegexTimeout), // Korean Season: 시즌n -> Season n, new Regex( @@ -557,7 +557,7 @@ public static class Parser MatchOptions, RegexTimeout), // Korean Chapter: 제n화 -> Chapter n, 가디언즈 오브 갤럭시 죽음의 보석.E0008.7화#44 new Regex( - @"제?(?<Chapter>\d+\.?\d+)(화|장)", + @"제?(?<Chapter>\d+\.?\d+)(회|화|장)", MatchOptions, RegexTimeout), // Korean Chapter: 第10話 -> Chapter n, [ハレム]ナナとカオル ~高校生のSMごっこ~ 第1話 new Regex( @@ -698,12 +698,12 @@ public static class Parser foreach (var regex in MangaVolumeRegex) { var matches = regex.Matches(filename); - foreach (Match match in matches) + foreach (var group in matches.Select(match => match.Groups)) { - if (!match.Groups["Volume"].Success || match.Groups["Volume"] == Match.Empty) continue; + if (!group["Volume"].Success || group["Volume"] == Match.Empty) continue; - var value = 
match.Groups["Volume"].Value; - var hasPart = match.Groups["Part"].Success; + var value = group["Volume"].Value; + var hasPart = group["Part"].Success; return FormatValue(value, hasPart); } }