More Parser tests and more cases! Added the ability to parse Editions for Manga (Omnibus, Color, etc.). The edition is stripped from the Series name if present. In the future, it can be stored on MangaFile.

This commit is contained in:
Joseph Milazzo 2021-01-24 08:34:57 -06:00
parent 9030b8de96
commit a315feb569
5 changed files with 125 additions and 31 deletions

View File

@ -1,4 +1,3 @@
using API.Parser;
using Xunit;
using static API.Parser.Parser;
@ -20,6 +19,13 @@ namespace API.Tests
[InlineData("U12 (Under 12) Vol. 0001 Ch. 0001 - Reiwa Scans (gb)", "1")]
[InlineData("[Suihei Kiki]_Kasumi_Otoko_no_Ko_[Taruby]_v1.1.zip", "1")]
[InlineData("Tonikaku Cawaii [Volume 11].cbz", "11")]
[InlineData("[WS]_Ichiban_Ushiro_no_Daimaou_v02_ch10.zip", "2")]
[InlineData("[xPearse] Kyochuu Rettou Volume 1 [English] [Manga] [Volume Scans]", "1")]
[InlineData("Tower Of God S01 014 (CBT) (digital).cbz", "1")]
[InlineData("Tenjou_Tenge_v17_c100[MT].zip", "17")]
[InlineData("Shimoneta - Manmaru Hen - c001-006 (v01) [Various].zip", "1")]
[InlineData("Future Diary v02 (2009) (Digital) (Viz).cbz", "2")]
[InlineData("Mujaki no Rakuen Vol12 ch76", "12")]
public void ParseVolumeTest(string filename, string expected)
{
Assert.Equal(expected, ParseVolume(filename));
@ -45,6 +51,17 @@ namespace API.Tests
[InlineData("Darwin's Game - Volume 14 (F).cbz", "Darwin's Game")]
[InlineData("[BAA]_Darker_than_Black_c7.zip", "Darker than Black")]
[InlineData("Kedouin Makoto - Corpse Party Musume, Chapter 19 [Dametrans].zip", "Kedouin Makoto - Corpse Party Musume")]
[InlineData("[WS]_Ichiban_Ushiro_no_Daimaou_v02_ch10.zip", "Ichiban Ushiro no Daimaou")]
[InlineData("[xPearse] Kyochuu Rettou Volume 1 [English] [Manga] [Volume Scans]", "Kyochuu Rettou")]
[InlineData("Loose_Relation_Between_Wizard_and_Apprentice_c07[AN].zip", "Loose Relation Between Wizard and Apprentice")]
[InlineData("Tower Of God S01 014 (CBT) (digital).cbz", "Tower Of God")]
[InlineData("Tenjou_Tenge_c106[MT].zip", "Tenjou Tenge")]
[InlineData("Tenjou_Tenge_v17_c100[MT].zip", "Tenjou Tenge")]
[InlineData("Shimoneta - Manmaru Hen - c001-006 (v01) [Various].zip", "Shimoneta - Manmaru Hen")]
[InlineData("Future Diary v02 (2009) (Digital) (Viz).cbz", "Future Diary")]
[InlineData("Tonikaku Cawaii [Volume 11].cbz", "Tonikaku Cawaii")]
[InlineData("Mujaki no Rakuen Vol12 ch76", "Mujaki no Rakuen")]
[InlineData("Knights of Sidonia c000 (S2 LE BD Omake - BLAME!) [Habanero Scans]", "Knights of Sidonia")]
public void ParseSeriesTest(string filename, string expected)
{
Assert.Equal(expected, ParseSeries(filename));
@ -63,6 +80,13 @@ namespace API.Tests
[InlineData("Adding volume 1 with File: Ana Satsujin Vol. 1 Ch. 5 - Manga Box (gb).cbz", "5")]
[InlineData("Hinowa ga CRUSH! 018 (2019) (Digital) (LuCaZ).cbz", "18")]
[InlineData("Cynthia The Mission - c000-006 (v06) [Desudesu&Brolen].zip", "0-6")]
[InlineData("[WS]_Ichiban_Ushiro_no_Daimaou_v02_ch10.zip", "10")]
[InlineData("Loose_Relation_Between_Wizard_and_Apprentice_c07[AN].zip", "7")]
[InlineData("Tower Of God S01 014 (CBT) (digital).cbz", "14")]
[InlineData("Tenjou_Tenge_c106[MT].zip", "106")]
[InlineData("Tenjou_Tenge_v17_c100[MT].zip", "100")]
[InlineData("Shimoneta - Manmaru Hen - c001-006 (v01) [Various].zip", "1-6")]
[InlineData("Mujaki no Rakuen Vol12 ch76", "76")]
public void ParseChaptersTest(string filename, string expected)
{
Assert.Equal(expected, ParseChapter(filename));
@ -92,6 +116,7 @@ namespace API.Tests
[InlineData("Hello_I_am_here ", "Hello I am here")]
[InlineData("[ReleaseGroup] The Title", "The Title")]
[InlineData("[ReleaseGroup]_The_Title", "The Title")]
[InlineData("[Suihei Kiki]_Kasumi_Otoko_no_Ko_[Taruby]_v1.1", "Kasumi Otoko no Ko v1.1")]
public void CleanTitleTest(string input, string expected)
{
Assert.Equal(expected, CleanTitle(input));
@ -108,5 +133,14 @@ namespace API.Tests
{
Assert.Equal(expected, IsArchive(input));
}
/// <summary>
/// Checks that <c>ParseEdition</c> returns the edition text for each sample input.
/// </summary>
[Theory]
[InlineData("Tenjou Tenge Omnibus", "Omnibus")]
[InlineData("Tenjou Tenge {Full Contact Edition}", "Full Contact Edition")]
[InlineData("Tenjo Tenge {Full Contact Edition} v01 (2011) (Digital) (ASTC).cbz", "Full Contact Edition")]
public void ParseEditionTest(string input, string expected)
{
    // Act
    var actual = ParseEdition(input);

    // Assert
    Assert.Equal(expected, actual);
}
}
}

View File

@ -0,0 +1,7 @@
namespace API.Tests.Services
{
/// <summary>
/// Placeholder for scanner service tests; no test cases are defined yet.
/// </summary>
public class ScannerServiceTests
{
// TODO: Start adding tests for how scanner works so we can ensure fallbacks, etc work
}
}

View File

@ -13,14 +13,13 @@ namespace API.Parser
//?: is a non-capturing group in C#, else anything in () will be a group
private static readonly Regex[] MangaVolumeRegex = new[]
{
// Historys Strongest Disciple Kenichi_v11_c90-98.zip
// Historys Strongest Disciple Kenichi_v11_c90-98.zip or Dance in the Vampire Bund v16-17
new Regex(
@"(?<Series>.*)(\b|_)v(?<Volume>\d+)",
@"(?<Series>.*)(\b|_)v(?<Volume>\d+-?\d*)",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
// Killing Bites Vol. 0001 Ch. 0001 - Galactica Scanlations (gb)
new Regex(
@"(vol. ?)(?<Volume>0*[1-9]+)",
@"(vol\.? ?)(?<Volume>0*[1-9]+)",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
// Tonikaku Cawaii [Volume 11].cbz
new Regex(
@ -28,11 +27,11 @@ namespace API.Parser
RegexOptions.IgnoreCase | RegexOptions.Compiled),
// Dance in the Vampire Bund v16-17
new Regex(
@"(?<Series>.*)(\b|_)v(?<Volume>\d+-?\d+)",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
// Tower Of God S01 014 (CBT) (digital).cbz
new Regex(
@"(?:v)(?<Volume>0*[1-9]+)",
@"(?<Series>.*)(\b|_|)(S(?<Volume>\d+))",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
};
@ -46,21 +45,33 @@ namespace API.Parser
RegexOptions.IgnoreCase | RegexOptions.Compiled),
// Historys Strongest Disciple Kenichi_v11_c90-98.zip, Killing Bites Vol. 0001 Ch. 0001 - Galactica Scanlations (gb)
new Regex(
@"(?<Series>.*)(\b|_)v",
@"(?<Series>.*) (\b|_|-)v",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
//Tonikaku Cawaii [Volume 11], Darling in the FranXX - Volume 01.cbz
new Regex(
@"(?<Series>.*)(?: _|-|\[|\() ?v",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
//Knights of Sidonia c000 (S2 LE BD Omake - BLAME!) [Habanero Scans]
new Regex(
@"(?<Series>.*)(\bc\d+\b)",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
//[Suihei Kiki]_Kasumi_Otoko_no_Ko_[Taruby]_v1.1.zip
new Regex(
@"(?<Series>.*)(v|s)\d+(-\d+)?",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
// Hinowa ga CRUSH! 018 (2019) (Digital) (LuCaZ).cbz
new Regex(
@"(?<Series>.*) (?<Chapter>\d+) (?:\(\d{4}\)) ",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
// Akame ga KILL! ZERO (2016-2019) (Digital) (LuCaZ)
new Regex(
@"(?<Series>.*)\(\d",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
// Kedouin Makoto - Corpse Party Musume, Chapter 19 [Dametrans].zip
new Regex(
@"(?<Series>.*)(?:, Chapter )(?<Chapter>\d+)",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
// Akame ga KILL! ZERO (2016-2019) (Digital) (LuCaZ)
new Regex(
@"(?<Series>.*)\(\d",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
// Black Bullet (This is very loose, keep towards bottom)
new Regex(
@ -104,15 +115,30 @@ namespace API.Parser
RegexOptions.IgnoreCase | RegexOptions.Compiled),
// Hinowa ga CRUSH! 018 (2019) (Digital) (LuCaZ).cbz
new Regex(
@"(?<Series>.*) (?<Chapter>\d+) (?:\(\d{4}\)) ",
@"(?<Series>.*) (?<Chapter>\d+) (?:\(\d{4}\))",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
// Tower Of God S01 014 (CBT) (digital).cbz
new Regex(
@"(?<Series>.*) S(?<Volume>\d+) (?<Chapter>\d+)",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
};
/// <summary>
/// Patterns that capture an edition marker in the named group "Edition".
/// ParseEdition walks this array in order and uses the first non-empty capture.
/// </summary>
private static readonly Regex[] MangaEditionRegex = new[]
{
    // Tenjo Tenge {Full Contact Edition} v01 (2011) (Digital) (ASTC).cbz
    // The bracketed text may not itself contain a bracket character; otherwise a greedy
    // match would start at an earlier bracket (e.g. a leading "[ReleaseGroup]" tag) and
    // swallow everything up to the edition's closing bracket.
    new Regex(
        @"(?<Edition>({|\(|\[)[^{}()\[\]]* Edition(}|\)|\]))",
        RegexOptions.IgnoreCase | RegexOptions.Compiled),
    // Tenjou Tenge Omnibus
    new Regex(
        @"(\b|_)(?<Edition>Omnibus)(\b|_)",
        RegexOptions.IgnoreCase | RegexOptions.Compiled),
};
public static ParserInfo Parse(string filePath)
{
return new ParserInfo()
var ret = new ParserInfo()
{
Chapters = ParseChapter(filePath),
Series = ParseSeries(filePath),
@ -120,6 +146,12 @@ namespace API.Parser
Filename = filePath,
Format = ParseFormat(filePath)
};
var edition = ParseEdition(filePath);
if (edition != string.Empty) ret.Series = ret.Series.Replace(edition, "");
ret.Edition = edition;
return ret;
}
public static MangaFormat ParseFormat(string filePath)
@ -128,6 +160,27 @@ namespace API.Parser
if (IsImage(filePath)) return MangaFormat.Image;
return MangaFormat.Unknown;
}
/// <summary>
/// Extracts an edition marker (for example "Omnibus" or "Full Contact Edition") from the input.
/// </summary>
/// <param name="filePath">The file name or path to inspect.</param>
/// <returns>
/// The first non-empty "Edition" capture with bracket characters removed and passed through
/// <c>CleanTitle</c>, or <see cref="string.Empty"/> when no pattern matches.
/// </returns>
public static string ParseEdition(string filePath)
{
    foreach (var editionPattern in MangaEditionRegex)
    {
        foreach (Match candidate in editionPattern.Matches(filePath))
        {
            var captured = candidate.Groups["Edition"];
            if (!captured.Success || captured.Value == string.Empty)
            {
                continue;
            }

            // Drop the surrounding bracket characters so only the edition text remains.
            var stripped = captured.Value;
            foreach (var bracket in new[] { "{", "}", "[", "]", "(", ")" })
            {
                stripped = stripped.Replace(bracket, "");
            }

            return CleanTitle(stripped);
        }
    }

    Console.WriteLine("Unable to parse Edition of {0}", filePath);
    return string.Empty;
}
public static string ParseSeries(string filename)
{
@ -136,16 +189,10 @@ namespace API.Parser
var matches = regex.Matches(filename);
foreach (Match match in matches)
{
// if (match.Groups["Series"] != Match.Empty)
// {
// return CleanTitle(match.Groups["Series"].Value);
// }
if (match.Groups["Series"].Success && match.Groups["Series"].Value != string.Empty)
{
return CleanTitle(match.Groups["Series"].Value);
}
//
}
}
@ -228,7 +275,7 @@ namespace API.Parser
{
title = title.Substring(0, title.Length - 1);
}
return title.Trim();
}

View File

@ -16,8 +16,13 @@ namespace API.Parser
public string Filename { get; init; }
public string FullFilePath { get; set; }
/// <summary>
/// Raw (image), Archive
/// <see cref="MangaFormat"/> that represents the type of the file (so caching service knows how to cache for reading)
/// </summary>
public MangaFormat Format { get; set; }
/// <summary>
/// This can potentially store things like "Omnibus, Color, Full Contact Edition, Extra, Final, etc"
/// </summary>
public string Edition { get; set; }
}
}

View File

@ -137,6 +137,7 @@ namespace API.Services
{
var fileName = Path.GetFileName(path);
//var directoryName = (new FileInfo(path)).Directory?.Name;
//TODO: Implement fallback for no series information here
_logger.LogDebug($"Parsing file {fileName}");
@ -207,7 +208,8 @@ namespace API.Services
};
}
private int MinimumNumberFromRange(string range)
// TODO: Implement Test
public int MinimumNumberFromRange(string range)
{
var tokens = range.Split("-");
return Int32.Parse(tokens.Length >= 1 ? tokens[0] : range);
@ -274,7 +276,7 @@ namespace API.Services
}
}
Console.WriteLine($"Adding volume {volumes.Last().Number} with File: {info.Filename}");
_logger.LogInformation($"Adding volume {volumes.Last().Number} with File: {info.Filename}");
}
foreach (var volume in volumes)
@ -321,13 +323,13 @@ namespace API.Services
/// <param name="filepath"></param>
/// <param name="createThumbnail">Create a smaller variant of file extracted from archive. Archive images are usually 1MB each.</param>
/// <returns></returns>
public static byte[] GetCoverImage(string filepath, bool createThumbnail = false)
public byte[] GetCoverImage(string filepath, bool createThumbnail = false)
{
try
{
if (string.IsNullOrEmpty(filepath) || !File.Exists(filepath) || !Parser.Parser.IsArchive(filepath)) return Array.Empty<byte>();
Console.WriteLine($"Extracting Cover image from {filepath}");
_logger.LogDebug($"Extracting Cover image from {filepath}");
using ZipArchive archive = ZipFile.OpenRead(filepath);
if (!archive.HasFiles()) return Array.Empty<byte>();
@ -358,16 +360,15 @@ namespace API.Services
}
catch (Exception ex)
{
Console.WriteLine("There was a critical error and prevented thumbnail generation.");
Console.WriteLine(ex.Message);
_logger.LogError(ex, "There was a critical error and prevented thumbnail generation.");
}
}
return ExtractEntryToImage(entry);
}
catch (Exception e)
catch (Exception ex)
{
Console.WriteLine(e);
_logger.LogError(ex, "There was an exception when reading archive stream.");
return Array.Empty<byte>();
}
}