Comic Support (#119)

* Implemented some basic regex for comic support

* Implemented support for comics

* empty filenames, like .test.jpg shouldn't be counted as image types.

* Fixed some regex for Manga's with commas or version tags in parenthesis.
This commit is contained in:
Joseph Milazzo 2021-03-28 12:09:42 -05:00 committed by GitHub
parent 55d47eb1b9
commit 7e54d332f5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 255 additions and 16 deletions

View File

@ -52,6 +52,7 @@ namespace API.Tests
[InlineData("Mob Psycho 100 v02 (2019) (Digital) (Shizu).cbz", "2")]
[InlineData("Kodomo no Jikan vol. 1.cbz", "1")]
[InlineData("Kodomo no Jikan vol. 10.cbz", "10")]
[InlineData("Kedouin Makoto - Corpse Party Musume, Chapter 12 [Dametrans][v2]", "0")]
public void ParseVolumeTest(string filename, string expected)
{
Assert.Equal(expected, ParseVolume(filename));
@ -103,11 +104,12 @@ namespace API.Tests
[InlineData("Kedouin Makoto - Corpse Party Musume, Chapter 09", "Kedouin Makoto - Corpse Party Musume")]
[InlineData("Goblin Slayer Side Story - Year One 025.5", "Goblin Slayer Side Story - Year One")]
[InlineData("Goblin Slayer - Brand New Day 006.5 (2019) (Digital) (danke-Empire)", "Goblin Slayer - Brand New Day")]
[InlineData("Kedouin Makoto - Corpse Party Musume, Chapter 01 [Dametrans][v2]", "Kedouin Makoto - Corpse Party Musume")]
public void ParseSeriesTest(string filename, string expected)
{
Assert.Equal(expected, ParseSeries(filename));
}
[Theory]
[InlineData("Killing Bites Vol. 0001 Ch. 0001 - Galactica Scanlations (gb)", "1")]
[InlineData("My Girlfriend Is Shobitch v01 - ch. 09 - pg. 008.png", "9")]
@ -143,6 +145,7 @@ namespace API.Tests
[InlineData("Vol 1", "0")]
[InlineData("VanDread-v01-c001[MD].zip", "1")]
[InlineData("Goblin Slayer Side Story - Year One 025.5", "25.5")]
[InlineData("Kedouin Makoto - Corpse Party Musume, Chapter 01", "1")]
public void ParseChaptersTest(string filename, string expected)
{
Assert.Equal(expected, ParseChapter(filename));
@ -218,6 +221,73 @@ namespace API.Tests
{
Assert.Equal(expected, Normalize(input));
}
[Theory]
[InlineData("01 Spider-Man & Wolverine 01.cbr", "Spider-Man & Wolverine")]
[InlineData("04 - Asterix the Gladiator (1964) (Digital-Empire) (WebP by Doc MaKS)", "Asterix the Gladiator")]
[InlineData("The First Asterix Frieze (WebP by Doc MaKS)", "The First Asterix Frieze")]
[InlineData("Batman & Catwoman - Trail of the Gun 01", "Batman & Catwoman - Trail of the Gun")]
[InlineData("Batman & Daredevil - King of New York", "Batman & Daredevil - King of New York")]
[InlineData("Batman & Grendel (1996) 01 - Devil's Bones", "Batman & Grendel")]
[InlineData("Batman & Robin the Teen Wonder #0", "Batman & Robin the Teen Wonder")]
[InlineData("Batman & Wildcat (1 of 3)", "Batman & Wildcat")]
[InlineData("Batman And Superman World's Finest #01", "Batman And Superman World's Finest")]
[InlineData("Babe 01", "Babe")]
[InlineData("Scott Pilgrim 01 - Scott Pilgrim's Precious Little Life (2004)", "Scott Pilgrim")]
[InlineData("Teen Titans v1 001 (1966-02) (digital) (OkC.O.M.P.U.T.O.-Novus)", "Teen Titans")]
[InlineData("Scott Pilgrim 02 - Scott Pilgrim vs. The World (2005)", "Scott Pilgrim")]
public void ParseComicSeriesTest(string filename, string expected)
{
Assert.Equal(expected, ParseComicSeries(filename));
}
[Theory]
[InlineData("01 Spider-Man & Wolverine 01.cbr", "1")]
[InlineData("04 - Asterix the Gladiator (1964) (Digital-Empire) (WebP by Doc MaKS)", "4")]
[InlineData("The First Asterix Frieze (WebP by Doc MaKS)", "0")]
[InlineData("Batman & Catwoman - Trail of the Gun 01", "1")]
[InlineData("Batman & Daredevil - King of New York", "0")]
[InlineData("Batman & Grendel (1996) 01 - Devil's Bones", "1")]
[InlineData("Batman & Robin the Teen Wonder #0", "0")]
[InlineData("Batman & Wildcat (1 of 3)", "0")]
[InlineData("Batman And Superman World's Finest #01", "1")]
[InlineData("Babe 01", "1")]
[InlineData("Scott Pilgrim 01 - Scott Pilgrim's Precious Little Life (2004)", "1")]
[InlineData("Teen Titans v1 001 (1966-02) (digital) (OkC.O.M.P.U.T.O.-Novus)", "1")]
[InlineData("Scott Pilgrim 02 - Scott Pilgrim vs. The World (2005)", "2")]
public void ParseComicVolumeTest(string filename, string expected)
{
Assert.Equal(expected, ParseComicVolume(filename));
}
[Theory]
[InlineData("01 Spider-Man & Wolverine 01.cbr", "0")]
[InlineData("04 - Asterix the Gladiator (1964) (Digital-Empire) (WebP by Doc MaKS)", "0")]
[InlineData("The First Asterix Frieze (WebP by Doc MaKS)", "0")]
[InlineData("Batman & Catwoman - Trail of the Gun 01", "0")]
[InlineData("Batman & Daredevil - King of New York", "0")]
[InlineData("Batman & Grendel (1996) 01 - Devil's Bones", "0")]
[InlineData("Batman & Robin the Teen Wonder #0", "0")]
[InlineData("Batman & Wildcat (1 of 3)", "1")]
[InlineData("Batman & Wildcat (2 of 3)", "2")]
[InlineData("Batman And Superman World's Finest #01", "0")]
[InlineData("Babe 01", "0")]
[InlineData("Scott Pilgrim 01 - Scott Pilgrim's Precious Little Life (2004)", "0")]
[InlineData("Teen Titans v1 001 (1966-02) (digital) (OkC.O.M.P.U.T.O.-Novus)", "1")]
public void ParseComicChapterTest(string filename, string expected)
{
Assert.Equal(expected, ParseComicChapter(filename));
}
[Theory]
[InlineData("test.jpg", true)]
[InlineData("test.jpeg", true)]
[InlineData("test.png", true)]
[InlineData(".test.jpg", false)]
public void IsImageTest(string filename, bool expected)
{
Assert.Equal(expected, IsImage(filename));
}
[Fact]

View File

@ -16,8 +16,8 @@
<PackageReference Include="Hangfire.AspNetCore" Version="1.7.18" />
<PackageReference Include="Hangfire.MaximumConcurrentExecutions" Version="1.1.0" />
<PackageReference Include="Hangfire.MemoryStorage.Core" Version="1.4.0" />
<PackageReference Include="Microsoft.AspNetCore.Authentication.JwtBearer" Version="5.0.1"/>
<PackageReference Include="Microsoft.AspNetCore.Authentication.OpenIdConnect" Version="5.0.1"/>
<PackageReference Include="Microsoft.AspNetCore.Authentication.JwtBearer" Version="5.0.1" />
<PackageReference Include="Microsoft.AspNetCore.Authentication.OpenIdConnect" Version="5.0.1" />
<PackageReference Include="Microsoft.AspNetCore.Identity.EntityFrameworkCore" Version="5.0.1" />
<PackageReference Include="Microsoft.EntityFrameworkCore.Design" Version="5.0.1">
<PrivateAssets>all</PrivateAssets>

View File

@ -9,7 +9,7 @@ namespace API.Parser
public static class Parser
{
public static readonly string MangaFileExtensions = @"\.cbz|\.zip|\.rar|\.cbr|.tar.gz|.7zip";
public static readonly string ImageFileExtensions = @"\.png|\.jpeg|\.jpg";
public static readonly string ImageFileExtensions = @"^(\.png|\.jpeg|\.jpg)";
private static readonly string XmlRegexExtensions = @"\.xml";
private static readonly Regex ImageRegex = new Regex(ImageFileExtensions, RegexOptions.IgnoreCase | RegexOptions.Compiled);
private static readonly Regex MangaFileRegex = new Regex(MangaFileExtensions, RegexOptions.IgnoreCase | RegexOptions.Compiled);
@ -24,7 +24,7 @@ namespace API.Parser
RegexOptions.IgnoreCase | RegexOptions.Compiled),
// Historys Strongest Disciple Kenichi_v11_c90-98.zip or Dance in the Vampire Bund v16-17
new Regex(
@"(?<Series>.*)(\b|_)v(?<Volume>\d+(-\d+)?)",
@"(?<Series>.*)(\b|_)(?!\[)v(?<Volume>\d+(-\d+)?)(?!\])",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
// Kodomo no Jikan vol. 10
new Regex(
@ -60,6 +60,10 @@ namespace API.Parser
new Regex(
@"(?<Series>.*) (\b|_|-)v",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
// Kedouin Makoto - Corpse Party Musume, Chapter 19 [Dametrans].zip
new Regex(
@"(?<Series>.*)(?:, Chapter )(?<Chapter>\d+)",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
//Tonikaku Cawaii [Volume 11], Darling in the FranXX - Volume 01.cbz
new Regex(
@"(?<Series>.*)(?: _|-|\[|\() ?v",
@ -81,10 +85,6 @@ namespace API.Parser
new Regex(
@"(?<Series>.*) (?<Chapter>\d+) (?:\(\d{4}\)) ",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
// Kedouin Makoto - Corpse Party Musume, Chapter 19 [Dametrans].zip
new Regex(
@"(?<Series>.*)(?:, Chapter )(?<Chapter>\d+)",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
// Goblin Slayer - Brand New Day 006.5 (2019) (Digital) (danke-Empire)
new Regex(
@"(?<Series>.*) (?<Chapter>\d+(?:.\d+|-\d+)?) \(\d{4}\)",
@ -110,6 +110,106 @@ namespace API.Parser
@"(?<Series>.*)( |_)(c)\d+",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
};
private static readonly Regex[] ComicSeriesRegex = new[]
{
// 04 - Asterix the Gladiator (1964) (Digital-Empire) (WebP by Doc MaKS)
new Regex(
@"^(?<Volume>\d+) (- |_)?(?<Series>.*(\d{4})?)( |_)(\(|\d+)",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
// 01 Spider-Man & Wolverine 01.cbr
new Regex(
@"^(?<Volume>\d+) (?:- )?(?<Series>.*) (\d+)?",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
// Batman & Wildcat (1 of 3)
new Regex(
@"(?<Series>.*(\d{4})?)( |_)(?:\(\d+ of \d+)",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
// Teen Titans v1 001 (1966-02) (digital) (OkC.O.M.P.U.T.O.-Novus)
new Regex(
@"^(?<Series>.*)(?: |_)v\d+",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
// Batman & Catwoman - Trail of the Gun 01, Batman & Grendel (1996) 01 - Devil's Bones, Teen Titans v1 001 (1966-02) (digital) (OkC.O.M.P.U.T.O.-Novus)
new Regex(
@"^(?<Series>.*)(?: \d+)",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
// Batman & Robin the Teen Wonder #0
new Regex(
@"^(?<Series>.*)(?: |_)#\d+",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
// Scott Pilgrim 02 - Scott Pilgrim vs. The World (2005)
new Regex(
@"^(?<Series>.*)(?: |_)(?<Volume>\d+)",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
// The First Asterix Frieze (WebP by Doc MaKS)
new Regex(
@"^(?<Series>.*)(?: |_)(?!\(\d{4}|\d{4}-\d{2}\))\(",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
// MUST BE LAST: Batman & Daredevil - King of New York
new Regex(
@"^(?<Series>.*)",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
};
private static readonly Regex[] ComicVolumeRegex = new[]
{
// 04 - Asterix the Gladiator (1964) (Digital-Empire) (WebP by Doc MaKS)
new Regex(
@"^(?<Volume>\d+) (- |_)?(?<Series>.*(\d{4})?)( |_)(\(|\d+)",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
// 01 Spider-Man & Wolverine 01.cbr
new Regex(
@"^(?<Volume>\d+) (?:- )?(?<Series>.*) (\d+)?",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
// Batman & Wildcat (1 of 3)
new Regex(
@"(?<Series>.*(\d{4})?)( |_)(?:\((?<Chapter>\d+) of \d+)",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
// Teen Titans v1 001 (1966-02) (digital) (OkC.O.M.P.U.T.O.-Novus)
new Regex(
@"^(?<Series>.*)(?: |_)v(?<Volume>\d+)",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
// Scott Pilgrim 02 - Scott Pilgrim vs. The World (2005)
new Regex(
@"^(?<Series>.*)(?: |_)(?<Volume>\d+)",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
// Batman & Catwoman - Trail of the Gun 01, Batman & Grendel (1996) 01 - Devil's Bones, Teen Titans v1 001 (1966-02) (digital) (OkC.O.M.P.U.T.O.-Novus)
new Regex(
@"^(?<Series>.*)(?: (?<Volume>\d+))",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
// Batman & Robin the Teen Wonder #0
new Regex(
@"^(?<Series>.*)(?: |_)#(?<Volume>\d+)",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
};
private static readonly Regex[] ComicChapterRegex = new[]
{
// 04 - Asterix the Gladiator (1964) (Digital-Empire) (WebP by Doc MaKS)
new Regex(
@"^(?<Volume>\d+) (- |_)?(?<Series>.*(\d{4})?)( |_)(\(|\d+)",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
// 01 Spider-Man & Wolverine 01.cbr
new Regex(
@"^(?<Volume>\d+) (?:- )?(?<Series>.*) (\d+)?",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
// Batman & Wildcat (1 of 3)
new Regex(
@"(?<Series>.*(\d{4})?)( |_)(?:\((?<Chapter>\d+) of \d+)",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
// Teen Titans v1 001 (1966-02) (digital) (OkC.O.M.P.U.T.O.-Novus)
new Regex(
@"^(?<Series>.*)(?: |_)v(?<Volume>\d+)(?: |_)(c? ?)(?<Chapter>\d+)",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
// Batman & Catwoman - Trail of the Gun 01, Batman & Grendel (1996) 01 - Devil's Bones, Teen Titans v1 001 (1966-02) (digital) (OkC.O.M.P.U.T.O.-Novus)
new Regex(
@"^(?<Series>.*)(?: (?<Volume>\d+))",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
// Batman & Robin the Teen Wonder #0
new Regex(
@"^(?<Series>.*)(?: |_)#(?<Volume>\d+)",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
};
private static readonly Regex[] ReleaseGroupRegex = new[]
{
@ -184,8 +284,9 @@ namespace API.Parser
/// </summary>
/// <param name="filePath"></param>
/// <param name="rootPath">Root folder</param>
/// <param name="type">Defaults to Manga. Allows different Regex to be used for parsing.</param>
/// <returns><see cref="ParserInfo"/> or null if Series was empty</returns>
public static ParserInfo Parse(string filePath, string rootPath)
public static ParserInfo Parse(string filePath, string rootPath, LibraryType type = LibraryType.Manga)
{
var fileName = Path.GetFileName(filePath);
var directoryName = (new FileInfo(filePath)).Directory?.Name;
@ -193,9 +294,9 @@ namespace API.Parser
var ret = new ParserInfo()
{
Chapters = ParseChapter(fileName),
Series = ParseSeries(fileName),
Volumes = ParseVolume(fileName),
Chapters = type == LibraryType.Manga ? ParseChapter(fileName) : ParseComicChapter(fileName),
Series = type == LibraryType.Manga ? ParseSeries(fileName) : ParseComicSeries(fileName),
Volumes = type == LibraryType.Manga ? ParseVolume(fileName) : ParseComicVolume(fileName),
Filename = fileName,
Format = ParseFormat(filePath),
FullFilePath = filePath
@ -261,6 +362,22 @@ namespace API.Parser
return string.Empty;
}
public static string ParseComicSeries(string filename)
{
foreach (var regex in ComicSeriesRegex)
{
var matches = regex.Matches(filename);
foreach (Match match in matches)
{
if (match.Groups["Series"].Success && match.Groups["Series"].Value != string.Empty)
{
return CleanTitle(match.Groups["Series"].Value);
}
}
}
return string.Empty;
}
public static string ParseVolume(string filename)
{
@ -283,6 +400,28 @@ namespace API.Parser
return "0";
}
public static string ParseComicVolume(string filename)
{
foreach (var regex in ComicVolumeRegex)
{
var matches = regex.Matches(filename);
foreach (Match match in matches)
{
if (match.Groups["Volume"] == Match.Empty) continue;
var value = match.Groups["Volume"].Value;
if (!value.Contains("-")) return RemoveLeadingZeroes(match.Groups["Volume"].Value);
var tokens = value.Split("-");
var from = RemoveLeadingZeroes(tokens[0]);
var to = RemoveLeadingZeroes(tokens[1]);
return $"{@from}-{to}";
}
}
return "0";
}
public static string ParseChapter(string filename)
{
@ -311,6 +450,34 @@ namespace API.Parser
return "0";
}
public static string ParseComicChapter(string filename)
{
foreach (var regex in ComicChapterRegex)
{
var matches = regex.Matches(filename);
foreach (Match match in matches)
{
if (match.Groups["Chapter"] != Match.Empty)
{
var value = match.Groups["Chapter"].Value;
if (value.Contains("-"))
{
var tokens = value.Split("-");
var from = RemoveLeadingZeroes(tokens[0]);
var to = RemoveLeadingZeroes(tokens[1]);
return $"{from}-{to}";
}
return RemoveLeadingZeroes(match.Groups["Chapter"].Value);
}
}
}
return "0";
}
private static string RemoveEditionTagHolders(string title)
{
@ -408,6 +575,7 @@ namespace API.Parser
public static bool IsImage(string filePath)
{
if (filePath.StartsWith(".")) return false;
return ImageRegex.IsMatch(Path.GetExtension(filePath));
}

View File

@ -98,7 +98,7 @@ namespace API.Services.Tasks
{
try
{
ProcessFile(f, folderPath.Path);
ProcessFile(f, folderPath.Path, library.Type);
}
catch (FileNotFoundException exception)
{
@ -333,9 +333,10 @@ namespace API.Services.Tasks
/// </summary>
/// <param name="path">Path of a file</param>
/// <param name="rootPath"></param>
private void ProcessFile(string path, string rootPath)
/// <param name="type">Library type to determine parsing to perform</param>
private void ProcessFile(string path, string rootPath, LibraryType type)
{
var info = Parser.Parser.Parse(path, rootPath);
var info = Parser.Parser.Parse(path, rootPath, type);
if (info == null)
{