Misc Fixes (#839)

* Fixed a case where chapter was being parsed incorrectly when the series title ends in a number.

* Updated Kavita to support Tome/T notation found in French comics

* Added support for identifying European specials and expanded support for cleaning some tags used in European comics. During cleaning, if series starts with - or comma, remove it.

* Fixed an issue where add to collection for a single series wasn't calling the bulk action handler

* Fixed an NPE on AgeRating conversion. Fixed a bug where, when looking for the cover image, file extensions were throwing off the sort code.

* Refactored Natural Sort ordering to better follow how Windows behaves. This is a departure from how the original code executes.

* GetCachedPagePath now uses natural sorting to pick the images for reading in a more correct order.

* Updated parser to handle a case where there was more than one space as a separator
This commit is contained in:
Joseph Milazzo 2021-12-08 13:27:54 -06:00 committed by GitHub
parent b3e4a7caa6
commit 3b90ef96b5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
19 changed files with 146 additions and 47 deletions

View File

@ -40,22 +40,24 @@ namespace API.Tests.Comparers
)]
[InlineData(
new[] {"3and4.cbz", "The World God Only Knows - Oneshot.cbz", "5.cbz", "1and2.cbz"},
new[] {"1and2.cbz", "3and4.cbz", "5.cbz", "The World God Only Knows - Oneshot.cbz"}
new[] {"The World God Only Knows - Oneshot.cbz", "1and2.cbz", "3and4.cbz", "5.cbz"}
)]
[InlineData(
new[] {"Solo Leveling - c000 (v01) - p000 [Cover] [dig] [Yen Press] [LuCaZ].jpg", "Solo Leveling - c000 (v01) - p001 [dig] [Yen Press] [LuCaZ].jpg", "Solo Leveling - c000 (v01) - p002 [dig] [Yen Press] [LuCaZ].jpg", "Solo Leveling - c000 (v01) - p003 [dig] [Yen Press] [LuCaZ].jpg"},
new[] {"Solo Leveling - c000 (v01) - p000 [Cover] [dig] [Yen Press] [LuCaZ].jpg", "Solo Leveling - c000 (v01) - p001 [dig] [Yen Press] [LuCaZ].jpg", "Solo Leveling - c000 (v01) - p002 [dig] [Yen Press] [LuCaZ].jpg", "Solo Leveling - c000 (v01) - p003 [dig] [Yen Press] [LuCaZ].jpg"}
)]
[InlineData(
new[] {"Marvel2In1-7", "Marvel2In1-7-01", "Marvel2In1-7-02"},
new[] {"Marvel2In1-7", "Marvel2In1-7-01", "Marvel2In1-7-02"}
)]
[InlineData(
new[] {"!001", "001", "002"},
new[] {"!001", "001", "002"}
)]
public void TestNaturalSortComparer(string[] input, string[] expected)
{
    // Sort the input in place using the natural sort comparer under test.
    Array.Sort(input, _nc);

    // One sequence assertion checks both the ordering and the length. The previous
    // element-by-element loop duplicated this check and passed the actual value in
    // the "expected" position, which produced misleading failure messages.
    Assert.Equal(expected, input);
}

View File

@ -0,0 +1,20 @@
using System.IO;
using Xunit;
using API.Extensions;
namespace API.Tests.Extensions;
public class PathExtensionsTests
{
    #region GetFullPathWithoutExtension

    /// <summary>
    /// Verifies that the extension is removed and the remaining path is returned in its
    /// absolute form, for both a bare filename and a path that includes a directory.
    /// </summary>
    [Theory]
    [InlineData("joe.png", "joe")]
    [InlineData("c:/directory/joe.png", "c:/directory/joe")]
    public void GetFullPathWithoutExtension_Test(string input, string expected)
    {
        // Arrange: the expectation is resolved to an absolute path the same way the extension does it.
        var expectedFullPath = Path.GetFullPath(expected);

        // Act
        var actualFullPath = input.GetFullPathWithoutExtension();

        // Assert
        Assert.Equal(expectedFullPath, actualFullPath);
    }

    #endregion
}

View File

@ -70,6 +70,13 @@ namespace API.Tests.Parser
[InlineData("Green Lantern v2 017 - The Spy-Eye that doomed Green Lantern v2", "Green Lantern")]
[InlineData("Green Lantern - Circle of Fire Special - Adam Strange (2000)", "Green Lantern - Circle of Fire - Adam Strange")]
[InlineData("Identity Crisis Extra - Rags Morales Sketches (2005)", "Identity Crisis - Rags Morales Sketches")]
[InlineData("Daredevil - t6 - 10 - (2019)", "Daredevil")]
[InlineData("Batgirl T2000 #57", "Batgirl")]
[InlineData("Teen Titans t1 001 (1966-02) (digital) (OkC.O.M.P.U.T.O.-Novus)", "Teen Titans")]
[InlineData("Conquistador_-Tome_2", "Conquistador")]
[InlineData("Max_l_explorateur-_Tome_0", "Max l explorateur")]
[InlineData("Chevaliers d'Héliopolis T3 - Rubedo, l'oeuvre au rouge (Jodorowsky & Jérémy)", "Chevaliers d'Héliopolis")]
[InlineData("Bd Fr-Aldebaran-Antares-t6", "Aldebaran-Antares")]
public void ParseComicSeriesTest(string filename, string expected)
{
Assert.Equal(expected, API.Parser.Parser.ParseComicSeries(filename));
@ -108,6 +115,13 @@ namespace API.Tests.Parser
[InlineData("Cyberpunk 2077 - Trauma Team 04.cbz", "0")]
[InlineData("2000 AD 0366 [1984-04-28] (flopbie)", "0")]
[InlineData("Daredevil - v6 - 10 - (2019)", "6")]
// Tome Tests
[InlineData("Daredevil - t6 - 10 - (2019)", "6")]
[InlineData("Batgirl T2000 #57", "2000")]
[InlineData("Teen Titans t1 001 (1966-02) (digital) (OkC.O.M.P.U.T.O.-Novus)", "1")]
[InlineData("Conquistador_Tome_2", "2")]
[InlineData("Max_l_explorateur-_Tome_0", "0")]
[InlineData("Chevaliers d'Héliopolis T3 - Rubedo, l'oeuvre au rouge (Jodorowsky & Jérémy)", "3")]
public void ParseComicVolumeTest(string filename, string expected)
{
Assert.Equal(expected, API.Parser.Parser.ParseComicVolume(filename));
@ -162,6 +176,10 @@ namespace API.Tests.Parser
[InlineData("Zombie Tramp vs. Vampblade TPB (2016) (Digital) (TheArchivist-Empire)", true)]
[InlineData("Baldwin the Brave & Other Tales Special SP1.cbr", true)]
[InlineData("Mouse Guard Specials - Spring 1153 - Fraggle Rock FCBD 2010", true)]
[InlineData("Boule et Bill - THS -Bill à disparu", true)]
[InlineData("Asterix - HS - Les 12 travaux d'Astérix", true)]
[InlineData("Sillage Hors Série - Le Collectionneur - Concordance-DKFR", true)]
[InlineData("laughs", false)]
public void ParseComicSpecialTest(string input, bool expected)
{
Assert.Equal(expected, !string.IsNullOrEmpty(API.Parser.Parser.ParseComicSpecial(input)));

View File

@ -28,6 +28,7 @@ public class DefaultParserTests
[InlineData("C:/", "C:/Love Hina/Love Hina - Special.cbz", "Love Hina")]
[InlineData("C:/", "C:/Love Hina/Specials/Ani-Hina Art Collection.cbz", "Love Hina")]
[InlineData("C:/", "C:/Mujaki no Rakuen Something/Mujaki no Rakuen Vol12 ch76.cbz", "Mujaki no Rakuen")]
[InlineData("C:/", "C:/Something Random/Mujaki no Rakuen SP01.cbz", "Something Random")]
public void ParseFromFallbackFolders_FallbackShouldParseSeries(string rootDir, string inputPath, string expectedSeries)
{
var actual = _defaultParser.Parse(inputPath, rootDir);

View File

@ -167,6 +167,8 @@ namespace API.Tests.Parser
[InlineData("Great_Teacher_Onizuka_v16[TheSpectrum]", "Great Teacher Onizuka")]
[InlineData("[Renzokusei]_Kimi_wa_Midara_na_Boku_no_Joou_Ch5_Final_Chapter", "Kimi wa Midara na Boku no Joou")]
[InlineData("Battle Royale, v01 (2000) [TokyoPop] [Manga-Sketchbook]", "Battle Royale")]
[InlineData("Kaiju No. 8 036 (2021) (Digital)", "Kaiju No. 8")]
[InlineData("Seraph of the End - Vampire Reign 093 (2020) (Digital) (LuCaZ).cbz", "Seraph of the End - Vampire Reign")]
public void ParseSeriesTest(string filename, string expected)
{
Assert.Equal(expected, API.Parser.Parser.ParseSeries(filename));
@ -240,6 +242,7 @@ namespace API.Tests.Parser
[InlineData("Deku_&_Bakugo_-_Rising_v1_c1.1.cbz", "1.1")]
[InlineData("Chapter 63 - The Promise Made for 520 Cenz.cbr", "63")]
[InlineData("Harrison, Kim - The Good, The Bad, and the Undead - Hollows Vol 2.5.epub", "0")]
[InlineData("Kaiju No. 8 036 (2021) (Digital)", "36")]
public void ParseChaptersTest(string filename, string expected)
{
Assert.Equal(expected, API.Parser.Parser.ParseChapter(filename));

View File

@ -49,6 +49,8 @@ namespace API.Tests.Parser
[InlineData("Hello_I_am_here ", false, "Hello I am here")]
[InlineData("[ReleaseGroup] The Title", false, "The Title")]
[InlineData("[ReleaseGroup]_The_Title", false, "The Title")]
[InlineData("-The Title", false, "The Title")]
[InlineData("- The Title", false, "The Title")]
[InlineData("[Suihei Kiki]_Kasumi_Otoko_no_Ko_[Taruby]_v1.1", false, "Kasumi Otoko no Ko v1.1")]
[InlineData("Batman - Detective Comics - Rebirth Deluxe Edition Book 04 (2019) (digital) (Son of Ultron-Empire)", true, "Batman - Detective Comics - Rebirth Deluxe Edition")]
public void CleanTitleTest(string input, bool isComic, string expected)

View File

@ -44,17 +44,18 @@ namespace API.Comparators
for (var i = 0; i < x1.Length && i < y1.Length; i++)
{
if (x1[i] == y1[i]) continue;
if (x1[i] == Empty || y1[i] == Empty) continue;
returnVal = PartCompare(x1[i], y1[i]);
return _isAscending ? returnVal : -returnVal;
}
if (y1.Length > x1.Length)
{
returnVal = 1;
returnVal = -1;
}
else if (x1.Length > y1.Length)
{
returnVal = -1;
returnVal = 1;
}
else
{

View File

@ -74,6 +74,7 @@ namespace API.Data.Metadata
/// <summary>
/// Maps a textual age rating onto the <see cref="AgeRating"/> enum by comparing it,
/// ignoring case, against the description of every enum member.
/// </summary>
/// <param name="value">The age rating text; may be null or empty.</param>
/// <returns>
/// The member whose description equals <paramref name="value"/>, or
/// <c>AgeRating.Unknown</c> when the text is null, empty or matches no member.
/// </returns>
public static AgeRating ConvertAgeRatingToEnum(string value)
{
    if (string.IsNullOrEmpty(value))
    {
        return Entities.Enums.AgeRating.Unknown;
    }

    // Upper-case the input once instead of once per enum member.
    var normalizedValue = value.ToUpperInvariant();

    return Enum.GetValues<AgeRating>()
        .SingleOrDefault(
            rating => rating.ToDescription().ToUpperInvariant().Equals(normalizedValue),
            Entities.Enums.AgeRating.Unknown);
}

View File

@ -0,0 +1,13 @@
using System.IO;
namespace API.Extensions;
/// <summary>
/// Extension methods for strings that hold file paths.
/// </summary>
public static class PathExtensions
{
    /// <summary>
    /// Returns the absolute form of <paramref name="filepath"/> with its file extension removed.
    /// </summary>
    /// <param name="filepath">A relative or absolute file path; may be null or empty.</param>
    /// <returns>
    /// <paramref name="filepath"/> unchanged when it is null or empty; otherwise the full path
    /// without the trailing extension. A path that has no extension is only made absolute.
    /// </returns>
    public static string GetFullPathWithoutExtension(this string filepath)
    {
        if (string.IsNullOrEmpty(filepath)) return filepath;

        var extension = Path.GetExtension(filepath);

        // Only cut the extension off the END of the path. string.Replace would throw an
        // ArgumentException for an empty extension (file without one) and would also strip
        // the same text from anywhere else in the path, e.g. a directory named "dir.png".
        var withoutExtension = string.IsNullOrEmpty(extension)
            ? filepath
            : filepath.Substring(0, filepath.Length - extension.Length);

        return Path.GetFullPath(withoutExtension);
    }
}

View File

@ -166,7 +166,7 @@ namespace API.Parser
MatchOptions, RegexTimeout),
// Hinowa ga CRUSH! 018 (2019) (Digital) (LuCaZ).cbz
new Regex(
@"(?<Series>.*) (?<Chapter>\d+) (?:\(\d{4}\)) ",
@"(?<Series>.*)\s+(?<Chapter>\d+)\s+(?:\(\d{4}\))\s",
MatchOptions, RegexTimeout),
// Goblin Slayer - Brand New Day 006.5 (2019) (Digital) (danke-Empire)
new Regex(
@ -209,7 +209,6 @@ namespace API.Parser
new Regex(
@"^(?!Vol\.?)(?<Series>.*)( |_|-)(?<!-)(episode|chapter|(ch\.?) ?)\d+-?\d*",
MatchOptions, RegexTimeout),
// Baketeriya ch01-05.zip
new Regex(
@"^(?!Vol)(?<Series>.*)ch\d+-?\d?",
@ -240,7 +239,7 @@ namespace API.Parser
{
// Invincible Vol 01 Family matters (2005) (Digital)
new Regex(
@"(?<Series>.*)(\b|_)(vol\.?)( |_)(?<Volume>\d+(-\d+)?)",
@"(?<Series>.*)(\b|_)((vol|tome|t)\.?)( |_)(?<Volume>\d+(-\d+)?)",
MatchOptions, RegexTimeout),
// Batman Beyond 2.0 001 (2013)
new Regex(
@ -258,9 +257,9 @@ namespace API.Parser
new Regex(
@"(?<Series>.*(\d{4})?)( |_)(?:\((?<Volume>\d+) of \d+)",
MatchOptions, RegexTimeout),
// Teen Titans v1 001 (1966-02) (digital) (OkC.O.M.P.U.T.O.-Novus)
// Teen Titans v1 001 (1966-02) (digital) (OkC.O.M.P.U.T.O.-Novus), Aldebaran-Antares-t6
new Regex(
@"^(?<Series>.+?)(?: |_)v\d+",
@"^(?<Series>.+?)(?: |_|-)(v|t)\d+",
MatchOptions, RegexTimeout),
// Amazing Man Comics chapter 25
new Regex(
@ -308,11 +307,11 @@ namespace API.Parser
{
// Teen Titans v1 001 (1966-02) (digital) (OkC.O.M.P.U.T.O.-Novus)
new Regex(
@"^(?<Series>.*)(?: |_)v(?<Volume>\d+)",
@"^(?<Series>.*)(?: |_)(t|v)(?<Volume>\d+)",
MatchOptions, RegexTimeout),
// Batgirl Vol.2000 #57 (December, 2004)
new Regex(
@"^(?<Series>.+?)(?:\s|_)vol\.?\s?(?<Volume>\d+)",
@"^(?<Series>.+?)(?:\s|_)(v|vol|tome|t)\.?(\s|_)?(?<Volume>\d+)",
MatchOptions, RegexTimeout),
};
@ -409,7 +408,7 @@ namespace API.Parser
MatchOptions, RegexTimeout),
// Hinowa ga CRUSH! 018 (2019) (Digital) (LuCaZ).cbz, Hinowa ga CRUSH! 018.5 (2019) (Digital) (LuCaZ).cbz
new Regex(
@"^(?!Vol)(?<Series>.+?)(?<!Vol)\.?\s(?<Chapter>\d+(?:.\d+|-\d+)?)(?:\s\(\d{4}\))?(\b|_|-)",
@"^(?!Vol)(?<Series>.+?)(?<!Vol)\.?\s(\d\s)?(?<Chapter>\d+(?:\.\d+|-\d+)?)(?:\s\(\d{4}\))?(\b|_|-)",
MatchOptions, RegexTimeout),
// Tower Of God S01 014 (CBT) (digital).cbz
new Regex(
@ -480,7 +479,15 @@ namespace API.Parser
{
// All Keywords, does not account for checking if contains volume/chapter identification. Parser.Parse() will handle.
new Regex(
@"(?<Special>Specials?|OneShot|One\-Shot|Extra( Chapter)?|Book \d.+?|Compendium \d.+?|Omnibus \d.+?|[_\s\-]TPB[_\s\-]|FCBD \d.+?|Absolute \d.+?|Preview \d.+?|Art Collection|Side( |_)Stories|Bonus)",
@"(?<Special>Specials?|OneShot|One\-Shot|Extra( Chapter)?|Book \d.+?|Compendium \d.+?|Omnibus \d.+?|[_\s\-]TPB[_\s\-]|FCBD \d.+?|Absolute \d.+?|Preview \d.+?|Art Collection|Side(\s|_)Stories|Bonus|Hors Série|(\W|_|-)HS(\W|_|-)|(\W|_|-)THS(\W|_|-))",
MatchOptions, RegexTimeout),
};
// Patterns for tags found in European comic filenames that are not part of the series name.
// RemoveEuropeanTags strips every match of these patterns from a title.
private static readonly Regex[] EuropeanComicRegex =
{
// Matches the "Bd Fr" tag, where the two words are separated by whitespace, an underscore or a hyphen,
// e.g. "Bd Fr-Aldebaran-Antares-t6". The capture group is named "Special" but it holds the tag text.
new Regex(
@"(?<Special>Bd(\s|_|-)Fr)",
MatchOptions, RegexTimeout),
};
@ -896,6 +903,23 @@ namespace API.Parser
return title;
}
/// <summary>
/// Removes every tag matched by <see cref="EuropeanComicRegex"/> from the given title.
/// </summary>
/// <param name="title">The title to clean.</param>
/// <returns>The title with all matched tag text removed and surrounding whitespace trimmed after each removal.</returns>
private static string RemoveEuropeanTags(string title)
{
    foreach (var tagPattern in EuropeanComicRegex)
    {
        // The matches are computed against the title as it was when this pattern started;
        // each matched text is then removed from the progressively cleaned title.
        foreach (Match tagMatch in tagPattern.Matches(title))
        {
            if (!tagMatch.Success)
            {
                continue;
            }

            title = title.Replace(tagMatch.Value, string.Empty).Trim();
        }
    }

    return title;
}
private static string RemoveComicSpecialTags(string title)
{
foreach (var regex in ComicSpecialRegex)
@ -932,6 +956,16 @@ namespace API.Parser
title = isComic ? RemoveComicSpecialTags(title) : RemoveMangaSpecialTags(title);
if (isComic)
{
title = RemoveComicSpecialTags(title);
title = RemoveEuropeanTags(title);
}
else
{
title = RemoveMangaSpecialTags(title);
}
title = title.Replace("_", " ").Trim();
if (title.EndsWith("-") || title.EndsWith(","))
@ -939,6 +973,11 @@ namespace API.Parser
title = title.Substring(0, title.Length - 1);
}
if (title.StartsWith("-") || title.StartsWith(","))
{
title = title.Substring(1);
}
return title.Trim();
}

View File

@ -48,6 +48,7 @@ namespace API.Parser
/// <summary>
/// This can potentially store things like "Omnibus, Color, Full Contact Edition, Extra, Final, etc"
/// </summary>
/// <remarks>Not Used in Database</remarks>
public string Edition { get; set; } = "";
/// <summary>
@ -70,10 +71,6 @@ namespace API.Parser
return (IsSpecial || (Volumes == "0" && Chapters == "0"));
}
// (TODO: Make this a ValueType). Has at least 1 year, maybe 2 representing a range
// public string YearRange { get; set; }
// public IList<string> Genres { get; set; } = new List<string>();
/// <summary>
/// This will contain any EXTRA comicInfo information parsed from the epub or archive. If there is an archive with comicInfo.xml AND it contains
/// series, volume information, that will override what we parsed.
@ -93,6 +90,7 @@ namespace API.Parser
Title = string.IsNullOrEmpty(Title) ? info2.Title : Title;
Series = string.IsNullOrEmpty(Series) ? info2.Series : Series;
IsSpecial = IsSpecial || info2.IsSpecial;
// TODO: Merge ComicInfos?
}
}
}

View File

@ -144,23 +144,23 @@ namespace API.Services
&& Parser.Parser.IsImage(x)
&& !x.StartsWith(Parser.Parser.MacOsMetadataFileStartsWith)).ToList();
if (fullNames.Count == 0) return null;
using var nc = new NaturalSortComparer();
var nonNestedFile = fullNames.Where(entry => (Path.GetDirectoryName(entry) ?? string.Empty).Equals(archiveName))
.OrderBy(Path.GetFullPath, new NaturalSortComparer())
.OrderBy(f => f.GetFullPathWithoutExtension(), nc) // BUG: This shouldn't take into account extension
.FirstOrDefault();
if (!string.IsNullOrEmpty(nonNestedFile)) return nonNestedFile;
// Check the first folder and sort within that to see if we can find a file, else fallback to first file with basic sort.
// Get first folder, then sort within that
var firstDirectoryFile = fullNames.OrderBy(Path.GetDirectoryName, new NaturalSortComparer()).FirstOrDefault();
var firstDirectoryFile = fullNames.OrderBy(Path.GetDirectoryName, nc).FirstOrDefault();
if (!string.IsNullOrEmpty(firstDirectoryFile))
{
var firstDirectory = Path.GetDirectoryName(firstDirectoryFile);
if (!string.IsNullOrEmpty(firstDirectory))
{
var firstDirectoryResult = fullNames.Where(f => firstDirectory.Equals(Path.GetDirectoryName(f)))
.OrderBy(Path.GetFileName, new NaturalSortComparer())
.OrderBy(Path.GetFileNameWithoutExtension, nc)
.FirstOrDefault();
if (!string.IsNullOrEmpty(firstDirectoryResult)) return firstDirectoryResult;
@ -168,7 +168,7 @@ namespace API.Services
}
var result = fullNames
.OrderBy(Path.GetFileName, new NaturalSortComparer())
.OrderBy(Path.GetFileNameWithoutExtension, nc)
.FirstOrDefault();
return string.IsNullOrEmpty(result) ? null : result;
@ -497,10 +497,10 @@ namespace API.Services
break;
}
case ArchiveLibrary.NotSupported:
_logger.LogWarning("[ExtractArchive] This archive cannot be read: {ArchivePath}. Defaulting to 0 pages", archivePath);
_logger.LogWarning("[ExtractArchive] This archive cannot be read: {ArchivePath}", archivePath);
return;
default:
_logger.LogWarning("[ExtractArchive] There was an exception when reading archive stream: {ArchivePath}. Defaulting to 0 pages", archivePath);
_logger.LogWarning("[ExtractArchive] There was an exception when reading archive stream: {ArchivePath}", archivePath);
return;
}

View File

@ -1,5 +1,6 @@
using System;
using System.Collections.Generic;
using System.Collections.Immutable;
using System.IO;
using System.Linq;
using System.Threading.Tasks;
@ -169,7 +170,9 @@ namespace API.Services
// Calculate what chapter the page belongs to
var path = GetCachePath(chapter.Id);
var files = _directoryService.GetFilesWithExtension(path, Parser.Parser.ImageFileExtensions);
Array.Sort(files, _numericComparer);
using var nc = new NaturalSortComparer();
files = files.ToList().OrderBy(Path.GetFileNameWithoutExtension, nc).ToArray();
if (files.Length == 0)
{

View File

@ -677,7 +677,8 @@ namespace API.Services
{
var fileIndex = 1;
foreach (var file in directory.EnumerateFiles().OrderBy(file => file.FullName, new NaturalSortComparer()))
using var nc = new NaturalSortComparer();
foreach (var file in directory.EnumerateFiles().OrderBy(file => file.FullName, nc))
{
if (file.Directory == null) continue;
var paddedIndex = Parser.Parser.PadZeros(directoryIndex + "");

View File

@ -78,8 +78,9 @@ public class ImageService : IImageService
return null;
}
using var nc = new NaturalSortComparer();
var firstImage = _directoryService.GetFilesWithExtension(directory, Parser.Parser.ImageFileExtensions)
.OrderBy(f => f, new NaturalSortComparer()).FirstOrDefault();
.OrderBy(Path.GetFileNameWithoutExtension, nc).FirstOrDefault();
return firstImage;
}

View File

@ -237,13 +237,6 @@ public class MetadataService : IMetadataService
if (comicInfo == null) return;
// Summary Info
if (!string.IsNullOrEmpty(comicInfo.Summary))
{
// PERF: I can move this to the bottom as I have a comicInfo selection, save me an extra read
series.Metadata.Summary = comicInfo.Summary;
}
foreach (var chapter in series.Volumes.SelectMany(volume => volume.Chapters))
{
PersonHelper.UpdatePeople(allPeople, chapter.People.Where(p => p.Role == PersonRole.Writer).Select(p => p.Name), PersonRole.Writer,
@ -282,6 +275,12 @@ public class MetadataService : IMetadataService
.ToList();
//var firstComicInfo = comicInfos.First(i => i.)
// Summary Info
if (!string.IsNullOrEmpty(comicInfo.Summary))
{
// PERF: I can move this to the bottom as I have a comicInfo selection, save me an extra read
series.Metadata.Summary = comicInfo.Summary;
}
// Set the AgeRating as highest in all the comicInfos
series.Metadata.AgeRating = comicInfos.Max(i => ComicInfo.ConvertAgeRatingToEnum(comicInfo.AgeRating));

View File

@ -430,12 +430,6 @@ public class ScannerService : IScannerService
newSeries.Count, stopwatch.ElapsedMilliseconds, library.Name);
}
// private static bool FindSeries(Series series, ParsedSeries parsedInfoKey)
// {
// return (series.NormalizedName.Equals(parsedInfoKey.NormalizedName) || Parser.Parser.Normalize(series.OriginalName).Equals(parsedInfoKey.NormalizedName))
// && (series.Format == parsedInfoKey.Format || series.Format == MangaFormat.Unknown);
// }
private void UpdateSeries(Series series, Dictionary<ParsedSeries, List<ParserInfo>> parsedSeries)
{
try

View File

@ -109,6 +109,9 @@ export class SeriesCardComponent implements OnInit, OnChanges, OnDestroy {
case(Action.AddToReadingList):
this.actionService.addSeriesToReadingList(series, (series) => {/* No Operation */ });
break;
case(Action.AddToCollection):
this.actionService.addMultipleSeriesToCollectionTag([series], () => {/* No Operation */ });
break;
default:
break;
}

View File

@ -2,6 +2,6 @@
"sdk": {
"version": "6.0",
"rollForward": "latestMajor",
"allowPrerelease": true
"allowPrerelease": false
}
}