mirror of
https://github.com/Kareadita/Kavita.git
synced 2025-05-24 00:52:23 -04:00
Lots of Parsing Enhancements (#120)
* More cases for parsing regex * Implemented the ability to parse "Special" keywords. * Commented out some unit tests * More parsing cases * Fixed unit tests * Fixed typo in build script
This commit is contained in:
parent
7e54d332f5
commit
3e031ab458
@ -53,6 +53,8 @@ namespace API.Tests
|
||||
[InlineData("Kodomo no Jikan vol. 1.cbz", "1")]
|
||||
[InlineData("Kodomo no Jikan vol. 10.cbz", "10")]
|
||||
[InlineData("Kedouin Makoto - Corpse Party Musume, Chapter 12 [Dametrans][v2]", "0")]
|
||||
[InlineData("Vagabond_v03", "3")]
|
||||
[InlineData("Mujaki No Rakune Volume 10.cbz", "10")]
|
||||
public void ParseVolumeTest(string filename, string expected)
|
||||
{
|
||||
Assert.Equal(expected, ParseVolume(filename));
|
||||
@ -105,6 +107,11 @@ namespace API.Tests
|
||||
[InlineData("Goblin Slayer Side Story - Year One 025.5", "Goblin Slayer Side Story - Year One")]
|
||||
[InlineData("Goblin Slayer - Brand New Day 006.5 (2019) (Digital) (danke-Empire)", "Goblin Slayer - Brand New Day")]
|
||||
[InlineData("Kedouin Makoto - Corpse Party Musume, Chapter 01 [Dametrans][v2]", "Kedouin Makoto - Corpse Party Musume")]
|
||||
[InlineData("Vagabond_v03", "Vagabond")]
|
||||
[InlineData("[AN] Mahoutsukai to Deshi no Futekisetsu na Kankei Chp. 1", "Mahoutsukai to Deshi no Futekisetsu na Kankei")]
|
||||
[InlineData("Beelzebub_Side_Story_02_RHS.zip", "Beelzebub Side Story")]
|
||||
[InlineData("Corpse Party -The Anthology- Sachikos game of love Hysteric Birthday 2U Chapter 01", "Corpse Party -The Anthology- Sachikos game of love Hysteric Birthday 2U")]
|
||||
[InlineData("[BAA]_Darker_than_Black_Omake-1.zip", "Darker than Black")]
|
||||
public void ParseSeriesTest(string filename, string expected)
|
||||
{
|
||||
Assert.Equal(expected, ParseSeries(filename));
|
||||
@ -146,6 +153,10 @@ namespace API.Tests
|
||||
[InlineData("VanDread-v01-c001[MD].zip", "1")]
|
||||
[InlineData("Goblin Slayer Side Story - Year One 025.5", "25.5")]
|
||||
[InlineData("Kedouin Makoto - Corpse Party Musume, Chapter 01", "1")]
|
||||
[InlineData("To Love Ru v11 Uncensored (Ch.089-097+Omake)", "89-97")]
|
||||
[InlineData("To Love Ru v18 Uncensored (Ch.153-162.5)", "153-162.5")]
|
||||
[InlineData("[AN] Mahoutsukai to Deshi no Futekisetsu na Kankei Chp. 1", "1")]
|
||||
[InlineData("Beelzebub_Side_Story_02_RHS.zip", "2")]
|
||||
public void ParseChaptersTest(string filename, string expected)
|
||||
{
|
||||
Assert.Equal(expected, ParseChapter(filename));
|
||||
@ -197,10 +208,23 @@ namespace API.Tests
|
||||
[InlineData("Tenjou Tenge Omnibus", "Omnibus")]
|
||||
[InlineData("Tenjou Tenge {Full Contact Edition}", "Full Contact Edition")]
|
||||
[InlineData("Tenjo Tenge {Full Contact Edition} v01 (2011) (Digital) (ASTC).cbz", "Full Contact Edition")]
|
||||
[InlineData("Wotakoi - Love is Hard for Otaku Omnibus v01 (2018) (Digital) (danke-Empire)", "Omnibus")]
|
||||
[InlineData("To Love Ru v01 Uncensored (Ch.001-007)", "Uncensored")]
|
||||
[InlineData("Chobits Omnibus Edition v01 [Dark Horse]", "Omnibus Edition")]
|
||||
public void ParseEditionTest(string input, string expected)
|
||||
{
|
||||
Assert.Equal(expected, ParseEdition(input));
|
||||
}
|
||||
[Theory]
|
||||
[InlineData("Beelzebub Special OneShot - Minna no Kochikame x Beelzebub (2016) [Mangastream].cbz", true)]
|
||||
[InlineData("Beelzebub_Omake_June_2012_RHS", true)]
|
||||
[InlineData("Beelzebub_Side_Story_02_RHS.zip", false)]
|
||||
[InlineData("Darker than Black Shikkoku no Hana Special [Simple Scans].zip", true)]
|
||||
[InlineData("Darker than Black Shikkoku no Hana Fanbook Extra [Simple Scans].zip", true)]
|
||||
public void ParseMangaSpecialTest(string input, bool expected)
|
||||
{
|
||||
Assert.Equal(expected, ParseMangaSpecial(input) != "");
|
||||
}
|
||||
|
||||
[Theory]
|
||||
[InlineData("12-14", 12)]
|
||||
@ -236,6 +260,7 @@ namespace API.Tests
|
||||
[InlineData("Scott Pilgrim 01 - Scott Pilgrim's Precious Little Life (2004)", "Scott Pilgrim")]
|
||||
[InlineData("Teen Titans v1 001 (1966-02) (digital) (OkC.O.M.P.U.T.O.-Novus)", "Teen Titans")]
|
||||
[InlineData("Scott Pilgrim 02 - Scott Pilgrim vs. The World (2005)", "Scott Pilgrim")]
|
||||
[InlineData("Wolverine - Origins 003 (2006) (digital) (Minutemen-PhD)", "Wolverine - Origins")]
|
||||
public void ParseComicSeriesTest(string filename, string expected)
|
||||
{
|
||||
Assert.Equal(expected, ParseComicSeries(filename));
|
||||
|
BIN
API/Data/kavita.db
Normal file
BIN
API/Data/kavita.db
Normal file
Binary file not shown.
@ -14,8 +14,7 @@ namespace API.Parser
|
||||
private static readonly Regex ImageRegex = new Regex(ImageFileExtensions, RegexOptions.IgnoreCase | RegexOptions.Compiled);
|
||||
private static readonly Regex MangaFileRegex = new Regex(MangaFileExtensions, RegexOptions.IgnoreCase | RegexOptions.Compiled);
|
||||
private static readonly Regex XmlRegex = new Regex(XmlRegexExtensions, RegexOptions.IgnoreCase | RegexOptions.Compiled);
|
||||
|
||||
//?: is a non-capturing group in C#, else anything in () will be a group
|
||||
|
||||
private static readonly Regex[] MangaVolumeRegex = new[]
|
||||
{
|
||||
// Dance in the Vampire Bund v16-17
|
||||
@ -32,11 +31,11 @@ namespace API.Parser
|
||||
RegexOptions.IgnoreCase | RegexOptions.Compiled),
|
||||
// Killing Bites Vol. 0001 Ch. 0001 - Galactica Scanlations (gb)
|
||||
new Regex(
|
||||
@"(vol\.? ?)(?<Volume>0*[1-9]+)",
|
||||
@"(vol\.? ?)(?<Volume>\d+)",
|
||||
RegexOptions.IgnoreCase | RegexOptions.Compiled),
|
||||
// Tonikaku Cawaii [Volume 11].cbz
|
||||
new Regex(
|
||||
@"(volume )(?<Volume>0?[1-9]+)",
|
||||
@"(volume )(?<Volume>\d+)",
|
||||
RegexOptions.IgnoreCase | RegexOptions.Compiled),
|
||||
|
||||
// Tower Of God S01 014 (CBT) (digital).cbz
|
||||
@ -101,13 +100,21 @@ namespace API.Parser
|
||||
new Regex(
|
||||
@"(?<Series>.*)(_)(v|vo|c|volume)( |_)\d+",
|
||||
RegexOptions.IgnoreCase | RegexOptions.Compiled),
|
||||
// Mahoutsukai to Deshi no Futekisetsu na Kankei Chp. 1
|
||||
new Regex(
|
||||
@"(?<Series>.*)( |_)(?:Chp.? ?\d+)",
|
||||
RegexOptions.IgnoreCase | RegexOptions.Compiled),
|
||||
// Corpse Party -The Anthology- Sachikos game of love Hysteric Birthday 2U Chapter 01
|
||||
new Regex(
|
||||
@"^(?!Vol)(?<Series>.*)( |_)Chapter( |_)(\d+)", // TODO: This is breaking a ton of cases
|
||||
RegexOptions.IgnoreCase | RegexOptions.Compiled),
|
||||
// Akiiro Bousou Biyori - 01.jpg, Beelzebub_172_RHS.zip, Cynthia the Mission 29.rar
|
||||
new Regex(
|
||||
@"^(?!Vol)(?<Series>.*)( |_)(\d+)",
|
||||
@"^(?!Vol)(?<Series>.*)( |_|-)(\d+)",
|
||||
RegexOptions.IgnoreCase | RegexOptions.Compiled),
|
||||
// [BAA]_Darker_than_Black_c1 (This is very greedy, make sure it's close to last)
|
||||
new Regex(
|
||||
@"(?<Series>.*)( |_)(c)\d+",
|
||||
@"(?<Series>.*)( |_|-)(c)\d+",
|
||||
RegexOptions.IgnoreCase | RegexOptions.Compiled),
|
||||
};
|
||||
|
||||
@ -223,8 +230,9 @@ namespace API.Parser
|
||||
|
||||
private static readonly Regex[] MangaChapterRegex = new[]
|
||||
{
|
||||
// Historys Strongest Disciple Kenichi_v11_c90-98.zip, ...c90.5-100.5
|
||||
new Regex(
|
||||
@"(c|ch)(\.? ?)(?<Chapter>\d+(?:.\d+|-\d+)?)",
|
||||
@"(c|ch)(\.? ?)(?<Chapter>(\d+(\.\d)?)-?(\d+(\.\d)?)?)",
|
||||
RegexOptions.IgnoreCase | RegexOptions.Compiled),
|
||||
// [Suihei Kiki]_Kasumi_Otoko_no_Ko_[Taruby]_v1.1.zip
|
||||
new Regex(
|
||||
@ -251,13 +259,17 @@ namespace API.Parser
|
||||
|
||||
};
|
||||
private static readonly Regex[] MangaEditionRegex = {
|
||||
//Tenjo Tenge {Full Contact Edition} v01 (2011) (Digital) (ASTC).cbz
|
||||
// Tenjo Tenge {Full Contact Edition} v01 (2011) (Digital) (ASTC).cbz
|
||||
new Regex(
|
||||
@"(?<Edition>({|\(|\[).* Edition(}|\)|\]))",
|
||||
RegexOptions.IgnoreCase | RegexOptions.Compiled),
|
||||
//Tenjo Tenge {Full Contact Edition} v01 (2011) (Digital) (ASTC).cbz
|
||||
// Tenjo Tenge {Full Contact Edition} v01 (2011) (Digital) (ASTC).cbz
|
||||
new Regex(
|
||||
@"(\b|_)(?<Edition>Omnibus)(\b|_)",
|
||||
@"(\b|_)(?<Edition>Omnibus(( |_)?Edition)?)(\b|_)?",
|
||||
RegexOptions.IgnoreCase | RegexOptions.Compiled),
|
||||
// To Love Ru v01 Uncensored (Ch.001-007)
|
||||
new Regex(
|
||||
@"(\b|_)(?<Edition>Uncensored)(\b|_)",
|
||||
RegexOptions.IgnoreCase | RegexOptions.Compiled),
|
||||
};
|
||||
|
||||
@ -277,6 +289,14 @@ namespace API.Parser
|
||||
RegexOptions.IgnoreCase | RegexOptions.Compiled),
|
||||
};
|
||||
|
||||
private static readonly Regex[] MangaSpecialRegex =
|
||||
{
|
||||
// All Keywords, does not account for checking if contains volume/chapter identification. Parser.Parse() will handle.
|
||||
new Regex(
|
||||
@"(?<Special>Special|OneShot|One\-Shot|Omake|Extra)",
|
||||
RegexOptions.IgnoreCase | RegexOptions.Compiled),
|
||||
};
|
||||
|
||||
|
||||
/// <summary>
|
||||
/// Parses information out of a file path. Will fallback to using directory name if Series couldn't be parsed
|
||||
@ -314,6 +334,13 @@ namespace API.Parser
|
||||
ret.Series = CleanTitle(ret.Series.Replace(edition, ""));
|
||||
ret.Edition = edition;
|
||||
}
|
||||
|
||||
var isSpecial = ParseMangaSpecial(fileName);
|
||||
if (ret.Chapters == "0" && ret.Volumes == "0" && !string.IsNullOrEmpty(isSpecial))
|
||||
{
|
||||
ret.IsSpecial = true;
|
||||
}
|
||||
|
||||
|
||||
|
||||
return ret.Series == string.Empty ? null : ret;
|
||||
@ -346,6 +373,23 @@ namespace API.Parser
|
||||
return string.Empty;
|
||||
}
|
||||
|
||||
public static string ParseMangaSpecial(string filePath)
|
||||
{
|
||||
foreach (var regex in MangaSpecialRegex)
|
||||
{
|
||||
var matches = regex.Matches(filePath);
|
||||
foreach (Match match in matches)
|
||||
{
|
||||
if (match.Groups["Special"].Success && match.Groups["Special"].Value != string.Empty)
|
||||
{
|
||||
return match.Groups["Special"].Value;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return string.Empty;
|
||||
}
|
||||
|
||||
public static string ParseSeries(string filename)
|
||||
{
|
||||
foreach (var regex in MangaSeriesRegex)
|
||||
@ -496,6 +540,25 @@ namespace API.Parser
|
||||
return title;
|
||||
}
|
||||
|
||||
private static string RemoveSpecialTags(string title)
|
||||
{
|
||||
foreach (var regex in MangaSpecialRegex)
|
||||
{
|
||||
var matches = regex.Matches(title);
|
||||
foreach (Match match in matches)
|
||||
{
|
||||
if (match.Success)
|
||||
{
|
||||
title = title.Replace(match.Value, "");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return title;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/// <summary>
|
||||
/// Translates _ -> spaces, trims front and back of string, removes release groups
|
||||
/// </summary>
|
||||
@ -507,6 +570,8 @@ namespace API.Parser
|
||||
|
||||
title = RemoveEditionTagHolders(title);
|
||||
|
||||
title = RemoveSpecialTags(title);
|
||||
|
||||
title = title.Replace("_", " ").Trim();
|
||||
if (title.EndsWith("-"))
|
||||
{
|
||||
|
@ -24,5 +24,10 @@ namespace API.Parser
|
||||
/// This can potentially story things like "Omnibus, Color, Full Contact Edition, Extra, Final, etc"
|
||||
/// </summary>
|
||||
public string Edition { get; set; } = "";
|
||||
|
||||
/// <summary>
|
||||
/// If the file contains no volume/chapter information and contains Special Keywords <see cref="Parser.MangaSpecialRegex"/>
|
||||
/// </summary>
|
||||
public bool IsSpecial { get; set; } = false;
|
||||
}
|
||||
}
|
@ -49,15 +49,15 @@ namespace API.Services.Tasks
|
||||
{
|
||||
// NOTE: This solution isn't the best, but it has potential. We need to handle a few other cases so it works great.
|
||||
return false;
|
||||
|
||||
// if (/*_environment.IsProduction() && */!_forceUpdate && Directory.GetLastWriteTime(folder.Path) < folder.LastScanned)
|
||||
|
||||
// if (!_forceUpdate && Directory.GetLastWriteTime(folder.Path) < folder.LastScanned)
|
||||
// {
|
||||
// _logger.LogDebug($"{folder.Path} hasn't been updated since last scan. Skipping.");
|
||||
// _logger.LogDebug("{FolderPath} hasn't been modified since last scan. Skipping", folder.Path);
|
||||
// skippedFolders += 1;
|
||||
// return true;
|
||||
// }
|
||||
//
|
||||
// return false;
|
||||
|
||||
//return false;
|
||||
}
|
||||
|
||||
private void Cleanup()
|
||||
@ -134,7 +134,6 @@ namespace API.Services.Tasks
|
||||
|
||||
if (Task.Run(() => _unitOfWork.Complete()).Result)
|
||||
{
|
||||
|
||||
_logger.LogInformation("Scan completed on {LibraryName}. Parsed {ParsedSeriesCount} series in {ElapsedScanTime} ms", library.Name, series.Keys.Count, sw.ElapsedMilliseconds);
|
||||
}
|
||||
else
|
||||
@ -149,6 +148,13 @@ namespace API.Services.Tasks
|
||||
{
|
||||
if (parsedSeries == null) throw new ArgumentNullException(nameof(parsedSeries));
|
||||
|
||||
// For all parsedSeries, any infos that contain same series name and IsSpecial is true are combined
|
||||
// foreach (var series in parsedSeries)
|
||||
// {
|
||||
// var seriesName = series.Key;
|
||||
// if (parsedSeries.ContainsKey(seriesName))
|
||||
// }
|
||||
|
||||
// First, remove any series that are not in parsedSeries list
|
||||
var foundSeries = parsedSeries.Select(s => Parser.Parser.Normalize(s.Key)).ToList();
|
||||
var missingSeries = library.Series.Where(existingSeries =>
|
||||
@ -222,7 +228,7 @@ namespace API.Services.Tasks
|
||||
series.Volumes.Add(volume);
|
||||
}
|
||||
|
||||
volume.IsSpecial = volume.Number == 0 && infos.All(p => p.Chapters == "0");
|
||||
volume.IsSpecial = volume.Number == 0 && infos.All(p => p.Chapters == "0" || p.IsSpecial);
|
||||
_logger.LogDebug("Parsing {SeriesName} - Volume {VolumeNumber}", series.Name, volume.Name);
|
||||
UpdateChapters(volume, infos);
|
||||
volume.Pages = volume.Chapters.Sum(c => c.Pages);
|
||||
@ -314,7 +320,7 @@ namespace API.Services.Tasks
|
||||
private void TrackSeries(ParserInfo info)
|
||||
{
|
||||
if (info.Series == string.Empty) return;
|
||||
|
||||
|
||||
_scannedSeries.AddOrUpdate(info.Series, new List<ParserInfo>() {info}, (_, oldValue) =>
|
||||
{
|
||||
oldValue ??= new List<ParserInfo>();
|
||||
|
@ -136,7 +136,7 @@ namespace API
|
||||
applicationLifetime.ApplicationStopping.Register(OnShutdown);
|
||||
applicationLifetime.ApplicationStarted.Register(() =>
|
||||
{
|
||||
Console.WriteLine("Kavita - v0.3.5");
|
||||
Console.WriteLine("Kavita - v0.3.6");
|
||||
});
|
||||
}
|
||||
|
||||
|
4
build.sh
4
build.sh
@ -65,8 +65,8 @@ Package()
|
||||
# TODO: Use no-restore? Because Build should have already done it for us
|
||||
echo "Building"
|
||||
cd API
|
||||
echo dotnet publish -c release --self-contained --runtime $runtime -o "$lOutputFolder" --framework $framework
|
||||
dotnet publish -c release --self-contained --runtime $runtime -o "$lOutputFolder" --framework $framework
|
||||
echo dotnet publish -c Release --self-contained --runtime $runtime -o "$lOutputFolder" --framework $framework
|
||||
dotnet publish -c Release --self-contained --runtime $runtime -o "$lOutputFolder" --framework $framework
|
||||
|
||||
echo "Copying Install information"
|
||||
cp ../INSTALL.txt "$lOutputFolder"/README.txt
|
||||
|
Loading…
x
Reference in New Issue
Block a user