Implemented a parallelized, rough scanning loop that parses out information and leaves us with an immutable dictionary mapping each series to the parsed infos for its files.
Added some Entities to help translate this into the DB.
This commit is contained in:
parent
49b4ee0022
commit
0a49b07570
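At a glance, the new scan visits files in parallel, buckets each file's ParserInfo under its parsed series name in a ConcurrentDictionary, and finally snapshots the buckets into an immutable dictionary. A condensed sketch of that shape, using names from the diff below:

```csharp
using System.Collections.Concurrent;
using System.Collections.Immutable;
using System.Linq;
using API.Parser;

var scannedSeries = new ConcurrentDictionary<string, ConcurrentBag<ParserInfo>>();

// In the real code, TraverseTreeParallelForEach(folderPath, f => Process(f))
// fills the buckets; elided here.

// Freeze the mutable accumulator into the immutable result, dropping
// series that collected no parseable files.
var series = scannedSeries
    .Where(kvp => !kvp.Value.IsEmpty)
    .ToImmutableDictionary(kvp => kvp.Key, kvp => kvp.Value);
```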
@@ -13,7 +13,9 @@ namespace API.Tests
    [InlineData("BTOOOM! v01 (2013) (Digital) (Shadowcat-Empire)", "1")]
    [InlineData("Gokukoku no Brynhildr - c001-008 (v01) [TrinityBAKumA]", "1")]
    [InlineData("Dance in the Vampire Bund v16-17 (Digital) (NiceDragon)", "16-17")]
    [InlineData("Akame ga KILL! ZERO v01 (2016) (Digital) (LuCaZ).cbz", "1")]
    [InlineData("v001", "1")]
    [InlineData("[Suihei Kiki]_Kasumi_Otoko_no_Ko_[Taruby]_v1.1.zip", "1")]
    public void ParseVolumeTest(string filename, string expected)
    {
        var result = ParseVolume(filename);

@@ -29,6 +31,7 @@ namespace API.Tests
    [InlineData("Gokukoku no Brynhildr - c001-008 (v01) [TrinityBAKumA]", "Gokukoku no Brynhildr")]
    [InlineData("Dance in the Vampire Bund v16-17 (Digital) (NiceDragon)", "Dance in the Vampire Bund")]
    [InlineData("v001", "")]
    [InlineData("Akame ga KILL! ZERO (2016-2019) (Digital) (LuCaZ)", "Akame ga KILL! ZERO")]
    public void ParseSeriesTest(string filename, string expected)
    {
        var result = ParseSeries(filename);

@@ -44,6 +47,7 @@ namespace API.Tests
    [InlineData("Gokukoku no Brynhildr - c001-008 (v01) [TrinityBAKumA]", "1-8")]
    [InlineData("Dance in the Vampire Bund v16-17 (Digital) (NiceDragon)", "")]
    [InlineData("c001", "1")]
    [InlineData("[Suihei Kiki]_Kasumi_Otoko_no_Ko_[Taruby]_v1.12.zip", "12")]
    public void ParseChaptersTest(string filename, string expected)
    {
        var result = ParseChapter(filename);
@@ -28,10 +28,6 @@
    <PackageReference Include="System.IdentityModel.Tokens.Jwt" Version="6.8.0" />
  </ItemGroup>

  <ItemGroup>
    <Folder Include="Tasks" />
  </ItemGroup>

  <ItemGroup>
    <None Remove="Hangfire-log.db" />
  </ItemGroup>
API/Entities/Series.cs (Normal file, 28 lines)
@@ -0,0 +1,28 @@
using System.Collections.Generic;

namespace API.Entities
{
    public class Series
    {
        /// <summary>
        /// The UI visible Name of the Series. This may or may not be the same as the OriginalName
        /// </summary>
        public string Name { get; set; }
        /// <summary>
        /// Original Japanese Name
        /// </summary>
        public string OriginalName { get; set; }
        /// <summary>
        /// The name used to sort the Series. By default, will be the same as Name.
        /// </summary>
        public string SortName { get; set; }
        /// <summary>
        /// Summary information related to the Series
        /// </summary>
        public string Summary { get; set; }

        public ICollection<Volume> Volumes { get; set; }
    }
}
API/Entities/Volume.cs (Normal file, 14 lines)
@@ -0,0 +1,14 @@
using System.Collections.Generic;

namespace API.Entities
{
    public class Volume
    {
        public string Number { get; set; }
        public ICollection<string> Files { get; set; }

        // A Volume belongs to one Series (many-to-one via the SeriesId foreign key)
        public Series Series { get; set; }
        public int SeriesId { get; set; }
    }
}
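These entities are meant to be mapped by EF Core eventually; a minimal sketch of how they might be registered, assuming a hypothetical DataContext (no context changes are part of this commit). Note that as written, Series and Volume have no key property yet, so EF will want an Id (or an explicit key configuration) before they can be mapped:

```csharp
using Microsoft.EntityFrameworkCore;
using API.Entities;

// Hypothetical context for illustration only; not part of this commit.
public class DataContext : DbContext
{
    public DataContext(DbContextOptions options) : base(options) { }

    public DbSet<Series> Series { get; set; }
    public DbSet<Volume> Volumes { get; set; }

    // EF Core would infer the one-to-many from Volume.SeriesId plus
    // Series.Volumes, once both entities gain a primary key (e.g. int Id).
}
```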
@@ -1,4 +1,5 @@
using System;
using System.Collections.Generic;
using System.Text.RegularExpressions;

namespace API.Parser

@@ -47,10 +48,20 @@ namespace API.Parser
            @"(?<Series>.*)(\b|_)(v|vo|c)",
            RegexOptions.IgnoreCase | RegexOptions.Compiled),

        // Akame ga KILL! ZERO (2016-2019) (Digital) (LuCaZ)
        new Regex(
            @"(?<Series>.*)\(\d",
            RegexOptions.IgnoreCase | RegexOptions.Compiled),

        // [BAA]_Darker_than_Black_c1 (This is very greedy, make sure it's always last)
        new Regex(
            @"(?<Series>.*)(\b|_)(c)",
            RegexOptions.IgnoreCase | RegexOptions.Compiled),
        // Darker Than Black (This takes anything, we have to account for perfectly named folders)
        new Regex(
            @"(?<Series>.*)",
            RegexOptions.IgnoreCase | RegexOptions.Compiled),
    };
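Since these patterns are tried in declaration order, ordering is load-bearing: the greedy `c` pattern and the catch-all `.*` must stay at the end or they would shadow the more specific ones. A quick standalone check of the first pattern against one of the test filenames:

```csharp
using System;
using System.Text.RegularExpressions;

class SeriesRegexDemo
{
    static void Main()
    {
        // The greedy .* backtracks to the LAST "v" preceded by a word
        // boundary, so the volume marker is excluded from the series name.
        var vPattern = new Regex(@"(?<Series>.*)(\b|_)(v|vo|c)", RegexOptions.IgnoreCase);
        var match = vPattern.Match("Dance in the Vampire Bund v16-17 (Digital) (NiceDragon)");
        Console.WriteLine(match.Groups["Series"].Value.Trim());
        // Prints: Dance in the Vampire Bund
    }
}
```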
@@ -72,6 +83,18 @@ namespace API.Parser
            @"(c|ch)(\.? ?)(?<Chapter>\d+-?\d*)",
            RegexOptions.IgnoreCase | RegexOptions.Compiled),
    };

    public static ParserInfo Parse(string filePath)
    {
        return new ParserInfo()
        {
            Chapters = ParseChapter(filePath),
            Series = ParseSeries(filePath),
            Volumes = ParseVolume(filePath),
            File = filePath
        };
    }

    public static string ParseSeries(string filename)
    {
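The body of ParseSeries is cut off by the hunk, but given the ordered regex list above it presumably walks the patterns and returns the first named-group hit; a sketch under that assumption (the field name MangaSeriesRegex is illustrative, not from the diff):

```csharp
// Sketch only: the real body is outside this hunk.
public static string ParseSeries(string filename)
{
    foreach (var regex in MangaSeriesRegex) // illustrative name for the list above
    {
        var match = regex.Match(filename);
        if (match.Success)
        {
            // Underscores double as spaces in scene-style names.
            return match.Groups["Series"].Value.Replace("_", " ").Trim();
        }
    }
    return string.Empty; // "v001" yields "", per ParseSeriesTest
}
```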
@@ -2,13 +2,17 @@
namespace API.Parser
{
    /// <summary>
    /// This represents a single file
    /// </summary>
    public class ParserInfo
    {
        // This can be multiple
        public string Chapters { get; set; }
        public string Series { get; set; }
        // This can be multiple
        public string Volume { get; set; }
        public IEnumerable<string> Files { get; init; }
        public string Volumes { get; set; }
        public string File { get; init; }
        //public IEnumerable<string> Files { get; init; }
    }
}
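Putting it together, Parse stamps out one ParserInfo per file path; the expected values below come from the unit tests earlier in this commit rather than from running the code:

```csharp
var info = API.Parser.Parser.Parse("Akame ga KILL! ZERO v01 (2016) (Digital) (LuCaZ).cbz");
// info.Volumes == "1"                   (per ParseVolumeTest)
// info.Series  == "Akame ga KILL! ZERO" (presumed; the series tests cover the (2016-2019) variant)
// info.File    == the filename passed in
```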
@@ -1,4 +1,5 @@
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Collections.Immutable;
using System.Diagnostics;

@@ -10,6 +11,8 @@ using System.Threading;
using System.Threading.Tasks;
using API.DTOs;
using API.Interfaces;
using API.Parser;
using Microsoft.EntityFrameworkCore.Metadata.Internal;
using Microsoft.Extensions.Logging;

namespace API.Services

@@ -18,6 +21,7 @@ namespace API.Services
{
    private readonly ILogger<DirectoryService> _logger;
    private static readonly string MangaFileExtensions = @"\.cbz|\.cbr|\.png|\.jpeg|\.jpg|\.zip|\.rar";
    private ConcurrentDictionary<string, ConcurrentBag<ParserInfo>> _scannedSeries;

    public DirectoryService(ILogger<DirectoryService> logger)
    {
@@ -62,17 +66,131 @@ namespace API.Services
        return dirs;
    }

    // TODO: Refactor API layer to use this
    public IEnumerable<DirectoryInfo> ListDirectories(string rootPath)
    {
        if (!Directory.Exists(rootPath)) return ImmutableList<DirectoryInfo>.Empty;

        var di = new DirectoryInfo(rootPath);
        var dirs = di.GetDirectories()
            .Where(dir => !(dir.Attributes.HasFlag(FileAttributes.Hidden) || dir.Attributes.HasFlag(FileAttributes.System)))
            .ToImmutableList();

        return dirs;
    }
    private void Process(string path)
    {
        if (Directory.Exists(path))
        {
            DirectoryInfo di = new DirectoryInfo(path);
            Console.WriteLine($"Parsing directory {di.Name}");

            var seriesName = Parser.Parser.ParseSeries(di.Name);
            if (string.IsNullOrEmpty(seriesName))
            {
                return;
            }

            // We don't need ContainsKey; this check is a race condition anyway. We can replace it with TryAdd.
            if (!_scannedSeries.ContainsKey(seriesName))
            {
                _scannedSeries.TryAdd(seriesName, new ConcurrentBag<ParserInfo>());
            }
        }
        else
        {
            var fileName = Path.GetFileName(path);
            Console.WriteLine($"Parsing file {fileName}");

            var info = Parser.Parser.Parse(fileName);
            if (info.Volumes != string.Empty)
            {
                ConcurrentBag<ParserInfo> tempBag;
                ConcurrentBag<ParserInfo> newBag = new ConcurrentBag<ParserInfo>();
                if (_scannedSeries.TryGetValue(info.Series, out tempBag))
                {
                    var existingInfos = tempBag.ToArray();
                    foreach (var existingInfo in existingInfos)
                    {
                        newBag.Add(existingInfo);
                    }
                }
                else
                {
                    tempBag = new ConcurrentBag<ParserInfo>();
                }

                newBag.Add(info);

                if (!_scannedSeries.TryUpdate(info.Series, newBag, tempBag))
                {
                    _scannedSeries.TryAdd(info.Series, newBag);
                }
            }
        }
    }
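As the inline comments already note, both the ContainsKey check and the TryGetValue/TryUpdate bag-copy are racy. ConcurrentDictionary can express the whole upsert atomically; a possible simplification (a sketch, not what this commit ships):

```csharp
// GetOrAdd returns the existing bag or atomically inserts a fresh one;
// ConcurrentBag<T>.Add is thread-safe, so no copy-and-swap is needed.
var bag = _scannedSeries.GetOrAdd(info.Series, _ => new ConcurrentBag<ParserInfo>());
bag.Add(info);
```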

    public void ScanLibrary(LibraryDto library)
    {
        _scannedSeries = new ConcurrentDictionary<string, ConcurrentBag<ParserInfo>>();
        //Dictionary<string, IList<ParserInfo>> series = new Dictionary<string, IList<ParserInfo>>();
        _logger.LogInformation($"Beginning scan on {library.Name}");
        foreach (var folderPath in library.Folders)
        {
            try {
                // // Temporarily, let's build a simple scanner then optimize to parallelization
                //
                // // First, let's see if there are any files in rootPath
                // var files = GetFiles(folderPath, MangaFileExtensions);
                //
                // foreach (var file in files)
                // {
                //     // These do not have a folder, so we need to parse them directly
                //     var parserInfo = Parser.Parser.Parse(file);
                //     Console.WriteLine(parserInfo);
                // }
                //
                // // Get Directories
                // var directories = ListDirectories(folderPath);
                // foreach (var directory in directories)
                // {
                //     _logger.LogDebug($"Scanning {directory.Name}");
                //     var parsedSeries = Parser.Parser.ParseSeries(directory.Name);
                //
                //     // For now, let's skip directories we can't parse information out of. (we are assuming one level deep root)
                //     if (string.IsNullOrEmpty(parsedSeries)) continue;
                //
                //     _logger.LogDebug($"Parsed Series: {parsedSeries}");
                //
                //     if (!series.ContainsKey(parsedSeries))
                //     {
                //         series[parsedSeries] = new List<ParserInfo>();
                //     }
                //
                //     var foundFiles = GetFiles(directory.FullName, MangaFileExtensions);
                //     foreach (var foundFile in foundFiles)
                //     {
                //         var info = Parser.Parser.Parse(foundFile);
                //         if (info.Volumes != string.Empty)
                //         {
                //             series[parsedSeries].Add(info);
                //         }
                //     }
                // }

                TraverseTreeParallelForEach(folderPath, (f) =>
                {
                    // Exceptions are no-ops.
                    try
                    {
                        ProcessManga(folderPath, f);
                        Process(f);
                        //ProcessManga(folderPath, f);
                    }
                    catch (FileNotFoundException) {}
                    catch (IOException) {}

@@ -87,12 +205,38 @@ namespace API.Services
                _logger.LogError($"The directory '{folderPath}' does not exist");
            }
        }

        // var filtered = series.Where(kvp => kvp.Value.Count > 0);
        // series = filtered.ToDictionary(v => v.Key, v => v.Value);
        // Console.WriteLine(series);

        // var filtered = _scannedSeries.Where(kvp => kvp.Value.Count > 0);
        // series = filtered.ToDictionary(v => v.Key, v => v.Value);
        // Console.WriteLine(series);
        var filtered = _scannedSeries.Where(kvp => !kvp.Value.IsEmpty);
        var series = filtered.ToImmutableDictionary(v => v.Key, v => v.Value);
        Console.WriteLine(series);

        // TODO: Perform DB activities on ImmutableDictionary

        //_logger.LogInformation($"Scan completed on {library.Name}. Parsed {series.Keys.Count} series.");
        _logger.LogInformation($"Scan completed on {library.Name}. Parsed {series.Keys.Count()} series.");
        _scannedSeries = null;
    }
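TraverseTreeParallelForEach itself is outside this diff; it presumably resembles the stack-based parallel directory walk from the Microsoft docs sample of the same name. A minimal sketch under that assumption (since Process() branches on Directory.Exists, the callback is assumed to receive directory paths as well as files):

```csharp
// Minimal sketch; the real implementation lives elsewhere in DirectoryService.
private static void TraverseTreeParallelForEach(string root, Action<string> action)
{
    var dirs = new Stack<string>();
    dirs.Push(root);
    while (dirs.Count > 0)
    {
        var currentDir = dirs.Pop();
        action(currentDir); // assumption: Process() handles directory paths too
        foreach (var dir in Directory.GetDirectories(currentDir)) dirs.Push(dir);
        // Process this directory's files in parallel.
        Parallel.ForEach(Directory.GetFiles(currentDir), action);
    }
}
```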

    private static void ProcessManga(string folderPath, string filename)
    {
        Console.WriteLine($"[ProcessManga] Folder: {folderPath}");

        Console.WriteLine($"Found {filename}");
        var series = Parser.Parser.ParseSeries(filename);
        if (series == string.Empty)
        {
            series = Parser.Parser.ParseSeries(folderPath);
        }
        Console.WriteLine($"Series: {series}");
    }