New Scan Loop Fixes (#1452)

* Refactored ScanSeries to avoid a lot of extra work and fixed a bug where Scan Series would invoke the processing twice.

Refactored the series selection code used during processing so that it also matches on Localized Name, for cases where the original name was changed.

Undid an optimization around Last Write time, since Linux file systems match how NTFS works.

* Fixed part of the query

* Added a NormalizedLocalizedName for quick lookups when a series needs grouping. Reworked the scan loop code a bit to ensure we don't do extra work.

Tweaked the widget logic to display better and not show "Not much going on here".

* Fixed a bug where archives with ._ files would be counted as valid files, while they are actually just metadata files on macOS.

* Fixed a broken unit test
This commit is contained in:
Joseph Milazzo 2022-08-20 11:35:31 -05:00 committed by GitHub
parent 252f31db3a
commit 7cb547f2f9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
17 changed files with 1812 additions and 62 deletions

View File

@ -68,6 +68,7 @@ namespace API.Tests.Services
[InlineData("macos_none.zip", 0)]
[InlineData("macos_one.zip", 1)]
[InlineData("macos_native.zip", 21)]
[InlineData("macos_withdotunder_one.zip", 1)]
public void GetNumberOfPagesFromArchiveTest(string archivePath, int expected)
{
var testDirectory = Path.Join(Directory.GetCurrentDirectory(), "../../../Services/Test Data/ArchiveService/Archives");

View File

@ -156,12 +156,14 @@ namespace API.Controllers
}
series.Name = updateSeries.Name.Trim();
series.NormalizedName = Parser.Parser.Normalize(series.Name);
if (!string.IsNullOrEmpty(updateSeries.SortName.Trim()))
{
series.SortName = updateSeries.SortName.Trim();
}
series.LocalizedName = updateSeries.LocalizedName.Trim();
series.NormalizedLocalizedName = Parser.Parser.Normalize(series.LocalizedName);
series.NameLocked = updateSeries.NameLocked;
series.SortNameLocked = updateSeries.SortNameLocked;

View File

@ -0,0 +1,38 @@
using System.Linq;
using System.Threading.Tasks;
using Microsoft.EntityFrameworkCore;
using Microsoft.Extensions.Logging;
namespace API.Data;
/// <summary>
/// v0.5.6 introduced Normalized Localized Name, which allows for faster lookups and less memory usage. This migration will calculate them once
/// </summary>
public static class MigrateNormalizedLocalizedName
{
    /// <summary>
    /// Fills in <c>NormalizedLocalizedName</c> for every series that does not have one yet.
    /// Returns without touching anything when no series is missing the value.
    /// </summary>
    /// <param name="unitOfWork">Used to flag each changed series as updated and to commit the changes</param>
    /// <param name="dataContext">Used to query the series that still lack a normalized localized name</param>
    /// <param name="logger">Receives the progress messages of the migration</param>
    public static async Task Migrate(IUnitOfWork unitOfWork, DataContext dataContext, ILogger<Program> logger)
    {
        // Load only the rows that still need a value. This single query doubles as the
        // "is there anything to do" check and leaves already-populated series untouched.
        var pendingSeries = await dataContext.Series
            .Where(s => s.NormalizedLocalizedName == null)
            .ToListAsync();
        if (pendingSeries.Count == 0)
        {
            return;
        }

        logger.LogInformation("Running MigrateNormalizedLocalizedName migration. Please be patient, this may take some time");

        foreach (var series in pendingSeries)
        {
            // A series without a localized name is normalized from the empty string
            series.NormalizedLocalizedName = Parser.Parser.Normalize(series.LocalizedName ?? string.Empty);
            logger.LogInformation("Updated {SeriesName} normalized localized name: {LocalizedName}", series.Name, series.NormalizedLocalizedName);
            unitOfWork.SeriesRepository.Update(series);
        }

        if (unitOfWork.HasChanges())
        {
            await unitOfWork.CommitAsync();
        }

        logger.LogInformation("MigrateNormalizedLocalizedName migration finished");
    }
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,25 @@
using Microsoft.EntityFrameworkCore.Migrations;
#nullable disable
namespace API.Data.Migrations
{
    /// <summary>
    /// Schema migration that adds the nullable TEXT column <c>NormalizedLocalizedName</c> to the <c>Series</c> table.
    /// </summary>
    public partial class NormalizedLocalizedName : Migration
    {
        /// <summary>
        /// Applies the migration: adds the <c>NormalizedLocalizedName</c> column to <c>Series</c>.
        /// </summary>
        /// <param name="migrationBuilder">Builder that collects the schema operations</param>
        protected override void Up(MigrationBuilder migrationBuilder) =>
            migrationBuilder.AddColumn<string>(
                table: "Series",
                name: "NormalizedLocalizedName",
                type: "TEXT",
                nullable: true);

        /// <summary>
        /// Reverts the migration: drops the <c>NormalizedLocalizedName</c> column from <c>Series</c>.
        /// </summary>
        /// <param name="migrationBuilder">Builder that collects the schema operations</param>
        protected override void Down(MigrationBuilder migrationBuilder) =>
            migrationBuilder.DropColumn(
                table: "Series",
                name: "NormalizedLocalizedName");
    }
}

View File

@ -818,6 +818,9 @@ namespace API.Data.Migrations
b.Property<bool>("NameLocked")
.HasColumnType("INTEGER");
b.Property<string>("NormalizedLocalizedName")
.HasColumnType("TEXT");
b.Property<string>("NormalizedName")
.HasColumnType("TEXT");

View File

@ -121,6 +121,7 @@ public interface ISeriesRepository
Task<int> GetSeriesIdByFolder(string folder);
Task<Series> GetSeriesByFolderPath(string folder);
Task<Series> GetFullSeriesByName(string series, int libraryId);
Task<Series> GetFullSeriesByAnyName(string seriesName, string localizedName, int libraryId);
Task RemoveSeriesNotInList(IList<ParsedSeries> seenSeries, int libraryId);
Task<IDictionary<string, IList<SeriesModified>>> GetFolderPathMap(int libraryId);
}
@ -1168,10 +1169,65 @@ public class SeriesRepository : ISeriesRepository
return await _context.Series.SingleOrDefaultAsync(s => s.FolderPath.Equals(normalized));
}
/// <summary>
/// Finds a series in the given library whose normalized name equals the normalized form of
/// <paramref name="series"/>, or whose localized name equals <paramref name="series"/> as given.
/// </summary>
/// <remarks>This pulls everything with the Series, so should be used only when needing tracking on all related tables</remarks>
/// <param name="series">Series name to look up; it is normalized before being compared against NormalizedName</param>
/// <param name="libraryId">Id of the library the series must belong to</param>
/// <returns>The matching series with its related entities loaded, or null when nothing matches</returns>
public Task<Series> GetFullSeriesByName(string series, int libraryId)
{
    var normalizedSeries = Parser.Parser.Normalize(series);

    return _context.Series
        // A single filter: an additional NormalizedName-only filter in front of this one would be
        // ANDed with it and would prevent the LocalizedName alternative from ever matching on its own.
        .Where(s => (s.NormalizedName.Equals(normalizedSeries)
                     || s.LocalizedName.Equals(series)) && s.LibraryId == libraryId)
        .Include(s => s.Metadata)
        .ThenInclude(m => m.People)
        .Include(s => s.Metadata)
        .ThenInclude(m => m.Genres)
        .Include(s => s.Library)
        .Include(s => s.Volumes)
        .ThenInclude(v => v.Chapters)
        .ThenInclude(cm => cm.People)
        .Include(s => s.Volumes)
        .ThenInclude(v => v.Chapters)
        .ThenInclude(c => c.Tags)
        .Include(s => s.Volumes)
        .ThenInclude(v => v.Chapters)
        .ThenInclude(c => c.Genres)
        .Include(s => s.Metadata)
        .ThenInclude(m => m.Tags)
        .Include(s => s.Volumes)
        .ThenInclude(v => v.Chapters)
        .ThenInclude(c => c.Files)
        .AsSplitQuery()
        // Throws when more than one series matches, so callers see duplicates as an exception
        .SingleOrDefaultAsync();
}
/// <summary>
/// Finds a series by series name or localized name for a given library.
/// </summary>
/// <remarks>This pulls everything with the Series, so should be used only when needing tracking on all related tables</remarks>
/// <param name="seriesName"></param>
/// <param name="localizedName"></param>
/// <param name="libraryId"></param>
/// <returns></returns>
public Task<Series> GetFullSeriesByAnyName(string seriesName, string localizedName, int libraryId)
{
var localizedSeries = Parser.Parser.Normalize(seriesName);
var normalizedLocalized = Parser.Parser.Normalize(localizedName);
return _context.Series
.Where(s => s.NormalizedName.Equals(localizedSeries)
|| s.NormalizedName.Equals(normalizedLocalized)
|| s.NormalizedLocalizedName.Equals(localizedSeries)
|| s.NormalizedLocalizedName.Equals(normalizedLocalized))
.Where(s => s.LibraryId == libraryId)
.Include(s => s.Metadata)
.ThenInclude(m => m.People)
.Include(s => s.Metadata)

View File

@ -18,6 +18,10 @@ public class Series : IEntityDate, IHasReadTimeEstimate
/// </summary>
public string NormalizedName { get; set; }
/// <summary>
/// Used internally for localized name matching. <see cref="Parser.Parser.Normalize"/>
/// </summary>
public string NormalizedLocalizedName { get; set; }
/// <summary>
/// The name used to sort the Series. By default, will be the same as Name.
/// </summary>
public string SortName { get; set; }

View File

@ -1033,9 +1033,15 @@ namespace API.Parser
return IsImage(filename) && CoverImageRegex.IsMatch(filename);
}
/// <summary>
/// Validates that a Path doesn't start with certain blacklisted folders, like __MACOSX, @Recently-Snapshot, etc and that if a full path, the filename
/// doesn't start with ._, which is a metadata file on MACOSX.
/// </summary>
/// <param name="path">File or folder path to check</param>
/// <returns>True when the path matches any of the blacklisted folder or filename patterns, otherwise false</returns>
public static bool HasBlacklistedFolderInPath(string path)
{
    // The filename check catches "._" metadata files nested inside folders
    // (e.g. "chapter/._page.jpg"), which the prefix check on the whole path misses.
    return path.Contains("__MACOSX")
           || path.StartsWith("@Recently-Snapshot")
           || path.StartsWith("@recycle")
           || path.StartsWith("._")
           || Path.GetFileName(path).StartsWith("._")
           || path.Contains(".qpkg");
}

View File

@ -641,17 +641,11 @@ namespace API.Services
/// <summary>
/// Recursively scans a folder and returns the max last write time on any folders
/// </summary>
/// <remarks>This is required vs just an attribute check as NTFS does not bubble up certain events from nested folders.
/// This will also ignore recursive nature if the device is not NTFS</remarks>
/// <param name="folderPath"></param>
/// <returns>Max Last Write Time</returns>
public DateTime GetLastWriteTime(string folderPath)
{
if (!FileSystem.Directory.Exists(folderPath)) throw new IOException($"{folderPath} does not exist");
if (new DriveInfo(FileSystem.Path.GetPathRoot(folderPath)).DriveFormat != "NTFS")
{
return FileSystem.Directory.GetLastWriteTime(folderPath);
}
var directories = GetAllDirectories(folderPath).ToList();
if (directories.Count == 0) return FileSystem.Directory.GetLastWriteTime(folderPath);

View File

@ -8,7 +8,6 @@ using API.Data;
using API.DTOs;
using API.DTOs.CollectionTags;
using API.DTOs.Metadata;
using API.DTOs.Reader;
using API.DTOs.SeriesDetail;
using API.Entities;
using API.Entities.Enums;

View File

@ -79,29 +79,47 @@ public class ProcessSeries : IProcessSeries
{
if (!parsedInfos.Any()) return;
var seriesAdded = false;
var scanWatch = Stopwatch.StartNew();
var seriesName = parsedInfos.First().Series;
await _eventHub.SendMessageAsync(MessageFactory.NotificationProgress, MessageFactory.LibraryScanProgressEvent(library.Name, ProgressEventType.Updated, seriesName));
await _eventHub.SendMessageAsync(MessageFactory.NotificationProgress,
MessageFactory.LibraryScanProgressEvent(library.Name, ProgressEventType.Updated, seriesName));
_logger.LogInformation("[ScannerService] Beginning series update on {SeriesName}", seriesName);
// Check if there is a Series
var seriesAdded = false;
var series = await _unitOfWork.SeriesRepository.GetFullSeriesByName(parsedInfos.First().Series, library.Id);
var firstInfo = parsedInfos.First();
Series series = null;
try
{
series =
await _unitOfWork.SeriesRepository.GetFullSeriesByAnyName(firstInfo.Series, firstInfo.LocalizedSeries,
library.Id);
}
catch (Exception ex)
{
_logger.LogError(ex, "There was an exception finding existing series for {SeriesName} with Localized name of {LocalizedName}. This indicates you have duplicate series with same name or localized name in the library. Correct this and rescan", firstInfo.Series, firstInfo.LocalizedSeries);
return;
}
if (series == null)
{
seriesAdded = true;
series = DbFactory.Series(parsedInfos.First().Series);
series = DbFactory.Series(firstInfo.Series);
series.LocalizedName = firstInfo.LocalizedSeries;
}
if (series.LibraryId == 0) series.LibraryId = library.Id;
try
{
_logger.LogInformation("[ScannerService] Processing series {SeriesName}", series.OriginalName);
UpdateVolumes(series, parsedInfos);
series.Pages = series.Volumes.Sum(v => v.Pages);
series.NormalizedName = Parser.Parser.Normalize(series.Name);
series.NormalizedLocalizedName = Parser.Parser.Normalize(series.LocalizedName);
series.OriginalName ??= parsedInfos[0].Series;
if (series.Format == MangaFormat.Unknown)
{

View File

@ -198,6 +198,8 @@ public class ScannerService : IScannerService
await Task.WhenAll(processTasks);
// At this point, we've already inserted the series into the DB OR we haven't and seenSeries has our series
// We now need to do any leftover work, like removing
// We need to handle if parsedSeries is empty but seenSeries has our series
if (seenSeries.Any(s => s.NormalizedName.Equals(series.NormalizedName)) && parsedSeries.Keys.Count == 0)
{
@ -208,65 +210,56 @@ public class ScannerService : IScannerService
"All folders have not been changed since last scan. Scan will be aborted."));
_processSeries.EnqueuePostSeriesProcessTasks(series.LibraryId, seriesId, false);
await _eventHub.SendMessageAsync(MessageFactory.NotificationProgress, MessageFactory.LibraryScanProgressEvent(library.Name, ProgressEventType.Ended, series.Name));
return;
}
await _eventHub.SendMessageAsync(MessageFactory.NotificationProgress, MessageFactory.LibraryScanProgressEvent(library.Name, ProgressEventType.Ended, series.Name));
// Remove any parsedSeries keys that don't belong to our series. This can occur when users store 2 series in the same folder
RemoveParsedInfosNotForSeries(parsedSeries, series);
// If nothing was found, first validate any of the files still exist. If they don't then we have a deletion and can skip the rest of the logic flow
if (parsedSeries.Count == 0)
{
var anyFilesExist =
(await _unitOfWork.SeriesRepository.GetFilesForSeries(series.Id)).Any(m => File.Exists(m.FilePath));
// If nothing was found, first validate any of the files still exist. If they don't then we have a deletion and can skip the rest of the logic flow
if (parsedSeries.Count == 0)
{
var anyFilesExist =
(await _unitOfWork.SeriesRepository.GetFilesForSeries(series.Id)).Any(m => File.Exists(m.FilePath));
if (!anyFilesExist)
{
try
{
_unitOfWork.SeriesRepository.Remove(series);
await CommitAndSend(1, sw, scanElapsedTime, series);
}
catch (Exception ex)
{
_logger.LogCritical(ex, "There was an error during ScanSeries to delete the series as no files could be found. Aborting scan");
await _unitOfWork.RollbackAsync();
return;
}
}
else
{
// I think we should just fail and tell user to fix their setup. This is extremely expensive for an edge case
_logger.LogCritical("We weren't able to find any files in the series scan, but there should be. Please correct your naming convention or put Series in a dedicated folder. Aborting scan");
await _eventHub.SendMessageAsync(MessageFactory.Error,
MessageFactory.ErrorEvent($"Error scanning {series.Name}", "We weren't able to find any files in the series scan, but there should be. Please correct your naming convention or put Series in a dedicated folder. Aborting scan"));
await _unitOfWork.RollbackAsync();
return;
}
// At this point, parsedSeries will have at least one key and we can perform the update. If it still doesn't, just return and don't do anything
if (parsedSeries.Count == 0) return;
}
if (!anyFilesExist)
{
try
{
_unitOfWork.SeriesRepository.Remove(series);
await CommitAndSend(1, sw, scanElapsedTime, series);
await _eventHub.SendMessageAsync(MessageFactory.SeriesRemoved,
MessageFactory.SeriesRemovedEvent(seriesId, string.Empty, series.LibraryId), false);
}
catch (Exception ex)
{
_logger.LogCritical(ex, "There was an error during ScanSeries to delete the series as no files could be found. Aborting scan");
await _unitOfWork.RollbackAsync();
return;
}
}
else
{
// I think we should just fail and tell user to fix their setup. This is extremely expensive for an edge case
_logger.LogCritical("We weren't able to find any files in the series scan, but there should be. Please correct your naming convention or put Series in a dedicated folder. Aborting scan");
await _eventHub.SendMessageAsync(MessageFactory.Error,
MessageFactory.ErrorEvent($"Error scanning {series.Name}", "We weren't able to find any files in the series scan, but there should be. Please correct your naming convention or put Series in a dedicated folder. Aborting scan"));
await _unitOfWork.RollbackAsync();
return;
}
// At this point, parsedSeries will have at least one key and we can perform the update. If it still doesn't, just return and don't do anything
if (parsedSeries.Count == 0) return;
}
try
{
await _eventHub.SendMessageAsync(MessageFactory.NotificationProgress, MessageFactory.LibraryScanProgressEvent(library.Name, ProgressEventType.Started, series.Name));
var parsedInfos = ParseScannedFiles.GetInfosByName(parsedSeries, series);
await _processSeries.ProcessSeriesAsync(parsedInfos, library);
await _eventHub.SendMessageAsync(MessageFactory.NotificationProgress, MessageFactory.LibraryScanProgressEvent(library.Name, ProgressEventType.Ended, series.Name));
await CommitAndSend(1, sw, scanElapsedTime, series);
}
catch (Exception ex)
{
_logger.LogCritical(ex, "There was an error during ScanSeries to update the series");
await _unitOfWork.RollbackAsync();
}
await _eventHub.SendMessageAsync(MessageFactory.NotificationProgress, MessageFactory.LibraryScanProgressEvent(library.Name, ProgressEventType.Ended, series.Name));
// Tell UI that this series is done
await _eventHub.SendMessageAsync(MessageFactory.ScanSeries,
MessageFactory.ScanSeriesEvent(library.Id, seriesId, series.Name));
await _metadataService.RemoveAbandonedMetadataKeys();
BackgroundJob.Enqueue(() => _cacheService.CleanupChapters(chapterIds));
BackgroundJob.Enqueue(() => _directoryService.ClearDirectory(_directoryService.TempDirectory));

View File

@ -176,6 +176,7 @@ namespace API
var logger = serviceProvider.GetRequiredService<ILogger<Program>>();
var userManager = serviceProvider.GetRequiredService<UserManager<AppUser>>();
var themeService = serviceProvider.GetRequiredService<IThemeService>();
var dataContext = serviceProvider.GetRequiredService<DataContext>();
await MigrateBookmarks.Migrate(directoryService, unitOfWork,
logger, cacheService);
@ -185,6 +186,9 @@ namespace API
await MigrateRemoveExtraThemes.Migrate(unitOfWork, themeService);
// Only needed for v0.5.5.x and v0.5.6
await MigrateNormalizedLocalizedName.Migrate(unitOfWork, dataContext, logger);
// Update the version in the DB after all migrations are run
var installVersion = await unitOfWork.SettingsRepository.GetSettingAsync(ServerSettingKey.InstallVersion);
installVersion.Value = BuildInfo.Version.ToString();

View File

@ -228,8 +228,7 @@ export class AccountService implements OnDestroy {
private refreshToken() {
if (this.currentUser === null || this.currentUser === undefined) return of();
//console.log('refreshing token and updating user account');
return this.httpClient.post<{token: string, refreshToken: string}>(this.baseUrl + 'account/refresh-token', {token: this.currentUser.token, refreshToken: this.currentUser.refreshToken}).pipe(map(user => {
if (this.currentUser) {
this.currentUser.token = user.token;

View File

@ -165,7 +165,7 @@
<li class="list-group-item dark-menu-item" *ngIf="onlineUsers.length > 1">
<div>{{onlineUsers.length}} Users online</div>
</li>
<li class="list-group-item dark-menu-item" *ngIf="activeEvents < 1 && onlineUsers.length <= 1">Not much going on here</li>
<li class="list-group-item dark-menu-item" *ngIf="activeEvents === 0 && onlineUsers.length <= 1">Not much going on here</li>
<li class="list-group-item dark-menu-item" *ngIf="debugMode">Active Events: {{activeEvents}}</li>
</ng-container>
</ul>