Add People Dedup and multiple progress fixes (#14848)

This commit is contained in:
JPVenson 2025-09-25 00:20:30 +03:00 committed by GitHub
parent 897975fc57
commit 5a6d9180fe
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
11 changed files with 257 additions and 112 deletions

View File

@ -50,6 +50,8 @@ public class CleanDatabaseScheduledTask : ILibraryPostScanTask
_logger.LogDebug("Cleaning {Number} items with dead parents", numItems);
IProgress<double> subProgress = new Progress<double>((val) => progress.Report(val / 2));
foreach (var itemId in itemIds)
{
cancellationToken.ThrowIfCancellationRequested();
@ -95,9 +97,10 @@ public class CleanDatabaseScheduledTask : ILibraryPostScanTask
numComplete++;
double percent = numComplete;
percent /= numItems;
progress.Report(percent * 100);
subProgress.Report(percent * 100);
}
subProgress = new Progress<double>((val) => progress.Report((val / 2) + 50));
var context = await _dbProvider.CreateDbContextAsync(cancellationToken).ConfigureAwait(false);
await using (context.ConfigureAwait(false))
{
@ -105,7 +108,9 @@ public class CleanDatabaseScheduledTask : ILibraryPostScanTask
await using (transaction.ConfigureAwait(false))
{
await context.ItemValues.Where(e => e.BaseItemsMap!.Count == 0).ExecuteDeleteAsync(cancellationToken).ConfigureAwait(false);
subProgress.Report(50);
await transaction.CommitAsync(cancellationToken).ConfigureAwait(false);
subProgress.Report(100);
}
}

View File

@ -1051,30 +1051,15 @@ namespace Emby.Server.Implementations.Dto
// Include artists that are not in the database yet, e.g., just added via metadata editor
// var foundArtists = artistItems.Items.Select(i => i.Item1.Name).ToList();
dto.ArtistItems = hasArtist.Artists
// .Except(foundArtists, new DistinctNameComparer())
dto.ArtistItems = _libraryManager.GetArtists([.. hasArtist.Artists.Where(e => !string.IsNullOrWhiteSpace(e))])
.Where(e => e.Value.Length > 0)
.Select(i =>
{
// This should not be necessary but we're seeing some cases of it
if (string.IsNullOrEmpty(i))
return new NameGuidPair
{
return null;
}
var artist = _libraryManager.GetArtist(i, new DtoOptions(false)
{
EnableImages = false
});
if (artist is not null)
{
return new NameGuidPair
{
Name = artist.Name,
Id = artist.Id
};
}
return null;
Name = i.Key,
Id = i.Value.First().Id
};
}).Where(i => i is not null).ToArray();
}

View File

@ -327,6 +327,45 @@ namespace Emby.Server.Implementations.Library
DeleteItem(item, options, parent, notifyParentItem);
}
/// <summary>
/// Deletes a batch of items in one pass: the internal metadata directories and delete paths of every
/// item are removed from disk, then all items are removed from the database with a single repository call.
/// </summary>
/// <param name="items">The items to delete. An empty sequence is a no-op.</param>
/// <exception cref="ArgumentNullException"><paramref name="items"/> is <c>null</c>.</exception>
public void DeleteItemsUnsafeFast(IEnumerable<BaseItem> items)
{
    ArgumentNullException.ThrowIfNull(items);

    // Materialize once so the input is enumerated a single time and all paths are resolved up front.
    var pathMaps = items.Select(e => (Item: e, InternalPath: GetInternalMetadataPaths(e), DeletePaths: e.GetDeletePaths())).ToArray();
    if (pathMaps.Length == 0)
    {
        // The repository rejects an empty id list, so return before calling it.
        return;
    }

    foreach (var (item, internalPaths, pathsToDelete) in pathMaps)
    {
        foreach (var metadataPath in internalPaths)
        {
            if (!Directory.Exists(metadataPath))
            {
                continue;
            }

            _logger.LogDebug(
                "Deleting metadata path, Type: {Type}, Name: {Name}, Path: {Path}, Id: {Id}",
                item.GetType().Name,
                item.Name ?? "Unknown name",
                metadataPath,
                item.Id);

            try
            {
                Directory.Delete(metadataPath, true);
            }
            catch (Exception ex)
            {
                // Best effort: a metadata directory that cannot be removed must not abort the batch.
                _logger.LogError(ex, "Error deleting {MetadataPath}", metadataPath);
            }
        }

        foreach (var fileSystemInfo in pathsToDelete)
        {
            // No path is treated as required here, so IO and access failures are ignored.
            DeleteItemPath(item, false, fileSystemInfo);
        }
    }

    _itemRepository.DeleteItem([.. pathMaps.Select(f => f.Item.Id)]);

    // Evict the deleted items from the in-memory cache, as DeleteItem does, so they are not served afterwards.
    foreach (var (item, _, _) in pathMaps)
    {
        _cache.TryRemove(item.Id, out _);
    }
}
public void DeleteItem(BaseItem item, DeleteOptions options, BaseItem parent, bool notifyParentItem)
{
ArgumentNullException.ThrowIfNull(item);
@ -403,59 +442,7 @@ namespace Emby.Server.Implementations.Library
foreach (var fileSystemInfo in item.GetDeletePaths())
{
if (Directory.Exists(fileSystemInfo.FullName) || File.Exists(fileSystemInfo.FullName))
{
try
{
_logger.LogInformation(
"Deleting item path, Type: {Type}, Name: {Name}, Path: {Path}, Id: {Id}",
item.GetType().Name,
item.Name ?? "Unknown name",
fileSystemInfo.FullName,
item.Id);
if (fileSystemInfo.IsDirectory)
{
Directory.Delete(fileSystemInfo.FullName, true);
}
else
{
File.Delete(fileSystemInfo.FullName);
}
}
catch (DirectoryNotFoundException)
{
_logger.LogInformation(
"Directory not found, only removing from database, Type: {Type}, Name: {Name}, Path: {Path}, Id: {Id}",
item.GetType().Name,
item.Name ?? "Unknown name",
fileSystemInfo.FullName,
item.Id);
}
catch (FileNotFoundException)
{
_logger.LogInformation(
"File not found, only removing from database, Type: {Type}, Name: {Name}, Path: {Path}, Id: {Id}",
item.GetType().Name,
item.Name ?? "Unknown name",
fileSystemInfo.FullName,
item.Id);
}
catch (IOException)
{
if (isRequiredForDelete)
{
throw;
}
}
catch (UnauthorizedAccessException)
{
if (isRequiredForDelete)
{
throw;
}
}
}
DeleteItemPath(item, isRequiredForDelete, fileSystemInfo);
isRequiredForDelete = false;
}
@ -463,17 +450,73 @@ namespace Emby.Server.Implementations.Library
item.SetParent(null);
_itemRepository.DeleteItem(item.Id);
_itemRepository.DeleteItem([item.Id, .. children.Select(f => f.Id)]);
_cache.TryRemove(item.Id, out _);
foreach (var child in children)
{
_itemRepository.DeleteItem(child.Id);
_cache.TryRemove(child.Id, out _);
}
ReportItemRemoved(item, parent);
}
/// <summary>
/// Removes one file or directory belonging to <paramref name="item"/> from disk, if it currently exists.
/// </summary>
/// <param name="item">The item that owns the path; only used for log output.</param>
/// <param name="isRequiredForDelete">When <c>true</c>, IO and access failures are rethrown; otherwise they are ignored.</param>
/// <param name="fileSystemInfo">The file or directory to remove.</param>
private void DeleteItemPath(BaseItem item, bool isRequiredForDelete, FileSystemMetadata fileSystemInfo)
{
    var targetPath = fileSystemInfo.FullName;

    // Nothing on disk under this path: leave without logging.
    if (!Directory.Exists(targetPath) && !File.Exists(targetPath))
    {
        return;
    }

    try
    {
        _logger.LogInformation(
            "Deleting item path, Type: {Type}, Name: {Name}, Path: {Path}, Id: {Id}",
            item.GetType().Name,
            item.Name ?? "Unknown name",
            targetPath,
            item.Id);

        if (!fileSystemInfo.IsDirectory)
        {
            File.Delete(targetPath);
        }
        else
        {
            Directory.Delete(targetPath, true);
        }
    }
    catch (DirectoryNotFoundException)
    {
        // The directory vanished between the existence check and the delete.
        _logger.LogInformation(
            "Directory not found, only removing from database, Type: {Type}, Name: {Name}, Path: {Path}, Id: {Id}",
            item.GetType().Name,
            item.Name ?? "Unknown name",
            targetPath,
            item.Id);
    }
    catch (FileNotFoundException)
    {
        // The file vanished between the existence check and the delete.
        _logger.LogInformation(
            "File not found, only removing from database, Type: {Type}, Name: {Name}, Path: {Path}, Id: {Id}",
            item.GetType().Name,
            item.Name ?? "Unknown name",
            targetPath,
            item.Id);
    }
    catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
    {
        // Remaining IO and permission failures only propagate when this path is required.
        if (isRequiredForDelete)
        {
            throw;
        }
    }
}
private bool IsInternalItem(BaseItem item)
{
if (!item.IsFileProtocol)
@ -990,6 +1033,11 @@ namespace Emby.Server.Implementations.Library
return GetArtist(name, new DtoOptions(true));
}
/// <summary>
/// Looks up the stored music artists for the given names by delegating to the item repository.
/// </summary>
/// <param name="names">The artist names to look up.</param>
/// <returns>The repository's map of artist name to the matching artists.</returns>
public IReadOnlyDictionary<string, MusicArtist[]> GetArtists(IReadOnlyList<string> names)
    => _itemRepository.FindArtists(names);
public MusicArtist GetArtist(string name, DtoOptions options)
{
return CreateItemByName<MusicArtist>(MusicArtist.GetPath, name, options);
@ -1115,18 +1163,24 @@ namespace Emby.Server.Implementations.Library
cancellationToken: cancellationToken).ConfigureAwait(false);
// Quickly scan CollectionFolders for changes
var toDelete = new List<Guid>();
foreach (var child in rootFolder.Children!.OfType<Folder>())
{
// If the user has somehow deleted the collection directory, remove the metadata from the database.
if (child is CollectionFolder collectionFolder && !Directory.Exists(collectionFolder.Path))
{
_itemRepository.DeleteItem(collectionFolder.Id);
toDelete.Add(collectionFolder.Id);
}
else
{
await child.RefreshMetadata(cancellationToken).ConfigureAwait(false);
}
}
if (toDelete.Count > 0)
{
_itemRepository.DeleteItem(toDelete.ToArray());
}
}
private async Task PerformLibraryValidation(IProgress<double> progress, CancellationToken cancellationToken)

View File

@ -1,5 +1,5 @@
using System;
using System.Globalization;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
using Jellyfin.Data.Enums;
@ -55,6 +55,8 @@ public class PeopleValidator
var numPeople = people.Count;
IProgress<double> subProgress = new Progress<double>((val) => progress.Report(val / 2));
_logger.LogDebug("Will refresh {Amount} people", numPeople);
foreach (var person in people)
@ -92,7 +94,7 @@ public class PeopleValidator
double percent = numComplete;
percent /= numPeople;
progress.Report(100 * percent);
subProgress.Report(100 * percent);
}
var deadEntities = _libraryManager.GetItemList(new InternalItemsQuery
@ -102,17 +104,13 @@ public class PeopleValidator
IsLocked = false
});
foreach (var item in deadEntities)
{
_logger.LogInformation("Deleting dead {ItemType} {ItemId} {ItemName}", item.GetType().Name, item.Id.ToString("N", CultureInfo.InvariantCulture), item.Name);
subProgress = new Progress<double>((val) => progress.Report((val / 2) + 50));
_libraryManager.DeleteItem(
item,
new DeleteOptions
{
DeleteFileLocation = false
},
false);
// Delete the dead entities in batches and report the share that has been removed so far.
const int ChunkSize = 500;
var deletedCount = 0;
foreach (var chunk in deadEntities.Chunk(ChunkSize))
{
    _libraryManager.DeleteItemsUnsafeFast(chunk);

    // Count the items actually removed (the last chunk may be shorter) and report after the chunk
    // has completed. The previous formula scaled the chunk index by 100 instead of the chunk size
    // and used the pre-increment index, so the reported progress lagged far behind.
    deletedCount += chunk.Length;
    subProgress.Report(100d * deletedCount / deadEntities.Count);
}
progress.Report(100);

View File

@ -1,10 +1,14 @@
using System;
using System.Buffers;
using System.Collections.Generic;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
using Jellyfin.Database.Implementations;
using MediaBrowser.Controller.Library;
using MediaBrowser.Model.Globalization;
using MediaBrowser.Model.Tasks;
using Microsoft.EntityFrameworkCore;
namespace Emby.Server.Implementations.ScheduledTasks.Tasks;
@ -15,16 +19,19 @@ public class PeopleValidationTask : IScheduledTask, IConfigurableScheduledTask
{
private readonly ILibraryManager _libraryManager;
private readonly ILocalizationManager _localization;
private readonly IDbContextFactory<JellyfinDbContext> _dbContextFactory;
/// <summary>
/// Initializes a new instance of the <see cref="PeopleValidationTask" /> class.
/// </summary>
/// <param name="libraryManager">Instance of the <see cref="ILibraryManager"/> interface.</param>
/// <param name="localization">Instance of the <see cref="ILocalizationManager"/> interface.</param>
public PeopleValidationTask(ILibraryManager libraryManager, ILocalizationManager localization)
/// <param name="dbContextFactory">Instance of the <see cref="IDbContextFactory{TContext}"/> interface.</param>
public PeopleValidationTask(ILibraryManager libraryManager, ILocalizationManager localization, IDbContextFactory<JellyfinDbContext> dbContextFactory)
{
_libraryManager = libraryManager;
_localization = localization;
_dbContextFactory = dbContextFactory;
}
/// <inheritdoc />
@ -62,8 +69,61 @@ public class PeopleValidationTask : IScheduledTask, IConfigurableScheduledTask
}
/// <inheritdoc />
public Task ExecuteAsync(IProgress<double> progress, CancellationToken cancellationToken)
public async Task ExecuteAsync(IProgress<double> progress, CancellationToken cancellationToken)
{
return _libraryManager.ValidatePeopleAsync(progress, cancellationToken);
// First half of the task: the regular people validation, mapped onto 0-50 %.
IProgress<double> subProgress = new Progress<double>((val) => progress.Report(val / 2));
await _libraryManager.ValidatePeopleAsync(subProgress, cancellationToken).ConfigureAwait(false);

// Second half: merge duplicate people rows, mapped onto 50-100 %.
subProgress = new Progress<double>((val) => progress.Report((val / 2) + 50));
var context = await _dbContextFactory.CreateDbContextAsync(cancellationToken).ConfigureAwait(false);
await using (context.ConfigureAwait(false))
{
    // One id array per (Name, PersonType) group that contains more than one row.
    var dupQuery = context.Peoples
        .GroupBy(e => new { e.Name, e.PersonType })
        .Where(e => e.Count() > 1)
        .Select(e => e.Select(f => f.Id).ToArray());

    // Count asynchronously so the task does not block a thread on the database round trip.
    var total = await dupQuery.CountAsync(cancellationToken).ConfigureAwait(false);

    const int PartitionSize = 100;
    var processed = 0;
    int itemCounter;
    var buffer = ArrayPool<Guid[]>.Shared.Rent(PartitionSize)!;
    try
    {
        do
        {
            itemCounter = 0;

            // Always read the first page: groups merged in the previous round no longer satisfy the
            // duplicate filter, so the next unprocessed groups are returned.
            await foreach (var item in dupQuery
                .Take(PartitionSize)
                .AsAsyncEnumerable()
                .WithCancellation(cancellationToken)
                .ConfigureAwait(false))
            {
                buffer[itemCounter++] = item;
            }

            for (int i = 0; i < itemCounter; i++)
            {
                var item = buffer[i];

                // Keep the first id of the group and fold all other ids into it.
                var reference = item[0];
                var dups = item[1..];

                // Re-point the item mappings to the surviving row, then remove the duplicates.
                await context.PeopleBaseItemMap.WhereOneOrMany(dups, e => e.PeopleId)
                    .ExecuteUpdateAsync(e => e.SetProperty(f => f.PeopleId, reference), cancellationToken)
                    .ConfigureAwait(false);
                await context.Peoples.Where(e => dups.Contains(e.Id)).ExecuteDeleteAsync(cancellationToken).ConfigureAwait(false);

                // Report the number of groups completed so far, capped in case rows changed after the count.
                processed++;
                if (total > 0)
                {
                    subProgress.Report(Math.Min(100d, 100d * processed / total));
                }
            }
        } while (itemCounter == PartitionSize && !cancellationToken.IsCancellationRequested);
    }
    finally
    {
        // The pooled array holds references to the id arrays; clear them before handing it back.
        ArrayPool<Guid[]>.Shared.Return(buffer, clearArray: true);
    }

    subProgress.Report(100);
}
}
}

View File

@ -99,11 +99,11 @@ public sealed class BaseItemRepository
}
/// <inheritdoc />
public void DeleteItem(Guid id)
public void DeleteItem(params IReadOnlyList<Guid> ids)
{
if (id.IsEmpty() || id.Equals(PlaceholderId))
// Reject missing input as well as empty and placeholder ids, in line with the exception message below.
if (ids is null || ids.Count == 0 || ids.Any(f => f.IsEmpty() || f.Equals(PlaceholderId)))
{
throw new ArgumentException("Guid can't be empty or the placeholder id.", nameof(id));
throw new ArgumentException("Guid can't be empty or the placeholder id.", nameof(ids));
}
using var context = _dbProvider.CreateDbContext();
@ -111,7 +111,7 @@ public sealed class BaseItemRepository
var date = (DateTime?)DateTime.UtcNow;
var relatedItems = TraverseHirachyDown(id, context).ToArray();
var relatedItems = ids.SelectMany(f => TraverseHirachyDown(f, context)).ToArray();
// Remove any UserData entries for the placeholder item that would conflict with the UserData
// being detached from the item being deleted. This is necessary because, during an update,
@ -2538,4 +2538,16 @@ public sealed class BaseItemRepository
return folderList;
}
/// <inheritdoc/>
public IReadOnlyDictionary<string, MusicArtist[]> FindArtists(IReadOnlyList<string> artistNames)
{
    ArgumentNullException.ThrowIfNull(artistNames);

    if (artistNames.Count == 0)
    {
        // Nothing to look up; skip the database round trip.
        return new Dictionary<string, MusicArtist[]>();
    }

    using var dbContext = _dbProvider.CreateDbContext();

    // Resolve the stored type name once so the query only captures a plain string.
    var artistTypeName = _itemTypeLookup.BaseItemKindNames[BaseItemKind.MusicArtist]!;
    var artists = dbContext.BaseItems
        .Where(e => e.Type == artistTypeName)
        .Where(e => artistNames.Contains(e.Name))
        .ToArray();

    // Group in memory. Rows without a name cannot become dictionary keys and are left out.
    // NOTE(review): DeserializeBaseItem is defined outside this view; results that are null or not a
    // MusicArtist are dropped here, so a name may map to an empty array - confirm this is acceptable.
    return artists
        .Where(e => e.Name is not null)
        .GroupBy(e => e.Name!)
        .ToDictionary(
            e => e.Key,
            e => e.Select(f => DeserializeBaseItem(f)).OfType<MusicArtist>().ToArray());
}
}

View File

@ -74,20 +74,34 @@ public class PeopleRepository(IDbContextFactory<JellyfinDbContext> dbProvider, I
/// <inheritdoc />
public void UpdatePeople(Guid itemId, IReadOnlyList<PersonInfo> people)
{
// TODO: yes for __SOME__ reason there can be duplicates.
people = people.DistinctBy(e => e.Id).ToArray();
var personids = people.Select(f => f.Id);
// multiple metadata providers can provide the _same_ person
people = people.DistinctBy(e => e.Name + "-" + e.Type).ToArray();
var personKeys = people.Select(e => e.Name + "-" + e.Type).ToArray();
using var context = _dbProvider.CreateDbContext();
using var transaction = context.Database.BeginTransaction();
var existingPersons = context.Peoples.Where(p => personids.Contains(p.Id)).Select(f => f.Id).ToArray();
context.Peoples.AddRange(people.Where(e => !existingPersons.Contains(e.Id)).Select(Map));
var existingPersons = context.Peoples.Select(e => new
{
item = e,
SelectionKey = e.Name + "-" + e.PersonType
})
.Where(p => personKeys.Contains(p.SelectionKey))
.Select(f => f.item)
.ToArray();
// Materialize the mapped entities once. The sequence is consumed again after SaveChanges, and a
// deferred query would run Map a second time and yield instances that were never added to the context.
var toAdd = people
    .Where(e => !existingPersons.Any(f => f.Name == e.Name && f.PersonType == e.Type.ToString()))
    .Select(Map)
    .ToArray();
context.Peoples.AddRange(toAdd);
context.SaveChanges();
var maps = context.PeopleBaseItemMap.Where(e => e.ItemId == itemId).ToList();
var personsEntities = toAdd.Concat(existingPersons).ToArray();
var existingMaps = context.PeopleBaseItemMap.Include(e => e.People).Where(e => e.ItemId == itemId).ToList();
foreach (var person in people)
{
var existingMap = maps.FirstOrDefault(e => e.PeopleId == person.Id);
var entityPerson = personsEntities.First(e => e.Name == person.Name && e.PersonType == person.Type.ToString());
var existingMap = existingMaps.FirstOrDefault(e => e.People.Name == person.Name && e.Role == person.Role);
if (existingMap is null)
{
var sortOrder = (person.SortOrder ?? context.PeopleBaseItemMap.Where(e => e.ItemId == itemId).Max(e => e.SortOrder) ?? 0) + 1;
@ -96,7 +110,7 @@ public class PeopleRepository(IDbContextFactory<JellyfinDbContext> dbProvider, I
Item = null!,
ItemId = itemId,
People = null!,
PeopleId = person.Id,
PeopleId = entityPerson.Id,
ListOrder = sortOrder,
SortOrder = sortOrder,
Role = person.Role
@ -105,11 +119,11 @@ public class PeopleRepository(IDbContextFactory<JellyfinDbContext> dbProvider, I
else
{
// person mapping already exists so remove from list
maps.Remove(existingMap);
existingMaps.Remove(existingMap);
}
}
context.PeopleBaseItemMap.RemoveRange(maps);
context.PeopleBaseItemMap.RemoveRange(existingMaps);
context.SaveChanges();
transaction.Commit();

View File

@ -337,9 +337,9 @@ internal class MigrateLibraryDb : IDatabaseMigrationRoutine
}
var entity = GetPerson(reader);
if (!peopleCache.TryGetValue(entity.Name, out var personCache))
if (!peopleCache.TryGetValue(entity.Name + "|" + entity.PersonType, out var personCache))
{
peopleCache[entity.Name] = personCache = (entity, []);
peopleCache[entity.Name + "|" + entity.PersonType] = personCache = (entity, []);
}
if (reader.TryGetString(2, out var role))

View File

@ -336,6 +336,13 @@ namespace MediaBrowser.Controller.Library
/// <param name="options">Options to use for deletion.</param>
void DeleteItem(BaseItem item, DeleteOptions options);
/// <summary>
/// Deletes a batch of items that have no children, such as actors.
/// </summary>
/// <param name="items">The items to delete.</param>
/// <remarks>
/// Compared to <see cref="DeleteItem(BaseItem, DeleteOptions, BaseItem, bool)"/>, this method skips many steps:
/// it assumes there are no children to delete recursively and applies no special handling for channels and the like.
/// </remarks>
public void DeleteItemsUnsafeFast(IEnumerable<BaseItem> items);
/// <summary>
/// Deletes the item.
/// </summary>
@ -624,6 +631,8 @@ namespace MediaBrowser.Controller.Library
QueryResult<(BaseItem Item, ItemCounts ItemCounts)> GetArtists(InternalItemsQuery query);
/// <summary>
/// Gets the artists stored for the given names.
/// </summary>
/// <param name="names">The artist names to look up.</param>
/// <returns>A map of artist name to the artists found under that name.</returns>
IReadOnlyDictionary<string, MusicArtist[]> GetArtists(IReadOnlyList<string> names);
QueryResult<(BaseItem Item, ItemCounts ItemCounts)> GetAlbumArtists(InternalItemsQuery query);
QueryResult<(BaseItem Item, ItemCounts ItemCounts)> GetAllArtists(InternalItemsQuery query);

View File

@ -9,6 +9,7 @@ using System.Threading.Tasks;
using Jellyfin.Data.Enums;
using Jellyfin.Database.Implementations.Entities;
using MediaBrowser.Controller.Entities;
using MediaBrowser.Controller.Entities.Audio;
using MediaBrowser.Model.Dto;
using MediaBrowser.Model.Querying;
@ -22,8 +23,8 @@ public interface IItemRepository
/// <summary>
/// Deletes the item.
/// </summary>
/// <param name="id">The identifier.</param>
void DeleteItem(Guid id);
/// <param name="ids">The identifiers of the items to delete.</param>
void DeleteItem(params IReadOnlyList<Guid> ids);
/// <summary>
/// Saves the items.
@ -122,4 +123,11 @@ public interface IItemRepository
/// <param name="recursive">Whether the check should be done recursively. Warning: expensive operation.</param>
/// <returns>A value indicating whether all children have been played.</returns>
bool GetIsPlayed(User user, Guid id, bool recursive);
/// <summary>
/// Gets all music artists in the database whose name matches one of the given names.
/// </summary>
/// <param name="artistNames">The names of the artists to look up.</param>
/// <returns>A map from artist name to the artists stored under that name.</returns>
IReadOnlyDictionary<string, MusicArtist[]> FindArtists(IReadOnlyList<string> artistNames);
}

View File

@ -43,7 +43,7 @@ public class PragmaConnectionInterceptor : DbConnectionInterceptor
_customPragma = customPragma;
InitialCommand = BuildCommandText();
_logger.LogInformation("SQLITE connection pragma command set to: \r\n {PragmaCommand}", InitialCommand);
_logger.LogInformation("SQLITE connection pragma command set to: \r\n{PragmaCommand}", InitialCommand);
}
private string? InitialCommand { get; set; }