using System; using System.Collections.Generic; using System.IO; using System.Linq; using System.Threading.Tasks; using API.Constants; using API.Data; using API.Data.Repositories; using API.Entities; using API.Entities.Enums; using API.Entities.Person; using API.Extensions; using API.SignalR; using EasyCaching.Core; using Flurl; using Flurl.Http; using HtmlAgilityPack; using Kavita.Common; using Microsoft.Extensions.Hosting; using Microsoft.Extensions.Logging; using NetVips; namespace API.Services.Tasks.Metadata; #nullable enable public interface ICoverDbService { Task DownloadFaviconAsync(string url, EncodeFormat encodeFormat); Task DownloadPublisherImageAsync(string publisherName, EncodeFormat encodeFormat); Task DownloadPersonImageAsync(Person person, EncodeFormat encodeFormat); Task DownloadPersonImageAsync(Person person, EncodeFormat encodeFormat, string url); Task SetPersonCoverByUrl(Person person, string url, bool fromBase64 = true, bool checkNoImagePlaceholder = false); Task SetSeriesCoverByUrl(Series series, string url, bool fromBase64 = true, bool chooseBetterImage = false); Task SetChapterCoverByUrl(Chapter chapter, string url, bool fromBase64 = true, bool chooseBetterImage = false); } public class CoverDbService : ICoverDbService { private readonly ILogger _logger; private readonly IDirectoryService _directoryService; private readonly IEasyCachingProviderFactory _cacheFactory; private readonly IHostEnvironment _env; private readonly IImageService _imageService; private readonly IUnitOfWork _unitOfWork; private readonly IEventHub _eventHub; private TimeSpan _cacheTime = TimeSpan.FromDays(10); private const string NewHost = "https://www.kavitareader.com/CoversDB/"; private static readonly string[] ValidIconRelations = { "icon", "apple-touch-icon", "apple-touch-icon-precomposed", "apple-touch-icon icon-precomposed" // ComicVine has it combined }; /// /// A mapping of urls that need to get the icon from another url, due to strangeness (like app.plex.tv loading a black icon) /// private static readonly Dictionary FaviconUrlMapper = new() { ["https://app.plex.tv"] = "https://plex.tv" }; /// /// Cache of the publisher/favicon list /// private static readonly TimeSpan CacheDuration = TimeSpan.FromDays(1); public CoverDbService(ILogger logger, IDirectoryService directoryService, IEasyCachingProviderFactory cacheFactory, IHostEnvironment env, IImageService imageService, IUnitOfWork unitOfWork, IEventHub eventHub) { _logger = logger; _directoryService = directoryService; _cacheFactory = cacheFactory; _env = env; _imageService = imageService; _unitOfWork = unitOfWork; _eventHub = eventHub; } /// /// Downloads the favicon image from a given website URL, optionally falling back to a custom method if standard methods fail. /// /// The full URL of the website to extract the favicon from. /// The desired image encoding format for saving the favicon (e.g., WebP, PNG). /// /// A string representing the filename of the downloaded favicon image, saved to the configured favicon directory. /// /// /// Thrown when favicon retrieval fails or if a previously failed domain is detected in cache. /// /// /// This method first checks for a cached failure to avoid re-requesting bad links. /// It then attempts to parse HTML for `link` tags pointing to `.png` favicons and /// falls back to an internal fallback method if needed. Valid results are saved to disk. /// public async Task DownloadFaviconAsync(string url, EncodeFormat encodeFormat) { // Parse the URL to get the domain (including subdomain) var uri = new Uri(url); var domain = uri.Host.Replace(Environment.NewLine, string.Empty); var baseUrl = uri.Scheme + "://" + uri.Host; var provider = _cacheFactory.GetCachingProvider(EasyCacheProfiles.Favicon); var res = await provider.GetAsync(baseUrl); if (res.HasValue) { var sanitizedBaseUrl = baseUrl.Sanitize(); _logger.LogInformation("Kavita has already tried to fetch from {BaseUrl} and failed. Skipping duplicate check", sanitizedBaseUrl); throw new KavitaException($"Kavita has already tried to fetch from {sanitizedBaseUrl} and failed. Skipping duplicate check"); } await provider.SetAsync(baseUrl, string.Empty, _cacheTime); if (FaviconUrlMapper.TryGetValue(baseUrl, out var value)) { url = value; } var correctSizeLink = string.Empty; try { var htmlContent = url.GetStringAsync().Result; var htmlDocument = new HtmlDocument(); htmlDocument.LoadHtml(htmlContent); var pngLinks = htmlDocument.DocumentNode.Descendants("link") .Where(link => ValidIconRelations.Contains(link.GetAttributeValue("rel", string.Empty))) .Select(link => link.GetAttributeValue("href", string.Empty)) .Where(href => href.Split("?")[0].EndsWith(".png", StringComparison.InvariantCultureIgnoreCase)) .ToList(); correctSizeLink = (pngLinks?.Find(pngLink => pngLink.Contains("32")) ?? pngLinks?.FirstOrDefault()); } catch (Exception ex) { _logger.LogError(ex, "Error downloading favicon.png for {Domain}, will try fallback methods", domain); } try { if (string.IsNullOrEmpty(correctSizeLink)) { correctSizeLink = await FallbackToKavitaReaderFavicon(baseUrl); } if (string.IsNullOrEmpty(correctSizeLink)) { throw new KavitaException($"Could not grab favicon from {baseUrl}"); } var finalUrl = correctSizeLink; // If starts with //, it's coming usually from an offsite cdn if (correctSizeLink.StartsWith("//")) { finalUrl = "https:" + correctSizeLink; } else if (!correctSizeLink.StartsWith(uri.Scheme)) { finalUrl = Url.Combine(baseUrl, correctSizeLink); } _logger.LogTrace("Fetching favicon from {Url}", finalUrl); // Download the favicon.ico file using Flurl var faviconStream = await finalUrl .AllowHttpStatus("2xx,304") .GetStreamAsync(); // Create the destination file path using var image = Image.PngloadStream(faviconStream); var filename = ImageService.GetWebLinkFormat(baseUrl, encodeFormat); image.WriteToFile(Path.Combine(_directoryService.FaviconDirectory, filename)); _logger.LogDebug("Favicon for {Domain} downloaded and saved successfully", domain); return filename; } catch (Exception ex) { _logger.LogError(ex, "Error downloading favicon for {Domain}", domain); throw; } } public async Task DownloadPublisherImageAsync(string publisherName, EncodeFormat encodeFormat) { try { // Sanitize user input publisherName = publisherName.Replace(Environment.NewLine, string.Empty).Replace("\r", string.Empty).Replace("\n", string.Empty); var provider = _cacheFactory.GetCachingProvider(EasyCacheProfiles.Publisher); var res = await provider.GetAsync(publisherName); if (res.HasValue) { _logger.LogInformation("Kavita has already tried to fetch Publisher: {PublisherName} and failed. Skipping duplicate check", publisherName); throw new KavitaException($"Kavita has already tried to fetch Publisher: {publisherName} and failed. Skipping duplicate check"); } await provider.SetAsync(publisherName, string.Empty, _cacheTime); var publisherLink = await FallbackToKavitaReaderPublisher(publisherName); if (string.IsNullOrEmpty(publisherLink)) { throw new KavitaException($"Could not grab publisher image for {publisherName}"); } _logger.LogTrace("Fetching publisher image from {Url}", publisherLink.Sanitize()); // Download the publisher file using Flurl var publisherStream = await publisherLink .AllowHttpStatus("2xx,304") .GetStreamAsync(); // Create the destination file path using var image = Image.NewFromStream(publisherStream); var filename = ImageService.GetPublisherFormat(publisherName, encodeFormat); image.WriteToFile(Path.Combine(_directoryService.PublisherDirectory, filename)); _logger.LogDebug("Publisher image for {PublisherName} downloaded and saved successfully", publisherName.Sanitize()); return filename; } catch (Exception ex) { _logger.LogError(ex, "Error downloading image for {PublisherName}", publisherName.Sanitize()); throw; } } /// /// Attempts to download the Person image from CoverDB while matching against metadata within the Person /// /// /// /// Person image (in correct directory) or null if not found/error public async Task DownloadPersonImageAsync(Person person, EncodeFormat encodeFormat) { try { var personImageLink = await GetCoverPersonImagePath(person); if (string.IsNullOrEmpty(personImageLink)) { throw new KavitaException($"Could not grab person image for {person.Name}"); } return await DownloadPersonImageAsync(person, encodeFormat, personImageLink); } catch (Exception ex) { _logger.LogError(ex, "Error downloading image for {PersonName}", person.Name); } return null; } /// /// Attempts to download the Person cover image from a Url /// /// /// /// /// /// /// public async Task DownloadPersonImageAsync(Person person, EncodeFormat encodeFormat, string url) { try { var personImageLink = await GetCoverPersonImagePath(person); if (string.IsNullOrEmpty(personImageLink)) { throw new KavitaException($"Could not grab person image for {person.Name}"); } var filename = await DownloadImageFromUrl(ImageService.GetPersonFormat(person.Id), encodeFormat, personImageLink); _logger.LogDebug("Person image for {PersonName} downloaded and saved successfully", person.Name); return filename; } catch (Exception ex) { _logger.LogError(ex, "Error downloading image for {PersonName}", person.Name); } return null; } private async Task DownloadImageFromUrl(string filenameWithoutExtension, EncodeFormat encodeFormat, string url, string? targetDirectory = null) { // TODO: I need to unit test this to ensure it works when overwriting, etc // Target Directory defaults to CoverImageDirectory, but can be temp for when comparison between images is used targetDirectory ??= _directoryService.CoverImageDirectory; // Create the destination file path var filename = filenameWithoutExtension + encodeFormat.GetExtension(); var targetFile = Path.Combine(targetDirectory, filename); _logger.LogTrace("Fetching person image from {Url}", url.Sanitize()); // Download the file using Flurl var imageStream = await url .AllowHttpStatus("2xx,304") .GetStreamAsync(); using var image = Image.NewFromStream(imageStream); image.WriteToFile(targetFile); return filename; } private async Task GetCoverPersonImagePath(Person person) { var tempFile = Path.Join(_directoryService.LongTermCacheDirectory, "people.yml"); // Check if the file already exists and skip download in Development environment if (File.Exists(tempFile)) { if (_env.IsDevelopment()) { _logger.LogInformation("Using existing people.yml file in Development environment"); } else { // Remove file if not in Development and file is older than 7 days if (File.GetLastWriteTime(tempFile) < DateTime.Now.AddDays(-7)) { File.Delete(tempFile); } } } // Download the file if it doesn't exist or was deleted due to age if (!File.Exists(tempFile)) { var masterPeopleFile = await $"{NewHost}people/people.yml" .DownloadFileAsync(_directoryService.LongTermCacheDirectory); if (!File.Exists(tempFile) || string.IsNullOrEmpty(masterPeopleFile)) { _logger.LogError("Could not download people.yml from Github"); return null; } } var coverDbRepository = new CoverDbRepository(tempFile); var coverAuthor = coverDbRepository.FindBestAuthorMatch(person); if (coverAuthor == null || string.IsNullOrEmpty(coverAuthor.ImagePath)) { throw new KavitaException($"Could not grab person image for {person.Name}"); } return $"{NewHost}{coverAuthor.ImagePath}"; } private async Task FallbackToKavitaReaderFavicon(string baseUrl) { const string urlsFileName = "publishers.txt"; var correctSizeLink = string.Empty; var allOverrides = await GetCachedData(urlsFileName) ?? await $"{NewHost}favicons/{urlsFileName}".GetStringAsync(); // Cache immediately await CacheDataAsync(urlsFileName, allOverrides); if (string.IsNullOrEmpty(allOverrides)) return correctSizeLink; var cleanedBaseUrl = baseUrl.Replace("https://", string.Empty); var externalFile = allOverrides .Split("\n") .FirstOrDefault(url => cleanedBaseUrl.Equals(url.Replace(".png", string.Empty)) || cleanedBaseUrl.Replace("www.", string.Empty).Equals(url.Replace(".png", string.Empty) )); if (string.IsNullOrEmpty(externalFile)) { throw new KavitaException($"Could not grab favicon from {baseUrl.Sanitize()}"); } return $"{NewHost}favicons/{externalFile}"; } private async Task FallbackToKavitaReaderPublisher(string publisherName) { const string publisherFileName = "publishers.txt"; var externalLink = string.Empty; var allOverrides = await GetCachedData(publisherFileName) ?? await $"{NewHost}publishers/{publisherFileName}".GetStringAsync(); // Cache immediately await CacheDataAsync(publisherFileName, allOverrides); if (string.IsNullOrEmpty(allOverrides)) return externalLink; var externalFile = allOverrides .Split("\n") .Select(publisherLine => { var tokens = publisherLine.Split("|"); if (tokens.Length != 2) return null; var aliases = tokens[0]; // Multiple publisher aliases are separated by # if (aliases.Split("#").Any(name => name.ToLowerInvariant().Trim().Equals(publisherName.ToLowerInvariant().Trim()))) { return tokens[1]; } return null; }) .FirstOrDefault(url => !string.IsNullOrEmpty(url)); if (string.IsNullOrEmpty(externalFile)) { throw new KavitaException($"Could not grab publisher image for {publisherName}"); } return $"{NewHost}publishers/{externalLink}"; } private async Task CacheDataAsync(string fileName, string? content) { if (content == null) return; try { var filePath = _directoryService.FileSystem.Path.Join(_directoryService.LongTermCacheDirectory, fileName); await File.WriteAllTextAsync(filePath, content); } catch (Exception ex) { _logger.LogError(ex, "Failed to cache {FileName}", fileName); } } private async Task GetCachedData(string cacheFile) { // Form the full file path: var filePath = _directoryService.FileSystem.Path.Join(_directoryService.LongTermCacheDirectory, cacheFile); if (!File.Exists(filePath)) return null; var fileInfo = new FileInfo(filePath); if (DateTime.UtcNow - fileInfo.LastWriteTimeUtc <= CacheDuration) { return await File.ReadAllTextAsync(filePath); } return null; } /// /// /// /// /// /// /// Will check against all known null image placeholders to avoid writing it public async Task SetPersonCoverByUrl(Person person, string url, bool fromBase64 = true, bool checkNoImagePlaceholder = false) { if (!string.IsNullOrEmpty(url)) { var tempDir = _directoryService.TempDirectory; var format = ImageService.GetPersonFormat(person.Id); var finalFileName = format + ".webp"; var tempFileName = format + "_new"; var tempFilePath = await CreateThumbnail(url, tempFileName, fromBase64, tempDir); if (!string.IsNullOrEmpty(tempFilePath)) { var tempFullPath = Path.Combine(tempDir, tempFilePath); var finalFullPath = Path.Combine(_directoryService.CoverImageDirectory, finalFileName); // Skip setting image if it's similar to a known placeholder if (checkNoImagePlaceholder) { var placeholderPath = Path.Combine(_directoryService.AssetsDirectory, "anilist-no-image-placeholder.jpg"); var similarity = placeholderPath.CalculateSimilarity(tempFullPath); if (similarity >= 0.9f) { _logger.LogInformation("Skipped setting placeholder image for person {PersonId} due to high similarity ({Similarity})", person.Id, similarity); _directoryService.DeleteFiles([tempFullPath]); return; } } try { if (!string.IsNullOrEmpty(person.CoverImage)) { var existingPath = Path.Combine(_directoryService.CoverImageDirectory, person.CoverImage); var betterImage = existingPath.GetBetterImage(tempFullPath)!; var choseNewImage = string.Equals(betterImage, tempFullPath, StringComparison.OrdinalIgnoreCase); if (choseNewImage) { _directoryService.DeleteFiles([existingPath]); _directoryService.CopyFile(tempFullPath, finalFullPath); person.CoverImage = finalFileName; } else { _directoryService.DeleteFiles([tempFullPath]); return; } } else { _directoryService.CopyFile(tempFullPath, finalFullPath); person.CoverImage = finalFileName; } } catch (Exception ex) { _logger.LogError(ex, "Error choosing better image for Person: {PersonId}", person.Id); _directoryService.CopyFile(tempFullPath, finalFullPath); person.CoverImage = finalFileName; } _directoryService.DeleteFiles([tempFullPath]); person.CoverImageLocked = true; _imageService.UpdateColorScape(person); _unitOfWork.PersonRepository.Update(person); } } else { person.CoverImage = string.Empty; person.CoverImageLocked = false; _imageService.UpdateColorScape(person); _unitOfWork.PersonRepository.Update(person); } if (_unitOfWork.HasChanges()) { await _unitOfWork.CommitAsync(); await _eventHub.SendMessageAsync(MessageFactory.CoverUpdate, MessageFactory.CoverUpdateEvent(person.Id, MessageFactoryEntityTypes.Person), false); } } /// /// Sets the series cover by url /// /// /// /// /// If images are similar, will choose the higher quality image public async Task SetSeriesCoverByUrl(Series series, string url, bool fromBase64 = true, bool chooseBetterImage = false) { if (!string.IsNullOrEmpty(url)) { var tempDir = _directoryService.TempDirectory; var format = ImageService.GetSeriesFormat(series.Id); var finalFileName = format + ".webp"; var tempFileName = format + "_new"; var tempFilePath = await CreateThumbnail(url, tempFileName, fromBase64, tempDir); if (!string.IsNullOrEmpty(tempFilePath)) { var tempFullPath = Path.Combine(tempDir, tempFilePath); var finalFullPath = Path.Combine(_directoryService.CoverImageDirectory, finalFileName); if (chooseBetterImage && !string.IsNullOrEmpty(series.CoverImage)) { try { var existingPath = Path.Combine(_directoryService.CoverImageDirectory, series.CoverImage); var betterImage = existingPath.GetBetterImage(tempFullPath)!; var choseNewImage = string.Equals(betterImage, tempFullPath, StringComparison.OrdinalIgnoreCase); if (choseNewImage) { _directoryService.DeleteFiles([existingPath]); _directoryService.CopyFile(tempFullPath, finalFullPath); series.CoverImage = finalFileName; } else { _directoryService.DeleteFiles([tempFullPath]); return; } } catch (Exception ex) { _logger.LogError(ex, "Error choosing better image for Series: {SeriesId}", series.Id); _directoryService.CopyFile(tempFullPath, finalFullPath); series.CoverImage = finalFileName; } } else { _directoryService.CopyFile(tempFullPath, finalFullPath); series.CoverImage = finalFileName; } _directoryService.DeleteFiles([tempFullPath]); series.CoverImageLocked = true; _imageService.UpdateColorScape(series); _unitOfWork.SeriesRepository.Update(series); } } else { series.CoverImage = null; series.CoverImageLocked = false; _logger.LogDebug("[SeriesCoverImageBug] Setting Series Cover Image to null"); _imageService.UpdateColorScape(series); _unitOfWork.SeriesRepository.Update(series); } if (_unitOfWork.HasChanges()) { await _unitOfWork.CommitAsync(); await _eventHub.SendMessageAsync(MessageFactory.CoverUpdate, MessageFactory.CoverUpdateEvent(series.Id, MessageFactoryEntityTypes.Series), false); } } public async Task SetChapterCoverByUrl(Chapter chapter, string url, bool fromBase64 = true, bool chooseBetterImage = false) { if (!string.IsNullOrEmpty(url)) { var tempDirectory = _directoryService.TempDirectory; var finalFileName = ImageService.GetChapterFormat(chapter.Id, chapter.VolumeId) + ".webp"; var tempFileName = ImageService.GetChapterFormat(chapter.Id, chapter.VolumeId) + "_new"; var tempFilePath = await CreateThumbnail(url, tempFileName, fromBase64, tempDirectory); if (!string.IsNullOrEmpty(tempFilePath)) { var tempFullPath = Path.Combine(tempDirectory, tempFilePath); var finalFullPath = Path.Combine(_directoryService.CoverImageDirectory, finalFileName); if (chooseBetterImage && !string.IsNullOrEmpty(chapter.CoverImage)) { try { var existingPath = Path.Combine(_directoryService.CoverImageDirectory, chapter.CoverImage); var betterImage = existingPath.GetBetterImage(tempFullPath)!; var choseNewImage = string.Equals(betterImage, tempFullPath, StringComparison.OrdinalIgnoreCase); if (choseNewImage) { // This will fail if Cover gen is done just before this as there is a bug with files getting locked. _directoryService.DeleteFiles([existingPath]); _directoryService.CopyFile(tempFullPath, finalFullPath); _directoryService.DeleteFiles([tempFullPath]); } else { _directoryService.DeleteFiles([tempFullPath]); return; } chapter.CoverImage = finalFileName; } catch (Exception ex) { _logger.LogError(ex, "There was an issue trying to choose a better cover image for Chapter: {FileName} ({ChapterId})", chapter.Range, chapter.Id); } } else { // No comparison needed, just copy and rename to final _directoryService.CopyFile(tempFullPath, finalFullPath); _directoryService.DeleteFiles([tempFullPath]); chapter.CoverImage = finalFileName; } chapter.CoverImageLocked = true; _imageService.UpdateColorScape(chapter); _unitOfWork.ChapterRepository.Update(chapter); } } else { chapter.CoverImage = null; chapter.CoverImageLocked = false; _imageService.UpdateColorScape(chapter); _unitOfWork.ChapterRepository.Update(chapter); } if (_unitOfWork.HasChanges()) { await _unitOfWork.CommitAsync(); await _eventHub.SendMessageAsync( MessageFactory.CoverUpdate, MessageFactory.CoverUpdateEvent(chapter.Id, MessageFactoryEntityTypes.Chapter), false ); } } /// /// /// /// /// Filename without extension /// /// Not useable with fromBase64. Allows a different directory to be written to /// private async Task CreateThumbnail(string url, string filenameWithoutExtension, bool fromBase64 = true, string? targetDirectory = null) { targetDirectory ??= _directoryService.CoverImageDirectory; var settings = await _unitOfWork.SettingsRepository.GetSettingsDtoAsync(); var encodeFormat = settings.EncodeMediaAs; var coverImageSize = settings.CoverImageSize; if (fromBase64) { return _imageService.CreateThumbnailFromBase64(url, filenameWithoutExtension, encodeFormat, coverImageSize.GetDimensions().Width); } return await DownloadImageFromUrl(filenameWithoutExtension, encodeFormat, url, targetDirectory); } }