Kavita/API/Services/Tasks/Scanner/ParseScannedFiles.cs
Joseph Milazzo eddbb7ab18
Event Widget Update (#1098)
* Took care of some notes in the code

* Fixed an issue where Extra might get flagged as special too early, if in a word like Extraordinary

* Moved Tag cleanup code into Scanner service. Added a SplitQuery to another heavy API. Refactored Scan loop to remove parallelism and use async instead.

* Lots of rework on the codebase to support detailed messages and easier management of message sending. Need to take a break on this work.

* Progress is being made, but slowly. Code is broken in this commit.

* Progress is being made, but slowly. Code is broken in this commit.

* Fixed merge issue

* Fixed unit tests

* CoverUpdate is now hooked into new ProgressEvent structure

* Refactored code to remove custom observables and have everything use standard messages$

* Refactored a ton of instances to NotificationProgressEvent style and tons of the UI to respect that too. UI is still a bit buggy, but wholistically the work is done.

* Working much better. Sometimes events come in too fast. Currently cover update progress doesn't display on UI

* Fixed unit tests

* Removed SignalREvent to minimize internal event types. Updated the UI to use progress bars. Finished SiteThemeService.

* Merged metadata refresh progress events and changed library scan events to merge cleaner in the UI

* Changed RefreshMetadataProgress to CoverUpdateProgress to reflect the event better.

* Theme Cleanup (#1089)

* Fixed e-ink theme not properly applying correctly

* Fixed some seed changes. Changed card checkboxes to use our themed ones

* Fixed recently added carousel not going to recently-added page

* Fixed an issue where no results found would show when searching for a library name

* Cleaned up list a bit, typeahead dropdown still needs work

* Added a TODO to streamline series-card component

* Removed ng-lazyload-image module since we don't use it. We use lazysizes

* Darken card on hover

* Fixing accordion focus style

* ux pass updates

- Fixed typeahead width
- Fixed changelog download buttons
- Fixed a select
- Fixed various input box-shadows
- Fixed all anchors to only have underline on hover
- Added navtab hover and active effects

* more ux pass

- Fixed spacing on theme cards
- Fixed some light theme issues
- Exposed text-muted-color for theme card subtitle color

* UX pass fixes

- Changed back to bright green for primary on dark theme
- Changed fa icon to black on e-ink

* Merged changelog component

* Fixed anchor buttons text decoration

* Changed nav tabs to have a background color instead of open active state

* When user is not authenticated, make sure we set default theme (dark)

* Cleanup on carousel

* Updated Users tab to use small buttons with icons to align with Library tab

* Cleaned up brand to not underline, removed default link underline on hover in dropdown and pill tabs

* Fixed collection detail posters not rendering

Co-authored-by: Robbie Davis <robbie@therobbiedavis.com>

* Bump versions by dotnet-bump-version.

* Tweaked some of the emitting code

* Some css, but pretty bad. Robbie please save me

* Removed a todo

* styling update

* Only send filename on FileScanProgress

* Some console.log spam cleanup

* Various updates

* Show events widget activity based on activeEvents

* progress bar color updates

* Code cleanup

Co-authored-by: Robbie Davis <robbie@therobbiedavis.com>
2022-02-18 18:57:37 -08:00

228 lines
9.4 KiB
C#

using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Threading.Tasks;
using API.Data.Metadata;
using API.Entities;
using API.Entities.Enums;
using API.Helpers;
using API.Parser;
using API.SignalR;
using Microsoft.AspNetCore.SignalR;
using Microsoft.Extensions.Logging;
namespace API.Services.Tasks.Scanner
{
public class ParsedSeries
{
public string Name { get; init; }
public string NormalizedName { get; init; }
public MangaFormat Format { get; init; }
}
public class ParseScannedFiles
{
private readonly ConcurrentDictionary<ParsedSeries, List<ParserInfo>> _scannedSeries;
private readonly ILogger _logger;
private readonly IDirectoryService _directoryService;
private readonly IReadingItemService _readingItemService;
private readonly IEventHub _eventHub;
private readonly DefaultParser _defaultParser;
/// <summary>
/// An instance of a pipeline for processing files and returning a Map of Series -> ParserInfos.
/// Each instance is separate from other threads, allowing for no cross over.
/// </summary>
/// <param name="logger">Logger of the parent class that invokes this</param>
/// <param name="directoryService">Directory Service</param>
/// <param name="readingItemService">ReadingItemService Service for extracting information on a number of formats</param>
public ParseScannedFiles(ILogger logger, IDirectoryService directoryService,
IReadingItemService readingItemService, IEventHub eventHub)
{
_logger = logger;
_directoryService = directoryService;
_readingItemService = readingItemService;
_scannedSeries = new ConcurrentDictionary<ParsedSeries, List<ParserInfo>>();
_defaultParser = new DefaultParser(_directoryService);
_eventHub = eventHub;
}
/// <summary>
/// Gets the list of all parserInfos given a Series (Will match on Name, LocalizedName, OriginalName). If the series does not exist within, return empty list.
/// </summary>
/// <param name="parsedSeries"></param>
/// <param name="series"></param>
/// <returns></returns>
public static IList<ParserInfo> GetInfosByName(Dictionary<ParsedSeries, List<ParserInfo>> parsedSeries, Series series)
{
var allKeys = parsedSeries.Keys.Where(ps =>
SeriesHelper.FindSeries(series, ps));
var infos = new List<ParserInfo>();
foreach (var key in allKeys)
{
infos.AddRange(parsedSeries[key]);
}
return infos;
}
/// <summary>
/// Processes files found during a library scan.
/// Populates a collection of <see cref="ParserInfo"/> for DB updates later.
/// </summary>
/// <param name="path">Path of a file</param>
/// <param name="rootPath"></param>
/// <param name="type">Library type to determine parsing to perform</param>
private void ProcessFile(string path, string rootPath, LibraryType type)
{
var info = _readingItemService.Parse(path, rootPath, type);
if (info == null)
{
// If the file is an image and literally a cover image, skip processing.
if (!(Parser.Parser.IsImage(path) && Parser.Parser.IsCoverImage(path)))
{
_logger.LogWarning("[Scanner] Could not parse series from {Path}", path);
}
return;
}
// This catches when original library type is Manga/Comic and when parsing with non
if (Parser.Parser.IsEpub(path) && Parser.Parser.ParseVolume(info.Series) != Parser.Parser.DefaultVolume) // Shouldn't this be info.Volume != DefaultVolume?
{
info = _defaultParser.Parse(path, rootPath, LibraryType.Book);
var info2 = _readingItemService.Parse(path, rootPath, type);
info.Merge(info2);
}
info.ComicInfo = _readingItemService.GetComicInfo(path);
if (info.ComicInfo != null)
{
if (!string.IsNullOrEmpty(info.ComicInfo.Volume))
{
info.Volumes = info.ComicInfo.Volume;
}
if (!string.IsNullOrEmpty(info.ComicInfo.Series))
{
info.Series = info.ComicInfo.Series;
}
if (!string.IsNullOrEmpty(info.ComicInfo.Number))
{
info.Chapters = info.ComicInfo.Number;
}
}
TrackSeries(info);
}
/// <summary>
/// Attempts to either add a new instance of a show mapping to the _scannedSeries bag or adds to an existing.
/// This will check if the name matches an existing series name (multiple fields) <see cref="MergeName"/>
/// </summary>
/// <param name="info"></param>
private void TrackSeries(ParserInfo info)
{
if (info.Series == string.Empty) return;
// Check if normalized info.Series already exists and if so, update info to use that name instead
info.Series = MergeName(info);
var existingKey = _scannedSeries.Keys.FirstOrDefault(ps =>
ps.Format == info.Format && ps.NormalizedName == Parser.Parser.Normalize(info.Series));
existingKey ??= new ParsedSeries()
{
Format = info.Format,
Name = info.Series,
NormalizedName = Parser.Parser.Normalize(info.Series)
};
_scannedSeries.AddOrUpdate(existingKey, new List<ParserInfo>() {info}, (_, oldValue) =>
{
oldValue ??= new List<ParserInfo>();
if (!oldValue.Contains(info))
{
oldValue.Add(info);
}
return oldValue;
});
}
/// <summary>
/// Using a normalized name from the passed ParserInfo, this checks against all found series so far and if an existing one exists with
/// same normalized name, it merges into the existing one. This is important as some manga may have a slight difference with punctuation or capitalization.
/// </summary>
/// <param name="info"></param>
/// <returns>Series Name to group this info into</returns>
public string MergeName(ParserInfo info)
{
var normalizedSeries = Parser.Parser.Normalize(info.Series);
var existingName =
_scannedSeries.SingleOrDefault(p => Parser.Parser.Normalize(p.Key.NormalizedName) == normalizedSeries && p.Key.Format == info.Format)
.Key;
if (existingName != null && !string.IsNullOrEmpty(existingName.Name))
{
return existingName.Name;
}
return info.Series;
}
/// <summary>
///
/// </summary>
/// <param name="libraryType">Type of library. Used for selecting the correct file extensions to search for and parsing files</param>
/// <param name="folders">The folders to scan. By default, this should be library.Folders, however it can be overwritten to restrict folders</param>
/// <returns></returns>
public async Task<Dictionary<ParsedSeries, List<ParserInfo>>> ScanLibrariesForSeries(LibraryType libraryType, IEnumerable<string> folders, string libraryName)
{
await _eventHub.SendMessageAsync(MessageFactory.NotificationProgress, MessageFactory.FileScanProgressEvent("", libraryName, ProgressEventType.Started));
foreach (var folderPath in folders)
{
try
{
async void Action(string f)
{
try
{
ProcessFile(f, folderPath, libraryType);
await _eventHub.SendMessageAsync(MessageFactory.NotificationProgress, MessageFactory.FileScanProgressEvent(f, libraryName, ProgressEventType.Updated));
}
catch (FileNotFoundException exception)
{
_logger.LogError(exception, "The file {Filename} could not be found", f);
}
}
_directoryService.TraverseTreeParallelForEach(folderPath, Action, Parser.Parser.SupportedExtensions, _logger);
}
catch (ArgumentException ex)
{
_logger.LogError(ex, "The directory '{FolderPath}' does not exist", folderPath);
}
}
await _eventHub.SendMessageAsync(MessageFactory.NotificationProgress, MessageFactory.FileScanProgressEvent("", libraryName, ProgressEventType.Ended));
return SeriesWithInfos();
}
/// <summary>
/// Returns any series where there were parsed infos
/// </summary>
/// <returns></returns>
private Dictionary<ParsedSeries, List<ParserInfo>> SeriesWithInfos()
{
var filtered = _scannedSeries.Where(kvp => kvp.Value.Count > 0);
var series = filtered.ToDictionary(v => v.Key, v => v.Value);
return series;
}
}
}