From 154b30c3fdb9731cea1d86ca17875e43f895dfa1 Mon Sep 17 00:00:00 2001
From: Joseph Milazzo
Date: Tue, 23 Mar 2021 14:12:28 -0500
Subject: [PATCH] Implemented fallback functionality.

Try to use System.IO.Compression.ZipArchive first, then fall back to
SharpCompress (which is extremely slow). Currently this hits disk 3 times
per operation and needs performance tuning.
---
 API/Archive/Archive.cs               |   1 +
 API/Services/ArchiveService.cs       | 185 ++++++++++++++++++++++-----
 API/Services/MetadataService.cs      |   1 -
 API/Services/Tasks/ScannerService.cs |   5 +-
 4 files changed, 154 insertions(+), 38 deletions(-)

diff --git a/API/Archive/Archive.cs b/API/Archive/Archive.cs
index 146ae3ffc..9f55c8153 100644
--- a/API/Archive/Archive.cs
+++ b/API/Archive/Archive.cs
@@ -14,6 +14,7 @@ namespace API.Archive
         /// </summary>
         public static ArchiveLibrary CanOpen(string archivePath)
         {
+            // TODO: Should I introduce something for NotFound?
             if (!File.Exists(archivePath) || !Parser.Parser.IsArchive(archivePath)) return ArchiveLibrary.NotSupported;
 
             try
diff --git a/API/Services/ArchiveService.cs b/API/Services/ArchiveService.cs
index fea540923..550e9fb8f 100644
--- a/API/Services/ArchiveService.cs
+++ b/API/Services/ArchiveService.cs
@@ -44,10 +44,11 @@ namespace API.Services
             }
 
             var count = 0;
-            var libraryHandler = Archive.Archive.CanOpen(archivePath);
+
 
             try
             {
+                var libraryHandler = Archive.Archive.CanOpen(archivePath);
                 switch (libraryHandler)
                 {
                     case ArchiveLibrary.Default:
@@ -153,21 +154,47 @@ namespace API.Services
         /// Given a path to a compressed file (zip, rar, cbz, cbr, etc), will ensure the first image is returned unless
         /// a folder.extension exists in the root directory of the compressed file.
         /// </summary>
-        /// <param name="filepath"></param>
+        /// <param name="archivePath"></param>
        /// <param name="createThumbnail">Create a smaller variant of file extracted from archive. Archive images are usually 1MB each.</param>
         /// <returns></returns>
-        public byte[] GetCoverImage(string filepath, bool createThumbnail = false)
+        public byte[] GetCoverImage(string archivePath, bool createThumbnail = false)
         {
             try
             {
-                if (!IsValidArchive(filepath)) return Array.Empty<byte>();
+                if (!IsValidArchive(archivePath)) return Array.Empty<byte>();
 
-                using var archive = ArchiveFactory.Open(filepath);
-                return FindCoverImage(archive.Entries.Where(entry => !entry.IsDirectory && Parser.Parser.IsImage(entry.Key)), createThumbnail);
+                var libraryHandler = Archive.Archive.CanOpen(archivePath);
+
+
+                switch (libraryHandler)
+                {
+                    case ArchiveLibrary.Default:
+                    {
+                        _logger.LogDebug("Using default compression handling");
+                        using var archive = ZipFile.OpenRead(archivePath);
+                        var folder = archive.Entries.SingleOrDefault(x => Path.GetFileNameWithoutExtension(x.Name).ToLower() == "folder");
+                        var entries = archive.Entries.Where(x => Path.HasExtension(x.FullName) && Parser.Parser.IsImage(x.FullName)).OrderBy(x => x.FullName).ToList();
+                        var entry = folder ?? entries[0];
+
+                        return createThumbnail ? CreateThumbnail(entry) : ConvertEntryToByteArray(entry);
+                    }
+                    case ArchiveLibrary.SharpCompress:
+                    {
+                        _logger.LogDebug("Using SharpCompress compression handling");
+                        using var archive = ArchiveFactory.Open(archivePath);
+                        return FindCoverImage(archive.Entries.Where(entry => !entry.IsDirectory && Parser.Parser.IsImage(entry.Key)), createThumbnail);
+                    }
+                    case ArchiveLibrary.NotSupported:
+                        _logger.LogError("[GetCoverImage] This archive cannot be read: {ArchivePath}. Defaulting to no cover image", archivePath);
+                        return Array.Empty<byte>();
+                    default:
+                        _logger.LogError("[GetCoverImage] There was an exception when reading archive stream: {ArchivePath}. Defaulting to no cover image", archivePath);
+                        return Array.Empty<byte>();
+                }
             }
             catch (Exception ex)
             {
-                _logger.LogError(ex, "[GetCoverImage] There was an exception when reading archive stream: {Filepath}. Defaulting to no cover image", filepath);
+                _logger.LogError(ex, "[GetCoverImage] There was an exception when reading archive stream: {ArchivePath}. Defaulting to no cover image", archivePath);
             }
 
             return Array.Empty<byte>();
@@ -221,9 +248,53 @@ namespace API.Services
             return Array.Empty<byte>();
         }
 
+        private static byte[] ConvertEntryToByteArray(ZipArchiveEntry entry)
+        {
+            using var stream = entry.Open();
+            using var ms = new MemoryStream();
+            stream.CopyTo(ms);
+            var data = ms.ToArray();
+
+            return data;
+        }
+
+        /// <summary>
+        /// Given an archive stream, will assess whether directory needs to be flattened so that the extracted archive files are directly
+        /// under extract path and not nested in subfolders. See Flatten method.
+        /// </summary>
+        /// <param name="archive">An opened archive stream</param>
+        /// <returns></returns>
+        public bool ArchiveNeedsFlattening(ZipArchive archive)
+        {
+            // Sometimes ZipArchive will list the directory and other times it will just keep it in the FullName
+            return archive.Entries.Count > 0 &&
+                   !Path.HasExtension(archive.Entries.ElementAt(0).FullName) ||
+                   archive.Entries.Any(e => e.FullName.Contains(Path.AltDirectorySeparatorChar));
+        }
+
+
+        private byte[] CreateThumbnail(ZipArchiveEntry entry, string formatExtension = ".jpg")
+        {
+            if (!formatExtension.StartsWith("."))
+            {
+                formatExtension = $".{formatExtension}";
+            }
+            try
+            {
+                using var stream = entry.Open();
+                using var thumbnail = Image.ThumbnailStream(stream, ThumbnailWidth);
+                return thumbnail.WriteToBuffer(formatExtension); // TODO: Validate this code works with .png files
+            }
+            catch (Exception ex)
+            {
+                _logger.LogError(ex, "There was a critical error that prevented thumbnail generation on {EntryName}. Defaulting to no cover image", entry.FullName);
+            }
+
+            return Array.Empty<byte>();
+        }
 
         /// <summary>
-        /// Test if the archive path exists and there are images inside it. This will log as an error.
+        /// Test if the archive path exists and an archive can be opened.
         /// </summary>
         /// <param name="archivePath"></param>
         /// <returns></returns>
@@ -272,32 +343,39 @@ namespace API.Services
             try
             {
                 if (!File.Exists(archivePath)) return summary;
-
-                if (SharpCompress.Archives.Zip.ZipArchive.IsZipFile(archivePath))
+
+                var libraryHandler = Archive.Archive.CanOpen(archivePath);
+                switch (libraryHandler)
                 {
-                    using var archive = SharpCompress.Archives.Zip.ZipArchive.Open(archivePath);
-                    info = FindComicInfoXml(archive.Entries.Where(entry => !entry.IsDirectory && Parser.Parser.IsXml(entry.Key)));
-                }
-                else if (GZipArchive.IsGZipFile(archivePath))
-                {
-                    using var archive = GZipArchive.Open(archivePath);
-                    info = FindComicInfoXml(archive.Entries.Where(entry => !entry.IsDirectory && Parser.Parser.IsXml(entry.Key)));
-                }
-                else if (RarArchive.IsRarFile(archivePath))
-                {
-                    using var archive = RarArchive.Open(archivePath);
-                    info = FindComicInfoXml(archive.Entries.Where(entry => !entry.IsDirectory && Parser.Parser.IsXml(entry.Key)));
-                }
-                else if (SevenZipArchive.IsSevenZipFile(archivePath))
-                {
-                    using var archive = SevenZipArchive.Open(archivePath);
-                    info = FindComicInfoXml(archive.Entries.Where(entry => !entry.IsDirectory && Parser.Parser.IsXml(entry.Key)));
-                }
-                else if (TarArchive.IsTarFile(archivePath))
-                {
-                    using var archive = TarArchive.Open(archivePath);
-                    info = FindComicInfoXml(archive.Entries.Where(entry => !entry.IsDirectory && Parser.Parser.IsXml(entry.Key)));
+                    case ArchiveLibrary.Default:
+                    {
+                        _logger.LogDebug("Using default compression handling");
+                        using var archive = ZipFile.OpenRead(archivePath);
+                        var entry = archive.Entries.SingleOrDefault(x => Path.GetFileNameWithoutExtension(x.Name).ToLower() == "comicinfo" && Parser.Parser.IsXml(x.FullName));
+                        if (entry != null)
+                        {
+                            using var stream = entry.Open();
+                            var serializer = new XmlSerializer(typeof(ComicInfo));
+                            info = (ComicInfo) serializer.Deserialize(stream);
+                        }
+                        break;
+                    }
+                    case ArchiveLibrary.SharpCompress:
+                    {
+                        _logger.LogDebug("Using SharpCompress compression handling");
+                        using var archive = ArchiveFactory.Open(archivePath);
+                        info = FindComicInfoXml(archive.Entries.Where(entry => !entry.IsDirectory && Parser.Parser.IsXml(entry.Key)));
+                        break;
+                    }
+                    case ArchiveLibrary.NotSupported:
+                        _logger.LogError("[GetNumberOfPagesFromArchive] This archive cannot be read: {ArchivePath}. Defaulting to 0 pages", archivePath);
+                        return summary;
+                    default:
+                        _logger.LogError("[GetNumberOfPagesFromArchive] There was an exception when reading archive stream: {ArchivePath}. Defaulting to 0 pages", archivePath);
+                        return summary;
                 }
+
+
 
                 if (info != null)
                 {
@@ -330,6 +408,19 @@ namespace API.Services
             // archive.ExtractToDirectory(extractPath, true);
         }
 
+        private void ExtractArchiveEntries(ZipArchive archive, string extractPath)
+        {
+            var needsFlattening = ArchiveNeedsFlattening(archive);
+            if (!archive.HasFiles() && !needsFlattening) return;
+
+            archive.ExtractToDirectory(extractPath, true);
+            if (needsFlattening)
+            {
+                _logger.LogDebug("Extracted archive is nested in root folder, flattening...");
+                new DirectoryInfo(extractPath).Flatten();
+            }
+        }
+
         /// <summary>
         /// Extracts an archive to a temp cache directory. Returns path to new directory. If temp cache directory already exists,
         /// will return that without performing an extraction. Returns empty string if there are any invalidations which would
@@ -342,13 +433,37 @@ namespace API.Services
         {
             if (!File.Exists(archivePath)) return;
-            if (new DirectoryInfo(extractPath).Exists) return;
+            if (Directory.Exists(extractPath)) return;
 
             var sw = Stopwatch.StartNew();
-            using var archive = ArchiveFactory.Open(archivePath);
+
             try
             {
-                ExtractArchiveEntities(archive.Entries.Where(entry => !entry.IsDirectory && Parser.Parser.IsImage(entry.Key)), extractPath);
+                var libraryHandler = Archive.Archive.CanOpen(archivePath);
+                switch (libraryHandler)
+                {
+                    case ArchiveLibrary.Default:
+                    {
+                        _logger.LogDebug("Using default compression handling");
+                        using ZipArchive archive = ZipFile.OpenRead(archivePath);
+                        ExtractArchiveEntries(archive, extractPath);
+                        break;
+                    }
+                    case ArchiveLibrary.SharpCompress:
+                    {
+                        _logger.LogDebug("Using SharpCompress compression handling");
+                        using var archive = ArchiveFactory.Open(archivePath);
+                        ExtractArchiveEntities(archive.Entries.Where(entry => !entry.IsDirectory && Parser.Parser.IsImage(entry.Key)), extractPath);
+                        break;
+                    }
+                    case ArchiveLibrary.NotSupported:
+                        _logger.LogError("[GetNumberOfPagesFromArchive] This archive cannot be read: {ArchivePath}. Defaulting to 0 pages", archivePath);
+                        return;
+                    default:
+                        _logger.LogError("[GetNumberOfPagesFromArchive] There was an exception when reading archive stream: {ArchivePath}. Defaulting to 0 pages", archivePath);
+                        return;
+                }
+
             }
             catch (Exception e)
             {
diff --git a/API/Services/MetadataService.cs b/API/Services/MetadataService.cs
index 98a5660ed..ccde75ae0 100644
--- a/API/Services/MetadataService.cs
+++ b/API/Services/MetadataService.cs
@@ -36,7 +36,6 @@ namespace API.Services
                 var firstFile = chapter.Files.OrderBy(x => x.Chapter).FirstOrDefault();
                 if (firstFile != null) chapter.CoverImage = _archiveService.GetCoverImage(firstFile.FilePath, true);
             }
-            // NOTE: Can I put page calculation here?
             chapter.Pages = chapter.Files.Sum(f => f.Pages);
         }
 
diff --git a/API/Services/Tasks/ScannerService.cs b/API/Services/Tasks/ScannerService.cs
index 0a46bb383..2c107a989 100644
--- a/API/Services/Tasks/ScannerService.cs
+++ b/API/Services/Tasks/ScannerService.cs
@@ -193,6 +193,7 @@ namespace API.Services.Tasks
                series.Pages = series.Volumes.Sum(v => v.Pages);
                _metadataService.UpdateMetadata(series, _forceUpdate);
             });
+
 
             foreach (var folder in library.Folders) folder.LastScanned = DateTime.Now;
         }
@@ -224,7 +225,7 @@ namespace API.Services.Tasks
                _logger.LogDebug("Parsing {SeriesName} - Volume {VolumeNumber}", series.Name, volume.Name);
                UpdateChapters(volume, infos);
                volume.Pages = volume.Chapters.Sum(c => c.Pages);
-               // _metadataService.UpdateMetadata(volume, _forceUpdate); // NOTE: Testing removing here. We do at the end of all DB work
+               _metadataService.UpdateMetadata(volume, _forceUpdate);
             }
 
@@ -285,7 +286,7 @@ namespace API.Services.Tasks
                chapter.Number = Parser.Parser.MinimumNumberFromRange(info.Chapters) + "";
                chapter.Range = info.Chapters;
                chapter.Pages = chapter.Files.Sum(f => f.Pages);
-               //_metadataService.UpdateMetadata(chapter, _forceUpdate); // NOTE: Testing removing here. We do at the end of all DB work
+               _metadataService.UpdateMetadata(chapter, _forceUpdate);
             }
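
The sketch below condenses the fallback pattern this patch repeats inside ArchiveService, for readers who want the shape without the diff noise. It is illustrative rather than repository code: the FallbackSketch class, the ListImageNames method, and the body of CanOpen are assumptions made for the example; only the ArchiveLibrary values and the BCL-first, SharpCompress-second dispatch mirror the changes above.

// Reviewer sketch, not part of the patch. Illustrates the "ZipArchive first,
// SharpCompress as fallback" flow under the assumptions stated above.
using System;
using System.Collections.Generic;
using System.IO;
using System.IO.Compression;      // fast path: System.IO.Compression.ZipArchive
using System.Linq;
using SharpCompress.Archives;     // slow fallback: SharpCompress ArchiveFactory

// Mirrors the ArchiveLibrary values used in the diff; redeclared here so the
// sketch compiles on its own.
public enum ArchiveLibrary
{
    NotSupported = 0,
    SharpCompress = 1,
    Default = 2
}

public static class FallbackSketch
{
    // Assumed probe: open with the BCL first, fall back to SharpCompress only
    // when the BCL cannot open the file (rar/7z/tar or a corrupt zip).
    public static ArchiveLibrary CanOpen(string archivePath)
    {
        if (!File.Exists(archivePath)) return ArchiveLibrary.NotSupported;

        try
        {
            using var probe = ZipFile.OpenRead(archivePath);
            return ArchiveLibrary.Default;
        }
        catch (Exception)
        {
            try
            {
                using var probe = ArchiveFactory.Open(archivePath);
                return ArchiveLibrary.SharpCompress;
            }
            catch (Exception)
            {
                return ArchiveLibrary.NotSupported;
            }
        }
    }

    // Hypothetical caller showing the per-method dispatch the patch repeats in
    // GetCoverImage and the other ArchiveService methods. Filtering is
    // simplified; the real code uses Parser.Parser.IsImage.
    public static IReadOnlyList<string> ListImageNames(string archivePath)
    {
        switch (CanOpen(archivePath))
        {
            case ArchiveLibrary.Default:
            {
                using var archive = ZipFile.OpenRead(archivePath);
                return archive.Entries
                    .Where(e => Path.HasExtension(e.FullName))
                    .Select(e => e.FullName)
                    .ToList();
            }
            case ArchiveLibrary.SharpCompress:
            {
                using var archive = ArchiveFactory.Open(archivePath);
                return archive.Entries
                    .Where(e => !e.IsDirectory)
                    .Select(e => e.Key)
                    .ToList();
            }
            default:
                return Array.Empty<string>();
        }
    }
}

Assuming CanOpen probes by actually opening the file, as sketched, every public operation opens the archive at least twice (once to probe, once to read), which is consistent with the commit message's note that each operation currently hits disk multiple times and needs performance tuning.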