Implemented fallback functionality: try System.IO.Compression.ZipArchive first, then fall back to SharpCompress (which is extremely slow).

Currently this hits disk three times per operation; it needs performance tuning.
This commit is contained in:
Joseph Milazzo 2021-03-23 14:12:28 -05:00
parent 067b5174ab
commit 154b30c3fd
4 changed files with 154 additions and 38 deletions

View File

@ -14,6 +14,7 @@ namespace API.Archive
/// <returns></returns> /// <returns></returns>
public static ArchiveLibrary CanOpen(string archivePath) public static ArchiveLibrary CanOpen(string archivePath)
{ {
// TODO: Should I introduce something for NotFound?
if (!File.Exists(archivePath) || !Parser.Parser.IsArchive(archivePath)) return ArchiveLibrary.NotSupported; if (!File.Exists(archivePath) || !Parser.Parser.IsArchive(archivePath)) return ArchiveLibrary.NotSupported;
try try

View File

@ -44,10 +44,11 @@ namespace API.Services
} }
var count = 0; var count = 0;
var libraryHandler = Archive.Archive.CanOpen(archivePath);
try try
{ {
var libraryHandler = Archive.Archive.CanOpen(archivePath);
switch (libraryHandler) switch (libraryHandler)
{ {
case ArchiveLibrary.Default: case ArchiveLibrary.Default:
@ -153,21 +154,47 @@ namespace API.Services
/// Given a path to a compressed file (zip, rar, cbz, cbr, etc), will ensure the first image is returned unless /// Given a path to a compressed file (zip, rar, cbz, cbr, etc), will ensure the first image is returned unless
/// a folder.extension exists in the root directory of the compressed file. /// a folder.extension exists in the root directory of the compressed file.
/// </summary> /// </summary>
/// <param name="filepath"></param> /// <param name="archivePath"></param>
/// <param name="createThumbnail">Create a smaller variant of file extracted from archive. Archive images are usually 1MB each.</param> /// <param name="createThumbnail">Create a smaller variant of file extracted from archive. Archive images are usually 1MB each.</param>
/// <returns></returns> /// <returns></returns>
public byte[] GetCoverImage(string filepath, bool createThumbnail = false) public byte[] GetCoverImage(string archivePath, bool createThumbnail = false)
{ {
try try
{ {
if (!IsValidArchive(filepath)) return Array.Empty<byte>(); if (!IsValidArchive(archivePath)) return Array.Empty<byte>();
using var archive = ArchiveFactory.Open(filepath); var libraryHandler = Archive.Archive.CanOpen(archivePath);
return FindCoverImage(archive.Entries.Where(entry => !entry.IsDirectory && Parser.Parser.IsImage(entry.Key)), createThumbnail);
switch (libraryHandler)
{
case ArchiveLibrary.Default:
{
_logger.LogDebug("Using default compression handling");
using var archive = ZipFile.OpenRead(archivePath);
var folder = archive.Entries.SingleOrDefault(x => Path.GetFileNameWithoutExtension(x.Name).ToLower() == "folder");
var entries = archive.Entries.Where(x => Path.HasExtension(x.FullName) && Parser.Parser.IsImage(x.FullName)).OrderBy(x => x.FullName).ToList();
var entry = folder ?? entries[0];
return createThumbnail ? CreateThumbnail(entry) : ConvertEntryToByteArray(entry);
}
case ArchiveLibrary.SharpCompress:
{
_logger.LogDebug("Using SharpCompress compression handling");
using var archive = ArchiveFactory.Open(archivePath);
return FindCoverImage(archive.Entries.Where(entry => !entry.IsDirectory && Parser.Parser.IsImage(entry.Key)), createThumbnail);
}
case ArchiveLibrary.NotSupported:
_logger.LogError("[GetCoverImage] This archive cannot be read: {ArchivePath}. Defaulting to no cover image", archivePath);
return Array.Empty<byte>();
default:
_logger.LogError("[GetCoverImage] There was an exception when reading archive stream: {ArchivePath}. Defaulting to no cover image", archivePath);
return Array.Empty<byte>();
}
} }
catch (Exception ex) catch (Exception ex)
{ {
_logger.LogError(ex, "[GetCoverImage] There was an exception when reading archive stream: {Filepath}. Defaulting to no cover image", filepath); _logger.LogError(ex, "[GetCoverImage] There was an exception when reading archive stream: {ArchivePath}. Defaulting to no cover image", archivePath);
} }
return Array.Empty<byte>(); return Array.Empty<byte>();
@ -221,9 +248,53 @@ namespace API.Services
return Array.Empty<byte>(); return Array.Empty<byte>();
} }
/// <summary>
/// Reads the full, uncompressed contents of a zip entry into memory.
/// </summary>
/// <param name="entry">Archive entry whose stream will be opened and copied.</param>
/// <returns>The entry's bytes.</returns>
private static byte[] ConvertEntryToByteArray(ZipArchiveEntry entry)
{
    using var entryStream = entry.Open();
    using var buffer = new MemoryStream();
    entryStream.CopyTo(buffer);
    return buffer.ToArray();
}
/// <summary>
/// Given an archive stream, will assess whether directory needs to be flattened so that the extracted archive files are directly
/// under extract path and not nested in subfolders. See <see cref="DirectoryInfoExtensions"/> Flatten method.
/// </summary>
/// <param name="archive">An opened archive stream</param>
/// <returns>True when the extracted output would be nested and should be flattened.</returns>
public bool ArchiveNeedsFlattening(ZipArchive archive)
{
    // Sometimes ZipArchive will list the directory and others it will just keep it in the FullName
    var entries = archive.Entries;
    var firstEntryIsDirectory = entries.Count > 0 && !Path.HasExtension(entries.ElementAt(0).FullName);
    return firstEntryIsDirectory
           || entries.Any(e => e.FullName.Contains(Path.AltDirectorySeparatorChar));
}
/// <summary>
/// Generates a small thumbnail from an image entry inside an archive.
/// </summary>
/// <param name="entry">Image entry within the archive.</param>
/// <param name="formatExtension">Output format extension; a leading dot is added if missing. Defaults to ".jpg".</param>
/// <returns>Encoded thumbnail bytes, or an empty array if generation fails.</returns>
private byte[] CreateThumbnail(ZipArchiveEntry entry, string formatExtension = ".jpg")
{
    // Normalize e.g. "jpg" -> ".jpg" so the image encoder receives a valid extension
    if (!formatExtension.StartsWith("."))
    {
        formatExtension = "." + formatExtension;
    }

    try
    {
        using var stream = entry.Open();
        using var thumbnail = Image.ThumbnailStream(stream, ThumbnailWidth);
        return thumbnail.WriteToBuffer(formatExtension); // TODO: Validate this code works with .png files
    }
    catch (Exception ex)
    {
        _logger.LogError(ex, "There was a critical error and prevented thumbnail generation on {EntryName}. Defaulting to no cover image", entry.FullName);
    }

    return Array.Empty<byte>();
}
/// <summary> /// <summary>
/// Test if the archive path exists and there are images inside it. This will log as an error. /// Test if the archive path exists and an archive
/// </summary> /// </summary>
/// <param name="archivePath"></param> /// <param name="archivePath"></param>
/// <returns></returns> /// <returns></returns>
@ -272,32 +343,39 @@ namespace API.Services
try try
{ {
if (!File.Exists(archivePath)) return summary; if (!File.Exists(archivePath)) return summary;
if (SharpCompress.Archives.Zip.ZipArchive.IsZipFile(archivePath)) var libraryHandler = Archive.Archive.CanOpen(archivePath);
switch (libraryHandler)
{ {
using var archive = SharpCompress.Archives.Zip.ZipArchive.Open(archivePath); case ArchiveLibrary.Default:
info = FindComicInfoXml(archive.Entries.Where(entry => !entry.IsDirectory && Parser.Parser.IsXml(entry.Key))); {
} _logger.LogDebug("Using default compression handling");
else if (GZipArchive.IsGZipFile(archivePath)) using var archive = ZipFile.OpenRead(archivePath);
{ var entry = archive.Entries.SingleOrDefault(x => Path.GetFileNameWithoutExtension(x.Name).ToLower() == "comicinfo" && Parser.Parser.IsXml(x.FullName));
using var archive = GZipArchive.Open(archivePath); if (entry != null)
info = FindComicInfoXml(archive.Entries.Where(entry => !entry.IsDirectory && Parser.Parser.IsXml(entry.Key))); {
} using var stream = entry.Open();
else if (RarArchive.IsRarFile(archivePath)) var serializer = new XmlSerializer(typeof(ComicInfo));
{ info = (ComicInfo) serializer.Deserialize(stream);
using var archive = RarArchive.Open(archivePath); }
info = FindComicInfoXml(archive.Entries.Where(entry => !entry.IsDirectory && Parser.Parser.IsXml(entry.Key))); break;
} }
else if (SevenZipArchive.IsSevenZipFile(archivePath)) case ArchiveLibrary.SharpCompress:
{ {
using var archive = SevenZipArchive.Open(archivePath); _logger.LogDebug("Using SharpCompress compression handling");
info = FindComicInfoXml(archive.Entries.Where(entry => !entry.IsDirectory && Parser.Parser.IsXml(entry.Key))); using var archive = ArchiveFactory.Open(archivePath);
} info = FindComicInfoXml(archive.Entries.Where(entry => !entry.IsDirectory && Parser.Parser.IsXml(entry.Key)));
else if (TarArchive.IsTarFile(archivePath)) break;
{ }
using var archive = TarArchive.Open(archivePath); case ArchiveLibrary.NotSupported:
info = FindComicInfoXml(archive.Entries.Where(entry => !entry.IsDirectory && Parser.Parser.IsXml(entry.Key))); _logger.LogError("[GetNumberOfPagesFromArchive] This archive cannot be read: {ArchivePath}. Defaulting to 0 pages", archivePath);
return summary;
default:
_logger.LogError("[GetNumberOfPagesFromArchive] There was an exception when reading archive stream: {ArchivePath}. Defaulting to 0 pages", archivePath);
return summary;
} }
if (info != null) if (info != null)
{ {
@ -330,6 +408,19 @@ namespace API.Services
// archive.ExtractToDirectory(extractPath, true); // archive.ExtractToDirectory(extractPath, true);
} }
/// <summary>
/// Extracts every entry of the archive into extractPath, flattening a nested root folder when needed.
/// </summary>
/// <param name="archive">Opened zip archive to extract.</param>
/// <param name="extractPath">Destination directory.</param>
private void ExtractArchiveEntries(ZipArchive archive, string extractPath)
{
    var needsFlattening = ArchiveNeedsFlattening(archive);
    // Nothing to extract and nothing to flatten: bail early
    if (!archive.HasFiles() && !needsFlattening) return;

    archive.ExtractToDirectory(extractPath, true);
    if (!needsFlattening) return;

    _logger.LogDebug("Extracted archive is nested in root folder, flattening...");
    new DirectoryInfo(extractPath).Flatten();
}
/// <summary> /// <summary>
/// Extracts an archive to a temp cache directory. Returns path to new directory. If temp cache directory already exists, /// Extracts an archive to a temp cache directory. Returns path to new directory. If temp cache directory already exists,
/// will return that without performing an extraction. Returns empty string if there are any invalidations which would /// will return that without performing an extraction. Returns empty string if there are any invalidations which would
@ -342,13 +433,37 @@ namespace API.Services
{ {
if (!File.Exists(archivePath)) return; if (!File.Exists(archivePath)) return;
if (new DirectoryInfo(extractPath).Exists) return; if (Directory.Exists(extractPath)) return;
var sw = Stopwatch.StartNew(); var sw = Stopwatch.StartNew();
using var archive = ArchiveFactory.Open(archivePath);
try try
{ {
ExtractArchiveEntities(archive.Entries.Where(entry => !entry.IsDirectory && Parser.Parser.IsImage(entry.Key)), extractPath); var libraryHandler = Archive.Archive.CanOpen(archivePath);
switch (libraryHandler)
{
case ArchiveLibrary.Default:
{
_logger.LogDebug("Using default compression handling");
using ZipArchive archive = ZipFile.OpenRead(archivePath);
ExtractArchiveEntries(archive, extractPath);
break;
}
case ArchiveLibrary.SharpCompress:
{
_logger.LogDebug("Using SharpCompress compression handling");
using var archive = ArchiveFactory.Open(archivePath);
ExtractArchiveEntities(archive.Entries.Where(entry => !entry.IsDirectory && Parser.Parser.IsImage(entry.Key)), extractPath);
break;
}
case ArchiveLibrary.NotSupported:
_logger.LogError("[GetNumberOfPagesFromArchive] This archive cannot be read: {ArchivePath}. Defaulting to 0 pages", archivePath);
return;
default:
_logger.LogError("[GetNumberOfPagesFromArchive] There was an exception when reading archive stream: {ArchivePath}. Defaulting to 0 pages", archivePath);
return;
}
} }
catch (Exception e) catch (Exception e)
{ {

View File

@ -36,7 +36,6 @@ namespace API.Services
var firstFile = chapter.Files.OrderBy(x => x.Chapter).FirstOrDefault(); var firstFile = chapter.Files.OrderBy(x => x.Chapter).FirstOrDefault();
if (firstFile != null) chapter.CoverImage = _archiveService.GetCoverImage(firstFile.FilePath, true); if (firstFile != null) chapter.CoverImage = _archiveService.GetCoverImage(firstFile.FilePath, true);
} }
// NOTE: Can I put page calculation here? chapter.Pages = chapter.Files.Sum(f => f.Pages);
} }

View File

@ -193,6 +193,7 @@ namespace API.Services.Tasks
series.Pages = series.Volumes.Sum(v => v.Pages); series.Pages = series.Volumes.Sum(v => v.Pages);
_metadataService.UpdateMetadata(series, _forceUpdate); _metadataService.UpdateMetadata(series, _forceUpdate);
}); });
foreach (var folder in library.Folders) folder.LastScanned = DateTime.Now; foreach (var folder in library.Folders) folder.LastScanned = DateTime.Now;
} }
@ -224,7 +225,7 @@ namespace API.Services.Tasks
_logger.LogDebug("Parsing {SeriesName} - Volume {VolumeNumber}", series.Name, volume.Name); _logger.LogDebug("Parsing {SeriesName} - Volume {VolumeNumber}", series.Name, volume.Name);
UpdateChapters(volume, infos); UpdateChapters(volume, infos);
volume.Pages = volume.Chapters.Sum(c => c.Pages); volume.Pages = volume.Chapters.Sum(c => c.Pages);
// _metadataService.UpdateMetadata(volume, _forceUpdate); // NOTE: Testing removing here. We do at the end of all DB work _metadataService.UpdateMetadata(volume, _forceUpdate);
} }
@ -285,7 +286,7 @@ namespace API.Services.Tasks
chapter.Number = Parser.Parser.MinimumNumberFromRange(info.Chapters) + ""; chapter.Number = Parser.Parser.MinimumNumberFromRange(info.Chapters) + "";
chapter.Range = info.Chapters; chapter.Range = info.Chapters;
chapter.Pages = chapter.Files.Sum(f => f.Pages); chapter.Pages = chapter.Files.Sum(f => f.Pages);
//_metadataService.UpdateMetadata(chapter, _forceUpdate); // NOTE: Testing removing here. We do at the end of all DB work _metadataService.UpdateMetadata(chapter, _forceUpdate);
} }