Finished refactoring to SharpCompress.

This commit is contained in:
Joseph Milazzo 2021-03-22 13:29:24 -05:00
parent 16bc83b3c3
commit d543511131
6 changed files with 331 additions and 120 deletions

View File

@ -1,44 +1,51 @@
using System.IO;
using System.IO.Compression;
using System;
using System.Diagnostics;
using System.IO;
using API.Interfaces.Services;
using API.Services;
using Microsoft.Extensions.Logging;
using NSubstitute;
using Xunit;
using Xunit.Abstractions;
namespace API.Tests.Services
{
public class ArchiveServiceTests
{
private readonly ITestOutputHelper _testOutputHelper;
private readonly IArchiveService _archiveService;
private readonly ILogger<ArchiveService> _logger = Substitute.For<ILogger<ArchiveService>>();
public ArchiveServiceTests()
public ArchiveServiceTests(ITestOutputHelper testOutputHelper)
{
_testOutputHelper = testOutputHelper;
_archiveService = new ArchiveService(_logger);
}
[Theory]
[InlineData("flat file.zip", false)]
[InlineData("file in folder in folder.zip", true)]
[InlineData("file in folder.zip", true)]
[InlineData("file in folder_alt.zip", true)]
public void ArchiveNeedsFlatteningTest(string archivePath, bool expected)
{
var testDirectory = Path.Join(Directory.GetCurrentDirectory(), "../../../Services/Test Data/ArchiveService/Archives");
var file = Path.Join(testDirectory, archivePath);
using ZipArchive archive = ZipFile.OpenRead(file);
Assert.Equal(expected, _archiveService.ArchiveNeedsFlattening(archive));
}
// [Theory]
// [InlineData("flat file.zip", false)]
// [InlineData("file in folder in folder.zip", true)]
// [InlineData("file in folder.zip", true)]
// [InlineData("file in folder_alt.zip", true)]
// public void ArchiveNeedsFlatteningTest(string archivePath, bool expected)
// {
// var testDirectory = Path.Join(Directory.GetCurrentDirectory(), "../../../Services/Test Data/ArchiveService/Archives");
// var file = Path.Join(testDirectory, archivePath);
// using ZipArchive archive = ZipFile.OpenRead(file);
// Assert.Equal(expected, _archiveService.ArchiveNeedsFlattening(archive));
// }
[Theory]
[InlineData("non existent file.zip", false)]
[InlineData("wrong extension.rar", false)]
[InlineData("empty.zip", false)]
[InlineData("winrar.rar", true)]
[InlineData("empty.zip", true)]
[InlineData("flat file.zip", true)]
[InlineData("file in folder in folder.zip", true)]
[InlineData("file in folder.zip", true)]
[InlineData("file in folder_alt.zip", true)]
[InlineData("not supported 1.zip", true)]
[InlineData("not supported 2.cbz", true)]
[InlineData("not supported 3.cbz", true)]
public void IsValidArchiveTest(string archivePath, bool expected)
{
var testDirectory = Path.Join(Directory.GetCurrentDirectory(), "../../../Services/Test Data/ArchiveService/Archives");
@ -53,21 +60,114 @@ namespace API.Tests.Services
[InlineData("file in folder in folder.zip", 1)]
[InlineData("file in folder.zip", 1)]
[InlineData("file in folder_alt.zip", 1)]
[InlineData("not supported 1.zip", 1)]
[InlineData("not supported 2.cbz", 0)]
[InlineData("not supported 3.cbz", 0)]
[InlineData("mangadex_131.zip", 577)]
// Checks that GetNumberOfPagesFromArchive returns the expected page count for each fixture archive.
// archivePath is a file name inside the ArchiveService/Archives test data directory.
public void GetNumberOfPagesFromArchiveTest(string archivePath, int expected)
{
    var testDirectory = Path.Join(Directory.GetCurrentDirectory(), "../../../Services/Test Data/ArchiveService/Archives");
    var sw = Stopwatch.StartNew();

    // Deliberately no try/catch: xUnit signals a failed assertion by throwing, so catching
    // Exception around Assert.Equal would make every case pass regardless of the result.
    Assert.Equal(expected, _archiveService.GetNumberOfPagesFromArchive(Path.Join(testDirectory, archivePath)));
    _testOutputHelper.WriteLine($"Processed Original in {sw.ElapsedMilliseconds} ms");
}
/// <summary>
/// Checks that <c>IsValidArchive</c> returns the expected verdict for each fixture archive.
/// </summary>
/// <param name="archivePath">File name inside the ArchiveService/Archives test data directory.</param>
/// <param name="expected">Expected return value of <c>IsValidArchive</c>.</param>
[Theory]
[InlineData("non existent file.zip", false)]
[InlineData("wrong extension.rar", false)]
[InlineData("empty.zip", false)]
[InlineData("flat file.zip", true)]
[InlineData("file in folder in folder.zip", true)]
[InlineData("file in folder.zip", true)]
[InlineData("file in folder_alt.zip", true)]
[InlineData("not supported 1.zip", true)]
[InlineData("not supported 3.cbz", true)]
public void CanOpenArchive(string archivePath, bool expected)
{
    var sw = Stopwatch.StartNew();
    var testDirectory = Path.Join(Directory.GetCurrentDirectory(), "../../../Services/Test Data/ArchiveService/Archives");

    // Deliberately no try/catch: xUnit signals a failed assertion by throwing, so catching
    // Exception around Assert.Equal would make every case pass regardless of the result.
    Assert.Equal(expected, _archiveService.IsValidArchive(Path.Join(testDirectory, archivePath)));
    _testOutputHelper.WriteLine($"Processed Original in {sw.ElapsedMilliseconds} ms");
}
/// <summary>
/// Extracts each fixture archive into a scratch directory and checks how many files end up
/// directly inside that directory.
/// </summary>
/// <param name="archivePath">File name inside the ArchiveService/Archives test data directory.</param>
/// <param name="expectedFileCount">Expected number of files directly under the extraction directory.</param>
[Theory]
[InlineData("non existent file.zip", 0)]
[InlineData("wrong extension.rar", 0)]
[InlineData("empty.zip", 0)]
[InlineData("flat file.zip", 1)]
[InlineData("file in folder in folder.zip", 1)]
[InlineData("file in folder.zip", 1)]
[InlineData("file in folder_alt.zip", 1)]
[InlineData("not supported 1.zip", 1)]
[InlineData("not supported 2.cbz", 169)]
[InlineData("not supported 3.cbz", 1)]
[InlineData("mangadex_131.zip", 577)]
public void CanExtractArchive(string archivePath, int expectedFileCount)
{
    var testDirectory = Path.Join(Directory.GetCurrentDirectory(), "../../../Services/Test Data/ArchiveService/Archives");
    var extractDirectory = Path.Join(Directory.GetCurrentDirectory(), "../../../Services/Test Data/ArchiveService/Archives/Extraction");

    // Start from a clean slate so files left behind by a previous case cannot skew the count.
    DirectoryService.ClearAndDeleteDirectory(extractDirectory);

    var sw = Stopwatch.StartNew();
    try
    {
        _archiveService.ExtractArchive(Path.Join(testDirectory, archivePath), extractDirectory);

        // When nothing was extracted the directory may not exist at all; treat that as zero files
        // instead of letting DirectoryInfo.GetFiles throw DirectoryNotFoundException.
        var actualFileCount = Directory.Exists(extractDirectory)
            ? new DirectoryInfo(extractDirectory).GetFiles().Length
            : 0;

        // The assertion is not wrapped in a catch: swallowing its exception would hide every failure.
        Assert.Equal(expectedFileCount, actualFileCount);
        _testOutputHelper.WriteLine($"Processed Original in {sw.ElapsedMilliseconds} ms");
    }
    finally
    {
        // Always remove the scratch directory, even when the assertion above fails.
        DirectoryService.ClearAndDeleteDirectory(extractDirectory);
    }
}
/// <summary>
/// Compares the bytes returned by <c>GetCoverImage</c> for a fixture archive with the bytes of
/// the expected image file stored next to it.
/// </summary>
/// <param name="inputFile">Archive file name inside the ArchiveService/CoverImages test data directory.</param>
/// <param name="expectedOutputFile">Image file name in the same directory holding the expected bytes.</param>
[Theory]
[InlineData("v10.cbz", "v10.expected.jpg")]
[InlineData("v10 - with folder.cbz", "v10 - with folder.expected.jpg")]
[InlineData("v10 - nested folder.cbz", "v10 - nested folder.expected.jpg")]
[InlineData("png.zip", "png.PNG")]
public void GetCoverImageTest(string inputFile, string expectedOutputFile)
{
    var coverImageDirectory = Path.Join(Directory.GetCurrentDirectory(), "../../../Services/Test Data/ArchiveService/CoverImages");
    var expectedImagePath = Path.Join(coverImageDirectory, expectedOutputFile);
    var expectedBytes = File.ReadAllBytes(expectedImagePath);

    var timer = Stopwatch.StartNew();
    var actualBytes = _archiveService.GetCoverImage(Path.Join(coverImageDirectory, inputFile));

    Assert.Equal(expectedBytes, actualBytes);
    _testOutputHelper.WriteLine($"Processed in {timer.ElapsedMilliseconds} ms");
}
/// <summary>
/// Checks that <c>GetSummaryInfo</c> returns the expected summary text for the
/// "file in folder.zip" fixture in the ArchiveService/ComicInfos test data directory.
/// </summary>
[Fact]
public void ShouldHaveComicInfo()
{
    const string expectedSummary = "By all counts, Ryouta Sakamoto is a loser when he's not holed up in his room, bombing things into oblivion in his favorite online action RPG. But his very own uneventful life is blown to pieces when he's abducted and taken to an uninhabited island, where he soon learns the hard way that he's being pitted against others just like him in a explosives-riddled death match! How could this be happening? Who's putting them up to this? And why!? The name, not to mention the objective, of this very real survival game is eerily familiar to Ryouta, who has mastered its virtual counterpart-BTOOOM! Can Ryouta still come out on top when he's playing for his life!?";

    var comicInfoDirectory = Path.Join(Directory.GetCurrentDirectory(), "../../../Services/Test Data/ArchiveService/ComicInfos");
    var archivePath = Path.Join(comicInfoDirectory, "file in folder.zip");

    var actualSummary = _archiveService.GetSummaryInfo(archivePath);

    Assert.Equal(expectedSummary, actualSummary);
}
}
}

View File

@ -5,12 +5,10 @@ namespace API.Interfaces.Services
{
public interface IArchiveService
{
bool ArchiveNeedsFlattening(ZipArchive archive);
void ExtractArchive(string archivePath, string extractPath);
int GetNumberOfPagesFromArchive(string archivePath);
byte[] GetCoverImage(string filepath, bool createThumbnail = false);
bool IsValidArchive(string archivePath);
string GetSummaryInfo(string archivePath);
}
}

View File

@ -8,7 +8,7 @@ namespace API.Parser
{
public static class Parser
{
public static readonly string MangaFileExtensions = @"\.cbz|\.zip"; // |\.rar|\.cbr
// Alternation of archive file extensions; presumably consumed as a regular-expression pattern,
// like ImageFileExtensions. Every dot is escaped so it matches a literal '.' instead of any
// character (unescaped, ".tar.gz" would also match text such as "xtar-gz").
// NOTE(review): 7-Zip archives are conventionally named ".7z"; confirm whether ".7zip" is intended.
public static readonly string MangaFileExtensions = @"\.cbz|\.zip|\.rar|\.cbr|\.tar\.gz|\.7zip";
public static readonly string ImageFileExtensions = @"\.png|\.jpeg|\.jpg|\.gif";
private static readonly string XmlRegexExtensions = @"\.xml";
private static readonly Regex ImageRegex = new Regex(ImageFileExtensions, RegexOptions.IgnoreCase | RegexOptions.Compiled);

View File

@ -1,14 +1,20 @@
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.IO.Compression;
using System.Linq;
using System.Xml.Serialization;
using API.Extensions;
using API.Interfaces.Services;
using API.Services.Tasks;
using Microsoft.Extensions.Logging;
using NetVips;
using SharpCompress.Archives;
using SharpCompress.Archives.GZip;
using SharpCompress.Archives.Rar;
using SharpCompress.Archives.SevenZip;
using SharpCompress.Archives.Tar;
using SharpCompress.Common;
using SharpCompress.Readers;
using Image = NetVips.Image;
namespace API.Services
{
@ -27,18 +33,44 @@ namespace API.Services
public int GetNumberOfPagesFromArchive(string archivePath)
{
if (!IsValidArchive(archivePath)) return 0;
if (!IsValidArchive(archivePath))
{
_logger.LogError("Archive {ArchivePath} could not be found", archivePath);
return 0;
}
var count = 0;
try
{
using ZipArchive archive = ZipFile.OpenRead(archivePath);
return archive.Entries.Count(e => Parser.Parser.IsImage(e.FullName));
using Stream stream = File.OpenRead(archivePath);
using (var reader = ReaderFactory.Open(stream))
{
try
{
_logger.LogDebug("Archive Type: {ArchiveType}", reader.ArchiveType);
}
catch (System.InvalidOperationException ex)
{
_logger.LogError(ex, "Could not parse the archive. Please validate it is not corrupted");
return 0;
}
while (reader.MoveToNextEntry())
{
if (!reader.Entry.IsDirectory && Parser.Parser.IsImage(reader.Entry.Key))
{
count++;
}
}
}
}
catch (Exception ex)
{
_logger.LogError(ex, "There was an exception when reading archive stream: {ArchivePath}. Defaulting to 0 pages", archivePath);
return 0;
}
return count;
}
/// <summary>
@ -55,14 +87,39 @@ namespace API.Services
{
if (!IsValidArchive(filepath)) return Array.Empty<byte>();
using var archive = ZipFile.OpenRead(filepath);
if (!archive.HasFiles()) return Array.Empty<byte>();
if (SharpCompress.Archives.Zip.ZipArchive.IsZipFile(filepath))
{
using var archive = SharpCompress.Archives.Zip.ZipArchive.Open(filepath);
return FindCoverImage(archive.Entries.Where(entry => !entry.IsDirectory), createThumbnail);
}
var folder = archive.Entries.SingleOrDefault(x => Path.GetFileNameWithoutExtension(x.Name).ToLower() == "folder");
var entries = archive.Entries.Where(x => Path.HasExtension(x.FullName) && Parser.Parser.IsImage(x.FullName)).OrderBy(x => x.FullName).ToList();
var entry = folder ?? entries[0];
if (GZipArchive.IsGZipFile(filepath))
{
using var archive = GZipArchive.Open(filepath);
return FindCoverImage(archive.Entries.Where(entry => !entry.IsDirectory), createThumbnail);
}
if (RarArchive.IsRarFile(filepath))
{
using var archive = RarArchive.Open(filepath);
return FindCoverImage(archive.Entries.Where(entry => !entry.IsDirectory), createThumbnail);
}
if (SevenZipArchive.IsSevenZipFile(filepath))
{
using var archive = SevenZipArchive.Open(filepath);
return FindCoverImage(archive.Entries.Where(entry => !entry.IsDirectory), createThumbnail);
}
if (TarArchive.IsTarFile(filepath))
{
using var archive = TarArchive.Open(filepath);
return FindCoverImage(archive.Entries.Where(entry => !entry.IsDirectory), createThumbnail);
}
_logger.LogError("Could not parse archive file");
return Array.Empty<byte>();
return createThumbnail ? CreateThumbnail(entry) : ConvertEntryToByteArray(entry);
}
catch (Exception ex)
{
@ -72,54 +129,60 @@ namespace API.Services
return Array.Empty<byte>();
}
private byte[] CreateThumbnail(ZipArchiveEntry entry)
private byte[] FindCoverImage(IEnumerable<IArchiveEntry> entries, bool createThumbnail)
{
try
var images = entries.ToList();
foreach (var entry in images)
{
using var stream = entry.Open();
using var thumbnail = Image.ThumbnailStream(stream, ThumbnailWidth);
return thumbnail.WriteToBuffer(".jpg"); // TODO: Validate this code works with .png files
if (Path.GetFileNameWithoutExtension(entry.Key).ToLower() == "folder")
{
using var ms = new MemoryStream();
entry.WriteTo(ms);
ms.Position = 0;
return createThumbnail ? CreateThumbnail(ms.ToArray(), Path.GetExtension(entry.Key)) : ms.ToArray();
}
catch (Exception ex)
}
if (images.Any())
{
_logger.LogError(ex, "There was a critical error and prevented thumbnail generation on {EntryName}. Defaulting to no cover image", entry.FullName);
var entry = images.OrderBy(e => e.Key).FirstOrDefault();
if (entry == null) return Array.Empty<byte>();
using var ms = new MemoryStream();
entry.WriteTo(ms);
ms.Position = 0;
var data = ms.ToArray();
return createThumbnail ? CreateThumbnail(data, Path.GetExtension(entry.Key)) : data;
}
return Array.Empty<byte>();
}
private static byte[] ConvertEntryToByteArray(ZipArchiveEntry entry)
private byte[] CreateThumbnail(byte[] entry, string formatExtension = ".jpg")
{
using var stream = entry.Open();
using var ms = new MemoryStream();
stream.CopyTo(ms); // TODO: Check if we can use CopyToAsync here
var data = ms.ToArray();
return data;
if (!formatExtension.StartsWith("."))
{
formatExtension = "." + formatExtension;
}
try
{
using var thumbnail = Image.ThumbnailBuffer(entry, ThumbnailWidth);
return thumbnail.WriteToBuffer(formatExtension);
}
catch (Exception ex)
{
_logger.LogError(ex, "There was a critical error and prevented thumbnail generation. Defaulting to no cover image");
}
/// <summary>
/// Given an archive stream, will assess whether directory needs to be flattened so that the extracted archive files are directly
/// under extract path and not nested in subfolders. See <see cref="DirectoryInfoExtensions"/> Flatten method.
/// </summary>
/// <param name="archive">An opened archive stream</param>
/// <returns></returns>
public bool ArchiveNeedsFlattening(ZipArchive archive)
{
// Sometimes ZipArchive will list the directory and others it will just keep it in the FullName
return archive.Entries.Count > 0 &&
!Path.HasExtension(archive.Entries.ElementAt(0).FullName) ||
archive.Entries.Any(e => e.FullName.Contains(Path.AltDirectorySeparatorChar));
return Array.Empty<byte>();
}
/// <summary>
/// Tests whether the archive path exists and is recognized as an archive by Parser.IsArchive. Failures are logged as errors.
/// </summary>
/// <param name="archivePath"></param>
/// <returns></returns>
public bool IsValidArchive(string archivePath)
{
try
{
if (!File.Exists(archivePath))
{
@ -127,56 +190,96 @@ namespace API.Services
return false;
}
if (!Parser.Parser.IsArchive(archivePath))
{
if (Parser.Parser.IsArchive(archivePath)) return true;
_logger.LogError("Archive {ArchivePath} is not a valid archive", archivePath);
return false;
}
using var archive = ZipFile.OpenRead(archivePath);
if (archive.Entries.Any(e => Parser.Parser.IsImage(e.FullName))) return true;
_logger.LogError("Archive {ArchivePath} contains no images", archivePath);
/// <summary>
/// Scans the supplied archive entries for a ComicInfo XML file and deserializes the first match.
/// An entry matches when its file name without extension ends with "comicinfo" (case-insensitive)
/// and <c>Parser.Parser.IsXml</c> accepts its key.
/// </summary>
/// <param name="entries">Archive entries to inspect.</param>
/// <returns>The deserialized <see cref="ComicInfo"/>, or <c>null</c> when no entry matches.</returns>
private static ComicInfo FindComicInfoXml(IEnumerable<IArchiveEntry> entries)
{
    foreach (var entry in entries)
    {
        // Ordinal, case-insensitive comparison: lowercasing with the current culture can change
        // ASCII letters (e.g. 'I' under Turkish casing rules) and miss "ComicInfo.xml".
        var fileName = Path.GetFileNameWithoutExtension(entry.Key);
        if (fileName == null
            || !fileName.EndsWith("comicinfo", StringComparison.OrdinalIgnoreCase)
            || !Parser.Parser.IsXml(entry.Key))
        {
            continue;
        }

        // Buffer the entry in memory, then rewind so the serializer reads from the beginning.
        using var ms = new MemoryStream();
        entry.WriteTo(ms);
        ms.Position = 0;

        var serializer = new XmlSerializer(typeof(ComicInfo));
        return (ComicInfo) serializer.Deserialize(ms);
    }

    return null;
}
public string GetSummaryInfo(string archivePath)
{
var summary = string.Empty;
if (!IsValidArchive(archivePath)) return summary;
ComicInfo info = null;
try
{
if (!File.Exists(archivePath)) return summary;
if (SharpCompress.Archives.Zip.ZipArchive.IsZipFile(archivePath))
{
using var archive = SharpCompress.Archives.Zip.ZipArchive.Open(archivePath);
info = FindComicInfoXml(archive.Entries.Where(entry => !entry.IsDirectory));
}
else if (GZipArchive.IsGZipFile(archivePath))
{
using var archive = GZipArchive.Open(archivePath);
info = FindComicInfoXml(archive.Entries.Where(entry => !entry.IsDirectory));
}
else if (RarArchive.IsRarFile(archivePath))
{
using var archive = RarArchive.Open(archivePath);
info = FindComicInfoXml(archive.Entries.Where(entry => !entry.IsDirectory));
}
else if (SevenZipArchive.IsSevenZipFile(archivePath))
{
using var archive = SevenZipArchive.Open(archivePath);
info = FindComicInfoXml(archive.Entries.Where(entry => !entry.IsDirectory));
}
else if (TarArchive.IsTarFile(archivePath))
{
using var archive = TarArchive.Open(archivePath);
info = FindComicInfoXml(archive.Entries.Where(entry => !entry.IsDirectory));
}
if (info != null)
{
return info.Summary;
}
_logger.LogError("Could not parse archive file");
}
catch (Exception ex)
{
_logger.LogError(ex, "Unable to validate archive ({ArchivePath}) due to problem opening archive", archivePath);
}
return false;
}
public string GetSummaryInfo(string archivePath)
{
var summary = "";
if (!IsValidArchive(archivePath)) return summary;
using var archive = ZipFile.OpenRead(archivePath);
if (!archive.HasFiles()) return summary;
var info = archive.Entries.SingleOrDefault(x => Path.GetFileNameWithoutExtension(x.Name).ToLower() == "comicinfo" && Parser.Parser.IsXml(x.FullName));
if (info == null) return summary;
// Parse XML file
try
{
using var stream = info.Open();
var serializer = new XmlSerializer(typeof(ComicInfo));
ComicInfo comicInfo =
(ComicInfo)serializer.Deserialize(stream);
if (comicInfo != null)
{
return comicInfo.Summary;
}
}
catch (AggregateException ex)
{
_logger.LogError(ex, "There was an issue parsing ComicInfo.xml from {ArchivePath}", archivePath);
_logger.LogError(ex, "There was an exception when reading archive stream: {Filepath}", archivePath);
}
return summary;
}
/// <summary>
/// Writes each of the supplied archive entries into <paramref name="extractPath"/>, with the
/// ExtractFullPath and Overwrite extraction options both turned off.
/// </summary>
/// <param name="entries">Archive entries to write out.</param>
/// <param name="extractPath">Destination directory passed to each entry's WriteToDirectory call.</param>
private void ExtractArchiveEntities(IEnumerable<IArchiveEntry> entries, string extractPath)
{
    foreach (var archiveEntry in entries)
    {
        var extractionOptions = new ExtractionOptions
        {
            ExtractFullPath = false,
            Overwrite = false
        };

        archiveEntry.WriteToDirectory(extractPath, extractionOptions);
    }
}
/// <summary>
/// Extracts an archive to a temp cache directory. Returns path to new directory. If temp cache directory already exists,
/// will return that without performing an extraction. Returns empty string if there are any invalidations which would
@ -187,29 +290,39 @@ namespace API.Services
/// <returns></returns>
public void ExtractArchive(string archivePath, string extractPath)
{
if (!IsValidArchive(archivePath)) return;
if (!File.Exists(archivePath)) return;
if (Directory.Exists(extractPath))
var sw = Stopwatch.StartNew();
if (SharpCompress.Archives.Zip.ZipArchive.IsZipFile(archivePath))
{
_logger.LogDebug("Archive {ArchivePath} has already been extracted. Returning existing folder", archivePath);
using var archive = SharpCompress.Archives.Zip.ZipArchive.Open(archivePath);
ExtractArchiveEntities(archive.Entries.Where(entry => !entry.IsDirectory), extractPath);
}
else if (GZipArchive.IsGZipFile(archivePath))
{
using var archive = GZipArchive.Open(archivePath);
ExtractArchiveEntities(archive.Entries.Where(entry => !entry.IsDirectory), extractPath);
} else if (RarArchive.IsRarFile(archivePath))
{
using var archive = RarArchive.Open(archivePath);
ExtractArchiveEntities(archive.Entries.Where(entry => !entry.IsDirectory), extractPath);
} else if (SevenZipArchive.IsSevenZipFile(archivePath))
{
using var archive = SevenZipArchive.Open(archivePath);
ExtractArchiveEntities(archive.Entries.Where(entry => !entry.IsDirectory), extractPath);
}
else if (TarArchive.IsTarFile(archivePath))
{
using var archive = TarArchive.Open(archivePath);
ExtractArchiveEntities(archive.Entries.Where(entry => !entry.IsDirectory), extractPath);
}
else
{
_logger.LogError("Could not parse archive file");
return;
}
Stopwatch sw = Stopwatch.StartNew();
using ZipArchive archive = ZipFile.OpenRead(archivePath);
var needsFlattening = ArchiveNeedsFlattening(archive);
if (!archive.HasFiles() && !needsFlattening) return;
archive.ExtractToDirectory(extractPath, true);
_logger.LogDebug("Extracted archive to {ExtractPath} in {ElapsedMilliseconds} milliseconds", extractPath, sw.ElapsedMilliseconds);
if (needsFlattening)
{
sw = Stopwatch.StartNew();
_logger.LogInformation("Extracted archive is nested in root folder, flattening...");
new DirectoryInfo(extractPath).Flatten();
_logger.LogInformation("Flattened in {ElapsedMilliseconds} milliseconds", sw.ElapsedMilliseconds);
}
_logger.LogDebug("[Fallback] Extracted archive to {ExtractPath} in {ElapsedMilliseconds} milliseconds", extractPath, sw.ElapsedMilliseconds);
}
}
}