Kavita/API/Services/ArchiveService.cs
Joseph Milazzo 4e49aa47ce
Change Detection: On Push aka UI Smoothness (#1369)
* Updated Series Info Cards to use OnPush and hooked in progress events when we do a mark as read/unread on entities. These events update progress bars but also will now trigger a re-calculation on Read Time Left.

* Removed Library Card Component

* Refactored manga reader title and subtitle calculation to the backend.

* Coverted card actionables to onPush

* Series Card on push cleanup

* Updated edit collection tags for on push

* Update cover image chooser for on push

* Cleaned up carsouel reel

* Updated cover image to allow for uploading gif and webp files

* Bulk add to collection on push

* Updated bulk operation to use on push. Updated bulk operation to have mark as unread and read buttons explicitly. Updated so add to collection is visible and delete.

Fixed a bug where manage library component wasn't invoking the trackBy function

* Updating entity title for on push

* Removed file info component

* Updated Mange Library for on push

* Entity info cards on push

* List item on push

* Updated icon and title for on push and fixed some missing change detection on series detail

* Restricted the typeahead interface to simplify the design

* Edit Series Relation now shows a value in the dropdown for Parent relationships and disables the field.

* Updated edit series relation to focus on new typeahead when adding a new relationship

* Added some documentation and when Scanning a library, don't allow the user to enqueue the same job multiple times.

* Applied the No-enqueue if already enqueued logic to other tasks

* Library detail on push

* Updated events widget to onpush

* Card detail drawer on push. Card detail cover chooser now will show all chapter's covers for selection in cover chooser.

* Chapter metadata detail on push

* Removed Card Detail modal

* All collections on push

* Removed some comments

* Updated bulk selection to use an observable rather than function calls so new on push strategy works

* collection detail now uses on push and scroller is placed on correct element

* Updated library recommended to on push. Ensure that when mark as read occurs, the appropriate streams are refreshed.

* Updated library detail to on push

* Update metadata fiter to onpush. Bugs found and reported to Project

* person badge on push

* Read more on push

* Updated tag badge to on push

* User login on push

* When initing side nav, don't call an authenticated api until we are sure a user is logged in

* Updated splash container to on push

* Dashboard on push

* Side nav slight refactor around some api calls

* Cleaned up series card on push to use same cdRef naming convention

* Updated Static Files to use caching

* Added width and height to logo image

* shortcuts modal on push

* reading lists on push

* Reading list detail on push

* draggable ordered list on push

* Refactored reading-list-detail to use a new item which drastically reduces renders on operations

* series format on push

* circular loader on push

* Badge Expander on push

* update notification modal on push

* drawer on push

* Edit Series Modal on push

* reset password on push

* review series modal on push

* series metadata detail on push

* theme manager on push

* confirm reset password on push

* register on push

* confirm migration email on push

* confirm email on push

* add email to account migration on push

* user preferences on push. Made global settings default open

* edit series relation on push

* Fixed an edge case bug for next chapter where if the current volume had a single chapter of 1 and the next volume had a chapter number of 0, it would say there are no more chapters.

* Updated infinite scroller with on push support

* Moved some animations over to typeahead, not integrated yet.

* Manga reader is now on push

* Reader settings on push

* refactored how we close the book

* Updated table of contents for on push

* Updated book reader for on push. Fixed a bug where table of contents wasn't showing current page anchor due to a scroll calulation bug

* Small code tweak

* Icon and title on push

* nav header on push

* grouped typeahead on push

* typeahead on push and added a new trackby identity function to allow even faster rendering of big lists

* pdf reader on push

* code cleanup
2022-07-11 08:57:07 -07:00

486 lines
23 KiB
C#

using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.IO.Compression;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Xml.Serialization;
using API.Archive;
using API.Data.Metadata;
using API.Extensions;
using API.Services.Tasks;
using Kavita.Common;
using Microsoft.Extensions.Logging;
using SharpCompress.Archives;
using SharpCompress.Common;
namespace API.Services
{
public interface IArchiveService
{
void ExtractArchive(string archivePath, string extractPath);
int GetNumberOfPagesFromArchive(string archivePath);
string GetCoverImage(string archivePath, string fileName, string outputDirectory);
bool IsValidArchive(string archivePath);
ComicInfo GetComicInfo(string archivePath);
ArchiveLibrary CanOpen(string archivePath);
bool ArchiveNeedsFlattening(ZipArchive archive);
Task<Tuple<byte[], string>> CreateZipForDownload(IEnumerable<string> files, string tempFolder);
}
/// <summary>
/// Responsible for manipulating Archive files. Used by <see cref="CacheService"/> and <see cref="ScannerService"/>
/// </summary>
// ReSharper disable once ClassWithVirtualMembersNeverInherited.Global
public class ArchiveService : IArchiveService
{
private readonly ILogger<ArchiveService> _logger;
private readonly IDirectoryService _directoryService;
private readonly IImageService _imageService;
private const string ComicInfoFilename = "comicinfo";
public ArchiveService(ILogger<ArchiveService> logger, IDirectoryService directoryService, IImageService imageService)
{
_logger = logger;
_directoryService = directoryService;
_imageService = imageService;
}
/// <summary>
/// Checks if a File can be opened. Requires up to 2 opens of the filestream.
/// </summary>
/// <param name="archivePath"></param>
/// <returns></returns>
public virtual ArchiveLibrary CanOpen(string archivePath)
{
if (string.IsNullOrEmpty(archivePath) || !(File.Exists(archivePath) && Parser.Parser.IsArchive(archivePath) || Parser.Parser.IsEpub(archivePath))) return ArchiveLibrary.NotSupported;
var ext = _directoryService.FileSystem.Path.GetExtension(archivePath).ToUpper();
if (ext.Equals(".CBR") || ext.Equals(".RAR")) return ArchiveLibrary.SharpCompress;
try
{
using var a2 = ZipFile.OpenRead(archivePath);
return ArchiveLibrary.Default;
}
catch (Exception)
{
try
{
using var a1 = ArchiveFactory.Open(archivePath);
return ArchiveLibrary.SharpCompress;
}
catch (Exception)
{
return ArchiveLibrary.NotSupported;
}
}
}
public int GetNumberOfPagesFromArchive(string archivePath)
{
if (!IsValidArchive(archivePath))
{
_logger.LogError("Archive {ArchivePath} could not be found", archivePath);
return 0;
}
try
{
var libraryHandler = CanOpen(archivePath);
switch (libraryHandler)
{
case ArchiveLibrary.Default:
{
using var archive = ZipFile.OpenRead(archivePath);
return archive.Entries.Count(e => !Parser.Parser.HasBlacklistedFolderInPath(e.FullName) && Parser.Parser.IsImage(e.FullName));
}
case ArchiveLibrary.SharpCompress:
{
using var archive = ArchiveFactory.Open(archivePath);
return archive.Entries.Count(entry => !entry.IsDirectory &&
!Parser.Parser.HasBlacklistedFolderInPath(Path.GetDirectoryName(entry.Key) ?? string.Empty)
&& Parser.Parser.IsImage(entry.Key));
}
case ArchiveLibrary.NotSupported:
_logger.LogWarning("[GetNumberOfPagesFromArchive] This archive cannot be read: {ArchivePath}. Defaulting to 0 pages", archivePath);
return 0;
default:
_logger.LogWarning("[GetNumberOfPagesFromArchive] There was an exception when reading archive stream: {ArchivePath}. Defaulting to 0 pages", archivePath);
return 0;
}
}
catch (Exception ex)
{
_logger.LogWarning(ex, "[GetNumberOfPagesFromArchive] There was an exception when reading archive stream: {ArchivePath}. Defaulting to 0 pages", archivePath);
return 0;
}
}
/// <summary>
/// Finds the first instance of a folder entry and returns it
/// </summary>
/// <param name="entryFullNames"></param>
/// <returns>Entry name of match, null if no match</returns>
public static string FindFolderEntry(IEnumerable<string> entryFullNames)
{
var result = entryFullNames
.Where(path => !(Path.EndsInDirectorySeparator(path) || Parser.Parser.HasBlacklistedFolderInPath(path) || path.StartsWith(Parser.Parser.MacOsMetadataFileStartsWith)))
.OrderByNatural(Path.GetFileNameWithoutExtension)
.FirstOrDefault(Parser.Parser.IsCoverImage);
return string.IsNullOrEmpty(result) ? null : result;
}
/// <summary>
/// Returns first entry that is an image and is not in a blacklisted folder path. Uses <see cref="OrderByNatural"/> for ordering files
/// </summary>
/// <param name="entryFullNames"></param>
/// <returns>Entry name of match, null if no match</returns>
public static string? FirstFileEntry(IEnumerable<string> entryFullNames, string archiveName)
{
// First check if there are any files that are not in a nested folder before just comparing by filename. This is needed
// because NaturalSortComparer does not work with paths and doesn't seem 001.jpg as before chapter 1/001.jpg.
var fullNames = entryFullNames
.Where(path => !(Path.EndsInDirectorySeparator(path) || Parser.Parser.HasBlacklistedFolderInPath(path) || path.StartsWith(Parser.Parser.MacOsMetadataFileStartsWith)) && Parser.Parser.IsImage(path))
.OrderByNatural(c => c.GetFullPathWithoutExtension())
.ToList();
if (fullNames.Count == 0) return null;
var nonNestedFile = fullNames.Where(entry => (Path.GetDirectoryName(entry) ?? string.Empty).Equals(archiveName))
.OrderByNatural(c => c.GetFullPathWithoutExtension())
.FirstOrDefault();
if (!string.IsNullOrEmpty(nonNestedFile)) return nonNestedFile;
// Check the first folder and sort within that to see if we can find a file, else fallback to first file with basic sort.
// Get first folder, then sort within that
var firstDirectoryFile = fullNames.OrderByNatural(Path.GetDirectoryName).FirstOrDefault();
if (!string.IsNullOrEmpty(firstDirectoryFile))
{
var firstDirectory = Path.GetDirectoryName(firstDirectoryFile);
if (!string.IsNullOrEmpty(firstDirectory))
{
var firstDirectoryResult = fullNames.Where(f => firstDirectory.Equals(Path.GetDirectoryName(f)))
.OrderByNatural(Path.GetFileNameWithoutExtension)
.FirstOrDefault();
if (!string.IsNullOrEmpty(firstDirectoryResult)) return firstDirectoryResult;
}
}
var result = fullNames
.OrderByNatural(Path.GetFileNameWithoutExtension)
.FirstOrDefault();
return string.IsNullOrEmpty(result) ? null : result;
}
/// <summary>
/// Generates byte array of cover image.
/// Given a path to a compressed file <see cref="Parser.Parser.ArchiveFileExtensions"/>, will ensure the first image (respects directory structure) is returned unless
/// a folder/cover.(image extension) exists in the the compressed file (if duplicate, the first is chosen)
///
/// This skips over any __MACOSX folder/file iteration.
/// </summary>
/// <remarks>This always creates a thumbnail</remarks>
/// <param name="archivePath"></param>
/// <param name="fileName">File name to use based on context of entity.</param>
/// <param name="outputDirectory">Where to output the file, defaults to covers directory</param>
/// <returns></returns>
public string GetCoverImage(string archivePath, string fileName, string outputDirectory)
{
if (archivePath == null || !IsValidArchive(archivePath)) return string.Empty;
try
{
var libraryHandler = CanOpen(archivePath);
switch (libraryHandler)
{
case ArchiveLibrary.Default:
{
using var archive = ZipFile.OpenRead(archivePath);
var entryName = FindCoverImageFilename(archivePath, archive.Entries.Select(e => e.FullName));
var entry = archive.Entries.Single(e => e.FullName == entryName);
using var stream = entry.Open();
return _imageService.WriteCoverThumbnail(stream, fileName, outputDirectory);
}
case ArchiveLibrary.SharpCompress:
{
using var archive = ArchiveFactory.Open(archivePath);
var entryNames = archive.Entries.Where(archiveEntry => !archiveEntry.IsDirectory).Select(e => e.Key).ToList();
var entryName = FindCoverImageFilename(archivePath, entryNames);
var entry = archive.Entries.Single(e => e.Key == entryName);
using var stream = entry.OpenEntryStream();
return _imageService.WriteCoverThumbnail(stream, fileName, outputDirectory);
}
case ArchiveLibrary.NotSupported:
_logger.LogWarning("[GetCoverImage] This archive cannot be read: {ArchivePath}. Defaulting to no cover image", archivePath);
return string.Empty;
default:
_logger.LogWarning("[GetCoverImage] There was an exception when reading archive stream: {ArchivePath}. Defaulting to no cover image", archivePath);
return string.Empty;
}
}
catch (Exception ex)
{
_logger.LogWarning(ex, "[GetCoverImage] There was an exception when reading archive stream: {ArchivePath}. Defaulting to no cover image", archivePath);
}
return string.Empty;
}
/// <summary>
/// Given a list of image paths (assume within an archive), find the filename that corresponds to the cover
/// </summary>
/// <param name="archivePath"></param>
/// <param name="entryNames"></param>
/// <returns></returns>
public static string FindCoverImageFilename(string archivePath, IEnumerable<string> entryNames)
{
var entryName = FindFolderEntry(entryNames) ?? FirstFileEntry(entryNames, Path.GetFileName(archivePath));
return entryName;
}
/// <summary>
/// Given an archive stream, will assess whether directory needs to be flattened so that the extracted archive files are directly
/// under extract path and not nested in subfolders. See <see cref="DirectoryService"/> Flatten method.
/// </summary>
/// <param name="archive">An opened archive stream</param>
/// <returns></returns>
public bool ArchiveNeedsFlattening(ZipArchive archive)
{
// Sometimes ZipArchive will list the directory and others it will just keep it in the FullName
return archive.Entries.Count > 0 &&
!Path.HasExtension(archive.Entries.ElementAt(0).FullName) ||
archive.Entries.Any(e => e.FullName.Contains(Path.AltDirectorySeparatorChar) && !Parser.Parser.HasBlacklistedFolderInPath(e.FullName));
}
/// <summary>
/// Creates a zip file form the listed files and outputs to the temp folder.
/// </summary>
/// <param name="files">List of files to be zipped up. Should be full file paths.</param>
/// <param name="tempFolder">Temp folder name to use for preparing the files. Will be created and deleted</param>
/// <returns>All bytes for the given file in a Tuple</returns>
/// <exception cref="KavitaException"></exception>
public async Task<Tuple<byte[], string>> CreateZipForDownload(IEnumerable<string> files, string tempFolder)
{
// TODO: Refactor CreateZipForDownload to return the temp file so we can stream it from temp
var dateString = DateTime.Now.ToShortDateString().Replace("/", "_");
var tempLocation = Path.Join(_directoryService.TempDirectory, $"{tempFolder}_{dateString}");
_directoryService.ExistOrCreate(tempLocation);
if (!_directoryService.CopyFilesToDirectory(files, tempLocation))
{
throw new KavitaException("Unable to copy files to temp directory archive download.");
}
var zipPath = Path.Join(_directoryService.TempDirectory, $"kavita_{tempFolder}_{dateString}.zip");
try
{
ZipFile.CreateFromDirectory(tempLocation, zipPath);
}
catch (AggregateException ex)
{
_logger.LogError(ex, "There was an issue creating temp archive");
throw new KavitaException("There was an issue creating temp archive");
}
var fileBytes = await _directoryService.ReadFileAsync(zipPath);
_directoryService.ClearAndDeleteDirectory(tempLocation); // NOTE: For sending back just zip, just schedule this to be called after the file is returned or let next temp storage cleanup take care of it
(new FileInfo(zipPath)).Delete();
return Tuple.Create(fileBytes, zipPath);
}
/// <summary>
/// Test if the archive path exists and an archive
/// </summary>
/// <param name="archivePath"></param>
/// <returns></returns>
public bool IsValidArchive(string archivePath)
{
if (!File.Exists(archivePath))
{
_logger.LogWarning("Archive {ArchivePath} could not be found", archivePath);
return false;
}
if (Parser.Parser.IsArchive(archivePath) || Parser.Parser.IsEpub(archivePath)) return true;
_logger.LogWarning("Archive {ArchivePath} is not a valid archive", archivePath);
return false;
}
private static bool ValidComicInfoArchiveEntry(string fullName, string name)
{
var filenameWithoutExtension = Path.GetFileNameWithoutExtension(name).ToLower();
return !Parser.Parser.HasBlacklistedFolderInPath(fullName)
&& filenameWithoutExtension.Equals(ComicInfoFilename, StringComparison.InvariantCultureIgnoreCase)
&& !filenameWithoutExtension.StartsWith(Parser.Parser.MacOsMetadataFileStartsWith)
&& Parser.Parser.IsXml(name);
}
/// <summary>
/// This can be null if nothing is found or any errors occur during access
/// </summary>
/// <param name="archivePath"></param>
/// <returns></returns>
public ComicInfo? GetComicInfo(string archivePath)
{
if (!IsValidArchive(archivePath)) return null;
try
{
if (!File.Exists(archivePath)) return null;
var libraryHandler = CanOpen(archivePath);
switch (libraryHandler)
{
case ArchiveLibrary.Default:
{
using var archive = ZipFile.OpenRead(archivePath);
var entry = archive.Entries.FirstOrDefault(x => ValidComicInfoArchiveEntry(x.FullName, x.Name));
if (entry != null)
{
using var stream = entry.Open();
var serializer = new XmlSerializer(typeof(ComicInfo));
var info = (ComicInfo) serializer.Deserialize(stream);
ComicInfo.CleanComicInfo(info);
return info;
}
break;
}
case ArchiveLibrary.SharpCompress:
{
using var archive = ArchiveFactory.Open(archivePath);
var entry = archive.Entries.FirstOrDefault(entry =>
ValidComicInfoArchiveEntry(Path.GetDirectoryName(entry.Key), entry.Key));
if (entry != null)
{
using var stream = entry.OpenEntryStream();
var serializer = new XmlSerializer(typeof(ComicInfo));
var info = (ComicInfo) serializer.Deserialize(stream);
ComicInfo.CleanComicInfo(info);
return info;
}
break;
}
case ArchiveLibrary.NotSupported:
_logger.LogWarning("[GetComicInfo] This archive cannot be read: {ArchivePath}", archivePath);
return null;
default:
_logger.LogWarning(
"[GetComicInfo] There was an exception when reading archive stream: {ArchivePath}",
archivePath);
return null;
}
}
catch (Exception ex)
{
_logger.LogWarning(ex, "[GetComicInfo] There was an exception when reading archive stream: {Filepath}", archivePath);
}
return null;
}
private void ExtractArchiveEntities(IEnumerable<IArchiveEntry> entries, string extractPath)
{
_directoryService.ExistOrCreate(extractPath);
foreach (var entry in entries)
{
entry.WriteToDirectory(extractPath, new ExtractionOptions()
{
ExtractFullPath = true, // Don't flatten, let the flatterner ensure correct order of nested folders
Overwrite = false
});
}
}
private void ExtractArchiveEntries(ZipArchive archive, string extractPath)
{
var needsFlattening = ArchiveNeedsFlattening(archive);
if (!archive.HasFiles() && !needsFlattening) return;
archive.ExtractToDirectory(extractPath, true);
if (!needsFlattening) return;
_logger.LogDebug("Extracted archive is nested in root folder, flattening...");
_directoryService.Flatten(extractPath);
}
/// <summary>
/// Extracts an archive to a temp cache directory. Returns path to new directory. If temp cache directory already exists,
/// will return that without performing an extraction. Returns empty string if there are any invalidations which would
/// prevent operations to perform correctly (missing archivePath file, empty archive, etc).
/// </summary>
/// <param name="archivePath">A valid file to an archive file.</param>
/// <param name="extractPath">Path to extract to</param>
/// <returns></returns>
public void ExtractArchive(string archivePath, string extractPath)
{
if (!IsValidArchive(archivePath)) return;
if (Directory.Exists(extractPath)) return;
if (!_directoryService.FileSystem.File.Exists(archivePath))
{
_logger.LogError("{Archive} does not exist on disk", archivePath);
throw new KavitaException($"{archivePath} does not exist on disk");
}
var sw = Stopwatch.StartNew();
try
{
var libraryHandler = CanOpen(archivePath);
switch (libraryHandler)
{
case ArchiveLibrary.Default:
{
using var archive = ZipFile.OpenRead(archivePath);
ExtractArchiveEntries(archive, extractPath);
break;
}
case ArchiveLibrary.SharpCompress:
{
using var archive = ArchiveFactory.Open(archivePath);
ExtractArchiveEntities(archive.Entries.Where(entry => !entry.IsDirectory
&& !Parser.Parser.HasBlacklistedFolderInPath(Path.GetDirectoryName(entry.Key) ?? string.Empty)
&& Parser.Parser.IsImage(entry.Key)), extractPath);
break;
}
case ArchiveLibrary.NotSupported:
_logger.LogWarning("[ExtractArchive] This archive cannot be read: {ArchivePath}", archivePath);
return;
default:
_logger.LogWarning("[ExtractArchive] There was an exception when reading archive stream: {ArchivePath}", archivePath);
return;
}
}
catch (Exception e)
{
_logger.LogWarning(e, "[ExtractArchive] There was a problem extracting {ArchivePath} to {ExtractPath}",archivePath, extractPath);
throw new KavitaException(
$"There was an error when extracting {archivePath}. Check the file exists, has read permissions or the server OS can support all path characters.");
}
_logger.LogDebug("Extracted archive to {ExtractPath} in {ElapsedMilliseconds} milliseconds", extractPath, sw.ElapsedMilliseconds);
}
}
}