Performance Improvements and Some Polish (#1702)

* Auto scale reading timeline

* Added benchmarks for ImageSharp and NetVips. When an epub has a malformed page, catch the error and present it better to the user.

* Added a hint for an upcoming feature

* Slightly sped up word count for epubs

* Added one more test to reflect actual code.

* Some light cleanup

* Use compact number for stat lists

* Fixed brightness being broken on manga reader

* Replaced the ImageSharp version of ConvertToWebP with NetVips, which is MUCH lighter on memory and CPU.

* Added last modified on the progress dto for CdDisplayEx.

* Code cleanup

* Forgot one cleanup
This commit is contained in:
Joe Milazzo 2022-12-17 09:07:30 -06:00 committed by GitHub
parent d1596c4ab7
commit b62d340bb3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
15 changed files with 192 additions and 123 deletions

View File

@ -1,9 +1,14 @@
using System;
using System.IO;
using System.IO.Abstractions;
using Microsoft.Extensions.Logging.Abstractions;
using API.Services;
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Order;
using SixLabors.ImageSharp;
using SixLabors.ImageSharp.Formats.Png;
using SixLabors.ImageSharp.Formats.Webp;
using SixLabors.ImageSharp.Processing;
namespace API.Benchmark;
@ -17,6 +22,10 @@ public class ArchiveServiceBenchmark
private readonly ArchiveService _archiveService;
private readonly IDirectoryService _directoryService;
private readonly IImageService _imageService;
private readonly PngEncoder _pngEncoder = new PngEncoder();
private readonly WebpEncoder _webPEncoder = new WebpEncoder();
private const string SourceImage = "C:/Users/josep/Pictures/obey_by_grrsa-d6llkaa_colored_by_me.png";
public ArchiveServiceBenchmark()
{
@ -49,6 +58,52 @@ public class ArchiveServiceBenchmark
}
}
/// <summary>
/// Benchmarks ImageSharp: loads <see cref="SourceImage"/>, resizes it to a width of 320 and saves the result as a PNG.
/// </summary>
[Benchmark]
public void ImageSharp_ExtractImage_PNG()
{
    var outputDirectory = "C:/Users/josep/Pictures/imagesharp/";
    _directoryService.ExistOrCreate(outputDirectory);

    // The source image is only read. Without an explicit FileAccess the stream is opened ReadWrite,
    // which fails on read-only files and needlessly asks for write permission.
    using var stream = new FileStream(SourceImage, FileMode.Open, FileAccess.Read, FileShare.Read);
    using var thumbnail2 = SixLabors.ImageSharp.Image.Load(stream);

    // Height of 0: ImageSharp derives the missing dimension from the aspect ratio (see ImageSharp Resize docs)
    thumbnail2.Mutate(x => x.Resize(320, 0));
    thumbnail2.Save(_directoryService.FileSystem.Path.Join(outputDirectory, "imagesharp.png"), _pngEncoder);
}
/// <summary>
/// Benchmarks ImageSharp: loads <see cref="SourceImage"/>, resizes it to a width of 320 and saves the result as WebP.
/// </summary>
[Benchmark]
public void ImageSharp_ExtractImage_WebP()
{
    var outputDirectory = "C:/Users/josep/Pictures/imagesharp/";
    _directoryService.ExistOrCreate(outputDirectory);

    // Read-only access is all that is needed; the FileStream(path, mode) overload would request ReadWrite.
    using var stream = new FileStream(SourceImage, FileMode.Open, FileAccess.Read, FileShare.Read);
    using var thumbnail2 = SixLabors.ImageSharp.Image.Load(stream);

    // Height of 0: ImageSharp derives the missing dimension from the aspect ratio (see ImageSharp Resize docs)
    thumbnail2.Mutate(x => x.Resize(320, 0));
    thumbnail2.Save(_directoryService.FileSystem.Path.Join(outputDirectory, "imagesharp.webp"), _webPEncoder);
}
/// <summary>
/// Benchmarks NetVips: creates a thumbnail of <see cref="SourceImage"/> (320 passed as the target width)
/// and writes it to a .png file.
/// </summary>
[Benchmark]
public void NetVips_ExtractImage_PNG()
{
    var outputDirectory = "C:/Users/josep/Pictures/netvips/";
    _directoryService.ExistOrCreate(outputDirectory);

    // Read-only access is all that is needed; the FileStream(path, mode) overload would request ReadWrite.
    using var stream = new FileStream(SourceImage, FileMode.Open, FileAccess.Read, FileShare.Read);
    using var thumbnail = NetVips.Image.ThumbnailStream(stream, 320);

    // NOTE(review): output format is presumably chosen by libvips from the ".png" suffix — confirm
    thumbnail.WriteToFile(_directoryService.FileSystem.Path.Join(outputDirectory, "netvips.png"));
}
/// <summary>
/// Benchmarks NetVips: creates a thumbnail of <see cref="SourceImage"/> (320 passed as the target width)
/// and writes it to a .webp file.
/// </summary>
[Benchmark]
public void NetVips_ExtractImage_WebP()
{
    var outputDirectory = "C:/Users/josep/Pictures/netvips/";
    _directoryService.ExistOrCreate(outputDirectory);

    // Read-only access is all that is needed; the FileStream(path, mode) overload would request ReadWrite.
    using var stream = new FileStream(SourceImage, FileMode.Open, FileAccess.Read, FileShare.Read);
    using var thumbnail = NetVips.Image.ThumbnailStream(stream, 320);

    // NOTE(review): output format is presumably chosen by libvips from the ".webp" suffix — confirm
    thumbnail.WriteToFile(_directoryService.FileSystem.Path.Join(outputDirectory, "netvips.webp"));
}
// Benchmark to test default GetNumberOfPages from archive
// vs a new method where I try to open the archive and return said stream
}

View File

@ -1,5 +1,6 @@
using System;
using System.Linq;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using API.Services;
using BenchmarkDotNet.Attributes;
@ -9,34 +10,58 @@ using VersOne.Epub;
namespace API.Benchmark;
[StopOnFirstError]
[MemoryDiagnoser]
[Orderer(SummaryOrderPolicy.FastestToSlowest)]
[RankColumn]
[SimpleJob(launchCount: 1, warmupCount: 3, targetCount: 5, invocationCount: 100, id: "Epub"), ShortRunJob]
[Orderer(SummaryOrderPolicy.FastestToSlowest)]
[SimpleJob(launchCount: 1, warmupCount: 5, targetCount: 20)]
public class EpubBenchmark
{
private const string FilePath = @"E:\Books\Invaders of the Rokujouma\Invaders of the Rokujouma - Volume 01.epub";
private readonly Regex WordRegex = new Regex(@"\b\w+\b", RegexOptions.Compiled | RegexOptions.IgnoreCase);
// [Benchmark]
// public async Task GetWordCount_PassByString()
// {
// using var book = await EpubReader.OpenBookAsync(FilePath, BookService.BookReaderOptions);
// foreach (var bookFile in book.Content.Html.Values)
// {
// GetBookWordCount_PassByString(await bookFile.ReadContentAsTextAsync());
// ;
// }
// }
[Benchmark]
public static async Task GetWordCount_PassByString()
public async Task GetWordCount_PassByRef()
{
using var book = await EpubReader.OpenBookAsync("Data/book-test.epub", BookService.BookReaderOptions);
using var book = await EpubReader.OpenBookAsync(FilePath, BookService.BookReaderOptions);
foreach (var bookFile in book.Content.Html.Values)
{
Console.WriteLine(GetBookWordCount_PassByString(await bookFile.ReadContentAsTextAsync()));
;
await GetBookWordCount_PassByRef(bookFile);
}
}
[Benchmark]
public static async Task GetWordCount_PassByRef()
public async Task GetBookWordCount_SumEarlier()
{
using var book = await EpubReader.OpenBookAsync("Data/book-test.epub", BookService.BookReaderOptions);
using var book = await EpubReader.OpenBookAsync(FilePath, BookService.BookReaderOptions);
foreach (var bookFile in book.Content.Html.Values)
{
Console.WriteLine(await GetBookWordCount_PassByRef(bookFile));
await GetBookWordCount_SumEarlier(bookFile);
}
}
private static int GetBookWordCount_PassByString(string fileContents)
[Benchmark]
public async Task GetBookWordCount_Regex()
{
using var book = await EpubReader.OpenBookAsync(FilePath, BookService.BookReaderOptions);
foreach (var bookFile in book.Content.Html.Values)
{
await GetBookWordCount_Regex(bookFile);
}
}
private int GetBookWordCount_PassByString(string fileContents)
{
var doc = new HtmlDocument();
doc.LoadHtml(fileContents);
@ -51,18 +76,41 @@ public class EpubBenchmark
.Sum();
}
private static async Task<int> GetBookWordCount_PassByRef(EpubContentFileRef bookFile)
private async Task<int> GetBookWordCount_PassByRef(EpubContentFileRef bookFile)
{
var doc = new HtmlDocument();
doc.LoadHtml(await bookFile.ReadContentAsTextAsync());
var delimiter = new char[] {' '};
return doc.DocumentNode.SelectNodes("//body//text()[not(parent::script)]")
.Select(node => node.InnerText)
var textNodes = doc.DocumentNode.SelectNodes("//body//text()[not(parent::script)]");
if (textNodes == null) return 0;
return textNodes.Select(node => node.InnerText)
.Select(text => text.Split(delimiter, StringSplitOptions.RemoveEmptyEntries)
.Where(s => char.IsLetter(s[0])))
.Select(words => words.Count())
.Where(wordCount => wordCount > 0)
.Sum();
}
/// <summary>
/// Counts the words in a single epub content file, summing directly over the per-node word sequences.
/// A word is a space-separated token of a body text node (script children excluded) whose first character is a letter.
/// </summary>
/// <param name="bookFile">Epub content file whose HTML is read and parsed</param>
/// <returns>Number of words found, or 0 when the document has no matching text nodes</returns>
private async Task<int> GetBookWordCount_SumEarlier(EpubContentFileRef bookFile)
{
    var doc = new HtmlDocument();
    doc.LoadHtml(await bookFile.ReadContentAsTextAsync());

    // SelectNodes returns null (not an empty collection) when nothing matches, so guard before using LINQ.
    // DefaultIfEmpty() is not a substitute: it throws on a null source and would inject a null node otherwise.
    var textNodes = doc.DocumentNode.SelectNodes("//body//text()[not(parent::script)]");
    if (textNodes == null) return 0;

    return textNodes
        .Select(node => node.InnerText.Split(' ', StringSplitOptions.RemoveEmptyEntries)
            .Where(s => char.IsLetter(s[0])))
        .Sum(words => words.Count());
}
/// <summary>
/// Counts the words in a single epub content file by summing the <c>WordRegex</c> matches of every
/// body text node (script children excluded).
/// </summary>
/// <param name="bookFile">Epub content file whose HTML is read and parsed</param>
/// <returns>Total number of regex matches, or 0 when the document has no matching text nodes</returns>
private async Task<int> GetBookWordCount_Regex(EpubContentFileRef bookFile)
{
    var doc = new HtmlDocument();
    doc.LoadHtml(await bookFile.ReadContentAsTextAsync());

    // SelectNodes returns null (not an empty collection) when nothing matches; Sum would throw on it
    var textNodes = doc.DocumentNode.SelectNodes("//body//text()[not(parent::script)]");
    if (textNodes == null) return 0;

    return textNodes.Sum(node => WordRegex.Matches(node.InnerText).Count);
}
}

View File

@ -1,4 +1,5 @@
using System.ComponentModel.DataAnnotations;
using System;
using System.ComponentModel.DataAnnotations;
namespace API.DTOs;
@ -19,4 +20,8 @@ public class ProgressDto
/// on pages that combine multiple "chapters".
/// </summary>
public string BookScrollId { get; set; }
/// <summary>
/// Last time the progress was synced from UI or external app
/// </summary>
public DateTime LastModified { get; set; }
}

View File

@ -52,7 +52,7 @@ public class ChapterRepository : IChapterRepository
_context.Entry(chapter).State = EntityState.Modified;
}
public async Task<IEnumerable<Chapter>> GetChaptersByIdsAsync(IList<int> chapterIds, ChapterIncludes includes)
public async Task<IEnumerable<Chapter>> GetChaptersByIdsAsync(IList<int> chapterIds, ChapterIncludes includes = ChapterIncludes.None)
{
return await _context.Chapter
.Where(c => chapterIds.Contains(c.Id))

View File

@ -49,7 +49,7 @@ public interface IBookService
/// <summary>
/// Extracts a PDF file's pages as images to a target directory
/// </summary>
/// <remarks>This method relies on Docnet which has explict patches from Kavita for ARM support. This should only be used with Tachiyomi</remarks>
/// <remarks>This method relies on Docnet which has explicit patches from Kavita for ARM support. This should only be used with Tachiyomi</remarks>
/// <param name="fileFilePath"></param>
/// <param name="targetDirectory">Where the files will be extracted to. If doesn't exist, will be created.</param>
void ExtractPdfImages(string fileFilePath, string targetDirectory);
@ -401,7 +401,7 @@ public class BookService : IBookService
{
using var epubBook = EpubReader.OpenBook(filePath, BookReaderOptions);
var publicationDate =
epubBook.Schema.Package.Metadata.Dates.FirstOrDefault(date => date.Event == "publication")?.Date;
epubBook.Schema.Package.Metadata.Dates.FirstOrDefault(pDate => pDate.Event == "publication")?.Date;
if (string.IsNullOrEmpty(publicationDate))
{
@ -533,7 +533,7 @@ public class BookService : IBookService
return 0;
}
public static string EscapeTags(string content)
private static string EscapeTags(string content)
{
content = Regex.Replace(content, @"<script(.*)(/>)", "<script$1></script>");
content = Regex.Replace(content, @"<title(.*)(/>)", "<title$1></title>");
@ -830,6 +830,8 @@ public class BookService : IBookService
var bookPages = await book.GetReadingOrderAsync();
try
{
foreach (var contentFileRef in bookPages)
{
if (page != counter)
@ -861,12 +863,17 @@ public class BookService : IBookService
return await ScopePage(doc, book, apiBase, body, mappings, page);
}
} catch (Exception ex)
{
// NOTE: We can log this to media analysis service
_logger.LogError(ex, "There was an issue reading one of the pages for {Book}", book.FilePath);
}
throw new KavitaException("Could not find the appropriate html for that page");
}
private static void CreateToCChapter(EpubNavigationItemRef navigationItem, IList<BookChapterItem> nestedChapters, IList<BookChapterItem> chaptersList,
IReadOnlyDictionary<string, int> mappings)
private static void CreateToCChapter(EpubNavigationItemRef navigationItem, IList<BookChapterItem> nestedChapters,
ICollection<BookChapterItem> chaptersList, IReadOnlyDictionary<string, int> mappings)
{
if (navigationItem.Link == null)
{

View File

@ -2,6 +2,7 @@
using System.IO;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
using NetVips;
using SixLabors.ImageSharp;
using Image = NetVips.Image;
@ -113,15 +114,15 @@ public class ImageService : IImageService
return filename;
}
/// <summary>
/// Converts the image at <paramref name="filePath"/> to WebP using NetVips and writes it into
/// <paramref name="outputPath"/> under the source file's name with a ".webp" extension.
/// </summary>
/// <param name="filePath">Path of the image to convert</param>
/// <param name="outputPath">Directory the converted file is written to</param>
/// <returns>Path of the written .webp file</returns>
public Task<string> ConvertToWebP(string filePath, string outputPath)
{
    var file = _directoryService.FileSystem.FileInfo.FromFileName(filePath);

    // Strip only the trailing extension. Name.Replace(Extension, "") throws ArgumentException when the
    // file has no extension (empty oldValue) and removes every occurrence of the extension text in the name.
    var fileName = Path.GetFileNameWithoutExtension(file.Name);
    var outputFile = Path.Join(outputPath, fileName + ".webp");

    using var sourceImage = Image.NewFromFile(filePath, false, Enums.Access.SequentialUnbuffered);
    sourceImage.WriteToFile(outputFile);
    return Task.FromResult(outputFile);
}
public async Task<bool> IsImage(string filePath)

View File

@ -26,7 +26,6 @@ public interface IStatisticService
Task<FileExtensionBreakdownDto> GetFileBreakdown();
Task<IEnumerable<TopReadDto>> GetTopUsers(int days);
Task<IEnumerable<ReadHistoryEvent>> GetReadingHistory(int userId);
Task<IEnumerable<ReadHistoryEvent>> GetHistory();
Task<IEnumerable<PagesReadOnADayCount<DateTime>>> ReadCountByDay(int userId = 0);
}
@ -71,20 +70,6 @@ public class StatisticService : IStatisticService
.Where(c => chapterIds.Contains(c.Id))
.SumAsync(c => c.AvgHoursToRead);
// Maybe make this top 5 genres? But usually there are 3-5 genres that are always common...
// Maybe use rating to calculate top genres?
// var genres = await _context.Series
// .Where(s => seriesIds.Contains(s.Id))
// .Select(s => s.Metadata)
// .SelectMany(sm => sm.Genres)
// //.DistinctBy(g => g.NormalizedTitle)
// .ToListAsync();
// How many series of each format have you read? (Epub, Archive, etc)
// Percentage of libraries read. For each library, get the total pages vs read
//var allLibraryIds = await _context.Library.GetUserLibraries(userId).ToListAsync();
var chaptersRead = await _context.AppUserProgresses
.Where(p => p.AppUserId == userId)
.Where(p => libraryIds.Contains(p.LibraryId))
@ -344,43 +329,6 @@ public class StatisticService : IStatisticService
.ToListAsync();
}
public Task<IEnumerable<ReadHistoryEvent>> GetHistory()
{
// _context.AppUserProgresses
// .AsSplitQuery()
// .AsEnumerable()
// .GroupBy(sm => sm.LastModified)
// .Select(sm => new
// {
// User = _context.AppUser.Single(u => u.Id == sm.Key),
// Chapters = _context.Chapter.Where(c => _context.AppUserProgresses
// .Where(u => u.AppUserId == sm.Key)
// .Where(p => p.PagesRead > 0)
// .Select(p => p.ChapterId)
// .Distinct()
// .Contains(c.Id))
// })
// .OrderByDescending(d => d.Chapters.Sum(c => c.AvgHoursToRead))
// .Take(5)
// .ToList();
var firstOfWeek = DateTime.Now.StartOfWeek(DayOfWeek.Monday);
var groupedReadingDays = _context.AppUserProgresses
.Where(x => x.LastModified >= firstOfWeek)
.GroupBy(x => x.LastModified.Day)
.Select(g => new StatCount<int>()
{
Value = g.Key,
Count = _context.AppUserProgresses.Where(p => p.LastModified.Day == g.Key).Select(p => p.ChapterId).Distinct().Count()
})
.AsEnumerable();
// var records = firstOfWeek.Range(7)
// .GroupJoin(groupedReadingDays, wd => wd.Day, lg => lg.Key, (_, lg) => lg.Any() ? lg.First().Count() : 0).ToArray();
return Task.FromResult<IEnumerable<ReadHistoryEvent>>(null);
}
public async Task<IEnumerable<TopReadDto>> GetTopUsers(int days)
{
var libraries = (await _unitOfWork.LibraryRepository.GetLibrariesAsync()).ToList();

View File

@ -196,8 +196,7 @@ public class WordCountAnalyzerService : IWordCountAnalyzerService
return;
}
file.LastFileAnalysis = DateTime.Now;
_unitOfWork.MangaFileRepository.Update(file);
UpdateFileAnalysis(file);
}
chapter.WordCount = sum;
@ -211,8 +210,7 @@ public class WordCountAnalyzerService : IWordCountAnalyzerService
chapter.AvgHoursToRead = est.AvgHours;
foreach (var file in chapter.Files)
{
file.LastFileAnalysis = DateTime.Now;
_unitOfWork.MangaFileRepository.Update(file);
UpdateFileAnalysis(file);
}
_unitOfWork.ChapterRepository.Update(chapter);
}
@ -233,22 +231,22 @@ public class WordCountAnalyzerService : IWordCountAnalyzerService
_unitOfWork.SeriesRepository.Update(series);
}
/// <summary>
/// Stamps <paramref name="file"/> with the current local time as its last analysis time
/// and hands it to the manga file repository's Update.
/// </summary>
/// <param name="file">File that has just been analyzed</param>
private void UpdateFileAnalysis(MangaFile file)
{
    var analyzedAt = DateTime.Now;
    file.LastFileAnalysis = analyzedAt;
    _unitOfWork.MangaFileRepository.Update(file);
}
/// <summary>
/// Counts the words in a single epub content file. A word is a space-separated token of a body text node
/// (script children excluded) whose first character is a letter.
/// </summary>
/// <param name="bookFile">Epub content file whose HTML is read and parsed</param>
/// <returns>Number of words found, or 0 when the document has no matching text nodes</returns>
private static async Task<int> GetWordCountFromHtml(EpubContentFileRef bookFile)
{
    var doc = new HtmlDocument();
    doc.LoadHtml(await bookFile.ReadContentAsTextAsync());

    // SelectNodes returns null (not an empty collection) when nothing matches, e.g. a page without body text.
    // DefaultIfEmpty() cannot replace this guard: it throws on a null source and would inject a null node otherwise.
    var textNodes = doc.DocumentNode.SelectNodes("//body//text()[not(parent::script)]");
    if (textNodes == null) return 0;

    return textNodes
        .Select(node => node.InnerText.Split(' ', StringSplitOptions.RemoveEmptyEntries)
            .Where(s => char.IsLetter(s[0])))
        .Sum(words => words.Count());
}
}

View File

@ -2,8 +2,14 @@
<s:String x:Key="/Default/CodeInspection/ExcludedFiles/FilesAndFoldersToSkip2/=1BC0273F_002DFEBE_002D4DA1_002DBC04_002D3A3167E4C86C_002Fd_003AData_002Fd_003AMigrations/@EntryIndexedValue">ExplicitlyExcluded</s:String>
<s:Boolean x:Key="/Default/CodeInspection/Highlighting/RunLongAnalysisInSwa/@EntryValue">True</s:Boolean>
<s:Boolean x:Key="/Default/CodeInspection/Highlighting/RunValueAnalysisInNullableWarningsEnabledContext2/@EntryValue">True</s:Boolean>
<s:Boolean x:Key="/Default/UserDictionary/Words/=Docnet/@EntryIndexedValue">True</s:Boolean>
<s:Boolean x:Key="/Default/UserDictionary/Words/=epubs/@EntryIndexedValue">True</s:Boolean>
<s:Boolean x:Key="/Default/UserDictionary/Words/=kavitaignore/@EntryIndexedValue">True</s:Boolean>
<s:Boolean x:Key="/Default/UserDictionary/Words/=kavitaignores/@EntryIndexedValue">True</s:Boolean>
<s:Boolean x:Key="/Default/UserDictionary/Words/=noopener/@EntryIndexedValue">True</s:Boolean>
<s:Boolean x:Key="/Default/UserDictionary/Words/=noreferrer/@EntryIndexedValue">True</s:Boolean>
<s:Boolean x:Key="/Default/UserDictionary/Words/=OEBPS/@EntryIndexedValue">True</s:Boolean>
<s:Boolean x:Key="/Default/UserDictionary/Words/=Omake/@EntryIndexedValue">True</s:Boolean>
<s:Boolean x:Key="/Default/UserDictionary/Words/=Opds/@EntryIndexedValue">True</s:Boolean>
<s:Boolean x:Key="/Default/UserDictionary/Words/=rewinded/@EntryIndexedValue">True</s:Boolean></wpf:ResourceDictionary>
<s:Boolean x:Key="/Default/UserDictionary/Words/=rewinded/@EntryIndexedValue">True</s:Boolean>
<s:Boolean x:Key="/Default/UserDictionary/Words/=Tachiyomi/@EntryIndexedValue">True</s:Boolean></wpf:ResourceDictionary>

View File

@ -32,4 +32,7 @@
<button type="submit" class="flex-fill btn btn-primary" (click)="saveSettings()" [disabled]="!settingsForm.dirty">Save</button>
</div>
</form>
<!-- Accordion with Issues from Media analysis -->
</div>

View File

@ -349,7 +349,7 @@ export class MangaReaderComponent implements OnInit, AfterViewInit, OnDestroy {
}
get SplitIconClass() {
// NOTE: This could be rewritten to valueChanges.pipe(map()) and | async in the UI instead of the getter
// TODO: make this a pipe
if (this.mangaReaderService.isSplitLeftToRight(this.pageSplitOption)) {
return 'left-side';
} else if (this.mangaReaderService.isNoSplit(this.pageSplitOption)) {
@ -593,7 +593,7 @@ export class MangaReaderComponent implements OnInit, AfterViewInit, OnDestroy {
pageSplit: parseInt(this.generalSettingsForm.get('pageSplitOption')?.value, 10),
fitting: (this.generalSettingsForm.get('fittingOption')?.value as FITTING_OPTION),
layoutMode: this.layoutMode,
darkness: 100,
darkness: parseInt(this.generalSettingsForm.get('darkness')?.value + '', 10) || 100,
pagingDirection: this.pagingDirection,
readerMode: this.readerMode,
emulateBook: this.generalSettingsForm.get('emulateBook')?.value,

View File

@ -1,7 +1,6 @@
<div class="row g-0 mb-2">
<div class="col-8">
<h4><span>Publication Status</span>
<i class="fa fa-info-circle ms-1" aria-hidden="true" placement="right" [ngbTooltip]="tooltip" role="button" tabindex="0"></i>
</h4>
</div>
<div class="col-4">
@ -14,8 +13,6 @@
</div>
</div>
<ng-template #tooltip></ng-template>
<ng-container *ngIf="publicationStatues$ | async as statuses">
<ng-container *ngIf="formControl.value; else tableLayout">

View File

@ -30,8 +30,9 @@
[showGridLines]="false"
[showRefLines]="true"
[roundDomains]="true"
[autoScale]="true"
xAxisLabel="Time"
yAxisLabel="Reading Events"
yAxisLabel="Reading Activity"
[timeline]="false"
[results]="data"
>

View File

@ -9,7 +9,7 @@
<ng-container *ngIf="image && image(item) as url">
<app-image *ngIf="url && url.length > 0" width="32px" maxHeight="32px" class="img-top me-1" [imageUrl]="url"></app-image>
</ng-container>
{{item.name}} <span class="float-end" *ngIf="item.value >= 0">{{item.value}} {{label}}</span>
{{item.name}} <span class="float-end" *ngIf="item.value >= 0">{{item.value | compactNumber}} {{label}}</span>
</li>
</ul>
</div>

View File

@ -7,7 +7,7 @@
"name": "GPL-3.0",
"url": "https://github.com/Kareadita/Kavita/blob/develop/LICENSE"
},
"version": "0.6.1.16"
"version": "0.6.1.17"
},
"servers": [
{