Kavita/API.Benchmark/EpubBenchmark.cs
Joe Milazzo b62d340bb3
Performance Improvements and Some Polish (#1702)
* Auto scale reading timeline

* Added benchmarks for SharpImage and NetVips. When an epub has a malformed page, catch the error and present it better to the user.

* Added a hint for an upcoming feature

* Slightly sped up word count for epubs

* Added one more test to reflect actual code.

* Some light cleanup

* Use compact number for stat lists

* Fixed brightness being broken on manga reader

* Replaced CoverToWebP SharpImage version with NetVips which is MUCH lighter on memory and CPU.

* Added last modified on the progress dto for CdDisplayEx.

* Code cleanup

* Forgot one cleanup
2022-12-17 07:07:30 -08:00

117 lines
4.0 KiB
C#

using System;
using System.Linq;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using API.Services;
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Order;
using HtmlAgilityPack;
using VersOne.Epub;
namespace API.Benchmark;
[StopOnFirstError]
[MemoryDiagnoser]
[RankColumn]
[Orderer(SummaryOrderPolicy.FastestToSlowest)]
[SimpleJob(launchCount: 1, warmupCount: 5, targetCount: 20)]
public class EpubBenchmark
{
private const string FilePath = @"E:\Books\Invaders of the Rokujouma\Invaders of the Rokujouma - Volume 01.epub";
private readonly Regex WordRegex = new Regex(@"\b\w+\b", RegexOptions.Compiled | RegexOptions.IgnoreCase);
// [Benchmark]
// public async Task GetWordCount_PassByString()
// {
// using var book = await EpubReader.OpenBookAsync(FilePath, BookService.BookReaderOptions);
// foreach (var bookFile in book.Content.Html.Values)
// {
// GetBookWordCount_PassByString(await bookFile.ReadContentAsTextAsync());
// ;
// }
// }
[Benchmark]
public async Task GetWordCount_PassByRef()
{
using var book = await EpubReader.OpenBookAsync(FilePath, BookService.BookReaderOptions);
foreach (var bookFile in book.Content.Html.Values)
{
await GetBookWordCount_PassByRef(bookFile);
}
}
[Benchmark]
public async Task GetBookWordCount_SumEarlier()
{
using var book = await EpubReader.OpenBookAsync(FilePath, BookService.BookReaderOptions);
foreach (var bookFile in book.Content.Html.Values)
{
await GetBookWordCount_SumEarlier(bookFile);
}
}
[Benchmark]
public async Task GetBookWordCount_Regex()
{
using var book = await EpubReader.OpenBookAsync(FilePath, BookService.BookReaderOptions);
foreach (var bookFile in book.Content.Html.Values)
{
await GetBookWordCount_Regex(bookFile);
}
}
private int GetBookWordCount_PassByString(string fileContents)
{
var doc = new HtmlDocument();
doc.LoadHtml(fileContents);
var delimiter = new char[] {' '};
return doc.DocumentNode.SelectNodes("//body//text()[not(parent::script)]")
.Select(node => node.InnerText)
.Select(text => text.Split(delimiter, StringSplitOptions.RemoveEmptyEntries)
.Where(s => char.IsLetter(s[0])))
.Select(words => words.Count())
.Where(wordCount => wordCount > 0)
.Sum();
}
private async Task<int> GetBookWordCount_PassByRef(EpubContentFileRef bookFile)
{
var doc = new HtmlDocument();
doc.LoadHtml(await bookFile.ReadContentAsTextAsync());
var delimiter = new char[] {' '};
var textNodes = doc.DocumentNode.SelectNodes("//body//text()[not(parent::script)]");
if (textNodes == null) return 0;
return textNodes.Select(node => node.InnerText)
.Select(text => text.Split(delimiter, StringSplitOptions.RemoveEmptyEntries)
.Where(s => char.IsLetter(s[0])))
.Select(words => words.Count())
.Where(wordCount => wordCount > 0)
.Sum();
}
private async Task<int> GetBookWordCount_SumEarlier(EpubContentFileRef bookFile)
{
var doc = new HtmlDocument();
doc.LoadHtml(await bookFile.ReadContentAsTextAsync());
return doc.DocumentNode.SelectNodes("//body//text()[not(parent::script)]")
.DefaultIfEmpty()
.Select(node => node.InnerText.Split(' ', StringSplitOptions.RemoveEmptyEntries)
.Where(s => char.IsLetter(s[0])))
.Sum(words => words.Count());
}
private async Task<int> GetBookWordCount_Regex(EpubContentFileRef bookFile)
{
var doc = new HtmlDocument();
doc.LoadHtml(await bookFile.ReadContentAsTextAsync());
return doc.DocumentNode.SelectNodes("//body//text()[not(parent::script)]")
.Sum(node => WordRegex.Matches(node.InnerText).Count);
}
}