Kavita/API.Benchmark/EpubBenchmark.cs
Joe Milazzo 5d1dd7b3f0
.NET 7 + Spring Cleaning (#1677)
* Updated to net7.0

* Updated GA to .net 7

* Updated System.IO.Abstractions to use New factory.

* Converted Regex into SourceGenerator in Parser.

* Updated more regex to source generators.

* Enabled Nullability and more regex changes throughout codebase.

* Parser is 100% GeneratedRegexified

* Lots of nullability code

* Enabled nullability for all repositories.

* Fixed another unit test

* Refactored some code around and took care of some todos.

* Updating code for nullability and cleaning up methods that aren't used anymore. Refactored all uses of Parser.Normalize() to use new extension

* More nullability exercises. 500 warnings to go.

* Fixed a bug where custom file uploads for entities wouldn't save in webP.

* Nullability is done for all DTOs

* Fixed all unit tests and nullability for the project. Only OPDS is left which will be done with an upcoming OPDS enhancement.

* Use localization in book service after validating

* Code smells

* Switched to preview build of swashbuckle for .net7 support

* Fixed up merge issues

* Disable emulate comic book when on single page reader

* Fixed a regression where double page renderer wouldn't layout the images correctly

* Updated to swashbuckle which supports .net 7

* Fixed a bad GA action

* Some code cleanup

* More code smells

* Took care of most of nullable issues

* Fixed a broken test due to having more than one test run in parallel

* I'm really not sure why the unit tests are failing or are so extremely slow on .net 7

* Updated all dependencies

* Fixed up build and removed hardcoded framework from build scripts. (this merge removes Regex Source generators). Unit tests are completely busted.

* Unit tests and code cleanup. Needs shakeout now.

* Adjusted Series model since a few fields are not-nullable. Removed dead imports on the project.

* Refactored to use Builder pattern for all unit tests.

* Switched nullability down to warnings. It wasn't possible to switch due to constraint issues in DB Migration.
2023-03-05 12:55:13 -08:00

106 lines
3.6 KiB
C#

using System;
using System.Linq;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using API.Services;
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Order;
using HtmlAgilityPack;
using VersOne.Epub;
namespace API.Benchmark;
/// <summary>
/// Benchmarks several strategies for counting the words contained in the HTML content files of an EPUB.
/// Each public benchmark opens the book at <see cref="FilePath"/> and runs one counting strategy over
/// every HTML content file; the resulting counts are discarded, only time and allocations are measured.
/// </summary>
[StopOnFirstError]
[MemoryDiagnoser]
[RankColumn]
[Orderer(SummaryOrderPolicy.FastestToSlowest)]
[SimpleJob(launchCount: 1, warmupCount: 5, invocationCount: 20)]
public class EpubBenchmark
{
    /// <summary>
    /// Absolute path of the EPUB used as benchmark input. This is a machine-specific location and
    /// must point at an existing file for the benchmarks to run.
    /// </summary>
    private const string FilePath = @"E:\Books\Invaders of the Rokujouma\Invaders of the Rokujouma - Volume 01.epub";

    /// <summary>
    /// Matches a single word. Shared across all instances so the compiled regex is built only once.
    /// </summary>
    private static readonly Regex WordRegex = new Regex(@"\b\w+\b", RegexOptions.Compiled | RegexOptions.IgnoreCase);

    /// <summary>
    /// Counts words in every HTML content file using <see cref="GetBookWordCount_PassByRef(EpubContentFileRef)"/>.
    /// </summary>
    [Benchmark]
    public async Task GetWordCount_PassByRef()
    {
        using var book = await EpubReader.OpenBookAsync(FilePath, BookService.BookReaderOptions);
        foreach (var bookFile in book.Content.Html.Values)
        {
            await GetBookWordCount_PassByRef(bookFile);
        }
    }

    /// <summary>
    /// Counts words in every HTML content file using <see cref="GetBookWordCount_SumEarlier(EpubContentFileRef)"/>.
    /// </summary>
    [Benchmark]
    public async Task GetBookWordCount_SumEarlier()
    {
        using var book = await EpubReader.OpenBookAsync(FilePath, BookService.BookReaderOptions);
        foreach (var bookFile in book.Content.Html.Values)
        {
            await GetBookWordCount_SumEarlier(bookFile);
        }
    }

    /// <summary>
    /// Counts words in every HTML content file using <see cref="GetBookWordCount_Regex(EpubContentFileRef)"/>.
    /// </summary>
    [Benchmark]
    public async Task GetBookWordCount_Regex()
    {
        using var book = await EpubReader.OpenBookAsync(FilePath, BookService.BookReaderOptions);
        foreach (var bookFile in book.Content.Html.Values)
        {
            await GetBookWordCount_Regex(bookFile);
        }
    }

    /// <summary>
    /// Counts the space-separated tokens that start with a letter in the body text of an
    /// already-loaded HTML string. Not currently invoked by any benchmark in this class.
    /// </summary>
    /// <param name="fileContents">Raw HTML of one content file.</param>
    /// <returns>The word count, or 0 when the document has no matching text nodes.</returns>
    private int GetBookWordCount_PassByString(string fileContents)
    {
        var doc = new HtmlDocument();
        doc.LoadHtml(fileContents);
        var delimiter = new char[] {' '};

        // SelectNodes yields null (not an empty collection) when nothing matches.
        var textNodes = doc.DocumentNode.SelectNodes("//body//text()[not(parent::script)]");
        if (textNodes is null)
        {
            return 0;
        }

        return textNodes
            .Select(node => node.InnerText)
            .Select(text => text.Split(delimiter, StringSplitOptions.RemoveEmptyEntries)
                .Where(s => char.IsLetter(s[0])))
            .Select(words => words.Count())
            .Where(wordCount => wordCount > 0)
            .Sum();
    }

    /// <summary>
    /// Reads the content file as text and counts the space-separated tokens that start with a letter,
    /// computing a per-node count, filtering out zero counts and then summing.
    /// </summary>
    /// <param name="bookFile">Reference to one HTML content file of the book.</param>
    /// <returns>The word count, or 0 when the document has no matching text nodes.</returns>
    private async Task<int> GetBookWordCount_PassByRef(EpubContentFileRef bookFile)
    {
        var doc = new HtmlDocument();
        doc.LoadHtml(await bookFile.ReadContentAsTextAsync());
        var delimiter = new char[] {' '};

        // SelectNodes yields null (not an empty collection) when nothing matches.
        var textNodes = doc.DocumentNode.SelectNodes("//body//text()[not(parent::script)]");
        if (textNodes is null)
        {
            return 0;
        }

        // The Select/Where/Sum shape is intentional: it is the variant being compared
        // against the "sum earlier" strategy.
        return textNodes
            .Select(node => node.InnerText)
            .Select(text => text.Split(delimiter, StringSplitOptions.RemoveEmptyEntries)
                .Where(s => char.IsLetter(s[0])))
            .Select(words => words.Count())
            .Where(wordCount => wordCount > 0)
            .Sum();
    }

    /// <summary>
    /// Reads the content file as text and counts the space-separated tokens that start with a letter,
    /// summing the per-node counts directly without an intermediate count projection or filter.
    /// </summary>
    /// <param name="bookFile">Reference to one HTML content file of the book.</param>
    /// <returns>The word count, or 0 when the document has no matching text nodes.</returns>
    private async Task<int> GetBookWordCount_SumEarlier(EpubContentFileRef bookFile)
    {
        var doc = new HtmlDocument();
        doc.LoadHtml(await bookFile.ReadContentAsTextAsync());

        // SelectNodes yields null when nothing matches, so guard explicitly instead of relying on
        // DefaultIfEmpty(), which cannot be called on null and would inject a null node anyway.
        var textNodes = doc.DocumentNode.SelectNodes("//body//text()[not(parent::script)]");
        if (textNodes is null)
        {
            return 0;
        }

        return textNodes
            .Select(node => node.InnerText.Split(' ', StringSplitOptions.RemoveEmptyEntries)
                .Where(s => char.IsLetter(s[0])))
            .Sum(words => words.Count());
    }

    /// <summary>
    /// Reads the content file as text and counts words by summing the <see cref="WordRegex"/>
    /// matches found in each body text node.
    /// </summary>
    /// <param name="bookFile">Reference to one HTML content file of the book.</param>
    /// <returns>The number of regex matches, or 0 when the document has no matching text nodes.</returns>
    private async Task<int> GetBookWordCount_Regex(EpubContentFileRef bookFile)
    {
        var doc = new HtmlDocument();
        doc.LoadHtml(await bookFile.ReadContentAsTextAsync());

        // SelectNodes yields null (not an empty collection) when nothing matches.
        var textNodes = doc.DocumentNode.SelectNodes("//body//text()[not(parent::script)]");
        if (textNodes is null)
        {
            return 0;
        }

        return textNodes.Sum(node => WordRegex.Matches(node.InnerText).Count);
    }
}