mirror of
https://github.com/Kareadita/Kavita.git
synced 2025-06-17 04:24:14 -04:00
* Updated to net7.0 * Updated GA to .net 7 * Updated System.IO.Abstractions to use New factory. * Converted Regex into SourceGenerator in Parser. * Updated more regex to source generators. * Enabled Nullability and more regex changes throughout codebase. * Parser is 100% GeneratedRegexified * Lots of nullability code * Enabled nullability for all repositories. * Fixed another unit test * Refactored some code around and took care of some todos. * Updating code for nullability and cleaning up methods that aren't used anymore. Refctored all uses of Parser.Normalize() to use new extension * More nullability exercises. 500 warnings to go. * Fixed a bug where custom file uploads for entities wouldn't save in webP. * Nullability is done for all DTOs * Fixed all unit tests and nullability for the project. Only OPDS is left which will be done with an upcoming OPDS enhancement. * Use localization in book service after validating * Code smells * Switched to preview build of swashbuckle for .net7 support * Fixed up merge issues * Disable emulate comic book when on single page reader * Fixed a regression where double page renderer wouldn't layout the images correctly * Updated to swashbuckle which support .net 7 * Fixed a bad GA action * Some code cleanup * More code smells * Took care of most of nullable issues * Fixed a broken test due to having more than one test run in parallel * I'm really not sure why the unit tests are failing or are so extremely slow on .net 7 * Updated all dependencies * Fixed up build and removed hardcoded framework from build scripts. (this merge removes Regex Source generators). Unit tests are completely busted. * Unit tests and code cleanup. Needs shakeout now. * Adjusted Series model since a few fields are not-nullable. Removed dead imports on the project. * Refactored to use Builder pattern for all unit tests. * Switched nullability down to warnings. It wasn't possible to switch due to constraint issues in DB Migration.
106 lines
3.6 KiB
C#
106 lines
3.6 KiB
C#
using System;
|
|
using System.Linq;
|
|
using System.Text.RegularExpressions;
|
|
using System.Threading.Tasks;
|
|
using API.Services;
|
|
using BenchmarkDotNet.Attributes;
|
|
using BenchmarkDotNet.Order;
|
|
using HtmlAgilityPack;
|
|
using VersOne.Epub;
|
|
|
|
namespace API.Benchmark;
|
|
|
|
/// <summary>
/// Benchmarks alternative strategies for counting the words contained in the HTML
/// content files of a single EPUB.
/// </summary>
/// <remarks>
/// Every benchmark opens the book at <see cref="FilePath"/>, walks all of its HTML content files
/// and runs one word-counting variant per file. The per-file counts are discarded; only the cost
/// of computing them is of interest.
/// </remarks>
[StopOnFirstError]
[MemoryDiagnoser]
[RankColumn]
[Orderer(SummaryOrderPolicy.FastestToSlowest)]
[SimpleJob(launchCount: 1, warmupCount: 5, invocationCount: 20)]
public class EpubBenchmark
{
    // NOTE(review): machine-specific absolute path - point this at a local EPUB before running.
    private const string FilePath = @"E:\Books\Invaders of the Rokujouma\Invaders of the Rokujouma - Volume 01.epub";

    // Selects every text node under <body> whose direct parent is not a <script> element.
    private const string BodyTextXPath = "//body//text()[not(parent::script)]";

    // Shared, compiled once: matches a run of word characters delimited by word boundaries.
    private static readonly Regex WordRegex = new Regex(@"\b\w+\b", RegexOptions.Compiled | RegexOptions.IgnoreCase);

    /// <summary>
    /// Counts words per content file using the split-then-count pipeline
    /// (see <see cref="GetBookWordCount_PassByRef(EpubContentFileRef)"/>).
    /// </summary>
    [Benchmark]
    public async Task GetWordCount_PassByRef()
    {
        using var book = await EpubReader.OpenBookAsync(FilePath, BookService.BookReaderOptions);
        foreach (var bookFile in book.Content.Html.Values)
        {
            await GetBookWordCount_PassByRef(bookFile);
        }
    }

    /// <summary>
    /// Counts words per content file using the variant that splits inside the projection and sums directly
    /// (see <see cref="GetBookWordCount_SumEarlier(EpubContentFileRef)"/>).
    /// </summary>
    [Benchmark]
    public async Task GetBookWordCount_SumEarlier()
    {
        using var book = await EpubReader.OpenBookAsync(FilePath, BookService.BookReaderOptions);
        foreach (var bookFile in book.Content.Html.Values)
        {
            await GetBookWordCount_SumEarlier(bookFile);
        }
    }

    /// <summary>
    /// Counts words per content file using <see cref="WordRegex"/>
    /// (see <see cref="GetBookWordCount_Regex(EpubContentFileRef)"/>).
    /// </summary>
    [Benchmark]
    public async Task GetBookWordCount_Regex()
    {
        using var book = await EpubReader.OpenBookAsync(FilePath, BookService.BookReaderOptions);
        foreach (var bookFile in book.Content.Html.Values)
        {
            await GetBookWordCount_Regex(bookFile);
        }
    }

    /// <summary>
    /// Counts the words in an already-loaded HTML string. Not currently called by any benchmark in this class.
    /// </summary>
    /// <param name="fileContents">Raw HTML of one content file.</param>
    /// <returns>
    /// The number of space-separated tokens that start with a letter, or 0 when the document has no matching text nodes.
    /// </returns>
    private static int GetBookWordCount_PassByString(string fileContents)
    {
        var doc = new HtmlDocument();
        doc.LoadHtml(fileContents);
        var delimiter = new char[] {' '};

        // SelectNodes yields null (not an empty collection) when nothing matches.
        var textNodes = doc.DocumentNode.SelectNodes(BodyTextXPath);
        if (textNodes is null)
        {
            return 0;
        }

        return textNodes.Select(node => node.InnerText)
            .Select(text => text.Split(delimiter, StringSplitOptions.RemoveEmptyEntries)
                // RemoveEmptyEntries guarantees s is non-empty, so s[0] is safe.
                .Where(s => char.IsLetter(s[0])))
            .Select(words => words.Count())
            .Where(wordCount => wordCount > 0)
            .Sum();
    }

    /// <summary>
    /// Reads one content file as text and counts its words with the split-then-count pipeline.
    /// </summary>
    /// <param name="bookFile">Reference to the content file to read.</param>
    /// <returns>
    /// The number of space-separated tokens that start with a letter, or 0 when the document has no matching text nodes.
    /// </returns>
    private static async Task<int> GetBookWordCount_PassByRef(EpubContentFileRef bookFile)
    {
        var doc = new HtmlDocument();
        doc.LoadHtml(await bookFile.ReadContentAsTextAsync());
        var delimiter = new char[] {' '};

        // SelectNodes yields null (not an empty collection) when nothing matches.
        var textNodes = doc.DocumentNode.SelectNodes(BodyTextXPath);
        if (textNodes is null)
        {
            return 0;
        }

        return textNodes.Select(node => node.InnerText)
            .Select(text => text.Split(delimiter, StringSplitOptions.RemoveEmptyEntries)
                // RemoveEmptyEntries guarantees s is non-empty, so s[0] is safe.
                .Where(s => char.IsLetter(s[0])))
            .Select(words => words.Count())
            .Where(wordCount => wordCount > 0)
            .Sum();
    }

    /// <summary>
    /// Reads one content file as text and counts its words, splitting inside the projection
    /// and summing the per-node counts directly.
    /// </summary>
    /// <param name="bookFile">Reference to the content file to read.</param>
    /// <returns>
    /// The number of space-separated tokens that start with a letter, or 0 when the document has no matching text nodes.
    /// </returns>
    private static async Task<int> GetBookWordCount_SumEarlier(EpubContentFileRef bookFile)
    {
        var doc = new HtmlDocument();
        doc.LoadHtml(await bookFile.ReadContentAsTextAsync());

        // Guard explicitly: DefaultIfEmpty() cannot help here because it throws on a null
        // source and would otherwise inject a null node that fails on InnerText.
        var textNodes = doc.DocumentNode.SelectNodes(BodyTextXPath);
        if (textNodes is null)
        {
            return 0;
        }

        return textNodes
            .Select(node => node.InnerText.Split(' ', StringSplitOptions.RemoveEmptyEntries)
                // RemoveEmptyEntries guarantees s is non-empty, so s[0] is safe.
                .Where(s => char.IsLetter(s[0])))
            .Sum(words => words.Count());
    }

    /// <summary>
    /// Reads one content file as text and counts its words as the number of
    /// <see cref="WordRegex"/> matches across all matching text nodes.
    /// </summary>
    /// <param name="bookFile">Reference to the content file to read.</param>
    /// <returns>The total match count, or 0 when the document has no matching text nodes.</returns>
    private static async Task<int> GetBookWordCount_Regex(EpubContentFileRef bookFile)
    {
        var doc = new HtmlDocument();
        doc.LoadHtml(await bookFile.ReadContentAsTextAsync());

        // SelectNodes yields null (not an empty collection) when nothing matches.
        var textNodes = doc.DocumentNode.SelectNodes(BodyTextXPath);
        if (textNodes is null)
        {
            return 0;
        }

        return textNodes.Sum(node => WordRegex.Matches(node.InnerText).Count);
    }
}
|