Epub Text Bleeding Finally Fixed! (#4086)

Co-authored-by: Amelia <77553571+Fesaa@users.noreply.github.com>
Co-authored-by: Gazy Mahomar <gmahomarf@users.noreply.github.com>
Co-authored-by: Stefans.A <104719225+privatestefans@users.noreply.github.com>
This commit is contained in:
Joe Milazzo
2025-10-11 09:18:54 -05:00
committed by GitHub
parent 75e844404c
commit f7dca3806f
48 changed files with 4616 additions and 186 deletions
+171 -5
View File
@@ -35,7 +35,8 @@ public class ScannerHelper
private readonly string _testDirectory = Path.Join(Directory.GetCurrentDirectory(), "../../../Services/Test Data/ScannerService/ScanTests");
private readonly string _testcasesDirectory = Path.Join(Directory.GetCurrentDirectory(), "../../../Services/Test Data/ScannerService/TestCases");
private readonly string _imagePath = Path.Join(Directory.GetCurrentDirectory(), "../../../Services/Test Data/ScannerService/1x1.png");
private static readonly string[] ComicInfoExtensions = new[] { ".cbz", ".cbr", ".zip", ".rar" };
private static readonly string[] ComicInfoExtensions = [".cbz", ".cbr", ".zip", ".rar"];
private static readonly string[] EpubExtensions = [".epub"];
public ScannerHelper(IUnitOfWork unitOfWork, ITestOutputHelper testOutputHelper)
{
@@ -43,7 +44,7 @@ public class ScannerHelper
_testOutputHelper = testOutputHelper;
}
public async Task<Library> GenerateScannerData(string testcase, Dictionary<string, ComicInfo> comicInfos = null)
public async Task<Library> GenerateScannerData(string testcase, Dictionary<string, ComicInfo>? comicInfos = null)
{
var testDirectoryPath = await GenerateTestDirectory(Path.Join(_testcasesDirectory, testcase), comicInfos);
@@ -113,7 +114,7 @@ public class ScannerHelper
private async Task<string> GenerateTestDirectory(string mapPath, Dictionary<string, ComicInfo> comicInfos = null)
private async Task<string> GenerateTestDirectory(string mapPath, Dictionary<string, ComicInfo>? comicInfos = null)
{
// Read the map file
var mapContent = await File.ReadAllTextAsync(mapPath);
@@ -130,7 +131,7 @@ public class ScannerHelper
Directory.CreateDirectory(testDirectory);
// Generate the files and folders
await Scaffold(testDirectory, filePaths, comicInfos);
await Scaffold(testDirectory, filePaths ?? [], comicInfos);
_testOutputHelper.WriteLine($"Test Directory Path: {testDirectory}");
@@ -138,7 +139,7 @@ public class ScannerHelper
}
public async Task Scaffold(string testDirectory, List<string> filePaths, Dictionary<string, ComicInfo> comicInfos = null)
public async Task Scaffold(string testDirectory, List<string> filePaths, Dictionary<string, ComicInfo>? comicInfos = null)
{
foreach (var relativePath in filePaths)
{
@@ -157,6 +158,10 @@ public class ScannerHelper
{
CreateMinimalCbz(fullPath, info);
}
else if (EpubExtensions.Contains(ext) && comicInfos != null && comicInfos.TryGetValue(Path.GetFileName(relativePath), out var epubInfo))
{
CreateMinimalEpub(fullPath, epubInfo);
}
else
{
// Create an empty file
@@ -205,4 +210,165 @@ public class ScannerHelper
return stringWriter.ToString().Replace("""<?xml version="1.0" encoding="utf-16"?>""",
@"<?xml version='1.0' encoding='utf-8'?>");
}
/// <summary>
/// Builds a small EPUB archive at <paramref name="filePath"/> for scanner tests.
/// The archive contains, in this order: the <c>mimetype</c> entry (stored without compression),
/// <c>META-INF/container.xml</c> pointing at <c>OEBPS/content.opf</c>, the package document produced by
/// <see cref="GenerateContentOpf"/>, one XHTML chapter, and the image at <c>_imagePath</c> as <c>OEBPS/cover.png</c>.
/// </summary>
/// <param name="filePath">Destination path of the archive; opened with <see cref="ZipArchiveMode.Create"/>.</param>
/// <param name="comicInfo">Optional metadata forwarded to <see cref="GenerateContentOpf"/>.</param>
private void CreateMinimalEpub(string filePath, ComicInfo? comicInfo = null)
{
    const string containerXml = """
        <?xml version="1.0" encoding="UTF-8"?>
        <container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
        <rootfiles>
        <rootfile full-path="OEBPS/content.opf" media-type="application/oebps-package+xml"/>
        </rootfiles>
        </container>
        """;

    const string chapterXhtml = """
        <?xml version="1.0" encoding="UTF-8"?>
        <!DOCTYPE html>
        <html xmlns="http://www.w3.org/1999/xhtml">
        <head>
        <title>Chapter 1</title>
        </head>
        <body>
        <p>Test content.</p>
        </body>
        </html>
        """;

    using (var epub = ZipFile.Open(filePath, ZipArchiveMode.Create))
    {
        // The mimetype entry has to come first and must not be compressed.
        WriteText(epub.CreateEntry("mimetype", CompressionLevel.NoCompression), Encoding.ASCII, "application/epub+zip");

        // Container descriptor that tells readers where the package document lives.
        WriteText(epub.CreateEntry("META-INF/container.xml"), Encoding.UTF8, containerXml);

        // Package document carrying the (optional) metadata.
        var packageDocument = GenerateContentOpf(comicInfo);
        WriteText(epub.CreateEntry("OEBPS/content.opf"), Encoding.UTF8, packageDocument);

        // Single placeholder chapter.
        WriteText(epub.CreateEntry("OEBPS/chapter1.xhtml"), Encoding.UTF8, chapterXhtml);

        // Cover image copied straight from the shared test image.
        epub.CreateEntryFromFile(_imagePath, "OEBPS/cover.png");
    }

    Console.WriteLine($"Created minimal EPUB archive: {filePath} with{(comicInfo != null ? "" : "out")} metadata.");

    // Opens the entry, writes the text with the given encoding, then closes writer and stream
    // so the next entry can be created (only one entry may be open while creating an archive).
    static void WriteText(ZipArchiveEntry entry, Encoding encoding, string text)
    {
        using var entryStream = entry.Open();
        using var entryWriter = new StreamWriter(entryStream, encoding);
        entryWriter.Write(text);
    }
}
/// <summary>
/// Produces the text of an OPF package document (metadata, manifest and spine) for a test EPUB.
/// A title, a language and an identifier are always written; when <paramref name="comicInfo"/> is null
/// or the corresponding field is empty they fall back to "Untitled", "en" and a freshly generated
/// <c>urn:uuid:</c> value. All other metadata elements are written only when their source field is set.
/// </summary>
/// <param name="comicInfo">Optional metadata source; may be null.</param>
/// <returns>The package document as a string.</returns>
private static string GenerateContentOpf(ComicInfo? comicInfo)
{
    var opf = new StringBuilder();

    // Writes one line built from the escaped value, but only when the value is non-empty.
    void AppendIfPresent(string? value, Func<string, string> render)
    {
        if (string.IsNullOrEmpty(value))
        {
            return;
        }

        opf.AppendLine(render(EscapeXml(value)));
    }

    opf.AppendLine("""<?xml version="1.0" encoding="UTF-8"?>""");
    opf.AppendLine("""<package xmlns="http://www.idpf.org/2007/opf" version="3.0" unique-identifier="book-id">""");

    // ---- metadata ----
    opf.AppendLine(" <metadata xmlns:dc=\"http://purl.org/dc/elements/1.1/\" xmlns:opf=\"http://www.idpf.org/2007/opf\" xmlns:calibre=\"http://calibre.kovidgoyal.net/2009/metadata\">");

    // Title is mandatory: fall back to a placeholder.
    var title = comicInfo?.Title;
    opf.AppendLine(string.IsNullOrEmpty(title)
        ? " <dc:title>Untitled</dc:title>"
        : $" <dc:title>{EscapeXml(title)}</dc:title>");

    // Series is expressed as a collection plus a refinement marking it as a series.
    var series = comicInfo?.Series;
    if (!string.IsNullOrEmpty(series))
    {
        opf.AppendLine($" <meta property=\"belongs-to-collection\" id=\"collection\">{EscapeXml(series)}</meta>");
        opf.AppendLine(" <meta refines=\"#collection\" property=\"collection-type\">series</meta>");
    }

    AppendIfPresent(comicInfo?.Writer, v => $" <dc:creator opf:role=\"aut\">{v}</dc:creator>");
    AppendIfPresent(comicInfo?.Publisher, v => $" <dc:publisher>{v}</dc:publisher>");
    AppendIfPresent(comicInfo?.Summary, v => $" <dc:description>{v}</dc:description>");

    // Language is mandatory: default to English.
    var language = comicInfo?.LanguageISO;
    opf.AppendLine(string.IsNullOrEmpty(language)
        ? " <dc:language>en</dc:language>"
        : $" <dc:language>{EscapeXml(language)}</dc:language>");

    // Identifier is mandatory: use the ISBN when available, otherwise a random UUID.
    var isbn = comicInfo?.Isbn;
    opf.AppendLine(string.IsNullOrEmpty(isbn)
        ? $" <dc:identifier id=\"book-id\">urn:uuid:{Guid.NewGuid()}</dc:identifier>"
        : $" <dc:identifier id=\"book-id\" opf:scheme=\"ISBN\">{EscapeXml(isbn)}</dc:identifier>");

    // Date is written as YYYY, YYYY-MM or YYYY-MM-DD depending on which parts are positive.
    if (comicInfo is not null && comicInfo.Year > 0)
    {
        var publicationDate = $"{comicInfo.Year:D4}";
        if (comicInfo.Month > 0)
        {
            publicationDate = comicInfo.Day > 0
                ? $"{publicationDate}-{comicInfo.Month:D2}-{comicInfo.Day:D2}"
                : $"{publicationDate}-{comicInfo.Month:D2}";
        }

        opf.AppendLine($" <dc:date>{publicationDate}</dc:date>");
    }

    // Calibre-style sort/index hints.
    AppendIfPresent(comicInfo?.TitleSort, v => $" <meta name=\"calibre:title_sort\" content=\"{v}\"/>");
    AppendIfPresent(comicInfo?.SeriesSort, v => $" <meta name=\"calibre:series_sort\" content=\"{v}\"/>");
    AppendIfPresent(comicInfo?.Number, v => $" <meta name=\"calibre:series_index\" content=\"{v}\"/>");

    opf.AppendLine(" </metadata>");

    // ---- manifest ----
    opf.AppendLine(" <manifest>")
        .AppendLine(" <item id=\"chapter1\" href=\"chapter1.xhtml\" media-type=\"application/xhtml+xml\"/>")
        .AppendLine(" <item id=\"cover\" href=\"cover.png\" media-type=\"image/png\" properties=\"cover-image\"/>")
        .AppendLine(" </manifest>");

    // ---- spine ----
    opf.AppendLine(" <spine>")
        .AppendLine(" <itemref idref=\"chapter1\"/>")
        .AppendLine(" </spine>");

    opf.AppendLine("</package>");
    return opf.ToString();
}
/// <summary>
/// Replaces the five XML-reserved characters (<c>&amp;</c>, <c>&lt;</c>, <c>&gt;</c>, double quote and
/// apostrophe) with their predefined entity references.
/// </summary>
/// <param name="text">Text to escape; null or empty input is returned unchanged.</param>
/// <returns>The escaped text.</returns>
private static string EscapeXml(string text)
{
    if (string.IsNullOrEmpty(text))
    {
        return text;
    }

    // Single pass is safe: none of the entity references contain a character that needs escaping
    // other than the leading '&', which is emitted here rather than re-scanned.
    var escaped = new StringBuilder(text.Length);
    foreach (var ch in text)
    {
        switch (ch)
        {
            case '&':
                escaped.Append("&amp;");
                break;
            case '<':
                escaped.Append("&lt;");
                break;
            case '>':
                escaped.Append("&gt;");
                break;
            case '"':
                escaped.Append("&quot;");
                break;
            case '\'':
                escaped.Append("&apos;");
                break;
            default:
                escaped.Append(ch);
                break;
        }
    }

    return escaped.ToString();
}
}