using System;
using System.Collections.Generic;
using System.Drawing;
using System.Drawing.Imaging;
using System.IO;
using System.Linq;
using System.Runtime.InteropServices;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using System.Web;
using API.Entities.Enums;
using API.Interfaces.Services;
using API.Parser;
using Docnet.Core;
using Docnet.Core.Converters;
using Docnet.Core.Models;
using Docnet.Core.Readers;
using ExCSS;
using HtmlAgilityPack;
using Microsoft.Extensions.Logging;
using Microsoft.IO;
using VersOne.Epub;

namespace API.Services
{
    /// <summary>
    /// Reads EPUB and PDF files: page counts, cover images, series metadata, and rewriting of
    /// book links/stylesheets so they can be embedded in the reader.
    /// </summary>
    public class BookService : IBookService
    {
        private readonly ILogger<BookService> _logger;
        private readonly StylesheetParser _cssParser = new ();
        private static readonly RecyclableMemoryStreamManager StreamManager = new ();

        public BookService(ILogger<BookService> logger)
        {
            _logger = logger;
        }

        /// <summary>
        /// True when the anchor's href contains a fragment ("#") and the anchor is not marked
        /// with tabindex="-1" or role="presentation".
        /// </summary>
        private static bool HasClickableHrefPart(HtmlNode anchor)
        {
            return anchor.GetAttributeValue("href", string.Empty).Contains("#")
                   && anchor.GetAttributeValue("tabindex", string.Empty) != "-1"
                   && anchor.GetAttributeValue("role", string.Empty) != "presentation";
        }

        /// <summary>
        /// Maps an epub content type to a MIME type string. Types without an explicit mapping
        /// yield "application/octet-stream".
        /// </summary>
        public static string GetContentType(EpubContentType type) => type switch
        {
            EpubContentType.IMAGE_GIF => "image/gif",
            EpubContentType.IMAGE_PNG => "image/png",
            EpubContentType.IMAGE_JPEG => "image/jpeg",
            EpubContentType.FONT_OPENTYPE => "font/otf",
            EpubContentType.FONT_TRUETYPE => "font/ttf",
            EpubContentType.IMAGE_SVG => "image/svg+xml",
            _ => "application/octet-stream"
        };

        /// <summary>
        /// Rewrites an anchor so navigation is driven by kavita-page / kavita-part attributes
        /// instead of the original href. Anchors whose target is not in <paramref name="mappings"/>
        /// and that have no clickable fragment are instead marked to open in a new tab.
        /// </summary>
        /// <param name="anchor">Node to rewrite; anything other than an "a" element is left untouched.</param>
        /// <param name="mappings">Content file key to page number.</param>
        /// <param name="currentPage">Page used for fragment-only links that have no mapping.</param>
        public static void UpdateLinks(HtmlNode anchor, Dictionary<string, int> mappings, int currentPage)
        {
            if (anchor.Name != "a") return;

            var hrefParts = CleanContentKeys(anchor.GetAttributeValue("href", string.Empty)).Split("#");
            // Some keys get uri encoded when parsed, so replace any of those characters with original
            var mappingKey = HttpUtility.UrlDecode(hrefParts[0]);

            if (mappings.TryGetValue(mappingKey, out var mappedPage))
            {
                RewriteAsInternalLink(anchor, mappedPage, hrefParts.Length > 1 ? hrefParts[1] : null);
                return;
            }

            if (HasClickableHrefPart(anchor))
            {
                var part = hrefParts.Length > 1 ? hrefParts[1] : anchor.GetAttributeValue("href", string.Empty);
                RewriteAsInternalLink(anchor, currentPage, part);
                return;
            }

            anchor.Attributes.Add("target", "_blank");
            anchor.Attributes.Add("rel", "noreferrer noopener");
        }

        /// <summary>
        /// Adds kavita-page (and kavita-part when <paramref name="part"/> is not null) and replaces
        /// the href with a no-op javascript link.
        /// </summary>
        private static void RewriteAsInternalLink(HtmlNode anchor, int page, string part)
        {
            anchor.Attributes.Add("kavita-page", $"{page}");
            if (part != null)
            {
                anchor.Attributes.Add("kavita-part", part);
            }

            anchor.Attributes.Remove("href");
            anchor.Attributes.Add("href", "javascript:void(0)");
        }

        /// <summary>
        /// Inlines imported stylesheets, prefixes import/font urls with <paramref name="apiBase"/>,
        /// and scopes every rule under ".reading-section".
        /// </summary>
        /// <param name="stylesheetHtml">Raw CSS text.</param>
        /// <param name="apiBase">Prefix inserted in front of import and font urls.</param>
        /// <param name="filename">Key of the stylesheet inside the book; its directory part is used to resolve imports.</param>
        /// <param name="book">Book whose content files are searched for imported stylesheets.</param>
        /// <returns>The scoped, whitespace-minimised CSS.</returns>
        public async Task<string> ScopeStyles(string stylesheetHtml, string apiBase, string filename, EpubBookRef book)
        {
            // @Import statements will be handled by browser, so we must inline the css into the original file
            // that request it, so they can be Scoped.
            // Path.GetFileName always returns a suffix of its input, so trimming that many characters leaves the
            // directory prefix. (A plain Replace would also strip the name from directory segments and throws
            // when the file name is empty.)
            var prepend = filename.Length > 0
                ? filename.Substring(0, filename.Length - Path.GetFileName(filename).Length)
                : string.Empty;

            var importBuilder = new StringBuilder();
            foreach (Match match in Parser.Parser.CssImportUrlRegex.Matches(stylesheetHtml))
            {
                if (!match.Success) continue;

                var importFile = match.Groups["Filename"].Value;
                var key = CleanContentKeys(importFile);
                if (!key.Contains(prepend))
                {
                    key = prepend + key;
                }

                if (!book.Content.AllFiles.TryGetValue(key, out var bookFile)) continue;

                var content = await bookFile.ReadContentAsBytesAsync();
                importBuilder.Append(Encoding.UTF8.GetString(content));
            }

            stylesheetHtml = stylesheetHtml.Insert(0, importBuilder.ToString());
            stylesheetHtml = Parser.Parser.CssImportUrlRegex.Replace(stylesheetHtml, "$1" + apiBase + prepend + "$2" + "$3");

            var styleContent = RemoveWhiteSpaceFromStylesheets(stylesheetHtml);
            styleContent = Parser.Parser.FontSrcUrlRegex.Replace(styleContent, "$1" + apiBase + "$2" + "$3");

            // Only a standalone "body" token becomes the reader container; names that merely contain it
            // (tbody, .body-text, #body, ...) must be left alone.
            styleContent = Regex.Replace(styleContent, @"(?<![\w.#-])body(?![\w-])", ".reading-section");

            var stylesheet = await _cssParser.ParseAsync(styleContent);
            foreach (var styleRule in stylesheet.StyleRules)
            {
                if (styleRule.Selector.Text == ".reading-section") continue;

                if (styleRule.Selector.Text.Contains(","))
                {
                    // Every selector in a comma separated list needs its own scope prefix
                    styleRule.Text = styleRule.Text.Replace(styleRule.SelectorText,
                        string.Join(", ", styleRule.Selector.Text.Split(",").Select(s => ".reading-section " + s)));
                    continue;
                }

                styleRule.Text = ".reading-section " + styleRule.Text;
            }

            return RemoveWhiteSpaceFromStylesheets(stylesheet.ToCss());
        }

        /// <summary>
        /// Returns the description from the epub package metadata. PDFs, invalid files and
        /// unreadable books yield an empty string.
        /// </summary>
        public string GetSummaryInfo(string filePath)
        {
            if (!IsValidFile(filePath) || Parser.Parser.IsPdf(filePath)) return string.Empty;

            try
            {
                using var epubBook = EpubReader.OpenBook(filePath);
                return epubBook.Schema.Package.Metadata.Description;
            }
            catch (Exception ex)
            {
                _logger.LogWarning(ex, "[BookService] There was an exception getting summary, defaulting to empty string");
            }

            return string.Empty;
        }

        /// <summary>
        /// Checks that the file exists and is recognised as a book by the parser, logging a warning otherwise.
        /// </summary>
        private bool IsValidFile(string filePath)
        {
            if (!File.Exists(filePath))
            {
                _logger.LogWarning("[BookService] Book {EpubFile} could not be found", filePath);
                return false;
            }

            if (Parser.Parser.IsBook(filePath)) return true;

            _logger.LogWarning("[BookService] Book {EpubFile} is not a valid EPUB/PDF", filePath);
            return false;
        }

        /// <summary>
        /// Page count of a PDF, or the number of html content files of an epub. Returns 0 when the file
        /// is invalid or cannot be read.
        /// </summary>
        public int GetNumberOfPages(string filePath)
        {
            if (!IsValidFile(filePath)) return 0;

            try
            {
                if (Parser.Parser.IsPdf(filePath))
                {
                    using var docReader = DocLib.Instance.GetDocReader(filePath, new PageDimensions(1080, 1920));
                    return docReader.GetPageCount();
                }

                using var epubBook = EpubReader.OpenBook(filePath);
                return epubBook.Content.Html.Count;
            }
            catch (Exception ex)
            {
                _logger.LogWarning(ex, "[BookService] There was an exception getting number of pages, defaulting to 0");
            }

            return 0;
        }

        /// <summary>
        /// Expands self-closed tags into an explicit open/close pair.
        /// </summary>
        public static string EscapeTags(string content)
        {
            // NOTE(review): in the copy under review both literals had their angle-bracketed text stripped,
            // leaving an invalid pattern (an unmatched ")") and an empty replacement. The script/title tag
            // names below are a reconstruction - confirm against version control.
            content = Regex.Replace(content, @"<script(.*)(/>)", "<script$1></script>");
            content = Regex.Replace(content, @"<title(.*)(/>)", "<title$1></title>");
            return content;
        }

        /// <summary>
        /// Removes every "../" from a content key.
        /// </summary>
        public static string CleanContentKeys(string key)
        {
            return key.Replace("../", string.Empty);
        }

        /// <summary>
        /// Builds a map from content file name to zero-based page number, counting only XHTML 1.1
        /// files in reading order.
        /// </summary>
        public async Task<Dictionary<string, int>> CreateKeyToPageMappingAsync(EpubBookRef book)
        {
            var dict = new Dictionary<string, int>();
            var pageCount = 0;
            foreach (var contentFileRef in await book.GetReadingOrderAsync())
            {
                if (contentFileRef.ContentType != EpubContentType.XHTML_1_1) continue;

                dict.Add(contentFileRef.FileName, pageCount);
                pageCount += 1;
            }

            return dict;
        }

        /// <summary>
        /// Parses out Title from book. Chapters and Volumes will always be "0". If there is any exception
        /// reading book (malformed books) then null is returned. This expects only an epub file
        /// </summary>
        /// <param name="filePath">Path to the epub file.</param>
        /// <returns>Parsed info, or null when the file is not an epub or cannot be opened.</returns>
        public ParserInfo ParseInfo(string filePath)
        {
            if (!Parser.Parser.IsEpub(filePath)) return null;

            try
            {
                using var epubBook = EpubReader.OpenBook(filePath);

                // If the epub has the following meta items, we can group the books as Volumes:
                //   name="calibre:series_index"  (e.g. content 5)
                //   name="calibre:series"        (e.g. content The Dark Tower)
                //   name="calibre:title_sort"    (e.g. content Wolves of the Calla)
                // If all three are present, we can take that over dc:title and format as:
                // Series = The Dark Tower, Volume = 5, Filename as "Wolves of the Calla"
                // In addition, the following can exist and should parse as a series (EPUB 3.2 spec):
                //   property="belongs-to-collection"  (e.g. The Lord of the Rings)
                //   property="collection-type"        (e.g. set)
                //   property="group-position"         (e.g. 2)
                try
                {
                    var seriesIndex = string.Empty;
                    var series = string.Empty;
                    var specialName = string.Empty;
                    var groupPosition = string.Empty;

                    foreach (var metadataItem in epubBook.Schema.Package.Metadata.MetaItems)
                    {
                        // EPUB 2 and 3
                        switch (metadataItem.Name)
                        {
                            case "calibre:series_index":
                                seriesIndex = metadataItem.Content;
                                break;
                            case "calibre:series":
                                series = metadataItem.Content;
                                break;
                            case "calibre:title_sort":
                                specialName = metadataItem.Content;
                                break;
                        }

                        // EPUB 3.2+ only
                        switch (metadataItem.Property)
                        {
                            case "group-position":
                                seriesIndex = metadataItem.Content;
                                break;
                            case "belongs-to-collection":
                                series = metadataItem.Content;
                                break;
                            case "collection-type":
                                groupPosition = metadataItem.Content;
                                break;
                        }
                    }

                    if (!string.IsNullOrEmpty(series) && !string.IsNullOrEmpty(seriesIndex) &&
                        (!string.IsNullOrEmpty(specialName) || groupPosition.Equals("series") || groupPosition.Equals("set")))
                    {
                        if (string.IsNullOrEmpty(specialName))
                        {
                            specialName = epubBook.Title;
                        }

                        return new ParserInfo()
                        {
                            Chapters = Parser.Parser.DefaultChapter,
                            Edition = string.Empty,
                            Format = MangaFormat.Epub,
                            Filename = Path.GetFileName(filePath),
                            Title = specialName.Trim(),
                            FullFilePath = filePath,
                            IsSpecial = false,
                            Series = series.Trim(),
                            Volumes = seriesIndex.Split(".")[0]
                        };
                    }
                }
                catch (Exception ex)
                {
                    // Series metadata is best effort: record why it failed and fall back to the title-only result
                    _logger.LogDebug(ex, "[BookService] Could not read series metadata from {FileName}, falling back to title", filePath);
                }

                return new ParserInfo()
                {
                    Chapters = Parser.Parser.DefaultChapter,
                    Edition = string.Empty,
                    Format = MangaFormat.Epub,
                    Filename = Path.GetFileName(filePath),
                    Title = epubBook.Title.Trim(),
                    FullFilePath = filePath,
                    IsSpecial = false,
                    Series = epubBook.Title.Trim(),
                    Volumes = Parser.Parser.DefaultVolume
                };
            }
            catch (Exception ex)
            {
                _logger.LogWarning(ex, "[BookService] There was an exception when opening epub book: {FileName}", filePath);
            }

            return null;
        }

        /// <summary>
        /// Copies raw pixel bytes into the bitmap's pixel buffer.
        /// </summary>
        private static void AddBytesToBitmap(Bitmap bmp, byte[] rawBytes)
        {
            var rect = new Rectangle(0, 0, bmp.Width, bmp.Height);
            var bmpData = bmp.LockBits(rect, ImageLockMode.WriteOnly, bmp.PixelFormat);
            try
            {
                // Never write past the locked region, even if the renderer hands back more bytes than it holds
                var capacity = Math.Abs(bmpData.Stride) * bmpData.Height;
                Marshal.Copy(rawBytes, 0, bmpData.Scan0, Math.Min(rawBytes.Length, capacity));
            }
            finally
            {
                bmp.UnlockBits(bmpData);
            }
        }

        /// <summary>
        /// Renders every page of the PDF to "Page-{n}.png" inside <paramref name="targetDirectory"/>,
        /// creating the directory if needed.
        /// </summary>
        public void ExtractPdfImages(string fileFilePath, string targetDirectory)
        {
            DirectoryService.ExistOrCreate(targetDirectory);

            using var docReader = DocLib.Instance.GetDocReader(fileFilePath, new PageDimensions(1080, 1920));
            var pages = docReader.GetPageCount();
            using var stream = StreamManager.GetStream("BookService.GetPdfPage");
            for (var pageNumber = 0; pageNumber < pages; pageNumber++)
            {
                GetPdfPage(docReader, pageNumber, stream);
                // NOTE(review): GetPdfPage encodes as JPEG although the file is named .png - kept as is
                // because callers may rely on the file name.
                File.WriteAllBytes(Path.Combine(targetDirectory, "Page-" + pageNumber + ".png"), stream.ToArray());
            }
        }

        /// <summary>
        /// Returns the cover image bytes of an epub or PDF, optionally resized to a thumbnail.
        /// Returns an empty array when the file is invalid, has no images, or cannot be read.
        /// </summary>
        public byte[] GetCoverImage(string fileFilePath, bool createThumbnail = true)
        {
            if (!IsValidFile(fileFilePath)) return Array.Empty<byte>();

            if (Parser.Parser.IsPdf(fileFilePath))
            {
                return GetPdfCoverImage(fileFilePath, createThumbnail);
            }

            try
            {
                // Opened inside the try so a malformed epub falls back to "no cover" instead of throwing
                using var epubBook = EpubReader.OpenBook(fileFilePath);

                // Try to get the cover image from OPF file, if not set, try to parse it from all the files,
                // then result to the first one.
                var coverImageContent = epubBook.Content.Cover
                                        ?? epubBook.Content.Images.Values.FirstOrDefault(file => Parser.Parser.IsCoverImage(file.FileName))
                                        ?? epubBook.Content.Images.Values.FirstOrDefault();

                if (coverImageContent == null) return Array.Empty<byte>();
                if (!createThumbnail) return coverImageContent.ReadContent();

                using var stream = StreamManager.GetStream("BookService.GetCoverImage", coverImageContent.ReadContent());
                using var thumbnail = NetVips.Image.ThumbnailStream(stream, MetadataService.ThumbnailWidth);
                return thumbnail.WriteToBuffer(".jpg");
            }
            catch (Exception ex)
            {
                _logger.LogWarning(ex, "[BookService] There was a critical error and prevented thumbnail generation on {BookFile}. Defaulting to no cover image", fileFilePath);
            }

            return Array.Empty<byte>();
        }

        /// <summary>
        /// Renders the first page of a PDF as the cover. Returns an empty array for empty or unreadable documents.
        /// </summary>
        private byte[] GetPdfCoverImage(string fileFilePath, bool createThumbnail)
        {
            try
            {
                using var docReader = DocLib.Instance.GetDocReader(fileFilePath, new PageDimensions(1080, 1920));
                if (docReader.GetPageCount() == 0) return Array.Empty<byte>();

                using var stream = StreamManager.GetStream("BookService.GetPdfPage");
                GetPdfPage(docReader, 0, stream);

                if (!createThumbnail) return stream.ToArray();

                using var thumbnail = NetVips.Image.ThumbnailStream(stream, MetadataService.ThumbnailWidth);
                return thumbnail.WriteToBuffer(".png");
            }
            catch (Exception ex)
            {
                _logger.LogWarning(ex, "[BookService] There was a critical error and prevented thumbnail generation on {BookFile}. Defaulting to no cover image", fileFilePath);
            }

            return Array.Empty<byte>();
        }

        /// <summary>
        /// Renders one PDF page into <paramref name="stream"/> as a JPEG, replacing whatever the stream
        /// held before, and leaves the stream positioned at the start.
        /// </summary>
        private static void GetPdfPage(IDocReader docReader, int pageNumber, Stream stream)
        {
            using var pageReader = docReader.GetPageReader(pageNumber);
            var rawBytes = pageReader.GetImage(new NaiveTransparencyRemover());
            var width = pageReader.GetPageWidth();
            var height = pageReader.GetPageHeight();

            using var bmp = new Bitmap(width, height, PixelFormat.Format32bppArgb);
            AddBytesToBitmap(bmp, rawBytes);

            // Overwrite the right-most pixel column with its neighbour to hide the 1px margin left after the
            // bitmap is copied out. Needs at least two columns to have a neighbour.
            if (bmp.Width > 1)
            {
                for (var y = 0; y < bmp.Height; y++)
                {
                    bmp.SetPixel(bmp.Width - 1, y, bmp.GetPixel(bmp.Width - 2, y));
                }
            }

            // The stream is reused across pages: truncate it, otherwise a smaller page keeps the tail of a
            // previous, larger one.
            stream.SetLength(0);
            stream.Seek(0, SeekOrigin.Begin);
            bmp.Save(stream, ImageFormat.Jpeg);
            stream.Seek(0, SeekOrigin.Begin);
        }

        /// <summary>
        /// Minimises a stylesheet: drops element prefixes before id selectors, collapses whitespace,
        /// removes units from zero values and strips comments.
        /// </summary>
        private static string RemoveWhiteSpaceFromStylesheets(string body)
        {
            body = Regex.Replace(body, @"[a-zA-Z]+#", "#");
            body = Regex.Replace(body, @"[\n\r]+\s*", string.Empty);
            body = Regex.Replace(body, @"\s+", " ");
            body = Regex.Replace(body, @"\s?([:,;{}])\s?", "$1");
            body = body.Replace(";}", "}");
            body = Regex.Replace(body, @"([\s:]0)(px|pt|%|em)", "$1");

            // Remove comments from CSS
            body = Regex.Replace(body, @"/\*[\d\D]*?\*/", string.Empty);

            return body;
        }
    }
}