mirror of
https://github.com/Kareadita/Kavita.git
synced 2025-05-24 00:52:23 -04:00
* Added better handling around when importing css files that are empty. Moved comment removal on css files to before some css whitespace cleanup to get better matches. * Some enhancements on the checks to see if we need the bottom action bar on reader. Now we don't query DOM and have something that works more reliably.
358 lines
16 KiB
C#
358 lines
16 KiB
C#
using System.Collections.Generic;
|
|
using System.Linq;
|
|
using System.Threading.Tasks;
|
|
using API.DTOs;
|
|
using API.DTOs.Reader;
|
|
using API.Entities.Enums;
|
|
using API.Extensions;
|
|
using API.Interfaces;
|
|
using API.Interfaces.Services;
|
|
using API.Services;
|
|
using HtmlAgilityPack;
|
|
using Microsoft.AspNetCore.Mvc;
|
|
using Microsoft.Extensions.Logging;
|
|
using VersOne.Epub;
|
|
|
|
namespace API.Controllers
|
|
{
|
|
public class BookController : BaseApiController
|
|
{
|
|
/// <summary>Relative endpoint (with query key) that book resources are rewritten to point at.</summary>
private static readonly string BookApiUrl = "book-resources?file=";

/// <summary>Logger for this controller.</summary>
private readonly ILogger<BookController> _logger;
/// <summary>Book-specific helpers (key-to-page mappings, style scoping).</summary>
private readonly IBookService _bookService;
/// <summary>Entry point to the repositories.</summary>
private readonly IUnitOfWork _unitOfWork;
/// <summary>Provides the cached epub file for a chapter.</summary>
private readonly ICacheService _cacheService;

/// <summary>
/// Creates the controller and stores the injected services.
/// </summary>
/// <param name="logger">Logger for this controller.</param>
/// <param name="bookService">Book helper service.</param>
/// <param name="unitOfWork">Repository access.</param>
/// <param name="cacheService">Cache service used when reading book pages.</param>
public BookController(
    ILogger<BookController> logger,
    IBookService bookService,
    IUnitOfWork unitOfWork,
    ICacheService cacheService)
{
    (_logger, _bookService, _unitOfWork, _cacheService) = (logger, bookService, unitOfWork, cacheService);
}
|
|
|
|
/// <summary>
/// Returns descriptive information about a chapter. When the series format is Epub, the title
/// is read from the epub of the chapter's first file; otherwise the title is empty.
/// </summary>
/// <param name="chapterId">Id of the chapter to describe.</param>
/// <returns>
/// The populated <see cref="BookInfoDto"/>, or a BadRequest when no chapter info exists for the id.
/// </returns>
[HttpGet("{chapterId}/book-info")]
public async Task<ActionResult<BookInfoDto>> GetBookInfo(int chapterId)
{
    var dto = await _unitOfWork.ChapterRepository.GetChapterInfoDtoAsync(chapterId);
    // Guard against an unknown chapter instead of failing with a null dereference below.
    if (dto == null) return BadRequest("Could not find Chapter");

    var bookTitle = string.Empty;
    if (dto.SeriesFormat == MangaFormat.Epub)
    {
        // A chapter without files keeps the empty title rather than throwing from First().
        var mangaFile = (await _unitOfWork.ChapterRepository.GetFilesForChapterAsync(chapterId)).FirstOrDefault();
        if (mangaFile != null)
        {
            using var book = await EpubReader.OpenBookAsync(mangaFile.FilePath);
            bookTitle = book.Title;
        }
    }

    return Ok(new BookInfoDto()
    {
        ChapterNumber = dto.ChapterNumber,
        VolumeNumber = dto.VolumeNumber,
        VolumeId = dto.VolumeId,
        BookTitle = bookTitle,
        SeriesName = dto.SeriesName,
        SeriesFormat = dto.SeriesFormat,
        SeriesId = dto.SeriesId,
        LibraryId = dto.LibraryId,
        IsSpecial = dto.IsSpecial,
        Pages = dto.Pages,
    });
}
|
|
|
|
/// <summary>
/// Serves a single resource (image, stylesheet, ...) from the epub of the chapter's first file.
/// </summary>
/// <param name="chapterId">Id of the chapter whose epub is opened.</param>
/// <param name="file">Key of the resource inside the epub; cleaned with <c>BookService.CleanContentKeys</c> before lookup.</param>
/// <returns>The resource bytes with the mapped content type, or a BadRequest when the key is not in the book.</returns>
[HttpGet("{chapterId}/book-resources")]
public async Task<ActionResult> GetBookPageResources(int chapterId, [FromQuery] string file)
{
    // A missing query value cannot match any resource; answer like an unknown key.
    if (string.IsNullOrEmpty(file)) return BadRequest("File was not found in book");

    var chapter = await _unitOfWork.ChapterRepository.GetChapterAsync(chapterId);
    // Dispose the book when the action finishes; the content below is fully read into a byte
    // array before that happens, so the returned file result does not depend on the open book.
    using var book = await EpubReader.OpenBookAsync(chapter.Files.ElementAt(0).FilePath);

    var key = BookService.CleanContentKeys(file);
    if (!book.Content.AllFiles.TryGetValue(key, out var bookFile)) return BadRequest("File was not found in book");

    var content = await bookFile.ReadContentAsBytesAsync();

    Response.AddCacheHeader(content);
    var contentType = BookService.GetContentType(bookFile.ContentType);
    return File(content, contentType, $"{chapterId}-{file}");
}
|
|
|
|
/// <summary>
/// Builds the table of contents for the epub of the chapter's first file.
/// </summary>
/// <remarks>
/// Entries are taken from the epub navigation first. When that yields nothing, the first html
/// file whose key contains "TOC" is parsed and one entry is created per anchor that resolves to
/// a known page.
/// </remarks>
/// <param name="chapterId">Id of the chapter whose epub is opened.</param>
/// <returns>The (possibly empty) list of chapter entries.</returns>
[HttpGet("{chapterId}/chapters")]
public async Task<ActionResult<ICollection<BookChapterItem>>> GetBookChapters(int chapterId)
{
    // This will return a list of mappings from ID -> pagenum. ID will be the xhtml key and pagenum will be the reading order
    // this is used to rewrite anchors in the book text so that we always load properly in FE
    var chapter = await _unitOfWork.ChapterRepository.GetChapterAsync(chapterId);
    using var book = await EpubReader.OpenBookAsync(chapter.Files.ElementAt(0).FilePath);
    var mappings = await _bookService.CreateKeyToPageMappingAsync(book);

    var navItems = await book.GetNavigationAsync();
    var chaptersList = new List<BookChapterItem>();

    foreach (var navigationItem in navItems)
    {
        // NOTE(review): navigation items without nested items are not added here, so a flat
        // navigation falls through to the TOC-page fallback below. Confirm this is intended.
        if (navigationItem.NestedItems.Count == 0) continue;

        var nestedChapters = new List<BookChapterItem>();
        foreach (var nestedChapter in navigationItem.NestedItems)
        {
            if (nestedChapter.Link == null) continue;

            var nestedKey = BookService.CleanContentKeys(nestedChapter.Link.ContentFileName);
            if (!mappings.TryGetValue(nestedKey, out var nestedPage)) continue;

            nestedChapters.Add(new BookChapterItem()
            {
                Title = nestedChapter.Title,
                Page = nestedPage,
                Part = nestedChapter.Link.Anchor ?? string.Empty,
                Children = new List<BookChapterItem>()
            });
        }

        if (navigationItem.Link == null)
        {
            // A group header without its own link points at its first resolvable child.
            var item = new BookChapterItem()
            {
                Title = navigationItem.Title,
                Children = nestedChapters
            };
            if (nestedChapters.Count > 0)
            {
                item.Page = nestedChapters[0].Page;
            }
            chaptersList.Add(item);
        }
        else
        {
            var groupKey = BookService.CleanContentKeys(navigationItem.Link.ContentFileName);
            if (mappings.TryGetValue(groupKey, out var groupPage))
            {
                chaptersList.Add(new BookChapterItem()
                {
                    Title = navigationItem.Title,
                    Page = groupPage,
                    Children = nestedChapters
                });
            }
        }
    }

    if (chaptersList.Count > 0) return Ok(chaptersList);

    // Generate from TOC
    var tocPage = book.Content.Html.Keys.FirstOrDefault(k => k.Contains("TOC", System.StringComparison.OrdinalIgnoreCase));
    if (tocPage == null) return Ok(chaptersList);

    // Find all anchor tags, for each anchor we get inner text, to lower then titlecase on UI. Get href and generate page content
    var doc = new HtmlDocument();
    var content = await book.Content.Html[tocPage].ReadContentAsync();
    doc.LoadHtml(content);
    var anchors = doc.DocumentNode.SelectNodes("//a");
    if (anchors == null) return Ok(chaptersList);

    foreach (var anchor in anchors)
    {
        if (!anchor.Attributes.Contains("href")) continue;

        var href = anchor.Attributes["href"].Value;
        var tocKey = BookService.CleanContentKeys(href).Split("#")[0];

        // An href such as "#part1" has no file component: it refers to the TOC page itself.
        // Without this, the suffix search below would match every key in the book.
        if (string.IsNullOrEmpty(tocKey))
        {
            tocKey = tocPage;
        }

        if (!mappings.ContainsKey(tocKey))
        {
            // Fallback to searching for key (bad epub metadata). FirstOrDefault is used so that
            // several files sharing the same suffix do not raise an exception.
            var correctedKey = book.Content.Html.Keys.FirstOrDefault(s => s.EndsWith(tocKey, System.StringComparison.Ordinal));
            if (!string.IsNullOrEmpty(correctedKey))
            {
                tocKey = correctedKey;
            }
        }

        if (!mappings.TryGetValue(tocKey, out var tocEntryPage)) continue;

        var part = href.Contains("#") ? href.Split("#")[1] : string.Empty;
        chaptersList.Add(new BookChapterItem()
        {
            Title = anchor.InnerText,
            Page = tocEntryPage,
            Part = part,
            Children = new List<BookChapterItem>()
        });
    }

    return Ok(chaptersList);
}
|
|
|
|
/// <summary>
/// Returns the html of one page (reading-order index) of the cached epub for a chapter, with
/// styles scoped and inlined into the body and with anchors and image sources rewritten.
/// </summary>
/// <param name="chapterId">Id of the chapter whose cached epub is read.</param>
/// <param name="page">Zero-based index into the book's reading order.</param>
/// <returns>
/// The page content; a BadRequest when the page cannot be parsed or when <paramref name="page"/>
/// does not match any entry of the reading order.
/// </returns>
[HttpGet("{chapterId}/book-page")]
public async Task<ActionResult<string>> GetBookPage(int chapterId, [FromQuery] int page)
{
    var chapter = await _cacheService.Ensure(chapterId);
    var path = _cacheService.GetCachedEpubFile(chapter.Id, chapter);

    using var book = await EpubReader.OpenBookAsync(path);
    var mappings = await _bookService.CreateKeyToPageMappingAsync(book);

    var doc = new HtmlDocument {OptionFixNestedTags = true};
    var baseUrl = Request.Scheme + "://" + Request.Host + Request.PathBase + "/api/";
    var apiBase = baseUrl + "book/" + chapterId + "/" + BookApiUrl;
    var bookPages = await book.GetReadingOrderAsync();

    var counter = 0;
    foreach (var contentFileRef in bookPages)
    {
        if (page != counter)
        {
            counter++;
            continue;
        }

        var content = await contentFileRef.ReadContentAsync();
        if (contentFileRef.ContentType != EpubContentType.XHTML_1_1) return Ok(content);

        // In more cases than not, due to this being XML not HTML, we need to escape the script tags.
        content = BookService.EscapeTags(content);

        doc.LoadHtml(content);
        var body = doc.DocumentNode.SelectSingleNode("//body");

        if (body == null)
        {
            if (doc.ParseErrors.Any())
            {
                LogBookErrors(book, contentFileRef, doc);
                return BadRequest("The file is malformed! Cannot read.");
            }
            _logger.LogError("{FilePath} has no body tag! Generating one for support. Book may be skewed", book.FilePath);

            // Without an html root there is nothing to attach a generated body to.
            var htmlRoot = doc.DocumentNode.SelectSingleNode("/html");
            if (htmlRoot == null) return BadRequest("The file is malformed! Cannot read.");

            htmlRoot.AppendChild(HtmlNode.CreateNode("<body></body>"));
            body = doc.DocumentNode.SelectSingleNode("/html/body");
        }

        // Inline <style> blocks: scope them and move them to the top of the body.
        var inlineStyles = doc.DocumentNode.SelectNodes("//style");
        if (inlineStyles != null)
        {
            foreach (var inlineStyle in inlineStyles)
            {
                var styleContent = await _bookService.ScopeStyles(inlineStyle.InnerHtml, apiBase, "", book);
                body.PrependChild(HtmlNode.CreateNode($"<style>{styleContent}</style>"));
            }
        }

        // Linked stylesheets: load them from the book, scope them and inline them into the body.
        var styleNodes = doc.DocumentNode.SelectNodes("/html/head/link");
        if (styleNodes != null)
        {
            foreach (var styleLinks in styleNodes)
            {
                // A <link> without a usable href cannot reference a stylesheet in the book.
                var href = styleLinks.Attributes["href"]?.Value;
                if (string.IsNullOrEmpty(href)) continue;

                var key = BookService.CleanContentKeys(href);
                // Some epubs are malformed the key in content.opf might be: content/resources/filelist_0_0.xml but the actual html links to resources/filelist_0_0.xml
                // In this case, we will do a search for the key that ends with
                if (!book.Content.Css.TryGetValue(key, out var cssFile))
                {
                    var correctedKey = FindKeyBySuffix(book.Content.Css.Keys, key);
                    if (correctedKey == null)
                    {
                        _logger.LogError("Epub is Malformed, key: {Key} is not matching OPF file", key);
                        continue;
                    }

                    cssFile = book.Content.Css[correctedKey];
                }

                var styleContent = await _bookService.ScopeStyles(await cssFile.ReadContentAsync(), apiBase, cssFile.FileName, book);
                if (styleContent != null)
                {
                    body.PrependChild(HtmlNode.CreateNode($"<style>{styleContent}</style>"));
                }
            }
        }

        var anchors = doc.DocumentNode.SelectNodes("//a");
        if (anchors != null)
        {
            foreach (var anchor in anchors)
            {
                BookService.UpdateLinks(anchor, mappings, page);
            }
        }

        // <img src="..."> and (svg) <image xlink:href="..."> both have to point at the resource endpoint.
        RewriteImageSources(doc, book, apiBase, "//img", "img", "src");
        RewriteImageSources(doc, book, apiBase, "//image", "image", "xlink:href");

        // Check if any classes on the html node (some r2l books do this) and move them to body tag for scoping
        var htmlNode = doc.DocumentNode.SelectSingleNode("//html");
        if (htmlNode != null && htmlNode.Attributes.Contains("class"))
        {
            var bodyClasses = body.Attributes.Contains("class") ? body.Attributes["class"].Value : string.Empty;
            var classes = htmlNode.Attributes["class"].Value + " " + bodyClasses;
            body.Attributes.Add("class", $"{classes}");
            // I actually need the body tag itself for the classes, so i will create a div and put the body stuff there.
            return Ok($"<div class=\"{body.Attributes["class"].Value}\">{body.InnerHtml}</div>");
        }

        return Ok(body.InnerHtml);
    }

    return BadRequest("Could not find the appropriate html for that page");
}

// Returns the first key ending with the given suffix (ordinal comparison), or null when the
// suffix is empty or no key matches. Never throws on ambiguous matches.
private static string FindKeyBySuffix(IEnumerable<string> keys, string suffix)
{
    if (string.IsNullOrEmpty(suffix)) return null;
    return keys.FirstOrDefault(k => k.EndsWith(suffix, System.StringComparison.Ordinal));
}

// Rewrites the given attribute on every matching node so it points at the book resource endpoint,
// correcting the image key by suffix search when the book does not contain it verbatim.
private static void RewriteImageSources(HtmlDocument doc, EpubBookRef book, string apiBase, string xpath, string tagName, string attributeName)
{
    var nodes = doc.DocumentNode.SelectNodes(xpath);
    if (nodes == null) return;

    foreach (var node in nodes)
    {
        if (node.Name != tagName) continue;

        var attribute = node.Attributes[attributeName];
        if (attribute == null) continue;

        var imageFile = attribute.Value;
        if (!book.Content.Images.ContainsKey(imageFile))
        {
            // Keep the original value when no image key matches.
            imageFile = FindKeyBySuffix(book.Content.Images.Keys, imageFile) ?? imageFile;
        }

        node.Attributes.Remove(attributeName);
        node.Attributes.Add(attributeName, apiBase + imageFile);
    }
}
|
|
|
|
/// <summary>
/// Logs that a page of the book could not be parsed, followed by one log entry per parse error.
/// </summary>
/// <param name="book">Book whose file path is reported.</param>
/// <param name="contentFileRef">Content file (page) that failed to parse.</param>
/// <param name="doc">Document holding the parse errors to report.</param>
private void LogBookErrors(EpubBookRef book, EpubTextContentFileRef contentFileRef, HtmlDocument doc)
{
    var filePath = book.FilePath;
    var pageName = contentFileRef.FileName;
    _logger.LogError("{FilePath} has an invalid html file (Page {PageName})", filePath, pageName);

    // One entry per parse error, in the order the parser reported them.
    foreach (var parseError in doc.ParseErrors)
    {
        var line = parseError.Line;
        var reason = parseError.Reason;
        _logger.LogError("Line {LineNumber}, Reason: {Reason}", line, reason);
    }
}
|
|
}
|
|
}
|