mirror of
https://github.com/Kareadita/Kavita.git
synced 2025-05-24 00:52:23 -04:00
# Added - Added support for PDFs within Kavita. PDFs will open in the Manga reader and you can read through them as images. PDFs are heavier than archives, so they may take longer to open for reading. (Fixes #187) # Changed - Changed: Major change in how Kavita libraries work. Kavita libraries will now allow for mixed media types, that means you can have raw images, archives, epubs, and pdfs all within your Manga library. In the case that the same Series exists between 2 different types of medias, they will be separated and an icon will show to help you identify the types. The correct reader will open regardless of what library you are on. Note: Nightly users need to delete their Raw Images libraries before updating. # Fixed - Fixed: Fixed an issue where checking if a file was modified since last scan always returned true, meaning we would do more I/O than was needed (Fixes #415) - Fixed: There wasn't enough spacing on the top menu bar on the Manga reader - Fixed: Fixed a bug where user preferences dark mode control always showed true, even if you were not using dark mode # Dev stuff - For image extraction, if there is only 1 image we will extract just that, else we will extract only images - Refactored all the Parser code out of the ScannerService into a self contained class. The class should be created for any scans, allowing multiple tasks to run without any chance of cross over. * Fixed indentation for cs files * Fixed an issue where the logic for if a file had been modified or not was not working and always saying modified, meaning we were doing more file I/O than needed. * Implemented the ability to have PDF books. No reader functionality. * Implemented a basic form of scanning for PDF files. Reworked Image based libraries to remove the need to separate in a special library and instead just work within the Manga/Comic library. * Removed the old library types. * Removed some extra code around old raw library types * Fully implemented PDF support into Kavita by using docnet. Removed old libraries we tried that did not work. PDFs take about 200ms to save the file to disk, so they are much slower than reading archives. * Refactored Libraries so that they can have any file extension and the UI will decide which reader to use. * Reworked the Series Parsing code. We now use a separate instance for each task call, so there should be no cross over if 2 tasks are running at the same time. Second, we now store Format with the Series, so we can have duplicate Series with the same name, but a different type of files underneath. * Fixed PDF transparency issues - Used this code to fix an issue when a PDF page doesn't have a background. https://github.com/GowenGit/docnet/issues/8#issuecomment-538985672 - This also fixes the same issue for cover images * Fixed an issue where if a raw image was in a directory with non-image files, those would get moved to cache when trying to open the file. * For image extraction, if there is only 1 image, just copy that to cache instead of everything else in the directory that is an image. * Add some spacing to the top menu bar * Added an icon to the card to showcase the type of file * Added a tag badge to the series detail page * Fixed a bug in user preferences where dark mode control would default to true, even if you weren't on it * Fixed some tests up * Some code smells Co-authored-by: Robbie Davis <robbie@therobbiedavis.com>
327 lines
15 KiB
C#
327 lines
15 KiB
C#
using System.Collections.Generic;
|
|
using System.Linq;
|
|
using System.Threading.Tasks;
|
|
using API.DTOs;
|
|
using API.Extensions;
|
|
using API.Interfaces;
|
|
using API.Interfaces.Services;
|
|
using API.Services;
|
|
using HtmlAgilityPack;
|
|
using Microsoft.AspNetCore.Mvc;
|
|
using Microsoft.Extensions.Logging;
|
|
using VersOne.Epub;
|
|
|
|
namespace API.Controllers
|
|
{
|
|
public class BookController : BaseApiController
|
|
{
|
|
private readonly ILogger<BookController> _logger;
|
|
private readonly IBookService _bookService;
|
|
private readonly IUnitOfWork _unitOfWork;
|
|
private static readonly string BookApiUrl = "book-resources?file=";
|
|
|
|
|
|
public BookController(ILogger<BookController> logger, IBookService bookService, IUnitOfWork unitOfWork)
|
|
{
|
|
_logger = logger;
|
|
_bookService = bookService;
|
|
_unitOfWork = unitOfWork;
|
|
}
|
|
|
|
[HttpGet("{chapterId}/book-info")]
|
|
public async Task<ActionResult<string>> GetBookInfo(int chapterId)
|
|
{
|
|
var chapter = await _unitOfWork.VolumeRepository.GetChapterAsync(chapterId);
|
|
using var book = await EpubReader.OpenBookAsync(chapter.Files.ElementAt(0).FilePath);
|
|
|
|
return book.Title;
|
|
}
|
|
|
|
[HttpGet("{chapterId}/book-resources")]
|
|
public async Task<ActionResult> GetBookPageResources(int chapterId, [FromQuery] string file)
|
|
{
|
|
var chapter = await _unitOfWork.VolumeRepository.GetChapterAsync(chapterId);
|
|
var book = await EpubReader.OpenBookAsync(chapter.Files.ElementAt(0).FilePath);
|
|
|
|
var key = BookService.CleanContentKeys(file);
|
|
if (!book.Content.AllFiles.ContainsKey(key)) return BadRequest("File was not found in book");
|
|
|
|
var bookFile = book.Content.AllFiles[key];
|
|
var content = await bookFile.ReadContentAsBytesAsync();
|
|
|
|
Response.AddCacheHeader(content);
|
|
var contentType = BookService.GetContentType(bookFile.ContentType);
|
|
return File(content, contentType, $"{chapterId}-{file}");
|
|
}
|
|
|
|
[HttpGet("{chapterId}/chapters")]
|
|
public async Task<ActionResult<ICollection<BookChapterItem>>> GetBookChapters(int chapterId)
|
|
{
|
|
// This will return a list of mappings from ID -> pagenum. ID will be the xhtml key and pagenum will be the reading order
|
|
// this is used to rewrite anchors in the book text so that we always load properly in FE
|
|
var chapter = await _unitOfWork.VolumeRepository.GetChapterAsync(chapterId);
|
|
using var book = await EpubReader.OpenBookAsync(chapter.Files.ElementAt(0).FilePath);
|
|
var mappings = await _bookService.CreateKeyToPageMappingAsync(book);
|
|
|
|
var navItems = await book.GetNavigationAsync();
|
|
var chaptersList = new List<BookChapterItem>();
|
|
|
|
foreach (var navigationItem in navItems)
|
|
{
|
|
if (navigationItem.NestedItems.Count > 0)
|
|
{
|
|
var nestedChapters = new List<BookChapterItem>();
|
|
|
|
foreach (var nestedChapter in navigationItem.NestedItems)
|
|
{
|
|
if (nestedChapter.Link == null) continue;
|
|
var key = BookService.CleanContentKeys(nestedChapter.Link.ContentFileName);
|
|
if (mappings.ContainsKey(key))
|
|
{
|
|
nestedChapters.Add(new BookChapterItem()
|
|
{
|
|
Title = nestedChapter.Title,
|
|
Page = mappings[key],
|
|
Part = nestedChapter.Link.Anchor ?? string.Empty,
|
|
Children = new List<BookChapterItem>()
|
|
});
|
|
}
|
|
}
|
|
|
|
if (navigationItem.Link == null)
|
|
{
|
|
var item = new BookChapterItem()
|
|
{
|
|
Title = navigationItem.Title,
|
|
Children = nestedChapters
|
|
};
|
|
if (nestedChapters.Count > 0)
|
|
{
|
|
item.Page = nestedChapters[0].Page;
|
|
}
|
|
chaptersList.Add(item);
|
|
}
|
|
else
|
|
{
|
|
var groupKey = BookService.CleanContentKeys(navigationItem.Link.ContentFileName);
|
|
if (mappings.ContainsKey(groupKey))
|
|
{
|
|
chaptersList.Add(new BookChapterItem()
|
|
{
|
|
Title = navigationItem.Title,
|
|
Page = mappings[groupKey],
|
|
Children = nestedChapters
|
|
});
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if (chaptersList.Count == 0)
|
|
{
|
|
// Generate from TOC
|
|
var tocPage = book.Content.Html.Keys.FirstOrDefault(k => k.ToUpper().Contains("TOC"));
|
|
if (tocPage == null) return Ok(chaptersList);
|
|
|
|
// Find all anchor tags, for each anchor we get inner text, to lower then titlecase on UI. Get href and generate page content
|
|
var doc = new HtmlDocument();
|
|
var content = await book.Content.Html[tocPage].ReadContentAsync();
|
|
doc.LoadHtml(content);
|
|
var anchors = doc.DocumentNode.SelectNodes("//a");
|
|
if (anchors == null) return Ok(chaptersList);
|
|
|
|
foreach (var anchor in anchors)
|
|
{
|
|
if (anchor.Attributes.Contains("href"))
|
|
{
|
|
var key = BookService.CleanContentKeys(anchor.Attributes["href"].Value).Split("#")[0];
|
|
if (!mappings.ContainsKey(key))
|
|
{
|
|
// Fallback to searching for key (bad epub metadata)
|
|
var correctedKey = book.Content.Html.Keys.SingleOrDefault(s => s.EndsWith(key));
|
|
if (!string.IsNullOrEmpty(correctedKey))
|
|
{
|
|
key = correctedKey;
|
|
}
|
|
}
|
|
if (!string.IsNullOrEmpty(key) && mappings.ContainsKey(key))
|
|
{
|
|
var part = string.Empty;
|
|
if (anchor.Attributes["href"].Value.Contains("#"))
|
|
{
|
|
part = anchor.Attributes["href"].Value.Split("#")[1];
|
|
}
|
|
chaptersList.Add(new BookChapterItem()
|
|
{
|
|
Title = anchor.InnerText,
|
|
Page = mappings[key],
|
|
Part = part,
|
|
Children = new List<BookChapterItem>()
|
|
});
|
|
}
|
|
}
|
|
}
|
|
|
|
}
|
|
return Ok(chaptersList);
|
|
}
|
|
|
|
[HttpGet("{chapterId}/book-page")]
|
|
public async Task<ActionResult<string>> GetBookPage(int chapterId, [FromQuery] int page)
|
|
{
|
|
var chapter = await _unitOfWork.VolumeRepository.GetChapterAsync(chapterId);
|
|
|
|
using var book = await EpubReader.OpenBookAsync(chapter.Files.ElementAt(0).FilePath);
|
|
var mappings = await _bookService.CreateKeyToPageMappingAsync(book);
|
|
|
|
var counter = 0;
|
|
var doc = new HtmlDocument {OptionFixNestedTags = true};
|
|
var baseUrl = Request.Scheme + "://" + Request.Host + Request.PathBase + "/api/";
|
|
var apiBase = baseUrl + "book/" + chapterId + "/" + BookApiUrl;
|
|
var bookPages = await book.GetReadingOrderAsync();
|
|
foreach (var contentFileRef in bookPages)
|
|
{
|
|
if (page == counter)
|
|
{
|
|
var content = await contentFileRef.ReadContentAsync();
|
|
if (contentFileRef.ContentType != EpubContentType.XHTML_1_1) return Ok(content);
|
|
|
|
// In more cases than not, due to this being XML not HTML, we need to escape the script tags.
|
|
content = BookService.EscapeTags(content);
|
|
|
|
doc.LoadHtml(content);
|
|
var body = doc.DocumentNode.SelectSingleNode("//body");
|
|
|
|
if (body == null)
|
|
{
|
|
if (doc.ParseErrors.Any())
|
|
{
|
|
_logger.LogError("{FilePath} has an invalid html file (Page {PageName})", book.FilePath, contentFileRef.FileName);
|
|
foreach (var error in doc.ParseErrors)
|
|
{
|
|
_logger.LogError("Line {LineNumber}, Reason: {Reason}", error.Line, error.Reason);
|
|
}
|
|
|
|
return BadRequest("The file is malformed! Cannot read.");
|
|
}
|
|
_logger.LogError("{FilePath} has no body tag! Generating one for support. Book may be skewed", book.FilePath);
|
|
doc.DocumentNode.SelectSingleNode("/html").AppendChild(HtmlNode.CreateNode("<body></body>"));
|
|
body = doc.DocumentNode.SelectSingleNode("/html/body");
|
|
}
|
|
|
|
var inlineStyles = doc.DocumentNode.SelectNodes("//style");
|
|
if (inlineStyles != null)
|
|
{
|
|
foreach (var inlineStyle in inlineStyles)
|
|
{
|
|
var styleContent = await _bookService.ScopeStyles(inlineStyle.InnerHtml, apiBase, "", book);
|
|
body.PrependChild(HtmlNode.CreateNode($"<style>{styleContent}</style>"));
|
|
}
|
|
}
|
|
|
|
var styleNodes = doc.DocumentNode.SelectNodes("/html/head/link");
|
|
if (styleNodes != null)
|
|
{
|
|
foreach (var styleLinks in styleNodes)
|
|
{
|
|
var key = BookService.CleanContentKeys(styleLinks.Attributes["href"].Value);
|
|
// Some epubs are malformed the key in content.opf might be: content/resources/filelist_0_0.xml but the actual html links to resources/filelist_0_0.xml
|
|
// In this case, we will do a search for the key that ends with
|
|
if (!book.Content.Css.ContainsKey(key))
|
|
{
|
|
var correctedKey = book.Content.Css.Keys.SingleOrDefault(s => s.EndsWith(key));
|
|
if (correctedKey == null)
|
|
{
|
|
_logger.LogError("Epub is Malformed, key: {Key} is not matching OPF file", key);
|
|
continue;
|
|
}
|
|
|
|
key = correctedKey;
|
|
}
|
|
|
|
var styleContent = await _bookService.ScopeStyles(await book.Content.Css[key].ReadContentAsync(), apiBase, book.Content.Css[key].FileName, book);
|
|
body.PrependChild(HtmlNode.CreateNode($"<style>{styleContent}</style>"));
|
|
}
|
|
}
|
|
|
|
var anchors = doc.DocumentNode.SelectNodes("//a");
|
|
if (anchors != null)
|
|
{
|
|
foreach (var anchor in anchors)
|
|
{
|
|
BookService.UpdateLinks(anchor, mappings, page);
|
|
}
|
|
}
|
|
|
|
var images = doc.DocumentNode.SelectNodes("//img");
|
|
if (images != null)
|
|
{
|
|
foreach (var image in images)
|
|
{
|
|
if (image.Name != "img") continue;
|
|
|
|
// Need to do for xlink:href
|
|
if (image.Attributes["src"] != null)
|
|
{
|
|
var imageFile = image.Attributes["src"].Value;
|
|
if (!book.Content.Images.ContainsKey(imageFile))
|
|
{
|
|
var correctedKey = book.Content.Images.Keys.SingleOrDefault(s => s.EndsWith(imageFile));
|
|
if (correctedKey != null)
|
|
{
|
|
imageFile = correctedKey;
|
|
}
|
|
}
|
|
image.Attributes.Remove("src");
|
|
image.Attributes.Add("src", $"{apiBase}" + imageFile);
|
|
}
|
|
}
|
|
}
|
|
|
|
images = doc.DocumentNode.SelectNodes("//image");
|
|
if (images != null)
|
|
{
|
|
foreach (var image in images)
|
|
{
|
|
if (image.Name != "image") continue;
|
|
|
|
if (image.Attributes["xlink:href"] != null)
|
|
{
|
|
var imageFile = image.Attributes["xlink:href"].Value;
|
|
if (!book.Content.Images.ContainsKey(imageFile))
|
|
{
|
|
var correctedKey = book.Content.Images.Keys.SingleOrDefault(s => s.EndsWith(imageFile));
|
|
if (correctedKey != null)
|
|
{
|
|
imageFile = correctedKey;
|
|
}
|
|
}
|
|
image.Attributes.Remove("xlink:href");
|
|
image.Attributes.Add("xlink:href", $"{apiBase}" + imageFile);
|
|
}
|
|
}
|
|
}
|
|
|
|
// Check if any classes on the html node (some r2l books do this) and move them to body tag for scoping
|
|
var htmlNode = doc.DocumentNode.SelectSingleNode("//html");
|
|
if (htmlNode != null && htmlNode.Attributes.Contains("class"))
|
|
{
|
|
var bodyClasses = body.Attributes.Contains("class") ? body.Attributes["class"].Value : string.Empty;
|
|
var classes = htmlNode.Attributes["class"].Value + " " + bodyClasses;
|
|
body.Attributes.Add("class", $"{classes}");
|
|
// I actually need the body tag itself for the classes, so i will create a div and put the body stuff there.
|
|
return Ok($"<div class=\"{body.Attributes["class"].Value}\">{body.InnerHtml}</div>");
|
|
}
|
|
|
|
|
|
return Ok(body.InnerHtml);
|
|
}
|
|
|
|
counter++;
|
|
}
|
|
|
|
return BadRequest("Could not find the appropriate html for that page");
|
|
}
|
|
}
|
|
}
|