Kavita/API/Controllers/BookController.cs
Joseph Milazzo cf7a9aa71e
Reading Lists & More (#564)
* Added continuous reading to the book reader. Clicking on the max pages to the right of the progress bar will now go to the last page.

* Forgot a file for continuous book reading

* Fixed up some code regarding transitioning between chapters. Arrows now show to represent a chapter transition.

* Laid the foundation for reading lists

* All foundation is laid out. Actions are wired in the UI. Backend repository is setup. Redid the migration to have ReadingList track modification so we can order them for the user.

* Updated add modal to have basic skeleton

* Hooked up ability to fetch reading lists from backend

* Made a huge performance improvement to GetChapterIdsForSeriesAsync() by reducing a JOIN and an iteration loop. Improvement went from 2 seconds -> 200 ms.

* Implemented the ability to add all chapters in a series to a reading list.

* Fixed issue with adding new items to reading list not being in a logical order. Lots of work on getting all the information around the reading list view. Added some foreign keys back to chapter so delete should clean up after itself.

* Added ability to open the series directly

* Reading List Items now have progress attached

* Hooked up list deletion and added a case where, if the list doesn't exist on load, the user is redirected to the library.

* Lots of changes. Introduced a dashboard component for the main app. This will sit on libraries route for now and will have 3 tabs to show different sections.

Moved libraries reel down to bottom as people are more likely to access recently added or in progress than explore their whole library.

Note: Bundles are messed up, they need to be reoptimized and routes need to be updated.

* Added pagination to the reading lists api and implemented a page to show all lists

* Cleaned up old code from all-collections component so now it only handles all collections and doesn't have the old code for an individual collection

* Hooked in actions and navigation on reading lists

* When the user re-arranges items, they are now persisted

* Implemented remove read, but performance is pretty poor. Needs to be optimized.

* Lots of API fixes for adding items to a series, returning items, etc. Committing before fixing incorrect fetches of items for a readingListId.

* Rewrote the joins for GetReadingListItemDtosByIdAsync() to not return extra records.

* Remove bug marker now that it is fixed

* Refactor update-by-series to move more of the code to a re-usable function for update-by-volume/chapter APIs

* Implemented the ability to add via series, volume or chapter.

* Added OPDS support for reading lists. This included adding VolumeId to the ReadingListDto.

* Fixed a bug with deleting items

* After we create a library inform user that a scan has started

* Added some extra help information for users on directory picker, since linux users were getting confused.

* Setup for the reading functionality

* Fixed an issue where opening the edit series modal and pressing save without doing anything would empty collection tags. Would happen often when editing cover images.

* Fixed get-next-chapter for reading list. Refactored all methods to use the new GetUserIdByUsernameAsync(), which is much faster and uses less memory.

* Hooked in prev chapter for continuous reading with reading list

* Hooked up the read code for manga reader and book reader to have list id passed

* Manga reader now functions completely with reading lists

* Implemented reading list and incognito mode into book reader

* Refactored some common reading code into reader service

* Added support for "Series -  - Vol. 03 Ch. 023.5 - Volume 3 Extras.cbz" format that can occur with FMD2.

* Implemented continuous reading with a reading list between different readers. This incurs a 3x performance hit on the book info api.

* style changes. Don't emit an event if position of draggable item hasn't changed

* Styling and added the edit reading list flow.

* Cleaned up some extra spaces when actionables isn't shown. Lots of cleanup for promoted lists.

* Refactored some filter code to a common service

* Added an RBS check in getting Items for a given user.

* Code smells

* More smells
2021-09-08 12:03:27 -05:00

356 lines
16 KiB
C#

using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading.Tasks;
using API.DTOs;
using API.DTOs.Reader;
using API.Entities.Enums;
using API.Extensions;
using API.Interfaces;
using API.Interfaces.Services;
using API.Services;
using HtmlAgilityPack;
using Microsoft.AspNetCore.Mvc;
using Microsoft.Extensions.Logging;
using VersOne.Epub;
namespace API.Controllers
{
public class BookController : BaseApiController
{
/// <summary>
/// Route fragment (endpoint name plus the start of its <c>file</c> query parameter) that is appended to
/// <c>.../book/{chapterId}/</c> when building URLs that point at the book-resources endpoint.
/// </summary>
private const string BookApiUrl = "book-resources?file=";

private readonly ICacheService _cacheService;
private readonly IUnitOfWork _unitOfWork;
private readonly IBookService _bookService;
private readonly ILogger<BookController> _logger;

/// <summary>
/// Creates the controller and stores the injected services for use by the endpoints.
/// </summary>
/// <param name="logger">Logger used for reporting malformed epub content.</param>
/// <param name="bookService">Service used for key-to-page mappings and style scoping.</param>
/// <param name="unitOfWork">Provides the volume and series repositories.</param>
/// <param name="cacheService">Provides the cached epub file for a chapter.</param>
public BookController(ILogger<BookController> logger, IBookService bookService, IUnitOfWork unitOfWork, ICacheService cacheService)
{
    (_logger, _bookService, _unitOfWork, _cacheService) = (logger, bookService, unitOfWork, cacheService);
}
/// <summary>
/// Returns basic information about the book behind a chapter: its title (only read for epub series)
/// and the ids of the volume, series and library it belongs to.
/// </summary>
/// <param name="chapterId">Id of the chapter to look up.</param>
/// <returns>
/// A <see cref="BookInfoDto"/> on success; a 400 response when the chapter, its volume, its series
/// or (for epubs) its backing file cannot be found.
/// </returns>
[HttpGet("{chapterId}/book-info")]
public async Task<ActionResult<BookInfoDto>> GetBookInfo(int chapterId)
{
    // PERF: Write this in one DB call - This does not meet NFR
    var chapter = await _unitOfWork.VolumeRepository.GetChapterAsync(chapterId);
    // Guard before dereferencing: an unknown chapter id must be a client error, not a NullReferenceException.
    if (chapter == null) return BadRequest("Could not find Chapter");

    var volume = await _unitOfWork.SeriesRepository.GetVolumeDtoAsync(chapter.VolumeId);
    if (volume == null) return BadRequest("Could not find Volume");

    var series = await _unitOfWork.SeriesRepository.GetSeriesByIdAsync(volume.SeriesId);
    if (series == null) return BadRequest("Series could not be found");

    var bookTitle = string.Empty;
    if (series.Format == MangaFormat.Epub)
    {
        // The title is read from the first file of the chapter; a chapter without files cannot be opened.
        var epubFile = chapter.Files?.FirstOrDefault();
        if (epubFile == null) return BadRequest("Chapter has no files");

        using var book = await EpubReader.OpenBookAsync(epubFile.FilePath);
        bookTitle = book.Title;
    }

    return new BookInfoDto()
    {
        BookTitle = bookTitle,
        VolumeId = chapter.VolumeId,
        SeriesFormat = series.Format,
        SeriesId = series.Id,
        LibraryId = series.LibraryId,
    };
}
/// <summary>
/// Serves a single resource (image, font, etc.) stored inside the chapter's epub.
/// </summary>
/// <param name="chapterId">Id of the chapter whose first file is the epub to read from.</param>
/// <param name="file">Key of the resource inside the epub, as referenced by the book's content.</param>
/// <returns>
/// The resource bytes with a cache header and the resolved content type; a 400 response when the
/// chapter does not exist or the requested file is not part of the book.
/// </returns>
[HttpGet("{chapterId}/book-resources")]
public async Task<ActionResult> GetBookPageResources(int chapterId, [FromQuery] string file)
{
    // A missing query value can never match a file in the book; reject it before touching the epub.
    if (string.IsNullOrEmpty(file)) return BadRequest("File was not found in book");

    var chapter = await _unitOfWork.VolumeRepository.GetChapterAsync(chapterId);
    if (chapter == null) return BadRequest("Could not find Chapter");

    // Dispose the book reference when the request ends so the epub file is not left open.
    // The content is fully read into memory below, so disposing after building the result is safe.
    using var book = await EpubReader.OpenBookAsync(chapter.Files.ElementAt(0).FilePath);

    var key = BookService.CleanContentKeys(file);
    if (!book.Content.AllFiles.TryGetValue(key, out var bookFile)) return BadRequest("File was not found in book");

    var content = await bookFile.ReadContentAsBytesAsync();
    Response.AddCacheHeader(content);
    var contentType = BookService.GetContentType(bookFile.ContentType);
    return File(content, contentType, $"{chapterId}-{file}");
}
/// <summary>
/// Builds the table of contents for the chapter's epub as a list of <see cref="BookChapterItem"/>.
/// Each item maps a title to a page number (reading-order position) and, optionally, an anchor within that page.
/// The front end uses this to rewrite anchors in the book text so that navigation always loads properly.
/// </summary>
/// <remarks>
/// The list is first built from the epub navigation. When that yields nothing, it falls back to scraping
/// anchor tags from the first HTML file whose key contains "TOC".
/// </remarks>
/// <param name="chapterId">Id of the chapter whose first file is the epub to read.</param>
/// <returns>The chapter list (possibly empty); a 400 response when the chapter does not exist.</returns>
[HttpGet("{chapterId}/chapters")]
public async Task<ActionResult<ICollection<BookChapterItem>>> GetBookChapters(int chapterId)
{
    var chapter = await _unitOfWork.VolumeRepository.GetChapterAsync(chapterId);
    if (chapter == null) return BadRequest("Could not find Chapter");

    using var book = await EpubReader.OpenBookAsync(chapter.Files.ElementAt(0).FilePath);
    // Mapping of content key (xhtml file) -> page number
    var mappings = await _bookService.CreateKeyToPageMappingAsync(book);
    var navItems = await book.GetNavigationAsync();
    var chaptersList = new List<BookChapterItem>();

    foreach (var navigationItem in navItems)
    {
        // Only navigation entries that have nested entries are converted here.
        // NOTE(review): entries without children are skipped, so a flat navigation relies on the TOC fallback below - confirm this is intended.
        if (navigationItem.NestedItems.Count == 0) continue;

        var nestedChapters = new List<BookChapterItem>();
        foreach (var nestedChapter in navigationItem.NestedItems)
        {
            if (nestedChapter.Link == null) continue;

            var key = BookService.CleanContentKeys(nestedChapter.Link.ContentFileName);
            if (!mappings.TryGetValue(key, out var nestedPage)) continue;

            nestedChapters.Add(new BookChapterItem()
            {
                Title = nestedChapter.Title,
                Page = nestedPage,
                Part = nestedChapter.Link.Anchor ?? string.Empty,
                Children = new List<BookChapterItem>()
            });
        }

        if (navigationItem.Link == null)
        {
            // A pure grouping entry has no target of its own, so it points at its first child (when there is one).
            var item = new BookChapterItem()
            {
                Title = navigationItem.Title,
                Children = nestedChapters
            };
            if (nestedChapters.Count > 0)
            {
                item.Page = nestedChapters[0].Page;
            }
            chaptersList.Add(item);
            continue;
        }

        var groupKey = BookService.CleanContentKeys(navigationItem.Link.ContentFileName);
        if (mappings.TryGetValue(groupKey, out var groupPage))
        {
            chaptersList.Add(new BookChapterItem()
            {
                Title = navigationItem.Title,
                Page = groupPage,
                Children = nestedChapters
            });
        }
    }

    if (chaptersList.Count > 0) return Ok(chaptersList);

    // Generate from TOC
    var tocPage = book.Content.Html.Keys.FirstOrDefault(k => k.Contains("TOC", StringComparison.OrdinalIgnoreCase));
    if (tocPage == null) return Ok(chaptersList);

    // Find all anchor tags, for each anchor we get inner text, to lower then titlecase on UI. Get href and generate page content
    var tocDocument = new HtmlDocument();
    var tocContent = await book.Content.Html[tocPage].ReadContentAsync();
    tocDocument.LoadHtml(tocContent);
    var tocAnchors = tocDocument.DocumentNode.SelectNodes("//a");
    if (tocAnchors == null) return Ok(chaptersList);

    foreach (var anchor in tocAnchors)
    {
        if (!anchor.Attributes.Contains("href")) continue;

        var href = anchor.Attributes["href"].Value;
        var key = BookService.CleanContentKeys(href).Split("#")[0];
        if (string.IsNullOrEmpty(key))
        {
            // A fragment-only link ("#part") targets the document that contains it, i.e. the TOC page itself.
            // Without this, the suffix search below would match every file because every key ends with "".
            key = tocPage;
        }
        else if (!mappings.ContainsKey(key))
        {
            // Fallback to searching for key (bad epub metadata). The first match wins; several files
            // sharing the same suffix must not fail the whole request.
            var correctedKey = book.Content.Html.Keys.FirstOrDefault(s => s.EndsWith(key, StringComparison.Ordinal));
            if (!string.IsNullOrEmpty(correctedKey))
            {
                key = correctedKey;
            }
        }

        if (!mappings.TryGetValue(key, out var tocEntryPage)) continue;

        var part = href.Contains("#") ? href.Split("#")[1] : string.Empty;
        chaptersList.Add(new BookChapterItem()
        {
            Title = anchor.InnerText,
            Page = tocEntryPage,
            Part = part,
            Children = new List<BookChapterItem>()
        });
    }

    return Ok(chaptersList);
}
/// <summary>
/// Returns the markup of one page of the chapter's epub, where a page is an entry of the book's reading order.
/// For XHTML pages the markup is rewritten before it is returned: inline and linked styles are passed through
/// <c>ScopeStyles</c> and prepended to the body, anchors are passed through <c>BookService.UpdateLinks</c>,
/// and image references are pointed at this controller's book-resources endpoint.
/// </summary>
/// <param name="chapterId">Id of the chapter to read.</param>
/// <param name="page">Zero-based position of the page in the book's reading order.</param>
/// <returns>
/// The raw content for non-XHTML pages, otherwise the inner HTML of the body (wrapped in a div carrying the
/// html/body classes when the html element declares classes). A 400 response is returned when the chapter or
/// page does not exist or the page cannot be parsed.
/// </returns>
[HttpGet("{chapterId}/book-page")]
public async Task<ActionResult<string>> GetBookPage(int chapterId, [FromQuery] int page)
{
    var chapter = await _cacheService.Ensure(chapterId);
    if (chapter == null) return BadRequest("Could not find Chapter");

    var path = _cacheService.GetCachedEpubFile(chapter.Id, chapter);
    using var book = await EpubReader.OpenBookAsync(path);
    var mappings = await _bookService.CreateKeyToPageMappingAsync(book);

    var baseUrl = Request.Scheme + "://" + Request.Host + Request.PathBase + "/api/";
    var apiBase = baseUrl + "book/" + chapterId + "/" + BookApiUrl;

    // Negative or too-large page numbers yield null here rather than throwing.
    var bookPages = await book.GetReadingOrderAsync();
    var contentFileRef = bookPages.ElementAtOrDefault(page);
    if (contentFileRef == null) return BadRequest("Could not find the appropriate html for that page");

    var content = await contentFileRef.ReadContentAsync();
    if (contentFileRef.ContentType != EpubContentType.XHTML_1_1) return Ok(content);

    // In more cases than not, due to this being XML not HTML, we need to escape the script tags.
    content = BookService.EscapeTags(content);
    var doc = new HtmlDocument {OptionFixNestedTags = true};
    doc.LoadHtml(content);

    var body = doc.DocumentNode.SelectSingleNode("//body");
    if (body == null)
    {
        if (doc.ParseErrors.Any())
        {
            LogBookErrors(book, contentFileRef, doc);
            return BadRequest("The file is malformed! Cannot read.");
        }

        _logger.LogError("{FilePath} has no body tag! Generating one for support. Book may be skewed", book.FilePath);
        var root = doc.DocumentNode.SelectSingleNode("/html");
        // Without an html element there is nothing to attach a generated body to.
        if (root == null) return BadRequest("The file is malformed! Cannot read.");
        root.AppendChild(HtmlNode.CreateNode("<body></body>"));
        body = doc.DocumentNode.SelectSingleNode("/html/body");
    }

    var inlineStyles = doc.DocumentNode.SelectNodes("//style");
    if (inlineStyles != null)
    {
        foreach (var inlineStyle in inlineStyles)
        {
            var styleContent = await _bookService.ScopeStyles(inlineStyle.InnerHtml, apiBase, "", book);
            body.PrependChild(HtmlNode.CreateNode($"<style>{styleContent}</style>"));
        }
    }

    var styleNodes = doc.DocumentNode.SelectNodes("/html/head/link");
    if (styleNodes != null)
    {
        foreach (var styleLink in styleNodes)
        {
            // A <link> without an href has nothing to inline.
            var href = styleLink.Attributes["href"];
            if (href == null) continue;

            var key = BookService.CleanContentKeys(href.Value);
            // An empty key would match every stylesheet in the suffix search below.
            if (string.IsNullOrEmpty(key)) continue;

            // Some epubs are malformed the key in content.opf might be: content/resources/filelist_0_0.xml but the actual html links to resources/filelist_0_0.xml
            // In this case, we will do a search for the key that ends with
            if (!book.Content.Css.ContainsKey(key))
            {
                // First match wins; several stylesheets sharing the suffix must not fail the whole page.
                var correctedKey = book.Content.Css.Keys.FirstOrDefault(s => s.EndsWith(key, StringComparison.Ordinal));
                if (correctedKey == null)
                {
                    _logger.LogError("Epub is Malformed, key: {Key} is not matching OPF file", key);
                    continue;
                }
                key = correctedKey;
            }

            var styleContent = await _bookService.ScopeStyles(await book.Content.Css[key].ReadContentAsync(), apiBase, book.Content.Css[key].FileName, book);
            body.PrependChild(HtmlNode.CreateNode($"<style>{styleContent}</style>"));
        }
    }

    var anchors = doc.DocumentNode.SelectNodes("//a");
    if (anchors != null)
    {
        foreach (var anchor in anchors)
        {
            BookService.UpdateLinks(anchor, mappings, page);
        }
    }

    // HTML images use src, SVG images use xlink:href
    RewriteImageSources(doc, book, "img", "src", apiBase);
    RewriteImageSources(doc, book, "image", "xlink:href", apiBase);

    // Check if any classes on the html node (some r2l books do this) and move them to body tag for scoping
    var htmlNode = doc.DocumentNode.SelectSingleNode("//html");
    if (htmlNode != null && htmlNode.Attributes.Contains("class"))
    {
        var bodyClasses = body.Attributes.Contains("class") ? body.Attributes["class"].Value : string.Empty;
        var classes = htmlNode.Attributes["class"].Value + " " + bodyClasses;
        body.Attributes.Add("class", $"{classes}");
        // I actually need the body tag itself for the classes, so i will create a div and put the body stuff there.
        return Ok($"<div class=\"{body.Attributes["class"].Value}\">{body.InnerHtml}</div>");
    }

    return Ok(body.InnerHtml);
}

/// <summary>
/// Points every <paramref name="tagName"/> element's <paramref name="attributeName"/> attribute at the
/// book-resources endpoint by prefixing the referenced image key with <paramref name="apiBase"/>.
/// When the referenced key is not an image key of the book, the first image key ending with it is used instead.
/// </summary>
/// <param name="doc">Parsed page whose image elements are rewritten in place.</param>
/// <param name="book">Book whose image keys are used to resolve references.</param>
/// <param name="tagName">Element name to process, e.g. <c>img</c> or <c>image</c>.</param>
/// <param name="attributeName">Attribute holding the image reference, e.g. <c>src</c> or <c>xlink:href</c>.</param>
/// <param name="apiBase">URL prefix ending in the <c>file=</c> query parameter.</param>
private static void RewriteImageSources(HtmlDocument doc, EpubBookRef book, string tagName, string attributeName, string apiBase)
{
    var images = doc.DocumentNode.SelectNodes("//" + tagName);
    if (images == null) return;

    foreach (var image in images)
    {
        if (image.Name != tagName) continue;

        var source = image.Attributes[attributeName];
        if (source == null) continue;

        var imageFile = source.Value;
        // Skip the lookup for empty references: every key ends with "", so the suffix search would pick an arbitrary image.
        if (!string.IsNullOrEmpty(imageFile) && !book.Content.Images.ContainsKey(imageFile))
        {
            // First match wins; several images sharing the suffix must not fail the whole page.
            var correctedKey = book.Content.Images.Keys.FirstOrDefault(s => s.EndsWith(imageFile, StringComparison.Ordinal));
            if (correctedKey != null)
            {
                imageFile = correctedKey;
            }
        }

        image.Attributes.Remove(attributeName);
        image.Attributes.Add(attributeName, $"{apiBase}" + imageFile);
    }
}
/// <summary>
/// Logs that a page of the book could not be parsed, followed by one error entry per parse error
/// recorded on the document.
/// </summary>
/// <param name="book">Book being read; its file path is included in the log entry.</param>
/// <param name="contentFileRef">Page that failed to parse; its file name is included in the log entry.</param>
/// <param name="doc">Parsed document whose <c>ParseErrors</c> are reported.</param>
private void LogBookErrors(EpubBookRef book, EpubTextContentFileRef contentFileRef, HtmlDocument doc)
{
    var bookPath = book.FilePath;
    var pageName = contentFileRef.FileName;
    _logger.LogError("{FilePath} has an invalid html file (Page {PageName})", bookPath, pageName);

    foreach (var parseError in doc.ParseErrors)
    {
        var (lineNumber, reason) = (parseError.Line, parseError.Reason);
        _logger.LogError("Line {LineNumber}, Reason: {Reason}", lineNumber, reason);
    }
}
}
}