diff --git a/API/Controllers/BookController.cs b/API/Controllers/BookController.cs index a2af28ab6..e5a980467 100644 --- a/API/Controllers/BookController.cs +++ b/API/Controllers/BookController.cs @@ -186,6 +186,9 @@ namespace API.Controllers var content = await contentFileRef.ReadContentAsync(); if (contentFileRef.ContentType != EpubContentType.XHTML_1_1) return Ok(content); + // In more cases than not, due to this being XML not HTML, we need to escape the script tags. + content = BookService.EscapeTags(content); + doc.LoadHtml(content); var body = doc.DocumentNode.SelectSingleNode("//body"); diff --git a/API/Parser/Parser.cs b/API/Parser/Parser.cs index 149d5a940..efcb6c550 100644 --- a/API/Parser/Parser.cs +++ b/API/Parser/Parser.cs @@ -15,7 +15,7 @@ namespace API.Parser public const string ArchiveFileExtensions = @"\.cbz|\.zip|\.rar|\.cbr|\.tar.gz|\.7zip|\.7z|.cb7"; public const string BookFileExtensions = @"\.epub"; public const string ImageFileExtensions = @"^(\.png|\.jpeg|\.jpg)"; - public static readonly Regex FontSrcUrlRegex = new Regex("(src:url\\(\"?'?)([a-z0-9/\\._]+)(\"?'?\\))", RegexOptions.IgnoreCase | RegexOptions.Compiled); + public static readonly Regex FontSrcUrlRegex = new Regex(@"(src:url\(.{1})" + "([^\"']*)" + @"(.{1}\))", RegexOptions.IgnoreCase | RegexOptions.Compiled); public static readonly Regex CssImportUrlRegex = new Regex("(@import\\s[\"|'])(?[\\w\\d/\\._-]+)([\"|'];?)", RegexOptions.IgnoreCase | RegexOptions.Compiled); private static readonly string XmlRegexExtensions = @"\.xml"; diff --git a/API/Services/BookService.cs b/API/Services/BookService.cs index 3d35919f0..b0252f122 100644 --- a/API/Services/BookService.cs +++ b/API/Services/BookService.cs @@ -23,7 +23,7 @@ namespace API.Services private const int ThumbnailWidth = 320; // 153w x 230h private readonly StylesheetParser _cssParser = new (); - + public BookService(ILogger logger) { _logger = logger; @@ -204,6 +204,13 @@ namespace API.Services return 0; } + public static string EscapeTags(string content) + { + content = Regex.Replace(content, @")", ""); + content = Regex.Replace(content, @")", ""); + return content; + } + public static string CleanContentKeys(string key) { return key.Replace("../", string.Empty); @@ -241,14 +248,23 @@ namespace API.Services // // If all three are present, we can take that over dc:title and format as: // Series = The Dark Tower, Volume = 5, Filename as "Wolves of the Calla" + // In addition, the following can exist and should parse as a series (EPUB 3.2 spec) + // + // The Lord of the Rings + // + // set + // 2 try { - string seriesIndex = string.Empty; - string series = string.Empty; - string specialName = string.Empty; + var seriesIndex = string.Empty; + var series = string.Empty; + var specialName = string.Empty; + var groupPosition = string.Empty; + foreach (var metadataItem in epubBook.Schema.Package.Metadata.MetaItems) { + // EPUB 2 and 3 switch (metadataItem.Name) { case "calibre:series_index": @@ -261,10 +277,29 @@ namespace API.Services specialName = metadataItem.Content; break; } + + // EPUB 3.2+ only + switch (metadataItem.Property) + { + case "group-position": + seriesIndex = metadataItem.Content; + break; + case "belongs-to-collection": + series = metadataItem.Content; + break; + case "collection-type": + groupPosition = metadataItem.Content; + break; + } } - if (!string.IsNullOrEmpty(series) && !string.IsNullOrEmpty(seriesIndex) && !string.IsNullOrEmpty(specialName)) + if (!string.IsNullOrEmpty(series) && !string.IsNullOrEmpty(seriesIndex) && + (!string.IsNullOrEmpty(specialName) || groupPosition.Equals("series") || groupPosition.Equals("set"))) { + if (string.IsNullOrEmpty(specialName)) + { + specialName = epubBook.Title; + } return new ParserInfo() { Chapters = "0",