From 60dd66f6ae28248078fc5b7e2f167691533146e3 Mon Sep 17 00:00:00 2001 From: Joseph Milazzo Date: Mon, 18 Oct 2021 16:28:07 -0700 Subject: [PATCH] EPUB CSS Parsing Issues (#690) * WIP. Rewrote some of the Regex to better support css escaping. We now escape background-image, border-image, and list-style-image within css files. * Added position relative to help with positioning on books that are just absolute positioned elements. * When there is absolute positioning, like in some epub based comics, suppress the bottom action bar since it won't render in the correct location. * Fixed tests * Commented out tests --- API.Tests/Parser/BookParserTests.cs | 19 ++++++++++++ API/Parser/Parser.cs | 18 +++++++++-- API/Services/BookService.cs | 30 +++++++++++++++---- .../book-reader/book-reader.component.html | 3 ++ .../book-reader/book-reader.component.scss | 5 ++++ .../book-reader/book-reader.component.ts | 14 ++++++++- 6 files changed, 81 insertions(+), 8 deletions(-) diff --git a/API.Tests/Parser/BookParserTests.cs b/API.Tests/Parser/BookParserTests.cs index b33ef1f54..7f6975fe5 100644 --- a/API.Tests/Parser/BookParserTests.cs +++ b/API.Tests/Parser/BookParserTests.cs @@ -17,5 +17,24 @@ namespace API.Tests.Parser { Assert.Equal(expected, API.Parser.Parser.ParseVolume(filename)); } + + // [Theory] + // [InlineData("@font-face{font-family:'syyskuu_repaleinen';src:url(data:font/opentype;base64,AAEAAAA", "@font-face{font-family:'syyskuu_repaleinen';src:url(data:font/opentype;base64,AAEAAAA")] + // [InlineData("@font-face{font-family:'syyskuu_repaleinen';src:url('fonts/font.css')", "@font-face{font-family:'syyskuu_repaleinen';src:url('TEST/fonts/font.css')")] + // public void ReplaceFontSrcUrl(string input, string expected) + // { + // var apiBase = "TEST/"; + // var actual = API.Parser.Parser.FontSrcUrlRegex.Replace(input, "$1" + apiBase + "$2" + "$3"); + // Assert.Equal(expected, actual); + // } + // + // [Theory] + // [InlineData("@import url('font.css');", "@import 
url('TEST/font.css');")] + // public void ReplaceImportSrcUrl(string input, string expected) + // { + // var apiBase = "TEST/"; + // var actual = API.Parser.Parser.CssImportUrlRegex.Replace(input, "$1" + apiBase + "$2" + "$3"); + // Assert.Equal(expected, actual); + // } } } diff --git a/API/Parser/Parser.cs b/API/Parser/Parser.cs index b219e264f..51a07a835 100644 --- a/API/Parser/Parser.cs +++ b/API/Parser/Parser.cs @@ -24,11 +24,25 @@ namespace API.Parser private const RegexOptions MatchOptions = RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.CultureInvariant; - public static readonly Regex FontSrcUrlRegex = new Regex(@"(src:url\(.{1})" + "([^\"']*)" + @"(.{1}\))", + /// + /// Matches against font-family css syntax. Does not match if url import has data: starting, as that is binary data + /// + /// See here for some examples https://developer.mozilla.org/en-US/docs/Web/CSS/@font-face + public static readonly Regex FontSrcUrlRegex = new Regex(@"(?(src:\s?)?url\((?!data:).(?!data:))" + "(?(?!data:)[^\"']*)" + @"(?.{1}\))", MatchOptions, RegexTimeout); - public static readonly Regex CssImportUrlRegex = new Regex("@import\\s([\"|']|url\\([\"|'])(?[^'\"]+)[\"|']\\)?;", + /// + /// https://developer.mozilla.org/en-US/docs/Web/CSS/@import + /// + public static readonly Regex CssImportUrlRegex = new Regex("(@import\\s([\"|']|url\\([\"|']))(?[^'\"]+)([\"|']\\)?);", + MatchOptions | RegexOptions.Multiline, RegexTimeout); + /// + /// Misc css image references, like background-image: url(), border-image, or list-style-image + /// + /// Original prepend: (background|border|list-style)-image:\s?)? 
+ public static readonly Regex CssImageUrlRegex = new Regex(@"(url\((?!data:).(?!data:))" + "(?(?!data:)[^\"']*)" + @"(.\))", MatchOptions, RegexTimeout); + private static readonly string XmlRegexExtensions = @"\.xml"; private static readonly Regex ImageRegex = new Regex(ImageFileExtensions, MatchOptions, RegexTimeout); diff --git a/API/Services/BookService.cs b/API/Services/BookService.cs index a114b3bd6..4449b6834 100644 --- a/API/Services/BookService.cs +++ b/API/Services/BookService.cs @@ -140,13 +140,18 @@ namespace API.Services } stylesheetHtml = stylesheetHtml.Insert(0, importBuilder.ToString()); - stylesheetHtml = - Parser.Parser.CssImportUrlRegex.Replace(stylesheetHtml, "$1" + apiBase + prepend + "$2" + "$3"); + var importMatches = Parser.Parser.CssImportUrlRegex.Matches(stylesheetHtml); + foreach (Match match in importMatches) + { + if (!match.Success) continue; + var importFile = match.Groups["Filename"].Value; + stylesheetHtml = stylesheetHtml.Replace(importFile, apiBase + prepend + importFile); + } + + // Check if there are any background images and rewrite those urls + EscapeCssImageReferences(ref stylesheetHtml, apiBase, book); var styleContent = RemoveWhiteSpaceFromStylesheets(stylesheetHtml); - styleContent = - Parser.Parser.FontSrcUrlRegex.Replace(styleContent, "$1" + apiBase + "$2" + "$3"); - styleContent = styleContent.Replace("body", ".reading-section"); var stylesheet = await _cssParser.ParseAsync(styleContent); @@ -165,6 +170,21 @@ namespace API.Services return RemoveWhiteSpaceFromStylesheets(stylesheet.ToCss()); } + private static void EscapeCssImageReferences(ref string stylesheetHtml, string apiBase, EpubBookRef book) + { + var matches = Parser.Parser.CssImageUrlRegex.Matches(stylesheetHtml); + foreach (Match match in matches) + { + if (!match.Success) continue; + + var importFile = match.Groups["Filename"].Value; + var key = CleanContentKeys(importFile); + if (!book.Content.AllFiles.ContainsKey(key)) continue; + + stylesheetHtml = 
stylesheetHtml.Replace(importFile, apiBase + key); + } + } + public ComicInfo GetComicInfo(string filePath) { if (!IsValidFile(filePath) || Parser.Parser.IsPdf(filePath)) return null; diff --git a/UI/Web/src/app/book-reader/book-reader/book-reader.component.html b/UI/Web/src/app/book-reader/book-reader/book-reader.component.html index 02ea43531..5f209490e 100644 --- a/UI/Web/src/app/book-reader/book-reader/book-reader.component.html +++ b/UI/Web/src/app/book-reader/book-reader/book-reader.component.html @@ -111,6 +111,9 @@ +
diff --git a/UI/Web/src/app/book-reader/book-reader/book-reader.component.scss b/UI/Web/src/app/book-reader/book-reader/book-reader.component.scss index f7c62217e..7132626f9 100644 --- a/UI/Web/src/app/book-reader/book-reader/book-reader.component.scss +++ b/UI/Web/src/app/book-reader/book-reader/book-reader.component.scss @@ -155,6 +155,11 @@ $primary-color: #0062cc; .reading-section { height: 100vh; + width: 100%; +} + +.book-content { + position: relative; } .drawer-body { diff --git a/UI/Web/src/app/book-reader/book-reader/book-reader.component.ts b/UI/Web/src/app/book-reader/book-reader/book-reader.component.ts index 5faeaef7e..0eee0bfb0 100644 --- a/UI/Web/src/app/book-reader/book-reader/book-reader.component.ts +++ b/UI/Web/src/app/book-reader/book-reader/book-reader.component.ts @@ -160,7 +160,11 @@ export class BookReaderComponent implements OnInit, AfterViewInit, OnDestroy { readerStyles: string = ''; darkModeStyleElem!: HTMLElement; topOffset: number = 0; // Offset for drawer and rendering canvas - scrollbarNeeded = false; // Used for showing/hiding bottom action bar + /** + * Used for showing/hiding bottom action bar. Calculates if there is enough scroll to show it. 
+ * Will hide if all content in book is absolutely positioned + */ + scrollbarNeeded = false; readingDirection: ReadingDirection = ReadingDirection.LeftToRight; private readonly onDestroy = new Subject(); @@ -715,6 +719,14 @@ export class BookReaderComponent implements OnInit, AfterViewInit, OnDestroy { this.isLoading = false; this.scrollbarNeeded = this.readingSectionElemRef.nativeElement.scrollHeight > this.readingSectionElemRef.nativeElement.clientHeight; + const itemsOnScreen = Array.from(this.readingHtml.nativeElement.querySelectorAll('*')).filter(elem => (elem as HTMLElement).nodeName != 'STYLE'); + const itemsWithAbsolutePositioning = itemsOnScreen.filter(elem => (elem as HTMLElement).style.getPropertyValue('position') === 'absolute').length; + + if (itemsWithAbsolutePositioning >= itemsOnScreen.length) { + // Suppress bottom actionbar. This is because of how the html is structured, with abs positioning, it will render inside images, etc. + this.scrollbarNeeded = false; + } + // Find all the part ids and their top offset this.setupPageAnchors();