mirror of
https://github.com/Kareadita/Kavita.git
synced 2025-06-03 13:44:31 -04:00
Epub 3.2 Collection Tag support (#308)
* Hooked up logic for collections based on the EPUB 3.2 spec and fixed improperly self-closed tags in EPUBs (the content is XML, but we parse it with an HTML parser). * Fixed a bug in the src:url URL replacement by using a much cleaner regex
This commit is contained in:
parent
584348c6ad
commit
d02d2d3cb5
@ -186,6 +186,9 @@ namespace API.Controllers
|
|||||||
var content = await contentFileRef.ReadContentAsync();
|
var content = await contentFileRef.ReadContentAsync();
|
||||||
if (contentFileRef.ContentType != EpubContentType.XHTML_1_1) return Ok(content);
|
if (contentFileRef.ContentType != EpubContentType.XHTML_1_1) return Ok(content);
|
||||||
|
|
||||||
|
// In more cases than not, due to this being XML not HTML, we need to escape the script tags.
|
||||||
|
content = BookService.EscapeTags(content);
|
||||||
|
|
||||||
doc.LoadHtml(content);
|
doc.LoadHtml(content);
|
||||||
var body = doc.DocumentNode.SelectSingleNode("//body");
|
var body = doc.DocumentNode.SelectSingleNode("//body");
|
||||||
|
|
||||||
|
@ -15,7 +15,7 @@ namespace API.Parser
|
|||||||
public const string ArchiveFileExtensions = @"\.cbz|\.zip|\.rar|\.cbr|\.tar.gz|\.7zip|\.7z|.cb7";
|
public const string ArchiveFileExtensions = @"\.cbz|\.zip|\.rar|\.cbr|\.tar.gz|\.7zip|\.7z|.cb7";
|
||||||
public const string BookFileExtensions = @"\.epub";
|
public const string BookFileExtensions = @"\.epub";
|
||||||
public const string ImageFileExtensions = @"^(\.png|\.jpeg|\.jpg)";
|
public const string ImageFileExtensions = @"^(\.png|\.jpeg|\.jpg)";
|
||||||
public static readonly Regex FontSrcUrlRegex = new Regex("(src:url\\(\"?'?)([a-z0-9/\\._]+)(\"?'?\\))", RegexOptions.IgnoreCase | RegexOptions.Compiled);
|
public static readonly Regex FontSrcUrlRegex = new Regex(@"(src:url\(.{1})" + "([^\"']*)" + @"(.{1}\))", RegexOptions.IgnoreCase | RegexOptions.Compiled);
|
||||||
public static readonly Regex CssImportUrlRegex = new Regex("(@import\\s[\"|'])(?<Filename>[\\w\\d/\\._-]+)([\"|'];?)", RegexOptions.IgnoreCase | RegexOptions.Compiled);
|
public static readonly Regex CssImportUrlRegex = new Regex("(@import\\s[\"|'])(?<Filename>[\\w\\d/\\._-]+)([\"|'];?)", RegexOptions.IgnoreCase | RegexOptions.Compiled);
|
||||||
|
|
||||||
private static readonly string XmlRegexExtensions = @"\.xml";
|
private static readonly string XmlRegexExtensions = @"\.xml";
|
||||||
|
@ -23,7 +23,7 @@ namespace API.Services
|
|||||||
|
|
||||||
private const int ThumbnailWidth = 320; // 153w x 230h
|
private const int ThumbnailWidth = 320; // 153w x 230h
|
||||||
private readonly StylesheetParser _cssParser = new ();
|
private readonly StylesheetParser _cssParser = new ();
|
||||||
|
|
||||||
public BookService(ILogger<BookService> logger)
|
public BookService(ILogger<BookService> logger)
|
||||||
{
|
{
|
||||||
_logger = logger;
|
_logger = logger;
|
||||||
@ -204,6 +204,13 @@ namespace API.Services
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Expands self-closing <c>&lt;script .../&gt;</c> and <c>&lt;title .../&gt;</c> tags into an explicit
/// open/close pair (<c>&lt;script ...&gt;&lt;/script&gt;</c>, <c>&lt;title ...&gt;&lt;/title&gt;</c>).
/// </summary>
/// <remarks>
/// EPUB content documents are XML, where a self-closing form is legal; HTML-oriented parsing does not
/// treat these elements as self-closing, so they are rewritten before the content is loaded.
/// </remarks>
/// <param name="content">Raw (X)HTML markup of a content file.</param>
/// <returns>The markup with self-closing script/title tags expanded; everything else is left untouched.</returns>
public static string EscapeTags(string content)
{
    // (?=[\s/]) : the tag name must end here, so e.g. <scripts/> or <titles/> are not rewritten.
    // [^>]*?    : attributes may not cross a '>' - the match stays inside ONE tag instead of greedily
    //             running to the last "/>" on the line and swallowing unrelated tags on the way.
    content = Regex.Replace(content, @"<script(?=[\s/])([^>]*?)/>", "<script$1></script>");
    content = Regex.Replace(content, @"<title(?=[\s/])([^>]*?)/>", "<title$1></title>");
    return content;
}
|
||||||
|
|
||||||
public static string CleanContentKeys(string key)
|
public static string CleanContentKeys(string key)
|
||||||
{
|
{
|
||||||
return key.Replace("../", string.Empty);
|
return key.Replace("../", string.Empty);
|
||||||
@ -241,14 +248,23 @@ namespace API.Services
|
|||||||
// <meta content="Wolves of the Calla" name="calibre:title_sort"/>
|
// <meta content="Wolves of the Calla" name="calibre:title_sort"/>
|
||||||
// If all three are present, we can take that over dc:title and format as:
|
// If all three are present, we can take that over dc:title and format as:
|
||||||
// Series = The Dark Tower, Volume = 5, Filename as "Wolves of the Calla"
|
// Series = The Dark Tower, Volume = 5, Filename as "Wolves of the Calla"
|
||||||
|
// In addition, the following can exist and should parse as a series (EPUB 3.2 spec)
|
||||||
|
// <meta property="belongs-to-collection" id="c01">
|
||||||
|
// The Lord of the Rings
|
||||||
|
// </meta>
|
||||||
|
// <meta refines="#c01" property="collection-type">set</meta>
|
||||||
|
// <meta refines="#c01" property="group-position">2</meta>
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
string seriesIndex = string.Empty;
|
var seriesIndex = string.Empty;
|
||||||
string series = string.Empty;
|
var series = string.Empty;
|
||||||
string specialName = string.Empty;
|
var specialName = string.Empty;
|
||||||
|
var groupPosition = string.Empty;
|
||||||
|
|
||||||
|
|
||||||
foreach (var metadataItem in epubBook.Schema.Package.Metadata.MetaItems)
|
foreach (var metadataItem in epubBook.Schema.Package.Metadata.MetaItems)
|
||||||
{
|
{
|
||||||
|
// EPUB 2 and 3
|
||||||
switch (metadataItem.Name)
|
switch (metadataItem.Name)
|
||||||
{
|
{
|
||||||
case "calibre:series_index":
|
case "calibre:series_index":
|
||||||
@ -261,10 +277,29 @@ namespace API.Services
|
|||||||
specialName = metadataItem.Content;
|
specialName = metadataItem.Content;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// EPUB 3.2+ only
|
||||||
|
switch (metadataItem.Property)
|
||||||
|
{
|
||||||
|
case "group-position":
|
||||||
|
seriesIndex = metadataItem.Content;
|
||||||
|
break;
|
||||||
|
case "belongs-to-collection":
|
||||||
|
series = metadataItem.Content;
|
||||||
|
break;
|
||||||
|
case "collection-type":
|
||||||
|
groupPosition = metadataItem.Content;
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!string.IsNullOrEmpty(series) && !string.IsNullOrEmpty(seriesIndex) && !string.IsNullOrEmpty(specialName))
|
if (!string.IsNullOrEmpty(series) && !string.IsNullOrEmpty(seriesIndex) &&
|
||||||
|
(!string.IsNullOrEmpty(specialName) || groupPosition.Equals("series") || groupPosition.Equals("set")))
|
||||||
{
|
{
|
||||||
|
if (string.IsNullOrEmpty(specialName))
|
||||||
|
{
|
||||||
|
specialName = epubBook.Title;
|
||||||
|
}
|
||||||
return new ParserInfo()
|
return new ParserInfo()
|
||||||
{
|
{
|
||||||
Chapters = "0",
|
Chapters = "0",
|
||||||
|
Loading…
x
Reference in New Issue
Block a user