using System;
using System.Collections.Immutable;
using System.IO;
using System.Linq;
using System.Text.RegularExpressions;
using API.Entities.Enums;
namespace API.Services.Tasks.Scanner.Parser;
public static class Parser
{
public const string DefaultChapter = "0";
public const string DefaultVolume = "0";
private static readonly TimeSpan RegexTimeout = TimeSpan.FromMilliseconds(500);
public const string ImageFileExtensions = @"^(\.png|\.jpeg|\.jpg|\.webp|\.gif)";
public const string ArchiveFileExtensions = @"\.cbz|\.zip|\.rar|\.cbr|\.tar.gz|\.7zip|\.7z|\.cb7|\.cbt";
private const string BookFileExtensions = @"\.epub|\.pdf";
public const string MacOsMetadataFileStartsWith = @"._";
public const string SupportedExtensions =
ArchiveFileExtensions + "|" + ImageFileExtensions + "|" + BookFileExtensions;
private const RegexOptions MatchOptions =
RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.CultureInvariant;
///
/// Matches against font-family css syntax. Does not match if url import has data: starting, as that is binary data
///
/// See here for some examples https://developer.mozilla.org/en-US/docs/Web/CSS/@font-face
public static readonly Regex FontSrcUrlRegex = new Regex(@"(?(?:src:\s?)?(?:url|local)\((?!data:)" + "(?:[\"']?)" + @"(?!data:))"
+ "(?(?!data:)[^\"']+?)" + "(?[\"']?" + @"\);?)",
MatchOptions, RegexTimeout);
///
/// https://developer.mozilla.org/en-US/docs/Web/CSS/@import
///
public static readonly Regex CssImportUrlRegex = new Regex("(@import\\s([\"|']|url\\([\"|']))(?[^'\"]+)([\"|']\\)?);",
MatchOptions | RegexOptions.Multiline, RegexTimeout);
///
/// Misc css image references, like background-image: url(), border-image, or list-style-image
///
/// Original prepend: (background|border|list-style)-image:\s?)?
public static readonly Regex CssImageUrlRegex = new Regex(@"(url\((?!data:).(?!data:))" + "(?(?!data:)[^\"']*)" + @"(.\))",
MatchOptions, RegexTimeout);
private const string XmlRegexExtensions = @"\.xml";
private static readonly Regex ImageRegex = new Regex(ImageFileExtensions,
MatchOptions, RegexTimeout);
private static readonly Regex ArchiveFileRegex = new Regex(ArchiveFileExtensions,
MatchOptions, RegexTimeout);
private static readonly Regex ComicInfoArchiveRegex = new Regex(@"\.cbz|\.cbr|\.cb7|\.cbt",
MatchOptions, RegexTimeout);
private static readonly Regex XmlRegex = new Regex(XmlRegexExtensions,
MatchOptions, RegexTimeout);
private static readonly Regex BookFileRegex = new Regex(BookFileExtensions,
MatchOptions, RegexTimeout);
private static readonly Regex CoverImageRegex = new Regex(@"(?
/// Recognizes the Special token only
///
private static readonly Regex SpecialTokenRegex = new Regex(@"SP\d+",
MatchOptions, RegexTimeout);
private const string Number = @"\d+(\.\d)?";
private const string NumberRange = Number + @"(-" + Number + @")?";
// Some generic reusage regex patterns:
// - non greedy matching of a string where parenthesis are balanced
public const string BalancedParen = @"(?:[^()]|(?\()|(?<-open>\)))*?(?(open)(?!))";
// - non greedy matching of a string where square brackets are balanced
public const string BalancedBrack = @"(?:[^\[\]]|(?\[)|(?<-open>\]))*?(?(open)(?!))";
private static readonly Regex[] MangaVolumeRegex = new[]
{
// Dance in the Vampire Bund v16-17
new Regex(
@"(?.*)(\b|_)v(?\d+-?\d+)( |_)",
MatchOptions, RegexTimeout),
// NEEDLESS_Vol.4_-Simeon_6_v2[SugoiSugoi].rar
new Regex(
@"(?.*)(\b|_)(?!\[)(vol\.?)(?\d+(-\d+)?)(?!\])",
MatchOptions, RegexTimeout),
// TODO: In .NET 7, update this to use raw literal strings and apply the NumberRange everywhere
// Historys Strongest Disciple Kenichi_v11_c90-98.zip or Dance in the Vampire Bund v16-17
new Regex(
@"(?.*)(\b|_)(?!\[)v(?" + NumberRange + @")(?!\])",
MatchOptions, RegexTimeout),
// Kodomo no Jikan vol. 10, [dmntsf.net] One Piece - Digital Colored Comics Vol. 20.5-21.5 Ch. 177
new Regex(
@"(?.*)(\b|_)(vol\.? ?)(?\d+(\.\d)?(-\d+)?(\.\d)?)",
MatchOptions, RegexTimeout),
// Killing Bites Vol. 0001 Ch. 0001 - Galactica Scanlations (gb)
new Regex(
@"(vol\.? ?)(?\d+(\.\d)?)",
MatchOptions, RegexTimeout),
// Tonikaku Cawaii [Volume 11].cbz
new Regex(
@"(volume )(?\d+(\.\d)?)",
MatchOptions, RegexTimeout),
// Tower Of God S01 014 (CBT) (digital).cbz
new Regex(
@"(?.*)(\b|_|)(S(?\d+))",
MatchOptions, RegexTimeout),
// vol_001-1.cbz for MangaPy default naming convention
new Regex(
@"(vol_)(?\d+(\.\d)?)",
MatchOptions, RegexTimeout),
// Chinese Volume: 第n卷 -> Volume n, 第n册 -> Volume n, 幽游白书完全版 第03卷 天下 or 阿衰online 第1册
new Regex(
@"第(?\d+)(卷|册)",
MatchOptions, RegexTimeout),
// Chinese Volume: 卷n -> Volume n, 册n -> Volume n
new Regex(
@"(卷|册)(?\d+)",
MatchOptions, RegexTimeout),
// Korean Volume: 제n화|권|회|장 -> Volume n, n화|권|회|장 -> Volume n, 63권#200.zip -> Volume 63 (no chapter, #200 is just files inside)
new Regex(
@"제?(?\d+(\.\d)?)(권|회|화|장)",
MatchOptions, RegexTimeout),
// Korean Season: 시즌n -> Season n,
new Regex(
@"시즌(?\d+\-?\d+)",
MatchOptions, RegexTimeout),
// Korean Season: 시즌n -> Season n, n시즌 -> season n
new Regex(
@"(?\d+(\-|~)?\d+?)시즌",
MatchOptions, RegexTimeout),
// Korean Season: 시즌n -> Season n, n시즌 -> season n
new Regex(
@"시즌(?\d+(\-|~)?\d+?)",
MatchOptions, RegexTimeout),
// Japanese Volume: n巻 -> Volume n
new Regex(
@"(?\d+(?:(\-)\d+)?)巻",
MatchOptions, RegexTimeout),
// Russian Volume: Том n -> Volume n, Тома n -> Volume
new Regex(
@"Том(а?)(\.?)(\s|_)?(?\d+(?:(\-)\d+)?)",
MatchOptions, RegexTimeout),
// Russian Volume: n Том -> Volume n
new Regex(
@"(\s|_)?(?\d+(?:(\-)\d+)?)(\s|_)Том(а?)",
MatchOptions, RegexTimeout),
};
private static readonly Regex[] MangaSeriesRegex = new[]
{
// Russian Volume: Том n -> Volume n, Тома n -> Volume
new Regex(
@"(?.+?)Том(а?)(\.?)(\s|_)?(?\d+(?:(\-)\d+)?)",
MatchOptions, RegexTimeout),
// Russian Volume: n Том -> Volume n
new Regex(
@"(?.+?)(\s|_)?(?\d+(?:(\-)\d+)?)(\s|_)Том(а?)",
MatchOptions, RegexTimeout),
// Russian Chapter: n Главa -> Chapter n
new Regex(
@"(?.+?)(?!Том)(?\d+(?:\.\d+|-\d+)?)(\s|_)(Глава|глава|Главы|Глава)",
MatchOptions, RegexTimeout),
// Russian Chapter: Главы n -> Chapter n
new Regex(
@"(?.+?)(Глава|глава|Главы|Глава)(\.?)(\s|_)?(?\d+(?:.\d+|-\d+)?)",
MatchOptions, RegexTimeout),
// Grand Blue Dreaming - SP02
new Regex(
@"(?.*)(\b|_|-|\s)(?:sp)\d",
MatchOptions, RegexTimeout),
// [SugoiSugoi]_NEEDLESS_Vol.2_-_Disk_The_Informant_5_[ENG].rar, Yuusha Ga Shinda! - Vol.tbd Chapter 27.001 V2 Infection ①.cbz
new Regex(
@"^(?.*)( |_)Vol\.?(\d+|tbd)",
MatchOptions, RegexTimeout),
// Mad Chimera World - Volume 005 - Chapter 026.cbz (couldn't figure out how to get Volume negative lookaround working on below regex),
// The Duke of Death and His Black Maid - Vol. 04 Ch. 054.5 - V4 Omake
new Regex(
@"(?.+?)(\s|_|-)+(?:Vol(ume|\.)?(\s|_|-)+\d+)(\s|_|-)+(?:(Ch|Chapter|Ch)\.?)(\s|_|-)+(?\d+)",
MatchOptions,
RegexTimeout),
// Ichiban_Ushiro_no_Daimaou_v04_ch34_[VISCANS].zip, VanDread-v01-c01.zip
new Regex(
@"(?.*)(\b|_)v(?\d+-?\d*)(\s|_|-)",
MatchOptions,
RegexTimeout),
// Gokukoku no Brynhildr - c001-008 (v01) [TrinityBAKumA], Black Bullet - v4 c17 [batoto]
new Regex(
@"(?.*)( - )(?:v|vo|c|chapters)\d",
MatchOptions, RegexTimeout),
// Kedouin Makoto - Corpse Party Musume, Chapter 19 [Dametrans].zip
new Regex(
@"(?.*)(?:, Chapter )(?\d+)",
MatchOptions, RegexTimeout),
// Please Go Home, Akutsu-San! - Chapter 038.5 - Volume Announcement.cbz, My Charms Are Wasted on Kuroiwa Medaka - Ch. 37.5 - Volume Extras
new Regex(
@"(?.+?)(\s|_|-)(?!Vol)(\s|_|-)((?:Chapter)|(?:Ch\.))(\s|_|-)(?\d+)",
MatchOptions, RegexTimeout),
// [dmntsf.net] One Piece - Digital Colored Comics Vol. 20 Ch. 177 - 30 Million vs 81 Million.cbz
new Regex(
@"(?.*) (\b|_|-)(vol)\.?(\s|-|_)?\d+",
MatchOptions, RegexTimeout),
// [xPearse] Kyochuu Rettou Volume 1 [English] [Manga] [Volume Scans]
new Regex(
@"(?.*) (\b|_|-)(vol)(ume)",
MatchOptions,
RegexTimeout),
//Knights of Sidonia c000 (S2 LE BD Omake - BLAME!) [Habanero Scans]
new Regex(
@"(?.*)(\bc\d+\b)",
MatchOptions, RegexTimeout),
//Tonikaku Cawaii [Volume 11], Darling in the FranXX - Volume 01.cbz
new Regex(
@"(?.*)(?: _|-|\[|\()\s?vol(ume)?",
MatchOptions, RegexTimeout),
// Momo The Blood Taker - Chapter 027 Violent Emotion.cbz, Grand Blue Dreaming - SP02 Extra (2019) (Digital) (danke-Empire).cbz
new Regex(
@"^(?(?!Vol).+?)(?:(ch(apter|\.)(\b|_|-|\s))|sp)\d",
MatchOptions, RegexTimeout),
// Historys Strongest Disciple Kenichi_v11_c90-98.zip, Killing Bites Vol. 0001 Ch. 0001 - Galactica Scanlations (gb)
new Regex(
@"(?.*) (\b|_|-)(v|ch\.?|c|s)\d+",
MatchOptions, RegexTimeout),
// Hinowa ga CRUSH! 018 (2019) (Digital) (LuCaZ).cbz
new Regex(
@"(?.*)\s+(?\d+)\s+(?:\(\d{4}\))\s",
MatchOptions, RegexTimeout),
// Goblin Slayer - Brand New Day 006.5 (2019) (Digital) (danke-Empire)
new Regex(
@"(?.*) (-)?(?\d+(?:.\d+|-\d+)?) \(\d{4}\)",
MatchOptions, RegexTimeout),
// Noblesse - Episode 429 (74 Pages).7z
new Regex(
@"(?.*)(\s|_)(?:Episode|Ep\.?)(\s|_)(?\d+(?:.\d+|-\d+)?)",
MatchOptions, RegexTimeout),
// Akame ga KILL! ZERO (2016-2019) (Digital) (LuCaZ)
new Regex(
@"(?.*)\(\d",
MatchOptions, RegexTimeout),
// Tonikaku Kawaii (Ch 59-67) (Ongoing)
new Regex(
@"(?.*)(\s|_)\((c\s|ch\s|chapter\s)",
MatchOptions, RegexTimeout),
// Fullmetal Alchemist chapters 101-108
new Regex(
@"(?.+?)(\s|_|\-)+?chapters(\s|_|\-)+?\d+(\s|_|\-)+?",
MatchOptions, RegexTimeout),
// It's Witching Time! 001 (Digital) (Anonymous1234)
new Regex(
@"(?.+?)(\s|_|\-)+?\d+(\s|_|\-)\(",
MatchOptions, RegexTimeout),
//Ichinensei_ni_Nacchattara_v01_ch01_[Taruby]_v1.1.zip must be before [Suihei Kiki]_Kasumi_Otoko_no_Ko_[Taruby]_v1.1.zip
// due to duplicate version identifiers in file.
new Regex(
@"(?.*)(v|s)\d+(-\d+)?(_|\s)",
MatchOptions, RegexTimeout),
//[Suihei Kiki]_Kasumi_Otoko_no_Ko_[Taruby]_v1.1.zip
new Regex(
@"(?.*)(v|s)\d+(-\d+)?",
MatchOptions, RegexTimeout),
// Black Bullet (This is very loose, keep towards bottom)
new Regex(
@"(?.*)(_)(v|vo|c|volume)( |_)\d+",
MatchOptions, RegexTimeout),
// [Hidoi]_Amaenaideyo_MS_vol01_chp02.rar
new Regex(
@"(?.*)( |_)(vol\d+)?( |_)(?:Chp\.? ?\d+)",
MatchOptions, RegexTimeout),
// Mahoutsukai to Deshi no Futekisetsu na Kankei Chp. 1
new Regex(
@"(?.*)( |_)(?:Chp.? ?\d+)",
MatchOptions, RegexTimeout),
// Corpse Party -The Anthology- Sachikos game of love Hysteric Birthday 2U Chapter 01
new Regex(
@"^(?!Vol)(?.*)( |_)Chapter( |_)(\d+)",
MatchOptions, RegexTimeout),
// Fullmetal Alchemist chapters 101-108.cbz
new Regex(
@"^(?!vol)(?.*)( |_)(chapters( |_)?)\d+-?\d*",
MatchOptions, RegexTimeout),
// Umineko no Naku Koro ni - Episode 1 - Legend of the Golden Witch #1
new Regex(
@"^(?!Vol\.?)(?.*)( |_|-)(?.*)ch\d+-?\d?",
MatchOptions, RegexTimeout),
// Magi - Ch.252-005.cbz
new Regex(
@"(?.*)( ?- ?)Ch\.\d+-?\d*",
MatchOptions, RegexTimeout),
// [BAA]_Darker_than_Black_Omake-1.zip
new Regex(
@"^(?!Vol)(?.*)(-)\d+-?\d*", // This catches a lot of stuff ^(?!Vol)(?.*)( |_)(\d+)
MatchOptions, RegexTimeout),
// Kodoja #001 (March 2016)
new Regex(
@"(?.*)(\s|_|-)#",
MatchOptions, RegexTimeout),
// Baketeriya ch01-05.zip, Akiiro Bousou Biyori - 01.jpg, Beelzebub_172_RHS.zip, Cynthia the Mission 29.rar, A Compendium of Ghosts - 031 - The Third Story_ Part 12 (Digital) (Cobalt001)
new Regex(
@"^(?!Vol\.?)(?!Chapter)(?.+?)(\s|_|-)(?.*)( |_|-)(ch?)\d+",
MatchOptions, RegexTimeout),
// Japanese Volume: n巻 -> Volume n
new Regex(
@"(?.+?)第(?\d+(?:(\-)\d+)?)巻",
MatchOptions, RegexTimeout),
};
private static readonly Regex[] ComicSeriesRegex = new[]
{
// Russian Volume: Том n -> Volume n, Тома n -> Volume
new Regex(
@"(?.+?)Том(а?)(\.?)(\s|_)?(?\d+(?:(\-)\d+)?)",
MatchOptions, RegexTimeout),
// Russian Volume: n Том -> Volume n
new Regex(
@"(?.+?)(\s|_)?(?\d+(?:(\-)\d+)?)(\s|_)Том(а?)",
MatchOptions, RegexTimeout),
// Russian Chapter: n Главa -> Chapter n
new Regex(
@"(?.+?)(?!Том)(?\d+(?:\.\d+|-\d+)?)(\s|_)(Глава|глава|Главы|Глава)",
MatchOptions, RegexTimeout),
// Russian Chapter: Главы n -> Chapter n
new Regex(
@"(?.+?)(Глава|глава|Главы|Глава)(\.?)(\s|_)?(?\d+(?:.\d+|-\d+)?)",
MatchOptions, RegexTimeout),
// Tintin - T22 Vol 714 pour Sydney
new Regex(
@"(?.+?)\s?(\b|_|-)\s?((vol|tome|t)\.?)(?\d+(-\d+)?)",
MatchOptions, RegexTimeout),
// Invincible Vol 01 Family matters (2005) (Digital)
new Regex(
@"(?.+?)(\b|_)((vol|tome|t)\.?)(\s|_)(?\d+(-\d+)?)",
MatchOptions, RegexTimeout),
// Batman Beyond 2.0 001 (2013)
new Regex(
@"^(?.+?\S\.\d) (?\d+)",
MatchOptions, RegexTimeout),
// 04 - Asterix the Gladiator (1964) (Digital-Empire) (WebP by Doc MaKS)
new Regex(
@"^(?\d+)\s(-\s|_)(?.*(\d{4})?)( |_)(\(|\d+)",
MatchOptions, RegexTimeout),
// 01 Spider-Man & Wolverine 01.cbr
new Regex(
@"^(?\d+)\s(?:-\s)(?.*) (\d+)?",
MatchOptions, RegexTimeout),
// Batman & Wildcat (1 of 3)
new Regex(
@"(?.*(\d{4})?)( |_)(?:\((?\d+) of \d+)",
MatchOptions, RegexTimeout),
// Teen Titans v1 001 (1966-02) (digital) (OkC.O.M.P.U.T.O.-Novus), Aldebaran-Antares-t6
new Regex(
@"^(?.+?)(?: |_|-)(v|t)\d+",
MatchOptions, RegexTimeout),
// Amazing Man Comics chapter 25
new Regex(
@"^(?.+?)(?: |_)c(hapter) \d+",
MatchOptions, RegexTimeout),
// Amazing Man Comics issue #25
new Regex(
@"^(?.+?)(?: |_)i(ssue) #\d+",
MatchOptions, RegexTimeout),
// Batman Wayne Family Adventures - Ep. 001 - Moving In
new Regex(
@"^(?.+?)(\s|_|-)(?:Ep\.?)(\s|_|-)+\d+",
MatchOptions, RegexTimeout),
// Batgirl Vol.2000 #57 (December, 2004)
new Regex(
@"^(?.+?)Vol\.?\s?#?(?:\d+)",
MatchOptions, RegexTimeout),
// Batman & Robin the Teen Wonder #0
new Regex(
@"^(?.*)(?: |_)#\d+",
MatchOptions, RegexTimeout),
// Batman & Catwoman - Trail of the Gun 01, Batman & Grendel (1996) 01 - Devil's Bones, Teen Titans v1 001 (1966-02) (digital) (OkC.O.M.P.U.T.O.-Novus)
new Regex(
@"^(?.+?)(?: \d+)",
MatchOptions, RegexTimeout),
// Scott Pilgrim 02 - Scott Pilgrim vs. The World (2005)
new Regex(
@"^(?.+?)(?: |_)(?\d+)",
MatchOptions, RegexTimeout),
// The First Asterix Frieze (WebP by Doc MaKS)
new Regex(
@"^(?.*)(?: |_)(?!\(\d{4}|\d{4}-\d{2}\))\(",
MatchOptions, RegexTimeout),
// spawn-123, spawn-chapter-123 (from https://github.com/Girbons/comics-downloader)
new Regex(
@"^(?.+?)-(chapter-)?(?\d+)",
MatchOptions, RegexTimeout),
// MUST BE LAST: Batman & Daredevil - King of New York
new Regex(
@"^(?.*)",
MatchOptions, RegexTimeout),
};
private static readonly Regex[] ComicVolumeRegex = new[]
{
// Teen Titans v1 001 (1966-02) (digital) (OkC.O.M.P.U.T.O.-Novus)
new Regex(
@"^(?.*)(?: |_)(t|v)(?\d+)",
MatchOptions, RegexTimeout),
// Batgirl Vol.2000 #57 (December, 2004)
new Regex(
@"^(?.+?)(?:\s|_)(v|vol|tome|t)\.?(\s|_)?(?\d+)",
MatchOptions, RegexTimeout),
// Chinese Volume: 第n卷 -> Volume n, 第n册 -> Volume n, 幽游白书完全版 第03卷 天下 or 阿衰online 第1册
new Regex(
@"第(?\d+)(卷|册)",
MatchOptions, RegexTimeout),
// Chinese Volume: 卷n -> Volume n, 册n -> Volume n
new Regex(
@"(卷|册)(?\d+)",
MatchOptions, RegexTimeout),
// Korean Volume: 제n권 -> Volume n, n권 -> Volume n, 63권#200.zip
new Regex(
@"제?(?\d+)권",
MatchOptions, RegexTimeout),
// Japanese Volume: n巻 -> Volume n
new Regex(
@"(?\d+(?:(\-)\d+)?)巻",
MatchOptions, RegexTimeout),
// Russian Volume: Том n -> Volume n, Тома n -> Volume
new Regex(
@"Том(а?)(\.?)(\s|_)?(?\d+(?:(\-)\d+)?)",
MatchOptions, RegexTimeout),
// Russian Volume: n Том -> Volume n
new Regex(
@"(\s|_)?(?\d+(?:(\-)\d+)?)(\s|_)Том(а?)",
MatchOptions, RegexTimeout),
};
private static readonly Regex[] ComicChapterRegex = new[]
{
// Batman & Wildcat (1 of 3)
new Regex(
@"(?.*(\d{4})?)( |_)(?:\((?\d+) of \d+)",
MatchOptions, RegexTimeout),
// Batman Beyond 04 (of 6) (1999)
new Regex(
@"(?.+?)(?\d+)(\s|_|-)?\(of",
MatchOptions, RegexTimeout),
// Batman Beyond 2.0 001 (2013)
new Regex(
@"^(?.+?\S\.\d) (?\d+)",
MatchOptions, RegexTimeout),
// Teen Titans v1 001 (1966-02) (digital) (OkC.O.M.P.U.T.O.-Novus)
new Regex(
@"^(?.+?)(?: |_)v(?\d+)(?: |_)(c? ?)(?(\d+(\.\d)?)-?(\d+(\.\d)?)?)(c? ?)",
MatchOptions, RegexTimeout),
// Batman & Robin the Teen Wonder #0
new Regex(
@"^(?.+?)(?:\s|_)#(?\d+)",
MatchOptions, RegexTimeout),
// Batman 2016 - Chapter 01, Batman 2016 - Issue 01, Batman 2016 - Issue #01
new Regex(
@"^(?.+?)((c(hapter)?)|issue)(_|\s)#?(?(\d+(\.\d)?)-?(\d+(\.\d)?)?)",
MatchOptions, RegexTimeout),
// Invincible 070.5 - Invincible Returns 1 (2010) (digital) (Minutemen-InnerDemons).cbr
new Regex(
@"^(?.+?)(?:\s|_)(c? ?(chapter)?)(?(\d+(\.\d)?)-?(\d+(\.\d)?)?)(c? ?)-",
MatchOptions, RegexTimeout),
// Batgirl Vol.2000 #57 (December, 2004)
new Regex(
@"^(?.+?)(?:vol\.?\d+)\s#(?\d+)",
MatchOptions,
RegexTimeout),
// Russian Chapter: Главы n -> Chapter n
new Regex(
@"(Глава|глава|Главы|Глава)(\.?)(\s|_)?(?\d+(?:.\d+|-\d+)?)",
MatchOptions, RegexTimeout),
// Russian Chapter: n Главa -> Chapter n
new Regex(
@"(?!Том)(?\d+(?:\.\d+|-\d+)?)(\s|_)(Глава|глава|Главы|Глава)",
MatchOptions, RegexTimeout),
// Batman & Catwoman - Trail of the Gun 01, Batman & Grendel (1996) 01 - Devil's Bones, Teen Titans v1 001 (1966-02) (digital) (OkC.O.M.P.U.T.O.-Novus)
new Regex(
@"^(?.+?)(?: (?\d+))",
MatchOptions, RegexTimeout),
// Saga 001 (2012) (Digital) (Empire-Zone)
new Regex(
@"(?.+?)(?: |_)(c? ?)(?(\d+(\.\d)?)-?(\d+(\.\d)?)?)\s\(\d{4}",
MatchOptions, RegexTimeout),
// Amazing Man Comics chapter 25
new Regex(
@"^(?!Vol)(?.+?)( |_)c(hapter)( |_)(?\d*)",
MatchOptions, RegexTimeout),
// Amazing Man Comics issue #25
new Regex(
@"^(?!Vol)(?.+?)( |_)i(ssue)( |_) #(?\d*)",
MatchOptions, RegexTimeout),
// spawn-123, spawn-chapter-123 (from https://github.com/Girbons/comics-downloader)
new Regex(
@"^(?.+?)-(chapter-)?(?\d+)",
MatchOptions, RegexTimeout),
};
private static readonly Regex[] MangaChapterRegex = new[]
{
// Historys Strongest Disciple Kenichi_v11_c90-98.zip, ...c90.5-100.5
new Regex(
@"(\b|_)(c|ch)(\.?\s?)(?(\d+(\.\d)?)-?(\d+(\.\d)?)?)",
MatchOptions, RegexTimeout),
// [Suihei Kiki]_Kasumi_Otoko_no_Ko_[Taruby]_v1.1.zip
new Regex(
@"v\d+\.(\s|_)(?\d+(?:.\d+|-\d+)?)",
MatchOptions, RegexTimeout),
// Umineko no Naku Koro ni - Episode 3 - Banquet of the Golden Witch #02.cbz (Rare case, if causes issue remove)
new Regex(
@"^(?.*)(?: |_)#(?\d+)",
MatchOptions, RegexTimeout),
// Green Worldz - Chapter 027, Kimi no Koto ga Daidaidaidaidaisuki na 100-nin no Kanojo Chapter 11-10
new Regex(
@"^(?!Vol)(?.*)\s?(?\d+(?:\.?[\d-]+)?)",
MatchOptions, RegexTimeout),
// Russian Chapter: Главы n -> Chapter n
new Regex(
@"(Глава|глава|Главы|Глава)(\.?)(\s|_)?(?\d+(?:.\d+|-\d+)?)",
MatchOptions, RegexTimeout),
// Hinowa ga CRUSH! 018 (2019) (Digital) (LuCaZ).cbz, Hinowa ga CRUSH! 018.5 (2019) (Digital) (LuCaZ).cbz
new Regex(
@"^(?!Vol)(?.+?)(?\d+(?:\.\d+|-\d+)?)(?:\s\(\d{4}\))?(\b|_|-)",
MatchOptions, RegexTimeout),
// Tower Of God S01 014 (CBT) (digital).cbz
new Regex(
@"(?.*)\sS(?\d+)\s(?\d+(?:.\d+|-\d+)?)",
MatchOptions, RegexTimeout),
// Beelzebub_01_[Noodles].zip, Beelzebub_153b_RHS.zip
new Regex(
@"^((?!v|vo|vol|Volume).)*(\s|_)(?\.?\d+(?:.\d+|-\d+)?)(?b)?(\s|_|\[|\()",
MatchOptions, RegexTimeout),
// Yumekui-Merry_DKThias_Chapter21.zip
new Regex(
@"Chapter(?\d+(-\d+)?)", //(?:.\d+|-\d+)?
MatchOptions, RegexTimeout),
// [Hidoi]_Amaenaideyo_MS_vol01_chp02.rar
new Regex(
@"(?.*)(\s|_)(vol\d+)?(\s|_)Chp\.? ?(?\d+)",
MatchOptions, RegexTimeout),
// Vol 1 Chapter 2
new Regex(
@"(?((vol|volume|v))?(\s|_)?\.?\d+)(\s|_)(Chp|Chapter)\.?(\s|_)?(?\d+)",
MatchOptions, RegexTimeout),
// Chinese Chapter: 第n话 -> Chapter n, 【TFO汉化&Petit汉化】迷你偶像漫画第25话
new Regex(
@"第(?\d+)话",
MatchOptions, RegexTimeout),
// Korean Chapter: 제n화 -> Chapter n, 가디언즈 오브 갤럭시 죽음의 보석.E0008.7화#44
new Regex(
@"제?(?\d+\.?\d+)(회|화|장)",
MatchOptions, RegexTimeout),
// Korean Chapter: 第10話 -> Chapter n, [ハレム]ナナとカオル ~高校生のSMごっこ~ 第1話
new Regex(
@"第?(?\d+(?:\.\d+|-\d+)?)話",
MatchOptions, RegexTimeout),
// Russian Chapter: n Главa -> Chapter n
new Regex(
@"(?!Том)(?\d+(?:\.\d+|-\d+)?)(\s|_)(Глава|глава|Главы|Глава)",
MatchOptions, RegexTimeout),
};
private static readonly Regex MangaEditionRegex = new Regex(
// Tenjo Tenge {Full Contact Edition} v01 (2011) (Digital) (ASTC).cbz
// To Love Ru v01 Uncensored (Ch.001-007)
@"\b(?:Omnibus(?:\s?Edition)?|Uncensored)\b",
MatchOptions, RegexTimeout
);
// Matches [Complete], release tags like [kmts] but not [ Complete ] or [kmts ]
private const string TagsInBrackets = $@"\[(?!\s){BalancedBrack}(? FormatTagSpecialKeywords = ImmutableArray.Create(
"Special", "Reference", "Director's Cut", "Box Set", "Box-Set", "Annual", "Anthology", "Epilogue",
"One Shot", "One-Shot", "Prologue", "TPB", "Trade Paper Back", "Omnibus", "Compendium", "Absolute", "Graphic Novel",
"GN", "FCBD");
private static readonly char[] LeadingZeroesTrimChars = new[] { '0' };
private static readonly char[] SpacesAndSeparators = { '\0', '\t', '\r', ' ', '-', ','};
public static MangaFormat ParseFormat(string filePath)
{
if (IsArchive(filePath)) return MangaFormat.Archive;
if (IsImage(filePath)) return MangaFormat.Image;
if (IsEpub(filePath)) return MangaFormat.Epub;
if (IsPdf(filePath)) return MangaFormat.Pdf;
return MangaFormat.Unknown;
}
public static string ParseEdition(string filePath)
{
filePath = ReplaceUnderscores(filePath);
var match = MangaEditionRegex.Match(filePath);
return match.Success ? match.Value : string.Empty;
}
///
/// If the file has SP marker.
///
///
///
public static bool HasSpecialMarker(string filePath)
{
return SpecialMarkerRegex.IsMatch(filePath);
}
public static bool IsMangaSpecial(string filePath)
{
filePath = ReplaceUnderscores(filePath);
return MangaSpecialRegex.IsMatch(filePath);
}
public static bool IsComicSpecial(string filePath)
{
filePath = ReplaceUnderscores(filePath);
return ComicSpecialRegex.IsMatch(filePath);
}
public static string ParseSeries(string filename)
{
foreach (var regex in MangaSeriesRegex)
{
var matches = regex.Matches(filename);
foreach (var group in matches.Select(match => match.Groups["Series"])
.Where(group => group.Success && group != Match.Empty))
{
return CleanTitle(group.Value);
}
}
return string.Empty;
}
public static string ParseComicSeries(string filename)
{
foreach (var regex in ComicSeriesRegex)
{
var matches = regex.Matches(filename);
foreach (var group in matches.Select(match => match.Groups["Series"])
.Where(group => group.Success && group != Match.Empty))
{
return CleanTitle(group.Value, true);
}
}
return string.Empty;
}
public static string ParseVolume(string filename)
{
foreach (var regex in MangaVolumeRegex)
{
var matches = regex.Matches(filename);
foreach (var group in matches.Select(match => match.Groups))
{
if (!group["Volume"].Success || group["Volume"] == Match.Empty) continue;
var value = group["Volume"].Value;
var hasPart = group["Part"].Success;
return FormatValue(value, hasPart);
}
}
return DefaultVolume;
}
public static string ParseComicVolume(string filename)
{
foreach (var regex in ComicVolumeRegex)
{
var matches = regex.Matches(filename);
foreach (var group in matches.Select(match => match.Groups))
{
if (!group["Volume"].Success || group["Volume"] == Match.Empty) continue;
var value = group["Volume"].Value;
var hasPart = group["Part"].Success;
return FormatValue(value, hasPart);
}
}
return DefaultVolume;
}
private static string FormatValue(string value, bool hasPart)
{
if (!value.Contains('-'))
{
return RemoveLeadingZeroes(hasPart ? AddChapterPart(value) : value);
}
var tokens = value.Split("-");
var from = RemoveLeadingZeroes(tokens[0]);
if (tokens.Length != 2) return from;
var to = RemoveLeadingZeroes(hasPart ? AddChapterPart(tokens[1]) : tokens[1]);
return $"{from}-{to}";
}
public static string ParseChapter(string filename)
{
foreach (var regex in MangaChapterRegex)
{
var matches = regex.Matches(filename);
foreach (var groups in matches.Select(match => match.Groups))
{
if (!groups["Chapter"].Success || groups["Chapter"] == Match.Empty) continue;
var value = groups["Chapter"].Value;
var hasPart = groups["Part"].Success;
return FormatValue(value, hasPart);
}
}
return DefaultChapter;
}
private static string AddChapterPart(string value)
{
if (value.Contains('.'))
{
return value;
}
return $"{value}.5";
}
public static string ParseComicChapter(string filename)
{
foreach (var regex in ComicChapterRegex)
{
var matches = regex.Matches(filename);
foreach (var groups in matches.Select(match => match.Groups))
{
if (!groups["Chapter"].Success || groups["Chapter"] == Match.Empty) continue;
var value = groups["Chapter"].Value;
var hasPart = groups["Part"].Success;
return FormatValue(value, hasPart);
}
}
return DefaultChapter;
}
private static string RemoveEditionTagHolders(string title)
{
title = CleanupRegex.Replace(title, string.Empty);
title = MangaEditionRegex.Replace(title, string.Empty);
return title;
}
private static string RemoveMangaSpecialTags(string title)
{
return MangaSpecialRegex.Replace(title, string.Empty);
}
private static string RemoveEuropeanTags(string title)
{
return EuropeanComicRegex.Replace(title, string.Empty);
}
private static string RemoveComicSpecialTags(string title)
{
return ComicSpecialRegex.Replace(title, string.Empty);
}
///
/// Translates _ -> spaces, trims front and back of string, removes release groups
///
/// Hippos_the_Great [Digital], -> Hippos the Great
///
///
///
///
///
public static string CleanTitle(string title, bool isComic = false)
{
title = ReplaceUnderscores(title);
title = RemoveEditionTagHolders(title);
if (isComic)
{
title = RemoveComicSpecialTags(title);
title = RemoveEuropeanTags(title);
}
else
{
title = RemoveMangaSpecialTags(title);
}
title = title.Trim(SpacesAndSeparators);
title = EmptySpaceRegex.Replace(title, " ");
return title;
}
///
/// Pads the start of a number string with 0's so ordering works fine if there are over 100 items.
/// Handles ranges (ie 4-8) -> (004-008).
///
///
/// A zero padded number
public static string PadZeros(string number)
{
if (!number.Contains('-')) return PerformPadding(number);
var tokens = number.Split("-");
return $"{PerformPadding(tokens[0])}-{PerformPadding(tokens[1])}";
}
private static string PerformPadding(string number)
{
var num = int.Parse(number);
return num switch
{
< 10 => "00" + num,
< 100 => "0" + num,
_ => number
};
}
public static string RemoveLeadingZeroes(string title)
{
var ret = title.TrimStart(LeadingZeroesTrimChars);
return string.IsNullOrEmpty(ret) ? "0" : ret;
}
public static bool IsArchive(string filePath)
{
return ArchiveFileRegex.IsMatch(Path.GetExtension(filePath));
}
public static bool IsComicInfoExtension(string filePath)
{
return ComicInfoArchiveRegex.IsMatch(Path.GetExtension(filePath));
}
public static bool IsBook(string filePath)
{
return BookFileRegex.IsMatch(Path.GetExtension(filePath));
}
public static bool IsImage(string filePath)
{
return !filePath.StartsWith(".") && ImageRegex.IsMatch(Path.GetExtension(filePath));
}
public static bool IsXml(string filePath)
{
return XmlRegex.IsMatch(Path.GetExtension(filePath));
}
public static float MinNumberFromRange(string range)
{
try
{
if (!Regex.IsMatch(range, @"^[\d\-.]+$"))
{
return (float) 0.0;
}
var tokens = range.Replace("_", string.Empty).Split("-");
return tokens.Min(float.Parse);
}
catch
{
return (float) 0.0;
}
}
public static float MaxNumberFromRange(string range)
{
try
{
if (!Regex.IsMatch(range, @"^[\d\-.]+$"))
{
return (float) 0.0;
}
var tokens = range.Replace("_", string.Empty).Split("-");
return tokens.Max(float.Parse);
}
catch
{
return (float) 0.0;
}
}
public static string Normalize(string name)
{
return NormalizeRegex.Replace(name, string.Empty).ToLower();
}
///
/// Responsible for preparing special title for rendering to the UI. Replaces _ with ' ' and strips out SP\d+
///
///
///
public static string CleanSpecialTitle(string name)
{
if (string.IsNullOrEmpty(name)) return name;
var cleaned = SpecialTokenRegex.Replace(name.Replace('_', ' '), string.Empty).Trim();
var lastIndex = cleaned.LastIndexOf('.');
if (lastIndex > 0)
{
cleaned = cleaned.Substring(0, cleaned.LastIndexOf('.')).Trim();
}
return string.IsNullOrEmpty(cleaned) ? name : cleaned;
}
///
/// Tests whether the file is a cover image such that: contains "cover", is named "folder", and is an image
///
/// If the path has "backcover" in it, it will be ignored
/// Filename with extension
///
public static bool IsCoverImage(string filename)
{
return IsImage(filename) && CoverImageRegex.IsMatch(filename);
}
///
/// Validates that a Path doesn't start with certain blacklisted folders, like __MACOSX, @Recently-Snapshot, etc and that if a full path, the filename
/// doesn't start with ._, which is a metadata file on MACOSX.
///
///
///
public static bool HasBlacklistedFolderInPath(string path)
{
return path.Contains("__MACOSX") || path.StartsWith("@Recently-Snapshot") || path.StartsWith("@recycle") || path.StartsWith("._") || Path.GetFileName(path).StartsWith("._") || path.Contains(".qpkg");
}
public static bool IsEpub(string filePath)
{
return Path.GetExtension(filePath).Equals(".epub", StringComparison.InvariantCultureIgnoreCase);
}
public static bool IsPdf(string filePath)
{
return Path.GetExtension(filePath).Equals(".pdf", StringComparison.InvariantCultureIgnoreCase);
}
///
/// Cleans an author's name
///
/// If the author is Last, First, this will not reverse
///
///
public static string CleanAuthor(string author)
{
return string.IsNullOrEmpty(author) ? string.Empty : author.Trim();
}
///
/// Normalizes the slashes in a path to be
///
/// /manga/1\1 -> /manga/1/1
///
///
public static string NormalizePath(string path)
{
return path.Replace(Path.DirectorySeparatorChar, Path.AltDirectorySeparatorChar)
.Replace(@"//", Path.AltDirectorySeparatorChar + string.Empty);
}
///
/// Checks against a set of strings to validate if a ComicInfo.Format should receive special treatment
///
///
///
public static bool HasComicInfoSpecial(string comicInfoFormat)
{
return FormatTagSpecialKeywords.Contains(comicInfoFormat);
}
private static string ReplaceUnderscores(string name) => name?.Replace("_", " ");
}