From 3e031ab4580f4f2cad5fb3b576b38e01fd41f927 Mon Sep 17 00:00:00 2001 From: Joseph Milazzo Date: Sun, 28 Mar 2021 18:00:05 -0500 Subject: [PATCH] Lots of Parsing Enhancements (#120) * More cases for parsing regex * Implemented the ability to parse "Special" keywords. * Commented out some unit tests * More parsing cases * Fixed unit tests * Fixed typo in build script --- API.Tests/ParserTest.cs | 25 ++++++++ API/Data/kavita.db | Bin 0 -> 188416 bytes API/Parser/Parser.cs | 85 +++++++++++++++++++++++---- API/Parser/ParserInfo.cs | 5 ++ API/Services/Tasks/ScannerService.cs | 22 ++++--- API/Startup.cs | 2 +- build.sh | 4 +- 7 files changed, 122 insertions(+), 21 deletions(-) create mode 100644 API/Data/kavita.db diff --git a/API.Tests/ParserTest.cs b/API.Tests/ParserTest.cs index 76a457177..1380d5ab5 100644 --- a/API.Tests/ParserTest.cs +++ b/API.Tests/ParserTest.cs @@ -53,6 +53,8 @@ namespace API.Tests [InlineData("Kodomo no Jikan vol. 1.cbz", "1")] [InlineData("Kodomo no Jikan vol. 10.cbz", "10")] [InlineData("Kedouin Makoto - Corpse Party Musume, Chapter 12 [Dametrans][v2]", "0")] + [InlineData("Vagabond_v03", "3")] + [InlineData("Mujaki No Rakune Volume 10.cbz", "10")] public void ParseVolumeTest(string filename, string expected) { Assert.Equal(expected, ParseVolume(filename)); @@ -105,6 +107,11 @@ namespace API.Tests [InlineData("Goblin Slayer Side Story - Year One 025.5", "Goblin Slayer Side Story - Year One")] [InlineData("Goblin Slayer - Brand New Day 006.5 (2019) (Digital) (danke-Empire)", "Goblin Slayer - Brand New Day")] [InlineData("Kedouin Makoto - Corpse Party Musume, Chapter 01 [Dametrans][v2]", "Kedouin Makoto - Corpse Party Musume")] + [InlineData("Vagabond_v03", "Vagabond")] + [InlineData("[AN] Mahoutsukai to Deshi no Futekisetsu na Kankei Chp. 1", "Mahoutsukai to Deshi no Futekisetsu na Kankei")] + [InlineData("Beelzebub_Side_Story_02_RHS.zip", "Beelzebub Side Story")] + [InlineData("Corpse Party -The Anthology- Sachikos game of love Hysteric Birthday 2U Chapter 01", "Corpse Party -The Anthology- Sachikos game of love Hysteric Birthday 2U")] + [InlineData("[BAA]_Darker_than_Black_Omake-1.zip", "Darker than Black")] public void ParseSeriesTest(string filename, string expected) { Assert.Equal(expected, ParseSeries(filename)); @@ -146,6 +153,10 @@ namespace API.Tests [InlineData("VanDread-v01-c001[MD].zip", "1")] [InlineData("Goblin Slayer Side Story - Year One 025.5", "25.5")] [InlineData("Kedouin Makoto - Corpse Party Musume, Chapter 01", "1")] + [InlineData("To Love Ru v11 Uncensored (Ch.089-097+Omake)", "89-97")] + [InlineData("To Love Ru v18 Uncensored (Ch.153-162.5)", "153-162.5")] + [InlineData("[AN] Mahoutsukai to Deshi no Futekisetsu na Kankei Chp. 1", "1")] + [InlineData("Beelzebub_Side_Story_02_RHS.zip", "2")] public void ParseChaptersTest(string filename, string expected) { Assert.Equal(expected, ParseChapter(filename)); @@ -197,10 +208,23 @@ namespace API.Tests [InlineData("Tenjou Tenge Omnibus", "Omnibus")] [InlineData("Tenjou Tenge {Full Contact Edition}", "Full Contact Edition")] [InlineData("Tenjo Tenge {Full Contact Edition} v01 (2011) (Digital) (ASTC).cbz", "Full Contact Edition")] + [InlineData("Wotakoi - Love is Hard for Otaku Omnibus v01 (2018) (Digital) (danke-Empire)", "Omnibus")] + [InlineData("To Love Ru v01 Uncensored (Ch.001-007)", "Uncensored")] + [InlineData("Chobits Omnibus Edition v01 [Dark Horse]", "Omnibus Edition")] public void ParseEditionTest(string input, string expected) { Assert.Equal(expected, ParseEdition(input)); } + [Theory] + [InlineData("Beelzebub Special OneShot - Minna no Kochikame x Beelzebub (2016) [Mangastream].cbz", true)] + [InlineData("Beelzebub_Omake_June_2012_RHS", true)] + [InlineData("Beelzebub_Side_Story_02_RHS.zip", false)] + [InlineData("Darker than Black Shikkoku no Hana Special [Simple Scans].zip", true)] + [InlineData("Darker than Black Shikkoku no Hana Fanbook Extra [Simple Scans].zip", true)] + public void ParseMangaSpecialTest(string input, bool expected) + { + Assert.Equal(expected, ParseMangaSpecial(input) != ""); + } [Theory] [InlineData("12-14", 12)] @@ -236,6 +260,7 @@ namespace API.Tests [InlineData("Scott Pilgrim 01 - Scott Pilgrim's Precious Little Life (2004)", "Scott Pilgrim")] [InlineData("Teen Titans v1 001 (1966-02) (digital) (OkC.O.M.P.U.T.O.-Novus)", "Teen Titans")] [InlineData("Scott Pilgrim 02 - Scott Pilgrim vs. The World (2005)", "Scott Pilgrim")] + [InlineData("Wolverine - Origins 003 (2006) (digital) (Minutemen-PhD)", "Wolverine - Origins")] public void ParseComicSeriesTest(string filename, string expected) { Assert.Equal(expected, ParseComicSeries(filename)); diff --git a/API/Data/kavita.db b/API/Data/kavita.db new file mode 100644 index 0000000000000000000000000000000000000000..c946be3fb4ca693f60d7a24fb138e9f95c1f8081 GIT binary patch literal 188416 zcmeI5U2GfKb;miPCF+Z!y}MpUtKIczv{{L3%O*w1lCw=6iW+$tQ6x=LhMjo9jL5M( zS(2lj8QQC)NkD7Y8)Q+WXn`U?P~@RS(gJ-b&^{FC7t$a_+P?H5eJN0&K#>+G(4qx` z6hVQa_s$3BgF{l@tYmxpCv1^3_uO;O{hf2q{TNd&UC&CI$lj5ahM=*@!0EuyP~bBx z8wdnu$={3Qul4#4`Ebg*An%OhbFYuHfs5CEcbJq8MSezQJ{kF+$j`>UF}6MWm!U6@ z{PxJD;a?8^G4uW4edZnJonA^lj`y!VI}$A|G6AVs6W>wat`jFzgq9}j*OipIpA)qm zRaDCITVhkKB)oUd5GbP*%wh6$D7v`FJe<~q-MVPYW#v7osd}%c(nUU1=Gk&;JuGUvlzP2}+K1D#qq>>;|9Z3G{TT zlum8%;qlnjGe>j3(L<;EjXROUU_^KOdsjSePuB&h;c&ZgJ=)9dMzQU7KR2*WSS|-x zwVe(~?UeJG9BFTx&y}r6u@Ls~FsCajAME=@hhBC=sJDCyotC-WXjpdKZfrNFLeaT7 z=5Ws8g`!*+O$X>7Lw@Y_k`II3tT>Q(kD->MiM}p{FR>G}p{P6B*C>o0~?@t)H#3k*P9dn0;yU-2|^FHZ!&#BcMXNLVp zoqt`Hd1qfoqh#ted|=}7j}y1`R*bmW`$dbX)HVGMjF%&sK!ZoEI6C5qR!50DT587qVVA2tg$qrxp1MG4#vYyy>J!lidq(hd){l;r zfO^WM%dz&O)|yK?zgY3k|2iMH-+NB_J(6NUP}TdgQhP&CNhVp(=GWZ|OS8Bn)HR>t zv?2RIMR3dk>1DeE$Xs0ZI0e36GPoPF0Dc)J;LE zi?y`eY7!&837)L^Hd8y$~TB2t21<2Y~CWZ?np|5cC@edtXzFdZfSgz zP9sMa?%*?8v=;_G=GcYQ_gl<&)% zLRFKMBXl*E85)Vcx!NlkS1SDGwzQ`R8u7CFhNKd$2bF~H?O^*R6N;{`G7s|l=<*fv zy*cC_alSJ4*z4Fs2cavpy(6%PBFEg`nW`1J)~afqa5mJEedDuN$&Y8I!{q0=@gD^u z{}lO?$TuTWgpZt^`pMLvPJMGqocg7yXT$#<{_F7Xgtc%X%ufDv@&}W@IVnyiC;oTh z#}mIl@wJH?6WsXENp-wH00ck)1V8`;KmY_l;6w>rIv4zWbSb`+h$oklONr!4e6>P1 zg4=RUx+96Td~;i9?g^V@$EL1aj9*O5zZ~4MDl9D}mR6URD;<%tTp(wBhTzgmLBSME ztR#}l@ujs&Nfeao-3)zplWNw;^MyTy><2fY@o4bPPNl?hGP$g)CpO=Z>#c@p)+2AtJ;U%z&s)w(5Y&Y9%8JLJf+97@ z*|BBU`Lh-lTH*4_TKtNqf>Zl*w)VtQVr7MN!rk3e?Tal{Yluxv@4#!%2CrKkNF>(c z@s$Mij#zWo(q%6`6TD^0Ca#dUT1&24CqqSorMXA^Uli|2;(f#O+?im?l3z`%C6<>G z)EENCj;M>LZJk6Wz8t??DT+dkoC9ku)ri=t!CbdtE)q;zI*GpJ6(Udf;a1$~Lfn+y zaDqhA+H$;N_``1am2i+Z6~(VC#aH6VWDej4pY8tmAkxmRFO> zrORg9#_2*)yrUlg810YR?b9e(NnWn-O^uu#l%gnKmyeSyeQu%_i2O_BuOi=xd^J*uEJw!3 z3BaFCeQQdaDokCPIvxJc@DIb^4SzXY4ljfQlRuvP-sJts;^d{t(8NDa{L#d(k?MGX z00@8p2!H?xfB*=900@`_&Idow9Pw!4++1+WlI?XW@$$u>V9FfjK;os2%HC%YFU|)` zhRXiO5HnmbW6AY6fq3DS;I<{w^YCH%Qm|?W9qruV6BmMScH{>gIYc|W=~k=%X~XlK zj#Js*2MuQ(ItQIGJojqwx@ByiKMVMlPyjk7<03qB7w%%t4lMb0)#|9IfDC%=!Fm5}q?_t51qrqP11fw>? zK}Q53QO!m^i7Z&%wa3iIW!lo(Y^XLy7L*`y2-ZO|MchzmtHW>EM+y|3d(} z|0n>KyTwM__ zSC@BJ6L*q#lFV=*8r{+VZ;RB%b|xn*C&jf|Tv#N(4)V_8ax%WMxOQi?wkQhK-CFJP zt`JX(bpLOD|35@NjhtvF(HIDT00@8p2!H?xfB*=900@8p2!Oze5Wv6xKM~Am1Oz|; z1V8`;KmY_l00ck)1V8`;9xDOt{~s%UG!6nF00JNY0w4eaAOHd&00JNY0w+QM`~MTc zj7C5J1V8`;KmY_l00ck)1V8`;K;W?waD4y&vD$&gK>!3m00ck)1V8`;KmY_l00ck) z1dfjY_W#Gn7Mei-1V8`;KmY_l00ck)1V8`;K;TIsfc^iI;tALR0w4eaAOHd&00JNY z0w4eaAOHf#M*!ddKR&k53<4kk0w4eaAOHd&00JNY0w4eaPYMA%|9?_E0Xskd1V8`; zKmY_l00ck)1V8`;K;ZZY;Q9aYv4v(3009sH0T2KI5C8!X009sH0T6gn2+-&MrvrZ% zh(-p&v6f@9pVykx!L*Hj~@nZ?atPiX5BIv7EO$$1ZS&VlMXS z=SQN~&ocqt0`={>q=^-wrOEoWY0r)-DpX#rBn(xZ8_z%Z+C(TiJIlP6(S+T)xGC3b zqEZmFyY{C^)7WxqJqUy{4F-MQ_g2{q>XJpSH55>%u2h8pd9$=wM3|q99!mZ8qHl`xvZdS zrK-?uio_hFz_Mk7-%Raf%j|qS9#1Ulf7p2ZQ~IBIQ=`+~O{cvT(;DlYTD6%k@|kNn zy$1`NZElP$@|%2-&!zbivB>BK^%toWqt4vmvpn&9I#o)iHu&&(?7-_LI@kd$_1?^FN zVYqQyu1R;KN30t=ym>YhJ%65gXrT1Oy65t&8?C3*j%_+t`o*C+5W5igL=_nYUClV1 zvcQ>*5qQ@4_EAHE)1$_bcdV4-4N*&&6D}tkcOr+$)4G!%PJ5hel-uST*HgWmY!us0 z)~^RR(JI~&4|=%SlGu>c{km|_V_F*!P|k7a^1NlUGeeN|fh+uozbzxEnFHN!=yc@# zV{!XoFw&i2DF$bTwXa8edF{hzSX;S!e83QS`UG=+^n_~MQm0NN}ABvx5gz|Taqb7 z35TMu&ob?^))va%C)n$4``(Dzb$jPzm)Y{hw#hD`OAB)uOxNLsQzLYlcuPlY?wM5a z?G`an^jr?RP^ z$%LY-tIUJEZl14@@692XshqEjolHCS&=rt-v*bx`Jrp@IoHc`ztX6APwHss&NIL3K zOMD$3-VTHVe-McLdSq(q8&eaLFHHR7L~&Fe4TruR42M=p(G&H0_j6Z6(My+@uUC!1 zxW7*m0qNzQBC4vWro1=i-7e@VxA7lo-RV_-gh6UH>8HNqC9=O$i=yDOXSYO3q9TcE zKP5NhdaFTBHoCWdbmMnd*w_4bO=SOT?8M_#?u0jpX7`-&P4`k>%&r{o0P5b(*aaVM zNAvEdS3=S0Y3A!YdT<%Yji0Z$!^pV#Xt86}ag0*`lNRTme?6OD*UxUOIPiCnRZ7oW zdTi7?B7@IyddwPmMIuXfq28moBr964qE@3p&W^0~Lys`Xf{O-ycd)v`vG+_y4khGj zze&y$QU-l%KC<5FXmQ7oV@`7I%R5Ilea?*n0p{Oma(eg`Bq#57?H5l@#wyC=*2k17 zb}xsb$!VrNY%P)8;gzWw*Pk%6hP^cM${EWdf z6NG+>EIA_6b?Rk@NH2SBiB1r8t0@SodS6y*ZwRXE>44ei!dZ6E0NHSJ4 z<_uy^J^EtKU2@7jw))mIhnq{Yoj;6~V(j&EBhh@U*A~d#IC&1`Qg-gJ`En>4i!tv< zos*f_3AgOao&oDpWRF?1Qr|_ap}+r1*Et2ylO9iV&XK;rIT|Hv4yXEFhdeHIehW{% z1MXFWqm>V_^fW`y*)Mw$><*g)XZZN9T}%l3F zef{oB-9hVH%{xC>XCs}bDsARP-zT@UCG)tAezcz4+P7cqV}Yxp*94=Gjv7NT7i%*! zJ#8=!(aZ-a_S9nBp6O$QtD@J2&e2@QhWZP}v_GS}fnxGZ`#QN7%m&@lzDwHKREM^m zvGt{T4B!8MqK-FU3VfB*=900@8p z2!H?xfB*=9z!OCPzyI%vatDk70T2KI5C8!X009sH0T2KI5CDN=A%Ok=v0#N#5C8!X z009sH0T2KI5C8!X009tqq6lFB|3tY1#()3_fB*=900@8p2!H?xfB*=9z_Ad({{L98 zLMaG<00@8p2!H?xfB*=900@8p2s}{)=>C5w@*jc7f0G}0fdB}A00@8p2!H?xfB*=9 z00@8p2z(3(j0Vp#g)F~5G#ZSKr8c%RIXwUW7`PQ(009sH0T2KI5C8!X009sH0T2Lz z0R*uB9{>o&AOHd&00JNY0w4eaAOHd&00JQJF(82b|Hr_I=mH3U00@8p2!H?xfB*=9 z00@8p2n-+)rsbyx2%#7RKmY_l00ck)1V8`;KmY_l00cnbsV0Eu|4+61U_1zb00@8p z2!H?xfB*=900@8p2pow3_Wwu11R)Rr0T2KI5C8!X009sH0T2KI5O}Hy;NSm0)$W7w zAOHd&00JNY0w4eaAOHd&00JOzBm&s~9|;qLKmY_l00ck)1V8`;KmY_l00cnbsV3mq z|FchZM_@b%fB*=900@8p2!H?xfB*=900@9UPXdnpKmAnzV?6~?00JNY0w4eaAOHd& z00JNY0w4eaAn?=@2#>4WTY&`xc{m;Cq+S%nzN0%#RhpcznvCVvu&s@vt5EeMQ$1%3ZZ}LSx zm*z{Xb)TT7kYj8<$8PXho_HgjDy35!e0V(8zF7)I?@u!i7NurQd?$0WQW6zOR4X~5 zAy#sfMqT=nSko`Fa@Bby?J9zDkf|9pE|{L)$z`tZ@WiDX{7vHB52KKyrgDZ_YCUb7 zyTEd8bLbtHMfwfPC@$8XE{39838tO4x|Z7C-%&-Spon)wMQm0{H_cm_T6fv`ULEQd zwbdA=cT3Q2blT!#?+#rLMH30;{Y(0|>6Y)l^QwC!bqhS&kn*cPaBLMtp(Zu=HY7!? zYLeXa83^VmEYV15?&bIUi_zg!+OJF6Ai+1Jnn+vEH#bC0km`P8&}`c5S)T;p8EV}q zDk*h8Cu%fU93kQzcXmVWap$PqckDIuq3D%!O#6%#pPe*kkM+*23Df=dxat(y(nj=k z3TW$&E>PPa!UgjJG_<&MQ3K1hh)r<>DiPq-(GVk6kCBgj{F9YcVg~T z8k{=F(m=H4MKZ-(6Kby2*cBC@g0vzETGv!+RqHd`S|Vxr9$5x7NQU9q^=y7!Us+g% zz2a^;wIpg<`*Vd*baj;(xjv#V0^6S@*Yoqt17ob)f+RV+E-t#G!Bs{qGr4Su2i-Z} z8x)?pj?l2^7v=jmNNVjngWM46EoZ=x#mZMVLebPb)1I})jG2Lpf=0SyPr|O-v91B+ zDzcXc|LWJW+q>+6k)#vzG-$-cv+X^<7h1MbWN?`_y*DqpjW>llz|uMW?5k_g~jz(y-b1 zx!{h%PIn$5zgu+%#$DfJ`wORXeFMswC8X@}0Gv zi~COo1~h9UWw+wqwRP^h=K|w)-?_l?{Qtx@`e+CQKmY_l00ck)1V8`;KmY_l00cnb zWC>vZf3m#M8VG;@2!H?xfB*=900@8p2!H?xJPrcb|341uXcGiL00ck)1V8`;KmY_l z00ck)1WuL!{iT0_(|S_1(P009sH0T2KI5C8!X009sH0T4J@0(ky^vb@n62!H?x zfB*=900@8p2!H?xfB*B_5Bj4-W@M+oANQZttj~qTc?jEZ*LbtF4CE)YRJrMgF2#B_Fngdy*#P zl|A#*?NlLidsnEw)!J8Yj}H-T)AvR3t@^=OfP9YD1gU=T8HTD3ekj#dp?X)m{eK4e BhbjO7 literal 0 HcmV?d00001 diff --git a/API/Parser/Parser.cs b/API/Parser/Parser.cs index c089a688e..569ba93d5 100644 --- a/API/Parser/Parser.cs +++ b/API/Parser/Parser.cs @@ -14,8 +14,7 @@ namespace API.Parser private static readonly Regex ImageRegex = new Regex(ImageFileExtensions, RegexOptions.IgnoreCase | RegexOptions.Compiled); private static readonly Regex MangaFileRegex = new Regex(MangaFileExtensions, RegexOptions.IgnoreCase | RegexOptions.Compiled); private static readonly Regex XmlRegex = new Regex(XmlRegexExtensions, RegexOptions.IgnoreCase | RegexOptions.Compiled); - - //?: is a non-capturing group in C#, else anything in () will be a group + private static readonly Regex[] MangaVolumeRegex = new[] { // Dance in the Vampire Bund v16-17 @@ -32,11 +31,11 @@ namespace API.Parser RegexOptions.IgnoreCase | RegexOptions.Compiled), // Killing Bites Vol. 0001 Ch. 0001 - Galactica Scanlations (gb) new Regex( - @"(vol\.? ?)(?0*[1-9]+)", + @"(vol\.? ?)(?\d+)", RegexOptions.IgnoreCase | RegexOptions.Compiled), // Tonikaku Cawaii [Volume 11].cbz new Regex( - @"(volume )(?0?[1-9]+)", + @"(volume )(?\d+)", RegexOptions.IgnoreCase | RegexOptions.Compiled), // Tower Of God S01 014 (CBT) (digital).cbz @@ -101,13 +100,21 @@ namespace API.Parser new Regex( @"(?.*)(_)(v|vo|c|volume)( |_)\d+", RegexOptions.IgnoreCase | RegexOptions.Compiled), + // Mahoutsukai to Deshi no Futekisetsu na Kankei Chp. 1 + new Regex( + @"(?.*)( |_)(?:Chp.? ?\d+)", + RegexOptions.IgnoreCase | RegexOptions.Compiled), + // Corpse Party -The Anthology- Sachikos game of love Hysteric Birthday 2U Chapter 01 + new Regex( + @"^(?!Vol)(?.*)( |_)Chapter( |_)(\d+)", // TODO: This is breaking a ton of cases + RegexOptions.IgnoreCase | RegexOptions.Compiled), // Akiiro Bousou Biyori - 01.jpg, Beelzebub_172_RHS.zip, Cynthia the Mission 29.rar new Regex( - @"^(?!Vol)(?.*)( |_)(\d+)", + @"^(?!Vol)(?.*)( |_|-)(\d+)", RegexOptions.IgnoreCase | RegexOptions.Compiled), // [BAA]_Darker_than_Black_c1 (This is very greedy, make sure it's close to last) new Regex( - @"(?.*)( |_)(c)\d+", + @"(?.*)( |_|-)(c)\d+", RegexOptions.IgnoreCase | RegexOptions.Compiled), }; @@ -223,8 +230,9 @@ namespace API.Parser private static readonly Regex[] MangaChapterRegex = new[] { + // Historys Strongest Disciple Kenichi_v11_c90-98.zip, ...c90.5-100.5 new Regex( - @"(c|ch)(\.? ?)(?\d+(?:.\d+|-\d+)?)", + @"(c|ch)(\.? ?)(?(\d+(\.\d)?)-?(\d+(\.\d)?)?)", RegexOptions.IgnoreCase | RegexOptions.Compiled), // [Suihei Kiki]_Kasumi_Otoko_no_Ko_[Taruby]_v1.1.zip new Regex( @@ -251,13 +259,17 @@ namespace API.Parser }; private static readonly Regex[] MangaEditionRegex = { - //Tenjo Tenge {Full Contact Edition} v01 (2011) (Digital) (ASTC).cbz + // Tenjo Tenge {Full Contact Edition} v01 (2011) (Digital) (ASTC).cbz new Regex( @"(?({|\(|\[).* Edition(}|\)|\]))", RegexOptions.IgnoreCase | RegexOptions.Compiled), - //Tenjo Tenge {Full Contact Edition} v01 (2011) (Digital) (ASTC).cbz + // Tenjo Tenge {Full Contact Edition} v01 (2011) (Digital) (ASTC).cbz new Regex( - @"(\b|_)(?Omnibus)(\b|_)", + @"(\b|_)(?Omnibus(( |_)?Edition)?)(\b|_)?", + RegexOptions.IgnoreCase | RegexOptions.Compiled), + // To Love Ru v01 Uncensored (Ch.001-007) + new Regex( + @"(\b|_)(?Uncensored)(\b|_)", RegexOptions.IgnoreCase | RegexOptions.Compiled), }; @@ -277,6 +289,14 @@ namespace API.Parser RegexOptions.IgnoreCase | RegexOptions.Compiled), }; + private static readonly Regex[] MangaSpecialRegex = + { + // All Keywords, does not account for checking if contains volume/chapter identification. Parser.Parse() will handle. + new Regex( + @"(?Special|OneShot|One\-Shot|Omake|Extra)", + RegexOptions.IgnoreCase | RegexOptions.Compiled), + }; + /// /// Parses information out of a file path. Will fallback to using directory name if Series couldn't be parsed @@ -314,6 +334,13 @@ namespace API.Parser ret.Series = CleanTitle(ret.Series.Replace(edition, "")); ret.Edition = edition; } + + var isSpecial = ParseMangaSpecial(fileName); + if (ret.Chapters == "0" && ret.Volumes == "0" && !string.IsNullOrEmpty(isSpecial)) + { + ret.IsSpecial = true; + } + return ret.Series == string.Empty ? null : ret; @@ -346,6 +373,23 @@ namespace API.Parser return string.Empty; } + public static string ParseMangaSpecial(string filePath) + { + foreach (var regex in MangaSpecialRegex) + { + var matches = regex.Matches(filePath); + foreach (Match match in matches) + { + if (match.Groups["Special"].Success && match.Groups["Special"].Value != string.Empty) + { + return match.Groups["Special"].Value; + } + } + } + + return string.Empty; + } + public static string ParseSeries(string filename) { foreach (var regex in MangaSeriesRegex) @@ -496,6 +540,25 @@ namespace API.Parser return title; } + private static string RemoveSpecialTags(string title) + { + foreach (var regex in MangaSpecialRegex) + { + var matches = regex.Matches(title); + foreach (Match match in matches) + { + if (match.Success) + { + title = title.Replace(match.Value, ""); + } + } + } + + return title; + } + + + /// /// Translates _ -> spaces, trims front and back of string, removes release groups /// @@ -507,6 +570,8 @@ namespace API.Parser title = RemoveEditionTagHolders(title); + title = RemoveSpecialTags(title); + title = title.Replace("_", " ").Trim(); if (title.EndsWith("-")) { diff --git a/API/Parser/ParserInfo.cs b/API/Parser/ParserInfo.cs index ee92ddd9f..4b7d5985e 100644 --- a/API/Parser/ParserInfo.cs +++ b/API/Parser/ParserInfo.cs @@ -24,5 +24,10 @@ namespace API.Parser /// This can potentially story things like "Omnibus, Color, Full Contact Edition, Extra, Final, etc" /// public string Edition { get; set; } = ""; + + /// + /// If the file contains no volume/chapter information and contains Special Keywords + /// + public bool IsSpecial { get; set; } = false; } } \ No newline at end of file diff --git a/API/Services/Tasks/ScannerService.cs b/API/Services/Tasks/ScannerService.cs index 4be3d749d..e8101cc75 100644 --- a/API/Services/Tasks/ScannerService.cs +++ b/API/Services/Tasks/ScannerService.cs @@ -49,15 +49,15 @@ namespace API.Services.Tasks { // NOTE: This solution isn't the best, but it has potential. We need to handle a few other cases so it works great. return false; - - // if (/*_environment.IsProduction() && */!_forceUpdate && Directory.GetLastWriteTime(folder.Path) < folder.LastScanned) + + // if (!_forceUpdate && Directory.GetLastWriteTime(folder.Path) < folder.LastScanned) // { - // _logger.LogDebug($"{folder.Path} hasn't been updated since last scan. Skipping."); + // _logger.LogDebug("{FolderPath} hasn't been modified since last scan. Skipping", folder.Path); // skippedFolders += 1; // return true; // } - // - // return false; + + //return false; } private void Cleanup() @@ -134,7 +134,6 @@ namespace API.Services.Tasks if (Task.Run(() => _unitOfWork.Complete()).Result) { - _logger.LogInformation("Scan completed on {LibraryName}. Parsed {ParsedSeriesCount} series in {ElapsedScanTime} ms", library.Name, series.Keys.Count, sw.ElapsedMilliseconds); } else @@ -149,6 +148,13 @@ namespace API.Services.Tasks { if (parsedSeries == null) throw new ArgumentNullException(nameof(parsedSeries)); + // For all parsedSeries, any infos that contain same series name and IsSpecial is true are combined + // foreach (var series in parsedSeries) + // { + // var seriesName = series.Key; + // if (parsedSeries.ContainsKey(seriesName)) + // } + // First, remove any series that are not in parsedSeries list var foundSeries = parsedSeries.Select(s => Parser.Parser.Normalize(s.Key)).ToList(); var missingSeries = library.Series.Where(existingSeries => @@ -222,7 +228,7 @@ namespace API.Services.Tasks series.Volumes.Add(volume); } - volume.IsSpecial = volume.Number == 0 && infos.All(p => p.Chapters == "0"); + volume.IsSpecial = volume.Number == 0 && infos.All(p => p.Chapters == "0" || p.IsSpecial); _logger.LogDebug("Parsing {SeriesName} - Volume {VolumeNumber}", series.Name, volume.Name); UpdateChapters(volume, infos); volume.Pages = volume.Chapters.Sum(c => c.Pages); @@ -314,7 +320,7 @@ namespace API.Services.Tasks private void TrackSeries(ParserInfo info) { if (info.Series == string.Empty) return; - + _scannedSeries.AddOrUpdate(info.Series, new List() {info}, (_, oldValue) => { oldValue ??= new List(); diff --git a/API/Startup.cs b/API/Startup.cs index 081249137..736d712f6 100644 --- a/API/Startup.cs +++ b/API/Startup.cs @@ -136,7 +136,7 @@ namespace API applicationLifetime.ApplicationStopping.Register(OnShutdown); applicationLifetime.ApplicationStarted.Register(() => { - Console.WriteLine("Kavita - v0.3.5"); + Console.WriteLine("Kavita - v0.3.6"); }); } diff --git a/build.sh b/build.sh index a8d59b9d1..de97ff72f 100644 --- a/build.sh +++ b/build.sh @@ -65,8 +65,8 @@ Package() # TODO: Use no-restore? Because Build should have already done it for us echo "Building" cd API - echo dotnet publish -c release --self-contained --runtime $runtime -o "$lOutputFolder" --framework $framework - dotnet publish -c release --self-contained --runtime $runtime -o "$lOutputFolder" --framework $framework + echo dotnet publish -c Release --self-contained --runtime $runtime -o "$lOutputFolder" --framework $framework + dotnet publish -c Release --self-contained --runtime $runtime -o "$lOutputFolder" --framework $framework echo "Copying Install information" cp ../INSTALL.txt "$lOutputFolder"/README.txt