mirror of
https://github.com/zoriya/Kyoo.git
synced 2025-11-29 09:45:08 -05:00
Chunk identify scans
This commit is contained in:
parent
37ec32b52d
commit
f7e801e574
@ -1,3 +1,5 @@
|
|||||||
|
import asyncio
|
||||||
|
import itertools
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
from contextlib import asynccontextmanager
|
from contextlib import asynccontextmanager
|
||||||
@ -111,30 +113,33 @@ class FsScanner:
|
|||||||
logger.error("Unexpected error while monitoring files.", exc_info=e)
|
logger.error("Unexpected error while monitoring files.", exc_info=e)
|
||||||
|
|
||||||
async def _register(self, videos: list[str] | set[str]):
|
async def _register(self, videos: list[str] | set[str]):
|
||||||
# TODO: we should probably chunk those
|
async def process(path: str):
|
||||||
vids: list[Video] = []
|
|
||||||
for path in list(videos):
|
|
||||||
try:
|
try:
|
||||||
vid = await identify(path)
|
vid = await identify(path)
|
||||||
vid = self._match(vid)
|
return self._match(vid)
|
||||||
vids.append(vid)
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error("Couldn't identify %s.", path, exc_info=e)
|
logger.error("Couldn't identify %s.", path, exc_info=e)
|
||||||
created = await self._client.create_videos(vids)
|
return None
|
||||||
|
|
||||||
await self._requests.enqueue(
|
for batch in itertools.batched(videos, 20):
|
||||||
[
|
vids = await asyncio.gather(*(process(path) for path in batch))
|
||||||
Request(
|
created = await self._client.create_videos(
|
||||||
kind=x.guess.kind,
|
[v for v in vids if v is not None]
|
||||||
title=x.guess.title,
|
)
|
||||||
year=next(iter(x.guess.years), None),
|
|
||||||
external_id=x.guess.external_id,
|
await self._requests.enqueue(
|
||||||
videos=[Request.Video(id=x.id, episodes=x.guess.episodes)],
|
[
|
||||||
)
|
Request(
|
||||||
for x in created
|
kind=x.guess.kind,
|
||||||
if not any(x.entries) and x.guess.kind != "extra"
|
title=x.guess.title,
|
||||||
]
|
year=next(iter(x.guess.years), None),
|
||||||
)
|
external_id=x.guess.external_id,
|
||||||
|
videos=[Request.Video(id=x.id, episodes=x.guess.episodes)],
|
||||||
|
)
|
||||||
|
for x in created
|
||||||
|
if not any(x.entries) and x.guess.kind != "extra"
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
def _match(self, video: Video) -> Video:
|
def _match(self, video: Video) -> Video:
|
||||||
video.for_ = []
|
video.for_ = []
|
||||||
|
|||||||
@ -1,15 +1,18 @@
|
|||||||
|
import os
|
||||||
from collections.abc import Awaitable
|
from collections.abc import Awaitable
|
||||||
from hashlib import sha256
|
from hashlib import sha256
|
||||||
from itertools import zip_longest
|
from itertools import zip_longest
|
||||||
from logging import getLogger
|
from logging import getLogger
|
||||||
from typing import Callable, Literal, cast
|
from typing import Callable, Literal, cast
|
||||||
|
|
||||||
|
from opentelemetry import trace
|
||||||
from rebulk.match import Match
|
from rebulk.match import Match
|
||||||
|
|
||||||
from ..models.videos import Guess, Video
|
from ..models.videos import Guess, Video
|
||||||
from .guess.guess import guessit
|
from .guess.guess import guessit
|
||||||
|
|
||||||
logger = getLogger(__name__)
|
logger = getLogger(__name__)
|
||||||
|
tracer = trace.get_tracer("kyoo.scanner")
|
||||||
|
|
||||||
pipeline: list[Callable[[str, Guess], Awaitable[Guess]]] = [
|
pipeline: list[Callable[[str, Guess], Awaitable[Guess]]] = [
|
||||||
# TODO: add nfo scanner
|
# TODO: add nfo scanner
|
||||||
@ -19,62 +22,66 @@ pipeline: list[Callable[[str, Guess], Awaitable[Guess]]] = [
|
|||||||
|
|
||||||
|
|
||||||
async def identify(path: str) -> Video:
|
async def identify(path: str) -> Video:
|
||||||
raw = guessit(path, expected_titles=[])
|
with tracer.start_as_current_span(f"identify {os.path.basename(path)}") as span:
|
||||||
|
span.set_attribute("video.path", path)
|
||||||
|
|
||||||
# guessit should only return one (according to the doc)
|
raw = guessit(path, expected_titles=[])
|
||||||
title = raw.get("title", [])[0]
|
|
||||||
kind = raw.get("type", [])[0]
|
|
||||||
version = next(iter(raw.get("version", [])), None)
|
|
||||||
# apparently guessit can return multiples but tbh idk what to do with
|
|
||||||
# multiples part. we'll just ignore them for now
|
|
||||||
part = next(iter(raw.get("part", [])), None)
|
|
||||||
|
|
||||||
years = raw.get("year", [])
|
# guessit should only return one (according to the doc)
|
||||||
seasons = raw.get("season", [])
|
title = raw.get("title", [])[0]
|
||||||
episodes = raw.get("episode", [])
|
kind = raw.get("type", [])[0]
|
||||||
|
version = next(iter(raw.get("version", [])), None)
|
||||||
|
# apparently guessit can return multiples but tbh idk what to do with
|
||||||
|
# multiples part. we'll just ignore them for now
|
||||||
|
part = next(iter(raw.get("part", [])), None)
|
||||||
|
|
||||||
# just strip the version & part number from the path
|
years = raw.get("year", [])
|
||||||
rendering_path = "".join(
|
seasons = raw.get("season", [])
|
||||||
c
|
episodes = raw.get("episode", [])
|
||||||
for i, c in enumerate(path)
|
|
||||||
if not (version and version.start <= i < version.end)
|
|
||||||
and not (part and part.start <= i < part.end)
|
|
||||||
)
|
|
||||||
|
|
||||||
guess = Guess(
|
# just strip the version & part number from the path
|
||||||
title=cast(str, title.value),
|
rendering_path = "".join(
|
||||||
kind=cast(Literal["episode", "movie"], kind.value),
|
c
|
||||||
extra_kind=None,
|
for i, c in enumerate(path)
|
||||||
years=[cast(int, y.value) for y in years],
|
if not (version and version.start <= i < version.end)
|
||||||
episodes=[
|
and not (part and part.start <= i < part.end)
|
||||||
Guess.Episode(season=cast(int, s.value), episode=cast(int, e.value))
|
)
|
||||||
for s, e in zip_longest(
|
|
||||||
seasons,
|
|
||||||
episodes,
|
|
||||||
fillvalue=seasons[-1] if any(seasons) else Match(0, 0, value=1),
|
|
||||||
)
|
|
||||||
],
|
|
||||||
external_id={},
|
|
||||||
from_="guessit",
|
|
||||||
raw={
|
|
||||||
k: [x.value if x.value is int else str(x.value) for x in v]
|
|
||||||
for k, v in raw.items()
|
|
||||||
},
|
|
||||||
)
|
|
||||||
|
|
||||||
for step in pipeline:
|
guess = Guess(
|
||||||
try:
|
title=cast(str, title.value),
|
||||||
guess = await step(path, guess)
|
kind=cast(Literal["episode", "movie"], kind.value),
|
||||||
except Exception as e:
|
extra_kind=None,
|
||||||
logger.error("Couldn't run %s.", step.__name__, exc_info=e)
|
years=[cast(int, y.value) for y in years],
|
||||||
|
episodes=[
|
||||||
|
Guess.Episode(season=cast(int, s.value), episode=cast(int, e.value))
|
||||||
|
for s, e in zip_longest(
|
||||||
|
seasons,
|
||||||
|
episodes,
|
||||||
|
fillvalue=seasons[-1] if any(seasons) else Match(0, 0, value=1),
|
||||||
|
)
|
||||||
|
],
|
||||||
|
external_id={},
|
||||||
|
from_="guessit",
|
||||||
|
raw={
|
||||||
|
k: [x.value if x.value is int else str(x.value) for x in v]
|
||||||
|
for k, v in raw.items()
|
||||||
|
},
|
||||||
|
)
|
||||||
|
span.set_attribute("video.name", guess.title)
|
||||||
|
|
||||||
return Video(
|
for step in pipeline:
|
||||||
path=path,
|
try:
|
||||||
rendering=sha256(rendering_path.encode()).hexdigest(),
|
guess = await step(path, guess)
|
||||||
part=cast(int, part.value) if part else None,
|
except Exception as e:
|
||||||
version=cast(int, version.value) if version else 1,
|
logger.error("Couldn't run %s.", step.__name__, exc_info=e)
|
||||||
guess=guess,
|
|
||||||
)
|
return Video(
|
||||||
|
path=path,
|
||||||
|
rendering=sha256(rendering_path.encode()).hexdigest(),
|
||||||
|
part=cast(int, part.value) if part else None,
|
||||||
|
version=cast(int, version.value) if version else 1,
|
||||||
|
guess=guess,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user