Chunk identify scans

This commit is contained in:
Zoe Roux 2025-11-20 09:44:00 +01:00
parent 37ec32b52d
commit f7e801e574
No known key found for this signature in database
2 changed files with 81 additions and 69 deletions

View File

@ -1,3 +1,5 @@
import asyncio
import itertools
import os
import re
from contextlib import asynccontextmanager
@ -111,16 +113,19 @@ class FsScanner:
logger.error("Unexpected error while monitoring files.", exc_info=e)
async def _register(self, videos: list[str] | set[str]):
# TODO: we should probably chunk those
vids: list[Video] = []
for path in list(videos):
async def process(path: str):
try:
vid = await identify(path)
vid = self._match(vid)
vids.append(vid)
return self._match(vid)
except Exception as e:
logger.error("Couldn't identify %s.", path, exc_info=e)
created = await self._client.create_videos(vids)
return None
for batch in itertools.batched(videos, 20):
vids = await asyncio.gather(*(process(path) for path in batch))
created = await self._client.create_videos(
[v for v in vids if v is not None]
)
await self._requests.enqueue(
[

View File

@ -1,15 +1,18 @@
import os
from collections.abc import Awaitable
from hashlib import sha256
from itertools import zip_longest
from logging import getLogger
from typing import Callable, Literal, cast
from opentelemetry import trace
from rebulk.match import Match
from ..models.videos import Guess, Video
from .guess.guess import guessit
logger = getLogger(__name__)
tracer = trace.get_tracer("kyoo.scanner")
pipeline: list[Callable[[str, Guess], Awaitable[Guess]]] = [
# TODO: add nfo scanner
@ -19,6 +22,9 @@ pipeline: list[Callable[[str, Guess], Awaitable[Guess]]] = [
async def identify(path: str) -> Video:
with tracer.start_as_current_span(f"identify {os.path.basename(path)}") as span:
span.set_attribute("video.path", path)
raw = guessit(path, expected_titles=[])
# guessit should only return one (according to the doc)
@ -61,6 +67,7 @@ async def identify(path: str) -> Video:
for k, v in raw.items()
},
)
span.set_attribute("video.name", guess.title)
for step in pipeline:
try: