Chunk identify scans

This commit is contained in:
Zoe Roux 2025-11-20 09:44:00 +01:00
parent 37ec32b52d
commit f7e801e574
No known key found for this signature in database
2 changed files with 81 additions and 69 deletions

View File

@ -1,3 +1,5 @@
import asyncio
import itertools
import os import os
import re import re
from contextlib import asynccontextmanager from contextlib import asynccontextmanager
@ -111,16 +113,19 @@ class FsScanner:
logger.error("Unexpected error while monitoring files.", exc_info=e) logger.error("Unexpected error while monitoring files.", exc_info=e)
async def _register(self, videos: list[str] | set[str]): async def _register(self, videos: list[str] | set[str]):
# TODO: we should probably chunk those async def process(path: str):
vids: list[Video] = []
for path in list(videos):
try: try:
vid = await identify(path) vid = await identify(path)
vid = self._match(vid) return self._match(vid)
vids.append(vid)
except Exception as e: except Exception as e:
logger.error("Couldn't identify %s.", path, exc_info=e) logger.error("Couldn't identify %s.", path, exc_info=e)
created = await self._client.create_videos(vids) return None
for batch in itertools.batched(videos, 20):
vids = await asyncio.gather(*(process(path) for path in batch))
created = await self._client.create_videos(
[v for v in vids if v is not None]
)
await self._requests.enqueue( await self._requests.enqueue(
[ [

View File

@ -1,15 +1,18 @@
import os
from collections.abc import Awaitable from collections.abc import Awaitable
from hashlib import sha256 from hashlib import sha256
from itertools import zip_longest from itertools import zip_longest
from logging import getLogger from logging import getLogger
from typing import Callable, Literal, cast from typing import Callable, Literal, cast
from opentelemetry import trace
from rebulk.match import Match from rebulk.match import Match
from ..models.videos import Guess, Video from ..models.videos import Guess, Video
from .guess.guess import guessit from .guess.guess import guessit
logger = getLogger(__name__) logger = getLogger(__name__)
tracer = trace.get_tracer("kyoo.scanner")
pipeline: list[Callable[[str, Guess], Awaitable[Guess]]] = [ pipeline: list[Callable[[str, Guess], Awaitable[Guess]]] = [
# TODO: add nfo scanner # TODO: add nfo scanner
@ -19,6 +22,9 @@ pipeline: list[Callable[[str, Guess], Awaitable[Guess]]] = [
async def identify(path: str) -> Video: async def identify(path: str) -> Video:
with tracer.start_as_current_span(f"identify {os.path.basename(path)}") as span:
span.set_attribute("video.path", path)
raw = guessit(path, expected_titles=[]) raw = guessit(path, expected_titles=[])
# guessit should only return one (according to the doc) # guessit should only return one (according to the doc)
@ -61,6 +67,7 @@ async def identify(path: str) -> Video:
for k, v in raw.items() for k, v in raw.items()
}, },
) )
span.set_attribute("video.name", guess.title)
for step in pipeline: for step in pipeline:
try: try: