From f0f12e269007d454da8925572f1b64d30b6e3e81 Mon Sep 17 00:00:00 2001 From: Zoe Roux Date: Wed, 7 May 2025 12:53:59 +0200 Subject: [PATCH] Filter videos & push them to the api --- api/src/controllers/videos.ts | 14 ++++++++--- scanner/README.md | 42 ++++++++++++++++---------------- scanner/scanner/client.py | 16 ++++-------- scanner/scanner/fsscan.py | 29 ++++++++++++++++++---- scanner/scanner/models/videos.py | 4 +++ 5 files changed, 65 insertions(+), 40 deletions(-) diff --git a/api/src/controllers/videos.ts b/api/src/controllers/videos.ts index ecba48ed..ca98ab06 100644 --- a/api/src/controllers/videos.ts +++ b/api/src/controllers/videos.ts @@ -22,7 +22,7 @@ import { sortToSql, } from "~/models/utils"; import { desc as description } from "~/models/utils/descriptions"; -import { Guesses, SeedVideo, Video } from "~/models/video"; +import { Guess, Guesses, SeedVideo, Video } from "~/models/video"; import { comment } from "~/utils"; import { computeVideoSlug } from "./seed/insert/entries"; import { @@ -33,6 +33,7 @@ import { const CreatedVideo = t.Object({ id: t.String({ format: "uuid" }), path: t.String({ examples: [bubbleVideo.path] }), + guess: t.Omit(Guess, ["history"]), entries: t.Array( t.Object({ slug: t.String({ format: "slug", examples: ["bubble-v2"] }), @@ -170,7 +171,7 @@ export const videosH = new Elysia({ prefix: "/videos", tags: ["videos"] }) "", async ({ body, status }) => { return await db.transaction(async (tx) => { - let vids: { pk: number; id: string; path: string }[] = []; + let vids: { pk: number; id: string; path: string; guess: Guess }[] = []; try { vids = await tx .insert(videos) @@ -183,6 +184,7 @@ export const videosH = new Elysia({ prefix: "/videos", tags: ["videos"] }) pk: videos.pk, id: videos.id, path: videos.path, + guess: videos.guess, }); } catch (e) { if (!isUniqueConstraint(e)) throw e; @@ -223,7 +225,12 @@ export const videosH = new Elysia({ prefix: "/videos", tags: ["videos"] }) if (!vidEntries.length) { return status( 201, - 
vids.map((x) => ({ id: x.id, path: x.path, entries: [] })), + vids.map((x) => ({ + id: x.id, + path: x.path, + guess: x.guess, + entries: [], + })), ); } @@ -362,6 +369,7 @@ export const videosH = new Elysia({ prefix: "/videos", tags: ["videos"] }) vids.map((x) => ({ id: x.id, path: x.path, + guess: x.guess, entries: entr[x.pk] ?? [], })), ); diff --git a/scanner/README.md b/scanner/README.md index 6247aed6..d2f6982d 100644 --- a/scanner/README.md +++ b/scanner/README.md @@ -17,9 +17,8 @@ In order of action: from: "guessit" kind: movie | episode | extra title: string, - year?: number[], - season?: number[], - episode?: number[], + years?: number[], + episodes?: {season?: number, episode: number}[], ... }, } @@ -36,41 +35,42 @@ In order of action: from: "anilist", kind: movie | episode | extra name: string, - year: number | null, - season?: number[], - episode?: number[], - absolute?: number[], + years: number[], + episodes?: {season?: number, episode: number}[], externalId: Record, history: { from: "guessit" kind: movie | episode | extra title: string, - year?: number, - season?: number[], - episode?: number[], - ... + years?: number[], + episodes?: {season?: number, episode: number}[], }, ... 
}, } ``` - - If kind is episode, try to find the serie's id on kyoo (using the previously fetched data from `/videos`): + - Try to find the series id on kyoo (using the previously fetched data from `/videos`): - if another video in the list of already registered videos has the same `kind`, `name` & `year`, assume it's the same - if a match is found, add to the video's json: ```json5 { - entries: (uuid | slug | { - show: uuid | slug, - season: number, - episode: number, - externalId?: Record // takes priority over season/episode for matching if we have one + entries: ( + | { slug: string } + | { movie: uuid | string } + | { serie: uuid | slug, season: number, episode: number } + | { serie: uuid | slug, order: number } + | { serie: uuid | slug, special: number } + | { externalId?: Record } + | { externalId?: Record } })[], } ``` - Scanner pushes everything to the api in a single post `/videos` call - - Api registers every video in the database - - For each video without an associated entry, the guess data + the video's id is sent to the Matcher via a queue. - - Matcher retrieves metadata from the movie/serie + ALL episodes/seasons (from an external provider) - - Matcher pushes every metadata to the api (if there are 1000 episodes but only 1 video, still push the 1000 episodes) + - Api registers every video in the database & returns the list of videos not matched to an existing serie/movie.
+ - Scanner adds every non-matched video to a queue +For each item in the queue, the scanner will: + - retrieve metadata from the movie/serie + ALL episodes/seasons (from an external provider) + - push all metadata to the api (if there are 1000 episodes but only 1 video, still push the 1000 episodes) diff --git a/scanner/scanner/client.py b/scanner/scanner/client.py index 7c42c1b0..aed9f286 100644 --- a/scanner/scanner/client.py +++ b/scanner/scanner/client.py @@ -1,12 +1,9 @@ import os -import jsons -from aiohttp import ClientSession -from datetime import date from logging import getLogger -from typing import Optional -from .utils import format_date -from .models.videos import VideoInfo, Video +from aiohttp import ClientSession + +from .models.videos import Video, VideoCreated, VideoInfo logger = getLogger(__name__) @@ -20,14 +17,10 @@ class KyooClient: self._url = os.environ.get("KYOO_URL", "http://api:3567/api") async def __aenter__(self): - jsons.set_serializer(lambda x, **_: format_date(x), type[Optional[date | int]]) self._client = ClientSession( headers={ "User-Agent": "kyoo", }, - json_serialize=lambda *args, **kwargs: jsons.dumps( - *args, key_transformer=jsons.KEY_TRANSFORMER_CAMELCASE, **kwargs - ), ) return self @@ -41,12 +34,13 @@ class KyooClient: r.raise_for_status() return VideoInfo(**await r.json()) - async def create_videos(self, videos: list[Video]): + async def create_videos(self, videos: list[Video]) -> list[VideoCreated]: async with self._client.post( f"{self._url}/videos", json=[x.model_dump_json() for x in videos], ) as r: r.raise_for_status() + return list[VideoCreated](** await r.json()) async def delete_videos(self, videos: list[str] | set[str]): async with self._client.delete( diff --git a/scanner/scanner/fsscan.py b/scanner/scanner/fsscan.py index 72934798..765ca675 100644 --- a/scanner/scanner/fsscan.py +++ b/scanner/scanner/fsscan.py @@ -1,10 +1,12 @@ import os import re -import asyncio -from typing import Optional from logging
import getLogger +from mimetypes import guess_file_type +from typing import Optional from .client import KyooClient +from .identify import identify +from .models.videos import Video logger = getLogger(__name__) @@ -21,6 +23,11 @@ def get_ignore_pattern(): ignore_pattern = get_ignore_pattern() +def is_video(path: str) -> bool: + (mime, _) = guess_file_type(path, strict=False) + return mime is not None and mime.startswith("video/") + + async def scan(path: Optional[str], client: KyooClient, remove_deleted=False): path = path or os.environ.get("SCANNER_LIBRARY_ROOT", "/video") logger.info("Starting scan at %s. This may take some time...", path) @@ -29,7 +36,7 @@ async def scan(path: Optional[str], client: KyooClient, remove_deleted=False): info = await client.get_videos_info() - videos = set() + videos: set[str] = set() for dirpath, dirnames, files in os.walk(path): # Skip directories with a `.ignore` file if ".ignore" in files: @@ -42,7 +49,8 @@ async def scan(path: Optional[str], client: KyooClient, remove_deleted=False): # Apply ignore pattern, if any if ignore_pattern and ignore_pattern.match(file_path): continue - videos.add(file_path) + if is_video(file_path): + videos.add(file_path) to_register = videos - info.paths to_delete = info.paths - videos if remove_deleted else set() @@ -58,6 +66,17 @@ async def scan(path: Optional[str], client: KyooClient, remove_deleted=False): if to_register: logger.info("Found %d new files to register.", len(to_register)) - await asyncio.gather(*[publisher.add(path) for path in to_register]) + + # TODO: we should probably chunk those + vids: list[Video] = [] + for path in to_register: + try: + new = await identify(path) + vids.append(new) + except Exception as e: + logger.error("Couldn't identify %s.", path, exc_info=e) + created = await client.create_videos(vids) + + need_scan = [x for x in created if not any(x.entries)] logger.info("Scan finished for %s.", path) diff --git a/scanner/scanner/models/videos.py 
b/scanner/scanner/models/videos.py index df81eee4..9a530a81 100644 --- a/scanner/scanner/models/videos.py +++ b/scanner/scanner/models/videos.py @@ -71,3 +71,7 @@ class Video(Model): for_: list[ For.Slug | For.ExternalId | For.Movie | For.Episode | For.Order | For.Special ] = [] + +class VideoCreated(Resource): + guess: Guess + entries: list[Resource]