From 04c8e57c00139bcda8105e88ed5cf02fb126aedd Mon Sep 17 00:00:00 2001
From: Zoe Roux
Date: Sun, 4 May 2025 23:48:42 +0200
Subject: [PATCH] Move fs scanner

---
Review notes (text between "---" and the first "diff --git" is ignored by git-am):
- Line breaks and indentation of this patch were reconstructed; verify the
  whitespace of context/removed lines against the tree before applying.
- scanner/scanner/fsscan.py is revised relative to a plain move: the os.walk
  traversal runs in a worker thread instead of blocking the event loop, set
  emptiness is tested directly instead of through any(), logging uses lazy
  %-arguments throughout, and docstrings were added.
- The post-image blob ids on the "index" lines of __init__.py and fsscan.py
  predate these revisions and are therefore stale.

 .env.example                   |   2 +-
 scanner/.env.example           |  13 ++--
 scanner/old_scanner/scanner.py |  73 -------------------
 scanner/scanner/__init__.py    |   1 +
 scanner/scanner/fsscan.py      | 103 +++++++++++++++++++++++++++
 5 files changed, 109 insertions(+), 83 deletions(-)
 create mode 100644 scanner/scanner/fsscan.py

diff --git a/.env.example b/.env.example
index a8829d6c..7e8f50ba 100644
--- a/.env.example
+++ b/.env.example
@@ -13,7 +13,7 @@ LIBRARY_LANGUAGES=en
 # If this is true, kyoo will prefer to download the media in the original language of the item.
 MEDIA_PREFER_ORIGINAL_LANGUAGE=false
 # A pattern (regex) to ignore files.
-LIBRARY_IGNORE_PATTERN=".*/[dD]ownloads?/.*|.*[Tt][Rr][Aa][Ii][Ll][Ee][Rr].*"
+LIBRARY_IGNORE_PATTERN=".*/[dD]ownloads?/.*"
 
 # If this is true, new accounts wont have any permissions before you approve them in your admin dashboard.
 REQUIRE_ACCOUNT_VERIFICATION=true
diff --git a/scanner/.env.example b/scanner/.env.example
index 90b614e7..6df9fd11 100644
--- a/scanner/.env.example
+++ b/scanner/.env.example
@@ -1,12 +1,7 @@
 # vi: ft=sh
 # shellcheck disable=SC2034
 
-# RabbitMQ settings
-# URL examples: https://docs.aio-pika.com/#url-examples
-# This uses AIORMQ (https://github.com/mosquito/aiormq/) under the hood, and supports whatever the library supports.
-# RABBITMQ_URL=ampqs://user:password@rabbitmq-server:1234/vhost?capath=/path/to/cacert.pem&certfile=/path/to/cert.pem&keyfile=/path/to/key.pem
-# These values are ignored when the RABBITMQ_URL is set
-RABBITMQ_HOST=rabbitmq
-RABBITMQ_PORT=5672
-RABBITMQ_USER=guest
-RABBITMQ_PASSWORD=guest
+# Root directory that will be traversed to find video files (inside the container)
+SCANNER_LIBRARY_ROOT="/video"
+# A pattern (regex) to ignore video files.
+LIBRARY_IGNORE_PATTERN=".*/[dD]ownloads?/.*"
diff --git a/scanner/old_scanner/scanner.py b/scanner/old_scanner/scanner.py
index 9fceac58..e69de29b 100644
--- a/scanner/old_scanner/scanner.py
+++ b/scanner/old_scanner/scanner.py
@@ -1,73 +0,0 @@
-import os
-import re
-import asyncio
-from typing import Optional
-from logging import getLogger
-
-from .publisher import Publisher
-from providers.kyoo_client import KyooClient
-
-logger = getLogger(__name__)
-
-
-def get_ignore_pattern():
-	try:
-		pattern = os.environ.get("LIBRARY_IGNORE_PATTERN")
-		return re.compile(pattern) if pattern else None
-	except re.error as e:
-		logger.error(f"Invalid ignore pattern. Ignoring. Error: {e}")
-		return None
-
-
-async def scan(
-	path_: Optional[str], publisher: Publisher, client: KyooClient, remove_deleted=False
-):
-	path = path_ or os.environ.get("SCANNER_LIBRARY_ROOT", "/video")
-	logger.info("Starting scan at %s. This may take some time...", path)
-
-	ignore_pattern = get_ignore_pattern()
-	if ignore_pattern:
-		logger.info(f"Applying ignore pattern: {ignore_pattern}")
-
-	registered = set(await client.get_registered_paths())
-	videos = set()
-
-	for dirpath, dirnames, files in os.walk(path):
-		# Skip directories with a `.ignore` file
-		if ".ignore" in files:
-			dirnames.clear()  # Prevents os.walk from descending into this directory
-			continue
-
-		for file in files:
-			file_path = os.path.join(dirpath, file)
-			# Apply ignore pattern, if any
-			if ignore_pattern and ignore_pattern.match(file_path):
-				continue
-			videos.add(file_path)
-
-	to_register = videos - registered
-	to_delete = registered - videos if remove_deleted else set()
-
-	if not any(to_register) and any(to_delete) and len(to_delete) == len(registered):
-		logger.warning("All video files are unavailable. Check your disks.")
-		return
-
-	# delete stale files before creating new ones to prevent potential conflicts
-	if to_delete:
-		logger.info("Removing %d stale files.", len(to_delete))
-		await asyncio.gather(*[publisher.delete(path) for path in to_delete])
-
-	if to_register:
-		logger.info("Found %d new files to register.", len(to_register))
-		await asyncio.gather(*[publisher.add(path) for path in to_register])
-
-	if remove_deleted:
-		issues = set(await client.get_issues())
-		issues_to_delete = issues - videos
-		if issues_to_delete:
-			logger.info("Removing %d stale issues.", len(issues_to_delete))
-			await asyncio.gather(
-				*[client.delete_issue(issue) for issue in issues_to_delete]
-			)
-
-	logger.info("Scan finished for %s.", path)
diff --git a/scanner/scanner/__init__.py b/scanner/scanner/__init__.py
index 2c4005a8..afa6ab81 100644
--- a/scanner/scanner/__init__.py
+++ b/scanner/scanner/__init__.py
@@ -5,6 +5,7 @@ app = FastAPI(
 	description="API to control the long running scanner or interacting with external databases (themoviedb, tvdb...)\n\n"
 	+ "Most of those APIs are for admins only.",
 	openapi_prefix="/scanner",
+	# TODO: pass lifespan=... here (FastAPI's keyword is "lifespan", not "lifetime")
 )
 
 
diff --git a/scanner/scanner/fsscan.py b/scanner/scanner/fsscan.py
new file mode 100644
index 00000000..ed3592ff
--- /dev/null
+++ b/scanner/scanner/fsscan.py
@@ -0,0 +1,103 @@
+"""Filesystem scan: reconcile the files found on disk with the paths kyoo has registered."""
+
+import os
+import re
+import asyncio
+from typing import Optional
+from logging import getLogger
+
+from .publisher import Publisher
+from providers.kyoo_client import KyooClient
+
+logger = getLogger(__name__)
+
+
+def get_ignore_pattern():
+	"""Compile the regex stored in the LIBRARY_IGNORE_PATTERN environment variable.
+
+	Returns None when the variable is unset or empty, and also when it is not
+	a valid regex (the error is logged and the scan runs without a filter).
+	"""
+	pattern = os.environ.get("LIBRARY_IGNORE_PATTERN")
+	if not pattern:
+		return None
+	try:
+		return re.compile(pattern)
+	except re.error as e:
+		logger.error("Invalid ignore pattern. Ignoring. Error: %s", e)
+		return None
+
+
+def _list_files(path, ignore_pattern):
+	"""Return the set of file paths found under `path`.
+
+	Directories containing a `.ignore` file are skipped together with their
+	subdirectories; paths matched by `ignore_pattern` (if any) are left out.
+	"""
+	found = set()
+	for dirpath, dirnames, files in os.walk(path):
+		if ".ignore" in files:
+			dirnames.clear()  # Prevents os.walk from descending into this directory
+			continue
+
+		for file in files:
+			file_path = os.path.join(dirpath, file)
+			# `match` anchors at the start of the path, so patterns need a leading `.*`
+			if ignore_pattern and ignore_pattern.match(file_path):
+				continue
+			found.add(file_path)
+	return found
+
+
+async def scan(
+	path_: Optional[str], publisher: Publisher, client: KyooClient, remove_deleted=False
+):
+	"""Register new files and, optionally, unregister files that disappeared.
+
+	Args:
+		path_: directory to scan; falls back to SCANNER_LIBRARY_ROOT, then "/video".
+		publisher: receives `add(path)` / `delete(path)` for each change.
+		client: queried for registered paths and, with `remove_deleted`, for issues.
+		remove_deleted: also delete registered paths and issues missing from disk.
+	"""
+	path = path_ or os.environ.get("SCANNER_LIBRARY_ROOT", "/video")
+	logger.info("Starting scan at %s. This may take some time...", path)
+
+	ignore_pattern = get_ignore_pattern()
+	if ignore_pattern:
+		logger.info("Applying ignore pattern: %s", ignore_pattern)
+
+	registered = set(await client.get_registered_paths())
+	# os.walk blocks; run it in a worker thread so the event loop stays responsive.
+	videos = await asyncio.get_running_loop().run_in_executor(
+		None, _list_files, path, ignore_pattern
+	)
+
+	to_register = videos - registered
+	to_delete = registered - videos if remove_deleted else set()
+
+	# Nothing new and every registered path missing: warn and bail out instead
+	# of deleting the whole library.
+	if not to_register and to_delete and len(to_delete) == len(registered):
+		logger.warning("All video files are unavailable. Check your disks.")
+		return
+
+	# delete stale files before creating new ones to prevent potential conflicts
+	if to_delete:
+		logger.info("Removing %d stale files.", len(to_delete))
+		await asyncio.gather(*[publisher.delete(path) for path in to_delete])
+
+	if to_register:
+		logger.info("Found %d new files to register.", len(to_register))
+		await asyncio.gather(*[publisher.add(path) for path in to_register])
+
+	if remove_deleted:
+		issues = set(await client.get_issues())
+		issues_to_delete = issues - videos
+		if issues_to_delete:
+			logger.info("Removing %d stale issues.", len(issues_to_delete))
+			await asyncio.gather(
+				*[client.delete_issue(issue) for issue in issues_to_delete]
+			)
+
+	logger.info("Scan finished for %s.", path)