diff --git a/scanner/scanner/scanner.py b/scanner/scanner/scanner.py
index de2250ee..310ee768 100644
--- a/scanner/scanner/scanner.py
+++ b/scanner/scanner/scanner.py
@@ -8,7 +8,7 @@ from guessit import guessit
 from providers.provider import Provider
 from providers.types.episode import Episode, PartialShow
 from providers.types.season import Season, SeasonTranslation
-from .utils import log_errors, provider_cache, set_in_cache
+from .utils import batch, log_errors, provider_cache, set_in_cache
 
 
 class Scanner:
@@ -23,8 +23,11 @@ class Scanner:
         self.languages = languages
 
     async def scan(self, path: str):
+        logging.info("Starting the scan. It can take some times...")
         videos = filter(lambda p: p.is_file(), Path(path).rglob("*"))
-        await asyncio.gather(*map(self.identify, videos))
+        # We batch videos by 30 because too much at once kinda DDoSes everything.
+        for group in batch(videos, 30):
+            await asyncio.gather(*map(self.identify, group))
 
     async def is_registered(self, path: Path) -> bool:
         # TODO: Once movies are separated from the api, a new endpoint should be created to check for paths.
diff --git a/scanner/scanner/utils.py b/scanner/scanner/utils.py
index a8c8f28e..ff780119 100644
--- a/scanner/scanner/utils.py
+++ b/scanner/scanner/utils.py
@@ -1,9 +1,34 @@
 import asyncio
 import logging
 from functools import wraps
+from itertools import islice
+from typing import Iterable, Iterator, List, TypeVar
 
 from providers.utils import ProviderError
 
+T = TypeVar("T")
+
+
+def batch(iterable: Iterable[T], n: int) -> Iterator[List[T]]:
+    """Split an iterable into consecutive lists of at most n items.
+
+    The input is consumed lazily, one batch at a time, and the last batch
+    may be shorter than n.
+
+    Raises:
+        ValueError: if n is lower than 1 (raised on first iteration).
+    """
+    # batch("ABCDEFG", 3) yields ["A", "B", "C"], ["D", "E", "F"], ["G"]
+    if n < 1:
+        raise ValueError("n must be at least one")
+    it = iter(iterable)
+    while True:
+        chunk = list(islice(it, n))
+        if not chunk:
+            return
+        yield chunk
+
+
 def log_errors(f):
     @wraps(f)
     async def internal(*args, **kwargs):