Rework get movie for tmdb

2025-07-09 03:04:20 -04:00 · 2025-05-08 03:27:59 +02:00 · 2025-05-08 03:27:59 +02:00 · a4fdeb8a9b
commit a4fdeb8a9b
parent 7a00878eae
4 changed files with 163 additions and 209 deletions
--- a/scanner/scanner/models/movie.py
+++ b/scanner/scanner/models/movie.py
@ -29,11 +29,11 @@ class Movie(Model):
 	air_date: date | None
 	external_id: dict[str, MetadataId]
-	translations: dict[str, MovieTranslation] = {}
+	translations: dict[Language, MovieTranslation] = {}
 	videos: list[str] = []
 	collections: list[Collection] = []
 	studios: list[Studio] = []
 	staff: list[Staff] = []
 	videos: list[str] = []
 class MovieTranslation(Model):
--- a/scanner/scanner/models/studio.py
+++ b/scanner/scanner/models/studio.py
@ -7,9 +7,9 @@ from .metadataid import MetadataId
 class Studio(Model):
 	slug: str
 	external_id: dict[str, MetadataId]
-	translations: dict[str, StudioTranslations] = {}
+	translations: dict[str, StudioTranslation] = {}
-class StudioTranslations(Model):
+class StudioTranslation(Model):
 	name: str
 	logo: str | None
--- a/scanner/scanner/providers/themoviedatabase.py
+++ b/scanner/scanner/providers/themoviedatabase.py
@ -1,23 +1,25 @@
 import asyncio
-from aiohttp import ClientSession
+import os
 from datetime import datetime, timedelta
 from logging import getLogger
 from typing import Awaitable, Callable, Dict, List, Optional, Any, TypeVar
 from itertools import accumulate, zip_longest
 from logging import getLogger
 from typing import Any, Awaitable, Callable, Dict, List, Optional, TypeVar, override
 from aiohttp import ClientSession
 from langcodes import Language
 from providers.utils import ProviderError
 from matcher.cache import cache
-from ..provider import Provider
+from ..models.collection import Collection, CollectionTranslation
-from ..types.movie import Movie, MovieTranslation, Status as MovieStatus
+from ..models.entry import Entry, EntryTranslation
-from ..types.season import Season, SeasonTranslation
+from ..models.genre import Genre
-from ..types.episode import Episode, EpisodeTranslation, PartialShow, EpisodeID
+from ..models.metadataid import EpisodeId, MetadataId
-from ..types.studio import Studio
+from ..models.movie import Movie, MovieStatus, MovieTranslation
-from ..types.genre import Genre
+from ..models.season import Season, SeasonTranslation
-from ..types.metadataid import MetadataID
+from ..models.serie import Serie, SerieStatus, SerieTranslation
-from ..types.show import Show, ShowTranslation, Status as ShowStatus
+from ..models.studio import Studio, StudioTranslation
-from ..types.collection import Collection, CollectionTranslation
+from ..utils import clean, to_slug
 from .provider import Provider, ProviderError
 logger = getLogger(__name__)
@ -27,16 +29,16 @@ class TheMovieDatabase(Provider):
 	def __init__(
 		self,
 		languages: list[str],
 		client: ClientSession,
 		api_key: str,
 	) -> None:
 		super().__init__()
 		self._languages = [Language.get(l) for l in languages]
 		self._client = client
-		self.base = "https://api.themoviedb.org/3"
+		self._base = "https://api.themoviedb.org/3"
-		self.api_key = api_key
+		self._api_key = (
-		self.genre_map = {
+			os.environ.get("THEMOVIEDB_APIKEY") or TheMovieDatabase.DEFAULT_API_KEY
 		)
 		self._genre_map = {
 			28: Genre.ACTION,
 			12: Genre.ADVENTURE,
 			16: Genre.ANIMATION,
@ -57,7 +59,7 @@ class TheMovieDatabase(Provider):
 			37: Genre.WESTERN,
 			10759: [Genre.ACTION, Genre.ADVENTURE],
 			10762: Genre.KIDS,
-			10763: Genre.NEWS,
+			10763: [],
 			10764: Genre.REALITY,
 			10765: [Genre.SCIENCE_FICTION, Genre.FANTASY],
 			10766: Genre.SOAP,
@ -66,6 +68,7 @@ class TheMovieDatabase(Provider):
 		}
 	@property
 	@override
 	def name(self) -> str:
 		"""Identifier of this provider; also used as its key in ``external_id`` mappings."""
 		return "themoviedatabase"
@ -76,12 +79,9 @@ class TheMovieDatabase(Provider):
 			return [x]
 		return flatten(
-			[self.genre_map[x["id"]] for x in genres if x["id"] in self.genre_map]
+			[self._genre_map[x["id"]] for x in genres if x["id"] in self._genre_map]
 		)
 	def get_languages(self, *args) -> list[Language]:
 		"""Return the configured languages followed by any extra ones passed in.

 		The stored list is not modified; a new list is built on every call.
 		"""
 		extra = list(args)
 		return self._languages + extra
 	async def get(
 		self,
 		path: str,
@ -91,110 +91,54 @@ class TheMovieDatabase(Provider):
 	):
 		params = {k: v for k, v in params.items() if v is not None}
 		async with self._client.get(
-			f"{self.base}/{path}", params={"api_key": self.api_key, **params}
+			f"{self._base}/{path}", params={"api_key": self._api_key, **params}
 		) as r:
 			if not_found_fail and r.status == 404:
 				raise ProviderError(not_found_fail)
 			r.raise_for_status()
 			return await r.json()
 	T = TypeVar("T")
 	def merge_translations(self, host, translations, *, languages: list[Language]):
 		"""Replace ``host.translations`` with one entry per language and return ``host``.

 		``languages`` and ``translations`` are walked in lockstep: for each pair,
 		the language tag (``to_tag()``) is used both as the key of the merged
 		mapping and as the key looked up in that item's own ``translations``.
 		"""
 		merged = {}
 		for language, item in zip(languages, translations):
 			tag = language.to_tag()
 			merged[tag] = item.translations[tag]
 		# Assign only once everything was collected, so a failed lookup leaves host untouched.
 		host.translations = merged
 		return host
 	async def process_translations(
 		self,
 		for_language: Callable[[str], Awaitable[T]],
 		languages: list[Language],
 		post_merge: Callable[[T, list[T]], T] | None = None,
 	) -> T:
 		"""Fetch one item per language concurrently and merge them into the first one.

 		``for_language`` is called once for every entry of ``languages`` and the
 		resulting awaitables are awaited together with ``asyncio.gather``. The
 		first result becomes the host passed to ``merge_translations``. When
 		``post_merge`` is given, it receives the merged item and the full list of
 		results, and its return value is returned instead.
 		"""
 		results: list[Any] = await asyncio.gather(
 			*(for_language(language) for language in languages)
 		)
 		merged = self.merge_translations(results[0], results, languages=languages)
 		if not post_merge:
 			return merged
 		return post_merge(merged, results)
 	def get_image(self, images: list[Dict[str, Any]]) -> list[str]:
 		"""Build full image URLs for every entry that has a non-empty ``file_path``.

 		Entries whose ``file_path`` is falsy (``None`` or an empty string) are
 		dropped; the order of the remaining entries is preserved.
 		"""
 		urls: list[str] = []
 		for image in images:
 			path = image["file_path"]
 			if not path:
 				continue
 			urls.append(f"https://image.tmdb.org/t/p/original{path}")
 		return urls
 	def to_studio(self, company: dict[str, Any]) -> Studio:
 		return Studio(
-			name=company["name"],
+			slug=to_slug(company["name"]),
 			logos=[f"https://image.tmdb.org/t/p/original{company['logo_path']}"]
 			if "logo_path" in company
 			else [],
 			external_id={
-				self.name: MetadataID(
+				self.name: MetadataId(
-					company["id"], f"https://www.themoviedb.org/company/{company['id']}"
+					data_id=company["id"],
 					link=f"https://www.themoviedb.org/company/{company['id']}",
 				)
 			},
 			translations={
 				"en": StudioTranslation(
 					name=company["name"],
 					logo=f"https://image.tmdb.org/t/p/original{company['logo_path']}"
 					if "logo_path" in company
 					else None,
 				),
 			},
 		)
-	def get_best_image(
+	def _pick_image(self, item: dict[str, Any], lng: str, key: str) -> str | None:
-		self, item: dict[str, Any], lng: Language, key: str
+		base_path = "https://image.tmdb.org/t/p/original"
 	) -> list[dict]:
 		"""
 		Retrieves the best available images for a item based on localization.
-		Args:
+		images = sorted(
 			item (dict): A dictionary containing item information, including images and language details.
 			lng (Language): The preferred language for the images.
 			key (str): The key to access the images in the item dictionary. (e.g. "posters", "backdrops", "logos")
 		Returns:
 			list: A list of images, prioritized by localization, original language, and any available image.
 		"""
 		# Order images by size and vote average
 		item["images"][key] = sorted(
 			item["images"][key],
 			key=lambda x: (x.get("vote_average", 0), x.get("width", 0)),
 			reverse=True,
 		)
-		# Step 1: Try to get localized images
+		# check images in your language
-		localized_images = [
+		localized = next((x for x in images if x["iso_639_1"] == lng), None)
-			image
+		if localized:
-			for image in item["images"][key]
+			return base_path + localized
-			if image.get("iso_639_1") == lng.language
+		# if failed, check images without text
-		]
+		notext = next((x for x in images if x["iso_639_1"] == None), None)
-
+		if notext:
-		# Step 2: If no localized images, try images in the original language
+			return base_path + notext
-		if not localized_images:
+		# take a random image, it's better than nothing
-			localized_images = [
+		random_img = next((x for x in images if x["iso_639_1"] == None), None)
-				image
+		if random_img:
-				for image in item["images"][key]
+			return base_path + random_img
-				if image.get("iso_639_1") == item.get("original_language")
+		return None
 			]
 		# Step 3: If still no images, use any available images
 		if not localized_images:
 			localized_images = item["images"][key]
 		# Step 4: If there are no images at all, fallback to _path attribute.
 		if not localized_images:
 			localized_images = self._get_image_fallback(item, key)
 		return self.get_image(localized_images)
 	def _get_image_fallback(self, item: dict[str, Any], key: str) -> list[dict]:
 		"""
 		Fallback to _path attribute if there are no images available in the images list.
 		"""
 		if key == "posters":
 			return [{"file_path": item.get("poster_path")}]
 		elif key == "backdrops":
 			return [{"file_path": item.get("backdrop_path")}]
 		return []
 	async def search_movie(self, name: str, year: Optional[int]) -> Movie:
 		search_results = (
@ -208,94 +152,107 @@ class TheMovieDatabase(Provider):
 			search["id"], original_language=original_language
 		)
-	async def identify_movie(
+	@override
-		self, movie_id: str, original_language: Optional[Language] = None
+	async def get_movie(self, external_id: dict[str, str]) -> Movie | None:
-	) -> Movie:
+		# TODO: fallback to search via another id
-		languages = self.get_languages()
+		if self.name not in external_id:
 			return None
-		async def for_language(lng: Language) -> Movie:
+		movie = await self.get(
-			movie = await self.get(
+			f"movie/{external_id[self.name]}",
-				f"movie/{movie_id}",
+			params={
-				params={
+				"append_to_response": "alternative_titles,videos,credits,keywords,images,translations",
-					"language": lng.to_tag(),
+			},
-					"append_to_response": "alternative_titles,videos,credits,keywords,images",
+		)
-					"include_image_language": f"{lng.language},null,{original_language.language if original_language else ''}",
+		logger.debug("TMDb responded: %s", movie)
 				},
 			)
 			logger.debug("TMDb responded: %s", movie)
-			ret = Movie(
+		return Movie(
-				original_language=movie["original_language"],
+			slug=to_slug(movie["title"]),
-				aliases=[x["title"] for x in movie["alternative_titles"]["titles"]],
+			original_language=Language.get(movie["original_language"]),
-				air_date=datetime.strptime(movie["release_date"], "%Y-%m-%d").date()
+			genres=self.process_genres(movie["genres"]),
-				if movie["release_date"]
+			rating=round(float(movie["vote_average"]) * 10),
-				else None,
+			status=MovieStatus.FINISHED
-				status=MovieStatus.FINISHED
+			if movie["status"] == "Released"
-				if movie["status"] == "Released"
+			else MovieStatus.PLANNED,
-				else MovieStatus.PLANNED,
+			runtime=int(movie["runtime"]) if movie["runtime"] is not None else None,
-				rating=round(float(movie["vote_average"]) * 10),
+			air_date=datetime.strptime(movie["release_date"], "%Y-%m-%d").date()
-				runtime=int(movie["runtime"]) if movie["runtime"] is not None else None,
+			if movie["release_date"]
-				studios=[self.to_studio(x) for x in movie["production_companies"]],
+			else None,
-				genres=self.process_genres(movie["genres"]),
+			external_id=(
-				external_id=(
+				{
 					self.name: MetadataId(
 						data_id=movie["id"],
 						link=f"https://www.themoviedb.org/movie/{movie['id']}",
 					)
 				}
 				| (
 					{
-						self.name: MetadataID(
+						"imdb": MetadataId(
-							movie["id"],
+							data_id=movie["imdb_id"],
-							f"https://www.themoviedb.org/movie/{movie['id']}",
+							link=f"https://www.imdb.com/title/{movie['imdb_id']}",
 						)
 					}
-					| (
+					if movie["imdb_id"]
-						{
+					else {}
-							"imdb": MetadataID(
+				)
-								movie["imdb_id"],
+			),
-								f"https://www.imdb.com/title/{movie['imdb_id']}",
+			translations={
-							)
+				Language.get(
-						}
+					f"{trans['iso_639_1']}-{trans['iso_3166_1']}"
-						if movie["imdb_id"]
+				): MovieTranslation(
-						else {}
+					name=clean(trans["data"]["title"])
 					or (
 						clean(movie["original_title"])
 						if movie["original_language"] == trans["iso_639_1"]
 						else None
 					)
-				),
+					or movie["title"],
-				collections=[
+					latin_name=next(
-					Collection(
+						(
-						external_id={
+							x["title"]
-							self.name: MetadataID(
+							for x in movie["alternative_titles"]["titles"]
-								movie["belongs_to_collection"]["id"],
+							if x["iso_3166_1"] == trans["iso_3166_1"]
-								f"https://www.themoviedb.org/collection/{movie['belongs_to_collection']['id']}",
+							and x["type"] == "Romaji"
-							)
+						),
-						},
+						None,
-					)
+					),
-				]
+					description=clean(trans["data"]["overview"]),
-				if movie["belongs_to_collection"] is not None
+					tagline=clean(trans["data"]["tagline"]),
-				else [],
+					aliases=[
-				# TODO: Add cast information
+						x["title"]
-			)
+						for x in movie["alternative_titles"]["titles"]
-			translation = MovieTranslation(
+						if x["iso_3166_1"] == trans["iso_3166_1"]
-				name=movie["title"],
+					],
-				tagline=movie["tagline"] if movie["tagline"] else None,
+					tags=[x["name"] for x in movie["keywords"]["keywords"]],
-				tags=list(map(lambda x: x["name"], movie["keywords"]["keywords"])),
+					poster=self._pick_image(movie, trans["iso_639_1"], "posters"),
-				overview=movie["overview"],
+					logo=self._pick_image(movie, trans["iso_639_1"], "logos"),
-				posters=self.get_best_image(movie, lng, "posters"),
+					banner=None,
-				logos=self.get_best_image(movie, lng, "logos"),
+					thumbnail=self._pick_image(movie, trans["iso_639_1"], "backdrops"),
-				thumbnails=self.get_best_image(movie, lng, "backdrops"),
+					trailer=None,
-				trailers=[
+					# TODO: should the trailer be added? or all of them as extra?
-					f"https://www.youtube.com/watch?v={x['key']}"
+					# [
-					for x in movie["videos"]["results"]
+					# 	f"https://www.youtube.com/watch?v={x['key']}"
-					if x["type"] == "Trailer" and x["site"] == "YouTube"
+					# 	for x in movie["videos"]["results"]
-				],
+					# 	if x["type"] == "Trailer" and x["site"] == "YouTube"
-			)
+					# ],
-			ret.translations = {lng.to_tag(): translation}
+				)
-			return ret
+				for trans in movie["translations"]["translations"]
-
+			},
-		ret = await self.process_translations(for_language, languages)
+			collections=[
-		if (
+				# Collection(
-			ret.original_language is not None
+				# 	external_id={
-			and ret.original_language not in ret.translations
+				# 		self.name: MetadataID(
-		):
+				# 			movie["belongs_to_collection"]["id"],
-			orig_language = Language.get(ret.original_language)
+				# 			f"https://www.themoviedb.org/collection/{movie['belongs_to_collection']['id']}",
-			ret.translations[orig_language.to_tag()] = (
+				# 		)
-				await for_language(orig_language)
+				# 	},
-			).translations[orig_language.to_tag()]
+				# )
-		return ret
+			]
 			if movie["belongs_to_collection"] is not None
 			else [],
 			studios=[self.to_studio(x) for x in movie["production_companies"]],
 			staff=[],
 		)
 	@cache(ttl=timedelta(days=1))
 	async def identify_show(
@ -363,9 +320,9 @@ class TheMovieDatabase(Provider):
 				tagline=show["tagline"] if show["tagline"] else None,
 				tags=list(map(lambda x: x["name"], show["keywords"]["results"])),
 				overview=show["overview"],
-				posters=self.get_best_image(show, lng, "posters"),
+				posters=self._pick_image(show, lng, "posters"),
-				logos=self.get_best_image(show, lng, "logos"),
+				logos=self._pick_image(show, lng, "logos"),
-				thumbnails=self.get_best_image(show, lng, "backdrops"),
+				thumbnails=self._pick_image(show, lng, "backdrops"),
 				trailers=[
 					f"https://www.youtube.com/watch?v={x['key']}"
 					for x in show["videos"]["results"]
@ -759,9 +716,9 @@ class TheMovieDatabase(Provider):
 			translation = CollectionTranslation(
 				name=collection["name"],
 				overview=collection["overview"],
-				posters=self.get_best_image(collection, lng, "posters"),
+				posters=self._pick_image(collection, lng, "posters"),
 				logos=[],
-				thumbnails=self.get_best_image(collection, lng, "backdrops"),
+				thumbnails=self._pick_image(collection, lng, "backdrops"),
 			)
 			ret.translations = {lng.to_tag(): translation}
 			return ret
--- a/scanner/scanner/utils.py
+++ b/scanner/scanner/utils.py
@ -1,22 +1,19 @@
 from datetime import date
 from langcodes import Language
 from pydantic import AliasGenerator, BaseModel, ConfigDict
 from pydantic.alias_generators import to_camel
 def format_date(date: date | int | None) -> str | None:
 	if date is None:
 		return None
 	if isinstance(date, int):
 		return f"{date}-01-01"
 	return date.isoformat()
 def normalize_lang(lang: str) -> str:
 	"""Parse ``lang`` with ``Language.get`` and return the string form of the result."""
 	parsed = Language.get(lang)
 	return str(parsed)
 def to_slug(title: str) -> str:
 	"""Return the slug for ``title``.

 	NOTE(review): currently returns ``title`` unchanged -- no normalization is
 	applied here; confirm whether real slugification is expected by callers.
 	"""
 	return title
 def clean(val: str) -> str | None:
 	return val or None
 class Model(BaseModel):
 	model_config = ConfigDict(
 		use_enum_values=True,