Rework get movie for tmdb

This commit is contained in:
Zoe Roux 2025-05-08 03:27:59 +02:00
parent 7a00878eae
commit a4fdeb8a9b
No known key found for this signature in database
4 changed files with 163 additions and 209 deletions

View File

@ -29,11 +29,11 @@ class Movie(Model):
air_date: date | None air_date: date | None
external_id: dict[str, MetadataId] external_id: dict[str, MetadataId]
translations: dict[str, MovieTranslation] = {} translations: dict[Language, MovieTranslation] = {}
videos: list[str] = []
collections: list[Collection] = [] collections: list[Collection] = []
studios: list[Studio] = [] studios: list[Studio] = []
staff: list[Staff] = [] staff: list[Staff] = []
videos: list[str] = []
class MovieTranslation(Model): class MovieTranslation(Model):

View File

@ -7,9 +7,9 @@ from .metadataid import MetadataId
class Studio(Model): class Studio(Model):
slug: str slug: str
external_id: dict[str, MetadataId] external_id: dict[str, MetadataId]
translations: dict[str, StudioTranslations] = {} translations: dict[str, StudioTranslation] = {}
class StudioTranslations(Model): class StudioTranslation(Model):
name: str name: str
logo: str | None logo: str | None

View File

@ -1,23 +1,25 @@
import asyncio import asyncio
from aiohttp import ClientSession import os
from datetime import datetime, timedelta from datetime import datetime, timedelta
from logging import getLogger
from typing import Awaitable, Callable, Dict, List, Optional, Any, TypeVar
from itertools import accumulate, zip_longest from itertools import accumulate, zip_longest
from logging import getLogger
from typing import Any, Awaitable, Callable, Dict, List, Optional, TypeVar, override
from aiohttp import ClientSession
from langcodes import Language from langcodes import Language
from providers.utils import ProviderError
from matcher.cache import cache from matcher.cache import cache
from ..provider import Provider from ..models.collection import Collection, CollectionTranslation
from ..types.movie import Movie, MovieTranslation, Status as MovieStatus from ..models.entry import Entry, EntryTranslation
from ..types.season import Season, SeasonTranslation from ..models.genre import Genre
from ..types.episode import Episode, EpisodeTranslation, PartialShow, EpisodeID from ..models.metadataid import EpisodeId, MetadataId
from ..types.studio import Studio from ..models.movie import Movie, MovieStatus, MovieTranslation
from ..types.genre import Genre from ..models.season import Season, SeasonTranslation
from ..types.metadataid import MetadataID from ..models.serie import Serie, SerieStatus, SerieTranslation
from ..types.show import Show, ShowTranslation, Status as ShowStatus from ..models.studio import Studio, StudioTranslation
from ..types.collection import Collection, CollectionTranslation from ..utils import clean, to_slug
from .provider import Provider, ProviderError
logger = getLogger(__name__) logger = getLogger(__name__)
@ -27,16 +29,16 @@ class TheMovieDatabase(Provider):
def __init__( def __init__(
self, self,
languages: list[str],
client: ClientSession, client: ClientSession,
api_key: str, api_key: str,
) -> None: ) -> None:
super().__init__() super().__init__()
self._languages = [Language.get(l) for l in languages]
self._client = client self._client = client
self.base = "https://api.themoviedb.org/3" self._base = "https://api.themoviedb.org/3"
self.api_key = api_key self._api_key = (
self.genre_map = { os.environ.get("THEMOVIEDB_APIKEY") or TheMovieDatabase.DEFAULT_API_KEY
)
self._genre_map = {
28: Genre.ACTION, 28: Genre.ACTION,
12: Genre.ADVENTURE, 12: Genre.ADVENTURE,
16: Genre.ANIMATION, 16: Genre.ANIMATION,
@ -57,7 +59,7 @@ class TheMovieDatabase(Provider):
37: Genre.WESTERN, 37: Genre.WESTERN,
10759: [Genre.ACTION, Genre.ADVENTURE], 10759: [Genre.ACTION, Genre.ADVENTURE],
10762: Genre.KIDS, 10762: Genre.KIDS,
10763: Genre.NEWS, 10763: [],
10764: Genre.REALITY, 10764: Genre.REALITY,
10765: [Genre.SCIENCE_FICTION, Genre.FANTASY], 10765: [Genre.SCIENCE_FICTION, Genre.FANTASY],
10766: Genre.SOAP, 10766: Genre.SOAP,
@ -66,6 +68,7 @@ class TheMovieDatabase(Provider):
} }
@property @property
@override
def name(self) -> str: def name(self) -> str:
return "themoviedatabase" return "themoviedatabase"
@ -76,12 +79,9 @@ class TheMovieDatabase(Provider):
return [x] return [x]
return flatten( return flatten(
[self.genre_map[x["id"]] for x in genres if x["id"] in self.genre_map] [self._genre_map[x["id"]] for x in genres if x["id"] in self._genre_map]
) )
def get_languages(self, *args) -> list[Language]:
return self._languages + list(args)
async def get( async def get(
self, self,
path: str, path: str,
@ -91,110 +91,54 @@ class TheMovieDatabase(Provider):
): ):
params = {k: v for k, v in params.items() if v is not None} params = {k: v for k, v in params.items() if v is not None}
async with self._client.get( async with self._client.get(
f"{self.base}/{path}", params={"api_key": self.api_key, **params} f"{self._base}/{path}", params={"api_key": self._api_key, **params}
) as r: ) as r:
if not_found_fail and r.status == 404: if not_found_fail and r.status == 404:
raise ProviderError(not_found_fail) raise ProviderError(not_found_fail)
r.raise_for_status() r.raise_for_status()
return await r.json() return await r.json()
T = TypeVar("T")
def merge_translations(self, host, translations, *, languages: list[Language]):
host.translations = {
k.to_tag(): v.translations[k.to_tag()]
for k, v in zip(languages, translations)
}
return host
async def process_translations(
self,
for_language: Callable[[str], Awaitable[T]],
languages: list[Language],
post_merge: Callable[[T, list[T]], T] | None = None,
) -> T:
tasks = map(lambda lng: for_language(lng), languages)
items: list[Any] = await asyncio.gather(*tasks)
item = self.merge_translations(items[0], items, languages=languages)
if post_merge:
item = post_merge(item, items)
return item
def get_image(self, images: list[Dict[str, Any]]) -> list[str]:
return [
f"https://image.tmdb.org/t/p/original{x['file_path']}"
for x in images
if x["file_path"]
]
def to_studio(self, company: dict[str, Any]) -> Studio:
    """Convert a TMDB company object into a ``Studio``.

    Args:
        company: Company dict as found in a movie's ``production_companies``.
            ``id`` and ``name`` are read unconditionally; ``logo_path`` is
            optional and may be null.

    Returns:
        A ``Studio`` whose only translation is stored under ``"en"`` and whose
        ``external_id`` holds this provider's company id and page link.
    """
    # The key can be present with a null value (company without a logo), so
    # test the value rather than the key: a membership test would build a URL
    # ending in "originalNone".
    logo_path = company.get("logo_path")
    logo = f"https://image.tmdb.org/t/p/original{logo_path}" if logo_path else None

    return Studio(
        slug=to_slug(company["name"]),
        external_id={
            self.name: MetadataId(
                data_id=company["id"],
                link=f"https://www.themoviedb.org/company/{company['id']}",
            )
        },
        translations={
            "en": StudioTranslation(
                name=company["name"],
                logo=logo,
            ),
        },
    )
def get_best_image( def _pick_image(self, item: dict[str, Any], lng: str, key: str) -> str | None:
self, item: dict[str, Any], lng: Language, key: str base_path = "https://image.tmdb.org/t/p/original"
) -> list[dict]:
"""
Retrieves the best available images for a item based on localization.
Args: images = sorted(
item (dict): A dictionary containing item information, including images and language details.
lng (Language): The preferred language for the images.
key (str): The key to access the images in the item dictionary. (e.g. "posters", "backdrops", "logos")
Returns:
list: A list of images, prioritized by localization, original language, and any available image.
"""
# Order images by size and vote average
item["images"][key] = sorted(
item["images"][key], item["images"][key],
key=lambda x: (x.get("vote_average", 0), x.get("width", 0)), key=lambda x: (x.get("vote_average", 0), x.get("width", 0)),
reverse=True, reverse=True,
) )
# Step 1: Try to get localized images # check images in your language
localized_images = [ localized = next((x for x in images if x["iso_639_1"] == lng), None)
image if localized:
for image in item["images"][key] return base_path + localized
if image.get("iso_639_1") == lng.language # if failed, check images without text
] notext = next((x for x in images if x["iso_639_1"] == None), None)
if notext:
# Step 2: If no localized images, try images in the original language return base_path + notext
if not localized_images: # take a random image, it's better than nothing
localized_images = [ random_img = next((x for x in images if x["iso_639_1"] == None), None)
image if random_img:
for image in item["images"][key] return base_path + random_img
if image.get("iso_639_1") == item.get("original_language") return None
]
# Step 3: If still no images, use any available images
if not localized_images:
localized_images = item["images"][key]
# Step 4: If there are no images at all, fallback to _path attribute.
if not localized_images:
localized_images = self._get_image_fallback(item, key)
return self.get_image(localized_images)
def _get_image_fallback(self, item: dict[str, Any], key: str) -> list[dict]:
"""
Fallback to _path attribute if there are no images available in the images list.
"""
if key == "posters":
return [{"file_path": item.get("poster_path")}]
elif key == "backdrops":
return [{"file_path": item.get("backdrop_path")}]
return []
async def search_movie(self, name: str, year: Optional[int]) -> Movie: async def search_movie(self, name: str, year: Optional[int]) -> Movie:
search_results = ( search_results = (
@ -208,94 +152,107 @@ class TheMovieDatabase(Provider):
search["id"], original_language=original_language search["id"], original_language=original_language
) )
@override
async def get_movie(self, external_id: dict[str, str]) -> Movie | None:
    """Fetch a movie together with all of its translations.

    A single request is made: alternative titles, videos, credits, keywords,
    images and translations are pulled in through ``append_to_response``.

    Args:
        external_id: Known ids of the movie, keyed by provider name. Only the
            entry stored under ``self.name`` is used.

    Returns:
        The populated ``Movie``, or ``None`` when ``external_id`` has no entry
        for this provider. Errors raised by ``self.get`` propagate unchanged.
    """
    # TODO: fallback to search via another id
    if self.name not in external_id:
        return None

    movie = await self.get(
        f"movie/{external_id[self.name]}",
        params={
            "append_to_response": "alternative_titles,videos,credits,keywords,images,translations",
        },
    )
    logger.debug("TMDb responded: %s", movie)

    ids = {
        self.name: MetadataId(
            data_id=movie["id"],
            link=f"https://www.themoviedb.org/movie/{movie['id']}",
        )
    }
    imdb_id = movie.get("imdb_id")
    if imdb_id:
        ids["imdb"] = MetadataId(
            data_id=imdb_id,
            link=f"https://www.imdb.com/title/{imdb_id}",
        )

    release_date = movie["release_date"]

    return Movie(
        slug=to_slug(movie["title"]),
        original_language=Language.get(movie["original_language"]),
        genres=self.process_genres(movie["genres"]),
        rating=round(float(movie["vote_average"]) * 10),
        status=MovieStatus.FINISHED
        if movie["status"] == "Released"
        else MovieStatus.PLANNED,
        runtime=int(movie["runtime"]) if movie["runtime"] is not None else None,
        air_date=datetime.strptime(release_date, "%Y-%m-%d").date()
        if release_date
        else None,
        external_id=ids,
        translations={
            Language.get(
                f"{trans['iso_639_1']}-{trans['iso_3166_1']}"
            ): self._movie_translation(movie, trans)
            for trans in movie["translations"]["translations"]
        },
        # TODO: expose movie["belongs_to_collection"] (when not None) as a
        # Collection; until then the list is always empty.
        collections=[],
        studios=[self.to_studio(x) for x in movie["production_companies"]],
        staff=[],
    )


def _movie_translation(
    self, movie: dict[str, Any], trans: dict[str, Any]
) -> MovieTranslation:
    """Build the ``MovieTranslation`` for one entry of the ``translations`` list."""
    data = trans["data"]
    lang = trans["iso_639_1"]

    # Alternative titles are tagged by country: keep those of this
    # translation's country, computed once for both aliases and latin_name.
    local_titles = [
        x
        for x in movie["alternative_titles"]["titles"]
        if x["iso_3166_1"] == trans["iso_3166_1"]
    ]

    # An untranslated title comes back empty: fall back to the original title
    # when this is the movie's own language, else to the default title.
    name = clean(data["title"])
    if not name and movie["original_language"] == lang:
        name = clean(movie["original_title"])

    return MovieTranslation(
        name=name or movie["title"],
        latin_name=next(
            (x["title"] for x in local_titles if x["type"] == "Romaji"),
            None,
        ),
        description=clean(data["overview"]),
        tagline=clean(data["tagline"]),
        aliases=[x["title"] for x in local_titles],
        tags=[x["name"] for x in movie["keywords"]["keywords"]],
        poster=self._pick_image(movie, lang, "posters"),
        logo=self._pick_image(movie, lang, "logos"),
        banner=None,
        thumbnail=self._pick_image(movie, lang, "backdrops"),
        # TODO: should the trailer be added? or all of them as extra?
        # (YouTube entries of movie["videos"]["results"] with type "Trailer")
        trailer=None,
    )
@cache(ttl=timedelta(days=1)) @cache(ttl=timedelta(days=1))
async def identify_show( async def identify_show(
@ -363,9 +320,9 @@ class TheMovieDatabase(Provider):
tagline=show["tagline"] if show["tagline"] else None, tagline=show["tagline"] if show["tagline"] else None,
tags=list(map(lambda x: x["name"], show["keywords"]["results"])), tags=list(map(lambda x: x["name"], show["keywords"]["results"])),
overview=show["overview"], overview=show["overview"],
posters=self.get_best_image(show, lng, "posters"), posters=self._pick_image(show, lng, "posters"),
logos=self.get_best_image(show, lng, "logos"), logos=self._pick_image(show, lng, "logos"),
thumbnails=self.get_best_image(show, lng, "backdrops"), thumbnails=self._pick_image(show, lng, "backdrops"),
trailers=[ trailers=[
f"https://www.youtube.com/watch?v={x['key']}" f"https://www.youtube.com/watch?v={x['key']}"
for x in show["videos"]["results"] for x in show["videos"]["results"]
@ -759,9 +716,9 @@ class TheMovieDatabase(Provider):
translation = CollectionTranslation( translation = CollectionTranslation(
name=collection["name"], name=collection["name"],
overview=collection["overview"], overview=collection["overview"],
posters=self.get_best_image(collection, lng, "posters"), posters=self._pick_image(collection, lng, "posters"),
logos=[], logos=[],
thumbnails=self.get_best_image(collection, lng, "backdrops"), thumbnails=self._pick_image(collection, lng, "backdrops"),
) )
ret.translations = {lng.to_tag(): translation} ret.translations = {lng.to_tag(): translation}
return ret return ret

View File

@ -1,22 +1,19 @@
from datetime import date
from langcodes import Language from langcodes import Language
from pydantic import AliasGenerator, BaseModel, ConfigDict from pydantic import AliasGenerator, BaseModel, ConfigDict
from pydantic.alias_generators import to_camel from pydantic.alias_generators import to_camel
def format_date(date: date | int | None) -> str | None:
if date is None:
return None
if isinstance(date, int):
return f"{date}-01-01"
return date.isoformat()
def normalize_lang(lang: str) -> str:
    """Return the string form of ``lang`` after parsing it with ``Language.get``."""
    parsed = Language.get(lang)
    return str(parsed)
def to_slug(title: str) -> str:
    """Turn a title into a lowercase, dash-separated slug.

    Every run of characters that is not a letter or a digit (in any script)
    becomes a single ``-``; leading and trailing dashes are removed.

    Args:
        title: Human-readable title.

    Returns:
        The slug. When the title holds no letter or digit at all the title is
        returned unchanged, so the result is never emptier than the input.
    """
    import re

    # `\W` is Unicode-aware for str patterns, so non-Latin titles keep their
    # letters; `_` is listed explicitly because `\w` would otherwise keep it.
    slug = re.sub(r"[\W_]+", "-", title.lower()).strip("-")
    return slug or title
def clean(val: str) -> str | None:
return val or None
class Model(BaseModel): class Model(BaseModel):
model_config = ConfigDict( model_config = ConfigDict(
use_enum_values=True, use_enum_values=True,