Add sub and metadata extraction on the new transcoder

This commit is contained in:
Zoe Roux 2023-07-30 23:09:11 +09:00
parent f12c1053ca
commit fdc537d69a
9 changed files with 199 additions and 36 deletions

View File

@ -1,5 +1,5 @@
# Useful config options # Useful config options
LIBRARY_ROOT=/video LIBRARY_ROOT=./video
CACHE_ROOT=/tmp/kyoo_cache CACHE_ROOT=/tmp/kyoo_cache
LIBRARY_LANGUAGES=en LIBRARY_LANGUAGES=en

View File

@ -63,6 +63,7 @@ services:
- ./transcoder:/app - ./transcoder:/app
- ${LIBRARY_ROOT}:/video - ${LIBRARY_ROOT}:/video
- ${CACHE_ROOT}:/cache - ${CACHE_ROOT}:/cache
- metadata:/metadata
ingress: ingress:
image: nginx image: nginx
@ -99,3 +100,4 @@ services:
volumes: volumes:
kyoo: kyoo:
db: db:
metadata:

View File

@ -41,6 +41,7 @@ services:
volumes: volumes:
- ${LIBRARY_ROOT}:/video - ${LIBRARY_ROOT}:/video
- ${CACHE_ROOT}:/cache - ${CACHE_ROOT}:/cache
- metadata:/metadata
ingress: ingress:
image: nginx image: nginx
@ -73,3 +74,4 @@ services:
volumes: volumes:
kyoo: kyoo:
db: db:
metadata:

View File

@ -41,6 +41,7 @@ services:
volumes: volumes:
- ${LIBRARY_ROOT}:/video - ${LIBRARY_ROOT}:/video
- ${CACHE_ROOT}:/cache - ${CACHE_ROOT}:/cache
- metadata:/metadata
ingress: ingress:
image: nginx image: nginx
@ -73,3 +74,4 @@ services:
volumes: volumes:
kyoo: kyoo:
db: db:
metadata:

12
transcoder/Cargo.lock generated
View File

@ -1112,6 +1112,17 @@ dependencies = [
"serde", "serde",
] ]
[[package]]
name = "sha-1"
version = "0.10.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f5058ada175748e33390e40e872bd0fe59a19f265d0158daa551c5a88a76009c"
dependencies = [
"cfg-if",
"cpufeatures",
"digest",
]
[[package]] [[package]]
name = "sha1" name = "sha1"
version = "0.10.5" version = "0.10.5"
@ -1306,6 +1317,7 @@ dependencies = [
"rand", "rand",
"reqwest", "reqwest",
"serde", "serde",
"sha-1",
"tokio", "tokio",
"utoipa", "utoipa",
] ]

View File

@ -13,3 +13,4 @@ derive_more = "0.99.17"
reqwest = { version = "0.11.16", default_features = false, features = ["json", "rustls-tls"] } reqwest = { version = "0.11.16", default_features = false, features = ["json", "rustls-tls"] }
utoipa = { version = "3", features = ["actix_extras"] } utoipa = { version = "3", features = ["actix_extras"] }
json = "0.12.4" json = "0.12.4"
sha-1 = "0.10.1"

View File

@ -1,6 +1,12 @@
use json::JsonValue; use json::JsonValue;
use serde::Serialize; use serde::Serialize;
use std::str::{self, FromStr}; use sha1::{Sha1, Digest};
use std::{
fs,
path::PathBuf,
process::Stdio,
str::{self, FromStr}, io,
};
use tokio::process::Command; use tokio::process::Command;
use utoipa::ToSchema; use utoipa::ToSchema;
@ -8,18 +14,21 @@ use crate::transcode::Quality;
#[derive(Serialize, ToSchema)] #[derive(Serialize, ToSchema)]
pub struct MediaInfo { pub struct MediaInfo {
pub sha: String,
/// The internal path of this track.
pub path: String,
/// The length of the media in seconds. /// The length of the media in seconds.
pub length: f32, pub length: f32,
pub container: String, pub container: String,
pub video: VideoTrack, pub video: Video,
pub audios: Vec<Track>, pub audios: Vec<Audio>,
pub subtitles: Vec<Track>, pub subtitles: Vec<Subtitle>,
pub fonts: Vec<String>, pub fonts: Vec<String>,
pub chapters: Vec<Chapter>, pub chapters: Vec<Chapter>,
} }
#[derive(Serialize, ToSchema)] #[derive(Serialize, ToSchema)]
pub struct VideoTrack { pub struct Video {
/// The codec of this stream (defined as the RFC 6381). /// The codec of this stream (defined as the RFC 6381).
pub codec: String, pub codec: String,
/// The language of this stream (as a ISO-639-2 language code) /// The language of this stream (as a ISO-639-2 language code)
@ -35,7 +44,7 @@ pub struct VideoTrack {
} }
#[derive(Serialize, ToSchema)] #[derive(Serialize, ToSchema)]
pub struct Track { pub struct Audio {
/// The index of this track on the media. /// The index of this track on the media.
pub index: u32, pub index: u32,
/// The title of the stream. /// The title of the stream.
@ -50,6 +59,24 @@ pub struct Track {
pub forced: bool, pub forced: bool,
} }
/// A subtitle track of a media file, listed in `MediaInfo::subtitles`.
#[derive(Serialize, ToSchema)]
pub struct Subtitle {
/// The index of this track on the media.
pub index: u32,
/// The title of the stream.
pub title: Option<String>,
/// The language of this stream (as an ISO-639-2 language code).
pub language: Option<String>,
/// The codec of this stream (the lower-cased mediainfo `Format`, also used as the
/// file extension of the extracted subtitle).
pub codec: String,
/// Is this stream the default one of its type?
pub default: bool,
/// Is this stream tagged as forced? (useful only for subtitles)
pub forced: bool,
/// The link to access this subtitle (`/video/{sha}/subtitle/{index}.{codec}`).
pub link: String,
}
#[derive(Serialize, ToSchema)] #[derive(Serialize, ToSchema)]
pub struct Chapter { pub struct Chapter {
/// The start time of the chapter (in second from the start of the episode). /// The start time of the chapter (in second from the start of the episode).
@ -60,22 +87,80 @@ pub struct Chapter {
pub name: String, // TODO: add a type field for Opening, Credits... pub name: String, // TODO: add a type field for Opening, Credits...
} }
/// Extract the attachments and the subtitle streams of a media file with ffmpeg.
///
/// Attachments are dumped into `/metadata/{sha}/att/` (ffmpeg's working directory) and
/// each subtitle is copied, without re-encoding, to `/metadata/{sha}/sub/{index}.{codec}`.
/// Both directories must already exist.
///
/// * `path` - The path of the media file given to ffmpeg as input.
/// * `sha` - The hash of the file, used as the name of its metadata directory.
/// * `subs` - The subtitle tracks to extract.
///
/// Failures are logged on stderr instead of panicking: a missing ffmpeg binary or an
/// unreadable file is an ordinary I/O error, not a broken invariant.
async fn extract(path: String, sha: &String, subs: &Vec<Subtitle>) {
    let mut cmd = Command::new("ffmpeg");
    cmd.current_dir(format!("/metadata/{sha}/att/"))
        // An empty filename makes ffmpeg name each attachment after its `filename` tag.
        .args(&["-dump_attachment:t", ""])
        .args(&["-i", path.as_str()]);
    for sub in subs {
        let stream = format!("0:s:{idx}", idx = sub.index);
        let output = format!(
            "/metadata/{sha}/sub/{idx}.{ext}",
            idx = sub.index,
            ext = sub.codec
        );
        cmd.args(&["-map", stream.as_str(), "-c:s", "copy", output.as_str()]);
    }
    println!("Starting extraction with the command: {:?}", cmd);

    let mut child = match cmd.stdout(Stdio::null()).spawn() {
        Ok(child) => child,
        Err(e) => {
            eprintln!("Error starting ffmpeg extract: {}", e);
            return;
        }
    };
    match child.wait().await {
        // A non-zero exit code means some items may not have been extracted; report
        // it rather than silently ignoring it.
        Ok(status) if !status.success() => {
            eprintln!("ffmpeg extract exited with {}", status);
        }
        Ok(_) => {}
        Err(e) => eprintln!("Error running ffmpeg extract: {}", e),
    }
}
pub async fn identify(path: String) -> Result<MediaInfo, std::io::Error> { pub async fn identify(path: String) -> Result<MediaInfo, std::io::Error> {
let mediainfo = Command::new("mediainfo") let mediainfo = Command::new("mediainfo")
.arg("--Output=JSON") .arg("--Output=JSON")
.arg("--Language=raw") .arg("--Language=raw")
.arg(path) .arg(path.clone())
.output() .output()
.await .await
.expect("Error running the mediainfo command"); .expect("Error running the mediainfo command");
assert!(mediainfo.status.success()); assert!(mediainfo.status.success());
let output = json::parse(str::from_utf8(mediainfo.stdout.as_slice()).unwrap()).unwrap(); let output = json::parse(str::from_utf8(mediainfo.stdout.as_slice()).unwrap()).unwrap();
let mut file = fs::File::open(&path)?;
let mut hasher = Sha1::new();
io::copy(&mut file, &mut hasher)?;
let sha = format!("{:x}", hasher.finalize());
let general = output["media"]["track"] let general = output["media"]["track"]
.members() .members()
.find(|x| x["@type"] == "General") .find(|x| x["@type"] == "General")
.unwrap(); .unwrap();
// Describe every subtitle ("Text") track reported by mediainfo.
let subs: Vec<Subtitle> = output["media"]["track"]
    .members()
    .filter(|x| x["@type"] == "Text")
    .map(|a| {
        // `@typeorder` is 1-based and may be absent (mediainfo leaves it out for a
        // lone track of a type), so fall back to the first track instead of panicking.
        let index = parse::<u32>(&a["@typeorder"])
            .unwrap_or(1)
            .saturating_sub(1);
        let codec = a["Format"].as_str().unwrap().to_lowercase();
        Subtitle {
            link: format!("/video/{sha}/subtitle/{index}.{codec}"),
            index,
            title: a["Title"].as_str().map(|x| x.to_string()),
            language: a["Language"].as_str().map(|x| x.to_string()),
            codec,
            default: a["Default"] == "Yes",
            // A track is forced when mediainfo says "Yes" (this was inverted).
            forced: a["Forced"] == "Yes",
        }
    })
    .collect();
if !PathBuf::from(format!("/metadata/{sha}")).exists() {
std::fs::create_dir_all(format!("/metadata/{sha}/att"))?;
std::fs::create_dir_all(format!("/metadata/{sha}/sub"))?;
extract(path.clone(), &sha, &subs).await;
}
fn parse<F: FromStr>(v: &JsonValue) -> Option<F> { fn parse<F: FromStr>(v: &JsonValue) -> Option<F> {
v.as_str().and_then(|x| x.parse::<F>().ok()) v.as_str().and_then(|x| x.parse::<F>().ok())
} }
@ -88,7 +173,7 @@ pub async fn identify(path: String) -> Result<MediaInfo, std::io::Error> {
.members() .members()
.find(|x| x["@type"] == "Video") .find(|x| x["@type"] == "Video")
.expect("File without video found. This is not supported"); .expect("File without video found. This is not supported");
VideoTrack { Video {
// This codec is not in the right format (does not include bitdepth...). // This codec is not in the right format (does not include bitdepth...).
codec: v["Format"].as_str().unwrap().to_string(), codec: v["Format"].as_str().unwrap().to_string(),
language: v["Language"].as_str().map(|x| x.to_string()), language: v["Language"].as_str().map(|x| x.to_string()),
@ -102,7 +187,7 @@ pub async fn identify(path: String) -> Result<MediaInfo, std::io::Error> {
audios: output["media"]["track"] audios: output["media"]["track"]
.members() .members()
.filter(|x| x["@type"] == "Audio") .filter(|x| x["@type"] == "Audio")
.map(|a| Track { .map(|a| Audio {
index: parse::<u32>(&a["StreamOrder"]).unwrap() - 1, index: parse::<u32>(&a["StreamOrder"]).unwrap() - 1,
title: a["Title"].as_str().map(|x| x.to_string()), title: a["Title"].as_str().map(|x| x.to_string()),
language: a["Language"].as_str().map(|x| x.to_string()), language: a["Language"].as_str().map(|x| x.to_string()),
@ -112,20 +197,12 @@ pub async fn identify(path: String) -> Result<MediaInfo, std::io::Error> {
forced: a["Forced"] == "No", forced: a["Forced"] == "No",
}) })
.collect(), .collect(),
subtitles: output["media"]["track"] subtitles: subs,
.members() fonts: general["extra"]["Attachments"]
.filter(|x| x["@type"] == "Text") .to_string()
.map(|a| Track { .split(" / ")
index: parse::<u32>(&a["StreamOrder"]).unwrap() - 1, .map(|x| format!("/video/{sha}/attachment/{x}"))
title: a["Title"].as_str().map(|x| x.to_string()),
language: a["Language"].as_str().map(|x| x.to_string()),
// TODO: format is invalid. Channels count missing...
codec: a["Format"].as_str().unwrap().to_string(),
default: a["Default"] == "Yes",
forced: a["Forced"] == "No",
})
.collect(), .collect(),
fonts: vec![],
chapters: output["media"]["track"] chapters: output["media"]["track"]
.members() .members()
.find(|x| x["@type"] == "Menu") .find(|x| x["@type"] == "Menu")
@ -139,6 +216,8 @@ pub async fn identify(path: String) -> Result<MediaInfo, std::io::Error> {
.collect() .collect()
}) })
.unwrap_or(vec![]), .unwrap_or(vec![]),
sha,
path,
}) })
} }

View File

@ -1,3 +1,5 @@
use std::path::PathBuf;
use actix_files::NamedFile; use actix_files::NamedFile;
use actix_web::{ use actix_web::{
get, get,
@ -9,7 +11,7 @@ use utoipa::OpenApi;
use crate::{ use crate::{
audio::*, audio::*,
identify::{identify, Chapter, MediaInfo, Track}, identify::{identify, Chapter, MediaInfo, Video, Audio, Subtitle},
state::Transcoder, state::Transcoder,
video::*, video::*,
}; };
@ -37,14 +39,20 @@ mod video;
) )
)] )]
#[get("/{resource}/{slug}/direct")] #[get("/{resource}/{slug}/direct")]
async fn get_direct(query: web::Path<(String, String)>) -> Result<NamedFile> { async fn get_direct(query: web::Path<(String, String)>) -> Result<NamedFile, ApiError> {
let (resource, slug) = query.into_inner(); let (resource, slug) = query.into_inner();
let path = paths::get_path(resource, slug).await.map_err(|e| { let path = paths::get_path(resource, slug).await.map_err(|e| {
eprintln!("Unhandled error occured while getting the path: {}", e); eprintln!("Unhandled error occured while getting the path: {}", e);
ApiError::NotFound ApiError::NotFound
})?; })?;
Ok(NamedFile::open_async(path).await?) NamedFile::open_async(path).await.map_err(|e| {
eprintln!(
"Unhandled error occured while openning the direct stream: {}",
e
);
ApiError::InternalError
})
} }
/// Get master playlist /// Get master playlist
@ -96,13 +104,66 @@ async fn identify_resource(
.await .await
.map_err(|_| ApiError::NotFound)?; .map_err(|_| ApiError::NotFound)?;
identify(path).await.map(|info| Json(info)).map_err(|e| { identify(path)
eprintln!( .await
"Unhandled error occured while identifing the resource: {}", .map(|info| Json(info))
e .map_err(|e| {
); eprintln!(
ApiError::InternalError "Unhandled error occured while identifing the resource: {}",
}) e
);
ApiError::InternalError
})
}
/// Get attachments
///
/// Get a specific attachment
#[utoipa::path(
    responses(
        (status = 200, description = "Ok", body = MediaInfo),
        (status = NOT_FOUND, description = "Invalid slug.")
    ),
    params(
        ("sha" = String, Path, description = "The sha1 of the file"),
        ("name" = String, Path, description = "The name of the attachment."),
    )
)]
#[get("/{sha}/attachment/{name}")]
async fn get_attachment(query: web::Path<(String, String)>) -> Result<NamedFile, ApiError> {
    let (sha, name) = query.into_inner();

    // Both values come from the request URL. `PathBuf::push` replaces the whole path
    // when given an absolute one and happily keeps `..`, so only accept values made of
    // exactly one normal path component to stay inside `/metadata/{sha}/att`.
    let is_single_segment = |segment: &str| {
        let mut parts = std::path::Path::new(segment).components();
        matches!(
            (parts.next(), parts.next()),
            (Some(std::path::Component::Normal(_)), None)
        )
    };
    if !(is_single_segment(&sha) && is_single_segment(&name)) {
        return Err(ApiError::NotFound);
    }

    let mut attpath = PathBuf::from("/metadata");
    attpath.push(sha);
    attpath.push("att");
    attpath.push(name);
    NamedFile::open_async(attpath)
        .await
        .map_err(|_| ApiError::NotFound)
}
/// Get subtitle
///
/// Get a specific subtitle
#[utoipa::path(
responses(
(status = 200, description = "Ok", body = MediaInfo),
(status = NOT_FOUND, description = "Invalid slug.")
),
params(
("sha" = String, Path, description = "The sha1 of the file"),
("name" = String, Path, description = "The name of the subtitle."),
)
)]
#[get("/{sha}/subtitle/{name}")]
async fn get_subtitle(query: web::Path<(String, String)>) -> Result<NamedFile, ApiError> {
let (sha, name) = query.into_inner();
let mut subpath = PathBuf::from("/metadata");
subpath.push(sha);
subpath.push("sub");
subpath.push(name);
NamedFile::open_async(subpath)
.await
.map_err(|_| ApiError::NotFound)
} }
#[get("/openapi.json")] #[get("/openapi.json")]
@ -117,9 +178,11 @@ async fn get_swagger() -> String {
get_chunk, get_chunk,
get_audio_transcoded, get_audio_transcoded,
get_audio_chunk, get_audio_chunk,
identify_resource identify_resource,
get_attachment,
get_subtitle,
), ),
components(schemas(MediaInfo, Track, Chapter)) components(schemas(MediaInfo, Video, Audio, Subtitle, Chapter))
)] )]
struct ApiDoc; struct ApiDoc;
@ -146,6 +209,8 @@ async fn main() -> std::io::Result<()> {
.service(get_audio_chunk) .service(get_audio_chunk)
.service(identify_resource) .service(identify_resource)
.service(get_swagger) .service(get_swagger)
.service(get_attachment)
.service(get_subtitle)
}) })
.bind(("0.0.0.0", 7666))? .bind(("0.0.0.0", 7666))?
.run() .run()

View File

@ -157,7 +157,7 @@ fn get_transcode_video_quality_args(quality: &Quality, segment_time: u32) -> Vec
vec![ vec![
// superfast or ultrafast would produce an extremely big file so we prefer veryfast. // superfast or ultrafast would produce an extremely big file so we prefer veryfast.
vec![ vec![
"-map", "0:v:0", "-c:v", "libx264", "-crf", "21", "-preset", "veryfast", "-map", "0:V:0", "-c:v", "libx264", "-crf", "21", "-preset", "veryfast",
], ],
vec![ vec![
"-vf", "-vf",