Add sub and metadata extraction on the new transcoder

This commit is contained in:
Zoe Roux 2023-07-30 23:09:11 +09:00
parent f12c1053ca
commit fdc537d69a
9 changed files with 199 additions and 36 deletions

View File

@ -1,5 +1,5 @@
# Useful config options
LIBRARY_ROOT=/video
LIBRARY_ROOT=./video
CACHE_ROOT=/tmp/kyoo_cache
LIBRARY_LANGUAGES=en

View File

@ -63,6 +63,7 @@ services:
- ./transcoder:/app
- ${LIBRARY_ROOT}:/video
- ${CACHE_ROOT}:/cache
- metadata:/metadata
ingress:
image: nginx
@ -99,3 +100,4 @@ services:
volumes:
kyoo:
db:
metadata:

View File

@ -41,6 +41,7 @@ services:
volumes:
- ${LIBRARY_ROOT}:/video
- ${CACHE_ROOT}:/cache
- metadata:/metadata
ingress:
image: nginx
@ -73,3 +74,4 @@ services:
volumes:
kyoo:
db:
metadata:

View File

@ -41,6 +41,7 @@ services:
volumes:
- ${LIBRARY_ROOT}:/video
- ${CACHE_ROOT}:/cache
- metadata:/metadata
ingress:
image: nginx
@ -73,3 +74,4 @@ services:
volumes:
kyoo:
db:
metadata:

12
transcoder/Cargo.lock generated
View File

@ -1112,6 +1112,17 @@ dependencies = [
"serde",
]
[[package]]
name = "sha-1"
version = "0.10.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f5058ada175748e33390e40e872bd0fe59a19f265d0158daa551c5a88a76009c"
dependencies = [
"cfg-if",
"cpufeatures",
"digest",
]
[[package]]
name = "sha1"
version = "0.10.5"
@ -1306,6 +1317,7 @@ dependencies = [
"rand",
"reqwest",
"serde",
"sha-1",
"tokio",
"utoipa",
]

View File

@ -13,3 +13,4 @@ derive_more = "0.99.17"
reqwest = { version = "0.11.16", default_features = false, features = ["json", "rustls-tls"] }
utoipa = { version = "3", features = ["actix_extras"] }
json = "0.12.4"
sha-1 = "0.10.1"

View File

@ -1,6 +1,12 @@
use json::JsonValue;
use serde::Serialize;
use std::str::{self, FromStr};
use sha1::{Sha1, Digest};
use std::{
fs,
path::PathBuf,
process::Stdio,
str::{self, FromStr}, io,
};
use tokio::process::Command;
use utoipa::ToSchema;
@ -8,18 +14,21 @@ use crate::transcode::Quality;
#[derive(Serialize, ToSchema)]
pub struct MediaInfo {
pub sha: String,
/// The internal path of this track.
pub path: String,
/// The length of the media in seconds.
pub length: f32,
pub container: String,
pub video: VideoTrack,
pub audios: Vec<Track>,
pub subtitles: Vec<Track>,
pub video: Video,
pub audios: Vec<Audio>,
pub subtitles: Vec<Subtitle>,
pub fonts: Vec<String>,
pub chapters: Vec<Chapter>,
}
#[derive(Serialize, ToSchema)]
pub struct VideoTrack {
pub struct Video {
/// The codec of this stream (defined as the RFC 6381).
pub codec: String,
/// The language of this stream (as a ISO-639-2 language code)
@ -35,7 +44,7 @@ pub struct VideoTrack {
}
#[derive(Serialize, ToSchema)]
pub struct Track {
pub struct Audio {
/// The index of this track on the media.
pub index: u32,
/// The title of the stream.
@ -50,6 +59,24 @@ pub struct Track {
pub forced: bool,
}
/// A subtitle track of a media file, together with the link used to download it.
#[derive(Serialize, ToSchema)]
pub struct Subtitle {
/// The index of this track on the media.
/// (Used as the `N` of ffmpeg's `0:s:N` stream specifier when the subtitle is extracted.)
pub index: u32,
/// The title of the stream.
pub title: Option<String>,
/// The language of this stream (as an ISO-639-2 language code)
pub language: Option<String>,
/// The codec of this stream.
/// (Also used as the file extension of the extracted subtitle and of its link.)
pub codec: String,
/// Is this stream the default one of its type?
pub default: bool,
/// Is this stream tagged as forced? (useful only for subtitles)
pub forced: bool,
/// The link to access this subtitle (built as `/video/{sha}/subtitle/{index}.{codec}`).
pub link: String,
}
#[derive(Serialize, ToSchema)]
pub struct Chapter {
/// The start time of the chapter (in seconds from the start of the episode).
@ -60,22 +87,80 @@ pub struct Chapter {
pub name: String, // TODO: add a type field for Opening, Credits...
}
/// Extract the attachments and the given subtitle streams of a media file in one ffmpeg run.
///
/// ffmpeg is started with `/metadata/{sha}/att/` as its working directory and asked to dump
/// every attachment there, while each subtitle of `subs` is stream-copied (no re-encoding) to
/// `/metadata/{sha}/sub/{index}.{codec}`. The caller is expected to have created both
/// directories beforehand.
///
/// * `path` - The media file given to ffmpeg as input.
/// * `sha` - The identifier naming the per-file directory under `/metadata`.
/// * `subs` - The subtitle tracks to extract; `index` selects the stream (`0:s:{index}`).
///
/// # Panics
/// Panics if ffmpeg can not be started or waited on. A non-zero exit status does not panic,
/// it is only reported on stderr.
async fn extract(path: String, sha: &String, subs: &Vec<Subtitle>) {
    let mut cmd = Command::new("ffmpeg");
    cmd.current_dir(format!("/metadata/{sha}/att/"))
        // `-dump_attachment` is an input option, so it has to come before `-i`.
        .args(["-dump_attachment:t", ""])
        .args(["-i", path.as_str()]);
    for sub in subs {
        let selector = format!("0:s:{idx}", idx = sub.index);
        let output = format!(
            "/metadata/{sha}/sub/{idx}.{ext}",
            idx = sub.index,
            ext = sub.codec
        );
        cmd.args(["-map", selector.as_str(), "-c:s", "copy", output.as_str()]);
    }
    println!("Starting extraction with the command: {:?}", cmd);
    let status = cmd
        .stdout(Stdio::null())
        .spawn()
        .expect("Error starting ffmpeg extract")
        .wait()
        .await
        .expect("Error running ffmpeg extract");
    // The exit status used to be dropped, which made a failed extraction invisible.
    // NOTE(review): ffmpeg may also exit with an error when no output file is requested
    // (empty `subs`) even though attachments were dumped - confirm before treating this as fatal.
    if !status.success() {
        eprintln!("ffmpeg extract of {path} exited with {status}");
    }
}
pub async fn identify(path: String) -> Result<MediaInfo, std::io::Error> {
let mediainfo = Command::new("mediainfo")
.arg("--Output=JSON")
.arg("--Language=raw")
.arg(path)
.arg(path.clone())
.output()
.await
.expect("Error running the mediainfo command");
assert!(mediainfo.status.success());
let output = json::parse(str::from_utf8(mediainfo.stdout.as_slice()).unwrap()).unwrap();
let mut file = fs::File::open(&path)?;
let mut hasher = Sha1::new();
io::copy(&mut file, &mut hasher)?;
let sha = format!("{:x}", hasher.finalize());
let general = output["media"]["track"]
.members()
.find(|x| x["@type"] == "General")
.unwrap();
let subs: Vec<Subtitle> = output["media"]["track"]
.members()
.filter(|x| x["@type"] == "Text")
.map(|a| {
let index = parse::<u32>(&a["@typeorder"]).unwrap() - 1;
let codec = a["Format"].as_str().unwrap().to_string().to_lowercase();
Subtitle {
link: format!("/video/{sha}/subtitle/{index}.{codec}"),
index,
title: a["Title"].as_str().map(|x| x.to_string()),
language: a["Language"].as_str().map(|x| x.to_string()),
codec,
default: a["Default"] == "Yes",
forced: a["Forced"] == "No",
}
})
.collect();
if !PathBuf::from(format!("/metadata/{sha}")).exists() {
std::fs::create_dir_all(format!("/metadata/{sha}/att"))?;
std::fs::create_dir_all(format!("/metadata/{sha}/sub"))?;
extract(path.clone(), &sha, &subs).await;
}
/// Parse a JSON string value into `F`.
///
/// Returns `None` when the value is not a JSON string or when its text can not be parsed.
fn parse<F: FromStr>(v: &JsonValue) -> Option<F> {
    let raw = v.as_str()?;
    raw.parse::<F>().ok()
}
@ -88,7 +173,7 @@ pub async fn identify(path: String) -> Result<MediaInfo, std::io::Error> {
.members()
.find(|x| x["@type"] == "Video")
.expect("File without video found. This is not supported");
VideoTrack {
Video {
// This codec is not in the right format (does not include bitdepth...).
codec: v["Format"].as_str().unwrap().to_string(),
language: v["Language"].as_str().map(|x| x.to_string()),
@ -102,7 +187,7 @@ pub async fn identify(path: String) -> Result<MediaInfo, std::io::Error> {
audios: output["media"]["track"]
.members()
.filter(|x| x["@type"] == "Audio")
.map(|a| Track {
.map(|a| Audio {
index: parse::<u32>(&a["StreamOrder"]).unwrap() - 1,
title: a["Title"].as_str().map(|x| x.to_string()),
language: a["Language"].as_str().map(|x| x.to_string()),
@ -112,20 +197,12 @@ pub async fn identify(path: String) -> Result<MediaInfo, std::io::Error> {
forced: a["Forced"] == "No",
})
.collect(),
subtitles: output["media"]["track"]
.members()
.filter(|x| x["@type"] == "Text")
.map(|a| Track {
index: parse::<u32>(&a["StreamOrder"]).unwrap() - 1,
title: a["Title"].as_str().map(|x| x.to_string()),
language: a["Language"].as_str().map(|x| x.to_string()),
// TODO: format is invalid. Channels count missing...
codec: a["Format"].as_str().unwrap().to_string(),
default: a["Default"] == "Yes",
forced: a["Forced"] == "No",
})
subtitles: subs,
fonts: general["extra"]["Attachments"]
.to_string()
.split(" / ")
.map(|x| format!("/video/{sha}/attachment/{x}"))
.collect(),
fonts: vec![],
chapters: output["media"]["track"]
.members()
.find(|x| x["@type"] == "Menu")
@ -139,6 +216,8 @@ pub async fn identify(path: String) -> Result<MediaInfo, std::io::Error> {
.collect()
})
.unwrap_or(vec![]),
sha,
path,
})
}

View File

@ -1,3 +1,5 @@
use std::path::PathBuf;
use actix_files::NamedFile;
use actix_web::{
get,
@ -9,7 +11,7 @@ use utoipa::OpenApi;
use crate::{
audio::*,
identify::{identify, Chapter, MediaInfo, Track},
identify::{identify, Chapter, MediaInfo, Video, Audio, Subtitle},
state::Transcoder,
video::*,
};
@ -37,14 +39,20 @@ mod video;
)
)]
#[get("/{resource}/{slug}/direct")]
async fn get_direct(query: web::Path<(String, String)>) -> Result<NamedFile> {
async fn get_direct(query: web::Path<(String, String)>) -> Result<NamedFile, ApiError> {
let (resource, slug) = query.into_inner();
let path = paths::get_path(resource, slug).await.map_err(|e| {
eprintln!("Unhandled error occured while getting the path: {}", e);
ApiError::NotFound
})?;
Ok(NamedFile::open_async(path).await?)
NamedFile::open_async(path).await.map_err(|e| {
eprintln!(
"Unhandled error occured while openning the direct stream: {}",
e
);
ApiError::InternalError
})
}
/// Get master playlist
@ -96,13 +104,66 @@ async fn identify_resource(
.await
.map_err(|_| ApiError::NotFound)?;
identify(path).await.map(|info| Json(info)).map_err(|e| {
eprintln!(
"Unhandled error occured while identifing the resource: {}",
e
);
ApiError::InternalError
})
identify(path)
.await
.map(|info| Json(info))
.map_err(|e| {
eprintln!(
"Unhandled error occured while identifing the resource: {}",
e
);
ApiError::InternalError
})
}
/// Get attachments
///
/// Get a specific attachment. The sha and the name must each be a single plain path
/// segment, any other value is answered with a not found error.
#[utoipa::path(
    responses(
        (status = 200, description = "Ok", body = MediaInfo),
        (status = NOT_FOUND, description = "Invalid slug.")
    ),
    params(
        ("sha" = String, Path, description = "The sha1 of the file"),
        ("name" = String, Path, description = "The name of the attachment."),
    )
)]
#[get("/{sha}/attachment/{name}")]
async fn get_attachment(query: web::Path<(String, String)>) -> Result<NamedFile, ApiError> {
    let (sha, name) = query.into_inner();

    // Both values come straight from the request URL. `PathBuf::push` replaces the whole path
    // when given an absolute one and keeps `..` as is, so an unchecked value could point
    // outside of `/metadata`. Only accept values made of exactly one normal component.
    let is_plain_segment = |segment: &str| {
        let mut components = std::path::Path::new(segment).components();
        matches!(
            (components.next(), components.next()),
            (Some(std::path::Component::Normal(_)), None)
        )
    };
    if !is_plain_segment(&sha) || !is_plain_segment(&name) {
        return Err(ApiError::NotFound);
    }

    let mut attpath = PathBuf::from("/metadata");
    attpath.push(sha);
    attpath.push("att");
    attpath.push(name);
    NamedFile::open_async(attpath)
        .await
        .map_err(|_| ApiError::NotFound)
}
/// Get subtitle
///
/// Get a specific subtitle. The sha and the name must each be a single plain path
/// segment, any other value is answered with a not found error.
#[utoipa::path(
    responses(
        (status = 200, description = "Ok", body = MediaInfo),
        (status = NOT_FOUND, description = "Invalid slug.")
    ),
    params(
        ("sha" = String, Path, description = "The sha1 of the file"),
        ("name" = String, Path, description = "The name of the subtitle."),
    )
)]
#[get("/{sha}/subtitle/{name}")]
async fn get_subtitle(query: web::Path<(String, String)>) -> Result<NamedFile, ApiError> {
    let (sha, name) = query.into_inner();

    // Both values come straight from the request URL. `PathBuf::push` replaces the whole path
    // when given an absolute one and keeps `..` as is, so an unchecked value could point
    // outside of `/metadata`. Only accept values made of exactly one normal component.
    let is_plain_segment = |segment: &str| {
        let mut components = std::path::Path::new(segment).components();
        matches!(
            (components.next(), components.next()),
            (Some(std::path::Component::Normal(_)), None)
        )
    };
    if !is_plain_segment(&sha) || !is_plain_segment(&name) {
        return Err(ApiError::NotFound);
    }

    let mut subpath = PathBuf::from("/metadata");
    subpath.push(sha);
    subpath.push("sub");
    subpath.push(name);
    NamedFile::open_async(subpath)
        .await
        .map_err(|_| ApiError::NotFound)
}
#[get("/openapi.json")]
@ -117,9 +178,11 @@ async fn get_swagger() -> String {
get_chunk,
get_audio_transcoded,
get_audio_chunk,
identify_resource
identify_resource,
get_attachment,
get_subtitle,
),
components(schemas(MediaInfo, Track, Chapter))
components(schemas(MediaInfo, Video, Audio, Subtitle, Chapter))
)]
struct ApiDoc;
@ -146,6 +209,8 @@ async fn main() -> std::io::Result<()> {
.service(get_audio_chunk)
.service(identify_resource)
.service(get_swagger)
.service(get_attachment)
.service(get_subtitle)
})
.bind(("0.0.0.0", 7666))?
.run()

View File

@ -157,7 +157,7 @@ fn get_transcode_video_quality_args(quality: &Quality, segment_time: u32) -> Vec
vec![
// superfast or ultrafast would produce an extremely big file, so we prefer veryfast.
vec![
"-map", "0:v:0", "-c:v", "libx264", "-crf", "21", "-preset", "veryfast",
"-map", "0:V:0", "-c:v", "libx264", "-crf", "21", "-preset", "veryfast",
],
vec![
"-vf",