mirror of
https://github.com/zoriya/Kyoo.git
synced 2025-07-09 03:04:20 -04:00
Add sub and metadata extraction on the new transcoder
This commit is contained in:
parent
f12c1053ca
commit
fdc537d69a
@ -1,5 +1,5 @@
|
||||
# Useful config options
|
||||
LIBRARY_ROOT=/video
|
||||
LIBRARY_ROOT=./video
|
||||
CACHE_ROOT=/tmp/kyoo_cache
|
||||
LIBRARY_LANGUAGES=en
|
||||
|
||||
|
@ -63,6 +63,7 @@ services:
|
||||
- ./transcoder:/app
|
||||
- ${LIBRARY_ROOT}:/video
|
||||
- ${CACHE_ROOT}:/cache
|
||||
- metadata:/metadata
|
||||
|
||||
ingress:
|
||||
image: nginx
|
||||
@ -99,3 +100,4 @@ services:
|
||||
volumes:
|
||||
kyoo:
|
||||
db:
|
||||
metadata:
|
||||
|
@ -41,6 +41,7 @@ services:
|
||||
volumes:
|
||||
- ${LIBRARY_ROOT}:/video
|
||||
- ${CACHE_ROOT}:/cache
|
||||
- metadata:/metadata
|
||||
|
||||
ingress:
|
||||
image: nginx
|
||||
@ -73,3 +74,4 @@ services:
|
||||
volumes:
|
||||
kyoo:
|
||||
db:
|
||||
metadata:
|
||||
|
@ -41,6 +41,7 @@ services:
|
||||
volumes:
|
||||
- ${LIBRARY_ROOT}:/video
|
||||
- ${CACHE_ROOT}:/cache
|
||||
- metadata:/metadata
|
||||
|
||||
ingress:
|
||||
image: nginx
|
||||
@ -73,3 +74,4 @@ services:
|
||||
volumes:
|
||||
kyoo:
|
||||
db:
|
||||
metadata:
|
||||
|
12
transcoder/Cargo.lock
generated
12
transcoder/Cargo.lock
generated
@ -1112,6 +1112,17 @@ dependencies = [
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sha-1"
|
||||
version = "0.10.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f5058ada175748e33390e40e872bd0fe59a19f265d0158daa551c5a88a76009c"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"cpufeatures",
|
||||
"digest",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sha1"
|
||||
version = "0.10.5"
|
||||
@ -1306,6 +1317,7 @@ dependencies = [
|
||||
"rand",
|
||||
"reqwest",
|
||||
"serde",
|
||||
"sha-1",
|
||||
"tokio",
|
||||
"utoipa",
|
||||
]
|
||||
|
@ -13,3 +13,4 @@ derive_more = "0.99.17"
|
||||
reqwest = { version = "0.11.16", default_features = false, features = ["json", "rustls-tls"] }
|
||||
utoipa = { version = "3", features = ["actix_extras"] }
|
||||
json = "0.12.4"
|
||||
sha-1 = "0.10.1"
|
||||
|
@ -1,6 +1,12 @@
|
||||
use json::JsonValue;
|
||||
use serde::Serialize;
|
||||
use std::str::{self, FromStr};
|
||||
use sha1::{Sha1, Digest};
|
||||
use std::{
|
||||
fs,
|
||||
path::PathBuf,
|
||||
process::Stdio,
|
||||
str::{self, FromStr}, io,
|
||||
};
|
||||
use tokio::process::Command;
|
||||
use utoipa::ToSchema;
|
||||
|
||||
@ -8,18 +14,21 @@ use crate::transcode::Quality;
|
||||
|
||||
#[derive(Serialize, ToSchema)]
|
||||
pub struct MediaInfo {
|
||||
pub sha: String,
|
||||
/// The internal path of this track.
|
||||
pub path: String,
|
||||
/// The length of the media in seconds.
|
||||
pub length: f32,
|
||||
pub container: String,
|
||||
pub video: VideoTrack,
|
||||
pub audios: Vec<Track>,
|
||||
pub subtitles: Vec<Track>,
|
||||
pub video: Video,
|
||||
pub audios: Vec<Audio>,
|
||||
pub subtitles: Vec<Subtitle>,
|
||||
pub fonts: Vec<String>,
|
||||
pub chapters: Vec<Chapter>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, ToSchema)]
|
||||
pub struct VideoTrack {
|
||||
pub struct Video {
|
||||
/// The codec of this stream (defined as the RFC 6381).
|
||||
pub codec: String,
|
||||
/// The language of this stream (as a ISO-639-2 language code)
|
||||
@ -35,7 +44,7 @@ pub struct VideoTrack {
|
||||
}
|
||||
|
||||
#[derive(Serialize, ToSchema)]
|
||||
pub struct Track {
|
||||
pub struct Audio {
|
||||
/// The index of this track on the media.
|
||||
pub index: u32,
|
||||
/// The title of the stream.
|
||||
@ -50,6 +59,24 @@ pub struct Track {
|
||||
pub forced: bool,
|
||||
}
|
||||
|
||||
#[derive(Serialize, ToSchema)]
|
||||
pub struct Subtitle {
|
||||
/// The index of this track on the media.
|
||||
pub index: u32,
|
||||
/// The title of the stream.
|
||||
pub title: Option<String>,
|
||||
/// The language of this stream (as a ISO-639-2 language code)
|
||||
pub language: Option<String>,
|
||||
/// The codec of this stream.
|
||||
pub codec: String,
|
||||
/// Is this stream the default one of it's type?
|
||||
pub default: bool,
|
||||
/// Is this stream tagged as forced? (useful only for subtitles)
|
||||
pub forced: bool,
|
||||
/// The link to access this subtitle.
|
||||
pub link: String,
|
||||
}
|
||||
|
||||
#[derive(Serialize, ToSchema)]
|
||||
pub struct Chapter {
|
||||
/// The start time of the chapter (in second from the start of the episode).
|
||||
@ -60,22 +87,80 @@ pub struct Chapter {
|
||||
pub name: String, // TODO: add a type field for Opening, Credits...
|
||||
}
|
||||
|
||||
async fn extract(path: String, sha: &String, subs: &Vec<Subtitle>) {
|
||||
let mut cmd = Command::new("ffmpeg");
|
||||
cmd.current_dir(format!("/metadata/{sha}/att/"))
|
||||
.args(&["-dump_attachment:t", ""])
|
||||
.args(&["-i", path.as_str()]);
|
||||
for sub in subs {
|
||||
cmd.args(&[
|
||||
"-map",
|
||||
format!("0:s:{idx}", idx = sub.index).as_str(),
|
||||
"-c:s",
|
||||
"copy",
|
||||
format!(
|
||||
"/metadata/{sha}/sub/{idx}.{ext}",
|
||||
idx = sub.index,
|
||||
ext = sub.codec
|
||||
)
|
||||
.as_str(),
|
||||
]);
|
||||
}
|
||||
println!("Starting extraction with the command: {:?}", cmd);
|
||||
cmd.stdout(Stdio::null())
|
||||
.spawn()
|
||||
.expect("Error starting ffmpeg extract")
|
||||
.wait()
|
||||
.await
|
||||
.expect("Error running ffmpeg extract");
|
||||
}
|
||||
|
||||
pub async fn identify(path: String) -> Result<MediaInfo, std::io::Error> {
|
||||
let mediainfo = Command::new("mediainfo")
|
||||
.arg("--Output=JSON")
|
||||
.arg("--Language=raw")
|
||||
.arg(path)
|
||||
.arg(path.clone())
|
||||
.output()
|
||||
.await
|
||||
.expect("Error running the mediainfo command");
|
||||
assert!(mediainfo.status.success());
|
||||
let output = json::parse(str::from_utf8(mediainfo.stdout.as_slice()).unwrap()).unwrap();
|
||||
|
||||
let mut file = fs::File::open(&path)?;
|
||||
let mut hasher = Sha1::new();
|
||||
io::copy(&mut file, &mut hasher)?;
|
||||
let sha = format!("{:x}", hasher.finalize());
|
||||
|
||||
|
||||
let general = output["media"]["track"]
|
||||
.members()
|
||||
.find(|x| x["@type"] == "General")
|
||||
.unwrap();
|
||||
|
||||
let subs: Vec<Subtitle> = output["media"]["track"]
|
||||
.members()
|
||||
.filter(|x| x["@type"] == "Text")
|
||||
.map(|a| {
|
||||
let index = parse::<u32>(&a["@typeorder"]).unwrap() - 1;
|
||||
let codec = a["Format"].as_str().unwrap().to_string().to_lowercase();
|
||||
Subtitle {
|
||||
link: format!("/video/{sha}/subtitle/{index}.{codec}"),
|
||||
index,
|
||||
title: a["Title"].as_str().map(|x| x.to_string()),
|
||||
language: a["Language"].as_str().map(|x| x.to_string()),
|
||||
codec,
|
||||
default: a["Default"] == "Yes",
|
||||
forced: a["Forced"] == "No",
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
if !PathBuf::from(format!("/metadata/{sha}")).exists() {
|
||||
std::fs::create_dir_all(format!("/metadata/{sha}/att"))?;
|
||||
std::fs::create_dir_all(format!("/metadata/{sha}/sub"))?;
|
||||
extract(path.clone(), &sha, &subs).await;
|
||||
}
|
||||
|
||||
fn parse<F: FromStr>(v: &JsonValue) -> Option<F> {
|
||||
v.as_str().and_then(|x| x.parse::<F>().ok())
|
||||
}
|
||||
@ -88,7 +173,7 @@ pub async fn identify(path: String) -> Result<MediaInfo, std::io::Error> {
|
||||
.members()
|
||||
.find(|x| x["@type"] == "Video")
|
||||
.expect("File without video found. This is not supported");
|
||||
VideoTrack {
|
||||
Video {
|
||||
// This codec is not in the right format (does not include bitdepth...).
|
||||
codec: v["Format"].as_str().unwrap().to_string(),
|
||||
language: v["Language"].as_str().map(|x| x.to_string()),
|
||||
@ -102,7 +187,7 @@ pub async fn identify(path: String) -> Result<MediaInfo, std::io::Error> {
|
||||
audios: output["media"]["track"]
|
||||
.members()
|
||||
.filter(|x| x["@type"] == "Audio")
|
||||
.map(|a| Track {
|
||||
.map(|a| Audio {
|
||||
index: parse::<u32>(&a["StreamOrder"]).unwrap() - 1,
|
||||
title: a["Title"].as_str().map(|x| x.to_string()),
|
||||
language: a["Language"].as_str().map(|x| x.to_string()),
|
||||
@ -112,20 +197,12 @@ pub async fn identify(path: String) -> Result<MediaInfo, std::io::Error> {
|
||||
forced: a["Forced"] == "No",
|
||||
})
|
||||
.collect(),
|
||||
subtitles: output["media"]["track"]
|
||||
.members()
|
||||
.filter(|x| x["@type"] == "Text")
|
||||
.map(|a| Track {
|
||||
index: parse::<u32>(&a["StreamOrder"]).unwrap() - 1,
|
||||
title: a["Title"].as_str().map(|x| x.to_string()),
|
||||
language: a["Language"].as_str().map(|x| x.to_string()),
|
||||
// TODO: format is invalid. Channels count missing...
|
||||
codec: a["Format"].as_str().unwrap().to_string(),
|
||||
default: a["Default"] == "Yes",
|
||||
forced: a["Forced"] == "No",
|
||||
})
|
||||
subtitles: subs,
|
||||
fonts: general["extra"]["Attachments"]
|
||||
.to_string()
|
||||
.split(" / ")
|
||||
.map(|x| format!("/video/{sha}/attachment/{x}"))
|
||||
.collect(),
|
||||
fonts: vec![],
|
||||
chapters: output["media"]["track"]
|
||||
.members()
|
||||
.find(|x| x["@type"] == "Menu")
|
||||
@ -139,6 +216,8 @@ pub async fn identify(path: String) -> Result<MediaInfo, std::io::Error> {
|
||||
.collect()
|
||||
})
|
||||
.unwrap_or(vec![]),
|
||||
sha,
|
||||
path,
|
||||
})
|
||||
}
|
||||
|
||||
|
@ -1,3 +1,5 @@
|
||||
use std::path::PathBuf;
|
||||
|
||||
use actix_files::NamedFile;
|
||||
use actix_web::{
|
||||
get,
|
||||
@ -9,7 +11,7 @@ use utoipa::OpenApi;
|
||||
|
||||
use crate::{
|
||||
audio::*,
|
||||
identify::{identify, Chapter, MediaInfo, Track},
|
||||
identify::{identify, Chapter, MediaInfo, Video, Audio, Subtitle},
|
||||
state::Transcoder,
|
||||
video::*,
|
||||
};
|
||||
@ -37,14 +39,20 @@ mod video;
|
||||
)
|
||||
)]
|
||||
#[get("/{resource}/{slug}/direct")]
|
||||
async fn get_direct(query: web::Path<(String, String)>) -> Result<NamedFile> {
|
||||
async fn get_direct(query: web::Path<(String, String)>) -> Result<NamedFile, ApiError> {
|
||||
let (resource, slug) = query.into_inner();
|
||||
let path = paths::get_path(resource, slug).await.map_err(|e| {
|
||||
eprintln!("Unhandled error occured while getting the path: {}", e);
|
||||
ApiError::NotFound
|
||||
})?;
|
||||
|
||||
Ok(NamedFile::open_async(path).await?)
|
||||
NamedFile::open_async(path).await.map_err(|e| {
|
||||
eprintln!(
|
||||
"Unhandled error occured while openning the direct stream: {}",
|
||||
e
|
||||
);
|
||||
ApiError::InternalError
|
||||
})
|
||||
}
|
||||
|
||||
/// Get master playlist
|
||||
@ -96,13 +104,66 @@ async fn identify_resource(
|
||||
.await
|
||||
.map_err(|_| ApiError::NotFound)?;
|
||||
|
||||
identify(path).await.map(|info| Json(info)).map_err(|e| {
|
||||
eprintln!(
|
||||
"Unhandled error occured while identifing the resource: {}",
|
||||
e
|
||||
);
|
||||
ApiError::InternalError
|
||||
})
|
||||
identify(path)
|
||||
.await
|
||||
.map(|info| Json(info))
|
||||
.map_err(|e| {
|
||||
eprintln!(
|
||||
"Unhandled error occured while identifing the resource: {}",
|
||||
e
|
||||
);
|
||||
ApiError::InternalError
|
||||
})
|
||||
}
|
||||
|
||||
/// Get attachments
|
||||
///
|
||||
/// Get a specific attachment
|
||||
#[utoipa::path(
|
||||
responses(
|
||||
(status = 200, description = "Ok", body = MediaInfo),
|
||||
(status = NOT_FOUND, description = "Invalid slug.")
|
||||
),
|
||||
params(
|
||||
("sha" = String, Path, description = "The sha1 of the file"),
|
||||
("name" = String, Path, description = "The name of the attachment."),
|
||||
)
|
||||
)]
|
||||
#[get("/{sha}/attachment/{name}")]
|
||||
async fn get_attachment(query: web::Path<(String, String)>) -> Result<NamedFile, ApiError> {
|
||||
let (sha, name) = query.into_inner();
|
||||
let mut attpath = PathBuf::from("/metadata");
|
||||
attpath.push(sha);
|
||||
attpath.push("att");
|
||||
attpath.push(name);
|
||||
NamedFile::open_async(attpath)
|
||||
.await
|
||||
.map_err(|_| ApiError::NotFound)
|
||||
}
|
||||
|
||||
/// Get subtitle
|
||||
///
|
||||
/// Get a specific subtitle
|
||||
#[utoipa::path(
|
||||
responses(
|
||||
(status = 200, description = "Ok", body = MediaInfo),
|
||||
(status = NOT_FOUND, description = "Invalid slug.")
|
||||
),
|
||||
params(
|
||||
("sha" = String, Path, description = "The sha1 of the file"),
|
||||
("name" = String, Path, description = "The name of the subtitle."),
|
||||
)
|
||||
)]
|
||||
#[get("/{sha}/subtitle/{name}")]
|
||||
async fn get_subtitle(query: web::Path<(String, String)>) -> Result<NamedFile, ApiError> {
|
||||
let (sha, name) = query.into_inner();
|
||||
let mut subpath = PathBuf::from("/metadata");
|
||||
subpath.push(sha);
|
||||
subpath.push("sub");
|
||||
subpath.push(name);
|
||||
NamedFile::open_async(subpath)
|
||||
.await
|
||||
.map_err(|_| ApiError::NotFound)
|
||||
}
|
||||
|
||||
#[get("/openapi.json")]
|
||||
@ -117,9 +178,11 @@ async fn get_swagger() -> String {
|
||||
get_chunk,
|
||||
get_audio_transcoded,
|
||||
get_audio_chunk,
|
||||
identify_resource
|
||||
identify_resource,
|
||||
get_attachment,
|
||||
get_subtitle,
|
||||
),
|
||||
components(schemas(MediaInfo, Track, Chapter))
|
||||
components(schemas(MediaInfo, Video, Audio, Subtitle, Chapter))
|
||||
)]
|
||||
struct ApiDoc;
|
||||
|
||||
@ -146,6 +209,8 @@ async fn main() -> std::io::Result<()> {
|
||||
.service(get_audio_chunk)
|
||||
.service(identify_resource)
|
||||
.service(get_swagger)
|
||||
.service(get_attachment)
|
||||
.service(get_subtitle)
|
||||
})
|
||||
.bind(("0.0.0.0", 7666))?
|
||||
.run()
|
||||
|
@ -157,7 +157,7 @@ fn get_transcode_video_quality_args(quality: &Quality, segment_time: u32) -> Vec
|
||||
vec![
|
||||
// superfast or ultrafast would produce an extremely big file so we prefer veryfast.
|
||||
vec![
|
||||
"-map", "0:v:0", "-c:v", "libx264", "-crf", "21", "-preset", "veryfast",
|
||||
"-map", "0:V:0", "-c:v", "libx264", "-crf", "21", "-preset", "veryfast",
|
||||
],
|
||||
vec![
|
||||
"-vf",
|
||||
|
Loading…
x
Reference in New Issue
Block a user