diff --git a/benchmarks/200.multimedia/225.video-watermarking-gpu/.gitkeep b/benchmarks/200.multimedia/225.video-watermarking-gpu/.gitkeep new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/benchmarks/200.multimedia/225.video-watermarking-gpu/.gitkeep @@ -0,0 +1 @@ + diff --git a/benchmarks/200.multimedia/225.video-watermarking-gpu/Dockerfile b/benchmarks/200.multimedia/225.video-watermarking-gpu/Dockerfile new file mode 100644 index 00000000..516dcff0 --- /dev/null +++ b/benchmarks/200.multimedia/225.video-watermarking-gpu/Dockerfile @@ -0,0 +1,15 @@ +# NVENC-enabled FFmpeg base image +FROM jrottenberg/ffmpeg:6.1-nvidia + +# Python for gpu_bench.py +RUN apt-get update && apt-get install -y --no-install-recommends \ + python3 python3-pip python3-venv ca-certificates && \ + rm -rf /var/lib/apt/lists/* + +WORKDIR /app +COPY gpu_bench.py /app/gpu_bench.py +COPY run.sh /app/run.sh +RUN chmod +x /app/gpu_bench.py /app/run.sh + +# default entrypoint lets SeBS simply "docker run" it +ENTRYPOINT ["/app/run.sh"] diff --git a/benchmarks/200.multimedia/225.video-watermarking-gpu/Dockerfile.cpu b/benchmarks/200.multimedia/225.video-watermarking-gpu/Dockerfile.cpu new file mode 100644 index 00000000..6c85bfd9 --- /dev/null +++ b/benchmarks/200.multimedia/225.video-watermarking-gpu/Dockerfile.cpu @@ -0,0 +1,14 @@ +# Multi-arch FFmpeg base (CPU only) +FROM jrottenberg/ffmpeg:6.1-ubuntu + +# Python for gpu_bench.py +RUN apt-get update && apt-get install -y --no-install-recommends \ + python3 python3-pip ca-certificates && \ + rm -rf /var/lib/apt/lists/* + +WORKDIR /app +COPY gpu_bench.py /app/gpu_bench.py +COPY run.sh /app/run.sh +RUN chmod +x /app/gpu_bench.py /app/run.sh + +ENTRYPOINT ["/app/run.sh"] \ No newline at end of file diff --git a/benchmarks/200.multimedia/225.video-watermarking-gpu/gpu_bench.py b/benchmarks/200.multimedia/225.video-watermarking-gpu/gpu_bench.py new file mode 100644 index 00000000..7cf1d3d0 --- /dev/null +++ 
b/benchmarks/200.multimedia/225.video-watermarking-gpu/gpu_bench.py @@ -0,0 +1,307 @@ +#!/usr/bin/env python3 +import argparse, datetime, json, os, re, shutil, subprocess, sys, tempfile, csv +from typing import List, Dict, Any, Optional, Tuple + +# --- helpers --------------------------------------------------------------- + +def which_ffmpeg() -> str: + p = shutil.which("ffmpeg") + if not p: + sys.exit("ffmpeg not found on PATH. Use Docker image with NVENC or install FFmpeg with NVENC.") + return p + +def run(cmd: List[str]) -> subprocess.CompletedProcess: + return subprocess.run(cmd, stdin=subprocess.DEVNULL, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True) + +def has_encoder(ffmpeg: str, enc: str) -> bool: + out = run([ffmpeg, "-hide_banner", "-encoders"]).stdout + return re.search(rf"\b{re.escape(enc)}\b", out) is not None + +def has_filter(ffmpeg: str, name: str) -> bool: + out = run([ffmpeg, "-hide_banner", "-filters"]).stdout + return (f" {name} " in out) + +def gpu_info() -> Dict[str, Any]: + try: + out = run(["nvidia-smi", "--query-gpu=name,memory.total,driver_version", "--format=csv,noheader,nounits"]).stdout.strip() + name, mem, drv = [x.strip() for x in out.splitlines()[0].split(",")] + return {"name": name, "memory_total_mb": int(mem), "driver_version": drv} + except Exception: + return {"name": None, "memory_total_mb": None, "driver_version": None} + +def parse_progress(log: str) -> Dict[str, Any]: + lines = [ln for ln in log.splitlines() if ("fps=" in ln or "speed=" in ln or "frame=" in ln)] + fps = speed = frames = None + if lines: + last = lines[-1] + m = re.search(r"fps=\s*([0-9]+(?:\.[0-9]+)?)", last); fps = float(m.group(1)) if m else None + m = re.search(r"speed=\s*([0-9]+(?:\.[0-9]+)?)x", last); speed = float(m.group(1)) if m else None + m = re.search(r"frame=\s*([0-9]+)", last); frames = int(m.group(1)) if m else None + return {"fps": fps, "speed_x": speed, "frames": frames} + +# --- filter planning 
------------------------------------------------------- + +def build_vf_or_complex( + ffmpeg: str, + scale: Optional[str], + wm_path: Optional[str], + overlay: str, + want_gpu_decode: bool +) -> Tuple[List[str], str]: + """ + Returns (ffmpeg_args_for_filters, filter_used_string). + CPU path: never uses hw* or *_cuda filters. + GPU path: prefer scale_npp -> scale_cuda -> CPU scale with bridges; prefer overlay_cuda. + """ + used: List[str] = [] + vf_args: List[str] = [] + complex_graph = "" + + # ---------- CPU-ONLY SHORT-CIRCUIT ---------- + if not want_gpu_decode: + if not wm_path: + if scale: + return (["-vf", f"scale={scale}"], "scale(cpu)") + return ([], "") + # watermark present + if scale: + complex_graph = f"[0:v]scale={scale}[v0];[v0][1:v]overlay={overlay}[vout]" + used = ["scale(cpu)", "overlay(cpu)"] + else: + complex_graph = f"[0:v][1:v]overlay={overlay}[vout]" + used = ["overlay(cpu)"] + return (["-filter_complex", complex_graph, "-map", "[vout]"], "+".join(used)) + # ------------------------------------------- + + # From here on: GPU-preferred path + have_scale_npp = has_filter(ffmpeg, "scale_npp") + have_scale_cuda = has_filter(ffmpeg, "scale_cuda") + have_overlay_cuda = has_filter(ffmpeg, "overlay_cuda") + + # No watermark case + if not wm_path: + if scale: + if have_scale_npp: + vf_args = ["-vf", f"scale_npp={scale}"]; used.append("scale_npp") + elif have_scale_cuda: + vf_args = ["-vf", f"scale_cuda={scale}"]; used.append("scale_cuda") + else: + vf_args = ["-vf", f"hwdownload,format=nv12,scale={scale},hwupload_cuda"] + used.append("scale(cpu)+hwdownload+hwupload_cuda") + else: + vf_args = [] + return (vf_args, "+".join(used)) + + # Watermark case with GPU overlay if available + if have_overlay_cuda: + if scale and have_scale_npp: + complex_graph = f"[0:v]scale_npp={scale}[v0];[v0][1:v]overlay_cuda={overlay}[vout]" + used += ["scale_npp","overlay_cuda"] + elif scale and have_scale_cuda: + complex_graph = 
f"[0:v]scale_cuda={scale}[v0];[v0][1:v]overlay_cuda={overlay}[vout]" + used += ["scale_cuda","overlay_cuda"] + elif scale: + complex_graph = ( + f"[0:v]hwdownload,format=nv12,scale={scale},hwupload_cuda[v0];" + f"[v0][1:v]overlay_cuda={overlay}[vout]" + ) + used += ["scale(cpu)+hwdownload+hwupload_cuda","overlay_cuda"] + else: + complex_graph = f"[0:v][1:v]overlay_cuda={overlay}[vout]" + used += ["overlay_cuda"] + return (["-filter_complex", complex_graph, "-map", "[vout]"], "+".join(used)) + + # GPU decode + CPU overlay fallback (bridged) + if scale and (have_scale_npp or have_scale_cuda): + scaler = "scale_npp" if have_scale_npp else "scale_cuda" + complex_graph = ( + f"[0:v]{scaler}={scale}[v0gpu];" + f"[v0gpu]hwdownload,format=nv12[v0cpu];" + f"[v0cpu][1:v]overlay={overlay}[mix];" + f"[mix]hwupload_cuda[vout]" + ) + used += [scaler, "hwdownload+overlay(cpu)+hwupload_cuda"] + elif scale: + complex_graph = ( + f"[0:v]hwdownload,format=nv12,scale={scale}[v0cpu];" + f"[v0cpu][1:v]overlay={overlay}[mix];" + f"[mix]hwupload_cuda[vout]" + ) + used += ["scale(cpu)+overlay(cpu)+hwupload_cuda"] + else: + complex_graph = ( + f"[0:v]hwdownload,format=nv12[v0cpu];" + f"[v0cpu][1:v]overlay={overlay}[mix];" + f"[mix]hwupload_cuda[vout]" + ) + used += ["overlay(cpu)+hwupload_cuda"] + + return (["-filter_complex", complex_graph, "-map", "[vout]"], "+".join(used)) + +# --- core ------------------------------------------------------------------ + +def transcode_once( + ffmpeg: str, + inp: str, + outp: str, + codec: str, + bitrate: str, + preset: str, + duration: Optional[float], + scale: Optional[str], + wm_path: Optional[str], + overlay_pos: str, + decode_mode: str = "gpu" # "gpu" or "cpu" +) -> Dict[str, Any]: + + if not has_encoder(ffmpeg, codec): + raise RuntimeError(f"encoder '{codec}' not available; check your ffmpeg build (NVENC/AV1).") + + want_gpu_decode = (decode_mode == "gpu") + + args = [ffmpeg, "-hide_banner", "-y", "-vsync", "0"] + + if want_gpu_decode: + # Keep 
decode on GPU & use CUDA frames. Give NVDEC extra surfaces. + args += ["-hwaccel", "cuda", "-hwaccel_output_format", "cuda", "-extra_hw_frames", "16"] + # Helpful on some builds to make filters pick the right device + args += ["-init_hw_device", "cuda=cuda", "-filter_hw_device", "cuda"] + + # inputs + args += ["-i", inp] + if wm_path: + args += ["-loop", "1", "-i", wm_path] + + if duration: + args += ["-t", str(duration)] + + # Build filters + filt_args, filter_used = build_vf_or_complex(ffmpeg, scale, wm_path, overlay_pos, want_gpu_decode) + args += filt_args + + # encoder params + args += ["-c:v", codec, "-b:v", bitrate, "-preset", preset] + # Only NVENC supports -rc vbr + if codec.endswith("_nvenc"): + args += ["-rc", "vbr"] + args += ["-movflags", "+faststart"] + + # audio: copy if present + args += ["-c:a", "copy"] + + # Output path + args += [outp] + + t0 = datetime.datetime.now() + proc = run(args) + t1 = datetime.datetime.now() + if proc.returncode != 0: + raise RuntimeError("ffmpeg failed:\n" + proc.stdout + f"\n\nARGS:\n{' '.join(args)}") + + parsed = parse_progress(proc.stdout) + size = os.path.getsize(outp) if os.path.exists(outp) else 0 + return { + "args": args, + "filter_used": filter_used, + "stdout_tail": "\n".join(proc.stdout.splitlines()[-15:]), + "compute_time_us": (t1 - t0) / datetime.timedelta(microseconds=1), + "fps": parsed["fps"], + "speed_x": parsed["speed_x"], + "frames": parsed["frames"], + "output_size_bytes": size + } + +def main(): + ap = argparse.ArgumentParser(description="GPU NVENC benchmark.") + ap.add_argument("--input", required=True, help="Path to input video") + ap.add_argument("--duration", type=float, default=None, help="Trim to first N seconds") + ap.add_argument("--repeat", type=int, default=1, help="Repeat each trial") + ap.add_argument("--warmup", action="store_true", help="Run one warmup trial (not recorded)") + ap.add_argument("--csv", default=None, help="Optional path to write CSV summary") + 
ap.add_argument("--watermark", default=None, help="Path to watermark PNG (optional)") + ap.add_argument("--overlay", default="main_w/2-overlay_w/2:main_h/2-overlay_h/2", + help="Overlay position (ffmpeg expr), e.g. '10:10' or 'main_w-overlay_w-10:10'") + ap.add_argument("--decode", choices=["gpu","cpu"], default="gpu", + help="Decode on GPU (default) or CPU.") + ap.add_argument("--trials", nargs="+", default=[ + "codec=h264_nvenc,bitrate=5M,preset=p5", + "codec=h264_nvenc,bitrate=12M,preset=p1,scale=1920:1080", + "codec=hevc_nvenc,bitrate=6M,preset=p4", + # "codec=av1_nvenc,bitrate=3M,preset=p5", # include only if available + ], help="List like codec=h264_nvenc,bitrate=5M,preset=p5[,scale=WxH]") + args = ap.parse_args() + + ffmpeg = which_ffmpeg() + gi = gpu_info() + + def parse_trial(s: str) -> Dict[str, str]: + d: Dict[str, str] = {} + for kv in s.split(","): + k, v = kv.split("=", 1) + d[k.strip()] = v.strip() + return d + + trial_specs = [parse_trial(s) for s in args.trials] + + # optional warmup (uses first trial spec) + if args.warmup and trial_specs: + with tempfile.NamedTemporaryFile(suffix=".mp4", delete=True) as tmp: + _ = transcode_once(ffmpeg, args.input, tmp.name, + trial_specs[0].get("codec","h264_nvenc"), + trial_specs[0].get("bitrate","5M"), + trial_specs[0].get("preset","p5"), + args.duration, + trial_specs[0].get("scale"), + args.watermark, + args.overlay, + args.decode) + + results: List[Dict[str, Any]] = [] + idx = 0 + for spec in trial_specs: + for _ in range(args.repeat): + with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp: + outp = tmp.name + res = transcode_once(ffmpeg, args.input, outp, + spec.get("codec","h264_nvenc"), + spec.get("bitrate","5M"), + spec.get("preset","p5"), + args.duration, + spec.get("scale"), + args.watermark, + args.overlay, + args.decode) + results.append({ + "trial_index": idx, + "codec": spec.get("codec"), + "bitrate": spec.get("bitrate"), + "preset": spec.get("preset"), + "scale_filter": 
res["filter_used"], + "fps": res["fps"], + "speed_x": res["speed_x"], + "frames": res["frames"], + "compute_time_us": res["compute_time_us"], + "output_size_bytes": res["output_size_bytes"], + "stdout_tail": res["stdout_tail"], + "argv": " ".join(res["args"]), + }) + idx += 1 + try: os.remove(outp) + except OSError: pass + + report = { + "gpu": gi, + "ffmpeg_path": ffmpeg, + "trial_count": len(results), + "results": results + } + print(json.dumps(report, indent=2)) + + if args.csv and results: + with open(args.csv, "w", newline="") as f: + w = csv.DictWriter(f, fieldnames=list(results[0].keys())) + w.writeheader() + w.writerows(results) + +if __name__ == "__main__": + main() diff --git a/benchmarks/200.multimedia/225.video-watermarking-gpu/out_cpu/results.csv b/benchmarks/200.multimedia/225.video-watermarking-gpu/out_cpu/results.csv new file mode 100644 index 00000000..adaa188e --- /dev/null +++ b/benchmarks/200.multimedia/225.video-watermarking-gpu/out_cpu/results.csv @@ -0,0 +1,16 @@ +trial_index,codec,bitrate,preset,scale_filter,fps,speed_x,frames,compute_time_us,output_size_bytes,stdout_tail,argv +0,libx264,5M,medium,scale(cpu)+overlay(cpu),119.0,3.87,150,1376883.0,1352563,"[libx264 @ 0x555556068500] mb I I16..4: 82.9% 7.8% 9.4% +[libx264 @ 0x555556068500] mb P I16..4: 6.7% 1.3% 0.5% P16..4: 5.9% 1.6% 1.6% 0.0% 0.0% skip:82.5% +[libx264 @ 0x555556068500] mb B I16..4: 4.1% 0.8% 0.3% B16..8: 3.3% 0.7% 0.2% direct: 3.7% skip:86.9% L0:47.7% L1:35.3% BI:17.0% +[libx264 @ 0x555556068500] final ratefactor: -9.69 +[libx264 @ 0x555556068500] 8x8 transform intra:14.5% inter:28.8% +[libx264 @ 0x555556068500] coded y,uvDC,uvAC intra: 21.5% 47.4% 45.3% inter: 3.6% 7.7% 7.2% +[libx264 @ 0x555556068500] i16 v,h,dc,p: 98% 1% 0% 1% +[libx264 @ 0x555556068500] i8 v,h,dc,ddl,ddr,vr,hd,vl,hu: 99% 0% 0% 0% 0% 0% 0% 0% 0% +[libx264 @ 0x555556068500] i4 v,h,dc,ddl,ddr,vr,hd,vl,hu: 85% 8% 5% 0% 0% 0% 0% 0% 0% +[libx264 @ 0x555556068500] i8c dc,h,v,p: 7% 1% 86% 5% +[libx264 @ 
0x555556068500] Weighted P-Frames: Y:0.0% UV:0.0% +[libx264 @ 0x555556068500] ref P L0: 64.0% 1.3% 16.0% 18.7% +[libx264 @ 0x555556068500] ref B L0: 68.9% 13.3% 17.9% +[libx264 @ 0x555556068500] ref B L1: 91.4% 8.6% +[libx264 @ 0x555556068500] kb/s:2158.66",/usr/local/bin/ffmpeg -hide_banner -y -vsync 0 -i /data/sample.mp4 -loop 1 -i /data/watermark.png -t 5.0 -filter_complex [0:v]scale=1280:720[v0];[v0][1:v]overlay=main_w/2-overlay_w/2:main_h/2-overlay_h/2[vout] -map [vout] -c:v libx264 -b:v 5M -preset medium -movflags +faststart -c:a copy /tmp/tmpr3ivnhpo.mp4 diff --git a/benchmarks/200.multimedia/225.video-watermarking-gpu/read.me b/benchmarks/200.multimedia/225.video-watermarking-gpu/read.me new file mode 100644 index 00000000..9b817cd2 --- /dev/null +++ b/benchmarks/200.multimedia/225.video-watermarking-gpu/read.me @@ -0,0 +1,16 @@ +``` +# build on the NVIDIA host +docker build -t video-wm-gpu -f Dockerfile . + +# run with GPU, mounting your data and output dirs +docker run --rm --gpus all \ + -v /path/to/serverless-benchmarks-data-dphpc/200.multimedia/225.video-watermarking-gpu:/data:ro \ + -v $PWD/out_gpu:/out \ + -e INPUT=/data/sample.mp4 \ + -e WATERMARK=/data/watermark.png \ + -e DURATION=8 \ + -e REPEAT=1 \ + -e DECODE=gpu \ + -e CSV=/out/results.csv \ + video-wm-gpu + ``` \ No newline at end of file diff --git a/benchmarks/200.multimedia/225.video-watermarking-gpu/run.sh b/benchmarks/200.multimedia/225.video-watermarking-gpu/run.sh new file mode 100644 index 00000000..151cb916 --- /dev/null +++ b/benchmarks/200.multimedia/225.video-watermarking-gpu/run.sh @@ -0,0 +1,38 @@ +#!/usr/bin/env bash +set -euo pipefail + +# Default paths (mounted by SeBS local backend) +DATA_DIR="${DATA_DIR:-/data}" +OUT_DIR="${OUT_DIR:-/out}" + +INPUT="${INPUT:-${DATA_DIR}/sample.mp4}" +WATERMARK="${WATERMARK:-${DATA_DIR}/watermark.png}" # new line +DURATION="${DURATION:-8}" +REPEAT="${REPEAT:-1}" +CSV="${CSV:-${OUT_DIR}/results.csv}" +DECODE="${DECODE:-gpu}" # 'gpu' or 'cpu' 
+ +mkdir -p "$OUT_DIR" + +echo "==[ Video Watermarking GPU Benchmark ]==" +echo "Input: $INPUT" +echo "Watermark: $WATERMARK" +echo "Duration: $DURATION s" +echo "Repeat: $REPEAT" +echo "Decode: $DECODE" +echo "Output CSV: $CSV" +echo + +# If INPUT is missing, let gpu_bench synthesize one +/app/gpu_bench.py \ + --input "$INPUT" \ + --watermark "$WATERMARK" \ + --duration "$DURATION" \ + --repeat "$REPEAT" \ + --decode "$DECODE" \ + --csv "$CSV" \ + --trials \ + "codec=h264_nvenc,bitrate=5M,preset=p5" \ + "codec=h264_nvenc,bitrate=12M,preset=p1,scale=1920:1080" \ + "codec=hevc_nvenc,bitrate=6M,preset=p4" \ + "codec=libx264,bitrate=5M,preset=medium,scale=1280:720" \ No newline at end of file diff --git a/watermarking_bench/.gitkeep b/watermarking_bench/.gitkeep new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/watermarking_bench/.gitkeep @@ -0,0 +1 @@ + diff --git a/watermarking_bench/gpu_bench.py b/watermarking_bench/gpu_bench.py new file mode 100644 index 00000000..0f45a3a1 --- /dev/null +++ b/watermarking_bench/gpu_bench.py @@ -0,0 +1,293 @@ +#!/usr/bin/env python3 +import argparse, datetime, json, os, re, shutil, subprocess, sys, tempfile, csv +from typing import List, Dict, Any, Optional, Tuple + +# --- helpers --------------------------------------------------------------- + +def which_ffmpeg() -> str: + p = shutil.which("ffmpeg") + if not p: + sys.exit("ffmpeg not found on PATH. 
Use Docker image with NVENC or install FFmpeg with NVENC.") + return p + +def run(cmd: List[str]) -> subprocess.CompletedProcess: + return subprocess.run(cmd, stdin=subprocess.DEVNULL, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True) + +def has_encoder(ffmpeg: str, enc: str) -> bool: + out = run([ffmpeg, "-hide_banner", "-encoders"]).stdout + return re.search(rf"\b{re.escape(enc)}\b", out) is not None + +def has_filter(ffmpeg: str, name: str) -> bool: + out = run([ffmpeg, "-hide_banner", "-filters"]).stdout + return (f" {name} " in out) + +def gpu_info() -> Dict[str, Any]: + try: + out = run(["nvidia-smi", "--query-gpu=name,memory.total,driver_version", "--format=csv,noheader,nounits"]).stdout.strip() + name, mem, drv = [x.strip() for x in out.splitlines()[0].split(",")] + return {"name": name, "memory_total_mb": int(mem), "driver_version": drv} + except Exception: + return {"name": None, "memory_total_mb": None, "driver_version": None} + +def parse_progress(log: str) -> Dict[str, Any]: + lines = [ln for ln in log.splitlines() if ("fps=" in ln or "speed=" in ln or "frame=" in ln)] + fps = speed = frames = None + if lines: + last = lines[-1] + m = re.search(r"fps=\s*([0-9]+(?:\.[0-9]+)?)", last); fps = float(m.group(1)) if m else None + m = re.search(r"speed=\s*([0-9]+(?:\.[0-9]+)?)x", last); speed = float(m.group(1)) if m else None + m = re.search(r"frame=\s*([0-9]+)", last); frames = int(m.group(1)) if m else None + return {"fps": fps, "speed_x": speed, "frames": frames} + +# --- filter planning ------------------------------------------------------- + +def build_vf_or_complex( + ffmpeg: str, + scale: Optional[str], + wm_path: Optional[str], + overlay: str, + want_gpu_decode: bool +) -> Tuple[List[str], str]: + """ + Returns (ffmpeg_args_for_filters, filter_used_string). + + Priority: + - Prefer GPU filters: scale_npp, then scale_cuda, then CPU scale with explicit bridges. + - Prefer overlay_cuda; else CPU overlay with explicit bridges. 
+ - Never place 'format=nv12' *after* 'hwupload_cuda'. + """ + used = [] + vf_args: List[str] = [] + complex_graph = "" + + have_scale_npp = has_filter(ffmpeg, "scale_npp") + have_scale_cuda = has_filter(ffmpeg, "scale_cuda") + have_overlay_cuda= has_filter(ffmpeg, "overlay_cuda") + + # No watermark case + if not wm_path: + if scale: + if want_gpu_decode and have_scale_npp: + vf_args = ["-vf", f"scale_npp={scale}"] + used.append("scale_npp") + elif want_gpu_decode and have_scale_cuda: + vf_args = ["-vf", f"scale_cuda={scale}"] + used.append("scale_cuda") + else: + # CPU scale with explicit bridges + # hw frames -> CPU: hwdownload,format=nv12 + # CPU scale -> back to GPU: hwupload_cuda + vf_args = ["-vf", f"hwdownload,format=nv12,scale={scale},hwupload_cuda"] + used.append("scale(cpu)+hwdownload+hwupload_cuda") + else: + vf_args = [] + return (vf_args, "+".join(used)) + + # Watermark case + if want_gpu_decode and have_overlay_cuda: + if scale and have_scale_npp: + complex_graph = f"[0:v]scale_npp={scale}[v0];[v0][1:v]overlay_cuda={overlay}[vout]" + used += ["scale_npp","overlay_cuda"] + elif scale and have_scale_cuda: + complex_graph = f"[0:v]scale_cuda={scale}[v0];[v0][1:v]overlay_cuda={overlay}[vout]" + used += ["scale_cuda","overlay_cuda"] + elif scale: + complex_graph = ( + f"[0:v]hwdownload,format=nv12,scale={scale},hwupload_cuda[v0];" + f"[v0][1:v]overlay_cuda={overlay}[vout]" + ) + used += ["scale(cpu)+hwdownload+hwupload_cuda","overlay_cuda"] + else: + complex_graph = f"[0:v][1:v]overlay_cuda={overlay}[vout]" + used += ["overlay_cuda"] + return (["-filter_complex", complex_graph, "-map", "[vout]"], "+".join(used)) + + # CPU overlay fallback + if scale and want_gpu_decode and (have_scale_npp or have_scale_cuda): + scaler = "scale_npp" if have_scale_npp else "scale_cuda" + complex_graph = ( + f"[0:v]{scaler}={scale}[v0gpu];" + f"[v0gpu]hwdownload,format=nv12[v0cpu];" + f"[v0cpu][1:v]overlay={overlay}[mix];" + f"[mix]hwupload_cuda[vout]" + ) + used += [scaler, 
"hwdownload+overlay(cpu)+hwupload_cuda"] + elif scale: + complex_graph = ( + f"[0:v]hwdownload,format=nv12,scale={scale}[v0cpu];" + f"[v0cpu][1:v]overlay={overlay}[mix];" + f"[mix]hwupload_cuda[vout]" + ) + used += ["scale(cpu)+overlay(cpu)+hwupload_cuda"] + else: + complex_graph = ( + f"[0:v]hwdownload,format=nv12[v0cpu];" + f"[v0cpu][1:v]overlay={overlay}[mix];" + f"[mix]hwupload_cuda[vout]" + ) + used += ["overlay(cpu)+hwupload_cuda"] + + return (["-filter_complex", complex_graph, "-map", "[vout]"], "+".join(used)) + +# --- core ------------------------------------------------------------------ + +def transcode_once( + ffmpeg: str, + inp: str, + outp: str, + codec: str, + bitrate: str, + preset: str, + duration: Optional[float], + scale: Optional[str], + wm_path: Optional[str], + overlay_pos: str, + decode_mode: str = "gpu" # "gpu" or "cpu" +) -> Dict[str, Any]: + + if not has_encoder(ffmpeg, codec): + raise RuntimeError(f"encoder '{codec}' not available; check your ffmpeg build (NVENC/AV1).") + + want_gpu_decode = (decode_mode == "gpu") + + args = [ffmpeg, "-hide_banner", "-y", "-vsync", "0"] + + if want_gpu_decode: + # Keep decode on GPU & use CUDA frames. Give NVDEC extra surfaces. 
+ args += ["-hwaccel", "cuda", "-hwaccel_output_format", "cuda", "-extra_hw_frames", "16"] + # Helpful on some builds to make filters pick the right device + args += ["-init_hw_device", "cuda=cuda", "-filter_hw_device", "cuda"] + + # inputs + args += ["-i", inp] + if wm_path: + args += ["-loop", "1", "-i", wm_path] + + if duration: + args += ["-t", str(duration)] + + # Build filters + filt_args, filter_used = build_vf_or_complex(ffmpeg, scale, wm_path, overlay_pos, want_gpu_decode) + args += filt_args + + # encoder params + args += ["-c:v", codec, "-b:v", bitrate, "-preset", preset, "-rc", "vbr", "-movflags", "+faststart"] + # audio: copy if present + args += ["-c:a", "copy"] + + # Output path + args += [outp] + + t0 = datetime.datetime.now() + proc = run(args) + t1 = datetime.datetime.now() + if proc.returncode != 0: + raise RuntimeError("ffmpeg failed:\n" + proc.stdout + f"\n\nARGS:\n{' '.join(args)}") + + parsed = parse_progress(proc.stdout) + size = os.path.getsize(outp) if os.path.exists(outp) else 0 + return { + "args": args, + "filter_used": filter_used, + "stdout_tail": "\n".join(proc.stdout.splitlines()[-15:]), + "compute_time_us": (t1 - t0) / datetime.timedelta(microseconds=1), + "fps": parsed["fps"], + "speed_x": parsed["speed_x"], + "frames": parsed["frames"], + "output_size_bytes": size + } + +def main(): + ap = argparse.ArgumentParser(description="GPU NVENC benchmark.") + ap.add_argument("--input", required=True, help="Path to input video") + ap.add_argument("--duration", type=float, default=None, help="Trim to first N seconds") + ap.add_argument("--repeat", type=int, default=1, help="Repeat each trial") + ap.add_argument("--warmup", action="store_true", help="Run one warmup trial (not recorded)") + ap.add_argument("--csv", default=None, help="Optional path to write CSV summary") + ap.add_argument("--watermark", default=None, help="Path to watermark PNG (optional)") + ap.add_argument("--overlay", default="main_w/2-overlay_w/2:main_h/2-overlay_h/2", + 
help="Overlay position (ffmpeg expr), e.g. '10:10' or 'main_w-overlay_w-10:10'") + ap.add_argument("--decode", choices=["gpu","cpu"], default="gpu", + help="Decode on GPU (default) or CPU.") + ap.add_argument("--trials", nargs="+", default=[ + "codec=h264_nvenc,bitrate=5M,preset=p5", + "codec=h264_nvenc,bitrate=12M,preset=p1,scale=1920:1080", + "codec=hevc_nvenc,bitrate=6M,preset=p4", + "codec=av1_nvenc,bitrate=3M,preset=p5" + ], help="List like codec=h264_nvenc,bitrate=5M,preset=p5[,scale=WxH]") + args = ap.parse_args() + + ffmpeg = which_ffmpeg() + gi = gpu_info() + + def parse_trial(s: str) -> Dict[str, str]: + d: Dict[str, str] = {} + for kv in s.split(","): + k, v = kv.split("=", 1) + d[k.strip()] = v.strip() + return d + + trial_specs = [parse_trial(s) for s in args.trials] + + # optional warmup + if args.warmup: + with tempfile.NamedTemporaryFile(suffix=".mp4", delete=True) as tmp: + _ = transcode_once(ffmpeg, args.input, tmp.name, + trial_specs[0].get("codec","h264_nvenc"), + trial_specs[0].get("bitrate","5M"), + trial_specs[0].get("preset","p5"), + args.duration, + trial_specs[0].get("scale"), + args.watermark, + args.overlay, + args.decode) + + results = [] + idx = 0 + for spec in trial_specs: + for _ in range(args.repeat): + with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp: + outp = tmp.name + res = transcode_once(ffmpeg, args.input, outp, + spec.get("codec","h264_nvenc"), + spec.get("bitrate","5M"), + spec.get("preset","p5"), + args.duration, + spec.get("scale"), + args.watermark, + args.overlay, + args.decode) + results.append({ + "trial_index": idx, + "codec": spec.get("codec"), + "bitrate": spec.get("bitrate"), + "preset": spec.get("preset"), + "scale_filter": res["filter_used"], + "fps": res["fps"], + "speed_x": res["speed_x"], + "frames": res["frames"], + "compute_time_us": res["compute_time_us"], + "output_size_bytes": res["output_size_bytes"], + "stdout_tail": res["stdout_tail"], + "argv": " ".join(res["args"]), + }) + idx += 
1 + try: os.remove(outp) + except OSError: pass + + report = { + "gpu": gi, + "ffmpeg_path": ffmpeg, + "trial_count": len(results), + "results": results + } + print(json.dumps(report, indent=2)) + + if args.csv and results: + with open(args.csv, "w", newline="") as f: + w = csv.DictWriter(f, fieldnames=list(results[0].keys())) + w.writeheader() + w.writerows(results) + +if __name__ == "__main__": + main() diff --git a/watermarking_bench/read.me b/watermarking_bench/read.me new file mode 100644 index 00000000..efe5b1ab --- /dev/null +++ b/watermarking_bench/read.me @@ -0,0 +1,3 @@ +chmod +x run_nvenc_bench.sh +./run_nvenc_bench.sh # uses ~/bench/sample.mp4 (auto-creates) +./run_nvenc_bench.sh /path/video.mp4 # use your own file diff --git a/watermarking_bench/results.csv b/watermarking_bench/results.csv new file mode 100644 index 00000000..7dcd68aa --- /dev/null +++ b/watermarking_bench/results.csv @@ -0,0 +1,46 @@ +trial_index,codec,bitrate,preset,scale_filter,fps,speed_x,frames,compute_time_us,output_size_bytes,stdout_tail,argv +0,h264_nvenc,5M,p5,,73.0,2.44,240,5879259.0,2272623," Side data: + cpb: bitrate max/min/avg: 0/0/5000000 buffer size: 10000000 vbv_delay: N/A + Stream #0:1(und): Audio: aac (LC) (mp4a / 0x6134706D), 48000 Hz, mono, fltp, 69 kb/s (default) + Metadata: + handler_name : SoundHandler + vendor_id : [0][0][0][0] +frame= 1 fps=0.0 q=0.0 size= 0kB time=00:00:00.12 bitrate= 3.0kbits/s speed=1.44x +frame= 41 fps=0.0 q=22.0 size= 0kB time=00:00:01.47 bitrate= 0.3kbits/s speed=2.49x +frame= 81 fps= 74 q=12.0 size= 256kB time=00:00:02.81 bitrate= 744.9kbits/s speed=2.58x +frame= 121 fps= 76 q=12.0 size= 768kB time=00:00:04.13 bitrate=1520.3kbits/s speed=2.59x +frame= 161 fps= 77 q=12.0 size= 1024kB time=00:00:05.48 bitrate=1530.1kbits/s speed=2.61x +frame= 201 fps= 77 q=13.0 size= 1536kB time=00:00:06.80 bitrate=1849.0kbits/s speed=2.62x +[mp4 @ 0x601c5da3d280] Starting second pass: moving the moov atom to the beginning of the file +frame= 240 fps= 73 
q=13.0 Lsize= 2219kB time=00:00:07.97 bitrate=2278.7kbits/s speed=2.44x +video:2142kB audio:68kB subtitle:0kB other streams:0kB global headers:0kB muxing overhead: 0.409259%",/usr/bin/ffmpeg -hide_banner -y -vsync 0 -hwaccel cuda -hwaccel_output_format cuda -extra_hw_frames 16 -init_hw_device cuda=cuda -filter_hw_device cuda -i ./sample.mp4 -t 8.0 -c:v h264_nvenc -b:v 5M -preset p5 -rc vbr -movflags +faststart -c:a copy /tmp/tmpy5hxojjv.mp4 +1,h264_nvenc,12M,p1,scale_cuda,191.0,6.34,240,3748632.0,3041922," handler_name : VideoHandler + vendor_id : [0][0][0][0] + encoder : Lavc58.134.100 h264_nvenc + Side data: + cpb: bitrate max/min/avg: 0/0/12000000 buffer size: 24000000 vbv_delay: N/A + Stream #0:1(und): Audio: aac (LC) (mp4a / 0x6134706D), 48000 Hz, mono, fltp, 69 kb/s (default) + Metadata: + handler_name : SoundHandler + vendor_id : [0][0][0][0] +frame= 1 fps=0.0 q=0.0 size= 0kB time=00:00:00.12 bitrate= 3.0kbits/s speed=1.51x +frame= 102 fps=0.0 q=7.0 size= 768kB time=00:00:03.52 bitrate=1787.5kbits/s speed=5.93x +frame= 209 fps=191 q=7.0 size= 2304kB time=00:00:07.08 bitrate=2664.9kbits/s speed=6.46x +[mp4 @ 0x5c6c573cf740] Starting second pass: moving the moov atom to the beginning of the file +frame= 240 fps=191 q=7.0 Lsize= 2971kB time=00:00:07.97 bitrate=3050.1kbits/s speed=6.34x +video:2895kB audio:68kB subtitle:0kB other streams:0kB global headers:0kB muxing overhead: 0.274427%",/usr/bin/ffmpeg -hide_banner -y -vsync 0 -hwaccel cuda -hwaccel_output_format cuda -extra_hw_frames 16 -init_hw_device cuda=cuda -filter_hw_device cuda -i ./sample.mp4 -t 8.0 -vf scale_cuda=1920:1080 -c:v h264_nvenc -b:v 12M -preset p1 -rc vbr -movflags +faststart -c:a copy /tmp/tmp68ay0l6q.mp4 +2,hevc_nvenc,6M,p4,,101.0,3.37,240,4821593.0,2393406," encoder : Lavc58.134.100 hevc_nvenc + Side data: + cpb: bitrate max/min/avg: 0/0/6000000 buffer size: 12000000 vbv_delay: N/A + Stream #0:1(und): Audio: aac (LC) (mp4a / 0x6134706D), 48000 Hz, mono, fltp, 69 kb/s (default) + 
Metadata: + handler_name : SoundHandler + vendor_id : [0][0][0][0] +frame= 1 fps=0.0 q=0.0 size= 0kB time=00:00:00.12 bitrate= 2.8kbits/s speed=1.18x +frame= 52 fps=0.0 q=17.0 size= 0kB time=00:00:01.83 bitrate= 0.2kbits/s speed=2.98x +frame= 110 fps= 98 q=12.0 size= 512kB time=00:00:03.77 bitrate=1110.9kbits/s speed=3.36x +frame= 168 fps=103 q=9.0 size= 1280kB time=00:00:05.71 bitrate=1834.1kbits/s speed=3.52x +frame= 226 fps=106 q=12.0 size= 1792kB time=00:00:07.63 bitrate=1922.2kbits/s speed=3.59x +[mp4 @ 0x62016db565c0] Starting second pass: moving the moov atom to the beginning of the file +frame= 240 fps=101 q=12.0 Lsize= 2337kB time=00:00:07.97 bitrate=2399.8kbits/s speed=3.37x +video:2260kB audio:68kB subtitle:0kB other streams:0kB global headers:0kB muxing overhead: 0.392147%",/usr/bin/ffmpeg -hide_banner -y -vsync 0 -hwaccel cuda -hwaccel_output_format cuda -extra_hw_frames 16 -init_hw_device cuda=cuda -filter_hw_device cuda -i ./sample.mp4 -t 8.0 -c:v hevc_nvenc -b:v 6M -preset p4 -rc vbr -movflags +faststart -c:a copy /tmp/tmpkjy5g24f.mp4 diff --git a/watermarking_bench/results_long.csv b/watermarking_bench/results_long.csv new file mode 100644 index 00000000..e0dc0560 --- /dev/null +++ b/watermarking_bench/results_long.csv @@ -0,0 +1,136 @@ +trial_index,codec,bitrate,preset,scale_filter,fps,speed_x,frames,compute_time_us,output_size_bytes,stdout_tail,argv +0,h264_nvenc,5M,p5,,77.0,2.57,600,10411441.0,6064925,"frame= 121 fps= 76 q=12.0 size= 768kB time=00:00:04.13 bitrate=1520.3kbits/s speed= 2.6x +frame= 161 fps= 77 q=12.0 size= 1024kB time=00:00:05.48 bitrate=1530.1kbits/s speed=2.62x +frame= 201 fps= 77 q=13.0 size= 1536kB time=00:00:06.80 bitrate=1849.0kbits/s speed=2.62x +frame= 241 fps= 78 q=12.0 size= 2048kB time=00:00:08.14 bitrate=2058.8kbits/s speed=2.63x +frame= 281 fps= 78 q=13.0 size= 2304kB time=00:00:09.47 bitrate=1992.7kbits/s speed=2.63x +frame= 321 fps= 78 q=12.0 size= 2816kB time=00:00:10.81 bitrate=2132.9kbits/s speed=2.64x +frame= 
361 fps= 78 q=12.0 size= 3072kB time=00:00:12.13 bitrate=2073.2kbits/s speed=2.64x +frame= 401 fps= 79 q=12.0 size= 3584kB time=00:00:13.48 bitrate=2177.6kbits/s speed=2.64x +frame= 441 fps= 79 q=13.0 size= 4096kB time=00:00:14.80 bitrate=2266.4kbits/s speed=2.64x +frame= 481 fps= 79 q=13.0 size= 4352kB time=00:00:16.14 bitrate=2207.6kbits/s speed=2.64x +frame= 521 fps= 79 q=14.0 size= 4864kB time=00:00:17.47 bitrate=2280.6kbits/s speed=2.64x +frame= 561 fps= 79 q=12.0 size= 5120kB time=00:00:18.81 bitrate=2229.1kbits/s speed=2.65x +[mp4 @ 0x62a7ce754b00] Starting second pass: moving the moov atom to the beginning of the file +frame= 600 fps= 77 q=12.0 Lsize= 5923kB time=00:00:19.98 bitrate=2427.3kbits/s speed=2.57x +video:5733kB audio:170kB subtitle:0kB other streams:0kB global headers:0kB muxing overhead: 0.348635%",/usr/bin/ffmpeg -hide_banner -y -vsync 0 -hwaccel cuda -hwaccel_output_format cuda -extra_hw_frames 16 -init_hw_device cuda=cuda -filter_hw_device cuda -i ./sample.mp4 -t 30.0 -c:v h264_nvenc -b:v 5M -preset p5 -rc vbr -movflags +faststart -c:a copy /tmp/tmpzb_ed9aq.mp4 +1,h264_nvenc,5M,p5,,76.0,2.52,600,10294387.0,6064925,"frame= 150 fps= 71 q=12.0 size= 1024kB time=00:00:05.12 bitrate=1638.5kbits/s speed=2.44x +frame= 190 fps= 73 q=12.0 size= 1280kB time=00:00:06.44 bitrate=1627.6kbits/s speed=2.48x +frame= 230 fps= 74 q=13.0 size= 1792kB time=00:00:07.78 bitrate=1885.3kbits/s speed=2.51x +frame= 270 fps= 75 q=9.0 size= 2304kB time=00:00:09.10 bitrate=2072.0kbits/s speed=2.53x +frame= 310 fps= 75 q=9.0 size= 2560kB time=00:00:10.45 bitrate=2006.2kbits/s speed=2.54x +frame= 350 fps= 76 q=9.0 size= 3072kB time=00:00:11.77 bitrate=2137.1kbits/s speed=2.55x +frame= 390 fps= 76 q=9.0 size= 3584kB time=00:00:13.12 bitrate=2237.8kbits/s speed=2.57x +frame= 430 fps= 77 q=9.0 size= 3840kB time=00:00:14.44 bitrate=2178.1kbits/s speed=2.57x +frame= 470 fps= 77 q=9.0 size= 4352kB time=00:00:15.78 bitrate=2258.4kbits/s speed=2.58x +frame= 510 fps= 77 q=9.0 size= 
4608kB time=00:00:17.10 bitrate=2206.3kbits/s speed=2.59x +frame= 550 fps= 77 q=12.0 size= 5120kB time=00:00:18.45 bitrate=2272.9kbits/s speed=2.59x +frame= 590 fps= 77 q=12.0 size= 5632kB time=00:00:19.77 bitrate=2333.0kbits/s speed=2.59x +[mp4 @ 0x60d928a16bc0] Starting second pass: moving the moov atom to the beginning of the file +frame= 600 fps= 76 q=12.0 Lsize= 5923kB time=00:00:19.98 bitrate=2427.3kbits/s speed=2.52x +video:5733kB audio:170kB subtitle:0kB other streams:0kB global headers:0kB muxing overhead: 0.348635%",/usr/bin/ffmpeg -hide_banner -y -vsync 0 -hwaccel cuda -hwaccel_output_format cuda -extra_hw_frames 16 -init_hw_device cuda=cuda -filter_hw_device cuda -i ./sample.mp4 -t 30.0 -c:v h264_nvenc -b:v 5M -preset p5 -rc vbr -movflags +faststart -c:a copy /tmp/tmpy9va1dr6.mp4 +2,h264_nvenc,5M,p5,,77.0,2.57,600,10306400.0,6064925,"frame= 121 fps= 76 q=12.0 size= 768kB time=00:00:04.13 bitrate=1520.3kbits/s speed=2.59x +frame= 161 fps= 77 q=12.0 size= 1024kB time=00:00:05.48 bitrate=1530.1kbits/s speed=2.61x +frame= 201 fps= 77 q=13.0 size= 1536kB time=00:00:06.80 bitrate=1849.0kbits/s speed=2.62x +frame= 241 fps= 78 q=12.0 size= 2048kB time=00:00:08.14 bitrate=2058.8kbits/s speed=2.63x +frame= 281 fps= 78 q=13.0 size= 2304kB time=00:00:09.47 bitrate=1992.7kbits/s speed=2.63x +frame= 321 fps= 78 q=12.0 size= 2816kB time=00:00:10.81 bitrate=2132.9kbits/s speed=2.63x +frame= 361 fps= 78 q=12.0 size= 3072kB time=00:00:12.13 bitrate=2073.2kbits/s speed=2.63x +frame= 401 fps= 78 q=12.0 size= 3584kB time=00:00:13.48 bitrate=2177.6kbits/s speed=2.64x +frame= 441 fps= 79 q=13.0 size= 4096kB time=00:00:14.80 bitrate=2266.4kbits/s speed=2.64x +frame= 481 fps= 79 q=13.0 size= 4352kB time=00:00:16.14 bitrate=2207.6kbits/s speed=2.64x +frame= 521 fps= 79 q=14.0 size= 4864kB time=00:00:17.47 bitrate=2280.6kbits/s speed=2.64x +frame= 561 fps= 79 q=12.0 size= 5120kB time=00:00:18.81 bitrate=2229.1kbits/s speed=2.64x +[mp4 @ 0x5c30462b0640] Starting second pass: 
moving the moov atom to the beginning of the file +frame= 600 fps= 77 q=12.0 Lsize= 5923kB time=00:00:19.98 bitrate=2427.3kbits/s speed=2.57x +video:5733kB audio:170kB subtitle:0kB other streams:0kB global headers:0kB muxing overhead: 0.348635%",/usr/bin/ffmpeg -hide_banner -y -vsync 0 -hwaccel cuda -hwaccel_output_format cuda -extra_hw_frames 16 -init_hw_device cuda=cuda -filter_hw_device cuda -i ./sample.mp4 -t 30.0 -c:v h264_nvenc -b:v 5M -preset p5 -rc vbr -movflags +faststart -c:a copy /tmp/tmp8540g4_n.mp4 +3,h264_nvenc,12M,p1,scale_cuda,195.0,6.48,600,5529076.0,8430659," Side data: + cpb: bitrate max/min/avg: 0/0/12000000 buffer size: 24000000 vbv_delay: N/A + Stream #0:1(und): Audio: aac (LC) (mp4a / 0x6134706D), 48000 Hz, mono, fltp, 69 kb/s (default) + Metadata: + handler_name : SoundHandler + vendor_id : [0][0][0][0] +frame= 1 fps=0.0 q=0.0 size= 0kB time=00:00:00.12 bitrate= 3.0kbits/s speed=1.48x +frame= 102 fps=0.0 q=7.0 size= 768kB time=00:00:03.52 bitrate=1787.5kbits/s speed=5.92x +frame= 209 fps=191 q=7.0 size= 2304kB time=00:00:07.08 bitrate=2664.9kbits/s speed=6.46x +frame= 290 fps=181 q=7.0 size= 3584kB time=00:00:09.77 bitrate=3005.0kbits/s speed=6.11x +frame= 396 fps=188 q=7.0 size= 5120kB time=00:00:13.31 bitrate=3150.8kbits/s speed=6.32x +frame= 502 fps=192 q=7.0 size= 6656kB time=00:00:16.85 bitrate=3235.3kbits/s speed=6.46x +[mp4 @ 0x587cfb879b80] Starting second pass: moving the moov atom to the beginning of the file +frame= 600 fps=195 q=7.0 Lsize= 8233kB time=00:00:19.98 bitrate=3374.1kbits/s speed=6.48x +video:8045kB audio:170kB subtitle:0kB other streams:0kB global headers:0kB muxing overhead: 0.222219%",/usr/bin/ffmpeg -hide_banner -y -vsync 0 -hwaccel cuda -hwaccel_output_format cuda -extra_hw_frames 16 -init_hw_device cuda=cuda -filter_hw_device cuda -i ./sample.mp4 -t 30.0 -vf scale_cuda=1920:1080 -c:v h264_nvenc -b:v 12M -preset p1 -rc vbr -movflags +faststart -c:a copy /tmp/tmp5_mf5dkd.mp4 
+4,h264_nvenc,12M,p1,scale_cuda,203.0,6.75,600,5264378.0,8430659," Side data: + cpb: bitrate max/min/avg: 0/0/12000000 buffer size: 24000000 vbv_delay: N/A + Stream #0:1(und): Audio: aac (LC) (mp4a / 0x6134706D), 48000 Hz, mono, fltp, 69 kb/s (default) + Metadata: + handler_name : SoundHandler + vendor_id : [0][0][0][0] +frame= 1 fps=0.0 q=0.0 size= 0kB time=00:00:00.12 bitrate= 3.0kbits/s speed= 1.5x +frame= 102 fps=0.0 q=7.0 size= 768kB time=00:00:03.52 bitrate=1787.5kbits/s speed=5.95x +frame= 209 fps=191 q=7.0 size= 2304kB time=00:00:07.08 bitrate=2664.9kbits/s speed=6.48x +frame= 315 fps=197 q=7.0 size= 3840kB time=00:00:10.60 bitrate=2967.0kbits/s speed=6.65x +frame= 420 fps=200 q=7.0 size= 5376kB time=00:00:14.10 bitrate=3123.1kbits/s speed=6.72x +frame= 526 fps=202 q=7.0 size= 6912kB time=00:00:17.64 bitrate=3209.5kbits/s speed=6.79x +[mp4 @ 0x64e038e965c0] Starting second pass: moving the moov atom to the beginning of the file +frame= 600 fps=203 q=7.0 Lsize= 8233kB time=00:00:19.98 bitrate=3374.1kbits/s speed=6.75x +video:8045kB audio:170kB subtitle:0kB other streams:0kB global headers:0kB muxing overhead: 0.222219%",/usr/bin/ffmpeg -hide_banner -y -vsync 0 -hwaccel cuda -hwaccel_output_format cuda -extra_hw_frames 16 -init_hw_device cuda=cuda -filter_hw_device cuda -i ./sample.mp4 -t 30.0 -vf scale_cuda=1920:1080 -c:v h264_nvenc -b:v 12M -preset p1 -rc vbr -movflags +faststart -c:a copy /tmp/tmp7t8tpliz.mp4 +5,h264_nvenc,12M,p1,scale_cuda,203.0,6.75,600,5273983.0,8430659," Side data: + cpb: bitrate max/min/avg: 0/0/12000000 buffer size: 24000000 vbv_delay: N/A + Stream #0:1(und): Audio: aac (LC) (mp4a / 0x6134706D), 48000 Hz, mono, fltp, 69 kb/s (default) + Metadata: + handler_name : SoundHandler + vendor_id : [0][0][0][0] +frame= 1 fps=0.0 q=0.0 size= 0kB time=00:00:00.12 bitrate= 3.0kbits/s speed=1.51x +frame= 102 fps=0.0 q=7.0 size= 768kB time=00:00:03.52 bitrate=1787.5kbits/s speed=5.95x +frame= 209 fps=191 q=7.0 size= 2304kB time=00:00:07.08 
bitrate=2664.9kbits/s speed=6.47x +frame= 315 fps=197 q=7.0 size= 3840kB time=00:00:10.60 bitrate=2967.0kbits/s speed=6.64x +frame= 420 fps=200 q=7.0 size= 5376kB time=00:00:14.10 bitrate=3123.1kbits/s speed=6.72x +frame= 526 fps=202 q=7.0 size= 6912kB time=00:00:17.64 bitrate=3209.5kbits/s speed=6.79x +[mp4 @ 0x5d9076a76740] Starting second pass: moving the moov atom to the beginning of the file +frame= 600 fps=203 q=7.0 Lsize= 8233kB time=00:00:19.98 bitrate=3374.1kbits/s speed=6.75x +video:8045kB audio:170kB subtitle:0kB other streams:0kB global headers:0kB muxing overhead: 0.222219%",/usr/bin/ffmpeg -hide_banner -y -vsync 0 -hwaccel cuda -hwaccel_output_format cuda -extra_hw_frames 16 -init_hw_device cuda=cuda -filter_hw_device cuda -i ./sample.mp4 -t 30.0 -vf scale_cuda=1920:1080 -c:v h264_nvenc -b:v 12M -preset p1 -rc vbr -movflags +faststart -c:a copy /tmp/tmpe5tfql7r.mp4 +6,hevc_nvenc,6M,p4,,110.0,3.67,600,7783224.0,6248386," vendor_id : [0][0][0][0] +frame= 1 fps=0.0 q=0.0 size= 0kB time=00:00:00.12 bitrate= 2.8kbits/s speed=1.52x +frame= 55 fps=0.0 q=18.0 size= 0kB time=00:00:01.94 bitrate= 0.2kbits/s speed=3.29x +frame= 113 fps=103 q=12.0 size= 768kB time=00:00:03.88 bitrate=1620.5kbits/s speed=3.54x +frame= 171 fps=107 q=12.0 size= 1280kB time=00:00:05.80 bitrate=1807.1kbits/s speed=3.63x +frame= 229 fps=109 q=12.0 size= 1792kB time=00:00:07.74 bitrate=1895.7kbits/s speed=3.69x +frame= 287 fps=110 q=12.0 size= 2560kB time=00:00:09.68 bitrate=2165.3kbits/s speed=3.72x +frame= 345 fps=111 q=12.0 size= 3072kB time=00:00:11.60 bitrate=2168.5kbits/s speed=3.74x +frame= 403 fps=112 q=12.0 size= 3840kB time=00:00:13.54 bitrate=2322.2kbits/s speed=3.76x +frame= 461 fps=112 q=12.0 size= 4352kB time=00:00:15.46 bitrate=2305.1kbits/s speed=3.77x +frame= 519 fps=113 q=12.0 size= 5120kB time=00:00:17.40 bitrate=2409.4kbits/s speed=3.78x +frame= 577 fps=113 q=12.0 size= 5632kB time=00:00:19.34 bitrate=2384.5kbits/s speed=3.79x +[mp4 @ 0x5dbdbab92440] Starting second 
pass: moving the moov atom to the beginning of the file +frame= 600 fps=110 q=12.0 Lsize= 6102kB time=00:00:19.98 bitrate=2500.7kbits/s speed=3.67x +video:5912kB audio:170kB subtitle:0kB other streams:0kB global headers:0kB muxing overhead: 0.339750%",/usr/bin/ffmpeg -hide_banner -y -vsync 0 -hwaccel cuda -hwaccel_output_format cuda -extra_hw_frames 16 -init_hw_device cuda=cuda -filter_hw_device cuda -i ./sample.mp4 -t 30.0 -c:v hevc_nvenc -b:v 6M -preset p4 -rc vbr -movflags +faststart -c:a copy /tmp/tmph3v5xk4u.mp4 +7,hevc_nvenc,6M,p4,,110.0,3.67,600,7939115.0,6248386," vendor_id : [0][0][0][0] +frame= 1 fps=0.0 q=0.0 size= 0kB time=00:00:00.12 bitrate= 2.8kbits/s speed=1.49x +frame= 55 fps=0.0 q=18.0 size= 0kB time=00:00:01.94 bitrate= 0.2kbits/s speed=3.28x +frame= 113 fps=103 q=12.0 size= 768kB time=00:00:03.88 bitrate=1620.5kbits/s speed=3.54x +frame= 171 fps=107 q=12.0 size= 1280kB time=00:00:05.80 bitrate=1807.1kbits/s speed=3.63x +frame= 229 fps=109 q=12.0 size= 1792kB time=00:00:07.74 bitrate=1895.7kbits/s speed=3.68x +frame= 287 fps=110 q=12.0 size= 2560kB time=00:00:09.68 bitrate=2165.3kbits/s speed=3.72x +frame= 345 fps=111 q=12.0 size= 3072kB time=00:00:11.60 bitrate=2168.5kbits/s speed=3.74x +frame= 403 fps=112 q=12.0 size= 3840kB time=00:00:13.54 bitrate=2322.2kbits/s speed=3.76x +frame= 461 fps=112 q=12.0 size= 4352kB time=00:00:15.46 bitrate=2305.1kbits/s speed=3.76x +frame= 519 fps=113 q=12.0 size= 5120kB time=00:00:17.40 bitrate=2409.4kbits/s speed=3.77x +frame= 577 fps=113 q=12.0 size= 5632kB time=00:00:19.34 bitrate=2384.5kbits/s speed=3.78x +[mp4 @ 0x56aa54a3cb00] Starting second pass: moving the moov atom to the beginning of the file +frame= 600 fps=110 q=12.0 Lsize= 6102kB time=00:00:19.98 bitrate=2500.7kbits/s speed=3.67x +video:5912kB audio:170kB subtitle:0kB other streams:0kB global headers:0kB muxing overhead: 0.339750%",/usr/bin/ffmpeg -hide_banner -y -vsync 0 -hwaccel cuda -hwaccel_output_format cuda -extra_hw_frames 16 
-init_hw_device cuda=cuda -filter_hw_device cuda -i ./sample.mp4 -t 30.0 -c:v hevc_nvenc -b:v 6M -preset p4 -rc vbr -movflags +faststart -c:a copy /tmp/tmpbo93lv7k.mp4 +8,hevc_nvenc,6M,p4,,110.0,3.67,600,8011795.0,6248386," vendor_id : [0][0][0][0] +frame= 1 fps=0.0 q=0.0 size= 0kB time=00:00:00.12 bitrate= 2.8kbits/s speed=1.51x +frame= 55 fps=0.0 q=18.0 size= 0kB time=00:00:01.94 bitrate= 0.2kbits/s speed=3.28x +frame= 113 fps=103 q=12.0 size= 768kB time=00:00:03.88 bitrate=1620.5kbits/s speed=3.54x +frame= 171 fps=107 q=12.0 size= 1280kB time=00:00:05.80 bitrate=1807.1kbits/s speed=3.63x +frame= 229 fps=109 q=12.0 size= 1792kB time=00:00:07.74 bitrate=1895.7kbits/s speed=3.69x +frame= 287 fps=110 q=12.0 size= 2560kB time=00:00:09.68 bitrate=2165.3kbits/s speed=3.72x +frame= 345 fps=111 q=12.0 size= 3072kB time=00:00:11.60 bitrate=2168.5kbits/s speed=3.74x +frame= 403 fps=112 q=12.0 size= 3840kB time=00:00:13.54 bitrate=2322.2kbits/s speed=3.76x +frame= 461 fps=112 q=12.0 size= 4352kB time=00:00:15.46 bitrate=2305.1kbits/s speed=3.76x +frame= 519 fps=113 q=12.0 size= 5120kB time=00:00:17.40 bitrate=2409.4kbits/s speed=3.78x +frame= 577 fps=113 q=12.0 size= 5632kB time=00:00:19.34 bitrate=2384.5kbits/s speed=3.78x +[mp4 @ 0x55882f1e1400] Starting second pass: moving the moov atom to the beginning of the file +frame= 600 fps=110 q=12.0 Lsize= 6102kB time=00:00:19.98 bitrate=2500.7kbits/s speed=3.67x +video:5912kB audio:170kB subtitle:0kB other streams:0kB global headers:0kB muxing overhead: 0.339750%",/usr/bin/ffmpeg -hide_banner -y -vsync 0 -hwaccel cuda -hwaccel_output_format cuda -extra_hw_frames 16 -init_hw_device cuda=cuda -filter_hw_device cuda -i ./sample.mp4 -t 30.0 -c:v hevc_nvenc -b:v 6M -preset p4 -rc vbr -movflags +faststart -c:a copy /tmp/tmpehv_nft4.mp4 diff --git a/watermarking_bench/run_nvenc_bench.sh b/watermarking_bench/run_nvenc_bench.sh new file mode 100644 index 00000000..c6ed88d3 --- /dev/null +++ b/watermarking_bench/run_nvenc_bench.sh @@ 
-0,0 +1,342 @@
+#!/usr/bin/env bash
+# Reproducible NVENC benchmark driver: checks the toolchain, (re)writes
+# gpu_bench.py into ${HOME}/bench, generates a sample clip if the input file
+# is missing, runs three encode trials and previews the resulting CSV.
+#
+# Usage: ./run_nvenc_bench.sh [INPUT_VIDEO]   (default: ${HOME}/bench/sample.mp4)
+# Environment: DURATION = seconds per trial (default 8),
+#              REPEAT   = repeats per trial (default 1).
+set -euo pipefail
+
+# --- Config ---
+BENCH_DIR="${HOME}/bench"
+INPUT="${1:-${BENCH_DIR}/sample.mp4}"
+DURATION="${DURATION:-8}" # seconds per trial
+REPEAT="${REPEAT:-1}" # repeats per trial
+
+# Anchor a relative INPUT to the caller's directory before the cd below;
+# otherwise it would be looked up inside BENCH_DIR, not be found, and the
+# generated test clip would silently be benchmarked instead.
+case "$INPUT" in
+  /*) ;;
+  *) INPUT="${PWD}/${INPUT}" ;;
+esac
+
+mkdir -p "$BENCH_DIR"
+cd "$BENCH_DIR"
+
+echo "==[ NVENC Bench Repro ]=="
+command -v nvidia-smi >/dev/null || { echo "nvidia-smi missing"; exit 1; }
+command -v ffmpeg >/dev/null || { echo "ffmpeg missing"; exit 1; }
+command -v python3 >/dev/null || { echo "python3 missing"; exit 1; }
+
+echo "GPU:"
+nvidia-smi --query-gpu=name,memory.total,driver_version --format=csv,noheader
+echo
+echo "Encoders (ffmpeg):"
+ffmpeg -hide_banner -encoders | grep -E 'nvenc|av1' || true
+echo
+echo "Filters (ffmpeg):"
+ffmpeg -hide_banner -filters | grep -E 'scale_(npp|cuda)|overlay_cuda' || true
+echo
+
+# --- Make or update a working gpu_bench.py (GPU-first, safe bridges, skip missing encoders) ---
+cat > gpu_bench.py <<'PY'
+#!/usr/bin/env python3
+"""NVENC transcode benchmark: prints a JSON report and optionally writes a CSV."""
+import argparse, datetime, json, os, re, shutil, subprocess, sys, tempfile, csv
+from typing import List, Dict, Any, Optional, Tuple
+
+def which_ffmpeg() -> str:
+    """Return the ffmpeg executable found on PATH, or exit with an error message."""
+    p = shutil.which("ffmpeg")
+    if not p:
+        sys.exit("ffmpeg not found on PATH. Use Docker image with NVENC or install FFmpeg with NVENC.")
+    return p
+
+def run(cmd: List[str]) -> subprocess.CompletedProcess:
+    """Run cmd without stdin, capturing stdout and stderr together as text."""
+    return subprocess.run(cmd, stdin=subprocess.DEVNULL, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)
+
+def has_encoder(ffmpeg: str, enc: str) -> bool:
+    """Return True if enc occurs as a whole word in the 'ffmpeg -encoders' listing."""
+    out = run([ffmpeg, "-hide_banner", "-encoders"]).stdout
+    return re.search(rf"\b{re.escape(enc)}\b", out) is not None
+
+def has_filter(ffmpeg: str, name: str) -> bool:
+    """Return True if name occurs space-delimited in the 'ffmpeg -filters' listing."""
+    out = run([ffmpeg, "-hide_banner", "-filters"]).stdout
+    return (f" {name} " in out)
+
+def gpu_info() -> Dict[str, Any]:
+    """Return name, total memory and driver version of the first GPU listed by nvidia-smi."""
+    try:
+        out = run(["nvidia-smi", "--query-gpu=name,memory.total,driver_version", "--format=csv,noheader,nounits"]).stdout.strip()
+        name, mem, drv = [x.strip() for x in out.splitlines()[0].split(",")]
+        return {"name": name, "memory_total_mb": int(mem), "driver_version": drv}
+    except Exception:
+        # Best effort: nvidia-smi missing or its output unparsable -> report an unknown GPU.
+        return {"name": None, "memory_total_mb": None, "driver_version": None}
+
+def parse_progress(log: str) -> Dict[str, Any]:
+    """Extract fps, speed and frame count from the last progress line of an ffmpeg log."""
+    lines = [ln for ln in log.splitlines() if ("fps=" in ln or "speed=" in ln or "frame=" in ln)]
+    fps = speed = frames = None
+    if lines:
+        last = lines[-1]
+        m = re.search(r"fps=\s*([0-9]+(?:\.[0-9]+)?)", last); fps = float(m.group(1)) if m else None
+        m = re.search(r"speed=\s*([0-9]+(?:\.[0-9]+)?)x", last); speed = float(m.group(1)) if m else None
+        m = re.search(r"frame=\s*([0-9]+)", last); frames = int(m.group(1)) if m else None
+    return {"fps": fps, "speed_x": speed, "frames": frames}
+
+def build_vf_or_complex(ffmpeg: str, scale: Optional[str], wm_path: Optional[str], overlay: str, want_gpu_decode: bool) -> Tuple[List[str], str]:
+    """Pick scale/overlay filters and return (ffmpeg filter args, used-filter label).
+
+    Without a watermark a plain -vf chain is built; with one, a -filter_complex
+    graph mapped to [vout]. An empty argument list means no filtering is needed.
+    NOTE(review): the fallback graphs start with hwdownload even when
+    want_gpu_decode is False; presumably that needs hardware frames -- confirm
+    that --decode cpu works together with scale/watermark.
+    """
+    used = []
+    vf_args: List[str] = []
+    complex_graph = ""
+
+    have_scale_npp = has_filter(ffmpeg, "scale_npp")
+    have_scale_cuda = has_filter(ffmpeg, "scale_cuda")
+    have_overlay_cuda = has_filter(ffmpeg, "overlay_cuda")
+
+    if not wm_path:
+        if scale:
+            if want_gpu_decode and have_scale_npp:
+                vf_args = ["-vf", f"scale_npp={scale}"]; used.append("scale_npp")
+            elif want_gpu_decode and have_scale_cuda:
+                vf_args = ["-vf", f"scale_cuda={scale}"]; used.append("scale_cuda")
+            else:
+                vf_args = ["-vf", f"hwdownload,format=nv12,scale={scale},hwupload_cuda"]
+                used.append("scale(cpu)+hwdownload+hwupload_cuda")
+        return (vf_args, "+".join(used))
+
+    # watermark path
+    if want_gpu_decode and have_overlay_cuda:
+        if scale and have_scale_npp:
+            complex_graph = f"[0:v]scale_npp={scale}[v0];[v0][1:v]overlay_cuda={overlay}[vout]"
+            used += ["scale_npp","overlay_cuda"]
+        elif scale and have_scale_cuda:
+            complex_graph = f"[0:v]scale_cuda={scale}[v0];[v0][1:v]overlay_cuda={overlay}[vout]"
+            used += ["scale_cuda","overlay_cuda"]
+        elif scale:
+            complex_graph = f"[0:v]hwdownload,format=nv12,scale={scale},hwupload_cuda[v0];[v0][1:v]overlay_cuda={overlay}[vout]"
+            used += ["scale(cpu)+hwdownload+hwupload_cuda","overlay_cuda"]
+        else:
+            complex_graph = f"[0:v][1:v]overlay_cuda={overlay}[vout]"
+            used += ["overlay_cuda"]
+        return (["-filter_complex", complex_graph, "-map", "[vout]"], "+".join(used))
+
+    # CPU overlay fallback (explicit bridges)
+    if scale and want_gpu_decode and (have_scale_npp or have_scale_cuda):
+        scaler = "scale_npp" if have_scale_npp else "scale_cuda"
+        complex_graph = (
+            f"[0:v]{scaler}={scale}[v0gpu];"
+            f"[v0gpu]hwdownload,format=nv12[v0cpu];"
+            f"[v0cpu][1:v]overlay={overlay}[mix];"
+            f"[mix]hwupload_cuda[vout]"
+        )
+        used += [scaler, "hwdownload+overlay(cpu)+hwupload_cuda"]
+    elif scale:
+        complex_graph = (
+            f"[0:v]hwdownload,format=nv12,scale={scale}[v0cpu];"
+            f"[v0cpu][1:v]overlay={overlay}[mix];"
+            f"[mix]hwupload_cuda[vout]"
+        )
+        used += ["scale(cpu)+overlay(cpu)+hwupload_cuda"]
+    else:
+        complex_graph = (
+            f"[0:v]hwdownload,format=nv12[v0cpu];"
+            f"[v0cpu][1:v]overlay={overlay}[mix];"
+            f"[mix]hwupload_cuda[vout]"
+        )
+        used += ["overlay(cpu)+hwupload_cuda"]
+
+    return (["-filter_complex", complex_graph, "-map", "[vout]"], "+".join(used))
+
+def transcode_once(ffmpeg: str, inp: str, outp: str, codec: str, bitrate: str, preset: str,
+                   duration: Optional[float], scale: Optional[str], wm_path: Optional[str],
+                   overlay_pos: str, decode_mode: str = "gpu") -> Dict[str, Any]:
+    """Run one ffmpeg encode of inp to outp and return its metrics.
+
+    Returns a dict with the argv, filter label, last 15 log lines, wall time in
+    microseconds, parsed fps/speed/frames and the output size in bytes.
+    Raises RuntimeError if the encoder is missing or ffmpeg exits non-zero.
+    """
+    if not has_encoder(ffmpeg, codec):
+        raise RuntimeError(f"encoder '{codec}' not available; check your ffmpeg build (NVENC/AV1).")
+
+    want_gpu_decode = (decode_mode == "gpu")
+    args = [ffmpeg, "-hide_banner", "-y", "-vsync", "0"]
+
+    if want_gpu_decode:
+        args += ["-hwaccel", "cuda", "-hwaccel_output_format", "cuda", "-extra_hw_frames", "16",
+                 "-init_hw_device", "cuda=cuda", "-filter_hw_device", "cuda"]
+
+    args += ["-i", inp]
+    if wm_path:
+        args += ["-loop", "1", "-i", wm_path]
+    # -t follows the inputs, so it limits the output duration.
+    if duration:
+        args += ["-t", str(duration)]
+
+    filt_args, filter_used = build_vf_or_complex(ffmpeg, scale, wm_path, overlay_pos, want_gpu_decode)
+    args += filt_args
+
+    args += ["-c:v", codec, "-b:v", bitrate, "-preset", preset, "-rc", "vbr", "-movflags", "+faststart"]
+    args += ["-c:a", "copy"]
+    args += [outp]
+
+    t0 = datetime.datetime.now()
+    proc = run(args)
+    t1 = datetime.datetime.now()
+    if proc.returncode != 0:
+        raise RuntimeError("ffmpeg failed:\n" + proc.stdout + f"\n\nARGS:\n{' '.join(args)}")
+
+    parsed = parse_progress(proc.stdout)
+    size = os.path.getsize(outp) if os.path.exists(outp) else 0
+    return {
+        "args": args,
+        "filter_used": filter_used,
+        "stdout_tail": "\n".join(proc.stdout.splitlines()[-15:]),
+        "compute_time_us": (t1 - t0) / datetime.timedelta(microseconds=1),
+        "fps": parsed["fps"],
+        "speed_x": parsed["speed_x"],
+        "frames": parsed["frames"],
+        "output_size_bytes": size
+    }
+
+def main():
+    """Parse the CLI, run every trial --repeat times, print JSON and write the optional CSV."""
+    ap = argparse.ArgumentParser(description="GPU NVENC benchmark.")
+    ap.add_argument("--input", required=True)
+    ap.add_argument("--duration", type=float, default=None)
+    ap.add_argument("--repeat", type=int, default=1)
+    ap.add_argument("--warmup", action="store_true")
+    ap.add_argument("--csv", default=None)
+    ap.add_argument("--watermark", default=None)
+    ap.add_argument("--overlay", default="main_w/2-overlay_w/2:main_h/2-overlay_h/2")
+    ap.add_argument("--decode", choices=["gpu","cpu"], default="gpu")
+    ap.add_argument("--trials", nargs="+", default=[
+        "codec=h264_nvenc,bitrate=5M,preset=p5",
+        "codec=h264_nvenc,bitrate=12M,preset=p1,scale=1920:1080",
+        "codec=hevc_nvenc,bitrate=6M,preset=p4",
+        # "codec=av1_nvenc,bitrate=3M,preset=p5", # only if available
+    ])
+    args = ap.parse_args()
+
+    ffmpeg = which_ffmpeg()
+    gi = gpu_info()
+
+    def parse_trial(s: str) -> Dict[str, str]:
+        d: Dict[str, str] = {}
+        for kv in s.split(","):
+            k, v = kv.split("=", 1); d[k.strip()] = v.strip()
+        return d
+
+    trial_specs = [parse_trial(s) for s in args.trials]
+
+    # Warmup with first available encoder
+    if args.warmup:
+        warm = next((t for t in trial_specs if has_encoder(ffmpeg, t.get("codec","h264_nvenc"))), None)
+        if warm:
+            with tempfile.NamedTemporaryFile(suffix=".mp4", delete=True) as tmp:
+                _ = transcode_once(ffmpeg, args.input, tmp.name,
+                                   warm.get("codec","h264_nvenc"),
+                                   warm.get("bitrate","5M"),
+                                   warm.get("preset","p5"),
+                                   args.duration,
+                                   warm.get("scale"),
+                                   args.watermark,
+                                   args.overlay,
+                                   args.decode)
+
+    results = []; idx = 0
+    for spec in trial_specs:
+        for _ in range(args.repeat):
+            if not has_encoder(ffmpeg, spec.get("codec","h264_nvenc")):
+                results.append({
+                    "trial_index": idx, "codec": spec.get("codec"), "bitrate": spec.get("bitrate"),
+                    "preset": spec.get("preset"), "scale_filter": "", "fps": None, "speed_x": None,
+                    "frames": None, "compute_time_us": 0, "output_size_bytes": 0,
+                    "stdout_tail": "SKIPPED: encoder not available", "argv": "", "status": "skipped"
+                }); idx += 1; continue
+            with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp:
+                outp = tmp.name
+            try:
+                res = transcode_once(ffmpeg, args.input, outp,
+                                     spec.get("codec","h264_nvenc"),
+                                     spec.get("bitrate","5M"),
+                                     spec.get("preset","p5"),
+                                     args.duration,
+                                     spec.get("scale"),
+                                     args.watermark,
+                                     args.overlay,
+                                     args.decode)
+            finally:
+                # The size was already read in transcode_once; always drop the
+                # temporary output, even when the encode failed.
+                try: os.remove(outp)
+                except OSError: pass
+            results.append({
+                "trial_index": idx, "codec": spec.get("codec"), "bitrate": spec.get("bitrate"),
+                "preset": spec.get("preset"), "scale_filter": res["filter_used"], "fps": res["fps"],
+                "speed_x": res["speed_x"], "frames": res["frames"],
+                "compute_time_us": res["compute_time_us"], "output_size_bytes": res["output_size_bytes"],
+                "stdout_tail": res["stdout_tail"], "argv": " ".join(res["args"]), "status": "ok"
+            })
+            idx += 1
+
+    report = {"gpu": gi, "ffmpeg_path": ffmpeg, "trial_count": len(results), "results": results}
+    print(json.dumps(report, indent=2))
+
+    if args.csv and results:
+        with open(args.csv, "w", newline="") as f:
+            w = csv.DictWriter(f, fieldnames=list(results[0].keys()))
+            w.writeheader(); w.writerows(results)
+
+if __name__ == "__main__":
+    main()
+PY
+
+chmod +x gpu_bench.py
+
+# --- Provide a sample 4K clip if missing ---
+if [[ ! -f "$INPUT" ]]; then
+  echo "No input provided or file missing. Creating ${INPUT} (4K, 20s, tone + test pattern)..."
+  ffmpeg -hide_banner -y \
+    -f lavfi -i testsrc2=size=3840x2160:rate=30 \
+    -f lavfi -i sine=frequency=1000:sample_rate=48000 \
+    -t 20 \
+    -c:v libx264 -pix_fmt yuv420p -b:v 600k \
+    -c:a aac -b:a 96k \
+    "$INPUT"
+fi
+
+# --- Run the benchmark ---
+TS="$(date +%Y%m%d_%H%M%S)"
+CSV="results_${TS}.csv"
+
+echo
+echo "Running GPU NVENC benchmark..."
+./gpu_bench.py \
+  --input "$INPUT" \
+  --duration "$DURATION" \
+  --repeat "$REPEAT" \
+  --decode gpu \
+  --csv "$CSV" \
+  --trials \
+    "codec=h264_nvenc,bitrate=5M,preset=p5" \
+    "codec=h264_nvenc,bitrate=12M,preset=p1,scale=1920:1080" \
+    "codec=hevc_nvenc,bitrate=6M,preset=p4"
+
+echo
+echo "Done. CSV saved at: $BENCH_DIR/$CSV"
+echo "Preview:"
+(head -n 1 "$CSV" && tail -n +2 "$CSV" | sed -n '1,3p') | sed 's/,/ | /g'
diff --git a/watermarking_bench/sample.mp4 b/watermarking_bench/sample.mp4
new file mode 100644
index 00000000..909679fc
Binary files /dev/null and b/watermarking_bench/sample.mp4 differ
diff --git a/watermarking_bench/watermark.png b/watermarking_bench/watermark.png
new file mode 100644
index 00000000..8a6d9e54
Binary files /dev/null and b/watermarking_bench/watermark.png differ
diff --git a/watermarking_bench/watermarking_readme.md b/watermarking_bench/watermarking_readme.md
new file mode 100644
index 00000000..f522c717
--- /dev/null
+++ b/watermarking_bench/watermarking_readme.md
@@ -0,0 +1,9 @@
+### Running the NVENC Benchmark
+
+```bash
+chmod +x run_nvenc_bench.sh
+# Run it with the default test video (auto-generated in ~/bench/sample.mp4):
+./run_nvenc_bench.sh
+# Run it on your own input video:
+./run_nvenc_bench.sh /path/to/your/video.mp4
+```