-
Notifications
You must be signed in to change notification settings - Fork 91
Jessie/video watermarking #259
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
f8577e7
dfaa14a
7f4f6c9
1653b7c
f5e7ab7
aa3483f
fa7e76e
51713f4
e0cfbdc
f534a53
7057465
44c8bcb
e53cfde
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1 @@ | ||
|
|
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,15 @@ | ||
# NVENC-enabled FFmpeg base image
FROM jrottenberg/ffmpeg:6.1-nvidia

# Python for gpu_bench.py
RUN apt-get update && apt-get install -y --no-install-recommends \
    python3 python3-pip python3-venv ca-certificates && \
    rm -rf /var/lib/apt/lists/*

WORKDIR /app
# COPY sources are resolved against the build-context root. Per review, the
# files are NOT in the context root: gpu_bench.py lives in watermarking_bench/
# and run.sh next to this Dockerfile. Paths below assume `docker build` is
# invoked from the repository root (context = ".").
COPY watermarking_bench/gpu_bench.py /app/gpu_bench.py
COPY benchmarks/200.multimedia/225.video-watermarking-gpu/run.sh /app/run.sh
RUN chmod +x /app/gpu_bench.py /app/run.sh

# default entrypoint lets SeBS simply "docker run" it
ENTRYPOINT ["/app/run.sh"]
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,14 @@ | ||
# Multi-arch FFmpeg base (CPU only)
FROM jrottenberg/ffmpeg:6.1-ubuntu

# Python for gpu_bench.py
RUN apt-get update && apt-get install -y --no-install-recommends \
    python3 python3-pip ca-certificates && \
    rm -rf /var/lib/apt/lists/*

WORKDIR /app
# COPY sources are resolved against the build-context root. Per review, the
# files are NOT in the context root: gpu_bench.py lives in watermarking_bench/
# and run.sh next to this Dockerfile. Paths below assume `docker build` is
# invoked from the repository root (context = ".").
COPY watermarking_bench/gpu_bench.py /app/gpu_bench.py
COPY benchmarks/200.multimedia/225.video-watermarking-gpu/run.sh /app/run.sh
RUN chmod +x /app/gpu_bench.py /app/run.sh

ENTRYPOINT ["/app/run.sh"]
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,307 @@ | ||
| #!/usr/bin/env python3 | ||
| import argparse, datetime, json, os, re, shutil, subprocess, sys, tempfile, csv | ||
| from typing import List, Dict, Any, Optional, Tuple | ||
|
|
||
| # --- helpers --------------------------------------------------------------- | ||
|
|
||
def which_ffmpeg() -> str:
    """Locate the ffmpeg binary on PATH; exit with a diagnostic if absent."""
    path = shutil.which("ffmpeg")
    if path is None:
        sys.exit("ffmpeg not found on PATH. Use Docker image with NVENC or install FFmpeg with NVENC.")
    return path
|
|
||
def run(cmd: List[str]) -> subprocess.CompletedProcess:
    """Execute *cmd* with stderr folded into stdout; never raises on nonzero exit."""
    return subprocess.run(
        cmd,
        stdin=subprocess.DEVNULL,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
    )
|
|
||
def has_encoder(ffmpeg: str, enc: str) -> bool:
    """Report whether encoder *enc* appears in `ffmpeg -encoders` output."""
    listing = run([ffmpeg, "-hide_banner", "-encoders"]).stdout
    return bool(re.search(rf"\b{re.escape(enc)}\b", listing))
|
|
||
def has_filter(ffmpeg: str, name: str) -> bool:
    """Report whether filter *name* appears in `ffmpeg -filters` output."""
    listing = run([ffmpeg, "-hide_banner", "-filters"]).stdout
    return f" {name} " in listing
|
|
||
def gpu_info() -> Dict[str, Any]:
    """Query the first GPU via nvidia-smi; return all-None fields when unavailable."""
    try:
        raw = run(
            ["nvidia-smi", "--query-gpu=name,memory.total,driver_version",
             "--format=csv,noheader,nounits"]
        ).stdout.strip()
        # Only the first GPU line is consulted; fields are comma-separated.
        name, mem, drv = [field.strip() for field in raw.splitlines()[0].split(",")]
        return {"name": name, "memory_total_mb": int(mem), "driver_version": drv}
    except Exception:
        # No nvidia-smi, no GPU, or unparseable output — degrade gracefully.
        return {"name": None, "memory_total_mb": None, "driver_version": None}
|
|
||
def parse_progress(log: str) -> Dict[str, Any]:
    """Extract fps, speed multiplier and frame count from ffmpeg's last status line."""
    result: Dict[str, Any] = {"fps": None, "speed_x": None, "frames": None}
    status_lines = [
        ln for ln in log.splitlines()
        if "fps=" in ln or "speed=" in ln or "frame=" in ln
    ]
    if status_lines:
        # ffmpeg rewrites the progress line repeatedly; the final one is totals.
        tail = status_lines[-1]
        fps_m = re.search(r"fps=\s*([0-9]+(?:\.[0-9]+)?)", tail)
        spd_m = re.search(r"speed=\s*([0-9]+(?:\.[0-9]+)?)x", tail)
        frm_m = re.search(r"frame=\s*([0-9]+)", tail)
        if fps_m:
            result["fps"] = float(fps_m.group(1))
        if spd_m:
            result["speed_x"] = float(spd_m.group(1))
        if frm_m:
            result["frames"] = int(frm_m.group(1))
    return result
|
|
||
| # --- filter planning ------------------------------------------------------- | ||
|
|
||
def build_vf_or_complex(
    ffmpeg: str,
    scale: Optional[str],
    wm_path: Optional[str],
    overlay: str,
    want_gpu_decode: bool
) -> Tuple[List[str], str]:
    """
    Build ffmpeg filter arguments for the requested scale/watermark combination.

    Returns (ffmpeg_args_for_filters, filter_used_string).
    CPU path: never uses hw* or *_cuda filters.
    GPU path: prefer scale_npp -> scale_cuda -> CPU scale with bridges; prefer overlay_cuda.

    Fix: every -filter_complex return now also maps "0:a?" so audio from the
    main input is preserved when a watermark is used. Previously only "[vout]"
    was mapped, which made the later "-c:a copy" drop the source audio.
    """
    used: List[str] = []

    def _complex_args(graph: str) -> List[str]:
        # "-map 0:a?" carries over audio from input 0 if present ("?" = optional,
        # so inputs without audio do not fail the command).
        return ["-filter_complex", graph, "-map", "[vout]", "-map", "0:a?"]

    # ---------- CPU-ONLY SHORT-CIRCUIT ----------
    if not want_gpu_decode:
        if not wm_path:
            if scale:
                return (["-vf", f"scale={scale}"], "scale(cpu)")
            return ([], "")
        # watermark present
        if scale:
            graph = f"[0:v]scale={scale}[v0];[v0][1:v]overlay={overlay}[vout]"
            used = ["scale(cpu)", "overlay(cpu)"]
        else:
            graph = f"[0:v][1:v]overlay={overlay}[vout]"
            used = ["overlay(cpu)"]
        return (_complex_args(graph), "+".join(used))
    # -------------------------------------------

    # From here on: GPU-preferred path
    have_scale_npp = has_filter(ffmpeg, "scale_npp")
    have_scale_cuda = has_filter(ffmpeg, "scale_cuda")
    have_overlay_cuda = has_filter(ffmpeg, "overlay_cuda")

    # No watermark case: a plain -vf chain suffices.
    if not wm_path:
        vf_args: List[str] = []
        if scale:
            if have_scale_npp:
                vf_args = ["-vf", f"scale_npp={scale}"]
                used.append("scale_npp")
            elif have_scale_cuda:
                vf_args = ["-vf", f"scale_cuda={scale}"]
                used.append("scale_cuda")
            else:
                # Bridge: download frames to system memory, scale on CPU, re-upload.
                vf_args = ["-vf", f"hwdownload,format=nv12,scale={scale},hwupload_cuda"]
                used.append("scale(cpu)+hwdownload+hwupload_cuda")
        return (vf_args, "+".join(used))

    # Watermark case with GPU overlay if available
    if have_overlay_cuda:
        if scale and have_scale_npp:
            graph = f"[0:v]scale_npp={scale}[v0];[v0][1:v]overlay_cuda={overlay}[vout]"
            used += ["scale_npp", "overlay_cuda"]
        elif scale and have_scale_cuda:
            graph = f"[0:v]scale_cuda={scale}[v0];[v0][1:v]overlay_cuda={overlay}[vout]"
            used += ["scale_cuda", "overlay_cuda"]
        elif scale:
            graph = (
                f"[0:v]hwdownload,format=nv12,scale={scale},hwupload_cuda[v0];"
                f"[v0][1:v]overlay_cuda={overlay}[vout]"
            )
            used += ["scale(cpu)+hwdownload+hwupload_cuda", "overlay_cuda"]
        else:
            graph = f"[0:v][1:v]overlay_cuda={overlay}[vout]"
            used += ["overlay_cuda"]
        return (_complex_args(graph), "+".join(used))

    # GPU decode + CPU overlay fallback (bridged)
    if scale and (have_scale_npp or have_scale_cuda):
        scaler = "scale_npp" if have_scale_npp else "scale_cuda"
        graph = (
            f"[0:v]{scaler}={scale}[v0gpu];"
            f"[v0gpu]hwdownload,format=nv12[v0cpu];"
            f"[v0cpu][1:v]overlay={overlay}[mix];"
            f"[mix]hwupload_cuda[vout]"
        )
        used += [scaler, "hwdownload+overlay(cpu)+hwupload_cuda"]
    elif scale:
        graph = (
            f"[0:v]hwdownload,format=nv12,scale={scale}[v0cpu];"
            f"[v0cpu][1:v]overlay={overlay}[mix];"
            f"[mix]hwupload_cuda[vout]"
        )
        used += ["scale(cpu)+overlay(cpu)+hwupload_cuda"]
    else:
        graph = (
            f"[0:v]hwdownload,format=nv12[v0cpu];"
            f"[v0cpu][1:v]overlay={overlay}[mix];"
            f"[mix]hwupload_cuda[vout]"
        )
        used += ["overlay(cpu)+hwupload_cuda"]

    return (_complex_args(graph), "+".join(used))
|
|
||
| # --- core ------------------------------------------------------------------ | ||
|
|
||
def transcode_once(
    ffmpeg: str,
    inp: str,
    outp: str,
    codec: str,
    bitrate: str,
    preset: str,
    duration: Optional[float],
    scale: Optional[str],
    wm_path: Optional[str],
    overlay_pos: str,
    decode_mode: str = "gpu"  # "gpu" or "cpu"
) -> Dict[str, Any]:
    """Run one ffmpeg transcode and return timing, progress and size metrics.

    Raises RuntimeError when the requested encoder is missing or ffmpeg fails.
    """
    if not has_encoder(ffmpeg, codec):
        raise RuntimeError(f"encoder '{codec}' not available; check your ffmpeg build (NVENC/AV1).")

    want_gpu_decode = decode_mode == "gpu"

    cmd = [ffmpeg, "-hide_banner", "-y", "-vsync", "0"]
    if want_gpu_decode:
        # Keep decode on GPU & use CUDA frames. Give NVDEC extra surfaces.
        cmd += ["-hwaccel", "cuda", "-hwaccel_output_format", "cuda", "-extra_hw_frames", "16"]
        # Helpful on some builds to make filters pick the right device
        cmd += ["-init_hw_device", "cuda=cuda", "-filter_hw_device", "cuda"]

    # Inputs: main video first, then the optional looping watermark image.
    cmd += ["-i", inp]
    if wm_path:
        cmd += ["-loop", "1", "-i", wm_path]

    if duration:
        cmd += ["-t", str(duration)]

    # Build filter arguments (plain -vf chain or -filter_complex graph).
    filt_args, filter_used = build_vf_or_complex(ffmpeg, scale, wm_path, overlay_pos, want_gpu_decode)
    cmd += filt_args

    # Encoder parameters; only NVENC encoders accept "-rc vbr".
    cmd += ["-c:v", codec, "-b:v", bitrate, "-preset", preset]
    if codec.endswith("_nvenc"):
        cmd += ["-rc", "vbr"]
    cmd += ["-movflags", "+faststart"]

    # NOTE(review): "-c:a copy" only carries audio if the filter args above also
    # map an audio stream (e.g. "-map 0:a?"); with "-map [vout]" alone the
    # source audio is dropped when a watermark/-filter_complex is in use.
    cmd += ["-c:a", "copy"]

    cmd += [outp]

    started = datetime.datetime.now()
    proc = run(cmd)
    finished = datetime.datetime.now()
    if proc.returncode != 0:
        raise RuntimeError("ffmpeg failed:\n" + proc.stdout + f"\n\nARGS:\n{' '.join(cmd)}")

    parsed = parse_progress(proc.stdout)
    size = os.path.getsize(outp) if os.path.exists(outp) else 0
    return {
        "args": cmd,
        "filter_used": filter_used,
        "stdout_tail": "\n".join(proc.stdout.splitlines()[-15:]),
        "compute_time_us": (finished - started) / datetime.timedelta(microseconds=1),
        "fps": parsed["fps"],
        "speed_x": parsed["speed_x"],
        "frames": parsed["frames"],
        "output_size_bytes": size,
    }
|
|
||
def main():
    """CLI entry point: parse trial specs, run every trial, emit a JSON report
    (and an optional CSV summary)."""
    parser = argparse.ArgumentParser(description="GPU NVENC benchmark.")
    parser.add_argument("--input", required=True, help="Path to input video")
    parser.add_argument("--duration", type=float, default=None, help="Trim to first N seconds")
    parser.add_argument("--repeat", type=int, default=1, help="Repeat each trial")
    parser.add_argument("--warmup", action="store_true", help="Run one warmup trial (not recorded)")
    parser.add_argument("--csv", default=None, help="Optional path to write CSV summary")
    parser.add_argument("--watermark", default=None, help="Path to watermark PNG (optional)")
    parser.add_argument("--overlay", default="main_w/2-overlay_w/2:main_h/2-overlay_h/2",
                        help="Overlay position (ffmpeg expr), e.g. '10:10' or 'main_w-overlay_w-10:10'")
    parser.add_argument("--decode", choices=["gpu", "cpu"], default="gpu",
                        help="Decode on GPU (default) or CPU.")
    parser.add_argument("--trials", nargs="+", default=[
        "codec=h264_nvenc,bitrate=5M,preset=p5",
        "codec=h264_nvenc,bitrate=12M,preset=p1,scale=1920:1080",
        "codec=hevc_nvenc,bitrate=6M,preset=p4",
        # "codec=av1_nvenc,bitrate=3M,preset=p5",  # include only if available
    ], help="List like codec=h264_nvenc,bitrate=5M,preset=p5[,scale=WxH]")
    args = parser.parse_args()

    ffmpeg = which_ffmpeg()
    gi = gpu_info()

    def parse_trial(spec_str: str) -> Dict[str, str]:
        # "k1=v1,k2=v2" -> {"k1": "v1", "k2": "v2"}
        pairs = (kv.split("=", 1) for kv in spec_str.split(","))
        return {k.strip(): v.strip() for k, v in pairs}

    trial_specs = [parse_trial(s) for s in args.trials]

    # Optional warmup using the first trial spec; its result is discarded.
    if args.warmup and trial_specs:
        first = trial_specs[0]
        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=True) as tmp:
            transcode_once(ffmpeg, args.input, tmp.name,
                           first.get("codec", "h264_nvenc"),
                           first.get("bitrate", "5M"),
                           first.get("preset", "p5"),
                           args.duration,
                           first.get("scale"),
                           args.watermark,
                           args.overlay,
                           args.decode)

    results: List[Dict[str, Any]] = []
    idx = 0
    for spec in trial_specs:
        for _ in range(args.repeat):
            with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp:
                outp = tmp.name
                res = transcode_once(ffmpeg, args.input, outp,
                                     spec.get("codec", "h264_nvenc"),
                                     spec.get("bitrate", "5M"),
                                     spec.get("preset", "p5"),
                                     args.duration,
                                     spec.get("scale"),
                                     args.watermark,
                                     args.overlay,
                                     args.decode)
                results.append({
                    "trial_index": idx,
                    "codec": spec.get("codec"),
                    "bitrate": spec.get("bitrate"),
                    "preset": spec.get("preset"),
                    "scale_filter": res["filter_used"],
                    "fps": res["fps"],
                    "speed_x": res["speed_x"],
                    "frames": res["frames"],
                    "compute_time_us": res["compute_time_us"],
                    "output_size_bytes": res["output_size_bytes"],
                    "stdout_tail": res["stdout_tail"],
                    "argv": " ".join(res["args"]),
                })
                idx += 1
                try:
                    os.remove(outp)
                except OSError:
                    pass

    report = {
        "gpu": gi,
        "ffmpeg_path": ffmpeg,
        "trial_count": len(results),
        "results": results,
    }
    print(json.dumps(report, indent=2))

    if args.csv and results:
        with open(args.csv, "w", newline="") as f:
            writer = csv.DictWriter(f, fieldnames=list(results[0].keys()))
            writer.writeheader()
            writer.writerows(results)
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,16 @@ | ||
| trial_index,codec,bitrate,preset,scale_filter,fps,speed_x,frames,compute_time_us,output_size_bytes,stdout_tail,argv | ||
| 0,libx264,5M,medium,scale(cpu)+overlay(cpu),119.0,3.87,150,1376883.0,1352563,"[libx264 @ 0x555556068500] mb I I16..4: 82.9% 7.8% 9.4% | ||
| [libx264 @ 0x555556068500] mb P I16..4: 6.7% 1.3% 0.5% P16..4: 5.9% 1.6% 1.6% 0.0% 0.0% skip:82.5% | ||
| [libx264 @ 0x555556068500] mb B I16..4: 4.1% 0.8% 0.3% B16..8: 3.3% 0.7% 0.2% direct: 3.7% skip:86.9% L0:47.7% L1:35.3% BI:17.0% | ||
| [libx264 @ 0x555556068500] final ratefactor: -9.69 | ||
| [libx264 @ 0x555556068500] 8x8 transform intra:14.5% inter:28.8% | ||
| [libx264 @ 0x555556068500] coded y,uvDC,uvAC intra: 21.5% 47.4% 45.3% inter: 3.6% 7.7% 7.2% | ||
| [libx264 @ 0x555556068500] i16 v,h,dc,p: 98% 1% 0% 1% | ||
| [libx264 @ 0x555556068500] i8 v,h,dc,ddl,ddr,vr,hd,vl,hu: 99% 0% 0% 0% 0% 0% 0% 0% 0% | ||
| [libx264 @ 0x555556068500] i4 v,h,dc,ddl,ddr,vr,hd,vl,hu: 85% 8% 5% 0% 0% 0% 0% 0% 0% | ||
| [libx264 @ 0x555556068500] i8c dc,h,v,p: 7% 1% 86% 5% | ||
| [libx264 @ 0x555556068500] Weighted P-Frames: Y:0.0% UV:0.0% | ||
| [libx264 @ 0x555556068500] ref P L0: 64.0% 1.3% 16.0% 18.7% | ||
| [libx264 @ 0x555556068500] ref B L0: 68.9% 13.3% 17.9% | ||
| [libx264 @ 0x555556068500] ref B L1: 91.4% 8.6% | ||
| [libx264 @ 0x555556068500] kb/s:2158.66",/usr/local/bin/ffmpeg -hide_banner -y -vsync 0 -i /data/sample.mp4 -loop 1 -i /data/watermark.png -t 5.0 -filter_complex [0:v]scale=1280:720[v0];[v0][1:v]overlay=main_w/2-overlay_w/2:main_h/2-overlay_h/2[vout] -map [vout] -c:v libx264 -b:v 5M -preset medium -movflags +faststart -c:a copy /tmp/tmpr3ivnhpo.mp4 |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,16 @@ | ||
| ``` | ||
| # build on the NVIDIA host | ||
| docker build -t video-wm-gpu -f Dockerfile . | ||
|
|
||
| # run with GPU, mounting your data and output dirs | ||
| docker run --rm --gpus all \ | ||
| -v /path/to/serverless-benchmarks-data-dphpc/200.multimedia/225.video-watermarking-gpu:/data:ro \ | ||
| -v $PWD/out_gpu:/out \ | ||
| -e INPUT=/data/sample.mp4 \ | ||
| -e WATERMARK=/data/watermark.png \ | ||
| -e DURATION=8 \ | ||
| -e REPEAT=1 \ | ||
| -e DECODE=gpu \ | ||
| -e CSV=/out/results.csv \ | ||
| video-wm-gpu | ||
| ``` |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🧩 Analysis chain
Verify COPY source paths match actual file locations.
The Dockerfile copies
gpu_bench.pyandrun.shfrom the build context root (lines 10-11), but based on the PR summary,gpu_bench.pyis located inwatermarking_bench/directory. Confirm whether:watermarking_bench/gpu_bench.py🏁 Script executed:
Length of output: 284
To confirm the build-context assumptions, the file locations and directory structure were checked next, along with the contents of the files involved:
🏁 Script executed:
Length of output: 1338
Let me check the build documentation and run.sh to understand the intended build context:
Now let me check the build documentation and run.sh to determine the intended build pattern:
🏁 Script executed:
Length of output: 935
Fix COPY source paths to match actual file locations.
The Dockerfile's COPY commands (lines 10-11) assume both files are in the build context root, but they are located elsewhere:
gpu_bench.pyis atwatermarking_bench/gpu_bench.py(not in context root)run.shis atbenchmarks/200.multimedia/225.video-watermarking-gpu/run.sh(not in context root)Update to:
This assumes
docker buildis invoked from the repository root with the standard context (.).🤖 Prompt for AI Agents