@@ -24,9 +24,9 @@ mkdir -p "$2"
24
24
# Resolve the first and second script arguments to absolute paths.
OUT=$(realpath "$1")
MNT=$(realpath "$2")

# Remove the *.log, *.exit and *.md files left in $OUT.
# ${OUT:?} aborts the script when OUT is empty (e.g. realpath failed), so the
# globs can never be expanded against the filesystem root.
rm -vf -- "${OUT:?}"/*.log
rm -vf -- "${OUT:?}"/*.exit
rm -vf -- "${OUT:?}"/*.md

# Continue from the parent of the directory that contains this script.
sd=$(dirname "$0")
cd "$sd/../" || exit 1
50
50
51
51
# Extra CMake arguments; each GG_BUILD_* switch appends its own flags to this
# string (presumably expanded unquoted on the cmake command line - confirm
# against the build helpers).
CMAKE_EXTRA="-DWHISPER_FATAL_WARNINGS=ON"

# GG_BUILD_METAL: any non-empty value turns the Metal backend on.
if [ -n "${GG_BUILD_METAL}" ]; then
    CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_METAL=ON"
fi
56
+
53
57
# GG_BUILD_CUDA: any non-empty value turns the CUDA backend on and selects the
# CUDA architecture(s) to compile for.
if [ -n "${GG_BUILD_CUDA}" ]; then
    CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_CUDA=ON"

    if command -v nvidia-smi >/dev/null 2>&1; then
        # Compute capability of the first GPU listed, with blanks and the dot
        # stripped (e.g. "8.6" -> "86").
        CUDA_ARCH=$(nvidia-smi --query-gpu=compute_cap --format=csv,noheader,nounits 2>/dev/null | head -1 | tr -d ' .')
        # Only trust the value when it is purely numeric; anything else
        # (empty output, error text) falls back to a fixed architecture list.
        if [[ -n "$CUDA_ARCH" && "$CUDA_ARCH" =~ ^[0-9]+$ ]]; then
            CMAKE_EXTRA="${CMAKE_EXTRA} -DCMAKE_CUDA_ARCHITECTURES=${CUDA_ARCH}"
        else
            echo "Warning: Using fallback CUDA architectures"
            CMAKE_EXTRA="${CMAKE_EXTRA} -DCMAKE_CUDA_ARCHITECTURES=61;70;75;80;86;89"
        fi
    else
        echo "Error: nvidia-smi not found, cannot build with CUDA"
        exit 1
    fi
fi
73
+
74
# GG_BUILD_ROCM: any non-empty value turns the HIP backend on.
# GG_BUILD_AMDGPU_TARGETS must then name the GPU architecture(s); the script
# exits with status 1 when it is missing.
if [ -n "${GG_BUILD_ROCM}" ]; then
    CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_HIP=ON"
    if [ -z "${GG_BUILD_AMDGPU_TARGETS}" ]; then
        echo "Missing GG_BUILD_AMDGPU_TARGETS, please set it to your GPU architecture (e.g. gfx90a, gfx1100, etc.)"
        exit 1
    fi

    CMAKE_EXTRA="${CMAKE_EXTRA} -DAMDGPU_TARGETS=${GG_BUILD_AMDGPU_TARGETS}"
fi
56
83
57
84
if [ ! -z ${GG_BUILD_SYCL} ]; then
@@ -60,28 +87,38 @@ if [ ! -z ${GG_BUILD_SYCL} ]; then
60
87
echo " source /opt/intel/oneapi/setvars.sh"
61
88
exit 1
62
89
fi
63
-
64
- CMAKE_EXTRA=" ${CMAKE_EXTRA} -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON"
90
+ # Use only main GPU
91
+ export ONEAPI_DEVICE_SELECTOR=" level_zero:0"
92
+ # Enable sysman for correct memory reporting
93
+ export ZES_ENABLE_SYSMAN=1
94
+ # to circumvent precision issues on CPY operations
95
+ export SYCL_PROGRAM_COMPILE_OPTIONS=" -cl-fp32-correctly-rounded-divide-sqrt"
96
+ CMAKE_EXTRA=" ${CMAKE_EXTRA} -DGGML_SYCL=1 -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON"
65
97
fi
66
98
67
- if [ ! -z ${GG_BUILD_OPENVINO} ]; then
68
- CMAKE_EXTRA=" ${CMAKE_EXTRA} -DWHISPER_OPENVINO=ON"
69
- fi
99
# GG_BUILD_VULKAN: any non-empty value turns the Vulkan backend on.
if [ -n "${GG_BUILD_VULKAN}" ]; then
    CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_VULKAN=1"

    # if on Mac, disable METAL (and BLAS); the right-hand side is an unquoted
    # glob so any OSTYPE beginning with "darwin" matches
    if [[ "$OSTYPE" == darwin* ]]; then
        CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_METAL=OFF -DGGML_BLAS=OFF"
    fi
fi
74
108
75
- if [ ! -z ${GG_BUILD_VULKAN } ]; then
76
- CMAKE_EXTRA=" ${CMAKE_EXTRA} -DGGML_VULKAN=ON "
109
# GG_BUILD_WEBGPU: any non-empty value turns the WebGPU backend on.
if [ -n "${GG_BUILD_WEBGPU}" ]; then
    CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_WEBGPU=1"
fi
78
112
79
- if [ ! -z ${GG_BUILD_BLAS} ]; then
80
- CMAKE_EXTRA=" ${CMAKE_EXTRA} -DGGML_BLAS=ON"
113
# GG_BUILD_MUSA: any non-empty value turns the MUSA backend on.
# MUSA_ARCH may be preset by the caller; it defaults to 21 when unset or empty.
if [ -n "${GG_BUILD_MUSA}" ]; then
    # Use qy1 by default (MTT S80)
    MUSA_ARCH=${MUSA_ARCH:-21}
    CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_MUSA=ON -DMUSA_ARCHITECTURES=${MUSA_ARCH}"
fi
82
118
83
- if [ ! -z ${GG_BUILD_COREML} ]; then
84
- CMAKE_EXTRA=" ${CMAKE_EXTRA} -DWHISPER_COREML=ON"
119
# GG_BUILD_NO_SVE: any non-empty value disables native CPU detection and pins
# the ARM architecture string below.
if [ -n "${GG_BUILD_NO_SVE}" ]; then
    # arm 9 and newer enables sve by default, adjust these flags depending on the cpu used
    CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_NATIVE=OFF -DGGML_CPU_ARM_ARCH=armv8.5-a+fp16+i8mm"
fi
86
123
87
124
## helpers
@@ -178,7 +215,7 @@ function gg_run_ctest {
178
215
mode=$2
179
216
180
217
cd ${SRC}
181
-
218
+
182
219
rm -rf build-ci-${mode} && mkdir build-ci-${mode} && cd build-ci-${mode}
183
220
184
221
set -e
@@ -219,7 +256,7 @@ function gg_run_bench {
219
256
echo " Running memcpy benchmark"
220
257
(time ./build-ci-release/bin/whisper-bench -w 1 -t $BENCH_N_THREADS 2>&1 ) | tee -a $OUT /${ci} -memcpy.log
221
258
gg_check_last_command_status " $OUT /${ci} -memcpy.exit" " memcpy benchmark"
222
-
259
+
223
260
echo " Running ggml_mul_mat benchmark with $BENCH_N_THREADS threads"
224
261
(time ./build-ci-release/bin/whisper-bench -w 2 -t $BENCH_N_THREADS 2>&1 ) | tee -a $OUT /${ci} -mul_mat.log
225
262
gg_check_last_command_status " $OUT /${ci} -mul_mat.exit" " ggml_mul_mat benchmark"
@@ -233,6 +270,8 @@ function gg_run_bench {
233
270
printf " | %16s | %13s | %3s | %3s | %7s | %7s | %7s | %7s | %7s |\n" " ---" " ---" " ---" " ---" " ---" " ---" " ---" " ---" " ---"
234
271
} | tee -a $OUT /${ci} -models-table.log
235
272
273
+ res=0
274
+
236
275
# run benchmark for each model
237
276
for model in " ${MODELS[@]} " ; do
238
277
echo " Benchmarking model: $model "
@@ -283,8 +322,11 @@ function gg_run_bench {
283
322
| tee -a $OUT /${ci} -models-table.log
284
323
else
285
324
echo " Benchmark failed for model: $model " | tee -a $OUT /${ci} -bench-errors.log
325
+ res=1
286
326
fi
287
327
done
328
+
329
+ return $res
288
330
}
289
331
290
332
function gg_sum_bench {
@@ -326,11 +368,12 @@ ret=0
326
368
# Main sequence. Every step is skipped once ret is non-zero; ret is initialised
# just above this block and is presumably updated by the gg_* helpers defined
# elsewhere in the file (confirm there).
for model in "${MODELS[@]}"; do
    test "$ret" -eq 0 && gg_download_model "${model}"
done

test "$ret" -eq 0 && gg_run ctest debug
test "$ret" -eq 0 && gg_run ctest release

test "$ret" -eq 0 && gg_run bench

# Print the README.md found in $OUT, then exit with the accumulated status.
cat "$OUT/README.md"

exit "$ret"
0 commit comments