We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent fd45550 commit 8977ffb Copy full SHA for 8977ffb
csrc/fused_qknorm_rope_kernel.cu
@@ -37,6 +37,16 @@
37
38
#ifdef USE_ROCM
39
#define FINAL_MASK 0xffffffffffffffffULL  // ROCm build: all-ones mask, 16 hex digits (64 bits)
40
+
41
+ #if defined(HIP_VERSION) && HIP_VERSION < 70000000
42
+// ROCm releases before 7.0 do not define __syncwarp, so supply one here. The
43
+// body below is copied from the ROCm 7.0 implementation.
44
+__device__ inline void __syncwarp() {
45
+ __builtin_amdgcn_fence(__ATOMIC_RELEASE, "wavefront");
46
+ __builtin_amdgcn_wave_barrier();
47
+ __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "wavefront");
48
+}
49
+ #endif
50
#else
51
#define FINAL_MASK 0xffffffff  // non-ROCm build: all-ones mask, 8 hex digits (32 bits)
52
#endif
0 commit comments