@@ -14,10 +14,11 @@ define void @func1() {
1414; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0
1515; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
1616; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
17- ; GFX12-SDAG-NEXT: s_mov_b32 m0, 3
18- ; GFX12-SDAG-NEXT: s_barrier_join m0
1917; GFX12-SDAG-NEXT: s_mov_b32 m0, 0x70003
18+ ; GFX12-SDAG-NEXT: s_wait_storecnt 0x0
2019; GFX12-SDAG-NEXT: s_barrier_signal m0
20+ ; GFX12-SDAG-NEXT: s_mov_b32 m0, 3
21+ ; GFX12-SDAG-NEXT: s_barrier_join m0
2122; GFX12-SDAG-NEXT: s_barrier_wait 1
2223; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
2324;
@@ -29,12 +30,13 @@ define void @func1() {
2930; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
3031; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
3132; GFX12-GISEL-NEXT: s_mov_b32 m0, 0x70003
32- ; GFX12-GISEL-NEXT: s_barrier_join 3
33+ ; GFX12-GISEL-NEXT: s_wait_storecnt 0x0
3334; GFX12-GISEL-NEXT: s_barrier_signal m0
35+ ; GFX12-GISEL-NEXT: s_barrier_join 3
3436; GFX12-GISEL-NEXT: s_barrier_wait 1
3537; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
36- call void @llvm.amdgcn.s.barrier.join (ptr addrspace (3 ) @bar3 )
3738 call void @llvm.amdgcn.s.barrier.signal.var (ptr addrspace (3 ) @bar3 , i32 7 )
39+ call void @llvm.amdgcn.s.barrier.join (ptr addrspace (3 ) @bar3 )
3840 call void @llvm.amdgcn.s.barrier.wait (i16 1 )
3941 ret void
4042}
@@ -47,10 +49,11 @@ define void @func2() {
4749; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0
4850; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
4951; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
50- ; GFX12-SDAG-NEXT: s_mov_b32 m0, 1
51- ; GFX12-SDAG-NEXT: s_barrier_join m0
5252; GFX12-SDAG-NEXT: s_mov_b32 m0, 0x70001
53+ ; GFX12-SDAG-NEXT: s_wait_storecnt 0x0
5354; GFX12-SDAG-NEXT: s_barrier_signal m0
55+ ; GFX12-SDAG-NEXT: s_mov_b32 m0, 1
56+ ; GFX12-SDAG-NEXT: s_barrier_join m0
5457; GFX12-SDAG-NEXT: s_barrier_wait 1
5558; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
5659;
@@ -62,12 +65,13 @@ define void @func2() {
6265; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
6366; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
6467; GFX12-GISEL-NEXT: s_mov_b32 m0, 0x70001
65- ; GFX12-GISEL-NEXT: s_barrier_join 1
68+ ; GFX12-GISEL-NEXT: s_wait_storecnt 0x0
6669; GFX12-GISEL-NEXT: s_barrier_signal m0
70+ ; GFX12-GISEL-NEXT: s_barrier_join 1
6771; GFX12-GISEL-NEXT: s_barrier_wait 1
6872; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
69- call void @llvm.amdgcn.s.barrier.join (ptr addrspace (3 ) @bar2 )
7073 call void @llvm.amdgcn.s.barrier.signal.var (ptr addrspace (3 ) @bar2 , i32 7 )
74+ call void @llvm.amdgcn.s.barrier.join (ptr addrspace (3 ) @bar2 )
7175 call void @llvm.amdgcn.s.barrier.wait (i16 1 )
7276 ret void
7377}
@@ -98,9 +102,9 @@ define amdgpu_kernel void @kernel1(ptr addrspace(1) %out, ptr addrspace(3) %in)
98102; GFX12-SDAG-NEXT: s_barrier_signal m0
99103; GFX12-SDAG-NEXT: s_mov_b32 m0, s2
100104; GFX12-SDAG-NEXT: s_barrier_signal -1
105+ ; GFX12-SDAG-NEXT: s_barrier_signal_isfirst -1
101106; GFX12-SDAG-NEXT: s_barrier_join m0
102107; GFX12-SDAG-NEXT: s_mov_b32 m0, 2
103- ; GFX12-SDAG-NEXT: s_barrier_signal_isfirst -1
104108; GFX12-SDAG-NEXT: s_barrier_wait 1
105109; GFX12-SDAG-NEXT: s_barrier_leave
106110; GFX12-SDAG-NEXT: s_get_barrier_state s3, m0
@@ -151,11 +155,11 @@ define amdgpu_kernel void @kernel1(ptr addrspace(1) %out, ptr addrspace(3) %in)
151155; GFX12-GISEL-NEXT: s_barrier_signal m0
152156; GFX12-GISEL-NEXT: s_mov_b32 m0, s1
153157; GFX12-GISEL-NEXT: s_barrier_signal m0
154- ; GFX12-GISEL-NEXT: s_mov_b32 m0, s0
155158; GFX12-GISEL-NEXT: s_barrier_signal -1
156- ; GFX12-GISEL-NEXT: s_barrier_join m0
157159; GFX12-GISEL-NEXT: s_barrier_signal_isfirst -1
160+ ; GFX12-GISEL-NEXT: s_mov_b32 m0, s0
158161; GFX12-GISEL-NEXT: s_add_co_u32 s8, s12, 48
162+ ; GFX12-GISEL-NEXT: s_barrier_join m0
159163; GFX12-GISEL-NEXT: s_barrier_wait 1
160164; GFX12-GISEL-NEXT: s_barrier_leave
161165; GFX12-GISEL-NEXT: s_get_barrier_state s0, 2
@@ -190,8 +194,8 @@ define amdgpu_kernel void @kernel1(ptr addrspace(1) %out, ptr addrspace(3) %in)
190194 call void @llvm.amdgcn.s.barrier.signal.var (ptr addrspace (3 ) @bar , i32 12 )
191195 call void @llvm.amdgcn.s.barrier.signal.var (ptr addrspace (3 ) %in , i32 9 )
192196 call void @llvm.amdgcn.s.barrier.signal (i32 -1 )
193- call void @llvm.amdgcn.s.barrier.join (ptr addrspace (3 ) %in )
194197 %isfirst = call i1 @llvm.amdgcn.s.barrier.signal.isfirst (i32 -1 )
198+ call void @llvm.amdgcn.s.barrier.join (ptr addrspace (3 ) %in )
195199 call void @llvm.amdgcn.s.barrier.wait (i16 1 )
196200 call void @llvm.amdgcn.s.barrier.leave (i16 1 )
197201 %state = call i32 @llvm.amdgcn.s.get.named.barrier.state (ptr addrspace (3 ) @bar )
@@ -215,14 +219,14 @@ define amdgpu_kernel void @kernel2(ptr addrspace(1) %out, ptr addrspace(3) %in)
215219; GFX12-SDAG-NEXT: s_load_b64 s[12:13], s[6:7], 0x0
216220; GFX12-SDAG-NEXT: s_mov_b32 m0, 0x70002
217221; GFX12-SDAG-NEXT: s_add_nc_u64 s[8:9], s[4:5], 48
222+ ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
218223; GFX12-SDAG-NEXT: s_barrier_signal m0
219224; GFX12-SDAG-NEXT: s_mov_b32 m0, 2
220225; GFX12-SDAG-NEXT: s_mov_b64 s[4:5], s[0:1]
221226; GFX12-SDAG-NEXT: s_mov_b64 s[6:7], s[2:3]
222227; GFX12-SDAG-NEXT: s_mov_b32 s32, 0
223228; GFX12-SDAG-NEXT: s_barrier_join m0
224229; GFX12-SDAG-NEXT: s_barrier_wait 1
225- ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
226230; GFX12-SDAG-NEXT: s_swappc_b64 s[30:31], s[12:13]
227231; GFX12-SDAG-NEXT: s_endpgm
228232;
@@ -241,10 +245,10 @@ define amdgpu_kernel void @kernel2(ptr addrspace(1) %out, ptr addrspace(3) %in)
241245; GFX12-GISEL-NEXT: s_mov_b64 s[4:5], s[0:1]
242246; GFX12-GISEL-NEXT: s_mov_b64 s[6:7], s[2:3]
243247; GFX12-GISEL-NEXT: s_mov_b32 s32, 0
248+ ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
244249; GFX12-GISEL-NEXT: s_barrier_signal m0
245250; GFX12-GISEL-NEXT: s_barrier_join 2
246251; GFX12-GISEL-NEXT: s_barrier_wait 1
247- ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
248252; GFX12-GISEL-NEXT: s_swappc_b64 s[30:31], s[12:13]
249253; GFX12-GISEL-NEXT: s_endpgm
250254 call void @llvm.amdgcn.s.barrier.signal.var (ptr addrspace (3 ) @bar , i32 7 )
0 commit comments