Skip to content

Commit 2cef45d

Browse files
committed
AMDGPU: Figure out required AGPR count for inline asm
For now, just try to compute the minimum number of AGPRs required to allocate the inline asm. The attributor changes that turn this into an integer value are left for later.
1 parent 042ffe9 commit 2cef45d

File tree

2 files changed

+251
-7
lines changed

2 files changed

+251
-7
lines changed

llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp

Lines changed: 52 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1200,16 +1200,61 @@ AAAMDWavesPerEU &AAAMDWavesPerEU::createForPosition(const IRPosition &IRP,
12001200
llvm_unreachable("AAAMDWavesPerEU is only valid for function position");
12011201
}
12021202

1203-
static bool inlineAsmUsesAGPRs(const InlineAsm *IA) {
1204-
for (const auto &CI : IA->ParseConstraints()) {
1203+
/// Compute the minimum number of AGPRs required to allocate the inline asm.
1204+
static unsigned inlineAsmGetNumRequiredAGPRs(const InlineAsm *IA,
1205+
const CallBase &Call) {
1206+
unsigned ArgNo = 0;
1207+
unsigned ResNo = 0;
1208+
unsigned AGPRDefCount = 0;
1209+
unsigned AGPRUseCount = 0;
1210+
unsigned MaxPhysReg = 0;
1211+
const DataLayout &DL = Call.getFunction()->getParent()->getDataLayout();
1212+
1213+
for (const InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
1214+
Type *Ty = nullptr;
1215+
switch (CI.Type) {
1216+
case InlineAsm::isOutput: {
1217+
Ty = Call.getType();
1218+
if (auto *STy = dyn_cast<StructType>(Ty))
1219+
Ty = STy->getElementType(ResNo);
1220+
++ResNo;
1221+
break;
1222+
}
1223+
case InlineAsm::isInput: {
1224+
Ty = Call.getArgOperand(ArgNo++)->getType();
1225+
break;
1226+
}
1227+
case InlineAsm::isLabel:
1228+
continue;
1229+
case InlineAsm::isClobber:
1230+
// No operand type for a clobber; the physical register reference is parsed in the constraint-code loop below.
1231+
break;
1232+
}
1233+
12051234
for (StringRef Code : CI.Codes) {
1206-
Code.consume_front("{");
1207-
if (Code.starts_with("a"))
1208-
return true;
1235+
if (Code.starts_with("a")) {
1236+
// Virtual register, compute number of registers based on the type.
1237+
//
1238+
// We ought to be going through TargetLowering to get the number of
1239+
// registers, but we should avoid the dependence on CodeGen here.
1240+
unsigned RegCount = divideCeil(DL.getTypeSizeInBits(Ty), 32);
1241+
if (CI.Type == InlineAsm::isOutput) {
1242+
AGPRDefCount += RegCount;
1243+
if (CI.isEarlyClobber)
1244+
AGPRUseCount += RegCount;
1245+
} else
1246+
AGPRUseCount += RegCount;
1247+
} else {
1248+
// Physical register reference
1249+
auto [Kind, RegIdx, NumRegs] = AMDGPU::parseAsmConstraintPhysReg(Code);
1250+
if (Kind == 'a')
1251+
MaxPhysReg = std::max(MaxPhysReg, std::min(RegIdx + NumRegs, 256u));
1252+
}
12091253
}
12101254
}
12111255

1212-
return false;
1256+
unsigned MaxVirtReg = std::max(AGPRUseCount, AGPRDefCount);
1257+
return std::min(MaxVirtReg + MaxPhysReg, 256u);
12131258
}
12141259

12151260
// TODO: Migrate to range merge of amdgpu-agpr-alloc.
@@ -1251,7 +1296,7 @@ struct AAAMDGPUNoAGPR
12511296
const Function *Callee = dyn_cast<Function>(CalleeOp);
12521297
if (!Callee) {
12531298
if (const InlineAsm *IA = dyn_cast<InlineAsm>(CalleeOp))
1254-
return !inlineAsmUsesAGPRs(IA);
1299+
return inlineAsmGetNumRequiredAGPRs(IA, CB) == 0;
12551300
return false;
12561301
}
12571302

llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll

Lines changed: 199 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -251,6 +251,205 @@ define amdgpu_kernel void @indirect_calls_none_agpr(i1 %cond) {
251251
ret void
252252
}
253253

254+
define amdgpu_kernel void @kernel_uses_asm_virtreg_def_struct_0() {
255+
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_def_struct_0(
256+
; CHECK-SAME: ) #[[ATTR0]] {
257+
; CHECK-NEXT: [[DEF:%.*]] = call { i32, i32 } asm sideeffect "
258+
; CHECK-NEXT: ret void
259+
;
260+
%def = call {i32, i32} asm sideeffect "; def $0", "=a,=a"()
261+
ret void
262+
}
263+
264+
define amdgpu_kernel void @kernel_uses_asm_virtreg_use_struct_1() {
265+
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_use_struct_1(
266+
; CHECK-SAME: ) #[[ATTR0]] {
267+
; CHECK-NEXT: [[DEF:%.*]] = call { i32, <2 x i32> } asm sideeffect "
268+
; CHECK-NEXT: ret void
269+
;
270+
%def = call {i32, <2 x i32>} asm sideeffect "; def $0", "=a,=a"()
271+
ret void
272+
}
273+
274+
define amdgpu_kernel void @kernel_uses_asm_virtreg_use_struct_2() {
275+
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_use_struct_2(
276+
; CHECK-SAME: ) #[[ATTR0]] {
277+
; CHECK-NEXT: [[DEF:%.*]] = call { i32, <2 x i32> } asm sideeffect "
278+
; CHECK-NEXT: ret void
279+
;
280+
%def = call {i32, <2 x i32>} asm sideeffect "; def $0", "=a,=v"()
281+
ret void
282+
}
283+
284+
define amdgpu_kernel void @kernel_uses_asm_virtreg_ptr_ty() {
285+
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_ptr_ty(
286+
; CHECK-SAME: ) #[[ATTR0]] {
287+
; CHECK-NEXT: call void asm sideeffect "
288+
; CHECK-NEXT: ret void
289+
;
290+
call void asm sideeffect "; use $0", "a"(ptr poison)
291+
ret void
292+
}
293+
294+
define amdgpu_kernel void @kernel_uses_asm_virtreg_def_ptr_ty() {
295+
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_def_ptr_ty(
296+
; CHECK-SAME: ) #[[ATTR0]] {
297+
; CHECK-NEXT: [[DEF:%.*]] = call ptr asm sideeffect "
298+
; CHECK-NEXT: ret void
299+
;
300+
%def = call ptr asm sideeffect "; def $0", "=a"()
301+
ret void
302+
}
303+
304+
define amdgpu_kernel void @kernel_uses_asm_virtreg_def_vector_ptr_ty() {
305+
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_def_vector_ptr_ty(
306+
; CHECK-SAME: ) #[[ATTR0]] {
307+
; CHECK-NEXT: [[DEF:%.*]] = call <2 x ptr> asm sideeffect "
308+
; CHECK-NEXT: ret void
309+
;
310+
%def = call <2 x ptr> asm sideeffect "; def $0", "=a"()
311+
ret void
312+
}
313+
314+
define amdgpu_kernel void @kernel_uses_asm_physreg_def_struct_0() {
315+
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_physreg_def_struct_0(
316+
; CHECK-SAME: ) #[[ATTR0]] {
317+
; CHECK-NEXT: [[DEF:%.*]] = call { i32, i32 } asm sideeffect "
318+
; CHECK-NEXT: ret void
319+
;
320+
%def = call {i32, i32} asm sideeffect "; def $0", "={a0},={a[4:5]}"()
321+
ret void
322+
}
323+
324+
define amdgpu_kernel void @kernel_uses_asm_clobber() {
325+
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_clobber(
326+
; CHECK-SAME: ) #[[ATTR0]] {
327+
; CHECK-NEXT: call void asm sideeffect "
328+
; CHECK-NEXT: ret void
329+
;
330+
call void asm sideeffect "; clobber $0", "~{a4}"()
331+
ret void
332+
}
333+
334+
define amdgpu_kernel void @kernel_uses_asm_clobber_tuple() {
335+
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_clobber_tuple(
336+
; CHECK-SAME: ) #[[ATTR0]] {
337+
; CHECK-NEXT: call void asm sideeffect "
338+
; CHECK-NEXT: ret void
339+
;
340+
call void asm sideeffect "; clobber $0", "~{a[10:13]}"()
341+
ret void
342+
}
343+
344+
define amdgpu_kernel void @kernel_uses_asm_clobber_oob() {
345+
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_clobber_oob(
346+
; CHECK-SAME: ) #[[ATTR0]] {
347+
; CHECK-NEXT: call void asm sideeffect "
348+
; CHECK-NEXT: ret void
349+
;
350+
call void asm sideeffect "; clobber $0", "~{a256}"()
351+
ret void
352+
}
353+
354+
define amdgpu_kernel void @kernel_uses_asm_clobber_max() {
355+
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_clobber_max(
356+
; CHECK-SAME: ) #[[ATTR0]] {
357+
; CHECK-NEXT: call void asm sideeffect "
358+
; CHECK-NEXT: ret void
359+
;
360+
call void asm sideeffect "; clobber $0", "~{a255}"()
361+
ret void
362+
}
363+
364+
define amdgpu_kernel void @kernel_uses_asm_physreg_oob() {
365+
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_physreg_oob(
366+
; CHECK-SAME: ) #[[ATTR0]] {
367+
; CHECK-NEXT: call void asm sideeffect "
368+
; CHECK-NEXT: ret void
369+
;
370+
call void asm sideeffect "; use $0", "{a256}"(i32 poison)
371+
ret void
372+
}
373+
374+
define amdgpu_kernel void @kernel_uses_asm_virtreg_def_max_ty() {
375+
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_def_max_ty(
376+
; CHECK-SAME: ) #[[ATTR0]] {
377+
; CHECK-NEXT: [[DEF:%.*]] = call <32 x i32> asm sideeffect "
378+
; CHECK-NEXT: ret void
379+
;
380+
%def = call <32 x i32> asm sideeffect "; def $0", "=a"()
381+
ret void
382+
}
383+
384+
define amdgpu_kernel void @kernel_uses_asm_virtreg_use_max_ty() {
385+
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_use_max_ty(
386+
; CHECK-SAME: ) #[[ATTR0]] {
387+
; CHECK-NEXT: call void asm sideeffect "
388+
; CHECK-NEXT: ret void
389+
;
390+
call void asm sideeffect "; use $0", "a"(<32 x i32> poison)
391+
ret void
392+
}
393+
394+
define amdgpu_kernel void @kernel_uses_asm_virtreg_use_def_max_ty() {
395+
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_use_def_max_ty(
396+
; CHECK-SAME: ) #[[ATTR0]] {
397+
; CHECK-NEXT: [[DEF:%.*]] = call <32 x i32> asm sideeffect "
398+
; CHECK-NEXT: ret void
399+
;
400+
%def = call <32 x i32> asm sideeffect "; use $0", "=a,a"(<32 x i32> poison)
401+
ret void
402+
}
403+
404+
define amdgpu_kernel void @vreg_use_exceeds_register_file() {
405+
; CHECK-LABEL: define amdgpu_kernel void @vreg_use_exceeds_register_file(
406+
; CHECK-SAME: ) #[[ATTR0]] {
407+
; CHECK-NEXT: call void asm sideeffect "
408+
; CHECK-NEXT: ret void
409+
;
410+
call void asm sideeffect "; use $0", "a"(<257 x i32> poison)
411+
ret void
412+
}
413+
414+
define amdgpu_kernel void @vreg_def_exceeds_register_file() {
415+
; CHECK-LABEL: define amdgpu_kernel void @vreg_def_exceeds_register_file(
416+
; CHECK-SAME: ) #[[ATTR0]] {
417+
; CHECK-NEXT: [[DEF:%.*]] = call <257 x i32> asm sideeffect "
418+
; CHECK-NEXT: ret void
419+
;
420+
%def = call <257 x i32> asm sideeffect "; def $0", "=a"()
421+
ret void
422+
}
423+
424+
define amdgpu_kernel void @multiple() {
425+
; CHECK-LABEL: define amdgpu_kernel void @multiple(
426+
; CHECK-SAME: ) #[[ATTR0]] {
427+
; CHECK-NEXT: [[DEF:%.*]] = call { <16 x i32>, <8 x i32>, <8 x i32> } asm sideeffect "
428+
; CHECK-NEXT: ret void
429+
;
430+
%def = call {<16 x i32>, <8 x i32>, <8 x i32>} asm sideeffect "; def $0", "=a,=a,=a,a,a,a"(<4 x i32> splat (i32 0), <8 x i32> splat (i32 1), i64 999)
431+
ret void
432+
}
433+
434+
define amdgpu_kernel void @earlyclobber_0() {
435+
; CHECK-LABEL: define amdgpu_kernel void @earlyclobber_0(
436+
; CHECK-SAME: ) #[[ATTR0]] {
437+
; CHECK-NEXT: [[DEF:%.*]] = call <8 x i32> asm sideeffect "
438+
; CHECK-NEXT: ret void
439+
;
440+
%def = call <8 x i32> asm sideeffect "; def $0", "=&a,a"(i32 0)
441+
ret void
442+
}
443+
444+
define amdgpu_kernel void @earlyclobber_1() {
445+
; CHECK-LABEL: define amdgpu_kernel void @earlyclobber_1(
446+
; CHECK-SAME: ) #[[ATTR0]] {
447+
; CHECK-NEXT: [[DEF:%.*]] = call { <8 x i32>, <16 x i32> } asm sideeffect "
448+
; CHECK-NEXT: ret void
449+
;
450+
%def = call { <8 x i32>, <16 x i32 > } asm sideeffect "; def $0, $1", "=&a,=&a,a,a"(i32 0, <16 x i32> splat (i32 1))
451+
ret void
452+
}
254453

255454
attributes #0 = { "amdgpu-agpr-alloc"="0" }
256455
;.

0 commit comments

Comments
 (0)