Skip to content

Commit e140221

Browse files
committed
AMDGPU: Account for read/write register intrinsics for AGPR usage
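Fix the handling of the special-case intrinsics (llvm.read_register, llvm.read_volatile_register and llvm.write_register) that can directly reference a physical register by name. There's no real reason to use these intrinsics, but they still need to be handled correctly.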
1 parent c7942b3 commit e140221

File tree

4 files changed

+120
-18
lines changed

4 files changed

+120
-18
lines changed

llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1313,10 +1313,21 @@ struct AAAMDGPUNoAGPR : public StateWrapper<BooleanState, AbstractAttribute> {
13131313
return false;
13141314
}
13151315

1316-
// Some intrinsics may use AGPRs, but if we have a choice, we are not
1317-
// required to use AGPRs.
1318-
if (Callee->isIntrinsic())
1316+
switch (Callee->getIntrinsicID()) {
1317+
case Intrinsic::write_register:
1318+
case Intrinsic::read_register:
1319+
case Intrinsic::read_volatile_register: {
1320+
const MDString *RegName = cast<MDString>(
1321+
cast<MetadataAsValue>(CB.getArgOperand(0))->getMetadata());
1322+
auto [Kind, RegIdx, NumRegs] =
1323+
AMDGPU::parseAsmPhysRegName(RegName->getString());
1324+
return Kind != 'a';
1325+
}
1326+
default:
1327+
// Some intrinsics may use AGPRs, but if we have a choice, we are not
1328+
// required to use AGPRs.
13191329
return true;
1330+
}
13201331

13211332
// TODO: Handle callsite attributes
13221333
const auto *CalleeInfo = A.getAAFor<AAAMDGPUNoAGPR>(

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1577,12 +1577,7 @@ static bool isValidRegPrefix(char C) {
15771577
return C == 'v' || C == 's' || C == 'a';
15781578
}
15791579

1580-
std::tuple<char, unsigned, unsigned>
1581-
parseAsmConstraintPhysReg(StringRef Constraint) {
1582-
StringRef RegName = Constraint;
1583-
if (!RegName.consume_front("{") || !RegName.consume_back("}"))
1584-
return {};
1585-
1580+
std::tuple<char, unsigned, unsigned> parseAsmPhysRegName(StringRef RegName) {
15861581
char Kind = RegName.front();
15871582
if (!isValidRegPrefix(Kind))
15881583
return {};
@@ -1609,6 +1604,14 @@ parseAsmConstraintPhysReg(StringRef Constraint) {
16091604
return {};
16101605
}
16111606

1607+
std::tuple<char, unsigned, unsigned>
1608+
parseAsmConstraintPhysReg(StringRef Constraint) {
1609+
StringRef RegName = Constraint;
1610+
if (!RegName.consume_front("{") || !RegName.consume_back("}"))
1611+
return {};
1612+
return parseAsmPhysRegName(RegName);
1613+
}
1614+
16121615
std::pair<unsigned, unsigned>
16131616
getIntegerPairAttribute(const Function &F, StringRef Name,
16141617
std::pair<unsigned, unsigned> Default,

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1013,6 +1013,13 @@ bool isReadOnlySegment(const GlobalValue *GV);
10131013
/// target triple \p TT, false otherwise.
10141014
bool shouldEmitConstantsToTextSection(const Triple &TT);
10151015

1016+
/// Returns the register kind character ('v', 's' or 'a') in the first entry,
1017+
/// or 0 if this is not a valid physical register name, followed by the start
1018+
/// register number and the register width. Does not validate that the
1019+
/// referenced registers exist in the class. Unlike parseAsmConstraintPhysReg,
1020+
/// this does not expect the name to be wrapped in "{}".
1021+
std::tuple<char, unsigned, unsigned> parseAsmPhysRegName(StringRef TupleString);
1022+
10161023
/// Returns a valid charcode or 0 in the first entry if this is a valid physical
10171024
/// register constraint. Followed by the start register number, and the register
10181025
/// width. Does not validate the number of registers exists in the class.

llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll

Lines changed: 90 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -168,7 +168,7 @@ declare void @unknown()
168168

169169
define amdgpu_kernel void @kernel_calls_extern() {
170170
; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_extern(
171-
; CHECK-SAME: ) #[[ATTR1]] {
171+
; CHECK-SAME: ) #[[ATTR0]] {
172172
; CHECK-NEXT: call void @unknown()
173173
; CHECK-NEXT: call void @use_most()
174174
; CHECK-NEXT: ret void
@@ -180,8 +180,8 @@ define amdgpu_kernel void @kernel_calls_extern() {
180180

181181
define amdgpu_kernel void @kernel_calls_extern_marked_callsite() {
182182
; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_extern_marked_callsite(
183-
; CHECK-SAME: ) #[[ATTR1]] {
184-
; CHECK-NEXT: call void @unknown() #[[ATTR5:[0-9]+]]
183+
; CHECK-SAME: ) #[[ATTR0]] {
184+
; CHECK-NEXT: call void @unknown() #[[ATTR10:[0-9]+]]
185185
; CHECK-NEXT: call void @use_most()
186186
; CHECK-NEXT: ret void
187187
;
@@ -205,7 +205,7 @@ define amdgpu_kernel void @kernel_calls_indirect(ptr %indirect) {
205205
define amdgpu_kernel void @kernel_calls_indirect_marked_callsite(ptr %indirect) {
206206
; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_indirect_marked_callsite(
207207
; CHECK-SAME: ptr [[INDIRECT:%.*]]) #[[ATTR1]] {
208-
; CHECK-NEXT: call void [[INDIRECT]]() #[[ATTR5]]
208+
; CHECK-NEXT: call void [[INDIRECT]]() #[[ATTR10]]
209209
; CHECK-NEXT: call void @use_most()
210210
; CHECK-NEXT: ret void
211211
;
@@ -216,7 +216,7 @@ define amdgpu_kernel void @kernel_calls_indirect_marked_callsite(ptr %indirect)
216216

217217
define amdgpu_kernel void @kernel_transitively_uses_agpr_asm() {
218218
; CHECK-LABEL: define amdgpu_kernel void @kernel_transitively_uses_agpr_asm(
219-
; CHECK-SAME: ) #[[ATTR1]] {
219+
; CHECK-SAME: ) #[[ATTR0]] {
220220
; CHECK-NEXT: call void @func_uses_asm_physreg_agpr()
221221
; CHECK-NEXT: call void @use_most()
222222
; CHECK-NEXT: ret void
@@ -260,7 +260,7 @@ define amdgpu_kernel void @kernel_calls_empty() {
260260

261261
define amdgpu_kernel void @kernel_calls_non_agpr_and_agpr() {
262262
; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_non_agpr_and_agpr(
263-
; CHECK-SAME: ) #[[ATTR1]] {
263+
; CHECK-SAME: ) #[[ATTR0]] {
264264
; CHECK-NEXT: call void @empty()
265265
; CHECK-NEXT: call void @func_uses_asm_physreg_agpr()
266266
; CHECK-NEXT: call void @use_most()
@@ -616,12 +616,93 @@ define amdgpu_kernel void @physreg_def_a32___def_vreg_a512_use_vreg_a256() {
616616
ret void
617617
}
618618

619+
define amdgpu_kernel void @kernel_uses_write_register_a55() {
620+
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_write_register_a55(
621+
; CHECK-SAME: ) #[[ATTR3:[0-9]+]] {
622+
; CHECK-NEXT: call void @llvm.write_register.i32(metadata !"a55", i32 0)
623+
; CHECK-NEXT: ret void
624+
;
625+
call void @llvm.write_register.i64(metadata !"a55", i32 0)
626+
ret void
627+
}
628+
629+
define amdgpu_kernel void @kernel_uses_write_register_v55() {
630+
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_write_register_v55(
631+
; CHECK-SAME: ) #[[ATTR4:[0-9]+]] {
632+
; CHECK-NEXT: call void @llvm.write_register.i32(metadata !"v55", i32 0)
633+
; CHECK-NEXT: ret void
634+
;
635+
call void @llvm.write_register.i64(metadata !"v55", i32 0)
636+
ret void
637+
}
638+
639+
define amdgpu_kernel void @kernel_uses_write_register_a55_57() {
640+
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_write_register_a55_57(
641+
; CHECK-SAME: ) #[[ATTR3]] {
642+
; CHECK-NEXT: call void @llvm.write_register.i96(metadata !"a[55:57]", i96 0)
643+
; CHECK-NEXT: ret void
644+
;
645+
call void @llvm.write_register.i64(metadata !"a[55:57]", i96 0)
646+
ret void
647+
}
648+
649+
define amdgpu_kernel void @kernel_uses_read_register_a55(ptr addrspace(1) %ptr) {
650+
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_read_register_a55(
651+
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR3]] {
652+
; CHECK-NEXT: [[REG:%.*]] = call i32 @llvm.read_register.i32(metadata !"a55")
653+
; CHECK-NEXT: store i32 [[REG]], ptr addrspace(1) [[PTR]], align 4
654+
; CHECK-NEXT: ret void
655+
;
656+
%reg = call i32 @llvm.read_register.i64(metadata !"a55")
657+
store i32 %reg, ptr addrspace(1) %ptr
658+
ret void
659+
}
660+
661+
define amdgpu_kernel void @kernel_uses_read_volatile_register_a55(ptr addrspace(1) %ptr) {
662+
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_read_volatile_register_a55(
663+
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR3]] {
664+
; CHECK-NEXT: [[REG:%.*]] = call i32 @llvm.read_volatile_register.i32(metadata !"a55")
665+
; CHECK-NEXT: store i32 [[REG]], ptr addrspace(1) [[PTR]], align 4
666+
; CHECK-NEXT: ret void
667+
;
668+
%reg = call i32 @llvm.read_volatile_register.i64(metadata !"a55")
669+
store i32 %reg, ptr addrspace(1) %ptr
670+
ret void
671+
}
672+
673+
define amdgpu_kernel void @kernel_uses_read_register_a56_59(ptr addrspace(1) %ptr) {
674+
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_read_register_a56_59(
675+
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR3]] {
676+
; CHECK-NEXT: [[REG:%.*]] = call i128 @llvm.read_register.i128(metadata !"a[56:59]")
677+
; CHECK-NEXT: store i128 [[REG]], ptr addrspace(1) [[PTR]], align 8
678+
; CHECK-NEXT: ret void
679+
;
680+
%reg = call i128 @llvm.read_register.i64(metadata !"a[56:59]")
681+
store i128 %reg, ptr addrspace(1) %ptr
682+
ret void
683+
}
684+
685+
define amdgpu_kernel void @kernel_uses_write_register_out_of_bounds_a256() {
686+
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_write_register_out_of_bounds_a256(
687+
; CHECK-SAME: ) #[[ATTR3]] {
688+
; CHECK-NEXT: call void @llvm.write_register.i32(metadata !"a256", i32 0)
689+
; CHECK-NEXT: ret void
690+
;
691+
call void @llvm.write_register.i64(metadata !"a256", i32 0)
692+
ret void
693+
}
694+
619695
attributes #0 = { "amdgpu-agpr-alloc"="0" }
620696
;.
621697
; CHECK: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
622698
; CHECK: attributes #[[ATTR1]] = { "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
623699
; CHECK: attributes #[[ATTR2:[0-9]+]] = { convergent nocallback nofree nosync nounwind willreturn memory(none) "target-cpu"="gfx90a" }
624-
; CHECK: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx90a" }
625-
; CHECK: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) "target-cpu"="gfx90a" }
626-
; CHECK: attributes #[[ATTR5]] = { "amdgpu-agpr-alloc"="0" }
700+
; CHECK: attributes #[[ATTR3]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
701+
; CHECK: attributes #[[ATTR4]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
702+
; CHECK: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx90a" }
703+
; CHECK: attributes #[[ATTR6:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) "target-cpu"="gfx90a" }
704+
; CHECK: attributes #[[ATTR7:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(read) "target-cpu"="gfx90a" }
705+
; CHECK: attributes #[[ATTR8:[0-9]+]] = { nounwind "target-cpu"="gfx90a" }
706+
; CHECK: attributes #[[ATTR9:[0-9]+]] = { nocallback nounwind "target-cpu"="gfx90a" }
707+
; CHECK: attributes #[[ATTR10]] = { "amdgpu-agpr-alloc"="0" }
627708
;.

0 commit comments

Comments
 (0)