Skip to content

Conversation

arsenm
Copy link
Contributor

@arsenm arsenm commented Oct 5, 2025

Fix the special case intrinsics that can directly reference a physical
register. There's no reason to use this.

Fix the special case intrinsics that can directly reference a physical
register. There's no reason to use this.
Copy link
Contributor Author

arsenm commented Oct 5, 2025

Warning

This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack on Graphite.
Learn more

This stack of pull requests is managed by Graphite. Learn more about stacking.

@arsenm arsenm marked this pull request as ready for review October 5, 2025 02:16
@llvmbot
Copy link
Member

llvmbot commented Oct 5, 2025

@llvm/pr-subscribers-backend-amdgpu

Author: Matt Arsenault (arsenm)

Changes

Fix the special case intrinsics that can directly reference a physical
register. There's no reason to use this.


Full diff: https://github.com/llvm/llvm-project/pull/161988.diff

4 Files Affected:

  • (modified) llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp (+14-3)
  • (modified) llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp (+9-6)
  • (modified) llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h (+7)
  • (modified) llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll (+90-9)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index 155b1a6a380dd..e8e39ff015eed 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -1313,10 +1313,21 @@ struct AAAMDGPUNoAGPR : public StateWrapper<BooleanState, AbstractAttribute> {
         return false;
       }
 
-      // Some intrinsics may use AGPRs, but if we have a choice, we are not
-      // required to use AGPRs.
-      if (Callee->isIntrinsic())
+      switch (Callee->getIntrinsicID()) {
+      case Intrinsic::write_register:
+      case Intrinsic::read_register:
+      case Intrinsic::read_volatile_register: {
+        const MDString *RegName = cast<MDString>(
+            cast<MetadataAsValue>(CB.getArgOperand(0))->getMetadata());
+        auto [Kind, RegIdx, NumRegs] =
+            AMDGPU::parseAsmPhysRegName(RegName->getString());
+        return Kind != 'a';
+      }
+      default:
+        // Some intrinsics may use AGPRs, but if we have a choice, we are not
+        // required to use AGPRs.
         return true;
+      }
 
       // TODO: Handle callsite attributes
       const auto *CalleeInfo = A.getAAFor<AAAMDGPUNoAGPR>(
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 20fa1412a778e..8e57498c56f76 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -1577,12 +1577,7 @@ static bool isValidRegPrefix(char C) {
   return C == 'v' || C == 's' || C == 'a';
 }
 
-std::tuple<char, unsigned, unsigned>
-parseAsmConstraintPhysReg(StringRef Constraint) {
-  StringRef RegName = Constraint;
-  if (!RegName.consume_front("{") || !RegName.consume_back("}"))
-    return {};
-
+std::tuple<char, unsigned, unsigned> parseAsmPhysRegName(StringRef RegName) {
   char Kind = RegName.front();
   if (!isValidRegPrefix(Kind))
     return {};
@@ -1609,6 +1604,14 @@ parseAsmConstraintPhysReg(StringRef Constraint) {
   return {};
 }
 
+std::tuple<char, unsigned, unsigned>
+parseAsmConstraintPhysReg(StringRef Constraint) {
+  StringRef RegName = Constraint;
+  if (!RegName.consume_front("{") || !RegName.consume_back("}"))
+    return {};
+  return parseAsmPhysRegName(RegName);
+}
+
 std::pair<unsigned, unsigned>
 getIntegerPairAttribute(const Function &F, StringRef Name,
                         std::pair<unsigned, unsigned> Default,
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 2b9c063f42a5e..b656414f2b85c 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -1013,6 +1013,13 @@ bool isReadOnlySegment(const GlobalValue *GV);
 /// target triple \p TT, false otherwise.
 bool shouldEmitConstantsToTextSection(const Triple &TT);
 
+/// Returns a valid charcode or 0 in the first entry if this is a valid physical
+/// register name. Followed by the start register number, and the register
+/// width. Does not validate the number of registers exists in the class. Unlike
+/// parseAsmConstraintPhysReg, this does not expect the name to be wrapped in
+/// "{}".
+std::tuple<char, unsigned, unsigned> parseAsmPhysRegName(StringRef TupleString);
+
 /// Returns a valid charcode or 0 in the first entry if this is a valid physical
 /// register constraint. Followed by the start register number, and the register
 /// width. Does not validate the number of registers exists in the class.
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll
index f19d563067eb2..7fcd07486d54d 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll
@@ -168,7 +168,7 @@ declare void @unknown()
 
 define amdgpu_kernel void @kernel_calls_extern() {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_extern(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR0]] {
 ; CHECK-NEXT:    call void @unknown()
 ; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
@@ -180,8 +180,8 @@ define amdgpu_kernel void @kernel_calls_extern() {
 
 define amdgpu_kernel void @kernel_calls_extern_marked_callsite() {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_extern_marked_callsite(
-; CHECK-SAME: ) #[[ATTR1]] {
-; CHECK-NEXT:    call void @unknown() #[[ATTR5:[0-9]+]]
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT:    call void @unknown() #[[ATTR10:[0-9]+]]
 ; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
 ;
@@ -205,7 +205,7 @@ define amdgpu_kernel void @kernel_calls_indirect(ptr %indirect) {
 define amdgpu_kernel void @kernel_calls_indirect_marked_callsite(ptr %indirect) {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_indirect_marked_callsite(
 ; CHECK-SAME: ptr [[INDIRECT:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    call void [[INDIRECT]]() #[[ATTR5]]
+; CHECK-NEXT:    call void [[INDIRECT]]() #[[ATTR10]]
 ; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
 ;
@@ -216,7 +216,7 @@ define amdgpu_kernel void @kernel_calls_indirect_marked_callsite(ptr %indirect)
 
 define amdgpu_kernel void @kernel_transitively_uses_agpr_asm() {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_transitively_uses_agpr_asm(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR0]] {
 ; CHECK-NEXT:    call void @func_uses_asm_physreg_agpr()
 ; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
@@ -260,7 +260,7 @@ define amdgpu_kernel void @kernel_calls_empty() {
 
 define amdgpu_kernel void @kernel_calls_non_agpr_and_agpr() {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_non_agpr_and_agpr(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR0]] {
 ; CHECK-NEXT:    call void @empty()
 ; CHECK-NEXT:    call void @func_uses_asm_physreg_agpr()
 ; CHECK-NEXT:    call void @use_most()
@@ -616,12 +616,93 @@ define amdgpu_kernel void @physreg_def_a32___def_vreg_a512_use_vreg_a256() {
   ret void
 }
 
+define amdgpu_kernel void @kernel_uses_write_register_a55() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_write_register_a55(
+; CHECK-SAME: ) #[[ATTR3:[0-9]+]] {
+; CHECK-NEXT:    call void @llvm.write_register.i32(metadata !"a55", i32 0)
+; CHECK-NEXT:    ret void
+;
+  call void @llvm.write_register.i64(metadata !"a55", i32 0)
+  ret void
+}
+
+define amdgpu_kernel void @kernel_uses_write_register_v55() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_write_register_v55(
+; CHECK-SAME: ) #[[ATTR4:[0-9]+]] {
+; CHECK-NEXT:    call void @llvm.write_register.i32(metadata !"v55", i32 0)
+; CHECK-NEXT:    ret void
+;
+  call void @llvm.write_register.i64(metadata !"v55", i32 0)
+  ret void
+}
+
+define amdgpu_kernel void @kernel_uses_write_register_a55_57() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_write_register_a55_57(
+; CHECK-SAME: ) #[[ATTR3]] {
+; CHECK-NEXT:    call void @llvm.write_register.i96(metadata !"a[55:57]", i96 0)
+; CHECK-NEXT:    ret void
+;
+  call void @llvm.write_register.i64(metadata !"a[55:57]", i96 0)
+  ret void
+}
+
+define amdgpu_kernel void @kernel_uses_read_register_a55(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_read_register_a55(
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR3]] {
+; CHECK-NEXT:    [[REG:%.*]] = call i32 @llvm.read_register.i32(metadata !"a55")
+; CHECK-NEXT:    store i32 [[REG]], ptr addrspace(1) [[PTR]], align 4
+; CHECK-NEXT:    ret void
+;
+  %reg = call i32 @llvm.read_register.i64(metadata !"a55")
+  store i32 %reg, ptr addrspace(1) %ptr
+  ret void
+}
+
+define amdgpu_kernel void @kernel_uses_read_volatile_register_a55(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_read_volatile_register_a55(
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR3]] {
+; CHECK-NEXT:    [[REG:%.*]] = call i32 @llvm.read_volatile_register.i32(metadata !"a55")
+; CHECK-NEXT:    store i32 [[REG]], ptr addrspace(1) [[PTR]], align 4
+; CHECK-NEXT:    ret void
+;
+  %reg = call i32 @llvm.read_volatile_register.i64(metadata !"a55")
+  store i32 %reg, ptr addrspace(1) %ptr
+  ret void
+}
+
+define amdgpu_kernel void @kernel_uses_read_register_a56_59(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_read_register_a56_59(
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR3]] {
+; CHECK-NEXT:    [[REG:%.*]] = call i128 @llvm.read_register.i128(metadata !"a[56:59]")
+; CHECK-NEXT:    store i128 [[REG]], ptr addrspace(1) [[PTR]], align 8
+; CHECK-NEXT:    ret void
+;
+  %reg = call i128 @llvm.read_register.i64(metadata !"a[56:59]")
+  store i128 %reg, ptr addrspace(1) %ptr
+  ret void
+}
+
+define amdgpu_kernel void @kernel_uses_write_register_out_of_bounds_a256() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_write_register_out_of_bounds_a256(
+; CHECK-SAME: ) #[[ATTR3]] {
+; CHECK-NEXT:    call void @llvm.write_register.i32(metadata !"a256", i32 0)
+; CHECK-NEXT:    ret void
+;
+  call void @llvm.write_register.i64(metadata !"a256", i32 0)
+  ret void
+}
+
 attributes #0 = { "amdgpu-agpr-alloc"="0" }
 ;.
 ; CHECK: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
 ; CHECK: attributes #[[ATTR1]] = { "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
 ; CHECK: attributes #[[ATTR2:[0-9]+]] = { convergent nocallback nofree nosync nounwind willreturn memory(none) "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR5]] = { "amdgpu-agpr-alloc"="0" }
+; CHECK: attributes #[[ATTR3]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR4]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR6:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR7:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(read) "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR8:[0-9]+]] = { nounwind "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR9:[0-9]+]] = { nocallback nounwind "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR10]] = { "amdgpu-agpr-alloc"="0" }
 ;.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

2 participants