@@ -1243,29 +1243,34 @@ static unsigned inlineAsmGetNumRequiredAGPRs(const InlineAsm *IA,
1243
1243
}
1244
1244
1245
1245
for (StringRef Code : CI.Codes ) {
1246
+ unsigned RegCount = 0 ;
1247
+
1246
1248
if (Code.starts_with (" a" )) {
1247
1249
// Virtual register, compute number of registers based on the type.
1248
1250
//
1249
1251
// We ought to be going through TargetLowering to get the number of
1250
1252
// registers, but we should avoid the dependence on CodeGen here.
1251
- unsigned RegCount = divideCeil (DL.getTypeSizeInBits (Ty), 32 );
1252
- if (CI.Type == InlineAsm::isOutput) {
1253
- AGPRDefCount += RegCount;
1254
- if (CI.isEarlyClobber )
1255
- AGPRUseCount += RegCount;
1256
- } else
1257
- AGPRUseCount += RegCount;
1253
+ RegCount = divideCeil (DL.getTypeSizeInBits (Ty), 32 );
1258
1254
} else {
1259
1255
// Physical register reference
1260
1256
auto [Kind, RegIdx, NumRegs] = AMDGPU::parseAsmConstraintPhysReg (Code);
1261
- if (Kind == ' a' )
1257
+ if (Kind == ' a' ) {
1258
+ RegCount = NumRegs;
1262
1259
MaxPhysReg = std::max (MaxPhysReg, std::min (RegIdx + NumRegs, 256u ));
1260
+ }
1263
1261
}
1262
+
1263
+ if (CI.Type == InlineAsm::isOutput) {
1264
+ AGPRDefCount += RegCount;
1265
+ if (CI.isEarlyClobber )
1266
+ AGPRUseCount += RegCount;
1267
+ } else
1268
+ AGPRUseCount += RegCount;
1264
1269
}
1265
1270
}
1266
1271
1267
1272
unsigned MaxVirtReg = std::max (AGPRUseCount, AGPRDefCount);
1268
- return std::min (MaxVirtReg + MaxPhysReg, 256u );
1273
+ return std::min (std::max ( MaxVirtReg, MaxPhysReg) , 256u );
1269
1274
}
1270
1275
1271
1276
// TODO: Migrate to range merge of amdgpu-agpr-alloc.
0 commit comments