ROCm
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
Lines changed: 18 additions & 7 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
Lines changed: 15 additions & 18 deletions
diff --git a/llvm/lib/Target/AArch64/CMakeLists.txt b/llvm/lib/Target/AArch64/CMakeLists.txt
Lines changed: 1 addition & 2 deletions
diff --git a/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
Lines changed: 57 additions & 47 deletions
@@ -1561,13 +1561,12 @@ def VectorIndexHOperand : AsmVectorIndex<0, 7>;
 def VectorIndexSOperand : AsmVectorIndex<0, 3>;
 def VectorIndexDOperand : AsmVectorIndex<0, 1>;
 
-let OperandNamespace = "AArch64" in {
-  let OperandType = "OPERAND_IMPLICIT_IMM_0" in {
-    defm VectorIndex0 : VectorIndex<i64, VectorIndex0Operand,
-                                [{ return ((uint64_t)Imm) == 0; }]>;
-    defm VectorIndex032b : VectorIndex<i32, VectorIndex0Operand,
-                                [{ return ((uint32_t)Imm) == 0; }]>;
-  }
+let OperandNamespace = "AArch64", OperandType = "OPERAND_IMPLICIT_IMM_0",
+    DecoderMethod = "DecodeZeroImm" in {
+  defm VectorIndex0 : VectorIndex<i64, VectorIndex0Operand,
+                                  [{ return ((uint64_t)Imm) == 0; }]>;
+  defm VectorIndex032b : VectorIndex<i32, VectorIndex0Operand,
+                                     [{ return ((uint32_t)Imm) == 0; }]>;
 }
 defm VectorIndex1 : VectorIndex<i64, VectorIndex1Operand,
                                 [{ return ((uint64_t)Imm) == 1; }]>;
@@ -1620,6 +1619,7 @@ def sme_elm_idx0_0 : Operand<i32>, TImmLeaf<i32, [{
   let PrintMethod = "printMatrixIndex";
   let OperandNamespace = "AArch64";
   let OperandType = "OPERAND_IMPLICIT_IMM_0";
+  let DecoderMethod = "DecodeZeroImm";
 }
 def sme_elm_idx0_1 : Operand<i32>, TImmLeaf<i32, [{
   return ((uint32_t)Imm) <= 1;
@@ -1683,6 +1683,7 @@ def uimm0s2range : Operand<i64>, ImmLeaf<i64,
   let ParserMatchClass = UImm0s2RangeOperand;
   let OperandNamespace = "AArch64";
   let OperandType = "OPERAND_IMPLICIT_IMM_0";
+  let DecoderMethod = "DecodeZeroImm";
 }
 
 def uimm0s4range : Operand<i64>, ImmLeaf<i64,
@@ -1691,6 +1692,7 @@ def uimm0s4range : Operand<i64>, ImmLeaf<i64,
   let ParserMatchClass = UImm0s4RangeOperand;
   let OperandNamespace = "AArch64";
   let OperandType = "OPERAND_IMPLICIT_IMM_0";
+  let DecoderMethod = "DecodeZeroImm";
 }
 
 def uimm1s2range : Operand<i64>, ImmLeaf<i64,
@@ -8220,18 +8222,23 @@ multiclass SMov {
   // streaming mode.
   let Predicates = [HasNEONandIsStreamingSafe] in {
     def vi8to32_idx0 : SIMDSMov<0, ".b", GPR32, VectorIndex0> {
+      bits<0> idx;
       let Inst{20-16} = 0b00001;
     }
     def vi8to64_idx0 : SIMDSMov<1, ".b", GPR64, VectorIndex0> {
+      bits<0> idx;
       let Inst{20-16} = 0b00001;
     }
     def vi16to32_idx0 : SIMDSMov<0, ".h", GPR32, VectorIndex0> {
+      bits<0> idx;
       let Inst{20-16} = 0b00010;
     }
     def vi16to64_idx0 : SIMDSMov<1, ".h", GPR64, VectorIndex0> {
+      bits<0> idx;
       let Inst{20-16} = 0b00010;
     }
     def vi32to64_idx0 : SIMDSMov<1, ".s", GPR64, VectorIndex0> {
+      bits<0> idx;
       let Inst{20-16} = 0b00100;
     }
   }
@@ -8267,15 +8274,19 @@ multiclass UMov {
   // streaming mode.
   let Predicates = [HasNEONandIsStreamingSafe] in {
     def vi8_idx0 : SIMDUMov<0, ".b", v16i8, GPR32, VectorIndex0> {
+      bits<0> idx;
       let Inst{20-16} = 0b00001;
     }
     def vi16_idx0 : SIMDUMov<0, ".h", v8i16, GPR32, VectorIndex0> {
+      bits<0> idx;
       let Inst{20-16} = 0b00010;
     }
     def vi32_idx0 : SIMDUMov<0, ".s", v4i32, GPR32, VectorIndex0> {
+      bits<0> idx;
       let Inst{20-16} = 0b00100;
     }
     def vi64_idx0 : SIMDUMov<1, ".d", v2i64, GPR64, VectorIndex0> {
+      bits<0> idx;
       let Inst{20-16} = 0b01000;
     }
     def : SIMDMovAlias<"mov", ".s",
 
@@ -1875,16 +1875,15 @@ class MatrixTileAsmOperand<string RC, int EltSize> : AsmOperandClass {
                           # EltSize # ", AArch64::" # RC # "RegClassID>";
 }
 
-class MatrixTileOperand<int EltSize, int NumBitsForTile, RegisterClass RC>
+class MatrixTileOperand<int EltSize, RegisterClass RC>
     : RegisterOperand<RC> {
   let ParserMatchClass = MatrixTileAsmOperand<!cast<string>(RC), EltSize>;
-  let DecoderMethod = "DecodeMatrixTile<" # NumBitsForTile # ">";
   let PrintMethod = "printMatrixTile";
 }
 
-def TileOp16  : MatrixTileOperand<16, 1, MPR16>;
-def TileOp32  : MatrixTileOperand<32, 2, MPR32>;
-def TileOp64  : MatrixTileOperand<64, 3, MPR64>;
+def TileOp16  : MatrixTileOperand<16, MPR16>;
+def TileOp32  : MatrixTileOperand<32, MPR32>;
+def TileOp64  : MatrixTileOperand<64, MPR64>;
 
 //
 // Tile vectors (horizontal and vertical)
@@ -1902,26 +1901,24 @@ class MatrixTileVectorAsmOperand<string RC, int EltSize, int IsVertical>
                           # EltSize # ", AArch64::" # RC # "RegClassID>";
 }
 
-class MatrixTileVectorOperand<int EltSize, int NumBitsForTile,
-                              RegisterClass RC, int IsVertical>
+class MatrixTileVectorOperand<int EltSize, RegisterClass RC, int IsVertical>
     : RegisterOperand<RC> {
   let ParserMatchClass = MatrixTileVectorAsmOperand<!cast<string>(RC), EltSize,
                                                     IsVertical>;
-  let DecoderMethod = "DecodeMatrixTile<" # NumBitsForTile # ">";
   let PrintMethod = "printMatrixTileVector<" # IsVertical # ">";
 }
 
-def TileVectorOpH8   : MatrixTileVectorOperand<  8, 0, MPR8,   0>;
-def TileVectorOpH16  : MatrixTileVectorOperand< 16, 1, MPR16,  0>;
-def TileVectorOpH32  : MatrixTileVectorOperand< 32, 2, MPR32,  0>;
-def TileVectorOpH64  : MatrixTileVectorOperand< 64, 3, MPR64,  0>;
-def TileVectorOpH128 : MatrixTileVectorOperand<128, 4, MPR128, 0>;
+def TileVectorOpH8   : MatrixTileVectorOperand<  8, MPR8,   0>;
+def TileVectorOpH16  : MatrixTileVectorOperand< 16, MPR16,  0>;
+def TileVectorOpH32  : MatrixTileVectorOperand< 32, MPR32,  0>;
+def TileVectorOpH64  : MatrixTileVectorOperand< 64, MPR64,  0>;
+def TileVectorOpH128 : MatrixTileVectorOperand<128, MPR128, 0>;
 
-def TileVectorOpV8   : MatrixTileVectorOperand<  8, 0, MPR8,   1>;
-def TileVectorOpV16  : MatrixTileVectorOperand< 16, 1, MPR16,  1>;
-def TileVectorOpV32  : MatrixTileVectorOperand< 32, 2, MPR32,  1>;
-def TileVectorOpV64  : MatrixTileVectorOperand< 64, 3, MPR64,  1>;
-def TileVectorOpV128 : MatrixTileVectorOperand<128, 4, MPR128, 1>;
+def TileVectorOpV8   : MatrixTileVectorOperand<  8, MPR8,   1>;
+def TileVectorOpV16  : MatrixTileVectorOperand< 16, MPR16,  1>;
+def TileVectorOpV32  : MatrixTileVectorOperand< 32, MPR32,  1>;
+def TileVectorOpV64  : MatrixTileVectorOperand< 64, MPR64,  1>;
+def TileVectorOpV128 : MatrixTileVectorOperand<128, MPR128, 1>;
 
 //
 // Accumulator matrix
 
@@ -7,8 +7,7 @@ tablegen(LLVM AArch64GenAsmWriter.inc -gen-asm-writer)
 tablegen(LLVM AArch64GenAsmWriter1.inc -gen-asm-writer -asmwriternum=1)
 tablegen(LLVM AArch64GenCallingConv.inc -gen-callingconv)
 tablegen(LLVM AArch64GenDAGISel.inc -gen-dag-isel)
-tablegen(LLVM AArch64GenDisassemblerTables.inc -gen-disassembler
-              -ignore-non-decodable-operands)
+tablegen(LLVM AArch64GenDisassemblerTables.inc -gen-disassembler)
 tablegen(LLVM AArch64GenFastISel.inc -gen-fast-isel)
 tablegen(LLVM AArch64GenGlobalISel.inc -gen-global-isel)
 tablegen(LLVM AArch64GenO0PreLegalizeGICombiner.inc -gen-global-isel-combiner
 
@@ -130,26 +130,57 @@ DecodeMatrixTileListRegisterClass(MCInst &Inst, unsigned RegMask,
   return Success;
 }
 
-static const MCPhysReg MatrixZATileDecoderTable[5][16] = {
-    {AArch64::ZAB0},
-    {AArch64::ZAH0, AArch64::ZAH1},
-    {AArch64::ZAS0, AArch64::ZAS1, AArch64::ZAS2, AArch64::ZAS3},
-    {AArch64::ZAD0, AArch64::ZAD1, AArch64::ZAD2, AArch64::ZAD3, AArch64::ZAD4,
-     AArch64::ZAD5, AArch64::ZAD6, AArch64::ZAD7},
-    {AArch64::ZAQ0, AArch64::ZAQ1, AArch64::ZAQ2, AArch64::ZAQ3, AArch64::ZAQ4,
-     AArch64::ZAQ5, AArch64::ZAQ6, AArch64::ZAQ7, AArch64::ZAQ8, AArch64::ZAQ9,
-     AArch64::ZAQ10, AArch64::ZAQ11, AArch64::ZAQ12, AArch64::ZAQ13,
-     AArch64::ZAQ14, AArch64::ZAQ15}};
-
-template <unsigned NumBitsForTile>
-static DecodeStatus DecodeMatrixTile(MCInst &Inst, unsigned RegNo,
-                                     uint64_t Address,
-                                     const MCDisassembler *Decoder) {
-  unsigned LastReg = (1 << NumBitsForTile) - 1;
-  if (RegNo > LastReg)
-    return Fail;
-  Inst.addOperand(
-      MCOperand::createReg(MatrixZATileDecoderTable[NumBitsForTile][RegNo]));
+static DecodeStatus DecodeZTRRegisterClass(MCInst &Inst,
+                                           const MCDisassembler *Decoder) {
+  Inst.addOperand(MCOperand::createReg(AArch64::ZT0));
+  return Success;
+}
+
+static DecodeStatus DecodeMPRRegisterClass(MCInst &Inst,
+                                           const MCDisassembler *Decoder) {
+  Inst.addOperand(MCOperand::createReg(AArch64::ZA));
+  return Success;
+}
+
+static DecodeStatus DecodeMPR8RegisterClass(MCInst &Inst,
+                                            const MCDisassembler *Decoder) {
+  Inst.addOperand(MCOperand::createReg(AArch64::ZAB0));
+  return Success;
+}
+
+static DecodeStatus DecodeMPR16RegisterClass(MCInst &Inst, unsigned RegNo,
+                                             uint64_t Address,
+                                             const MCDisassembler *Decoder) {
+  MCRegister Reg =
+      AArch64MCRegisterClasses[AArch64::MPR16RegClassID].getRegister(RegNo);
+  Inst.addOperand(MCOperand::createReg(Reg));
+  return Success;
+}
+
+static DecodeStatus DecodeMPR32RegisterClass(MCInst &Inst, unsigned RegNo,
+                                             uint64_t Address,
+                                             const MCDisassembler *Decoder) {
+  MCRegister Reg =
+      AArch64MCRegisterClasses[AArch64::MPR32RegClassID].getRegister(RegNo);
+  Inst.addOperand(MCOperand::createReg(Reg));
+  return Success;
+}
+
+static DecodeStatus DecodeMPR64RegisterClass(MCInst &Inst, unsigned RegNo,
+                                             uint64_t Address,
+                                             const MCDisassembler *Decoder) {
+  MCRegister Reg =
+      AArch64MCRegisterClasses[AArch64::MPR64RegClassID].getRegister(RegNo);
+  Inst.addOperand(MCOperand::createReg(Reg));
+  return Success;
+}
+
+static DecodeStatus DecodeMPR128RegisterClass(MCInst &Inst, unsigned RegNo,
+                                              uint64_t Address,
+                                              const MCDisassembler *Decoder) {
+  MCRegister Reg =
+      AArch64MCRegisterClasses[AArch64::MPR128RegClassID].getRegister(RegNo);
+  Inst.addOperand(MCOperand::createReg(Reg));
   return Success;
 }
 
@@ -1392,6 +1423,11 @@ DecodeSVELogicalImmInstruction(MCInst &Inst, uint32_t insn, uint64_t Addr,
   return Success;
 }
 
+static DecodeStatus DecodeZeroImm(MCInst &Inst, const MCDisassembler *Decoder) {
+  Inst.addOperand(MCOperand::createImm(0));
+  return Success;
+}
+
 template <int Bits>
 static DecodeStatus DecodeSImm(MCInst &Inst, uint64_t Imm, uint64_t Address,
                                const MCDisassembler *Decoder) {
@@ -1540,6 +1576,7 @@ DecodeSMESpillFillInstruction(MCInst &Inst, uint32_t Bits, uint64_t Addr,
   unsigned RnBits = fieldFromInstruction(Bits, 5, 5);
   unsigned Imm4Bits = fieldFromInstruction(Bits, 0, 4);
 
+  DecodeMPRRegisterClass(Inst, Decoder);
   DecodeSimpleRegisterClass<AArch64::MatrixIndexGPR32_12_15RegClassID, 0, 4>(
       Inst, RvBits, Addr, Decoder);
   Inst.addOperand(MCOperand::createImm(Imm4Bits));
@@ -1583,33 +1620,6 @@ DecodeStatus AArch64Disassembler::getInstruction(MCInst &MI, uint64_t &Size,
   for (const auto *Table : Tables) {
     DecodeStatus Result =
         decodeInstruction(Table, MI, Insn, Address, this, STI);
-
-    const MCInstrDesc &Desc = MCII->get(MI.getOpcode());
-
-    // For Scalable Matrix Extension (SME) instructions that have an implicit
-    // operand for the accumulator (ZA) or implicit immediate zero which isn't
-    // encoded, manually insert operand.
-    for (unsigned i = 0; i < Desc.getNumOperands(); i++) {
-      if (Desc.operands()[i].OperandType == MCOI::OPERAND_REGISTER) {
-        switch (Desc.operands()[i].RegClass) {
-        default:
-          break;
-        case AArch64::MPRRegClassID:
-          MI.insert(MI.begin() + i, MCOperand::createReg(AArch64::ZA));
-          break;
-        case AArch64::MPR8RegClassID:
-          MI.insert(MI.begin() + i, MCOperand::createReg(AArch64::ZAB0));
-          break;
-        case AArch64::ZTRRegClassID:
-          MI.insert(MI.begin() + i, MCOperand::createReg(AArch64::ZT0));
-          break;
-        }
-      } else if (Desc.operands()[i].OperandType ==
-                 AArch64::OPERAND_IMPLICIT_IMM_0) {
-        MI.insert(MI.begin() + i, MCOperand::createImm(0));
-      }
-    }
-
     if (Result != MCDisassembler::Fail)
       return Result;
   }