Skip to content

Commit 8b3326f

Browse files
author
z1_cciauto
authored
merge main into amd-staging (llvm#4281)
2 parents 743d99d + a76ac7e commit 8b3326f

File tree

74 files changed

+2264
-1080
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

74 files changed

+2264
-1080
lines changed

clang/include/clang/Basic/BuiltinsPPC.def

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1105,6 +1105,13 @@ UNALIASED_CUSTOM_BUILTIN(mma_disassemble_dmr, "vv*W1024*", false,
11051105
UNALIASED_CUSTOM_BUILTIN(mma_build_dmr, "vW1024*VVVVVVVV", false,
11061106
"mma,isa-future-instructions")
11071107

1108+
UNALIASED_CUSTOM_BUILTIN(mma_dmsha2hash, "vW1024*W1024*Ii", true,
1109+
"mma,isa-future-instructions")
1110+
UNALIASED_CUSTOM_BUILTIN(mma_dmsha3hash, "vW2048*Ii", true,
1111+
"mma,isa-future-instructions")
1112+
UNALIASED_CUSTOM_BUILTIN(mma_dmxxshapad, "vW1024*VIiIiIi", true,
1113+
"mma,isa-future-instructions")
1114+
11081115
// MMA builtins with positive/negative multiply/accumulate.
11091116
UNALIASED_CUSTOM_MMA_BUILTIN(mma_xvf16ger2, "vW512*VV",
11101117
"mma,paired-vector-memops")

clang/include/clang/Basic/PPCTypes.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
#endif
3131

3232

33+
PPC_VECTOR_MMA_TYPE(__dmr2048, DMR2048, 2048)
3334
PPC_VECTOR_MMA_TYPE(__dmr1024, DMR1024, 1024)
3435
PPC_VECTOR_MMA_TYPE(__vector_quad, VectorQuad, 512)
3536
PPC_VECTOR_VSX_TYPE(__vector_pair, VectorPair, 256)

clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -243,6 +243,13 @@ class CIRBaseBuilderTy : public mlir::OpBuilder {
243243
return cir::AllocaOp::create(*this, loc, addrType, type, name, alignment);
244244
}
245245

246+
mlir::Value createAlloca(mlir::Location loc, cir::PointerType addrType,
247+
mlir::Type type, llvm::StringRef name,
248+
clang::CharUnits alignment) {
249+
mlir::IntegerAttr alignmentAttr = getAlignmentAttr(alignment);
250+
return createAlloca(loc, addrType, type, name, alignmentAttr);
251+
}
252+
246253
/// Get constant address of a global variable as an MLIR attribute.
247254
/// This wrapper infers the attribute type through the global op.
248255
cir::GlobalViewAttr getGlobalViewAttr(cir::GlobalOp globalOp,

clang/include/clang/Serialization/ASTBitCodes.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1160,7 +1160,7 @@ enum PredefinedTypeIDs {
11601160
///
11611161
/// Type IDs for non-predefined types will start at
11621162
/// NUM_PREDEF_TYPE_IDS.
1163-
const unsigned NUM_PREDEF_TYPE_IDS = 514;
1163+
const unsigned NUM_PREDEF_TYPE_IDS = 515;
11641164

11651165
// Ensure we do not overrun the predefined types we reserved
11661166
// in the enum PredefinedTypeIDs above.

clang/lib/AST/ASTContext.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3506,6 +3506,7 @@ static void encodeTypeForFunctionPointerAuth(const ASTContext &Ctx,
35063506
case BuiltinType::VectorQuad:
35073507
case BuiltinType::VectorPair:
35083508
case BuiltinType::DMR1024:
3509+
case BuiltinType::DMR2048:
35093510
OS << "?";
35103511
return;
35113512

clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.cpp

Lines changed: 111 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
//
1111
//===----------------------------------------------------------------------===//
1212

13+
#include <numeric>
14+
1315
#include "CIRGenOpenACCRecipe.h"
1416

1517
namespace clang::CIRGen {
@@ -35,6 +37,110 @@ mlir::Block *OpenACCRecipeBuilderBase::createRecipeBlock(mlir::Region &region,
3537
return builder.createBlock(&region, region.end(), types, locs);
3638
}
3739

40+
mlir::Value OpenACCRecipeBuilderBase::makeBoundsAlloca(
41+
mlir::Block *block, SourceRange exprRange, mlir::Location loc,
42+
std::string_view allocaName, size_t numBounds,
43+
llvm::ArrayRef<QualType> boundTypes) {
44+
mlir::OpBuilder::InsertionGuard guardCase(builder);
45+
46+
// Get the range of bounds arguments, which are all but the 1st arg.
47+
llvm::ArrayRef<mlir::BlockArgument> boundsRange =
48+
block->getArguments().drop_front(1);
49+
50+
// boundTypes contains the before and after of each bounds, so it ends up
51+
// having 1 extra. Assert this is the case to ensure we don't call this in the
52+
// wrong 'block'.
53+
assert(boundsRange.size() + 1 == boundTypes.size());
54+
55+
mlir::Type itrTy = cgf.cgm.convertType(cgf.getContext().UnsignedLongLongTy);
56+
auto idxType = mlir::IndexType::get(&cgf.getMLIRContext());
57+
58+
auto getUpperBound = [&](mlir::Value bound) {
59+
auto upperBoundVal =
60+
mlir::acc::GetUpperboundOp::create(builder, loc, idxType, bound);
61+
return mlir::UnrealizedConversionCastOp::create(builder, loc, itrTy,
62+
upperBoundVal.getResult())
63+
.getResult(0);
64+
};
65+
66+
auto isArrayTy = [&](QualType ty) {
67+
if (ty->isArrayType() && !ty->isConstantArrayType())
68+
cgf.cgm.errorNYI(exprRange, "OpenACC recipe init for VLAs");
69+
return ty->isConstantArrayType();
70+
};
71+
72+
mlir::Type topLevelTy = cgf.convertType(boundTypes.back());
73+
cir::PointerType topLevelTyPtr = builder.getPointerTo(topLevelTy);
74+
// Do an alloca for the 'top' level type without bounds.
75+
mlir::Value initialAlloca = builder.createAlloca(
76+
loc, topLevelTyPtr, topLevelTy, allocaName,
77+
cgf.getContext().getTypeAlignInChars(boundTypes.back()));
78+
79+
bool lastBoundWasArray = isArrayTy(boundTypes.back());
80+
81+
// Since we're iterating the types in reverse, this sets up for each index
82+
// corresponding to the boundsRange to be the 'after application of the
83+
// bounds.
84+
llvm::ArrayRef<QualType> boundResults = boundTypes.drop_back(1);
85+
86+
// Collect the 'do we have any allocas needed after this type' list.
87+
llvm::SmallVector<bool> allocasLeftArr;
88+
llvm::ArrayRef<QualType> resultTypes = boundTypes.drop_front();
89+
std::transform_inclusive_scan(
90+
resultTypes.begin(), resultTypes.end(),
91+
std::back_inserter(allocasLeftArr), std::plus<bool>{},
92+
[](QualType ty) { return !ty->isConstantArrayType(); });
93+
94+
// Keep track of the number of 'elements' that we're allocating. Individual
95+
// allocas should multiply this by the size of its current allocation.
96+
mlir::Value cumulativeElts;
97+
for (auto [bound, resultType, allocasLeft] : llvm::reverse(
98+
llvm::zip_equal(boundsRange, boundResults, allocasLeftArr))) {
99+
100+
// If there is no further 'alloca' operation we need to do, we can skip
101+
// creating the UB/multiplications/etc.
102+
if (!allocasLeft)
103+
break;
104+
105+
// First: figure out the number of elements in the current 'bound' list.
106+
mlir::Value eltsPerSubArray = getUpperBound(bound);
107+
mlir::Value eltsToAlloca;
108+
109+
// If we are in a sub-bounds, the total number of elements to alloca is
110+
// the product of that one and the current 'bounds' size. That is,
111+
// arr[5][5], we would need 25 elements, not just 5. Else it is just the
112+
// current number of elements.
113+
if (cumulativeElts)
114+
eltsToAlloca = builder.createMul(loc, eltsPerSubArray, cumulativeElts);
115+
else
116+
eltsToAlloca = eltsPerSubArray;
117+
118+
if (!lastBoundWasArray) {
119+
// If we have to do an allocation, figure out the size of the
120+
// allocation. alloca takes the number of bytes, not elements.
121+
TypeInfoChars eltInfo = cgf.getContext().getTypeInfoInChars(resultType);
122+
cir::ConstantOp eltSize = builder.getConstInt(
123+
loc, itrTy, eltInfo.Width.alignTo(eltInfo.Align).getQuantity());
124+
mlir::Value curSize = builder.createMul(loc, eltsToAlloca, eltSize);
125+
126+
mlir::Type eltTy = cgf.convertType(resultType);
127+
cir::PointerType ptrTy = builder.getPointerTo(eltTy);
128+
builder.createAlloca(loc, ptrTy, eltTy, "openacc.init.bounds",
129+
cgf.getContext().getTypeAlignInChars(resultType),
130+
curSize);
131+
132+
// TODO: OpenACC : At this point we should be copying the addresses of
133+
// each element of this to the last allocation. At the moment, that is
134+
// not yet implemented.
135+
cgf.cgm.errorNYI(exprRange, "OpenACC recipe alloca copying");
136+
}
137+
138+
cumulativeElts = eltsToAlloca;
139+
lastBoundWasArray = isArrayTy(resultType);
140+
}
141+
return initialAlloca;
142+
}
143+
38144
mlir::Value
39145
OpenACCRecipeBuilderBase::createBoundsLoop(mlir::Value subscriptedValue,
40146
mlir::Value bound,
@@ -258,7 +364,11 @@ void OpenACCRecipeBuilderBase::createPrivateInitRecipe(
258364
cgf.emitAutoVarAlloca(*allocaDecl, builder.saveInsertionPoint());
259365
cgf.emitAutoVarInit(tempDeclEmission);
260366
} else {
261-
cgf.cgm.errorNYI(exprRange, "private-init with bounds");
367+
makeBoundsAlloca(block, exprRange, loc, "openacc.private.init", numBounds,
368+
boundTypes);
369+
370+
if (initExpr)
371+
cgf.cgm.errorNYI(exprRange, "private-init with bounds initialization");
262372
}
263373

264374
mlir::acc::YieldOp::create(builder, locEnd);

clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.h

Lines changed: 10 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,13 @@
2424

2525
namespace clang::CIRGen {
2626
class OpenACCRecipeBuilderBase {
27+
// This function generates the required alloca, similar to
28+
// 'emitAutoVarAlloca', except for the OpenACC array/pointer types.
29+
mlir::Value makeBoundsAlloca(mlir::Block *block, SourceRange exprRange,
30+
mlir::Location loc, std::string_view allocaName,
31+
size_t numBounds,
32+
llvm::ArrayRef<QualType> boundTypes);
33+
2734
protected:
2835
CIRGen::CIRGenFunction &cgf;
2936
CIRGen::CIRGenBuilderTy &builder;
@@ -165,28 +172,9 @@ class OpenACCRecipeBuilder : OpenACCRecipeBuilderBase {
165172
cgf.emitAutoVarAlloca(*varRecipe, builder.saveInsertionPoint());
166173

167174
// 'firstprivate' doesn't do its initialization in the 'init' section,
168-
// instead does it in the 'copy' section. SO only do init here.
169-
// 'reduction' appears to use it too (rather than a 'copy' section), so
170-
// we probably have to do it here too, but we can do that when we get to
171-
// reduction implementation.
172-
if constexpr (std::is_same_v<RecipeTy, mlir::acc::PrivateRecipeOp>) {
173-
// We are OK with no init for builtins, arrays of builtins, or pointers,
174-
// else we should NYI so we know to go look for these.
175-
if (cgf.getContext().getLangOpts().CPlusPlus &&
176-
!varRecipe->getType()
177-
->getPointeeOrArrayElementType()
178-
->isBuiltinType() &&
179-
!varRecipe->getType()->isPointerType() && !varRecipe->getInit()) {
180-
// If we don't have any initialization recipe, we failed during Sema to
181-
// initialize this correctly. If we disable the
182-
// Sema::TentativeAnalysisScopes in SemaOpenACC::CreateInitRecipe, it'll
183-
// emit an error to tell us. However, emitting those errors during
184-
// production is a violation of the standard, so we cannot do them.
185-
cgf.cgm.errorNYI(exprRange, "private default-init recipe");
186-
}
187-
cgf.emitAutoVarInit(tempDeclEmission);
188-
} else if constexpr (std::is_same_v<RecipeTy,
189-
mlir::acc::ReductionRecipeOp>) {
175+
// instead it does it in the 'copy' section. So, only do 'init' here for
176+
// reduction.
177+
if constexpr (std::is_same_v<RecipeTy, mlir::acc::ReductionRecipeOp>) {
190178
// Unlike Private, the recipe here is always required as it has to do
191179
// init, not just 'default' init.
192180
if (!varRecipe->getInit())

clang/lib/CodeGen/TargetBuiltins/PPC.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1153,7 +1153,8 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
11531153
}
11541154
if (BuiltinID == PPC::BI__builtin_mma_dmmr ||
11551155
BuiltinID == PPC::BI__builtin_mma_dmxor ||
1156-
BuiltinID == PPC::BI__builtin_mma_disassemble_dmr) {
1156+
BuiltinID == PPC::BI__builtin_mma_disassemble_dmr ||
1157+
BuiltinID == PPC::BI__builtin_mma_dmsha2hash) {
11571158
Address Addr = EmitPointerWithAlignment(E->getArg(1));
11581159
Ops[1] = Builder.CreateLoad(Addr);
11591160
}

clang/lib/StaticAnalyzer/Checkers/AnalysisOrderChecker.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,8 @@ class AnalysisOrderChecker
129129
llvm::errs() << " {argno: " << Call.getNumArgs() << '}';
130130
llvm::errs() << " [" << Call.getKindAsString() << ']';
131131
llvm::errs() << '\n';
132-
return true;
132+
// We can't return `true` from this callback without binding the return
133+
// value. Let's just fall through here and return `false`.
133134
}
134135
return false;
135136
}

clang/lib/StaticAnalyzer/Checkers/CheckerDocumentation.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -262,6 +262,15 @@ class CheckerDocumentation
262262
/// state. This callback allows a checker to provide domain specific knowledge
263263
/// about the particular functions it knows about.
264264
///
265+
/// Note that to evaluate a call, the handler MUST bind the return value if
266+
/// it's a non-void function. Invalidate the arguments if necessary.
267+
///
268+
/// Note that in general, user-provided functions should not be eval-called
269+
/// because the checker can't predict the exact semantics/contract of the
270+
/// callee, and by having the eval::Call callback, we also prevent it from
271+
/// getting inlined, potentially regressing analysis quality.
272+
/// Consider using check::PreCall or check::PostCall to allow inlining.
273+
///
265274
/// \returns true if the call has been successfully evaluated
266275
/// and false otherwise. Note that only one checker can evaluate a call. If
267276
/// more than one checker claims that they can evaluate the same call the

0 commit comments

Comments
 (0)