Skip to content

Commit 4a62f0a

Browse files
Merge branch 'main' into main
2 parents 298aa73 + ea9ec7c commit 4a62f0a

File tree

10 files changed

+128
-101
lines changed

10 files changed

+128
-101
lines changed

llvm/lib/Target/PowerPC/PPCISelLowering.cpp

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19280,13 +19280,58 @@ static SDValue combineADDToMAT_PCREL_ADDR(SDNode *N, SelectionDAG &DAG,
1928019280
return MatPCRel;
1928119281
}
1928219282

19283+
// Transform (add X, (build_vector (T 1), (T 1), ...)) -> (sub X, (XXLEQVOnes))
19284+
// XXLEQVOnes creates an all-1s vector (0xFFFFFFFF...) efficiently via xxleqv
19285+
// Mathematical identity: X + 1 = X - (-1)
19286+
// Applies to v4i32, v2i64, v8i16, v16i8 where all elements are constant 1
19287+
// Requirement: VSX feature for efficient xxleqv generation
19288+
static SDValue combineADDToSUB(SDNode *N, SelectionDAG &DAG,
19289+
const PPCSubtarget &Subtarget) {
19290+
19291+
EVT VT = N->getValueType(0);
19292+
if (!Subtarget.hasVSX())
19293+
return SDValue();
19294+
19295+
// Handle v2i64, v4i32, v8i16 and v16i8 types
19296+
if (!(VT == MVT::v8i16 || VT == MVT::v16i8 || VT == MVT::v4i32 ||
19297+
VT == MVT::v2i64))
19298+
return SDValue();
19299+
19300+
SDValue LHS = N->getOperand(0);
19301+
SDValue RHS = N->getOperand(1);
19302+
19303+
// Check if RHS is BUILD_VECTOR
19304+
if (RHS.getOpcode() != ISD::BUILD_VECTOR)
19305+
return SDValue();
19306+
19307+
// Check if all the elements are 1
19308+
unsigned NumOfEles = RHS.getNumOperands();
19309+
for (unsigned i = 0; i < NumOfEles; ++i) {
19310+
auto *CN = dyn_cast<ConstantSDNode>(RHS.getOperand(i));
19311+
if (!CN || CN->getSExtValue() != 1)
19312+
return SDValue();
19313+
}
19314+
SDLoc DL(N);
19315+
19316+
SDValue MinusOne = DAG.getConstant(APInt::getAllOnes(32), DL, MVT::i32);
19317+
SmallVector<SDValue, 4> Ops(4, MinusOne);
19318+
SDValue AllOnesVec = DAG.getBuildVector(MVT::v4i32, DL, Ops);
19319+
19320+
// Bitcast to the target vector type
19321+
SDValue Bitcast = DAG.getNode(ISD::BITCAST, DL, VT, AllOnesVec);
19322+
19323+
return DAG.getNode(ISD::SUB, DL, VT, LHS, Bitcast);
19324+
}
19325+
1928319326
SDValue PPCTargetLowering::combineADD(SDNode *N, DAGCombinerInfo &DCI) const {
1928419327
if (auto Value = combineADDToADDZE(N, DCI.DAG, Subtarget))
1928519328
return Value;
1928619329

1928719330
if (auto Value = combineADDToMAT_PCREL_ADDR(N, DCI.DAG, Subtarget))
1928819331
return Value;
1928919332

19333+
if (auto Value = combineADDToSUB(N, DCI.DAG, Subtarget))
19334+
return Value;
1929019335
return SDValue();
1929119336
}
1929219337

llvm/test/CodeGen/PowerPC/addition-vector-all-ones.ll

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -8,15 +8,14 @@
88
; RUN: llc -verify-machineinstrs -O3 -mcpu=pwr9 -mtriple=powerpc-ibm-aix \
99
; RUN: -ppc-asm-full-reg-names --ppc-vsr-nums-as-vr < %s | FileCheck %s
1010

11-
; The addition of vector `A` with vector of 1s currently uses `vspltisw` to generate vector of 1s followed by add operation.
11+
; Optimized version, which uses `xxleqv` to generate a vector of -1s and `vsubu*m` to subtract it, leveraging the identity A - (-1) = A + 1.
1212

1313
; Function for the vector type v2i64 `a + {1, 1}`
1414
define <2 x i64> @test_v2i64(<2 x i64> %a) {
1515
; CHECK-LABEL: test_v2i64:
1616
; CHECK: # %bb.0: # %entry
17-
; CHECK-NEXT: vspltisw v3, 1
18-
; CHECK-NEXT: vupklsw v3, v3
19-
; CHECK-NEXT: vaddudm v2, v2, v3
17+
; CHECK-NEXT: xxleqv v3, v3, v3
18+
; CHECK-NEXT: vsubudm v2, v2, v3
2019
; CHECK-NEXT: blr
2120
entry:
2221
%add = add <2 x i64> %a, splat (i64 1)
@@ -27,8 +26,8 @@ entry:
2726
define <4 x i32> @test_v4i32(<4 x i32> %a) {
2827
; CHECK-LABEL: test_v4i32:
2928
; CHECK: # %bb.0: # %entry
30-
; CHECK-NEXT: vspltisw v3, 1
31-
; CHECK-NEXT: vadduwm v2, v2, v3
29+
; CHECK-NEXT: xxleqv v3, v3, v3
30+
; CHECK-NEXT: vsubuwm v2, v2, v3
3231
; CHECK-NEXT: blr
3332
entry:
3433
%add = add <4 x i32> %a, splat (i32 1)
@@ -39,8 +38,8 @@ entry:
3938
define <8 x i16> @test_v8i16(<8 x i16> %a) {
4039
; CHECK-LABEL: test_v8i16:
4140
; CHECK: # %bb.0: # %entry
42-
; CHECK-NEXT: vspltish v3, 1
43-
; CHECK-NEXT: vadduhm v2, v2, v3
41+
; CHECK-NEXT: xxleqv v3, v3, v3
42+
; CHECK-NEXT: vsubuhm v2, v2, v3
4443
; CHECK-NEXT: blr
4544
entry:
4645
%add = add <8 x i16> %a, splat (i16 1)
@@ -51,8 +50,8 @@ entry:
5150
define <16 x i8> @test_16i8(<16 x i8> %a) {
5251
; CHECK-LABEL: test_16i8:
5352
; CHECK: # %bb.0: # %entry
54-
; CHECK-NEXT: xxspltib v3, 1
55-
; CHECK-NEXT: vaddubm v2, v2, v3
53+
; CHECK-NEXT: xxleqv v3, v3, v3
54+
; CHECK-NEXT: vsububm v2, v2, v3
5655
; CHECK-NEXT: blr
5756
entry:
5857
%add = add <16 x i8> %a, splat (i8 1)

llvm/test/CodeGen/PowerPC/vec_add_sub_doubleword.ll

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,8 @@ define <2 x i64> @test_add(<2 x i64> %x, <2 x i64> %y) nounwind {
1616
define <2 x i64> @increment_by_one(<2 x i64> %x) nounwind {
1717
; VSX-LABEL: increment_by_one:
1818
; VSX: # %bb.0:
19-
; VSX-NEXT: vspltisw 3, 1
20-
; VSX-NEXT: vupklsw 3, 3
21-
; VSX-NEXT: vaddudm 2, 2, 3
19+
; VSX-NEXT: xxleqv 35, 35, 35
20+
; VSX-NEXT: vsubudm 2, 2, 3
2221
; VSX-NEXT: blr
2322
;
2423
; NOVSX-LABEL: increment_by_one:

orc-rt/include/orc-rt-c/WrapperFunction.h

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ typedef struct {
5454
* Asynchronous return function for an orc-rt wrapper function.
5555
*/
5656
typedef void (*orc_rt_WrapperFunctionReturn)(
57-
orc_rt_SessionRef Session, uint64_t CallId,
57+
orc_rt_SessionRef S, uint64_t CallId,
5858
orc_rt_WrapperFunctionBuffer ResultBytes);
5959

6060
/**
@@ -65,8 +65,7 @@ typedef void (*orc_rt_WrapperFunctionReturn)(
6565
* CallId holds a pointer to the context object for this particular call.
6666
* Return holds a pointer to the return function.
6767
*/
68-
typedef void (*orc_rt_WrapperFunction)(orc_rt_SessionRef Session,
69-
uint64_t CallId,
68+
typedef void (*orc_rt_WrapperFunction)(orc_rt_SessionRef S, uint64_t CallId,
7069
orc_rt_WrapperFunctionReturn Return,
7170
orc_rt_WrapperFunctionBuffer ArgBytes);
7271

orc-rt/include/orc-rt/SPSWrapperFunction.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -124,10 +124,10 @@ template <typename SPSSig> struct SPSWrapperFunction {
124124
}
125125

126126
template <typename Handler>
127-
static void handle(orc_rt_SessionRef Session, uint64_t CallId,
127+
static void handle(orc_rt_SessionRef S, uint64_t CallId,
128128
orc_rt_WrapperFunctionReturn Return,
129129
WrapperFunctionBuffer ArgBytes, Handler &&H) {
130-
WrapperFunction::handle(Session, CallId, Return, std::move(ArgBytes),
130+
WrapperFunction::handle(S, CallId, Return, std::move(ArgBytes),
131131
WrapperFunctionSPSSerializer<SPSSig>(),
132132
std::forward<Handler>(H));
133133
}

orc-rt/include/orc-rt/SimpleNativeMemoryMap.h

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -114,21 +114,21 @@ class SimpleNativeMemoryMap : public ResourceManager {
114114
} // namespace orc_rt
115115

116116
ORC_RT_SPS_INTERFACE void orc_rt_SimpleNativeMemoryMap_reserve_sps_wrapper(
117-
orc_rt_SessionRef Session, uint64_t CallId,
118-
orc_rt_WrapperFunctionReturn Return, orc_rt_WrapperFunctionBuffer ArgBytes);
117+
orc_rt_SessionRef S, uint64_t CallId, orc_rt_WrapperFunctionReturn Return,
118+
orc_rt_WrapperFunctionBuffer ArgBytes);
119119

120120
ORC_RT_SPS_INTERFACE void
121121
orc_rt_SimpleNativeMemoryMap_releaseMultiple_sps_wrapper(
122-
orc_rt_SessionRef Session, uint64_t CallId,
123-
orc_rt_WrapperFunctionReturn Return, orc_rt_WrapperFunctionBuffer ArgBytes);
122+
orc_rt_SessionRef S, uint64_t CallId, orc_rt_WrapperFunctionReturn Return,
123+
orc_rt_WrapperFunctionBuffer ArgBytes);
124124

125125
ORC_RT_SPS_INTERFACE void orc_rt_SimpleNativeMemoryMap_initialize_sps_wrapper(
126-
orc_rt_SessionRef Session, uint64_t CallId,
127-
orc_rt_WrapperFunctionReturn Return, orc_rt_WrapperFunctionBuffer ArgBytes);
126+
orc_rt_SessionRef S, uint64_t CallId, orc_rt_WrapperFunctionReturn Return,
127+
orc_rt_WrapperFunctionBuffer ArgBytes);
128128

129129
ORC_RT_SPS_INTERFACE void
130130
orc_rt_SimpleNativeMemoryMap_deinitializeMultiple_sps_wrapper(
131-
orc_rt_SessionRef Session, uint64_t CallId,
132-
orc_rt_WrapperFunctionReturn Return, orc_rt_WrapperFunctionBuffer ArgBytes);
131+
orc_rt_SessionRef S, uint64_t CallId, orc_rt_WrapperFunctionReturn Return,
132+
orc_rt_WrapperFunctionBuffer ArgBytes);
133133

134134
#endif // ORC_RT_SIMPLENATIVEMEMORYMAP_H

orc-rt/include/orc-rt/WrapperFunction.h

Lines changed: 24 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -137,16 +137,15 @@ using WFHandlerTraits = CallableTraitsHelper<WFHandlerTraitsImpl, C>;
137137

138138
template <typename Serializer> class StructuredYieldBase {
139139
public:
140-
StructuredYieldBase(orc_rt_SessionRef Session, uint64_t CallId,
141-
orc_rt_WrapperFunctionReturn Return, Serializer &&S)
142-
: Session(Session), CallId(CallId), Return(Return),
143-
S(std::forward<Serializer>(S)) {}
140+
StructuredYieldBase(orc_rt_SessionRef S, uint64_t CallId,
141+
orc_rt_WrapperFunctionReturn Return, Serializer &&Z)
142+
: S(S), CallId(CallId), Return(Return), Z(std::forward<Serializer>(Z)) {}
144143

145144
protected:
146-
orc_rt_SessionRef Session;
145+
orc_rt_SessionRef S;
147146
uint64_t CallId;
148147
orc_rt_WrapperFunctionReturn Return;
149-
std::decay_t<Serializer> S;
148+
std::decay_t<Serializer> Z;
150149
};
151150

152151
template <typename RetT, typename Serializer> class StructuredYield;
@@ -157,10 +156,10 @@ class StructuredYield<std::tuple<RetT>, Serializer>
157156
public:
158157
using StructuredYieldBase<Serializer>::StructuredYieldBase;
159158
void operator()(RetT &&R) {
160-
if (auto ResultBytes = this->S.result().serialize(std::forward<RetT>(R)))
161-
this->Return(this->Session, this->CallId, ResultBytes->release());
159+
if (auto ResultBytes = this->Z.result().serialize(std::forward<RetT>(R)))
160+
this->Return(this->S, this->CallId, ResultBytes->release());
162161
else
163-
this->Return(this->Session, this->CallId,
162+
this->Return(this->S, this->CallId,
164163
WrapperFunctionBuffer::createOutOfBandError(
165164
"Could not serialize wrapper function result data")
166165
.release());
@@ -173,8 +172,7 @@ class StructuredYield<std::tuple<>, Serializer>
173172
public:
174173
using StructuredYieldBase<Serializer>::StructuredYieldBase;
175174
void operator()() {
176-
this->Return(this->Session, this->CallId,
177-
WrapperFunctionBuffer().release());
175+
this->Return(this->S, this->CallId, WrapperFunctionBuffer().release());
178176
}
179177
};
180178

@@ -251,12 +249,12 @@ struct WrapperFunction {
251249
///
252250
///
253251
/// static void adder_add_async_sps_wrapper(
254-
/// orc_rt_SessionRef Session, uint64_t CallId,
252+
/// orc_rt_SessionRef S, uint64_t CallId,
255253
/// orc_rt_WrapperFunctionReturn Return,
256254
/// orc_rt_WrapperFunctionBuffer ArgBytes) {
257255
/// using SPSSig = SPSString(SPSExecutorAddr, int32_t, bool);
258256
/// SPSWrapperFunction<SPSSig>::handle(
259-
/// Session, CallId, Return, ArgBytes,
257+
/// S, CallId, Return, ArgBytes,
260258
/// WrapperFunction::handleWithAsyncMethod(&MyClass::myMethod));
261259
/// }
262260
/// @endcode
@@ -313,12 +311,12 @@ struct WrapperFunction {
313311
///
314312
///
315313
/// static void adder_add_sync_sps_wrapper(
316-
/// orc_rt_SessionRef Session, uint64_t CallId,
314+
/// orc_rt_SessionRef S, uint64_t CallId,
317315
/// orc_rt_WrapperFunctionReturn Return,
318316
/// orc_rt_WrapperFunctionBuffer ArgBytes) {
319317
/// using SPSSig = SPSString(SPSExecutorAddr, int32_t, bool);
320318
/// SPSWrapperFunction<SPSSig>::handle(
321-
/// Session, CallId, Return, ArgBytes,
319+
/// S, CallId, Return, ArgBytes,
322320
/// WrapperFunction::handleWithSyncMethod(&Adder::addSync));
323321
/// }
324322
/// @endcode
@@ -336,7 +334,7 @@ struct WrapperFunction {
336334
/// given Caller object.
337335
template <typename Caller, typename Serializer, typename ResultHandler,
338336
typename... ArgTs>
339-
static void call(Caller &&C, Serializer &&S, ResultHandler &&RH,
337+
static void call(Caller &&C, Serializer &&Z, ResultHandler &&RH,
340338
ArgTs &&...Args) {
341339
typedef CallableArgInfo<ResultHandler> ResultHandlerTraits;
342340
static_assert(std::is_void_v<typename ResultHandlerTraits::return_type>,
@@ -346,16 +344,15 @@ struct WrapperFunction {
346344
"Result-handler should have exactly one argument");
347345
typedef typename ResultHandlerTraits::args_tuple_type ResultTupleType;
348346

349-
if (auto ArgBytes = S.arguments().serialize(std::forward<ArgTs>(Args)...)) {
347+
if (auto ArgBytes = Z.arguments().serialize(std::forward<ArgTs>(Args)...)) {
350348
C(
351-
[RH = std::move(RH),
352-
S = std::move(S)](orc_rt_SessionRef Session,
353-
WrapperFunctionBuffer ResultBytes) mutable {
349+
[RH = std::move(RH), Z = std::move(Z)](
350+
orc_rt_SessionRef S, WrapperFunctionBuffer ResultBytes) mutable {
354351
if (const char *ErrMsg = ResultBytes.getOutOfBandError())
355352
RH(make_error<StringError>(ErrMsg));
356353
else
357354
RH(detail::ResultDeserializer<ResultTupleType, Serializer>::
358-
deserialize(std::move(ResultBytes), S));
355+
deserialize(std::move(ResultBytes), Z));
359356
},
360357
std::move(*ArgBytes));
361358
} else
@@ -368,9 +365,9 @@ struct WrapperFunction {
368365
/// This utility deserializes and serializes arguments and return values
369366
/// (using the given Serializer), and calls the given handler.
370367
template <typename Serializer, typename Handler>
371-
static void handle(orc_rt_SessionRef Session, uint64_t CallId,
368+
static void handle(orc_rt_SessionRef S, uint64_t CallId,
372369
orc_rt_WrapperFunctionReturn Return,
373-
WrapperFunctionBuffer ArgBytes, Serializer &&S,
370+
WrapperFunctionBuffer ArgBytes, Serializer &&Z,
374371
Handler &&H) {
375372
typedef detail::WFHandlerTraits<Handler> HandlerTraits;
376373
typedef typename HandlerTraits::ArgTupleType ArgTuple;
@@ -380,16 +377,16 @@ struct WrapperFunction {
380377
typedef typename CallableArgInfo<Yield>::args_tuple_type RetTupleType;
381378

382379
if (ArgBytes.getOutOfBandError())
383-
return Return(Session, CallId, ArgBytes.release());
380+
return Return(S, CallId, ArgBytes.release());
384381

385-
if (auto Args = S.arguments().template deserialize<ArgTuple>(ArgBytes))
382+
if (auto Args = Z.arguments().template deserialize<ArgTuple>(ArgBytes))
386383
std::apply(HandlerTraits::forwardArgsAsRequested(bind_front(
387384
std::forward<Handler>(H),
388385
detail::StructuredYield<RetTupleType, Serializer>(
389-
Session, CallId, Return, std::move(S)))),
386+
S, CallId, Return, std::move(Z)))),
390387
*Args);
391388
else
392-
Return(Session, CallId,
389+
Return(S, CallId,
393390
WrapperFunctionBuffer::createOutOfBandError(
394391
"Could not deserialize wrapper function arg data")
395392
.release());

orc-rt/lib/executor/SimpleNativeMemoryMap.cpp

Lines changed: 8 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -367,45 +367,41 @@ Error SimpleNativeMemoryMap::recordDeallocActions(
367367
}
368368

369369
ORC_RT_SPS_INTERFACE void orc_rt_SimpleNativeMemoryMap_reserve_sps_wrapper(
370-
orc_rt_SessionRef Session, uint64_t CallId,
371-
orc_rt_WrapperFunctionReturn Return,
370+
orc_rt_SessionRef S, uint64_t CallId, orc_rt_WrapperFunctionReturn Return,
372371
orc_rt_WrapperFunctionBuffer ArgBytes) {
373372
using Sig = SPSExpected<SPSExecutorAddr>(SPSExecutorAddr, SPSSize);
374373
SPSWrapperFunction<Sig>::handle(
375-
Session, CallId, Return, ArgBytes,
374+
S, CallId, Return, ArgBytes,
376375
WrapperFunction::handleWithAsyncMethod(&SimpleNativeMemoryMap::reserve));
377376
}
378377

379378
ORC_RT_SPS_INTERFACE void
380379
orc_rt_SimpleNativeMemoryMap_releaseMultiple_sps_wrapper(
381-
orc_rt_SessionRef Session, uint64_t CallId,
382-
orc_rt_WrapperFunctionReturn Return,
380+
orc_rt_SessionRef S, uint64_t CallId, orc_rt_WrapperFunctionReturn Return,
383381
orc_rt_WrapperFunctionBuffer ArgBytes) {
384382
using Sig = SPSError(SPSExecutorAddr, SPSSequence<SPSExecutorAddr>);
385-
SPSWrapperFunction<Sig>::handle(Session, CallId, Return, ArgBytes,
383+
SPSWrapperFunction<Sig>::handle(S, CallId, Return, ArgBytes,
386384
WrapperFunction::handleWithAsyncMethod(
387385
&SimpleNativeMemoryMap::releaseMultiple));
388386
}
389387

390388
ORC_RT_SPS_INTERFACE void orc_rt_SimpleNativeMemoryMap_initialize_sps_wrapper(
391-
orc_rt_SessionRef Session, uint64_t CallId,
392-
orc_rt_WrapperFunctionReturn Return,
389+
orc_rt_SessionRef S, uint64_t CallId, orc_rt_WrapperFunctionReturn Return,
393390
orc_rt_WrapperFunctionBuffer ArgBytes) {
394391
using Sig = SPSExpected<SPSExecutorAddr>(
395392
SPSExecutorAddr, SPSSimpleNativeMemoryMapInitializeRequest);
396-
SPSWrapperFunction<Sig>::handle(Session, CallId, Return, ArgBytes,
393+
SPSWrapperFunction<Sig>::handle(S, CallId, Return, ArgBytes,
397394
WrapperFunction::handleWithAsyncMethod(
398395
&SimpleNativeMemoryMap::initialize));
399396
}
400397

401398
ORC_RT_SPS_INTERFACE void
402399
orc_rt_SimpleNativeMemoryMap_deinitializeMultiple_sps_wrapper(
403-
orc_rt_SessionRef Session, uint64_t CallId,
404-
orc_rt_WrapperFunctionReturn Return,
400+
orc_rt_SessionRef S, uint64_t CallId, orc_rt_WrapperFunctionReturn Return,
405401
orc_rt_WrapperFunctionBuffer ArgBytes) {
406402
using Sig = SPSError(SPSExecutorAddr, SPSSequence<SPSExecutorAddr>);
407403
SPSWrapperFunction<Sig>::handle(
408-
Session, CallId, Return, ArgBytes,
404+
S, CallId, Return, ArgBytes,
409405
WrapperFunction::handleWithAsyncMethod(
410406
&SimpleNativeMemoryMap::deinitializeMultiple));
411407
}

0 commit comments

Comments
 (0)