Skip to content

Commit 1201af1

Browse files
authored
Re-land upstream PR#146667: [flang][OpenMP] Allocate reduction init temps on the stack for GPUs (llvm#3892)
2 parents 10773ef + a6761d7 commit 1201af1

15 files changed

+125
-116
lines changed

flang/lib/Lower/Support/PrivateReductionUtils.cpp

Lines changed: 31 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -503,31 +503,37 @@ void PopulateInitAndCleanupRegionsHelper::initAndCleanupBoxedArray(
503503

504504
// TODO: Allocate on the heap if the whole reduction/privatization is nested
505505
// inside of a loop
506-
mlir::Value tempValue;
507-
std::optional<int64_t> cstNeedsDealloc;
508-
if (isAllocatableOrPointer) {
509-
auto [heapTemp, needsDealloc] = createTempFromMold(loc, builder, source);
510-
tempValue = heapTemp;
511-
cstNeedsDealloc = fir::getIntIfConstant(needsDealloc);
512-
} else {
513-
tempValue = hlfir::createStackTempFromMold(loc, builder, source);
514-
cstNeedsDealloc = false;
515-
}
516-
hlfir::Entity temp{tempValue};
517-
518-
// if needsDealloc isn't statically false, add cleanup region. Always
519-
// do this for allocatable boxes because they might have been re-allocated
520-
// in the body of the loop/parallel region
521-
assert(cstNeedsDealloc.has_value() &&
522-
"createTempFromMold decides this statically");
523-
if (cstNeedsDealloc.has_value() && *cstNeedsDealloc != false) {
524-
mlir::OpBuilder::InsertionGuard guard(builder);
525-
createCleanupRegion(converter, loc, argType, cleanupRegion, sym,
526-
isDoConcurrent);
527-
} else {
528-
assert(!isAllocatableOrPointer &&
529-
"Pointer-like arrays must be heap allocated");
530-
}
506+
auto temp = [&]() {
507+
bool shouldAllocateOnStack = false;
508+
509+
// On the GPU, always allocate on the stack since heap allocations are very
510+
// expensive.
511+
if (auto offloadMod = llvm::dyn_cast<mlir::omp::OffloadModuleInterface>(
512+
*builder.getModule()))
513+
shouldAllocateOnStack = offloadMod.getIsGPU();
514+
515+
if (shouldAllocateOnStack)
516+
return createStackTempFromMold(loc, builder, source);
517+
518+
auto [temp, needsDealloc] = createTempFromMold(loc, builder, source);
519+
520+
// if needsDealloc isn't statically false, add cleanup region. Always
521+
// do this for allocatable boxes because they might have been re-allocated
522+
// in the body of the loop/parallel region
523+
std::optional<int64_t> cstNeedsDealloc =
524+
fir::getIntIfConstant(needsDealloc);
525+
assert(cstNeedsDealloc.has_value() &&
526+
"createTempFromMold decides this statically");
527+
if (cstNeedsDealloc.has_value() && *cstNeedsDealloc != false) {
528+
mlir::OpBuilder::InsertionGuard guard(builder);
529+
createCleanupRegion(converter, loc, argType, cleanupRegion, sym,
530+
isDoConcurrent);
531+
} else {
532+
assert(!isAllocatableOrPointer &&
533+
"Pointer-like arrays must be heap allocated");
534+
}
535+
return temp;
536+
}();
531537

532538
// Put the temporary inside of a box:
533539
// hlfir::genVariableBox doesn't handle non-default lower bounds

flang/test/Lower/OpenMP/DelayedPrivatization/target-private-multiple-variables.f90

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
! XFAIL: *
21
! Tests delayed privatization for `targets ... private(..)` for allocatables.
32

43
! RUN: %flang_fc1 -emit-hlfir -fopenmp -mmlir --enable-delayed-privatization-staging \

flang/test/Lower/OpenMP/delayed-privatization-array.f90

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
! Test delayed privatization for arrays.
2-
! XFAIL: *
32

43
! RUN: split-file %s %t
54

flang/test/Lower/OpenMP/parallel-reduction-array-lb.f90

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
! RUN: bbc -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s
22
! RUN: %flang_fc1 -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s
3-
! XFAIL: *
43

54
program reduce
65
integer, dimension(2:4, 2) :: i = 0
Lines changed: 88 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
1-
! RUN: bbc -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s
2-
! RUN: %flang_fc1 -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s
3-
! XFAIL: *
1+
! RUN: bbc -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s --check-prefix=CPU
2+
! RUN: %flang_fc1 -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s --check-prefix=CPU
3+
4+
! RUN: bbc -emit-hlfir -fopenmp -fopenmp-is-target-device -fopenmp-is-gpu -o - %s 2>&1 | FileCheck %s --check-prefix=GPU
5+
! RUN: %flang_fc1 -triple amdgcn-amd-amdhsa -emit-hlfir -fopenmp -fopenmp-is-target-device -o - %s 2>&1 | FileCheck %s --check-prefix=GPU
46

57
program reduce
68
integer, dimension(3) :: i = 0
@@ -14,81 +16,88 @@ program reduce
1416
print *,i
1517
end program
1618

17-
! CHECK-LABEL: omp.declare_reduction @add_reduction_byref_box_3xi32 : !fir.ref<!fir.box<!fir.array<3xi32>>> alloc {
18-
! CHECK: %[[VAL_8:.*]] = fir.alloca !fir.box<!fir.array<3xi32>>
19-
! CHECK: omp.yield(%[[VAL_8]] : !fir.ref<!fir.box<!fir.array<3xi32>>>)
20-
! CHECK-LABEL: } init {
21-
! CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref<!fir.box<!fir.array<3xi32>>>, %[[ALLOC:.*]]: !fir.ref<!fir.box<!fir.array<3xi32>>>):
22-
! CHECK: %[[VAL_2:.*]] = arith.constant 0 : i32
23-
! CHECK: %[[VAL_3:.*]] = fir.load %[[VAL_0]] : !fir.ref<!fir.box<!fir.array<3xi32>>>
24-
! CHECK: %[[VAL_4:.*]] = arith.constant 3 : index
25-
! CHECK: %[[VAL_5:.*]] = fir.shape %[[VAL_4]] : (index) -> !fir.shape<1>
26-
! CHECK: %[[VAL_1:.*]] = fir.allocmem !fir.array<3xi32> {bindc_name = ".tmp", uniq_name = ""}
27-
! CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_1]](%[[VAL_5]]) {uniq_name = ".tmp"} : (!fir.heap<!fir.array<3xi32>>,
28-
! CHECK: %[[TRUE:.*]] = arith.constant true
19+
! CPU-LABEL: omp.declare_reduction @add_reduction_byref_box_3xi32 : !fir.ref<!fir.box<!fir.array<3xi32>>> alloc {
20+
! CPU: %[[VAL_8:.*]] = fir.alloca !fir.box<!fir.array<3xi32>>
21+
! CPU: omp.yield(%[[VAL_8]] : !fir.ref<!fir.box<!fir.array<3xi32>>>)
22+
! CPU-LABEL: } init {
23+
! CPU: ^bb0(%[[VAL_0:.*]]: !fir.ref<!fir.box<!fir.array<3xi32>>>, %[[ALLOC:.*]]: !fir.ref<!fir.box<!fir.array<3xi32>>>):
24+
! CPU: %[[VAL_2:.*]] = arith.constant 0 : i32
25+
! CPU: %[[VAL_3:.*]] = fir.load %[[VAL_0]] : !fir.ref<!fir.box<!fir.array<3xi32>>>
26+
! CPU: %[[VAL_4:.*]] = arith.constant 3 : index
27+
! CPU: %[[VAL_5:.*]] = fir.shape %[[VAL_4]] : (index) -> !fir.shape<1>
28+
! CPU: %[[VAL_1:.*]] = fir.allocmem !fir.array<3xi32> {bindc_name = ".tmp", uniq_name = ""}
29+
! CPU: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_1]](%[[VAL_5]]) {uniq_name = ".tmp"} : (!fir.heap<!fir.array<3xi32>>,
30+
! CPU: %[[TRUE:.*]] = arith.constant true
2931
!fir.shape<1>) -> (!fir.heap<!fir.array<3xi32>>, !fir.heap<!fir.array<3xi32>>)
30-
! CHECK: %[[C0:.*]] = arith.constant 0 : index
31-
! CHECK: %[[DIMS:.*]]:3 = fir.box_dims %[[VAL_3]], %[[C0]] : (!fir.box<!fir.array<3xi32>>, index) -> (index, index, index)
32-
! CHECK: %[[SHIFT:.*]] = fir.shape_shift %[[DIMS]]#0, %[[DIMS]]#1 : (index, index) -> !fir.shapeshift<1>
33-
! CHECK: %[[VAL_7:.*]] = fir.embox %[[VAL_6]]#0(%[[SHIFT]]) : (!fir.heap<!fir.array<3xi32>>, !fir.shapeshift<1>) -> !fir.box<!fir.array<3xi32>>
34-
! CHECK: hlfir.assign %[[VAL_2]] to %[[VAL_7]] : i32, !fir.box<!fir.array<3xi32>>
35-
! CHECK: fir.store %[[VAL_7]] to %[[ALLOC]] : !fir.ref<!fir.box<!fir.array<3xi32>>>
36-
! CHECK: omp.yield(%[[ALLOC]] : !fir.ref<!fir.box<!fir.array<3xi32>>>)
37-
! CHECK: } combiner {
38-
! CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref<!fir.box<!fir.array<3xi32>>>, %[[VAL_1:.*]]: !fir.ref<!fir.box<!fir.array<3xi32>>>):
39-
! CHECK: %[[VAL_2:.*]] = fir.load %[[VAL_0]] : !fir.ref<!fir.box<!fir.array<3xi32>>>
40-
! CHECK: %[[VAL_3:.*]] = fir.load %[[VAL_1]] : !fir.ref<!fir.box<!fir.array<3xi32>>>
41-
! CHECK: %[[C1:.*]] = arith.constant 1 : index
42-
! CHECK: %[[C3:.*]] = arith.constant 3 : index
43-
! CHECK: %[[SHAPE_SHIFT:.*]] = fir.shape_shift %[[C1]], %[[C3]] : (index, index) -> !fir.shapeshift<1>
44-
! CHECK: %[[C1_0:.*]] = arith.constant 1 : index
45-
! CHECK: fir.do_loop %[[VAL_8:.*]] = %[[C1_0]] to %[[C3]] step %[[C1_0]] unordered {
46-
! CHECK: %[[VAL_9:.*]] = fir.array_coor %[[VAL_2]](%[[SHAPE_SHIFT]]) %[[VAL_8]] : (!fir.box<!fir.array<3xi32>>, !fir.shapeshift<1>, index) -> !fir.ref<i32>
47-
! CHECK: %[[VAL_10:.*]] = fir.array_coor %[[VAL_3]](%[[SHAPE_SHIFT]]) %[[VAL_8]] : (!fir.box<!fir.array<3xi32>>, !fir.shapeshift<1>, index) -> !fir.ref<i32>
48-
! CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_9]] : !fir.ref<i32>
49-
! CHECK: %[[VAL_12:.*]] = fir.load %[[VAL_10]] : !fir.ref<i32>
50-
! CHECK: %[[VAL_13:.*]] = arith.addi %[[VAL_11]], %[[VAL_12]] : i32
51-
! CHECK: fir.store %[[VAL_13]] to %[[VAL_9]] : !fir.ref<i32>
52-
! CHECK: }
53-
! CHECK: omp.yield(%[[VAL_0]] : !fir.ref<!fir.box<!fir.array<3xi32>>>)
54-
! CHECK: } cleanup {
55-
! CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref<!fir.box<!fir.array<3xi32>>>):
56-
! CHECK: %[[VAL_1:.*]] = fir.load %[[VAL_0]] : !fir.ref<!fir.box<!fir.array<3xi32>>>
57-
! CHECK: %[[VAL_2:.*]] = fir.box_addr %[[VAL_1]] : (!fir.box<!fir.array<3xi32>>) -> !fir.ref<!fir.array<3xi32>>
58-
! CHECK: %[[VAL_3:.*]] = fir.convert %[[VAL_2]] : (!fir.ref<!fir.array<3xi32>>) -> i64
59-
! CHECK: %[[VAL_4:.*]] = arith.constant 0 : i64
60-
! CHECK: %[[VAL_5:.*]] = arith.cmpi ne, %[[VAL_3]], %[[VAL_4]] : i64
61-
! CHECK: fir.if %[[VAL_5]] {
62-
! CHECK: %[[VAL_6:.*]] = fir.convert %[[VAL_2]] : (!fir.ref<!fir.array<3xi32>>) -> !fir.heap<!fir.array<3xi32>>
63-
! CHECK: fir.freemem %[[VAL_6]] : !fir.heap<!fir.array<3xi32>>
64-
! CHECK: }
65-
! CHECK: omp.yield
66-
! CHECK: }
32+
! CPU: %[[C0:.*]] = arith.constant 0 : index
33+
! CPU: %[[DIMS:.*]]:3 = fir.box_dims %[[VAL_3]], %[[C0]] : (!fir.box<!fir.array<3xi32>>, index) -> (index, index, index)
34+
! CPU: %[[SHIFT:.*]] = fir.shape_shift %[[DIMS]]#0, %[[DIMS]]#1 : (index, index) -> !fir.shapeshift<1>
35+
! CPU: %[[VAL_7:.*]] = fir.embox %[[VAL_6]]#0(%[[SHIFT]]) : (!fir.heap<!fir.array<3xi32>>, !fir.shapeshift<1>) -> !fir.box<!fir.array<3xi32>>
36+
! CPU: hlfir.assign %[[VAL_2]] to %[[VAL_7]] : i32, !fir.box<!fir.array<3xi32>>
37+
! CPU: fir.store %[[VAL_7]] to %[[ALLOC]] : !fir.ref<!fir.box<!fir.array<3xi32>>>
38+
! CPU: omp.yield(%[[ALLOC]] : !fir.ref<!fir.box<!fir.array<3xi32>>>)
39+
! CPU: } combiner {
40+
! CPU: ^bb0(%[[VAL_0:.*]]: !fir.ref<!fir.box<!fir.array<3xi32>>>, %[[VAL_1:.*]]: !fir.ref<!fir.box<!fir.array<3xi32>>>):
41+
! CPU: %[[VAL_2:.*]] = fir.load %[[VAL_0]] : !fir.ref<!fir.box<!fir.array<3xi32>>>
42+
! CPU: %[[VAL_3:.*]] = fir.load %[[VAL_1]] : !fir.ref<!fir.box<!fir.array<3xi32>>>
43+
! CPU: %[[C1:.*]] = arith.constant 1 : index
44+
! CPU: %[[C3:.*]] = arith.constant 3 : index
45+
! CPU: %[[SHAPE_SHIFT:.*]] = fir.shape_shift %[[C1]], %[[C3]] : (index, index) -> !fir.shapeshift<1>
46+
! CPU: %[[C1_0:.*]] = arith.constant 1 : index
47+
! CPU: fir.do_loop %[[VAL_8:.*]] = %[[C1_0]] to %[[C3]] step %[[C1_0]] unordered {
48+
! CPU: %[[VAL_9:.*]] = fir.array_coor %[[VAL_2]](%[[SHAPE_SHIFT]]) %[[VAL_8]] : (!fir.box<!fir.array<3xi32>>, !fir.shapeshift<1>, index) -> !fir.ref<i32>
49+
! CPU: %[[VAL_10:.*]] = fir.array_coor %[[VAL_3]](%[[SHAPE_SHIFT]]) %[[VAL_8]] : (!fir.box<!fir.array<3xi32>>, !fir.shapeshift<1>, index) -> !fir.ref<i32>
50+
! CPU: %[[VAL_11:.*]] = fir.load %[[VAL_9]] : !fir.ref<i32>
51+
! CPU: %[[VAL_12:.*]] = fir.load %[[VAL_10]] : !fir.ref<i32>
52+
! CPU: %[[VAL_13:.*]] = arith.addi %[[VAL_11]], %[[VAL_12]] : i32
53+
! CPU: fir.store %[[VAL_13]] to %[[VAL_9]] : !fir.ref<i32>
54+
! CPU: }
55+
! CPU: omp.yield(%[[VAL_0]] : !fir.ref<!fir.box<!fir.array<3xi32>>>)
56+
! CPU: } cleanup {
57+
! CPU: ^bb0(%[[VAL_0:.*]]: !fir.ref<!fir.box<!fir.array<3xi32>>>):
58+
! CPU: %[[VAL_1:.*]] = fir.load %[[VAL_0]] : !fir.ref<!fir.box<!fir.array<3xi32>>>
59+
! CPU: %[[VAL_2:.*]] = fir.box_addr %[[VAL_1]] : (!fir.box<!fir.array<3xi32>>) -> !fir.ref<!fir.array<3xi32>>
60+
! CPU: %[[VAL_3:.*]] = fir.convert %[[VAL_2]] : (!fir.ref<!fir.array<3xi32>>) -> i64
61+
! CPU: %[[VAL_4:.*]] = arith.constant 0 : i64
62+
! CPU: %[[VAL_5:.*]] = arith.cmpi ne, %[[VAL_3]], %[[VAL_4]] : i64
63+
! CPU: fir.if %[[VAL_5]] {
64+
! CPU: %[[VAL_6:.*]] = fir.convert %[[VAL_2]] : (!fir.ref<!fir.array<3xi32>>) -> !fir.heap<!fir.array<3xi32>>
65+
! CPU: fir.freemem %[[VAL_6]] : !fir.heap<!fir.array<3xi32>>
66+
! CPU: }
67+
! CPU: omp.yield
68+
! CPU: }
69+
70+
! CPU-LABEL: func.func @_QQmain()
71+
! CPU: %[[VAL_0:.*]] = fir.address_of(@_QFEi) : !fir.ref<!fir.array<3xi32>>
72+
! CPU: %[[VAL_1:.*]] = arith.constant 3 : index
73+
! CPU: %[[VAL_2:.*]] = fir.shape %[[VAL_1]] : (index) -> !fir.shape<1>
74+
! CPU: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_0]](%[[VAL_2]]) {uniq_name = "_QFEi"} : (!fir.ref<!fir.array<3xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<3xi32>>, !fir.ref<!fir.array<3xi32>>)
75+
! CPU: %[[VAL_4:.*]] = fir.embox %[[VAL_3]]#0(%[[VAL_2]]) : (!fir.ref<!fir.array<3xi32>>, !fir.shape<1>) -> !fir.box<!fir.array<3xi32>>
76+
! CPU: %[[VAL_5:.*]] = fir.alloca !fir.box<!fir.array<3xi32>>
77+
! CPU: fir.store %[[VAL_4]] to %[[VAL_5]] : !fir.ref<!fir.box<!fir.array<3xi32>>>
78+
! CPU: omp.parallel reduction(byref @add_reduction_byref_box_3xi32 %[[VAL_5]] -> %[[VAL_6:.*]] : !fir.ref<!fir.box<!fir.array<3xi32>>>) {
79+
! CPU: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_6]] {uniq_name = "_QFEi"} : (!fir.ref<!fir.box<!fir.array<3xi32>>>) -> (!fir.ref<!fir.box<!fir.array<3xi32>>>, !fir.ref<!fir.box<!fir.array<3xi32>>>)
80+
! CPU: %[[VAL_8:.*]] = arith.constant 1 : i32
81+
! CPU: %[[VAL_9:.*]] = fir.load %[[VAL_7]]#0 : !fir.ref<!fir.box<!fir.array<3xi32>>>
82+
! CPU: %[[VAL_10:.*]] = arith.constant 1 : index
83+
! CPU: %[[VAL_11:.*]] = hlfir.designate %[[VAL_9]] (%[[VAL_10]]) : (!fir.box<!fir.array<3xi32>>, index) -> !fir.ref<i32>
84+
! CPU: hlfir.assign %[[VAL_8]] to %[[VAL_11]] : i32, !fir.ref<i32>
85+
! CPU: %[[VAL_12:.*]] = arith.constant 2 : i32
86+
! CPU: %[[VAL_13:.*]] = fir.load %[[VAL_7]]#0 : !fir.ref<!fir.box<!fir.array<3xi32>>>
87+
! CPU: %[[VAL_14:.*]] = arith.constant 2 : index
88+
! CPU: %[[VAL_15:.*]] = hlfir.designate %[[VAL_13]] (%[[VAL_14]]) : (!fir.box<!fir.array<3xi32>>, index) -> !fir.ref<i32>
89+
! CPU: hlfir.assign %[[VAL_12]] to %[[VAL_15]] : i32, !fir.ref<i32>
90+
! CPU: %[[VAL_16:.*]] = arith.constant 3 : i32
91+
! CPU: %[[VAL_17:.*]] = fir.load %[[VAL_7]]#0 : !fir.ref<!fir.box<!fir.array<3xi32>>>
92+
! CPU: %[[VAL_18:.*]] = arith.constant 3 : index
93+
! CPU: %[[VAL_19:.*]] = hlfir.designate %[[VAL_17]] (%[[VAL_18]]) : (!fir.box<!fir.array<3xi32>>, index) -> !fir.ref<i32>
94+
! CPU: hlfir.assign %[[VAL_16]] to %[[VAL_19]] : i32, !fir.ref<i32>
95+
! CPU: omp.terminator
96+
! CPU: }
6797

68-
! CHECK-LABEL: func.func @_QQmain()
69-
! CHECK: %[[VAL_0:.*]] = fir.address_of(@_QFEi) : !fir.ref<!fir.array<3xi32>>
70-
! CHECK: %[[VAL_1:.*]] = arith.constant 3 : index
71-
! CHECK: %[[VAL_2:.*]] = fir.shape %[[VAL_1]] : (index) -> !fir.shape<1>
72-
! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_0]](%[[VAL_2]]) {uniq_name = "_QFEi"} : (!fir.ref<!fir.array<3xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<3xi32>>, !fir.ref<!fir.array<3xi32>>)
73-
! CHECK: %[[VAL_4:.*]] = fir.embox %[[VAL_3]]#0(%[[VAL_2]]) : (!fir.ref<!fir.array<3xi32>>, !fir.shape<1>) -> !fir.box<!fir.array<3xi32>>
74-
! CHECK: %[[VAL_5:.*]] = fir.alloca !fir.box<!fir.array<3xi32>>
75-
! CHECK: fir.store %[[VAL_4]] to %[[VAL_5]] : !fir.ref<!fir.box<!fir.array<3xi32>>>
76-
! CHECK: omp.parallel reduction(byref @add_reduction_byref_box_3xi32 %[[VAL_5]] -> %[[VAL_6:.*]] : !fir.ref<!fir.box<!fir.array<3xi32>>>) {
77-
! CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_6]] {uniq_name = "_QFEi"} : (!fir.ref<!fir.box<!fir.array<3xi32>>>) -> (!fir.ref<!fir.box<!fir.array<3xi32>>>, !fir.ref<!fir.box<!fir.array<3xi32>>>)
78-
! CHECK: %[[VAL_8:.*]] = arith.constant 1 : i32
79-
! CHECK: %[[VAL_9:.*]] = fir.load %[[VAL_7]]#0 : !fir.ref<!fir.box<!fir.array<3xi32>>>
80-
! CHECK: %[[VAL_10:.*]] = arith.constant 1 : index
81-
! CHECK: %[[VAL_11:.*]] = hlfir.designate %[[VAL_9]] (%[[VAL_10]]) : (!fir.box<!fir.array<3xi32>>, index) -> !fir.ref<i32>
82-
! CHECK: hlfir.assign %[[VAL_8]] to %[[VAL_11]] : i32, !fir.ref<i32>
83-
! CHECK: %[[VAL_12:.*]] = arith.constant 2 : i32
84-
! CHECK: %[[VAL_13:.*]] = fir.load %[[VAL_7]]#0 : !fir.ref<!fir.box<!fir.array<3xi32>>>
85-
! CHECK: %[[VAL_14:.*]] = arith.constant 2 : index
86-
! CHECK: %[[VAL_15:.*]] = hlfir.designate %[[VAL_13]] (%[[VAL_14]]) : (!fir.box<!fir.array<3xi32>>, index) -> !fir.ref<i32>
87-
! CHECK: hlfir.assign %[[VAL_12]] to %[[VAL_15]] : i32, !fir.ref<i32>
88-
! CHECK: %[[VAL_16:.*]] = arith.constant 3 : i32
89-
! CHECK: %[[VAL_17:.*]] = fir.load %[[VAL_7]]#0 : !fir.ref<!fir.box<!fir.array<3xi32>>>
90-
! CHECK: %[[VAL_18:.*]] = arith.constant 3 : index
91-
! CHECK: %[[VAL_19:.*]] = hlfir.designate %[[VAL_17]] (%[[VAL_18]]) : (!fir.box<!fir.array<3xi32>>, index) -> !fir.ref<i32>
92-
! CHECK: hlfir.assign %[[VAL_16]] to %[[VAL_19]] : i32, !fir.ref<i32>
93-
! CHECK: omp.terminator
94-
! CHECK: }
98+
! GPU: omp.declare_reduction {{.*}} alloc {
99+
! GPU: } init {
100+
! GPU-NOT: fir.allocmem {{.*}} {bindc_name = ".tmp", {{.*}}}
101+
! GPU: fir.alloca {{.*}} {bindc_name = ".tmp"}
102+
! GPU: } combiner {
103+
! GPU: }

flang/test/Lower/OpenMP/parallel-reduction-array2.f90

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
! RUN: bbc -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s
22
! RUN: %flang_fc1 -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s
3-
! XFAIL: *
43

54
program reduce
65
integer, dimension(3) :: i = 0

flang/test/Lower/OpenMP/parallel-reduction3.f90

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
! RUN: bbc -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s
22
! RUN: %flang_fc1 -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s
3-
! XFAIL: *
43

54
! CHECK-LABEL: omp.declare_reduction @add_reduction_byref_box_Uxi32 : !fir.ref<!fir.box<!fir.array<?xi32>>> alloc {
65
! CHECK: %[[VAL_8:.*]] = fir.alloca !fir.box<!fir.array<?xi32>>

flang/test/Lower/OpenMP/reduction-array-intrinsic.f90

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
! RUN: bbc -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s
22
! RUN: %flang_fc1 -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s
3-
! XFAIL: *
43

54
subroutine max_array_reduction(l, r)
65
integer :: l(:), r(:)

flang/test/Lower/OpenMP/taskgroup-task-array-reduction.f90

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
! RUN: bbc -emit-hlfir -fopenmp -fopenmp-version=50 -o - %s 2>&1 | FileCheck %s
22
! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=50 -o - %s 2>&1 | FileCheck %s
3-
! XFAIL: *
43

54
! CHECK-LABEL: omp.declare_reduction @add_reduction_byref_box_Uxf32 : !fir.ref<!fir.box<!fir.array<?xf32>>> alloc {
65
! [...]

flang/test/Lower/OpenMP/wsloop-reduction-array-assumed-shape.f90

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
! RUN: bbc -emit-hlfir -fopenmp -o - %s | FileCheck %s
22
! RUN: %flang_fc1 -emit-hlfir -fopenmp -o - %s | FileCheck %s
3-
! XFAIL: *
43

54
program reduce_assumed_shape
65
real(8), dimension(2) :: r

0 commit comments

Comments
 (0)