Skip to content

Commit 9c730a1

Browse files
authored
Merge branch 'main' into mlir-fix-077a796
2 parents dbb1445 + 1fda257 commit 9c730a1

File tree

61 files changed

+1513
-291
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

61 files changed

+1513
-291
lines changed

clang/include/clang/Basic/BuiltinsAMDGPU.def

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -163,7 +163,7 @@ BUILTIN(__builtin_amdgcn_qsad_pk_u16_u8, "WUiWUiUiWUi", "nc")
163163
BUILTIN(__builtin_amdgcn_mqsad_pk_u16_u8, "WUiWUiUiWUi", "nc")
164164
BUILTIN(__builtin_amdgcn_mqsad_u32_u8, "V4UiWUiUiV4Ui", "nc")
165165

166-
BUILTIN(__builtin_amdgcn_make_buffer_rsrc, "Qbv*sii", "nc")
166+
BUILTIN(__builtin_amdgcn_make_buffer_rsrc, "Qbv*sWii", "nc")
167167
BUILTIN(__builtin_amdgcn_raw_buffer_store_b8, "vUcQbiiIi", "n")
168168
BUILTIN(__builtin_amdgcn_raw_buffer_store_b16, "vUsQbiiIi", "n")
169169
BUILTIN(__builtin_amdgcn_raw_buffer_store_b32, "vUiQbiiIi", "n")

clang/lib/AST/ByteCode/Interp.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2126,10 +2126,10 @@ bool InitElem(InterpState &S, CodePtr OpPC, uint32_t Idx) {
21262126
const T &Value = S.Stk.pop<T>();
21272127
const Pointer &Ptr = S.Stk.peek<Pointer>();
21282128

2129-
if (Ptr.isUnknownSizeArray())
2129+
const Descriptor *Desc = Ptr.getFieldDesc();
2130+
if (Desc->isUnknownSizeArray())
21302131
return false;
21312132

2132-
const Descriptor *Desc = Ptr.getFieldDesc();
21332133
// In the unlikely event that we're initializing the first item of
21342134
// a non-array, skip the atIndex().
21352135
if (Idx == 0 && !Desc->isArray()) {
@@ -2160,10 +2160,10 @@ bool InitElemPop(InterpState &S, CodePtr OpPC, uint32_t Idx) {
21602160
const T &Value = S.Stk.pop<T>();
21612161
const Pointer &Ptr = S.Stk.pop<Pointer>();
21622162

2163-
if (Ptr.isUnknownSizeArray())
2163+
const Descriptor *Desc = Ptr.getFieldDesc();
2164+
if (Desc->isUnknownSizeArray())
21642165
return false;
21652166

2166-
const Descriptor *Desc = Ptr.getFieldDesc();
21672167
// In the unlikely event that we're initializing the first item of
21682168
// a non-array, skip the atIndex().
21692169
if (Idx == 0 && !Desc->isArray()) {

clang/lib/AST/ByteCode/InterpBuiltin.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2600,7 +2600,8 @@ static bool
26002600
interp__builtin_x86_pack(InterpState &S, CodePtr, const CallExpr *E,
26012601
llvm::function_ref<APInt(const APSInt &)> PackFn) {
26022602
const auto *VT0 = E->getArg(0)->getType()->castAs<VectorType>();
2603-
const auto *VT1 = E->getArg(1)->getType()->castAs<VectorType>();
2603+
[[maybe_unused]] const auto *VT1 =
2604+
E->getArg(1)->getType()->castAs<VectorType>();
26042605
assert(VT0 && VT1 && "pack builtin VT0 and VT1 must be VectorType");
26052606
assert(VT0->getElementType() == VT1->getElementType() &&
26062607
VT0->getNumElements() == VT1->getNumElements() &&

clang/test/CodeGenHIP/builtins-make-buffer-rsrc.hip

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,9 @@
2424
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[P_ADDR_ASCAST]], align 8
2525
// CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[STRIDE_ADDR_ASCAST]], align 2
2626
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[NUM_ADDR_ASCAST]], align 4
27+
// CHECK-NEXT: [[CONV:%.*]] = sext i32 [[TMP2]] to i64
2728
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[FLAGS_ADDR_ASCAST]], align 4
28-
// CHECK-NEXT: [[TMP4:%.*]] = call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p0(ptr [[TMP0]], i16 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]])
29+
// CHECK-NEXT: [[TMP4:%.*]] = call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p0(ptr [[TMP0]], i16 [[TMP1]], i64 [[CONV]], i32 [[TMP3]])
2930
// CHECK-NEXT: ret ptr addrspace(8) [[TMP4]]
3031
//
3132
__device__ __amdgpu_buffer_rsrc_t test_amdgcn_make_buffer_rsrc_p0(void *p, short stride, int num, int flags) {
@@ -48,8 +49,9 @@ __device__ __amdgpu_buffer_rsrc_t test_amdgcn_make_buffer_rsrc_p0(void *p, short
4849
// CHECK-NEXT: store i32 [[FLAGS]], ptr [[FLAGS_ADDR_ASCAST]], align 4
4950
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[P_ADDR_ASCAST]], align 8
5051
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[NUM_ADDR_ASCAST]], align 4
52+
// CHECK-NEXT: [[CONV:%.*]] = sext i32 [[TMP1]] to i64
5153
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[FLAGS_ADDR_ASCAST]], align 4
52-
// CHECK-NEXT: [[TMP3:%.*]] = call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p0(ptr [[TMP0]], i16 4, i32 [[TMP1]], i32 [[TMP2]])
54+
// CHECK-NEXT: [[TMP3:%.*]] = call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p0(ptr [[TMP0]], i16 4, i64 [[CONV]], i32 [[TMP2]])
5355
// CHECK-NEXT: ret ptr addrspace(8) [[TMP3]]
5456
//
5557
__device__ __amdgpu_buffer_rsrc_t test_amdgcn_make_buffer_rsrc_p0_stride_constant(void *p, int num, int flags) {
@@ -73,7 +75,7 @@ __device__ __amdgpu_buffer_rsrc_t test_amdgcn_make_buffer_rsrc_p0_stride_constan
7375
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[P_ADDR_ASCAST]], align 8
7476
// CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[STRIDE_ADDR_ASCAST]], align 2
7577
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[FLAGS_ADDR_ASCAST]], align 4
76-
// CHECK-NEXT: [[TMP3:%.*]] = call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p0(ptr [[TMP0]], i16 [[TMP1]], i32 1234, i32 [[TMP2]])
78+
// CHECK-NEXT: [[TMP3:%.*]] = call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p0(ptr [[TMP0]], i16 [[TMP1]], i64 1234, i32 [[TMP2]])
7779
// CHECK-NEXT: ret ptr addrspace(8) [[TMP3]]
7880
//
7981
__device__ __amdgpu_buffer_rsrc_t test_amdgcn_make_buffer_rsrc_p0_num_constant(void *p, short stride, int flags) {
@@ -97,7 +99,8 @@ __device__ __amdgpu_buffer_rsrc_t test_amdgcn_make_buffer_rsrc_p0_num_constant(v
9799
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[P_ADDR_ASCAST]], align 8
98100
// CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[STRIDE_ADDR_ASCAST]], align 2
99101
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[NUM_ADDR_ASCAST]], align 4
100-
// CHECK-NEXT: [[TMP3:%.*]] = call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p0(ptr [[TMP0]], i16 [[TMP1]], i32 [[TMP2]], i32 5678)
102+
// CHECK-NEXT: [[CONV:%.*]] = sext i32 [[TMP2]] to i64
103+
// CHECK-NEXT: [[TMP3:%.*]] = call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p0(ptr [[TMP0]], i16 [[TMP1]], i64 [[CONV]], i32 5678)
101104
// CHECK-NEXT: ret ptr addrspace(8) [[TMP3]]
102105
//
103106
__device__ __amdgpu_buffer_rsrc_t test_amdgcn_make_buffer_rsrc_p0_flags_constant(void *p, short stride, int num) {

clang/test/CodeGenOpenCL/builtins-amdgcn-make-buffer-rsrc.cl

Lines changed: 18 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,8 @@
44

55
// CHECK-LABEL: @test_amdgcn_make_buffer_rsrc_p0(
66
// CHECK-NEXT: entry:
7-
// CHECK-NEXT: [[TMP0:%.*]] = tail call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p0(ptr [[P:%.*]], i16 [[STRIDE:%.*]], i32 [[NUM:%.*]], i32 [[FLAGS:%.*]])
7+
// CHECK-NEXT: [[CONV:%.*]] = sext i32 [[NUM:%.*]] to i64
8+
// CHECK-NEXT: [[TMP0:%.*]] = tail call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p0(ptr [[P:%.*]], i16 [[STRIDE:%.*]], i64 [[CONV]], i32 [[FLAGS:%.*]])
89
// CHECK-NEXT: ret ptr addrspace(8) [[TMP0]]
910
//
1011
__amdgpu_buffer_rsrc_t test_amdgcn_make_buffer_rsrc_p0(void *p, short stride, int num, int flags) {
@@ -13,7 +14,8 @@ __amdgpu_buffer_rsrc_t test_amdgcn_make_buffer_rsrc_p0(void *p, short stride, in
1314

1415
// CHECK-LABEL: @test_amdgcn_make_buffer_rsrc_p0_stride_constant(
1516
// CHECK-NEXT: entry:
16-
// CHECK-NEXT: [[TMP0:%.*]] = tail call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p0(ptr [[P:%.*]], i16 4, i32 [[NUM:%.*]], i32 [[FLAGS:%.*]])
17+
// CHECK-NEXT: [[CONV:%.*]] = sext i32 [[NUM:%.*]] to i64
18+
// CHECK-NEXT: [[TMP0:%.*]] = tail call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p0(ptr [[P:%.*]], i16 4, i64 [[CONV]], i32 [[FLAGS:%.*]])
1719
// CHECK-NEXT: ret ptr addrspace(8) [[TMP0]]
1820
//
1921
__amdgpu_buffer_rsrc_t test_amdgcn_make_buffer_rsrc_p0_stride_constant(void *p, int num, int flags) {
@@ -22,7 +24,7 @@ __amdgpu_buffer_rsrc_t test_amdgcn_make_buffer_rsrc_p0_stride_constant(void *p,
2224

2325
// CHECK-LABEL: @test_amdgcn_make_buffer_rsrc_p0_num_constant(
2426
// CHECK-NEXT: entry:
25-
// CHECK-NEXT: [[TMP0:%.*]] = tail call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p0(ptr [[P:%.*]], i16 [[STRIDE:%.*]], i32 1234, i32 [[FLAGS:%.*]])
27+
// CHECK-NEXT: [[TMP0:%.*]] = tail call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p0(ptr [[P:%.*]], i16 [[STRIDE:%.*]], i64 1234, i32 [[FLAGS:%.*]])
2628
// CHECK-NEXT: ret ptr addrspace(8) [[TMP0]]
2729
//
2830
__amdgpu_buffer_rsrc_t test_amdgcn_make_buffer_rsrc_p0_num_constant(void *p, short stride, int flags) {
@@ -31,7 +33,8 @@ __amdgpu_buffer_rsrc_t test_amdgcn_make_buffer_rsrc_p0_num_constant(void *p, sho
3133

3234
// CHECK-LABEL: @test_amdgcn_make_buffer_rsrc_p0_flags_constant(
3335
// CHECK-NEXT: entry:
34-
// CHECK-NEXT: [[TMP0:%.*]] = tail call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p0(ptr [[P:%.*]], i16 [[STRIDE:%.*]], i32 [[NUM:%.*]], i32 5678)
36+
// CHECK-NEXT: [[CONV:%.*]] = sext i32 [[NUM:%.*]] to i64
37+
// CHECK-NEXT: [[TMP0:%.*]] = tail call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p0(ptr [[P:%.*]], i16 [[STRIDE:%.*]], i64 [[CONV]], i32 5678)
3538
// CHECK-NEXT: ret ptr addrspace(8) [[TMP0]]
3639
//
3740
__amdgpu_buffer_rsrc_t test_amdgcn_make_buffer_rsrc_p0_flags_constant(void *p, short stride, int num) {
@@ -40,7 +43,8 @@ __amdgpu_buffer_rsrc_t test_amdgcn_make_buffer_rsrc_p0_flags_constant(void *p, s
4043

4144
// CHECK-LABEL: @test_amdgcn_make_buffer_rsrc_p1(
4245
// CHECK-NEXT: entry:
43-
// CHECK-NEXT: [[TMP0:%.*]] = tail call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p1(ptr addrspace(1) [[P:%.*]], i16 [[STRIDE:%.*]], i32 [[NUM:%.*]], i32 [[FLAGS:%.*]])
46+
// CHECK-NEXT: [[CONV:%.*]] = sext i32 [[NUM:%.*]] to i64
47+
// CHECK-NEXT: [[TMP0:%.*]] = tail call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p1(ptr addrspace(1) [[P:%.*]], i16 [[STRIDE:%.*]], i64 [[CONV]], i32 [[FLAGS:%.*]])
4448
// CHECK-NEXT: ret ptr addrspace(8) [[TMP0]]
4549
//
4650
__amdgpu_buffer_rsrc_t test_amdgcn_make_buffer_rsrc_p1(global void *p, short stride, int num, int flags) {
@@ -49,7 +53,8 @@ __amdgpu_buffer_rsrc_t test_amdgcn_make_buffer_rsrc_p1(global void *p, short str
4953

5054
// CHECK-LABEL: @test_amdgcn_make_buffer_rsrc_p1_stride_constant(
5155
// CHECK-NEXT: entry:
52-
// CHECK-NEXT: [[TMP0:%.*]] = tail call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p1(ptr addrspace(1) [[P:%.*]], i16 4, i32 [[NUM:%.*]], i32 [[FLAGS:%.*]])
56+
// CHECK-NEXT: [[CONV:%.*]] = sext i32 [[NUM:%.*]] to i64
57+
// CHECK-NEXT: [[TMP0:%.*]] = tail call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p1(ptr addrspace(1) [[P:%.*]], i16 4, i64 [[CONV]], i32 [[FLAGS:%.*]])
5358
// CHECK-NEXT: ret ptr addrspace(8) [[TMP0]]
5459
//
5560
__amdgpu_buffer_rsrc_t test_amdgcn_make_buffer_rsrc_p1_stride_constant(global void *p, int num, int flags) {
@@ -58,7 +63,7 @@ __amdgpu_buffer_rsrc_t test_amdgcn_make_buffer_rsrc_p1_stride_constant(global vo
5863

5964
// CHECK-LABEL: @test_amdgcn_make_buffer_rsrc_p1_num_constant(
6065
// CHECK-NEXT: entry:
61-
// CHECK-NEXT: [[TMP0:%.*]] = tail call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p1(ptr addrspace(1) [[P:%.*]], i16 [[STRIDE:%.*]], i32 1234, i32 [[FLAGS:%.*]])
66+
// CHECK-NEXT: [[TMP0:%.*]] = tail call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p1(ptr addrspace(1) [[P:%.*]], i16 [[STRIDE:%.*]], i64 1234, i32 [[FLAGS:%.*]])
6267
// CHECK-NEXT: ret ptr addrspace(8) [[TMP0]]
6368
//
6469
__amdgpu_buffer_rsrc_t test_amdgcn_make_buffer_rsrc_p1_num_constant(global void *p, short stride, int flags) {
@@ -67,7 +72,8 @@ __amdgpu_buffer_rsrc_t test_amdgcn_make_buffer_rsrc_p1_num_constant(global void
6772

6873
// CHECK-LABEL: @test_amdgcn_make_buffer_rsrc_p1_flags_constant(
6974
// CHECK-NEXT: entry:
70-
// CHECK-NEXT: [[TMP0:%.*]] = tail call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p1(ptr addrspace(1) [[P:%.*]], i16 [[STRIDE:%.*]], i32 [[NUM:%.*]], i32 5678)
75+
// CHECK-NEXT: [[CONV:%.*]] = sext i32 [[NUM:%.*]] to i64
76+
// CHECK-NEXT: [[TMP0:%.*]] = tail call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p1(ptr addrspace(1) [[P:%.*]], i16 [[STRIDE:%.*]], i64 [[CONV]], i32 5678)
7177
// CHECK-NEXT: ret ptr addrspace(8) [[TMP0]]
7278
//
7379
__amdgpu_buffer_rsrc_t test_amdgcn_make_buffer_rsrc_p1_flags_constant(global void *p, short stride, int num) {
@@ -76,7 +82,8 @@ __amdgpu_buffer_rsrc_t test_amdgcn_make_buffer_rsrc_p1_flags_constant(global voi
7682

7783
// CHECK-LABEL: @test_amdgcn_make_buffer_p0_nullptr(
7884
// CHECK-NEXT: entry:
79-
// CHECK-NEXT: [[TMP0:%.*]] = tail call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p0(ptr null, i16 [[STRIDE:%.*]], i32 [[NUM:%.*]], i32 [[FLAGS:%.*]])
85+
// CHECK-NEXT: [[CONV:%.*]] = sext i32 [[NUM:%.*]] to i64
86+
// CHECK-NEXT: [[TMP0:%.*]] = tail call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p0(ptr null, i16 [[STRIDE:%.*]], i64 [[CONV]], i32 [[FLAGS:%.*]])
8087
// CHECK-NEXT: ret ptr addrspace(8) [[TMP0]]
8188
//
8289
__amdgpu_buffer_rsrc_t test_amdgcn_make_buffer_p0_nullptr(short stride, int num, int flags) {
@@ -85,7 +92,8 @@ __amdgpu_buffer_rsrc_t test_amdgcn_make_buffer_p0_nullptr(short stride, int num,
8592

8693
// CHECK-LABEL: @test_amdgcn_make_buffer_p1_nullptr(
8794
// CHECK-NEXT: entry:
88-
// CHECK-NEXT: [[TMP0:%.*]] = tail call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p1(ptr addrspace(1) null, i16 [[STRIDE:%.*]], i32 [[NUM:%.*]], i32 [[FLAGS:%.*]])
95+
// CHECK-NEXT: [[CONV:%.*]] = sext i32 [[NUM:%.*]] to i64
96+
// CHECK-NEXT: [[TMP0:%.*]] = tail call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p1(ptr addrspace(1) null, i16 [[STRIDE:%.*]], i64 [[CONV]], i32 [[FLAGS:%.*]])
8997
// CHECK-NEXT: ret ptr addrspace(8) [[TMP0]]
9098
//
9199
__amdgpu_buffer_rsrc_t test_amdgcn_make_buffer_p1_nullptr(short stride, int num, int flags) {

flang/include/flang/Optimizer/HLFIR/Passes.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,10 @@ def SimplifyHLFIRIntrinsics : Pass<"simplify-hlfir-intrinsics"> {
6161
"the hlfir.matmul.">];
6262
}
6363

64+
def ExpressionSimplification : Pass<"hlfir-expression-simplification"> {
65+
let summary = "Simplify Fortran expressions";
66+
}
67+
6468
def InlineElementals : Pass<"inline-elementals"> {
6569
let summary = "Inline chained hlfir.elemental operations";
6670
}

flang/lib/Optimizer/HLFIR/Transforms/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS)
33
add_flang_library(HLFIRTransforms
44
BufferizeHLFIR.cpp
55
ConvertToFIR.cpp
6+
ExpressionSimplification.cpp
67
InlineElementals.cpp
78
InlineHLFIRAssign.cpp
89
InlineHLFIRCopyIn.cpp
Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
//===- ExpressionSimplification.cpp - Simplify HLFIR expressions ----------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "flang/Optimizer/Builder/FIRBuilder.h"
10+
#include "flang/Optimizer/HLFIR/HLFIROps.h"
11+
#include "flang/Optimizer/HLFIR/Passes.h"
12+
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
13+
14+
namespace hlfir {
15+
#define GEN_PASS_DEF_EXPRESSIONSIMPLIFICATION
16+
#include "flang/Optimizer/HLFIR/Passes.h.inc"
17+
} // namespace hlfir
18+
19+
// Get the first user of `op`.
20+
// Note that we consider the first user to be the one on the lowest line of
21+
// the emitted HLFIR. The user iterator considers the opposite.
22+
template <typename UserOp>
23+
static UserOp getFirstUser(mlir::Operation *op) {
24+
auto it = op->user_begin(), end = op->user_end(), prev = it;
25+
for (; it != end; prev = it++)
26+
;
27+
if (prev != end)
28+
if (auto userOp = mlir::dyn_cast<UserOp>(*prev))
29+
return userOp;
30+
return {};
31+
}
32+
33+
// Get the last user of `op`.
34+
// Note that we consider the last user to be the one on the highest line of
35+
// the emitted HLFIR. The user iterator considers the opposite.
36+
template <typename UserOp>
37+
static UserOp getLastUser(mlir::Operation *op) {
38+
if (!op->getUsers().empty())
39+
if (auto userOp = mlir::dyn_cast<UserOp>(*op->user_begin()))
40+
return userOp;
41+
return {};
42+
}
43+
44+
namespace {
45+
46+
// Trim operations can be erased in certain expressions, such as character
47+
// comparisons.
48+
// Since a character comparison appends spaces to the shorter character,
49+
// calls to trim() that are used only in the comparison can be eliminated.
50+
//
51+
// Example:
52+
// `trim(x) == trim(y)`
53+
// can be simplified to
54+
// `x == y`
55+
class EraseTrim : public mlir::OpRewritePattern<hlfir::CharTrimOp> {
56+
public:
57+
using mlir::OpRewritePattern<hlfir::CharTrimOp>::OpRewritePattern;
58+
59+
llvm::LogicalResult
60+
matchAndRewrite(hlfir::CharTrimOp trimOp,
61+
mlir::PatternRewriter &rewriter) const override {
62+
int trimUses = std::distance(trimOp->use_begin(), trimOp->use_end());
63+
auto cmpCharOp = getFirstUser<hlfir::CmpCharOp>(trimOp);
64+
auto destroyOp = getLastUser<hlfir::DestroyOp>(trimOp);
65+
if (!cmpCharOp || !destroyOp || trimUses != 2)
66+
return rewriter.notifyMatchFailure(
67+
trimOp, "hlfir.char_trim is not used (only) by hlfir.cmpchar");
68+
69+
rewriter.eraseOp(destroyOp);
70+
rewriter.replaceOp(trimOp, trimOp.getChr());
71+
return mlir::success();
72+
}
73+
};
74+
75+
class ExpressionSimplificationPass
76+
: public hlfir::impl::ExpressionSimplificationBase<
77+
ExpressionSimplificationPass> {
78+
public:
79+
void runOnOperation() override {
80+
mlir::MLIRContext *context = &getContext();
81+
82+
mlir::GreedyRewriteConfig config;
83+
// Prevent the pattern driver from merging blocks.
84+
config.setRegionSimplificationLevel(
85+
mlir::GreedySimplifyRegionLevel::Disabled);
86+
87+
mlir::RewritePatternSet patterns(context);
88+
patterns.insert<EraseTrim>(context);
89+
90+
if (mlir::failed(mlir::applyPatternsGreedily(
91+
getOperation(), std::move(patterns), config))) {
92+
mlir::emitError(getOperation()->getLoc(),
93+
"failure in HLFIR expression simplification");
94+
signalPassFailure();
95+
}
96+
}
97+
};
98+
99+
} // namespace

flang/lib/Optimizer/Passes/Pipelines.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -245,6 +245,10 @@ void createDefaultFIROptimizerPassPipeline(mlir::PassManager &pm,
245245
void createHLFIRToFIRPassPipeline(mlir::PassManager &pm,
246246
EnableOpenMP enableOpenMP,
247247
llvm::OptimizationLevel optLevel) {
248+
if (optLevel.getSizeLevel() > 0 || optLevel.getSpeedupLevel() > 0) {
249+
addNestedPassToAllTopLevelOperations<PassConstructor>(
250+
pm, hlfir::createExpressionSimplification);
251+
}
248252
if (optLevel.isOptimizingForSpeed()) {
249253
addCanonicalizerPassWithoutRegionSimplification(pm);
250254
addNestedPassToAllTopLevelOperations<PassConstructor>(

flang/test/Driver/mlir-pass-pipeline.f90

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,15 @@
1515
! ALL: Pass statistics report
1616

1717
! ALL: Fortran::lower::VerifierPass
18+
! O2-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'omp.declare_reduction', 'omp.private']
19+
! O2-NEXT: 'fir.global' Pipeline
20+
! O2-NEXT: ExpressionSimplification
21+
! O2-NEXT: 'func.func' Pipeline
22+
! O2-NEXT: ExpressionSimplification
23+
! O2-NEXT: 'omp.declare_reduction' Pipeline
24+
! O2-NEXT: ExpressionSimplification
25+
! O2-NEXT: 'omp.private' Pipeline
26+
! O2-NEXT: ExpressionSimplification
1827
! O2-NEXT: Canonicalizer
1928
! ALL: Pipeline Collection : ['fir.global', 'func.func', 'omp.declare_reduction', 'omp.private']
2029
! ALL-NEXT:'fir.global' Pipeline

0 commit comments

Comments
 (0)