llvm
diff --git a/‎clang/include/clang/Basic/BuiltinsAMDGPU.def‎
Lines changed: 1 addition & 1 deletion b/‎clang/include/clang/Basic/BuiltinsAMDGPU.def‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎clang/lib/AST/ByteCode/Interp.h‎
Lines changed: 4 additions & 4 deletions b/‎clang/lib/AST/ByteCode/Interp.h‎
Lines changed: 4 additions & 4 deletions
diff --git a/‎clang/lib/AST/ByteCode/InterpBuiltin.cpp‎
Lines changed: 2 additions & 1 deletion b/‎clang/lib/AST/ByteCode/InterpBuiltin.cpp‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎clang/test/CodeGenHIP/builtins-make-buffer-rsrc.hip‎
Lines changed: 7 additions & 4 deletions b/‎clang/test/CodeGenHIP/builtins-make-buffer-rsrc.hip‎
Lines changed: 7 additions & 4 deletions
diff --git a/‎clang/test/CodeGenOpenCL/builtins-amdgcn-make-buffer-rsrc.cl‎
Lines changed: 18 additions & 10 deletions b/‎clang/test/CodeGenOpenCL/builtins-amdgcn-make-buffer-rsrc.cl‎
Lines changed: 18 additions & 10 deletions
diff --git a/‎flang/include/flang/Optimizer/HLFIR/Passes.td‎
Lines changed: 4 additions & 0 deletions b/‎flang/include/flang/Optimizer/HLFIR/Passes.td‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎flang/lib/Optimizer/HLFIR/Transforms/CMakeLists.txt‎
Lines changed: 1 addition & 0 deletions b/‎flang/lib/Optimizer/HLFIR/Transforms/CMakeLists.txt‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎flang/lib/Optimizer/HLFIR/Transforms/ExpressionSimplification.cpp‎
Lines changed: 99 additions & 0 deletions b/‎flang/lib/Optimizer/HLFIR/Transforms/ExpressionSimplification.cpp‎
Lines changed: 99 additions & 0 deletions
diff --git a/‎flang/lib/Optimizer/Passes/Pipelines.cpp‎
Lines changed: 4 additions & 0 deletions b/‎flang/lib/Optimizer/Passes/Pipelines.cpp‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎flang/test/Driver/mlir-pass-pipeline.f90‎
Lines changed: 9 additions & 0 deletions b/‎flang/test/Driver/mlir-pass-pipeline.f90‎
Lines changed: 9 additions & 0 deletions
@@ -163,7 +163,7 @@ BUILTIN(__builtin_amdgcn_qsad_pk_u16_u8, "WUiWUiUiWUi", "nc")
 BUILTIN(__builtin_amdgcn_mqsad_pk_u16_u8, "WUiWUiUiWUi", "nc")
 BUILTIN(__builtin_amdgcn_mqsad_u32_u8, "V4UiWUiUiV4Ui", "nc")
 
-BUILTIN(__builtin_amdgcn_make_buffer_rsrc, "Qbv*sii", "nc")
+BUILTIN(__builtin_amdgcn_make_buffer_rsrc, "Qbv*sWii", "nc")
 BUILTIN(__builtin_amdgcn_raw_buffer_store_b8, "vUcQbiiIi", "n")
 BUILTIN(__builtin_amdgcn_raw_buffer_store_b16, "vUsQbiiIi", "n")
 BUILTIN(__builtin_amdgcn_raw_buffer_store_b32, "vUiQbiiIi", "n")
 
@@ -2126,10 +2126,10 @@ bool InitElem(InterpState &S, CodePtr OpPC, uint32_t Idx) {
   const T &Value = S.Stk.pop<T>();
   const Pointer &Ptr = S.Stk.peek<Pointer>();
 
-  if (Ptr.isUnknownSizeArray())
+  const Descriptor *Desc = Ptr.getFieldDesc();
+  if (Desc->isUnknownSizeArray())
     return false;
 
-  const Descriptor *Desc = Ptr.getFieldDesc();
   // In the unlikely event that we're initializing the first item of
   // a non-array, skip the atIndex().
   if (Idx == 0 && !Desc->isArray()) {
@@ -2160,10 +2160,10 @@ bool InitElemPop(InterpState &S, CodePtr OpPC, uint32_t Idx) {
   const T &Value = S.Stk.pop<T>();
   const Pointer &Ptr = S.Stk.pop<Pointer>();
 
-  if (Ptr.isUnknownSizeArray())
+  const Descriptor *Desc = Ptr.getFieldDesc();
+  if (Desc->isUnknownSizeArray())
     return false;
 
-  const Descriptor *Desc = Ptr.getFieldDesc();
   // In the unlikely event that we're initializing the first item of
   // a non-array, skip the atIndex().
   if (Idx == 0 && !Desc->isArray()) {
 
@@ -2600,7 +2600,8 @@ static bool
 interp__builtin_x86_pack(InterpState &S, CodePtr, const CallExpr *E,
                          llvm::function_ref<APInt(const APSInt &)> PackFn) {
   const auto *VT0 = E->getArg(0)->getType()->castAs<VectorType>();
-  const auto *VT1 = E->getArg(1)->getType()->castAs<VectorType>();
+  [[maybe_unused]] const auto *VT1 =
+      E->getArg(1)->getType()->castAs<VectorType>();
   assert(VT0 && VT1 && "pack builtin VT0 and VT1 must be VectorType");
   assert(VT0->getElementType() == VT1->getElementType() &&
          VT0->getNumElements() == VT1->getNumElements() &&
 
@@ -24,8 +24,9 @@
 // CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[P_ADDR_ASCAST]], align 8
 // CHECK-NEXT:    [[TMP1:%.*]] = load i16, ptr [[STRIDE_ADDR_ASCAST]], align 2
 // CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[NUM_ADDR_ASCAST]], align 4
+// CHECK-NEXT:    [[CONV:%.*]] = sext i32 [[TMP2]] to i64
 // CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[FLAGS_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP4:%.*]] = call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p0(ptr [[TMP0]], i16 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]])
+// CHECK-NEXT:    [[TMP4:%.*]] = call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p0(ptr [[TMP0]], i16 [[TMP1]], i64 [[CONV]], i32 [[TMP3]])
 // CHECK-NEXT:    ret ptr addrspace(8) [[TMP4]]
 //
 __device__ __amdgpu_buffer_rsrc_t test_amdgcn_make_buffer_rsrc_p0(void *p, short stride, int num, int flags) {
@@ -48,8 +49,9 @@ __device__ __amdgpu_buffer_rsrc_t test_amdgcn_make_buffer_rsrc_p0(void *p, short
 // CHECK-NEXT:    store i32 [[FLAGS]], ptr [[FLAGS_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[P_ADDR_ASCAST]], align 8
 // CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[NUM_ADDR_ASCAST]], align 4
+// CHECK-NEXT:    [[CONV:%.*]] = sext i32 [[TMP1]] to i64
 // CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[FLAGS_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP3:%.*]] = call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p0(ptr [[TMP0]], i16 4, i32 [[TMP1]], i32 [[TMP2]])
+// CHECK-NEXT:    [[TMP3:%.*]] = call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p0(ptr [[TMP0]], i16 4, i64 [[CONV]], i32 [[TMP2]])
 // CHECK-NEXT:    ret ptr addrspace(8) [[TMP3]]
 //
 __device__ __amdgpu_buffer_rsrc_t test_amdgcn_make_buffer_rsrc_p0_stride_constant(void *p, int num, int flags) {
@@ -73,7 +75,7 @@ __device__ __amdgpu_buffer_rsrc_t test_amdgcn_make_buffer_rsrc_p0_stride_constan
 // CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[P_ADDR_ASCAST]], align 8
 // CHECK-NEXT:    [[TMP1:%.*]] = load i16, ptr [[STRIDE_ADDR_ASCAST]], align 2
 // CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[FLAGS_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP3:%.*]] = call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p0(ptr [[TMP0]], i16 [[TMP1]], i32 1234, i32 [[TMP2]])
+// CHECK-NEXT:    [[TMP3:%.*]] = call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p0(ptr [[TMP0]], i16 [[TMP1]], i64 1234, i32 [[TMP2]])
 // CHECK-NEXT:    ret ptr addrspace(8) [[TMP3]]
 //
 __device__ __amdgpu_buffer_rsrc_t test_amdgcn_make_buffer_rsrc_p0_num_constant(void *p, short stride, int flags) {
@@ -97,7 +99,8 @@ __device__ __amdgpu_buffer_rsrc_t test_amdgcn_make_buffer_rsrc_p0_num_constant(v
 // CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[P_ADDR_ASCAST]], align 8
 // CHECK-NEXT:    [[TMP1:%.*]] = load i16, ptr [[STRIDE_ADDR_ASCAST]], align 2
 // CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[NUM_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP3:%.*]] = call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p0(ptr [[TMP0]], i16 [[TMP1]], i32 [[TMP2]], i32 5678)
+// CHECK-NEXT:    [[CONV:%.*]] = sext i32 [[TMP2]] to i64
+// CHECK-NEXT:    [[TMP3:%.*]] = call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p0(ptr [[TMP0]], i16 [[TMP1]], i64 [[CONV]], i32 5678)
 // CHECK-NEXT:    ret ptr addrspace(8) [[TMP3]]
 //
 __device__ __amdgpu_buffer_rsrc_t test_amdgcn_make_buffer_rsrc_p0_flags_constant(void *p, short stride, int num) {
 
@@ -4,7 +4,8 @@
 
 // CHECK-LABEL: @test_amdgcn_make_buffer_rsrc_p0(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p0(ptr [[P:%.*]], i16 [[STRIDE:%.*]], i32 [[NUM:%.*]], i32 [[FLAGS:%.*]])
+// CHECK-NEXT:    [[CONV:%.*]] = sext i32 [[NUM:%.*]] to i64
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p0(ptr [[P:%.*]], i16 [[STRIDE:%.*]], i64 [[CONV]], i32 [[FLAGS:%.*]])
 // CHECK-NEXT:    ret ptr addrspace(8) [[TMP0]]
 //
 __amdgpu_buffer_rsrc_t test_amdgcn_make_buffer_rsrc_p0(void *p, short stride, int num, int flags) {
@@ -13,7 +14,8 @@ __amdgpu_buffer_rsrc_t test_amdgcn_make_buffer_rsrc_p0(void *p, short stride, in
 
 // CHECK-LABEL: @test_amdgcn_make_buffer_rsrc_p0_stride_constant(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p0(ptr [[P:%.*]], i16 4, i32 [[NUM:%.*]], i32 [[FLAGS:%.*]])
+// CHECK-NEXT:    [[CONV:%.*]] = sext i32 [[NUM:%.*]] to i64
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p0(ptr [[P:%.*]], i16 4, i64 [[CONV]], i32 [[FLAGS:%.*]])
 // CHECK-NEXT:    ret ptr addrspace(8) [[TMP0]]
 //
 __amdgpu_buffer_rsrc_t test_amdgcn_make_buffer_rsrc_p0_stride_constant(void *p, int num, int flags) {
@@ -22,7 +24,7 @@ __amdgpu_buffer_rsrc_t test_amdgcn_make_buffer_rsrc_p0_stride_constant(void *p,
 
 // CHECK-LABEL: @test_amdgcn_make_buffer_rsrc_p0_num_constant(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p0(ptr [[P:%.*]], i16 [[STRIDE:%.*]], i32 1234, i32 [[FLAGS:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p0(ptr [[P:%.*]], i16 [[STRIDE:%.*]], i64 1234, i32 [[FLAGS:%.*]])
 // CHECK-NEXT:    ret ptr addrspace(8) [[TMP0]]
 //
 __amdgpu_buffer_rsrc_t test_amdgcn_make_buffer_rsrc_p0_num_constant(void *p, short stride, int flags) {
@@ -31,7 +33,8 @@ __amdgpu_buffer_rsrc_t test_amdgcn_make_buffer_rsrc_p0_num_constant(void *p, sho
 
 // CHECK-LABEL: @test_amdgcn_make_buffer_rsrc_p0_flags_constant(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p0(ptr [[P:%.*]], i16 [[STRIDE:%.*]], i32 [[NUM:%.*]], i32 5678)
+// CHECK-NEXT:    [[CONV:%.*]] = sext i32 [[NUM:%.*]] to i64
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p0(ptr [[P:%.*]], i16 [[STRIDE:%.*]], i64 [[CONV]], i32 5678)
 // CHECK-NEXT:    ret ptr addrspace(8) [[TMP0]]
 //
 __amdgpu_buffer_rsrc_t test_amdgcn_make_buffer_rsrc_p0_flags_constant(void *p, short stride, int num) {
@@ -40,7 +43,8 @@ __amdgpu_buffer_rsrc_t test_amdgcn_make_buffer_rsrc_p0_flags_constant(void *p, s
 
 // CHECK-LABEL: @test_amdgcn_make_buffer_rsrc_p1(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p1(ptr addrspace(1) [[P:%.*]], i16 [[STRIDE:%.*]], i32 [[NUM:%.*]], i32 [[FLAGS:%.*]])
+// CHECK-NEXT:    [[CONV:%.*]] = sext i32 [[NUM:%.*]] to i64
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p1(ptr addrspace(1) [[P:%.*]], i16 [[STRIDE:%.*]], i64 [[CONV]], i32 [[FLAGS:%.*]])
 // CHECK-NEXT:    ret ptr addrspace(8) [[TMP0]]
 //
 __amdgpu_buffer_rsrc_t test_amdgcn_make_buffer_rsrc_p1(global void *p, short stride, int num, int flags) {
@@ -49,7 +53,8 @@ __amdgpu_buffer_rsrc_t test_amdgcn_make_buffer_rsrc_p1(global void *p, short str
 
 // CHECK-LABEL: @test_amdgcn_make_buffer_rsrc_p1_stride_constant(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p1(ptr addrspace(1) [[P:%.*]], i16 4, i32 [[NUM:%.*]], i32 [[FLAGS:%.*]])
+// CHECK-NEXT:    [[CONV:%.*]] = sext i32 [[NUM:%.*]] to i64
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p1(ptr addrspace(1) [[P:%.*]], i16 4, i64 [[CONV]], i32 [[FLAGS:%.*]])
 // CHECK-NEXT:    ret ptr addrspace(8) [[TMP0]]
 //
 __amdgpu_buffer_rsrc_t test_amdgcn_make_buffer_rsrc_p1_stride_constant(global void *p, int num, int flags) {
@@ -58,7 +63,7 @@ __amdgpu_buffer_rsrc_t test_amdgcn_make_buffer_rsrc_p1_stride_constant(global vo
 
 // CHECK-LABEL: @test_amdgcn_make_buffer_rsrc_p1_num_constant(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p1(ptr addrspace(1) [[P:%.*]], i16 [[STRIDE:%.*]], i32 1234, i32 [[FLAGS:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p1(ptr addrspace(1) [[P:%.*]], i16 [[STRIDE:%.*]], i64 1234, i32 [[FLAGS:%.*]])
 // CHECK-NEXT:    ret ptr addrspace(8) [[TMP0]]
 //
 __amdgpu_buffer_rsrc_t test_amdgcn_make_buffer_rsrc_p1_num_constant(global void *p, short stride, int flags) {
@@ -67,7 +72,8 @@ __amdgpu_buffer_rsrc_t test_amdgcn_make_buffer_rsrc_p1_num_constant(global void
 
 // CHECK-LABEL: @test_amdgcn_make_buffer_rsrc_p1_flags_constant(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p1(ptr addrspace(1) [[P:%.*]], i16 [[STRIDE:%.*]], i32 [[NUM:%.*]], i32 5678)
+// CHECK-NEXT:    [[CONV:%.*]] = sext i32 [[NUM:%.*]] to i64
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p1(ptr addrspace(1) [[P:%.*]], i16 [[STRIDE:%.*]], i64 [[CONV]], i32 5678)
 // CHECK-NEXT:    ret ptr addrspace(8) [[TMP0]]
 //
 __amdgpu_buffer_rsrc_t test_amdgcn_make_buffer_rsrc_p1_flags_constant(global void *p, short stride, int num) {
@@ -76,7 +82,8 @@ __amdgpu_buffer_rsrc_t test_amdgcn_make_buffer_rsrc_p1_flags_constant(global voi
 
 // CHECK-LABEL: @test_amdgcn_make_buffer_p0_nullptr(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p0(ptr null, i16 [[STRIDE:%.*]], i32 [[NUM:%.*]], i32 [[FLAGS:%.*]])
+// CHECK-NEXT:    [[CONV:%.*]] = sext i32 [[NUM:%.*]] to i64
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p0(ptr null, i16 [[STRIDE:%.*]], i64 [[CONV]], i32 [[FLAGS:%.*]])
 // CHECK-NEXT:    ret ptr addrspace(8) [[TMP0]]
 //
 __amdgpu_buffer_rsrc_t test_amdgcn_make_buffer_p0_nullptr(short stride, int num, int flags) {
@@ -85,7 +92,8 @@ __amdgpu_buffer_rsrc_t test_amdgcn_make_buffer_p0_nullptr(short stride, int num,
 
 // CHECK-LABEL: @test_amdgcn_make_buffer_p1_nullptr(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p1(ptr addrspace(1) null, i16 [[STRIDE:%.*]], i32 [[NUM:%.*]], i32 [[FLAGS:%.*]])
+// CHECK-NEXT:    [[CONV:%.*]] = sext i32 [[NUM:%.*]] to i64
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p1(ptr addrspace(1) null, i16 [[STRIDE:%.*]], i64 [[CONV]], i32 [[FLAGS:%.*]])
 // CHECK-NEXT:    ret ptr addrspace(8) [[TMP0]]
 //
 __amdgpu_buffer_rsrc_t test_amdgcn_make_buffer_p1_nullptr(short stride, int num, int flags) {
 
@@ -61,6 +61,10 @@ def SimplifyHLFIRIntrinsics : Pass<"simplify-hlfir-intrinsics"> {
                         "the hlfir.matmul.">];
 }
 
+def ExpressionSimplification : Pass<"hlfir-expression-simplification"> {
+  let summary = "Simplify Fortran expressions";
+}
+
 def InlineElementals : Pass<"inline-elementals"> {
   let summary = "Inline chained hlfir.elemental operations";
 }
 
@@ -3,6 +3,7 @@ get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS)
 add_flang_library(HLFIRTransforms
   BufferizeHLFIR.cpp
   ConvertToFIR.cpp
+  ExpressionSimplification.cpp
   InlineElementals.cpp
   InlineHLFIRAssign.cpp
   InlineHLFIRCopyIn.cpp
 
@@ -0,0 +1,99 @@
+//===- ExpressionSimplification.cpp - Simplify HLFIR expressions ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "flang/Optimizer/Builder/FIRBuilder.h"
+#include "flang/Optimizer/HLFIR/HLFIROps.h"
+#include "flang/Optimizer/HLFIR/Passes.h"
+#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
+
+namespace hlfir {
+#define GEN_PASS_DEF_EXPRESSIONSIMPLIFICATION
+#include "flang/Optimizer/HLFIR/Passes.h.inc"
+} // namespace hlfir
+
+// Get the first user of `op`.
+// Note that we consider the first user to be the one on the lowest line of
+// the emitted HLFIR. The user iterator considers the opposite.
+template <typename UserOp>
+static UserOp getFirstUser(mlir::Operation *op) {
+  auto it = op->user_begin(), end = op->user_end(), prev = it;
+  for (; it != end; prev = it++)
+    ;
+  if (prev != end)
+    if (auto userOp = mlir::dyn_cast<UserOp>(*prev))
+      return userOp;
+  return {};
+}
+
+// Get the last user of `op`.
+// Note that we consider the last user to be the one on the highest line of
+// the emitted HLFIR. The user iterator considers the opposite.
+template <typename UserOp>
+static UserOp getLastUser(mlir::Operation *op) {
+  if (!op->getUsers().empty())
+    if (auto userOp = mlir::dyn_cast<UserOp>(*op->user_begin()))
+      return userOp;
+  return {};
+}
+
+namespace {
+
+// Trim operations can be erased in certain expressions, such as character
+// comparisons.
+// Since a character comparison appends spaces to the shorter character,
+// calls to trim() that are used only in the comparison can be eliminated.
+//
+// Example:
+// `trim(x) == trim(y)`
+// can be simplified to
+// `x == y`
+class EraseTrim : public mlir::OpRewritePattern<hlfir::CharTrimOp> {
+public:
+  using mlir::OpRewritePattern<hlfir::CharTrimOp>::OpRewritePattern;
+
+  llvm::LogicalResult
+  matchAndRewrite(hlfir::CharTrimOp trimOp,
+                  mlir::PatternRewriter &rewriter) const override {
+    int trimUses = std::distance(trimOp->use_begin(), trimOp->use_end());
+    auto cmpCharOp = getFirstUser<hlfir::CmpCharOp>(trimOp);
+    auto destroyOp = getLastUser<hlfir::DestroyOp>(trimOp);
+    if (!cmpCharOp || !destroyOp || trimUses != 2)
+      return rewriter.notifyMatchFailure(
+          trimOp, "hlfir.char_trim is not used (only) by hlfir.cmpchar");
+
+    rewriter.eraseOp(destroyOp);
+    rewriter.replaceOp(trimOp, trimOp.getChr());
+    return mlir::success();
+  }
+};
+
+class ExpressionSimplificationPass
+    : public hlfir::impl::ExpressionSimplificationBase<
+          ExpressionSimplificationPass> {
+public:
+  void runOnOperation() override {
+    mlir::MLIRContext *context = &getContext();
+
+    mlir::GreedyRewriteConfig config;
+    // Prevent the pattern driver from merging blocks.
+    config.setRegionSimplificationLevel(
+        mlir::GreedySimplifyRegionLevel::Disabled);
+
+    mlir::RewritePatternSet patterns(context);
+    patterns.insert<EraseTrim>(context);
+
+    if (mlir::failed(mlir::applyPatternsGreedily(
+            getOperation(), std::move(patterns), config))) {
+      mlir::emitError(getOperation()->getLoc(),
+                      "failure in HLFIR expression simplification");
+      signalPassFailure();
+    }
+  }
+};
+
+} // namespace
@@ -245,6 +245,10 @@ void createDefaultFIROptimizerPassPipeline(mlir::PassManager &pm,
 void createHLFIRToFIRPassPipeline(mlir::PassManager &pm,
                                   EnableOpenMP enableOpenMP,
                                   llvm::OptimizationLevel optLevel) {
+  if (optLevel.getSizeLevel() > 0 || optLevel.getSpeedupLevel() > 0) {
+    addNestedPassToAllTopLevelOperations<PassConstructor>(
+        pm, hlfir::createExpressionSimplification);
+  }
   if (optLevel.isOptimizingForSpeed()) {
     addCanonicalizerPassWithoutRegionSimplification(pm);
     addNestedPassToAllTopLevelOperations<PassConstructor>(
 
@@ -15,6 +15,15 @@
 ! ALL: Pass statistics report
 
 ! ALL: Fortran::lower::VerifierPass
+! O2-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'omp.declare_reduction', 'omp.private']
+! O2-NEXT: 'fir.global' Pipeline
+! O2-NEXT:   ExpressionSimplification
+! O2-NEXT: 'func.func' Pipeline
+! O2-NEXT:   ExpressionSimplification
+! O2-NEXT: 'omp.declare_reduction' Pipeline
+! O2-NEXT:   ExpressionSimplification
+! O2-NEXT: 'omp.private' Pipeline
+! O2-NEXT:   ExpressionSimplification
 ! O2-NEXT: Canonicalizer
 ! ALL:     Pipeline Collection : ['fir.global', 'func.func', 'omp.declare_reduction', 'omp.private']
 ! ALL-NEXT:'fir.global' Pipeline
Original file line number	Diff line number	Diff line change
`@@ -61,6 +61,10 @@ def SimplifyHLFIRIntrinsics : Pass<"simplify-hlfir-intrinsics"> {`
`61`	`61`	`"the hlfir.matmul.">];`
`62`	`62`	`}`
`63`	`63`
	`64`	`+def ExpressionSimplification : Pass<"hlfir-expression-simplification"> {`
	`65`	`+ let summary = "Simplify Fortran expressions";`
	`66`	`+}`
	`67`	`+`
`64`	`68`	`def InlineElementals : Pass<"inline-elementals"> {`
`65`	`69`	`let summary = "Inline chained hlfir.elemental operations";`
`66`	`70`	`}`