Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[InstCombine] Convert fshl(x, 0, y) to shl(x, and(y, BitWidth - 1)) when BitWidth is pow2 #122362

Open
wants to merge 2 commits into
base: main
Choose a base branch
from

Conversation

AmrDeveloper
Copy link
Member

Convert fshl(x, 0, y) to shl(x, and(y, BitWidth - 1)), or to shl(x, y) if y is known to be within the range [0, BitWidth - 1].

Fixes: #122235

@llvmbot
Copy link
Member

llvmbot commented Jan 9, 2025

@llvm/pr-subscribers-llvm-transforms

Author: Amr Hesham (AmrDeveloper)

Changes

Convert fshl(x, 0, y) to shl(x, and(y, BitWidth - 1)), or to shl(x, y) if y is known to be within the range [0, BitWidth - 1].

Fixes: #122235


Full diff: https://github.com/llvm/llvm-project/pull/122362.diff

2 Files Affected:

  • (modified) llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp (+13)
  • (modified) llvm/test/Transforms/InstCombine/fsh.ll (+95)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index c55c40c88bc845..f0ff76ba57555b 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -2229,6 +2229,19 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
         return BitOp;
     }
 
+    // fshal(X, 0, Y) --> shl(X, and(Y, BitWidth - 1))
+    // fshal(X, 0, Y) --> Shl(X, Y) if Y within the range 0 to type bit width
+    if (match(Op1, m_ZeroInt())) {
+      unsigned BitWidth = Ty->getScalarSizeInBits();
+      Value *Op2 = II->getArgOperand(2);
+      if (auto Range = II->getRange(); Range && Range->getLower().sge(0) &&
+                                       Range->getUpper().sle(BitWidth)) {
+        return BinaryOperator::CreateShl(Op0, Op2);
+      }
+      Value *And = Builder.CreateAnd(Op2, ConstantInt::get(Ty, BitWidth - 1));
+      return BinaryOperator::CreateShl(Op0, And);
+    }
+
     // Left or right might be masked.
     if (SimplifyDemandedInstructionBits(*II))
       return &CI;
diff --git a/llvm/test/Transforms/InstCombine/fsh.ll b/llvm/test/Transforms/InstCombine/fsh.ll
index 434cd810296d8c..c0f1ee4a5976bb 100644
--- a/llvm/test/Transforms/InstCombine/fsh.ll
+++ b/llvm/test/Transforms/InstCombine/fsh.ll
@@ -6,6 +6,7 @@ declare i16 @llvm.fshr.i16(i16, i16, i16)
 declare i32 @llvm.fshl.i32(i32, i32, i32)
 declare i33 @llvm.fshr.i33(i33, i33, i33)
 declare <2 x i32> @llvm.fshr.v2i32(<2 x i32>, <2 x i32>, <2 x i32>)
+declare <2 x i16> @llvm.fshl.v2i16(<2 x i16>, <2 x i16>, <2 x i16>)
 declare <2 x i31> @llvm.fshl.v2i31(<2 x i31>, <2 x i31>, <2 x i31>)
 declare <3 x i16> @llvm.fshl.v3i16(<3 x i16>, <3 x i16>, <3 x i16>)
 
@@ -1010,3 +1011,97 @@ define <2 x i32> @fshr_vec_zero_elem(<2 x i32> %x, <2 x i32> %y) {
   %fsh = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> <i32 2, i32 0>)
   ret <2 x i32> %fsh
 }
+
+define i16 @fshl_i16_shl(i16 %x, i16 %y) {
+; CHECK-LABEL: @fshl_i16_shl(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = and i16 [[Y:%.*]], 15
+; CHECK-NEXT:    [[RES:%.*]] = shl i16 [[X:%.*]], [[TMP0]]
+; CHECK-NEXT:    ret i16 [[RES]]
+;
+entry:
+  %res = call i16 @llvm.fshl.i16(i16 %x, i16 0, i16 %y)
+  ret i16 %res
+}
+
+define i32 @fshl_i32_shl(i32 %x, i32 %y) {
+; CHECK-LABEL: @fshl_i32_shl(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = and i32 [[Y:%.*]], 31
+; CHECK-NEXT:    [[RES:%.*]] = shl i32 [[X:%.*]], [[TMP0]]
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+entry:
+  %res = call i32 @llvm.fshl.i32(i32 %x, i32 0, i32 %y)
+  ret i32 %res
+}
+
+define <2 x i16> @fshl_vi16_shl(<2 x i16>  %x, <2 x i16> %y) {
+; CHECK-LABEL: @fshl_vi16_shl(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = and <2 x i16> [[Y:%.*]], splat (i16 15)
+; CHECK-NEXT:    [[RES:%.*]] = shl <2 x i16> [[X:%.*]], [[TMP0]]
+; CHECK-NEXT:    ret <2 x i16> [[RES]]
+;
+entry:
+  %res = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> %x, <2 x i16> <i16 0, i16 0>, <2 x i16> %y)
+  ret <2 x i16> %res
+}
+
+define <2 x i31> @fshl_vi31_shl(<2 x i31>  %x, <2 x i31> %y) {
+; CHECK-LABEL: @fshl_vi31_shl(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = and <2 x i31> [[Y:%.*]], splat (i31 30)
+; CHECK-NEXT:    [[RES:%.*]] = shl <2 x i31> [[X:%.*]], [[TMP0]]
+; CHECK-NEXT:    ret <2 x i31> [[RES]]
+;
+entry:
+  %res = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> %x, <2 x i31> <i31 0, i31 0>, <2 x i31>  %y)
+  ret <2 x i31>  %res
+}
+
+define i16 @fshl_i16_shl_with_range(i16 %x, i16 range(i16 0, 16) %y) {
+; CHECK-LABEL: @fshl_i16_shl_with_range(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[RES:%.*]] = shl i16 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    ret i16 [[RES]]
+;
+entry:
+  %res = call i16 @llvm.fshl.i16(i16 %x, i16 0, i16 %y)
+  ret i16 %res
+}
+
+define i32 @fshl_i32_shl_with_range(i32 %x, i32 range(i32 0, 32) %y) {
+; CHECK-LABEL: @fshl_i32_shl_with_range(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[RES:%.*]] = shl i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+entry:
+  %res = call i32 @llvm.fshl.i32(i32 %x, i32 0, i32 %y)
+  ret i32 %res
+}
+
+define i16 @fshl_i16_shl_with_range_ignored(i16 %x, i16 range(i16 0, 17) %y) {
+; CHECK-LABEL: @fshl_i16_shl_with_range_ignored(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = and i16 [[Y:%.*]], 15
+; CHECK-NEXT:    [[RES:%.*]] = shl i16 [[X:%.*]], [[TMP0]]
+; CHECK-NEXT:    ret i16 [[RES]]
+;
+entry:
+  %res = call i16 @llvm.fshl.i16(i16 %x, i16 0, i16 %y)
+  ret i16 %res
+}
+
+define i32 @fshl_i32_shl_with_range_ignored(i32 %x, i32 range(i32 0, 33) %y) {
+; CHECK-LABEL: @fshl_i32_shl_with_range_ignored(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = and i32 [[Y:%.*]], 31
+; CHECK-NEXT:    [[RES:%.*]] = shl i32 [[X:%.*]], [[TMP0]]
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+entry:
+  %res = call i32 @llvm.fshl.i32(i32 %x, i32 0, i32 %y)
+  ret i32 %res
+}

if (auto Range = II->getRange(); Range && Range->getLower().sge(0) &&
Range->getUpper().sle(BitWidth)) {
return BinaryOperator::CreateShl(Op0, Op2);
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You don't need this code; the `and` below will be optimized away if it's unnecessary.

Range->getUpper().sle(BitWidth)) {
return BinaryOperator::CreateShl(Op0, Op2);
}
Value *And = Builder.CreateAnd(Op2, ConstantInt::get(Ty, BitWidth - 1));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This transform needs to be limited to power-of-two bit widths; otherwise it is incorrect.

Copy link
Contributor

@goldsteinn goldsteinn Jan 9, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You could do (urem, Op2, BitWidth). That's probably not worth it then.

@dtcxzyw dtcxzyw changed the title [InstCombine] Convert fshl(x, 0, y) to shl(x, y) or shl(X, and(Y, BitWidth - 1)) [InstCombine] Convert fshl(x, 0, y) to shl(x, y) or shl(X, urem(Y, BitWidth)) Jan 10, 2025
@dtcxzyw dtcxzyw changed the title [InstCombine] Convert fshl(x, 0, y) to shl(x, y) or shl(X, urem(Y, BitWidth)) [InstCombine] Convert fshl(x, 0, y) to shl(x, and(y, BitWidth - 1)) when BitWidth is pow2 Jan 10, 2025
@@ -2229,6 +2229,19 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
return BitOp;
}

// fshal(X, 0, Y) --> shl(X, and(Y, BitWidth - 1))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// fshal(X, 0, Y) --> shl(X, and(Y, BitWidth - 1))
// fshl(X, 0, Y) --> shl(X, and(Y, BitWidth - 1))

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

[InstCombine] fshl(x, 0, y) with in-range y not converted to shl x, y
5 participants