Skip to content

Commit ee35e34

Browse files
authored
[ConstantFolding] Add folding for [de]interleave2, insert and extract (#141301)
The change adds constant folding for four vector intrinsics: `interleave2`, `deinterleave2`, `vector_extract`, and `vector_insert`. For the last two intrinsics, the change does not use the `ShuffleVector` fold mechanism, as it is much simpler to construct the result vector explicitly.
1 parent 0e45731 commit ee35e34

File tree

2 files changed

+165
-0
lines changed

2 files changed

+165
-0
lines changed

llvm/lib/Analysis/ConstantFolding.cpp

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1635,6 +1635,10 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
16351635
case Intrinsic::vector_reduce_smax:
16361636
case Intrinsic::vector_reduce_umin:
16371637
case Intrinsic::vector_reduce_umax:
1638+
case Intrinsic::vector_extract:
1639+
case Intrinsic::vector_insert:
1640+
case Intrinsic::vector_interleave2:
1641+
case Intrinsic::vector_deinterleave2:
16381642
// Target intrinsics
16391643
case Intrinsic::amdgcn_perm:
16401644
case Intrinsic::amdgcn_wave_reduce_umin:
@@ -3758,6 +3762,72 @@ static Constant *ConstantFoldFixedVectorCall(
37583762
}
37593763
return nullptr;
37603764
}
3765+
case Intrinsic::vector_extract: {
3766+
auto *Idx = dyn_cast<ConstantInt>(Operands[1]);
3767+
Constant *Vec = Operands[0];
3768+
if (!Idx || !isa<FixedVectorType>(Vec->getType()))
3769+
return nullptr;
3770+
3771+
unsigned NumElements = FVTy->getNumElements();
3772+
unsigned VecNumElements =
3773+
cast<FixedVectorType>(Vec->getType())->getNumElements();
3774+
unsigned StartingIndex = Idx->getZExtValue();
3775+
3776+
// Extracting entire vector is nop
3777+
if (NumElements == VecNumElements && StartingIndex == 0)
3778+
return Vec;
3779+
3780+
for (unsigned I = StartingIndex, E = StartingIndex + NumElements; I < E;
3781+
++I) {
3782+
Constant *Elt = Vec->getAggregateElement(I);
3783+
if (!Elt)
3784+
return nullptr;
3785+
Result[I - StartingIndex] = Elt;
3786+
}
3787+
3788+
return ConstantVector::get(Result);
3789+
}
3790+
case Intrinsic::vector_insert: {
  // Fold llvm.vector.insert(Vec, SubVec, Idx) for constant operands: the
  // result is Vec with lanes [Idx, Idx + |SubVec|) taken from SubVec.
  // Only a constant integer index and a fixed-width Vec are handled.
  Constant *Vec = Operands[0];
  Constant *SubVec = Operands[1];
  auto *Idx = dyn_cast<ConstantInt>(Operands[2]);
  if (!Idx || !isa<FixedVectorType>(Vec->getType()))
    return nullptr;

  unsigned SubVecNumElements =
      cast<FixedVectorType>(SubVec->getType())->getNumElements();
  unsigned VecNumElements =
      cast<FixedVectorType>(Vec->getType())->getNumElements();
  unsigned IdxN = Idx->getZExtValue();
  // Replacing entire vector with a subvec is nop
  if (SubVecNumElements == VecNumElements && IdxN == 0)
    return SubVec;

  for (unsigned I = 0; I < VecNumElements; ++I) {
    Constant *Elt;
    // Lanes below IdxN must come from Vec. Without the lower-bound check,
    // the unsigned subtraction `I - IdxN` wraps around for I < IdxN and
    // indexes far outside SubVec.
    if (I >= IdxN && I < IdxN + SubVecNumElements)
      Elt = SubVec->getAggregateElement(I - IdxN);
    else
      Elt = Vec->getAggregateElement(I);
    // Bail out if the element could not be obtained as a constant.
    if (!Elt)
      return nullptr;
    Result[I] = Elt;
  }
  return ConstantVector::get(Result);
}
3818+
case Intrinsic::vector_interleave2: {
3819+
unsigned NumElements =
3820+
cast<FixedVectorType>(Operands[0]->getType())->getNumElements();
3821+
for (unsigned I = 0; I < NumElements; ++I) {
3822+
Constant *Elt0 = Operands[0]->getAggregateElement(I);
3823+
Constant *Elt1 = Operands[1]->getAggregateElement(I);
3824+
if (!Elt0 || !Elt1)
3825+
return nullptr;
3826+
Result[2 * I] = Elt0;
3827+
Result[2 * I + 1] = Elt1;
3828+
}
3829+
return ConstantVector::get(Result);
3830+
}
37613831
default:
37623832
break;
37633833
}
@@ -3919,6 +3989,33 @@ ConstantFoldStructCall(StringRef Name, Intrinsic::ID IntrinsicID,
39193989
return nullptr;
39203990
return ConstantStruct::get(StTy, SinResult, CosResult);
39213991
}
3992+
case Intrinsic::vector_deinterleave2: {
3993+
auto *Vec = dyn_cast<Constant>(Operands[0]);
3994+
if (!Vec)
3995+
return nullptr;
3996+
3997+
auto *VecTy = cast<VectorType>(Vec->getType());
3998+
unsigned NumElements = VecTy->getElementCount().getKnownMinValue() / 2;
3999+
if (isa<ConstantAggregateZero>(Vec)) {
4000+
auto *HalfVecTy = VectorType::getHalfElementsVectorType(VecTy);
4001+
return ConstantStruct::get(StTy, ConstantAggregateZero::get(HalfVecTy),
4002+
ConstantAggregateZero::get(HalfVecTy));
4003+
}
4004+
if (isa<FixedVectorType>(Vec->getType())) {
4005+
SmallVector<Constant *, 4> Res0(NumElements), Res1(NumElements);
4006+
for (unsigned I = 0; I < NumElements; ++I) {
4007+
Constant *Elt0 = Vec->getAggregateElement(2 * I);
4008+
Constant *Elt1 = Vec->getAggregateElement(2 * I + 1);
4009+
if (!Elt0 || !Elt1)
4010+
return nullptr;
4011+
Res0[I] = Elt0;
4012+
Res1[I] = Elt1;
4013+
}
4014+
return ConstantStruct::get(StTy, ConstantVector::get(Res0),
4015+
ConstantVector::get(Res1));
4016+
}
4017+
return nullptr;
4018+
}
39224019
default:
39234020
// TODO: Constant folding of vector intrinsics that fall through here does
39244021
// not work (e.g. overflow intrinsics)
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt < %s -passes=instsimplify,verify -S | FileCheck %s
3+
4+
define <3 x i32> @fold_vector_extract() {
5+
; CHECK-LABEL: define <3 x i32> @fold_vector_extract() {
6+
; CHECK-NEXT: ret <3 x i32> <i32 3, i32 4, i32 5>
7+
;
8+
%1 = call <3 x i32> @llvm.vector.extract.v3i32.v8i32(<8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, i64 3)
9+
ret <3 x i32> %1
10+
}
11+
12+
@a = external global i16, align 1
13+
14+
define <3 x i32> @fold_vector_extract_constexpr() {
15+
; CHECK-LABEL: define <3 x i32> @fold_vector_extract_constexpr() {
16+
; CHECK-NEXT: ret <3 x i32> <i32 ptrtoint (ptr @a to i32), i32 1, i32 2>
17+
;
18+
%1 = call <3 x i32> @llvm.vector.extract.v3i32.v8i32(<8 x i32> <i32 ptrtoint (ptr @a to i32), i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, i64 0)
19+
ret <3 x i32> %1
20+
}
21+
22+
define <8 x i32> @fold_vector_extract_nop() {
23+
; CHECK-LABEL: define <8 x i32> @fold_vector_extract_nop() {
24+
; CHECK-NEXT: ret <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
25+
;
26+
%1 = call <8 x i32> @llvm.vector.extract.v3i32.v8i32(<8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, i64 0)
27+
ret <8 x i32> %1
28+
}
29+
30+
define <8 x i32> @fold_vector_insert() {
31+
; CHECK-LABEL: define <8 x i32> @fold_vector_insert() {
32+
; CHECK-NEXT: ret <8 x i32> <i32 9, i32 10, i32 11, i32 12, i32 5, i32 6, i32 7, i32 8>
33+
;
34+
%1 = call <8 x i32> @llvm.vector.insert.v8i32(<8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>, <4 x i32> <i32 9, i32 10, i32 11, i32 12>, i64 0)
35+
ret <8 x i32> %1
36+
}
37+
38+
define <8 x i32> @fold_vector_insert_nop() {
39+
; CHECK-LABEL: define <8 x i32> @fold_vector_insert_nop() {
40+
; CHECK-NEXT: ret <8 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18>
41+
;
42+
%1 = call <8 x i32> @llvm.vector.insert.v8i32(<8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>, <8 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18>, i64 0)
43+
ret <8 x i32> %1
44+
}
45+
46+
define <8 x i32> @fold_vector_interleave2() {
47+
; CHECK-LABEL: define <8 x i32> @fold_vector_interleave2() {
48+
; CHECK-NEXT: ret <8 x i32> <i32 1, i32 5, i32 2, i32 6, i32 3, i32 7, i32 4, i32 8>
49+
;
50+
%1 = call <8 x i32> @llvm.vector.interleave2.v8i32(<4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32> <i32 5, i32 6, i32 7, i32 8>)
51+
ret <8 x i32> %1
52+
}
53+
54+
define {<4 x i32>, <4 x i32>} @fold_vector_deinterleave2() {
55+
; CHECK-LABEL: define { <4 x i32>, <4 x i32> } @fold_vector_deinterleave2() {
56+
; CHECK-NEXT: ret { <4 x i32>, <4 x i32> } { <4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32> <i32 5, i32 6, i32 7, i32 8> }
57+
;
58+
%1 = call {<4 x i32>, <4 x i32>} @llvm.vector.deinterleave2.v4i32.v8i32(<8 x i32> <i32 1, i32 5, i32 2, i32 6, i32 3, i32 7, i32 4, i32 8>)
59+
ret {<4 x i32>, <4 x i32>} %1
60+
}
61+
62+
define {<vscale x 4 x i32>, <vscale x 4 x i32>} @fold_scalable_vector_deinterleave2() {
63+
; CHECK-LABEL: define { <vscale x 4 x i32>, <vscale x 4 x i32> } @fold_scalable_vector_deinterleave2() {
64+
; CHECK-NEXT: ret { <vscale x 4 x i32>, <vscale x 4 x i32> } zeroinitializer
65+
;
66+
%1 = call {<vscale x 4 x i32>, <vscale x 4 x i32>} @llvm.vector.deinterleave2.v4i32.v8i32(<vscale x 8 x i32> zeroinitializer)
67+
ret {<vscale x 4 x i32>, <vscale x 4 x i32>} %1
68+
}

0 commit comments

Comments
 (0)