Skip to content

Commit ab9a81f

Browse files
committed
[SLP]Try to emit canonical shuffle with undef operand.
In the canonical form of the shuffle, the poison/undef operand is the second operand; this patch tries to emit the canonical form for partial vectorization of the buildvector sequence. Also, this patch starts emitting a freeze instruction for shuffles with undef indices if the second shuffle operand is undef, not poison. It is an initial step towards D93818, where undef mask elements are treated as returning a poison value. Differential Revision: https://reviews.llvm.org/D134377
1 parent 75b292c commit ab9a81f

File tree

5 files changed

+38
-20
lines changed

5 files changed

+38
-20
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 31 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -8181,21 +8181,37 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
81818181
SmallVector<int> InsertMask(NumElts, UndefMaskElem);
81828182
for (unsigned I = 0; I < NumElts; I++) {
81838183
if (Mask[I] != UndefMaskElem)
8184-
InsertMask[Offset + I] = NumElts + I;
8185-
}
8186-
if (Offset != 0 ||
8187-
!isUndefVector(FirstInsert->getOperand(0), InsertMask)) {
8188-
for (unsigned I = 0; I < NumElts; I++) {
8189-
if (InsertMask[I] == UndefMaskElem)
8190-
InsertMask[I] = I;
8191-
}
8192-
8193-
V = Builder.CreateShuffleVector(
8194-
FirstInsert->getOperand(0), V, InsertMask,
8195-
cast<Instruction>(E->Scalars.back())->getName());
8196-
if (auto *I = dyn_cast<Instruction>(V)) {
8197-
GatherShuffleSeq.insert(I);
8198-
CSEBlocks.insert(I->getParent());
8184+
InsertMask[Offset + I] = I;
8185+
}
8186+
bool IsFirstUndef = isUndefVector(FirstInsert->getOperand(0), InsertMask);
8187+
if ((!IsIdentity || Offset != 0 || !IsFirstUndef) &&
8188+
NumElts != NumScalars) {
8189+
if (IsFirstUndef) {
8190+
if (!ShuffleVectorInst::isIdentityMask(InsertMask)) {
8191+
V = Builder.CreateShuffleVector(
8192+
V, InsertMask, cast<Instruction>(E->Scalars.back())->getName());
8193+
if (auto *I = dyn_cast<Instruction>(V)) {
8194+
GatherShuffleSeq.insert(I);
8195+
CSEBlocks.insert(I->getParent());
8196+
}
8197+
// Create freeze for undef values.
8198+
if (!isa<PoisonValue>(FirstInsert->getOperand(0)))
8199+
V = Builder.CreateFreeze(V);
8200+
}
8201+
} else {
8202+
for (unsigned I = 0; I < NumElts; I++) {
8203+
if (InsertMask[I] == UndefMaskElem)
8204+
InsertMask[I] = I;
8205+
else
8206+
InsertMask[I] += NumElts;
8207+
}
8208+
V = Builder.CreateShuffleVector(
8209+
FirstInsert->getOperand(0), V, InsertMask,
8210+
cast<Instruction>(E->Scalars.back())->getName());
8211+
if (auto *I = dyn_cast<Instruction>(V)) {
8212+
GatherShuffleSeq.insert(I);
8213+
CSEBlocks.insert(I->getParent());
8214+
}
81998215
}
82008216
}
82018217

llvm/test/Transforms/SLPVectorizer/X86/PR35865-inseltpoison.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ define void @_Z10fooConvertPDv4_xS0_S0_PKS_() {
1111
; CHECK-NEXT: [[TMP4:%.*]] = fpext <2 x half> [[TMP3]] to <2 x float>
1212
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x float> [[TMP4]] to <2 x i32>
1313
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
14-
; CHECK-NEXT: [[VECINS_I_5_I1:%.*]] = shufflevector <8 x i32> poison, <8 x i32> [[TMP6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7>
14+
; CHECK-NEXT: [[VECINS_I_5_I1:%.*]] = shufflevector <8 x i32> [[TMP6]], <8 x i32> poison, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 1, i32 undef, i32 undef>
1515
; CHECK-NEXT: ret void
1616
;
1717
entry:

llvm/test/Transforms/SLPVectorizer/X86/PR35865.ll

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,8 @@ define void @_Z10fooConvertPDv4_xS0_S0_PKS_() {
1111
; CHECK-NEXT: [[TMP4:%.*]] = fpext <2 x half> [[TMP3]] to <2 x float>
1212
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x float> [[TMP4]] to <2 x i32>
1313
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
14-
; CHECK-NEXT: [[VECINS_I_5_I1:%.*]] = shufflevector <8 x i32> undef, <8 x i32> [[TMP6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7>
14+
; CHECK-NEXT: [[VECINS_I_5_I1:%.*]] = shufflevector <8 x i32> [[TMP6]], <8 x i32> poison, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 1, i32 undef, i32 undef>
15+
; CHECK-NEXT: [[TMP7:%.*]] = freeze <8 x i32> [[VECINS_I_5_I1]]
1516
; CHECK-NEXT: ret void
1617
;
1718
entry:

llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-inseltpoison.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -305,7 +305,7 @@ define <4 x float> @simple_select_no_users(<4 x float> %a, <4 x float> %b, <4 x
305305
; CHECK-NEXT: [[TMP16:%.*]] = select <2 x i1> [[TMP11]], <2 x float> [[TMP13]], <2 x float> [[TMP15]]
306306
; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <2 x float> [[TMP8]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
307307
; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <2 x float> [[TMP16]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
308-
; CHECK-NEXT: [[RD1:%.*]] = shufflevector <4 x float> poison, <4 x float> [[TMP18]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
308+
; CHECK-NEXT: [[RD1:%.*]] = shufflevector <4 x float> [[TMP18]], <4 x float> poison, <4 x i32> <i32 undef, i32 undef, i32 0, i32 1>
309309
; CHECK-NEXT: ret <4 x float> [[RD1]]
310310
;
311311
%c0 = extractelement <4 x i32> %c, i32 0

llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -339,8 +339,9 @@ define <4 x float> @simple_select_no_users(<4 x float> %a, <4 x float> %b, <4 x
339339
; CHECK-NEXT: [[TMP16:%.*]] = select <2 x i1> [[TMP11]], <2 x float> [[TMP13]], <2 x float> [[TMP15]]
340340
; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <2 x float> [[TMP8]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
341341
; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <2 x float> [[TMP16]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
342-
; CHECK-NEXT: [[RD1:%.*]] = shufflevector <4 x float> undef, <4 x float> [[TMP18]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
343-
; CHECK-NEXT: ret <4 x float> [[RD1]]
342+
; CHECK-NEXT: [[RD1:%.*]] = shufflevector <4 x float> [[TMP18]], <4 x float> poison, <4 x i32> <i32 undef, i32 undef, i32 0, i32 1>
343+
; CHECK-NEXT: [[TMP19:%.*]] = freeze <4 x float> [[RD1]]
344+
; CHECK-NEXT: ret <4 x float> [[TMP19]]
344345
;
345346
%c0 = extractelement <4 x i32> %c, i32 0
346347
%c1 = extractelement <4 x i32> %c, i32 1

0 commit comments

Comments
 (0)