Skip to content
This repository was archived by the owner on Jan 26, 2024. It is now read-only.

Commit 5dbe5d2

Browse files
committed
[DAG] Commute shuffle(splat(A,u), shuffle(C,D)) -> shuffle'(shuffle(C,D), splat(A,u))
We only merge shuffles if the inner (LHS) shuffle is a non-splat, so commute these shuffles to improve merging of multiple shuffles.
1 parent ffe72f9 commit 5dbe5d2

File tree

2 files changed

+33
-30
lines changed

2 files changed

+33
-30
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 28 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -20799,26 +20799,35 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
2079920799
}
2080020800
}
2080120801

20802-
// Canonicalize shuffles according to rules:
20803-
// shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
20804-
// shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
20805-
// shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
20806-
if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
20807-
N0.getOpcode() != ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
20808-
TLI.isTypeLegal(VT)) {
20809-
// The incoming shuffle must be of the same type as the result of the
20810-
// current shuffle.
20811-
assert(N1->getOperand(0).getValueType() == VT &&
20812-
"Shuffle types don't match");
20813-
20814-
SDValue SV0 = N1->getOperand(0);
20815-
SDValue SV1 = N1->getOperand(1);
20816-
bool HasSameOp0 = N0 == SV0;
20817-
bool IsSV1Undef = SV1.isUndef();
20818-
if (HasSameOp0 || IsSV1Undef || N0 == SV1)
20819-
// Commute the operands of this shuffle so that next rule
20820-
// will trigger.
20802+
if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
20803+
// Canonicalize shuffles according to rules:
20804+
// shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
20805+
// shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
20806+
// shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
20807+
if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
20808+
N0.getOpcode() != ISD::VECTOR_SHUFFLE) {
20809+
// The incoming shuffle must be of the same type as the result of the
20810+
// current shuffle.
20811+
assert(N1->getOperand(0).getValueType() == VT &&
20812+
"Shuffle types don't match");
20813+
20814+
SDValue SV0 = N1->getOperand(0);
20815+
SDValue SV1 = N1->getOperand(1);
20816+
bool HasSameOp0 = N0 == SV0;
20817+
bool IsSV1Undef = SV1.isUndef();
20818+
if (HasSameOp0 || IsSV1Undef || N0 == SV1)
20819+
// Commute the operands of this shuffle so merging below will trigger.
20820+
return DAG.getCommutedVectorShuffle(*SVN);
20821+
}
20822+
20823+
// Canonicalize splat shuffles to the RHS to improve merging below.
20824+
// shuffle(splat(A,u), shuffle(C,D)) -> shuffle'(shuffle(C,D), splat(A,u))
20825+
if (N0.getOpcode() == ISD::VECTOR_SHUFFLE &&
20826+
N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
20827+
cast<ShuffleVectorSDNode>(N0)->isSplat() &&
20828+
!cast<ShuffleVectorSDNode>(N1)->isSplat()) {
2082120829
return DAG.getCommutedVectorShuffle(*SVN);
20830+
}
2082220831
}
2082320832

2082420833
// Compute the combined shuffle mask for a shuffle with SV0 as the first

llvm/test/CodeGen/X86/haddsub-undef.ll

Lines changed: 5 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -583,17 +583,11 @@ define <4 x float> @add_ps_016(<4 x float> %0, <4 x float> %1) {
583583
; SSE-NEXT: movaps %xmm1, %xmm0
584584
; SSE-NEXT: retq
585585
;
586-
; AVX-SLOW-LABEL: add_ps_016:
587-
; AVX-SLOW: # %bb.0:
588-
; AVX-SLOW-NEXT: vhaddps %xmm0, %xmm1, %xmm0
589-
; AVX-SLOW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,0,3,3]
590-
; AVX-SLOW-NEXT: retq
591-
;
592-
; AVX-FAST-LABEL: add_ps_016:
593-
; AVX-FAST: # %bb.0:
594-
; AVX-FAST-NEXT: vhaddps %xmm0, %xmm1, %xmm0
595-
; AVX-FAST-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,0,3,1]
596-
; AVX-FAST-NEXT: retq
586+
; AVX-LABEL: add_ps_016:
587+
; AVX: # %bb.0:
588+
; AVX-NEXT: vhaddps %xmm0, %xmm1, %xmm0
589+
; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,0,3,3]
590+
; AVX-NEXT: retq
597591
%3 = shufflevector <4 x float> %1, <4 x float> %0, <2 x i32> <i32 0, i32 6>
598592
%4 = shufflevector <4 x float> %1, <4 x float> %0, <2 x i32> <i32 1, i32 7>
599593
%5 = fadd <2 x float> %3, %4

0 commit comments

Comments
 (0)