
Commit 329dfa1

[X86] fixup-blend.ll - add commuted load test coverage
1 parent 01f9dff commit 329dfa1
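The reworked load tests take the pointer as the first argument and feed the loaded vector into the first shufflevector operand, while the new _commute variants keep the previous form, where the load feeds the other shuffle operand (as the retained mem[0],xmm0[1]-style check lines show), so both operand orders of the movss/movsd-style blends get coverage. A minimal sketch of the pattern in LLVM IR (hypothetical reduced functions, not the exact tests in this commit, which also add the blended result to a third argument so the blend is not folded away):

; Hypothetical reduced pair illustrating load vs. commuted-load blend coverage.
define <2 x double> @blend_movsd_load(ptr %p0, <2 x double> %a1) {
  %v = load <2 x double>, ptr %p0
  ; lane 0 taken from %a1, lane 1 from the loaded vector
  %s = shufflevector <2 x double> %v, <2 x double> %a1, <2 x i32> <i32 2, i32 1>
  ret <2 x double> %s
}

define <2 x double> @blend_movsd_load_commute(<2 x double> %a0, ptr %p1) {
  %v = load <2 x double>, ptr %p1
  ; lane 0 taken from the loaded vector, lane 1 from %a0
  %s = shufflevector <2 x double> %a0, <2 x double> %v, <2 x i32> <i32 2, i32 1>
  ret <2 x double> %s
}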

File tree

1 file changed (+187, -21 lines)


llvm/test/CodeGen/X86/fixup-blend.ll

Lines changed: 187 additions & 21 deletions
@@ -59,21 +59,45 @@ define <2 x double> @test_v2f64_blend_movsd_optsize(<2 x double> %a0, <2 x doubl
   ret <2 x double> %r
 }
 
-define <2 x double> @test_v2f64_blend_movsd_load(<2 x double> %a0, ptr %p1, <2 x double> %a2) {
+define <2 x double> @test_v2f64_blend_movsd_load(ptr %p0, <2 x double> %a1, <2 x double> %a2) {
 ; SSE2-LABEL: test_v2f64_blend_movsd_load:
 ; SSE2: # %bb.0:
-; SSE2-NEXT: movlpd {{.*#+}} xmm0 = mem[0],xmm0[1]
+; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],mem[1]
 ; SSE2-NEXT: addpd %xmm1, %xmm0
 ; SSE2-NEXT: retq
 ;
 ; SSE4-LABEL: test_v2f64_blend_movsd_load:
 ; SSE4: # %bb.0:
-; SSE4-NEXT: blendpd {{.*#+}} xmm0 = mem[0],xmm0[1]
+; SSE4-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],mem[1]
 ; SSE4-NEXT: addpd %xmm1, %xmm0
 ; SSE4-NEXT: retq
 ;
 ; AVX-LABEL: test_v2f64_blend_movsd_load:
 ; AVX: # %bb.0:
+; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1]
+; AVX-NEXT: vaddpd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+  %a0 = load <2 x double>, ptr %p0
+  %s = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 2, i32 1>
+  %r = fadd <2 x double> %s, %a2
+  ret <2 x double> %r
+}
+
+define <2 x double> @test_v2f64_blend_movsd_load_commute(<2 x double> %a0, ptr %p1, <2 x double> %a2) {
+; SSE2-LABEL: test_v2f64_blend_movsd_load_commute:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movlpd {{.*#+}} xmm0 = mem[0],xmm0[1]
+; SSE2-NEXT: addpd %xmm1, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test_v2f64_blend_movsd_load_commute:
+; SSE4: # %bb.0:
+; SSE4-NEXT: blendpd {{.*#+}} xmm0 = mem[0],xmm0[1]
+; SSE4-NEXT: addpd %xmm1, %xmm0
+; SSE4-NEXT: retq
+;
+; AVX-LABEL: test_v2f64_blend_movsd_load_commute:
+; AVX: # %bb.0:
 ; AVX-NEXT: vblendpd {{.*#+}} xmm0 = mem[0],xmm0[1]
 ; AVX-NEXT: vaddpd %xmm1, %xmm0, %xmm0
 ; AVX-NEXT: retq
@@ -178,27 +202,57 @@ define <2 x i64> @test_v2i64_blend_movsd_optsize(<2 x i64> %a0, <2 x i64> %a1, <
   ret <2 x i64> %r
 }
 
-define <2 x i64> @test_v2i64_blend_movsd_load(<2 x i64> %a0, ptr %p1, <2 x i64> %a2) {
+define <2 x i64> @test_v2i64_blend_movsd_load(ptr %p0, <2 x i64> %a1, <2 x i64> %a2) {
 ; SSE2-LABEL: test_v2i64_blend_movsd_load:
 ; SSE2: # %bb.0:
-; SSE2-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
+; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],mem[1]
 ; SSE2-NEXT: paddq %xmm1, %xmm0
 ; SSE2-NEXT: retq
 ;
 ; SSE4-LABEL: test_v2i64_blend_movsd_load:
 ; SSE4: # %bb.0:
-; SSE4-NEXT: pblendw {{.*#+}} xmm0 = mem[0,1,2,3],xmm0[4,5,6,7]
+; SSE4-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],mem[4,5,6,7]
 ; SSE4-NEXT: paddq %xmm1, %xmm0
 ; SSE4-NEXT: retq
 ;
 ; AVX1-LABEL: test_v2i64_blend_movsd_load:
 ; AVX1: # %bb.0:
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = mem[0,1,2,3],xmm0[4,5,6,7]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],mem[4,5,6,7]
 ; AVX1-NEXT: vpaddq %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT: retq
 ;
 ; AVX2-LABEL: test_v2i64_blend_movsd_load:
 ; AVX2: # %bb.0:
+; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],mem[2,3]
+; AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
+  %a0 = load <2 x i64>, ptr %p0
+  %s = shufflevector <2 x i64> %a0, <2 x i64> %a1, <2 x i32> <i32 2, i32 1>
+  %r = add <2 x i64> %s, %a2
+  ret <2 x i64> %r
+}
+
+define <2 x i64> @test_v2i64_blend_movsd_load_commute(<2 x i64> %a0, ptr %p1, <2 x i64> %a2) {
+; SSE2-LABEL: test_v2i64_blend_movsd_load_commute:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
+; SSE2-NEXT: paddq %xmm1, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test_v2i64_blend_movsd_load_commute:
+; SSE4: # %bb.0:
+; SSE4-NEXT: pblendw {{.*#+}} xmm0 = mem[0,1,2,3],xmm0[4,5,6,7]
+; SSE4-NEXT: paddq %xmm1, %xmm0
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test_v2i64_blend_movsd_load_commute:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = mem[0,1,2,3],xmm0[4,5,6,7]
+; AVX1-NEXT: vpaddq %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test_v2i64_blend_movsd_load_commute:
+; AVX2: # %bb.0:
 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
 ; AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT: retq
@@ -338,21 +392,47 @@ define <4 x float> @test_v4f32_blend_movsd_optsize(<4 x float> %a0, <4 x float>
   ret <4 x float> %r
 }
 
-define <4 x float> @test_v4f32_blend_movss_load(<4 x float> %a0, ptr %p1, <4 x float> %a2) {
+define <4 x float> @test_v4f32_blend_movss_load(ptr %p0, <4 x float> %a1, <4 x float> %a2) {
 ; SSE2-LABEL: test_v4f32_blend_movss_load:
 ; SSE2: # %bb.0:
 ; SSE2-NEXT: movaps (%rdi), %xmm2
+; SSE2-NEXT: movss {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3]
+; SSE2-NEXT: addps %xmm1, %xmm2
+; SSE2-NEXT: movaps %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test_v4f32_blend_movss_load:
+; SSE4: # %bb.0:
+; SSE4-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],mem[1,2,3]
+; SSE4-NEXT: addps %xmm1, %xmm0
+; SSE4-NEXT: retq
+;
+; AVX-LABEL: test_v4f32_blend_movss_load:
+; AVX: # %bb.0:
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],mem[1,2,3]
+; AVX-NEXT: vaddps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+  %a0 = load <4 x float>, ptr %p0
+  %s = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
+  %r = fadd <4 x float> %s, %a2
+  ret <4 x float> %r
+}
+
+define <4 x float> @test_v4f32_blend_movss_load_commute(<4 x float> %a0, ptr %p1, <4 x float> %a2) {
+; SSE2-LABEL: test_v4f32_blend_movss_load_commute:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movaps (%rdi), %xmm2
 ; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
 ; SSE2-NEXT: addps %xmm1, %xmm0
 ; SSE2-NEXT: retq
 ;
-; SSE4-LABEL: test_v4f32_blend_movss_load:
+; SSE4-LABEL: test_v4f32_blend_movss_load_commute:
 ; SSE4: # %bb.0:
 ; SSE4-NEXT: blendps {{.*#+}} xmm0 = mem[0],xmm0[1,2,3]
 ; SSE4-NEXT: addps %xmm1, %xmm0
 ; SSE4-NEXT: retq
 ;
-; AVX-LABEL: test_v4f32_blend_movss_load:
+; AVX-LABEL: test_v4f32_blend_movss_load_commute:
 ; AVX: # %bb.0:
 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = mem[0],xmm0[1,2,3]
 ; AVX-NEXT: vaddps %xmm1, %xmm0, %xmm0
@@ -363,21 +443,45 @@ define <4 x float> @test_v4f32_blend_movss_load(<4 x float> %a0, ptr %p1, <4 x f
   ret <4 x float> %r
 }
 
-define <4 x float> @test_v4f32_blend_movsd_load(<4 x float> %a0, ptr %p1, <4 x float> %a2) {
+define <4 x float> @test_v4f32_blend_movsd_load(ptr %p0, <4 x float> %a1, <4 x float> %a2) {
 ; SSE2-LABEL: test_v4f32_blend_movsd_load:
 ; SSE2: # %bb.0:
-; SSE2-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
+; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],mem[1]
 ; SSE2-NEXT: addps %xmm1, %xmm0
 ; SSE2-NEXT: retq
 ;
 ; SSE4-LABEL: test_v4f32_blend_movsd_load:
 ; SSE4: # %bb.0:
-; SSE4-NEXT: blendps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
+; SSE4-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],mem[2,3]
 ; SSE4-NEXT: addps %xmm1, %xmm0
 ; SSE4-NEXT: retq
 ;
 ; AVX-LABEL: test_v4f32_blend_movsd_load:
 ; AVX: # %bb.0:
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],mem[2,3]
+; AVX-NEXT: vaddps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+  %a0 = load <4 x float>, ptr %p0
+  %s = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+  %r = fadd <4 x float> %s, %a2
+  ret <4 x float> %r
+}
+
+define <4 x float> @test_v4f32_blend_movsd_load_commute(<4 x float> %a0, ptr %p1, <4 x float> %a2) {
+; SSE2-LABEL: test_v4f32_blend_movsd_load_commute:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
+; SSE2-NEXT: addps %xmm1, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test_v4f32_blend_movsd_load_commute:
+; SSE4: # %bb.0:
+; SSE4-NEXT: blendps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
+; SSE4-NEXT: addps %xmm1, %xmm0
+; SSE4-NEXT: retq
+;
+; AVX-LABEL: test_v4f32_blend_movsd_load_commute:
+; AVX: # %bb.0:
 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
 ; AVX-NEXT: vaddps %xmm1, %xmm0, %xmm0
 ; AVX-NEXT: retq
@@ -580,27 +684,59 @@ define <4 x i32> @test_v4i32_blend_movsd_optsize(<4 x i32> %a0, <4 x i32> %a1, <
   ret <4 x i32> %r
 }
 
-define <4 x i32> @test_v4i32_blend_movss_load(<4 x i32> %a0, ptr %p1, <4 x i32> %a2) {
+define <4 x i32> @test_v4i32_blend_movss_load(ptr %p0, <4 x i32> %a1, <4 x i32> %a2) {
 ; SSE2-LABEL: test_v4i32_blend_movss_load:
 ; SSE2: # %bb.0:
 ; SSE2-NEXT: movaps (%rdi), %xmm2
+; SSE2-NEXT: movss {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3]
+; SSE2-NEXT: paddd %xmm1, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test_v4i32_blend_movss_load:
+; SSE4: # %bb.0:
+; SSE4-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],mem[2,3,4,5,6,7]
+; SSE4-NEXT: paddd %xmm1, %xmm0
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test_v4i32_blend_movss_load:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],mem[2,3,4,5,6,7]
+; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test_v4i32_blend_movss_load:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],mem[1,2,3]
+; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
+  %a0 = load <4 x i32>, ptr %p0
+  %s = shufflevector <4 x i32> %a0, <4 x i32> %a1, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
+  %r = add <4 x i32> %s, %a2
+  ret <4 x i32> %r
+}
+
+define <4 x i32> @test_v4i32_blend_movss_load_commute(<4 x i32> %a0, ptr %p1, <4 x i32> %a2) {
+; SSE2-LABEL: test_v4i32_blend_movss_load_commute:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movaps (%rdi), %xmm2
 ; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
 ; SSE2-NEXT: paddd %xmm1, %xmm0
 ; SSE2-NEXT: retq
 ;
-; SSE4-LABEL: test_v4i32_blend_movss_load:
+; SSE4-LABEL: test_v4i32_blend_movss_load_commute:
 ; SSE4: # %bb.0:
 ; SSE4-NEXT: pblendw {{.*#+}} xmm0 = mem[0,1],xmm0[2,3,4,5,6,7]
 ; SSE4-NEXT: paddd %xmm1, %xmm0
 ; SSE4-NEXT: retq
 ;
-; AVX1-LABEL: test_v4i32_blend_movss_load:
+; AVX1-LABEL: test_v4i32_blend_movss_load_commute:
 ; AVX1: # %bb.0:
 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = mem[0,1],xmm0[2,3,4,5,6,7]
 ; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT: retq
 ;
-; AVX2-LABEL: test_v4i32_blend_movss_load:
+; AVX2-LABEL: test_v4i32_blend_movss_load_commute:
 ; AVX2: # %bb.0:
 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = mem[0],xmm0[1,2,3]
 ; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
@@ -611,27 +747,57 @@ define <4 x i32> @test_v4i32_blend_movss_load(<4 x i32> %a0, ptr %p1, <4 x i32>
   ret <4 x i32> %r
 }
 
-define <4 x i32> @test_v4i32_blend_movsd_load(<4 x i32> %a0, ptr %p1, <4 x i32> %a2) {
+define <4 x i32> @test_v4i32_blend_movsd_load(ptr %p0, <4 x i32> %a1, <4 x i32> %a2) {
 ; SSE2-LABEL: test_v4i32_blend_movsd_load:
 ; SSE2: # %bb.0:
-; SSE2-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
+; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],mem[1]
 ; SSE2-NEXT: paddd %xmm1, %xmm0
 ; SSE2-NEXT: retq
 ;
 ; SSE4-LABEL: test_v4i32_blend_movsd_load:
 ; SSE4: # %bb.0:
-; SSE4-NEXT: pblendw {{.*#+}} xmm0 = mem[0,1,2,3],xmm0[4,5,6,7]
+; SSE4-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],mem[4,5,6,7]
 ; SSE4-NEXT: paddd %xmm1, %xmm0
 ; SSE4-NEXT: retq
 ;
 ; AVX1-LABEL: test_v4i32_blend_movsd_load:
 ; AVX1: # %bb.0:
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = mem[0,1,2,3],xmm0[4,5,6,7]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],mem[4,5,6,7]
 ; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT: retq
 ;
 ; AVX2-LABEL: test_v4i32_blend_movsd_load:
 ; AVX2: # %bb.0:
+; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],mem[2,3]
+; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
+  %a0 = load <4 x i32>, ptr %p0
+  %s = shufflevector <4 x i32> %a0, <4 x i32> %a1, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+  %r = add <4 x i32> %s, %a2
+  ret <4 x i32> %r
+}
+
+define <4 x i32> @test_v4i32_blend_movsd_load_commute(<4 x i32> %a0, ptr %p1, <4 x i32> %a2) {
+; SSE2-LABEL: test_v4i32_blend_movsd_load_commute:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
+; SSE2-NEXT: paddd %xmm1, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test_v4i32_blend_movsd_load_commute:
+; SSE4: # %bb.0:
+; SSE4-NEXT: pblendw {{.*#+}} xmm0 = mem[0,1,2,3],xmm0[4,5,6,7]
+; SSE4-NEXT: paddd %xmm1, %xmm0
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test_v4i32_blend_movsd_load_commute:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = mem[0,1,2,3],xmm0[4,5,6,7]
+; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test_v4i32_blend_movsd_load_commute:
+; AVX2: # %bb.0:
 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
 ; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT: retq

0 commit comments
