Skip to content

Commit 08cf536

Browse files
committed
[X86] Add an additional ReadAfterLoad to EVEX FMA instructions.
These instructions have three sources. When the load is folded, two of them are still registers, so we need two ReadAfterLoad SchedReads (written as `ReadAfterFold` in the diff below), one for each register source.
1 parent 0c9f6ad commit 08cf536

File tree

1 file changed

+20
-10
lines changed

1 file changed

+20
-10
lines changed

llvm/lib/Target/X86/X86InstrAVX512.td

Lines changed: 20 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -6818,7 +6818,8 @@ multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDPatternOperator
68186818
OpcodeStr, "$src3, $src2", "$src2, $src3",
68196819
(_.VT (OpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))),
68206820
(_.VT (MaskOpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))), 1, 0>,
6821-
EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
6821+
EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold,
6822+
sched.ReadAfterFold]>;
68226823

68236824
defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
68246825
(ins _.RC:$src2, _.ScalarMemOp:$src3),
@@ -6828,7 +6829,8 @@ multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDPatternOperator
68286829
_.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3))),
68296830
(MaskOpNode _.RC:$src2,
68306831
_.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3))), 1, 0>,
6831-
EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
6832+
EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold,
6833+
sched.ReadAfterFold]>;
68326834
}
68336835
}
68346836

@@ -6911,7 +6913,8 @@ multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDPatternOperator
69116913
OpcodeStr, "$src3, $src2", "$src2, $src3",
69126914
(_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)),
69136915
(_.VT (MaskOpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)), 1, 0>,
6914-
EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
6916+
EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold,
6917+
sched.ReadAfterFold]>;
69156918

69166919
defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
69176920
(ins _.RC:$src2, _.ScalarMemOp:$src3),
@@ -6923,7 +6926,8 @@ multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDPatternOperator
69236926
(_.VT (MaskOpNode _.RC:$src2,
69246927
(_.VT (_.BroadcastLdFrag addr:$src3)),
69256928
_.RC:$src1)), 1, 0>, EVEX_4V, EVEX_B,
6926-
Sched<[sched.Folded, sched.ReadAfterFold]>;
6929+
Sched<[sched.Folded, sched.ReadAfterFold,
6930+
sched.ReadAfterFold]>;
69276931
}
69286932
}
69296933

@@ -7007,7 +7011,8 @@ multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDPatternOperator
70077011
OpcodeStr, "$src3, $src2", "$src2, $src3",
70087012
(_.VT (OpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)),
70097013
(_.VT (MaskOpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)), 1, 0>,
7010-
EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
7014+
EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold,
7015+
sched.ReadAfterFold]>;
70117016

70127017
// Pattern is 312 order so that the load is in a different place from the
70137018
// 213 and 231 patterns this helps tablegen's duplicate pattern detection.
@@ -7019,7 +7024,8 @@ multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDPatternOperator
70197024
_.RC:$src1, _.RC:$src2)),
70207025
(_.VT (MaskOpNode (_.VT (_.BroadcastLdFrag addr:$src3)),
70217026
_.RC:$src1, _.RC:$src2)), 1, 0>,
7022-
EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
7027+
EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold,
7028+
sched.ReadAfterFold]>;
70237029
}
70247030
}
70257031

@@ -7097,7 +7103,8 @@ let Constraints = "$src1 = $dst", hasSideEffects = 0 in {
70977103
defm m_Int: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
70987104
(ins _.RC:$src2, _.IntScalarMemOp:$src3), OpcodeStr,
70997105
"$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
7100-
EVEX_4V, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>, SIMD_EXC;
7106+
EVEX_4V, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold,
7107+
SchedWriteFMA.Scl.ReadAfterFold]>, SIMD_EXC;
71017108

71027109
let Uses = [MXCSR] in
71037110
defm rb_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
@@ -7115,7 +7122,8 @@ let Constraints = "$src1 = $dst", hasSideEffects = 0 in {
71157122
(ins _.FRC:$src1, _.FRC:$src2, _.ScalarMemOp:$src3),
71167123
!strconcat(OpcodeStr,
71177124
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
7118-
[RHS_m]>, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>, EVEX_4V, SIMD_EXC;
7125+
[RHS_m]>, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold,
7126+
SchedWriteFMA.Scl.ReadAfterFold]>, EVEX_4V, SIMD_EXC;
71197127

71207128
let Uses = [MXCSR] in
71217129
def rb : AVX512<opc, MRMSrcReg, (outs _.FRC:$dst),
@@ -7433,7 +7441,8 @@ multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
74337441
(ins _.RC:$src2, _.MemOp:$src3),
74347442
OpcodeStr, "$src3, $src2", "$src2, $src3",
74357443
(_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>,
7436-
T8PD, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
7444+
T8PD, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold,
7445+
sched.ReadAfterFold]>;
74377446

74387447
defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
74397448
(ins _.RC:$src2, _.ScalarMemOp:$src3),
@@ -7442,7 +7451,8 @@ multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
74427451
(OpNode _.RC:$src2,
74437452
(_.VT (_.BroadcastLdFrag addr:$src3)),
74447453
_.RC:$src1)>,
7445-
T8PD, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
7454+
T8PD, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold,
7455+
sched.ReadAfterFold]>;
74467456
}
74477457
}
74487458
} // Constraints = "$src1 = $dst"

0 commit comments

Comments
 (0)