Skip to content

Commit 02bfe2d

Browse files
committed
[RISCV] Adjust vector immediate store materialization cost
This change updates the costs to make constant pool loads match their actual cost, and adds the broadcast special case to avoid too many regressions. We really need more information about the constants being rematerialized, but this is an incremental improvement. Differential Revision: https://reviews.llvm.org/D134746
1 parent b9cba8c commit 02bfe2d

File tree

4 files changed

+46
-29
lines changed

4 files changed

+46
-29
lines changed

llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -666,12 +666,17 @@ InstructionCost RISCVTTIImpl::getStoreImmCost(Type *Ty,
666666
// currently have here.
667667
return 0;
668668

669-
APInt PseudoAddr = APInt::getAllOnes(DL.getPointerSizeInBits());
670-
// Add a cost of address load + the cost of the vector load.
671-
return RISCVMatInt::getIntMatCost(PseudoAddr, DL.getPointerSizeInBits(),
672-
getST()->getFeatureBits()) +
673-
getMemoryOpCost(Instruction::Load, Ty, DL.getABITypeAlign(Ty),
674-
/*AddressSpace=*/0, CostKind);
669+
if (OpInfo.isUniform())
670+
// vmv.v.i, vmv.v.x, or vfmv.v.f
671+
// We ignore the cost of the scalar constant materialization to be consistent
672+
// with how we treat scalar constants themselves just above.
673+
return 1;
674+
675+
// Add a cost of address generation + the cost of the vector load. The
676+
// address is expected to be a PC-relative offset to a constant pool entry
677+
// using auipc/addi.
678+
return 2 + getMemoryOpCost(Instruction::Load, Ty, DL.getABITypeAlign(Ty),
679+
/*AddressSpace=*/0, CostKind);
675680
}
676681

677682

llvm/test/Analysis/CostModel/RISCV/rvv-load-store.ll

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -297,17 +297,17 @@ define void @store_of_constant(ptr %p) {
297297
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> undef, ptr %p, align 16
298298
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> zeroinitializer, ptr %p, align 16
299299
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i64> zeroinitializer, ptr %p, align 32
300-
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <4 x i32> <i32 1, i32 1, i32 1, i32 1>, ptr %p, align 16
301-
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <4 x i64> <i64 1, i64 1, i64 1, i64 1>, ptr %p, align 32
302-
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <4 x i32> <i32 4096, i32 4096, i32 4096, i32 4096>, ptr %p, align 16
303-
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <4 x i32> <i32 1, i32 1, i32 2, i32 1>, ptr %p, align 16
304-
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <4 x i32> <i32 2, i32 1, i32 1, i32 1>, ptr %p, align 16
305-
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <4 x i32> <i32 0, i32 1, i32 2, i32 3>, ptr %p, align 16
306-
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <4 x i32> <i32 1, i32 2, i32 3, i32 4>, ptr %p, align 16
307-
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <4 x i32> <i32 -1, i32 -2, i32 -3, i32 -4>, ptr %p, align 16
308-
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <4 x i32> <i32 2, i32 4, i32 6, i32 8>, ptr %p, align 16
309-
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <4 x i32> <i32 -1, i32 0, i32 2, i32 1>, ptr %p, align 16
310-
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <4 x i32> <i32 256, i32 4096, i32 57, i32 1>, ptr %p, align 16
300+
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i32> <i32 1, i32 1, i32 1, i32 1>, ptr %p, align 16
301+
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <4 x i64> <i64 1, i64 1, i64 1, i64 1>, ptr %p, align 32
302+
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i32> <i32 4096, i32 4096, i32 4096, i32 4096>, ptr %p, align 16
303+
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x i32> <i32 1, i32 1, i32 2, i32 1>, ptr %p, align 16
304+
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x i32> <i32 2, i32 1, i32 1, i32 1>, ptr %p, align 16
305+
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x i32> <i32 0, i32 1, i32 2, i32 3>, ptr %p, align 16
306+
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x i32> <i32 1, i32 2, i32 3, i32 4>, ptr %p, align 16
307+
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x i32> <i32 -1, i32 -2, i32 -3, i32 -4>, ptr %p, align 16
308+
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x i32> <i32 2, i32 4, i32 6, i32 8>, ptr %p, align 16
309+
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x i32> <i32 -1, i32 0, i32 2, i32 1>, ptr %p, align 16
310+
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x i32> <i32 256, i32 4096, i32 57, i32 1>, ptr %p, align 16
311311
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
312312
;
313313

llvm/test/Transforms/SLPVectorizer/RISCV/load-store.ll

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -222,7 +222,13 @@ entry:
222222
define void @store_stepvector_i32(ptr %dest) {
223223
; CHECK-LABEL: @store_stepvector_i32(
224224
; CHECK-NEXT: entry:
225-
; CHECK-NEXT: store <4 x i32> <i32 0, i32 1, i32 2, i32 3>, ptr [[DEST:%.*]], align 4
225+
; CHECK-NEXT: store i32 0, ptr [[DEST:%.*]], align 4
226+
; CHECK-NEXT: [[INC1:%.*]] = getelementptr inbounds i32, ptr [[DEST]], i64 1
227+
; CHECK-NEXT: store i32 1, ptr [[INC1]], align 2
228+
; CHECK-NEXT: [[INC2:%.*]] = getelementptr inbounds i32, ptr [[DEST]], i64 2
229+
; CHECK-NEXT: store i32 2, ptr [[INC2]], align 2
230+
; CHECK-NEXT: [[INC3:%.*]] = getelementptr inbounds i32, ptr [[DEST]], i64 3
231+
; CHECK-NEXT: store i32 3, ptr [[INC3]], align 2
226232
; CHECK-NEXT: ret void
227233
;
228234
; DEFAULT-LABEL: @store_stepvector_i32(
@@ -250,7 +256,13 @@ entry:
250256
define void @store_arbitrary_constant_i32(ptr %dest) {
251257
; CHECK-LABEL: @store_arbitrary_constant_i32(
252258
; CHECK-NEXT: entry:
253-
; CHECK-NEXT: store <4 x i32> <i32 0, i32 -33, i32 44, i32 77>, ptr [[DEST:%.*]], align 4
259+
; CHECK-NEXT: store i32 0, ptr [[DEST:%.*]], align 4
260+
; CHECK-NEXT: [[INC1:%.*]] = getelementptr inbounds i32, ptr [[DEST]], i64 1
261+
; CHECK-NEXT: store i32 -33, ptr [[INC1]], align 2
262+
; CHECK-NEXT: [[INC2:%.*]] = getelementptr inbounds i32, ptr [[DEST]], i64 2
263+
; CHECK-NEXT: store i32 44, ptr [[INC2]], align 2
264+
; CHECK-NEXT: [[INC3:%.*]] = getelementptr inbounds i32, ptr [[DEST]], i64 3
265+
; CHECK-NEXT: store i32 77, ptr [[INC3]], align 2
254266
; CHECK-NEXT: ret void
255267
;
256268
; DEFAULT-LABEL: @store_arbitrary_constant_i32(

llvm/test/Transforms/SLPVectorizer/RISCV/rvv-min-vector-size.ll

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -14,33 +14,33 @@ define void @foo(i64* nocapture writeonly %da) {
1414
; CHECK-128-NEXT: entry:
1515
; CHECK-128-NEXT: store i64 0, i64* [[DA:%.*]], align 8
1616
; CHECK-128-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, i64* [[DA]], i64 1
17-
; CHECK-128-NEXT: store i64 1, i64* [[ARRAYIDX1]], align 8
17+
; CHECK-128-NEXT: store i64 0, i64* [[ARRAYIDX1]], align 8
1818
; CHECK-128-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i64, i64* [[DA]], i64 2
19-
; CHECK-128-NEXT: store i64 2, i64* [[ARRAYIDX2]], align 8
19+
; CHECK-128-NEXT: store i64 0, i64* [[ARRAYIDX2]], align 8
2020
; CHECK-128-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i64, i64* [[DA]], i64 3
21-
; CHECK-128-NEXT: store i64 3, i64* [[ARRAYIDX3]], align 8
21+
; CHECK-128-NEXT: store i64 0, i64* [[ARRAYIDX3]], align 8
2222
; CHECK-128-NEXT: ret void
2323
;
2424
; CHECK-256-LABEL: @foo(
2525
; CHECK-256-NEXT: entry:
2626
; CHECK-256-NEXT: [[TMP0:%.*]] = bitcast i64* [[DA:%.*]] to <4 x i64>*
27-
; CHECK-256-NEXT: store <4 x i64> <i64 0, i64 1, i64 2, i64 3>, <4 x i64>* [[TMP0]], align 8
27+
; CHECK-256-NEXT: store <4 x i64> zeroinitializer, <4 x i64>* [[TMP0]], align 8
2828
; CHECK-256-NEXT: ret void
2929
;
3030
; CHECK-512-LABEL: @foo(
3131
; CHECK-512-NEXT: entry:
3232
; CHECK-512-NEXT: [[TMP0:%.*]] = bitcast i64* [[DA:%.*]] to <4 x i64>*
33-
; CHECK-512-NEXT: store <4 x i64> <i64 0, i64 1, i64 2, i64 3>, <4 x i64>* [[TMP0]], align 8
33+
; CHECK-512-NEXT: store <4 x i64> zeroinitializer, <4 x i64>* [[TMP0]], align 8
3434
; CHECK-512-NEXT: ret void
3535
;
3636
entry:
3737
store i64 0, i64* %da, align 8
3838
%arrayidx1 = getelementptr inbounds i64, i64* %da, i64 1
39-
store i64 1, i64* %arrayidx1, align 8
39+
store i64 0, i64* %arrayidx1, align 8
4040
%arrayidx2 = getelementptr inbounds i64, i64* %da, i64 2
41-
store i64 2, i64* %arrayidx2, align 8
41+
store i64 0, i64* %arrayidx2, align 8
4242
%arrayidx3 = getelementptr inbounds i64, i64* %da, i64 3
43-
store i64 3, i64* %arrayidx3, align 8
43+
store i64 0, i64* %arrayidx3, align 8
4444
ret void
4545
}
4646

@@ -49,14 +49,14 @@ define void @foo8(i8* nocapture writeonly %da) {
4949
; CHECK-NEXT: entry:
5050
; CHECK-NEXT: store i8 0, i8* [[DA:%.*]], align 8
5151
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, i8* [[DA]], i8 1
52-
; CHECK-NEXT: store i8 1, i8* [[ARRAYIDX1]], align 8
52+
; CHECK-NEXT: store i8 0, i8* [[ARRAYIDX1]], align 8
5353
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, i8* [[DA]], i8 2
5454
; CHECK-NEXT: ret void
5555
;
5656
entry:
5757
store i8 0, i8* %da, align 8
5858
%arrayidx1 = getelementptr inbounds i8, i8* %da, i8 1
59-
store i8 1, i8* %arrayidx1, align 8
59+
store i8 0, i8* %arrayidx1, align 8
6060
%arrayidx2 = getelementptr inbounds i8, i8* %da, i8 2
6161
ret void
6262
}

0 commit comments

Comments
 (0)