Skip to content

Commit e724226

Browse files
committed
[VPlan] Return cost of 0 for VPWidenCastRecipe without underlying value.
In some cases, VPWidenCastRecipes are created but not considered in the legacy cost model, including truncates/extends when evaluating a reduction in a smaller type. Return 0 for such casts for now, to avoid divergences between VPlan and legacy cost models. Fixes #113526.
1 parent 8c4bc1e commit e724226

File tree

2 files changed

+70
-0
lines changed

2 files changed

+70
-0
lines changed

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1524,6 +1524,11 @@ void VPWidenCastRecipe::execute(VPTransformState &State) {
15241524

15251525
InstructionCost VPWidenCastRecipe::computeCost(ElementCount VF,
15261526
VPCostContext &Ctx) const {
1527+
// TODO: In some cases, VPWidenCastRecipes are created but not considered in
1528+
// the legacy cost model, including truncates/extends when evaluating a
1529+
// reduction in a smaller type.
1530+
if (!getUnderlyingValue())
1531+
return 0;
15271532
// Computes the CastContextHint from a recipe that may access memory.
15281533
auto ComputeCCH = [&](const VPRecipeBase *R) -> TTI::CastContextHint {
15291534
if (VF.isScalar())

llvm/test/Transforms/LoopVectorize/X86/cost-model.ll

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1037,6 +1037,71 @@ exit:
10371037
ret i64 %red.mul
10381038
}
10391039

1040+
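; Test case for https://github.com/llvm/llvm-project/issues/113526: an
; or-reduction evaluated in a narrower type (i1), which introduces trunc/zext
; casts in the vector loop.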
1041+
define i32 @narrowed_reduction(ptr %a, i1 %cmp) #0 {
1042+
; CHECK-LABEL: @narrowed_reduction(
1043+
; CHECK-NEXT: entry:
1044+
; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP:%.*]] to i32
1045+
; CHECK-NEXT: br i1 true, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1046+
; CHECK: vector.ph:
1047+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[CONV]], i64 0
1048+
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
1049+
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
1050+
; CHECK: vector.body:
1051+
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1052+
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ]
1053+
; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ]
1054+
; CHECK-NEXT: [[TMP0:%.*]] = and <16 x i32> [[VEC_PHI]], <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
1055+
; CHECK-NEXT: [[TMP1:%.*]] = and <16 x i32> [[VEC_PHI1]], <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
1056+
; CHECK-NEXT: [[TMP2:%.*]] = or <16 x i32> [[TMP0]], [[BROADCAST_SPLAT]]
1057+
; CHECK-NEXT: [[TMP3:%.*]] = or <16 x i32> [[TMP1]], [[BROADCAST_SPLAT]]
1058+
; CHECK-NEXT: [[TMP4:%.*]] = trunc <16 x i32> [[TMP2]] to <16 x i1>
1059+
; CHECK-NEXT: [[TMP5:%.*]] = trunc <16 x i32> [[TMP3]] to <16 x i1>
1060+
; CHECK-NEXT: [[TMP6]] = zext <16 x i1> [[TMP4]] to <16 x i32>
1061+
; CHECK-NEXT: [[TMP7]] = zext <16 x i1> [[TMP5]] to <16 x i32>
1062+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 32
1063+
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 0
1064+
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
1065+
; CHECK: middle.block:
1066+
; CHECK-NEXT: [[TMP9:%.*]] = trunc <16 x i32> [[TMP6]] to <16 x i1>
1067+
; CHECK-NEXT: [[TMP10:%.*]] = trunc <16 x i32> [[TMP7]] to <16 x i1>
1068+
; CHECK-NEXT: [[BIN_RDX:%.*]] = or <16 x i1> [[TMP10]], [[TMP9]]
1069+
; CHECK-NEXT: [[TMP11:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[BIN_RDX]])
1070+
; CHECK-NEXT: [[TMP12:%.*]] = zext i1 [[TMP11]] to i32
1071+
; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
1072+
; CHECK: scalar.ph:
1073+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 1, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ]
1074+
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP12]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
1075+
; CHECK-NEXT: br label [[LOOP:%.*]]
1076+
; CHECK: loop:
1077+
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[LOOP]] ]
1078+
; CHECK-NEXT: [[OR13:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[OR:%.*]], [[LOOP]] ]
1079+
; CHECK-NEXT: [[AND:%.*]] = and i32 [[OR13]], 1
1080+
; CHECK-NEXT: [[OR]] = or i32 [[AND]], [[CONV]]
1081+
; CHECK-NEXT: [[INC]] = add i32 [[IV]], 1
1082+
; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV]], 0
1083+
; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP27:![0-9]+]]
1084+
; CHECK: exit:
1085+
; CHECK-NEXT: [[OR_LCSSA:%.*]] = phi i32 [ [[OR]], [[LOOP]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ]
1086+
; CHECK-NEXT: ret i32 [[OR_LCSSA]]
1087+
;
1088+
entry:
1089+
%conv = zext i1 %cmp to i32
1090+
br label %loop
1091+
1092+
loop:
1093+
%iv = phi i32 [ 1, %entry ], [ %inc, %loop ]
1094+
%or13 = phi i32 [ 0, %entry ], [ %or, %loop ]
1095+
%and = and i32 %or13, 1
1096+
%or = or i32 %and, %conv
1097+
%inc = add i32 %iv, 1
1098+
%ec = icmp eq i32 %iv, 0
1099+
br i1 %ec, label %exit, label %loop
1100+
1101+
exit:
1102+
ret i32 %or
1103+
}
1104+
10401105
declare void @llvm.assume(i1 noundef) #0
10411106

10421107
attributes #0 = { "target-cpu"="penryn" }

0 commit comments

Comments
 (0)