Skip to content

Commit 813a4e9

Browse files
committed
[ValueTracking] If overlap in unsigned and signed range is contiguous, return it
We only have to choose between the unsigned and the signed range when their overlap consists of two separate pieces, i.e. when the range wraps around. For example, for i8, unsigned [254, 255] vs. signed [-128, 125]: [254, 255] corresponds to [-2, -1], which lies within the range [-128, 125], so the overlap is contiguous and can be returned. However, a case that would not work is one where one has to pick between [0, 129] and [-127, 127], because 129 is -127 when interpreted as a signed i8, so the overlap is not contiguous. Update ValueTracking.cpp Revert "[ValueTracking] If overlap in unsigned and signed range is contiguous, return it" This reverts commit 22e997c489aad3173db78f6fee17212bd16be96d. ok
1 parent 34c85ed commit 813a4e9

File tree

4 files changed

+136
-49
lines changed

4 files changed

+136
-49
lines changed

llvm/lib/Analysis/ValueTracking.cpp

Lines changed: 128 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -9585,56 +9585,151 @@ static void setLimitsForBinOp(const BinaryOperator &BO, APInt &Lower,
95859585
bool HasNSW = IIQ.hasNoSignedWrap(&BO);
95869586
bool HasNUW = IIQ.hasNoUnsignedWrap(&BO);
95879587

9588-
// If the caller expects a signed compare, then try to use a signed range.
9589-
// Otherwise if both no-wraps are set, use the unsigned range because it
9590-
// is never larger than the signed range. Example:
9591-
// "sub nuw nsw i8 -2, x" is unsigned [0, 254] vs. signed [-128, 126].
9592-
// "sub nuw nsw i8 2, x" is unsigned [0, 2] vs. signed [-125, 127].
9593-
if (PreferSignedRange && HasNSW && HasNUW)
9594-
HasNUW = false;
9595-
9596-
if (HasNUW) {
9597-
// 'sub nuw c, x' produces [0, C].
9598-
Upper = *C + 1;
9599-
} else if (HasNSW) {
9588+
// Build the two candidate ranges as [lo..hi]:
9589+
// unsignedRange: NUW ⇒ [0 .. C]
9590+
// signedRange: NSW ⇒ either [SINT_MIN .. -C - SINT_MIN] or [C -
9591+
// SINT_MAX .. SINT_MAX]
9592+
auto makeUnsignedRange = [&]() {
9593+
return std::pair<APInt, APInt>(APInt::getZero(Width), *C);
9594+
};
9595+
auto makeSignedRange = [&]() {
96009596
if (C->isNegative()) {
9601-
// 'sub nsw -C, x' produces [SINT_MIN, -C - SINT_MIN].
9602-
Lower = APInt::getSignedMinValue(Width);
9603-
Upper = *C - APInt::getSignedMaxValue(Width);
9597+
// sub nsw -C, x
9598+
APInt lo = APInt::getSignedMinValue(Width);
9599+
APInt hi = *C - APInt::getSignedMinValue(Width);
9600+
return std::pair<APInt, APInt>(lo, hi);
9601+
} else {
9602+
// sub nsw C, x
9603+
APInt lo = *C - APInt::getSignedMaxValue(Width);
9604+
APInt hi = APInt::getSignedMaxValue(Width);
9605+
return std::pair<APInt, APInt>(lo, hi);
9606+
}
9607+
};
9608+
9609+
// Split a (possibly wrapping) [lo..hi] into up to two non‑wrapping
9610+
// pieces:
9611+
auto splitPieces = [&](std::pair<APInt, APInt> rng,
9612+
SmallVectorImpl<std::pair<APInt, APInt>> &pieces) {
9613+
APInt lo = rng.first, hi = rng.second;
9614+
if (lo.ugt(hi)) {
9615+
// wraps around 2^n
9616+
pieces.emplace_back(lo, APInt::getMaxValue(Width)); // [lo..2^n-1]
9617+
pieces.emplace_back(APInt::getZero(Width), hi); // [0..hi]
96049618
} else {
9605-
// Note that sub 0, INT_MIN is not NSW. It techically is a signed wrap
9606-
// 'sub nsw C, x' produces [C - SINT_MAX, SINT_MAX].
9607-
Lower = *C - APInt::getSignedMaxValue(Width);
9608-
Upper = APInt::getSignedMinValue(Width);
9619+
pieces.emplace_back(lo, hi);
9620+
}
9621+
};
9622+
9623+
SmallVector<std::pair<APInt, APInt>, 2> piecesU, piecesS;
9624+
if (HasNUW)
9625+
splitPieces(makeUnsignedRange(), piecesU);
9626+
if (HasNSW)
9627+
splitPieces(makeSignedRange(), piecesS);
9628+
9629+
// Intersect piecewise:
9630+
SmallVector<std::pair<APInt, APInt>, 2> inters;
9631+
for (auto &u : piecesU) {
9632+
for (auto &s : piecesS) {
9633+
APInt loI = u.first.ugt(s.first) ? u.first : s.first;
9634+
APInt hiI = u.second.ult(s.second) ? u.second : s.second;
9635+
if (loI.ule(hiI))
9636+
inters.emplace_back(loI, hiI);
96099637
}
96109638
}
9639+
9640+
if (inters.size() == 1) {
9641+
// Exactly one contiguous overlap → use it
9642+
Lower = inters[0].first;
9643+
Upper = inters[0].second;
9644+
} else if (HasNUW && !PreferSignedRange) {
9645+
// Fallback to plain NUW result [0..C]
9646+
Lower = APInt::getZero(Width);
9647+
Upper = *C;
9648+
} else if (HasNSW) {
9649+
// Fallback to plain NSW result
9650+
auto S = makeSignedRange();
9651+
Lower = S.first;
9652+
Upper = S.second;
9653+
}
96119654
}
96129655
break;
96139656
case Instruction::Add:
96149657
if (match(BO.getOperand(1), m_APInt(C)) && !C->isZero()) {
96159658
bool HasNSW = IIQ.hasNoSignedWrap(&BO);
96169659
bool HasNUW = IIQ.hasNoUnsignedWrap(&BO);
96179660

9618-
// If the caller expects a signed compare, then try to use a signed
9619-
// range. Otherwise if both no-wraps are set, use the unsigned range
9620-
// because it is never larger than the signed range. Example: "add nuw
9621-
// nsw i8 X, -2" is unsigned [254,255] vs. signed [-128, 125].
9661+
// If the caller prefers signed ranges when both wraps are forbidden:
96229662
if (PreferSignedRange && HasNSW && HasNUW)
96239663
HasNUW = false;
96249664

9625-
if (HasNUW) {
9626-
// 'add nuw x, C' produces [C, UINT_MAX].
9627-
Lower = *C;
9628-
} else if (HasNSW) {
9665+
// Build the two candidate ranges as [lo..hi] in the unsigned 0..2^n-1
9666+
// world:
9667+
// NUW: 'add nuw x, C' ⇒ [ C .. UINT_MAX ]
9668+
auto makeUnsignedRange = [&]() {
9669+
APInt lo = *C;
9670+
APInt hi = APInt::getMaxValue(Width);
9671+
return std::pair<APInt, APInt>(lo, hi);
9672+
};
9673+
9674+
// NSW: 'add nsw x, C'
9675+
// if C<0: [ SINT_MIN .. SINT_MAX + C ]
9676+
// else: [ SINT_MIN + C .. SINT_MAX ]
9677+
auto makeSignedRange = [&]() {
96299678
if (C->isNegative()) {
9630-
// 'add nsw x, -C' produces [SINT_MIN, SINT_MAX - C].
9631-
Lower = APInt::getSignedMinValue(Width);
9632-
Upper = APInt::getSignedMaxValue(Width) + *C + 1;
9679+
APInt lo = APInt::getSignedMinValue(Width);
9680+
APInt hi = APInt::getSignedMaxValue(Width) + *C;
9681+
return std::pair<APInt, APInt>(lo, hi);
9682+
} else {
9683+
APInt lo = APInt::getSignedMinValue(Width) + *C;
9684+
APInt hi = APInt::getSignedMaxValue(Width);
9685+
return std::pair<APInt, APInt>(lo, hi);
9686+
}
9687+
};
9688+
9689+
// Split [lo..hi] into up to two non‑wrapping intervals:
9690+
auto splitPieces = [&](std::pair<APInt, APInt> rng,
9691+
SmallVectorImpl<std::pair<APInt, APInt>> &dst) {
9692+
APInt lo = rng.first, hi = rng.second;
9693+
if (lo.ugt(hi)) {
9694+
// wraps around 2^n
9695+
dst.emplace_back(lo, APInt::getMaxValue(Width));
9696+
dst.emplace_back(APInt::getZero(Width), hi);
96339697
} else {
9634-
// 'add nsw x, +C' produces [SINT_MIN + C, SINT_MAX].
9635-
Lower = APInt::getSignedMinValue(Width) + *C;
9636-
Upper = APInt::getSignedMaxValue(Width) + 1;
9698+
dst.emplace_back(lo, hi);
96379699
}
9700+
};
9701+
9702+
SmallVector<std::pair<APInt, APInt>, 2> piecesU, piecesS;
9703+
if (HasNUW)
9704+
splitPieces(makeUnsignedRange(), piecesU);
9705+
if (HasNSW)
9706+
splitPieces(makeSignedRange(), piecesS);
9707+
9708+
// Intersect piecewise
9709+
SmallVector<std::pair<APInt, APInt>, 2> inters;
9710+
for (auto &u : piecesU) {
9711+
for (auto &s : piecesS) {
9712+
APInt loI = u.first.ugt(s.first) ? u.first : s.first;
9713+
APInt hiI = u.second.ult(s.second) ? u.second : s.second;
9714+
if (loI.ule(hiI))
9715+
inters.emplace_back(loI, hiI);
9716+
}
9717+
}
9718+
9719+
if (inters.size() == 1) {
9720+
// Exactly one contiguous overlap ⇒ use it
9721+
Lower = inters[0].first;
9722+
Upper = inters[0].second +
9723+
1; // make Upper exclusive if you’re following [Lo..Hi)
9724+
} else if (HasNUW && !PreferSignedRange) {
9725+
// Fallback to plain NUW [C..UINT_MAX]
9726+
Lower = *C;
9727+
Upper = APInt::getMaxValue(Width) + 1;
9728+
} else if (HasNSW) {
9729+
// Fallback to plain NSW
9730+
auto S = makeSignedRange();
9731+
Lower = S.first;
9732+
Upper = S.second + 1;
96389733
}
96399734
}
96409735
break;

llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1412,8 +1412,7 @@ define void @interleave_deinterleave_factor3(ptr writeonly noalias %dst, ptr rea
14121412
; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
14131413
; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP41:![0-9]+]]
14141414
; CHECK: middle.block:
1415-
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
1416-
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
1415+
; CHECK-NEXT: br i1 false, label [[FOR_END:%.*]], label [[SCALAR_PH]]
14171416
; CHECK: scalar.ph:
14181417
;
14191418
entry:
@@ -1502,8 +1501,7 @@ define void @interleave_deinterleave(ptr writeonly noalias %dst, ptr readonly %a
15021501
; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
15031502
; CHECK-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP43:![0-9]+]]
15041503
; CHECK: middle.block:
1505-
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
1506-
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
1504+
; CHECK-NEXT: br i1 false, label [[FOR_END:%.*]], label [[SCALAR_PH]]
15071505
; CHECK: scalar.ph:
15081506
;
15091507
entry:

llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -67,8 +67,7 @@ define dso_local void @masked_strided1(ptr noalias nocapture readonly %p, ptr no
6767
; SCALAR_TAIL_FOLDING-NEXT: [[TMP17:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
6868
; SCALAR_TAIL_FOLDING-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
6969
; SCALAR_TAIL_FOLDING: middle.block:
70-
; SCALAR_TAIL_FOLDING-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
71-
; SCALAR_TAIL_FOLDING-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
70+
; SCALAR_TAIL_FOLDING-NEXT: br i1 false, label [[FOR_END:%.*]], label [[SCALAR_PH]]
7271
; SCALAR_TAIL_FOLDING: scalar.ph:
7372
;
7473
; PREDICATED_TAIL_FOLDING-LABEL: define dso_local void @masked_strided1
@@ -205,8 +204,7 @@ define dso_local void @masked_strided2(ptr noalias nocapture readnone %p, ptr no
205204
; SCALAR_TAIL_FOLDING-NEXT: [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
206205
; SCALAR_TAIL_FOLDING-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
207206
; SCALAR_TAIL_FOLDING: middle.block:
208-
; SCALAR_TAIL_FOLDING-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
209-
; SCALAR_TAIL_FOLDING-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
207+
; SCALAR_TAIL_FOLDING-NEXT: br i1 false, label [[FOR_END:%.*]], label [[SCALAR_PH]]
210208
; SCALAR_TAIL_FOLDING: scalar.ph:
211209
;
212210
; PREDICATED_TAIL_FOLDING-LABEL: define dso_local void @masked_strided2
@@ -335,8 +333,7 @@ define dso_local void @masked_strided3(ptr noalias nocapture readnone %p, ptr no
335333
; SCALAR_TAIL_FOLDING-NEXT: [[TMP15:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
336334
; SCALAR_TAIL_FOLDING-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
337335
; SCALAR_TAIL_FOLDING: middle.block:
338-
; SCALAR_TAIL_FOLDING-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
339-
; SCALAR_TAIL_FOLDING-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
336+
; SCALAR_TAIL_FOLDING-NEXT: br i1 false, label [[FOR_END:%.*]], label [[SCALAR_PH]]
340337
; SCALAR_TAIL_FOLDING: scalar.ph:
341338
;
342339
; PREDICATED_TAIL_FOLDING-LABEL: define dso_local void @masked_strided3
@@ -490,8 +487,7 @@ define dso_local void @masked_strided_factor4(ptr noalias nocapture readonly %p,
490487
; SCALAR_TAIL_FOLDING-NEXT: [[TMP21:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
491488
; SCALAR_TAIL_FOLDING-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
492489
; SCALAR_TAIL_FOLDING: middle.block:
493-
; SCALAR_TAIL_FOLDING-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
494-
; SCALAR_TAIL_FOLDING-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
490+
; SCALAR_TAIL_FOLDING-NEXT: br i1 false, label [[FOR_END:%.*]], label [[SCALAR_PH]]
495491
; SCALAR_TAIL_FOLDING: scalar.ph:
496492
;
497493
; PREDICATED_TAIL_FOLDING-LABEL: define dso_local void @masked_strided_factor4

llvm/test/Transforms/LoopVectorize/RISCV/interleaved-masked-access.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -52,8 +52,7 @@ define void @masked_strided_factor2(ptr noalias nocapture readonly %p, ptr noali
5252
; SCALAR_EPILOGUE-NEXT: [[TMP20:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
5353
; SCALAR_EPILOGUE-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
5454
; SCALAR_EPILOGUE: middle.block:
55-
; SCALAR_EPILOGUE-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
56-
; SCALAR_EPILOGUE-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
55+
; SCALAR_EPILOGUE-NEXT: br i1 false, label [[FOR_END:%.*]], label [[SCALAR_PH]]
5756
; SCALAR_EPILOGUE: scalar.ph:
5857
;
5958
; PREDICATED_TAIL_FOLDING-LABEL: define void @masked_strided_factor2
@@ -237,8 +236,7 @@ define void @masked_strided_factor4(ptr noalias nocapture readonly %p, ptr noali
237236
; SCALAR_EPILOGUE-NEXT: [[TMP32:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
238237
; SCALAR_EPILOGUE-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
239238
; SCALAR_EPILOGUE: middle.block:
240-
; SCALAR_EPILOGUE-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
241-
; SCALAR_EPILOGUE-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
239+
; SCALAR_EPILOGUE-NEXT: br i1 false, label [[FOR_END:%.*]], label [[SCALAR_PH]]
242240
; SCALAR_EPILOGUE: scalar.ph:
243241
;
244242
; PREDICATED_TAIL_FOLDING-LABEL: define void @masked_strided_factor4

0 commit comments

Comments
 (0)