Commit 8bbef3d

[InstCombine] Iterative replacement in PtrReplacer (#137215)
This patch enhances the PtrReplacer as follows:

1. Users are now collected iteratively rather than recursively, to keep stack usage bounded. PHIs whose incoming values have not all been visited yet are pushed back onto the stack for reconsideration.
2. Users of the pointer root are replaced in a reverse-postorder traversal instead of a simple traversal over the collected users. This ordering ensures that the operands of an instruction are replaced before the instruction itself.
3. When replacing a PHI, an incoming value that has no replacement is kept as-is. This specifically fixes the case where an incoming value of a PHI is an addrspacecast.
1 parent 22d9ea1 commit 8bbef3d
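Point 2 is essentially an iterative depth-first postorder over the collected users, followed by a replacement pass over that order reversed. Below is a minimal, LLVM-independent sketch of that idea, assuming the user graph is acyclic; Node, Users, replaceNode, and replaceInRPO are illustrative stand-ins, not APIs from the patch.

#include <cstdio>
#include <unordered_set>
#include <vector>

struct Node {
  int Id;
  std::vector<Node *> Users; // def -> user edges, like Value::users()
};

// Stand-in for the patch's replace(): rewrite one user of the root pointer.
static void replaceNode(const Node *N) { std::printf("replace node %d\n", N->Id); }

static void replaceInRPO(Node *Root,
                         const std::unordered_set<Node *> &UsersToReplace) {
  std::vector<Node *> Worklist{Root};
  std::vector<Node *> PostOrder;
  std::unordered_set<Node *> Visited;
  std::unordered_set<Node *> Emitted;

  while (!Worklist.empty()) {
    Node *N = Worklist.back();
    if (Visited.insert(N).second) {
      // First visit: keep N on the stack and push its replaceable users above it.
      for (Node *U : N->Users)
        if (UsersToReplace.count(U) && !Visited.count(U))
          Worklist.push_back(U);
    } else {
      // Second visit: in an acyclic user graph, every user of N has already
      // been emitted, so N completes its postorder position.
      Worklist.pop_back();
      if (N != Root && Emitted.insert(N).second) // Root keeps its own mapping.
        PostOrder.push_back(N);
    }
  }

  // Reverse-postorder: each value is rewritten before the instructions using it.
  for (auto It = PostOrder.rbegin(); It != PostOrder.rend(); ++It)
    replaceNode(*It);
}

int main() {
  // Diamond: Root is used by A and B, and both feed C, so C must come last.
  Node Root{0, {}}, A{1, {}}, B{2, {}}, C{3, {}};
  Root.Users = {&A, &B};
  A.Users = {&C};
  B.Users = {&C};
  replaceInRPO(&Root, {&A, &B, &C}); // prints nodes 1, 2, then 3
  return 0;
}

In the patch itself the postorder is accumulated in a SetVector, so duplicate stack entries collapse on insertion, and replace() then rewrites each collected instruction in place.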

2 files changed: 175 additions, 69 deletions


llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp

Lines changed: 96 additions & 69 deletions
@@ -243,11 +243,10 @@ class PointerReplacer {
   void replacePointer(Value *V);
 
 private:
-  bool collectUsersRecursive(Instruction &I);
   void replace(Instruction *I);
-  Value *getReplacement(Value *I);
+  Value *getReplacement(Value *V) const { return WorkMap.lookup(V); }
   bool isAvailable(Instruction *I) const {
-    return I == &Root || Worklist.contains(I);
+    return I == &Root || UsersToReplace.contains(I);
   }
 
   bool isEqualOrValidAddrSpaceCast(const Instruction *I,
@@ -259,8 +258,7 @@ class PointerReplacer {
     return (FromAS == ToAS) || IC.isValidAddrSpaceCast(FromAS, ToAS);
   }
 
-  SmallPtrSet<Instruction *, 32> ValuesToRevisit;
-  SmallSetVector<Instruction *, 4> Worklist;
+  SmallSetVector<Instruction *, 32> UsersToReplace;
   MapVector<Value *, Value *> WorkMap;
   InstCombinerImpl &IC;
   Instruction &Root;
@@ -269,80 +267,119 @@ class PointerReplacer {
 } // end anonymous namespace
 
 bool PointerReplacer::collectUsers() {
-  if (!collectUsersRecursive(Root))
-    return false;
-
-  // Ensure that all outstanding (indirect) users of I
-  // are inserted into the Worklist. Return false
-  // otherwise.
-  return llvm::set_is_subset(ValuesToRevisit, Worklist);
-}
+  SmallVector<Instruction *> Worklist;
+  SmallSetVector<Instruction *, 32> ValuesToRevisit;
+
+  auto PushUsersToWorklist = [&](Instruction *Inst) {
+    for (auto *U : Inst->users())
+      if (auto *I = dyn_cast<Instruction>(U))
+        if (!isAvailable(I) && !ValuesToRevisit.contains(I))
+          Worklist.emplace_back(I);
+  };
 
-bool PointerReplacer::collectUsersRecursive(Instruction &I) {
-  for (auto *U : I.users()) {
-    auto *Inst = cast<Instruction>(&*U);
+  PushUsersToWorklist(&Root);
+  while (!Worklist.empty()) {
+    Instruction *Inst = Worklist.pop_back_val();
     if (auto *Load = dyn_cast<LoadInst>(Inst)) {
       if (Load->isVolatile())
         return false;
-      Worklist.insert(Load);
+      UsersToReplace.insert(Load);
     } else if (auto *PHI = dyn_cast<PHINode>(Inst)) {
-      // All incoming values must be instructions for replacability
-      if (any_of(PHI->incoming_values(),
-                 [](Value *V) { return !isa<Instruction>(V); }))
-        return false;
-
-      // If at least one incoming value of the PHI is not in Worklist,
-      // store the PHI for revisiting and skip this iteration of the
-      // loop.
-      if (any_of(PHI->incoming_values(), [this](Value *V) {
-            return !isAvailable(cast<Instruction>(V));
+      /// TODO: Handle poison and null pointers for PHI and select.
+      // If all incoming values are available, mark this PHI as
+      // replacable and push it's users into the worklist.
+      bool IsReplacable = true;
+      if (all_of(PHI->incoming_values(), [&](Value *V) {
+            if (!isa<Instruction>(V))
+              return IsReplacable = false;
+            return isAvailable(cast<Instruction>(V));
           })) {
-        ValuesToRevisit.insert(Inst);
+        UsersToReplace.insert(PHI);
+        PushUsersToWorklist(PHI);
         continue;
       }
 
-      Worklist.insert(PHI);
-      if (!collectUsersRecursive(*PHI))
-        return false;
-    } else if (auto *SI = dyn_cast<SelectInst>(Inst)) {
-      if (!isa<Instruction>(SI->getTrueValue()) ||
-          !isa<Instruction>(SI->getFalseValue()))
+      // Either an incoming value is not an instruction or not all
+      // incoming values are available. If this PHI was already
+      // visited prior to this iteration, return false.
+      if (!IsReplacable || !ValuesToRevisit.insert(PHI))
         return false;
 
-      if (!isAvailable(cast<Instruction>(SI->getTrueValue())) ||
-          !isAvailable(cast<Instruction>(SI->getFalseValue()))) {
-        ValuesToRevisit.insert(Inst);
-        continue;
+      // Push PHI back into the stack, followed by unavailable
+      // incoming values.
+      Worklist.emplace_back(PHI);
+      for (unsigned Idx = 0; Idx < PHI->getNumIncomingValues(); ++Idx) {
+        auto *IncomingValue = cast<Instruction>(PHI->getIncomingValue(Idx));
+        if (UsersToReplace.contains(IncomingValue))
+          continue;
+        if (!ValuesToRevisit.insert(IncomingValue))
+          return false;
+        Worklist.emplace_back(IncomingValue);
       }
-      Worklist.insert(SI);
-      if (!collectUsersRecursive(*SI))
-        return false;
-    } else if (isa<GetElementPtrInst>(Inst)) {
-      Worklist.insert(Inst);
-      if (!collectUsersRecursive(*Inst))
+    } else if (auto *SI = dyn_cast<SelectInst>(Inst)) {
+      auto *TrueInst = dyn_cast<Instruction>(SI->getTrueValue());
+      auto *FalseInst = dyn_cast<Instruction>(SI->getFalseValue());
+      if (!TrueInst || !FalseInst)
         return false;
+
+      UsersToReplace.insert(SI);
+      PushUsersToWorklist(SI);
+    } else if (auto *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
+      UsersToReplace.insert(GEP);
+      PushUsersToWorklist(GEP);
     } else if (auto *MI = dyn_cast<MemTransferInst>(Inst)) {
       if (MI->isVolatile())
         return false;
-      Worklist.insert(Inst);
+      UsersToReplace.insert(Inst);
     } else if (isEqualOrValidAddrSpaceCast(Inst, FromAS)) {
-      Worklist.insert(Inst);
-      if (!collectUsersRecursive(*Inst))
-        return false;
+      UsersToReplace.insert(Inst);
+      PushUsersToWorklist(Inst);
     } else if (Inst->isLifetimeStartOrEnd()) {
       continue;
     } else {
       // TODO: For arbitrary uses with address space mismatches, should we check
       // if we can introduce a valid addrspacecast?
-      LLVM_DEBUG(dbgs() << "Cannot handle pointer user: " << *U << '\n');
+      LLVM_DEBUG(dbgs() << "Cannot handle pointer user: " << *Inst << '\n');
       return false;
     }
   }
 
   return true;
 }
 
-Value *PointerReplacer::getReplacement(Value *V) { return WorkMap.lookup(V); }
+void PointerReplacer::replacePointer(Value *V) {
+  assert(cast<PointerType>(Root.getType()) != cast<PointerType>(V->getType()) &&
+         "Invalid usage");
+  WorkMap[&Root] = V;
+  SmallVector<Instruction *> Worklist;
+  SetVector<Instruction *> PostOrderWorklist;
+  SmallPtrSet<Instruction *, 32> Visited;
+
+  // Perform a postorder traversal of the users of Root.
+  Worklist.push_back(&Root);
+  while (!Worklist.empty()) {
+    Instruction *I = Worklist.back();
+
+    // If I has not been processed before, push each of its
+    // replacable users into the worklist.
+    if (Visited.insert(I).second) {
+      for (auto *U : I->users()) {
+        auto *UserInst = cast<Instruction>(U);
+        if (UsersToReplace.contains(UserInst))
+          Worklist.push_back(UserInst);
+      }
+      // Otherwise, users of I have already been pushed into
+      // the PostOrderWorklist. Push I as well.
+    } else {
+      PostOrderWorklist.insert(I);
+      Worklist.pop_back();
+    }
+  }
+
+  // Replace pointers in reverse-postorder.
+  for (Instruction *I : reverse(PostOrderWorklist))
+    replace(I);
+}
 
 void PointerReplacer::replace(Instruction *I) {
   if (getReplacement(I))
@@ -364,13 +401,15 @@ void PointerReplacer::replace(Instruction *I) {
     // replacement (new value).
     WorkMap[NewI] = NewI;
   } else if (auto *PHI = dyn_cast<PHINode>(I)) {
-    Type *NewTy = getReplacement(PHI->getIncomingValue(0))->getType();
-    auto *NewPHI = PHINode::Create(NewTy, PHI->getNumIncomingValues(),
-                                   PHI->getName(), PHI->getIterator());
-    for (unsigned int I = 0; I < PHI->getNumIncomingValues(); ++I)
-      NewPHI->addIncoming(getReplacement(PHI->getIncomingValue(I)),
-                          PHI->getIncomingBlock(I));
-    WorkMap[PHI] = NewPHI;
+    // Create a new PHI by replacing any incoming value that is a user of the
+    // root pointer and has a replacement.
+    Value *V = WorkMap.lookup(PHI->getIncomingValue(0));
+    PHI->mutateType(V ? V->getType() : PHI->getIncomingValue(0)->getType());
+    for (unsigned int I = 0; I < PHI->getNumIncomingValues(); ++I) {
+      Value *V = WorkMap.lookup(PHI->getIncomingValue(I));
+      PHI->setIncomingValue(I, V ? V : PHI->getIncomingValue(I));
+    }
+    WorkMap[PHI] = PHI;
   } else if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
     auto *V = getReplacement(GEP->getPointerOperand());
     assert(V && "Operand not replaced");
@@ -434,18 +473,6 @@ void PointerReplacer::replace(Instruction *I) {
   }
 }
 
-void PointerReplacer::replacePointer(Value *V) {
-#ifndef NDEBUG
-  auto *PT = cast<PointerType>(Root.getType());
-  auto *NT = cast<PointerType>(V->getType());
-  assert(PT != NT && "Invalid usage");
-#endif
-  WorkMap[&Root] = V;
-
-  for (Instruction *Workitem : Worklist)
-    replace(Workitem);
-}
-
 Instruction *InstCombinerImpl::visitAllocaInst(AllocaInst &AI) {
   if (auto *I = simplifyAllocaArraySize(*this, AI, DT))
     return I;
Lines changed: 79 additions & 0 deletions
@@ -0,0 +1,79 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -passes=instcombine -S < %s | FileCheck %s
+
+%struct.type = type { [256 x <2 x i64>] }
+@g1 = external hidden addrspace(3) global %struct.type, align 16
+
+; This test requires the PtrReplacer to replace users in an RPO traversal.
+; Furthermore, %ptr.else need not to be replaced so it must be retained in
+; %ptr.sink.
+define <2 x i64> @func(ptr addrspace(4) byref(%struct.type) align 16 %0, i1 %cmp.0) {
+; CHECK-LABEL: define <2 x i64> @func(
+; CHECK-SAME: ptr addrspace(4) byref([[STRUCT_TYPE:%.*]]) align 16 [[TMP0:%.*]], i1 [[CMP_0:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br i1 [[CMP_0]], label %[[IF_THEN:.*]], label %[[IF_ELSE:.*]]
+; CHECK:       [[IF_THEN]]:
+; CHECK-NEXT:    [[VAL_THEN:%.*]] = addrspacecast ptr addrspace(4) [[TMP0]] to ptr
+; CHECK-NEXT:    br label %[[SINK:.*]]
+; CHECK:       [[IF_ELSE]]:
+; CHECK-NEXT:    [[PTR_ELSE:%.*]] = load ptr, ptr addrspace(3) getelementptr inbounds nuw (i8, ptr addrspace(3) @g1, i32 32), align 16
+; CHECK-NEXT:    br label %[[SINK]]
+; CHECK:       [[SINK]]:
+; CHECK-NEXT:    [[PTR_SINK:%.*]] = phi ptr [ [[PTR_ELSE]], %[[IF_ELSE]] ], [ [[VAL_THEN]], %[[IF_THEN]] ]
+; CHECK-NEXT:    [[VAL_SINK:%.*]] = load <2 x i64>, ptr [[PTR_SINK]], align 16
+; CHECK-NEXT:    ret <2 x i64> [[VAL_SINK]]
+;
+entry:
+  %coerce = alloca %struct.type, align 16, addrspace(5)
+  call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) align 16 %coerce, ptr addrspace(4) align 16 %0, i64 4096, i1 false)
+  br i1 %cmp.0, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+  %ptr.then = getelementptr inbounds i8, ptr addrspace(5) %coerce, i64 0
+  %val.then = addrspacecast ptr addrspace(5) %ptr.then to ptr
+  br label %sink
+
+if.else:                                          ; preds = %entry
+  %ptr.else = load ptr, ptr addrspace(3) getelementptr inbounds nuw (i8, ptr addrspace(3) @g1, i32 32), align 16
+  %val.else = getelementptr inbounds nuw i8, ptr %ptr.else, i64 0
+  br label %sink
+
+sink:
+  %ptr.sink = phi ptr [ %val.else, %if.else ], [ %val.then, %if.then ]
+  %val.sink = load <2 x i64>, ptr %ptr.sink, align 16
+  ret <2 x i64> %val.sink
+}
+
+define <2 x i64> @func_phi_loop(ptr addrspace(4) byref(%struct.type) align 16 %0, i1 %cmp.0) {
+; CHECK-LABEL: define <2 x i64> @func_phi_loop(
+; CHECK-SAME: ptr addrspace(4) byref([[STRUCT_TYPE:%.*]]) align 16 [[TMP0:%.*]], i1 [[CMP_0:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    [[VAL_0:%.*]] = addrspacecast ptr addrspace(4) [[TMP0]] to ptr
+; CHECK-NEXT:    br label %[[LOOP:.*]]
+; CHECK:       [[LOOP]]:
+; CHECK-NEXT:    [[PTR_PHI_R:%.*]] = phi ptr [ [[PTR_1:%.*]], %[[LOOP]] ], [ [[VAL_0]], %[[ENTRY]] ]
+; CHECK-NEXT:    [[PTR_1]] = load ptr, ptr addrspace(3) getelementptr inbounds nuw (i8, ptr addrspace(3) @g1, i32 32), align 16
+; CHECK-NEXT:    br i1 [[CMP_0]], label %[[LOOP]], label %[[SINK:.*]]
+; CHECK:       [[SINK]]:
+; CHECK-NEXT:    [[VAL_SINK:%.*]] = load <2 x i64>, ptr [[PTR_PHI_R]], align 16
+; CHECK-NEXT:    ret <2 x i64> [[VAL_SINK]]
+;
+entry:
+  %coerce = alloca %struct.type, align 16, addrspace(5)
+  call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) align 16 %coerce, ptr addrspace(4) align 16 %0, i64 4096, i1 false)
+  %ptr.0 = getelementptr inbounds i8, ptr addrspace(5) %coerce, i64 0
+  %val.0 = addrspacecast ptr addrspace(5) %ptr.0 to ptr
+  br label %loop
+
+loop:
+  %ptr.phi = phi ptr [ %val.1, %loop ], [ %val.0, %entry ]
+  %ptr.1 = load ptr, ptr addrspace(3) getelementptr inbounds nuw (i8, ptr addrspace(3) @g1, i32 32), align 16
+  %val.1 = getelementptr inbounds nuw i8, ptr %ptr.1, i64 0
+  br i1 %cmp.0, label %loop, label %sink
+
+sink:
+  %val.sink = load <2 x i64>, ptr %ptr.phi, align 16
+  ret <2 x i64> %val.sink
+}
+
+declare void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noalias writeonly captures(none), ptr addrspace(4) noalias readonly captures(none), i64, i1 immarg) #0
