Skip to content

Commit 105dde3

Browse files
committed
[MemCpyOpt] Calculate the offset value to forward memcpy
1 parent 48d62d9 commit 105dde3

File tree

3 files changed

+72
-42
lines changed

3 files changed

+72
-42
lines changed

llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp

Lines changed: 61 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include "llvm/Transforms/Scalar/MemCpyOptimizer.h"
1515
#include "llvm/ADT/DenseSet.h"
1616
#include "llvm/ADT/STLExtras.h"
17+
#include "llvm/ADT/ScopeExit.h"
1718
#include "llvm/ADT/SmallVector.h"
1819
#include "llvm/ADT/Statistic.h"
1920
#include "llvm/ADT/iterator_range.h"
@@ -1121,28 +1122,67 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
11211122
bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
11221123
MemCpyInst *MDep,
11231124
BatchAAResults &BAA) {
1124-
// We can only transforms memcpy's where the dest of one is the source of the
1125-
// other.
1126-
if (M->getSource() != MDep->getDest() || MDep->isVolatile())
1127-
return false;
1128-
11291125
// If dep instruction is reading from our current input, then it is a noop
1130-
// transfer and substituting the input won't change this instruction. Just
1131-
// ignore the input and let someone else zap MDep. This handles cases like:
1126+
// transfer and substituting the input won't change this instruction. Just
1127+
// ignore the input and let someone else zap MDep. This handles cases like:
11321128
// memcpy(a <- a)
11331129
// memcpy(b <- a)
11341130
if (M->getSource() == MDep->getSource())
11351131
return false;
11361132

1137-
// Second, the length of the memcpy's must be the same, or the preceding one
1133+
// We can only optimize non-volatile memcpy's.
1134+
if (MDep->isVolatile())
1135+
return false;
1136+
1137+
int64_t MForwardOffset = 0;
1138+
const DataLayout &DL = M->getModule()->getDataLayout();
1139+
// We can only transform memcpy's where the dest of one is the source of the
1140+
// other, or where that source lies at a non-negative, in-range offset into it.
1141+
if (M->getSource() != MDep->getDest()) {
1142+
std::optional<int64_t> Offset =
1143+
M->getSource()->getPointerOffsetFrom(MDep->getDest(), DL);
1144+
if (!Offset || *Offset < 0)
1145+
return false;
1146+
MForwardOffset = *Offset;
1147+
}
1148+
1149+
// The length of the memcpy's must be the same, or the preceding one
11381150
// must be larger than the following one.
1139-
if (MDep->getLength() != M->getLength()) {
1151+
if (MForwardOffset != 0 || (MDep->getLength() != M->getLength())) {
11401152
auto *MDepLen = dyn_cast<ConstantInt>(MDep->getLength());
11411153
auto *MLen = dyn_cast<ConstantInt>(M->getLength());
1142-
if (!MDepLen || !MLen || MDepLen->getZExtValue() < MLen->getZExtValue())
1154+
if (!MDepLen || !MLen ||
1155+
MDepLen->getZExtValue() < MLen->getZExtValue() + MForwardOffset)
11431156
return false;
11441157
}
11451158

1159+
IRBuilder<> Builder(M);
1160+
auto *CopySource = MDep->getRawSource();
1161+
auto CleanupOnFailure = llvm::make_scope_exit([&CopySource] {
1162+
if (CopySource->use_empty())
1163+
cast<Instruction>(CopySource)->eraseFromParent();
1164+
});
1165+
MaybeAlign CopySourceAlign = MDep->getSourceAlign();
1166+
// We just need to calculate the actual size of the copy.
1167+
auto MCopyLoc = MemoryLocation::getForSource(MDep).getWithNewSize(
1168+
MemoryLocation::getForSource(M).Size);
1169+
1170+
// We need to update `MCopyLoc` if an offset exists.
1171+
if (MForwardOffset > 0) {
1172+
// The copy destination of `M` may itself be usable as the copy source.
1173+
std::optional<int64_t> MDestOffset =
1174+
M->getRawDest()->getPointerOffsetFrom(MDep->getRawSource(), DL);
1175+
if (MDestOffset && *MDestOffset == MForwardOffset)
1176+
CopySource = M->getRawDest();
1177+
else
1178+
CopySource = Builder.CreateInBoundsPtrAdd(
1179+
CopySource, ConstantInt::get(Type::getInt64Ty(Builder.getContext()),
1180+
MForwardOffset));
1181+
MCopyLoc = MCopyLoc.getWithNewPtr(CopySource);
1182+
if (CopySourceAlign)
1183+
CopySourceAlign = commonAlignment(*CopySourceAlign, MForwardOffset);
1184+
}
1185+
11461186
// Verify that the copied-from memory doesn't change in between the two
11471187
// transfers. For example, in:
11481188
// memcpy(a <- b)
@@ -1152,10 +1192,8 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
11521192
//
11531193
// TODO: If the code between M and MDep is transparent to the destination "c",
11541194
// then we could still perform the xform by moving M up to the first memcpy.
1155-
// TODO: It would be sufficient to check the MDep source up to the memcpy
1156-
// size of M, rather than MDep.
1157-
if (writtenBetween(MSSA, BAA, MemoryLocation::getForSource(MDep),
1158-
MSSA->getMemoryAccess(MDep), MSSA->getMemoryAccess(M)))
1195+
if (writtenBetween(MSSA, BAA, MCopyLoc, MSSA->getMemoryAccess(MDep),
1196+
MSSA->getMemoryAccess(M)))
11591197
return false;
11601198

11611199
// If the dest of the second might alias the source of the first, then the
@@ -1179,23 +1217,22 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
11791217

11801218
// TODO: Is this worth it if we're creating a less aligned memcpy? For
11811219
// example we could be moving from movaps -> movq on x86.
1182-
IRBuilder<> Builder(M);
11831220
Instruction *NewM;
11841221
if (UseMemMove)
1185-
NewM = Builder.CreateMemMove(M->getRawDest(), M->getDestAlign(),
1186-
MDep->getRawSource(), MDep->getSourceAlign(),
1187-
M->getLength(), M->isVolatile());
1222+
NewM =
1223+
Builder.CreateMemMove(M->getRawDest(), M->getDestAlign(), CopySource,
1224+
CopySourceAlign, M->getLength(), M->isVolatile());
11881225
else if (isa<MemCpyInlineInst>(M)) {
11891226
// llvm.memcpy may be promoted to llvm.memcpy.inline, but the converse is
11901227
// never allowed since that would allow the latter to be lowered as a call
11911228
// to an external function.
1192-
NewM = Builder.CreateMemCpyInline(
1193-
M->getRawDest(), M->getDestAlign(), MDep->getRawSource(),
1194-
MDep->getSourceAlign(), M->getLength(), M->isVolatile());
1229+
NewM = Builder.CreateMemCpyInline(M->getRawDest(), M->getDestAlign(),
1230+
CopySource, CopySourceAlign,
1231+
M->getLength(), M->isVolatile());
11951232
} else
1196-
NewM = Builder.CreateMemCpy(M->getRawDest(), M->getDestAlign(),
1197-
MDep->getRawSource(), MDep->getSourceAlign(),
1198-
M->getLength(), M->isVolatile());
1233+
NewM =
1234+
Builder.CreateMemCpy(M->getRawDest(), M->getDestAlign(), CopySource,
1235+
CopySourceAlign, M->getLength(), M->isVolatile());
11991236
NewM->copyMetadata(*M, LLVMContext::MD_DIAssignID);
12001237

12011238
assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M)));

llvm/test/Transforms/MemCpyOpt/memcpy-memcpy-offset.ll

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ define void @forward_offset(ptr %dep_src) {
1111
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEP_DEST]], ptr align 1 [[DEP_SRC]], i64 7, i1 false)
1212
; CHECK-NEXT: [[SRC:%.*]] = getelementptr inbounds i8, ptr [[DEP_DEST]], i64 1
1313
; CHECK-NEXT: [[DEP:%.*]] = getelementptr inbounds i8, ptr [[DEP_SRC]], i64 1
14-
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEP]], ptr align 1 [[SRC]], i64 6, i1 false)
14+
; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr align 1 [[DEP]], ptr align 1 [[DEP]], i64 6, i1 false)
1515
; CHECK-NEXT: ret void
1616
;
1717
%dep_dest = alloca %buf, align 1
@@ -30,7 +30,7 @@ define void @forward_offset_align(ptr %dep_src) {
3030
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEP_DEST]], ptr align 4 [[DEP_SRC]], i64 9, i1 false)
3131
; CHECK-NEXT: [[SRC:%.*]] = getelementptr inbounds i8, ptr [[DEP_DEST]], i64 3
3232
; CHECK-NEXT: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[DEP_SRC]], i64 3
33-
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEST]], ptr align 1 [[SRC]], i64 5, i1 false)
33+
; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr align 1 [[DEST]], ptr align 1 [[DEST]], i64 5, i1 false)
3434
; CHECK-NEXT: ret void
3535
;
3636
%dep_dest = alloca %buf, align 1
@@ -49,7 +49,7 @@ define void @forward_offset_align_2(ptr %dep_src) {
4949
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEP_DEST]], ptr align 4 [[DEP_SRC]], i64 9, i1 false)
5050
; CHECK-NEXT: [[SRC:%.*]] = getelementptr inbounds i8, ptr [[DEP_DEST]], i64 2
5151
; CHECK-NEXT: [[DEP:%.*]] = getelementptr inbounds i8, ptr [[DEP_SRC]], i64 2
52-
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEP]], ptr align 1 [[SRC]], i64 6, i1 false)
52+
; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr align 1 [[DEP]], ptr align 2 [[DEP]], i64 6, i1 false)
5353
; CHECK-NEXT: ret void
5454
;
5555
%dep_dest = alloca %buf, align 1
@@ -68,7 +68,8 @@ define void @forward_offset_with_gep(ptr %dep_src) {
6868
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEP_DEST]], ptr align 1 [[DEP_SRC]], i64 7, i1 false)
6969
; CHECK-NEXT: [[SRC:%.*]] = getelementptr inbounds i8, ptr [[DEP_DEST]], i64 1
7070
; CHECK-NEXT: [[DEP1:%.*]] = getelementptr inbounds i8, ptr [[DEP_SRC]], i64 2
71-
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEP1]], ptr align 1 [[SRC]], i64 6, i1 false)
71+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[DEP_SRC]], i64 1
72+
; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr align 1 [[DEP1]], ptr align 1 [[TMP1]], i64 6, i1 false)
7273
; CHECK-NEXT: ret void
7374
;
7475
%dep_dest = alloca %buf, align 1
@@ -87,7 +88,8 @@ define void @forward_offset_memcpy(ptr %dep_src) {
8788
; CHECK-NEXT: [[DEST:%.*]] = alloca [9 x i8], align 1
8889
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEP_DEST]], ptr align 1 [[DEP_SRC]], i64 7, i1 false)
8990
; CHECK-NEXT: [[SRC:%.*]] = getelementptr inbounds i8, ptr [[DEP_DEST]], i64 1
90-
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEST]], ptr align 1 [[SRC]], i64 6, i1 false)
91+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[DEP_SRC]], i64 1
92+
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEST]], ptr align 1 [[TMP1]], i64 6, i1 false)
9193
; CHECK-NEXT: call void @use(ptr [[DEST]])
9294
; CHECK-NEXT: ret void
9395
;
@@ -108,7 +110,8 @@ define void @forward_offset_memcpy_inline(ptr %dep_src) {
108110
; CHECK-NEXT: [[DEST:%.*]] = alloca [9 x i8], align 1
109111
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEP_DEST]], ptr align 1 [[DEP_SRC]], i64 7, i1 false)
110112
; CHECK-NEXT: [[SRC:%.*]] = getelementptr inbounds i8, ptr [[DEP_DEST]], i64 1
111-
; CHECK-NEXT: call void @llvm.memcpy.inline.p0.p0.i64(ptr align 1 [[DEST]], ptr align 1 [[SRC]], i64 6, i1 false)
113+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[DEP_SRC]], i64 1
114+
; CHECK-NEXT: call void @llvm.memcpy.inline.p0.p0.i64(ptr align 1 [[DEST]], ptr align 1 [[TMP1]], i64 6, i1 false)
112115
; CHECK-NEXT: call void @use(ptr [[DEST]])
113116
; CHECK-NEXT: ret void
114117
;
@@ -151,7 +154,7 @@ define void @forward_offset_and_store(ptr %dep_src) {
151154
; CHECK-NEXT: store i8 1, ptr [[DEP_SRC_END]], align 1
152155
; CHECK-NEXT: [[SRC:%.*]] = getelementptr inbounds i8, ptr [[DEP_DEST]], i64 1
153156
; CHECK-NEXT: [[DEP:%.*]] = getelementptr inbounds i8, ptr [[DEP_SRC]], i64 1
154-
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEP]], ptr align 1 [[SRC]], i64 5, i1 false)
157+
; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr align 1 [[DEP]], ptr align 1 [[DEP]], i64 5, i1 false)
155158
; CHECK-NEXT: ret void
156159
;
157160
%dep_dest = alloca %buf, align 1

llvm/test/Transforms/PhaseOrdering/memcpy-offset.ll

Lines changed: 1 addition & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -8,26 +8,16 @@
88
define void @forward_offset_and_store(ptr %dep_src) {
99
; CUSTOM-LABEL: define void @forward_offset_and_store(
1010
; CUSTOM-SAME: ptr [[DEP_SRC:%.*]]) {
11-
; CUSTOM-NEXT: [[DEP_DEST:%.*]] = alloca [7 x i8], align 1
12-
; CUSTOM-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 1 dereferenceable(7) [[DEP_DEST]], ptr noundef nonnull align 1 dereferenceable(7) [[DEP_SRC]], i64 7, i1 false)
1311
; CUSTOM-NEXT: store i8 1, ptr [[DEP_SRC]], align 1
1412
; CUSTOM-NEXT: [[DEP_SRC_END:%.*]] = getelementptr inbounds i8, ptr [[DEP_SRC]], i64 6
1513
; CUSTOM-NEXT: store i8 1, ptr [[DEP_SRC_END]], align 1
16-
; CUSTOM-NEXT: [[SRC:%.*]] = getelementptr inbounds i8, ptr [[DEP_DEST]], i64 1
17-
; CUSTOM-NEXT: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[DEP_SRC]], i64 1
18-
; CUSTOM-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 1 dereferenceable(5) [[DEST]], ptr noundef nonnull align 1 dereferenceable(5) [[SRC]], i64 5, i1 false)
1914
; CUSTOM-NEXT: ret void
2015
;
2116
; O2-LABEL: define void @forward_offset_and_store(
22-
; O2-SAME: ptr nocapture [[DEP_SRC:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
23-
; O2-NEXT: [[DEP_DEST:%.*]] = alloca [7 x i8], align 1
24-
; O2-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 1 dereferenceable(7) [[DEP_DEST]], ptr noundef nonnull align 1 dereferenceable(7) [[DEP_SRC]], i64 7, i1 false)
17+
; O2-SAME: ptr nocapture writeonly [[DEP_SRC:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
2518
; O2-NEXT: store i8 1, ptr [[DEP_SRC]], align 1
2619
; O2-NEXT: [[DEP_SRC_END:%.*]] = getelementptr inbounds i8, ptr [[DEP_SRC]], i64 6
2720
; O2-NEXT: store i8 1, ptr [[DEP_SRC_END]], align 1
28-
; O2-NEXT: [[SRC:%.*]] = getelementptr inbounds i8, ptr [[DEP_DEST]], i64 1
29-
; O2-NEXT: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[DEP_SRC]], i64 1
30-
; O2-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 1 dereferenceable(5) [[DEST]], ptr noundef nonnull align 1 dereferenceable(5) [[SRC]], i64 5, i1 false)
3121
; O2-NEXT: ret void
3222
;
3323
%dep_dest = alloca %buf, align 1

0 commit comments

Comments
 (0)