14
14
#include " llvm/Transforms/Scalar/MemCpyOptimizer.h"
15
15
#include " llvm/ADT/DenseSet.h"
16
16
#include " llvm/ADT/STLExtras.h"
17
+ #include " llvm/ADT/ScopeExit.h"
17
18
#include " llvm/ADT/SmallVector.h"
18
19
#include " llvm/ADT/Statistic.h"
19
20
#include " llvm/ADT/iterator_range.h"
@@ -1124,28 +1125,67 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
1124
1125
bool MemCpyOptPass::processMemCpyMemCpyDependence (MemCpyInst *M,
1125
1126
MemCpyInst *MDep,
1126
1127
BatchAAResults &BAA) {
1127
- // We can only transforms memcpy's where the dest of one is the source of the
1128
- // other.
1129
- if (M->getSource () != MDep->getDest () || MDep->isVolatile ())
1130
- return false ;
1131
-
1132
1128
// If dep instruction is reading from our current input, then it is a noop
1133
- // transfer and substituting the input won't change this instruction. Just
1134
- // ignore the input and let someone else zap MDep. This handles cases like:
1129
+ // transfer and substituting the input won't change this instruction. Just
1130
+ // ignore the input and let someone else zap MDep. This handles cases like:
1135
1131
// memcpy(a <- a)
1136
1132
// memcpy(b <- a)
1137
1133
if (M->getSource () == MDep->getSource ())
1138
1134
return false ;
1139
1135
1140
- // Second, the length of the memcpy's must be the same, or the preceding one
1136
+ // We can only optimize non-volatile memcpy's.
1137
+ if (MDep->isVolatile ())
1138
+ return false ;
1139
+
1140
+ int64_t MForwardOffset = 0 ;
1141
+ const DataLayout &DL = M->getModule ()->getDataLayout ();
1142
+ // We can only transform memcpy's where the dest of one is the source of the
1143
+ // other, or where M's source is at a known, non-negative offset from MDep's dest.
1144
+ if (M->getSource () != MDep->getDest ()) {
1145
+ std::optional<int64_t > Offset =
1146
+ M->getSource ()->getPointerOffsetFrom (MDep->getDest (), DL);
1147
+ if (!Offset || *Offset < 0 )
1148
+ return false ;
1149
+ MForwardOffset = *Offset;
1150
+ }
1151
+
1152
+ // The length of the memcpy's must be the same, or the preceding one
1141
1153
// must be larger than the following one.
1142
- if (MDep->getLength () != M->getLength ()) {
1154
+ if (MForwardOffset != 0 || ( MDep->getLength () != M->getLength () )) {
1143
1155
auto *MDepLen = dyn_cast<ConstantInt>(MDep->getLength ());
1144
1156
auto *MLen = dyn_cast<ConstantInt>(M->getLength ());
1145
- if (!MDepLen || !MLen || MDepLen->getZExtValue () < MLen->getZExtValue ())
1157
+ if (!MDepLen || !MLen ||
1158
+ MDepLen->getZExtValue () < MLen->getZExtValue () + MForwardOffset)
1146
1159
return false ;
1147
1160
}
1148
1161
1162
+ IRBuilder<> Builder (M);
1163
+ auto *CopySource = MDep->getRawSource ();
1164
+ auto CleanupOnFailure = llvm::make_scope_exit ([&CopySource] {
1165
+ if (CopySource->use_empty ())
1166
+ cast<Instruction>(CopySource)->eraseFromParent ();
1167
+ });
1168
+ MaybeAlign CopySourceAlign = MDep->getSourceAlign ();
1169
+ // We just need to calculate the actual size of the copy.
1170
+ auto MCopyLoc = MemoryLocation::getForSource (MDep).getWithNewSize (
1171
+ MemoryLocation::getForSource (M).Size );
1172
+
1173
+ // We need to update `MCopyLoc` if an offset exists.
1174
+ if (MForwardOffset > 0 ) {
1175
+ // The copy destination of `M` may be able to serve as the source of the copy.
1176
+ std::optional<int64_t > MDestOffset =
1177
+ M->getRawDest ()->getPointerOffsetFrom (MDep->getRawSource (), DL);
1178
+ if (MDestOffset && *MDestOffset == MForwardOffset)
1179
+ CopySource = M->getRawDest ();
1180
+ else
1181
+ CopySource = Builder.CreateInBoundsPtrAdd (
1182
+ CopySource, ConstantInt::get (Type::getInt64Ty (Builder.getContext ()),
1183
+ MForwardOffset));
1184
+ MCopyLoc = MCopyLoc.getWithNewPtr (CopySource);
1185
+ if (CopySourceAlign)
1186
+ CopySourceAlign = commonAlignment (*CopySourceAlign, MForwardOffset);
1187
+ }
1188
+
1149
1189
// Verify that the copied-from memory doesn't change in between the two
1150
1190
// transfers. For example, in:
1151
1191
// memcpy(a <- b)
@@ -1155,10 +1195,8 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
1155
1195
//
1156
1196
// TODO: If the code between M and MDep is transparent to the destination "c",
1157
1197
// then we could still perform the xform by moving M up to the first memcpy.
1158
- // TODO: It would be sufficient to check the MDep source up to the memcpy
1159
- // size of M, rather than MDep.
1160
- if (writtenBetween (MSSA, BAA, MemoryLocation::getForSource (MDep),
1161
- MSSA->getMemoryAccess (MDep), MSSA->getMemoryAccess (M)))
1198
+ if (writtenBetween (MSSA, BAA, MCopyLoc, MSSA->getMemoryAccess (MDep),
1199
+ MSSA->getMemoryAccess (M)))
1162
1200
return false ;
1163
1201
1164
1202
// If the dest of the second might alias the source of the first, then the
@@ -1183,23 +1221,22 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
1183
1221
1184
1222
// TODO: Is this worth it if we're creating a less aligned memcpy? For
1185
1223
// example we could be moving from movaps -> movq on x86.
1186
- IRBuilder<> Builder (M);
1187
1224
Instruction *NewM;
1188
1225
if (UseMemMove)
1189
- NewM = Builder. CreateMemMove (M-> getRawDest (), M-> getDestAlign (),
1190
- MDep-> getRawSource (), MDep-> getSourceAlign () ,
1191
- M->getLength (), M->isVolatile ());
1226
+ NewM =
1227
+ Builder. CreateMemMove (M-> getRawDest (), M-> getDestAlign (), CopySource ,
1228
+ CopySourceAlign, M->getLength (), M->isVolatile ());
1192
1229
else if (isa<MemCpyInlineInst>(M)) {
1193
1230
// llvm.memcpy may be promoted to llvm.memcpy.inline, but the converse is
1194
1231
// never allowed since that would allow the latter to be lowered as a call
1195
1232
// to an external function.
1196
- NewM = Builder.CreateMemCpyInline (
1197
- M-> getRawDest (), M-> getDestAlign (), MDep-> getRawSource () ,
1198
- MDep-> getSourceAlign (), M->getLength (), M->isVolatile ());
1233
+ NewM = Builder.CreateMemCpyInline (M-> getRawDest (), M-> getDestAlign (),
1234
+ CopySource, CopySourceAlign ,
1235
+ M->getLength (), M->isVolatile ());
1199
1236
} else
1200
- NewM = Builder. CreateMemCpy (M-> getRawDest (), M-> getDestAlign (),
1201
- MDep-> getRawSource (), MDep-> getSourceAlign () ,
1202
- M->getLength (), M->isVolatile ());
1237
+ NewM =
1238
+ Builder. CreateMemCpy (M-> getRawDest (), M-> getDestAlign (), CopySource ,
1239
+ CopySourceAlign, M->getLength (), M->isVolatile ());
1203
1240
NewM->copyMetadata (*M, LLVMContext::MD_DIAssignID);
1204
1241
1205
1242
assert (isa<MemoryDef>(MSSAU->getMemorySSA ()->getMemoryAccess (M)));
0 commit comments