14
14
#include " llvm/Transforms/Scalar/MemCpyOptimizer.h"
15
15
#include " llvm/ADT/DenseSet.h"
16
16
#include " llvm/ADT/STLExtras.h"
17
+ #include " llvm/ADT/ScopeExit.h"
17
18
#include " llvm/ADT/SmallVector.h"
18
19
#include " llvm/ADT/Statistic.h"
19
20
#include " llvm/ADT/iterator_range.h"
@@ -1121,28 +1122,67 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
1121
1122
bool MemCpyOptPass::processMemCpyMemCpyDependence (MemCpyInst *M,
1122
1123
MemCpyInst *MDep,
1123
1124
BatchAAResults &BAA) {
1124
- // We can only transforms memcpy's where the dest of one is the source of the
1125
- // other.
1126
- if (M->getSource () != MDep->getDest () || MDep->isVolatile ())
1127
- return false ;
1128
-
1129
1125
// If dep instruction is reading from our current input, then it is a noop
1130
- // transfer and substituting the input won't change this instruction. Just
1131
- // ignore the input and let someone else zap MDep. This handles cases like:
1126
+ // transfer and substituting the input won't change this instruction. Just
1127
+ // ignore the input and let someone else zap MDep. This handles cases like:
1132
1128
// memcpy(a <- a)
1133
1129
// memcpy(b <- a)
1134
1130
if (M->getSource () == MDep->getSource ())
1135
1131
return false ;
1136
1132
1137
- // Second, the length of the memcpy's must be the same, or the preceding one
1133
+ // We can only optimize non-volatile memcpy's.
1134
+ if (MDep->isVolatile ())
1135
+ return false ;
1136
+
1137
+ int64_t MForwardOffset = 0 ;
1138
+ const DataLayout &DL = M->getModule ()->getDataLayout ();
1139
+ // We can only transform memcpy's where the dest of one is the source of the
1140
+ // other, or where the source of the second is at a known, non-negative
+ // offset from the dest of the first.
1141
+ if (M->getSource () != MDep->getDest ()) {
1142
+ std::optional<int64_t > Offset =
1143
+ M->getSource ()->getPointerOffsetFrom (MDep->getDest (), DL);
1144
+ if (!Offset || *Offset < 0 )
1145
+ return false ;
1146
+ MForwardOffset = *Offset;
1147
+ }
1148
+
1149
+ // The length of the memcpy's must be the same, or the preceding one
1138
1150
// must be larger than the following one.
1139
- if (MDep->getLength () != M->getLength ()) {
1151
+ if (MForwardOffset != 0 || ( MDep->getLength () != M->getLength () )) {
1140
1152
auto *MDepLen = dyn_cast<ConstantInt>(MDep->getLength ());
1141
1153
auto *MLen = dyn_cast<ConstantInt>(M->getLength ());
1142
- if (!MDepLen || !MLen || MDepLen->getZExtValue () < MLen->getZExtValue ())
1154
+ if (!MDepLen || !MLen ||
1155
+ MDepLen->getZExtValue () < MLen->getZExtValue () + MForwardOffset)
1143
1156
return false ;
1144
1157
}
1145
1158
1159
+ IRBuilder<> Builder (M);
1160
+ auto *CopySource = MDep->getRawSource ();
1161
+ auto CleanupOnFailure = llvm::make_scope_exit ([&CopySource] {
1162
+ if (CopySource->use_empty ())
1163
+ cast<Instruction>(CopySource)->eraseFromParent ();
1164
+ });
1165
+ MaybeAlign CopySourceAlign = MDep->getSourceAlign ();
1166
+ // We just need to calculate the actual size of the copy.
1167
+ auto MCopyLoc = MemoryLocation::getForSource (MDep).getWithNewSize (
1168
+ MemoryLocation::getForSource (M).Size );
1169
+
1170
+ // We need to update `MCopyLoc` if an offset exists.
1171
+ if (MForwardOffset > 0 ) {
1172
+ // The destination of `M` may itself be usable as the copy source.
1173
+ std::optional<int64_t > MDestOffset =
1174
+ M->getRawDest ()->getPointerOffsetFrom (MDep->getRawSource (), DL);
1175
+ if (MDestOffset && *MDestOffset == MForwardOffset)
1176
+ CopySource = M->getRawDest ();
1177
+ else
1178
+ CopySource = Builder.CreateInBoundsPtrAdd (
1179
+ CopySource, ConstantInt::get (Type::getInt64Ty (Builder.getContext ()),
1180
+ MForwardOffset));
1181
+ MCopyLoc = MCopyLoc.getWithNewPtr (CopySource);
1182
+ if (CopySourceAlign)
1183
+ CopySourceAlign = commonAlignment (*CopySourceAlign, MForwardOffset);
1184
+ }
1185
+
1146
1186
// Verify that the copied-from memory doesn't change in between the two
1147
1187
// transfers. For example, in:
1148
1188
// memcpy(a <- b)
@@ -1152,10 +1192,8 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
1152
1192
//
1153
1193
// TODO: If the code between M and MDep is transparent to the destination "c",
1154
1194
// then we could still perform the xform by moving M up to the first memcpy.
1155
- // TODO: It would be sufficient to check the MDep source up to the memcpy
1156
- // size of M, rather than MDep.
1157
- if (writtenBetween (MSSA, BAA, MemoryLocation::getForSource (MDep),
1158
- MSSA->getMemoryAccess (MDep), MSSA->getMemoryAccess (M)))
1195
+ if (writtenBetween (MSSA, BAA, MCopyLoc, MSSA->getMemoryAccess (MDep),
1196
+ MSSA->getMemoryAccess (M)))
1159
1197
return false ;
1160
1198
1161
1199
// If the dest of the second might alias the source of the first, then the
@@ -1179,23 +1217,22 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
1179
1217
1180
1218
// TODO: Is this worth it if we're creating a less aligned memcpy? For
1181
1219
// example we could be moving from movaps -> movq on x86.
1182
- IRBuilder<> Builder (M);
1183
1220
Instruction *NewM;
1184
1221
if (UseMemMove)
1185
- NewM = Builder. CreateMemMove (M-> getRawDest (), M-> getDestAlign (),
1186
- MDep-> getRawSource (), MDep-> getSourceAlign () ,
1187
- M->getLength (), M->isVolatile ());
1222
+ NewM =
1223
+ Builder. CreateMemMove (M-> getRawDest (), M-> getDestAlign (), CopySource ,
1224
+ CopySourceAlign, M->getLength (), M->isVolatile ());
1188
1225
else if (isa<MemCpyInlineInst>(M)) {
1189
1226
// llvm.memcpy may be promoted to llvm.memcpy.inline, but the converse is
1190
1227
// never allowed since that would allow the latter to be lowered as a call
1191
1228
// to an external function.
1192
- NewM = Builder.CreateMemCpyInline (
1193
- M-> getRawDest (), M-> getDestAlign (), MDep-> getRawSource () ,
1194
- MDep-> getSourceAlign (), M->getLength (), M->isVolatile ());
1229
+ NewM = Builder.CreateMemCpyInline (M-> getRawDest (), M-> getDestAlign (),
1230
+ CopySource, CopySourceAlign ,
1231
+ M->getLength (), M->isVolatile ());
1195
1232
} else
1196
- NewM = Builder. CreateMemCpy (M-> getRawDest (), M-> getDestAlign (),
1197
- MDep-> getRawSource (), MDep-> getSourceAlign () ,
1198
- M->getLength (), M->isVolatile ());
1233
+ NewM =
1234
+ Builder. CreateMemCpy (M-> getRawDest (), M-> getDestAlign (), CopySource ,
1235
+ CopySourceAlign, M->getLength (), M->isVolatile ());
1199
1236
NewM->copyMetadata (*M, LLVMContext::MD_DIAssignID);
1200
1237
1201
1238
assert (isa<MemoryDef>(MSSAU->getMemorySSA ()->getMemoryAccess (M)));
0 commit comments