Skip to content

Commit 3b152f0

Browse files
committed
LLVM: elide some loads when lowering
Generally, the load instruction may need to make a copy of an isByRef=true value, such as in the case of the following code:

```zig
pub fn swap(comptime T: type, a: *T, b: *T) void {
    const tmp = a.*;
    a.* = b.*;
    b.* = tmp;
}
```

However, it only needs to do so if there are any instructions which can possibly write to memory. When calling functions with isByRef=true parameters, the AIR code that is generated looks like loads followed directly by call.

This allows for a peephole optimization when lowering loads: if the load instruction operates on an isByRef=true type and dies before any side effects occur, then we can safely lower the load as a no-op that returns its operand.

This is one of three changes I intend to make to address #11498. However, I will put these changes in separate branches and merge them separately so that we can have three independent points on the perf charts.
1 parent c3ef4ac commit 3b152f0

File tree

2 files changed

+407
-5
lines changed

2 files changed

+407
-5
lines changed

src/Liveness.zig

Lines changed: 380 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,386 @@ pub fn clearOperandDeath(l: Liveness, inst: Air.Inst.Index, operand: OperandInt)
112112
l.tomb_bits[usize_index] &= ~mask;
113113
}
114114

115+
/// Returns `.tomb` when `operandDies(l, inst, operand)` reports that operand
/// slot `operand` of `inst` dies at `inst`; otherwise returns `default`, the
/// category the caller assigns to `inst` when the operand lives on.
fn matchOperandSmallIndex(l: Liveness, inst: Air.Inst.Index, operand: OperandInt, default: MatchedOperand) MatchedOperand {
    return if (operandDies(l, inst, operand)) .tomb else default;
}
122+
123+
/// Classification produced by `matchOperand` for one (instruction, operand) pair.
///
/// * `none`   - fallback for instructions not placed in any category below
///              (arithmetic, comparisons, casts, loads, debug statements, ...)
///              when the operand does not die at the instruction.
/// * `write`  - fallback for stores, atomic stores / rmw / cmpxchg, calls,
///              `memset` / `memcpy`, `fence` and similar instructions; per the
///              accompanying commit message these are instructions that can
///              possibly write to memory.
/// * `tomb`   - the instruction uses the operand and liveness marks that use as
///              the operand's death.
/// * `noret`  - fallback for `ret`, `ret_load` and `br`.
/// * `complex`- returned for `assembly`, `block`, `loop`, `cond_br` and
///              `switch_br` without inspecting their operands.
const MatchedOperand = enum { none, write, tomb, noret, complex };
124+
125+
/// Classifies how the instruction `inst` relates to the value produced by the
/// instruction `operand`.
///
/// * `.tomb` is returned when `inst` uses `operand` and liveness marks that
///   use as the operand's death.
/// * `.complex` is returned for `assembly`, `block`, `loop`, `cond_br` and
///   `switch_br`; their operands are not inspected.
/// * Otherwise the fallback category chosen for the tag of `inst` is returned
///   (`.none`, `.write` or `.noret`), both when `operand` is not used by `inst`
///   and when it is used but does not die there.
///
/// When `operand` appears in several operand slots, only the first matching
/// slot (in the order checked below) is consulted.
pub fn matchOperand(
    l: Liveness,
    air: Air,
    inst: Air.Inst.Index,
    operand: Air.Inst.Index,
) MatchedOperand {
    const data = air.instructions.items(.data)[inst];
    const needle = Air.indexToRef(operand);

    switch (air.instructions.items(.tag)[inst]) {
        // Two operands stored inline as `bin_op`; fallback `.none`.
        .add,
        .addwrap,
        .add_sat,
        .sub,
        .subwrap,
        .sub_sat,
        .mul,
        .mulwrap,
        .mul_sat,
        .div_float,
        .div_trunc,
        .div_floor,
        .div_exact,
        .rem,
        .mod,
        .bit_and,
        .bit_or,
        .xor,
        .cmp_lt,
        .cmp_lte,
        .cmp_eq,
        .cmp_gte,
        .cmp_gt,
        .cmp_neq,
        .bool_and,
        .bool_or,
        .array_elem_val,
        .slice_elem_val,
        .ptr_elem_val,
        .shl,
        .shl_exact,
        .shl_sat,
        .shr,
        .shr_exact,
        .min,
        .max,
        => {
            const bin = data.bin_op;
            const slot: OperandInt = if (bin.lhs == needle) 0 else if (bin.rhs == needle) 1 else return .none;
            return matchOperandSmallIndex(l, inst, slot, .none);
        },

        // Two operands stored inline as `bin_op`; fallback `.write`.
        .store,
        .atomic_store_unordered,
        .atomic_store_monotonic,
        .atomic_store_release,
        .atomic_store_seq_cst,
        .set_union_tag,
        => {
            const bin = data.bin_op;
            const slot: OperandInt = if (bin.lhs == needle) 0 else if (bin.rhs == needle) 1 else return .write;
            return matchOperandSmallIndex(l, inst, slot, .write);
        },

        // No operands are examined for these tags.
        .arg,
        .alloc,
        .ret_ptr,
        .constant,
        .const_ty,
        .breakpoint,
        .dbg_stmt,
        .dbg_inline_begin,
        .dbg_inline_end,
        .dbg_block_begin,
        .dbg_block_end,
        .unreach,
        .ret_addr,
        .frame_addr,
        .wasm_memory_size,
        .err_return_trace,
        => return .none,

        .fence => return .write,

        // Single operand stored as `ty_op`; fallback `.none`.
        .not,
        .bitcast,
        .load,
        .fpext,
        .fptrunc,
        .intcast,
        .trunc,
        .optional_payload,
        .optional_payload_ptr,
        .wrap_optional,
        .unwrap_errunion_payload,
        .unwrap_errunion_err,
        .unwrap_errunion_payload_ptr,
        .unwrap_errunion_err_ptr,
        .wrap_errunion_payload,
        .wrap_errunion_err,
        .slice_ptr,
        .slice_len,
        .ptr_slice_len_ptr,
        .ptr_slice_ptr_ptr,
        .struct_field_ptr_index_0,
        .struct_field_ptr_index_1,
        .struct_field_ptr_index_2,
        .struct_field_ptr_index_3,
        .array_to_slice,
        .float_to_int,
        .int_to_float,
        .get_union_tag,
        .clz,
        .ctz,
        .popcount,
        .byte_swap,
        .bit_reverse,
        .splat,
        => {
            if (data.ty_op.operand != needle) return .none;
            return matchOperandSmallIndex(l, inst, 0, .none);
        },

        // Single operand stored as `ty_op`; fallback `.write`.
        .optional_payload_ptr_set,
        .errunion_payload_ptr_set,
        => {
            if (data.ty_op.operand != needle) return .write;
            return matchOperandSmallIndex(l, inst, 0, .write);
        },

        // Single operand stored as `un_op`; fallback `.none`.
        .is_null,
        .is_non_null,
        .is_null_ptr,
        .is_non_null_ptr,
        .is_err,
        .is_non_err,
        .is_err_ptr,
        .is_non_err_ptr,
        .ptrtoint,
        .bool_to_int,
        .tag_name,
        .error_name,
        .sqrt,
        .sin,
        .cos,
        .tan,
        .exp,
        .exp2,
        .log,
        .log2,
        .log10,
        .fabs,
        .floor,
        .ceil,
        .round,
        .trunc_float,
        .cmp_lt_errors_len,
        => {
            if (data.un_op != needle) return .none;
            return matchOperandSmallIndex(l, inst, 0, .none);
        },

        // Single operand stored as `un_op`; fallback `.noret`.
        .ret,
        .ret_load,
        => {
            if (data.un_op != needle) return .noret;
            return matchOperandSmallIndex(l, inst, 0, .noret);
        },

        .set_err_return_trace => {
            if (data.un_op != needle) return .write;
            return matchOperandSmallIndex(l, inst, 0, .write);
        },

        // Two operands stored out of line as an `Air.Bin` payload; fallback `.none`.
        .add_with_overflow,
        .sub_with_overflow,
        .mul_with_overflow,
        .shl_with_overflow,
        .ptr_add,
        .ptr_sub,
        .ptr_elem_ptr,
        .slice_elem_ptr,
        .slice,
        => {
            const bin = air.extraData(Air.Bin, data.ty_pl.payload).data;
            const slot: OperandInt = if (bin.lhs == needle) 0 else if (bin.rhs == needle) 1 else return .none;
            return matchOperandSmallIndex(l, inst, slot, .none);
        },

        // Single operand stored in `pl_op.operand`; fallback `.none`.
        .dbg_var_ptr,
        .dbg_var_val,
        .wasm_memory_grow,
        => {
            if (data.pl_op.operand != needle) return .none;
            return matchOperandSmallIndex(l, inst, 0, .none);
        },

        .prefetch => {
            if (data.prefetch.ptr != needle) return .none;
            return matchOperandSmallIndex(l, inst, 0, .none);
        },

        .call, .call_always_tail, .call_never_tail, .call_never_inline => {
            const pl_op = data.pl_op;
            const callee = pl_op.operand;
            const call = air.extraData(Air.Call, pl_op.payload);
            const args = @ptrCast([]const Air.Inst.Ref, air.extra[call.end..][0..call.data.args_len]);

            // Callee plus arguments do not fit in the small tomb bits: walk the
            // big tomb, consuming one bit for the callee and then one per
            // argument, in that order.
            if (args.len + 1 > bpi - 1) {
                var big_tomb = l.iterateBigTomb(inst);
                const callee_dies = big_tomb.feed();
                if (callee == needle) {
                    if (callee_dies) return .tomb;
                    return .write;
                }
                for (args) |arg| {
                    const arg_dies = big_tomb.feed();
                    if (arg == needle) {
                        if (arg_dies) return .tomb;
                        return .write;
                    }
                }
                return .write;
            }

            // Small case: slot 0 is the callee, slot `i + 1` is argument `i`.
            if (callee == needle) return matchOperandSmallIndex(l, inst, 0, .write);
            for (args) |arg, i| {
                if (arg != needle) continue;
                return matchOperandSmallIndex(l, inst, @intCast(OperandInt, i + 1), .write);
            }
            return .write;
        },

        .select => {
            const pl_op = data.pl_op;
            const bin = air.extraData(Air.Bin, pl_op.payload).data;
            const slot: OperandInt = if (pl_op.operand == needle)
                0
            else if (bin.lhs == needle)
                1
            else if (bin.rhs == needle)
                2
            else
                return .none;
            return matchOperandSmallIndex(l, inst, slot, .none);
        },

        .shuffle => {
            const shuffle = air.extraData(Air.Shuffle, data.ty_pl.payload).data;
            const slot: OperandInt = if (shuffle.a == needle) 0 else if (shuffle.b == needle) 1 else return .none;
            return matchOperandSmallIndex(l, inst, slot, .none);
        },

        .reduce => {
            if (data.reduce.operand != needle) return .none;
            return matchOperandSmallIndex(l, inst, 0, .none);
        },

        .cmp_vector => {
            const cmp = air.extraData(Air.VectorCmp, data.ty_pl.payload).data;
            const slot: OperandInt = if (cmp.lhs == needle) 0 else if (cmp.rhs == needle) 1 else return .none;
            return matchOperandSmallIndex(l, inst, slot, .none);
        },

        .aggregate_init => {
            const ty_pl = data.ty_pl;
            const len = @intCast(usize, air.getRefType(ty_pl.ty).arrayLen());
            const elems = @ptrCast([]const Air.Inst.Ref, air.extra[ty_pl.payload..][0..len]);

            // Too many elements for the small tomb bits: walk the big tomb, one
            // bit per element. Note that this path reports `.write` as its
            // fallback, whereas the small path below reports `.none`.
            if (elems.len > bpi - 1) {
                var big_tomb = l.iterateBigTomb(inst);
                for (elems) |elem| {
                    const elem_dies = big_tomb.feed();
                    if (elem == needle) {
                        if (elem_dies) return .tomb;
                        return .write;
                    }
                }
                return .write;
            }

            for (elems) |elem, i| {
                if (elem != needle) continue;
                return matchOperandSmallIndex(l, inst, @intCast(OperandInt, i), .none);
            }
            return .none;
        },

        .union_init => {
            const init = air.extraData(Air.UnionInit, data.ty_pl.payload).data;
            if (init.init != needle) return .none;
            return matchOperandSmallIndex(l, inst, 0, .none);
        },

        .struct_field_ptr, .struct_field_val => {
            const field = air.extraData(Air.StructField, data.ty_pl.payload).data;
            if (field.struct_operand != needle) return .none;
            return matchOperandSmallIndex(l, inst, 0, .none);
        },

        .field_parent_ptr => {
            const parent = air.extraData(Air.FieldParentPtr, data.ty_pl.payload).data;
            if (parent.field_ptr != needle) return .none;
            return matchOperandSmallIndex(l, inst, 0, .none);
        },

        .cmpxchg_strong, .cmpxchg_weak => {
            const xchg = air.extraData(Air.Cmpxchg, data.ty_pl.payload).data;
            const slot: OperandInt = if (xchg.ptr == needle)
                0
            else if (xchg.expected_value == needle)
                1
            else if (xchg.new_value == needle)
                2
            else
                return .write;
            return matchOperandSmallIndex(l, inst, slot, .write);
        },

        .mul_add => {
            // Slot order here is lhs, rhs, then the inline `pl_op` operand.
            const pl_op = data.pl_op;
            const bin = air.extraData(Air.Bin, pl_op.payload).data;
            const slot: OperandInt = if (bin.lhs == needle)
                0
            else if (bin.rhs == needle)
                1
            else if (pl_op.operand == needle)
                2
            else
                return .none;
            return matchOperandSmallIndex(l, inst, slot, .none);
        },

        .atomic_load => {
            if (data.atomic_load.ptr != needle) return .none;
            return matchOperandSmallIndex(l, inst, 0, .none);
        },

        .atomic_rmw => {
            const pl_op = data.pl_op;
            const rmw = air.extraData(Air.AtomicRmw, pl_op.payload).data;
            const slot: OperandInt = if (pl_op.operand == needle) 0 else if (rmw.operand == needle) 1 else return .write;
            return matchOperandSmallIndex(l, inst, slot, .write);
        },

        .memset,
        .memcpy,
        => {
            const pl_op = data.pl_op;
            const bin = air.extraData(Air.Bin, pl_op.payload).data;
            const slot: OperandInt = if (pl_op.operand == needle)
                0
            else if (bin.lhs == needle)
                1
            else if (bin.rhs == needle)
                2
            else
                return .write;
            return matchOperandSmallIndex(l, inst, slot, .write);
        },

        .br => {
            if (data.br.operand != needle) return .noret;
            return matchOperandSmallIndex(l, inst, 0, .noret);
        },

        // These tags are reported as `.complex` without looking at operands.
        .assembly,
        .block,
        .loop,
        .cond_br,
        .switch_br,
        => return .complex,
    }
}
494+
115495
/// Higher level API.
116496
pub const CondBrSlices = struct {
117497
then_deaths: []const Air.Inst.Index,

0 commit comments

Comments
 (0)