Skip to content

Commit 3552180

Browse files
committed
optimize @memset with undefined
When `@memset` is used to set bytes to `undefined`, Zig detects this case and issues a single Valgrind client request covering the whole range, rather than N requests (one per byte). This speeds up all allocators in safe modes. Closes #2388
1 parent f8117a0 commit 3552180

File tree

2 files changed: 33 additions (+33) and 24 deletions (-24).

src/codegen.cpp

Lines changed: 31 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -1036,7 +1036,7 @@ static LLVMValueRef get_write_register_fn_val(CodeGen *g) {
10361036
// !0 = !{!"sp\00"}
10371037

10381038
LLVMTypeRef param_types[] = {
1039-
LLVMMetadataTypeInContext(LLVMGetGlobalContext()),
1039+
LLVMMetadataTypeInContext(LLVMGetGlobalContext()),
10401040
LLVMIntType(g->pointer_size_bytes * 8),
10411041
};
10421042

@@ -3491,6 +3491,15 @@ static bool want_valgrind_support(CodeGen *g) {
34913491
zig_unreachable();
34923492
}
34933493

3494+
static void gen_valgrind_undef(CodeGen *g, LLVMValueRef dest_ptr, LLVMValueRef byte_count) {
3495+
static const uint32_t VG_USERREQ__MAKE_MEM_UNDEFINED = 1296236545;
3496+
ZigType *usize = g->builtin_types.entry_usize;
3497+
LLVMValueRef zero = LLVMConstInt(usize->llvm_type, 0, false);
3498+
LLVMValueRef req = LLVMConstInt(usize->llvm_type, VG_USERREQ__MAKE_MEM_UNDEFINED, false);
3499+
LLVMValueRef ptr_as_usize = LLVMBuildPtrToInt(g->builder, dest_ptr, usize->llvm_type, "");
3500+
gen_valgrind_client_request(g, zero, req, ptr_as_usize, byte_count, zero, zero, zero);
3501+
}
3502+
34943503
static void gen_undef_init(CodeGen *g, uint32_t ptr_align_bytes, ZigType *value_type, LLVMValueRef ptr) {
34953504
assert(type_has_bits(value_type));
34963505
uint64_t size_bytes = LLVMStoreSizeOfType(g->target_data_ref, get_llvm_type(g, value_type));
@@ -3505,11 +3514,7 @@ static void gen_undef_init(CodeGen *g, uint32_t ptr_align_bytes, ZigType *value_
35053514
ZigLLVMBuildMemSet(g->builder, dest_ptr, fill_char, byte_count, ptr_align_bytes, false);
35063515
// then tell valgrind that the memory is undefined even though we just memset it
35073516
if (want_valgrind_support(g)) {
3508-
static const uint32_t VG_USERREQ__MAKE_MEM_UNDEFINED = 1296236545;
3509-
LLVMValueRef zero = LLVMConstInt(usize->llvm_type, 0, false);
3510-
LLVMValueRef req = LLVMConstInt(usize->llvm_type, VG_USERREQ__MAKE_MEM_UNDEFINED, false);
3511-
LLVMValueRef ptr_as_usize = LLVMBuildPtrToInt(g->builder, dest_ptr, usize->llvm_type, "");
3512-
gen_valgrind_client_request(g, zero, req, ptr_as_usize, byte_count, zero, zero, zero);
3517+
gen_valgrind_undef(g, dest_ptr, byte_count);
35133518
}
35143519
}
35153520

@@ -3519,14 +3524,14 @@ static LLVMValueRef ir_render_store_ptr(CodeGen *g, IrExecutable *executable, Ir
35193524
if (!type_has_bits(ptr_type))
35203525
return nullptr;
35213526

3522-
bool have_init_expr = !value_is_all_undef(&instruction->value->value);
3527+
bool have_init_expr = !value_is_all_undef(&instruction->value->value);
35233528
if (have_init_expr) {
35243529
LLVMValueRef ptr = ir_llvm_value(g, instruction->ptr);
35253530
LLVMValueRef value = ir_llvm_value(g, instruction->value);
35263531
gen_assign_raw(g, ptr, ptr_type, value);
35273532
} else if (ir_want_runtime_safety(g, &instruction->base)) {
35283533
gen_undef_init(g, get_ptr_align(g, ptr_type), instruction->value->value.type,
3529-
ir_llvm_value(g, instruction->ptr));
3534+
ir_llvm_value(g, instruction->ptr));
35303535
}
35313536
return nullptr;
35323537
}
@@ -3729,7 +3734,7 @@ static LLVMValueRef ir_render_call(CodeGen *g, IrExecutable *executable, IrInstr
37293734
}
37303735
FnWalk fn_walk = {};
37313736
fn_walk.id = FnWalkIdCall;
3732-
fn_walk.data.call.inst = instruction;
3737+
fn_walk.data.call.inst = instruction;
37333738
fn_walk.data.call.is_var_args = is_var_args;
37343739
fn_walk.data.call.gen_param_values = &gen_param_values;
37353740
walk_function_params(g, fn_type, &fn_walk);
@@ -3749,7 +3754,7 @@ static LLVMValueRef ir_render_call(CodeGen *g, IrExecutable *executable, IrInstr
37493754

37503755
LLVMCallConv llvm_cc = get_llvm_cc(g, cc);
37513756
LLVMValueRef result;
3752-
3757+
37533758
if (instruction->new_stack == nullptr) {
37543759
result = ZigLLVMBuildCall(g->builder, fn_val,
37553760
gen_param_values.items, (unsigned)gen_param_values.length, llvm_cc, fn_inline, "");
@@ -4229,7 +4234,7 @@ static LLVMValueRef get_enum_tag_name_function(CodeGen *g, ZigType *enum_type) {
42294234
LLVMTypeRef tag_int_llvm_type = get_llvm_type(g, tag_int_type);
42304235
LLVMTypeRef fn_type_ref = LLVMFunctionType(LLVMPointerType(get_llvm_type(g, u8_slice_type), 0),
42314236
&tag_int_llvm_type, 1, false);
4232-
4237+
42334238
Buf *fn_name = get_mangled_name(g, buf_sprintf("__zig_tag_name_%s", buf_ptr(&enum_type->name)), false);
42344239
LLVMValueRef fn_val = LLVMAddFunction(g->module, buf_ptr(fn_name), fn_type_ref);
42354240
LLVMSetLinkage(fn_val, LLVMInternalLinkage);
@@ -4529,17 +4534,27 @@ static LLVMValueRef ir_render_truncate(CodeGen *g, IrExecutable *executable, IrI
45294534

45304535
static LLVMValueRef ir_render_memset(CodeGen *g, IrExecutable *executable, IrInstructionMemset *instruction) {
45314536
LLVMValueRef dest_ptr = ir_llvm_value(g, instruction->dest_ptr);
4532-
LLVMValueRef char_val = ir_llvm_value(g, instruction->byte);
45334537
LLVMValueRef len_val = ir_llvm_value(g, instruction->count);
45344538

45354539
LLVMTypeRef ptr_u8 = LLVMPointerType(LLVMInt8Type(), 0);
4536-
45374540
LLVMValueRef dest_ptr_casted = LLVMBuildBitCast(g->builder, dest_ptr, ptr_u8, "");
45384541

45394542
ZigType *ptr_type = instruction->dest_ptr->value.type;
45404543
assert(ptr_type->id == ZigTypeIdPointer);
45414544

4542-
ZigLLVMBuildMemSet(g->builder, dest_ptr_casted, char_val, len_val, get_ptr_align(g, ptr_type), ptr_type->data.pointer.is_volatile);
4545+
bool val_is_undef = value_is_all_undef(&instruction->byte->value);
4546+
LLVMValueRef fill_char;
4547+
if (val_is_undef) {
4548+
fill_char = LLVMConstInt(LLVMInt8Type(), 0xaa, false);
4549+
} else {
4550+
fill_char = ir_llvm_value(g, instruction->byte);
4551+
}
4552+
ZigLLVMBuildMemSet(g->builder, dest_ptr_casted, fill_char, len_val, get_ptr_align(g, ptr_type),
4553+
ptr_type->data.pointer.is_volatile);
4554+
4555+
if (val_is_undef && want_valgrind_support(g)) {
4556+
gen_valgrind_undef(g, dest_ptr_casted, len_val);
4557+
}
45434558
return nullptr;
45444559
}
45454560

@@ -6944,7 +6959,7 @@ static void do_code_gen(CodeGen *g) {
69446959
ir_render(g, fn_table_entry);
69456960

69466961
}
6947-
6962+
69486963
assert(!g->errors.length);
69496964

69506965
if (buf_len(&g->global_asm) != 0) {
@@ -7752,7 +7767,7 @@ Buf *codegen_generate_builtin_source(CodeGen *g) {
77527767
assert(ContainerLayoutAuto == 0);
77537768
assert(ContainerLayoutExtern == 1);
77547769
assert(ContainerLayoutPacked == 2);
7755-
7770+
77567771
assert(CallingConventionUnspecified == 0);
77577772
assert(CallingConventionC == 1);
77587773
assert(CallingConventionCold == 2);

std/mem.zig

Lines changed: 2 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -104,10 +104,7 @@ pub const Allocator = struct {
104104
const byte_count = math.mul(usize, @sizeOf(T), n) catch return Error.OutOfMemory;
105105
const byte_slice = try self.reallocFn(self, ([*]u8)(undefined)[0..0], undefined, byte_count, alignment);
106106
assert(byte_slice.len == byte_count);
107-
// This loop gets optimized out in ReleaseFast mode
108-
for (byte_slice) |*byte| {
109-
byte.* = undefined;
110-
}
107+
@memset(byte_slice.ptr, undefined, byte_slice.len);
111108
return @bytesToSlice(T, @alignCast(alignment, byte_slice));
112109
}
113110

@@ -153,10 +150,7 @@ pub const Allocator = struct {
153150
const byte_slice = try self.reallocFn(self, old_byte_slice, Slice.alignment, byte_count, new_alignment);
154151
assert(byte_slice.len == byte_count);
155152
if (new_n > old_mem.len) {
156-
// This loop gets optimized out in ReleaseFast mode
157-
for (byte_slice[old_byte_slice.len..]) |*byte| {
158-
byte.* = undefined;
159-
}
153+
@memset(byte_slice.ptr + old_byte_slice.len, undefined, byte_slice.len - old_byte_slice.len);
160154
}
161155
return @bytesToSlice(T, @alignCast(new_alignment, byte_slice));
162156
}

0 commit comments

Comments
 (0)