Proposal: @disableIntrinsics() builtin function #22110


Closed
alexrp opened this issue Nov 30, 2024 · 15 comments · Fixed by #22154
Labels
accepted: This proposal is planned.
proposal: This issue suggests modifications. If it also has the "accepted" label then it is planned.
Comments

@alexrp
Member

alexrp commented Nov 30, 2024

@disableIntrinsics() forbids the compiler from recognizing code sequences and turning them into well-known library calls. The obvious examples of this would be memcpy(), memset(), etc. Additionally, it forbids the compiler from turning calls to such well-known library functions into different code (e.g. turning memcpy() into __aeabi_memcpy() on Arm).

Concretely, for LLVM, this would map to the no-builtins function attribute and the nobuiltin call site attribute, and whatever equivalent exists for other backends.

This builtin function would work similarly to @disableInstrumentation() in that a call to it affects the entire function body rather than the block scope. The existing -fno-builtin compiler option works as if @disableIntrinsics() is called in every function.
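
As a usage sketch (hypothetical code; the builtin name follows this proposal, and whether the compiler would otherwise form a memcpy() call here depends on the backend and optimization level):

```zig
fn copyBytes(dest: []u8, src: []const u8) void {
    // Proposed: affects the entire function body, like @disableInstrumentation().
    @disableIntrinsics();
    // Without the builtin, an optimizer is free to recognize this loop and
    // replace it with a call to memcpy(); with it, the loop must be lowered
    // as plain code.
    for (dest, src) |*d, s| d.* = s;
}
```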

Consider one motivating use case from the standard library:

zig/lib/std/start.zig

Lines 450 to 488 in aa7d138

fn posixCallMainAndExit(argc_argv_ptr: [*]usize) callconv(.c) noreturn {
    // We're not ready to panic until thread local storage is initialized.
    @setRuntimeSafety(false);
    // Code coverage instrumentation might try to use thread local variables.
    @disableInstrumentation();
    const argc = argc_argv_ptr[0];
    const argv = @as([*][*:0]u8, @ptrCast(argc_argv_ptr + 1));
    const envp_optional: [*:null]?[*:0]u8 = @ptrCast(@alignCast(argv + argc + 1));
    var envp_count: usize = 0;
    while (envp_optional[envp_count]) |_| : (envp_count += 1) {}
    const envp = @as([*][*:0]u8, @ptrCast(envp_optional))[0..envp_count];
    if (native_os == .linux) {
        // Find the beginning of the auxiliary vector
        const auxv: [*]elf.Auxv = @ptrCast(@alignCast(envp.ptr + envp_count + 1));
        var at_hwcap: usize = 0;
        const phdrs = init: {
            var i: usize = 0;
            var at_phdr: usize = 0;
            var at_phnum: usize = 0;
            while (auxv[i].a_type != elf.AT_NULL) : (i += 1) {
                switch (auxv[i].a_type) {
                    elf.AT_PHNUM => at_phnum = auxv[i].a_un.a_val,
                    elf.AT_PHDR => at_phdr = auxv[i].a_un.a_val,
                    elf.AT_HWCAP => at_hwcap = auxv[i].a_un.a_val,
                    else => continue,
                }
            }
            break :init @as([*]elf.Phdr, @ptrFromInt(at_phdr))[0..at_phnum];
        };
        // Apply the initial relocations as early as possible in the startup process. We cannot
        // make calls yet on some architectures (e.g. MIPS) *because* they haven't been applied yet,
        // so this must be fully inlined.
        if (builtin.position_independent_executable) {
            @call(.always_inline, std.os.linux.pie.relocate, .{phdrs});
        }

pub fn relocate(phdrs: []elf.Phdr) void {
    @setRuntimeSafety(false);
    @disableInstrumentation();
    const dynv = getDynamicSymbol();
    // Recover the delta applied by the loader by comparing the effective and
    // the theoretical load addresses for the `_DYNAMIC` symbol.
    const base_addr = base: {
        for (phdrs) |*phdr| {
            if (phdr.p_type != elf.PT_DYNAMIC) continue;
            break :base @intFromPtr(dynv) - phdr.p_vaddr;
        }
        // This is not supposed to happen for well-formed binaries.
        @trap();
    };
    var sorted_dynv: [elf.DT_NUM]elf.Addr = undefined;
    // Zero-initialized this way to prevent the compiler from turning this into
    // `memcpy` or `memset` calls (which can require relocations).
    for (&sorted_dynv) |*dyn| {
        const pdyn: *volatile elf.Addr = @ptrCast(dyn);
        pdyn.* = 0;
    }
    {
        // `dynv` has no defined order. Fix that.
        var i: usize = 0;
        while (dynv[i].d_tag != elf.DT_NULL) : (i += 1) {
            if (dynv[i].d_tag < elf.DT_NUM) sorted_dynv[@bitCast(dynv[i].d_tag)] = dynv[i].d_val;
        }
    }
    // Deal with the GOT relocations that MIPS uses first.
    if (builtin.cpu.arch.isMIPS()) {
        const count: elf.Addr = blk: {
            // This is an architecture-specific tag, so not part of `sorted_dynv`.
            var i: usize = 0;
            while (dynv[i].d_tag != elf.DT_NULL) : (i += 1) {
                if (dynv[i].d_tag == elf.DT_MIPS_LOCAL_GOTNO) break :blk dynv[i].d_val;
            }
            break :blk 0;
        };
        const got: [*]usize = @ptrFromInt(base_addr + sorted_dynv[elf.DT_PLTGOT]);
        for (0..count) |i| {
            got[i] += base_addr;
        }
    }
    // Apply normal relocations.
    const rel = sorted_dynv[elf.DT_REL];
    if (rel != 0) {
        const rels = @call(.always_inline, std.mem.bytesAsSlice, .{
            elf.Rel,
            @as([*]u8, @ptrFromInt(base_addr + rel))[0..sorted_dynv[elf.DT_RELSZ]],
        });
        for (rels) |r| {
            if (r.r_type() != R_RELATIVE) continue;
            @as(*usize, @ptrFromInt(base_addr + r.r_offset)).* += base_addr;
        }
    }
    const rela = sorted_dynv[elf.DT_RELA];
    if (rela != 0) {
        const relas = @call(.always_inline, std.mem.bytesAsSlice, .{
            elf.Rela,
            @as([*]u8, @ptrFromInt(base_addr + rela))[0..sorted_dynv[elf.DT_RELASZ]],
        });
        for (relas) |r| {
            if (r.r_type() != R_RELATIVE) continue;
            @as(*usize, @ptrFromInt(base_addr + r.r_offset)).* = base_addr + @as(usize, @bitCast(r.r_addend));
        }
    }
    const relr = sorted_dynv[elf.DT_RELR];
    if (relr != 0) {
        const relrs = @call(.always_inline, std.mem.bytesAsSlice, .{
            elf.Relr,
            @as([*]u8, @ptrFromInt(base_addr + relr))[0..sorted_dynv[elf.DT_RELRSZ]],
        });
        var current: [*]usize = undefined;
        for (relrs) |r| {
            if ((r & 1) == 0) {
                current = @ptrFromInt(base_addr + r);
                current[0] += base_addr;
                current += 1;
            } else {
                // Skip the first bit; there are 63 locations in the bitmap.
                var i: if (@sizeOf(usize) == 8) u6 else u5 = 1;
                while (i < @bitSizeOf(elf.Relr)) : (i += 1) {
                    if (((r >> i) & 1) != 0) current[i] += base_addr;
                }
                current += @bitSizeOf(elf.Relr) - 1;
            }
        }
    }
}

All of the code that runs prior to applying relocations in std.os.linux.pie.relocate() is potentially vulnerable to an optimization pass or compiler backend deciding to turn some code sequence into a library call, and we have to massage it just right to prevent that from happening. This is obviously quite fragile. Other kinds of software like libcs, dynamic linkers, kernels, kernel modules, etc. face similar problems and consequently make use of -fno-builtin or Clang's no_builtin attribute.
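
For comparison, here is the workaround used in the snippet above next to what this proposal would allow (a sketch; elf.Addr is taken from the snippet's context):

```zig
// Today: volatile stores are used to stop the optimizer from forming a
// memset() call for the zeroing loop.
fn zeroToday(buf: []elf.Addr) void {
    for (buf) |*dyn| {
        const pdyn: *volatile elf.Addr = @ptrCast(dyn);
        pdyn.* = 0;
    }
}

// With this proposal: state the intent once for the whole function and
// write the loop naturally.
fn zeroProposed(buf: []elf.Addr) void {
    @disableIntrinsics();
    for (buf) |*dyn| dyn.* = 0;
}
```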

Another motivating use case can be found here: #21831 (comment)

Yet more use cases would be the memcpy(), memset(), etc. implementations in compiler-rt.

I don't think -fno-builtin is an appropriate substitute for a language feature here; in the case above, we can't apply -fno-builtin to just that code, but we obviously don't want to apply -fno-builtin to the entire standard library either. That said, I don't think we need the level of granularity that Clang's no_builtin attribute has; I think a builtin function in the form proposed here strikes a good balance.

This proposal is related to #21833. That proposal is about preventing the compiler from turning a single call to a well-known library function into something else (e.g. __aeabi_memcpy() on Arm). This proposal encompasses that behavior, but only for an entire function body rather than individual calls. I suspect this will be good enough in practice, so this proposal could be considered to supersede #21833 (although technically there's no particular reason that they can't both be implemented).
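
For example, the per-call behavior that #21833 asks for could be approximated under this proposal by isolating the call in a small function (a sketch, not actual standard library code):

```zig
extern fn memcpy(noalias dest: [*]u8, noalias src: [*]const u8, len: usize) callconv(.c) [*]u8;

// Because @disableIntrinsics() covers the whole function body, this single
// memcpy() call may not be rewritten into e.g. __aeabi_memcpy() on Arm.
fn rawMemcpy(dest: [*]u8, src: [*]const u8, len: usize) void {
    @disableIntrinsics();
    _ = memcpy(dest, src, len);
}
```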

@alexrp alexrp added the proposal label Nov 30, 2024
@alexrp alexrp added this to the unplanned milestone Nov 30, 2024
@alexrp
Member Author

alexrp commented Nov 30, 2024

cc @rohlem @Rexicon226 @dweiller @ParfenovIgor since you participated in the discussion on #21833.

@rohlem
Contributor

rohlem commented Nov 30, 2024

I like the idea, and this mechanism seems like the right tool for the job.

The one detail I'm not 100% sure on is the current interplay with inlining (and I think this isn't fully addressed in the langref either):

This builtin function would work similarly to @setRuntimeSafety() in that it affects the surrounding scope.

fn regular_f() void {} //code
noinline fn noinline_f() void {} //code
inline fn inline_f() void {} //code
fn main() { //scope 0
  @setRuntimeSafety(x);
  @setIntrinsicRecognition(y);
  { //scope 1
    // code
  }
  noinline_f();
  inline_f();
  regular_f();
}

The current example in the langref makes it seem like the settings of scope 0 transitively apply to nested scopes (scope 1 here).
I think it would be highly surprising if noinline functions (and function calls with .never_inline modifier) were affected by the caller scope's settings.

What seems unclear to me is whether an inline function (and a call with .always_inline modifier) would keep the caller's settings (as if it were directly inlined as a scope), or reset to the defaults.
If it does keep the settings, then what about regular_f? In particular, could the compiler's decision on whether or not to inline a particular function call affect that function's runtime safety and intrinsic recognition settings? That would seem very surprising to me.

I think the simplest (most regular and least surprising) answer would be to reset these scope-local settings for all functions, even for inline fn.
(If we do want to support all combinations, maybe @call's CallModifier enum could be expanded into a CallOptions struct, allowing a caller to specify the callee's default runtime safety and intrinsic recognition settings.)

@alexrp
Member Author

alexrp commented Dec 5, 2024

The one detail I'm not 100% sure on is the current interplay with inlining (and I think this isn't fully addressed in the langref either):

Whatever we decide here, it just needs to be consistent among these scoped @set*() builtin functions.

I think the simplest (most regular and least surprising) answer would be to reset these scope-local settings for all functions, even for inline fn.

I think I'd agree with this.

@alexrp
Member Author

alexrp commented Dec 5, 2024

Hmm... one important thing to note here is that LLVM only allows no-builtins at the function level. This effectively means that any @setIntrinsicRecognition(false) inside a function would have to win out over any @setIntrinsicRecognition(true) (unless we do outlining, but that's crazy complicated and definitely not worth it just for this).

This definitely calls into question the value of having @setIntrinsicRecognition() be defined as scoped rather than be defined as a property of the function it's invoked in.
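
To illustrate the mismatch (hypothetical code; @setIntrinsicRecognition() was this proposal's original scoped form):

```zig
fn mixed() void {
    {
        @setIntrinsicRecognition(false);
        // Code that must not be turned into memcpy()/memset() calls.
    }
    {
        @setIntrinsicRecognition(true);
        // Recognition would be welcome here, but since LLVM's no-builtins
        // attribute is function-level, the `false` setting above would have
        // to win for this code too.
    }
}
```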

@dweiller
Contributor

dweiller commented Dec 5, 2024

I definitely agree that having @set* builtins not affect the caller of inline functions makes sense.

As far as @setIntrinsicRecognition goes, I agree that it would be good to be able to control recognition without having to affect a whole module. If we have @setIntrinsicRecognition, I would even think it makes sense to remove -fno-builtin and just require calls to @setIntrinsicRecognition in code that needs it. It feels too much like two ways to do the same thing to me, and applying it to whole modules could lead to missed optimisation opportunities for functions that didn't need it.

This definitely calls into question the value of having @setIntrinsicRecognition() be defined as scoped rather than be defined as a property of the function it's invoked in.

I think it could be fine to still have it defined at scope level and let backends that can't support that level of granularity just hoist @setIntrinsicRecognition(false) up to whatever scope they support. On the other hand, I'm not sure what kind of code would want different settings at different scopes in the same function - the only use case I have in mind is preventing recursive function calls. Maybe cases where you could get mutually recursive calls between intrinsics are where you'd want sub-function-scope targeting?

@alexrp
Member Author

alexrp commented Dec 5, 2024

If we have @setIntrinsicRecognition I would even think it makes sense to remove -fno-builtin and just require calls to @setIntrinsicRecognition in code that needs it. It feels too much like two ways to do the same thing to me, and applying it to whole modules could lead to missed optimisation opportunities for functions that didn't need it.

That's fine by me. We'd need to carefully audit compiler-rt when removing -fno-builtin, but that's doable.

I think it could be fine to still have it defined at scope-level and let backends that can't support that level of granularity could just hoist @setIntrinsicRecognition(false) up to whatever scope they support. On the other hand, I'm not sure what kind of code would want different settings at different scopes in the same function - the only use-case I have in mind is preventing recursive function calls. Maybe cases where you could get mutually recursive function calls between intrinsics is where you'd want sub-function-scope targetting?

I can't think of any real-world use cases either.

It's probably fine for any @setIntrinsicRecognition(false) to win out. I can't actually think of any cases where it would be sensible to actively require @setIntrinsicRecognition(true) to kick in for correctness. Such code would be incredibly fragile (depends on compiler version, build mode, etc...).

@alexrp
Member Author

alexrp commented Dec 5, 2024

I changed my mind on scoped vs function-level; it turns out to be much easier to implement at the function level, and also, that's how @disableInstrumentation() already works, so there's precedent.

alexrp added a commit to alexrp/zig that referenced this issue Dec 5, 2024
@alexrp alexrp changed the title Proposal: @setIntrinsicRecognition(comptime value: bool) builtin function Proposal: @disableIntrinsics() builtin function Dec 6, 2024
@alexrp
Member Author

alexrp commented Dec 6, 2024

Proposal text updated based on the implementation PR (#22154).

alexrp added a commit to alexrp/zig that referenced this issue Dec 7, 2024
@alexrp alexrp self-assigned this Dec 7, 2024
alexrp added a commit to alexrp/zig that referenced this issue Jan 24, 2025
alexrp added a commit to alexrp/zig that referenced this issue Jan 24, 2025
@wooster0
Contributor

I think this is another case where this builtin would be useful: #22854

@dweiller
Contributor

dweiller commented Feb 11, 2025

It looks like a @setIntrinsicRecognition(false) call just turning on the 'no-builtins' attribute would not be enough for the LLVM backend. We would also have to ensure that calls to problematic LLVM builtins are not produced by the LLVM backend itself. For example, when lowering compiler-rt's memcpy, the LLVM backend needs to not produce calls to llvm.memcpy, but I believe llvm.memcpy.inline should be fine. This would mean that the backend could just pick which LLVM builtin is used based on the 'intrinsic recognition' setting.

@alexrp
Member Author

alexrp commented Feb 11, 2025

FWIW, I believe we should make that change independent of this proposal. It makes sense even just for -fno-builtin.

@dweiller
Contributor

dweiller commented Feb 11, 2025

Yes, that makes sense - I just wanted to have it mentioned somewhere on the issue tracker, but maybe it should be a separate issue.

@andrewrk
Member

Consider one motivating use case from the standard library:
(posixCallMainAndExit)
(pie relocate)

Just want to note that these are not motivating use cases, since calling into intrinsics such as memcpy would actually be good. Such things would be built in a different compilation unit and not subject to the problem that @disableInstrumentation() is trying to solve.

As far as I can tell, the only motivating use case for these is compiler-rt itself, however, I think that is compelling enough alone to warrant the language feature.

@andrewrk andrewrk added the accepted label Feb 23, 2025
@andrewrk andrewrk modified the milestones: unplanned, 0.14.0 Feb 23, 2025
@alexrp
Member Author

alexrp commented Feb 23, 2025

Just want to note that these are not motivating use cases, since calling into intrinsics such as memcpy would actually be good. Such things would be built in a different compilation unit and not subject to the problem that @disableInstrumentation() is trying to solve.

But they are motivating use cases: because PIE relocations have not been applied yet, we're not allowed to make calls at all, since calls go through the global pointer/GOT register on some targets (on some of those targets, even for calls within the same compilation unit!). So we can't have the compiler turning e.g. large load/store operations or copy loops into memcpy/memmove calls.
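
A sketch of the hazard (hypothetical code; elf as in the snippet above): a large aggregate copy can be lowered to a memcpy() call, and before relocations that call may be routed through an unrelocated GOT entry.

```zig
fn copyPhdrs(dst: []elf.Phdr, src: []const elf.Phdr) void {
    // Proposed fix: force the copy to stay inline instead of becoming a
    // memcpy() call that would fault before relocations are applied.
    @disableIntrinsics();
    for (dst, src) |*d, s| d.* = s;
}
```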

@andrewrk
Member

I see, thanks for elaborating 👍

@alexrp alexrp closed this as completed in 6ba7855 Feb 24, 2025
emar-kar pushed a commit to emar-kar/zig that referenced this issue Feb 24, 2025
a-khabarov pushed a commit to a-khabarov/zig that referenced this issue Feb 25, 2025
@alexrp alexrp removed their assignment Mar 9, 2025