Skip to content

stage2 segfault slicing zero length array field of struct #11787

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Vexu opened this issue Jun 3, 2022 · 2 comments
Closed

stage2 segfault slicing zero length array field of struct #11787

Vexu opened this issue Jun 3, 2022 · 2 comments
Labels
bug Observed behavior contradicts documented or intended behavior frontend Tokenization, parsing, AstGen, Sema, and Liveness.
Milestone

Comments

@Vexu
Copy link
Member

Vexu commented Jun 3, 2022

pub export fn entry() void {
    const S = struct {
        a: [0]usize,
        fn foo(self: *@This(), start: usize, end: usize) []usize {
            return self.a[start..end];
        }
    };
    var s: S = undefined;
    _ = s.foo(0, 0);
}

Segfault happens in LLVM when doing buildInBoundsGEP in airPtrAdd.

@Vexu Vexu added bug Observed behavior contradicts documented or intended behavior frontend Tokenization, parsing, AstGen, Sema, and Liveness. labels Jun 3, 2022
@Vexu Vexu added this to the 0.10.0 milestone Jun 3, 2022
@martinhath
Copy link
Contributor

I've done a bunch of digging, and while I think I have narrowed the problem down to at least understand what's happening, I'm not sure what the best approach is for a fix.
Issue #6706 seems very related.

The segfault comes from a failed assertion:

zig: /home/mht/src/llvm-project-13/llvm/include/llvm/IR/Instructions.h:923: llvm::Type* llvm::checkGEPType(Type*): Assertion `Ty && "Invalid GetElementPtrInst indices for type!"' failed.

in the call to buildInBoundsGEP

zig/src/codegen/llvm.zig

Lines 6609 to 6614 in a12abc6

if (ptr_ty.ptrSize() == .One) {
// It's a pointer to an array, so according to LLVM we need an extra GEP index.
const indices: [2]*const llvm.Value = .{
self.context.intType(32).constNull(), negative_offset,
};
return self.builder.buildInBoundsGEP(base_ptr, &indices, indices.len, "");

base_ptr and offset have these values:

(gdb) p (('llvm::Value' *) base_ptr)->dump()
  %5 = getelementptr inbounds i8, i8* %4, i64 1, !dbg !22
(gdb) p (('llvm::Value' *) base_ptr)->getType()->dump()
i8*
(gdb) p (('llvm::Value' *) offset)->dump()
i64 %1

There were two things that I was confused about: (1) why base_ptr is a i8*, and (2) why we've got a i64 1 in the GEP.

(1) is due to

zig/src/codegen/llvm.zig

Lines 2561 to 2570 in a12abc6

const lower_elem_ty = switch (elem_ty.zigTypeTag()) {
.Opaque, .Fn => true,
.Array => elem_ty.childType().hasRuntimeBitsIgnoreComptime(),
else => elem_ty.hasRuntimeBitsIgnoreComptime(),
};
const llvm_elem_ty = if (lower_elem_ty)
try dg.lowerType(elem_ty)
else
dg.context.intType(8);
return llvm_elem_ty.pointerType(llvm_addrspace);

I.e. we're lowering pointers to all zero-sized types to *i8.

(2) is due to

zig/src/codegen/llvm.zig

Lines 8899 to 8911 in a12abc6

var ty_buf: Type.Payload.Pointer = undefined;
if (llvmFieldIndex(struct_ty, field_index, target, &ty_buf)) |llvm_field_index| {
return self.builder.buildStructGEP(struct_ptr, llvm_field_index, "");
} else {
// If we found no index then this means this is a zero sized field at the
// end of the struct. Treat our struct pointer as an array of two and get
// the index to the element at index `1` to get a pointer to the end of
// the struct.
const llvm_usize = try self.dg.lowerType(Type.usize);
const llvm_index = llvm_usize.constInt(1, .False);
const indices: [1]*const llvm.Value = .{llvm_index};
return self.builder.buildInBoundsGEP(struct_ptr, &indices, indices.len, "");
}

From what I can tell, LLVM is complaining about the indices because we're indexing with a i32 (which is for struct fields) and then a i64 (pointer offsets), and our base type is just a *i8, so the first indexing doesn't really make sense.

The lowering to *i8 does not happen if we have a pointer directly to the zero-sized array though; in that case it is lowered to a [0 x i64]*, which I think is telling that we're doing something weird here.

Code to reproduce
const S = struct {
    a: [0]usize,
};

fn foo(s: *S, start: usize, end: usize) []usize {
    return s.a[start..end];
}

pub export fn entry() void {
    var s = S{ .a = .{}, };
    _ = foo(&s, 0, 0);
}
AIR for the snippet
%0 = arg(*11787.S)
%1 = arg(usize)
%2 = arg(usize)
%3!= dbg_block_begin()
%4!= dbg_stmt(2:5)
%6 = alloc(**11787.S)
%8!= store(%6, %0!)
%10 = bitcast(*const *11787.S, %6!)
%12 = load(*11787.S, %10!)
%14 = struct_field_ptr_index_0(*[0]usize, %12!)
%16 = ptr_add(%14!, %1)
%18 = sub_with_overflow(%2, %1)
%19 = struct_field_val(%18, 1)
%21 = cmp_eq(%19!, %20!)
%41!= block(void, {
  %42!= cond_br(%21!, {
    %24! %31! %39!
    %43!= br(%41, @Zir.Inst.Ref.void_value)
  }, {
    %40!= call(%31!, [%24!, %39!])
  })
})
%44 = struct_field_val(%18!, 0)
%46 = cmp_lte(%2, %45)
%55!= block(void, {
  %56!= cond_br(%46!, {
    %45!
    %57!= br(%55, @Zir.Inst.Ref.void_value)
  }, {
    %54!= call(%53, [%2, %45!])
  })
})
%58 = cmp_lte(%1, %2)
%63!= block(void, {
  %64!= cond_br(%58!, {
    %1! %53! %2!
    %65!= br(%63, @Zir.Inst.Ref.void_value)
  }, {
    %62!= call(%53!, [%1!, %2!])
  })
})
%67 = slice(%16!, %44!)
%68!= dbg_stmt(2:5)
%69!= dbg_block_end()
%70!= ret(%67!)
Full stack trace
Thread 1 "zig" received signal SIGABRT, Aborted.
0x00007ffff7c8e36c in ?? () from /lib64/libc.so.6
(gdb) bt
#0  0x00007ffff7c8e36c in ?? () from /lib64/libc.so.6
#1  0x00007ffff7c3e838 in raise () from /lib64/libc.so.6
#2  0x00007ffff7c28535 in abort () from /lib64/libc.so.6
#3  0x00007ffff7c2845c in ?? () from /lib64/libc.so.6
#4  0x00007ffff7c37366 in __assert_fail () from /lib64/libc.so.6
#5  0x00000000082df6ab in llvm::checkGEPType (Ty=0x0) at /home/mht/src/llvm-project-13/llvm/include/llvm/IR/Instructions.h:923
#6  0x00000000082df88f in llvm::GetElementPtrInst::getGEPReturnType (ElTy=0xff16aa8, Ptr=0xff510c0, IdxList=...)
    at /home/mht/src/llvm-project-13/llvm/include/llvm/IR/Instructions.h:1080
#7  0x00000000082dfa25 in llvm::GetElementPtrInst::GetElementPtrInst (this=0xff4d750, PointeeType=0xff16aa8, Ptr=0xff510c0,
    IdxList=..., Values=3, NameStr=..., InsertBefore=0x0) at /home/mht/src/llvm-project-13/llvm/include/llvm/IR/Instructions.h:1157
#8  0x00000000082df7a9 in llvm::GetElementPtrInst::Create (PointeeType=0xff16aa8, Ptr=0xff510c0, IdxList=..., NameStr=...,
    InsertBefore=0x0) at /home/mht/src/llvm-project-13/llvm/include/llvm/IR/Instructions.h:965
#9  0x00000000082df7ff in llvm::GetElementPtrInst::CreateInBounds (PointeeType=0xff16aa8, Ptr=0xff510c0, IdxList=..., NameStr=...,
    InsertBefore=0x0) at /home/mht/src/llvm-project-13/llvm/include/llvm/IR/Instructions.h:987
#10 0x0000000008af4e16 in llvm::IRBuilderBase::CreateInBoundsGEP (this=0xff47110, Ty=0xff16aa8, Ptr=0xff510c0, IdxList=...,
    Name=...) at /home/mht/src/llvm-project-13/llvm/include/llvm/IR/IRBuilder.h:1746
#11 0x000000000a6c382c in LLVMBuildInBoundsGEP (B=0xff47110, Pointer=0xff510c0, Indices=0x7fffffff20e8, NumIndices=2,
    Name=0x32c527 <__unnamed_1900> "") at /home/mht/src/llvm-project-13/llvm/lib/IR/Core.cpp:3714
#12 0x00000000074c2bf9 in codegen.llvm.FuncGen.airPtrAdd (self=0x7fffffff37c0, inst=16)
    at /home/mht/src/zig/src/codegen/llvm.zig:6593
#13 0x00000000074b01f4 in codegen.llvm.FuncGen.genBody (self=0x7fffffff37c0, body=...)
    at /home/mht/src/zig/src/codegen/llvm.zig:4104
#14 0x00000000074ad88c in codegen.llvm.Object.updateFunc (o=0xff16050, module=0xff14db8, func=0xff47e80, air=..., liveness=...)
    at /home/mht/src/zig/src/codegen/llvm.zig:1026
#15 0x000000000725f6d8 in link.Elf.updateFunc (self=0xff15950, module=0xff14db8, func=0xff47e80, air=..., liveness=...)
    at /home/mht/src/zig/src/link/Elf.zig:2386
#16 0x000000000707d25e in link.File.updateFunc (base=0xff15950, module=0xff14db8, func=0xff47e80, air=..., liveness=...)
    at link.zig:510
#17 0x000000000705d503 in Module.ensureFuncBodyAnalyzed (mod=0xff14db8, func=0xff47e80) at Module.zig:4249
#18 0x0000000006d9b49b in Compilation.processOneJob (comp=0xff13d88, job=...) at Compilation.zig:2966
#19 0x0000000006d848fe in Compilation.performAllTheWork (comp=0xff13d88, main_progress_node=0x7fffffff5770) at Compilation.zig:2898
#20 0x0000000006d7cdf4 in Compilation.update (comp=0xff13d88) at Compilation.zig:2238
#21 0x0000000006ceb850 in updateModule (gpa=..., comp=0xff13d88, hook=...) at main.zig:3327
#22 0x0000000006cb4015 in buildOutputType (gpa=..., arena=..., all_args=..., arg_mode=...) at main.zig:3016
#23 0x0000000006c97d12 in mainArgs (gpa=..., arena=..., args=...) at main.zig:234
#24 0x0000000006c96e35 in main () at main.zig:174
#25 0x0000000006c99760 in std.start.callMain () at /home/mht/src/zig/lib/std/start.zig:576
#26 std.start.initEventLoopAndCallMain () at /home/mht/src/zig/lib/std/start.zig:510
#27 std.start.callMainWithArgs (argc=7, argv=0x7fffffffe0f8, envp=...) at /home/mht/src/zig/lib/std/start.zig:460
#28 std.start.main (c_argc=7, c_argv=0x7fffffffe0f8, c_envp=0x7fffffffe138) at /home/mht/src/zig/lib/std/start.zig:475

@davidgmbb
Copy link
Contributor

This is no longer crashing

@andrewrk andrewrk modified the milestones: 0.10.0, 0.10.1 Oct 12, 2022
wooster0 added a commit to wooster0/zig that referenced this issue Dec 7, 2022
wooster0 added a commit to wooster0/zig that referenced this issue Dec 7, 2022
wooster0 added a commit to wooster0/zig that referenced this issue Dec 7, 2022
wooster0 added a commit to wooster0/zig that referenced this issue Dec 7, 2022
wooster0 added a commit to wooster0/zig that referenced this issue Dec 8, 2022
wooster0 added a commit to wooster0/zig that referenced this issue Dec 8, 2022
wooster0 added a commit to wooster0/zig that referenced this issue Dec 9, 2022
kcbanner pushed a commit to kcbanner/zig that referenced this issue Dec 10, 2022
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
bug Observed behavior contradicts documented or intended behavior frontend Tokenization, parsing, AstGen, Sema, and Liveness.
Projects
None yet
Development

No branches or pull requests

4 participants