From ab48934e9cefb510d39ba3fe8c0dcf7619bec4cf Mon Sep 17 00:00:00 2001 From: Ben Noordhuis Date: Mon, 19 Feb 2018 23:06:54 +0100 Subject: [PATCH 1/2] add support for stack traces on macosx Add basic address->symbol resolution support. Uses symtab data from the MachO image, not external dSYM data; that's left as a future exercise. The net effect is that we can now map addresses to function names but not much more. File names and line number data will have to wait until a future pull request. Partially fixes #434. --- CMakeLists.txt | 1 + std/debug/index.zig | 148 ++++++++++++++++++++++-------------- std/index.zig | 2 + std/macho.zig | 177 ++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 271 insertions(+), 57 deletions(-) create mode 100644 std/macho.zig diff --git a/CMakeLists.txt b/CMakeLists.txt index bdc34658302e..2856a2060665 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -386,6 +386,7 @@ set(ZIG_STD_FILES "index.zig" "io.zig" "linked_list.zig" + "macho.zig" "math/acos.zig" "math/acosh.zig" "math/asin.zig" diff --git a/std/debug/index.zig b/std/debug/index.zig index cc4832b1ea82..24186549862a 100644 --- a/std/debug/index.zig +++ b/std/debug/index.zig @@ -5,6 +5,7 @@ const io = std.io; const os = std.os; const elf = std.elf; const DW = std.dwarf; +const macho = std.macho; const ArrayList = std.ArrayList; const builtin = @import("builtin"); @@ -180,43 +181,57 @@ pub fn writeCurrentStackTrace(out_stream: var, allocator: &mem.Allocator, } fn printSourceAtAddress(debug_info: &ElfStackTrace, out_stream: var, address: usize) !void { - if (builtin.os == builtin.Os.windows) { - return error.UnsupportedDebugInfo; - } // TODO we really should be able to convert @sizeOf(usize) * 2 to a string literal // at compile time. I'll call it issue #313 const ptr_hex = if (@sizeOf(usize) == 4) "0x{x8}" else "0x{x16}"; - const compile_unit = findCompileUnit(debug_info, address) catch { - try out_stream.print("???:?:?: " ++ DIM ++ ptr_hex ++ " in ??? 
(???)" ++ RESET ++ "\n ???\n\n", - address); - return; - }; - const compile_unit_name = try compile_unit.die.getAttrString(debug_info, DW.AT_name); - if (getLineNumberInfo(debug_info, compile_unit, address - 1)) |line_info| { - defer line_info.deinit(); - try out_stream.print(WHITE ++ "{}:{}:{}" ++ RESET ++ ": " ++ - DIM ++ ptr_hex ++ " in ??? ({})" ++ RESET ++ "\n", - line_info.file_name, line_info.line, line_info.column, - address, compile_unit_name); - if (printLineFromFile(debug_info.allocator(), out_stream, line_info)) { - if (line_info.column == 0) { - try out_stream.write("\n"); - } else { - {var col_i: usize = 1; while (col_i < line_info.column) : (col_i += 1) { - try out_stream.writeByte(' '); - }} - try out_stream.write(GREEN ++ "^" ++ RESET ++ "\n"); + switch (builtin.os) { + builtin.Os.windows => return error.UnsupportedDebugInfo, + builtin.Os.macosx => { + // TODO(bnoordhuis) It's theoretically possible to obtain the + // compilation unit from the symbtab but it's not that useful + // in practice because the compiler dumps everything in a single + // object file. Future improvement: use external dSYM data when + // available. + const unknown = macho.Symbol { .name = "???", .address = address }; + const symbol = debug_info.symbol_table.search(address) ?? &unknown; + try out_stream.print(WHITE ++ "{}" ++ RESET ++ ": " ++ + DIM ++ ptr_hex ++ " in ??? (???)" ++ RESET ++ "\n", + symbol.name, address); + }, + else => { + const compile_unit = findCompileUnit(debug_info, address) catch { + try out_stream.print("???:?:?: " ++ DIM ++ ptr_hex ++ " in ??? (???)" ++ RESET ++ "\n ???\n\n", + address); + return; + }; + const compile_unit_name = try compile_unit.die.getAttrString(debug_info, DW.AT_name); + if (getLineNumberInfo(debug_info, compile_unit, address - 1)) |line_info| { + defer line_info.deinit(); + try out_stream.print(WHITE ++ "{}:{}:{}" ++ RESET ++ ": " ++ + DIM ++ ptr_hex ++ " in ??? 
({})" ++ RESET ++ "\n", + line_info.file_name, line_info.line, line_info.column, + address, compile_unit_name); + if (printLineFromFile(debug_info.allocator(), out_stream, line_info)) { + if (line_info.column == 0) { + try out_stream.write("\n"); + } else { + {var col_i: usize = 1; while (col_i < line_info.column) : (col_i += 1) { + try out_stream.writeByte(' '); + }} + try out_stream.write(GREEN ++ "^" ++ RESET ++ "\n"); + } + } else |err| switch (err) { + error.EndOfFile => {}, + else => return err, + } + } else |err| switch (err) { + error.MissingDebugInfo, error.InvalidDebugInfo => { + try out_stream.print(ptr_hex ++ " in ??? ({})\n", address, compile_unit_name); + }, + else => return err, } - } else |err| switch (err) { - error.EndOfFile => {}, - else => return err, - } - } else |err| switch (err) { - error.MissingDebugInfo, error.InvalidDebugInfo => { - try out_stream.print(ptr_hex ++ " in ??? ({})\n", address, compile_unit_name); }, - else => return err, } } @@ -249,12 +264,22 @@ pub fn openSelfDebugInfo(allocator: &mem.Allocator) !&ElfStackTrace { try scanAllCompileUnits(st); return st; }, + builtin.ObjectFormat.macho => { + var exe_file = try os.openSelfExe(); + defer exe_file.close(); + + const st = try allocator.create(ElfStackTrace); + errdefer allocator.destroy(st); + + *st = ElfStackTrace { + .symbol_table = try macho.loadSymbols(allocator, &io.FileInStream.init(&exe_file)), + }; + + return st; + }, builtin.ObjectFormat.coff => { return error.TodoSupportCoffDebugInfo; }, - builtin.ObjectFormat.macho => { - return error.TodoSupportMachoDebugInfo; - }, builtin.ObjectFormat.wasm => { return error.TodoSupportCOFFDebugInfo; }, @@ -297,31 +322,40 @@ fn printLineFromFile(allocator: &mem.Allocator, out_stream: var, line_info: &con } } -pub const ElfStackTrace = struct { - self_exe_file: os.File, - elf: elf.Elf, - debug_info: &elf.SectionHeader, - debug_abbrev: &elf.SectionHeader, - debug_str: &elf.SectionHeader, - debug_line: &elf.SectionHeader, - 
debug_ranges: ?&elf.SectionHeader, - abbrev_table_list: ArrayList(AbbrevTableHeader), - compile_unit_list: ArrayList(CompileUnit), - - pub fn allocator(self: &const ElfStackTrace) &mem.Allocator { - return self.abbrev_table_list.allocator; - } +pub const ElfStackTrace = switch (builtin.os) { + builtin.Os.macosx => struct { + symbol_table: macho.SymbolTable, - pub fn readString(self: &ElfStackTrace) ![]u8 { - var in_file_stream = io.FileInStream.init(&self.self_exe_file); - const in_stream = &in_file_stream.stream; - return readStringRaw(self.allocator(), in_stream); - } + pub fn close(self: &ElfStackTrace) void { + self.symbol_table.deinit(); + } + }, + else => struct { + self_exe_file: os.File, + elf: elf.Elf, + debug_info: &elf.SectionHeader, + debug_abbrev: &elf.SectionHeader, + debug_str: &elf.SectionHeader, + debug_line: &elf.SectionHeader, + debug_ranges: ?&elf.SectionHeader, + abbrev_table_list: ArrayList(AbbrevTableHeader), + compile_unit_list: ArrayList(CompileUnit), + + pub fn allocator(self: &const ElfStackTrace) &mem.Allocator { + return self.abbrev_table_list.allocator; + } - pub fn close(self: &ElfStackTrace) void { - self.self_exe_file.close(); - self.elf.close(); - } + pub fn readString(self: &ElfStackTrace) ![]u8 { + var in_file_stream = io.FileInStream.init(&self.self_exe_file); + const in_stream = &in_file_stream.stream; + return readStringRaw(self.allocator(), in_stream); + } + + pub fn close(self: &ElfStackTrace) void { + self.self_exe_file.close(); + self.elf.close(); + } + }, }; const PcRange = struct { diff --git a/std/index.zig b/std/index.zig index 8d292c2f5c4d..179eae159ec5 100644 --- a/std/index.zig +++ b/std/index.zig @@ -21,6 +21,7 @@ pub const endian = @import("endian.zig"); pub const fmt = @import("fmt/index.zig"); pub const heap = @import("heap.zig"); pub const io = @import("io.zig"); +pub const macho = @import("macho.zig"); pub const math = @import("math/index.zig"); pub const mem = @import("mem.zig"); pub const net = 
@import("net.zig");
@@ -51,6 +52,7 @@ test "std" {
     _ = @import("endian.zig");
     _ = @import("fmt/index.zig");
     _ = @import("io.zig");
+    _ = @import("macho.zig");
     _ = @import("math/index.zig");
     _ = @import("mem.zig");
     _ = @import("heap.zig");
diff --git a/std/macho.zig b/std/macho.zig
new file mode 100644
index 000000000000..05239bf19188
--- /dev/null
+++ b/std/macho.zig
@@ -0,0 +1,208 @@
+const builtin = @import("builtin");
+const std = @import("index.zig");
+const io = std.io;
+const mem = std.mem;
+
+// Mach-O constants: the magic of a 64-bit image header, the header flag of a
+// position-independent executable and the id of the symtab load command.
+const MH_MAGIC_64 = 0xFEEDFACF;
+const MH_PIE = 0x200000;
+const LC_SYMTAB = 2;
+
+// Image header; loadSymbols() reads it from file offset 0.
+const MachHeader64 = packed struct {
+    magic: u32,
+    cputype: u32,
+    cpusubtype: u32,
+    filetype: u32,
+    ncmds: u32,
+    sizeofcmds: u32,
+    flags: u32,
+    reserved: u32,
+};
+
+// Prefix shared by all load commands; cmdsize is the step to the next one.
+const LoadCommand = packed struct {
+    cmd: u32,
+    cmdsize: u32,
+};
+
+// Payload of an LC_SYMTAB command, read right after its LoadCommand prefix.
+const SymtabCommand = packed struct {
+    symoff: u32,
+    nsyms: u32,
+    stroff: u32,
+    strsize: u32,
+};
+
+// One raw symbol table entry; n_strx is an offset into the string table.
+const Nlist64 = packed struct {
+    n_strx: u32,
+    n_type: u8,
+    n_sect: u8,
+    n_desc: u16,
+    n_value: u64,
+};
+
+// A resolved symbol. `name` is a slice into SymbolTable.strings.
+pub const Symbol = struct {
+    name: []const u8,
+    address: u64,
+
+    fn addressLessThan(lhs: &const Symbol, rhs: &const Symbol) bool {
+        return lhs.address < rhs.address;
+    }
+};
+
+// Symbols sorted by address plus the string table backing their names.
+// The last entry of `symbols` is a sentinel, see loadSymbols().
+pub const SymbolTable = struct {
+    allocator: &mem.Allocator,
+    symbols: []const Symbol,
+    strings: []const u8,
+
+    // Frees both slices and leaves the table empty.
+    //
+    // Doubles as an eyecatcher to calculate the PIE slide, see loadSymbols().
+    // Ideally we'd use _mh_execute_header because it's always at 0x100000000
+    // in the image but as it's located in a different section than executable
+    // code, its displacement is different.
+    pub fn deinit(self: &SymbolTable) void {
+        self.allocator.free(self.symbols);
+        self.symbols = []const Symbol {};
+
+        self.allocator.free(self.strings);
+        self.strings = []const u8 {};
+    }
+
+    // Returns the symbol whose range contains `address`, or null. A range
+    // runs from the symbol's address up to, but excluding, the next entry's.
+    pub fn search(self: &const SymbolTable, address: usize) ?&const Symbol {
+        // An empty table (e.g. after deinit()) has no sentinel to exclude;
+        // return early instead of underflowing `len - 1` below.
+        if (self.symbols.len == 0) return null;
+
+        var min: usize = 0;
+        var max: usize = self.symbols.len - 1; // Exclude sentinel.
+        while (min < max) {
+            const mid = min + (max - min) / 2;
+            const curr = &self.symbols[mid];
+            const next = &self.symbols[mid + 1];
+            if (address >= next.address) {
+                min = mid + 1;
+            } else if (address < curr.address) {
+                max = mid;
+            } else {
+                return curr;
+            }
+        }
+        return null;
+    }
+};
+
+// Reads the LC_SYMTAB symbol table of the 64-bit Mach-O image behind `in`.
+// The caller owns the result and releases it with SymbolTable.deinit().
+// Returns error.MissingDebugInfo when the magic is not MH_MAGIC_64 or there
+// is no LC_SYMTAB command or no usable symbol, error.InvalidDebugInfo when a
+// symbol name is not NUL-terminated inside the string table; I/O and
+// allocation errors are passed through.
+pub fn loadSymbols(allocator: &mem.Allocator, in: &io.FileInStream) !SymbolTable {
+    var file = in.file;
+    try file.seekTo(0);
+
+    var hdr: MachHeader64 = undefined;
+    try readNoEof(in, &hdr);
+    if (hdr.magic != MH_MAGIC_64) return error.MissingDebugInfo;
+    const is_pie = MH_PIE == (hdr.flags & MH_PIE);
+
+    var pos: usize = @sizeOf(@typeOf(hdr));
+    var ncmd: u32 = hdr.ncmds;
+    while (ncmd != 0) : (ncmd -= 1) {
+        try file.seekTo(pos);
+        var lc: LoadCommand = undefined;
+        try readNoEof(in, &lc);
+        if (lc.cmd == LC_SYMTAB) break;
+        pos += lc.cmdsize;
+    } else {
+        return error.MissingDebugInfo;
+    }
+
+    var cmd: SymtabCommand = undefined;
+    try readNoEof(in, &cmd);
+
+    try file.seekTo(cmd.symoff);
+    var syms = try allocator.alloc(Nlist64, cmd.nsyms);
+    defer allocator.free(syms);
+    try readNoEof(in, syms);
+
+    try file.seekTo(cmd.stroff);
+    var strings = try allocator.alloc(u8, cmd.strsize);
+    errdefer allocator.free(strings);
+    try in.stream.readNoEof(strings);
+
+    var nsyms: usize = 0;
+    for (syms) |sym| if (isSymbol(sym)) nsyms += 1;
+    if (nsyms == 0) return error.MissingDebugInfo;
+
+    var symbols = try allocator.alloc(Symbol, nsyms + 1); // Room for sentinel.
+    errdefer allocator.free(symbols);
+
+    var pie_slide: usize = 0;
+    var nsym: usize = 0;
+    for (syms) |sym| {
+        if (!isSymbol(sym)) continue;
+        const start = sym.n_strx;
+        // A bad n_strx or an unterminated string table must not crash the
+        // stack trace printer; report it as invalid debug info instead.
+        const end = mem.indexOfScalarPos(u8, strings, start, 0) ?? return error.InvalidDebugInfo;
+        const name = strings[start..end];
+        const address = sym.n_value;
+        symbols[nsym] = Symbol { .name = name, .address = address };
+        nsym += 1;
+        if (is_pie and mem.eql(u8, name, "_SymbolTable_deinit")) {
+            pie_slide = @ptrToInt(SymbolTable.deinit) - address;
+        }
+    }
+
+    // Effectively a no-op, lld emits symbols in ascending order.
+    std.sort.insertionSort(Symbol, symbols[0..nsyms], Symbol.addressLessThan);
+
+    // Insert the sentinel. Since we don't know where the last function ends,
+    // we arbitrarily limit it to the start address + 4 KB.
+    const top = symbols[nsyms - 1].address + 4096;
+    symbols[nsyms] = Symbol { .name = "", .address = top };
+
+    if (pie_slide != 0) {
+        for (symbols) |*symbol| symbol.address += pie_slide;
+    }
+
+    return SymbolTable {
+        .allocator = allocator,
+        .symbols = symbols,
+        .strings = strings,
+    };
+}
+
+// Fills `sink` with raw bytes read from `in`. `sink` is either a []Nlist64
+// slice or a pointer to a single struct.
+fn readNoEof(in: &io.FileInStream, sink: var) !void {
+    if (@typeOf(sink) == []Nlist64) {
+        // An empty slice needs no read, and &sink[0] would be out of bounds.
+        if (sink.len == 0) return;
+        const T = @typeOf(sink[0]);
+        const len = @sizeOf(T) * sink.len;
+        const bytes = @ptrCast(&u8, &sink[0]);
+        return in.stream.readNoEof(bytes[0..len]);
+    } else {
+        const T = @typeOf(*sink);
+        const len = @sizeOf(T);
+        const bytes = @ptrCast(&u8, sink);
+        return in.stream.readNoEof(bytes[0..len]);
+    }
+}
+
+// Keeps only the raw entries that end up in the table: those with a
+// non-zero n_value and a zero n_desc.
+fn isSymbol(sym: &const Nlist64) bool {
+    return sym.n_value != 0 and sym.n_desc == 0;
+}

From 2b35615ffbe238c8ec421654a7e1ae0890477fe0 Mon Sep 17 00:00:00 2001
From: Ben Noordhuis
Date: Mon, 19 Feb 2018 23:06:54 +0100
Subject: [PATCH 2/2] fix memory leak in std.debug.openSelfDebugInfo()

---
 std/debug/index.zig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/std/debug/index.zig b/std/debug/index.zig
index 24186549862a..5de201b0e66e 100644
--- a/std/debug/index.zig
+++ b/std/debug/index.zig
@@ -239,6 +239,7 @@ pub fn openSelfDebugInfo(allocator: &mem.Allocator) !&ElfStackTrace {
     switch (builtin.object_format) {
         builtin.ObjectFormat.elf => {
             const st = try allocator.create(ElfStackTrace);
+            errdefer allocator.destroy(st);
             *st = ElfStackTrace {
                 .self_exe_file = undefined,
                 .elf = undefined,