Skip to content

debug info: resolve relative paths to source files into absolute paths #13843

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Dec 9, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 0 additions & 25 deletions src/Compilation.zig
Original file line number Diff line number Diff line change
Expand Up @@ -1035,9 +1035,6 @@ pub const InitOptions = struct {
/// (Darwin) remove dylibs that are unreachable by the entry point or exported symbols
dead_strip_dylibs: bool = false,
libcxx_abi_version: libcxx.AbiVersion = libcxx.AbiVersion.default,
/// (Windows) PDB source path prefix to instruct the linker how to resolve relative
/// paths when consolidating CodeView streams into a single PDB file.
pdb_source_path: ?[]const u8 = null,
};

fn addPackageTableToCacheHash(
Expand Down Expand Up @@ -1740,27 +1737,6 @@ pub fn create(gpa: Allocator, options: InitOptions) !*Compilation {
};
};

const pdb_source_path: ?[]const u8 = options.pdb_source_path orelse blk: {
if (builtin.target.os.tag == .windows) {
// PDB requires all file paths to be fully resolved, and it is really the
// linker's responsibility to canonicalize any path extracted from the CodeView
// in the object file. However, LLD-link has some very questionable defaults, and
// in particular, it purposely bakes in path separator of the host system it was
// built on rather than the targets, or just throw an error. Thankfully, they have
// left a backdoor we can use via -PDBSOURCEPATH.
const mod = module orelse break :blk null;
var buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined;
const resolved_path = if (mod.main_pkg.root_src_directory.path) |base_path| p: {
if (std.fs.path.isAbsolute(base_path)) break :blk base_path;
const resolved_path = std.os.realpath(base_path, &buffer) catch break :blk null;
const pos = std.mem.lastIndexOfLinear(u8, resolved_path, base_path) orelse resolved_path.len;
break :p resolved_path[0..pos];
} else std.os.realpath(".", &buffer) catch break :blk null;
break :blk try arena.dupe(u8, resolved_path);
}
break :blk null;
};

const implib_emit: ?link.Emit = blk: {
const emit_implib = options.emit_implib orelse break :blk null;

Expand Down Expand Up @@ -1906,7 +1882,6 @@ pub fn create(gpa: Allocator, options: InitOptions) !*Compilation {
.headerpad_max_install_names = options.headerpad_max_install_names,
.dead_strip_dylibs = options.dead_strip_dylibs,
.force_undefined_symbols = .{},
.pdb_source_path = pdb_source_path,
});
errdefer bin_file.destroy();
comp.* = .{
Expand Down
44 changes: 25 additions & 19 deletions src/codegen/llvm.zig
Original file line number Diff line number Diff line change
Expand Up @@ -443,20 +443,19 @@ pub const Object = struct {
});
defer gpa.free(producer);

// For macOS stack traces, we want to avoid having to parse the compilation unit debug
// info. As long as each debug info file has a path independent of the compilation unit
// directory (DW_AT_comp_dir), then we never have to look at the compilation unit debug
// info. If we provide an absolute path to LLVM here for the compilation unit debug
// info, LLVM will emit DWARF info that depends on DW_AT_comp_dir. To avoid this, we
// pass "." for the compilation unit directory. This forces each debug file to have a
// directory rather than be relative to DW_AT_comp_dir. According to DWARF 5, debug
// files will no longer reference DW_AT_comp_dir, for the purpose of being able to
// support the common practice of stripping all but the line number sections from an
// executable.
const compile_unit_dir = d: {
if (options.target.isDarwin()) break :d ".";
const mod = options.module orelse break :d ".";
break :d mod.root_pkg.root_src_directory.path orelse ".";
// We fully resolve all paths at this point to avoid lack of source line info in stack
// traces or lack of debugging information which, if relative paths were used, would
// be very location dependent.
// TODO: the only concern I have with this is WASI as either host or target, should
// we leave the paths as relative then?
var buf: [std.fs.MAX_PATH_BYTES]u8 = undefined;
const compile_unit_dir = blk: {
const path = d: {
const mod = options.module orelse break :d ".";
break :d mod.root_pkg.root_src_directory.path orelse ".";
};
if (std.fs.path.isAbsolute(path)) break :blk path;
break :blk std.os.realpath(path, &buf) catch path; // If realpath fails, fallback to whatever path was
};
const compile_unit_dir_z = try gpa.dupeZ(u8, compile_unit_dir);
defer gpa.free(compile_unit_dir_z);
Expand Down Expand Up @@ -1389,13 +1388,20 @@ pub const Object = struct {
if (gop.found_existing) {
return @ptrCast(*llvm.DIFile, gop.value_ptr.*);
}
const dir_path = file.pkg.root_src_directory.path orelse ".";
const dir_path_z = d: {
var buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined;
const dir_path = file.pkg.root_src_directory.path orelse ".";
const resolved_dir_path = if (std.fs.path.isAbsolute(dir_path))
dir_path
else
std.os.realpath(dir_path, &buffer) catch dir_path; // If realpath fails, fallback to whatever dir_path was
break :d try std.fs.path.joinZ(gpa, &.{
resolved_dir_path, std.fs.path.dirname(file.sub_file_path) orelse "",
});
};
defer gpa.free(dir_path_z);
const sub_file_path_z = try gpa.dupeZ(u8, std.fs.path.basename(file.sub_file_path));
defer gpa.free(sub_file_path_z);
const dir_path_z = try std.fs.path.joinZ(gpa, &.{
dir_path, std.fs.path.dirname(file.sub_file_path) orelse "",
});
defer gpa.free(dir_path_z);
const di_file = o.di_builder.?.createFile(sub_file_path_z, dir_path_z);
gop.value_ptr.* = di_file.toNode();
return di_file;
Expand Down
4 changes: 0 additions & 4 deletions src/link.zig
Original file line number Diff line number Diff line change
Expand Up @@ -217,10 +217,6 @@ pub const Options = struct {
/// (Darwin) remove dylibs that are unreachable by the entry point or exported symbols
dead_strip_dylibs: bool = false,

/// (Windows) PDB source path prefix to instruct the linker how to resolve relative
/// paths when consolidating CodeView streams into a single PDB file.
pdb_source_path: ?[]const u8 = null,

pub fn effectiveOutputMode(options: Options) std.builtin.OutputMode {
return if (options.use_lld) .Obj else options.output_mode;
}
Expand Down
4 changes: 0 additions & 4 deletions src/link/Coff/lld.zig
Original file line number Diff line number Diff line change
Expand Up @@ -171,10 +171,6 @@ pub fn linkWithLLD(self: *Coff, comp: *Compilation, prog_node: *std.Progress.Nod
});
try argv.append(try allocPrint(arena, "-PDB:{s}", .{out_pdb}));
try argv.append(try allocPrint(arena, "-PDBALTPATH:{s}", .{out_pdb}));

if (self.base.options.pdb_source_path) |path| {
try argv.append(try std.fmt.allocPrint(arena, "-PDBSOURCEPATH:{s}", .{path}));
}
}
if (self.base.options.lto) {
switch (self.base.options.optimize_mode) {
Expand Down
54 changes: 27 additions & 27 deletions src/link/Dwarf.zig
Original file line number Diff line number Diff line change
Expand Up @@ -1771,7 +1771,8 @@ pub fn writeDbgInfoHeader(self: *Dwarf, module: *Module, low_pc: u64, high_pc: u
}
// Write the form for the compile unit, which must match the abbrev table above.
const name_strp = try self.makeString(module.root_pkg.root_src_path);
const compile_unit_dir = self.getCompDir(module);
var compile_unit_dir_buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined;
const compile_unit_dir = resolveCompilationDir(module, &compile_unit_dir_buffer);
const comp_dir_strp = try self.makeString(compile_unit_dir);
const producer_strp = try self.makeString(link.producer_string);

Expand Down Expand Up @@ -1823,19 +1824,15 @@ pub fn writeDbgInfoHeader(self: *Dwarf, module: *Module, low_pc: u64, high_pc: u
}
}

fn getCompDir(self: Dwarf, module: *Module) []const u8 {
// For macOS stack traces, we want to avoid having to parse the compilation unit debug
// info. As long as each debug info file has a path independent of the compilation unit
// directory (DW_AT_comp_dir), then we never have to look at the compilation unit debug
// info. If we provide an absolute path to LLVM here for the compilation unit debug
// info, LLVM will emit DWARF info that depends on DW_AT_comp_dir. To avoid this, we
// pass "." for the compilation unit directory. This forces each debug file to have a
// directory rather than be relative to DW_AT_comp_dir. According to DWARF 5, debug
// files will no longer reference DW_AT_comp_dir, for the purpose of being able to
// support the common practice of stripping all but the line number sections from an
// executable.
if (self.bin_file.tag == .macho) return ".";
return module.root_pkg.root_src_directory.path orelse ".";
fn resolveCompilationDir(module: *Module, buffer: *[std.fs.MAX_PATH_BYTES]u8) []const u8 {
// We fully resolve all paths at this point to avoid lack of source line info in stack
// traces or lack of debugging information which, if relative paths were used, would
// be very location dependent.
// TODO: the only concern I have with this is WASI as either host or target, should
// we leave the paths as relative then?
const comp_dir_path = module.root_pkg.root_src_directory.path orelse ".";
if (std.fs.path.isAbsolute(comp_dir_path)) return comp_dir_path;
return std.os.realpath(comp_dir_path, buffer) catch comp_dir_path; // If realpath fails, fallback to whatever comp_dir_path was
}

fn writeAddrAssumeCapacity(self: *Dwarf, buf: *std.ArrayList(u8), addr: u64) void {
Expand Down Expand Up @@ -2149,7 +2146,7 @@ pub fn writeDbgAranges(self: *Dwarf, addr: u64, size: u64) !void {
}
}

pub fn writeDbgLineHeader(self: *Dwarf, module: *Module) !void {
pub fn writeDbgLineHeader(self: *Dwarf) !void {
const gpa = self.allocator;

const ptr_width_bytes: u8 = self.ptrWidthBytes();
Expand All @@ -2167,7 +2164,7 @@ pub fn writeDbgLineHeader(self: *Dwarf, module: *Module) !void {
// Convert all input DI files into a set of include dirs and file names.
var arena = std.heap.ArenaAllocator.init(gpa);
defer arena.deinit();
const paths = try self.genIncludeDirsAndFileNames(arena.allocator(), module);
const paths = try self.genIncludeDirsAndFileNames(arena.allocator());

// The size of this header is variable, depending on the number of directories,
// files, and padding. We have a function to compute the upper bound size, however,
Expand Down Expand Up @@ -2551,7 +2548,7 @@ fn addDIFile(self: *Dwarf, mod: *Module, decl_index: Module.Decl.Index) !u28 {
return @intCast(u28, gop.index + 1);
}

fn genIncludeDirsAndFileNames(self: *Dwarf, arena: Allocator, module: *Module) !struct {
fn genIncludeDirsAndFileNames(self: *Dwarf, arena: Allocator) !struct {
dirs: []const []const u8,
files: []const []const u8,
files_dirs_indexes: []u28,
Expand All @@ -2565,19 +2562,22 @@ fn genIncludeDirsAndFileNames(self: *Dwarf, arena: Allocator, module: *Module) !
var files_dir_indexes = std.ArrayList(u28).init(arena);
try files_dir_indexes.ensureTotalCapacity(self.di_files.count());

const comp_dir = self.getCompDir(module);

for (self.di_files.keys()) |dif| {
const full_path = try dif.fullPath(arena);
const dir_path = std.fs.path.dirname(full_path) orelse ".";
const sub_file_path = std.fs.path.basename(full_path);
const dir_path = d: {
var buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined;
const dir_path = dif.pkg.root_src_directory.path orelse ".";
const abs_dir_path = if (std.fs.path.isAbsolute(dir_path))
dir_path
else
std.os.realpath(dir_path, &buffer) catch dir_path; // If realpath fails, fallback to whatever dir_path was
break :d try std.fs.path.join(arena, &.{
abs_dir_path, std.fs.path.dirname(dif.sub_file_path) orelse "",
});
};
const sub_file_path = try arena.dupe(u8, std.fs.path.basename(dif.sub_file_path));

const dir_index: u28 = blk: {
const actual_dir_path = if (mem.indexOf(u8, dir_path, comp_dir)) |_| inner: {
if (comp_dir.len == dir_path.len) break :blk 0;
break :inner dir_path[comp_dir.len + 1 ..];
} else dir_path;
const dirs_gop = dirs.getOrPutAssumeCapacity(actual_dir_path);
const dirs_gop = dirs.getOrPutAssumeCapacity(dir_path);
break :blk @intCast(u28, dirs_gop.index + 1);
};

Expand Down
2 changes: 1 addition & 1 deletion src/link/Elf.zig
Original file line number Diff line number Diff line change
Expand Up @@ -1149,7 +1149,7 @@ pub fn flushModule(self: *Elf, comp: *Compilation, prog_node: *std.Progress.Node
}

if (self.debug_line_header_dirty) {
try dw.writeDbgLineHeader(module);
try dw.writeDbgLineHeader();
self.debug_line_header_dirty = false;
}
}
Expand Down
2 changes: 1 addition & 1 deletion src/link/MachO/DebugSymbols.zig
Original file line number Diff line number Diff line change
Expand Up @@ -281,7 +281,7 @@ pub fn flushModule(self: *DebugSymbols, macho_file: *MachO) !void {
}

if (self.debug_line_header_dirty) {
try self.dwarf.writeDbgLineHeader(module);
try self.dwarf.writeDbgLineHeader();
self.debug_line_header_dirty = false;
}

Expand Down
2 changes: 1 addition & 1 deletion src/link/Wasm.zig
Original file line number Diff line number Diff line change
Expand Up @@ -2672,7 +2672,7 @@ pub fn flushModule(wasm: *Wasm, comp: *Compilation, prog_node: *std.Progress.Nod
// as locations are always offsets relative to 'code' section.
try dwarf.writeDbgInfoHeader(mod, 0, code_section_size);
try dwarf.writeDbgAranges(0, code_section_size);
try dwarf.writeDbgLineHeader(mod);
try dwarf.writeDbgLineHeader();
}

var debug_bytes = std.ArrayList(u8).init(wasm.base.allocator);
Expand Down