diff --git a/README.md b/README.md index 51c39f079679..7c4adbf82aea 100644 --- a/README.md +++ b/README.md @@ -13,11 +13,9 @@ Documentation** corresponding to the version of Zig that you are using by following the appropriate link on the [download page](https://ziglang.org/download). -Otherwise, you're looking at a release of Zig, and you can find documentation -here: - - * doc/langref.html - * doc/std/index.html +Otherwise, you're looking at a release of Zig, so you can find the language +reference at `doc/langref.html`, and the standard library documentation by +running `zig std`, which will open a browser tab. ## Installation diff --git a/lib/compiler/build_runner.zig b/lib/compiler/build_runner.zig index 8bb03939dc6f..690c93754553 100644 --- a/lib/compiler/build_runner.zig +++ b/lib/compiler/build_runner.zig @@ -17,6 +17,12 @@ const runner = @This(); pub const root = @import("@build"); pub const dependencies = @import("@dependencies"); +pub const std_options: std.Options = .{ + .side_channels_mitigations = .none, + .http_disable_tls = true, + .crypto_fork_safety = false, +}; + pub fn main() !void { // Here we use an ArenaAllocator backed by a page allocator because a build is a short-lived, // one shot program. 
We don't need to waste time freeing memory and finding places to squish @@ -106,6 +112,7 @@ pub fn main() !void { var watch = false; var fuzz = false; var debounce_interval_ms: u16 = 50; + var listen_port: u16 = 0; while (nextArg(args, &arg_idx)) |arg| { if (mem.startsWith(u8, arg, "-Z")) { @@ -203,6 +210,14 @@ pub fn main() !void { next_arg, @errorName(err), }); }; + } else if (mem.eql(u8, arg, "--port")) { + const next_arg = nextArg(args, &arg_idx) orelse + fatalWithHint("expected u16 after '{s}'", .{arg}); + listen_port = std.fmt.parseUnsigned(u16, next_arg, 10) catch |err| { + fatal("unable to parse port '{s}' as unsigned 16-bit integer: {s}\n", .{ + next_arg, @errorName(err), + }); + }; } else if (mem.eql(u8, arg, "--debug-log")) { const next_arg = nextArgOrFatal(args, &arg_idx); try debug_log_scopes.append(next_arg); @@ -403,7 +418,27 @@ pub fn main() !void { else => return err, }; if (fuzz) { - Fuzz.start(&run.thread_pool, run.step_stack.keys(), run.ttyconf, main_progress_node); + switch (builtin.os.tag) { + // Current implementation depends on two things that need to be ported to Windows: + // * Memory-mapping to share data between the fuzzer and build runner. + // * COFF/PE support added to `std.debug.Info` (it needs a batching API for resolving + // many addresses to source locations). 
+ .windows => fatal("--fuzz not yet implemented for {s}", .{@tagName(builtin.os.tag)}), + else => {}, + } + const listen_address = std.net.Address.parseIp("127.0.0.1", listen_port) catch unreachable; + try Fuzz.start( + gpa, + arena, + global_cache_directory, + zig_lib_directory, + zig_exe, + &run.thread_pool, + run.step_stack.keys(), + run.ttyconf, + listen_address, + main_progress_node, + ); } if (!watch) return cleanExit(); diff --git a/lib/compiler/std-docs.zig b/lib/compiler/std-docs.zig index eaabcfa93643..c11665101c39 100644 --- a/lib/compiler/std-docs.zig +++ b/lib/compiler/std-docs.zig @@ -275,10 +275,6 @@ fn buildWasmBinary( ) ![]const u8 { const gpa = context.gpa; - const main_src_path = try std.fs.path.join(arena, &.{ - context.zig_lib_directory, "docs", "wasm", "main.zig", - }); - var argv: std.ArrayListUnmanaged([]const u8) = .{}; try argv.appendSlice(arena, &.{ @@ -298,7 +294,10 @@ fn buildWasmBinary( "--name", "autodoc", "-rdynamic", - main_src_path, + "--dep", + "Walk", + try std.fmt.allocPrint(arena, "-Mroot={s}/docs/wasm/main.zig", .{context.zig_lib_directory}), + try std.fmt.allocPrint(arena, "-MWalk={s}/docs/wasm/Walk.zig", .{context.zig_lib_directory}), "--listen=-", }); diff --git a/lib/compiler/test_runner.zig b/lib/compiler/test_runner.zig index be793376a570..4be261912c1b 100644 --- a/lib/compiler/test_runner.zig +++ b/lib/compiler/test_runner.zig @@ -1,8 +1,10 @@ //! Default test runner for unit tests. 
const builtin = @import("builtin"); + const std = @import("std"); const io = std.io; const testing = std.testing; +const assert = std.debug.assert; pub const std_options = .{ .logFn = log, @@ -28,6 +30,7 @@ pub fn main() void { @panic("unable to parse command line args"); var listen = false; + var opt_cache_dir: ?[]const u8 = null; for (args[1..]) |arg| { if (std.mem.eql(u8, arg, "--listen=-")) { @@ -35,12 +38,18 @@ pub fn main() void { } else if (std.mem.startsWith(u8, arg, "--seed=")) { testing.random_seed = std.fmt.parseUnsigned(u32, arg["--seed=".len..], 0) catch @panic("unable to parse --seed command line argument"); + } else if (std.mem.startsWith(u8, arg, "--cache-dir")) { + opt_cache_dir = arg["--cache-dir=".len..]; } else { @panic("unrecognized command line argument"); } } fba.reset(); + if (builtin.fuzz) { + const cache_dir = opt_cache_dir orelse @panic("missing --cache-dir=[path] argument"); + fuzzer_init(FuzzerSlice.fromSlice(cache_dir)); + } if (listen) { return mainServer() catch @panic("internal test runner failure"); @@ -59,6 +68,11 @@ fn mainServer() !void { }); defer server.deinit(); + if (builtin.fuzz) { + const coverage_id = fuzzer_coverage_id(); + try server.serveU64Message(.coverage_id, coverage_id); + } + while (true) { const hdr = try server.receiveMessage(); switch (hdr.tag) { @@ -129,7 +143,9 @@ fn mainServer() !void { }); }, .start_fuzzing => { + if (!builtin.fuzz) unreachable; const index = try server.receiveBody_u32(); + var first = true; const test_fn = builtin.test_functions[index]; while (true) { testing.allocator_instance = .{}; @@ -148,6 +164,10 @@ fn mainServer() !void { }; if (!is_fuzz_test) @panic("missed call to std.testing.fuzzInput"); if (log_err_count != 0) @panic("error logs detected"); + if (first) { + first = false; + try server.serveU64Message(.fuzz_start_addr, entry_addr); + } } }, @@ -315,20 +335,32 @@ const FuzzerSlice = extern struct { ptr: [*]const u8, len: usize, + /// Inline to avoid fuzzer instrumentation. 
inline fn toSlice(s: FuzzerSlice) []const u8 { return s.ptr[0..s.len]; } + + /// Inline to avoid fuzzer instrumentation. + inline fn fromSlice(s: []const u8) FuzzerSlice { + return .{ .ptr = s.ptr, .len = s.len }; + } }; var is_fuzz_test: bool = undefined; +var entry_addr: usize = 0; extern fn fuzzer_next() FuzzerSlice; +extern fn fuzzer_init(cache_dir: FuzzerSlice) void; +extern fn fuzzer_coverage_id() u64; pub fn fuzzInput(options: testing.FuzzInputOptions) []const u8 { @disableInstrumentation(); if (crippled) return ""; is_fuzz_test = true; - if (builtin.fuzz) return fuzzer_next().toSlice(); + if (builtin.fuzz) { + if (entry_addr == 0) entry_addr = @returnAddress(); + return fuzzer_next().toSlice(); + } if (options.corpus.len == 0) return ""; var prng = std.Random.DefaultPrng.init(testing.random_seed); const random = prng.random(); diff --git a/lib/docs/wasm/Decl.zig b/lib/docs/wasm/Decl.zig index 0260ce02850d..254635598799 100644 --- a/lib/docs/wasm/Decl.zig +++ b/lib/docs/wasm/Decl.zig @@ -1,3 +1,12 @@ +const Decl = @This(); +const std = @import("std"); +const Ast = std.zig.Ast; +const Walk = @import("Walk.zig"); +const gpa = std.heap.wasm_allocator; +const assert = std.debug.assert; +const log = std.log; +const Oom = error{OutOfMemory}; + ast_node: Ast.Node.Index, file: Walk.File.Index, /// The decl whose namespace this is in. @@ -215,12 +224,3 @@ pub fn find(search_string: []const u8) Decl.Index { } return current_decl_index; } - -const Decl = @This(); -const std = @import("std"); -const Ast = std.zig.Ast; -const Walk = @import("Walk.zig"); -const gpa = std.heap.wasm_allocator; -const assert = std.debug.assert; -const log = std.log; -const Oom = error{OutOfMemory}; diff --git a/lib/docs/wasm/Walk.zig b/lib/docs/wasm/Walk.zig index a22da861a8ac..ae924b8c3825 100644 --- a/lib/docs/wasm/Walk.zig +++ b/lib/docs/wasm/Walk.zig @@ -1,4 +1,15 @@ //! Find and annotate identifiers with links to their declarations. 
+ +const Walk = @This(); +const std = @import("std"); +const Ast = std.zig.Ast; +const assert = std.debug.assert; +const log = std.log; +const gpa = std.heap.wasm_allocator; +const Oom = error{OutOfMemory}; + +pub const Decl = @import("Decl.zig"); + pub var files: std.StringArrayHashMapUnmanaged(File) = .{}; pub var decls: std.ArrayListUnmanaged(Decl) = .{}; pub var modules: std.StringArrayHashMapUnmanaged(File.Index) = .{}; @@ -1120,15 +1131,6 @@ pub fn isPrimitiveNonType(name: []const u8) bool { // try w.root(); //} -const Walk = @This(); -const std = @import("std"); -const Ast = std.zig.Ast; -const assert = std.debug.assert; -const Decl = @import("Decl.zig"); -const log = std.log; -const gpa = std.heap.wasm_allocator; -const Oom = error{OutOfMemory}; - fn shrinkToFit(m: anytype) void { m.shrinkAndFree(gpa, m.entries.len); } diff --git a/lib/docs/wasm/html_render.zig b/lib/docs/wasm/html_render.zig new file mode 100644 index 000000000000..a5211fc77f4d --- /dev/null +++ b/lib/docs/wasm/html_render.zig @@ -0,0 +1,412 @@ +const std = @import("std"); +const Ast = std.zig.Ast; +const assert = std.debug.assert; + +const Walk = @import("Walk"); +const Decl = Walk.Decl; + +const gpa = std.heap.wasm_allocator; +const Oom = error{OutOfMemory}; + +/// Delete this to find out where URL escaping needs to be added. +pub const missing_feature_url_escape = true; + +pub const RenderSourceOptions = struct { + skip_doc_comments: bool = false, + skip_comments: bool = false, + collapse_whitespace: bool = false, + fn_link: Decl.Index = .none, + /// Assumed to be sorted ascending. + source_location_annotations: []const Annotation = &.{}, + /// Concatenated with dom_id. + annotation_prefix: []const u8 = "l", +}; + +pub const Annotation = struct { + file_byte_offset: u32, + /// Concatenated with annotation_prefix. 
+ dom_id: u32, +}; + +pub fn fileSourceHtml( + file_index: Walk.File.Index, + out: *std.ArrayListUnmanaged(u8), + root_node: Ast.Node.Index, + options: RenderSourceOptions, +) !void { + const ast = file_index.get_ast(); + const file = file_index.get(); + + const g = struct { + var field_access_buffer: std.ArrayListUnmanaged(u8) = .{}; + }; + + const token_tags = ast.tokens.items(.tag); + const token_starts = ast.tokens.items(.start); + const main_tokens = ast.nodes.items(.main_token); + + const start_token = ast.firstToken(root_node); + const end_token = ast.lastToken(root_node) + 1; + + var cursor: usize = token_starts[start_token]; + + var indent: usize = 0; + if (std.mem.lastIndexOf(u8, ast.source[0..cursor], "\n")) |newline_index| { + for (ast.source[newline_index + 1 .. cursor]) |c| { + if (c == ' ') { + indent += 1; + } else { + break; + } + } + } + + var next_annotate_index: usize = 0; + + for ( + token_tags[start_token..end_token], + token_starts[start_token..end_token], + start_token.., + ) |tag, start, token_index| { + const between = ast.source[cursor..start]; + if (std.mem.trim(u8, between, " \t\r\n").len > 0) { + if (!options.skip_comments) { + try out.appendSlice(gpa, ""); + try appendUnindented(out, between, indent); + try out.appendSlice(gpa, ""); + } + } else if (between.len > 0) { + if (options.collapse_whitespace) { + if (out.items.len > 0 and out.items[out.items.len - 1] != ' ') + try out.append(gpa, ' '); + } else { + try appendUnindented(out, between, indent); + } + } + if (tag == .eof) break; + const slice = ast.tokenSlice(token_index); + cursor = start + slice.len; + + // Insert annotations. 
+ while (true) { + if (next_annotate_index >= options.source_location_annotations.len) break; + const next_annotation = options.source_location_annotations[next_annotate_index]; + if (cursor <= next_annotation.file_byte_offset) break; + try out.writer(gpa).print("", .{ + options.annotation_prefix, next_annotation.dom_id, + }); + next_annotate_index += 1; + } + + switch (tag) { + .eof => unreachable, + + .keyword_addrspace, + .keyword_align, + .keyword_and, + .keyword_asm, + .keyword_async, + .keyword_await, + .keyword_break, + .keyword_catch, + .keyword_comptime, + .keyword_const, + .keyword_continue, + .keyword_defer, + .keyword_else, + .keyword_enum, + .keyword_errdefer, + .keyword_error, + .keyword_export, + .keyword_extern, + .keyword_for, + .keyword_if, + .keyword_inline, + .keyword_noalias, + .keyword_noinline, + .keyword_nosuspend, + .keyword_opaque, + .keyword_or, + .keyword_orelse, + .keyword_packed, + .keyword_anyframe, + .keyword_pub, + .keyword_resume, + .keyword_return, + .keyword_linksection, + .keyword_callconv, + .keyword_struct, + .keyword_suspend, + .keyword_switch, + .keyword_test, + .keyword_threadlocal, + .keyword_try, + .keyword_union, + .keyword_unreachable, + .keyword_usingnamespace, + .keyword_var, + .keyword_volatile, + .keyword_allowzero, + .keyword_while, + .keyword_anytype, + .keyword_fn, + => { + try out.appendSlice(gpa, ""); + try appendEscaped(out, slice); + try out.appendSlice(gpa, ""); + }, + + .string_literal, + .char_literal, + .multiline_string_literal_line, + => { + try out.appendSlice(gpa, ""); + try appendEscaped(out, slice); + try out.appendSlice(gpa, ""); + }, + + .builtin => { + try out.appendSlice(gpa, ""); + try appendEscaped(out, slice); + try out.appendSlice(gpa, ""); + }, + + .doc_comment, + .container_doc_comment, + => { + if (!options.skip_doc_comments) { + try out.appendSlice(gpa, ""); + try appendEscaped(out, slice); + try out.appendSlice(gpa, ""); + } + }, + + .identifier => i: { + if (options.fn_link != .none) { 
+ const fn_link = options.fn_link.get(); + const fn_token = main_tokens[fn_link.ast_node]; + if (token_index == fn_token + 1) { + try out.appendSlice(gpa, ""); + try appendEscaped(out, slice); + try out.appendSlice(gpa, ""); + break :i; + } + } + + if (token_index > 0 and token_tags[token_index - 1] == .keyword_fn) { + try out.appendSlice(gpa, ""); + try appendEscaped(out, slice); + try out.appendSlice(gpa, ""); + break :i; + } + + if (Walk.isPrimitiveNonType(slice)) { + try out.appendSlice(gpa, ""); + try appendEscaped(out, slice); + try out.appendSlice(gpa, ""); + break :i; + } + + if (std.zig.primitives.isPrimitive(slice)) { + try out.appendSlice(gpa, ""); + try appendEscaped(out, slice); + try out.appendSlice(gpa, ""); + break :i; + } + + if (file.token_parents.get(token_index)) |field_access_node| { + g.field_access_buffer.clearRetainingCapacity(); + try walkFieldAccesses(file_index, &g.field_access_buffer, field_access_node); + if (g.field_access_buffer.items.len > 0) { + try out.appendSlice(gpa, ""); + try appendEscaped(out, slice); + try out.appendSlice(gpa, ""); + } else { + try appendEscaped(out, slice); + } + break :i; + } + + { + g.field_access_buffer.clearRetainingCapacity(); + try resolveIdentLink(file_index, &g.field_access_buffer, token_index); + if (g.field_access_buffer.items.len > 0) { + try out.appendSlice(gpa, ""); + try appendEscaped(out, slice); + try out.appendSlice(gpa, ""); + break :i; + } + } + + try appendEscaped(out, slice); + }, + + .number_literal => { + try out.appendSlice(gpa, ""); + try appendEscaped(out, slice); + try out.appendSlice(gpa, ""); + }, + + .bang, + .pipe, + .pipe_pipe, + .pipe_equal, + .equal, + .equal_equal, + .equal_angle_bracket_right, + .bang_equal, + .l_paren, + .r_paren, + .semicolon, + .percent, + .percent_equal, + .l_brace, + .r_brace, + .l_bracket, + .r_bracket, + .period, + .period_asterisk, + .ellipsis2, + .ellipsis3, + .caret, + .caret_equal, + .plus, + .plus_plus, + .plus_equal, + .plus_percent, + 
.plus_percent_equal, + .plus_pipe, + .plus_pipe_equal, + .minus, + .minus_equal, + .minus_percent, + .minus_percent_equal, + .minus_pipe, + .minus_pipe_equal, + .asterisk, + .asterisk_equal, + .asterisk_asterisk, + .asterisk_percent, + .asterisk_percent_equal, + .asterisk_pipe, + .asterisk_pipe_equal, + .arrow, + .colon, + .slash, + .slash_equal, + .comma, + .ampersand, + .ampersand_equal, + .question_mark, + .angle_bracket_left, + .angle_bracket_left_equal, + .angle_bracket_angle_bracket_left, + .angle_bracket_angle_bracket_left_equal, + .angle_bracket_angle_bracket_left_pipe, + .angle_bracket_angle_bracket_left_pipe_equal, + .angle_bracket_right, + .angle_bracket_right_equal, + .angle_bracket_angle_bracket_right, + .angle_bracket_angle_bracket_right_equal, + .tilde, + => try appendEscaped(out, slice), + + .invalid, .invalid_periodasterisks => return error.InvalidToken, + } + } +} + +fn appendUnindented(out: *std.ArrayListUnmanaged(u8), s: []const u8, indent: usize) !void { + var it = std.mem.splitScalar(u8, s, '\n'); + var is_first_line = true; + while (it.next()) |line| { + if (is_first_line) { + try appendEscaped(out, line); + is_first_line = false; + } else { + try out.appendSlice(gpa, "\n"); + try appendEscaped(out, unindent(line, indent)); + } + } +} + +pub fn appendEscaped(out: *std.ArrayListUnmanaged(u8), s: []const u8) !void { + for (s) |c| { + try out.ensureUnusedCapacity(gpa, 6); + switch (c) { + '&' => out.appendSliceAssumeCapacity("&"), + '<' => out.appendSliceAssumeCapacity("<"), + '>' => out.appendSliceAssumeCapacity(">"), + '"' => out.appendSliceAssumeCapacity("""), + else => out.appendAssumeCapacity(c), + } + } +} + +fn walkFieldAccesses( + file_index: Walk.File.Index, + out: *std.ArrayListUnmanaged(u8), + node: Ast.Node.Index, +) Oom!void { + const ast = file_index.get_ast(); + const node_tags = ast.nodes.items(.tag); + assert(node_tags[node] == .field_access); + const node_datas = ast.nodes.items(.data); + const main_tokens = 
ast.nodes.items(.main_token); + const object_node = node_datas[node].lhs; + const dot_token = main_tokens[node]; + const field_ident = dot_token + 1; + switch (node_tags[object_node]) { + .identifier => { + const lhs_ident = main_tokens[object_node]; + try resolveIdentLink(file_index, out, lhs_ident); + }, + .field_access => { + try walkFieldAccesses(file_index, out, object_node); + }, + else => {}, + } + if (out.items.len > 0) { + try out.append(gpa, '.'); + try out.appendSlice(gpa, ast.tokenSlice(field_ident)); + } +} + +fn resolveIdentLink( + file_index: Walk.File.Index, + out: *std.ArrayListUnmanaged(u8), + ident_token: Ast.TokenIndex, +) Oom!void { + const decl_index = file_index.get().lookup_token(ident_token); + if (decl_index == .none) return; + try resolveDeclLink(decl_index, out); +} + +fn unindent(s: []const u8, indent: usize) []const u8 { + var indent_idx: usize = 0; + for (s) |c| { + if (c == ' ' and indent_idx < indent) { + indent_idx += 1; + } else { + break; + } + } + return s[indent_idx..]; +} + +pub fn resolveDeclLink(decl_index: Decl.Index, out: *std.ArrayListUnmanaged(u8)) Oom!void { + const decl = decl_index.get(); + switch (decl.categorize()) { + .alias => |alias_decl| try alias_decl.get().fqn(out), + else => try decl.fqn(out), + } +} diff --git a/lib/docs/wasm/main.zig b/lib/docs/wasm/main.zig index f5ce02d7d669..55882aaf7df5 100644 --- a/lib/docs/wasm/main.zig +++ b/lib/docs/wasm/main.zig @@ -1,15 +1,17 @@ -/// Delete this to find out where URL escaping needs to be added. 
-const missing_feature_url_escape = true; - -const gpa = std.heap.wasm_allocator; - const std = @import("std"); const log = std.log; const assert = std.debug.assert; const Ast = std.zig.Ast; -const Walk = @import("Walk.zig"); +const Walk = @import("Walk"); const markdown = @import("markdown.zig"); -const Decl = @import("Decl.zig"); +const Decl = Walk.Decl; + +const fileSourceHtml = @import("html_render.zig").fileSourceHtml; +const appendEscaped = @import("html_render.zig").appendEscaped; +const resolveDeclLink = @import("html_render.zig").resolveDeclLink; +const missing_feature_url_escape = @import("html_render.zig").missing_feature_url_escape; + +const gpa = std.heap.wasm_allocator; const js = struct { extern "js" fn log(ptr: [*]const u8, len: usize) void; @@ -53,7 +55,7 @@ export fn unpack(tar_ptr: [*]u8, tar_len: usize) void { const tar_bytes = tar_ptr[0..tar_len]; //log.debug("received {d} bytes of tar file", .{tar_bytes.len}); - unpack_inner(tar_bytes) catch |err| { + unpackInner(tar_bytes) catch |err| { fatal("unable to unpack tar: {s}", .{@errorName(err)}); }; } @@ -439,7 +441,7 @@ fn decl_field_html_fallible( const decl = decl_index.get(); const ast = decl.file.get_ast(); try out.appendSlice(gpa, "
");
-    try file_source_html(decl.file, out, field_node, .{});
+    try fileSourceHtml(decl.file, out, field_node, .{});
     try out.appendSlice(gpa, "
"); const field = ast.fullContainerField(field_node).?; @@ -478,7 +480,7 @@ fn decl_param_html_fallible( try out.appendSlice(gpa, "
");
     try appendEscaped(out, name);
     try out.appendSlice(gpa, ": ");
-    try file_source_html(decl.file, out, param_node, .{});
+    try fileSourceHtml(decl.file, out, param_node, .{});
     try out.appendSlice(gpa, "
"); if (ast.tokens.items(.tag)[first_doc_comment] == .doc_comment) { @@ -506,7 +508,7 @@ export fn decl_fn_proto_html(decl_index: Decl.Index, linkify_fn_name: bool) Stri }; string_result.clearRetainingCapacity(); - file_source_html(decl.file, &string_result, proto_node, .{ + fileSourceHtml(decl.file, &string_result, proto_node, .{ .skip_doc_comments = true, .skip_comments = true, .collapse_whitespace = true, @@ -521,7 +523,7 @@ export fn decl_source_html(decl_index: Decl.Index) String { const decl = decl_index.get(); string_result.clearRetainingCapacity(); - file_source_html(decl.file, &string_result, decl.ast_node, .{}) catch |err| { + fileSourceHtml(decl.file, &string_result, decl.ast_node, .{}) catch |err| { fatal("unable to render source: {s}", .{@errorName(err)}); }; return String.init(string_result.items); @@ -533,7 +535,7 @@ export fn decl_doctest_html(decl_index: Decl.Index) String { return String.init(""); string_result.clearRetainingCapacity(); - file_source_html(decl.file, &string_result, doctest_ast_node, .{}) catch |err| { + fileSourceHtml(decl.file, &string_result, doctest_ast_node, .{}) catch |err| { fatal("unable to render source: {s}", .{@errorName(err)}); }; return String.init(string_result.items); @@ -691,7 +693,7 @@ fn render_docs( const content = doc.string(data.text.content); if (resolve_decl_path(r.context, content)) |resolved_decl_index| { g.link_buffer.clearRetainingCapacity(); - try resolve_decl_link(resolved_decl_index, &g.link_buffer); + try resolveDeclLink(resolved_decl_index, &g.link_buffer); try writer.writeAll("") catch @panic("OOM"); - file_source_html(decl.file, &string_result, var_decl.ast.type_node, .{ + fileSourceHtml(decl.file, &string_result, var_decl.ast.type_node, .{ .skip_comments = true, .collapse_whitespace = true, }) catch |e| { @@ -750,7 +752,7 @@ export fn decl_type_html(decl_index: Decl.Index) String { const Oom = error{OutOfMemory}; -fn unpack_inner(tar_bytes: []u8) !void { +fn unpackInner(tar_bytes: []u8) !void { 
var fbs = std.io.fixedBufferStream(tar_bytes); var file_name_buffer: [1024]u8 = undefined; var link_name_buffer: [1024]u8 = undefined; @@ -902,382 +904,6 @@ export fn namespace_members(parent: Decl.Index, include_private: bool) Slice(Dec return Slice(Decl.Index).init(g.members.items); } -const RenderSourceOptions = struct { - skip_doc_comments: bool = false, - skip_comments: bool = false, - collapse_whitespace: bool = false, - fn_link: Decl.Index = .none, -}; - -fn file_source_html( - file_index: Walk.File.Index, - out: *std.ArrayListUnmanaged(u8), - root_node: Ast.Node.Index, - options: RenderSourceOptions, -) !void { - const ast = file_index.get_ast(); - const file = file_index.get(); - - const g = struct { - var field_access_buffer: std.ArrayListUnmanaged(u8) = .{}; - }; - - const token_tags = ast.tokens.items(.tag); - const token_starts = ast.tokens.items(.start); - const main_tokens = ast.nodes.items(.main_token); - - const start_token = ast.firstToken(root_node); - const end_token = ast.lastToken(root_node) + 1; - - var cursor: usize = token_starts[start_token]; - - var indent: usize = 0; - if (std.mem.lastIndexOf(u8, ast.source[0..cursor], "\n")) |newline_index| { - for (ast.source[newline_index + 1 .. 
cursor]) |c| { - if (c == ' ') { - indent += 1; - } else { - break; - } - } - } - - for ( - token_tags[start_token..end_token], - token_starts[start_token..end_token], - start_token.., - ) |tag, start, token_index| { - const between = ast.source[cursor..start]; - if (std.mem.trim(u8, between, " \t\r\n").len > 0) { - if (!options.skip_comments) { - try out.appendSlice(gpa, ""); - try appendUnindented(out, between, indent); - try out.appendSlice(gpa, ""); - } - } else if (between.len > 0) { - if (options.collapse_whitespace) { - if (out.items.len > 0 and out.items[out.items.len - 1] != ' ') - try out.append(gpa, ' '); - } else { - try appendUnindented(out, between, indent); - } - } - if (tag == .eof) break; - const slice = ast.tokenSlice(token_index); - cursor = start + slice.len; - switch (tag) { - .eof => unreachable, - - .keyword_addrspace, - .keyword_align, - .keyword_and, - .keyword_asm, - .keyword_async, - .keyword_await, - .keyword_break, - .keyword_catch, - .keyword_comptime, - .keyword_const, - .keyword_continue, - .keyword_defer, - .keyword_else, - .keyword_enum, - .keyword_errdefer, - .keyword_error, - .keyword_export, - .keyword_extern, - .keyword_for, - .keyword_if, - .keyword_inline, - .keyword_noalias, - .keyword_noinline, - .keyword_nosuspend, - .keyword_opaque, - .keyword_or, - .keyword_orelse, - .keyword_packed, - .keyword_anyframe, - .keyword_pub, - .keyword_resume, - .keyword_return, - .keyword_linksection, - .keyword_callconv, - .keyword_struct, - .keyword_suspend, - .keyword_switch, - .keyword_test, - .keyword_threadlocal, - .keyword_try, - .keyword_union, - .keyword_unreachable, - .keyword_usingnamespace, - .keyword_var, - .keyword_volatile, - .keyword_allowzero, - .keyword_while, - .keyword_anytype, - .keyword_fn, - => { - try out.appendSlice(gpa, ""); - try appendEscaped(out, slice); - try out.appendSlice(gpa, ""); - }, - - .string_literal, - .char_literal, - .multiline_string_literal_line, - => { - try out.appendSlice(gpa, ""); - try 
appendEscaped(out, slice); - try out.appendSlice(gpa, ""); - }, - - .builtin => { - try out.appendSlice(gpa, ""); - try appendEscaped(out, slice); - try out.appendSlice(gpa, ""); - }, - - .doc_comment, - .container_doc_comment, - => { - if (!options.skip_doc_comments) { - try out.appendSlice(gpa, ""); - try appendEscaped(out, slice); - try out.appendSlice(gpa, ""); - } - }, - - .identifier => i: { - if (options.fn_link != .none) { - const fn_link = options.fn_link.get(); - const fn_token = main_tokens[fn_link.ast_node]; - if (token_index == fn_token + 1) { - try out.appendSlice(gpa, ""); - try appendEscaped(out, slice); - try out.appendSlice(gpa, ""); - break :i; - } - } - - if (token_index > 0 and token_tags[token_index - 1] == .keyword_fn) { - try out.appendSlice(gpa, ""); - try appendEscaped(out, slice); - try out.appendSlice(gpa, ""); - break :i; - } - - if (Walk.isPrimitiveNonType(slice)) { - try out.appendSlice(gpa, ""); - try appendEscaped(out, slice); - try out.appendSlice(gpa, ""); - break :i; - } - - if (std.zig.primitives.isPrimitive(slice)) { - try out.appendSlice(gpa, ""); - try appendEscaped(out, slice); - try out.appendSlice(gpa, ""); - break :i; - } - - if (file.token_parents.get(token_index)) |field_access_node| { - g.field_access_buffer.clearRetainingCapacity(); - try walk_field_accesses(file_index, &g.field_access_buffer, field_access_node); - if (g.field_access_buffer.items.len > 0) { - try out.appendSlice(gpa, ""); - try appendEscaped(out, slice); - try out.appendSlice(gpa, ""); - } else { - try appendEscaped(out, slice); - } - break :i; - } - - { - g.field_access_buffer.clearRetainingCapacity(); - try resolve_ident_link(file_index, &g.field_access_buffer, token_index); - if (g.field_access_buffer.items.len > 0) { - try out.appendSlice(gpa, ""); - try appendEscaped(out, slice); - try out.appendSlice(gpa, ""); - break :i; - } - } - - try appendEscaped(out, slice); - }, - - .number_literal => { - try out.appendSlice(gpa, ""); - try 
appendEscaped(out, slice); - try out.appendSlice(gpa, ""); - }, - - .bang, - .pipe, - .pipe_pipe, - .pipe_equal, - .equal, - .equal_equal, - .equal_angle_bracket_right, - .bang_equal, - .l_paren, - .r_paren, - .semicolon, - .percent, - .percent_equal, - .l_brace, - .r_brace, - .l_bracket, - .r_bracket, - .period, - .period_asterisk, - .ellipsis2, - .ellipsis3, - .caret, - .caret_equal, - .plus, - .plus_plus, - .plus_equal, - .plus_percent, - .plus_percent_equal, - .plus_pipe, - .plus_pipe_equal, - .minus, - .minus_equal, - .minus_percent, - .minus_percent_equal, - .minus_pipe, - .minus_pipe_equal, - .asterisk, - .asterisk_equal, - .asterisk_asterisk, - .asterisk_percent, - .asterisk_percent_equal, - .asterisk_pipe, - .asterisk_pipe_equal, - .arrow, - .colon, - .slash, - .slash_equal, - .comma, - .ampersand, - .ampersand_equal, - .question_mark, - .angle_bracket_left, - .angle_bracket_left_equal, - .angle_bracket_angle_bracket_left, - .angle_bracket_angle_bracket_left_equal, - .angle_bracket_angle_bracket_left_pipe, - .angle_bracket_angle_bracket_left_pipe_equal, - .angle_bracket_right, - .angle_bracket_right_equal, - .angle_bracket_angle_bracket_right, - .angle_bracket_angle_bracket_right_equal, - .tilde, - => try appendEscaped(out, slice), - - .invalid, .invalid_periodasterisks => return error.InvalidToken, - } - } -} - -fn unindent(s: []const u8, indent: usize) []const u8 { - var indent_idx: usize = 0; - for (s) |c| { - if (c == ' ' and indent_idx < indent) { - indent_idx += 1; - } else { - break; - } - } - return s[indent_idx..]; -} - -fn appendUnindented(out: *std.ArrayListUnmanaged(u8), s: []const u8, indent: usize) !void { - var it = std.mem.splitScalar(u8, s, '\n'); - var is_first_line = true; - while (it.next()) |line| { - if (is_first_line) { - try appendEscaped(out, line); - is_first_line = false; - } else { - try out.appendSlice(gpa, "\n"); - try appendEscaped(out, unindent(line, indent)); - } - } -} - -fn resolve_ident_link( - file_index: 
Walk.File.Index, - out: *std.ArrayListUnmanaged(u8), - ident_token: Ast.TokenIndex, -) Oom!void { - const decl_index = file_index.get().lookup_token(ident_token); - if (decl_index == .none) return; - try resolve_decl_link(decl_index, out); -} - -fn resolve_decl_link(decl_index: Decl.Index, out: *std.ArrayListUnmanaged(u8)) Oom!void { - const decl = decl_index.get(); - switch (decl.categorize()) { - .alias => |alias_decl| try alias_decl.get().fqn(out), - else => try decl.fqn(out), - } -} - -fn walk_field_accesses( - file_index: Walk.File.Index, - out: *std.ArrayListUnmanaged(u8), - node: Ast.Node.Index, -) Oom!void { - const ast = file_index.get_ast(); - const node_tags = ast.nodes.items(.tag); - assert(node_tags[node] == .field_access); - const node_datas = ast.nodes.items(.data); - const main_tokens = ast.nodes.items(.main_token); - const object_node = node_datas[node].lhs; - const dot_token = main_tokens[node]; - const field_ident = dot_token + 1; - switch (node_tags[object_node]) { - .identifier => { - const lhs_ident = main_tokens[object_node]; - try resolve_ident_link(file_index, out, lhs_ident); - }, - .field_access => { - try walk_field_accesses(file_index, out, object_node); - }, - else => {}, - } - if (out.items.len > 0) { - try out.append(gpa, '.'); - try out.appendSlice(gpa, ast.tokenSlice(field_ident)); - } -} - -fn appendEscaped(out: *std.ArrayListUnmanaged(u8), s: []const u8) !void { - for (s) |c| { - try out.ensureUnusedCapacity(gpa, 6); - switch (c) { - '&' => out.appendSliceAssumeCapacity("&"), - '<' => out.appendSliceAssumeCapacity("<"), - '>' => out.appendSliceAssumeCapacity(">"), - '"' => out.appendSliceAssumeCapacity("""), - else => out.appendAssumeCapacity(c), - } - } -} - fn count_scalar(haystack: []const u8, needle: u8) usize { var total: usize = 0; for (haystack) |elem| { diff --git a/lib/fuzzer.zig b/lib/fuzzer.zig index 60876e0bfb25..a5d8c9bbbf92 100644 --- a/lib/fuzzer.zig +++ b/lib/fuzzer.zig @@ -2,6 +2,8 @@ const builtin = 
@import("builtin"); const std = @import("std"); const Allocator = std.mem.Allocator; const assert = std.debug.assert; +const fatal = std.process.fatal; +const SeenPcsHeader = std.Build.Fuzz.abi.SeenPcsHeader; pub const std_options = .{ .logFn = logOverride, @@ -15,9 +17,9 @@ fn logOverride( comptime format: []const u8, args: anytype, ) void { - if (builtin.mode != .Debug) return; const f = if (log_file) |f| f else f: { - const f = std.fs.cwd().createFile("libfuzzer.log", .{}) catch @panic("failed to open fuzzer log file"); + const f = fuzzer.cache_dir.createFile("tmp/libfuzzer.log", .{}) catch + @panic("failed to open fuzzer log file"); log_file = f; break :f f; }; @@ -26,18 +28,19 @@ fn logOverride( f.writer().print(prefix1 ++ prefix2 ++ format ++ "\n", args) catch @panic("failed to write to fuzzer log"); } -export threadlocal var __sancov_lowest_stack: usize = 0; +export threadlocal var __sancov_lowest_stack: usize = std.math.maxInt(usize); -export fn __sanitizer_cov_8bit_counters_init(start: [*]u8, stop: [*]u8) void { - std.log.debug("__sanitizer_cov_8bit_counters_init start={*}, stop={*}", .{ start, stop }); +var module_count_8bc: usize = 0; +var module_count_pcs: usize = 0; + +export fn __sanitizer_cov_8bit_counters_init(start: [*]u8, end: [*]u8) void { + assert(@atomicRmw(usize, &module_count_8bc, .Add, 1, .monotonic) == 0); + fuzzer.pc_counters = start[0 .. end - start]; } -export fn __sanitizer_cov_pcs_init(pc_start: [*]const usize, pc_end: [*]const usize) void { - std.log.debug("__sanitizer_cov_pcs_init pc_start={*}, pc_end={*}", .{ pc_start, pc_end }); - fuzzer.pc_range = .{ - .start = @intFromPtr(pc_start), - .end = @intFromPtr(pc_start), - }; +export fn __sanitizer_cov_pcs_init(start: [*]const Fuzzer.FlaggedPc, end: [*]const Fuzzer.FlaggedPc) void { + assert(@atomicRmw(usize, &module_count_pcs, .Add, 1, .monotonic) == 0); + fuzzer.flagged_pcs = start[0 .. 
end - start]; } export fn __sanitizer_cov_trace_const_cmp1(arg1: u8, arg2: u8) void { @@ -102,11 +105,21 @@ const Fuzzer = struct { gpa: Allocator, rng: std.Random.DefaultPrng, input: std.ArrayListUnmanaged(u8), - pc_range: PcRange, - count: usize, + flagged_pcs: []const FlaggedPc, + pc_counters: []u8, + n_runs: usize, recent_cases: RunMap, - deduplicated_runs: usize, + /// Data collected from code coverage instrumentation from one execution of + /// the test function. coverage: Coverage, + /// Tracks which PCs have been seen across all runs that do not crash the fuzzer process. + /// Stored in a memory-mapped file so that it can be shared with other + /// processes and viewed while the fuzzer is running. + seen_pcs: MemoryMappedList, + cache_dir: std.fs.Dir, + /// Identifies the file name that will be used to store coverage + /// information, available to other processes. + coverage_id: u64, const RunMap = std.ArrayHashMapUnmanaged(Run, void, Run.HashContext, false); @@ -161,9 +174,12 @@ const Fuzzer = struct { } }; - const PcRange = struct { - start: usize, - end: usize, + const FlaggedPc = extern struct { + addr: usize, + flags: packed struct(usize) { + entry: bool, + _: @Type(.{ .Int = .{ .signedness = .unsigned, .bits = @bitSizeOf(usize) - 1 } }), + }, }; const Analysis = struct { @@ -171,6 +187,72 @@ const Fuzzer = struct { id: Run.Id, }; + fn init(f: *Fuzzer, cache_dir: std.fs.Dir) !void { + const flagged_pcs = f.flagged_pcs; + + f.cache_dir = cache_dir; + + // Choose a file name for the coverage based on a hash of the PCs that will be stored within. 
+ const pc_digest = d: { + var hasher = std.hash.Wyhash.init(0); + for (flagged_pcs) |flagged_pc| { + hasher.update(std.mem.asBytes(&flagged_pc.addr)); + } + break :d hasher.final(); + }; + f.coverage_id = pc_digest; + const hex_digest = std.fmt.hex(pc_digest); + const coverage_file_path = "v/" ++ hex_digest; + + // Layout of this file: + // - Header + // - list of PC addresses (usize elements) + // - list of hit flag, 1 bit per address (stored in u8 elements) + const coverage_file = createFileBail(cache_dir, coverage_file_path, .{ + .read = true, + .truncate = false, + }); + defer coverage_file.close(); + const n_bitset_elems = (flagged_pcs.len + 7) / 8; + const bytes_len = @sizeOf(SeenPcsHeader) + flagged_pcs.len * @sizeOf(usize) + n_bitset_elems; + const existing_len = coverage_file.getEndPos() catch |err| { + fatal("unable to check len of coverage file: {s}", .{@errorName(err)}); + }; + if (existing_len == 0) { + coverage_file.setEndPos(bytes_len) catch |err| { + fatal("unable to set len of coverage file: {s}", .{@errorName(err)}); + }; + } else if (existing_len != bytes_len) { + fatal("incompatible existing coverage file (differing lengths)", .{}); + } + f.seen_pcs = MemoryMappedList.init(coverage_file, existing_len, bytes_len) catch |err| { + fatal("unable to init coverage memory map: {s}", .{@errorName(err)}); + }; + if (existing_len != 0) { + const existing_pcs_bytes = f.seen_pcs.items[@sizeOf(SeenPcsHeader)..][0 .. flagged_pcs.len * @sizeOf(usize)]; + const existing_pcs = std.mem.bytesAsSlice(usize, existing_pcs_bytes); + for (existing_pcs, flagged_pcs, 0..) 
|old, new, i| { + if (old != new.addr) { + fatal("incompatible existing coverage file (differing PC at index {d}: {x} != {x})", .{ + i, old, new.addr, + }); + } + } + } else { + const header: SeenPcsHeader = .{ + .n_runs = 0, + .unique_runs = 0, + .pcs_len = flagged_pcs.len, + .lowest_stack = std.math.maxInt(usize), + }; + f.seen_pcs.appendSliceAssumeCapacity(std.mem.asBytes(&header)); + for (flagged_pcs) |flagged_pc| { + f.seen_pcs.appendSliceAssumeCapacity(std.mem.asBytes(&flagged_pc.addr)); + } + f.seen_pcs.appendNTimesAssumeCapacity(0, n_bitset_elems); + } + } + fn analyzeLastRun(f: *Fuzzer) Analysis { return .{ .id = f.coverage.run_id_hasher.final(), @@ -194,7 +276,7 @@ const Fuzzer = struct { .score = 0, }, {}); } else { - if (f.count % 1000 == 0) f.dumpStats(); + if (f.n_runs % 10000 == 0) f.dumpStats(); const analysis = f.analyzeLastRun(); const gop = f.recent_cases.getOrPutAssumeCapacity(.{ @@ -204,7 +286,6 @@ const Fuzzer = struct { }); if (gop.found_existing) { //std.log.info("duplicate analysis: score={d} id={d}", .{ analysis.score, analysis.id }); - f.deduplicated_runs += 1; if (f.input.items.len < gop.key_ptr.input.len or gop.key_ptr.score == 0) { gpa.free(gop.key_ptr.input); gop.key_ptr.input = try gpa.dupe(u8, f.input.items); @@ -217,6 +298,28 @@ const Fuzzer = struct { .input = try gpa.dupe(u8, f.input.items), .score = analysis.score, }; + + // Track code coverage from all runs. + { + const seen_pcs = f.seen_pcs.items[@sizeOf(SeenPcsHeader) + f.flagged_pcs.len * @sizeOf(usize) ..]; + for (seen_pcs, 0..) 
|*elem, i| { + const byte_i = i * 8; + const mask: u8 = + (@as(u8, @intFromBool(f.pc_counters.ptr[byte_i + 0] != 0)) << 0) | + (@as(u8, @intFromBool(f.pc_counters.ptr[byte_i + 1] != 0)) << 1) | + (@as(u8, @intFromBool(f.pc_counters.ptr[byte_i + 2] != 0)) << 2) | + (@as(u8, @intFromBool(f.pc_counters.ptr[byte_i + 3] != 0)) << 3) | + (@as(u8, @intFromBool(f.pc_counters.ptr[byte_i + 4] != 0)) << 4) | + (@as(u8, @intFromBool(f.pc_counters.ptr[byte_i + 5] != 0)) << 5) | + (@as(u8, @intFromBool(f.pc_counters.ptr[byte_i + 6] != 0)) << 6) | + (@as(u8, @intFromBool(f.pc_counters.ptr[byte_i + 7] != 0)) << 7); + + _ = @atomicRmw(u8, elem, .Or, mask, .monotonic); + } + } + + const header: *volatile SeenPcsHeader = @ptrCast(f.seen_pcs.items[0..@sizeOf(SeenPcsHeader)]); + _ = @atomicRmw(usize, &header.unique_runs, .Add, 1, .monotonic); } if (f.recent_cases.entries.len >= 100) { @@ -244,8 +347,12 @@ const Fuzzer = struct { f.input.appendSliceAssumeCapacity(run.input); try f.mutate(); + f.n_runs += 1; + const header: *volatile SeenPcsHeader = @ptrCast(f.seen_pcs.items[0..@sizeOf(SeenPcsHeader)]); + _ = @atomicRmw(usize, &header.n_runs, .Add, 1, .monotonic); + _ = @atomicRmw(usize, &header.lowest_stack, .Min, __sancov_lowest_stack, .monotonic); + @memset(f.pc_counters, 0); f.coverage.reset(); - f.count += 1; return f.input.items; } @@ -256,10 +363,6 @@ const Fuzzer = struct { } fn dumpStats(f: *Fuzzer) void { - std.log.info("stats: runs={d} deduplicated={d}", .{ - f.count, - f.deduplicated_runs, - }); for (f.recent_cases.keys()[0..@min(f.recent_cases.entries.len, 5)], 0..) 
|run, i| { std.log.info("best[{d}] id={x} score={d} input: '{}'", .{ i, run.id, run.score, std.zig.fmtEscapes(run.input), @@ -291,6 +394,21 @@ const Fuzzer = struct { } }; +fn createFileBail(dir: std.fs.Dir, sub_path: []const u8, flags: std.fs.File.CreateFlags) std.fs.File { + return dir.createFile(sub_path, flags) catch |err| switch (err) { + error.FileNotFound => { + const dir_name = std.fs.path.dirname(sub_path).?; + dir.makePath(dir_name) catch |e| { + fatal("unable to make path '{s}': {s}", .{ dir_name, @errorName(e) }); + }; + return dir.createFile(sub_path, flags) catch |e| { + fatal("unable to create file '{s}': {s}", .{ sub_path, @errorName(e) }); + }; + }, + else => fatal("unable to create file '{s}': {s}", .{ sub_path, @errorName(err) }), + }; +} + fn oom(err: anytype) noreturn { switch (err) { error.OutOfMemory => @panic("out of memory"), @@ -303,15 +421,88 @@ var fuzzer: Fuzzer = .{ .gpa = general_purpose_allocator.allocator(), .rng = std.Random.DefaultPrng.init(0), .input = .{}, - .pc_range = .{ .start = 0, .end = 0 }, - .count = 0, - .deduplicated_runs = 0, + .flagged_pcs = undefined, + .pc_counters = undefined, + .n_runs = 0, .recent_cases = .{}, .coverage = undefined, + .cache_dir = undefined, + .seen_pcs = undefined, + .coverage_id = undefined, }; +/// Invalid until `fuzzer_init` is called. 
+export fn fuzzer_coverage_id() u64 { + return fuzzer.coverage_id; +} + export fn fuzzer_next() Fuzzer.Slice { return Fuzzer.Slice.fromZig(fuzzer.next() catch |err| switch (err) { error.OutOfMemory => @panic("out of memory"), }); } + +export fn fuzzer_init(cache_dir_struct: Fuzzer.Slice) void { + if (module_count_8bc == 0) fatal("__sanitizer_cov_8bit_counters_init was never called", .{}); + if (module_count_pcs == 0) fatal("__sanitizer_cov_pcs_init was never called", .{}); + + const cache_dir_path = cache_dir_struct.toZig(); + const cache_dir = if (cache_dir_path.len == 0) + std.fs.cwd() + else + std.fs.cwd().makeOpenPath(cache_dir_path, .{ .iterate = true }) catch |err| { + fatal("unable to open fuzz directory '{s}': {s}", .{ cache_dir_path, @errorName(err) }); + }; + + fuzzer.init(cache_dir) catch |err| fatal("unable to init fuzzer: {s}", .{@errorName(err)}); +} + +/// Like `std.ArrayListUnmanaged(u8)` but backed by memory mapping. +pub const MemoryMappedList = struct { + /// Contents of the list. + /// + /// Pointers to elements in this slice are invalidated by various functions + /// of this ArrayList in accordance with the respective documentation. In + /// all cases, "invalidated" means that the memory has been passed to this + /// allocator's resize or free function. + items: []align(std.mem.page_size) volatile u8, + /// How many bytes this list can hold without allocating additional memory. + capacity: usize, + + pub fn init(file: std.fs.File, length: usize, capacity: usize) !MemoryMappedList { + const ptr = try std.posix.mmap( + null, + capacity, + std.posix.PROT.READ | std.posix.PROT.WRITE, + .{ .TYPE = .SHARED }, + file.handle, + 0, + ); + return .{ + .items = ptr[0..length], + .capacity = capacity, + }; + } + + /// Append the slice of items to the list. + /// Asserts that the list can hold the additional items. 
+ pub fn appendSliceAssumeCapacity(l: *MemoryMappedList, items: []const u8) void { + const old_len = l.items.len; + const new_len = old_len + items.len; + assert(new_len <= l.capacity); + l.items.len = new_len; + @memcpy(l.items[old_len..][0..items.len], items); + } + + /// Append a value to the list `n` times. + /// Never invalidates element pointers. + /// The function is inline so that a comptime-known `value` parameter will + /// have better memset codegen in case it has a repeated byte pattern. + /// Asserts that the list can hold the additional items. + pub inline fn appendNTimesAssumeCapacity(l: *MemoryMappedList, value: u8, n: usize) void { + const new_len = l.items.len + n; + assert(new_len <= l.capacity); + @memset(l.items.ptr[l.items.len..new_len], value); + l.items.len = new_len; + } +}; diff --git a/lib/fuzzer/index.html b/lib/fuzzer/index.html new file mode 100644 index 000000000000..16fa87991377 --- /dev/null +++ b/lib/fuzzer/index.html @@ -0,0 +1,161 @@ + + + + + Zig Build System Interface + + + +

Loading JavaScript...

+ + + + + + diff --git a/lib/fuzzer/main.js b/lib/fuzzer/main.js new file mode 100644 index 000000000000..ce02276f9819 --- /dev/null +++ b/lib/fuzzer/main.js @@ -0,0 +1,249 @@ +(function() { + const domStatus = document.getElementById("status"); + const domSectSource = document.getElementById("sectSource"); + const domSectStats = document.getElementById("sectStats"); + const domSourceText = document.getElementById("sourceText"); + const domStatTotalRuns = document.getElementById("statTotalRuns"); + const domStatUniqueRuns = document.getElementById("statUniqueRuns"); + const domStatCoverage = document.getElementById("statCoverage"); + const domStatLowestStack = document.getElementById("statLowestStack"); + const domEntryPointsList = document.getElementById("entryPointsList"); + + let wasm_promise = fetch("main.wasm"); + let sources_promise = fetch("sources.tar").then(function(response) { + if (!response.ok) throw new Error("unable to download sources"); + return response.arrayBuffer(); + }); + var wasm_exports = null; + var curNavSearch = null; + var curNavLocation = null; + + const text_decoder = new TextDecoder(); + const text_encoder = new TextEncoder(); + + domStatus.textContent = "Loading WebAssembly..."; + WebAssembly.instantiateStreaming(wasm_promise, { + js: { + log: function(ptr, len) { + const msg = decodeString(ptr, len); + console.log(msg); + }, + panic: function (ptr, len) { + const msg = decodeString(ptr, len); + throw new Error("panic: " + msg); + }, + emitSourceIndexChange: onSourceIndexChange, + emitCoverageUpdate: onCoverageUpdate, + emitEntryPointsUpdate: renderStats, + }, + }).then(function(obj) { + wasm_exports = obj.instance.exports; + window.wasm = obj; // for debugging + domStatus.textContent = "Loading sources tarball..."; + + sources_promise.then(function(buffer) { + domStatus.textContent = "Parsing sources..."; + const js_array = new Uint8Array(buffer); + const ptr = wasm_exports.alloc(js_array.length); + const wasm_array = new 
Uint8Array(wasm_exports.memory.buffer, ptr, js_array.length); + wasm_array.set(js_array); + wasm_exports.unpack(ptr, js_array.length); + + window.addEventListener('popstate', onPopState, false); + onHashChange(null); + + domStatus.textContent = "Waiting for server to send source location metadata..."; + connectWebSocket(); + }); + }); + + function onPopState(ev) { + onHashChange(ev.state); + } + + function onHashChange(state) { + history.replaceState({}, ""); + navigate(location.hash); + if (state == null) window.scrollTo({top: 0}); + } + + function navigate(location_hash) { + domSectSource.classList.add("hidden"); + + curNavLocation = null; + curNavSearch = null; + + if (location_hash.length > 1 && location_hash[0] === '#') { + const query = location_hash.substring(1); + const qpos = query.indexOf("?"); + let nonSearchPart; + if (qpos === -1) { + nonSearchPart = query; + } else { + nonSearchPart = query.substring(0, qpos); + curNavSearch = decodeURIComponent(query.substring(qpos + 1)); + } + + if (nonSearchPart[0] == "l") { + curNavLocation = +nonSearchPart.substring(1); + renderSource(curNavLocation); + } + } + + render(); + } + + function connectWebSocket() { + const host = document.location.host; + const pathname = document.location.pathname; + const isHttps = document.location.protocol === 'https:'; + const match = host.match(/^(.+):(\d+)$/); + const defaultPort = isHttps ? 443 : 80; + const port = match ? parseInt(match[2], 10) : defaultPort; + const hostName = match ? match[1] : host; + const wsProto = isHttps ? 
"wss:" : "ws:"; + const wsUrl = wsProto + '//' + hostName + ':' + port + pathname; + ws = new WebSocket(wsUrl); + ws.binaryType = "arraybuffer"; + ws.addEventListener('message', onWebSocketMessage, false); + ws.addEventListener('error', timeoutThenCreateNew, false); + ws.addEventListener('close', timeoutThenCreateNew, false); + ws.addEventListener('open', onWebSocketOpen, false); + } + + function onWebSocketOpen() { + //console.log("web socket opened"); + } + + function onWebSocketMessage(ev) { + wasmOnMessage(ev.data); + } + + function timeoutThenCreateNew() { + ws.removeEventListener('message', onWebSocketMessage, false); + ws.removeEventListener('error', timeoutThenCreateNew, false); + ws.removeEventListener('close', timeoutThenCreateNew, false); + ws.removeEventListener('open', onWebSocketOpen, false); + ws = null; + setTimeout(connectWebSocket, 1000); + } + + function wasmOnMessage(data) { + const jsArray = new Uint8Array(data); + const ptr = wasm_exports.message_begin(jsArray.length); + const wasmArray = new Uint8Array(wasm_exports.memory.buffer, ptr, jsArray.length); + wasmArray.set(jsArray); + wasm_exports.message_end(); + } + + function onSourceIndexChange() { + render(); + if (curNavLocation != null) renderSource(curNavLocation); + } + + function onCoverageUpdate() { + renderStats(); + renderCoverage(); + } + + function render() { + domStatus.classList.add("hidden"); + } + + function renderStats() { + const totalRuns = wasm_exports.totalRuns(); + const uniqueRuns = wasm_exports.uniqueRuns(); + const totalSourceLocations = wasm_exports.totalSourceLocations(); + const coveredSourceLocations = wasm_exports.coveredSourceLocations(); + domStatTotalRuns.innerText = totalRuns; + domStatUniqueRuns.innerText = uniqueRuns + " (" + percent(uniqueRuns, totalRuns) + "%)"; + domStatCoverage.innerText = coveredSourceLocations + " / " + totalSourceLocations + " (" + percent(coveredSourceLocations, totalSourceLocations) + "%)"; + domStatLowestStack.innerText = 
unwrapString(wasm_exports.lowestStack()); + + const entryPoints = unwrapInt32Array(wasm_exports.entryPoints()); + resizeDomList(domEntryPointsList, entryPoints.length, "
  • "); + for (let i = 0; i < entryPoints.length; i += 1) { + const liDom = domEntryPointsList.children[i]; + liDom.innerHTML = unwrapString(wasm_exports.sourceLocationLinkHtml(entryPoints[i])); + } + + + domSectStats.classList.remove("hidden"); + } + + function renderCoverage() { + if (curNavLocation == null) return; + const sourceLocationIndex = curNavLocation; + + for (let i = 0; i < domSourceText.children.length; i += 1) { + const childDom = domSourceText.children[i]; + if (childDom.id != null && childDom.id[0] == "l") { + childDom.classList.add("l"); + childDom.classList.remove("c"); + } + } + const coveredList = unwrapInt32Array(wasm_exports.sourceLocationFileCoveredList(sourceLocationIndex)); + for (let i = 0; i < coveredList.length; i += 1) { + document.getElementById("l" + coveredList[i]).classList.add("c"); + } + } + + function resizeDomList(listDom, desiredLen, templateHtml) { + for (let i = listDom.childElementCount; i < desiredLen; i += 1) { + listDom.insertAdjacentHTML('beforeend', templateHtml); + } + while (desiredLen < listDom.childElementCount) { + listDom.removeChild(listDom.lastChild); + } + } + + function percent(a, b) { + return ((Number(a) / Number(b)) * 100).toFixed(1); + } + + function renderSource(sourceLocationIndex) { + const pathName = unwrapString(wasm_exports.sourceLocationPath(sourceLocationIndex)); + if (pathName.length === 0) return; + + const h2 = domSectSource.children[0]; + h2.innerText = pathName; + domSourceText.innerHTML = unwrapString(wasm_exports.sourceLocationFileHtml(sourceLocationIndex)); + + domSectSource.classList.remove("hidden"); + + // Empirically, Firefox needs this requestAnimationFrame in order for the scrollIntoView to work. 
+ requestAnimationFrame(function() { + const slDom = document.getElementById("l" + sourceLocationIndex); + if (slDom != null) slDom.scrollIntoView({ + behavior: "smooth", + block: "center", + }); + }); + } + + function decodeString(ptr, len) { + if (len === 0) return ""; + return text_decoder.decode(new Uint8Array(wasm_exports.memory.buffer, ptr, len)); + } + + function unwrapInt32Array(bigint) { + const ptr = Number(bigint & 0xffffffffn); + const len = Number(bigint >> 32n); + if (len === 0) return new Uint32Array(); + return new Uint32Array(wasm_exports.memory.buffer, ptr, len); + } + + function setInputString(s) { + const jsArray = text_encoder.encode(s); + const len = jsArray.length; + const ptr = wasm_exports.set_input_string(len); + const wasmArray = new Uint8Array(wasm_exports.memory.buffer, ptr, len); + wasmArray.set(jsArray); + } + + function unwrapString(bigint) { + const ptr = Number(bigint & 0xffffffffn); + const len = Number(bigint >> 32n); + return decodeString(ptr, len); + } +})(); diff --git a/lib/fuzzer/wasm/main.zig b/lib/fuzzer/wasm/main.zig new file mode 100644 index 000000000000..a352458afc4a --- /dev/null +++ b/lib/fuzzer/wasm/main.zig @@ -0,0 +1,424 @@ +const std = @import("std"); +const assert = std.debug.assert; +const abi = std.Build.Fuzz.abi; +const gpa = std.heap.wasm_allocator; +const log = std.log; +const Coverage = std.debug.Coverage; +const Allocator = std.mem.Allocator; + +const Walk = @import("Walk"); +const Decl = Walk.Decl; +const html_render = @import("html_render"); + +const js = struct { + extern "js" fn log(ptr: [*]const u8, len: usize) void; + extern "js" fn panic(ptr: [*]const u8, len: usize) noreturn; + extern "js" fn emitSourceIndexChange() void; + extern "js" fn emitCoverageUpdate() void; + extern "js" fn emitEntryPointsUpdate() void; +}; + +pub const std_options: std.Options = .{ + .logFn = logFn, +}; + +pub fn panic(msg: []const u8, st: ?*std.builtin.StackTrace, addr: ?usize) noreturn { + _ = st; + _ = addr; + 
log.err("panic: {s}", .{msg}); + @trap(); +} + +fn logFn( + comptime message_level: log.Level, + comptime scope: @TypeOf(.enum_literal), + comptime format: []const u8, + args: anytype, +) void { + const level_txt = comptime message_level.asText(); + const prefix2 = if (scope == .default) ": " else "(" ++ @tagName(scope) ++ "): "; + var buf: [500]u8 = undefined; + const line = std.fmt.bufPrint(&buf, level_txt ++ prefix2 ++ format, args) catch l: { + buf[buf.len - 3 ..][0..3].* = "...".*; + break :l &buf; + }; + js.log(line.ptr, line.len); +} + +export fn alloc(n: usize) [*]u8 { + const slice = gpa.alloc(u8, n) catch @panic("OOM"); + return slice.ptr; +} + +var message_buffer: std.ArrayListAlignedUnmanaged(u8, @alignOf(u64)) = .{}; + +/// Resizes the message buffer to be the correct length; returns the pointer to +/// the query string. +export fn message_begin(len: usize) [*]u8 { + message_buffer.resize(gpa, len) catch @panic("OOM"); + return message_buffer.items.ptr; +} + +export fn message_end() void { + const msg_bytes = message_buffer.items; + + const tag: abi.ToClientTag = @enumFromInt(msg_bytes[0]); + switch (tag) { + .source_index => return sourceIndexMessage(msg_bytes) catch @panic("OOM"), + .coverage_update => return coverageUpdateMessage(msg_bytes) catch @panic("OOM"), + .entry_points => return entryPointsMessage(msg_bytes) catch @panic("OOM"), + _ => unreachable, + } +} + +export fn unpack(tar_ptr: [*]u8, tar_len: usize) void { + const tar_bytes = tar_ptr[0..tar_len]; + log.debug("received {d} bytes of tar file", .{tar_bytes.len}); + + unpackInner(tar_bytes) catch |err| { + fatal("unable to unpack tar: {s}", .{@errorName(err)}); + }; +} + +/// Set by `set_input_string`. 
+var input_string: std.ArrayListUnmanaged(u8) = .{}; +var string_result: std.ArrayListUnmanaged(u8) = .{}; + +export fn set_input_string(len: usize) [*]u8 { + input_string.resize(gpa, len) catch @panic("OOM"); + return input_string.items.ptr; +} + +/// Looks up the root struct decl corresponding to a file by path. +/// Uses `input_string`. +export fn find_file_root() Decl.Index { + const file: Walk.File.Index = @enumFromInt(Walk.files.getIndex(input_string.items) orelse return .none); + return file.findRootDecl(); +} + +export fn decl_source_html(decl_index: Decl.Index) String { + const decl = decl_index.get(); + + string_result.clearRetainingCapacity(); + html_render.fileSourceHtml(decl.file, &string_result, decl.ast_node, .{}) catch |err| { + fatal("unable to render source: {s}", .{@errorName(err)}); + }; + return String.init(string_result.items); +} + +export fn lowestStack() String { + const header: *abi.CoverageUpdateHeader = @ptrCast(recent_coverage_update.items[0..@sizeOf(abi.CoverageUpdateHeader)]); + string_result.clearRetainingCapacity(); + string_result.writer(gpa).print("0x{d}", .{header.lowest_stack}) catch @panic("OOM"); + return String.init(string_result.items); +} + +export fn totalSourceLocations() usize { + return coverage_source_locations.items.len; +} + +export fn coveredSourceLocations() usize { + const covered_bits = recent_coverage_update.items[@sizeOf(abi.CoverageUpdateHeader)..]; + var count: usize = 0; + for (covered_bits) |byte| count += @popCount(byte); + return count; +} + +export fn totalRuns() u64 { + const header: *abi.CoverageUpdateHeader = @ptrCast(recent_coverage_update.items[0..@sizeOf(abi.CoverageUpdateHeader)]); + return header.n_runs; +} + +export fn uniqueRuns() u64 { + const header: *abi.CoverageUpdateHeader = @ptrCast(recent_coverage_update.items[0..@sizeOf(abi.CoverageUpdateHeader)]); + return header.unique_runs; +} + +const String = Slice(u8); + +fn Slice(T: type) type { + return packed struct(u64) { + ptr: u32, + len: 
u32, + + fn init(s: []const T) @This() { + return .{ + .ptr = @intFromPtr(s.ptr), + .len = s.len, + }; + } + }; +} + +fn unpackInner(tar_bytes: []u8) !void { + var fbs = std.io.fixedBufferStream(tar_bytes); + var file_name_buffer: [1024]u8 = undefined; + var link_name_buffer: [1024]u8 = undefined; + var it = std.tar.iterator(fbs.reader(), .{ + .file_name_buffer = &file_name_buffer, + .link_name_buffer = &link_name_buffer, + }); + while (try it.next()) |tar_file| { + switch (tar_file.kind) { + .file => { + if (tar_file.size == 0 and tar_file.name.len == 0) break; + if (std.mem.endsWith(u8, tar_file.name, ".zig")) { + log.debug("found file: '{s}'", .{tar_file.name}); + const file_name = try gpa.dupe(u8, tar_file.name); + if (std.mem.indexOfScalar(u8, file_name, '/')) |pkg_name_end| { + const pkg_name = file_name[0..pkg_name_end]; + const gop = try Walk.modules.getOrPut(gpa, pkg_name); + const file: Walk.File.Index = @enumFromInt(Walk.files.entries.len); + if (!gop.found_existing or + std.mem.eql(u8, file_name[pkg_name_end..], "/root.zig") or + std.mem.eql(u8, file_name[pkg_name_end + 1 .. 
file_name.len - ".zig".len], pkg_name)) + { + gop.value_ptr.* = file; + } + const file_bytes = tar_bytes[fbs.pos..][0..@intCast(tar_file.size)]; + assert(file == try Walk.add_file(file_name, file_bytes)); + } + } else { + log.warn("skipping: '{s}' - the tar creation should have done that", .{tar_file.name}); + } + }, + else => continue, + } + } +} + +fn fatal(comptime format: []const u8, args: anytype) noreturn { + var buf: [500]u8 = undefined; + const line = std.fmt.bufPrint(&buf, format, args) catch l: { + buf[buf.len - 3 ..][0..3].* = "...".*; + break :l &buf; + }; + js.panic(line.ptr, line.len); +} + +fn sourceIndexMessage(msg_bytes: []u8) error{OutOfMemory}!void { + const Header = abi.SourceIndexHeader; + const header: Header = @bitCast(msg_bytes[0..@sizeOf(Header)].*); + + const directories_start = @sizeOf(Header); + const directories_end = directories_start + header.directories_len * @sizeOf(Coverage.String); + const files_start = directories_end; + const files_end = files_start + header.files_len * @sizeOf(Coverage.File); + const source_locations_start = files_end; + const source_locations_end = source_locations_start + header.source_locations_len * @sizeOf(Coverage.SourceLocation); + const string_bytes = msg_bytes[source_locations_end..][0..header.string_bytes_len]; + + const directories: []const Coverage.String = @alignCast(std.mem.bytesAsSlice(Coverage.String, msg_bytes[directories_start..directories_end])); + const files: []const Coverage.File = @alignCast(std.mem.bytesAsSlice(Coverage.File, msg_bytes[files_start..files_end])); + const source_locations: []const Coverage.SourceLocation = @alignCast(std.mem.bytesAsSlice(Coverage.SourceLocation, msg_bytes[source_locations_start..source_locations_end])); + + try updateCoverage(directories, files, source_locations, string_bytes); + js.emitSourceIndexChange(); +} + +fn coverageUpdateMessage(msg_bytes: []u8) error{OutOfMemory}!void { + recent_coverage_update.clearRetainingCapacity(); + 
recent_coverage_update.appendSlice(gpa, msg_bytes) catch @panic("OOM"); + js.emitCoverageUpdate(); +} + +var entry_points: std.ArrayListUnmanaged(u32) = .{}; + +fn entryPointsMessage(msg_bytes: []u8) error{OutOfMemory}!void { + const header: abi.EntryPointHeader = @bitCast(msg_bytes[0..@sizeOf(abi.EntryPointHeader)].*); + entry_points.resize(gpa, header.flags.locs_len) catch @panic("OOM"); + @memcpy(entry_points.items, std.mem.bytesAsSlice(u32, msg_bytes[@sizeOf(abi.EntryPointHeader)..])); + js.emitEntryPointsUpdate(); +} + +export fn entryPoints() Slice(u32) { + return Slice(u32).init(entry_points.items); +} + +/// Index into `coverage_source_locations`. +const SourceLocationIndex = enum(u32) { + _, + + fn haveCoverage(sli: SourceLocationIndex) bool { + return @intFromEnum(sli) < coverage_source_locations.items.len; + } + + fn ptr(sli: SourceLocationIndex) *Coverage.SourceLocation { + return &coverage_source_locations.items[@intFromEnum(sli)]; + } + + fn sourceLocationLinkHtml( + sli: SourceLocationIndex, + out: *std.ArrayListUnmanaged(u8), + ) Allocator.Error!void { + const sl = sli.ptr(); + try out.writer(gpa).print("", .{@intFromEnum(sli)}); + try sli.appendPath(out); + try out.writer(gpa).print(":{d}:{d}", .{ sl.line, sl.column }); + } + + fn appendPath(sli: SourceLocationIndex, out: *std.ArrayListUnmanaged(u8)) Allocator.Error!void { + const sl = sli.ptr(); + const file = coverage.fileAt(sl.file); + const file_name = coverage.stringAt(file.basename); + const dir_name = coverage.stringAt(coverage.directories.keys()[file.directory_index]); + try html_render.appendEscaped(out, dir_name); + try out.appendSlice(gpa, "/"); + try html_render.appendEscaped(out, file_name); + } + + fn toWalkFile(sli: SourceLocationIndex) ?Walk.File.Index { + var buf: std.ArrayListUnmanaged(u8) = .{}; + defer buf.deinit(gpa); + sli.appendPath(&buf) catch @panic("OOM"); + return @enumFromInt(Walk.files.getIndex(buf.items) orelse return null); + } + + fn fileHtml( + sli: 
SourceLocationIndex, + out: *std.ArrayListUnmanaged(u8), + ) error{ OutOfMemory, SourceUnavailable }!void { + const walk_file_index = sli.toWalkFile() orelse return error.SourceUnavailable; + const root_node = walk_file_index.findRootDecl().get().ast_node; + var annotations: std.ArrayListUnmanaged(html_render.Annotation) = .{}; + defer annotations.deinit(gpa); + try computeSourceAnnotations(sli.ptr().file, walk_file_index, &annotations, coverage_source_locations.items); + html_render.fileSourceHtml(walk_file_index, out, root_node, .{ + .source_location_annotations = annotations.items, + }) catch |err| { + fatal("unable to render source: {s}", .{@errorName(err)}); + }; + } +}; + +fn computeSourceAnnotations( + cov_file_index: Coverage.File.Index, + walk_file_index: Walk.File.Index, + annotations: *std.ArrayListUnmanaged(html_render.Annotation), + source_locations: []const Coverage.SourceLocation, +) !void { + // Collect all the source locations from only this file into this array + // first, then sort by line, col, so that we can collect annotations with + // O(N) time complexity. + var locs: std.ArrayListUnmanaged(SourceLocationIndex) = .{}; + defer locs.deinit(gpa); + + for (source_locations, 0..) |sl, sli_usize| { + if (sl.file != cov_file_index) continue; + const sli: SourceLocationIndex = @enumFromInt(sli_usize); + try locs.append(gpa, sli); + } + + std.mem.sortUnstable(SourceLocationIndex, locs.items, {}, struct { + pub fn lessThan(context: void, lhs: SourceLocationIndex, rhs: SourceLocationIndex) bool { + _ = context; + const lhs_ptr = lhs.ptr(); + const rhs_ptr = rhs.ptr(); + if (lhs_ptr.line < rhs_ptr.line) return true; + if (lhs_ptr.line > rhs_ptr.line) return false; + return lhs_ptr.column < rhs_ptr.column; + } + }.lessThan); + + const source = walk_file_index.get_ast().source; + var line: usize = 1; + var column: usize = 1; + var next_loc_index: usize = 0; + for (source, 0..) 
|byte, offset| { + if (byte == '\n') { + line += 1; + column = 1; + } else { + column += 1; + } + while (true) { + if (next_loc_index >= locs.items.len) return; + const next_sli = locs.items[next_loc_index]; + const next_sl = next_sli.ptr(); + if (next_sl.line > line or (next_sl.line == line and next_sl.column > column)) break; + try annotations.append(gpa, .{ + .file_byte_offset = offset, + .dom_id = @intFromEnum(next_sli), + }); + next_loc_index += 1; + } + } +} + +var coverage = Coverage.init; +/// Index of type `SourceLocationIndex`. +var coverage_source_locations: std.ArrayListUnmanaged(Coverage.SourceLocation) = .{}; +/// Contains the most recent coverage update message, unmodified. +var recent_coverage_update: std.ArrayListUnmanaged(u8) = .{}; + +fn updateCoverage( + directories: []const Coverage.String, + files: []const Coverage.File, + source_locations: []const Coverage.SourceLocation, + string_bytes: []const u8, +) !void { + coverage.directories.clearRetainingCapacity(); + coverage.files.clearRetainingCapacity(); + coverage.string_bytes.clearRetainingCapacity(); + coverage_source_locations.clearRetainingCapacity(); + + try coverage_source_locations.appendSlice(gpa, source_locations); + try coverage.string_bytes.appendSlice(gpa, string_bytes); + + try coverage.files.entries.resize(gpa, files.len); + @memcpy(coverage.files.entries.items(.key), files); + try coverage.files.reIndexContext(gpa, .{ .string_bytes = coverage.string_bytes.items }); + + try coverage.directories.entries.resize(gpa, directories.len); + @memcpy(coverage.directories.entries.items(.key), directories); + try coverage.directories.reIndexContext(gpa, .{ .string_bytes = coverage.string_bytes.items }); +} + +export fn sourceLocationLinkHtml(index: SourceLocationIndex) String { + string_result.clearRetainingCapacity(); + index.sourceLocationLinkHtml(&string_result) catch @panic("OOM"); + return String.init(string_result.items); +} + +/// Returns empty string if coverage metadata is not 
available for this source location. +export fn sourceLocationPath(sli: SourceLocationIndex) String { + string_result.clearRetainingCapacity(); + if (sli.haveCoverage()) sli.appendPath(&string_result) catch @panic("OOM"); + return String.init(string_result.items); +} + +export fn sourceLocationFileHtml(sli: SourceLocationIndex) String { + string_result.clearRetainingCapacity(); + sli.fileHtml(&string_result) catch |err| switch (err) { + error.OutOfMemory => @panic("OOM"), + error.SourceUnavailable => {}, + }; + return String.init(string_result.items); +} + +export fn sourceLocationFileCoveredList(sli_file: SourceLocationIndex) Slice(SourceLocationIndex) { + const global = struct { + var result: std.ArrayListUnmanaged(SourceLocationIndex) = .{}; + fn add(i: u32, want_file: Coverage.File.Index) void { + const src_loc_index: SourceLocationIndex = @enumFromInt(i); + if (src_loc_index.ptr().file == want_file) result.appendAssumeCapacity(src_loc_index); + } + }; + const want_file = sli_file.ptr().file; + global.result.clearRetainingCapacity(); + const covered_bits = recent_coverage_update.items[@sizeOf(abi.CoverageUpdateHeader)..]; + var sli: u32 = 0; + for (covered_bits) |byte| { + global.result.ensureUnusedCapacity(gpa, 8) catch @panic("OOM"); + if ((byte & 0b0000_0001) != 0) global.add(sli + 0, want_file); + if ((byte & 0b0000_0010) != 0) global.add(sli + 1, want_file); + if ((byte & 0b0000_0100) != 0) global.add(sli + 2, want_file); + if ((byte & 0b0000_1000) != 0) global.add(sli + 3, want_file); + if ((byte & 0b0001_0000) != 0) global.add(sli + 4, want_file); + if ((byte & 0b0010_0000) != 0) global.add(sli + 5, want_file); + if ((byte & 0b0100_0000) != 0) global.add(sli + 6, want_file); + if ((byte & 0b1000_0000) != 0) global.add(sli + 7, want_file); + sli += 8; + } + return Slice(SourceLocationIndex).init(global.result.items); +} diff --git a/lib/std/Build.zig b/lib/std/Build.zig index 7612ad0d6d1a..03743cf52e3a 100644 --- a/lib/std/Build.zig +++ b/lib/std/Build.zig 
@@ -2300,22 +2300,26 @@ pub const LazyPath = union(enum) { } pub fn path(lazy_path: LazyPath, b: *Build, sub_path: []const u8) LazyPath { + return lazy_path.join(b.allocator, sub_path) catch @panic("OOM"); + } + + pub fn join(lazy_path: LazyPath, arena: Allocator, sub_path: []const u8) Allocator.Error!LazyPath { return switch (lazy_path) { .src_path => |src| .{ .src_path = .{ .owner = src.owner, - .sub_path = b.pathResolve(&.{ src.sub_path, sub_path }), + .sub_path = try fs.path.resolve(arena, &.{ src.sub_path, sub_path }), } }, .generated => |gen| .{ .generated = .{ .file = gen.file, .up = gen.up, - .sub_path = b.pathResolve(&.{ gen.sub_path, sub_path }), + .sub_path = try fs.path.resolve(arena, &.{ gen.sub_path, sub_path }), } }, .cwd_relative => |cwd_relative| .{ - .cwd_relative = b.pathResolve(&.{ cwd_relative, sub_path }), + .cwd_relative = try fs.path.resolve(arena, &.{ cwd_relative, sub_path }), }, .dependency => |dep| .{ .dependency = .{ .dependency = dep.dependency, - .sub_path = b.pathResolve(&.{ dep.sub_path, sub_path }), + .sub_path = try fs.path.resolve(arena, &.{ dep.sub_path, sub_path }), } }, }; } diff --git a/lib/std/Build/Cache/Path.zig b/lib/std/Build/Cache/Path.zig index b81786d0a8b6..65c6f6a9bc1a 100644 --- a/lib/std/Build/Cache/Path.zig +++ b/lib/std/Build/Cache/Path.zig @@ -32,16 +32,16 @@ pub fn resolvePosix(p: Path, arena: Allocator, sub_path: []const u8) Allocator.E }; } -pub fn joinString(p: Path, allocator: Allocator, sub_path: []const u8) Allocator.Error![]u8 { +pub fn joinString(p: Path, gpa: Allocator, sub_path: []const u8) Allocator.Error![]u8 { const parts: []const []const u8 = if (p.sub_path.len == 0) &.{sub_path} else &.{ p.sub_path, sub_path }; - return p.root_dir.join(allocator, parts); + return p.root_dir.join(gpa, parts); } -pub fn joinStringZ(p: Path, allocator: Allocator, sub_path: []const u8) Allocator.Error![:0]u8 { +pub fn joinStringZ(p: Path, gpa: Allocator, sub_path: []const u8) Allocator.Error![:0]u8 { const parts: 
[]const []const u8 = if (p.sub_path.len == 0) &.{sub_path} else &.{ p.sub_path, sub_path }; - return p.root_dir.joinZ(allocator, parts); + return p.root_dir.joinZ(gpa, parts); } pub fn openFile( diff --git a/lib/std/Build/Fuzz.zig b/lib/std/Build/Fuzz.zig index 2628b9251621..9857db5a1fe2 100644 --- a/lib/std/Build/Fuzz.zig +++ b/lib/std/Build/Fuzz.zig @@ -1,57 +1,102 @@ +const builtin = @import("builtin"); const std = @import("../std.zig"); -const Fuzz = @This(); +const Build = std.Build; const Step = std.Build.Step; const assert = std.debug.assert; const fatal = std.process.fatal; +const Allocator = std.mem.Allocator; +const log = std.log; + +const Fuzz = @This(); const build_runner = @import("root"); +pub const WebServer = @import("Fuzz/WebServer.zig"); +pub const abi = @import("Fuzz/abi.zig"); + pub fn start( + gpa: Allocator, + arena: Allocator, + global_cache_directory: Build.Cache.Directory, + zig_lib_directory: Build.Cache.Directory, + zig_exe_path: []const u8, thread_pool: *std.Thread.Pool, all_steps: []const *Step, ttyconf: std.io.tty.Config, + listen_address: std.net.Address, prog_node: std.Progress.Node, -) void { - const count = block: { +) Allocator.Error!void { + const fuzz_run_steps = block: { const rebuild_node = prog_node.start("Rebuilding Unit Tests", 0); defer rebuild_node.end(); - var count: usize = 0; var wait_group: std.Thread.WaitGroup = .{}; defer wait_group.wait(); + var fuzz_run_steps: std.ArrayListUnmanaged(*Step.Run) = .{}; + defer fuzz_run_steps.deinit(gpa); for (all_steps) |step| { const run = step.cast(Step.Run) orelse continue; if (run.fuzz_tests.items.len > 0 and run.producer != null) { thread_pool.spawnWg(&wait_group, rebuildTestsWorkerRun, .{ run, ttyconf, rebuild_node }); - count += 1; + try fuzz_run_steps.append(gpa, run); } } - if (count == 0) fatal("no fuzz tests found", .{}); - rebuild_node.setEstimatedTotalItems(count); - break :block count; + if (fuzz_run_steps.items.len == 0) fatal("no fuzz tests found", .{}); + 
rebuild_node.setEstimatedTotalItems(fuzz_run_steps.items.len); + break :block try arena.dupe(*Step.Run, fuzz_run_steps.items); }; // Detect failure. - for (all_steps) |step| { - const run = step.cast(Step.Run) orelse continue; - if (run.fuzz_tests.items.len > 0 and run.rebuilt_executable == null) + for (fuzz_run_steps) |run| { + assert(run.fuzz_tests.items.len > 0); + if (run.rebuilt_executable == null) fatal("one or more unit tests failed to be rebuilt in fuzz mode", .{}); } + var web_server: WebServer = .{ + .gpa = gpa, + .global_cache_directory = global_cache_directory, + .zig_lib_directory = zig_lib_directory, + .zig_exe_path = zig_exe_path, + .listen_address = listen_address, + .fuzz_run_steps = fuzz_run_steps, + + .msg_queue = .{}, + .mutex = .{}, + .condition = .{}, + + .coverage_files = .{}, + .coverage_mutex = .{}, + .coverage_condition = .{}, + }; + + // For accepting HTTP connections. + const web_server_thread = std.Thread.spawn(.{}, WebServer.run, .{&web_server}) catch |err| { + fatal("unable to spawn web server thread: {s}", .{@errorName(err)}); + }; + defer web_server_thread.join(); + + // For polling messages and sending updates to subscribers. 
+ const coverage_thread = std.Thread.spawn(.{}, WebServer.coverageRun, .{&web_server}) catch |err| { + fatal("unable to spawn coverage thread: {s}", .{@errorName(err)}); + }; + defer coverage_thread.join(); + { - const fuzz_node = prog_node.start("Fuzzing", count); + const fuzz_node = prog_node.start("Fuzzing", fuzz_run_steps.len); defer fuzz_node.end(); var wait_group: std.Thread.WaitGroup = .{}; defer wait_group.wait(); - for (all_steps) |step| { - const run = step.cast(Step.Run) orelse continue; + for (fuzz_run_steps) |run| { for (run.fuzz_tests.items) |unit_test_index| { assert(run.rebuilt_executable != null); - thread_pool.spawnWg(&wait_group, fuzzWorkerRun, .{ run, unit_test_index, ttyconf, fuzz_node }); + thread_pool.spawnWg(&wait_group, fuzzWorkerRun, .{ + run, &web_server, unit_test_index, ttyconf, fuzz_node, + }); } } } - fatal("all fuzz workers crashed", .{}); + log.err("all fuzz workers crashed", .{}); } fn rebuildTestsWorkerRun(run: *Step.Run, ttyconf: std.io.tty.Config, parent_prog_node: std.Progress.Node) void { @@ -74,20 +119,21 @@ fn rebuildTestsWorkerRun(run: *Step.Run, ttyconf: std.io.tty.Config, parent_prog build_runner.printErrorMessages(gpa, &compile.step, ttyconf, stderr, false) catch {}; } - if (result) |rebuilt_bin_path| { - run.rebuilt_executable = rebuilt_bin_path; - } else |err| switch (err) { - error.MakeFailed => {}, + const rebuilt_bin_path = result catch |err| switch (err) { + error.MakeFailed => return, else => { - std.debug.print("step '{s}': failed to rebuild in fuzz mode: {s}\n", .{ + log.err("step '{s}': failed to rebuild in fuzz mode: {s}", .{ compile.step.name, @errorName(err), }); + return; }, - } + }; + run.rebuilt_executable = rebuilt_bin_path; } fn fuzzWorkerRun( run: *Step.Run, + web_server: *WebServer, unit_test_index: u32, ttyconf: std.io.tty.Config, parent_prog_node: std.Progress.Node, @@ -98,17 +144,19 @@ fn fuzzWorkerRun( const prog_node = parent_prog_node.start(test_name, 0); defer prog_node.end(); - 
run.rerunInFuzzMode(unit_test_index, prog_node) catch |err| switch (err) { + run.rerunInFuzzMode(web_server, unit_test_index, prog_node) catch |err| switch (err) { error.MakeFailed => { const stderr = std.io.getStdErr(); std.debug.lockStdErr(); defer std.debug.unlockStdErr(); build_runner.printErrorMessages(gpa, &run.step, ttyconf, stderr, false) catch {}; + return; }, else => { - std.debug.print("step '{s}': failed to rebuild '{s}' in fuzz mode: {s}\n", .{ + log.err("step '{s}': failed to rerun '{s}' in fuzz mode: {s}", .{ run.step.name, test_name, @errorName(err), }); + return; }, }; } diff --git a/lib/std/Build/Fuzz/WebServer.zig b/lib/std/Build/Fuzz/WebServer.zig new file mode 100644 index 000000000000..c0dfddacd566 --- /dev/null +++ b/lib/std/Build/Fuzz/WebServer.zig @@ -0,0 +1,679 @@ +const builtin = @import("builtin"); + +const std = @import("../../std.zig"); +const Allocator = std.mem.Allocator; +const Build = std.Build; +const Step = std.Build.Step; +const Coverage = std.debug.Coverage; +const abi = std.Build.Fuzz.abi; +const log = std.log; + +const WebServer = @This(); + +gpa: Allocator, +global_cache_directory: Build.Cache.Directory, +zig_lib_directory: Build.Cache.Directory, +zig_exe_path: []const u8, +listen_address: std.net.Address, +fuzz_run_steps: []const *Step.Run, + +/// Messages from fuzz workers. Protected by mutex. +msg_queue: std.ArrayListUnmanaged(Msg), +/// Protects `msg_queue` only. +mutex: std.Thread.Mutex, +/// Signaled when there is a message in `msg_queue`. +condition: std.Thread.Condition, + +coverage_files: std.AutoArrayHashMapUnmanaged(u64, CoverageMap), +/// Protects `coverage_files` only. +coverage_mutex: std.Thread.Mutex, +/// Signaled when `coverage_files` changes. 
+coverage_condition: std.Thread.Condition, + +const CoverageMap = struct { + mapped_memory: []align(std.mem.page_size) const u8, + coverage: Coverage, + source_locations: []Coverage.SourceLocation, + /// Elements are indexes into `source_locations` pointing to the unit tests that are being fuzz tested. + entry_points: std.ArrayListUnmanaged(u32), + + fn deinit(cm: *CoverageMap, gpa: Allocator) void { + std.posix.munmap(cm.mapped_memory); + cm.coverage.deinit(gpa); + cm.* = undefined; + } +}; + +const Msg = union(enum) { + coverage: struct { + id: u64, + run: *Step.Run, + }, + entry_point: struct { + coverage_id: u64, + addr: u64, + }, +}; + +pub fn run(ws: *WebServer) void { + var http_server = ws.listen_address.listen(.{ + .reuse_address = true, + }) catch |err| { + log.err("failed to listen to port {d}: {s}", .{ ws.listen_address.in.getPort(), @errorName(err) }); + return; + }; + const port = http_server.listen_address.in.getPort(); + log.info("web interface listening at http://127.0.0.1:{d}/", .{port}); + + while (true) { + const connection = http_server.accept() catch |err| { + log.err("failed to accept connection: {s}", .{@errorName(err)}); + return; + }; + _ = std.Thread.spawn(.{}, accept, .{ ws, connection }) catch |err| { + log.err("unable to spawn connection thread: {s}", .{@errorName(err)}); + connection.stream.close(); + continue; + }; + } +} + +fn accept(ws: *WebServer, connection: std.net.Server.Connection) void { + defer connection.stream.close(); + + var read_buffer: [0x4000]u8 = undefined; + var server = std.http.Server.init(connection, &read_buffer); + var web_socket: std.http.WebSocket = undefined; + var send_buffer: [0x4000]u8 = undefined; + var ws_recv_buffer: [0x4000]u8 align(4) = undefined; + while (server.state == .ready) { + var request = server.receiveHead() catch |err| switch (err) { + error.HttpConnectionClosing => return, + else => { + log.err("closing http connection: {s}", .{@errorName(err)}); + return; + }, + }; + if 
(web_socket.init(&request, &send_buffer, &ws_recv_buffer) catch |err| { + log.err("initializing web socket: {s}", .{@errorName(err)}); + return; + }) { + serveWebSocket(ws, &web_socket) catch |err| { + log.err("unable to serve web socket connection: {s}", .{@errorName(err)}); + return; + }; + } else { + serveRequest(ws, &request) catch |err| switch (err) { + error.AlreadyReported => return, + else => |e| { + log.err("unable to serve {s}: {s}", .{ request.head.target, @errorName(e) }); + return; + }, + }; + } + } +} + +fn serveRequest(ws: *WebServer, request: *std.http.Server.Request) !void { + if (std.mem.eql(u8, request.head.target, "/") or + std.mem.eql(u8, request.head.target, "/debug") or + std.mem.eql(u8, request.head.target, "/debug/")) + { + try serveFile(ws, request, "fuzzer/index.html", "text/html"); + } else if (std.mem.eql(u8, request.head.target, "/main.js") or + std.mem.eql(u8, request.head.target, "/debug/main.js")) + { + try serveFile(ws, request, "fuzzer/main.js", "application/javascript"); + } else if (std.mem.eql(u8, request.head.target, "/main.wasm")) { + try serveWasm(ws, request, .ReleaseFast); + } else if (std.mem.eql(u8, request.head.target, "/debug/main.wasm")) { + try serveWasm(ws, request, .Debug); + } else if (std.mem.eql(u8, request.head.target, "/sources.tar") or + std.mem.eql(u8, request.head.target, "/debug/sources.tar")) + { + try serveSourcesTar(ws, request); + } else { + try request.respond("not found", .{ + .status = .not_found, + .extra_headers = &.{ + .{ .name = "content-type", .value = "text/plain" }, + }, + }); + } +} + +fn serveFile( + ws: *WebServer, + request: *std.http.Server.Request, + name: []const u8, + content_type: []const u8, +) !void { + const gpa = ws.gpa; + // The desired API is actually sendfile, which will require enhancing std.http.Server. + // We load the file with every request so that the user can make changes to the file + // and refresh the HTML page without restarting this server. 
+ const file_contents = ws.zig_lib_directory.handle.readFileAlloc(gpa, name, 10 * 1024 * 1024) catch |err| { + log.err("failed to read '{}{s}': {s}", .{ ws.zig_lib_directory, name, @errorName(err) }); + return error.AlreadyReported; + }; + defer gpa.free(file_contents); + try request.respond(file_contents, .{ + .extra_headers = &.{ + .{ .name = "content-type", .value = content_type }, + cache_control_header, + }, + }); +} + +fn serveWasm( + ws: *WebServer, + request: *std.http.Server.Request, + optimize_mode: std.builtin.OptimizeMode, +) !void { + const gpa = ws.gpa; + + var arena_instance = std.heap.ArenaAllocator.init(gpa); + defer arena_instance.deinit(); + const arena = arena_instance.allocator(); + + // Do the compilation every request, so that the user can edit the files + // and see the changes without restarting the server. + const wasm_binary_path = try buildWasmBinary(ws, arena, optimize_mode); + // std.http.Server does not have a sendfile API yet. + const file_contents = try std.fs.cwd().readFileAlloc(gpa, wasm_binary_path, 10 * 1024 * 1024); + defer gpa.free(file_contents); + try request.respond(file_contents, .{ + .extra_headers = &.{ + .{ .name = "content-type", .value = "application/wasm" }, + cache_control_header, + }, + }); +} + +fn buildWasmBinary( + ws: *WebServer, + arena: Allocator, + optimize_mode: std.builtin.OptimizeMode, +) ![]const u8 { + const gpa = ws.gpa; + + const main_src_path: Build.Cache.Path = .{ + .root_dir = ws.zig_lib_directory, + .sub_path = "fuzzer/wasm/main.zig", + }; + const walk_src_path: Build.Cache.Path = .{ + .root_dir = ws.zig_lib_directory, + .sub_path = "docs/wasm/Walk.zig", + }; + const html_render_src_path: Build.Cache.Path = .{ + .root_dir = ws.zig_lib_directory, + .sub_path = "docs/wasm/html_render.zig", + }; + + var argv: std.ArrayListUnmanaged([]const u8) = .{}; + + try argv.appendSlice(arena, &.{ + ws.zig_exe_path, "build-exe", // + "-fno-entry", // + "-O", @tagName(optimize_mode), // + "-target", 
"wasm32-freestanding", // + "-mcpu", "baseline+atomics+bulk_memory+multivalue+mutable_globals+nontrapping_fptoint+reference_types+sign_ext", // + "--cache-dir", ws.global_cache_directory.path orelse ".", // + "--global-cache-dir", ws.global_cache_directory.path orelse ".", // + "--name", "fuzzer", // + "-rdynamic", // + "-fsingle-threaded", // + "--dep", "Walk", // + "--dep", "html_render", // + try std.fmt.allocPrint(arena, "-Mroot={}", .{main_src_path}), // + try std.fmt.allocPrint(arena, "-MWalk={}", .{walk_src_path}), // + "--dep", "Walk", // + try std.fmt.allocPrint(arena, "-Mhtml_render={}", .{html_render_src_path}), // + "--listen=-", + }); + + var child = std.process.Child.init(argv.items, gpa); + child.stdin_behavior = .Pipe; + child.stdout_behavior = .Pipe; + child.stderr_behavior = .Pipe; + try child.spawn(); + + var poller = std.io.poll(gpa, enum { stdout, stderr }, .{ + .stdout = child.stdout.?, + .stderr = child.stderr.?, + }); + defer poller.deinit(); + + try sendMessage(child.stdin.?, .update); + try sendMessage(child.stdin.?, .exit); + + const Header = std.zig.Server.Message.Header; + var result: ?[]const u8 = null; + var result_error_bundle = std.zig.ErrorBundle.empty; + + const stdout = poller.fifo(.stdout); + + poll: while (true) { + while (stdout.readableLength() < @sizeOf(Header)) { + if (!(try poller.poll())) break :poll; + } + const header = stdout.reader().readStruct(Header) catch unreachable; + while (stdout.readableLength() < header.bytes_len) { + if (!(try poller.poll())) break :poll; + } + const body = stdout.readableSliceOfLen(header.bytes_len); + + switch (header.tag) { + .zig_version => { + if (!std.mem.eql(u8, builtin.zig_version_string, body)) { + return error.ZigProtocolVersionMismatch; + } + }, + .error_bundle => { + const EbHdr = std.zig.Server.Message.ErrorBundle; + const eb_hdr = @as(*align(1) const EbHdr, @ptrCast(body)); + const extra_bytes = + body[@sizeOf(EbHdr)..][0 .. 
@sizeOf(u32) * eb_hdr.extra_len]; + const string_bytes = + body[@sizeOf(EbHdr) + extra_bytes.len ..][0..eb_hdr.string_bytes_len]; + // TODO: use @ptrCast when the compiler supports it + const unaligned_extra = std.mem.bytesAsSlice(u32, extra_bytes); + const extra_array = try arena.alloc(u32, unaligned_extra.len); + @memcpy(extra_array, unaligned_extra); + result_error_bundle = .{ + .string_bytes = try arena.dupe(u8, string_bytes), + .extra = extra_array, + }; + }, + .emit_bin_path => { + const EbpHdr = std.zig.Server.Message.EmitBinPath; + const ebp_hdr = @as(*align(1) const EbpHdr, @ptrCast(body)); + if (!ebp_hdr.flags.cache_hit) { + log.info("source changes detected; rebuilt wasm component", .{}); + } + result = try arena.dupe(u8, body[@sizeOf(EbpHdr)..]); + }, + else => {}, // ignore other messages + } + + stdout.discard(body.len); + } + + const stderr = poller.fifo(.stderr); + if (stderr.readableLength() > 0) { + const owned_stderr = try stderr.toOwnedSlice(); + defer gpa.free(owned_stderr); + std.debug.print("{s}", .{owned_stderr}); + } + + // Send EOF to stdin. 
+ child.stdin.?.close(); + child.stdin = null; + + switch (try child.wait()) { + .Exited => |code| { + if (code != 0) { + log.err( + "the following command exited with error code {d}:\n{s}", + .{ code, try Build.Step.allocPrintCmd(arena, null, argv.items) }, + ); + return error.WasmCompilationFailed; + } + }, + .Signal, .Stopped, .Unknown => { + log.err( + "the following command terminated unexpectedly:\n{s}", + .{try Build.Step.allocPrintCmd(arena, null, argv.items)}, + ); + return error.WasmCompilationFailed; + }, + } + + if (result_error_bundle.errorMessageCount() > 0) { + const color = std.zig.Color.auto; + result_error_bundle.renderToStdErr(color.renderOptions()); + log.err("the following command failed with {d} compilation errors:\n{s}", .{ + result_error_bundle.errorMessageCount(), + try Build.Step.allocPrintCmd(arena, null, argv.items), + }); + return error.WasmCompilationFailed; + } + + return result orelse { + log.err("child process failed to report result\n{s}", .{ + try Build.Step.allocPrintCmd(arena, null, argv.items), + }); + return error.WasmCompilationFailed; + }; +} + +fn sendMessage(file: std.fs.File, tag: std.zig.Client.Message.Tag) !void { + const header: std.zig.Client.Message.Header = .{ + .tag = tag, + .bytes_len = 0, + }; + try file.writeAll(std.mem.asBytes(&header)); +} + +fn serveWebSocket(ws: *WebServer, web_socket: *std.http.WebSocket) !void { + ws.coverage_mutex.lock(); + defer ws.coverage_mutex.unlock(); + + // On first connection, the client needs all the coverage information + // so that subsequent updates can contain only the updated bits. 
+ var prev_unique_runs: usize = 0; + var prev_entry_points: usize = 0; + try sendCoverageContext(ws, web_socket, &prev_unique_runs, &prev_entry_points); + while (true) { + ws.coverage_condition.timedWait(&ws.coverage_mutex, std.time.ns_per_ms * 500) catch {}; + try sendCoverageContext(ws, web_socket, &prev_unique_runs, &prev_entry_points); + } +} + +fn sendCoverageContext( + ws: *WebServer, + web_socket: *std.http.WebSocket, + prev_unique_runs: *usize, + prev_entry_points: *usize, +) !void { + const coverage_maps = ws.coverage_files.values(); + if (coverage_maps.len == 0) return; + // TODO: make each events URL correspond to one coverage map + const coverage_map = &coverage_maps[0]; + const cov_header: *const abi.SeenPcsHeader = @ptrCast(coverage_map.mapped_memory[0..@sizeOf(abi.SeenPcsHeader)]); + const seen_pcs = coverage_map.mapped_memory[@sizeOf(abi.SeenPcsHeader) + coverage_map.source_locations.len * @sizeOf(usize) ..]; + const n_runs = @atomicLoad(usize, &cov_header.n_runs, .monotonic); + const unique_runs = @atomicLoad(usize, &cov_header.unique_runs, .monotonic); + const lowest_stack = @atomicLoad(usize, &cov_header.lowest_stack, .monotonic); + if (prev_unique_runs.* != unique_runs) { + // There has been an update. + if (prev_unique_runs.* == 0) { + // We need to send initial context. 
+ const header: abi.SourceIndexHeader = .{ + .flags = .{}, + .directories_len = @intCast(coverage_map.coverage.directories.entries.len), + .files_len = @intCast(coverage_map.coverage.files.entries.len), + .source_locations_len = @intCast(coverage_map.source_locations.len), + .string_bytes_len = @intCast(coverage_map.coverage.string_bytes.items.len), + }; + const iovecs: [5]std.posix.iovec_const = .{ + makeIov(std.mem.asBytes(&header)), + makeIov(std.mem.sliceAsBytes(coverage_map.coverage.directories.keys())), + makeIov(std.mem.sliceAsBytes(coverage_map.coverage.files.keys())), + makeIov(std.mem.sliceAsBytes(coverage_map.source_locations)), + makeIov(coverage_map.coverage.string_bytes.items), + }; + try web_socket.writeMessagev(&iovecs, .binary); + } + + const header: abi.CoverageUpdateHeader = .{ + .n_runs = n_runs, + .unique_runs = unique_runs, + .lowest_stack = lowest_stack, + }; + const iovecs: [2]std.posix.iovec_const = .{ + makeIov(std.mem.asBytes(&header)), + makeIov(seen_pcs), + }; + try web_socket.writeMessagev(&iovecs, .binary); + + prev_unique_runs.* = unique_runs; + } + + if (prev_entry_points.* != coverage_map.entry_points.items.len) { + const header: abi.EntryPointHeader = .{ + .flags = .{ + .locs_len = @intCast(coverage_map.entry_points.items.len), + }, + }; + const iovecs: [2]std.posix.iovec_const = .{ + makeIov(std.mem.asBytes(&header)), + makeIov(std.mem.sliceAsBytes(coverage_map.entry_points.items)), + }; + try web_socket.writeMessagev(&iovecs, .binary); + + prev_entry_points.* = coverage_map.entry_points.items.len; + } +} + +fn serveSourcesTar(ws: *WebServer, request: *std.http.Server.Request) !void { + const gpa = ws.gpa; + + var arena_instance = std.heap.ArenaAllocator.init(gpa); + defer arena_instance.deinit(); + const arena = arena_instance.allocator(); + + var send_buffer: [0x4000]u8 = undefined; + var response = request.respondStreaming(.{ + .send_buffer = &send_buffer, + .respond_options = .{ + .extra_headers = &.{ + .{ .name = 
"content-type", .value = "application/x-tar" }, + cache_control_header, + }, + }, + }); + const w = response.writer(); + + const DedupeTable = std.ArrayHashMapUnmanaged(Build.Cache.Path, void, Build.Cache.Path.TableAdapter, false); + var dedupe_table: DedupeTable = .{}; + defer dedupe_table.deinit(gpa); + + for (ws.fuzz_run_steps) |run_step| { + const compile_step_inputs = run_step.producer.?.step.inputs.table; + for (compile_step_inputs.keys(), compile_step_inputs.values()) |dir_path, *file_list| { + try dedupe_table.ensureUnusedCapacity(gpa, file_list.items.len); + for (file_list.items) |sub_path| { + // Special file "." means the entire directory. + if (std.mem.eql(u8, sub_path, ".")) continue; + const joined_path = try dir_path.join(arena, sub_path); + _ = dedupe_table.getOrPutAssumeCapacity(joined_path); + } + } + } + + const deduped_paths = dedupe_table.keys(); + const SortContext = struct { + pub fn lessThan(this: @This(), lhs: Build.Cache.Path, rhs: Build.Cache.Path) bool { + _ = this; + return switch (std.mem.order(u8, lhs.root_dir.path orelse ".", rhs.root_dir.path orelse ".")) { + .lt => true, + .gt => false, + .eq => std.mem.lessThan(u8, lhs.sub_path, rhs.sub_path), + }; + } + }; + std.mem.sortUnstable(Build.Cache.Path, deduped_paths, SortContext{}, SortContext.lessThan); + + var cwd_cache: ?[]const u8 = null; + + for (deduped_paths) |joined_path| { + var file = joined_path.root_dir.handle.openFile(joined_path.sub_path, .{}) catch |err| { + log.err("failed to open {}: {s}", .{ joined_path, @errorName(err) }); + continue; + }; + defer file.close(); + + const stat = file.stat() catch |err| { + log.err("failed to stat {}: {s}", .{ joined_path, @errorName(err) }); + continue; + }; + if (stat.kind != .file) + continue; + + const padding = p: { + const remainder = stat.size % 512; + break :p if (remainder > 0) 512 - remainder else 0; + }; + + var file_header = std.tar.output.Header.init(); + file_header.typeflag = .regular; + try file_header.setPath( + 
joined_path.root_dir.path orelse try memoizedCwd(arena, &cwd_cache), + joined_path.sub_path, + ); + try file_header.setSize(stat.size); + try file_header.updateChecksum(); + try w.writeAll(std.mem.asBytes(&file_header)); + try w.writeFile(file); + try w.writeByteNTimes(0, padding); + } + + // intentionally omitting the pointless trailer + //try w.writeByteNTimes(0, 512 * 2); + try response.end(); +} + +fn memoizedCwd(arena: Allocator, opt_ptr: *?[]const u8) ![]const u8 { + if (opt_ptr.*) |cached| return cached; + const result = try std.process.getCwdAlloc(arena); + opt_ptr.* = result; + return result; +} + +const cache_control_header: std.http.Header = .{ + .name = "cache-control", + .value = "max-age=0, must-revalidate", +}; + +pub fn coverageRun(ws: *WebServer) void { + ws.mutex.lock(); + defer ws.mutex.unlock(); + + while (true) { + ws.condition.wait(&ws.mutex); + for (ws.msg_queue.items) |msg| switch (msg) { + .coverage => |coverage| prepareTables(ws, coverage.run, coverage.id) catch |err| switch (err) { + error.AlreadyReported => continue, + else => |e| log.err("failed to prepare code coverage tables: {s}", .{@errorName(e)}), + }, + .entry_point => |entry_point| addEntryPoint(ws, entry_point.coverage_id, entry_point.addr) catch |err| switch (err) { + error.AlreadyReported => continue, + else => |e| log.err("failed to prepare code coverage tables: {s}", .{@errorName(e)}), + }, + }; + ws.msg_queue.clearRetainingCapacity(); + } +} + +fn prepareTables( + ws: *WebServer, + run_step: *Step.Run, + coverage_id: u64, +) error{ OutOfMemory, AlreadyReported }!void { + const gpa = ws.gpa; + + ws.coverage_mutex.lock(); + defer ws.coverage_mutex.unlock(); + + const gop = try ws.coverage_files.getOrPut(gpa, coverage_id); + if (gop.found_existing) { + // We are fuzzing the same executable with multiple threads. + // Perhaps the same unit test; perhaps a different one. 
In any + // case, since the coverage file is the same, we only have to + // notice changes to that one file in order to learn coverage for + // this particular executable. + return; + } + errdefer _ = ws.coverage_files.pop(); + + gop.value_ptr.* = .{ + .coverage = std.debug.Coverage.init, + .mapped_memory = undefined, // populated below + .source_locations = undefined, // populated below + .entry_points = .{}, + }; + errdefer gop.value_ptr.coverage.deinit(gpa); + + const rebuilt_exe_path: Build.Cache.Path = .{ + .root_dir = Build.Cache.Directory.cwd(), + .sub_path = run_step.rebuilt_executable.?, + }; + var debug_info = std.debug.Info.load(gpa, rebuilt_exe_path, &gop.value_ptr.coverage) catch |err| { + log.err("step '{s}': failed to load debug information for '{}': {s}", .{ + run_step.step.name, rebuilt_exe_path, @errorName(err), + }); + return error.AlreadyReported; + }; + defer debug_info.deinit(gpa); + + const coverage_file_path: Build.Cache.Path = .{ + .root_dir = run_step.step.owner.cache_root, + .sub_path = "v/" ++ std.fmt.hex(coverage_id), + }; + var coverage_file = coverage_file_path.root_dir.handle.openFile(coverage_file_path.sub_path, .{}) catch |err| { + log.err("step '{s}': failed to load coverage file '{}': {s}", .{ + run_step.step.name, coverage_file_path, @errorName(err), + }); + return error.AlreadyReported; + }; + defer coverage_file.close(); + + const file_size = coverage_file.getEndPos() catch |err| { + log.err("unable to check len of coverage file '{}': {s}", .{ coverage_file_path, @errorName(err) }); + return error.AlreadyReported; + }; + + const mapped_memory = std.posix.mmap( + null, + file_size, + std.posix.PROT.READ, + .{ .TYPE = .SHARED }, + coverage_file.handle, + 0, + ) catch |err| { + log.err("failed to map coverage file '{}': {s}", .{ coverage_file_path, @errorName(err) }); + return error.AlreadyReported; + }; + gop.value_ptr.mapped_memory = mapped_memory; + + const header: *const abi.SeenPcsHeader = 
@ptrCast(mapped_memory[0..@sizeOf(abi.SeenPcsHeader)]); + const pcs_bytes = mapped_memory[@sizeOf(abi.SeenPcsHeader)..][0 .. header.pcs_len * @sizeOf(usize)]; + const pcs = std.mem.bytesAsSlice(usize, pcs_bytes); + const source_locations = try gpa.alloc(Coverage.SourceLocation, pcs.len); + errdefer gpa.free(source_locations); + debug_info.resolveAddresses(gpa, pcs, source_locations) catch |err| { + log.err("failed to resolve addresses to source locations: {s}", .{@errorName(err)}); + return error.AlreadyReported; + }; + gop.value_ptr.source_locations = source_locations; + + ws.coverage_condition.broadcast(); +} + +fn addEntryPoint(ws: *WebServer, coverage_id: u64, addr: u64) error{ AlreadyReported, OutOfMemory }!void { + ws.coverage_mutex.lock(); + defer ws.coverage_mutex.unlock(); + + const coverage_map = ws.coverage_files.getPtr(coverage_id).?; + const ptr = coverage_map.mapped_memory; + const pcs_bytes = ptr[@sizeOf(abi.SeenPcsHeader)..][0 .. coverage_map.source_locations.len * @sizeOf(usize)]; + const pcs: []const usize = @alignCast(std.mem.bytesAsSlice(usize, pcs_bytes)); + const index = std.sort.upperBound(usize, pcs, addr, struct { + fn order(context: usize, item: usize) std.math.Order { + return std.math.order(item, context); + } + }.order); + if (index >= pcs.len) { + log.err("unable to find unit test entry address 0x{x} in source locations (range: 0x{x} to 0x{x})", .{ + addr, pcs[0], pcs[pcs.len - 1], + }); + return error.AlreadyReported; + } + if (false) { + const sl = coverage_map.source_locations[index]; + const file_name = coverage_map.coverage.stringAt(coverage_map.coverage.fileAt(sl.file).basename); + log.debug("server found entry point {s}:{d}:{d}", .{ + file_name, sl.line, sl.column, + }); + } + const gpa = ws.gpa; + try coverage_map.entry_points.append(gpa, @intCast(index)); +} + +fn makeIov(s: []const u8) std.posix.iovec_const { + return .{ + .base = s.ptr, + .len = s.len, + }; +} diff --git a/lib/std/Build/Fuzz/abi.zig 
b/lib/std/Build/Fuzz/abi.zig new file mode 100644 index 000000000000..f385f9b08a6b --- /dev/null +++ b/lib/std/Build/Fuzz/abi.zig @@ -0,0 +1,69 @@ +//! This file is shared among Zig code running in wildly different contexts: +//! libfuzzer, compiled alongside unit tests, the build runner, running on the +//! host computer, and the fuzzing web interface webassembly code running in +//! the browser. All of these components interface to some degree via an ABI. + +/// libfuzzer uses this and its usize is the one that counts. To match the ABI, +/// make the ints be the size of the target used with libfuzzer. +/// +/// Trailing: +/// * pc_addr: usize for each pcs_len +/// * 1 bit per pc_addr, usize elements +pub const SeenPcsHeader = extern struct { + n_runs: usize, + unique_runs: usize, + pcs_len: usize, + lowest_stack: usize, +}; + +pub const ToClientTag = enum(u8) { + source_index, + coverage_update, + entry_points, + _, +}; + +/// Sent to the fuzzer web client on first connection to the websocket URL. +/// +/// Trailing: +/// * std.debug.Coverage.String for each directories_len +/// * std.debug.Coverage.File for each files_len +/// * std.debug.Coverage.SourceLocation for each source_locations_len +/// * u8 for each string_bytes_len +pub const SourceIndexHeader = extern struct { + flags: Flags, + directories_len: u32, + files_len: u32, + source_locations_len: u32, + string_bytes_len: u32, + + pub const Flags = packed struct(u32) { + tag: ToClientTag = .source_index, + _: u24 = 0, + }; +}; + +/// Sent to the fuzzer web client whenever the set of covered source locations +/// changes. +/// +/// Trailing: +/// * one bit per source_locations_len, contained in u8 elements +pub const CoverageUpdateHeader = extern struct { + tag: ToClientTag = .coverage_update, + n_runs: u64 align(1), + unique_runs: u64 align(1), + lowest_stack: u64 align(1), +}; + +/// Sent to the fuzzer web client when the set of entry points is updated. 
+/// +/// Trailing: +/// * one u32 index of source_locations per locs_len +pub const EntryPointHeader = extern struct { + flags: Flags, + + pub const Flags = packed struct(u32) { + tag: ToClientTag = .entry_points, + locs_len: u24, + }; +}; diff --git a/lib/std/Build/Step.zig b/lib/std/Build/Step.zig index 8f3236d867a5..47a6e49a82c2 100644 --- a/lib/std/Build/Step.zig +++ b/lib/std/Build/Step.zig @@ -559,7 +559,8 @@ fn zigProcessUpdate(s: *Step, zp: *ZigProcess, watch: bool) !?[]const u8 { }, .zig_lib => zl: { if (s.cast(Step.Compile)) |compile| { - if (compile.zig_lib_dir) |lp| { + if (compile.zig_lib_dir) |zig_lib_dir| { + const lp = try zig_lib_dir.join(arena, sub_path); try addWatchInput(s, lp); break :zl; } diff --git a/lib/std/Build/Step/Run.zig b/lib/std/Build/Step/Run.zig index c2d25cd82cbb..5d9ebce9aa44 100644 --- a/lib/std/Build/Step/Run.zig +++ b/lib/std/Build/Step/Run.zig @@ -205,6 +205,7 @@ pub fn enableTestRunnerMode(run: *Run) void { run.stdio = .zig_test; run.addArgs(&.{ std.fmt.allocPrint(arena, "--seed=0x{x}", .{b.graph.random_seed}) catch @panic("OOM"), + std.fmt.allocPrint(arena, "--cache-dir={s}", .{b.cache_root.path orelse ""}) catch @panic("OOM"), "--listen=-", }); } @@ -845,7 +846,12 @@ fn make(step: *Step, options: Step.MakeOptions) !void { ); } -pub fn rerunInFuzzMode(run: *Run, unit_test_index: u32, prog_node: std.Progress.Node) !void { +pub fn rerunInFuzzMode( + run: *Run, + web_server: *std.Build.Fuzz.WebServer, + unit_test_index: u32, + prog_node: std.Progress.Node, +) !void { const step = &run.step; const b = step.owner; const arena = b.allocator; @@ -877,7 +883,10 @@ pub fn rerunInFuzzMode(run: *Run, unit_test_index: u32, prog_node: std.Progress. 
const has_side_effects = false; const rand_int = std.crypto.random.int(u64); const tmp_dir_path = "tmp" ++ fs.path.sep_str ++ std.fmt.hex(rand_int); - try runCommand(run, argv_list.items, has_side_effects, tmp_dir_path, prog_node, unit_test_index); + try runCommand(run, argv_list.items, has_side_effects, tmp_dir_path, prog_node, .{ + .unit_test_index = unit_test_index, + .web_server = web_server, + }); } fn populateGeneratedPaths( @@ -952,13 +961,18 @@ fn termMatches(expected: ?std.process.Child.Term, actual: std.process.Child.Term }; } +const FuzzContext = struct { + web_server: *std.Build.Fuzz.WebServer, + unit_test_index: u32, +}; + fn runCommand( run: *Run, argv: []const []const u8, has_side_effects: bool, output_dir_path: []const u8, prog_node: std.Progress.Node, - fuzz_unit_test_index: ?u32, + fuzz_context: ?FuzzContext, ) !void { const step = &run.step; const b = step.owner; @@ -977,7 +991,7 @@ fn runCommand( var interp_argv = std.ArrayList([]const u8).init(b.allocator); defer interp_argv.deinit(); - const result = spawnChildAndCollect(run, argv, has_side_effects, prog_node, fuzz_unit_test_index) catch |err| term: { + const result = spawnChildAndCollect(run, argv, has_side_effects, prog_node, fuzz_context) catch |err| term: { // InvalidExe: cpu arch mismatch // FileNotFound: can happen with a wrong dynamic linker path if (err == error.InvalidExe or err == error.FileNotFound) interpret: { @@ -1113,7 +1127,7 @@ fn runCommand( try Step.handleVerbose2(step.owner, cwd, run.env_map, interp_argv.items); - break :term spawnChildAndCollect(run, interp_argv.items, has_side_effects, prog_node, fuzz_unit_test_index) catch |e| { + break :term spawnChildAndCollect(run, interp_argv.items, has_side_effects, prog_node, fuzz_context) catch |e| { if (!run.failing_to_execute_foreign_is_an_error) return error.MakeSkipped; return step.fail("unable to spawn interpreter {s}: {s}", .{ @@ -1133,7 +1147,7 @@ fn runCommand( const final_argv = if (interp_argv.items.len == 0) argv else 
interp_argv.items; - if (fuzz_unit_test_index != null) { + if (fuzz_context != null) { try step.handleChildProcessTerm(result.term, cwd, final_argv); return; } @@ -1298,12 +1312,12 @@ fn spawnChildAndCollect( argv: []const []const u8, has_side_effects: bool, prog_node: std.Progress.Node, - fuzz_unit_test_index: ?u32, + fuzz_context: ?FuzzContext, ) !ChildProcResult { const b = run.step.owner; const arena = b.allocator; - if (fuzz_unit_test_index != null) { + if (fuzz_context != null) { assert(!has_side_effects); assert(run.stdio == .zig_test); } @@ -1357,7 +1371,7 @@ fn spawnChildAndCollect( var timer = try std.time.Timer.start(); const result = if (run.stdio == .zig_test) - evalZigTest(run, &child, prog_node, fuzz_unit_test_index) + evalZigTest(run, &child, prog_node, fuzz_context) else evalGeneric(run, &child); @@ -1383,7 +1397,7 @@ fn evalZigTest( run: *Run, child: *std.process.Child, prog_node: std.Progress.Node, - fuzz_unit_test_index: ?u32, + fuzz_context: ?FuzzContext, ) !StdIoResult { const gpa = run.step.owner.allocator; const arena = run.step.owner.allocator; @@ -1394,8 +1408,8 @@ fn evalZigTest( }); defer poller.deinit(); - if (fuzz_unit_test_index) |index| { - try sendRunTestMessage(child.stdin.?, .start_fuzzing, index); + if (fuzz_context) |fuzz| { + try sendRunTestMessage(child.stdin.?, .start_fuzzing, fuzz.unit_test_index); } else { run.fuzz_tests.clearRetainingCapacity(); try sendMessage(child.stdin.?, .query_test_metadata); @@ -1413,6 +1427,7 @@ fn evalZigTest( var log_err_count: u32 = 0; var metadata: ?TestMetadata = null; + var coverage_id: ?u64 = null; var sub_prog_node: ?std.Progress.Node = null; defer if (sub_prog_node) |n| n.end(); @@ -1437,7 +1452,7 @@ fn evalZigTest( } }, .test_metadata => { - assert(fuzz_unit_test_index == null); + assert(fuzz_context == null); const TmHdr = std.zig.Server.Message.TestMetadata; const tm_hdr = @as(*align(1) const TmHdr, @ptrCast(body)); test_count = tm_hdr.tests_len; @@ -1466,7 +1481,7 @@ fn evalZigTest( 
try requestNextTest(child.stdin.?, &metadata.?, &sub_prog_node); }, .test_results => { - assert(fuzz_unit_test_index == null); + assert(fuzz_context == null); const md = metadata.?; const TrHdr = std.zig.Server.Message.TestResults; @@ -1500,6 +1515,34 @@ fn evalZigTest( try requestNextTest(child.stdin.?, &metadata.?, &sub_prog_node); }, + .coverage_id => { + const web_server = fuzz_context.?.web_server; + const msg_ptr: *align(1) const u64 = @ptrCast(body); + coverage_id = msg_ptr.*; + { + web_server.mutex.lock(); + defer web_server.mutex.unlock(); + try web_server.msg_queue.append(web_server.gpa, .{ .coverage = .{ + .id = coverage_id.?, + .run = run, + } }); + web_server.condition.signal(); + } + }, + .fuzz_start_addr => { + const web_server = fuzz_context.?.web_server; + const msg_ptr: *align(1) const u64 = @ptrCast(body); + const addr = msg_ptr.*; + { + web_server.mutex.lock(); + defer web_server.mutex.unlock(); + try web_server.msg_queue.append(web_server.gpa, .{ .entry_point = .{ + .addr = addr, + .coverage_id = coverage_id.?, + } }); + web_server.condition.signal(); + } + }, else => {}, // ignore other messages } diff --git a/lib/std/debug.zig b/lib/std/debug.zig index 4d3437f665c6..a3a8a533eed0 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -14,9 +14,12 @@ const native_os = builtin.os.tag; const native_endian = native_arch.endian(); pub const MemoryAccessor = @import("debug/MemoryAccessor.zig"); +pub const FixedBufferReader = @import("debug/FixedBufferReader.zig"); pub const Dwarf = @import("debug/Dwarf.zig"); pub const Pdb = @import("debug/Pdb.zig"); pub const SelfInfo = @import("debug/SelfInfo.zig"); +pub const Info = @import("debug/Info.zig"); +pub const Coverage = @import("debug/Coverage.zig"); /// Unresolved source locations can be represented with a single `usize` that /// corresponds to a virtual memory address of the program counter. 
Combined @@ -26,6 +29,18 @@ pub const SourceLocation = struct { line: u64, column: u64, file_name: []const u8, + + pub const invalid: SourceLocation = .{ + .line = 0, + .column = 0, + .file_name = &.{}, + }; +}; + +pub const Symbol = struct { + name: []const u8 = "???", + compile_unit_name: []const u8 = "???", + source_location: ?SourceLocation = null, }; /// Deprecated because it returns the optimization mode of the standard @@ -748,7 +763,7 @@ pub fn writeCurrentStackTrace( // an overflow. We do not need to signal `StackIterator` as it will correctly detect this // condition on the subsequent iteration and return `null` thus terminating the loop. // same behaviour for x86-windows-msvc - const address = if (return_address == 0) return_address else return_address - 1; + const address = return_address -| 1; try printSourceAtAddress(debug_info, out_stream, address, tty_config); } else printLastUnwindError(&it, debug_info, out_stream, tty_config); } @@ -871,13 +886,13 @@ pub fn printSourceAtAddress(debug_info: *SelfInfo, out_stream: anytype, address: error.MissingDebugInfo, error.InvalidDebugInfo => return printUnknownSource(debug_info, out_stream, address, tty_config), else => return err, }; - defer symbol_info.deinit(debug_info.allocator); + defer if (symbol_info.source_location) |sl| debug_info.allocator.free(sl.file_name); return printLineInfo( out_stream, - symbol_info.line_info, + symbol_info.source_location, address, - symbol_info.symbol_name, + symbol_info.name, symbol_info.compile_unit_name, tty_config, printLineFromFileAnyOs, @@ -886,7 +901,7 @@ pub fn printSourceAtAddress(debug_info: *SelfInfo, out_stream: anytype, address: fn printLineInfo( out_stream: anytype, - line_info: ?SourceLocation, + source_location: ?SourceLocation, address: usize, symbol_name: []const u8, compile_unit_name: []const u8, @@ -896,8 +911,8 @@ fn printLineInfo( nosuspend { try tty_config.setColor(out_stream, .bold); - if (line_info) |*li| { - try out_stream.print("{s}:{d}:{d}", .{ 
li.file_name, li.line, li.column }); + if (source_location) |*sl| { + try out_stream.print("{s}:{d}:{d}", .{ sl.file_name, sl.line, sl.column }); } else { try out_stream.writeAll("???:?:?"); } @@ -910,11 +925,11 @@ fn printLineInfo( try out_stream.writeAll("\n"); // Show the matching source code line if possible - if (line_info) |li| { - if (printLineFromFile(out_stream, li)) { - if (li.column > 0) { + if (source_location) |sl| { + if (printLineFromFile(out_stream, sl)) { + if (sl.column > 0) { // The caret already takes one char - const space_needed = @as(usize, @intCast(li.column - 1)); + const space_needed = @as(usize, @intCast(sl.column - 1)); try out_stream.writeByteNTimes(' ', space_needed); try tty_config.setColor(out_stream, .green); @@ -932,10 +947,10 @@ fn printLineInfo( } } -fn printLineFromFileAnyOs(out_stream: anytype, line_info: SourceLocation) !void { +fn printLineFromFileAnyOs(out_stream: anytype, source_location: SourceLocation) !void { // Need this to always block even in async I/O mode, because this could potentially // be called from e.g. the event loop code crashing. - var f = try fs.cwd().openFile(line_info.file_name, .{}); + var f = try fs.cwd().openFile(source_location.file_name, .{}); defer f.close(); // TODO fstat and make sure that the file has the correct size @@ -944,7 +959,7 @@ fn printLineFromFileAnyOs(out_stream: anytype, line_info: SourceLocation) !void const line_start = seek: { var current_line_start: usize = 0; var next_line: usize = 1; - while (next_line != line_info.line) { + while (next_line != source_location.line) { const slice = buf[current_line_start..amt_read]; if (mem.indexOfScalar(u8, slice, '\n')) |pos| { next_line += 1; @@ -1481,99 +1496,6 @@ pub const SafetyLock = struct { } }; -/// Deprecated. Don't use this, just read from your memory directly. -/// -/// This only exists because someone was too lazy to rework logic that used to -/// operate on an open file to operate on a memory buffer instead. 
-pub const DeprecatedFixedBufferReader = struct { - buf: []const u8, - pos: usize = 0, - endian: std.builtin.Endian, - - pub const Error = error{ EndOfBuffer, Overflow, InvalidBuffer }; - - pub fn seekTo(fbr: *DeprecatedFixedBufferReader, pos: u64) Error!void { - if (pos > fbr.buf.len) return error.EndOfBuffer; - fbr.pos = @intCast(pos); - } - - pub fn seekForward(fbr: *DeprecatedFixedBufferReader, amount: u64) Error!void { - if (fbr.buf.len - fbr.pos < amount) return error.EndOfBuffer; - fbr.pos += @intCast(amount); - } - - pub inline fn readByte(fbr: *DeprecatedFixedBufferReader) Error!u8 { - if (fbr.pos >= fbr.buf.len) return error.EndOfBuffer; - defer fbr.pos += 1; - return fbr.buf[fbr.pos]; - } - - pub fn readByteSigned(fbr: *DeprecatedFixedBufferReader) Error!i8 { - return @bitCast(try fbr.readByte()); - } - - pub fn readInt(fbr: *DeprecatedFixedBufferReader, comptime T: type) Error!T { - const size = @divExact(@typeInfo(T).Int.bits, 8); - if (fbr.buf.len - fbr.pos < size) return error.EndOfBuffer; - defer fbr.pos += size; - return std.mem.readInt(T, fbr.buf[fbr.pos..][0..size], fbr.endian); - } - - pub fn readIntChecked( - fbr: *DeprecatedFixedBufferReader, - comptime T: type, - ma: *MemoryAccessor, - ) Error!T { - if (ma.load(T, @intFromPtr(fbr.buf[fbr.pos..].ptr)) == null) - return error.InvalidBuffer; - - return fbr.readInt(T); - } - - pub fn readUleb128(fbr: *DeprecatedFixedBufferReader, comptime T: type) Error!T { - return std.leb.readUleb128(T, fbr); - } - - pub fn readIleb128(fbr: *DeprecatedFixedBufferReader, comptime T: type) Error!T { - return std.leb.readIleb128(T, fbr); - } - - pub fn readAddress(fbr: *DeprecatedFixedBufferReader, format: std.dwarf.Format) Error!u64 { - return switch (format) { - .@"32" => try fbr.readInt(u32), - .@"64" => try fbr.readInt(u64), - }; - } - - pub fn readAddressChecked( - fbr: *DeprecatedFixedBufferReader, - format: std.dwarf.Format, - ma: *MemoryAccessor, - ) Error!u64 { - return switch (format) { - .@"32" => try 
fbr.readIntChecked(u32, ma), - .@"64" => try fbr.readIntChecked(u64, ma), - }; - } - - pub fn readBytes(fbr: *DeprecatedFixedBufferReader, len: usize) Error![]const u8 { - if (fbr.buf.len - fbr.pos < len) return error.EndOfBuffer; - defer fbr.pos += len; - return fbr.buf[fbr.pos..][0..len]; - } - - pub fn readBytesTo(fbr: *DeprecatedFixedBufferReader, comptime sentinel: u8) Error![:sentinel]const u8 { - const end = @call(.always_inline, std.mem.indexOfScalarPos, .{ - u8, - fbr.buf, - fbr.pos, - sentinel, - }) orelse return error.EndOfBuffer; - defer fbr.pos = end + 1; - return fbr.buf[fbr.pos..end :sentinel]; - } -}; - /// Detect whether the program is being executed in the Valgrind virtual machine. /// /// When Valgrind integrations are disabled, this returns comptime-known false. @@ -1587,6 +1509,7 @@ pub inline fn inValgrind() bool { test { _ = &Dwarf; _ = &MemoryAccessor; + _ = &FixedBufferReader; _ = &Pdb; _ = &SelfInfo; _ = &dumpHex; diff --git a/lib/std/debug/Coverage.zig b/lib/std/debug/Coverage.zig new file mode 100644 index 000000000000..f341efaffbc2 --- /dev/null +++ b/lib/std/debug/Coverage.zig @@ -0,0 +1,244 @@ +const std = @import("../std.zig"); +const Allocator = std.mem.Allocator; +const Hash = std.hash.Wyhash; +const Dwarf = std.debug.Dwarf; +const assert = std.debug.assert; + +const Coverage = @This(); + +/// Provides a globally-scoped integer index for directories. +/// +/// As opposed to, for example, a directory index that is compilation-unit +/// scoped inside a single ELF module. +/// +/// String memory references the memory-mapped debug information. +/// +/// Protected by `mutex`. +directories: std.ArrayHashMapUnmanaged(String, void, String.MapContext, false), +/// Provides a globally-scoped integer index for files. +/// +/// String memory references the memory-mapped debug information. +/// +/// Protected by `mutex`. 
+files: std.ArrayHashMapUnmanaged(File, void, File.MapContext, false), +string_bytes: std.ArrayListUnmanaged(u8), +/// Protects the other fields. +mutex: std.Thread.Mutex, + +pub const init: Coverage = .{ + .directories = .{}, + .files = .{}, + .mutex = .{}, + .string_bytes = .{}, +}; + +pub const String = enum(u32) { + _, + + pub const MapContext = struct { + string_bytes: []const u8, + + pub fn eql(self: @This(), a: String, b: String, b_index: usize) bool { + _ = b_index; + const a_slice = span(self.string_bytes[@intFromEnum(a)..]); + const b_slice = span(self.string_bytes[@intFromEnum(b)..]); + return std.mem.eql(u8, a_slice, b_slice); + } + + pub fn hash(self: @This(), a: String) u32 { + return @truncate(Hash.hash(0, span(self.string_bytes[@intFromEnum(a)..]))); + } + }; + + pub const SliceAdapter = struct { + string_bytes: []const u8, + + pub fn eql(self: @This(), a_slice: []const u8, b: String, b_index: usize) bool { + _ = b_index; + const b_slice = span(self.string_bytes[@intFromEnum(b)..]); + return std.mem.eql(u8, a_slice, b_slice); + } + pub fn hash(self: @This(), a: []const u8) u32 { + _ = self; + return @truncate(Hash.hash(0, a)); + } + }; +}; + +pub const SourceLocation = extern struct { + file: File.Index, + line: u32, + column: u32, + + pub const invalid: SourceLocation = .{ + .file = .invalid, + .line = 0, + .column = 0, + }; +}; + +pub const File = extern struct { + directory_index: u32, + basename: String, + + pub const Index = enum(u32) { + invalid = std.math.maxInt(u32), + _, + }; + + pub const MapContext = struct { + string_bytes: []const u8, + + pub fn hash(self: MapContext, a: File) u32 { + const a_basename = span(self.string_bytes[@intFromEnum(a.basename)..]); + return @truncate(Hash.hash(a.directory_index, a_basename)); + } + + pub fn eql(self: MapContext, a: File, b: File, b_index: usize) bool { + _ = b_index; + if (a.directory_index != b.directory_index) return false; + const a_basename = 
span(self.string_bytes[@intFromEnum(a.basename)..]); + const b_basename = span(self.string_bytes[@intFromEnum(b.basename)..]); + return std.mem.eql(u8, a_basename, b_basename); + } + }; + + pub const SliceAdapter = struct { + string_bytes: []const u8, + + pub const Entry = struct { + directory_index: u32, + basename: []const u8, + }; + + pub fn hash(self: @This(), a: Entry) u32 { + _ = self; + return @truncate(Hash.hash(a.directory_index, a.basename)); + } + + pub fn eql(self: @This(), a: Entry, b: File, b_index: usize) bool { + _ = b_index; + if (a.directory_index != b.directory_index) return false; + const b_basename = span(self.string_bytes[@intFromEnum(b.basename)..]); + return std.mem.eql(u8, a.basename, b_basename); + } + }; +}; + +pub fn deinit(cov: *Coverage, gpa: Allocator) void { + cov.directories.deinit(gpa); + cov.files.deinit(gpa); + cov.string_bytes.deinit(gpa); + cov.* = undefined; +} + +pub fn fileAt(cov: *Coverage, index: File.Index) *File { + return &cov.files.keys()[@intFromEnum(index)]; +} + +pub fn stringAt(cov: *Coverage, index: String) [:0]const u8 { + return span(cov.string_bytes.items[@intFromEnum(index)..]); +} + +pub const ResolveAddressesDwarfError = Dwarf.ScanError; + +pub fn resolveAddressesDwarf( + cov: *Coverage, + gpa: Allocator, + sorted_pc_addrs: []const u64, + /// Asserts its length equals length of `sorted_pc_addrs`. + output: []SourceLocation, + d: *Dwarf, +) ResolveAddressesDwarfError!void { + assert(sorted_pc_addrs.len == output.len); + assert(d.compile_units_sorted); + + var cu_i: usize = 0; + var line_table_i: usize = 0; + var cu: *Dwarf.CompileUnit = &d.compile_unit_list.items[0]; + var range = cu.pc_range.?; + // Protects directories and files tables from other threads. 
+ cov.mutex.lock(); + defer cov.mutex.unlock(); + next_pc: for (sorted_pc_addrs, output) |pc, *out| { + while (pc >= range.end) { + cu_i += 1; + if (cu_i >= d.compile_unit_list.items.len) { + out.* = SourceLocation.invalid; + continue :next_pc; + } + cu = &d.compile_unit_list.items[cu_i]; + line_table_i = 0; + range = cu.pc_range orelse { + out.* = SourceLocation.invalid; + continue :next_pc; + }; + } + if (pc < range.start) { + out.* = SourceLocation.invalid; + continue :next_pc; + } + if (line_table_i == 0) { + line_table_i = 1; + cov.mutex.unlock(); + defer cov.mutex.lock(); + d.populateSrcLocCache(gpa, cu) catch |err| switch (err) { + error.MissingDebugInfo, error.InvalidDebugInfo => { + out.* = SourceLocation.invalid; + cu_i += 1; + if (cu_i < d.compile_unit_list.items.len) { + cu = &d.compile_unit_list.items[cu_i]; + line_table_i = 0; + if (cu.pc_range) |r| range = r; + } + continue :next_pc; + }, + else => |e| return e, + }; + } + const slc = &cu.src_loc_cache.?; + const table_addrs = slc.line_table.keys(); + while (line_table_i < table_addrs.len and table_addrs[line_table_i] < pc) line_table_i += 1; + + const entry = slc.line_table.values()[line_table_i - 1]; + const corrected_file_index = entry.file - @intFromBool(slc.version < 5); + const file_entry = slc.files[corrected_file_index]; + const dir_path = slc.directories[file_entry.dir_index].path; + try cov.string_bytes.ensureUnusedCapacity(gpa, dir_path.len + file_entry.path.len + 2); + const dir_gop = try cov.directories.getOrPutContextAdapted(gpa, dir_path, String.SliceAdapter{ + .string_bytes = cov.string_bytes.items, + }, String.MapContext{ + .string_bytes = cov.string_bytes.items, + }); + if (!dir_gop.found_existing) + dir_gop.key_ptr.* = addStringAssumeCapacity(cov, dir_path); + const file_gop = try cov.files.getOrPutContextAdapted(gpa, File.SliceAdapter.Entry{ + .directory_index = @intCast(dir_gop.index), + .basename = file_entry.path, + }, File.SliceAdapter{ + .string_bytes = 
cov.string_bytes.items, + }, File.MapContext{ + .string_bytes = cov.string_bytes.items, + }); + if (!file_gop.found_existing) file_gop.key_ptr.* = .{ + .directory_index = @intCast(dir_gop.index), + .basename = addStringAssumeCapacity(cov, file_entry.path), + }; + out.* = .{ + .file = @enumFromInt(file_gop.index), + .line = entry.line, + .column = entry.column, + }; + } +} + +pub fn addStringAssumeCapacity(cov: *Coverage, s: []const u8) String { + const result: String = @enumFromInt(cov.string_bytes.items.len); + cov.string_bytes.appendSliceAssumeCapacity(s); + cov.string_bytes.appendAssumeCapacity(0); + return result; +} + +fn span(s: []const u8) [:0]const u8 { + return std.mem.sliceTo(@as([:0]const u8, @ptrCast(s)), 0); +} diff --git a/lib/std/debug/Dwarf.zig b/lib/std/debug/Dwarf.zig index 991c7315492c..e3d4ab1a8fe4 100644 --- a/lib/std/debug/Dwarf.zig +++ b/lib/std/debug/Dwarf.zig @@ -12,6 +12,8 @@ const native_endian = builtin.cpu.arch.endian(); const std = @import("../std.zig"); const Allocator = std.mem.Allocator; +const elf = std.elf; +const mem = std.mem; const DW = std.dwarf; const AT = DW.AT; const EH = DW.EH; @@ -22,11 +24,10 @@ const UT = DW.UT; const assert = std.debug.assert; const cast = std.math.cast; const maxInt = std.math.maxInt; -const readInt = std.mem.readInt; const MemoryAccessor = std.debug.MemoryAccessor; +const Path = std.Build.Cache.Path; -/// Did I mention this is deprecated? 
-const DeprecatedFixedBufferReader = std.debug.DeprecatedFixedBufferReader; +const FixedBufferReader = std.debug.FixedBufferReader; const Dwarf = @This(); @@ -37,6 +38,7 @@ pub const call_frame = @import("Dwarf/call_frame.zig"); endian: std.builtin.Endian, sections: SectionArray = null_section_array, is_macho: bool, +compile_units_sorted: bool, // Filled later by the initializer abbrev_table_list: std.ArrayListUnmanaged(Abbrev.Table) = .{}, @@ -136,6 +138,34 @@ pub const CompileUnit = struct { rnglists_base: usize, loclists_base: usize, frame_base: ?*const FormValue, + + src_loc_cache: ?SrcLocCache, + + pub const SrcLocCache = struct { + line_table: LineTable, + directories: []const FileEntry, + files: []FileEntry, + version: u16, + + pub const LineTable = std.AutoArrayHashMapUnmanaged(u64, LineEntry); + + pub const LineEntry = struct { + line: u32, + column: u32, + /// Offset by 1 depending on whether Dwarf version is >= 5. + file: u32, + }; + + pub fn findSource(slc: *const SrcLocCache, address: u64) !LineEntry { + const index = std.sort.upperBound(u64, slc.line_table.keys(), address, struct { + fn order(context: u64, item: u64) std.math.Order { + return std.math.order(item, context); + } + }.order); + if (index == 0) return missing(); + return slc.line_table.values()[index - 1]; + } + }; }; pub const FormValue = union(enum) { @@ -252,13 +282,13 @@ pub const Die = struct { .@"32" => { const byte_offset = compile_unit.str_offsets_base + 4 * index; if (byte_offset + 4 > debug_str_offsets.len) return bad(); - const offset = readInt(u32, debug_str_offsets[byte_offset..][0..4], di.endian); + const offset = mem.readInt(u32, debug_str_offsets[byte_offset..][0..4], di.endian); return getStringGeneric(opt_str, offset); }, .@"64" => { const byte_offset = compile_unit.str_offsets_base + 8 * index; if (byte_offset + 8 > debug_str_offsets.len) return bad(); - const offset = readInt(u64, debug_str_offsets[byte_offset..][0..8], di.endian); + const offset = mem.readInt(u64, 
debug_str_offsets[byte_offset..][0..8], di.endian); return getStringGeneric(opt_str, offset); }, } @@ -325,7 +355,7 @@ pub const ExceptionFrameHeader = struct { var left: usize = 0; var len: usize = self.fde_count; - var fbr: DeprecatedFixedBufferReader = .{ .buf = self.entries, .endian = native_endian }; + var fbr: FixedBufferReader = .{ .buf = self.entries, .endian = native_endian }; while (len > 1) { const mid = left + len / 2; @@ -368,7 +398,7 @@ pub const ExceptionFrameHeader = struct { const eh_frame = @as([*]const u8, @ptrFromInt(self.eh_frame_ptr))[0 .. eh_frame_len orelse maxInt(u32)]; const fde_offset = fde_ptr - self.eh_frame_ptr; - var eh_frame_fbr: DeprecatedFixedBufferReader = .{ + var eh_frame_fbr: FixedBufferReader = .{ .buf = eh_frame, .pos = fde_offset, .endian = native_endian, @@ -426,9 +456,9 @@ pub const EntryHeader = struct { } /// Reads a header for either an FDE or a CIE, then advances the fbr to the position after the trailing structure. - /// `fbr` must be a DeprecatedFixedBufferReader backed by either the .eh_frame or .debug_frame sections. + /// `fbr` must be a FixedBufferReader backed by either the .eh_frame or .debug_frame sections. 
pub fn read( - fbr: *DeprecatedFixedBufferReader, + fbr: *FixedBufferReader, opt_ma: ?*MemoryAccessor, dwarf_section: Section.Id, ) !EntryHeader { @@ -541,7 +571,7 @@ pub const CommonInformationEntry = struct { ) !CommonInformationEntry { if (addr_size_bytes > 8) return error.UnsupportedAddrSize; - var fbr: DeprecatedFixedBufferReader = .{ .buf = cie_bytes, .endian = endian }; + var fbr: FixedBufferReader = .{ .buf = cie_bytes, .endian = endian }; const version = try fbr.readByte(); switch (dwarf_section) { @@ -675,7 +705,7 @@ pub const FrameDescriptionEntry = struct { ) !FrameDescriptionEntry { if (addr_size_bytes > 8) return error.InvalidAddrSize; - var fbr: DeprecatedFixedBufferReader = .{ .buf = fde_bytes, .endian = endian }; + var fbr: FixedBufferReader = .{ .buf = fde_bytes, .endian = endian }; const pc_begin = try readEhPointer(&fbr, cie.fde_pointer_enc, addr_size_bytes, .{ .pc_rel_base = try pcRelBase(@intFromPtr(&fde_bytes[fbr.pos]), pc_rel_offset), @@ -721,12 +751,14 @@ const num_sections = std.enums.directEnumArrayLen(Section.Id, 0); pub const SectionArray = [num_sections]?Section; pub const null_section_array = [_]?Section{null} ** num_sections; +pub const OpenError = ScanError; + /// Initialize DWARF info. The caller has the responsibility to initialize most /// the `Dwarf` fields before calling. `binary_mem` is the raw bytes of the /// main binary file (not the secondary debug info file). 
-pub fn open(di: *Dwarf, allocator: Allocator) !void { - try di.scanAllFunctions(allocator); - try di.scanAllCompileUnits(allocator); +pub fn open(d: *Dwarf, gpa: Allocator) OpenError!void { + try d.scanAllFunctions(gpa); + try d.scanAllCompileUnits(gpa); } const PcRange = struct { @@ -747,21 +779,26 @@ pub fn sectionVirtualOffset(di: Dwarf, dwarf_section: Section.Id, base_address: return if (di.sections[@intFromEnum(dwarf_section)]) |s| s.virtualOffset(base_address) else null; } -pub fn deinit(di: *Dwarf, allocator: Allocator) void { +pub fn deinit(di: *Dwarf, gpa: Allocator) void { for (di.sections) |opt_section| { - if (opt_section) |s| if (s.owned) allocator.free(s.data); + if (opt_section) |s| if (s.owned) gpa.free(s.data); } for (di.abbrev_table_list.items) |*abbrev| { - abbrev.deinit(allocator); + abbrev.deinit(gpa); } - di.abbrev_table_list.deinit(allocator); + di.abbrev_table_list.deinit(gpa); for (di.compile_unit_list.items) |*cu| { - cu.die.deinit(allocator); + if (cu.src_loc_cache) |*slc| { + slc.line_table.deinit(gpa); + gpa.free(slc.directories); + gpa.free(slc.files); + } + cu.die.deinit(gpa); } - di.compile_unit_list.deinit(allocator); - di.func_list.deinit(allocator); - di.cie_map.deinit(allocator); - di.fde_list.deinit(allocator); + di.compile_unit_list.deinit(gpa); + di.func_list.deinit(gpa); + di.cie_map.deinit(gpa); + di.fde_list.deinit(gpa); di.* = undefined; } @@ -777,8 +814,13 @@ pub fn getSymbolName(di: *Dwarf, address: u64) ?[]const u8 { return null; } -fn scanAllFunctions(di: *Dwarf, allocator: Allocator) !void { - var fbr: DeprecatedFixedBufferReader = .{ .buf = di.section(.debug_info).?, .endian = di.endian }; +pub const ScanError = error{ + InvalidDebugInfo, + MissingDebugInfo, +} || Allocator.Error || std.debug.FixedBufferReader.Error; + +fn scanAllFunctions(di: *Dwarf, allocator: Allocator) ScanError!void { + var fbr: FixedBufferReader = .{ .buf = di.section(.debug_info).?, .endian = di.endian }; var this_unit_offset: u64 = 0; while 
(this_unit_offset < fbr.buf.len) { @@ -837,6 +879,7 @@ fn scanAllFunctions(di: *Dwarf, allocator: Allocator) !void { .rnglists_base = 0, .loclists_base = 0, .frame_base = null, + .src_loc_cache = null, }; while (true) { @@ -964,8 +1007,8 @@ fn scanAllFunctions(di: *Dwarf, allocator: Allocator) !void { } } -fn scanAllCompileUnits(di: *Dwarf, allocator: Allocator) !void { - var fbr: DeprecatedFixedBufferReader = .{ .buf = di.section(.debug_info).?, .endian = di.endian }; +fn scanAllCompileUnits(di: *Dwarf, allocator: Allocator) ScanError!void { + var fbr: FixedBufferReader = .{ .buf = di.section(.debug_info).?, .endian = di.endian }; var this_unit_offset: u64 = 0; var attrs_buf = std.ArrayList(Die.Attr).init(allocator); @@ -1023,6 +1066,7 @@ fn scanAllCompileUnits(di: *Dwarf, allocator: Allocator) !void { .rnglists_base = if (compile_unit_die.getAttr(AT.rnglists_base)) |fv| try fv.getUInt(usize) else 0, .loclists_base = if (compile_unit_die.getAttr(AT.loclists_base)) |fv| try fv.getUInt(usize) else 0, .frame_base = compile_unit_die.getAttr(AT.frame_base), + .src_loc_cache = null, }; compile_unit.pc_range = x: { @@ -1052,12 +1096,45 @@ fn scanAllCompileUnits(di: *Dwarf, allocator: Allocator) !void { } } +/// Populate missing PC ranges in compilation units, and then sort them by start address. +/// Does not guarantee pc_range to be non-null because there could be missing debug info. 
+pub fn sortCompileUnits(d: *Dwarf) ScanError!void { + assert(!d.compile_units_sorted); + + for (d.compile_unit_list.items) |*cu| { + if (cu.pc_range != null) continue; + const ranges_value = cu.die.getAttr(AT.ranges) orelse continue; + var iter = DebugRangeIterator.init(ranges_value, d, cu) catch continue; + var start: u64 = maxInt(u64); + var end: u64 = 0; + while (try iter.next()) |range| { + start = @min(start, range.start_addr); + end = @max(end, range.end_addr); + } + if (end != 0) cu.pc_range = .{ + .start = start, + .end = end, + }; + } + + std.mem.sortUnstable(CompileUnit, d.compile_unit_list.items, {}, struct { + pub fn lessThan(ctx: void, a: CompileUnit, b: CompileUnit) bool { + _ = ctx; + const a_range = a.pc_range orelse return false; + const b_range = b.pc_range orelse return true; + return a_range.start < b_range.start; + } + }.lessThan); + + d.compile_units_sorted = true; +} + const DebugRangeIterator = struct { base_address: u64, section_type: Section.Id, di: *const Dwarf, compile_unit: *const CompileUnit, - fbr: DeprecatedFixedBufferReader, + fbr: FixedBufferReader, pub fn init(ranges_value: *const FormValue, di: *const Dwarf, compile_unit: *const CompileUnit) !@This() { const section_type = if (compile_unit.version >= 5) Section.Id.debug_rnglists else Section.Id.debug_ranges; @@ -1070,13 +1147,13 @@ const DebugRangeIterator = struct { .@"32" => { const offset_loc = @as(usize, @intCast(compile_unit.rnglists_base + 4 * idx)); if (offset_loc + 4 > debug_ranges.len) return bad(); - const offset = readInt(u32, debug_ranges[offset_loc..][0..4], di.endian); + const offset = mem.readInt(u32, debug_ranges[offset_loc..][0..4], di.endian); break :off compile_unit.rnglists_base + offset; }, .@"64" => { const offset_loc = @as(usize, @intCast(compile_unit.rnglists_base + 8 * idx)); if (offset_loc + 8 > debug_ranges.len) return bad(); - const offset = readInt(u64, debug_ranges[offset_loc..][0..8], di.endian); + const offset = mem.readInt(u64, 
debug_ranges[offset_loc..][0..8], di.endian); break :off compile_unit.rnglists_base + offset; }, } @@ -1199,7 +1276,8 @@ const DebugRangeIterator = struct { } }; -pub fn findCompileUnit(di: *const Dwarf, target_address: u64) !*const CompileUnit { +/// TODO: change this to binary searching the sorted compile unit list +pub fn findCompileUnit(di: *const Dwarf, target_address: u64) !*CompileUnit { for (di.compile_unit_list.items) |*compile_unit| { if (compile_unit.pc_range) |range| { if (target_address >= range.start and target_address < range.end) return compile_unit; @@ -1231,7 +1309,7 @@ fn getAbbrevTable(di: *Dwarf, allocator: Allocator, abbrev_offset: u64) !*const } fn parseAbbrevTable(di: *Dwarf, allocator: Allocator, offset: u64) !Abbrev.Table { - var fbr: DeprecatedFixedBufferReader = .{ + var fbr: FixedBufferReader = .{ .buf = di.section(.debug_abbrev).?, .pos = cast(usize, offset) orelse return bad(), .endian = di.endian, @@ -1283,11 +1361,11 @@ fn parseAbbrevTable(di: *Dwarf, allocator: Allocator, offset: u64) !Abbrev.Table } fn parseDie( - fbr: *DeprecatedFixedBufferReader, + fbr: *FixedBufferReader, attrs_buf: []Die.Attr, abbrev_table: *const Abbrev.Table, format: Format, -) !?Die { +) ScanError!?Die { const abbrev_code = try fbr.readUleb128(u64); if (abbrev_code == 0) return null; const table_entry = abbrev_table.get(abbrev_code) orelse return bad(); @@ -1309,34 +1387,36 @@ fn parseDie( }; } -pub fn getLineNumberInfo( - di: *Dwarf, - allocator: Allocator, - compile_unit: CompileUnit, - target_address: u64, -) !std.debug.SourceLocation { - const compile_unit_cwd = try compile_unit.die.getAttrString(di, AT.comp_dir, di.section(.debug_line_str), compile_unit); +fn runLineNumberProgram(d: *Dwarf, gpa: Allocator, compile_unit: *CompileUnit) !CompileUnit.SrcLocCache { + const compile_unit_cwd = try compile_unit.die.getAttrString(d, AT.comp_dir, d.section(.debug_line_str), compile_unit.*); const line_info_offset = try 
compile_unit.die.getAttrSecOffset(AT.stmt_list); - var fbr: DeprecatedFixedBufferReader = .{ .buf = di.section(.debug_line).?, .endian = di.endian }; + var fbr: FixedBufferReader = .{ + .buf = d.section(.debug_line).?, + .endian = d.endian, + }; try fbr.seekTo(line_info_offset); const unit_header = try readUnitHeader(&fbr, null); if (unit_header.unit_length == 0) return missing(); + const next_offset = unit_header.header_length + unit_header.unit_length; const version = try fbr.readInt(u16); if (version < 2) return bad(); - var addr_size: u8 = switch (unit_header.format) { - .@"32" => 4, - .@"64" => 8, + const addr_size: u8, const seg_size: u8 = if (version >= 5) .{ + try fbr.readByte(), + try fbr.readByte(), + } else .{ + switch (unit_header.format) { + .@"32" => 4, + .@"64" => 8, + }, + 0, }; - var seg_size: u8 = 0; - if (version >= 5) { - addr_size = try fbr.readByte(); - seg_size = try fbr.readByte(); - } + _ = addr_size; + _ = seg_size; const prologue_length = try fbr.readAddress(unit_header.format); const prog_start_offset = fbr.pos + prologue_length; @@ -1345,8 +1425,8 @@ pub fn getLineNumberInfo( if (minimum_instruction_length == 0) return bad(); if (version >= 4) { - // maximum_operations_per_instruction - _ = try fbr.readByte(); + const maximum_operations_per_instruction = try fbr.readByte(); + _ = maximum_operations_per_instruction; } const default_is_stmt = (try fbr.readByte()) != 0; @@ -1359,18 +1439,18 @@ pub fn getLineNumberInfo( const standard_opcode_lengths = try fbr.readBytes(opcode_base - 1); - var include_directories = std.ArrayList(FileEntry).init(allocator); - defer include_directories.deinit(); - var file_entries = std.ArrayList(FileEntry).init(allocator); - defer file_entries.deinit(); + var directories: std.ArrayListUnmanaged(FileEntry) = .{}; + defer directories.deinit(gpa); + var file_entries: std.ArrayListUnmanaged(FileEntry) = .{}; + defer file_entries.deinit(gpa); if (version < 5) { - try include_directories.append(.{ .path = 
compile_unit_cwd }); + try directories.append(gpa, .{ .path = compile_unit_cwd }); while (true) { const dir = try fbr.readBytesTo(0); if (dir.len == 0) break; - try include_directories.append(.{ .path = dir }); + try directories.append(gpa, .{ .path = dir }); } while (true) { @@ -1379,7 +1459,7 @@ pub fn getLineNumberInfo( const dir_index = try fbr.readUleb128(u32); const mtime = try fbr.readUleb128(u64); const size = try fbr.readUleb128(u64); - try file_entries.append(.{ + try file_entries.append(gpa, .{ .path = file_name, .dir_index = dir_index, .mtime = mtime, @@ -1403,52 +1483,10 @@ pub fn getLineNumberInfo( } const directories_count = try fbr.readUleb128(usize); - try include_directories.ensureUnusedCapacity(directories_count); - { - var i: usize = 0; - while (i < directories_count) : (i += 1) { - var e: FileEntry = .{ .path = &.{} }; - for (dir_ent_fmt_buf[0..directory_entry_format_count]) |ent_fmt| { - const form_value = try parseFormValue( - &fbr, - ent_fmt.form_code, - unit_header.format, - null, - ); - switch (ent_fmt.content_type_code) { - DW.LNCT.path => e.path = try form_value.getString(di.*), - DW.LNCT.directory_index => e.dir_index = try form_value.getUInt(u32), - DW.LNCT.timestamp => e.mtime = try form_value.getUInt(u64), - DW.LNCT.size => e.size = try form_value.getUInt(u64), - DW.LNCT.MD5 => e.md5 = switch (form_value) { - .data16 => |data16| data16.*, - else => return bad(), - }, - else => continue, - } - } - include_directories.appendAssumeCapacity(e); - } - } - } - - var file_ent_fmt_buf: [10]FileEntFmt = undefined; - const file_name_entry_format_count = try fbr.readByte(); - if (file_name_entry_format_count > file_ent_fmt_buf.len) return bad(); - for (file_ent_fmt_buf[0..file_name_entry_format_count]) |*ent_fmt| { - ent_fmt.* = .{ - .content_type_code = try fbr.readUleb128(u8), - .form_code = try fbr.readUleb128(u16), - }; - } - const file_names_count = try fbr.readUleb128(usize); - try file_entries.ensureUnusedCapacity(file_names_count); - { 
- var i: usize = 0; - while (i < file_names_count) : (i += 1) { - var e: FileEntry = .{ .path = &.{} }; - for (file_ent_fmt_buf[0..file_name_entry_format_count]) |ent_fmt| { + for (try directories.addManyAsSlice(gpa, directories_count)) |*e| { + e.* = .{ .path = &.{} }; + for (dir_ent_fmt_buf[0..directory_entry_format_count]) |ent_fmt| { const form_value = try parseFormValue( &fbr, ent_fmt.form_code, @@ -1456,7 +1494,7 @@ pub fn getLineNumberInfo( null, ); switch (ent_fmt.content_type_code) { - DW.LNCT.path => e.path = try form_value.getString(di.*), + DW.LNCT.path => e.path = try form_value.getString(d.*), DW.LNCT.directory_index => e.dir_index = try form_value.getUInt(u32), DW.LNCT.timestamp => e.mtime = try form_value.getUInt(u64), DW.LNCT.size => e.size = try form_value.getUInt(u64), @@ -1467,17 +1505,49 @@ pub fn getLineNumberInfo( else => continue, } } - file_entries.appendAssumeCapacity(e); + } + } + + var file_ent_fmt_buf: [10]FileEntFmt = undefined; + const file_name_entry_format_count = try fbr.readByte(); + if (file_name_entry_format_count > file_ent_fmt_buf.len) return bad(); + for (file_ent_fmt_buf[0..file_name_entry_format_count]) |*ent_fmt| { + ent_fmt.* = .{ + .content_type_code = try fbr.readUleb128(u8), + .form_code = try fbr.readUleb128(u16), + }; + } + + const file_names_count = try fbr.readUleb128(usize); + try file_entries.ensureUnusedCapacity(gpa, file_names_count); + + for (try file_entries.addManyAsSlice(gpa, file_names_count)) |*e| { + e.* = .{ .path = &.{} }; + for (file_ent_fmt_buf[0..file_name_entry_format_count]) |ent_fmt| { + const form_value = try parseFormValue( + &fbr, + ent_fmt.form_code, + unit_header.format, + null, + ); + switch (ent_fmt.content_type_code) { + DW.LNCT.path => e.path = try form_value.getString(d.*), + DW.LNCT.directory_index => e.dir_index = try form_value.getUInt(u32), + DW.LNCT.timestamp => e.mtime = try form_value.getUInt(u64), + DW.LNCT.size => e.size = try form_value.getUInt(u64), + DW.LNCT.MD5 => e.md5 = 
switch (form_value) { + .data16 => |data16| data16.*, + else => return bad(), + }, + else => continue, + } } } } - var prog = LineNumberProgram.init( - default_is_stmt, - include_directories.items, - target_address, - version, - ); + var prog = LineNumberProgram.init(default_is_stmt, version); + var line_table: CompileUnit.SrcLocCache.LineTable = .{}; + errdefer line_table.deinit(gpa); try fbr.seekTo(prog_start_offset); @@ -1493,7 +1563,7 @@ pub fn getLineNumberInfo( switch (sub_op) { DW.LNE.end_sequence => { prog.end_sequence = true; - if (try prog.checkLineMatch(allocator, file_entries.items)) |info| return info; + try prog.addRow(gpa, &line_table); prog.reset(); }, DW.LNE.set_address => { @@ -1505,7 +1575,7 @@ pub fn getLineNumberInfo( const dir_index = try fbr.readUleb128(u32); const mtime = try fbr.readUleb128(u64); const size = try fbr.readUleb128(u64); - try file_entries.append(.{ + try file_entries.append(gpa, .{ .path = path, .dir_index = dir_index, .mtime = mtime, @@ -1521,12 +1591,12 @@ pub fn getLineNumberInfo( const inc_line = @as(i32, line_base) + @as(i32, adjusted_opcode % line_range); prog.line += inc_line; prog.address += inc_addr; - if (try prog.checkLineMatch(allocator, file_entries.items)) |info| return info; + try prog.addRow(gpa, &line_table); prog.basic_block = false; } else { switch (opcode) { DW.LNS.copy => { - if (try prog.checkLineMatch(allocator, file_entries.items)) |info| return info; + try prog.addRow(gpa, &line_table); prog.basic_block = false; }, DW.LNS.advance_pc => { @@ -1568,7 +1638,39 @@ pub fn getLineNumberInfo( } } - return missing(); + return .{ + .line_table = line_table, + .directories = try directories.toOwnedSlice(gpa), + .files = try file_entries.toOwnedSlice(gpa), + .version = version, + }; +} + +pub fn populateSrcLocCache(d: *Dwarf, gpa: Allocator, cu: *CompileUnit) ScanError!void { + if (cu.src_loc_cache != null) return; + cu.src_loc_cache = try runLineNumberProgram(d, gpa, cu); +} + +pub fn getLineNumberInfo( + d: 
*Dwarf, + gpa: Allocator, + compile_unit: *CompileUnit, + target_address: u64, +) !std.debug.SourceLocation { + try populateSrcLocCache(d, gpa, compile_unit); + const slc = &compile_unit.src_loc_cache.?; + const entry = try slc.findSource(target_address); + const file_index = entry.file - @intFromBool(slc.version < 5); + if (file_index >= slc.files.len) return bad(); + const file_entry = &slc.files[file_index]; + if (file_entry.dir_index >= slc.directories.len) return bad(); + const dir_name = slc.directories[file_entry.dir_index].path; + const file_name = try std.fs.path.join(gpa, &.{ dir_name, file_entry.path }); + return .{ + .line = entry.line, + .column = entry.column, + .file_name = file_name, + }; } fn getString(di: Dwarf, offset: u64) ![:0]const u8 { @@ -1588,7 +1690,7 @@ fn readDebugAddr(di: Dwarf, compile_unit: CompileUnit, index: u64) !u64 { // The header is 8 or 12 bytes depending on is_64. if (compile_unit.addr_base < 8) return bad(); - const version = readInt(u16, debug_addr[compile_unit.addr_base - 4 ..][0..2], di.endian); + const version = mem.readInt(u16, debug_addr[compile_unit.addr_base - 4 ..][0..2], di.endian); if (version != 5) return bad(); const addr_size = debug_addr[compile_unit.addr_base - 2]; @@ -1598,9 +1700,9 @@ fn readDebugAddr(di: Dwarf, compile_unit: CompileUnit, index: u64) !u64 { if (byte_offset + addr_size > debug_addr.len) return bad(); return switch (addr_size) { 1 => debug_addr[byte_offset], - 2 => readInt(u16, debug_addr[byte_offset..][0..2], di.endian), - 4 => readInt(u32, debug_addr[byte_offset..][0..4], di.endian), - 8 => readInt(u64, debug_addr[byte_offset..][0..8], di.endian), + 2 => mem.readInt(u16, debug_addr[byte_offset..][0..2], di.endian), + 4 => mem.readInt(u32, debug_addr[byte_offset..][0..4], di.endian), + 8 => mem.readInt(u64, debug_addr[byte_offset..][0..8], di.endian), else => bad(), }; } @@ -1611,7 +1713,7 @@ fn readDebugAddr(di: Dwarf, compile_unit: CompileUnit, index: u64) !u64 { /// of FDEs is built for 
binary searching during unwinding. pub fn scanAllUnwindInfo(di: *Dwarf, allocator: Allocator, base_address: usize) !void { if (di.section(.eh_frame_hdr)) |eh_frame_hdr| blk: { - var fbr: DeprecatedFixedBufferReader = .{ .buf = eh_frame_hdr, .endian = native_endian }; + var fbr: FixedBufferReader = .{ .buf = eh_frame_hdr, .endian = native_endian }; const version = try fbr.readByte(); if (version != 1) break :blk; @@ -1651,7 +1753,7 @@ pub fn scanAllUnwindInfo(di: *Dwarf, allocator: Allocator, base_address: usize) const frame_sections = [2]Section.Id{ .eh_frame, .debug_frame }; for (frame_sections) |frame_section| { if (di.section(frame_section)) |section_data| { - var fbr: DeprecatedFixedBufferReader = .{ .buf = section_data, .endian = di.endian }; + var fbr: FixedBufferReader = .{ .buf = section_data, .endian = di.endian }; while (fbr.pos < fbr.buf.len) { const entry_header = try EntryHeader.read(&fbr, null, frame_section); switch (entry_header.type) { @@ -1695,11 +1797,11 @@ pub fn scanAllUnwindInfo(di: *Dwarf, allocator: Allocator, base_address: usize) } fn parseFormValue( - fbr: *DeprecatedFixedBufferReader, + fbr: *FixedBufferReader, form_id: u64, format: Format, implicit_const: ?i64, -) anyerror!FormValue { +) ScanError!FormValue { return switch (form_id) { FORM.addr => .{ .addr = try fbr.readAddress(switch (@bitSizeOf(usize)) { 32 => .@"32", @@ -1783,17 +1885,6 @@ const LineNumberProgram = struct { end_sequence: bool, default_is_stmt: bool, - target_address: u64, - include_dirs: []const FileEntry, - - prev_valid: bool, - prev_address: u64, - prev_file: usize, - prev_line: i64, - prev_column: u64, - prev_is_stmt: bool, - prev_basic_block: bool, - prev_end_sequence: bool, // Reset the state machine following the DWARF specification pub fn reset(self: *LineNumberProgram) void { @@ -1804,24 +1895,10 @@ const LineNumberProgram = struct { self.is_stmt = self.default_is_stmt; self.basic_block = false; self.end_sequence = false; - // Invalidate all the remaining 
fields - self.prev_valid = false; - self.prev_address = 0; - self.prev_file = undefined; - self.prev_line = undefined; - self.prev_column = undefined; - self.prev_is_stmt = undefined; - self.prev_basic_block = undefined; - self.prev_end_sequence = undefined; } - pub fn init( - is_stmt: bool, - include_dirs: []const FileEntry, - target_address: u64, - version: u16, - ) LineNumberProgram { - return LineNumberProgram{ + pub fn init(is_stmt: bool, version: u16) LineNumberProgram { + return .{ .address = 0, .file = 1, .line = 1, @@ -1830,60 +1907,17 @@ const LineNumberProgram = struct { .is_stmt = is_stmt, .basic_block = false, .end_sequence = false, - .include_dirs = include_dirs, .default_is_stmt = is_stmt, - .target_address = target_address, - .prev_valid = false, - .prev_address = 0, - .prev_file = undefined, - .prev_line = undefined, - .prev_column = undefined, - .prev_is_stmt = undefined, - .prev_basic_block = undefined, - .prev_end_sequence = undefined, }; } - pub fn checkLineMatch( - self: *LineNumberProgram, - allocator: Allocator, - file_entries: []const FileEntry, - ) !?std.debug.SourceLocation { - if (self.prev_valid and - self.target_address >= self.prev_address and - self.target_address < self.address) - { - const file_index = if (self.version >= 5) self.prev_file else i: { - if (self.prev_file == 0) return missing(); - break :i self.prev_file - 1; - }; - - if (file_index >= file_entries.len) return bad(); - const file_entry = &file_entries[file_index]; - - if (file_entry.dir_index >= self.include_dirs.len) return bad(); - const dir_name = self.include_dirs[file_entry.dir_index].path; - - const file_name = try std.fs.path.join(allocator, &[_][]const u8{ - dir_name, file_entry.path, - }); - - return std.debug.SourceLocation{ - .line = if (self.prev_line >= 0) @as(u64, @intCast(self.prev_line)) else 0, - .column = self.prev_column, - .file_name = file_name, - }; - } - - self.prev_valid = true; - self.prev_address = self.address; - self.prev_file = self.file; 
- self.prev_line = self.line; - self.prev_column = self.column; - self.prev_is_stmt = self.is_stmt; - self.prev_basic_block = self.basic_block; - self.prev_end_sequence = self.end_sequence; - return null; + pub fn addRow(prog: *LineNumberProgram, gpa: Allocator, table: *CompileUnit.SrcLocCache.LineTable) !void { + if (prog.line == 0) return; // garbage data + try table.put(gpa, prog.address, .{ + .line = cast(u32, prog.line) orelse maxInt(u32), + .column = cast(u32, prog.column) orelse maxInt(u32), + .file = cast(u32, prog.file) orelse return bad(), + }); } }; @@ -1892,7 +1926,8 @@ const UnitHeader = struct { header_length: u4, unit_length: u64, }; -fn readUnitHeader(fbr: *DeprecatedFixedBufferReader, opt_ma: ?*MemoryAccessor) !UnitHeader { + +fn readUnitHeader(fbr: *FixedBufferReader, opt_ma: ?*MemoryAccessor) ScanError!UnitHeader { return switch (try if (opt_ma) |ma| fbr.readIntChecked(u32, ma) else fbr.readInt(u32)) { 0...0xfffffff0 - 1 => |unit_length| .{ .format = .@"32", @@ -1957,7 +1992,7 @@ const EhPointerContext = struct { text_rel_base: ?u64 = null, function_rel_base: ?u64 = null, }; -fn readEhPointer(fbr: *DeprecatedFixedBufferReader, enc: u8, addr_size_bytes: u8, ctx: EhPointerContext) !?u64 { +fn readEhPointer(fbr: *FixedBufferReader, enc: u8, addr_size_bytes: u8, ctx: EhPointerContext) !?u64 { if (enc == EH.PE.omit) return null; const value: union(enum) { @@ -2023,3 +2058,320 @@ fn pcRelBase(field_ptr: usize, pc_rel_offset: i64) !usize { return std.math.add(usize, field_ptr, @as(usize, @intCast(pc_rel_offset))); } } + +pub const ElfModule = struct { + base_address: usize, + dwarf: Dwarf, + mapped_memory: []align(std.mem.page_size) const u8, + external_mapped_memory: ?[]align(std.mem.page_size) const u8, + + pub fn deinit(self: *@This(), allocator: Allocator) void { + self.dwarf.deinit(allocator); + std.posix.munmap(self.mapped_memory); + if (self.external_mapped_memory) |m| std.posix.munmap(m); + } + + pub fn getSymbolAtAddress(self: *@This(), 
allocator: Allocator, address: usize) !std.debug.Symbol { + // Translate the VA into an address into this object + const relocated_address = address - self.base_address; + return self.dwarf.getSymbol(allocator, relocated_address); + } + + pub fn getDwarfInfoForAddress(self: *@This(), allocator: Allocator, address: usize) !?*const Dwarf { + _ = allocator; + _ = address; + return &self.dwarf; + } + + pub const LoadError = error{ + InvalidDebugInfo, + MissingDebugInfo, + InvalidElfMagic, + InvalidElfVersion, + InvalidElfEndian, + /// TODO: implement this and then remove this error code + UnimplementedDwarfForeignEndian, + /// The debug info may be valid but this implementation uses memory + /// mapping which limits things to usize. If the target debug info is + /// 64-bit and host is 32-bit, there may be debug info that is not + /// supportable using this method. + Overflow, + + PermissionDenied, + LockedMemoryLimitExceeded, + MemoryMappingNotSupported, + } || Allocator.Error || std.fs.File.OpenError || OpenError; + + /// Reads debug info from an already mapped ELF file. + /// + /// If the required sections aren't present but a reference to external debug + /// info is, then this this function will recurse to attempt to load the debug + /// sections from an external file. 
+ pub fn load( + gpa: Allocator, + mapped_mem: []align(std.mem.page_size) const u8, + build_id: ?[]const u8, + expected_crc: ?u32, + parent_sections: *Dwarf.SectionArray, + parent_mapped_mem: ?[]align(std.mem.page_size) const u8, + elf_filename: ?[]const u8, + ) LoadError!Dwarf.ElfModule { + if (expected_crc) |crc| if (crc != std.hash.crc.Crc32.hash(mapped_mem)) return error.InvalidDebugInfo; + + const hdr: *const elf.Ehdr = @ptrCast(&mapped_mem[0]); + if (!mem.eql(u8, hdr.e_ident[0..4], elf.MAGIC)) return error.InvalidElfMagic; + if (hdr.e_ident[elf.EI_VERSION] != 1) return error.InvalidElfVersion; + + const endian: std.builtin.Endian = switch (hdr.e_ident[elf.EI_DATA]) { + elf.ELFDATA2LSB => .little, + elf.ELFDATA2MSB => .big, + else => return error.InvalidElfEndian, + }; + if (endian != native_endian) return error.UnimplementedDwarfForeignEndian; + + const shoff = hdr.e_shoff; + const str_section_off = shoff + @as(u64, hdr.e_shentsize) * @as(u64, hdr.e_shstrndx); + const str_shdr: *const elf.Shdr = @ptrCast(@alignCast(&mapped_mem[cast(usize, str_section_off) orelse return error.Overflow])); + const header_strings = mapped_mem[str_shdr.sh_offset..][0..str_shdr.sh_size]; + const shdrs = @as( + [*]const elf.Shdr, + @ptrCast(@alignCast(&mapped_mem[shoff])), + )[0..hdr.e_shnum]; + + var sections: Dwarf.SectionArray = Dwarf.null_section_array; + + // Combine section list. This takes ownership over any owned sections from the parent scope. 
+ for (parent_sections, §ions) |*parent, *section_elem| { + if (parent.*) |*p| { + section_elem.* = p.*; + p.owned = false; + } + } + errdefer for (sections) |opt_section| if (opt_section) |s| if (s.owned) gpa.free(s.data); + + var separate_debug_filename: ?[]const u8 = null; + var separate_debug_crc: ?u32 = null; + + for (shdrs) |*shdr| { + if (shdr.sh_type == elf.SHT_NULL or shdr.sh_type == elf.SHT_NOBITS) continue; + const name = mem.sliceTo(header_strings[shdr.sh_name..], 0); + + if (mem.eql(u8, name, ".gnu_debuglink")) { + const gnu_debuglink = try chopSlice(mapped_mem, shdr.sh_offset, shdr.sh_size); + const debug_filename = mem.sliceTo(@as([*:0]const u8, @ptrCast(gnu_debuglink.ptr)), 0); + const crc_offset = mem.alignForward(usize, @intFromPtr(&debug_filename[debug_filename.len]) + 1, 4) - @intFromPtr(gnu_debuglink.ptr); + const crc_bytes = gnu_debuglink[crc_offset..][0..4]; + separate_debug_crc = mem.readInt(u32, crc_bytes, native_endian); + separate_debug_filename = debug_filename; + continue; + } + + var section_index: ?usize = null; + inline for (@typeInfo(Dwarf.Section.Id).Enum.fields, 0..) |sect, i| { + if (mem.eql(u8, "." ++ sect.name, name)) section_index = i; + } + if (section_index == null) continue; + if (sections[section_index.?] != null) continue; + + const section_bytes = try chopSlice(mapped_mem, shdr.sh_offset, shdr.sh_size); + sections[section_index.?] 
= if ((shdr.sh_flags & elf.SHF_COMPRESSED) > 0) blk: { + var section_stream = std.io.fixedBufferStream(section_bytes); + const section_reader = section_stream.reader(); + const chdr = section_reader.readStruct(elf.Chdr) catch continue; + if (chdr.ch_type != .ZLIB) continue; + + var zlib_stream = std.compress.zlib.decompressor(section_reader); + + const decompressed_section = try gpa.alloc(u8, chdr.ch_size); + errdefer gpa.free(decompressed_section); + + const read = zlib_stream.reader().readAll(decompressed_section) catch continue; + assert(read == decompressed_section.len); + + break :blk .{ + .data = decompressed_section, + .virtual_address = shdr.sh_addr, + .owned = true, + }; + } else .{ + .data = section_bytes, + .virtual_address = shdr.sh_addr, + .owned = false, + }; + } + + const missing_debug_info = + sections[@intFromEnum(Dwarf.Section.Id.debug_info)] == null or + sections[@intFromEnum(Dwarf.Section.Id.debug_abbrev)] == null or + sections[@intFromEnum(Dwarf.Section.Id.debug_str)] == null or + sections[@intFromEnum(Dwarf.Section.Id.debug_line)] == null; + + // Attempt to load debug info from an external file + // See: https://sourceware.org/gdb/onlinedocs/gdb/Separate-Debug-Files.html + if (missing_debug_info) { + + // Only allow one level of debug info nesting + if (parent_mapped_mem) |_| { + return error.MissingDebugInfo; + } + + const global_debug_directories = [_][]const u8{ + "/usr/lib/debug", + }; + + // /.build-id/<2-character id prefix>/.debug + if (build_id) |id| blk: { + if (id.len < 3) break :blk; + + // Either md5 (16 bytes) or sha1 (20 bytes) are used here in practice + const extension = ".debug"; + var id_prefix_buf: [2]u8 = undefined; + var filename_buf: [38 + extension.len]u8 = undefined; + + _ = std.fmt.bufPrint(&id_prefix_buf, "{s}", .{std.fmt.fmtSliceHexLower(id[0..1])}) catch unreachable; + const filename = std.fmt.bufPrint( + &filename_buf, + "{s}" ++ extension, + .{std.fmt.fmtSliceHexLower(id[1..])}, + ) catch break :blk; + + for 
(global_debug_directories) |global_directory| { + const path: Path = .{ + .root_dir = std.Build.Cache.Directory.cwd(), + .sub_path = try std.fs.path.join(gpa, &.{ + global_directory, ".build-id", &id_prefix_buf, filename, + }), + }; + defer gpa.free(path.sub_path); + + return loadPath(gpa, path, null, separate_debug_crc, §ions, mapped_mem) catch continue; + } + } + + // use the path from .gnu_debuglink, in the same search order as gdb + if (separate_debug_filename) |separate_filename| blk: { + if (elf_filename != null and mem.eql(u8, elf_filename.?, separate_filename)) + return error.MissingDebugInfo; + + // / + if (loadPath( + gpa, + .{ + .root_dir = std.Build.Cache.Directory.cwd(), + .sub_path = separate_filename, + }, + null, + separate_debug_crc, + §ions, + mapped_mem, + )) |debug_info| { + return debug_info; + } else |_| {} + + // /.debug/ + { + const path: Path = .{ + .root_dir = std.Build.Cache.Directory.cwd(), + .sub_path = try std.fs.path.join(gpa, &.{ ".debug", separate_filename }), + }; + defer gpa.free(path.sub_path); + + if (loadPath(gpa, path, null, separate_debug_crc, §ions, mapped_mem)) |debug_info| return debug_info else |_| {} + } + + var cwd_buf: [std.fs.max_path_bytes]u8 = undefined; + const cwd_path = std.posix.realpath(".", &cwd_buf) catch break :blk; + + // // + for (global_debug_directories) |global_directory| { + const path: Path = .{ + .root_dir = std.Build.Cache.Directory.cwd(), + .sub_path = try std.fs.path.join(gpa, &.{ global_directory, cwd_path, separate_filename }), + }; + defer gpa.free(path.sub_path); + if (loadPath(gpa, path, null, separate_debug_crc, §ions, mapped_mem)) |debug_info| return debug_info else |_| {} + } + } + + return error.MissingDebugInfo; + } + + var di: Dwarf = .{ + .endian = endian, + .sections = sections, + .is_macho = false, + .compile_units_sorted = false, + }; + + try Dwarf.open(&di, gpa); + + return .{ + .base_address = 0, + .dwarf = di, + .mapped_memory = parent_mapped_mem orelse mapped_mem, + 
.external_mapped_memory = if (parent_mapped_mem != null) mapped_mem else null, + }; + } + + pub fn loadPath( + gpa: Allocator, + elf_file_path: Path, + build_id: ?[]const u8, + expected_crc: ?u32, + parent_sections: *Dwarf.SectionArray, + parent_mapped_mem: ?[]align(std.mem.page_size) const u8, + ) LoadError!Dwarf.ElfModule { + const elf_file = elf_file_path.root_dir.handle.openFile(elf_file_path.sub_path, .{}) catch |err| switch (err) { + error.FileNotFound => return missing(), + else => return err, + }; + defer elf_file.close(); + + const end_pos = elf_file.getEndPos() catch return bad(); + const file_len = cast(usize, end_pos) orelse return error.Overflow; + + const mapped_mem = try std.posix.mmap( + null, + file_len, + std.posix.PROT.READ, + .{ .TYPE = .SHARED }, + elf_file.handle, + 0, + ); + errdefer std.posix.munmap(mapped_mem); + + return load( + gpa, + mapped_mem, + build_id, + expected_crc, + parent_sections, + parent_mapped_mem, + elf_file_path.sub_path, + ); + } +}; + +pub fn getSymbol(di: *Dwarf, allocator: Allocator, address: u64) !std.debug.Symbol { + if (di.findCompileUnit(address)) |compile_unit| { + return .{ + .name = di.getSymbolName(address) orelse "???", + .compile_unit_name = compile_unit.die.getAttrString(di, std.dwarf.AT.name, di.section(.debug_str), compile_unit.*) catch |err| switch (err) { + error.MissingDebugInfo, error.InvalidDebugInfo => "???", + }, + .source_location = di.getLineNumberInfo(allocator, compile_unit, address) catch |err| switch (err) { + error.MissingDebugInfo, error.InvalidDebugInfo => null, + else => return err, + }, + }; + } else |err| switch (err) { + error.MissingDebugInfo, error.InvalidDebugInfo => return .{}, + else => return err, + } +} + +pub fn chopSlice(ptr: []const u8, offset: u64, size: u64) error{Overflow}![]const u8 { + const start = cast(usize, offset) orelse return error.Overflow; + const end = start + (cast(usize, size) orelse return error.Overflow); + return ptr[start..end]; +} diff --git 
a/lib/std/debug/FixedBufferReader.zig b/lib/std/debug/FixedBufferReader.zig new file mode 100644 index 000000000000..494245a9e979 --- /dev/null +++ b/lib/std/debug/FixedBufferReader.zig @@ -0,0 +1,93 @@ +//! Optimized for performance in debug builds. + +const std = @import("../std.zig"); +const MemoryAccessor = std.debug.MemoryAccessor; + +const FixedBufferReader = @This(); + +buf: []const u8, +pos: usize = 0, +endian: std.builtin.Endian, + +pub const Error = error{ EndOfBuffer, Overflow, InvalidBuffer }; + +pub fn seekTo(fbr: *FixedBufferReader, pos: u64) Error!void { + if (pos > fbr.buf.len) return error.EndOfBuffer; + fbr.pos = @intCast(pos); +} + +pub fn seekForward(fbr: *FixedBufferReader, amount: u64) Error!void { + if (fbr.buf.len - fbr.pos < amount) return error.EndOfBuffer; + fbr.pos += @intCast(amount); +} + +pub inline fn readByte(fbr: *FixedBufferReader) Error!u8 { + if (fbr.pos >= fbr.buf.len) return error.EndOfBuffer; + defer fbr.pos += 1; + return fbr.buf[fbr.pos]; +} + +pub fn readByteSigned(fbr: *FixedBufferReader) Error!i8 { + return @bitCast(try fbr.readByte()); +} + +pub fn readInt(fbr: *FixedBufferReader, comptime T: type) Error!T { + const size = @divExact(@typeInfo(T).Int.bits, 8); + if (fbr.buf.len - fbr.pos < size) return error.EndOfBuffer; + defer fbr.pos += size; + return std.mem.readInt(T, fbr.buf[fbr.pos..][0..size], fbr.endian); +} + +pub fn readIntChecked( + fbr: *FixedBufferReader, + comptime T: type, + ma: *MemoryAccessor, +) Error!T { + if (ma.load(T, @intFromPtr(fbr.buf[fbr.pos..].ptr)) == null) + return error.InvalidBuffer; + + return fbr.readInt(T); +} + +pub fn readUleb128(fbr: *FixedBufferReader, comptime T: type) Error!T { + return std.leb.readUleb128(T, fbr); +} + +pub fn readIleb128(fbr: *FixedBufferReader, comptime T: type) Error!T { + return std.leb.readIleb128(T, fbr); +} + +pub fn readAddress(fbr: *FixedBufferReader, format: std.dwarf.Format) Error!u64 { + return switch (format) { + .@"32" => try fbr.readInt(u32), + 
.@"64" => try fbr.readInt(u64), + }; +} + +pub fn readAddressChecked( + fbr: *FixedBufferReader, + format: std.dwarf.Format, + ma: *MemoryAccessor, +) Error!u64 { + return switch (format) { + .@"32" => try fbr.readIntChecked(u32, ma), + .@"64" => try fbr.readIntChecked(u64, ma), + }; +} + +pub fn readBytes(fbr: *FixedBufferReader, len: usize) Error![]const u8 { + if (fbr.buf.len - fbr.pos < len) return error.EndOfBuffer; + defer fbr.pos += len; + return fbr.buf[fbr.pos..][0..len]; +} + +pub fn readBytesTo(fbr: *FixedBufferReader, comptime sentinel: u8) Error![:sentinel]const u8 { + const end = @call(.always_inline, std.mem.indexOfScalarPos, .{ + u8, + fbr.buf, + fbr.pos, + sentinel, + }) orelse return error.EndOfBuffer; + defer fbr.pos = end + 1; + return fbr.buf[fbr.pos..end :sentinel]; +} diff --git a/lib/std/debug/Info.zig b/lib/std/debug/Info.zig new file mode 100644 index 000000000000..ee191d2c128d --- /dev/null +++ b/lib/std/debug/Info.zig @@ -0,0 +1,62 @@ +//! Cross-platform abstraction for loading debug information into an in-memory +//! format that supports queries such as "what is the source location of this +//! virtual memory address?" +//! +//! Unlike `std.debug.SelfInfo`, this API does not assume the debug information +//! in question happens to match the host CPU architecture, OS, or other target +//! properties. + +const std = @import("../std.zig"); +const Allocator = std.mem.Allocator; +const Path = std.Build.Cache.Path; +const Dwarf = std.debug.Dwarf; +const page_size = std.mem.page_size; +const assert = std.debug.assert; +const Coverage = std.debug.Coverage; +const SourceLocation = std.debug.Coverage.SourceLocation; + +const Info = @This(); + +/// Sorted by key, ascending. +address_map: std.AutoArrayHashMapUnmanaged(u64, Dwarf.ElfModule), +/// Externally managed, outlives this `Info` instance. 
+coverage: *Coverage, + +pub const LoadError = Dwarf.ElfModule.LoadError; + +pub fn load(gpa: Allocator, path: Path, coverage: *Coverage) LoadError!Info { + var sections: Dwarf.SectionArray = Dwarf.null_section_array; + var elf_module = try Dwarf.ElfModule.loadPath(gpa, path, null, null, §ions, null); + try elf_module.dwarf.sortCompileUnits(); + var info: Info = .{ + .address_map = .{}, + .coverage = coverage, + }; + try info.address_map.put(gpa, elf_module.base_address, elf_module); + return info; +} + +pub fn deinit(info: *Info, gpa: Allocator) void { + for (info.address_map.values()) |*elf_module| { + elf_module.dwarf.deinit(gpa); + } + info.address_map.deinit(gpa); + info.* = undefined; +} + +pub const ResolveAddressesError = Coverage.ResolveAddressesDwarfError; + +/// Given an array of virtual memory addresses, sorted ascending, outputs a +/// corresponding array of source locations. +pub fn resolveAddresses( + info: *Info, + gpa: Allocator, + sorted_pc_addrs: []const u64, + /// Asserts its length equals length of `sorted_pc_addrs`. + output: []SourceLocation, +) ResolveAddressesError!void { + assert(sorted_pc_addrs.len == output.len); + if (info.address_map.entries.len != 1) @panic("TODO"); + const elf_module = &info.address_map.values()[0]; + return info.coverage.resolveAddressesDwarf(gpa, sorted_pc_addrs, output, &elf_module.dwarf); +} diff --git a/lib/std/debug/SelfInfo.zig b/lib/std/debug/SelfInfo.zig index f9747a088ea7..2d87243c5d37 100644 --- a/lib/std/debug/SelfInfo.zig +++ b/lib/std/debug/SelfInfo.zig @@ -587,7 +587,7 @@ pub const Module = switch (native_os) { } if (section_index == null) continue; - const section_bytes = try chopSlice(mapped_mem, sect.offset, sect.size); + const section_bytes = try Dwarf.chopSlice(mapped_mem, sect.offset, sect.size); sections[section_index.?] 
= .{ .data = section_bytes, .virtual_address = sect.addr, @@ -602,10 +602,11 @@ pub const Module = switch (native_os) { sections[@intFromEnum(Dwarf.Section.Id.debug_line)] == null; if (missing_debug_info) return error.MissingDebugInfo; - var di = Dwarf{ + var di: Dwarf = .{ .endian = .little, .sections = sections, .is_macho = true, + .compile_units_sorted = false, }; try Dwarf.open(&di, allocator); @@ -622,7 +623,7 @@ pub const Module = switch (native_os) { return result.value_ptr; } - pub fn getSymbolAtAddress(self: *@This(), allocator: Allocator, address: usize) !SymbolInfo { + pub fn getSymbolAtAddress(self: *@This(), allocator: Allocator, address: usize) !std.debug.Symbol { nosuspend { const result = try self.getOFileInfoForAddress(allocator, address); if (result.symbol == null) return .{}; @@ -630,19 +631,19 @@ pub const Module = switch (native_os) { // Take the symbol name from the N_FUN STAB entry, we're going to // use it if we fail to find the DWARF infos const stab_symbol = mem.sliceTo(self.strings[result.symbol.?.strx..], 0); - if (result.o_file_info == null) return .{ .symbol_name = stab_symbol }; + if (result.o_file_info == null) return .{ .name = stab_symbol }; // Translate again the address, this time into an address inside the // .o file const relocated_address_o = result.o_file_info.?.addr_table.get(stab_symbol) orelse return .{ - .symbol_name = "???", + .name = "???", }; const addr_off = result.relocated_address - result.symbol.?.addr; const o_file_di = &result.o_file_info.?.di; if (o_file_di.findCompileUnit(relocated_address_o)) |compile_unit| { - return SymbolInfo{ - .symbol_name = o_file_di.getSymbolName(relocated_address_o) orelse "???", + return .{ + .name = o_file_di.getSymbolName(relocated_address_o) orelse "???", .compile_unit_name = compile_unit.die.getAttrString( o_file_di, std.dwarf.AT.name, @@ -651,9 +652,9 @@ pub const Module = switch (native_os) { ) catch |err| switch (err) { error.MissingDebugInfo, error.InvalidDebugInfo => "???", 
}, - .line_info = o_file_di.getLineNumberInfo( + .source_location = o_file_di.getLineNumberInfo( allocator, - compile_unit.*, + compile_unit, relocated_address_o + addr_off, ) catch |err| switch (err) { error.MissingDebugInfo, error.InvalidDebugInfo => null, @@ -662,7 +663,7 @@ pub const Module = switch (native_os) { }; } else |err| switch (err) { error.MissingDebugInfo, error.InvalidDebugInfo => { - return SymbolInfo{ .symbol_name = stab_symbol }; + return .{ .name = stab_symbol }; }, else => return err, } @@ -729,7 +730,7 @@ pub const Module = switch (native_os) { } } - fn getSymbolFromPdb(self: *@This(), relocated_address: usize) !?SymbolInfo { + fn getSymbolFromPdb(self: *@This(), relocated_address: usize) !?std.debug.Symbol { var coff_section: *align(1) const coff.SectionHeader = undefined; const mod_index = for (self.pdb.?.sect_contribs) |sect_contrib| { if (sect_contrib.Section > self.coff_section_headers.len) continue; @@ -759,14 +760,14 @@ pub const Module = switch (native_os) { relocated_address - coff_section.virtual_address, ); - return SymbolInfo{ - .symbol_name = symbol_name, + return .{ + .name = symbol_name, .compile_unit_name = obj_basename, - .line_info = opt_line_info, + .source_location = opt_line_info, }; } - pub fn getSymbolAtAddress(self: *@This(), allocator: Allocator, address: usize) !SymbolInfo { + pub fn getSymbolAtAddress(self: *@This(), allocator: Allocator, address: usize) !std.debug.Symbol { // Translate the VA into an address into this object const relocated_address = address - self.base_address; @@ -776,10 +777,10 @@ pub const Module = switch (native_os) { if (self.dwarf) |*dwarf| { const dwarf_address = relocated_address + self.coff_image_base; - return getSymbolFromDwarf(allocator, dwarf_address, dwarf); + return dwarf.getSymbol(allocator, dwarf_address); } - return SymbolInfo{}; + return .{}; } pub fn getDwarfInfoForAddress(self: *@This(), allocator: Allocator, address: usize) !?*const Dwarf { @@ -792,41 +793,18 @@ pub const 
Module = switch (native_os) { }; } }, - .linux, .netbsd, .freebsd, .dragonfly, .openbsd, .haiku, .solaris, .illumos => struct { - base_address: usize, - dwarf: Dwarf, - mapped_memory: []align(mem.page_size) const u8, - external_mapped_memory: ?[]align(mem.page_size) const u8, - - pub fn deinit(self: *@This(), allocator: Allocator) void { - self.dwarf.deinit(allocator); - posix.munmap(self.mapped_memory); - if (self.external_mapped_memory) |m| posix.munmap(m); - } - - pub fn getSymbolAtAddress(self: *@This(), allocator: Allocator, address: usize) !SymbolInfo { - // Translate the VA into an address into this object - const relocated_address = address - self.base_address; - return getSymbolFromDwarf(allocator, relocated_address, &self.dwarf); - } - - pub fn getDwarfInfoForAddress(self: *@This(), allocator: Allocator, address: usize) !?*const Dwarf { - _ = allocator; - _ = address; - return &self.dwarf; - } - }, + .linux, .netbsd, .freebsd, .dragonfly, .openbsd, .haiku, .solaris, .illumos => Dwarf.ElfModule, .wasi, .emscripten => struct { pub fn deinit(self: *@This(), allocator: Allocator) void { _ = self; _ = allocator; } - pub fn getSymbolAtAddress(self: *@This(), allocator: Allocator, address: usize) !SymbolInfo { + pub fn getSymbolAtAddress(self: *@This(), allocator: Allocator, address: usize) !std.debug.Symbol { _ = self; _ = allocator; _ = address; - return SymbolInfo{}; + return .{}; } pub fn getDwarfInfoForAddress(self: *@This(), allocator: Allocator, address: usize) !?*const Dwarf { @@ -1014,10 +992,11 @@ fn readCoffDebugInfo(allocator: Allocator, coff_obj: *coff.Coff) !Module { } else null; } - var dwarf = Dwarf{ + var dwarf: Dwarf = .{ .endian = native_endian, .sections = sections, .is_macho = false, + .compile_units_sorted = false, }; try Dwarf.open(&dwarf, allocator); @@ -1068,7 +1047,7 @@ pub fn readElfDebugInfo( expected_crc: ?u32, parent_sections: *Dwarf.SectionArray, parent_mapped_mem: ?[]align(mem.page_size) const u8, -) !Module { +) !Dwarf.ElfModule 
{ nosuspend { const elf_file = (if (elf_filename) |filename| blk: { break :blk fs.cwd().openFile(filename, .{}); @@ -1078,176 +1057,15 @@ pub fn readElfDebugInfo( }; const mapped_mem = try mapWholeFile(elf_file); - if (expected_crc) |crc| if (crc != std.hash.crc.Crc32.hash(mapped_mem)) return error.InvalidDebugInfo; - - const hdr: *const elf.Ehdr = @ptrCast(&mapped_mem[0]); - if (!mem.eql(u8, hdr.e_ident[0..4], elf.MAGIC)) return error.InvalidElfMagic; - if (hdr.e_ident[elf.EI_VERSION] != 1) return error.InvalidElfVersion; - - const endian: std.builtin.Endian = switch (hdr.e_ident[elf.EI_DATA]) { - elf.ELFDATA2LSB => .little, - elf.ELFDATA2MSB => .big, - else => return error.InvalidElfEndian, - }; - assert(endian == native_endian); // this is our own debug info - - const shoff = hdr.e_shoff; - const str_section_off = shoff + @as(u64, hdr.e_shentsize) * @as(u64, hdr.e_shstrndx); - const str_shdr: *const elf.Shdr = @ptrCast(@alignCast(&mapped_mem[math.cast(usize, str_section_off) orelse return error.Overflow])); - const header_strings = mapped_mem[str_shdr.sh_offset..][0..str_shdr.sh_size]; - const shdrs = @as( - [*]const elf.Shdr, - @ptrCast(@alignCast(&mapped_mem[shoff])), - )[0..hdr.e_shnum]; - - var sections: Dwarf.SectionArray = Dwarf.null_section_array; - - // Combine section list. This takes ownership over any owned sections from the parent scope. 
- for (parent_sections, §ions) |*parent, *section| { - if (parent.*) |*p| { - section.* = p.*; - p.owned = false; - } - } - errdefer for (sections) |section| if (section) |s| if (s.owned) allocator.free(s.data); - - var separate_debug_filename: ?[]const u8 = null; - var separate_debug_crc: ?u32 = null; - - for (shdrs) |*shdr| { - if (shdr.sh_type == elf.SHT_NULL or shdr.sh_type == elf.SHT_NOBITS) continue; - const name = mem.sliceTo(header_strings[shdr.sh_name..], 0); - - if (mem.eql(u8, name, ".gnu_debuglink")) { - const gnu_debuglink = try chopSlice(mapped_mem, shdr.sh_offset, shdr.sh_size); - const debug_filename = mem.sliceTo(@as([*:0]const u8, @ptrCast(gnu_debuglink.ptr)), 0); - const crc_offset = mem.alignForward(usize, @intFromPtr(&debug_filename[debug_filename.len]) + 1, 4) - @intFromPtr(gnu_debuglink.ptr); - const crc_bytes = gnu_debuglink[crc_offset..][0..4]; - separate_debug_crc = mem.readInt(u32, crc_bytes, native_endian); - separate_debug_filename = debug_filename; - continue; - } - - var section_index: ?usize = null; - inline for (@typeInfo(Dwarf.Section.Id).Enum.fields, 0..) |section, i| { - if (mem.eql(u8, "." ++ section.name, name)) section_index = i; - } - if (section_index == null) continue; - if (sections[section_index.?] != null) continue; - - const section_bytes = try chopSlice(mapped_mem, shdr.sh_offset, shdr.sh_size); - sections[section_index.?] 
= if ((shdr.sh_flags & elf.SHF_COMPRESSED) > 0) blk: { - var section_stream = std.io.fixedBufferStream(section_bytes); - var section_reader = section_stream.reader(); - const chdr = section_reader.readStruct(elf.Chdr) catch continue; - if (chdr.ch_type != .ZLIB) continue; - - var zlib_stream = std.compress.zlib.decompressor(section_stream.reader()); - - const decompressed_section = try allocator.alloc(u8, chdr.ch_size); - errdefer allocator.free(decompressed_section); - - const read = zlib_stream.reader().readAll(decompressed_section) catch continue; - assert(read == decompressed_section.len); - - break :blk .{ - .data = decompressed_section, - .virtual_address = shdr.sh_addr, - .owned = true, - }; - } else .{ - .data = section_bytes, - .virtual_address = shdr.sh_addr, - .owned = false, - }; - } - - const missing_debug_info = - sections[@intFromEnum(Dwarf.Section.Id.debug_info)] == null or - sections[@intFromEnum(Dwarf.Section.Id.debug_abbrev)] == null or - sections[@intFromEnum(Dwarf.Section.Id.debug_str)] == null or - sections[@intFromEnum(Dwarf.Section.Id.debug_line)] == null; - - // Attempt to load debug info from an external file - // See: https://sourceware.org/gdb/onlinedocs/gdb/Separate-Debug-Files.html - if (missing_debug_info) { - - // Only allow one level of debug info nesting - if (parent_mapped_mem) |_| { - return error.MissingDebugInfo; - } - - const global_debug_directories = [_][]const u8{ - "/usr/lib/debug", - }; - - // /.build-id/<2-character id prefix>/.debug - if (build_id) |id| blk: { - if (id.len < 3) break :blk; - - // Either md5 (16 bytes) or sha1 (20 bytes) are used here in practice - const extension = ".debug"; - var id_prefix_buf: [2]u8 = undefined; - var filename_buf: [38 + extension.len]u8 = undefined; - - _ = std.fmt.bufPrint(&id_prefix_buf, "{s}", .{std.fmt.fmtSliceHexLower(id[0..1])}) catch unreachable; - const filename = std.fmt.bufPrint( - &filename_buf, - "{s}" ++ extension, - .{std.fmt.fmtSliceHexLower(id[1..])}, - ) catch break 
:blk; - - for (global_debug_directories) |global_directory| { - const path = try fs.path.join(allocator, &.{ global_directory, ".build-id", &id_prefix_buf, filename }); - defer allocator.free(path); - - return readElfDebugInfo(allocator, path, null, separate_debug_crc, §ions, mapped_mem) catch continue; - } - } - - // use the path from .gnu_debuglink, in the same search order as gdb - if (separate_debug_filename) |separate_filename| blk: { - if (elf_filename != null and mem.eql(u8, elf_filename.?, separate_filename)) return error.MissingDebugInfo; - - // / - if (readElfDebugInfo(allocator, separate_filename, null, separate_debug_crc, §ions, mapped_mem)) |debug_info| return debug_info else |_| {} - - // /.debug/ - { - const path = try fs.path.join(allocator, &.{ ".debug", separate_filename }); - defer allocator.free(path); - - if (readElfDebugInfo(allocator, path, null, separate_debug_crc, §ions, mapped_mem)) |debug_info| return debug_info else |_| {} - } - - var cwd_buf: [fs.max_path_bytes]u8 = undefined; - const cwd_path = posix.realpath(".", &cwd_buf) catch break :blk; - - // // - for (global_debug_directories) |global_directory| { - const path = try fs.path.join(allocator, &.{ global_directory, cwd_path, separate_filename }); - defer allocator.free(path); - if (readElfDebugInfo(allocator, path, null, separate_debug_crc, §ions, mapped_mem)) |debug_info| return debug_info else |_| {} - } - } - - return error.MissingDebugInfo; - } - - var di = Dwarf{ - .endian = endian, - .sections = sections, - .is_macho = false, - }; - - try Dwarf.open(&di, allocator); - - return .{ - .base_address = undefined, - .dwarf = di, - .mapped_memory = parent_mapped_mem orelse mapped_mem, - .external_mapped_memory = if (parent_mapped_mem != null) mapped_mem else null, - }; + return Dwarf.ElfModule.load( + allocator, + mapped_mem, + build_id, + expected_crc, + parent_sections, + parent_mapped_mem, + elf_filename, + ); } } @@ -1289,22 +1107,6 @@ fn mapWholeFile(file: File) 
![]align(mem.page_size) const u8 { } } -fn chopSlice(ptr: []const u8, offset: u64, size: u64) error{Overflow}![]const u8 { - const start = math.cast(usize, offset) orelse return error.Overflow; - const end = start + (math.cast(usize, size) orelse return error.Overflow); - return ptr[start..end]; -} - -pub const SymbolInfo = struct { - symbol_name: []const u8 = "???", - compile_unit_name: []const u8 = "???", - line_info: ?std.debug.SourceLocation = null, - - pub fn deinit(self: SymbolInfo, allocator: Allocator) void { - if (self.line_info) |li| allocator.free(li.file_name); - } -}; - fn machoSearchSymbols(symbols: []const MachoSymbol, address: usize) ?*const MachoSymbol { var min: usize = 0; var max: usize = symbols.len - 1; @@ -1350,26 +1152,6 @@ test machoSearchSymbols { try testing.expectEqual(&symbols[2], machoSearchSymbols(&symbols, 5000).?); } -fn getSymbolFromDwarf(allocator: Allocator, address: u64, di: *Dwarf) !SymbolInfo { - if (nosuspend di.findCompileUnit(address)) |compile_unit| { - return SymbolInfo{ - .symbol_name = nosuspend di.getSymbolName(address) orelse "???", - .compile_unit_name = compile_unit.die.getAttrString(di, std.dwarf.AT.name, di.section(.debug_str), compile_unit.*) catch |err| switch (err) { - error.MissingDebugInfo, error.InvalidDebugInfo => "???", - }, - .line_info = nosuspend di.getLineNumberInfo(allocator, compile_unit.*, address) catch |err| switch (err) { - error.MissingDebugInfo, error.InvalidDebugInfo => null, - else => return err, - }, - }; - } else |err| switch (err) { - error.MissingDebugInfo, error.InvalidDebugInfo => { - return SymbolInfo{}; - }, - else => return err, - } -} - /// Unwind a frame using MachO compact unwind info (from __unwind_info). /// If the compact encoding can't encode a way to unwind a frame, it will /// defer unwinding to DWARF, in which case `.eh_frame` will be used if available. 
@@ -1796,7 +1578,7 @@ pub fn unwindFrameDwarf( const frame_section = di.section(dwarf_section) orelse return error.MissingFDE; if (fde_offset >= frame_section.len) return error.MissingFDE; - var fbr: std.debug.DeprecatedFixedBufferReader = .{ + var fbr: std.debug.FixedBufferReader = .{ .buf = frame_section, .pos = fde_offset, .endian = di.endian, @@ -2028,6 +1810,7 @@ fn unwindFrameMachODwarf( var di: Dwarf = .{ .endian = native_endian, .is_macho = true, + .compile_units_sorted = false, }; defer di.deinit(context.allocator); diff --git a/lib/std/http.zig b/lib/std/http.zig index 621c7a5f0d74..d5d5583299e2 100644 --- a/lib/std/http.zig +++ b/lib/std/http.zig @@ -4,6 +4,7 @@ pub const protocol = @import("http/protocol.zig"); pub const HeadParser = @import("http/HeadParser.zig"); pub const ChunkParser = @import("http/ChunkParser.zig"); pub const HeaderIterator = @import("http/HeaderIterator.zig"); +pub const WebSocket = @import("http/WebSocket.zig"); pub const Version = enum { @"HTTP/1.0", @@ -318,6 +319,7 @@ test { _ = Status; _ = HeadParser; _ = ChunkParser; + _ = WebSocket; _ = @import("http/test.zig"); } } diff --git a/lib/std/http/WebSocket.zig b/lib/std/http/WebSocket.zig new file mode 100644 index 000000000000..ad513fddf8af --- /dev/null +++ b/lib/std/http/WebSocket.zig @@ -0,0 +1,243 @@ +//! See https://tools.ietf.org/html/rfc6455 + +const builtin = @import("builtin"); +const std = @import("std"); +const WebSocket = @This(); +const assert = std.debug.assert; +const native_endian = builtin.cpu.arch.endian(); + +key: []const u8, +request: *std.http.Server.Request, +recv_fifo: std.fifo.LinearFifo(u8, .Slice), +reader: std.io.AnyReader, +response: std.http.Server.Response, +/// Number of bytes that have been peeked but not discarded yet. 
+outstanding_len: usize, + +pub const InitError = error{WebSocketUpgradeMissingKey} || + std.http.Server.Request.ReaderError; + +pub fn init( + ws: *WebSocket, + request: *std.http.Server.Request, + send_buffer: []u8, + recv_buffer: []align(4) u8, +) InitError!bool { + var sec_websocket_key: ?[]const u8 = null; + var upgrade_websocket: bool = false; + var it = request.iterateHeaders(); + while (it.next()) |header| { + if (std.ascii.eqlIgnoreCase(header.name, "sec-websocket-key")) { + sec_websocket_key = header.value; + } else if (std.ascii.eqlIgnoreCase(header.name, "upgrade")) { + if (!std.mem.eql(u8, header.value, "websocket")) + return false; + upgrade_websocket = true; + } + } + if (!upgrade_websocket) + return false; + + const key = sec_websocket_key orelse return error.WebSocketUpgradeMissingKey; + + var sha1 = std.crypto.hash.Sha1.init(.{}); + sha1.update(key); + sha1.update("258EAFA5-E914-47DA-95CA-C5AB0DC85B11"); + var digest: [std.crypto.hash.Sha1.digest_length]u8 = undefined; + sha1.final(&digest); + var base64_digest: [28]u8 = undefined; + assert(std.base64.standard.Encoder.encode(&base64_digest, &digest).len == base64_digest.len); + + request.head.content_length = std.math.maxInt(u64); + + ws.* = .{ + .key = key, + .recv_fifo = std.fifo.LinearFifo(u8, .Slice).init(recv_buffer), + .reader = try request.reader(), + .response = request.respondStreaming(.{ + .send_buffer = send_buffer, + .respond_options = .{ + .status = .switching_protocols, + .extra_headers = &.{ + .{ .name = "upgrade", .value = "websocket" }, + .{ .name = "connection", .value = "upgrade" }, + .{ .name = "sec-websocket-accept", .value = &base64_digest }, + }, + .transfer_encoding = .none, + }, + }), + .request = request, + .outstanding_len = 0, + }; + return true; +} + +pub const Header0 = packed struct(u8) { + opcode: Opcode, + rsv3: u1 = 0, + rsv2: u1 = 0, + rsv1: u1 = 0, + fin: bool, +}; + +pub const Header1 = packed struct(u8) { + payload_len: enum(u7) { + len16 = 126, + len64 = 127, 
+ _, + }, + mask: bool, +}; + +pub const Opcode = enum(u4) { + continuation = 0, + text = 1, + binary = 2, + connection_close = 8, + ping = 9, + /// "A Pong frame MAY be sent unsolicited. This serves as a unidirectional + /// heartbeat. A response to an unsolicited Pong frame is not expected." + pong = 10, + _, +}; + +pub const ReadSmallTextMessageError = error{ + ConnectionClose, + UnexpectedOpCode, + MessageTooBig, + MissingMaskBit, +} || RecvError; + +pub const SmallMessage = struct { + /// Can be text, binary, or ping. + opcode: Opcode, + data: []u8, +}; + +/// Reads the next message from the WebSocket stream, failing if the message does not fit +/// into `recv_buffer`. +pub fn readSmallMessage(ws: *WebSocket) ReadSmallTextMessageError!SmallMessage { + while (true) { + const header_bytes = (try recv(ws, 2))[0..2]; + const h0: Header0 = @bitCast(header_bytes[0]); + const h1: Header1 = @bitCast(header_bytes[1]); + + switch (h0.opcode) { + .text, .binary, .pong, .ping => {}, + .connection_close => return error.ConnectionClose, + .continuation => return error.UnexpectedOpCode, + _ => return error.UnexpectedOpCode, + } + + if (!h0.fin) return error.MessageTooBig; + if (!h1.mask) return error.MissingMaskBit; + + const len: usize = switch (h1.payload_len) { + .len16 => try recvReadInt(ws, u16), + .len64 => std.math.cast(usize, try recvReadInt(ws, u64)) orelse return error.MessageTooBig, + else => @intFromEnum(h1.payload_len), + }; + if (len > ws.recv_fifo.buf.len) return error.MessageTooBig; + + const mask: u32 = @bitCast((try recv(ws, 4))[0..4].*); + const payload = try recv(ws, len); + + // Skip pongs. + if (h0.opcode == .pong) continue; + + // The last item may contain a partial word of unused data. + const floored_len = (payload.len / 4) * 4; + const u32_payload: []align(1) u32 = @alignCast(std.mem.bytesAsSlice(u32, payload[0..floored_len])); + for (u32_payload) |*elem| elem.* ^= mask; + const mask_bytes = std.mem.asBytes(&mask)[0 .. 
payload.len - floored_len]; + for (payload[floored_len..], mask_bytes) |*leftover, m| leftover.* ^= m; + + return .{ + .opcode = h0.opcode, + .data = payload, + }; + } +} + +const RecvError = std.http.Server.Request.ReadError || error{EndOfStream}; + +fn recv(ws: *WebSocket, len: usize) RecvError![]u8 { + ws.recv_fifo.discard(ws.outstanding_len); + assert(len <= ws.recv_fifo.buf.len); + if (len > ws.recv_fifo.count) { + const small_buf = ws.recv_fifo.writableSlice(0); + const needed = len - ws.recv_fifo.count; + const buf = if (small_buf.len >= needed) small_buf else b: { + ws.recv_fifo.realign(); + break :b ws.recv_fifo.writableSlice(0); + }; + const n = try @as(RecvError!usize, @errorCast(ws.reader.readAtLeast(buf, needed))); + if (n < needed) return error.EndOfStream; + ws.recv_fifo.update(n); + } + ws.outstanding_len = len; + // TODO: improve the std lib API so this cast isn't necessary. + return @constCast(ws.recv_fifo.readableSliceOfLen(len)); +} + +fn recvReadInt(ws: *WebSocket, comptime I: type) !I { + const unswapped: I = @bitCast((try recv(ws, @sizeOf(I)))[0..@sizeOf(I)].*); + return switch (native_endian) { + .little => @byteSwap(unswapped), + .big => unswapped, + }; +} + +pub const WriteError = std.http.Server.Response.WriteError; + +pub fn writeMessage(ws: *WebSocket, message: []const u8, opcode: Opcode) WriteError!void { + const iovecs: [1]std.posix.iovec_const = .{ + .{ .base = message.ptr, .len = message.len }, + }; + return writeMessagev(ws, &iovecs, opcode); +} + +pub fn writeMessagev(ws: *WebSocket, message: []const std.posix.iovec_const, opcode: Opcode) WriteError!void { + const total_len = l: { + var total_len: u64 = 0; + for (message) |iovec| total_len += iovec.len; + break :l total_len; + }; + + var header_buf: [2 + 8]u8 = undefined; + header_buf[0] = @bitCast(@as(Header0, .{ + .opcode = opcode, + .fin = true, + })); + const header = switch (total_len) { + 0...125 => blk: { + header_buf[1] = @bitCast(@as(Header1, .{ + .payload_len = 
@enumFromInt(total_len), + .mask = false, + })); + break :blk header_buf[0..2]; + }, + 126...0xffff => blk: { + header_buf[1] = @bitCast(@as(Header1, .{ + .payload_len = .len16, + .mask = false, + })); + std.mem.writeInt(u16, header_buf[2..4], @intCast(total_len), .big); + break :blk header_buf[0..4]; + }, + else => blk: { + header_buf[1] = @bitCast(@as(Header1, .{ + .payload_len = .len64, + .mask = false, + })); + std.mem.writeInt(u64, header_buf[2..10], total_len, .big); + break :blk header_buf[0..10]; + }, + }; + + const response = &ws.response; + try response.writeAll(header); + for (message) |iovec| + try response.writeAll(iovec.base[0..iovec.len]); + try response.flush(); +} diff --git a/lib/std/posix.zig b/lib/std/posix.zig index e04efbbcc061..02f2d975ddad 100644 --- a/lib/std/posix.zig +++ b/lib/std/posix.zig @@ -47,6 +47,11 @@ else switch (native_os) { .plan9 => std.os.plan9, else => struct { pub const ucontext_t = void; + pub const pid_t = void; + pub const pollfd = void; + pub const fd_t = void; + pub const uid_t = void; + pub const gid_t = void; }, }; diff --git a/lib/std/zig/Server.zig b/lib/std/zig/Server.zig index f1e564d43e02..7ce017045d96 100644 --- a/lib/std/zig/Server.zig +++ b/lib/std/zig/Server.zig @@ -28,6 +28,14 @@ pub const Message = struct { /// The remaining bytes is the file path relative to that prefix. /// The prefixes are hard-coded in Compilation.create (cwd, zig lib dir, local cache dir) file_system_inputs, + /// Body is a u64le that indicates the file path within the cache used + /// to store coverage information. The integer is a hash of the PCs + /// stored within that file. + coverage_id, + /// Body is a u64le that indicates the function pointer virtual memory + /// address of the fuzz unit test. This is used to provide a starting + /// point to view coverage. + fuzz_start_addr, _, }; @@ -180,6 +188,14 @@ pub fn serveMessage( try s.out.writevAll(iovecs[0 .. 
bufs.len + 1]); } +pub fn serveU64Message(s: *Server, tag: OutMessage.Tag, int: u64) !void { + const msg_le = bswap(int); + return s.serveMessage(.{ + .tag = tag, + .bytes_len = @sizeOf(u64), + }, &.{std.mem.asBytes(&msg_le)}); +} + pub fn serveEmitBinPath( s: *Server, fs_path: []const u8, @@ -187,7 +203,7 @@ pub fn serveEmitBinPath( ) !void { try s.serveMessage(.{ .tag = .emit_bin_path, - .bytes_len = @as(u32, @intCast(fs_path.len + @sizeOf(OutMessage.EmitBinPath))), + .bytes_len = @intCast(fs_path.len + @sizeOf(OutMessage.EmitBinPath)), }, &.{ std.mem.asBytes(&header), fs_path, @@ -201,7 +217,7 @@ pub fn serveTestResults( const msg_le = bswap(msg); try s.serveMessage(.{ .tag = .test_results, - .bytes_len = @as(u32, @intCast(@sizeOf(OutMessage.TestResults))), + .bytes_len = @intCast(@sizeOf(OutMessage.TestResults)), }, &.{ std.mem.asBytes(&msg_le), }); @@ -209,14 +225,14 @@ pub fn serveTestResults( pub fn serveErrorBundle(s: *Server, error_bundle: std.zig.ErrorBundle) !void { const eb_hdr: OutMessage.ErrorBundle = .{ - .extra_len = @as(u32, @intCast(error_bundle.extra.len)), - .string_bytes_len = @as(u32, @intCast(error_bundle.string_bytes.len)), + .extra_len = @intCast(error_bundle.extra.len), + .string_bytes_len = @intCast(error_bundle.string_bytes.len), }; const bytes_len = @sizeOf(OutMessage.ErrorBundle) + 4 * error_bundle.extra.len + error_bundle.string_bytes.len; try s.serveMessage(.{ .tag = .error_bundle, - .bytes_len = @as(u32, @intCast(bytes_len)), + .bytes_len = @intCast(bytes_len), }, &.{ std.mem.asBytes(&eb_hdr), // TODO: implement @ptrCast between slices changing the length @@ -251,7 +267,7 @@ pub fn serveTestMetadata(s: *Server, test_metadata: TestMetadata) !void { return s.serveMessage(.{ .tag = .test_metadata, - .bytes_len = @as(u32, @intCast(bytes_len)), + .bytes_len = @intCast(bytes_len), }, &.{ std.mem.asBytes(&header), // TODO: implement @ptrCast between slices changing the length diff --git a/lib/std/zig/tokenizer.zig 
b/lib/std/zig/tokenizer.zig index c375818770ab..b63bde563385 100644 --- a/lib/std/zig/tokenizer.zig +++ b/lib/std/zig/tokenizer.zig @@ -1840,3 +1840,48 @@ fn testTokenize(source: [:0]const u8, expected_token_tags: []const Token.Tag) !v try std.testing.expectEqual(source.len, last_token.loc.start); try std.testing.expectEqual(source.len, last_token.loc.end); } + +test "fuzzable properties upheld" { + const source = std.testing.fuzzInput(.{}); + const source0 = try std.testing.allocator.dupeZ(u8, source); + defer std.testing.allocator.free(source0); + var tokenizer = Tokenizer.init(source0); + var tokenization_failed = false; + while (true) { + const token = tokenizer.next(); + + // Property: token end location after start location (or equal) + try std.testing.expect(token.loc.end >= token.loc.start); + + switch (token.tag) { + .invalid => { + tokenization_failed = true; + + // Property: invalid token always ends at newline or eof + try std.testing.expect(source0[token.loc.end] == '\n' or source0[token.loc.end] == 0); + }, + .eof => { + // Property: EOF token is always 0-length at end of source. + try std.testing.expectEqual(source0.len, token.loc.start); + try std.testing.expectEqual(source0.len, token.loc.end); + break; + }, + else => continue, + } + } + + if (source0.len > 0) for (source0, source0[1..][0..source0.len]) |cur, next| { + // Property: No null byte allowed except at end. + if (cur == 0) { + try std.testing.expect(tokenization_failed); + } + // Property: No ASCII control characters other than \n and \t are allowed. + if (std.ascii.isControl(cur) and cur != '\n' and cur != '\t') { + try std.testing.expect(tokenization_failed); + } + // Property: All '\r' must be followed by '\n'. 
+ if (cur == '\r' and next != '\n') { + try std.testing.expect(tokenization_failed); + } + }; +} diff --git a/src/Compilation.zig b/src/Compilation.zig index 8c9e18bc98fa..1fd23801374f 100644 --- a/src/Compilation.zig +++ b/src/Compilation.zig @@ -4201,10 +4201,11 @@ fn workerDocsWasm(comp: *Compilation, parent_prog_node: std.Progress.Node) void const prog_node = parent_prog_node.start("Compile Autodocs", 0); defer prog_node.end(); - workerDocsWasmFallible(comp, prog_node) catch |err| { - comp.lockAndSetMiscFailure(.docs_wasm, "unable to build autodocs: {s}", .{ + workerDocsWasmFallible(comp, prog_node) catch |err| switch (err) { + error.SubCompilationFailed => return, // error reported already + else => comp.lockAndSetMiscFailure(.docs_wasm, "unable to build autodocs: {s}", .{ @errorName(err), - }); + }), }; } @@ -4274,8 +4275,29 @@ fn workerDocsWasmFallible(comp: *Compilation, prog_node: std.Progress.Node) anye .cc_argv = &.{}, .parent = null, .builtin_mod = null, - .builtin_modules = null, // there is only one module in this compilation + .builtin_modules = null, + }); + const walk_mod = try Package.Module.create(arena, .{ + .global_cache_directory = comp.global_cache_directory, + .paths = .{ + .root = .{ + .root_dir = comp.zig_lib_directory, + .sub_path = "docs/wasm", + }, + .root_src_path = "Walk.zig", + }, + .fully_qualified_name = "Walk", + .inherited = .{ + .resolved_target = resolved_target, + .optimize_mode = optimize_mode, + }, + .global = config, + .cc_argv = &.{}, + .parent = root_mod, + .builtin_mod = root_mod.getBuiltinDependency(), + .builtin_modules = null, // `builtin_mod` is set }); + try root_mod.deps.put(arena, "Walk", walk_mod); const bin_basename = try std.zig.binNameAlloc(arena, .{ .root_name = root_name, .target = resolved_target.result, diff --git a/test/standalone/coff_dwarf/build.zig b/test/standalone/coff_dwarf/build.zig index f0a6d5a1f74a..26a8a87da8c9 100644 --- a/test/standalone/coff_dwarf/build.zig +++ 
b/test/standalone/coff_dwarf/build.zig @@ -7,9 +7,10 @@ pub fn build(b: *std.Build) void { b.default_step = test_step; const optimize: std.builtin.OptimizeMode = .Debug; - const target = b.standardTargetOptions(.{}); - - if (builtin.os.tag != .windows) return; + const target = if (builtin.os.tag == .windows) + b.standardTargetOptions(.{}) + else + b.resolveTargetQuery(.{ .os_tag = .windows }); if (builtin.cpu.arch == .aarch64) { // https://github.com/ziglang/zig/issues/18427 diff --git a/test/standalone/coff_dwarf/main.zig b/test/standalone/coff_dwarf/main.zig index 1cf2587e58e5..18a7262a3076 100644 --- a/test/standalone/coff_dwarf/main.zig +++ b/test/standalone/coff_dwarf/main.zig @@ -17,11 +17,11 @@ pub fn main() !void { const module = try debug_info.getModuleForAddress(add_addr); const symbol = try module.getSymbolAtAddress(allocator, add_addr); - defer symbol.deinit(allocator); + defer if (symbol.source_location) |sl| allocator.free(sl.file_name); - try testing.expectEqualStrings("add", symbol.symbol_name); - try testing.expect(symbol.line_info != null); - try testing.expectEqualStrings("shared_lib.c", std.fs.path.basename(symbol.line_info.?.file_name)); - try testing.expectEqual(@as(u64, 3), symbol.line_info.?.line); - try testing.expectEqual(@as(u64, 0), symbol.line_info.?.column); + try testing.expectEqualStrings("add", symbol.name); + try testing.expect(symbol.source_location != null); + try testing.expectEqualStrings("shared_lib.c", std.fs.path.basename(symbol.source_location.?.file_name)); + try testing.expectEqual(@as(u64, 3), symbol.source_location.?.line); + try testing.expectEqual(@as(u64, 0), symbol.source_location.?.column); } diff --git a/tools/dump-cov.zig b/tools/dump-cov.zig new file mode 100644 index 000000000000..7c96b29cfa04 --- /dev/null +++ b/tools/dump-cov.zig @@ -0,0 +1,79 @@ +//! Reads a Zig coverage file and prints human-readable information to stdout, +//! including file:line:column information for each PC. 
+ +const std = @import("std"); +const fatal = std.process.fatal; +const Path = std.Build.Cache.Path; +const assert = std.debug.assert; + +pub fn main() !void { + var general_purpose_allocator: std.heap.GeneralPurposeAllocator(.{}) = .{}; + defer _ = general_purpose_allocator.deinit(); + const gpa = general_purpose_allocator.allocator(); + + var arena_instance = std.heap.ArenaAllocator.init(gpa); + defer arena_instance.deinit(); + const arena = arena_instance.allocator(); + + const args = try std.process.argsAlloc(arena); + const exe_file_name = args[1]; + const cov_file_name = args[2]; + + const exe_path: Path = .{ + .root_dir = std.Build.Cache.Directory.cwd(), + .sub_path = exe_file_name, + }; + const cov_path: Path = .{ + .root_dir = std.Build.Cache.Directory.cwd(), + .sub_path = cov_file_name, + }; + + var coverage = std.debug.Coverage.init; + defer coverage.deinit(gpa); + + var debug_info = std.debug.Info.load(gpa, exe_path, &coverage) catch |err| { + fatal("failed to load debug info for {}: {s}", .{ exe_path, @errorName(err) }); + }; + defer debug_info.deinit(gpa); + + const cov_bytes = cov_path.root_dir.handle.readFileAlloc(arena, cov_path.sub_path, 1 << 30) catch |err| { + fatal("failed to load coverage file {}: {s}", .{ cov_path, @errorName(err) }); + }; + + var bw = std.io.bufferedWriter(std.io.getStdOut().writer()); + const stdout = bw.writer(); + + const header: *align(1) SeenPcsHeader = @ptrCast(cov_bytes); + try stdout.print("{any}\n", .{header.*}); + //const n_bitset_elems = (header.pcs_len + 7) / 8; + const pcs_bytes = cov_bytes[@sizeOf(SeenPcsHeader)..][0 .. 
header.pcs_len * @sizeOf(usize)]; + const pcs = try arena.alloc(usize, header.pcs_len); + for (0..pcs_bytes.len / @sizeOf(usize), pcs) |i, *pc| { + pc.* = std.mem.readInt(usize, pcs_bytes[i * @sizeOf(usize) ..][0..@sizeOf(usize)], .little); + } + assert(std.sort.isSorted(usize, pcs, {}, std.sort.asc(usize))); + + const seen_pcs = cov_bytes[@sizeOf(SeenPcsHeader) + pcs.len * @sizeOf(usize) ..]; + + const source_locations = try arena.alloc(std.debug.Coverage.SourceLocation, pcs.len); + try debug_info.resolveAddresses(gpa, pcs, source_locations); + + for (pcs, source_locations, 0..) |pc, sl, i| { + const file = debug_info.coverage.fileAt(sl.file); + const dir_name = debug_info.coverage.directories.keys()[file.directory_index]; + const dir_name_slice = debug_info.coverage.stringAt(dir_name); + const hit: u1 = @truncate(seen_pcs[i / 8] >> @intCast(i % 8)); + try stdout.print("{c}{x}: {s}/{s}:{d}:{d}\n", .{ + "-+"[hit], pc, dir_name_slice, debug_info.coverage.stringAt(file.basename), sl.line, sl.column, + }); + } + + try bw.flush(); +} + +const SeenPcsHeader = extern struct { + n_runs: usize, + deduplicated_runs: usize, + pcs_len: usize, + lowest_stack: usize, +};