diff --git a/lib/compiler/test_runner.zig b/lib/compiler/test_runner.zig index 15ae84510ae3..aeeeadbf3a88 100644 --- a/lib/compiler/test_runner.zig +++ b/lib/compiler/test_runner.zig @@ -341,7 +341,10 @@ const FuzzerSlice = extern struct { var is_fuzz_test: bool = undefined; -extern fn fuzzer_start(testOne: *const fn ([*]const u8, usize) callconv(.C) void) void; +extern fn fuzzer_start( + testOne: *const fn ([*]const u8, usize) callconv(.C) void, + options: *const std.testing.FuzzInputOptions, +) void; extern fn fuzzer_init(cache_dir: FuzzerSlice) void; extern fn fuzzer_coverage_id() u64; @@ -395,7 +398,7 @@ pub fn fuzz( if (builtin.fuzz) { const prev_allocator_state = testing.allocator_instance; testing.allocator_instance = .{}; - fuzzer_start(&global.fuzzer_one); + fuzzer_start(&global.fuzzer_one, &options); testing.allocator_instance = prev_allocator_state; return; } diff --git a/lib/fuzzer.zig b/lib/fuzzer.zig index 3c00ec7b48cd..e1dc53dda7be 100644 --- a/lib/fuzzer.zig +++ b/lib/fuzzer.zig @@ -3,436 +3,103 @@ const std = @import("std"); const Allocator = std.mem.Allocator; const assert = std.debug.assert; const fatal = std.process.fatal; -const SeenPcsHeader = std.Build.Fuzz.abi.SeenPcsHeader; +const Fuzzer = @import("fuzzer/main.zig").Fuzzer; +const fc = @import("fuzzer/feature_capture.zig"); -pub const std_options = std.Options{ - .logFn = logOverride, -}; +/// Type for passing slices across extern functions where we can't use zig +/// types +pub const Slice = extern struct { + ptr: [*]const u8, + len: usize, -var log_file: ?std.fs.File = null; - -fn logOverride( - comptime level: std.log.Level, - comptime scope: @Type(.enum_literal), - comptime format: []const u8, - args: anytype, -) void { - const f = if (log_file) |f| f else f: { - const f = fuzzer.cache_dir.createFile("tmp/libfuzzer.log", .{}) catch - @panic("failed to open fuzzer log file"); - log_file = f; - break :f f; - }; - const prefix1 = comptime level.asText(); - const prefix2 = if (scope == .default) ": " else "(" ++ @tagName(scope) ++ "): "; - f.writer().print(prefix1 ++ prefix2 ++ format ++ "\n", args) catch @panic("failed to write to fuzzer log"); -} - -/// Helps determine run uniqueness in the face of recursion. 
-export threadlocal var __sancov_lowest_stack: usize = 0; - -export fn __sanitizer_cov_trace_const_cmp1(arg1: u8, arg2: u8) void { - handleCmp(@returnAddress(), arg1, arg2); -} + pub fn toZig(s: Slice) []const u8 { + return s.ptr[0..s.len]; + } +}; -export fn __sanitizer_cov_trace_cmp1(arg1: u8, arg2: u8) void { - handleCmp(@returnAddress(), arg1, arg2); -} +// ==== global state ==== -export fn __sanitizer_cov_trace_const_cmp2(arg1: u16, arg2: u16) void { - handleCmp(@returnAddress(), arg1, arg2); -} - -export fn __sanitizer_cov_trace_cmp2(arg1: u16, arg2: u16) void { - handleCmp(@returnAddress(), arg1, arg2); -} +var log_file: ?std.fs.File = null; -export fn __sanitizer_cov_trace_const_cmp4(arg1: u32, arg2: u32) void { - handleCmp(@returnAddress(), arg1, arg2); -} +var fuzzer: Fuzzer = undefined; -export fn __sanitizer_cov_trace_cmp4(arg1: u32, arg2: u32) void { - handleCmp(@returnAddress(), arg1, arg2); -} +// ==== llvm callbacks ==== -export fn __sanitizer_cov_trace_const_cmp8(arg1: u64, arg2: u64) void { - handleCmp(@returnAddress(), arg1, arg2); -} +// zig fmt: off +export fn __sanitizer_cov_trace_const_cmp1(arg1: u8 , arg2: u8 ) void { handleCmp(@returnAddress(), arg1, arg2); } +export fn __sanitizer_cov_trace_cmp1 (arg1: u8 , arg2: u8 ) void { handleCmp(@returnAddress(), arg1, arg2); } +export fn __sanitizer_cov_trace_const_cmp2(arg1: u16, arg2: u16) void { handleCmp(@returnAddress(), arg1, arg2); } +export fn __sanitizer_cov_trace_cmp2 (arg1: u16, arg2: u16) void { handleCmp(@returnAddress(), arg1, arg2); } +export fn __sanitizer_cov_trace_const_cmp4(arg1: u32, arg2: u32) void { handleCmp(@returnAddress(), arg1, arg2); } +export fn __sanitizer_cov_trace_cmp4 (arg1: u32, arg2: u32) void { handleCmp(@returnAddress(), arg1, arg2); } +export fn __sanitizer_cov_trace_const_cmp8(arg1: u64, arg2: u64) void { handleCmp(@returnAddress(), arg1, arg2); } +export fn __sanitizer_cov_trace_cmp8 (arg1: u64, arg2: u64) void { handleCmp(@returnAddress(), arg1, arg2); } +// zig fmt: on -export fn __sanitizer_cov_trace_cmp8(arg1: u64, arg2: u64) void { - handleCmp(@returnAddress(), arg1, arg2); +fn handleCmp(pc: usize, arg1: u64, arg2: u64) void { + // TODO: TORC + _ = arg1; + _ = arg2; + const c: u64 = pc; + const lo: u32 = @truncate(c); + const hi: u32 = @intCast(c >> 32); + fc.newFeature(lo ^ hi); } -export fn __sanitizer_cov_trace_switch(val: u64, cases_ptr: [*]u64) void { +export fn __sanitizer_cov_trace_switch(val: u64, _: [*]u64) void { + // TODO: is this called? const pc = @returnAddress(); - const len = cases_ptr[0]; - const val_size_in_bits = cases_ptr[1]; - const cases = cases_ptr[2..][0..len]; _ = val; - fuzzer.visitPc(pc); - _ = val_size_in_bits; - _ = cases; - //std.log.debug("0x{x}: switch on value {d} ({d} bits) with {d} cases", .{ - // pc, val, val_size_in_bits, cases.len, - //}); + const c: u64 = pc; + const lo: u32 = @truncate(c); + const hi: u32 = @intCast(c >> 32); + fc.newFeature(lo ^ hi); } export fn __sanitizer_cov_trace_pc_indir(callee: usize) void { + // TODO: is this called? 
const pc = @returnAddress(); - _ = callee; - fuzzer.visitPc(pc); - //std.log.debug("0x{x}: indirect call to 0x{x}", .{ pc, callee }); -} - -fn handleCmp(pc: usize, arg1: u64, arg2: u64) void { - fuzzer.visitPc(pc ^ arg1 ^ arg2); - //std.log.debug("0x{x}: comparison of {d} and {d}", .{ pc, arg1, arg2 }); + const c: u64 = pc ^ callee; + const lo: u32 = @truncate(c); + const hi: u32 = @intCast(c >> 32); + fc.newFeature(lo ^ hi); } -const Fuzzer = struct { - gpa: Allocator, - rng: std.Random.DefaultPrng, - input: std.ArrayListUnmanaged(u8), - pcs: []const usize, - pc_counters: []u8, - n_runs: usize, - recent_cases: RunMap, - /// Data collected from code coverage instrumentation from one execution of - /// the test function. - coverage: Coverage, - /// Tracks which PCs have been seen across all runs that do not crash the fuzzer process. - /// Stored in a memory-mapped file so that it can be shared with other - /// processes and viewed while the fuzzer is running. - seen_pcs: MemoryMappedList, - cache_dir: std.fs.Dir, - /// Identifies the file name that will be used to store coverage - /// information, available to other processes. - coverage_id: u64, - - const RunMap = std.ArrayHashMapUnmanaged(Run, void, Run.HashContext, false); - - const Coverage = struct { - pc_table: std.AutoArrayHashMapUnmanaged(usize, void), - run_id_hasher: std.hash.Wyhash, - - fn reset(cov: *Coverage) void { - cov.pc_table.clearRetainingCapacity(); - cov.run_id_hasher = std.hash.Wyhash.init(0); - } - }; - - const Run = struct { - id: Id, - input: []const u8, - score: usize, - - const Id = u64; - - const HashContext = struct { - pub fn eql(ctx: HashContext, a: Run, b: Run, b_index: usize) bool { - _ = b_index; - _ = ctx; - return a.id == b.id; - } - pub fn hash(ctx: HashContext, a: Run) u32 { - _ = ctx; - return @truncate(a.id); - } - }; - - fn deinit(run: *Run, gpa: Allocator) void { - gpa.free(run.input); - run.* = undefined; - } - }; - - const Slice = extern struct { - ptr: [*]const u8, - len: usize, +// ==== libfuzzer API ==== - fn toZig(s: Slice) []const u8 { - return s.ptr[0..s.len]; - } - - fn fromZig(s: []const u8) Slice { - return .{ - .ptr = s.ptr, - .len = s.len, - }; - } - }; - - const Analysis = struct { - score: usize, - id: Run.Id, - }; - - fn init(f: *Fuzzer, cache_dir: std.fs.Dir, pc_counters: []u8, pcs: []const usize) !void { - f.cache_dir = cache_dir; - f.pc_counters = pc_counters; - f.pcs = pcs; - - // Choose a file name for the coverage based on a hash of the PCs that will be stored within. 
- const pc_digest = std.hash.Wyhash.hash(0, std.mem.sliceAsBytes(pcs)); - f.coverage_id = pc_digest; - const hex_digest = std.fmt.hex(pc_digest); - const coverage_file_path = "v/" ++ hex_digest; - - // Layout of this file: - // - Header - // - list of PC addresses (usize elements) - // - list of hit flag, 1 bit per address (stored in u8 elements) - const coverage_file = createFileBail(cache_dir, coverage_file_path, .{ - .read = true, - .truncate = false, - }); - defer coverage_file.close(); - const n_bitset_elems = (pcs.len + @bitSizeOf(usize) - 1) / @bitSizeOf(usize); - comptime assert(SeenPcsHeader.trailing[0] == .pc_bits_usize); - comptime assert(SeenPcsHeader.trailing[1] == .pc_addr); - const bytes_len = @sizeOf(SeenPcsHeader) + - n_bitset_elems * @sizeOf(usize) + - pcs.len * @sizeOf(usize); - const existing_len = coverage_file.getEndPos() catch |err| { - fatal("unable to check len of coverage file: {s}", .{@errorName(err)}); - }; - if (existing_len == 0) { - coverage_file.setEndPos(bytes_len) catch |err| { - fatal("unable to set len of coverage file: {s}", .{@errorName(err)}); - }; - } else if (existing_len != bytes_len) { - fatal("incompatible existing coverage file (differing lengths)", .{}); - } - f.seen_pcs = MemoryMappedList.init(coverage_file, existing_len, bytes_len) catch |err| { - fatal("unable to init coverage memory map: {s}", .{@errorName(err)}); - }; - if (existing_len != 0) { - const existing_pcs_bytes = f.seen_pcs.items[@sizeOf(SeenPcsHeader) + @sizeOf(usize) * n_bitset_elems ..][0 .. pcs.len * @sizeOf(usize)]; - const existing_pcs = std.mem.bytesAsSlice(usize, existing_pcs_bytes); - for (existing_pcs, pcs, 0..) |old, new, i| { - if (old != new) { - fatal("incompatible existing coverage file (differing PC at index {d}: {x} != {x})", .{ - i, old, new, - }); - } - } - } else { - const header: SeenPcsHeader = .{ - .n_runs = 0, - .unique_runs = 0, - .pcs_len = pcs.len, - }; - f.seen_pcs.appendSliceAssumeCapacity(std.mem.asBytes(&header)); - f.seen_pcs.appendNTimesAssumeCapacity(0, n_bitset_elems * @sizeOf(usize)); - f.seen_pcs.appendSliceAssumeCapacity(std.mem.sliceAsBytes(pcs)); - } - } - - fn analyzeLastRun(f: *Fuzzer) Analysis { - return .{ - .id = f.coverage.run_id_hasher.final(), - .score = f.coverage.pc_table.count(), - }; - } - - fn start(f: *Fuzzer) !void { - const gpa = f.gpa; - const rng = fuzzer.rng.random(); - - // Prepare initial input. 
- assert(f.recent_cases.entries.len == 0); - assert(f.n_runs == 0); - try f.recent_cases.ensureUnusedCapacity(gpa, 100); - const len = rng.uintLessThanBiased(usize, 80); - try f.input.resize(gpa, len); - rng.bytes(f.input.items); - f.recent_cases.putAssumeCapacity(.{ - .id = 0, - .input = try gpa.dupe(u8, f.input.items), - .score = 0, - }, {}); - - const header: *volatile SeenPcsHeader = @ptrCast(f.seen_pcs.items[0..@sizeOf(SeenPcsHeader)]); - - while (true) { - const chosen_index = rng.uintLessThanBiased(usize, f.recent_cases.entries.len); - const run = &f.recent_cases.keys()[chosen_index]; - f.input.clearRetainingCapacity(); - f.input.appendSliceAssumeCapacity(run.input); - try f.mutate(); - - @memset(f.pc_counters, 0); - __sancov_lowest_stack = std.math.maxInt(usize); - f.coverage.reset(); - - fuzzer_one(f.input.items.ptr, f.input.items.len); - - f.n_runs += 1; - _ = @atomicRmw(usize, &header.n_runs, .Add, 1, .monotonic); - - if (f.n_runs % 10000 == 0) f.dumpStats(); - - const analysis = f.analyzeLastRun(); - const gop = f.recent_cases.getOrPutAssumeCapacity(.{ - .id = analysis.id, - .input = undefined, - .score = undefined, - }); - if (gop.found_existing) { - //std.log.info("duplicate analysis: score={d} id={d}", .{ analysis.score, analysis.id }); - if (f.input.items.len < gop.key_ptr.input.len or gop.key_ptr.score == 0) { - gpa.free(gop.key_ptr.input); - gop.key_ptr.input = try gpa.dupe(u8, f.input.items); - gop.key_ptr.score = analysis.score; - } - } else { - std.log.info("unique analysis: score={d} id={d}", .{ analysis.score, analysis.id }); - gop.key_ptr.* = .{ - .id = analysis.id, - .input = try gpa.dupe(u8, f.input.items), - .score = analysis.score, - }; - - { - // Track code coverage from all runs. - comptime assert(SeenPcsHeader.trailing[0] == .pc_bits_usize); - const header_end_ptr: [*]volatile usize = @ptrCast(f.seen_pcs.items[@sizeOf(SeenPcsHeader)..]); - const remainder = f.pcs.len % @bitSizeOf(usize); - const aligned_len = f.pcs.len - remainder; - const seen_pcs = header_end_ptr[0..aligned_len]; - const pc_counters = std.mem.bytesAsSlice([@bitSizeOf(usize)]u8, f.pc_counters[0..aligned_len]); - const V = @Vector(@bitSizeOf(usize), u8); - const zero_v: V = @splat(0); - - for (header_end_ptr[0..pc_counters.len], pc_counters) |*elem, *array| { - const v: V = array.*; - const mask: usize = @bitCast(v != zero_v); - _ = @atomicRmw(usize, elem, .Or, mask, .monotonic); - } - if (remainder > 0) { - const i = pc_counters.len; - const elem = &seen_pcs[i]; - var mask: usize = 0; - for (f.pc_counters[i * @bitSizeOf(usize) ..][0..remainder], 0..) |byte, bit_index| { - mask |= @as(usize, @intFromBool(byte != 0)) << @intCast(bit_index); - } - _ = @atomicRmw(usize, elem, .Or, mask, .monotonic); - } - } - - _ = @atomicRmw(usize, &header.unique_runs, .Add, 1, .monotonic); - } +/// Invalid until `fuzzer_init` is called. +export fn fuzzer_coverage_id() u64 { + return fuzzer.coverage_id; +} - if (f.recent_cases.entries.len >= 100) { - const Context = struct { - values: []const Run, - pub fn lessThan(ctx: @This(), a_index: usize, b_index: usize) bool { - return ctx.values[b_index].score < ctx.values[a_index].score; - } - }; - f.recent_cases.sortUnstable(Context{ .values = f.recent_cases.keys() }); - const cap = 50; - // This has to be done before deinitializing the deleted items. 
- const doomed_runs = f.recent_cases.keys()[cap..]; - f.recent_cases.shrinkRetainingCapacity(cap); - for (doomed_runs) |*doomed_run| { - std.log.info("culling score={d} id={d}", .{ doomed_run.score, doomed_run.id }); - doomed_run.deinit(gpa); - } +export fn fuzzer_start( + testOne: *const fn ([*]const u8, usize) callconv(.C) void, + options: *const std.testing.FuzzInputOptions, +) void { + fuzzer.start(testOne, options.*) catch |e| switch (e) { + error.OutOfMemory => { + std.debug.print("fuzzer OOM\n", .{}); + if (@errorReturnTrace()) |trace| { + std.debug.dumpStackTrace(trace.*); } - } - } - - fn visitPc(f: *Fuzzer, pc: usize) void { - errdefer |err| oom(err); - try f.coverage.pc_table.put(f.gpa, pc, {}); - f.coverage.run_id_hasher.update(std.mem.asBytes(&pc)); - } - - fn dumpStats(f: *Fuzzer) void { - for (f.recent_cases.keys()[0..@min(f.recent_cases.entries.len, 5)], 0..) |run, i| { - std.log.info("best[{d}] id={x} score={d} input: '{}'", .{ - i, run.id, run.score, std.zig.fmtEscapes(run.input), - }); - } - } - - fn mutate(f: *Fuzzer) !void { - const gpa = f.gpa; - const rng = fuzzer.rng.random(); - - if (f.input.items.len == 0) { - const len = rng.uintLessThanBiased(usize, 80); - try f.input.resize(gpa, len); - rng.bytes(f.input.items); - return; - } - - const index = rng.uintLessThanBiased(usize, f.input.items.len * 3); - if (index < f.input.items.len) { - f.input.items[index] = rng.int(u8); - } else if (index < f.input.items.len * 2) { - _ = f.input.orderedRemove(index - f.input.items.len); - } else if (index < f.input.items.len * 3) { - try f.input.insert(gpa, index - f.input.items.len * 2, rng.int(u8)); - } else { - unreachable; - } - } -}; - -fn createFileBail(dir: std.fs.Dir, sub_path: []const u8, flags: std.fs.File.CreateFlags) std.fs.File { - return dir.createFile(sub_path, flags) catch |err| switch (err) { - error.FileNotFound => { - const dir_name = std.fs.path.dirname(sub_path).?; - dir.makePath(dir_name) catch |e| { - fatal("unable to make path '{s}': {s}", .{ dir_name, @errorName(e) }); - }; - return dir.createFile(sub_path, flags) catch |e| { - fatal("unable to create file '{s}': {s}", .{ sub_path, @errorName(e) }); - }; }, - else => fatal("unable to create file '{s}': {s}", .{ sub_path, @errorName(err) }), }; } -fn oom(err: anytype) noreturn { - switch (err) { - error.OutOfMemory => @panic("out of memory"), - } -} - -var general_purpose_allocator: std.heap.GeneralPurposeAllocator(.{}) = .init; - -var fuzzer: Fuzzer = .{ - .gpa = general_purpose_allocator.allocator(), - .rng = std.Random.DefaultPrng.init(0), - .input = .{}, - .pcs = undefined, - .pc_counters = undefined, - .n_runs = 0, - .recent_cases = .{}, - .coverage = undefined, - .cache_dir = undefined, - .seen_pcs = undefined, - .coverage_id = undefined, -}; - -/// Invalid until `fuzzer_init` is called. 
-export fn fuzzer_coverage_id() u64 { - return fuzzer.coverage_id; -} +export fn fuzzer_init(cache_dir_struct: Slice) void { + // setup log file as soon as possible + const cache_dir_path = cache_dir_struct.toZig(); + const cache_dir = if (cache_dir_path.len == 0) + std.fs.cwd() + else + std.fs.cwd().makeOpenPath(cache_dir_path, .{ .iterate = true }) catch |err| + std.debug.panic("unable to open fuzz directory: {}", .{err}); // cant call fatal since it depends on std.log.err -var fuzzer_one: *const fn (input_ptr: [*]const u8, input_len: usize) callconv(.C) void = undefined; + log_file = cache_dir.createFile("tmp/libfuzzer.log", .{}) catch |err| + std.debug.panic("create log file failed: {}", .{err}); // cant call fatal since it depends on std.log.err -export fn fuzzer_start(testOne: @TypeOf(fuzzer_one)) void { - fuzzer_one = testOne; - fuzzer.start() catch |err| switch (err) { - error.OutOfMemory => fatal("out of memory", .{}), - }; -} + std.log.info("Cache dir @'{s}'", .{cache_dir_path}); -export fn fuzzer_init(cache_dir_struct: Fuzzer.Slice) void { // Linkers are expected to automatically add `__start_
<section>` and // `__stop_<section>
` symbols when section names are valid C identifiers. @@ -460,64 +127,48 @@ export fn fuzzer_init(cache_dir_struct: Fuzzer.Slice) void { const pcs = pcs_start[0 .. pcs_end - pcs_start]; - const cache_dir_path = cache_dir_struct.toZig(); - const cache_dir = if (cache_dir_path.len == 0) - std.fs.cwd() - else - std.fs.cwd().makeOpenPath(cache_dir_path, .{ .iterate = true }) catch |err| { - fatal("unable to open fuzz directory '{s}': {s}", .{ cache_dir_path, @errorName(err) }); - }; - - fuzzer.init(cache_dir, pc_counters, pcs) catch |err| - fatal("unable to init fuzzer: {s}", .{@errorName(err)}); + fuzzer = Fuzzer.init(cache_dir, pc_counters, pcs); } -/// Like `std.ArrayListUnmanaged(u8)` but backed by memory mapping. -pub const MemoryMappedList = struct { - /// Contents of the list. - /// - /// Pointers to elements in this slice are invalidated by various functions - /// of this ArrayList in accordance with the respective documentation. In - /// all cases, "invalidated" means that the memory has been passed to this - /// allocator's resize or free function. - items: []align(std.mem.page_size) volatile u8, - /// How many bytes this list can hold without allocating additional memory. - capacity: usize, - - pub fn init(file: std.fs.File, length: usize, capacity: usize) !MemoryMappedList { - const ptr = try std.posix.mmap( - null, - capacity, - std.posix.PROT.READ | std.posix.PROT.WRITE, - .{ .TYPE = .SHARED }, - file.handle, - 0, - ); - return .{ - .items = ptr[0..length], - .capacity = capacity, - }; - } +// ==== log ==== - /// Append the slice of items to the list. - /// Asserts that the list can hold the additional items. - pub fn appendSliceAssumeCapacity(l: *MemoryMappedList, items: []const u8) void { - const old_len = l.items.len; - const new_len = old_len + items.len; - assert(new_len <= l.capacity); - l.items.len = new_len; - @memcpy(l.items[old_len..][0..items.len], items); - } +pub const std_options = std.Options{ + .logFn = logOverride, + .log_level = .debug, +}; + +fn logOverride( + comptime level: std.log.Level, + comptime scope: @TypeOf(.EnumLiteral), + comptime format: []const u8, + args: anytype, +) void { + const w = (log_file orelse unreachable).writer(); + var bw = std.io.bufferedWriter(w); + + const prefix1 = comptime level.asText(); + const prefix2 = if (scope == .default) ": " else "(" ++ @tagName(scope) ++ "): "; - /// Append a value to the list `n` times. - /// Never invalidates element pointers. - /// The function is inline so that a comptime-known `value` parameter will - /// have better memset codegen in case it has a repeated byte pattern. - /// Asserts that the list can hold the additional items. - pub inline fn appendNTimesAssumeCapacity(l: *MemoryMappedList, value: u8, n: usize) void { - const new_len = l.items.len + n; - assert(new_len <= l.capacity); - @memset(l.items.ptr[l.items.len..new_len], value); - l.items.len = new_len; + errdefer |err| std.debug.panic("io error while logging: {}", .{err}); + + // NOTE(prokop): These bubble through to be a `write` syscall on Linux + // which guarantees atomic operation as long as the message is not too long + // (4k i think?). It might not be a bad idea to put a lock here... 
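    // A minimal sketch of that idea (assuming a file-scope
    // `var log_lock: std.Thread.Mutex = .{};`, which this change does not add)
    // would wrap the buffered write below like so:
    //
    //     log_lock.lock();
    //     defer log_lock.unlock();
    //     try bw.writer().print(prefix1 ++ prefix2 ++ format ++ "\n", args);
    //     try bw.flush();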
+ try bw.writer().print(prefix1 ++ prefix2 ++ format ++ "\n", args); + try bw.flush(); +} + +// ==== panic handler ==== + +pub fn panic( + msg: []const u8, + trace: ?*std.builtin.StackTrace, + _: ?usize, +) noreturn { + @branchHint(.cold); + std.debug.print("fuzzer panic: {s}\n", .{msg}); + if (trace) |t| { + std.debug.dumpStackTrace(t.*); } -}; + std.process.exit(1); +} diff --git a/lib/fuzzer/InputPoolPosix.zig b/lib/fuzzer/InputPoolPosix.zig new file mode 100644 index 000000000000..82fbf2b1a297 --- /dev/null +++ b/lib/fuzzer/InputPoolPosix.zig @@ -0,0 +1,174 @@ +//! The inputs are densly packed inside a file that all fuzzing processes have +//! mapped into memory. To know where inputs start and end, there is a second +//! file mapped by all fuzzing processes that stores the end index of every +//! stored string and some metadata (see the meta field). +//! +//! When a process finds a new good input, it (under a lock) appends to both of +//! these files. All other processes can immediately start working on this +//! input +//! +//! We never shrink the corpus when fuzzing. One has to stop fuzzing and run a +//! separate program. That is because erasing strings would require complicated +//! synchronization between fuzzing processes. Finding good inputs seems to be +//! rare enough for the input pool to never get too large. + +const std = @import("std"); +const assert = std.debug.assert; +const fatal = std.process.fatal; +const MemoryMappedList = @import("memory_mapped_list.zig").MemoryMappedList; +const File = std.fs.File; + +/// arbitrary limit 2GiB of input data should be enough. 32th bit is delete +/// flag. Note that we only store 'good' inputs, which are rare. +pub const Index = u31; + +const InputPoolPosix = @This(); + +const LatestFormatVersion: u8 = 0; +const SignatureVersion: u32 = @bitCast([4]u8{ 117, 168, 125, LatestFormatVersion }); + +/// mmap-ed file. In reality we mapped 2 GiB but cropped to match the mmaped +/// file size. Doesn't need mremap when growing. +buffer: MemoryMappedList(u8), + +/// mmap-ed file. In reality we mapped 2 GiB but cropped to match the mmaped +/// file size. Doesn't need mremap when growing. +/// +/// layout of the meta file (v0): +/// * MetaHeader +/// * string end offsets (trailing data) +meta: MemoryMappedList(u32), + +const MetaHeader = extern struct { + signature_version: u32, + lock: u64, // space for a lock. Locks can't be placed in extern structs + deleted_bytes: u32, + number_of_string: u32, +}; + +fn getHeader(m: MemoryMappedList(u32)) *align(std.mem.page_size) volatile MetaHeader { + const size32 = @divExact(@sizeOf(MetaHeader), @sizeOf(u32)); + const bytes: *align(std.mem.page_size) volatile [size32]u32 = m.items[0..size32]; + return @ptrCast(bytes); +} + +const Flags = packed struct(u32) { + index: Index, + delete: bool, +}; + +fn getData(m: MemoryMappedList(u32)) []volatile Flags { + const size32 = @divExact(@sizeOf(MetaHeader), @sizeOf(u32)); + const rest: []volatile u32 = m.items[size32..]; + return @ptrCast(rest); +} + +pub fn init(meta_file: File, buffer_file: File) InputPoolPosix { + + // maxInt is not a special value! We allocate 2GiB of virtual address + // space. 
See comment in memory_mapped_list_posix.zig + const buffer = MemoryMappedList(u8).init(buffer_file, std.math.maxInt(Index)); + var meta = MemoryMappedList(u32).init(meta_file, std.math.maxInt(Index)); + + std.log.info("{} {}", .{ buffer.items.len, meta.items.len }); + + if (meta.fileLen() == 0) { + var header: MetaHeader = .{ + .signature_version = SignatureVersion, + .lock = undefined, + .deleted_bytes = 0, + .number_of_string = 0, + }; + + const lock_ptr: *std.Thread.Mutex = @ptrCast(&header.lock); + lock_ptr.* = std.Thread.Mutex{}; // TODO: make this a inter-process lock + + // []u8 to []u32 conversion + const s = std.mem.asBytes(&header); + const z: [*]const u32 = @ptrCast(s.ptr); + const size32 = @divExact(@sizeOf(MetaHeader), @sizeOf(u32)); + + meta.appendSlice(z[0..size32]); + } else { + const header: *volatile MetaHeader = getHeader(meta); + const sign: *volatile u32 = &header.signature_version; + std.log.info("sign{x} header@{x}", .{ @intFromPtr(sign), @intFromPtr(header) }); + if (sign.* != SignatureVersion) { + fatal( + "input pool file signature is invalid (wanted {x}, got {x})", + .{ SignatureVersion, header.signature_version }, + ); + } + } + + return .{ + .buffer = buffer, + .meta = meta, + }; +} + +pub fn deinit(ip: *InputPoolPosix) void { + ip.buffer.deinit(); + ip.meta.deinit(); +} + +pub fn insertString(ip: *InputPoolPosix, str: []const u8) void { + const header = getHeader(ip.meta); + + // mutex methods don't accept volatile pointers. There is not much I can do + const lock_ptr: *std.Thread.Mutex = @volatileCast(@ptrCast(&header.lock)); + lock_ptr.lock(); + defer lock_ptr.unlock(); + + const fileLen = ip.buffer.fileLen(); + + ip.buffer.appendSlice(str); + ip.meta.append(@intCast(fileLen)); + header.number_of_string += 1; +} + +pub fn len(ip: *InputPoolPosix) u31 { + // number_of_string can't be u31 because the struct is extern and 31 is not + // a power of two + return @intCast(getHeader(ip.meta).number_of_string); +} + +pub fn getString(ip: InputPoolPosix, index: Index) []volatile u8 { + const ends = getData(ip.meta); + const start = if (index == 0) 0 else (ends[index - 1].index); + const one_past_end = ends[index].index; + return ip.buffer.items[start..one_past_end]; +} + +/// Note that this is currently never used as the corpus minimalization is not +/// yet implemented +/// +/// Shifts strings down to fill unused space. Only called when no processes are +/// fuzzing +pub fn repack(ip: InputPoolPosix) void { + var poolWriteHead: usize = 0; + var endsWriteHead: usize = 0; + + const ends = getData(ip.meta); + + for (0..ends.items.len) |i| { + const start = if (i == 0) 0 else ends.items[i - 1].index; + const one_past_end = ends.items[i].index; + const str = ip.buffer.items[start..one_past_end]; + const dest = ip.buffer.items[poolWriteHead..][0..str.len]; + + if (ends.items[i].delete) { + continue; + } + + if (str.ptr != dest.ptr) { + std.mem.copyForwards(u8, dest, str); + ip.ends.items[endsWriteHead].index = poolWriteHead + str.len; + } + poolWriteHead += str.len; + endsWriteHead += 1; + } + + ip.ends.setFileLen(endsWriteHead + @sizeOf(MetaHeader)); + ip.buffer.setFileLen(poolWriteHead); +} diff --git a/lib/fuzzer/README.md b/lib/fuzzer/README.md new file mode 100644 index 000000000000..e7abb6090a1e --- /dev/null +++ b/lib/fuzzer/README.md @@ -0,0 +1,48 @@ +There are two things the fuzzer is storing: explored features and the corpus. + +Explored features is a sorted buffer of u32 values. 
Explored feature is +anything "interesting" that happened in the code when we run it with some input +and *it did not crash*. Different "interesting things" should correspond to +different u32 values but collisions never 100% avoidable. Explored features are +not shared among different workers. + +Currently tracked "interesting things" are: + +* taken edges in the CFG +* address of cmp instructions executed +* address of switch statements executed +* indirect calls + +Fuzzer is trying to maximize the number of unique explored features over all +inputs. + +The corpus is a set of inputs where input is some array of bytes. The initial +corpus is either provided by the user or generated randomly. The corpus is +stored as two arrays that are shared among all workers. One of the arrays +stores the inputs, densely packed one after another. The other is storing some +metadata and indexes of string ends. Whenever some input explores a new +feature, it is added to the corpus. The corpus is never shrunk, only appended. + +All that the fuzzer does is pick random input, mutate it, see if it hits any +new features and if so, add the mutated input to the corpus and the new +features to explored features. + +Every file starts with a more detailed documentation on the part of the fuzzer +that is implemented in that file: + +* `feature_capture.zig` - storing and deduplication of features that the user + code is emitting +* `InputPool*.zig` - corpus implementations +* `main.zig` - the main loop +* `memory_mapped_list*.zig` - shared growable memory mapped files +* `mutate.zig` - mutations + +Possible improvements: + +* Prioritize mutating inputs that hit rare features +* Table of recently compared values used in mutations +* In-place mutation to avoid copying? +* Implement more mutations +* Multithreading +* Maybe use hash table for explored features instead of sorted array + diff --git a/lib/fuzzer/dump_corpus.zig b/lib/fuzzer/dump_corpus.zig new file mode 100644 index 000000000000..8a0bbc8ffacc --- /dev/null +++ b/lib/fuzzer/dump_corpus.zig @@ -0,0 +1,44 @@ +//! This is a standalone tool that can print the strings stored inside a corpus +//! (buffer + meta file pair in the .zig-cache/v/ directory) + +const std = @import("std"); +const fatal = std.process.fatal; + +const InputPool = @import("input_pool.zig").InputPool; + +pub fn main() void { + var args = std.process.args(); + const bin = args.next(); + const cache_dir_path = args.next(); + const pc_digest_str = args.next(); + + if (cache_dir_path == null or pc_digest_str == null or args.next() != null) { + fatal("usage: {s} CACHE_DIR PC_DIGEST\n", .{bin.?}); + } + + // std.fmt.hex actually produces the hex number in the opposite order than + // parseInt reads... + const pc_digest = @byteSwap(std.fmt.parseInt(u64, pc_digest_str.?, 16) catch |e| + fatal("invalid pc digest: {}", .{e})); + + const cache_dir = std.fs.cwd().makeOpenPath(cache_dir_path.?, .{}) catch |e| + fatal("invalid cache dir: {}", .{e}); + + std.log.info("cache_dir: {s}", .{cache_dir_path.?}); + std.log.info("pc_digest: {x}", .{@byteSwap(pc_digest)}); + + var input_pool = InputPool.init(cache_dir, pc_digest); + + const len = input_pool.len(); + + std.log.info("There are {} strings in the corpus:", .{len}); + + for (0..len) |i| { + const str = input_pool.getString(@intCast(i)); + + // Only writing to this buffer has side effects. 
+ const str2: []const u8 = @volatileCast(str); + + std.log.info("\"{}\"", .{std.zig.fmtEscapes(str2)}); + } +} diff --git a/lib/fuzzer/feature_capture.zig b/lib/fuzzer/feature_capture.zig new file mode 100644 index 000000000000..21817cc504ab --- /dev/null +++ b/lib/fuzzer/feature_capture.zig @@ -0,0 +1,56 @@ +//! This file captures features from the running instrumented program. LLVM +//! inserts callbacks (in ../fuzzer.zig) that call into newFeature here. +//! +//! "Feature" is any interesting thing that happened while the program was +//! running. Usually an edge in the CFG was taken or a specific cmp instruction +//! was executed. They are represented as u32s and are usually created by +//! xoring the program counter and other random unique looking values that are +//! available when the feature was hit. We score inputs based on how many +//! unique features they hit. + +const std = @import("std"); + +var buffer: []u32 = undefined; +var top: usize = 0; + +// TODO: is the size right? libfuzzer uses 1 << 21. Maybe a bloom filter? +/// Primitive prevention of wasting space in the buffer on duplicate features. +const TableSize = 16 * 4096; +var epoch: u8 = 0; +var table: [TableSize]u8 = @splat(0); + +/// Instrumentation inserts call to this with almost-unique values based on the +/// calling context. We don't require any specifics. Fuzzer is trying to +/// maximize number of unique arguments given to this function +pub fn newFeature(f: u32) void { + const hash = std.hash.uint32(f); + const entry: *u8 = &table[hash % table.len]; + + if (entry.* != epoch and top < buffer.len) { + entry.* = epoch; + buffer[top] = f; + top += 1; + } +} + +/// Run before calling the instrumented function +pub fn prepare(b: []u32) void { + buffer = b; + top = 0; + if (epoch == 255) { + @branchHint(.cold); + @memset(&table, 0); + epoch = 1; + } else { + epoch += 1; + } +} + +/// Get the captured features +pub fn values() []u32 { + return buffer[0..top]; +} + +pub fn is_full() bool { + return top == buffer.len; +} diff --git a/lib/fuzzer/input_pool.zig b/lib/fuzzer/input_pool.zig new file mode 100644 index 000000000000..38f100ed30b7 --- /dev/null +++ b/lib/fuzzer/input_pool.zig @@ -0,0 +1,6 @@ +const native_os = @import("builtin").os.tag; + +pub const InputPool = switch (native_os) { + .linux => @import("InputPoolPosix.zig"), + else => @compileError("Unsupported os"), +}; diff --git a/lib/fuzzer/main.zig b/lib/fuzzer/main.zig new file mode 100644 index 000000000000..c035180f39ad --- /dev/null +++ b/lib/fuzzer/main.zig @@ -0,0 +1,481 @@ +const std = @import("std"); +const builtin = @import("builtin"); +const mutate = @import("mutate.zig"); +const InputPool = @import("input_pool.zig").InputPool; +const MemoryMappedList = @import("memory_mapped_list.zig").MemoryMappedList; +const feature_capture = @import("feature_capture.zig"); + +const Allocator = std.mem.Allocator; +const File = std.fs.File; +const Dir = std.fs.Dir; +const ArrayList = std.ArrayList; +const ArrayListUnmanaged = std.ArrayListUnmanaged; +const Options = std.testing.FuzzInputOptions; +const SeenPcsHeader = std.Build.Fuzz.abi.SeenPcsHeader; +const assert = std.debug.assert; +const fatal = std.process.fatal; + +const Testee = *const fn ([*]const u8, usize) callconv(.C) void; + +const InitialFeatureBufferCap = 64; + +// currently unused +export threadlocal var __sancov_lowest_stack: usize = std.math.maxInt(usize); + +/// Deduplicates array of sorted features +fn uniq(a: []u32) []u32 { + var write: usize = 0; + + if (a.len == 0) return a; + + var last: u32 
= a[0]; + a[write] = last; + write += 1; + + for (a[1..]) |v| { + if (v != last) { + a[write] = v; + write += 1; + last = v; + } + } + + return a[0..write]; +} + +test uniq { + var data: [9]u32 = (&[_]u32{ 0, 0, 1, 2, 2, 2, 3, 4, 4 }).*; + const cropped = uniq(&data); + try std.testing.expectEqualSlices(u32, &[_]u32{ 0, 1, 2, 3, 4 }, cropped); +} + +/// sorted and dedeuplicated +fn getLastRunFeatures() []u32 { + var features = feature_capture.values(); + std.mem.sort(u32, features, void{}, std.sort.asc(u32)); + features = uniq(features); + return features; +} + +pub const CmpResult = struct { only_a: u32, only_b: u32, both: u32 }; + +/// Compares two sorted lists of features +fn cmp(a: []const u32, b: []const u32) CmpResult { + var ai: u32 = 0; + var bi: u32 = 0; + + var only_a: u32 = 0; + var only_b: u32 = 0; + var both: u32 = 0; + + while (true) { + if (ai == a.len) { + only_b += @intCast(b[bi..].len); + break; + } else if (bi == b.len) { + only_a += @intCast(a[ai..].len); + break; + } + + const i = a[ai]; + const j = b[bi]; + + if (i < j) { + only_a += 1; + ai += 1; + } else if (i > j) { + only_b += 1; + bi += 1; + } else { + both += 1; + ai += 1; + bi += 1; + } + } + + return .{ + .only_a = only_a, + .only_b = only_b, + .both = both, + }; +} + +test cmp { + const e = std.testing.expectEqual; + const R = CmpResult; + try e(R{ .only_a = 0, .only_b = 0, .both = 0 }, cmp(&.{}, &.{})); + try e(R{ .only_a = 1, .only_b = 0, .both = 0 }, cmp(&.{1}, &.{})); + try e(R{ .only_a = 0, .only_b = 1, .both = 0 }, cmp(&.{}, &.{1})); + try e(R{ .only_a = 0, .only_b = 0, .both = 1 }, cmp(&.{1}, &.{1})); + try e(R{ .only_a = 1, .only_b = 1, .both = 0 }, cmp(&.{1}, &.{2})); + try e(R{ .only_a = 1, .only_b = 0, .both = 1 }, cmp(&.{ 1, 2 }, &.{1})); + try e(R{ .only_a = 0, .only_b = 1, .both = 1 }, cmp(&.{1}, &.{ 1, 2 })); + try e(R{ .only_a = 0, .only_b = 0, .both = 2 }, cmp(&.{ 1, 2 }, &.{ 1, 2 })); + try e(R{ .only_a = 3, .only_b = 3, .both = 0 }, cmp(&.{ 1, 2, 3 }, &.{ 4, 5, 6 })); +} + +/// Merges the second sorted list of features into the first list of sorted +/// features +fn merge(dest: *ArrayList(u32), src: []const u32) error{OutOfMemory}!void { + // TODO: can be in O(n) time and O(1) extra space + try dest.appendSlice(src); + std.mem.sort(u32, dest.items, void{}, std.sort.asc(u32)); + dest.items = uniq(dest.items); +} + +fn hashPCs(pcs: []const usize) u64 { + var hasher = std.hash.Wyhash.init(0); + hasher.update(std.mem.sliceAsBytes(pcs)); + return hasher.final(); +} + +/// File contains SeenPcsHeader and its trailing data +fn initCoverageFile(coverage_file: File, pcs: []const usize) MemoryMappedList(u8) { + const n_bitset_elems = (pcs.len + @bitSizeOf(usize) - 1) / @bitSizeOf(usize); + + comptime assert(SeenPcsHeader.trailing[0] == .pc_bits_usize); + comptime assert(SeenPcsHeader.trailing[1] == .pc_addr); + + // how long the file should be + const bytes_len: usize = @sizeOf(SeenPcsHeader) + n_bitset_elems * @sizeOf(usize) + pcs.len * @sizeOf(usize); + + var seen_pcs: MemoryMappedList(u8) = .init(coverage_file, bytes_len); + + // how long the file actually is + const existing_len: usize = seen_pcs.fileLen(); + + if (existing_len == 0) { + // init file content + const header: SeenPcsHeader = .{ + .n_runs = 0, + .unique_runs = 0, + .pcs_len = pcs.len, + }; + seen_pcs.appendSlice(std.mem.asBytes(&header)); + seen_pcs.appendNTimes(0, n_bitset_elems * @sizeOf(usize)); + for (pcs) |pc| { + seen_pcs.appendSlice(std.mem.asBytes(&pc)); + } + } else if (existing_len == bytes_len) { + // check existing file 
is ok + const existing_pcs_bytes = seen_pcs.items[@sizeOf(SeenPcsHeader) + @sizeOf(usize) * n_bitset_elems ..][0 .. pcs.len * @sizeOf(usize)]; + const existing_pcs = std.mem.bytesAsSlice(usize, existing_pcs_bytes); + for (existing_pcs, pcs) |old, new| { + if (old != new) { + fatal("coverage file is invalid (pc missmatch)", .{}); + } + } + } else { + fatal( + "coverage file is invalid (wrong length. wanted {}, is {})", + .{ bytes_len, existing_len }, + ); + } + + return seen_pcs; +} + +/// Global coverage is set of all PCs that are covered by some fuzz input and +/// did not crash. They show up as green in the web ui +fn updateGlobalCoverage(pc_counters_: []const u8, seen_pcs_: MemoryMappedList(u8)) void { + comptime assert(SeenPcsHeader.trailing[0] == .pc_bits_usize); + const header_end_ptr: [*]volatile usize = @ptrCast(seen_pcs_.items[@sizeOf(SeenPcsHeader)..]); + const remainder = pc_counters_.len % @bitSizeOf(usize); + const aligned_len = pc_counters_.len - remainder; + const seen_pcs = header_end_ptr[0..aligned_len]; + const pc_counters = std.mem.bytesAsSlice([@bitSizeOf(usize)]u8, pc_counters_[0..aligned_len]); + const V = @Vector(@bitSizeOf(usize), u8); + const zero_v: V = @splat(0); + + // update usize wide chunks + for (header_end_ptr[0..pc_counters.len], pc_counters) |*elem, *array| { + const v: V = array.*; + const mask: usize = @bitCast(v != zero_v); + _ = @atomicRmw(usize, elem, .Or, mask, .monotonic); + } + + if (remainder > 0) { + const i = pc_counters.len; + const elem = &seen_pcs[i]; + var mask: usize = 0; + for (pc_counters_[i * @bitSizeOf(usize) ..][0..remainder], 0..) |byte, bit_index| { + mask |= @as(usize, @intFromBool(byte != 0)) << @intCast(bit_index); + } + _ = @atomicRmw(usize, elem, .Or, mask, .monotonic); + } +} + +fn incrementUniqueRuns(seen_pcs: MemoryMappedList(u8)) void { + const header: *volatile SeenPcsHeader = @ptrCast(seen_pcs.items[0..@sizeOf(SeenPcsHeader)]); + _ = @atomicRmw(usize, &header.unique_runs, .Add, 1, .monotonic); +} + +fn incrementNumberOfRuns(seen_pcs: MemoryMappedList(u8)) void { + const header: *volatile SeenPcsHeader = @ptrCast(seen_pcs.items[0..@sizeOf(SeenPcsHeader)]); + _ = @atomicRmw(usize, &header.n_runs, .Add, 1, .monotonic); +} + +fn initialCorpusRandom(ip: *InputPool, rng: std.Random) void { + var buffer: [256]u8 = undefined; + for (0..256) |len| { + const slice = buffer[0..len]; + rng.bytes(slice); + ip.insertString(slice); + } + // TODO: could prune +} + +fn selectAndCopyInput( + a: Allocator, + ip: *InputPool, + rng: std.Random, + input_: ArrayListUnmanaged(u8), +) !ArrayListUnmanaged(u8) { + var input = input_; + const new_input_index = rng.intRangeLessThanBiased(u31, 0, ip.len()); + const new_input = ip.getString(new_input_index); + + // manual slice copy since appendSlice doesn't take volatile slice + input.clearRetainingCapacity(); + try input.ensureTotalCapacity(a, new_input.len); + input.items.len = new_input.len; + @memcpy(input.items, new_input); + + return input; +} + +fn logNewFeatures( + seen_pcs: MemoryMappedList(u8), + features: []u32, + mutation_seed: u64, + total_features: usize, +) void { + var buffer: [128]u8 = undefined; + var fba: std.heap.FixedBufferAllocator = .init(&buffer); + var ar: ArrayList(u8) = .init(fba.allocator()); + mutate.writeMutation(mutation_seed, ar.writer()) catch {}; + std.log.info("new unique run: F:{} \tT:{} \t{s}", .{ + features.len, + total_features, + ar.items, + }); + incrementUniqueRuns(seen_pcs); +} + +fn checksum(str: []const u8) u8 { + // this is very bad checksum but since we 
run the user's code a lot, it + // will eventually catch when they do it. + var c: u8 = 0; + for (str) |s| { + c ^= s; + } + return c; +} + +fn collectPcCounterFeatures(pc_counters: []u8) void { + for (pc_counters, 0..) |counter, i_| { + if (counter != 0) { + const i: u32 = @intCast(i_); + // TODO: does this do a lot of collisions? + feature_capture.newFeature(std.hash.uint32(i)); + } + } +} + +fn beforeRun(pc_counters: []u8, feature_buffer: []u32) void { + @memset(pc_counters, 0); + feature_capture.prepare(feature_buffer); +} + +fn growFeatureBuffer(a: Allocator, feature_buffer: *ArrayListUnmanaged(u32)) !void { + // avoid copying data + const new_size = feature_buffer.items.len * 2; + feature_buffer.clearRetainingCapacity(); + try feature_buffer.ensureTotalCapacity(a, new_size); +} + +fn runInput( + a: Allocator, + test_one: Testee, + feature_buffer: *ArrayListUnmanaged(u32), + pc_counters: []u8, + input: []const u8, +) !void { + // loop for run retry + while (true) { + beforeRun(pc_counters, feature_buffer.items); + + test_one(input.ptr, input.len); + + collectPcCounterFeatures(pc_counters); + + if (feature_capture.is_full()) { + try growFeatureBuffer(a, feature_buffer); + // rerun same input with larger buffer. By doing this we keep the + // feature_capture callback function trivial + continue; + } + break; + } +} + +/// Returns true when the new features are interesting +fn analyzeFeatures(ip: *InputPool, features: []u32, input: []const u8, all_features: []const u32) bool { + const analysis = cmp(features, all_features); + + if (analysis.only_a > 0) { + ip.insertString(input); + return true; + } + return false; // boring input +} + +fn mergeInput( + a: Allocator, + seen_pcs: MemoryMappedList(u8), + all_features: *ArrayListUnmanaged(u32), + features: []u32, + pc_counters: []u8, +) error{OutOfMemory}!void { + var ar = all_features.toManaged(a); + try merge(&ar, features); + all_features.* = ar.moveToUnmanaged(); + updateGlobalCoverage(pc_counters, seen_pcs); +} + +const Files = struct { buffer: File, meta: File, coverage: File }; + +fn initFilesBail(c: Dir, i: u64) Files { + return setupFiles(c, i) catch |e| fatal("Failed to setup files: {}", .{e}); +} + +fn deinitFiles(f: Files) void { + f.buffer.close(); + f.meta.close(); + f.coverage.close(); +} + +fn setupFiles(cache_dir: Dir, coverage_id: u64) !Files { + // we create 1 folder and 3 files: + // cache/v/ + // cache/v/***buffer + // cache/v/***meta + // cache/v/***coverage + + const hex_digest = std.fmt.hex(coverage_id); + + const flags = File.CreateFlags{ .read = true, .truncate = false }; + + cache_dir.makeDir("v") catch |e| switch (e) { + error.PathAlreadyExists => {}, + else => return e, + }; + + const buffer = try cache_dir.createFile("v/" ++ hex_digest ++ "buffer", flags); + errdefer buffer.close(); + + const meta = try cache_dir.createFile("v/" ++ hex_digest ++ "meta", flags); + errdefer meta.close(); + + const coverage = try cache_dir.createFile("v/" ++ hex_digest ++ "coverage", flags); + errdefer coverage.close(); + + return .{ .buffer = buffer, .meta = meta, .coverage = coverage }; +} + +pub const Fuzzer = struct { + // given to us by LLVM + pcs: []const usize, + pc_counters: []u8, // same length as pcs + + /// Identifies the file name that will be used to store coverage + /// information, available to other processes. 
+ coverage_id: u64, + + cache_dir: Dir, + + pub fn init(cache_dir: Dir, pc_counters: []u8, pcs: []usize) Fuzzer { + assert(pc_counters.len == pcs.len); + + return .{ + .pcs = pcs, + .pc_counters = pc_counters, + .cache_dir = cache_dir, + .coverage_id = hashPCs(pcs), + }; + } + + pub fn start(f: *Fuzzer, test_one: Testee, options: Options) error{OutOfMemory}!void { + var rng_impl = std.Random.DefaultPrng.init(0); + const rng = rng_impl.random(); + + var gpa_impl: std.heap.GeneralPurposeAllocator(.{}) = .{}; + const a = gpa_impl.allocator(); + + var input: ArrayListUnmanaged(u8) = .empty; + defer input.deinit(a); + + var mutate_scratch: ArrayListUnmanaged(u8) = .empty; + defer mutate_scratch.deinit(a); + + var all_features: ArrayListUnmanaged(u32) = .empty; + defer all_features.deinit(a); + + var feature_buffer: ArrayListUnmanaged(u32) = try .initCapacity(a, InitialFeatureBufferCap); + defer feature_buffer.deinit(a); + + const files = initFilesBail(f.cache_dir, f.coverage_id); + defer deinitFiles(files); + + // Tracks which PCs have been seen across all runs that do not crash the fuzzer process. + // Stored in a memory-mapped file so that it can be shared with other + // processes and viewed while the fuzzer is running. + const seen_pcs = initCoverageFile(files.coverage, f.pcs); + + var ip = InputPool.init(files.meta, files.buffer); + defer ip.deinit(); + + std.log.info( + \\Initial corpus of size {} + \\F - this input features + \\T - new unique features + , .{ip.len()}); + + if (ip.len() == 0) { + initialCorpusRandom(&ip, rng); + } + + for (options.corpus) |inp| { + try runInput(a, test_one, &feature_buffer, f.pc_counters, inp); + const features = getLastRunFeatures(); + if (analyzeFeatures(&ip, features, inp, all_features.items)) { + try mergeInput(a, seen_pcs, &all_features, features, f.pc_counters); + } + } + + // fuzzer main loop + while (true) { + incrementNumberOfRuns(seen_pcs); + input = try selectAndCopyInput(a, &ip, rng, input); + const mutation_seed = rng.int(u64); + assert(mutate_scratch.items.len == 0); + try mutate.mutate(&input, mutation_seed, &mutate_scratch, a); + const input_checksum = checksum(input.items); + + try runInput(a, test_one, &feature_buffer, f.pc_counters, input.items); + + if (input_checksum != checksum(input.items)) { + // report the input? it is not very useful since it was written to + std.process.fatal("User code mutated input!", .{}); + } + + const features = getLastRunFeatures(); + if (analyzeFeatures(&ip, features, input.items, all_features.items)) { + logNewFeatures(seen_pcs, features, mutation_seed, all_features.items.len); + try mergeInput(a, seen_pcs, &all_features, features, f.pc_counters); + } + + mutate.mutateReverse(&input, mutation_seed, &mutate_scratch); + } + } +}; diff --git a/lib/fuzzer/memory_mapped_list.zig b/lib/fuzzer/memory_mapped_list.zig new file mode 100644 index 000000000000..a180ce064268 --- /dev/null +++ b/lib/fuzzer/memory_mapped_list.zig @@ -0,0 +1,6 @@ +const native_os = @import("builtin").os.tag; + +pub const MemoryMappedList = switch (native_os) { + .linux => @import("memory_mapped_list_posix.zig").MemoryMappedList, + else => @compileError("Unsupported os"), +}; diff --git a/lib/fuzzer/memory_mapped_list_posix.zig b/lib/fuzzer/memory_mapped_list_posix.zig new file mode 100644 index 000000000000..4a778e723553 --- /dev/null +++ b/lib/fuzzer/memory_mapped_list_posix.zig @@ -0,0 +1,104 @@ +//! Growable shared memory backed by a file +//! +//! We mmap the fd but overshoot the allocation size to the maximum file size +//! 
we support. Then when we want to grow the file, we can ftruncate the fd and +//! the previously mmaped pages become automatically valid without needing to +//! mremap. +//! +//! The most amazing part is that some other process can do the ftruncate and +//! our memory region grows without the need to synchronize. The only thing +//! needed is an atomic size inside the file that other threads use to find out +//! the current size. Nobody needs to remap anything ever. + +const std = @import("std"); +const assert = std.debug.assert; +const fatal = std.process.fatal; + +pub fn MemoryMappedList(comptime T: type) type { + return struct { + // The entire allocated region + items: []align(std.mem.page_size) volatile T, + file: std.fs.File, + + const Self = @This(); + + pub fn init(f: std.fs.File, max_count: usize) Self { + const max_size_bytes: usize = max_count * @sizeOf(T); + + const slice: []align(std.mem.page_size) u8 = std.posix.mmap( + null, + max_size_bytes, + std.posix.PROT.READ | std.posix.PROT.WRITE, + .{ .TYPE = .SHARED, .NORESERVE = true }, + f.handle, + 0, + ) catch |e| fatal("mmap(len={},fd={}) failed: {}", .{ max_size_bytes, f.handle, e }); + + assert(slice.len == max_size_bytes); + + const items_start: [*]align(std.mem.page_size) volatile T = @ptrCast(slice.ptr); + + return .{ + .items = items_start[0..max_count], + .file = f, + }; + } + + pub fn deinit(self: *Self) void { + // volatileCast is safe. ptr is never used to actually write to or + // read from + const startT: [*]align(std.mem.page_size) T = @volatileCast(self.items.ptr); + const start8: [*]align(std.mem.page_size) u8 = @ptrCast(startT); + const len8: usize = self.items.len * @sizeOf(T); + const memory: []align(std.mem.page_size) u8 = start8[0..len8]; + + std.posix.msync(memory, std.posix.MSF.ASYNC) catch { + // well... + }; + + std.posix.munmap(memory); + } + + pub fn fileLen(self: *Self) usize { + return self.file.getEndPos() catch |e| fatal("getendpos failed: {}", .{e}); + } + + pub fn setFileLen(self: *Self, new_size_bytes: usize) void { + std.posix.ftruncate(self.file.handle, new_size_bytes) catch |e| fatal("ftruncate failed: {}", .{e}); + } + + /// Not thread safe + pub fn append(self: *Self, item: T) void { + return self.appendSlice(&[1]T{item}); + } + + /// Not thread safe + pub fn appendSlice(self: *Self, items: []const T) void { + const space = self.makeSpace(items.len); + @memcpy(space, items); + } + + /// Not thread safe + pub fn appendNTimes(self: *Self, value: T, count: usize) void { + const space = self.makeSpace(count); + @memset(space, value); + } + + /// Not thread safe + /// Grows the file, not the mapping. Returns the new space at the end of the file + pub fn makeSpace(self: *Self, additional_count: usize) []volatile T { + const current_size_bytes = self.fileLen(); + const new_size_bytes: usize = current_size_bytes + additional_count * @sizeOf(T); + + const new_slice = self.items[@divExact(current_size_bytes, @sizeOf(T))..][0..additional_count]; + + if (new_size_bytes > (self.items.len * @sizeOf(T))) { + fatal("List grew beyond the maximum size ({} >= {})", .{ new_size_bytes, self.items.len * @sizeOf(T) }); + } + + self.setFileLen(new_size_bytes); + + return new_slice; + } + }; +} diff --git a/lib/fuzzer/mutate.zig b/lib/fuzzer/mutate.zig new file mode 100644 index 000000000000..b395d8329491 --- /dev/null +++ b/lib/fuzzer/mutate.zig @@ -0,0 +1,369 @@ +//! This file contains implementations of individual mutations we can do on a +//! 
string and a big function that executes multiple mutations in a sequence.
+//!
+//! The mutation sequence and individual mutation arguments are generated from
+//! a seeded rng. Mutating is completely deterministic. Because of that, we can
+//! implement *reverse* mutations that undo the original mutation given the
+//! same seed. We use this to avoid memcpying the input data and instead
+//! mutate a single copy in place many times.
+//!
+//! Note that some mutations delete information (such as the erase bytes one),
+//! so we need a scratch array where we save the deleted information for
+//! undoing the mutation later.
+
+const std = @import("std");
+const assert = std.debug.assert;
+const Allocator = std.mem.Allocator;
+const ArrayList = std.ArrayList;
+const ArrayListUnmanaged = std.ArrayListUnmanaged;
+
+/// Used in tests
+const test_data: [128]u8 = blk: {
+    var b: [128]u8 = undefined;
+    var r: std.Random.DefaultPrng = .init(0);
+    r.fill(&b);
+    break :blk b;
+};
+
+const MutationTag = enum(u8) {
+    shuffle_bytes,
+    erase_bytes,
+    insert_byte,
+    insert_repeated_byte,
+    change_byte,
+    change_bit,
+};
+
+// Choice of mutations copied from LLVM's libFuzzer
+const Mutation = union(MutationTag) {
+    shuffle_bytes: struct { seed: u64 },
+    erase_bytes: struct { index: u32, len: u8 },
+    insert_byte: struct { index: u32, byte: u8 },
+    insert_repeated_byte: struct { index: u32, len: u8, byte: u8 },
+    change_byte: struct { index: u32, byte: u8 },
+    change_bit: struct { index: u32, bit: u3 },
+
+    // TODO:
+    // copy_part: void,
+    // add_word_from_manual_dictionary: void,
+    // add_word_from_torc: void, // Table of Recently Compared data
+    // add_word_from_persistent_auto_dictionary: void
+    // change_ascii_integer: void,
+    // change_binary_integer: void,
+};
+
+const MutationSequence = std.BoundedArray(Mutation, 8);
+
+pub fn mutate(str: *ArrayListUnmanaged(u8), seed: u64, scr: *ArrayListUnmanaged(u8), gpa: Allocator) error{OutOfMemory}!void {
+    var a = str.toManaged(gpa);
+    var b = scr.toManaged(gpa);
+    var rng = std.Random.DefaultPrng.init(seed);
+    const muts = generateRandomMutationSequence(rng.random());
+    try executeMutation(&a, muts, &b);
+    str.* = a.moveToUnmanaged();
+    scr.* = b.moveToUnmanaged();
+}
+
+pub fn mutateReverse(str: *ArrayListUnmanaged(u8), seed: u64, scr: *ArrayListUnmanaged(u8)) void {
+    var rng = std.Random.DefaultPrng.init(seed);
+    const muts = generateRandomMutationSequence(rng.random());
+    executeMutationReverse(str, muts, scr);
+}
+
+fn generateRandomMutationSequence(rng: std.Random) MutationSequence {
+    var muts: MutationSequence = .{};
+
+    for (0..rng.uintLessThanBiased(usize, muts.capacity())) |_| {
+        const r = rng.int(u64);
+        const mutation_tag_count = std.meta.tags(MutationTag).len;
+        const random_mutation_tag: MutationTag =
+            @enumFromInt(rng.int(u32) % mutation_tag_count); // enumValue would be great but it is missing a biased variant
+        muts.appendAssumeCapacity(switch (random_mutation_tag) {
+            .shuffle_bytes => .{ .shuffle_bytes = .{ .seed = r } },
+            .erase_bytes => .{ .erase_bytes = .{ .index = @intCast(r >> 32), .len = @truncate(r) } },
+            .insert_byte => .{ .insert_byte = .{ .index = @intCast(r >> 32), .byte = @truncate(r) } },
+            .insert_repeated_byte => .{ .insert_repeated_byte = .{ .index = @intCast(r >> 32), .len = @truncate(r >> 8), .byte = @truncate(r) } },
+            .change_byte => .{ .change_byte = .{ .index = @intCast(r >> 32), .byte = @truncate(r) } },
+            .change_bit => .{ .change_bit = .{ .index = @intCast(r >> 32), .bit = @truncate(r) } },
+        });
+    }
+
+    return muts;
+}
+
+fn executeMutation(str: *ArrayList(u8), muts: MutationSequence, scr: *ArrayList(u8)) !void {
+    for (muts.slice()) |mut| {
+        switch (mut) {
+            .shuffle_bytes => {},
+            .erase_bytes => |a| try mutateEraseBytes(str, scr, a.index, a.len),
+            .insert_byte => |a| try mutateInsertByte(str, a.index, a.byte),
+            .insert_repeated_byte => |a| try mutateInsertRepeatedByte(str, a.index, a.len, a.byte),
+            .change_byte => |a| try mutateChangeByte(str.items, scr, a.index, a.byte),
+            .change_bit => |a| mutateChangeBit(str.items, a.index, a.bit),
+        }
+    }
+}
+
+fn executeMutationReverse(str: *ArrayListUnmanaged(u8), muts: MutationSequence, scr: *ArrayListUnmanaged(u8)) void {
+    const slice = muts.slice();
+    for (0..slice.len) |i| {
+        const mut = slice[slice.len - i - 1];
+        switch (mut) {
+            .shuffle_bytes => {},
+            .erase_bytes => |a| mutateEraseBytesReverse(str, scr, a.index),
+            .insert_byte => |a| mutateInsertByteReverse(str, a.index),
+            .insert_repeated_byte => |a| mutateInsertRepeatedByteReverse(str, a.index, a.len),
+            .change_byte => |a| mutateChangeByteReverse(str.items, scr, a.index),
+            .change_bit => |a| mutateChangeBitReverse(str.items, a.index, a.bit),
+        }
+    }
+}
+
+test "mutate" {
+    var scr: ArrayListUnmanaged(u8) = .empty;
+    defer scr.deinit(std.testing.allocator);
+    var str: ArrayListUnmanaged(u8) = .empty;
+    defer str.deinit(std.testing.allocator);
+    var rng = std.Random.DefaultPrng.init(0);
+    for (0..test_data.len) |l| {
+        str.clearRetainingCapacity();
+        try str.appendSlice(std.testing.allocator, test_data[0..l]);
+        for (0..1000) |_| {
+            const seed = rng.next();
+            try mutate(&str, seed, &scr, std.testing.allocator);
+            mutateReverse(&str, seed, &scr);
+            try std.testing.expectEqualStrings(test_data[0..l], str.items);
+            try std.testing.expectEqual(0, scr.items.len);
+        }
+    }
+}
+
+/// Writes the names of the mutations that correspond to this seed. Used to
+/// make the log output readable.
+pub fn writeMutation(seed: u64, writer: anytype) !void {
+    var rng = std.Random.DefaultPrng.init(seed);
+    const muts = generateRandomMutationSequence(rng.random());
+    for (muts.slice(), 0..) |mut, i| {
+        if (i != 0) try writer.writeAll(", ");
+        try writer.writeAll(switch (mut) {
+            .shuffle_bytes => "Shuffle",
+            .erase_bytes => "DelBytes",
+            .insert_byte => "InsByte",
+            .insert_repeated_byte => "InsBytes",
+            .change_byte => "ChByte",
+            .change_bit => "ChBit",
+        });
+    }
+}
+
+fn mutateChangeBit(str: []u8, index: u32, bit: u3) void {
+    if (str.len == 0) return;
+    const mask = @as(u8, 1) << bit;
+    str[index % str.len] ^= mask;
+}
+
+fn mutateChangeBitReverse(str: []u8, index: u32, bit: u3) void {
+    return mutateChangeBit(str, index, bit);
+}
+
+test "mutate change bit" {
+    var scr: ArrayListUnmanaged(u8) = .empty;
+    defer scr.deinit(std.testing.allocator);
+    var str: ArrayListUnmanaged(u8) = .empty;
+    defer str.deinit(std.testing.allocator);
+    var rng_impl = std.Random.DefaultPrng.init(0);
+    const rng = rng_impl.random();
+    for (0..test_data.len) |l| {
+        str.clearRetainingCapacity();
+        try str.appendSlice(std.testing.allocator, test_data[0..l]);
+        for (0..1000) |_| {
+            const index = rng.int(u32);
+            const bit = rng.int(u3);
+            mutateChangeBit(str.items, index, bit);
+            mutateChangeBitReverse(str.items, index, bit);
+            try std.testing.expectEqualStrings(test_data[0..l], str.items);
+            try std.testing.expectEqual(0, scr.items.len);
+        }
+    }
+}
+
+fn mutateChangeByte(str: []u8, scr: *ArrayList(u8), index: u32, byte: u8) !void {
+    if (str.len == 0) return;
+    const target = &str[index % str.len];
+    try scr.append(target.*);
+    target.* = byte;
+}
+
+fn mutateChangeByteReverse(str: []u8, scr: *ArrayListUnmanaged(u8), index: u32) void {
+    if (str.len == 0) return;
+    str[index % str.len] = scr.pop();
+}
+
+test "mutate change byte" {
+    var scr: ArrayList(u8) = .init(std.testing.allocator);
+    defer scr.deinit();
+    var str: ArrayList(u8) = .init(std.testing.allocator);
+    defer str.deinit();
+    var rng_impl = std.Random.DefaultPrng.init(0);
+    const rng = rng_impl.random();
+    for (0..test_data.len) |l| {
+        str.clearRetainingCapacity();
+        try str.appendSlice(test_data[0..l]);
+        for (0..1000) |_| {
+            const index = rng.int(u32);
+            const byte = rng.int(u8);
+
+            try mutateChangeByte(str.items, &scr, index, byte);
+
+            var u = scr.moveToUnmanaged();
+            mutateChangeByteReverse(str.items, &u, index);
+            scr = u.toManaged(std.testing.allocator);
+
+            try std.testing.expectEqualStrings(test_data[0..l], str.items);
+            try std.testing.expectEqual(0, scr.items.len);
+        }
+    }
+}
+
+fn mutateInsertRepeatedByte(str: *ArrayList(u8), index: u32, len_: u8, byte: u8) !void {
+    const len = @min(24, @max(1, len_)); // arbitrary; probably worth tuning
+    const str_len = str.items.len;
+    const insert_index = index % (str_len + 1);
+
+    const slice = try str.addManyAt(insert_index, len);
+
+    @memset(slice, byte);
+}
+
+fn mutateInsertRepeatedByteReverse(str: *ArrayListUnmanaged(u8), index: u32, len_: u8) void {
+    const len = @min(24, @max(1, len_));
+    const str_len = str.items.len - len;
+    const insert_index = index % (str_len + 1);
+
+    // TODO: add this operation to ArrayList
+    const src = str.items[insert_index + len ..];
+    str.items.len -= len;
+    const dest = str.items[insert_index..];
+    std.mem.copyForwards(u8, dest, src);
+}
+
+test "mutate insert repeated byte" {
+    var str: ArrayList(u8) = .init(std.testing.allocator);
+    defer str.deinit();
+    var rng_impl = std.Random.DefaultPrng.init(0);
+    const rng = rng_impl.random();
+    for (0..test_data.len) |l| {
+        str.clearRetainingCapacity();
+        try str.appendSlice(test_data[0..l]);
+        for (0..1000) |_| {
+            const index = rng.int(u32);
+            const byte = rng.int(u8);
+            const len = rng.int(u8);
+            try mutateInsertRepeatedByte(&str, index, len, byte);
+
+            var u = str.moveToUnmanaged();
+            mutateInsertRepeatedByteReverse(&u, index, len);
+            str = u.toManaged(std.testing.allocator);
+
+            try std.testing.expectEqualStrings(test_data[0..l], str.items);
+        }
+    }
+}
+
+fn mutateInsertByte(str: *ArrayList(u8), index: u32, byte: u8) !void {
+    return mutateInsertRepeatedByte(str, index, 1, byte);
+}
+
+fn mutateInsertByteReverse(str: *ArrayListUnmanaged(u8), index: u32) void {
+    return mutateInsertRepeatedByteReverse(str, index, 1);
+}
+
+test "mutate insert byte" {
+    var scr: ArrayListUnmanaged(u8) = .empty;
+    defer scr.deinit(std.testing.allocator);
+    var str: ArrayList(u8) = .init(std.testing.allocator);
+    defer str.deinit();
+    var rng_impl = std.Random.DefaultPrng.init(0);
+    const rng = rng_impl.random();
+    for (0..test_data.len) |l| {
+        str.clearRetainingCapacity();
+        try str.appendSlice(test_data[0..l]);
+        for (0..1000) |_| {
+            const index = rng.int(u32);
+            const byte = rng.int(u8);
+            try mutateInsertByte(&str, index, byte);
+
+            var u = str.moveToUnmanaged();
+            mutateInsertByteReverse(&u, index);
+            str = u.toManaged(std.testing.allocator);
+
+            try std.testing.expectEqualStrings(test_data[0..l], str.items);
+            try std.testing.expectEqual(0, scr.items.len);
+        }
+    }
+}
+
+fn mutateEraseBytes(str: *ArrayList(u8), scr: *ArrayList(u8), index: u32, len_: u8) !void {
+    const upper_bound = @min(str.items.len / 3, 32);
+    const len = @min(upper_bound, len_);
+    if (len == 0) {
+        return scr.append(len);
+    }
+    const erase_index = index % (str.items.len - len + 1);
+
+    // TODO: add this operation to ArrayList
+
+    // copy out the erased bytes
+    const cut = str.items[erase_index..][0..len];
+    try scr.appendSlice(cut);
+
+    // shift down
+    const src = str.items[erase_index + len ..];
+    const dest = str.items[erase_index..];
+    std.mem.copyForwards(u8, dest, src);
+
+    try scr.append(len);
+    str.items.len -= len;
+}
+
+fn mutateEraseBytesReverse(str: *ArrayListUnmanaged(u8), scr: *ArrayListUnmanaged(u8), index: u32) void {
+    const len = scr.pop();
+    if (len == 0) return;
+    const erase_index = index % (str.items.len + 1);
+
+    const dest = str.addManyAtAssumeCapacity(erase_index, len);
+    const src = scr.items[scr.items.len - len ..];
+
+    // copy in the erased bytes
+    @memcpy(dest, src);
+
+    scr.items.len -= len;
+}
+
+test "mutate erase bytes" {
+    var scr: ArrayList(u8) = .init(std.testing.allocator);
+    defer scr.deinit();
+    var str: ArrayList(u8) = .init(std.testing.allocator);
+    defer str.deinit();
+    var rng_impl = std.Random.DefaultPrng.init(0);
+    const rng = rng_impl.random();
+    for (0..test_data.len) |l| {
+        str.clearRetainingCapacity();
+        try str.appendSlice(test_data[0..l]);
+        for (0..1000) |_| {
+            const index = rng.int(u32);
+            const len = rng.int(u8);
+            try mutateEraseBytes(&str, &scr, index, len);
+
+            var scru = scr.moveToUnmanaged();
+            var stru = str.moveToUnmanaged();
+
+            mutateEraseBytesReverse(&stru, &scru, index);
+            scr = scru.toManaged(std.testing.allocator);
+            str = stru.toManaged(std.testing.allocator);
+
+            try std.testing.expectEqualStrings(test_data[0..l], str.items);
+            try std.testing.expectEqual(0, scr.items.len);
+        }
+    }
+}
diff --git a/lib/fuzzer/web/index.html b/lib/fuzzer/web/index.html
index 325342e8ebf5..3bab30691a77 100644
--- a/lib/fuzzer/web/index.html
+++ b/lib/fuzzer/web/index.html
@@ -4,53 +4,10 @@ Zig Build System Interface
-
+

Loading JavaScript...

-