Skip to content

Commit 1a99c99

Browse files
committed
std.Build: gracefully handle child stdin closing when running tests
We have deduced that the sporadic BrokenPipe failures happening on the CI runners (e.g. https://github.com/ziglang/zig/actions/runs/12035916948/job/33555963190) are likely caused by the test runner's stdin pipe closing abnormally, probably because the process crashed. Here, we introduce error handling for this case, so that if these writes fail, the step is correctly marked as failed, and we still collect the child's stderr to report. This won't fix the CI issues, but it should promote them to proper error messages that include the child's stderr, which -- at least in theory -- should allow us to ultimately track down where the errors come from. Note that this change is desirable regardless of any bugs in the test runner or elsewhere, since the child process could terminate abnormally for any number of reasons (e.g. a crashing test), and such cases should be correctly reported by the build runner.
1 parent 3ce6de8 commit 1a99c99

File tree

1 file changed

+33
-10
lines changed

1 file changed

+33
-10
lines changed

lib/std/Build/Step/Run.zig

Lines changed: 33 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1413,12 +1413,23 @@ fn evalZigTest(
14131413
});
14141414
defer poller.deinit();
14151415

1416-
if (fuzz_context) |fuzz| {
1417-
try sendRunTestMessage(child.stdin.?, .start_fuzzing, fuzz.unit_test_index);
1418-
} else {
1416+
// If this is `true`, we avoid ever entering the polling loop below, because the stdin pipe has
1417+
// somehow already closed; instead, we go straight to capturing stderr in case it has anything
1418+
// useful.
1419+
const first_write_failed = if (fuzz_context) |fuzz| failed: {
1420+
sendRunTestMessage(child.stdin.?, .start_fuzzing, fuzz.unit_test_index) catch |err| {
1421+
try run.step.addError("unable to write stdin: {s}", .{@errorName(err)});
1422+
break :failed true;
1423+
};
1424+
break :failed false;
1425+
} else failed: {
14191426
run.fuzz_tests.clearRetainingCapacity();
1420-
try sendMessage(child.stdin.?, .query_test_metadata);
1421-
}
1427+
sendMessage(child.stdin.?, .query_test_metadata) catch |err| {
1428+
try run.step.addError("unable to write stdin: {s}", .{@errorName(err)});
1429+
break :failed true;
1430+
};
1431+
break :failed false;
1432+
};
14221433

14231434
const Header = std.zig.Server.Message.Header;
14241435

@@ -1437,13 +1448,13 @@ fn evalZigTest(
14371448
var sub_prog_node: ?std.Progress.Node = null;
14381449
defer if (sub_prog_node) |n| n.end();
14391450

1440-
poll: while (true) {
1451+
const any_write_failed = first_write_failed or poll: while (true) {
14411452
while (stdout.readableLength() < @sizeOf(Header)) {
1442-
if (!(try poller.poll())) break :poll;
1453+
if (!(try poller.poll())) break :poll false;
14431454
}
14441455
const header = stdout.reader().readStruct(Header) catch unreachable;
14451456
while (stdout.readableLength() < header.bytes_len) {
1446-
if (!(try poller.poll())) break :poll;
1457+
if (!(try poller.poll())) break :poll false;
14471458
}
14481459
const body = stdout.readableSliceOfLen(header.bytes_len);
14491460

@@ -1483,7 +1494,10 @@ fn evalZigTest(
14831494
.prog_node = prog_node,
14841495
};
14851496

1486-
try requestNextTest(child.stdin.?, &metadata.?, &sub_prog_node);
1497+
requestNextTest(child.stdin.?, &metadata.?, &sub_prog_node) catch |err| {
1498+
try run.step.addError("unable to write stdin: {s}", .{@errorName(err)});
1499+
break :poll true;
1500+
};
14871501
},
14881502
.test_results => {
14891503
assert(fuzz_context == null);
@@ -1518,7 +1532,10 @@ fn evalZigTest(
15181532
}
15191533
}
15201534

1521-
try requestNextTest(child.stdin.?, &metadata.?, &sub_prog_node);
1535+
requestNextTest(child.stdin.?, &metadata.?, &sub_prog_node) catch |err| {
1536+
try run.step.addError("unable to write stdin: {s}", .{@errorName(err)});
1537+
break :poll true;
1538+
};
15221539
},
15231540
.coverage_id => {
15241541
const web_server = fuzz_context.?.web_server;
@@ -1552,6 +1569,12 @@ fn evalZigTest(
15521569
}
15531570

15541571
stdout.discard(body.len);
1572+
};
1573+
1574+
if (any_write_failed) {
1575+
// The test runner unexpectedly closed stdin; something is very wrong and it has probably crashed.
1576+
// We want to make sure we've captured all of stderr so that it's logged below.
1577+
while (try poller.poll()) {}
15551578
}
15561579

15571580
if (stderr.readableLength() > 0) {

0 commit comments

Comments
 (0)