Skip to content

Commit 8f54cb7

Browse files
author
lif
committed
Since the instance creation request no longer blocks, we need to wait before attempting to send serial console data requests
1 parent 3bce9e8 commit 8f54cb7

File tree

2 files changed

+29
-19
lines changed

2 files changed

+29
-19
lines changed

end-to-end-tests/src/instance_launch.rs

Lines changed: 14 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@ use omicron_test_utils::dev::poll::{wait_for_condition, CondCheckError};
77
use oxide_client::types::{
88
ByteCount, DiskCreate, DiskSource, ExternalIpCreate, InstanceCpuCount,
99
InstanceCreate, InstanceDiskAttachment, InstanceNetworkInterfaceAttachment,
10-
SshKeyCreate,
10+
InstanceState, SshKeyCreate,
1111
};
1212
use oxide_client::{ClientDisksExt, ClientInstancesExt, ClientSessionExt};
1313
use russh::{ChannelMsg, Disconnect};
@@ -98,6 +98,19 @@ async fn instance_launch() -> Result<()> {
9898
type Error =
9999
CondCheckError<oxide_client::Error<oxide_client::types::Error>>;
100100

101+
let instance_state = ctx
102+
.client
103+
.instance_view()
104+
.project(ctx.project_name.clone())
105+
.instance(instance.name.clone())
106+
.send()
107+
.await?
108+
.run_state;
109+
110+
if instance_state == InstanceState::Starting {
111+
return Err(Error::NotYet);
112+
}
113+
101114
let data = String::from_utf8_lossy(
102115
&ctx.client
103116
.instance_serial_console()

nexus/src/app/instance.rs

Lines changed: 15 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -971,7 +971,14 @@ impl super::Nexus {
971971
}
972972
}
973973

974-
/// TODO describe how this relates to [Self::instance_request_state] (above)
974+
/// For calls to [sled_agent_client::Client::instance_put_state] (such as
975+
/// made by [Self::instance_request_state]) that involve a long-running
976+
/// task such as creating a propolis zone (i.e. during instance creation
977+
/// or migration target provisioning), sled-agent may send the resulting
978+
/// instance state to Nexus via the internal API instead of blocking
979+
/// during the request handler and risking an HTTP request timeout. This
980+
/// function writes the asynchronously-returned updated instance state
981+
/// to the database.
975982
pub(crate) async fn instance_handle_creation_result(
976983
&self,
977984
opctx: &OpContext,
@@ -983,30 +990,20 @@ impl super::Nexus {
983990
.lookup_for(authz::Action::Modify)
984991
.await?;
985992

986-
let state = self
987-
.db_datastore
988-
.instance_fetch_with_vmm(opctx, &authz_instance)
989-
.await?;
990-
991-
// TODO: add param for sled-agent to show its 'previous' and compare with this
992-
// to validate consistency between nexus and sled-agent
993-
let prev_instance_runtime = &state.instance().runtime_state;
994-
995993
match result {
996994
Ok(new_state) => self
997-
.db_datastore
998-
.instance_and_vmm_update_runtime(
999-
instance_id,
1000-
&new_state.instance_state.into(),
1001-
&new_state.propolis_id,
1002-
&new_state.vmm_state.into(),
1003-
)
995+
.write_returned_instance_state(instance_id, Some(new_state))
1004996
.await
1005997
.map(|_| ()),
1006998
Err(error) => {
999+
let state = self
1000+
.db_datastore
1001+
.instance_fetch_with_vmm(opctx, &authz_instance)
1002+
.await?;
1003+
10071004
self.mark_instance_failed(
10081005
instance_id,
1009-
prev_instance_runtime,
1006+
&state.instance().runtime_state,
10101007
error,
10111008
)
10121009
.await

0 commit comments

Comments
 (0)