Skip to content

Commit 252d4ec

Browse files
committed
[sled-agent] Integrate config-reconciler
1 parent 9564992 commit 252d4ec

33 files changed

+793
-3560
lines changed

clients/sled-agent-client/src/lib.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,6 @@ progenitor::generate_api!(
6767
OmicronPhysicalDiskConfig = omicron_common::disk::OmicronPhysicalDiskConfig,
6868
OmicronPhysicalDisksConfig = omicron_common::disk::OmicronPhysicalDisksConfig,
6969
OmicronSledConfig = nexus_sled_agent_shared::inventory::OmicronSledConfig,
70-
OmicronSledConfigResult = nexus_sled_agent_shared::inventory::OmicronSledConfigResult,
7170
OmicronZoneConfig = nexus_sled_agent_shared::inventory::OmicronZoneConfig,
7271
OmicronZoneDataset = nexus_sled_agent_shared::inventory::OmicronZoneDataset,
7372
OmicronZoneImageSource = nexus_sled_agent_shared::inventory::OmicronZoneImageSource,

dev-tools/omdb/src/bin/omdb/sled_agent.rs

Lines changed: 0 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -34,14 +34,6 @@ enum SledAgentCommands {
3434
#[clap(subcommand)]
3535
Zones(ZoneCommands),
3636

37-
/// print information about zpools
38-
#[clap(subcommand)]
39-
Zpools(ZpoolCommands),
40-
41-
/// print information about datasets
42-
#[clap(subcommand)]
43-
Datasets(DatasetCommands),
44-
4537
/// print information about the local bootstore node
4638
#[clap(subcommand)]
4739
Bootstore(BootstoreCommands),
@@ -97,12 +89,6 @@ impl SledAgentArgs {
9789
SledAgentCommands::Zones(ZoneCommands::List) => {
9890
cmd_zones_list(&client).await
9991
}
100-
SledAgentCommands::Zpools(ZpoolCommands::List) => {
101-
cmd_zpools_list(&client).await
102-
}
103-
SledAgentCommands::Datasets(DatasetCommands::List) => {
104-
cmd_datasets_list(&client).await
105-
}
10692
SledAgentCommands::Bootstore(BootstoreCommands::Status) => {
10793
cmd_bootstore_status(&client).await
10894
}
@@ -129,44 +115,6 @@ async fn cmd_zones_list(
129115
Ok(())
130116
}
131117

132-
/// Runs `omdb sled-agent zpools list`
133-
async fn cmd_zpools_list(
134-
client: &sled_agent_client::Client,
135-
) -> Result<(), anyhow::Error> {
136-
let response = client.zpools_get().await.context("listing zpools")?;
137-
let zpools = response.into_inner();
138-
139-
println!("zpools:");
140-
if zpools.is_empty() {
141-
println!(" <none>");
142-
}
143-
for zpool in &zpools {
144-
println!(" {:?}", zpool);
145-
}
146-
147-
Ok(())
148-
}
149-
150-
/// Runs `omdb sled-agent datasets list`
151-
async fn cmd_datasets_list(
152-
client: &sled_agent_client::Client,
153-
) -> Result<(), anyhow::Error> {
154-
let response = client.datasets_get().await.context("listing datasets")?;
155-
let response = response.into_inner();
156-
157-
println!("dataset configuration @ generation {}:", response.generation);
158-
let datasets = response.datasets;
159-
160-
if datasets.is_empty() {
161-
println!(" <none>");
162-
}
163-
for dataset in &datasets {
164-
println!(" {:?}", dataset);
165-
}
166-
167-
Ok(())
168-
}
169-
170118
/// Runs `omdb sled-agent bootstore status`
171119
async fn cmd_bootstore_status(
172120
client: &sled_agent_client::Client,

nexus-sled-agent-shared/src/inventory.rs

Lines changed: 1 addition & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,7 @@ use omicron_common::{
1414
external::{ByteCount, Generation},
1515
internal::shared::{NetworkInterface, SourceNatConfig},
1616
},
17-
disk::{
18-
DatasetConfig, DatasetManagementStatus, DiskManagementStatus,
19-
DiskVariant, OmicronPhysicalDiskConfig,
20-
},
17+
disk::{DatasetConfig, DiskVariant, OmicronPhysicalDiskConfig},
2118
zpool_name::ZpoolName,
2219
};
2320
use omicron_uuid_kinds::{DatasetUuid, OmicronZoneUuid};
@@ -130,8 +127,6 @@ pub enum SledRole {
130127
}
131128

132129
/// Describes the set of Reconfigurator-managed configuration elements of a sled
133-
// TODO this struct should have a generation number; at the moment, each of
134-
// the fields has a separete one internally.
135130
#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq)]
136131
pub struct OmicronSledConfig {
137132
pub generation: Generation,
@@ -140,14 +135,6 @@ pub struct OmicronSledConfig {
140135
pub zones: IdMap<OmicronZoneConfig>,
141136
}
142137

143-
/// Result of the currently-synchronous `omicron_config_put` endpoint.
144-
#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)]
145-
#[must_use = "this `DatasetManagementResult` may contain errors, which should be handled"]
146-
pub struct OmicronSledConfigResult {
147-
pub disks: Vec<DiskManagementStatus>,
148-
pub datasets: Vec<DatasetManagementStatus>,
149-
}
150-
151138
/// Describes the set of Omicron-managed zones running on a sled
152139
#[derive(
153140
Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq, Hash,

nexus/reconfigurator/execution/src/omicron_sled_config.rs

Lines changed: 9 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -10,15 +10,13 @@ use anyhow::anyhow;
1010
use futures::StreamExt;
1111
use futures::stream;
1212
use nexus_db_queries::context::OpContext;
13-
use nexus_sled_agent_shared::inventory::OmicronSledConfigResult;
1413
use nexus_types::deployment::BlueprintSledConfig;
1514
use omicron_uuid_kinds::GenericUuid;
1615
use omicron_uuid_kinds::SledUuid;
17-
use slog::Logger;
1816
use slog::info;
1917
use slog::warn;
18+
use slog_error_chain::InlineErrorChain;
2019
use std::collections::BTreeMap;
21-
use update_engine::merge_anyhow_list;
2220

2321
/// Idempotently ensure that the specified Omicron sled configs are deployed to
2422
/// the corresponding sleds
@@ -63,13 +61,14 @@ pub(crate) async fn deploy_sled_configs(
6361
format!("Failed to put {config:#?} to sled {sled_id}")
6462
});
6563
match result {
64+
Ok(_) => None,
6665
Err(error) => {
67-
warn!(log, "{error:#}");
66+
warn!(
67+
log, "failed to put sled config";
68+
InlineErrorChain::new(error.as_ref()),
69+
);
6870
Some(error)
6971
}
70-
Ok(result) => {
71-
parse_config_result(result.into_inner(), &log).err()
72-
}
7372
}
7473
})
7574
.collect()
@@ -78,69 +77,6 @@ pub(crate) async fn deploy_sled_configs(
7877
if errors.is_empty() { Ok(()) } else { Err(errors) }
7978
}
8079

81-
fn parse_config_result(
82-
result: OmicronSledConfigResult,
83-
log: &Logger,
84-
) -> anyhow::Result<()> {
85-
let (disk_errs, disk_successes): (Vec<_>, Vec<_>) =
86-
result.disks.into_iter().partition(|status| status.err.is_some());
87-
88-
if !disk_errs.is_empty() {
89-
warn!(
90-
log,
91-
"Failed to deploy disks for sled agent";
92-
"successfully configured disks" => disk_successes.len(),
93-
"failed disk configurations" => disk_errs.len(),
94-
);
95-
for err in &disk_errs {
96-
warn!(log, "{err:?}");
97-
}
98-
return Err(merge_anyhow_list(disk_errs.into_iter().map(|status| {
99-
anyhow!(
100-
"failed to deploy disk {:?}: {:#}",
101-
status.identity,
102-
// `disk_errs` was partitioned by `status.err.is_some()`, so
103-
// this is safe to unwrap.
104-
status.err.unwrap(),
105-
)
106-
})));
107-
}
108-
109-
let (dataset_errs, dataset_successes): (Vec<_>, Vec<_>) =
110-
result.datasets.into_iter().partition(|status| status.err.is_some());
111-
112-
if !dataset_errs.is_empty() {
113-
warn!(
114-
log,
115-
"Failed to deploy datasets for sled agent";
116-
"successfully configured datasets" => dataset_successes.len(),
117-
"failed dataset configurations" => dataset_errs.len(),
118-
);
119-
for err in &dataset_errs {
120-
warn!(log, "{err:?}");
121-
}
122-
return Err(merge_anyhow_list(dataset_errs.into_iter().map(
123-
|status| {
124-
anyhow!(
125-
"failed to deploy dataset {}: {:#}",
126-
status.dataset_name.full_name(),
127-
// `dataset_errs` was partitioned by `status.err.is_some()`,
128-
// so this is safe to unwrap.
129-
status.err.unwrap(),
130-
)
131-
},
132-
)));
133-
}
134-
135-
info!(
136-
log,
137-
"Successfully deployed config to sled agent";
138-
"successfully configured disks" => disk_successes.len(),
139-
"successfully configured datasets" => dataset_successes.len(),
140-
);
141-
Ok(())
142-
}
143-
14480
#[cfg(test)]
14581
mod tests {
14682
use super::*;
@@ -326,6 +262,9 @@ mod tests {
326262

327263
// Observe the latest configuration stored on the simulated sled agent,
328264
// and verify that this output matches the input.
265+
//
266+
// TODO-cleanup Simulated sled-agent should report a unified
267+
// `OmicronSledConfig`.
329268
let observed_disks =
330269
sim_sled_agent.omicron_physical_disks_list().unwrap();
331270
let observed_datasets = sim_sled_agent.datasets_config_list().unwrap();

nexus/reconfigurator/planning/src/example.rs

Lines changed: 1 addition & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@ use crate::planner::rng::PlannerRng;
1414
use crate::system::SledBuilder;
1515
use crate::system::SystemDescription;
1616
use nexus_inventory::CollectionBuilderRng;
17-
use nexus_sled_agent_shared::inventory::OmicronZonesConfig;
1817
use nexus_types::deployment::Blueprint;
1918
use nexus_types::deployment::OmicronZoneNic;
2019
use nexus_types::deployment::PlanningInput;
@@ -486,15 +485,7 @@ impl ExampleSystemBuilder {
486485

487486
for (sled_id, sled_cfg) in &blueprint.sleds {
488487
let sled_cfg = sled_cfg.clone().into_in_service_sled_config();
489-
system
490-
.sled_set_omicron_zones(
491-
*sled_id,
492-
OmicronZonesConfig {
493-
generation: sled_cfg.generation,
494-
zones: sled_cfg.zones.into_iter().collect(),
495-
},
496-
)
497-
.unwrap();
488+
system.sled_set_omicron_config(*sled_id, sled_cfg).unwrap();
498489
}
499490

500491
// We just ensured that a handful of datasets should exist in

nexus/reconfigurator/planning/src/planner.rs

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -996,7 +996,6 @@ pub(crate) mod test {
996996
use clickhouse_admin_types::ClickhouseKeeperClusterMembership;
997997
use clickhouse_admin_types::KeeperId;
998998
use expectorate::assert_contents;
999-
use nexus_sled_agent_shared::inventory::OmicronZonesConfig;
1000999
use nexus_types::deployment::BlueprintDatasetDisposition;
10011000
use nexus_types::deployment::BlueprintDiffSummary;
10021001
use nexus_types::deployment::BlueprintPhysicalDiskDisposition;
@@ -1181,18 +1180,15 @@ pub(crate) mod test {
11811180
// example.collection -- this should be addressed via API improvements.
11821181
example
11831182
.system
1184-
.sled_set_omicron_zones(new_sled_id, {
1185-
let sled_cfg = blueprint4
1183+
.sled_set_omicron_config(
1184+
new_sled_id,
1185+
blueprint4
11861186
.sleds
11871187
.get(&new_sled_id)
11881188
.expect("blueprint should contain zones for new sled")
11891189
.clone()
1190-
.into_in_service_sled_config();
1191-
OmicronZonesConfig {
1192-
generation: sled_cfg.generation,
1193-
zones: sled_cfg.zones.into_iter().collect(),
1194-
}
1195-
})
1190+
.into_in_service_sled_config(),
1191+
)
11961192
.unwrap();
11971193
let collection =
11981194
example.system.to_collection_builder().unwrap().build();

nexus/reconfigurator/planning/src/system.rs

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ use nexus_sled_agent_shared::inventory::Inventory;
1717
use nexus_sled_agent_shared::inventory::InventoryDataset;
1818
use nexus_sled_agent_shared::inventory::InventoryDisk;
1919
use nexus_sled_agent_shared::inventory::InventoryZpool;
20+
use nexus_sled_agent_shared::inventory::OmicronSledConfig;
2021
use nexus_sled_agent_shared::inventory::OmicronZonesConfig;
2122
use nexus_sled_agent_shared::inventory::SledRole;
2223
use nexus_types::deployment::ClickhousePolicy;
@@ -353,7 +354,7 @@ impl SystemDescription {
353354
!self.sleds.is_empty()
354355
}
355356

356-
/// Set Omicron zones for a sled.
357+
/// Set Omicron config for a sled.
357358
///
358359
/// The zones will be reported in the collection generated by
359360
/// [`Self::to_collection_builder`].
@@ -362,17 +363,26 @@ impl SystemDescription {
362363
///
363364
/// # Notes
364365
///
365-
/// It is okay to call `sled_set_omicron_zones` in ways that wouldn't
366+
/// It is okay to call `sled_set_omicron_config` in ways that wouldn't
366367
/// happen in production, such as to test illegal states.
367-
pub fn sled_set_omicron_zones(
368+
pub fn sled_set_omicron_config(
368369
&mut self,
369370
sled_id: SledUuid,
370-
omicron_zones: OmicronZonesConfig,
371+
sled_config: OmicronSledConfig,
371372
) -> anyhow::Result<&mut Self> {
372373
let sled = self.sleds.get_mut(&sled_id).with_context(|| {
373374
format!("attempted to access sled {} not found in system", sled_id)
374375
})?;
375-
Arc::make_mut(sled).inventory_sled_agent.omicron_zones = omicron_zones;
376+
let sled = Arc::make_mut(sled);
377+
// TODO this inventory needs to be more robust for the reconciler;
378+
// this is nontrivial so coming in a subsequent PR. For now just
379+
// backfill the existing inventory structure.
380+
sled.inventory_sled_agent.omicron_zones = OmicronZonesConfig {
381+
generation: sled_config.generation,
382+
zones: sled_config.zones.into_iter().collect(),
383+
};
384+
sled.inventory_sled_agent.omicron_physical_disks_generation =
385+
sled_config.generation;
376386
Ok(self)
377387
}
378388

0 commit comments

Comments
 (0)