Skip to content

Commit 1bb75f2

Browse files
authored
Regions can now be read-only (#6150)
Up until this point, regions were only ever read-write, and region snapshots were read-only. In order to support snapshot replacement, Crucible recently gained support for a read-only downstairs that performs a "clone" operation to copy blocks from another read-only downstairs. This commit adds a "read-only" flag to Region, and adds support for Nexus initializing a downstairs with this new clone option.
1 parent 013df0a commit 1bb75f2

19 files changed

+214
-133
lines changed

nexus/db-model/src/region.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,9 @@ pub struct Region {
4343
// The port that was returned when the region was created. This field didn't
4444
// originally exist, so records may not have it filled in.
4545
port: Option<SqlU16>,
46+
47+
// A region may be read-only
48+
read_only: bool,
4649
}
4750

4851
impl Region {
@@ -53,6 +56,7 @@ impl Region {
5356
blocks_per_extent: u64,
5457
extent_count: u64,
5558
port: u16,
59+
read_only: bool,
5660
) -> Self {
5761
Self {
5862
identity: RegionIdentity::new(Uuid::new_v4()),
@@ -62,6 +66,7 @@ impl Region {
6266
blocks_per_extent: blocks_per_extent as i64,
6367
extent_count: extent_count as i64,
6468
port: Some(port.into()),
69+
read_only,
6570
}
6671
}
6772

@@ -91,4 +96,7 @@ impl Region {
9196
pub fn port(&self) -> Option<u16> {
9297
self.port.map(|port| port.into())
9398
}
99+
pub fn read_only(&self) -> bool {
100+
self.read_only
101+
}
94102
}

nexus/db-model/src/schema.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1046,6 +1046,8 @@ table! {
10461046
extent_count -> Int8,
10471047

10481048
port -> Nullable<Int4>,
1049+
1050+
read_only -> Bool,
10491051
}
10501052
}
10511053

nexus/db-model/src/schema_versions.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ use std::collections::BTreeMap;
1717
///
1818
/// This must be updated when you change the database schema. Refer to
1919
/// schema/crdb/README.adoc in the root of this repository for details.
20-
pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(83, 0, 0);
20+
pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(84, 0, 0);
2121

2222
/// List of all past database schema versions, in *reverse* order
2323
///
@@ -29,6 +29,7 @@ static KNOWN_VERSIONS: Lazy<Vec<KnownVersion>> = Lazy::new(|| {
2929
// | leaving the first copy as an example for the next person.
3030
// v
3131
// KnownVersion::new(next_int, "unique-dirname-with-the-sql-files"),
32+
KnownVersion::new(84, "region-read-only"),
3233
KnownVersion::new(83, "dataset-address-optional"),
3334
KnownVersion::new(82, "region-port"),
3435
KnownVersion::new(81, "add-nullable-filesystem-pool"),

nexus/db-queries/src/db/datastore/region.rs

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ use crate::db::model::Region;
1919
use crate::db::model::SqlU16;
2020
use crate::db::pagination::paginated;
2121
use crate::db::pagination::Paginator;
22+
use crate::db::queries::region_allocation::RegionParameters;
2223
use crate::db::update_and_check::UpdateAndCheck;
2324
use crate::db::update_and_check::UpdateStatus;
2425
use crate::transaction_retry::OptionalError;
@@ -259,9 +260,12 @@ impl DataStore {
259260
let query = crate::db::queries::region_allocation::allocation_query(
260261
volume_id,
261262
maybe_snapshot_id,
262-
block_size,
263-
blocks_per_extent,
264-
extent_count,
263+
RegionParameters {
264+
block_size,
265+
blocks_per_extent,
266+
extent_count,
267+
read_only: false,
268+
},
265269
allocation_strategy,
266270
num_regions_required,
267271
);

nexus/db-queries/src/db/datastore/volume.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2349,6 +2349,7 @@ mod tests {
23492349
10,
23502350
10,
23512351
10001,
2352+
false,
23522353
);
23532354

23542355
region_and_volume_ids[i].0 = region.id();

nexus/db-queries/src/db/queries/region_allocation.rs

Lines changed: 41 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,18 @@ type SelectableSql<T> = <
6767
<T as diesel::Selectable<Pg>>::SelectExpression as diesel::Expression
6868
>::SqlType;
6969

70+
/// Parameters for the region(s) being allocated
71+
#[derive(Debug, Clone, Copy)]
72+
pub struct RegionParameters {
73+
pub block_size: u64,
74+
pub blocks_per_extent: u64,
75+
pub extent_count: u64,
76+
77+
/// True if the region will be filled with a Clone operation and is meant to
78+
/// be read-only.
79+
pub read_only: bool,
80+
}
81+
7082
/// For a given volume, idempotently allocate enough regions (according to some
7183
/// allocation strategy) to meet some redundancy level. This should only be used
7284
/// for the region set that is in the top level of the Volume (not the deeper
@@ -75,9 +87,7 @@ type SelectableSql<T> = <
7587
pub fn allocation_query(
7688
volume_id: uuid::Uuid,
7789
snapshot_id: Option<uuid::Uuid>,
78-
block_size: u64,
79-
blocks_per_extent: u64,
80-
extent_count: u64,
90+
params: RegionParameters,
8191
allocation_strategy: &RegionAllocationStrategy,
8292
redundancy: usize,
8393
) -> TypedSqlQuery<(SelectableSql<Dataset>, SelectableSql<Region>)> {
@@ -104,7 +114,8 @@ pub fn allocation_query(
104114

105115
let seed = seed.to_le_bytes().to_vec();
106116

107-
let size_delta = block_size * blocks_per_extent * extent_count;
117+
let size_delta =
118+
params.block_size * params.blocks_per_extent * params.extent_count;
108119
let redundancy: i64 = i64::try_from(redundancy).unwrap();
109120

110121
let builder = QueryBuilder::new().sql(
@@ -243,7 +254,8 @@ pub fn allocation_query(
243254
").param().sql(" AS block_size,
244255
").param().sql(" AS blocks_per_extent,
245256
").param().sql(" AS extent_count,
246-
NULL AS port
257+
NULL AS port,
258+
").param().sql(" AS read_only
247259
FROM shuffled_candidate_datasets")
248260
// Only select the *additional* number of candidate regions for the required
249261
// redundancy level
@@ -253,9 +265,10 @@ pub fn allocation_query(
253265
))
254266
),")
255267
.bind::<sql_types::Uuid, _>(volume_id)
256-
.bind::<sql_types::BigInt, _>(block_size as i64)
257-
.bind::<sql_types::BigInt, _>(blocks_per_extent as i64)
258-
.bind::<sql_types::BigInt, _>(extent_count as i64)
268+
.bind::<sql_types::BigInt, _>(params.block_size as i64)
269+
.bind::<sql_types::BigInt, _>(params.blocks_per_extent as i64)
270+
.bind::<sql_types::BigInt, _>(params.extent_count as i64)
271+
.bind::<sql_types::Bool, _>(params.read_only)
259272
.bind::<sql_types::BigInt, _>(redundancy)
260273

261274
// A subquery which summarizes the changes we intend to make, showing:
@@ -355,7 +368,7 @@ pub fn allocation_query(
355368
.sql("
356369
inserted_regions AS (
357370
INSERT INTO region
358-
(id, time_created, time_modified, dataset_id, volume_id, block_size, blocks_per_extent, extent_count, port)
371+
(id, time_created, time_modified, dataset_id, volume_id, block_size, blocks_per_extent, extent_count, port, read_only)
359372
SELECT ").sql(AllColumnsOfRegion::with_prefix("candidate_regions")).sql("
360373
FROM candidate_regions
361374
WHERE
@@ -405,9 +418,12 @@ mod test {
405418
#[tokio::test]
406419
async fn expectorate_query() {
407420
let volume_id = Uuid::nil();
408-
let block_size = 512;
409-
let blocks_per_extent = 4;
410-
let extent_count = 8;
421+
let params = RegionParameters {
422+
block_size: 512,
423+
blocks_per_extent: 4,
424+
extent_count: 8,
425+
read_only: false,
426+
};
411427

412428
// Start with snapshot_id = None
413429

@@ -418,14 +434,13 @@ mod test {
418434
let region_allocate = allocation_query(
419435
volume_id,
420436
snapshot_id,
421-
block_size,
422-
blocks_per_extent,
423-
extent_count,
437+
params,
424438
&RegionAllocationStrategy::RandomWithDistinctSleds {
425439
seed: Some(1),
426440
},
427441
REGION_REDUNDANCY_THRESHOLD,
428442
);
443+
429444
expectorate_query_contents(
430445
&region_allocate,
431446
"tests/output/region_allocate_distinct_sleds.sql",
@@ -437,9 +452,7 @@ mod test {
437452
let region_allocate = allocation_query(
438453
volume_id,
439454
snapshot_id,
440-
block_size,
441-
blocks_per_extent,
442-
extent_count,
455+
params,
443456
&RegionAllocationStrategy::Random { seed: Some(1) },
444457
REGION_REDUNDANCY_THRESHOLD,
445458
);
@@ -458,9 +471,7 @@ mod test {
458471
let region_allocate = allocation_query(
459472
volume_id,
460473
snapshot_id,
461-
block_size,
462-
blocks_per_extent,
463-
extent_count,
474+
params,
464475
&RegionAllocationStrategy::RandomWithDistinctSleds {
465476
seed: Some(1),
466477
},
@@ -477,9 +488,7 @@ mod test {
477488
let region_allocate = allocation_query(
478489
volume_id,
479490
snapshot_id,
480-
block_size,
481-
blocks_per_extent,
482-
extent_count,
491+
params,
483492
&RegionAllocationStrategy::Random { seed: Some(1) },
484493
REGION_REDUNDANCY_THRESHOLD,
485494
);
@@ -502,18 +511,19 @@ mod test {
502511
let conn = pool.pool().get().await.unwrap();
503512

504513
let volume_id = Uuid::new_v4();
505-
let block_size = 512;
506-
let blocks_per_extent = 4;
507-
let extent_count = 8;
514+
let params = RegionParameters {
515+
block_size: 512,
516+
blocks_per_extent: 4,
517+
extent_count: 8,
518+
read_only: false,
519+
};
508520

509521
// First structure: Explain the query with "RandomWithDistinctSleds"
510522

511523
let region_allocate = allocation_query(
512524
volume_id,
513525
None,
514-
block_size,
515-
blocks_per_extent,
516-
extent_count,
526+
params,
517527
&RegionAllocationStrategy::RandomWithDistinctSleds { seed: None },
518528
REGION_REDUNDANCY_THRESHOLD,
519529
);
@@ -527,9 +537,7 @@ mod test {
527537
let region_allocate = allocation_query(
528538
volume_id,
529539
None,
530-
block_size,
531-
blocks_per_extent,
532-
extent_count,
540+
params,
533541
&RegionAllocationStrategy::Random { seed: None },
534542
REGION_REDUNDANCY_THRESHOLD,
535543
);

nexus/db-queries/tests/output/region_allocate_distinct_sleds.sql

Lines changed: 19 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,8 @@ WITH
1010
region.block_size,
1111
region.blocks_per_extent,
1212
region.extent_count,
13-
region.port
13+
region.port,
14+
region.read_only
1415
FROM
1516
region
1617
WHERE
@@ -99,11 +100,12 @@ WITH
99100
$8 AS block_size,
100101
$9 AS blocks_per_extent,
101102
$10 AS extent_count,
102-
NULL AS port
103+
NULL AS port,
104+
$11 AS read_only
103105
FROM
104106
shuffled_candidate_datasets
105107
LIMIT
106-
$11 - (SELECT count(*) FROM old_regions)
108+
$12 - (SELECT count(*) FROM old_regions)
107109
),
108110
proposed_dataset_changes
109111
AS (
@@ -122,7 +124,7 @@ WITH
122124
SELECT
123125
(
124126
(
125-
(SELECT count(*) FROM old_regions LIMIT 1) < $12
127+
(SELECT count(*) FROM old_regions LIMIT 1) < $13
126128
AND CAST(
127129
IF(
128130
(
@@ -132,7 +134,7 @@ WITH
132134
+ (SELECT count(*) FROM existing_zpools LIMIT 1)
133135
)
134136
)
135-
>= $13
137+
>= $14
136138
),
137139
'TRUE',
138140
'Not enough space'
@@ -149,7 +151,7 @@ WITH
149151
+ (SELECT count(*) FROM old_regions LIMIT 1)
150152
)
151153
)
152-
>= $14
154+
>= $15
153155
),
154156
'TRUE',
155157
'Not enough datasets'
@@ -185,7 +187,7 @@ WITH
185187
1
186188
)
187189
)
188-
>= $15
190+
>= $16
189191
),
190192
'TRUE',
191193
'Not enough unique zpools selected'
@@ -208,7 +210,8 @@ WITH
208210
block_size,
209211
blocks_per_extent,
210212
extent_count,
211-
port
213+
port,
214+
read_only
212215
)
213216
SELECT
214217
candidate_regions.id,
@@ -219,7 +222,8 @@ WITH
219222
candidate_regions.block_size,
220223
candidate_regions.blocks_per_extent,
221224
candidate_regions.extent_count,
222-
candidate_regions.port
225+
candidate_regions.port,
226+
candidate_regions.read_only
223227
FROM
224228
candidate_regions
225229
WHERE
@@ -233,7 +237,8 @@ WITH
233237
region.block_size,
234238
region.blocks_per_extent,
235239
region.extent_count,
236-
region.port
240+
region.port,
241+
region.read_only
237242
),
238243
updated_datasets
239244
AS (
@@ -287,7 +292,8 @@ WITH
287292
old_regions.block_size,
288293
old_regions.blocks_per_extent,
289294
old_regions.extent_count,
290-
old_regions.port
295+
old_regions.port,
296+
old_regions.read_only
291297
FROM
292298
old_regions INNER JOIN dataset ON old_regions.dataset_id = dataset.id
293299
)
@@ -312,7 +318,8 @@ UNION
312318
inserted_regions.block_size,
313319
inserted_regions.blocks_per_extent,
314320
inserted_regions.extent_count,
315-
inserted_regions.port
321+
inserted_regions.port,
322+
inserted_regions.read_only
316323
FROM
317324
inserted_regions
318325
INNER JOIN updated_datasets ON inserted_regions.dataset_id = updated_datasets.id

0 commit comments

Comments
 (0)