Skip to content

Commit 4fe393c

Browse files
author
Paul Dagnelie
committed
Implement dynamic gang header sizes
ZFS gang block headers are currently fixed at 512 bytes, which is increasingly wasteful in the era of larger disk sector sizes. This PR allows an allocation of any size to serve as a gang header. It also contains supporting changes to ZDB that make gang headers easier to work with. Sponsored-by: Klara, Inc. Sponsored-by: Wasabi Technology, Inc. Signed-off-by: Paul Dagnelie <[email protected]>
1 parent 5605513 commit 4fe393c

24 files changed

+4700
-10581
lines changed

cmd/zdb/zdb.c

+3-3
Original file line numberDiff line numberDiff line change
@@ -8588,9 +8588,9 @@ zdb_dump_indirect(blkptr_t *bp, int nbps, int flags)
85888588
}
85898589

85908590
static void
8591-
zdb_dump_gbh(void *buf, int flags)
8591+
zdb_dump_gbh(void *buf, uint64_t size, int flags)
85928592
{
8593-
zdb_dump_indirect((blkptr_t *)buf, SPA_GBH_NBLKPTRS, flags);
8593+
zdb_dump_indirect((blkptr_t *)buf, gbh_nblkptrs(size), flags);
85948594
}
85958595

85968596
static void
@@ -9073,7 +9073,7 @@ zdb_read_block(char *thing, spa_t *spa)
90739073
zdb_dump_indirect((blkptr_t *)buf,
90749074
orig_lsize / sizeof (blkptr_t), flags);
90759075
else if (flags & ZDB_FLAG_GBH)
9076-
zdb_dump_gbh(buf, flags);
9076+
zdb_dump_gbh(buf, lsize, flags);
90779077
else
90789078
zdb_dump_block(thing, buf, lsize, flags);
90799079

include/sys/vdev.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,7 @@ extern uint64_t vdev_psize_to_asize(vdev_t *vd, uint64_t psize);
148148
static inline uint64_t
149149
vdev_gang_header_asize(vdev_t *vd)
150150
{
151-
return (vdev_psize_to_asize_txg(vd, SPA_GANGBLOCKSIZE, 0));
151+
return (vdev_psize_to_asize_txg(vd, SPA_OLD_GANGBLOCKSIZE, 0));
152152
}
153153

154154
extern int vdev_fault(spa_t *spa, uint64_t guid, vdev_aux_t aux);

include/sys/zio.h

+36-15
Original file line numberDiff line numberDiff line change
@@ -61,19 +61,38 @@ typedef struct zio_eck {
6161
* Gang block headers are self-checksumming and contain an array
6262
* of block pointers.
6363
*/
64-
#define SPA_GANGBLOCKSIZE SPA_MINBLOCKSIZE
65-
#define SPA_GBH_NBLKPTRS ((SPA_GANGBLOCKSIZE - \
66-
sizeof (zio_eck_t)) / sizeof (blkptr_t))
67-
#define SPA_GBH_FILLER ((SPA_GANGBLOCKSIZE - \
68-
sizeof (zio_eck_t) - \
69-
(SPA_GBH_NBLKPTRS * sizeof (blkptr_t))) /\
70-
sizeof (uint64_t))
71-
72-
typedef struct zio_gbh {
73-
blkptr_t zg_blkptr[SPA_GBH_NBLKPTRS];
74-
uint64_t zg_filler[SPA_GBH_FILLER];
75-
zio_eck_t zg_tail;
76-
} zio_gbh_phys_t;
64+
65+
typedef enum zio_gb_version {
66+
ZIO_GB_OLD = 0,
67+
ZIO_GB_SIZED,
68+
ZIO_GB_VERSIONS,
69+
} zio_gb_version_t;
70+
71+
typedef struct zio_gb_tail {
72+
uint64_t zgt_version; /* gang block type */
73+
zio_eck_t zgt_eck; /* embedded checksum */
74+
} zio_gb_tail_t;
75+
76+
#define SPA_OLD_GANGBLOCKSIZE SPA_MINBLOCKSIZE
77+
78+
static inline uint64_t
79+
gbh_nblkptrs(uint64_t size) {
80+
return ((size - sizeof (zio_gb_tail_t)) /
81+
sizeof (blkptr_t));
82+
}
83+
84+
static inline uint64_t
85+
gbh_filler(uint64_t size) {
86+
return ((size - sizeof (zio_gb_tail_t) -
87+
(gbh_nblkptrs(size) * sizeof (blkptr_t))) /
88+
sizeof (uint64_t));
89+
}
90+
91+
static inline zio_gb_tail_t *
92+
gbh_tail(void *gbh, uint64_t size) {
93+
return ((zio_gb_tail_t *)((uintptr_t)gbh + size -
94+
sizeof (zio_gb_tail_t)));
95+
}
7796

7897
enum zio_checksum {
7998
ZIO_CHECKSUM_INHERIT = 0,
@@ -398,8 +417,10 @@ typedef struct zio_vsd_ops {
398417
} zio_vsd_ops_t;
399418

400419
typedef struct zio_gang_node {
401-
zio_gbh_phys_t *gn_gbh;
402-
struct zio_gang_node *gn_child[SPA_GBH_NBLKPTRS];
420+
void *gn_gbh;
421+
uint64_t gn_gangblocksize;
422+
uint64_t gn_orig_gangblocksize;
423+
struct zio_gang_node *gn_child[];
403424
} zio_gang_node_t;
404425

405426
typedef zio_t *zio_gang_issue_func_t(zio_t *zio, blkptr_t *bp,

include/zfeature_common.h

+9-1
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,7 @@ typedef enum spa_feature {
8787
SPA_FEATURE_FAST_DEDUP,
8888
SPA_FEATURE_LONGNAME,
8989
SPA_FEATURE_LARGE_MICROZAP,
90+
SPA_FEATURE_DYNAMIC_GANG_HEADER,
9091
SPA_FEATURES
9192
} spa_feature_t;
9293

@@ -103,7 +104,14 @@ typedef enum zfeature_flags {
103104
/* Activate this feature at the same time it is enabled. */
104105
ZFEATURE_FLAG_ACTIVATE_ON_ENABLE = (1 << 2),
105106
/* Each dataset has a field set if it has ever used this feature. */
106-
ZFEATURE_FLAG_PER_DATASET = (1 << 3)
107+
ZFEATURE_FLAG_PER_DATASET = (1 << 3),
108+
/*
109+
* This feature isn't enabled by zpool upgrade; it must be explicitly
110+
* listed to be enabled. This also applies to compatibility lists. This
111+
* flag can be removed from a given feature once support is sufficiently
112+
* widespread.
113+
*/
114+
ZFEATURE_FLAG_NO_UPGRADE = (1 << 4)
107115
} zfeature_flags_t;
108116

109117
typedef enum zfeature_type {

lib/libnvpair/libnvpair.abi

+243-854
Large diffs are not rendered by default.

lib/libuutil/libuutil.abi

+250-817
Large diffs are not rendered by default.

lib/libzfs/libzfs.abi

+3,307-7,118
Large diffs are not rendered by default.

lib/libzfs/libzfs_pool.c

+7-3
Original file line numberDiff line numberDiff line change
@@ -5122,9 +5122,13 @@ zpool_load_compat(const char *compat, boolean_t *features, char *report,
51225122
/* special cases (unset), "" and "off" => enable all features not flagged ZFEATURE_FLAG_NO_UPGRADE */
51235123
if (compat == NULL || compat[0] == '\0' ||
51245124
strcmp(compat, ZPOOL_COMPAT_OFF) == 0) {
5125-
if (features != NULL)
5126-
for (uint_t i = 0; i < SPA_FEATURES; i++)
5127-
features[i] = B_TRUE;
5125+
if (features != NULL) {
5126+
for (uint_t i = 0; i < SPA_FEATURES; i++) {
5127+
if (!(spa_feature_table[i].fi_flags &
5128+
ZFEATURE_FLAG_NO_UPGRADE))
5129+
features[i] = B_TRUE;
5130+
}
5131+
}
51285132
if (report != NULL)
51295133
strlcpy(report, gettext("all features enabled"), rlen);
51305134
return (ZPOOL_COMPATIBILITY_OK);

0 commit comments

Comments
 (0)