Skip to content

Commit 5922927

Browse files
committed
Wire O_DIRECT also to Uncached I/O
Before Direct I/O was implemented, I implemented a lighter version I called Uncached I/O. It uses the normal DMU/ARC data path with some optimizations, but evicts data from the caches as soon as possible and reasonable. Originally I wired it only to the primarycache property, but now I am completing the integration all the way up to the VFS. While Direct I/O has the lowest possible memory bandwidth usage, it also has a significant number of limitations. It requires I/Os to be page aligned, does not allow speculative prefetch, etc. Uncached I/O does not have those limitations, but instead requires an additional memory copy, though still one less than regular cached I/O. As such, it should fill the gap in between. Considering this, I've disabled the annoying EINVAL errors on misaligned requests, adding a tunable for those who want to test their applications. To pass the information between the layers I had to change a number of APIs. But as a side effect, upper layers can now control not only the caching, but also speculative prefetch. I haven't wired it to the VFS yet, since that requires looking at some OS specifics. But while there, I've implemented speculative prefetch of indirect blocks for Direct I/O, controllable via all the same mechanisms. Signed-off-by: Alexander Motin <[email protected]> Sponsored by: iXsystems, Inc. Fixes #17027
1 parent c8fa39b commit 5922927

34 files changed

+392
-291
lines changed

cmd/ztest.c

+12-12
Original file line numberDiff line numberDiff line change
@@ -1993,7 +1993,8 @@ ztest_log_write(ztest_ds_t *zd, dmu_tx_t *tx, lr_write_t *lr)
19931993

19941994
if (write_state == WR_COPIED &&
19951995
dmu_read(zd->zd_os, lr->lr_foid, lr->lr_offset, lr->lr_length,
1996-
((lr_write_t *)&itx->itx_lr) + 1, DMU_READ_NO_PREFETCH) != 0) {
1996+
((lr_write_t *)&itx->itx_lr) + 1, DMU_READ_NO_PREFETCH |
1997+
DMU_KEEP_CACHING) != 0) {
19971998
zil_itx_destroy(itx);
19981999
itx = zil_itx_create(TX_WRITE, sizeof (*lr));
19992000
write_state = WR_NEED_COPY;
@@ -2265,19 +2266,19 @@ ztest_replay_write(void *arg1, void *arg2, boolean_t byteswap)
22652266
ASSERT(doi.doi_data_block_size);
22662267
ASSERT0(offset % doi.doi_data_block_size);
22672268
if (ztest_random(4) != 0) {
2268-
int prefetch = ztest_random(2) ?
2269+
dmu_flags_t flags = ztest_random(2) ?
22692270
DMU_READ_PREFETCH : DMU_READ_NO_PREFETCH;
22702271

22712272
/*
22722273
* We will randomly set when to do O_DIRECT on a read.
22732274
*/
22742275
if (ztest_random(4) == 0)
2275-
prefetch |= DMU_DIRECTIO;
2276+
flags |= DMU_DIRECTIO;
22762277

22772278
ztest_block_tag_t rbt;
22782279

22792280
VERIFY(dmu_read(os, lr->lr_foid, offset,
2280-
sizeof (rbt), &rbt, prefetch) == 0);
2281+
sizeof (rbt), &rbt, flags) == 0);
22812282
if (rbt.bt_magic == BT_MAGIC) {
22822283
ztest_bt_verify(&rbt, os, lr->lr_foid, 0,
22832284
offset, gen, txg, crtxg);
@@ -2308,7 +2309,7 @@ ztest_replay_write(void *arg1, void *arg2, boolean_t byteswap)
23082309
dmu_write(os, lr->lr_foid, offset, length, data, tx);
23092310
} else {
23102311
memcpy(abuf->b_data, data, length);
2311-
VERIFY0(dmu_assign_arcbuf_by_dbuf(db, offset, abuf, tx));
2312+
VERIFY0(dmu_assign_arcbuf_by_dbuf(db, offset, abuf, tx, 0));
23122313
}
23132314

23142315
(void) ztest_log_write(zd, tx, lr);
@@ -2533,7 +2534,7 @@ ztest_get_data(void *arg, uint64_t arg2, lr_write_t *lr, char *buf,
25332534
object, offset, size, ZTRL_READER);
25342535

25352536
error = dmu_read(os, object, offset, size, buf,
2536-
DMU_READ_NO_PREFETCH);
2537+
DMU_READ_NO_PREFETCH | DMU_KEEP_CACHING);
25372538
ASSERT0(error);
25382539
} else {
25392540
ASSERT3P(zio, !=, NULL);
@@ -2549,7 +2550,6 @@ ztest_get_data(void *arg, uint64_t arg2, lr_write_t *lr, char *buf,
25492550
object, offset, size, ZTRL_READER);
25502551

25512552
error = dmu_buf_hold_noread(os, object, offset, zgd, &db);
2552-
25532553
if (error == 0) {
25542554
blkptr_t *bp = &lr->lr_blkptr;
25552555

@@ -2826,7 +2826,7 @@ ztest_io(ztest_ds_t *zd, uint64_t object, uint64_t offset)
28262826
enum ztest_io_type io_type;
28272827
uint64_t blocksize;
28282828
void *data;
2829-
uint32_t dmu_read_flags = DMU_READ_NO_PREFETCH;
2829+
dmu_flags_t dmu_read_flags = DMU_READ_NO_PREFETCH;
28302830

28312831
/*
28322832
* We will randomly set when to do O_DIRECT on a read.
@@ -5065,7 +5065,7 @@ ztest_dmu_read_write(ztest_ds_t *zd, uint64_t id)
50655065
uint64_t stride = 123456789ULL;
50665066
uint64_t width = 40;
50675067
int free_percent = 5;
5068-
uint32_t dmu_read_flags = DMU_READ_PREFETCH;
5068+
dmu_flags_t dmu_read_flags = DMU_READ_PREFETCH;
50695069

50705070
/*
50715071
* We will randomly set when to do O_DIRECT on a read.
@@ -5541,13 +5541,13 @@ ztest_dmu_read_write_zcopy(ztest_ds_t *zd, uint64_t id)
55415541
}
55425542
if (i != 5 || chunksize < (SPA_MINBLOCKSIZE * 2)) {
55435543
VERIFY0(dmu_assign_arcbuf_by_dbuf(bonus_db,
5544-
off, bigbuf_arcbufs[j], tx));
5544+
off, bigbuf_arcbufs[j], tx, 0));
55455545
} else {
55465546
VERIFY0(dmu_assign_arcbuf_by_dbuf(bonus_db,
5547-
off, bigbuf_arcbufs[2 * j], tx));
5547+
off, bigbuf_arcbufs[2 * j], tx, 0));
55485548
VERIFY0(dmu_assign_arcbuf_by_dbuf(bonus_db,
55495549
off + chunksize / 2,
5550-
bigbuf_arcbufs[2 * j + 1], tx));
5550+
bigbuf_arcbufs[2 * j + 1], tx, 0));
55515551
}
55525552
if (i == 1) {
55535553
dmu_buf_rele(dbt, FTAG);

include/sys/dbuf.h

+7-18
Original file line numberDiff line numberDiff line change
@@ -45,20 +45,6 @@ extern "C" {
4545

4646
#define IN_DMU_SYNC 2
4747

48-
/*
49-
* define flags for dbuf_read
50-
*/
51-
52-
#define DB_RF_MUST_SUCCEED (1 << 0)
53-
#define DB_RF_CANFAIL (1 << 1)
54-
#define DB_RF_HAVESTRUCT (1 << 2)
55-
#define DB_RF_NOPREFETCH (1 << 3)
56-
#define DB_RF_NEVERWAIT (1 << 4)
57-
#define DB_RF_CACHED (1 << 5)
58-
#define DB_RF_NO_DECRYPT (1 << 6)
59-
#define DB_RF_PARTIAL_FIRST (1 << 7)
60-
#define DB_RF_PARTIAL_MORE (1 << 8)
61-
6248
/*
6349
* The simplified state transition diagram for dbufs looks like:
6450
*
@@ -389,12 +375,15 @@ void dbuf_rele_and_unlock(dmu_buf_impl_t *db, const void *tag,
389375
dmu_buf_impl_t *dbuf_find(struct objset *os, uint64_t object, uint8_t level,
390376
uint64_t blkid, uint64_t *hash_out);
391377

392-
int dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags);
378+
int dbuf_read(dmu_buf_impl_t *db, zio_t *zio, dmu_flags_t flags);
393379
void dmu_buf_will_clone_or_dio(dmu_buf_t *db, dmu_tx_t *tx);
394380
void dmu_buf_will_not_fill(dmu_buf_t *db, dmu_tx_t *tx);
395381
void dmu_buf_will_fill(dmu_buf_t *db, dmu_tx_t *tx, boolean_t canfail);
382+
void dmu_buf_will_fill_flags(dmu_buf_t *db, dmu_tx_t *tx, boolean_t canfail,
383+
dmu_flags_t flags);
396384
boolean_t dmu_buf_fill_done(dmu_buf_t *db, dmu_tx_t *tx, boolean_t failed);
397-
void dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx);
385+
void dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx,
386+
dmu_flags_t flags);
398387
dbuf_dirty_record_t *dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx);
399388
dbuf_dirty_record_t *dbuf_dirty_lightweight(dnode_t *dn, uint64_t blkid,
400389
dmu_tx_t *tx);
@@ -476,10 +465,10 @@ dbuf_find_dirty_eq(dmu_buf_impl_t *db, uint64_t txg)
476465
#define DBUF_GET_BUFC_TYPE(_db) \
477466
(dbuf_is_metadata(_db) ? ARC_BUFC_METADATA : ARC_BUFC_DATA)
478467

479-
#define DBUF_IS_CACHEABLE(_db) \
468+
#define DBUF_IS_CACHEABLE(_db) (!(_db)->db_pending_evict && \
480469
((_db)->db_objset->os_primary_cache == ZFS_CACHE_ALL || \
481470
(dbuf_is_metadata(_db) && \
482-
((_db)->db_objset->os_primary_cache == ZFS_CACHE_METADATA)))
471+
((_db)->db_objset->os_primary_cache == ZFS_CACHE_METADATA))))
483472

484473
boolean_t dbuf_is_l2cacheable(dmu_buf_impl_t *db, blkptr_t *db_bp);
485474

include/sys/dmu.h

+42-25
Original file line numberDiff line numberDiff line change
@@ -532,6 +532,26 @@ void dmu_redact(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
532532
void dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp,
533533
struct zio_prop *zp);
534534

535+
/*
536+
* DB_RF_* are to be used for dbuf_read() or in limited other cases.
537+
*/
538+
typedef enum dmu_flags {
539+
DB_RF_MUST_SUCCEED = 0, /* Suspend on I/O errors. */
540+
DB_RF_CANFAIL = 1 << 0, /* Return on I/O errors. */
541+
DB_RF_HAVESTRUCT = 1 << 1, /* dn_struct_rwlock is locked. */
542+
DB_RF_NEVERWAIT = 1 << 2,
543+
DMU_READ_PREFETCH = 0, /* Try speculative prefetch. */
544+
DMU_READ_NO_PREFETCH = 1 << 3, /* Don't prefetch speculatively. */
545+
DB_RF_NOPREFETCH = DMU_READ_NO_PREFETCH,
546+
DMU_READ_NO_DECRYPT = 1 << 4, /* Don't decrypt. */
547+
DB_RF_NO_DECRYPT = DMU_READ_NO_DECRYPT,
548+
DMU_DIRECTIO = 1 << 5, /* Bypass ARC. */
549+
DMU_UNCACHEDIO = 1 << 6, /* Reduce caching. */
550+
DMU_PARTIAL_FIRST = 1 << 7, /* First partial access. */
551+
DMU_PARTIAL_MORE = 1 << 8, /* Following partial access. */
552+
DMU_KEEP_CACHING = 1 << 9, /* Don't affect caching. */
553+
} dmu_flags_t;
554+
535555
/*
536556
* The bonus data is accessed more or less like a regular buffer.
537557
* You must dmu_bonus_hold() to get the buffer, which will give you a
@@ -547,7 +567,7 @@ void dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp,
547567
int dmu_bonus_hold(objset_t *os, uint64_t object, const void *tag,
548568
dmu_buf_t **dbp);
549569
int dmu_bonus_hold_by_dnode(dnode_t *dn, const void *tag, dmu_buf_t **dbp,
550-
uint32_t flags);
570+
dmu_flags_t flags);
551571
int dmu_bonus_max(void);
552572
int dmu_set_bonus(dmu_buf_t *, int, dmu_tx_t *);
553573
int dmu_set_bonustype(dmu_buf_t *, dmu_object_type_t, dmu_tx_t *);
@@ -558,9 +578,9 @@ int dmu_rm_spill(objset_t *, uint64_t, dmu_tx_t *);
558578
* Special spill buffer support used by "SA" framework
559579
*/
560580

561-
int dmu_spill_hold_by_bonus(dmu_buf_t *bonus, uint32_t flags, const void *tag,
562-
dmu_buf_t **dbp);
563-
int dmu_spill_hold_by_dnode(dnode_t *dn, uint32_t flags,
581+
int dmu_spill_hold_by_bonus(dmu_buf_t *bonus, dmu_flags_t flags,
582+
const void *tag, dmu_buf_t **dbp);
583+
int dmu_spill_hold_by_dnode(dnode_t *dn, dmu_flags_t flags,
564584
const void *tag, dmu_buf_t **dbp);
565585
int dmu_spill_hold_existing(dmu_buf_t *bonus, const void *tag, dmu_buf_t **dbp);
566586

@@ -579,17 +599,17 @@ int dmu_spill_hold_existing(dmu_buf_t *bonus, const void *tag, dmu_buf_t **dbp);
579599
* The object number must be a valid, allocated object number.
580600
*/
581601
int dmu_buf_hold(objset_t *os, uint64_t object, uint64_t offset,
582-
const void *tag, dmu_buf_t **, int flags);
602+
const void *tag, dmu_buf_t **, dmu_flags_t flags);
583603
int dmu_buf_hold_array(objset_t *os, uint64_t object, uint64_t offset,
584604
uint64_t length, int read, const void *tag, int *numbufsp,
585605
dmu_buf_t ***dbpp);
586606
int dmu_buf_hold_noread(objset_t *os, uint64_t object, uint64_t offset,
587607
const void *tag, dmu_buf_t **dbp);
588608
int dmu_buf_hold_by_dnode(dnode_t *dn, uint64_t offset,
589-
const void *tag, dmu_buf_t **dbp, int flags);
609+
const void *tag, dmu_buf_t **dbp, dmu_flags_t flags);
590610
int dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset,
591611
uint64_t length, boolean_t read, const void *tag, int *numbufsp,
592-
dmu_buf_t ***dbpp, uint32_t flags);
612+
dmu_buf_t ***dbpp, dmu_flags_t flags);
593613
int dmu_buf_hold_noread_by_dnode(dnode_t *dn, uint64_t offset, const void *tag,
594614
dmu_buf_t **dbp);
595615

@@ -781,6 +801,7 @@ struct blkptr *dmu_buf_get_blkptr(dmu_buf_t *db);
781801
* (ie. you've called dmu_tx_hold_object(tx, db->db_object)).
782802
*/
783803
void dmu_buf_will_dirty(dmu_buf_t *db, dmu_tx_t *tx);
804+
void dmu_buf_will_dirty_flags(dmu_buf_t *db, dmu_tx_t *tx, dmu_flags_t flags);
784805
boolean_t dmu_buf_is_dirty(dmu_buf_t *db, dmu_tx_t *tx);
785806
void dmu_buf_set_crypt_params(dmu_buf_t *db_fake, boolean_t byteorder,
786807
const uint8_t *salt, const uint8_t *iv, const uint8_t *mac, dmu_tx_t *tx);
@@ -874,40 +895,36 @@ int dmu_free_long_object(objset_t *os, uint64_t object);
874895
* Canfail routines will return 0 on success, or an errno if there is a
875896
* nonrecoverable I/O error.
876897
*/
877-
#define DMU_READ_PREFETCH 0 /* prefetch */
878-
#define DMU_READ_NO_PREFETCH 1 /* don't prefetch */
879-
#define DMU_READ_NO_DECRYPT 2 /* don't decrypt */
880-
#define DMU_DIRECTIO 4 /* use Direct I/O */
881-
882898
int dmu_read(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
883-
void *buf, uint32_t flags);
899+
void *buf, dmu_flags_t flags);
884900
int dmu_read_by_dnode(dnode_t *dn, uint64_t offset, uint64_t size, void *buf,
885-
uint32_t flags);
901+
dmu_flags_t flags);
886902
void dmu_write(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
887903
const void *buf, dmu_tx_t *tx);
888904
int dmu_write_by_dnode(dnode_t *dn, uint64_t offset, uint64_t size,
889-
const void *buf, dmu_tx_t *tx);
890-
int dmu_write_by_dnode_flags(dnode_t *dn, uint64_t offset, uint64_t size,
891-
const void *buf, dmu_tx_t *tx, uint32_t flags);
905+
const void *buf, dmu_tx_t *tx, dmu_flags_t flags);
892906
void dmu_prealloc(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
893907
dmu_tx_t *tx);
894908
#ifdef _KERNEL
895-
int dmu_read_uio(objset_t *os, uint64_t object, zfs_uio_t *uio, uint64_t size);
896-
int dmu_read_uio_dbuf(dmu_buf_t *zdb, zfs_uio_t *uio, uint64_t size);
897-
int dmu_read_uio_dnode(dnode_t *dn, zfs_uio_t *uio, uint64_t size);
909+
int dmu_read_uio(objset_t *os, uint64_t object, zfs_uio_t *uio, uint64_t size,
910+
dmu_flags_t flags);
911+
int dmu_read_uio_dbuf(dmu_buf_t *zdb, zfs_uio_t *uio, uint64_t size,
912+
dmu_flags_t flags);
913+
int dmu_read_uio_dnode(dnode_t *dn, zfs_uio_t *uio, uint64_t size,
914+
dmu_flags_t flags);
898915
int dmu_write_uio(objset_t *os, uint64_t object, zfs_uio_t *uio, uint64_t size,
899-
dmu_tx_t *tx);
916+
dmu_tx_t *tx, dmu_flags_t flags);
900917
int dmu_write_uio_dbuf(dmu_buf_t *zdb, zfs_uio_t *uio, uint64_t size,
901-
dmu_tx_t *tx);
918+
dmu_tx_t *tx, dmu_flags_t flags);
902919
int dmu_write_uio_dnode(dnode_t *dn, zfs_uio_t *uio, uint64_t size,
903-
dmu_tx_t *tx);
920+
dmu_tx_t *tx, dmu_flags_t flags);
904921
#endif
905922
struct arc_buf *dmu_request_arcbuf(dmu_buf_t *handle, int size);
906923
void dmu_return_arcbuf(struct arc_buf *buf);
907924
int dmu_assign_arcbuf_by_dnode(dnode_t *dn, uint64_t offset,
908-
struct arc_buf *buf, dmu_tx_t *tx);
925+
struct arc_buf *buf, dmu_tx_t *tx, dmu_flags_t flags);
909926
int dmu_assign_arcbuf_by_dbuf(dmu_buf_t *handle, uint64_t offset,
910-
struct arc_buf *buf, dmu_tx_t *tx);
927+
struct arc_buf *buf, dmu_tx_t *tx, dmu_flags_t flags);
911928
#define dmu_assign_arcbuf dmu_assign_arcbuf_by_dbuf
912929
extern uint_t zfs_max_recordsize;
913930

include/sys/dmu_impl.h

+6-4
Original file line numberDiff line numberDiff line change
@@ -270,11 +270,13 @@ void dmu_object_zapify(objset_t *, uint64_t, dmu_object_type_t, dmu_tx_t *);
270270
void dmu_object_free_zapified(objset_t *, uint64_t, dmu_tx_t *);
271271

272272
int dmu_write_direct(zio_t *, dmu_buf_impl_t *, abd_t *, dmu_tx_t *);
273-
int dmu_read_abd(dnode_t *, uint64_t, uint64_t, abd_t *, uint32_t flags);
274-
int dmu_write_abd(dnode_t *, uint64_t, uint64_t, abd_t *, uint32_t, dmu_tx_t *);
273+
int dmu_read_abd(dnode_t *, uint64_t, uint64_t, abd_t *, dmu_flags_t);
274+
int dmu_write_abd(dnode_t *, uint64_t, uint64_t, abd_t *, dmu_flags_t,
275+
dmu_tx_t *);
275276
#if defined(_KERNEL)
276-
int dmu_read_uio_direct(dnode_t *, zfs_uio_t *, uint64_t);
277-
int dmu_write_uio_direct(dnode_t *, zfs_uio_t *, uint64_t, dmu_tx_t *);
277+
int dmu_read_uio_direct(dnode_t *, zfs_uio_t *, uint64_t, dmu_flags_t);
278+
int dmu_write_uio_direct(dnode_t *, zfs_uio_t *, uint64_t, dmu_flags_t,
279+
dmu_tx_t *);
278280
#endif
279281

280282
#ifdef __cplusplus

include/sys/dmu_zfetch.h

+3-2
Original file line numberDiff line numberDiff line change
@@ -81,9 +81,10 @@ void dmu_zfetch_init(zfetch_t *, struct dnode *);
8181
void dmu_zfetch_fini(zfetch_t *);
8282
zstream_t *dmu_zfetch_prepare(zfetch_t *, uint64_t, uint64_t, boolean_t,
8383
boolean_t);
84-
void dmu_zfetch_run(zfetch_t *, zstream_t *, boolean_t, boolean_t);
85-
void dmu_zfetch(zfetch_t *, uint64_t, uint64_t, boolean_t, boolean_t,
84+
void dmu_zfetch_run(zfetch_t *, zstream_t *, boolean_t, boolean_t,
8685
boolean_t);
86+
void dmu_zfetch(zfetch_t *, uint64_t, uint64_t, boolean_t, boolean_t,
87+
boolean_t, boolean_t);
8788

8889

8990
#ifdef __cplusplus

include/sys/spa.h

+2-2
Original file line numberDiff line numberDiff line change
@@ -981,9 +981,9 @@ extern void spa_iostats_trim_add(spa_t *spa, trim_type_t type,
981981
uint64_t extents_skipped, uint64_t bytes_skipped,
982982
uint64_t extents_failed, uint64_t bytes_failed);
983983
extern void spa_iostats_read_add(spa_t *spa, uint64_t size, uint64_t iops,
984-
uint32_t flags);
984+
dmu_flags_t flags);
985985
extern void spa_iostats_write_add(spa_t *spa, uint64_t size, uint64_t iops,
986-
uint32_t flags);
986+
dmu_flags_t flags);
987987
extern void spa_import_progress_add(spa_t *spa);
988988
extern void spa_import_progress_remove(uint64_t spa_guid);
989989
extern int spa_import_progress_set_mmp_check(uint64_t pool_guid,

include/sys/zfs_racct.h

+4-2
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,9 @@
3333
/*
3434
* Platform-dependent resource accounting hooks
3535
*/
36-
void zfs_racct_read(spa_t *spa, uint64_t size, uint64_t iops, uint32_t flags);
37-
void zfs_racct_write(spa_t *spa, uint64_t size, uint64_t iops, uint32_t flags);
36+
void zfs_racct_read(spa_t *spa, uint64_t size, uint64_t iops,
37+
dmu_flags_t flags);
38+
void zfs_racct_write(spa_t *spa, uint64_t size, uint64_t iops,
39+
dmu_flags_t flags);
3840

3941
#endif /* _SYS_ZFS_RACCT_H */

lib/libzpool/zfs_racct.c

+2-2
Original file line numberDiff line numberDiff line change
@@ -27,13 +27,13 @@
2727
#include <sys/zfs_racct.h>
2828

2929
void
30-
zfs_racct_read(spa_t *spa, uint64_t size, uint64_t iops, uint32_t flags)
30+
zfs_racct_read(spa_t *spa, uint64_t size, uint64_t iops, dmu_flags_t flags)
3131
{
3232
(void) spa, (void) size, (void) iops, (void) flags;
3333
}
3434

3535
void
36-
zfs_racct_write(spa_t *spa, uint64_t size, uint64_t iops, uint32_t flags)
36+
zfs_racct_write(spa_t *spa, uint64_t size, uint64_t iops, dmu_flags_t flags)
3737
{
3838
(void) spa, (void) size, (void) iops, (void) flags;
3939
}

man/man4/zfs.4

+6-1
Original file line numberDiff line numberDiff line change
@@ -304,14 +304,19 @@ Default dnode block size as a power of 2.
304304
.It Sy zfs_default_ibs Ns = Ns Sy 17 Po 128 KiB Pc Pq int
305305
Default dnode indirect block size as a power of 2.
306306
.
307-
.It Sy zfs_dio_enabled Ns = Ns Sy 0 Ns | Ns 1 Pq int
307+
.It Sy zfs_dio_enabled Ns = Ns Sy 1 Ns | Ns 0 Pq int
308308
Enable Direct I/O.
309309
If this setting is 0, then all I/O requests will be directed through the ARC
310310
acting as though the dataset property
311311
.Sy direct
312312
was set to
313313
.Sy disabled .
314314
.
315+
.It Sy zfs_dio_strict Ns = Ns Sy 0 Ns | Ns 1 Pq int
316+
Strictly enforce alignment for Direct I/O requests, returning
317+
.Sy EINVAL
318+
if not page-aligned instead of silently falling back to uncached I/O.
319+
.
315320
.It Sy zfs_history_output_max Ns = Ns Sy 1048576 Ns B Po 1 MiB Pc Pq u64
316321
When attempting to log an output nvlist of an ioctl in the on-disk history,
317322
the output will not be stored if it is larger than this size (in bytes).

0 commit comments

Comments
 (0)