Skip to content

Commit d181e65

Browse files
committed
cred: properly pass and test creds on other threads
### Background Various admin operations will be invoked by some userspace task, but the work will be done on a separate kernel thread at a later time. Snapshots are an example, which are triggered through zfs_ioc_snapshot() -> dsl_dataset_snapshot(), but the actual work is from a task dispatched to dp_sync_taskq. Many such tasks end up in dsl_enforce_ds_ss_limits(), where various limits and permissions are enforced. Among other things, it is necessary to ensure that the invoking task (that is, the user) has permission to do things. We can't simply check if the running task has permission; it is a privileged kernel thread, which can do anything. However, in the general case it's not safe to simply query the task for its permissions at the check time, as the task may not exist any more, or its permissions may have changed since it was first invoked. So instead, we capture the permissions by saving CRED() in the user task, and then using it for the check through the secpolicy_* functions. ### Current implementation The current code calls CRED() to get the credential, which gets a pointer to the cred_t inside the current task and passes it to the worker task. However, it doesn't take a reference to the cred_t, and so expects that it won't change, and that the task continues to exist. In practice that is always the case, because we don't let the calling task return from the kernel until the work is done. For Linux, we also take a reference to the current task, because the Linux credential APIs for the most part do not check an arbitrary credential, but rather, query what a task can do. See secpolicy_zfs_proc(). Again, we don't take a reference on the task, just a pointer to it. ### Changes We change to calling crhold() on the task credential, and crfree() when we're done with it. This ensures it stays alive and unchanged for the duration of the call. On the Linux side, we change the main policy checking function priv_policy_ns() to use override_creds()/revert_creds() if necessary to make the provided credential active in the current task, allowing the standard task-permission APIs to do the needed check. Since the task pointer is no longer required, this lets us entirely remove secpolicy_zfs_proc() and the need to carry a task pointer around as well. Sponsored-by: https://despairlabs.com/sponsor/ Signed-off-by: Rob Norris <[email protected]>
1 parent 63de2d2 commit d181e65

File tree

16 files changed

+116
-112
lines changed

16 files changed

+116
-112
lines changed

include/os/freebsd/spl/sys/policy.h

-1
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,6 @@ struct znode;
3939

4040
int secpolicy_nfs(cred_t *cr);
4141
int secpolicy_zfs(cred_t *crd);
42-
int secpolicy_zfs_proc(cred_t *cr, proc_t *proc);
4342
int secpolicy_sys_config(cred_t *cr, int checkonly);
4443
int secpolicy_zinject(cred_t *cr);
4544
int secpolicy_fs_unmount(cred_t *cr, struct mount *vfsp);

include/os/linux/zfs/sys/policy.h

-1
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,6 @@ int secpolicy_vnode_setids_setgids(const cred_t *, gid_t, zidmap_t *,
5252
struct user_namespace *);
5353
int secpolicy_zinject(const cred_t *);
5454
int secpolicy_zfs(const cred_t *);
55-
int secpolicy_zfs_proc(const cred_t *, proc_t *);
5655
void secpolicy_setid_clear(vattr_t *, cred_t *);
5756
int secpolicy_setid_setsticky_clear(struct inode *, vattr_t *,
5857
const vattr_t *, cred_t *, zidmap_t *, struct user_namespace *);

include/sys/dmu_recv.h

-1
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,6 @@ typedef struct dmu_recv_cookie {
6060
uint64_t drc_ivset_guid;
6161
void *drc_owner;
6262
cred_t *drc_cred;
63-
proc_t *drc_proc;
6463
nvlist_t *drc_begin_nvl;
6564

6665
objset_t *drc_os;

include/sys/dsl_dataset.h

+1-3
Original file line numberDiff line numberDiff line change
@@ -284,7 +284,6 @@ typedef struct dsl_dataset_promote_arg {
284284
uint64_t used, comp, uncomp, unique, cloneusedsnap, originusedsnap;
285285
nvlist_t *err_ds;
286286
cred_t *cr;
287-
proc_t *proc;
288287
} dsl_dataset_promote_arg_t;
289288

290289
typedef struct dsl_dataset_rollback_arg {
@@ -299,7 +298,6 @@ typedef struct dsl_dataset_snapshot_arg {
299298
nvlist_t *ddsa_props;
300299
nvlist_t *ddsa_errors;
301300
cred_t *ddsa_cr;
302-
proc_t *ddsa_proc;
303301
} dsl_dataset_snapshot_arg_t;
304302

305303
typedef struct dsl_dataset_rename_snapshot_arg {
@@ -459,7 +457,7 @@ int dsl_dataset_clone_swap_check_impl(dsl_dataset_t *clone,
459457
void dsl_dataset_clone_swap_sync_impl(dsl_dataset_t *clone,
460458
dsl_dataset_t *origin_head, dmu_tx_t *tx);
461459
int dsl_dataset_snapshot_check_impl(dsl_dataset_t *ds, const char *snapname,
462-
dmu_tx_t *tx, boolean_t recv, uint64_t cnt, cred_t *cr, proc_t *proc);
460+
dmu_tx_t *tx, boolean_t recv, uint64_t cnt, cred_t *cr);
463461
void dsl_dataset_snapshot_sync_impl(dsl_dataset_t *ds, const char *snapname,
464462
dmu_tx_t *tx);
465463

include/sys/dsl_dir.h

+2-2
Original file line numberDiff line numberDiff line change
@@ -185,11 +185,11 @@ int dsl_dir_set_reservation(const char *ddname, zprop_source_t source,
185185
uint64_t reservation);
186186
int dsl_dir_activate_fs_ss_limit(const char *);
187187
int dsl_fs_ss_limit_check(dsl_dir_t *, uint64_t, zfs_prop_t, dsl_dir_t *,
188-
cred_t *, proc_t *);
188+
cred_t *);
189189
void dsl_fs_ss_count_adjust(dsl_dir_t *, int64_t, const char *, dmu_tx_t *);
190190
int dsl_dir_rename(const char *oldname, const char *newname);
191191
int dsl_dir_transfer_possible(dsl_dir_t *sdd, dsl_dir_t *tdd,
192-
uint64_t fs_cnt, uint64_t ss_cnt, uint64_t space, cred_t *, proc_t *);
192+
uint64_t fs_cnt, uint64_t ss_cnt, uint64_t space, cred_t *);
193193
boolean_t dsl_dir_is_clone(dsl_dir_t *dd);
194194
void dsl_dir_new_refreservation(dsl_dir_t *dd, struct dsl_dataset *ds,
195195
uint64_t reservation, cred_t *cr, dmu_tx_t *tx);

include/sys/zcp.h

-1
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,6 @@ typedef struct zcp_run_info {
7676
* rather than the 'current' thread's.
7777
*/
7878
cred_t *zri_cred;
79-
proc_t *zri_proc;
8079

8180
/*
8281
* The tx in which this channel program is running.

include/sys/zfs_context.h

+3-1
Original file line numberDiff line numberDiff line change
@@ -632,6 +632,9 @@ extern void delay(clock_t ticks);
632632
#define kcred NULL
633633
#define CRED() NULL
634634

635+
#define crhold(cr) ((void)cr)
636+
#define crfree(cr) ((void)cr)
637+
635638
#define ptob(x) ((x) * PAGESIZE)
636639

637640
#define NN_DIVISOR_1000 (1U << 0)
@@ -744,7 +747,6 @@ extern int zfs_secpolicy_rename_perms(const char *from, const char *to,
744747
cred_t *cr);
745748
extern int zfs_secpolicy_destroy_perms(const char *name, cred_t *cr);
746749
extern int secpolicy_zfs(const cred_t *cr);
747-
extern int secpolicy_zfs_proc(const cred_t *cr, proc_t *proc);
748750
extern zoneid_t getzoneid(void);
749751

750752
/* SID stuff */

lib/libzpool/kernel.c

-7
Original file line numberDiff line numberDiff line change
@@ -918,13 +918,6 @@ secpolicy_zfs(const cred_t *cr)
918918
return (0);
919919
}
920920

921-
int
922-
secpolicy_zfs_proc(const cred_t *cr, proc_t *proc)
923-
{
924-
(void) cr, (void) proc;
925-
return (0);
926-
}
927-
928921
ksiddomain_t *
929922
ksid_lookupdomain(const char *dom)
930923
{

module/os/freebsd/spl/spl_policy.c

-7
Original file line numberDiff line numberDiff line change
@@ -52,13 +52,6 @@ secpolicy_zfs(cred_t *cr)
5252
return (priv_check_cred(cr, PRIV_VFS_MOUNT));
5353
}
5454

55-
int
56-
secpolicy_zfs_proc(cred_t *cr, proc_t *proc)
57-
{
58-
59-
return (priv_check_cred(cr, PRIV_VFS_MOUNT));
60-
}
61-
6255
int
6356
secpolicy_sys_config(cred_t *cr, int checkonly __unused)
6457
{

module/os/linux/zfs/policy.c

+18-26
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
2525
* Copyright 2013, Joyent, Inc. All rights reserved.
2626
* Copyright (C) 2016 Lawrence Livermore National Security, LLC.
27+
* Copyright (c) 2025, Rob Norris <[email protected]>
2728
*
2829
* For Linux the vast majority of this enforcement is already handled via
2930
* the standard Linux VFS permission checks. However certain administrative
@@ -35,28 +36,32 @@
3536
#include <linux/security.h>
3637
#include <linux/vfs_compat.h>
3738

38-
/*
39-
* The passed credentials cannot be directly verified because Linux only
40-
* provides and interface to check the *current* process credentials. In
41-
* order to handle this the capable() test is only run when the passed
42-
* credentials match the current process credentials or the kcred. In
43-
* all other cases this function must fail and return the passed err.
44-
*/
4539
static int
4640
priv_policy_ns(const cred_t *cr, int capability, int err,
4741
struct user_namespace *ns)
4842
{
49-
if (cr != CRED() && (cr != kcred))
50-
return (err);
43+
/*
44+
* The passed credentials cannot be directly verified because Linux
45+
* only provides an interface to check the *current* process
46+
* credentials. In order to handle this we check if the passed in
47+
* creds match the current process credentials or the kcred. If not,
48+
* we swap the passed credentials into the current task, perform the
49+
* check, and then revert it before returning.
50+
*/
51+
const cred_t *old =
52+
(cr != CRED() && cr != kcred) ? override_creds(cr) : NULL;
5153

5254
#if defined(CONFIG_USER_NS)
53-
if (!(ns ? ns_capable(ns, capability) : capable(capability)))
55+
if (ns ? ns_capable(ns, capability) : capable(capability))
5456
#else
55-
if (!capable(capability))
57+
if (capable(capability))
5658
#endif
57-
return (err);
59+
err = 0;
5860

59-
return (0);
61+
if (old)
62+
revert_creds(old);
63+
64+
return (err);
6065
}
6166

6267
static int
@@ -249,19 +254,6 @@ secpolicy_zfs(const cred_t *cr)
249254
return (priv_policy(cr, CAP_SYS_ADMIN, EACCES));
250255
}
251256

252-
/*
253-
* Equivalent to secpolicy_zfs(), but works even if the cred_t is not that of
254-
* the current process. Takes both cred_t and proc_t so that this can work
255-
* easily on all platforms.
256-
*/
257-
int
258-
secpolicy_zfs_proc(const cred_t *cr, proc_t *proc)
259-
{
260-
if (!has_capability(proc, CAP_SYS_ADMIN))
261-
return (EACCES);
262-
return (0);
263-
}
264-
265257
void
266258
secpolicy_setid_clear(vattr_t *vap, cred_t *cr)
267259
{

module/zfs/dmu_objset.c

+17-8
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
* Copyright (c) 2019, Klara Inc.
3535
* Copyright (c) 2019, Allan Jude
3636
* Copyright (c) 2022 Hewlett Packard Enterprise Development LP.
37+
* Copyright (c) 2025, Rob Norris <[email protected]>
3738
*/
3839

3940
/* Portions Copyright 2010 Robert Milkowski */
@@ -68,6 +69,7 @@
6869
#include <sys/vdev_impl.h>
6970
#include <sys/arc.h>
7071
#include <cityhash.h>
72+
#include <sys/cred.h>
7173

7274
/*
7375
* Needed to close a window in dnode_move() that allows the objset to be freed
@@ -1179,7 +1181,6 @@ dmu_objset_create_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
11791181
typedef struct dmu_objset_create_arg {
11801182
const char *doca_name;
11811183
cred_t *doca_cred;
1182-
proc_t *doca_proc;
11831184
void (*doca_userfunc)(objset_t *os, void *arg,
11841185
cred_t *cr, dmu_tx_t *tx);
11851186
void *doca_userarg;
@@ -1223,7 +1224,7 @@ dmu_objset_create_check(void *arg, dmu_tx_t *tx)
12231224
}
12241225

12251226
error = dsl_fs_ss_limit_check(pdd, 1, ZFS_PROP_FILESYSTEM_LIMIT, NULL,
1226-
doca->doca_cred, doca->doca_proc);
1227+
doca->doca_cred);
12271228
if (error != 0) {
12281229
dsl_dir_rele(pdd, FTAG);
12291230
return (error);
@@ -1350,9 +1351,11 @@ dmu_objset_create(const char *name, dmu_objset_type_t type, uint64_t flags,
13501351
dmu_objset_create_arg_t doca;
13511352
dsl_crypto_params_t tmp_dcp = { 0 };
13521353

1354+
cred_t *cr = CRED();
1355+
crhold(cr);
1356+
13531357
doca.doca_name = name;
1354-
doca.doca_cred = CRED();
1355-
doca.doca_proc = curproc;
1358+
doca.doca_cred = cr;
13561359
doca.doca_flags = flags;
13571360
doca.doca_userfunc = func;
13581361
doca.doca_userarg = arg;
@@ -1374,14 +1377,16 @@ dmu_objset_create(const char *name, dmu_objset_type_t type, uint64_t flags,
13741377

13751378
if (rv == 0)
13761379
zvol_create_minor(name);
1380+
1381+
crfree(cr);
1382+
13771383
return (rv);
13781384
}
13791385

13801386
typedef struct dmu_objset_clone_arg {
13811387
const char *doca_clone;
13821388
const char *doca_origin;
13831389
cred_t *doca_cred;
1384-
proc_t *doca_proc;
13851390
} dmu_objset_clone_arg_t;
13861391

13871392
static int
@@ -1409,7 +1414,7 @@ dmu_objset_clone_check(void *arg, dmu_tx_t *tx)
14091414
}
14101415

14111416
error = dsl_fs_ss_limit_check(pdd, 1, ZFS_PROP_FILESYSTEM_LIMIT, NULL,
1412-
doca->doca_cred, doca->doca_proc);
1417+
doca->doca_cred);
14131418
if (error != 0) {
14141419
dsl_dir_rele(pdd, FTAG);
14151420
return (SET_ERROR(EDQUOT));
@@ -1465,10 +1470,12 @@ dmu_objset_clone(const char *clone, const char *origin)
14651470
{
14661471
dmu_objset_clone_arg_t doca;
14671472

1473+
cred_t *cr = CRED();
1474+
crhold(cr);
1475+
14681476
doca.doca_clone = clone;
14691477
doca.doca_origin = origin;
1470-
doca.doca_cred = CRED();
1471-
doca.doca_proc = curproc;
1478+
doca.doca_cred = cr;
14721479

14731480
int rv = dsl_sync_task(clone,
14741481
dmu_objset_clone_check, dmu_objset_clone_sync, &doca,
@@ -1477,6 +1484,8 @@ dmu_objset_clone(const char *clone, const char *origin)
14771484
if (rv == 0)
14781485
zvol_create_minor(clone);
14791486

1487+
crfree(cr);
1488+
14801489
return (rv);
14811490
}
14821491

0 commit comments

Comments
 (0)