Skip to content

Commit d9da7a6

Browse files
chucklever authored and
gregkh committed
libfs: Use d_children list to iterate simple_offset directories
commit b9b588f upstream.

The mtree mechanism has been effective at creating directory offsets that are stable over multiple opendir instances. However, it has not been able to handle the subtleties of renames that are concurrent with readdir.

Instead of using the mtree to emit entries in the order of their offset values, use it only to map incoming ctx->pos to a starting entry. Then use the directory's d_children list, which is already maintained properly by the dcache, to find the next child to emit.

One of the sneaky things about this is that when the mtree-allocated offset value wraps (which is very rare), looking up ctx->pos++ is not going to find the next entry; it will return NULL. Instead, by following the d_children list, the offset values can appear in any order but all of the entries in the directory will be visited eventually.

Note also that the readdir() is guaranteed to reach the tail of this list. Entries are added only at the head of d_children, and readdir walks from its current position in that list towards its tail.

Signed-off-by: Chuck Lever <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Christian Brauner <[email protected]>
Signed-off-by: Greg Kroah-Hartman <[email protected]>
1 parent f8ba998 commit d9da7a6

File tree

1 file changed

+58
-26
lines changed

1 file changed

+58
-26
lines changed

fs/libfs.c

+58-26
Original file line number | Diff line number | Diff line change
@@ -247,12 +247,13 @@ EXPORT_SYMBOL(simple_dir_inode_operations);
247247

248248
/* simple_offset_add() never assigns these to a dentry */
249249
enum {
250+
DIR_OFFSET_FIRST = 2, /* Find first real entry */
250251
DIR_OFFSET_EOD = S32_MAX,
251252
};
252253

253254
/* simple_offset_add() allocation range */
254255
enum {
255-
DIR_OFFSET_MIN = 2,
256+
DIR_OFFSET_MIN = DIR_OFFSET_FIRST + 1,
256257
DIR_OFFSET_MAX = DIR_OFFSET_EOD - 1,
257258
};
258259

@@ -457,51 +458,82 @@ static loff_t offset_dir_llseek(struct file *file, loff_t offset, int whence)
457458
return vfs_setpos(file, offset, LONG_MAX);
458459
}
459460

460-
static struct dentry *offset_find_next(struct offset_ctx *octx, loff_t offset)
461+
static struct dentry *find_positive_dentry(struct dentry *parent,
462+
struct dentry *dentry,
463+
bool next)
461464
{
462-
MA_STATE(mas, &octx->mt, offset, offset);
465+
struct dentry *found = NULL;
466+
467+
spin_lock(&parent->d_lock);
468+
if (next)
469+
dentry = d_next_sibling(dentry);
470+
else if (!dentry)
471+
dentry = d_first_child(parent);
472+
hlist_for_each_entry_from(dentry, d_sib) {
473+
if (!simple_positive(dentry))
474+
continue;
475+
spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
476+
if (simple_positive(dentry))
477+
found = dget_dlock(dentry);
478+
spin_unlock(&dentry->d_lock);
479+
if (likely(found))
480+
break;
481+
}
482+
spin_unlock(&parent->d_lock);
483+
return found;
484+
}
485+
486+
static noinline_for_stack struct dentry *
487+
offset_dir_lookup(struct dentry *parent, loff_t offset)
488+
{
489+
struct inode *inode = d_inode(parent);
490+
struct offset_ctx *octx = inode->i_op->get_offset_ctx(inode);
463491
struct dentry *child, *found = NULL;
464492

465-
rcu_read_lock();
466-
child = mas_find(&mas, DIR_OFFSET_MAX);
467-
if (!child)
468-
goto out;
469-
spin_lock(&child->d_lock);
470-
if (simple_positive(child))
471-
found = dget_dlock(child);
472-
spin_unlock(&child->d_lock);
473-
out:
474-
rcu_read_unlock();
493+
MA_STATE(mas, &octx->mt, offset, offset);
494+
495+
if (offset == DIR_OFFSET_FIRST)
496+
found = find_positive_dentry(parent, NULL, false);
497+
else {
498+
rcu_read_lock();
499+
child = mas_find(&mas, DIR_OFFSET_MAX);
500+
found = find_positive_dentry(parent, child, false);
501+
rcu_read_unlock();
502+
}
475503
return found;
476504
}
477505

478506
static bool offset_dir_emit(struct dir_context *ctx, struct dentry *dentry)
479507
{
480508
struct inode *inode = d_inode(dentry);
481-
long offset = dentry2offset(dentry);
482509

483-
return ctx->actor(ctx, dentry->d_name.name, dentry->d_name.len, offset,
484-
inode->i_ino, fs_umode_to_dtype(inode->i_mode));
510+
return dir_emit(ctx, dentry->d_name.name, dentry->d_name.len,
511+
inode->i_ino, fs_umode_to_dtype(inode->i_mode));
485512
}
486513

487-
static void offset_iterate_dir(struct inode *inode, struct dir_context *ctx)
514+
static void offset_iterate_dir(struct file *file, struct dir_context *ctx)
488515
{
489-
struct offset_ctx *octx = inode->i_op->get_offset_ctx(inode);
516+
struct dentry *dir = file->f_path.dentry;
490517
struct dentry *dentry;
491518

519+
dentry = offset_dir_lookup(dir, ctx->pos);
520+
if (!dentry)
521+
goto out_eod;
492522
while (true) {
493-
dentry = offset_find_next(octx, ctx->pos);
494-
if (!dentry)
495-
goto out_eod;
523+
struct dentry *next;
496524

497-
if (!offset_dir_emit(ctx, dentry)) {
498-
dput(dentry);
525+
ctx->pos = dentry2offset(dentry);
526+
if (!offset_dir_emit(ctx, dentry))
499527
break;
500-
}
501528

502-
ctx->pos = dentry2offset(dentry) + 1;
529+
next = find_positive_dentry(dir, dentry, true);
503530
dput(dentry);
531+
532+
if (!next)
533+
goto out_eod;
534+
dentry = next;
504535
}
536+
dput(dentry);
505537
return;
506538

507539
out_eod:
@@ -540,7 +572,7 @@ static int offset_readdir(struct file *file, struct dir_context *ctx)
540572
if (!dir_emit_dots(file, ctx))
541573
return 0;
542574
if (ctx->pos != DIR_OFFSET_EOD)
543-
offset_iterate_dir(d_inode(dir), ctx);
575+
offset_iterate_dir(file, ctx);
544576
return 0;
545577
}
546578

0 commit comments

Comments
 (0)