Skip to content

Commit

Permalink
btrfs: add new ioctl to wait for cleaned subvolumes
Browse files Browse the repository at this point in the history
Add a new unprivileged ioctl that will let the command
'btrfs subvolume sync' work without the (privileged) SEARCH_TREE ioctl.

There are several modes of operation, where the most common ones are to
wait on a specific subvolume or all currently queued for cleaning. This
is utilized e.g. in backup applications that delete subvolumes and wait
until they're cleaned to check for remaining space.

The other modes are for flexibility, e.g. for monitoring or
checkpoints in the queue of deleted subvolumes, again without the need
to use SEARCH_TREE.

Notes:

- waiting is interruptible; the subvolume state is re-checked once per
  second and this polling interval is not configurable

- repeated calls to the ioctl may see a different state, so this is
  inherently racy when using e.g. the count or peek first/last

Use cases:

- a subvolume A was deleted, wait for cleaning (WAIT_FOR_ONE)

- a bunch of subvolumes were deleted, wait for all (WAIT_FOR_QUEUED or
  PEEK_LAST + WAIT_FOR_ONE)

- count how many are queued (not blocking), for monitoring purposes

- report progress (PEEK_FIRST), may miss some if cleaning is quick

- own waiting in user space (PEEK_LAST until it's 0)

Signed-off-by: David Sterba <[email protected]>
  • Loading branch information
kdave authored and fdmanana committed Oct 31, 2024
1 parent 0eb1bf5 commit f1a22e6
Show file tree
Hide file tree
Showing 2 changed files with 153 additions and 0 deletions.
128 changes: 128 additions & 0 deletions fs/btrfs/ioctl.c
Original file line number Diff line number Diff line change
Expand Up @@ -5021,6 +5021,132 @@ int btrfs_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
return -EINVAL;
}

/*
 * Handle BTRFS_IOC_SUBVOL_SYNC_WAIT: query the list of deleted subvolumes
 * waiting for cleaning (fs_info->dead_roots) or wait until one of them is
 * gone.
 *
 * @fs_info: filesystem to operate on
 * @argp:    user pointer to a struct btrfs_ioctl_subvol_wait; read on entry and
 *           written back by the COUNT and PEEK modes and by a completed wait
 *
 * Modes (args.mode):
 * - WAIT_FOR_ONE    - wait for args.subvolid
 * - WAIT_FOR_QUEUED - take the id of the last entry in dead_roots and wait for
 *                     it like WAIT_FOR_ONE
 * - COUNT           - store the number of entries in dead_roots in args.count
 * - PEEK_FIRST      - store the id of the last list entry in args.subvolid,
 *                     or 0 if the list is empty
 * - PEEK_LAST       - store the id of the first list entry in args.subvolid,
 *                     or 0 if the list is empty
 *
 * Return: 0 on success, or
 * -EFAULT    copying the argument from/to user space failed
 * -EINVAL    unknown mode, or subvolid outside the valid subvolume id range
 * -ENOENT    nothing is queued (WAIT_FOR_QUEUED), or the subvolume was not
 *            found on the first lookup (already cleaned or never existed)
 * -EEXIST    the subvolume exists and is not deleted (root refs > 0)
 * -EOVERFLOW subvolid does not fit the radix tree key on 32bit
 * -EINTR     the wait was interrupted or woken up early
 */
static int btrfs_ioctl_subvol_sync(struct btrfs_fs_info *fs_info, void __user *argp)
{
	struct btrfs_root *root;
	struct btrfs_ioctl_subvol_wait args = { 0 };
	signed long sched_ret;
	int refs;
	/* Only meaningful when the lookup below finds a root. */
	u64 root_flags = 0;
	bool wait_for_deletion = false;
	bool found = false;

	if (copy_from_user(&args, argp, sizeof(args)))
		return -EFAULT;

	switch (args.mode) {
	case BTRFS_SUBVOL_SYNC_WAIT_FOR_QUEUED:
		/*
		 * Pick the last entry of the dead roots list and wait for it
		 * the same way WAIT_FOR_ONE does.
		 *
		 * NOTE(review): the PEEK_FIRST case below states that the last
		 * list entry was deleted first; confirm against the code that
		 * queues dead roots that waiting for this entry really covers
		 * everything currently queued.
		 */
		spin_lock(&fs_info->trans_lock);
		if (!list_empty(&fs_info->dead_roots)) {
			root = list_last_entry(&fs_info->dead_roots,
					       struct btrfs_root, root_list);
			args.subvolid = btrfs_root_id(root);
			found = true;
		}
		spin_unlock(&fs_info->trans_lock);
		if (!found)
			return -ENOENT;

		fallthrough;
	case BTRFS_SUBVOL_SYNC_WAIT_FOR_ONE:
		/* Id 0 passes this check and simply fails the lookup below. */
		if ((0 < args.subvolid && args.subvolid < BTRFS_FIRST_FREE_OBJECTID) ||
		    BTRFS_LAST_FREE_OBJECTID < args.subvolid)
			return -EINVAL;
		break;
	case BTRFS_SUBVOL_SYNC_COUNT:
		spin_lock(&fs_info->trans_lock);
		args.count = list_count_nodes(&fs_info->dead_roots);
		spin_unlock(&fs_info->trans_lock);
		if (copy_to_user(argp, &args, sizeof(args)))
			return -EFAULT;
		return 0;
	case BTRFS_SUBVOL_SYNC_PEEK_FIRST:
		spin_lock(&fs_info->trans_lock);
		/* Last in the list was deleted first. */
		if (!list_empty(&fs_info->dead_roots)) {
			root = list_last_entry(&fs_info->dead_roots,
					       struct btrfs_root, root_list);
			args.subvolid = btrfs_root_id(root);
		} else {
			args.subvolid = 0;
		}
		spin_unlock(&fs_info->trans_lock);
		if (copy_to_user(argp, &args, sizeof(args)))
			return -EFAULT;
		return 0;
	case BTRFS_SUBVOL_SYNC_PEEK_LAST:
		spin_lock(&fs_info->trans_lock);
		/* First in the list was deleted last. */
		if (!list_empty(&fs_info->dead_roots)) {
			root = list_first_entry(&fs_info->dead_roots,
						struct btrfs_root, root_list);
			args.subvolid = btrfs_root_id(root);
		} else {
			args.subvolid = 0;
		}
		spin_unlock(&fs_info->trans_lock);
		if (copy_to_user(argp, &args, sizeof(args)))
			return -EFAULT;
		return 0;
	default:
		return -EINVAL;
	}

	/* 32bit limitation: fs_roots_radix key is not wide enough. */
	if (sizeof(unsigned long) != sizeof(u64) && args.subvolid > U32_MAX)
		return -EOVERFLOW;

	/* Poll for the specific subvolume until it disappears. */
	while (1) {
		if (down_read_interruptible(&fs_info->subvol_sem) == -EINTR)
			return -EINTR;
		/* -1 is the "root not found" sentinel checked below. */
		refs = -1;
		spin_lock(&fs_info->fs_roots_radix_lock);
		root = radix_tree_lookup(&fs_info->fs_roots_radix,
					 (unsigned long)args.subvolid);
		if (root) {
			spin_lock(&root->root_item_lock);
			refs = btrfs_root_refs(&root->root_item);
			root_flags = btrfs_root_flags(&root->root_item);
			spin_unlock(&root->root_item_lock);
		}
		spin_unlock(&fs_info->fs_roots_radix_lock);
		up_read(&fs_info->subvol_sem);

		/*
		 * Do not bail out on !root here: a missing root is the success
		 * condition once we have started waiting, and an unconditional
		 * -ENOENT at this point would make the branch below
		 * unreachable.
		 */

		/* Subvolume not deleted at all. */
		if (refs > 0)
			return -EEXIST;
		/* We've waited and now the subvolume is gone. */
		if (wait_for_deletion && refs == -1) {
			/* Return the one we waited for as the last one. */
			if (copy_to_user(argp, &args, sizeof(args)))
				return -EFAULT;
			return 0;
		}

		/* Subvolume not found on the first try (deleted or never existed). */
		if (refs == -1)
			return -ENOENT;

		wait_for_deletion = true;
		ASSERT(root_flags & BTRFS_ROOT_SUBVOL_DEAD);
		/* Sleep for one second before looking again. */
		sched_ret = schedule_timeout_interruptible(HZ);
		/* Early wake up or error. */
		if (sched_ret != 0)
			return -EINTR;
	}
}

long btrfs_ioctl(struct file *file, unsigned int
cmd, unsigned long arg)
{
Expand Down Expand Up @@ -5172,6 +5298,8 @@ long btrfs_ioctl(struct file *file, unsigned int
case BTRFS_IOC_ENCODED_WRITE_32:
return btrfs_ioctl_encoded_write(file, argp, true);
#endif
case BTRFS_IOC_SUBVOL_SYNC_WAIT:
return btrfs_ioctl_subvol_sync(fs_info, argp);
}

return -ENOTTY;
Expand Down
25 changes: 25 additions & 0 deletions include/uapi/linux/btrfs.h
Original file line number Diff line number Diff line change
Expand Up @@ -1049,6 +1049,29 @@ struct btrfs_ioctl_encoded_io_args {
#define BTRFS_ENCODED_IO_ENCRYPTION_NONE 0
#define BTRFS_ENCODED_IO_ENCRYPTION_TYPES 1

/*
 * Argument of BTRFS_IOC_SUBVOL_SYNC_WAIT: wait for the subvolume cleaning
 * process or inspect its queue. This queries the kernel queue and it can
 * change between the calls.
 *
 * - FOR_ONE - specify the subvolid
 * - FOR_QUEUED - wait for all currently queued
 * - COUNT - count number of queued
 * - PEEK_FIRST - read which is the first in the queue (to be cleaned or being
 *   cleaned already), or 0 if the queue is empty
 * - PEEK_LAST - read the last subvolid in the queue, or 0 if the queue is empty
 */
struct btrfs_ioctl_subvol_wait {
/*
 * in: subvolume id to wait for (FOR_ONE);
 * out: id picked by the kernel (FOR_QUEUED, PEEK_FIRST, PEEK_LAST)
 */
__u64 subvolid;
/* in: one of the BTRFS_SUBVOL_SYNC_* mode values */
__u32 mode;
/* out: number of queued subvolumes (COUNT) */
__u32 count;
};

/* Values for btrfs_ioctl_subvol_wait::mode. */
/* Wait until the subvolume given in subvolid has been cleaned. */
#define BTRFS_SUBVOL_SYNC_WAIT_FOR_ONE (0)
/* Wait on a subvolume id that the kernel takes from the queue itself. */
#define BTRFS_SUBVOL_SYNC_WAIT_FOR_QUEUED (1)
/* Do not wait, report the number of queued subvolumes in count. */
#define BTRFS_SUBVOL_SYNC_COUNT (2)
/* Do not wait, report the first queued subvolume id in subvolid (0 if none). */
#define BTRFS_SUBVOL_SYNC_PEEK_FIRST (3)
/* Do not wait, report the last queued subvolume id in subvolid (0 if none). */
#define BTRFS_SUBVOL_SYNC_PEEK_LAST (4)

/* Error codes as returned by the kernel */
enum btrfs_err_code {
BTRFS_ERROR_DEV_RAID1_MIN_NOT_MET = 1,
Expand Down Expand Up @@ -1181,6 +1204,8 @@ enum btrfs_err_code {
struct btrfs_ioctl_encoded_io_args)
#define BTRFS_IOC_ENCODED_WRITE _IOW(BTRFS_IOCTL_MAGIC, 64, \
struct btrfs_ioctl_encoded_io_args)
/*
 * Wait for, or query, subvolumes queued for cleaning; dispatched to
 * btrfs_ioctl_subvol_sync().
 *
 * NOTE(review): declared _IOW although the handler also copies the structure
 * back to user space (COUNT, PEEK_* and completed waits). The direction bits
 * are part of the ioctl number, so this is documented here, not changed.
 */
#define BTRFS_IOC_SUBVOL_SYNC_WAIT _IOW(BTRFS_IOCTL_MAGIC, 65, \
struct btrfs_ioctl_subvol_wait)

#ifdef __cplusplus
}
Expand Down

0 comments on commit f1a22e6

Please sign in to comment.