Merge tag 'vfs-7.1-rc1.pidfs' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs

Pull clone and pidfs updates from Christian Brauner:
 "Add three new clone3() flags for pidfd-based process lifecycle
  management.

  CLONE_AUTOREAP:

     CLONE_AUTOREAP makes a child process auto-reap on exit without ever
     becoming a zombie. This is a per-process property in contrast to
     the existing auto-reap mechanism via SA_NOCLDWAIT or SIG_IGN for
     SIGCHLD which applies to all children of a given parent.

     Currently the only way to automatically reap children is to set
     SA_NOCLDWAIT or SIG_IGN on SIGCHLD. This is a parent-scoped
     property affecting all children which makes it unsuitable for
     libraries or applications that need selective auto-reaping of
     specific children while still being able to wait() on others.

     CLONE_AUTOREAP stores an autoreap flag in the child's
     signal_struct. When the child exits do_notify_parent() checks this
     flag and causes exit_notify() to transition the task directly to
     EXIT_DEAD. Since the flag lives on the child it survives
     reparenting: if the original parent exits and the child is
     reparented to a subreaper or init the child still auto-reaps when
     it eventually exits. This is cleaner than forcing the subreaper to
     get SIGCHLD and then reaping it. If the parent doesn't care the
     subreaper won't care. If there's a subreaper that would care it
     would be easy enough to add a prctl() that either just turns back
     on SIGCHLD and turns off auto-reaping or a prctl() that just
     notifies the subreaper whenever a child is reparented to it.

     CLONE_AUTOREAP can be combined with CLONE_PIDFD to allow the parent
     to monitor the child's exit via poll() and retrieve exit status via
     PIDFD_GET_INFO. Without CLONE_PIDFD it provides a fire-and-forget
     pattern. No exit signal is delivered so exit_signal must be zero.
     CLONE_THREAD and CLONE_PARENT are rejected: CLONE_THREAD because
     autoreap is a process-level property, and CLONE_PARENT because an
     autoreap child reparented via CLONE_PARENT could become an
     invisible zombie under a parent that never calls wait().

     The flag is not inherited by the autoreap process's own children.
     Each child that should be autoreaped must be explicitly created
     with CLONE_AUTOREAP.

  CLONE_NNP:

     CLONE_NNP sets no_new_privs on the child at clone time. Unlike
     prctl(PR_SET_NO_NEW_PRIVS) which a process sets on itself,
     CLONE_NNP allows the parent to impose no_new_privs on the child at
     creation without affecting the parent's own privileges.
     CLONE_THREAD is rejected because threads share credentials.
     CLONE_NNP is useful on its own for any spawn-and-sandbox pattern
     but was specifically introduced to enable unprivileged usage of
     CLONE_PIDFD_AUTOKILL.

  CLONE_PIDFD_AUTOKILL:

     This flag ties a child's lifetime to the pidfd returned from
     clone3(). When the last reference to the struct file created by
     clone3() is closed the kernel sends SIGKILL to the child. A pidfd
     obtained via pidfd_open() for the same process does not keep the
     child alive and does not trigger autokill - only the specific
     struct file from clone3() has this property. This is useful for
     container runtimes, service managers, and sandboxed subprocess
     execution - any scenario where the child must die if the parent
     crashes or abandons the pidfd, or where the parent just wants a
     throwaway helper process.

     CLONE_PIDFD_AUTOKILL requires both CLONE_PIDFD and CLONE_AUTOREAP.
     It requires CLONE_PIDFD because the whole point is tying the
     child's lifetime to the pidfd. It requires CLONE_AUTOREAP because a
     killed child with no one to reap it would become a zombie - the
     primary use case is the parent crashing or abandoning the pidfd so
     no one is around to call waitpid(). CLONE_THREAD is rejected
     because autokill targets a process not a thread.

     If CLONE_NNP is specified together with CLONE_PIDFD_AUTOKILL an
     unprivileged user may spawn a process that is autokilled. The child
     cannot escalate privileges via setuid/setgid exec after being
     spawned. If CLONE_PIDFD_AUTOKILL is specified without CLONE_NNP the
     caller must have CAP_SYS_ADMIN in its user namespace."

* tag 'vfs-7.1-rc1.pidfs' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
  selftests: check pidfd_info->coredump_code correctness
  pidfds: add coredump_code field to pidfd_info
  kselftest/coredump: reintroduce null pointer dereference
  selftests/pidfd: add CLONE_PIDFD_AUTOKILL tests
  selftests/pidfd: add CLONE_NNP tests
  selftests/pidfd: add CLONE_AUTOREAP tests
  pidfd: add CLONE_PIDFD_AUTOKILL
  clone: add CLONE_NNP
  clone: add CLONE_AUTOREAP
This commit is contained in:
Linus Torvalds
2026-04-13 13:27:11 -07:00
15 changed files with 1075 additions and 20 deletions

View File

@@ -8,6 +8,8 @@
#include <linux/mount.h>
#include <linux/pid.h>
#include <linux/pidfs.h>
#include <linux/sched/signal.h>
#include <linux/signal.h>
#include <linux/pid_namespace.h>
#include <linux/poll.h>
#include <linux/proc_fs.h>
@@ -54,6 +56,7 @@ struct pidfs_anon_attr {
};
__u32 coredump_mask;
__u32 coredump_signal;
__u32 coredump_code;
};
static struct rhashtable pidfs_ino_ht;
@@ -358,7 +361,8 @@ static __u32 pidfs_coredump_mask(unsigned long mm_flags)
PIDFD_INFO_EXIT | \
PIDFD_INFO_COREDUMP | \
PIDFD_INFO_SUPPORTED_MASK | \
PIDFD_INFO_COREDUMP_SIGNAL)
PIDFD_INFO_COREDUMP_SIGNAL | \
PIDFD_INFO_COREDUMP_CODE)
static long pidfd_info(struct file *file, unsigned int cmd, unsigned long arg)
{
@@ -372,7 +376,7 @@ static long pidfd_info(struct file *file, unsigned int cmd, unsigned long arg)
const struct cred *c;
__u64 mask;
BUILD_BUG_ON(sizeof(struct pidfd_info) != PIDFD_INFO_SIZE_VER2);
BUILD_BUG_ON(sizeof(struct pidfd_info) != PIDFD_INFO_SIZE_VER3);
if (!uinfo)
return -EINVAL;
@@ -405,9 +409,10 @@ static long pidfd_info(struct file *file, unsigned int cmd, unsigned long arg)
if (mask & PIDFD_INFO_COREDUMP) {
if (test_bit(PIDFS_ATTR_BIT_COREDUMP, &attr->attr_mask)) {
smp_rmb();
kinfo.mask |= PIDFD_INFO_COREDUMP | PIDFD_INFO_COREDUMP_SIGNAL;
kinfo.mask |= PIDFD_INFO_COREDUMP | PIDFD_INFO_COREDUMP_SIGNAL | PIDFD_INFO_COREDUMP_CODE;
kinfo.coredump_mask = attr->coredump_mask;
kinfo.coredump_signal = attr->coredump_signal;
kinfo.coredump_code = attr->coredump_code;
}
}
@@ -662,7 +667,28 @@ static long pidfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
return open_namespace(ns_common);
}
/*
 * ->release handler for pidfd files (installed in pidfs_file_operations).
 *
 * When the file being released carries PIDFD_AUTOKILL, look up the task
 * attached to the pid stored in inode->i_private for PIDTYPE_TGID and
 * send it SIGKILL. Files without PIDFD_AUTOKILL are left alone.
 *
 * Always returns 0, including when no task is found or when the task is a
 * kthread or user worker (the latter additionally triggers a one-time
 * warning).
 */
static int pidfs_file_release(struct inode *inode, struct file *file)
{
struct pid *pid = inode->i_private;
struct task_struct *task;
/* Only files flagged for autokill do any work on release. */
if (!(file->f_flags & PIDFD_AUTOKILL))
return 0;
/*
 * NOTE(review): presumably a scope-based RCU read-side section that
 * lasts until the function returns and keeps the pid_task() result
 * usable - confirm against the guard() definition.
 */
guard(rcu)();
task = pid_task(pid, PIDTYPE_TGID);
/* No task attached to this pid for PIDTYPE_TGID: nothing to kill. */
if (!task)
return 0;
/* Not available for kthreads or user workers for now. */
if (WARN_ON_ONCE(task->flags & (PF_KTHREAD | PF_USER_WORKER)))
return 0;
/* The return value of do_send_sig_info() is deliberately not propagated. */
do_send_sig_info(SIGKILL, SEND_SIG_PRIV, task, PIDTYPE_TGID);
return 0;
}
static const struct file_operations pidfs_file_operations = {
.release = pidfs_file_release,
.poll = pidfd_poll,
#ifdef CONFIG_PROC_FS
.show_fdinfo = pidfd_show_fdinfo,
@@ -757,8 +783,9 @@ void pidfs_coredump(const struct coredump_params *cprm)
PIDFD_COREDUMPED;
/* If coredumping is set to skip we should never end up here. */
VFS_WARN_ON_ONCE(attr->coredump_mask & PIDFD_COREDUMP_SKIP);
/* Expose the signal number that caused the coredump. */
/* Expose the signal number and code that caused the coredump. */
attr->coredump_signal = cprm->siginfo->si_signo;
attr->coredump_code = cprm->siginfo->si_code;
smp_wmb();
set_bit(PIDFS_ATTR_BIT_COREDUMP, &attr->attr_mask);
}
@@ -1112,11 +1139,11 @@ struct file *pidfs_alloc_file(struct pid *pid, unsigned int flags)
int ret;
/*
* Ensure that PIDFD_STALE can be passed as a flag without
* overloading other uapi pidfd flags.
* Ensure that internal pidfd flags don't overlap with each
* other or with uapi pidfd flags.
*/
BUILD_BUG_ON(PIDFD_STALE == PIDFD_THREAD);
BUILD_BUG_ON(PIDFD_STALE == PIDFD_NONBLOCK);
BUILD_BUG_ON(hweight32(PIDFD_THREAD | PIDFD_NONBLOCK |
PIDFD_STALE | PIDFD_AUTOKILL) != 4);
ret = path_from_stashed(&pid->stashed, pidfs_mnt, get_pid(pid), &path);
if (ret < 0)
@@ -1127,9 +1154,12 @@ struct file *pidfs_alloc_file(struct pid *pid, unsigned int flags)
flags &= ~PIDFD_STALE;
flags |= O_RDWR;
pidfd_file = dentry_open(&path, flags, current_cred());
/* Raise PIDFD_THREAD explicitly as do_dentry_open() strips it. */
/*
* Raise PIDFD_THREAD and PIDFD_AUTOKILL explicitly as
* do_dentry_open() strips O_EXCL and O_TRUNC.
*/
if (!IS_ERR(pidfd_file))
pidfd_file->f_flags |= (flags & PIDFD_THREAD);
pidfd_file->f_flags |= (flags & (PIDFD_THREAD | PIDFD_AUTOKILL));
return pidfd_file;
}

View File

@@ -132,6 +132,7 @@ struct signal_struct {
*/
unsigned int is_child_subreaper:1;
unsigned int has_child_subreaper:1;
unsigned int autoreap:1;
#ifdef CONFIG_POSIX_TIMERS

View File

@@ -13,6 +13,7 @@
#ifdef __KERNEL__
#include <linux/sched.h>
#define PIDFD_STALE CLONE_PIDFD
#define PIDFD_AUTOKILL O_TRUNC
#endif
/* Flags for pidfd_send_signal(). */
@@ -28,10 +29,12 @@
#define PIDFD_INFO_COREDUMP (1UL << 4) /* Only returned if requested. */
#define PIDFD_INFO_SUPPORTED_MASK (1UL << 5) /* Want/got supported mask flags */
#define PIDFD_INFO_COREDUMP_SIGNAL (1UL << 6) /* Always returned if PIDFD_INFO_COREDUMP is requested. */
#define PIDFD_INFO_COREDUMP_CODE (1UL << 7) /* Always returned if PIDFD_INFO_COREDUMP is requested. */
#define PIDFD_INFO_SIZE_VER0 64 /* sizeof first published struct */
#define PIDFD_INFO_SIZE_VER1 72 /* sizeof second published struct */
#define PIDFD_INFO_SIZE_VER2 80 /* sizeof third published struct */
#define PIDFD_INFO_SIZE_VER3 88 /* sizeof fourth published struct */
/*
* Values for @coredump_mask in pidfd_info.
@@ -98,6 +101,8 @@ struct pidfd_info {
struct /* coredump info */ {
__u32 coredump_mask;
__u32 coredump_signal;
__u32 coredump_code;
__u32 coredump_pad; /* align supported_mask to 8 bytes */
};
__u64 supported_mask; /* Mask flags that this kernel supports */
};

View File

@@ -34,8 +34,11 @@
#define CLONE_IO 0x80000000 /* Clone io context */
/* Flags for the clone3() syscall. */
#define CLONE_CLEAR_SIGHAND 0x100000000ULL /* Clear any signal handler and reset to SIG_DFL. */
#define CLONE_INTO_CGROUP 0x200000000ULL /* Clone into a specific cgroup given the right permissions. */
#define CLONE_CLEAR_SIGHAND (1ULL << 32) /* Clear any signal handler and reset to SIG_DFL. */
#define CLONE_INTO_CGROUP (1ULL << 33) /* Clone into a specific cgroup given the right permissions. */
#define CLONE_AUTOREAP (1ULL << 34) /* Auto-reap child on exit. */
#define CLONE_NNP (1ULL << 35) /* Set no_new_privs on child. */
#define CLONE_PIDFD_AUTOKILL (1ULL << 36) /* Kill child when clone pidfd closes. */
/*
* cloning flags intersect with CSIGNAL so can be used with unshare and clone3

View File

@@ -2029,6 +2029,41 @@ __latent_entropy struct task_struct *copy_process(
return ERR_PTR(-EINVAL);
}
if (clone_flags & CLONE_AUTOREAP) {
if (clone_flags & CLONE_THREAD)
return ERR_PTR(-EINVAL);
if (clone_flags & CLONE_PARENT)
return ERR_PTR(-EINVAL);
if (args->exit_signal)
return ERR_PTR(-EINVAL);
}
if ((clone_flags & CLONE_PARENT) && current->signal->autoreap)
return ERR_PTR(-EINVAL);
if (clone_flags & CLONE_NNP) {
if (clone_flags & CLONE_THREAD)
return ERR_PTR(-EINVAL);
}
if (clone_flags & CLONE_PIDFD_AUTOKILL) {
if (!(clone_flags & CLONE_PIDFD))
return ERR_PTR(-EINVAL);
if (!(clone_flags & CLONE_AUTOREAP))
return ERR_PTR(-EINVAL);
if (clone_flags & CLONE_THREAD)
return ERR_PTR(-EINVAL);
/*
* Without CLONE_NNP the child could escalate privileges
* after being spawned, so require CAP_SYS_ADMIN.
* With CLONE_NNP the child can't gain new privileges,
* so allow unprivileged usage.
*/
if (!(clone_flags & CLONE_NNP) &&
!ns_capable(current_user_ns(), CAP_SYS_ADMIN))
return ERR_PTR(-EPERM);
}
/*
* Force any signals received before this point to be delivered
* before the fork happens. Collect up signals sent to multiple
@@ -2251,13 +2286,18 @@ __latent_entropy struct task_struct *copy_process(
* if the fd table isn't shared).
*/
if (clone_flags & CLONE_PIDFD) {
int flags = (clone_flags & CLONE_THREAD) ? PIDFD_THREAD : 0;
unsigned flags = PIDFD_STALE;
if (clone_flags & CLONE_THREAD)
flags |= PIDFD_THREAD;
if (clone_flags & CLONE_PIDFD_AUTOKILL)
flags |= PIDFD_AUTOKILL;
/*
* Note that no task has been attached to @pid yet indicate
* that via CLONE_PIDFD.
*/
retval = pidfd_prepare(pid, flags | PIDFD_STALE, &pidfile);
retval = pidfd_prepare(pid, flags, &pidfile);
if (retval < 0)
goto bad_fork_free_pid;
pidfd = retval;
@@ -2413,6 +2453,9 @@ __latent_entropy struct task_struct *copy_process(
*/
copy_seccomp(p);
if (clone_flags & CLONE_NNP)
task_set_no_new_privs(p);
init_task_pid_links(p);
if (likely(p->pid)) {
ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace);
@@ -2436,6 +2479,8 @@ __latent_entropy struct task_struct *copy_process(
*/
p->signal->has_child_subreaper = p->real_parent->signal->has_child_subreaper ||
p->real_parent->signal->is_child_subreaper;
if (clone_flags & CLONE_AUTOREAP)
p->signal->autoreap = 1;
list_add_tail(&p->sibling, &p->real_parent->children);
list_add_tail_rcu(&p->tasks, &init_task.tasks);
attach_pid(p, PIDTYPE_TGID);
@@ -2897,7 +2942,8 @@ static bool clone3_args_valid(struct kernel_clone_args *kargs)
{
/* Verify that no unknown flags are passed along. */
if (kargs->flags &
~(CLONE_LEGACY_FLAGS | CLONE_CLEAR_SIGHAND | CLONE_INTO_CGROUP))
~(CLONE_LEGACY_FLAGS | CLONE_CLEAR_SIGHAND | CLONE_INTO_CGROUP |
CLONE_AUTOREAP | CLONE_NNP | CLONE_PIDFD_AUTOKILL))
return false;
/*

View File

@@ -549,7 +549,8 @@ static bool __ptrace_detach(struct task_struct *tracer, struct task_struct *p)
if (!dead && thread_group_empty(p)) {
if (!same_thread_group(p->real_parent, tracer))
dead = do_notify_parent(p, p->exit_signal);
else if (ignoring_children(tracer->sighand)) {
else if (ignoring_children(tracer->sighand) ||
p->signal->autoreap) {
__wake_up_parent(p, tracer);
dead = true;
}

View File

@@ -2251,6 +2251,10 @@ bool do_notify_parent(struct task_struct *tsk, int sig)
if (psig->action[SIGCHLD-1].sa.sa_handler == SIG_IGN)
sig = 0;
}
if (!tsk->ptrace && tsk->signal->autoreap) {
autoreap = true;
sig = 0;
}
/*
* Send with __send_signal as si_pid and si_uid are in the
* parent's namespaces.

View File

@@ -1004,6 +1004,8 @@ out:
*
* Verify that when using socket-based coredump protocol,
* the coredump_signal field is correctly exposed as SIGSEGV.
* Also check that the coredump_code field is correctly exposed
* as SEGV_MAPERR.
*/
TEST_F(coredump, socket_coredump_signal_sigsegv)
{
@@ -1079,6 +1081,18 @@ TEST_F(coredump, socket_coredump_signal_sigsegv)
goto out;
}
/* Verify coredump_code is available and correct */
if (!(info.mask & PIDFD_INFO_COREDUMP_CODE)) {
fprintf(stderr, "socket_coredump_signal_sigsegv: PIDFD_INFO_COREDUMP_CODE not set in mask\n");
goto out;
}
if (info.coredump_code != SEGV_MAPERR) {
fprintf(stderr, "socket_coredump_signal_sigsegv: coredump_code=%d, expected SEGV_MAPERR=%d\n",
info.coredump_code, SEGV_MAPERR);
goto out;
}
if (!read_coredump_req(fd_coredump, &req)) {
fprintf(stderr, "socket_coredump_signal_sigsegv: read_coredump_req failed\n");
goto out;
@@ -1128,6 +1142,8 @@ out:
ASSERT_TRUE(!!(info.mask & PIDFD_INFO_COREDUMP));
ASSERT_TRUE(!!(info.mask & PIDFD_INFO_COREDUMP_SIGNAL));
ASSERT_EQ(info.coredump_signal, SIGSEGV);
ASSERT_TRUE(!!(info.mask & PIDFD_INFO_COREDUMP_CODE));
ASSERT_EQ(info.coredump_code, SEGV_MAPERR);
wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
}
@@ -1137,6 +1153,8 @@ out:
*
* Verify that when using socket-based coredump protocol,
* the coredump_signal field is correctly exposed as SIGABRT.
* Also check that the coredump_code field is correctly exposed
* as SI_TKILL.
*/
TEST_F(coredump, socket_coredump_signal_sigabrt)
{
@@ -1212,6 +1230,12 @@ TEST_F(coredump, socket_coredump_signal_sigabrt)
goto out;
}
if (info.coredump_code != SI_TKILL) {
fprintf(stderr, "socket_coredump_signal_sigabrt: coredump_code=%d, expected SI_TKILL=%d\n",
info.coredump_code, SI_TKILL);
goto out;
}
if (!read_coredump_req(fd_coredump, &req)) {
fprintf(stderr, "socket_coredump_signal_sigabrt: read_coredump_req failed\n");
goto out;
@@ -1261,6 +1285,8 @@ out:
ASSERT_TRUE(!!(info.mask & PIDFD_INFO_COREDUMP));
ASSERT_TRUE(!!(info.mask & PIDFD_INFO_COREDUMP_SIGNAL));
ASSERT_EQ(info.coredump_signal, SIGABRT);
ASSERT_TRUE(!!(info.mask & PIDFD_INFO_COREDUMP_CODE));
ASSERT_EQ(info.coredump_code, SI_TKILL);
wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
}

View File

@@ -435,6 +435,8 @@ out:
*
* Verify that when using simple socket-based coredump (@ pattern),
* the coredump_signal field is correctly exposed as SIGSEGV.
* Also check that the coredump_code field is correctly exposed
* as SEGV_MAPERR.
*/
TEST_F(coredump, socket_coredump_signal_sigsegv)
{
@@ -509,6 +511,18 @@ TEST_F(coredump, socket_coredump_signal_sigsegv)
goto out;
}
/* Verify coredump_code is available and correct */
if (!(info.mask & PIDFD_INFO_COREDUMP_CODE)) {
fprintf(stderr, "socket_coredump_signal_sigsegv: PIDFD_INFO_COREDUMP_CODE not set in mask\n");
goto out;
}
if (info.coredump_code != SEGV_MAPERR) {
fprintf(stderr, "socket_coredump_signal_sigsegv: coredump_code=%d, expected SEGV_MAPERR=%d\n",
info.coredump_code, SEGV_MAPERR);
goto out;
}
fd_core_file = open_coredump_tmpfile(self->fd_tmpfs_detached);
if (fd_core_file < 0) {
fprintf(stderr, "socket_coredump_signal_sigsegv: open_coredump_tmpfile failed: %m\n");
@@ -572,6 +586,8 @@ out:
ASSERT_TRUE(!!(info.mask & PIDFD_INFO_COREDUMP));
ASSERT_TRUE(!!(info.mask & PIDFD_INFO_COREDUMP_SIGNAL));
ASSERT_EQ(info.coredump_signal, SIGSEGV);
ASSERT_TRUE(!!(info.mask & PIDFD_INFO_COREDUMP_CODE));
ASSERT_EQ(info.coredump_code, SEGV_MAPERR);
wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
}
@@ -581,6 +597,8 @@ out:
*
* Verify that when using simple socket-based coredump (@ pattern),
* the coredump_signal field is correctly exposed as SIGABRT.
* Also check that the coredump_code field is correctly exposed
* as SI_TKILL.
*/
TEST_F(coredump, socket_coredump_signal_sigabrt)
{
@@ -655,6 +673,18 @@ TEST_F(coredump, socket_coredump_signal_sigabrt)
goto out;
}
/* Verify coredump_code is available and correct */
if (!(info.mask & PIDFD_INFO_COREDUMP_CODE)) {
fprintf(stderr, "socket_coredump_signal_sigabrt: PIDFD_INFO_COREDUMP_CODE not set in mask\n");
goto out;
}
if (info.coredump_code != SI_TKILL) {
fprintf(stderr, "socket_coredump_signal_sigabrt: coredump_code=%d, expected SI_TKILL=%d\n",
info.coredump_code, SI_TKILL);
goto out;
}
fd_core_file = open_coredump_tmpfile(self->fd_tmpfs_detached);
if (fd_core_file < 0) {
fprintf(stderr, "socket_coredump_signal_sigabrt: open_coredump_tmpfile failed: %m\n");
@@ -718,6 +748,8 @@ out:
ASSERT_TRUE(!!(info.mask & PIDFD_INFO_COREDUMP));
ASSERT_TRUE(!!(info.mask & PIDFD_INFO_COREDUMP_SIGNAL));
ASSERT_EQ(info.coredump_signal, SIGABRT);
ASSERT_TRUE(!!(info.mask & PIDFD_INFO_COREDUMP_CODE));
ASSERT_EQ(info.coredump_code, SI_TKILL);
wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
}

View File

@@ -56,7 +56,7 @@ void crashing_child(void)
pthread_create(&thread, NULL, do_nothing, NULL);
/* crash on purpose */
__builtin_trap();
i = *(volatile int *)NULL;
}
int create_detached_tmpfs(void)
@@ -148,8 +148,8 @@ bool get_pidfd_info(int fd_peer_pidfd, struct pidfd_info *info)
fprintf(stderr, "get_pidfd_info: ioctl(PIDFD_GET_INFO) failed: %m\n");
return false;
}
fprintf(stderr, "get_pidfd_info: mask=0x%llx, coredump_mask=0x%x, coredump_signal=%d\n",
(unsigned long long)info->mask, info->coredump_mask, info->coredump_signal);
fprintf(stderr, "get_pidfd_info: mask=0x%llx, coredump_mask=0x%x, coredump_signal=%d, coredump_code=%d\n",
(unsigned long long)info->mask, info->coredump_mask, info->coredump_signal, info->coredump_code);
return true;
}

View File

@@ -12,3 +12,4 @@ pidfd_info_test
pidfd_exec_helper
pidfd_xattr_test
pidfd_setattr_test
pidfd_autoreap_test

View File

@@ -4,7 +4,7 @@ CFLAGS += -g $(KHDR_INCLUDES) $(TOOLS_INCLUDES) -pthread -Wall
TEST_GEN_PROGS := pidfd_test pidfd_fdinfo_test pidfd_open_test \
pidfd_poll_test pidfd_wait pidfd_getfd_test pidfd_setns_test \
pidfd_file_handle_test pidfd_bind_mount pidfd_info_test \
pidfd_xattr_test pidfd_setattr_test
pidfd_xattr_test pidfd_setattr_test pidfd_autoreap_test
TEST_GEN_PROGS_EXTENDED := pidfd_exec_helper

View File

@@ -156,6 +156,10 @@
#define PIDFD_INFO_COREDUMP_SIGNAL (1UL << 6)
#endif
#ifndef PIDFD_INFO_COREDUMP_CODE
#define PIDFD_INFO_COREDUMP_CODE (1UL << 7)
#endif
#ifndef PIDFD_COREDUMPED
#define PIDFD_COREDUMPED (1U << 0) /* Did crash and... */
#endif
@@ -194,6 +198,7 @@ struct pidfd_info {
struct {
__u32 coredump_mask;
__u32 coredump_signal;
__u32 coredump_code;
};
__u64 supported_mask;
};

View File

@@ -0,0 +1,900 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2026 Christian Brauner <brauner@kernel.org>
#define _GNU_SOURCE
#include <errno.h>
#include <linux/types.h>
#include <poll.h>
#include <pthread.h>
#include <sched.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <syscall.h>
#include <sys/ioctl.h>
#include <sys/prctl.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
#include "pidfd.h"
#include "kselftest_harness.h"
#ifndef CLONE_AUTOREAP
#define CLONE_AUTOREAP (1ULL << 34)
#endif
#ifndef CLONE_NNP
#define CLONE_NNP (1ULL << 35)
#endif
#ifndef CLONE_PIDFD_AUTOKILL
#define CLONE_PIDFD_AUTOKILL (1ULL << 36)
#endif
#ifndef _LINUX_CAPABILITY_VERSION_3
#define _LINUX_CAPABILITY_VERSION_3 0x20080522
#endif
/*
 * Header argument handed to the raw capset syscall by drop_all_caps().
 * NOTE(review): presumably mirrors the kernel's capability user header
 * layout - confirm against capset(2).
 */
struct cap_header {
__u32 version; /* drop_all_caps() sets this to _LINUX_CAPABILITY_VERSION_3 */
int pid; /* left zero-initialized by drop_all_caps() */
};
/*
 * One element of the capability payload handed to the raw capset syscall.
 * drop_all_caps() passes a zero-initialized array of two of these.
 * NOTE(review): presumably mirrors the kernel's capability user data
 * layout - confirm against capset(2).
 */
struct cap_data {
__u32 effective;
__u32 permitted;
__u32 inheritable;
};
static int drop_all_caps(void)
{
struct cap_header hdr = { .version = _LINUX_CAPABILITY_VERSION_3 };
struct cap_data data[2] = {};
return syscall(__NR_capset, &hdr, data);
}
static pid_t create_autoreap_child(int *pidfd)
{
struct __clone_args args = {
.flags = CLONE_PIDFD | CLONE_AUTOREAP,
.exit_signal = 0,
.pidfd = ptr_to_u64(pidfd),
};
return sys_clone3(&args, sizeof(args));
}
/*
* Test that CLONE_AUTOREAP works without CLONE_PIDFD (fire-and-forget).
*/
TEST(autoreap_without_pidfd)
{
struct __clone_args args = {
.flags = CLONE_AUTOREAP,
.exit_signal = 0,
};
pid_t pid;
int ret;
pid = sys_clone3(&args, sizeof(args));
if (pid < 0 && errno == EINVAL)
SKIP(return, "CLONE_AUTOREAP not supported");
ASSERT_GE(pid, 0);
if (pid == 0)
_exit(0);
/*
* Give the child a moment to exit and be autoreaped.
* Then verify no zombie remains.
*/
usleep(200000);
ret = waitpid(pid, NULL, WNOHANG);
ASSERT_EQ(ret, -1);
ASSERT_EQ(errno, ECHILD);
}
/*
* Test that CLONE_AUTOREAP with a non-zero exit_signal fails.
*/
TEST(autoreap_rejects_exit_signal)
{
struct __clone_args args = {
.flags = CLONE_AUTOREAP,
.exit_signal = SIGCHLD,
};
pid_t pid;
pid = sys_clone3(&args, sizeof(args));
ASSERT_EQ(pid, -1);
ASSERT_EQ(errno, EINVAL);
}
/*
* Test that CLONE_AUTOREAP with CLONE_PARENT fails.
*/
TEST(autoreap_rejects_parent)
{
struct __clone_args args = {
.flags = CLONE_AUTOREAP | CLONE_PARENT,
.exit_signal = 0,
};
pid_t pid;
pid = sys_clone3(&args, sizeof(args));
ASSERT_EQ(pid, -1);
ASSERT_EQ(errno, EINVAL);
}
/*
* Test that CLONE_AUTOREAP with CLONE_THREAD fails.
*/
TEST(autoreap_rejects_thread)
{
struct __clone_args args = {
.flags = CLONE_AUTOREAP | CLONE_THREAD |
CLONE_SIGHAND | CLONE_VM,
.exit_signal = 0,
};
pid_t pid;
pid = sys_clone3(&args, sizeof(args));
ASSERT_EQ(pid, -1);
ASSERT_EQ(errno, EINVAL);
}
/*
 * Basic test: create an autoreap child, let it exit, verify:
 * - pidfd becomes readable (poll returns POLLIN)
 * - PIDFD_GET_INFO returns the correct exit code
 * - waitpid() returns -1/ECHILD (no zombie)
 *
 * Skipped when clone3() fails with EINVAL.
 */
TEST(autoreap_basic)
{
struct pidfd_info info = { .mask = PIDFD_INFO_EXIT };
int pidfd = -1, ret;
struct pollfd pfd;
pid_t pid;
pid = create_autoreap_child(&pidfd);
if (pid < 0 && errno == EINVAL)
SKIP(return, "CLONE_AUTOREAP not supported");
ASSERT_GE(pid, 0);
/* Child: exit immediately with a status the parent can recognize. */
if (pid == 0)
_exit(42);
/* Parent: pidfd started out as -1, so clone3() must have filled it in. */
ASSERT_GE(pidfd, 0);
/* Wait for the child to exit via pidfd poll (5000ms timeout). */
pfd.fd = pidfd;
pfd.events = POLLIN;
ret = poll(&pfd, 1, 5000);
ASSERT_EQ(ret, 1);
ASSERT_TRUE(pfd.revents & POLLIN);
/* Verify exit info via PIDFD_GET_INFO. */
ret = ioctl(pidfd, PIDFD_GET_INFO, &info);
ASSERT_EQ(ret, 0);
ASSERT_TRUE(info.mask & PIDFD_INFO_EXIT);
/*
 * exit_code is in waitpid format: for _exit(42),
 * WIFEXITED is true and WEXITSTATUS is 42.
 */
ASSERT_TRUE(WIFEXITED(info.exit_code));
ASSERT_EQ(WEXITSTATUS(info.exit_code), 42);
/* Verify no zombie: waitpid should fail with ECHILD. */
ret = waitpid(pid, NULL, WNOHANG);
ASSERT_EQ(ret, -1);
ASSERT_EQ(errno, ECHILD);
close(pidfd);
}
/*
 * Test that an autoreap child killed by a signal reports
 * the correct exit info: the parent SIGKILLs the child through its
 * pidfd, then expects PIDFD_GET_INFO to report WIFSIGNALED with
 * WTERMSIG == SIGKILL and waitpid() to fail with ECHILD.
 *
 * Skipped when clone3() fails with EINVAL.
 */
TEST(autoreap_signaled)
{
struct pidfd_info info = { .mask = PIDFD_INFO_EXIT };
int pidfd = -1, ret;
struct pollfd pfd;
pid_t pid;
pid = create_autoreap_child(&pidfd);
if (pid < 0 && errno == EINVAL)
SKIP(return, "CLONE_AUTOREAP not supported");
ASSERT_GE(pid, 0);
/*
 * Child: block until a signal arrives. The _exit(1) is only reached
 * if pause() returns, which the SIGKILL sent below does not allow.
 */
if (pid == 0) {
pause();
_exit(1);
}
ASSERT_GE(pidfd, 0);
/* Kill the child. */
ret = sys_pidfd_send_signal(pidfd, SIGKILL, NULL, 0);
ASSERT_EQ(ret, 0);
/* Wait for exit via pidfd (5000ms timeout). */
pfd.fd = pidfd;
pfd.events = POLLIN;
ret = poll(&pfd, 1, 5000);
ASSERT_EQ(ret, 1);
ASSERT_TRUE(pfd.revents & POLLIN);
/* Verify signal info. */
ret = ioctl(pidfd, PIDFD_GET_INFO, &info);
ASSERT_EQ(ret, 0);
ASSERT_TRUE(info.mask & PIDFD_INFO_EXIT);
ASSERT_TRUE(WIFSIGNALED(info.exit_code));
ASSERT_EQ(WTERMSIG(info.exit_code), SIGKILL);
/* No zombie. */
ret = waitpid(pid, NULL, WNOHANG);
ASSERT_EQ(ret, -1);
ASSERT_EQ(errno, ECHILD);
close(pidfd);
}
/*
 * Test autoreap survives reparenting: middle process creates an
 * autoreap grandchild, then exits. The grandchild gets reparented
 * to us (the grandparent, which is a subreaper). When the grandchild
 * exits, it should still be autoreaped - no zombie under us.
 *
 * The middle process reports over a socketpair: either the grandchild's
 * PID as a decimal string, or the single byte "E" if creating the
 * grandchild failed (in which case the test is skipped).
 */
TEST(autoreap_reparent)
{
int ipc_sockets[2], ret;
int pidfd = -1;
struct pollfd pfd;
pid_t mid_pid, grandchild_pid;
char buf[32] = {};
/* Make ourselves a subreaper so reparented children come to us. */
ret = prctl(PR_SET_CHILD_SUBREAPER, 1);
ASSERT_EQ(ret, 0);
ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
ASSERT_EQ(ret, 0);
mid_pid = fork();
ASSERT_GE(mid_pid, 0);
if (mid_pid == 0) {
/* Middle child: create an autoreap grandchild. */
int gc_pidfd = -1;
/* The middle child only writes; drop the read end. */
close(ipc_sockets[0]);
grandchild_pid = create_autoreap_child(&gc_pidfd);
if (grandchild_pid < 0) {
/* Any clone3() failure is reported as "E". */
write_nointr(ipc_sockets[1], "E", 1);
close(ipc_sockets[1]);
_exit(1);
}
if (grandchild_pid == 0) {
/* Grandchild: wait for signal to exit. */
close(ipc_sockets[1]);
if (gc_pidfd >= 0)
close(gc_pidfd);
pause();
_exit(0);
}
/* Send grandchild PID to grandparent. */
snprintf(buf, sizeof(buf), "%d", grandchild_pid);
write_nointr(ipc_sockets[1], buf, strlen(buf));
close(ipc_sockets[1]);
if (gc_pidfd >= 0)
close(gc_pidfd);
/* Middle child exits, grandchild gets reparented. */
_exit(0);
}
/* Grandparent only reads; drop the write end. */
close(ipc_sockets[1]);
/*
 * Read grandchild's PID. One byte fewer than the buffer size is
 * requested so the zero-initialized buffer stays NUL-terminated.
 */
ret = read_nointr(ipc_sockets[0], buf, sizeof(buf) - 1);
close(ipc_sockets[0]);
ASSERT_GT(ret, 0);
if (buf[0] == 'E') {
/* Reap the middle child and undo the subreaper setting before skipping. */
waitpid(mid_pid, NULL, 0);
prctl(PR_SET_CHILD_SUBREAPER, 0);
SKIP(return, "CLONE_AUTOREAP not supported");
}
grandchild_pid = atoi(buf);
ASSERT_GT(grandchild_pid, 0);
/* Wait for the middle child to exit. */
ret = waitpid(mid_pid, NULL, 0);
ASSERT_EQ(ret, mid_pid);
/*
 * Now the grandchild is reparented to us (subreaper).
 * Open a pidfd for the grandchild and kill it.
 */
pidfd = sys_pidfd_open(grandchild_pid, 0);
ASSERT_GE(pidfd, 0);
ret = sys_pidfd_send_signal(pidfd, SIGKILL, NULL, 0);
ASSERT_EQ(ret, 0);
/* Wait for it to exit via pidfd poll (5000ms timeout). */
pfd.fd = pidfd;
pfd.events = POLLIN;
ret = poll(&pfd, 1, 5000);
ASSERT_EQ(ret, 1);
ASSERT_TRUE(pfd.revents & POLLIN);
/*
 * The grandchild should have been autoreaped even though
 * we (the new parent) haven't set SA_NOCLDWAIT.
 * waitpid should return -1/ECHILD.
 *
 * EXPECT_* rather than ASSERT_* is used here, so the cleanup
 * below still runs when these checks fail.
 */
ret = waitpid(grandchild_pid, NULL, WNOHANG);
EXPECT_EQ(ret, -1);
EXPECT_EQ(errno, ECHILD);
close(pidfd);
/* Clean up subreaper status. */
prctl(PR_SET_CHILD_SUBREAPER, 0);
}
static int thread_sock_fd;
static void *thread_func(void *arg)
{
/* Signal parent we're running. */
write_nointr(thread_sock_fd, "1", 1);
/* Give main thread time to call _exit() first. */
usleep(200000);
return NULL;
}
/*
 * Test that an autoreap child with multiple threads is properly
 * autoreaped only after all threads have exited.
 *
 * The child starts a detached helper thread and then calls _exit(99)
 * from its main thread; the parent expects the pidfd to report exit
 * status 99 and waitpid() to fail with ECHILD.
 *
 * Skipped when clone3() fails with EINVAL.
 */
TEST(autoreap_multithreaded)
{
struct pidfd_info info = { .mask = PIDFD_INFO_EXIT };
int ipc_sockets[2], ret;
int pidfd = -1;
struct pollfd pfd;
pid_t pid;
char c;
ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
ASSERT_EQ(ret, 0);
pid = create_autoreap_child(&pidfd);
if (pid < 0 && errno == EINVAL) {
close(ipc_sockets[0]);
close(ipc_sockets[1]);
SKIP(return, "CLONE_AUTOREAP not supported");
}
ASSERT_GE(pid, 0);
if (pid == 0) {
pthread_t thread;
close(ipc_sockets[0]);
/*
 * Create a sub-thread that outlives the main thread.
 * The thread signals readiness, then sleeps.
 * The main thread waits briefly, then calls _exit().
 */
thread_sock_fd = ipc_sockets[1];
pthread_create(&thread, NULL, thread_func, NULL);
pthread_detach(thread);
/*
 * Sleep 100ms to give the thread time to start. This is shorter
 * than the 200ms the thread itself sleeps after signalling.
 */
usleep(100000);
/* Main thread exits; sub-thread is still alive. */
_exit(99);
}
close(ipc_sockets[1]);
/* Wait for the sub-thread to signal readiness. */
ret = read_nointr(ipc_sockets[0], &c, 1);
close(ipc_sockets[0]);
ASSERT_EQ(ret, 1);
/* Wait for the process to fully exit via pidfd poll (5000ms timeout). */
pfd.fd = pidfd;
pfd.events = POLLIN;
ret = poll(&pfd, 1, 5000);
ASSERT_EQ(ret, 1);
ASSERT_TRUE(pfd.revents & POLLIN);
/* Verify exit info: the status passed to _exit() by the main thread. */
ret = ioctl(pidfd, PIDFD_GET_INFO, &info);
ASSERT_EQ(ret, 0);
ASSERT_TRUE(info.mask & PIDFD_INFO_EXIT);
ASSERT_TRUE(WIFEXITED(info.exit_code));
ASSERT_EQ(WEXITSTATUS(info.exit_code), 99);
/* No zombie. */
ret = waitpid(pid, NULL, WNOHANG);
ASSERT_EQ(ret, -1);
ASSERT_EQ(errno, ECHILD);
close(pidfd);
}
/*
 * Test that autoreap is NOT inherited by grandchildren.
 *
 * The autoreap child fork()s a plain grandchild that exits with status
 * 77, waitpid()s for it, and reports one byte over a socketpair:
 * "P" if the grandchild could be waited for with the expected status,
 * "F" if not, "E" if fork() failed. The parent requires "P" and then
 * checks that the autoreap child itself left nothing to wait for.
 *
 * Skipped when clone3() fails with EINVAL.
 */
TEST(autoreap_no_inherit)
{
int ipc_sockets[2], ret;
int pidfd = -1;
pid_t pid;
char buf[2] = {};
struct pollfd pfd;
ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
ASSERT_EQ(ret, 0);
pid = create_autoreap_child(&pidfd);
if (pid < 0 && errno == EINVAL) {
close(ipc_sockets[0]);
close(ipc_sockets[1]);
SKIP(return, "CLONE_AUTOREAP not supported");
}
ASSERT_GE(pid, 0);
if (pid == 0) {
pid_t gc;
int status;
close(ipc_sockets[0]);
/* Autoreap child forks a grandchild (without autoreap). */
gc = fork();
if (gc < 0) {
write_nointr(ipc_sockets[1], "E", 1);
_exit(1);
}
if (gc == 0) {
/* Grandchild: exit immediately. */
close(ipc_sockets[1]);
_exit(77);
}
/*
 * The grandchild should become a regular zombie
 * since it was NOT created with CLONE_AUTOREAP.
 * Wait for it to verify.
 */
ret = waitpid(gc, &status, 0);
if (ret == gc && WIFEXITED(status) &&
WEXITSTATUS(status) == 77) {
write_nointr(ipc_sockets[1], "P", 1);
} else {
write_nointr(ipc_sockets[1], "F", 1);
}
close(ipc_sockets[1]);
_exit(0);
}
close(ipc_sockets[1]);
/* Read the single-byte verdict from the autoreap child. */
ret = read_nointr(ipc_sockets[0], buf, 1);
close(ipc_sockets[0]);
ASSERT_EQ(ret, 1);
/*
 * 'P' means the autoreap child was able to waitpid() its
 * grandchild (correct - grandchild should be a normal zombie,
 * not autoreaped).
 */
ASSERT_EQ(buf[0], 'P');
/* Wait for the autoreap child to exit (5000ms timeout). */
pfd.fd = pidfd;
pfd.events = POLLIN;
ret = poll(&pfd, 1, 5000);
ASSERT_EQ(ret, 1);
/* Autoreap child itself should be autoreaped. */
ret = waitpid(pid, NULL, WNOHANG);
ASSERT_EQ(ret, -1);
ASSERT_EQ(errno, ECHILD);
close(pidfd);
}
/*
 * Test that CLONE_NNP sets no_new_privs on the child.
 * The child checks via prctl(PR_GET_NO_NEW_PRIVS) and reports back
 * through its exit status (0 if set, 1 if not), which the parent reads
 * with PIDFD_GET_INFO.
 * The parent must NOT have no_new_privs set afterwards.
 *
 * Skipped when clone3() fails with EINVAL.
 */
TEST(nnp_sets_no_new_privs)
{
struct __clone_args args = {
.flags = CLONE_PIDFD | CLONE_AUTOREAP | CLONE_NNP,
.exit_signal = 0,
};
struct pidfd_info info = { .mask = PIDFD_INFO_EXIT };
int pidfd = -1, ret;
struct pollfd pfd;
pid_t pid;
/* Ensure parent does not already have no_new_privs. */
ret = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0);
ASSERT_EQ(ret, 0) {
TH_LOG("Parent already has no_new_privs set, cannot run test");
}
/* The pidfd output pointer is set here rather than in the initializer. */
args.pidfd = ptr_to_u64(&pidfd);
pid = sys_clone3(&args, sizeof(args));
if (pid < 0 && errno == EINVAL)
SKIP(return, "CLONE_NNP not supported");
ASSERT_GE(pid, 0);
if (pid == 0) {
/*
 * Child: check no_new_privs. Exit 0 if set, 1 if not.
 */
ret = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0);
_exit(ret == 1 ? 0 : 1);
}
ASSERT_GE(pidfd, 0);
/* Parent must still NOT have no_new_privs. */
ret = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0);
ASSERT_EQ(ret, 0) {
TH_LOG("Parent got no_new_privs after creating CLONE_NNP child");
}
/* Wait for child to exit (5000ms timeout). */
pfd.fd = pidfd;
pfd.events = POLLIN;
ret = poll(&pfd, 1, 5000);
ASSERT_EQ(ret, 1);
/* Verify child exited with 0 (no_new_privs was set). */
ret = ioctl(pidfd, PIDFD_GET_INFO, &info);
ASSERT_EQ(ret, 0);
ASSERT_TRUE(info.mask & PIDFD_INFO_EXIT);
ASSERT_TRUE(WIFEXITED(info.exit_code));
ASSERT_EQ(WEXITSTATUS(info.exit_code), 0) {
TH_LOG("Child did not have no_new_privs set");
}
close(pidfd);
}
/*
 * CLONE_NNP combined with CLONE_THREAD is expected to be refused:
 * clone3() must return -1 with errno set to EINVAL.
 */
TEST(nnp_rejects_thread)
{
	struct __clone_args args = {};
	pid_t pid;

	/* CLONE_SIGHAND and CLONE_VM accompany CLONE_THREAD in this request. */
	args.flags = CLONE_NNP | CLONE_THREAD | CLONE_SIGHAND | CLONE_VM;
	args.exit_signal = 0;

	pid = sys_clone3(&args, sizeof(args));
	ASSERT_EQ(pid, -1);
	ASSERT_EQ(errno, EINVAL);
}
/*
 * A child obtained from create_autoreap_child() must NOT have
 * no_new_privs set; only CLONE_NNP is supposed to do that.
 *
 * The child reports its own PR_GET_NO_NEW_PRIVS result through its exit
 * status, which the parent reads back with PIDFD_GET_INFO.
 */
TEST(autoreap_no_new_privs_unset)
{
	struct pidfd_info info = { .mask = PIDFD_INFO_EXIT };
	struct pollfd pfd;
	int pidfd = -1;
	pid_t pid;
	int ret;

	pid = create_autoreap_child(&pidfd);
	if (pid < 0 && errno == EINVAL)
		SKIP(return, "CLONE_AUTOREAP not supported");
	ASSERT_GE(pid, 0);

	if (pid == 0) {
		/* Child: exit 0 when no_new_privs is clear, 1 in every other case. */
		if (prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0) == 0)
			_exit(0);
		_exit(1);
	}

	ASSERT_GE(pidfd, 0);

	/* Wait (up to 5s) for the pidfd to report the child's exit. */
	pfd = (struct pollfd){ .fd = pidfd, .events = POLLIN };
	ret = poll(&pfd, 1, 5000);
	ASSERT_EQ(ret, 1);

	/* Fetch the exit status through the pidfd and decode it. */
	ret = ioctl(pidfd, PIDFD_GET_INFO, &info);
	ASSERT_EQ(ret, 0);
	ASSERT_TRUE(info.mask & PIDFD_INFO_EXIT);
	ASSERT_TRUE(WIFEXITED(info.exit_code));
	ASSERT_EQ(WEXITSTATUS(info.exit_code), 0) {
		TH_LOG("Plain autoreap child unexpectedly has no_new_privs");
	}

	close(pidfd);
}
/*
 * Helper: clone3() a child with
 * CLONE_PIDFD | CLONE_PIDFD_AUTOKILL | CLONE_AUTOREAP | CLONE_NNP
 * and no exit signal.
 *
 * @pidfd: location handed to clone3() as the pidfd output slot.
 *
 * Returns the sys_clone3() result unchanged.
 */
static pid_t create_autokill_child(int *pidfd)
{
	struct __clone_args args = {};

	args.flags = CLONE_PIDFD | CLONE_PIDFD_AUTOKILL |
		     CLONE_AUTOREAP | CLONE_NNP;
	args.pidfd = ptr_to_u64(pidfd);
	args.exit_signal = 0;

	return sys_clone3(&args, sizeof(args));
}
/*
 * Basic autokill scenario: the child parks itself in pause(), the parent
 * closes the pidfd it got from clone3, and the child is then expected to
 * be killed and autoreaped.
 */
TEST(autokill_basic)
{
	struct pollfd pfd;
	int observer_fd = -1;
	int pidfd = -1;
	pid_t pid;
	int ret;

	pid = create_autokill_child(&pidfd);
	if (pid < 0 && errno == EINVAL)
		SKIP(return, "CLONE_PIDFD_AUTOKILL not supported");
	ASSERT_GE(pid, 0);

	if (pid == 0) {
		/* Child: sleep until a signal arrives; exit 1 if pause() returns. */
		pause();
		_exit(1);
	}

	ASSERT_GE(pidfd, 0);

	/*
	 * The clone3 pidfd is about to be closed, so take a second pidfd
	 * via pidfd_open() to keep a handle for observing the child's death.
	 */
	observer_fd = sys_pidfd_open(pid, 0);
	ASSERT_GE(observer_fd, 0);

	/* Closing the clone3 pidfd is what should trigger autokill. */
	close(pidfd);

	/* Wait (up to 5s) for the child to die, using the pidfd_open'd fd. */
	pfd = (struct pollfd){ .fd = observer_fd, .events = POLLIN };
	ret = poll(&pfd, 1, 5000);
	ASSERT_EQ(ret, 1);
	ASSERT_TRUE(pfd.revents & POLLIN);

	/* Child should be autoreaped: after a short grace period, no zombie. */
	usleep(100000);
	ret = waitpid(pid, NULL, WNOHANG);
	ASSERT_EQ(ret, -1);
	ASSERT_EQ(errno, ECHILD);

	close(observer_fd);
}
/*
 * Requesting CLONE_PIDFD_AUTOKILL without CLONE_PIDFD must be refused
 * with EINVAL.
 */
TEST(autokill_requires_pidfd)
{
	struct __clone_args args = {};
	pid_t pid;

	/* Deliberately omit CLONE_PIDFD from the flag set. */
	args.flags = CLONE_PIDFD_AUTOKILL | CLONE_AUTOREAP;
	args.exit_signal = 0;

	pid = sys_clone3(&args, sizeof(args));
	ASSERT_EQ(pid, -1);
	ASSERT_EQ(errno, EINVAL);
}
/*
 * Requesting CLONE_PIDFD_AUTOKILL without CLONE_AUTOREAP must be refused
 * with EINVAL.
 */
TEST(autokill_requires_autoreap)
{
	struct __clone_args args = {};
	int pidfd = -1;
	pid_t pid;

	/* Deliberately omit CLONE_AUTOREAP from the flag set. */
	args.flags = CLONE_PIDFD | CLONE_PIDFD_AUTOKILL;
	args.pidfd = ptr_to_u64(&pidfd);
	args.exit_signal = 0;

	pid = sys_clone3(&args, sizeof(args));
	ASSERT_EQ(pid, -1);
	ASSERT_EQ(errno, EINVAL);
}
/*
 * CLONE_PIDFD_AUTOKILL together with CLONE_THREAD must be refused with
 * EINVAL.
 */
TEST(autokill_rejects_thread)
{
	struct __clone_args args = {};
	int pidfd = -1;
	pid_t pid;

	/* Full autokill flag set plus the thread-creation flags. */
	args.flags = CLONE_PIDFD | CLONE_PIDFD_AUTOKILL | CLONE_AUTOREAP |
		     CLONE_THREAD | CLONE_SIGHAND | CLONE_VM;
	args.pidfd = ptr_to_u64(&pidfd);
	args.exit_signal = 0;

	pid = sys_clone3(&args, sizeof(args));
	ASSERT_EQ(pid, -1);
	ASSERT_EQ(errno, EINVAL);
}
/*
 * Only the pidfd returned by clone3 carries autokill; a pidfd obtained
 * through pidfd_open() does not.
 *
 * Step 1: close a pidfd_open'd fd - the child should survive.
 * Step 2: close the clone3 pidfd - the child should be killed and
 *         autoreaped.
 */
TEST(autokill_pidfd_open_no_effect)
{
	struct pollfd pfd;
	int extra_fd = -1;
	int pidfd = -1;
	pid_t pid;
	int ret;

	pid = create_autokill_child(&pidfd);
	if (pid < 0 && errno == EINVAL)
		SKIP(return, "CLONE_PIDFD_AUTOKILL not supported");
	ASSERT_GE(pid, 0);

	if (pid == 0) {
		/* Child: sleep until a signal arrives; exit 1 if pause() returns. */
		pause();
		_exit(1);
	}

	ASSERT_GE(pidfd, 0);

	/* Step 1: open and immediately drop a pidfd_open() reference. */
	extra_fd = sys_pidfd_open(pid, 0);
	ASSERT_GE(extra_fd, 0);
	close(extra_fd);

	/* Give an (unwanted) kill some time to take effect before checking. */
	usleep(200000);

	/*
	 * Non-blocking poll on the clone3 pidfd: no event means the child
	 * has not exited.
	 */
	pfd = (struct pollfd){ .fd = pidfd, .events = POLLIN };
	ret = poll(&pfd, 1, 0);
	ASSERT_EQ(ret, 0) {
		TH_LOG("Child died after closing pidfd_open fd — should still be alive");
	}

	/* Step 2: get a fresh observation fd, then drop the clone3 pidfd. */
	extra_fd = sys_pidfd_open(pid, 0);
	ASSERT_GE(extra_fd, 0);

	/* Closing the clone3 pidfd is what should trigger autokill. */
	close(pidfd);

	/* Wait (up to 5s) for the child's exit to show up on the observer. */
	pfd = (struct pollfd){ .fd = extra_fd, .events = POLLIN };
	ret = poll(&pfd, 1, 5000);
	ASSERT_EQ(ret, 1);
	ASSERT_TRUE(pfd.revents & POLLIN);

	/* Child should be autoreaped: after a short grace period, no zombie. */
	usleep(100000);
	ret = waitpid(pid, NULL, WNOHANG);
	ASSERT_EQ(ret, -1);
	ASSERT_EQ(errno, ECHILD);

	close(extra_fd);
}
/*
* Test that CLONE_PIDFD_AUTOKILL without CLONE_NNP fails with EPERM
* for an unprivileged caller.
*/
TEST(autokill_requires_cap_sys_admin)
{
int pidfd = -1, ret;
struct __clone_args args = {
.flags = CLONE_PIDFD | CLONE_PIDFD_AUTOKILL |
CLONE_AUTOREAP,
.exit_signal = 0,
.pidfd = ptr_to_u64(&pidfd),
};
pid_t pid;
/* Drop all capabilities so we lack CAP_SYS_ADMIN. */
ret = drop_all_caps();
ASSERT_EQ(ret, 0);
pid = sys_clone3(&args, sizeof(args));
ASSERT_EQ(pid, -1);
ASSERT_EQ(errno, EPERM);
}
/*
* Test that CLONE_PIDFD_AUTOKILL without CLONE_NNP succeeds with
* CAP_SYS_ADMIN.
*/
TEST(autokill_without_nnp_with_cap)
{
struct __clone_args args = {
.flags = CLONE_PIDFD | CLONE_PIDFD_AUTOKILL |
CLONE_AUTOREAP,
.exit_signal = 0,
};
struct pidfd_info info = { .mask = PIDFD_INFO_EXIT };
int pidfd = -1, ret;
struct pollfd pfd;
pid_t pid;
if (geteuid() != 0)
SKIP(return, "Need root/CAP_SYS_ADMIN");
args.pidfd = ptr_to_u64(&pidfd);
pid = sys_clone3(&args, sizeof(args));
if (pid < 0 && errno == EINVAL)
SKIP(return, "CLONE_PIDFD_AUTOKILL not supported");
ASSERT_GE(pid, 0);
if (pid == 0)
_exit(0);
ASSERT_GE(pidfd, 0);
/* Wait for child to exit. */
pfd.fd = pidfd;
pfd.events = POLLIN;
ret = poll(&pfd, 1, 5000);
ASSERT_EQ(ret, 1);
ret = ioctl(pidfd, PIDFD_GET_INFO, &info);
ASSERT_EQ(ret, 0);
ASSERT_TRUE(info.mask & PIDFD_INFO_EXIT);
ASSERT_TRUE(WIFEXITED(info.exit_code));
ASSERT_EQ(WEXITSTATUS(info.exit_code), 0);
close(pidfd);
}
/*
 * Test-harness entry point macro.
 * NOTE(review): presumably expands to the main() that runs every TEST()
 * defined above — confirm against the harness header this file includes.
 */
TEST_HARNESS_MAIN

View File

@@ -724,6 +724,7 @@ TEST(supported_mask_field)
ASSERT_TRUE(!!(info.supported_mask & PIDFD_INFO_COREDUMP));
ASSERT_TRUE(!!(info.supported_mask & PIDFD_INFO_SUPPORTED_MASK));
ASSERT_TRUE(!!(info.supported_mask & PIDFD_INFO_COREDUMP_SIGNAL));
ASSERT_TRUE(!!(info.supported_mask & PIDFD_INFO_COREDUMP_CODE));
/* Clean up */
sys_pidfd_send_signal(pidfd, SIGKILL, NULL, 0);