mirror of
https://github.com/torvalds/linux.git
synced 2026-04-18 06:44:00 -04:00
Merge tag 'vfs-7.1-rc1.pidfs' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs
Pull clone and pidfs updates from Christian Brauner:
"Add three new clone3() flags for pidfd-based process lifecycle
management.
CLONE_AUTOREAP:
CLONE_AUTOREAP makes a child process auto-reap on exit without ever
becoming a zombie. This is a per-process property in contrast to
the existing auto-reap mechanism via SA_NOCLDWAIT or SIG_IGN for
SIGCHLD which applies to all children of a given parent.
Currently the only way to automatically reap children is to set
SA_NOCLDWAIT or SIG_IGN on SIGCHLD. This is a parent-scoped
property affecting all children which makes it unsuitable for
libraries or applications that need selective auto-reaping of
specific children while still being able to wait() on others.
CLONE_AUTOREAP stores an autoreap flag in the child's
signal_struct. When the child exits do_notify_parent() checks this
flag and causes exit_notify() to transition the task directly to
EXIT_DEAD. Since the flag lives on the child it survives
reparenting: if the original parent exits and the child is
reparented to a subreaper or init the child still auto-reaps when
it eventually exits. This is cleaner than forcing the subreaper to
get SIGCHLD and then reaping it. If the parent doesn't care the
subreaper won't care. If there's a subreaper that would care it
would be easy enough to add a prctl() that either just turns back
on SIGCHLD and turns off auto-reaping or a prctl() that just
notifies the subreaper whenever a child is reparented to it.
CLONE_AUTOREAP can be combined with CLONE_PIDFD to allow the parent
to monitor the child's exit via poll() and retrieve exit status via
PIDFD_GET_INFO. Without CLONE_PIDFD it provides a fire-and-forget
pattern. No exit signal is delivered so exit_signal must be zero.
CLONE_THREAD and CLONE_PARENT are rejected: CLONE_THREAD because
autoreap is a process-level property, and CLONE_PARENT because an
autoreap child reparented via CLONE_PARENT could become an
invisible zombie under a parent that never calls wait().
The flag is not inherited by the autoreap process's own children.
Each child that should be autoreaped must be explicitly created
with CLONE_AUTOREAP.
CLONE_NNP:
CLONE_NNP sets no_new_privs on the child at clone time. Unlike
prctl(PR_SET_NO_NEW_PRIVS) which a process sets on itself,
CLONE_NNP allows the parent to impose no_new_privs on the child at
creation without affecting the parent's own privileges.
CLONE_THREAD is rejected because threads share credentials.
CLONE_NNP is useful on its own for any spawn-and-sandbox pattern
but was specifically introduced to enable unprivileged usage of
CLONE_PIDFD_AUTOKILL.
CLONE_PIDFD_AUTOKILL:
This flag ties a child's lifetime to the pidfd returned from
clone3(). When the last reference to the struct file created by
clone3() is closed the kernel sends SIGKILL to the child. A pidfd
obtained via pidfd_open() for the same process does not keep the
child alive and does not trigger autokill - only the specific
struct file from clone3() has this property. This is useful for
container runtimes, service managers, and sandboxed subprocess
execution - any scenario where the child must die if the parent
crashes or abandons the pidfd or just wants a throwaway helper
process.
CLONE_PIDFD_AUTOKILL requires both CLONE_PIDFD and CLONE_AUTOREAP.
It requires CLONE_PIDFD because the whole point is tying the
child's lifetime to the pidfd. It requires CLONE_AUTOREAP because a
killed child with no one to reap it would become a zombie - the
primary use case is the parent crashing or abandoning the pidfd so
no one is around to call waitpid(). CLONE_THREAD is rejected
because autokill targets a process not a thread.
If CLONE_NNP is specified together with CLONE_PIDFD_AUTOKILL an
unprivileged user may spawn a process that is autokilled. The child
cannot escalate privileges via setuid/setgid exec after being
spawned. If CLONE_PIDFD_AUTOKILL is specified without CLONE_NNP the
caller must have CAP_SYS_ADMIN in its user namespace."
* tag 'vfs-7.1-rc1.pidfs' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
selftests: check pidfd_info->coredump_code correctness
pidfds: add coredump_code field to pidfd_info
kselftest/coredump: reintroduce null pointer dereference
selftests/pidfd: add CLONE_PIDFD_AUTOKILL tests
selftests/pidfd: add CLONE_NNP tests
selftests/pidfd: add CLONE_AUTOREAP tests
pidfd: add CLONE_PIDFD_AUTOKILL
clone: add CLONE_NNP
clone: add CLONE_AUTOREAP
This commit is contained in:
50
fs/pidfs.c
50
fs/pidfs.c
@@ -8,6 +8,8 @@
|
||||
#include <linux/mount.h>
|
||||
#include <linux/pid.h>
|
||||
#include <linux/pidfs.h>
|
||||
#include <linux/sched/signal.h>
|
||||
#include <linux/signal.h>
|
||||
#include <linux/pid_namespace.h>
|
||||
#include <linux/poll.h>
|
||||
#include <linux/proc_fs.h>
|
||||
@@ -54,6 +56,7 @@ struct pidfs_anon_attr {
|
||||
};
|
||||
__u32 coredump_mask;
|
||||
__u32 coredump_signal;
|
||||
__u32 coredump_code;
|
||||
};
|
||||
|
||||
static struct rhashtable pidfs_ino_ht;
|
||||
@@ -358,7 +361,8 @@ static __u32 pidfs_coredump_mask(unsigned long mm_flags)
|
||||
PIDFD_INFO_EXIT | \
|
||||
PIDFD_INFO_COREDUMP | \
|
||||
PIDFD_INFO_SUPPORTED_MASK | \
|
||||
PIDFD_INFO_COREDUMP_SIGNAL)
|
||||
PIDFD_INFO_COREDUMP_SIGNAL | \
|
||||
PIDFD_INFO_COREDUMP_CODE)
|
||||
|
||||
static long pidfd_info(struct file *file, unsigned int cmd, unsigned long arg)
|
||||
{
|
||||
@@ -372,7 +376,7 @@ static long pidfd_info(struct file *file, unsigned int cmd, unsigned long arg)
|
||||
const struct cred *c;
|
||||
__u64 mask;
|
||||
|
||||
BUILD_BUG_ON(sizeof(struct pidfd_info) != PIDFD_INFO_SIZE_VER2);
|
||||
BUILD_BUG_ON(sizeof(struct pidfd_info) != PIDFD_INFO_SIZE_VER3);
|
||||
|
||||
if (!uinfo)
|
||||
return -EINVAL;
|
||||
@@ -405,9 +409,10 @@ static long pidfd_info(struct file *file, unsigned int cmd, unsigned long arg)
|
||||
if (mask & PIDFD_INFO_COREDUMP) {
|
||||
if (test_bit(PIDFS_ATTR_BIT_COREDUMP, &attr->attr_mask)) {
|
||||
smp_rmb();
|
||||
kinfo.mask |= PIDFD_INFO_COREDUMP | PIDFD_INFO_COREDUMP_SIGNAL;
|
||||
kinfo.mask |= PIDFD_INFO_COREDUMP | PIDFD_INFO_COREDUMP_SIGNAL | PIDFD_INFO_COREDUMP_CODE;
|
||||
kinfo.coredump_mask = attr->coredump_mask;
|
||||
kinfo.coredump_signal = attr->coredump_signal;
|
||||
kinfo.coredump_code = attr->coredump_code;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -662,7 +667,28 @@ static long pidfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
|
||||
return open_namespace(ns_common);
|
||||
}
|
||||
|
||||
static int pidfs_file_release(struct inode *inode, struct file *file)
|
||||
{
|
||||
struct pid *pid = inode->i_private;
|
||||
struct task_struct *task;
|
||||
|
||||
if (!(file->f_flags & PIDFD_AUTOKILL))
|
||||
return 0;
|
||||
|
||||
guard(rcu)();
|
||||
task = pid_task(pid, PIDTYPE_TGID);
|
||||
if (!task)
|
||||
return 0;
|
||||
|
||||
/* Not available for kthreads or user workers for now. */
|
||||
if (WARN_ON_ONCE(task->flags & (PF_KTHREAD | PF_USER_WORKER)))
|
||||
return 0;
|
||||
do_send_sig_info(SIGKILL, SEND_SIG_PRIV, task, PIDTYPE_TGID);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const struct file_operations pidfs_file_operations = {
|
||||
.release = pidfs_file_release,
|
||||
.poll = pidfd_poll,
|
||||
#ifdef CONFIG_PROC_FS
|
||||
.show_fdinfo = pidfd_show_fdinfo,
|
||||
@@ -757,8 +783,9 @@ void pidfs_coredump(const struct coredump_params *cprm)
|
||||
PIDFD_COREDUMPED;
|
||||
/* If coredumping is set to skip we should never end up here. */
|
||||
VFS_WARN_ON_ONCE(attr->coredump_mask & PIDFD_COREDUMP_SKIP);
|
||||
/* Expose the signal number that caused the coredump. */
|
||||
/* Expose the signal number and code that caused the coredump. */
|
||||
attr->coredump_signal = cprm->siginfo->si_signo;
|
||||
attr->coredump_code = cprm->siginfo->si_code;
|
||||
smp_wmb();
|
||||
set_bit(PIDFS_ATTR_BIT_COREDUMP, &attr->attr_mask);
|
||||
}
|
||||
@@ -1112,11 +1139,11 @@ struct file *pidfs_alloc_file(struct pid *pid, unsigned int flags)
|
||||
int ret;
|
||||
|
||||
/*
|
||||
* Ensure that PIDFD_STALE can be passed as a flag without
|
||||
* overloading other uapi pidfd flags.
|
||||
* Ensure that internal pidfd flags don't overlap with each
|
||||
* other or with uapi pidfd flags.
|
||||
*/
|
||||
BUILD_BUG_ON(PIDFD_STALE == PIDFD_THREAD);
|
||||
BUILD_BUG_ON(PIDFD_STALE == PIDFD_NONBLOCK);
|
||||
BUILD_BUG_ON(hweight32(PIDFD_THREAD | PIDFD_NONBLOCK |
|
||||
PIDFD_STALE | PIDFD_AUTOKILL) != 4);
|
||||
|
||||
ret = path_from_stashed(&pid->stashed, pidfs_mnt, get_pid(pid), &path);
|
||||
if (ret < 0)
|
||||
@@ -1127,9 +1154,12 @@ struct file *pidfs_alloc_file(struct pid *pid, unsigned int flags)
|
||||
flags &= ~PIDFD_STALE;
|
||||
flags |= O_RDWR;
|
||||
pidfd_file = dentry_open(&path, flags, current_cred());
|
||||
/* Raise PIDFD_THREAD explicitly as do_dentry_open() strips it. */
|
||||
/*
|
||||
* Raise PIDFD_THREAD and PIDFD_AUTOKILL explicitly as
|
||||
* do_dentry_open() strips O_EXCL and O_TRUNC.
|
||||
*/
|
||||
if (!IS_ERR(pidfd_file))
|
||||
pidfd_file->f_flags |= (flags & PIDFD_THREAD);
|
||||
pidfd_file->f_flags |= (flags & (PIDFD_THREAD | PIDFD_AUTOKILL));
|
||||
|
||||
return pidfd_file;
|
||||
}
|
||||
|
||||
@@ -132,6 +132,7 @@ struct signal_struct {
|
||||
*/
|
||||
unsigned int is_child_subreaper:1;
|
||||
unsigned int has_child_subreaper:1;
|
||||
unsigned int autoreap:1;
|
||||
|
||||
#ifdef CONFIG_POSIX_TIMERS
|
||||
|
||||
|
||||
@@ -13,6 +13,7 @@
|
||||
#ifdef __KERNEL__
|
||||
#include <linux/sched.h>
|
||||
#define PIDFD_STALE CLONE_PIDFD
|
||||
#define PIDFD_AUTOKILL O_TRUNC
|
||||
#endif
|
||||
|
||||
/* Flags for pidfd_send_signal(). */
|
||||
@@ -28,10 +29,12 @@
|
||||
#define PIDFD_INFO_COREDUMP (1UL << 4) /* Only returned if requested. */
|
||||
#define PIDFD_INFO_SUPPORTED_MASK (1UL << 5) /* Want/got supported mask flags */
|
||||
#define PIDFD_INFO_COREDUMP_SIGNAL (1UL << 6) /* Always returned if PIDFD_INFO_COREDUMP is requested. */
|
||||
#define PIDFD_INFO_COREDUMP_CODE (1UL << 7) /* Always returned if PIDFD_INFO_COREDUMP is requested. */
|
||||
|
||||
#define PIDFD_INFO_SIZE_VER0 64 /* sizeof first published struct */
|
||||
#define PIDFD_INFO_SIZE_VER1 72 /* sizeof second published struct */
|
||||
#define PIDFD_INFO_SIZE_VER2 80 /* sizeof third published struct */
|
||||
#define PIDFD_INFO_SIZE_VER3 88 /* sizeof fourth published struct */
|
||||
|
||||
/*
|
||||
* Values for @coredump_mask in pidfd_info.
|
||||
@@ -98,6 +101,8 @@ struct pidfd_info {
|
||||
struct /* coredump info */ {
|
||||
__u32 coredump_mask;
|
||||
__u32 coredump_signal;
|
||||
__u32 coredump_code;
|
||||
__u32 coredump_pad; /* align supported_mask to 8 bytes */
|
||||
};
|
||||
__u64 supported_mask; /* Mask flags that this kernel supports */
|
||||
};
|
||||
|
||||
@@ -34,8 +34,11 @@
|
||||
#define CLONE_IO 0x80000000 /* Clone io context */
|
||||
|
||||
/* Flags for the clone3() syscall. */
|
||||
#define CLONE_CLEAR_SIGHAND 0x100000000ULL /* Clear any signal handler and reset to SIG_DFL. */
|
||||
#define CLONE_INTO_CGROUP 0x200000000ULL /* Clone into a specific cgroup given the right permissions. */
|
||||
#define CLONE_CLEAR_SIGHAND (1ULL << 32) /* Clear any signal handler and reset to SIG_DFL. */
|
||||
#define CLONE_INTO_CGROUP (1ULL << 33) /* Clone into a specific cgroup given the right permissions. */
|
||||
#define CLONE_AUTOREAP (1ULL << 34) /* Auto-reap child on exit. */
|
||||
#define CLONE_NNP (1ULL << 35) /* Set no_new_privs on child. */
|
||||
#define CLONE_PIDFD_AUTOKILL (1ULL << 36) /* Kill child when clone pidfd closes. */
|
||||
|
||||
/*
|
||||
* cloning flags intersect with CSIGNAL so can be used with unshare and clone3
|
||||
|
||||
@@ -2029,6 +2029,41 @@ __latent_entropy struct task_struct *copy_process(
|
||||
return ERR_PTR(-EINVAL);
|
||||
}
|
||||
|
||||
if (clone_flags & CLONE_AUTOREAP) {
|
||||
if (clone_flags & CLONE_THREAD)
|
||||
return ERR_PTR(-EINVAL);
|
||||
if (clone_flags & CLONE_PARENT)
|
||||
return ERR_PTR(-EINVAL);
|
||||
if (args->exit_signal)
|
||||
return ERR_PTR(-EINVAL);
|
||||
}
|
||||
|
||||
if ((clone_flags & CLONE_PARENT) && current->signal->autoreap)
|
||||
return ERR_PTR(-EINVAL);
|
||||
|
||||
if (clone_flags & CLONE_NNP) {
|
||||
if (clone_flags & CLONE_THREAD)
|
||||
return ERR_PTR(-EINVAL);
|
||||
}
|
||||
|
||||
if (clone_flags & CLONE_PIDFD_AUTOKILL) {
|
||||
if (!(clone_flags & CLONE_PIDFD))
|
||||
return ERR_PTR(-EINVAL);
|
||||
if (!(clone_flags & CLONE_AUTOREAP))
|
||||
return ERR_PTR(-EINVAL);
|
||||
if (clone_flags & CLONE_THREAD)
|
||||
return ERR_PTR(-EINVAL);
|
||||
/*
|
||||
* Without CLONE_NNP the child could escalate privileges
|
||||
* after being spawned, so require CAP_SYS_ADMIN.
|
||||
* With CLONE_NNP the child can't gain new privileges,
|
||||
* so allow unprivileged usage.
|
||||
*/
|
||||
if (!(clone_flags & CLONE_NNP) &&
|
||||
!ns_capable(current_user_ns(), CAP_SYS_ADMIN))
|
||||
return ERR_PTR(-EPERM);
|
||||
}
|
||||
|
||||
/*
|
||||
* Force any signals received before this point to be delivered
|
||||
* before the fork happens. Collect up signals sent to multiple
|
||||
@@ -2251,13 +2286,18 @@ __latent_entropy struct task_struct *copy_process(
|
||||
* if the fd table isn't shared).
|
||||
*/
|
||||
if (clone_flags & CLONE_PIDFD) {
|
||||
int flags = (clone_flags & CLONE_THREAD) ? PIDFD_THREAD : 0;
|
||||
unsigned flags = PIDFD_STALE;
|
||||
|
||||
if (clone_flags & CLONE_THREAD)
|
||||
flags |= PIDFD_THREAD;
|
||||
if (clone_flags & CLONE_PIDFD_AUTOKILL)
|
||||
flags |= PIDFD_AUTOKILL;
|
||||
|
||||
/*
|
||||
* Note that no task has been attached to @pid yet; indicate
|
||||
* that via CLONE_PIDFD.
|
||||
*/
|
||||
retval = pidfd_prepare(pid, flags | PIDFD_STALE, &pidfile);
|
||||
retval = pidfd_prepare(pid, flags, &pidfile);
|
||||
if (retval < 0)
|
||||
goto bad_fork_free_pid;
|
||||
pidfd = retval;
|
||||
@@ -2413,6 +2453,9 @@ __latent_entropy struct task_struct *copy_process(
|
||||
*/
|
||||
copy_seccomp(p);
|
||||
|
||||
if (clone_flags & CLONE_NNP)
|
||||
task_set_no_new_privs(p);
|
||||
|
||||
init_task_pid_links(p);
|
||||
if (likely(p->pid)) {
|
||||
ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace);
|
||||
@@ -2436,6 +2479,8 @@ __latent_entropy struct task_struct *copy_process(
|
||||
*/
|
||||
p->signal->has_child_subreaper = p->real_parent->signal->has_child_subreaper ||
|
||||
p->real_parent->signal->is_child_subreaper;
|
||||
if (clone_flags & CLONE_AUTOREAP)
|
||||
p->signal->autoreap = 1;
|
||||
list_add_tail(&p->sibling, &p->real_parent->children);
|
||||
list_add_tail_rcu(&p->tasks, &init_task.tasks);
|
||||
attach_pid(p, PIDTYPE_TGID);
|
||||
@@ -2897,7 +2942,8 @@ static bool clone3_args_valid(struct kernel_clone_args *kargs)
|
||||
{
|
||||
/* Verify that no unknown flags are passed along. */
|
||||
if (kargs->flags &
|
||||
~(CLONE_LEGACY_FLAGS | CLONE_CLEAR_SIGHAND | CLONE_INTO_CGROUP))
|
||||
~(CLONE_LEGACY_FLAGS | CLONE_CLEAR_SIGHAND | CLONE_INTO_CGROUP |
|
||||
CLONE_AUTOREAP | CLONE_NNP | CLONE_PIDFD_AUTOKILL))
|
||||
return false;
|
||||
|
||||
/*
|
||||
|
||||
@@ -549,7 +549,8 @@ static bool __ptrace_detach(struct task_struct *tracer, struct task_struct *p)
|
||||
if (!dead && thread_group_empty(p)) {
|
||||
if (!same_thread_group(p->real_parent, tracer))
|
||||
dead = do_notify_parent(p, p->exit_signal);
|
||||
else if (ignoring_children(tracer->sighand)) {
|
||||
else if (ignoring_children(tracer->sighand) ||
|
||||
p->signal->autoreap) {
|
||||
__wake_up_parent(p, tracer);
|
||||
dead = true;
|
||||
}
|
||||
|
||||
@@ -2251,6 +2251,10 @@ bool do_notify_parent(struct task_struct *tsk, int sig)
|
||||
if (psig->action[SIGCHLD-1].sa.sa_handler == SIG_IGN)
|
||||
sig = 0;
|
||||
}
|
||||
if (!tsk->ptrace && tsk->signal->autoreap) {
|
||||
autoreap = true;
|
||||
sig = 0;
|
||||
}
|
||||
/*
|
||||
* Send with __send_signal as si_pid and si_uid are in the
|
||||
* parent's namespaces.
|
||||
|
||||
@@ -1004,6 +1004,8 @@ out:
|
||||
*
|
||||
* Verify that when using socket-based coredump protocol,
|
||||
* the coredump_signal field is correctly exposed as SIGSEGV.
|
||||
* Also check that the coredump_code field is correctly exposed
|
||||
* as SEGV_MAPERR.
|
||||
*/
|
||||
TEST_F(coredump, socket_coredump_signal_sigsegv)
|
||||
{
|
||||
@@ -1079,6 +1081,18 @@ TEST_F(coredump, socket_coredump_signal_sigsegv)
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* Verify coredump_code is available and correct */
|
||||
if (!(info.mask & PIDFD_INFO_COREDUMP_CODE)) {
|
||||
fprintf(stderr, "socket_coredump_signal_sigsegv: PIDFD_INFO_COREDUMP_CODE not set in mask\n");
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (info.coredump_code != SEGV_MAPERR) {
|
||||
fprintf(stderr, "socket_coredump_signal_sigsegv: coredump_code=%d, expected SEGV_MAPERR=%d\n",
|
||||
info.coredump_code, SEGV_MAPERR);
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (!read_coredump_req(fd_coredump, &req)) {
|
||||
fprintf(stderr, "socket_coredump_signal_sigsegv: read_coredump_req failed\n");
|
||||
goto out;
|
||||
@@ -1128,6 +1142,8 @@ out:
|
||||
ASSERT_TRUE(!!(info.mask & PIDFD_INFO_COREDUMP));
|
||||
ASSERT_TRUE(!!(info.mask & PIDFD_INFO_COREDUMP_SIGNAL));
|
||||
ASSERT_EQ(info.coredump_signal, SIGSEGV);
|
||||
ASSERT_TRUE(!!(info.mask & PIDFD_INFO_COREDUMP_CODE));
|
||||
ASSERT_EQ(info.coredump_code, SEGV_MAPERR);
|
||||
|
||||
wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
|
||||
}
|
||||
@@ -1137,6 +1153,8 @@ out:
|
||||
*
|
||||
* Verify that when using socket-based coredump protocol,
|
||||
* the coredump_signal field is correctly exposed as SIGABRT.
|
||||
* Also check that the coredump_code field is correctly exposed
|
||||
* as SI_TKILL.
|
||||
*/
|
||||
TEST_F(coredump, socket_coredump_signal_sigabrt)
|
||||
{
|
||||
@@ -1212,6 +1230,12 @@ TEST_F(coredump, socket_coredump_signal_sigabrt)
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (info.coredump_code != SI_TKILL) {
|
||||
fprintf(stderr, "socket_coredump_signal_sigabrt: coredump_code=%d, expected SI_TKILL=%d\n",
|
||||
info.coredump_code, SI_TKILL);
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (!read_coredump_req(fd_coredump, &req)) {
|
||||
fprintf(stderr, "socket_coredump_signal_sigabrt: read_coredump_req failed\n");
|
||||
goto out;
|
||||
@@ -1261,6 +1285,8 @@ out:
|
||||
ASSERT_TRUE(!!(info.mask & PIDFD_INFO_COREDUMP));
|
||||
ASSERT_TRUE(!!(info.mask & PIDFD_INFO_COREDUMP_SIGNAL));
|
||||
ASSERT_EQ(info.coredump_signal, SIGABRT);
|
||||
ASSERT_TRUE(!!(info.mask & PIDFD_INFO_COREDUMP_CODE));
|
||||
ASSERT_EQ(info.coredump_code, SI_TKILL);
|
||||
|
||||
wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
|
||||
}
|
||||
|
||||
@@ -435,6 +435,8 @@ out:
|
||||
*
|
||||
* Verify that when using simple socket-based coredump (@ pattern),
|
||||
* the coredump_signal field is correctly exposed as SIGSEGV.
|
||||
* Also check that the coredump_code field is correctly exposed
|
||||
* as SEGV_MAPERR.
|
||||
*/
|
||||
TEST_F(coredump, socket_coredump_signal_sigsegv)
|
||||
{
|
||||
@@ -509,6 +511,18 @@ TEST_F(coredump, socket_coredump_signal_sigsegv)
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* Verify coredump_code is available and correct */
|
||||
if (!(info.mask & PIDFD_INFO_COREDUMP_CODE)) {
|
||||
fprintf(stderr, "socket_coredump_signal_sigsegv: PIDFD_INFO_COREDUMP_CODE not set in mask\n");
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (info.coredump_code != SEGV_MAPERR) {
|
||||
fprintf(stderr, "socket_coredump_signal_sigsegv: coredump_code=%d, expected SEGV_MAPERR=%d\n",
|
||||
info.coredump_code, SEGV_MAPERR);
|
||||
goto out;
|
||||
}
|
||||
|
||||
fd_core_file = open_coredump_tmpfile(self->fd_tmpfs_detached);
|
||||
if (fd_core_file < 0) {
|
||||
fprintf(stderr, "socket_coredump_signal_sigsegv: open_coredump_tmpfile failed: %m\n");
|
||||
@@ -572,6 +586,8 @@ out:
|
||||
ASSERT_TRUE(!!(info.mask & PIDFD_INFO_COREDUMP));
|
||||
ASSERT_TRUE(!!(info.mask & PIDFD_INFO_COREDUMP_SIGNAL));
|
||||
ASSERT_EQ(info.coredump_signal, SIGSEGV);
|
||||
ASSERT_TRUE(!!(info.mask & PIDFD_INFO_COREDUMP_CODE));
|
||||
ASSERT_EQ(info.coredump_code, SEGV_MAPERR);
|
||||
|
||||
wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
|
||||
}
|
||||
@@ -581,6 +597,8 @@ out:
|
||||
*
|
||||
* Verify that when using simple socket-based coredump (@ pattern),
|
||||
* the coredump_signal field is correctly exposed as SIGABRT.
|
||||
* Also check that the coredump_code field is correctly exposed
|
||||
* as SI_TKILL.
|
||||
*/
|
||||
TEST_F(coredump, socket_coredump_signal_sigabrt)
|
||||
{
|
||||
@@ -655,6 +673,18 @@ TEST_F(coredump, socket_coredump_signal_sigabrt)
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* Verify coredump_code is available and correct */
|
||||
if (!(info.mask & PIDFD_INFO_COREDUMP_CODE)) {
|
||||
fprintf(stderr, "socket_coredump_signal_sigabrt: PIDFD_INFO_COREDUMP_CODE not set in mask\n");
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (info.coredump_code != SI_TKILL) {
|
||||
fprintf(stderr, "socket_coredump_signal_sigabrt: coredump_code=%d, expected SI_TKILL=%d\n",
|
||||
info.coredump_code, SI_TKILL);
|
||||
goto out;
|
||||
}
|
||||
|
||||
fd_core_file = open_coredump_tmpfile(self->fd_tmpfs_detached);
|
||||
if (fd_core_file < 0) {
|
||||
fprintf(stderr, "socket_coredump_signal_sigabrt: open_coredump_tmpfile failed: %m\n");
|
||||
@@ -718,6 +748,8 @@ out:
|
||||
ASSERT_TRUE(!!(info.mask & PIDFD_INFO_COREDUMP));
|
||||
ASSERT_TRUE(!!(info.mask & PIDFD_INFO_COREDUMP_SIGNAL));
|
||||
ASSERT_EQ(info.coredump_signal, SIGABRT);
|
||||
ASSERT_TRUE(!!(info.mask & PIDFD_INFO_COREDUMP_CODE));
|
||||
ASSERT_EQ(info.coredump_code, SI_TKILL);
|
||||
|
||||
wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
|
||||
}
|
||||
|
||||
@@ -56,7 +56,7 @@ void crashing_child(void)
|
||||
pthread_create(&thread, NULL, do_nothing, NULL);
|
||||
|
||||
/* crash on purpose */
|
||||
__builtin_trap();
|
||||
i = *(volatile int *)NULL;
|
||||
}
|
||||
|
||||
int create_detached_tmpfs(void)
|
||||
@@ -148,8 +148,8 @@ bool get_pidfd_info(int fd_peer_pidfd, struct pidfd_info *info)
|
||||
fprintf(stderr, "get_pidfd_info: ioctl(PIDFD_GET_INFO) failed: %m\n");
|
||||
return false;
|
||||
}
|
||||
fprintf(stderr, "get_pidfd_info: mask=0x%llx, coredump_mask=0x%x, coredump_signal=%d\n",
|
||||
(unsigned long long)info->mask, info->coredump_mask, info->coredump_signal);
|
||||
fprintf(stderr, "get_pidfd_info: mask=0x%llx, coredump_mask=0x%x, coredump_signal=%d, coredump_code=%d\n",
|
||||
(unsigned long long)info->mask, info->coredump_mask, info->coredump_signal, info->coredump_code);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
1
tools/testing/selftests/pidfd/.gitignore
vendored
1
tools/testing/selftests/pidfd/.gitignore
vendored
@@ -12,3 +12,4 @@ pidfd_info_test
|
||||
pidfd_exec_helper
|
||||
pidfd_xattr_test
|
||||
pidfd_setattr_test
|
||||
pidfd_autoreap_test
|
||||
|
||||
@@ -4,7 +4,7 @@ CFLAGS += -g $(KHDR_INCLUDES) $(TOOLS_INCLUDES) -pthread -Wall
|
||||
TEST_GEN_PROGS := pidfd_test pidfd_fdinfo_test pidfd_open_test \
|
||||
pidfd_poll_test pidfd_wait pidfd_getfd_test pidfd_setns_test \
|
||||
pidfd_file_handle_test pidfd_bind_mount pidfd_info_test \
|
||||
pidfd_xattr_test pidfd_setattr_test
|
||||
pidfd_xattr_test pidfd_setattr_test pidfd_autoreap_test
|
||||
|
||||
TEST_GEN_PROGS_EXTENDED := pidfd_exec_helper
|
||||
|
||||
|
||||
@@ -156,6 +156,10 @@
|
||||
#define PIDFD_INFO_COREDUMP_SIGNAL (1UL << 6)
|
||||
#endif
|
||||
|
||||
#ifndef PIDFD_INFO_COREDUMP_CODE
|
||||
#define PIDFD_INFO_COREDUMP_CODE (1UL << 7)
|
||||
#endif
|
||||
|
||||
#ifndef PIDFD_COREDUMPED
|
||||
#define PIDFD_COREDUMPED (1U << 0) /* Did crash and... */
|
||||
#endif
|
||||
@@ -194,6 +198,7 @@ struct pidfd_info {
|
||||
struct {
|
||||
__u32 coredump_mask;
|
||||
__u32 coredump_signal;
|
||||
__u32 coredump_code;
|
||||
};
|
||||
__u64 supported_mask;
|
||||
};
|
||||
|
||||
900
tools/testing/selftests/pidfd/pidfd_autoreap_test.c
Normal file
900
tools/testing/selftests/pidfd/pidfd_autoreap_test.c
Normal file
@@ -0,0 +1,900 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
// Copyright (c) 2026 Christian Brauner <brauner@kernel.org>
|
||||
|
||||
#define _GNU_SOURCE
|
||||
#include <errno.h>
|
||||
#include <linux/types.h>
|
||||
#include <poll.h>
|
||||
#include <pthread.h>
|
||||
#include <sched.h>
|
||||
#include <signal.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <syscall.h>
|
||||
#include <sys/ioctl.h>
|
||||
#include <sys/prctl.h>
|
||||
#include <sys/socket.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/wait.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "pidfd.h"
|
||||
#include "kselftest_harness.h"
|
||||
|
||||
#ifndef CLONE_AUTOREAP
|
||||
#define CLONE_AUTOREAP (1ULL << 34)
|
||||
#endif
|
||||
|
||||
#ifndef CLONE_NNP
|
||||
#define CLONE_NNP (1ULL << 35)
|
||||
#endif
|
||||
|
||||
#ifndef CLONE_PIDFD_AUTOKILL
|
||||
#define CLONE_PIDFD_AUTOKILL (1ULL << 36)
|
||||
#endif
|
||||
|
||||
#ifndef _LINUX_CAPABILITY_VERSION_3
|
||||
#define _LINUX_CAPABILITY_VERSION_3 0x20080522
|
||||
#endif
|
||||
|
||||
/*
 * First argument handed to the raw capset syscall by drop_all_caps().
 * Declared locally so the test does not depend on a capability header.
 */
struct cap_header {
	__u32 version;	/* set to _LINUX_CAPABILITY_VERSION_3 by the caller */
	int pid;	/* left zero-initialized by drop_all_caps() */
};
|
||||
|
||||
/*
 * One element of the capability-set array handed to the raw capset
 * syscall by drop_all_caps(), which passes two all-zero elements.
 */
struct cap_data {
	__u32 effective;
	__u32 permitted;
	__u32 inheritable;
};
|
||||
|
||||
static int drop_all_caps(void)
|
||||
{
|
||||
struct cap_header hdr = { .version = _LINUX_CAPABILITY_VERSION_3 };
|
||||
struct cap_data data[2] = {};
|
||||
|
||||
return syscall(__NR_capset, &hdr, data);
|
||||
}
|
||||
|
||||
static pid_t create_autoreap_child(int *pidfd)
|
||||
{
|
||||
struct __clone_args args = {
|
||||
.flags = CLONE_PIDFD | CLONE_AUTOREAP,
|
||||
.exit_signal = 0,
|
||||
.pidfd = ptr_to_u64(pidfd),
|
||||
};
|
||||
|
||||
return sys_clone3(&args, sizeof(args));
|
||||
}
|
||||
|
||||
/*
|
||||
* Test that CLONE_AUTOREAP works without CLONE_PIDFD (fire-and-forget).
|
||||
*/
|
||||
TEST(autoreap_without_pidfd)
|
||||
{
|
||||
struct __clone_args args = {
|
||||
.flags = CLONE_AUTOREAP,
|
||||
.exit_signal = 0,
|
||||
};
|
||||
pid_t pid;
|
||||
int ret;
|
||||
|
||||
pid = sys_clone3(&args, sizeof(args));
|
||||
if (pid < 0 && errno == EINVAL)
|
||||
SKIP(return, "CLONE_AUTOREAP not supported");
|
||||
ASSERT_GE(pid, 0);
|
||||
|
||||
if (pid == 0)
|
||||
_exit(0);
|
||||
|
||||
/*
|
||||
* Give the child a moment to exit and be autoreaped.
|
||||
* Then verify no zombie remains.
|
||||
*/
|
||||
usleep(200000);
|
||||
ret = waitpid(pid, NULL, WNOHANG);
|
||||
ASSERT_EQ(ret, -1);
|
||||
ASSERT_EQ(errno, ECHILD);
|
||||
}
|
||||
|
||||
/*
|
||||
* Test that CLONE_AUTOREAP with a non-zero exit_signal fails.
|
||||
*/
|
||||
TEST(autoreap_rejects_exit_signal)
|
||||
{
|
||||
struct __clone_args args = {
|
||||
.flags = CLONE_AUTOREAP,
|
||||
.exit_signal = SIGCHLD,
|
||||
};
|
||||
pid_t pid;
|
||||
|
||||
pid = sys_clone3(&args, sizeof(args));
|
||||
ASSERT_EQ(pid, -1);
|
||||
ASSERT_EQ(errno, EINVAL);
|
||||
}
|
||||
|
||||
/*
|
||||
* Test that CLONE_AUTOREAP with CLONE_PARENT fails.
|
||||
*/
|
||||
TEST(autoreap_rejects_parent)
|
||||
{
|
||||
struct __clone_args args = {
|
||||
.flags = CLONE_AUTOREAP | CLONE_PARENT,
|
||||
.exit_signal = 0,
|
||||
};
|
||||
pid_t pid;
|
||||
|
||||
pid = sys_clone3(&args, sizeof(args));
|
||||
ASSERT_EQ(pid, -1);
|
||||
ASSERT_EQ(errno, EINVAL);
|
||||
}
|
||||
|
||||
/*
|
||||
* Test that CLONE_AUTOREAP with CLONE_THREAD fails.
|
||||
*/
|
||||
TEST(autoreap_rejects_thread)
|
||||
{
|
||||
struct __clone_args args = {
|
||||
.flags = CLONE_AUTOREAP | CLONE_THREAD |
|
||||
CLONE_SIGHAND | CLONE_VM,
|
||||
.exit_signal = 0,
|
||||
};
|
||||
pid_t pid;
|
||||
|
||||
pid = sys_clone3(&args, sizeof(args));
|
||||
ASSERT_EQ(pid, -1);
|
||||
ASSERT_EQ(errno, EINVAL);
|
||||
}
|
||||
|
||||
/*
|
||||
* Basic test: create an autoreap child, let it exit, verify:
|
||||
* - pidfd becomes readable (poll returns POLLIN)
|
||||
* - PIDFD_GET_INFO returns the correct exit code
|
||||
* - waitpid() returns -1/ECHILD (no zombie)
|
||||
*/
|
||||
TEST(autoreap_basic)
|
||||
{
|
||||
struct pidfd_info info = { .mask = PIDFD_INFO_EXIT };
|
||||
int pidfd = -1, ret;
|
||||
struct pollfd pfd;
|
||||
pid_t pid;
|
||||
|
||||
pid = create_autoreap_child(&pidfd);
|
||||
if (pid < 0 && errno == EINVAL)
|
||||
SKIP(return, "CLONE_AUTOREAP not supported");
|
||||
ASSERT_GE(pid, 0);
|
||||
|
||||
if (pid == 0)
|
||||
_exit(42);
|
||||
|
||||
ASSERT_GE(pidfd, 0);
|
||||
|
||||
/* Wait for the child to exit via pidfd poll. */
|
||||
pfd.fd = pidfd;
|
||||
pfd.events = POLLIN;
|
||||
ret = poll(&pfd, 1, 5000);
|
||||
ASSERT_EQ(ret, 1);
|
||||
ASSERT_TRUE(pfd.revents & POLLIN);
|
||||
|
||||
/* Verify exit info via PIDFD_GET_INFO. */
|
||||
ret = ioctl(pidfd, PIDFD_GET_INFO, &info);
|
||||
ASSERT_EQ(ret, 0);
|
||||
ASSERT_TRUE(info.mask & PIDFD_INFO_EXIT);
|
||||
/*
|
||||
* exit_code is in waitpid format: for _exit(42),
|
||||
* WIFEXITED is true and WEXITSTATUS is 42.
|
||||
*/
|
||||
ASSERT_TRUE(WIFEXITED(info.exit_code));
|
||||
ASSERT_EQ(WEXITSTATUS(info.exit_code), 42);
|
||||
|
||||
/* Verify no zombie: waitpid should fail with ECHILD. */
|
||||
ret = waitpid(pid, NULL, WNOHANG);
|
||||
ASSERT_EQ(ret, -1);
|
||||
ASSERT_EQ(errno, ECHILD);
|
||||
|
||||
close(pidfd);
|
||||
}
|
||||
|
||||
/*
|
||||
* Test that an autoreap child killed by a signal reports
|
||||
* the correct exit info.
|
||||
*/
|
||||
TEST(autoreap_signaled)
|
||||
{
|
||||
struct pidfd_info info = { .mask = PIDFD_INFO_EXIT };
|
||||
int pidfd = -1, ret;
|
||||
struct pollfd pfd;
|
||||
pid_t pid;
|
||||
|
||||
pid = create_autoreap_child(&pidfd);
|
||||
if (pid < 0 && errno == EINVAL)
|
||||
SKIP(return, "CLONE_AUTOREAP not supported");
|
||||
ASSERT_GE(pid, 0);
|
||||
|
||||
if (pid == 0) {
|
||||
pause();
|
||||
_exit(1);
|
||||
}
|
||||
|
||||
ASSERT_GE(pidfd, 0);
|
||||
|
||||
/* Kill the child. */
|
||||
ret = sys_pidfd_send_signal(pidfd, SIGKILL, NULL, 0);
|
||||
ASSERT_EQ(ret, 0);
|
||||
|
||||
/* Wait for exit via pidfd. */
|
||||
pfd.fd = pidfd;
|
||||
pfd.events = POLLIN;
|
||||
ret = poll(&pfd, 1, 5000);
|
||||
ASSERT_EQ(ret, 1);
|
||||
ASSERT_TRUE(pfd.revents & POLLIN);
|
||||
|
||||
/* Verify signal info. */
|
||||
ret = ioctl(pidfd, PIDFD_GET_INFO, &info);
|
||||
ASSERT_EQ(ret, 0);
|
||||
ASSERT_TRUE(info.mask & PIDFD_INFO_EXIT);
|
||||
ASSERT_TRUE(WIFSIGNALED(info.exit_code));
|
||||
ASSERT_EQ(WTERMSIG(info.exit_code), SIGKILL);
|
||||
|
||||
/* No zombie. */
|
||||
ret = waitpid(pid, NULL, WNOHANG);
|
||||
ASSERT_EQ(ret, -1);
|
||||
ASSERT_EQ(errno, ECHILD);
|
||||
|
||||
close(pidfd);
|
||||
}
|
||||
|
||||
/*
|
||||
* Test autoreap survives reparenting: middle process creates an
|
||||
* autoreap grandchild, then exits. The grandchild gets reparented
|
||||
* to us (the grandparent, which is a subreaper). When the grandchild
|
||||
* exits, it should still be autoreaped - no zombie under us.
|
||||
*/
|
||||
TEST(autoreap_reparent)
|
||||
{
|
||||
int ipc_sockets[2], ret;
|
||||
int pidfd = -1;
|
||||
struct pollfd pfd;
|
||||
pid_t mid_pid, grandchild_pid;
|
||||
char buf[32] = {};
|
||||
|
||||
/* Make ourselves a subreaper so reparented children come to us. */
|
||||
ret = prctl(PR_SET_CHILD_SUBREAPER, 1);
|
||||
ASSERT_EQ(ret, 0);
|
||||
|
||||
ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
|
||||
ASSERT_EQ(ret, 0);
|
||||
|
||||
mid_pid = fork();
|
||||
ASSERT_GE(mid_pid, 0);
|
||||
|
||||
if (mid_pid == 0) {
|
||||
/* Middle child: create an autoreap grandchild. */
|
||||
int gc_pidfd = -1;
|
||||
|
||||
close(ipc_sockets[0]);
|
||||
|
||||
grandchild_pid = create_autoreap_child(&gc_pidfd);
|
||||
if (grandchild_pid < 0) {
|
||||
write_nointr(ipc_sockets[1], "E", 1);
|
||||
close(ipc_sockets[1]);
|
||||
_exit(1);
|
||||
}
|
||||
|
||||
if (grandchild_pid == 0) {
|
||||
/* Grandchild: wait for signal to exit. */
|
||||
close(ipc_sockets[1]);
|
||||
if (gc_pidfd >= 0)
|
||||
close(gc_pidfd);
|
||||
pause();
|
||||
_exit(0);
|
||||
}
|
||||
|
||||
/* Send grandchild PID to grandparent. */
|
||||
snprintf(buf, sizeof(buf), "%d", grandchild_pid);
|
||||
write_nointr(ipc_sockets[1], buf, strlen(buf));
|
||||
close(ipc_sockets[1]);
|
||||
if (gc_pidfd >= 0)
|
||||
close(gc_pidfd);
|
||||
|
||||
/* Middle child exits, grandchild gets reparented. */
|
||||
_exit(0);
|
||||
}
|
||||
|
||||
close(ipc_sockets[1]);
|
||||
|
||||
/* Read grandchild's PID. */
|
||||
ret = read_nointr(ipc_sockets[0], buf, sizeof(buf) - 1);
|
||||
close(ipc_sockets[0]);
|
||||
ASSERT_GT(ret, 0);
|
||||
|
||||
if (buf[0] == 'E') {
|
||||
waitpid(mid_pid, NULL, 0);
|
||||
prctl(PR_SET_CHILD_SUBREAPER, 0);
|
||||
SKIP(return, "CLONE_AUTOREAP not supported");
|
||||
}
|
||||
|
||||
grandchild_pid = atoi(buf);
|
||||
ASSERT_GT(grandchild_pid, 0);
|
||||
|
||||
/* Wait for the middle child to exit. */
|
||||
ret = waitpid(mid_pid, NULL, 0);
|
||||
ASSERT_EQ(ret, mid_pid);
|
||||
|
||||
/*
|
||||
* Now the grandchild is reparented to us (subreaper).
|
||||
* Open a pidfd for the grandchild and kill it.
|
||||
*/
|
||||
pidfd = sys_pidfd_open(grandchild_pid, 0);
|
||||
ASSERT_GE(pidfd, 0);
|
||||
|
||||
ret = sys_pidfd_send_signal(pidfd, SIGKILL, NULL, 0);
|
||||
ASSERT_EQ(ret, 0);
|
||||
|
||||
/* Wait for it to exit via pidfd poll. */
|
||||
pfd.fd = pidfd;
|
||||
pfd.events = POLLIN;
|
||||
ret = poll(&pfd, 1, 5000);
|
||||
ASSERT_EQ(ret, 1);
|
||||
ASSERT_TRUE(pfd.revents & POLLIN);
|
||||
|
||||
/*
|
||||
* The grandchild should have been autoreaped even though
|
||||
* we (the new parent) haven't set SA_NOCLDWAIT.
|
||||
* waitpid should return -1/ECHILD.
|
||||
*/
|
||||
ret = waitpid(grandchild_pid, NULL, WNOHANG);
|
||||
EXPECT_EQ(ret, -1);
|
||||
EXPECT_EQ(errno, ECHILD);
|
||||
|
||||
close(pidfd);
|
||||
|
||||
/* Clean up subreaper status. */
|
||||
prctl(PR_SET_CHILD_SUBREAPER, 0);
|
||||
}
|
||||
|
||||
/* Descriptor thread_func() writes its "running" byte to. */
static int thread_sock_fd;

/*
 * Thread body for the multithreaded autoreap test: writes the single
 * byte '1' to thread_sock_fd, sleeps for 200ms and returns NULL.
 * @arg is unused.
 */
static void *thread_func(void *arg)
{
	static const char running = '1';

	/* Tell the reader on the other end that this thread is running. */
	write_nointr(thread_sock_fd, &running, 1);

	/* Linger so the main thread gets to call _exit() first. */
	usleep(200 * 1000);

	return NULL;
}
|
||||
|
||||
/*
|
||||
* Test that an autoreap child with multiple threads is properly
|
||||
* autoreaped only after all threads have exited.
|
||||
*/
|
||||
TEST(autoreap_multithreaded)
|
||||
{
|
||||
struct pidfd_info info = { .mask = PIDFD_INFO_EXIT };
|
||||
int ipc_sockets[2], ret;
|
||||
int pidfd = -1;
|
||||
struct pollfd pfd;
|
||||
pid_t pid;
|
||||
char c;
|
||||
|
||||
ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
|
||||
ASSERT_EQ(ret, 0);
|
||||
|
||||
pid = create_autoreap_child(&pidfd);
|
||||
if (pid < 0 && errno == EINVAL) {
|
||||
close(ipc_sockets[0]);
|
||||
close(ipc_sockets[1]);
|
||||
SKIP(return, "CLONE_AUTOREAP not supported");
|
||||
}
|
||||
ASSERT_GE(pid, 0);
|
||||
|
||||
if (pid == 0) {
|
||||
pthread_t thread;
|
||||
|
||||
close(ipc_sockets[0]);
|
||||
|
||||
/*
|
||||
* Create a sub-thread that outlives the main thread.
|
||||
* The thread signals readiness, then sleeps.
|
||||
* The main thread waits briefly, then calls _exit().
|
||||
*/
|
||||
thread_sock_fd = ipc_sockets[1];
|
||||
pthread_create(&thread, NULL, thread_func, NULL);
|
||||
pthread_detach(thread);
|
||||
|
||||
/* Wait for thread to be running. */
|
||||
usleep(100000);
|
||||
|
||||
/* Main thread exits; sub-thread is still alive. */
|
||||
_exit(99);
|
||||
}
|
||||
|
||||
close(ipc_sockets[1]);
|
||||
|
||||
/* Wait for the sub-thread to signal readiness. */
|
||||
ret = read_nointr(ipc_sockets[0], &c, 1);
|
||||
close(ipc_sockets[0]);
|
||||
ASSERT_EQ(ret, 1);
|
||||
|
||||
/* Wait for the process to fully exit via pidfd poll. */
|
||||
pfd.fd = pidfd;
|
||||
pfd.events = POLLIN;
|
||||
ret = poll(&pfd, 1, 5000);
|
||||
ASSERT_EQ(ret, 1);
|
||||
ASSERT_TRUE(pfd.revents & POLLIN);
|
||||
|
||||
/* Verify exit info. */
|
||||
ret = ioctl(pidfd, PIDFD_GET_INFO, &info);
|
||||
ASSERT_EQ(ret, 0);
|
||||
ASSERT_TRUE(info.mask & PIDFD_INFO_EXIT);
|
||||
ASSERT_TRUE(WIFEXITED(info.exit_code));
|
||||
ASSERT_EQ(WEXITSTATUS(info.exit_code), 99);
|
||||
|
||||
/* No zombie. */
|
||||
ret = waitpid(pid, NULL, WNOHANG);
|
||||
ASSERT_EQ(ret, -1);
|
||||
ASSERT_EQ(errno, ECHILD);
|
||||
|
||||
close(pidfd);
|
||||
}
|
||||
|
||||
/*
|
||||
* Test that autoreap is NOT inherited by grandchildren.
|
||||
*/
|
||||
TEST(autoreap_no_inherit)
|
||||
{
|
||||
int ipc_sockets[2], ret;
|
||||
int pidfd = -1;
|
||||
pid_t pid;
|
||||
char buf[2] = {};
|
||||
struct pollfd pfd;
|
||||
|
||||
ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
|
||||
ASSERT_EQ(ret, 0);
|
||||
|
||||
pid = create_autoreap_child(&pidfd);
|
||||
if (pid < 0 && errno == EINVAL) {
|
||||
close(ipc_sockets[0]);
|
||||
close(ipc_sockets[1]);
|
||||
SKIP(return, "CLONE_AUTOREAP not supported");
|
||||
}
|
||||
ASSERT_GE(pid, 0);
|
||||
|
||||
if (pid == 0) {
|
||||
pid_t gc;
|
||||
int status;
|
||||
|
||||
close(ipc_sockets[0]);
|
||||
|
||||
/* Autoreap child forks a grandchild (without autoreap). */
|
||||
gc = fork();
|
||||
if (gc < 0) {
|
||||
write_nointr(ipc_sockets[1], "E", 1);
|
||||
_exit(1);
|
||||
}
|
||||
if (gc == 0) {
|
||||
/* Grandchild: exit immediately. */
|
||||
close(ipc_sockets[1]);
|
||||
_exit(77);
|
||||
}
|
||||
|
||||
/*
|
||||
* The grandchild should become a regular zombie
|
||||
* since it was NOT created with CLONE_AUTOREAP.
|
||||
* Wait for it to verify.
|
||||
*/
|
||||
ret = waitpid(gc, &status, 0);
|
||||
if (ret == gc && WIFEXITED(status) &&
|
||||
WEXITSTATUS(status) == 77) {
|
||||
write_nointr(ipc_sockets[1], "P", 1);
|
||||
} else {
|
||||
write_nointr(ipc_sockets[1], "F", 1);
|
||||
}
|
||||
close(ipc_sockets[1]);
|
||||
_exit(0);
|
||||
}
|
||||
|
||||
close(ipc_sockets[1]);
|
||||
|
||||
ret = read_nointr(ipc_sockets[0], buf, 1);
|
||||
close(ipc_sockets[0]);
|
||||
ASSERT_EQ(ret, 1);
|
||||
|
||||
/*
|
||||
* 'P' means the autoreap child was able to waitpid() its
|
||||
* grandchild (correct - grandchild should be a normal zombie,
|
||||
* not autoreaped).
|
||||
*/
|
||||
ASSERT_EQ(buf[0], 'P');
|
||||
|
||||
/* Wait for the autoreap child to exit. */
|
||||
pfd.fd = pidfd;
|
||||
pfd.events = POLLIN;
|
||||
ret = poll(&pfd, 1, 5000);
|
||||
ASSERT_EQ(ret, 1);
|
||||
|
||||
/* Autoreap child itself should be autoreaped. */
|
||||
ret = waitpid(pid, NULL, WNOHANG);
|
||||
ASSERT_EQ(ret, -1);
|
||||
ASSERT_EQ(errno, ECHILD);
|
||||
|
||||
close(pidfd);
|
||||
}
|
||||
|
||||
/*
|
||||
* Test that CLONE_NNP sets no_new_privs on the child.
|
||||
* The child checks via prctl(PR_GET_NO_NEW_PRIVS) and reports back.
|
||||
* The parent must NOT have no_new_privs set afterwards.
|
||||
*/
|
||||
TEST(nnp_sets_no_new_privs)
|
||||
{
|
||||
struct __clone_args args = {
|
||||
.flags = CLONE_PIDFD | CLONE_AUTOREAP | CLONE_NNP,
|
||||
.exit_signal = 0,
|
||||
};
|
||||
struct pidfd_info info = { .mask = PIDFD_INFO_EXIT };
|
||||
int pidfd = -1, ret;
|
||||
struct pollfd pfd;
|
||||
pid_t pid;
|
||||
|
||||
/* Ensure parent does not already have no_new_privs. */
|
||||
ret = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0);
|
||||
ASSERT_EQ(ret, 0) {
|
||||
TH_LOG("Parent already has no_new_privs set, cannot run test");
|
||||
}
|
||||
|
||||
args.pidfd = ptr_to_u64(&pidfd);
|
||||
|
||||
pid = sys_clone3(&args, sizeof(args));
|
||||
if (pid < 0 && errno == EINVAL)
|
||||
SKIP(return, "CLONE_NNP not supported");
|
||||
ASSERT_GE(pid, 0);
|
||||
|
||||
if (pid == 0) {
|
||||
/*
|
||||
* Child: check no_new_privs. Exit 0 if set, 1 if not.
|
||||
*/
|
||||
ret = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0);
|
||||
_exit(ret == 1 ? 0 : 1);
|
||||
}
|
||||
|
||||
ASSERT_GE(pidfd, 0);
|
||||
|
||||
/* Parent must still NOT have no_new_privs. */
|
||||
ret = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0);
|
||||
ASSERT_EQ(ret, 0) {
|
||||
TH_LOG("Parent got no_new_privs after creating CLONE_NNP child");
|
||||
}
|
||||
|
||||
/* Wait for child to exit. */
|
||||
pfd.fd = pidfd;
|
||||
pfd.events = POLLIN;
|
||||
ret = poll(&pfd, 1, 5000);
|
||||
ASSERT_EQ(ret, 1);
|
||||
|
||||
/* Verify child exited with 0 (no_new_privs was set). */
|
||||
ret = ioctl(pidfd, PIDFD_GET_INFO, &info);
|
||||
ASSERT_EQ(ret, 0);
|
||||
ASSERT_TRUE(info.mask & PIDFD_INFO_EXIT);
|
||||
ASSERT_TRUE(WIFEXITED(info.exit_code));
|
||||
ASSERT_EQ(WEXITSTATUS(info.exit_code), 0) {
|
||||
TH_LOG("Child did not have no_new_privs set");
|
||||
}
|
||||
|
||||
close(pidfd);
|
||||
}
|
||||
|
||||
/*
|
||||
* Test that CLONE_NNP with CLONE_THREAD fails with EINVAL.
|
||||
*/
|
||||
TEST(nnp_rejects_thread)
|
||||
{
|
||||
struct __clone_args args = {
|
||||
.flags = CLONE_NNP | CLONE_THREAD |
|
||||
CLONE_SIGHAND | CLONE_VM,
|
||||
.exit_signal = 0,
|
||||
};
|
||||
pid_t pid;
|
||||
|
||||
pid = sys_clone3(&args, sizeof(args));
|
||||
ASSERT_EQ(pid, -1);
|
||||
ASSERT_EQ(errno, EINVAL);
|
||||
}
|
||||
|
||||
/*
|
||||
* Test that a plain CLONE_AUTOREAP child does NOT get no_new_privs.
|
||||
* Only CLONE_NNP should set it.
|
||||
*/
|
||||
TEST(autoreap_no_new_privs_unset)
|
||||
{
|
||||
struct pidfd_info info = { .mask = PIDFD_INFO_EXIT };
|
||||
int pidfd = -1, ret;
|
||||
struct pollfd pfd;
|
||||
pid_t pid;
|
||||
|
||||
pid = create_autoreap_child(&pidfd);
|
||||
if (pid < 0 && errno == EINVAL)
|
||||
SKIP(return, "CLONE_AUTOREAP not supported");
|
||||
ASSERT_GE(pid, 0);
|
||||
|
||||
if (pid == 0) {
|
||||
/*
|
||||
* Child: check no_new_privs. Exit 0 if NOT set, 1 if set.
|
||||
*/
|
||||
ret = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0);
|
||||
_exit(ret == 0 ? 0 : 1);
|
||||
}
|
||||
|
||||
ASSERT_GE(pidfd, 0);
|
||||
|
||||
pfd.fd = pidfd;
|
||||
pfd.events = POLLIN;
|
||||
ret = poll(&pfd, 1, 5000);
|
||||
ASSERT_EQ(ret, 1);
|
||||
|
||||
ret = ioctl(pidfd, PIDFD_GET_INFO, &info);
|
||||
ASSERT_EQ(ret, 0);
|
||||
ASSERT_TRUE(info.mask & PIDFD_INFO_EXIT);
|
||||
ASSERT_TRUE(WIFEXITED(info.exit_code));
|
||||
ASSERT_EQ(WEXITSTATUS(info.exit_code), 0) {
|
||||
TH_LOG("Plain autoreap child unexpectedly has no_new_privs");
|
||||
}
|
||||
|
||||
close(pidfd);
|
||||
}
|
||||
|
||||
/*
|
||||
* Helper: create a child with CLONE_PIDFD | CLONE_PIDFD_AUTOKILL | CLONE_AUTOREAP | CLONE_NNP.
|
||||
*/
|
||||
static pid_t create_autokill_child(int *pidfd)
|
||||
{
|
||||
struct __clone_args args = {
|
||||
.flags = CLONE_PIDFD | CLONE_PIDFD_AUTOKILL |
|
||||
CLONE_AUTOREAP | CLONE_NNP,
|
||||
.exit_signal = 0,
|
||||
.pidfd = ptr_to_u64(pidfd),
|
||||
};
|
||||
|
||||
return sys_clone3(&args, sizeof(args));
|
||||
}
|
||||
|
||||
/*
|
||||
* Basic autokill test: child blocks in pause(), parent closes the
|
||||
* clone3 pidfd, child should be killed and autoreaped.
|
||||
*/
|
||||
TEST(autokill_basic)
|
||||
{
|
||||
int pidfd = -1, pollfd_fd = -1, ret;
|
||||
struct pollfd pfd;
|
||||
pid_t pid;
|
||||
|
||||
pid = create_autokill_child(&pidfd);
|
||||
if (pid < 0 && errno == EINVAL)
|
||||
SKIP(return, "CLONE_PIDFD_AUTOKILL not supported");
|
||||
ASSERT_GE(pid, 0);
|
||||
|
||||
if (pid == 0) {
|
||||
pause();
|
||||
_exit(1);
|
||||
}
|
||||
|
||||
ASSERT_GE(pidfd, 0);
|
||||
|
||||
/*
|
||||
* Open a second pidfd via pidfd_open() so we can observe the
|
||||
* child's death after closing the clone3 pidfd.
|
||||
*/
|
||||
pollfd_fd = sys_pidfd_open(pid, 0);
|
||||
ASSERT_GE(pollfd_fd, 0);
|
||||
|
||||
/* Close the clone3 pidfd — this should trigger autokill. */
|
||||
close(pidfd);
|
||||
|
||||
/* Wait for the child to die via the pidfd_open'd fd. */
|
||||
pfd.fd = pollfd_fd;
|
||||
pfd.events = POLLIN;
|
||||
ret = poll(&pfd, 1, 5000);
|
||||
ASSERT_EQ(ret, 1);
|
||||
ASSERT_TRUE(pfd.revents & POLLIN);
|
||||
|
||||
/* Child should be autoreaped — no zombie. */
|
||||
usleep(100000);
|
||||
ret = waitpid(pid, NULL, WNOHANG);
|
||||
ASSERT_EQ(ret, -1);
|
||||
ASSERT_EQ(errno, ECHILD);
|
||||
|
||||
close(pollfd_fd);
|
||||
}
|
||||
|
||||
/*
|
||||
* CLONE_PIDFD_AUTOKILL without CLONE_PIDFD must fail with EINVAL.
|
||||
*/
|
||||
TEST(autokill_requires_pidfd)
|
||||
{
|
||||
struct __clone_args args = {
|
||||
.flags = CLONE_PIDFD_AUTOKILL | CLONE_AUTOREAP,
|
||||
.exit_signal = 0,
|
||||
};
|
||||
pid_t pid;
|
||||
|
||||
pid = sys_clone3(&args, sizeof(args));
|
||||
ASSERT_EQ(pid, -1);
|
||||
ASSERT_EQ(errno, EINVAL);
|
||||
}
|
||||
|
||||
/*
|
||||
* CLONE_PIDFD_AUTOKILL without CLONE_AUTOREAP must fail with EINVAL.
|
||||
*/
|
||||
TEST(autokill_requires_autoreap)
|
||||
{
|
||||
int pidfd = -1;
|
||||
struct __clone_args args = {
|
||||
.flags = CLONE_PIDFD | CLONE_PIDFD_AUTOKILL,
|
||||
.exit_signal = 0,
|
||||
.pidfd = ptr_to_u64(&pidfd),
|
||||
};
|
||||
pid_t pid;
|
||||
|
||||
pid = sys_clone3(&args, sizeof(args));
|
||||
ASSERT_EQ(pid, -1);
|
||||
ASSERT_EQ(errno, EINVAL);
|
||||
}
|
||||
|
||||
/*
|
||||
* CLONE_PIDFD_AUTOKILL with CLONE_THREAD must fail with EINVAL.
|
||||
*/
|
||||
TEST(autokill_rejects_thread)
|
||||
{
|
||||
int pidfd = -1;
|
||||
struct __clone_args args = {
|
||||
.flags = CLONE_PIDFD | CLONE_PIDFD_AUTOKILL |
|
||||
CLONE_AUTOREAP | CLONE_THREAD |
|
||||
CLONE_SIGHAND | CLONE_VM,
|
||||
.exit_signal = 0,
|
||||
.pidfd = ptr_to_u64(&pidfd),
|
||||
};
|
||||
pid_t pid;
|
||||
|
||||
pid = sys_clone3(&args, sizeof(args));
|
||||
ASSERT_EQ(pid, -1);
|
||||
ASSERT_EQ(errno, EINVAL);
|
||||
}
|
||||
|
||||
/*
|
||||
* Test that only the clone3 pidfd triggers autokill, not pidfd_open().
|
||||
* Close the pidfd_open'd fd first — child should survive.
|
||||
* Then close the clone3 pidfd — child should be killed and autoreaped.
|
||||
*/
|
||||
TEST(autokill_pidfd_open_no_effect)
|
||||
{
|
||||
int pidfd = -1, open_fd = -1, ret;
|
||||
struct pollfd pfd;
|
||||
pid_t pid;
|
||||
|
||||
pid = create_autokill_child(&pidfd);
|
||||
if (pid < 0 && errno == EINVAL)
|
||||
SKIP(return, "CLONE_PIDFD_AUTOKILL not supported");
|
||||
ASSERT_GE(pid, 0);
|
||||
|
||||
if (pid == 0) {
|
||||
pause();
|
||||
_exit(1);
|
||||
}
|
||||
|
||||
ASSERT_GE(pidfd, 0);
|
||||
|
||||
/* Open a second pidfd via pidfd_open(). */
|
||||
open_fd = sys_pidfd_open(pid, 0);
|
||||
ASSERT_GE(open_fd, 0);
|
||||
|
||||
/*
|
||||
* Close the pidfd_open'd fd — child should survive because
|
||||
* only the clone3 pidfd has autokill.
|
||||
*/
|
||||
close(open_fd);
|
||||
usleep(200000);
|
||||
|
||||
/* Verify child is still alive by polling the clone3 pidfd. */
|
||||
pfd.fd = pidfd;
|
||||
pfd.events = POLLIN;
|
||||
ret = poll(&pfd, 1, 0);
|
||||
ASSERT_EQ(ret, 0) {
|
||||
TH_LOG("Child died after closing pidfd_open fd — should still be alive");
|
||||
}
|
||||
|
||||
/* Open another observation fd before triggering autokill. */
|
||||
open_fd = sys_pidfd_open(pid, 0);
|
||||
ASSERT_GE(open_fd, 0);
|
||||
|
||||
/* Now close the clone3 pidfd — this triggers autokill. */
|
||||
close(pidfd);
|
||||
|
||||
pfd.fd = open_fd;
|
||||
pfd.events = POLLIN;
|
||||
ret = poll(&pfd, 1, 5000);
|
||||
ASSERT_EQ(ret, 1);
|
||||
ASSERT_TRUE(pfd.revents & POLLIN);
|
||||
|
||||
/* Child should be autoreaped — no zombie. */
|
||||
usleep(100000);
|
||||
ret = waitpid(pid, NULL, WNOHANG);
|
||||
ASSERT_EQ(ret, -1);
|
||||
ASSERT_EQ(errno, ECHILD);
|
||||
|
||||
close(open_fd);
|
||||
}
|
||||
|
||||
/*
|
||||
* Test that CLONE_PIDFD_AUTOKILL without CLONE_NNP fails with EPERM
|
||||
* for an unprivileged caller.
|
||||
*/
|
||||
TEST(autokill_requires_cap_sys_admin)
|
||||
{
|
||||
int pidfd = -1, ret;
|
||||
struct __clone_args args = {
|
||||
.flags = CLONE_PIDFD | CLONE_PIDFD_AUTOKILL |
|
||||
CLONE_AUTOREAP,
|
||||
.exit_signal = 0,
|
||||
.pidfd = ptr_to_u64(&pidfd),
|
||||
};
|
||||
pid_t pid;
|
||||
|
||||
/* Drop all capabilities so we lack CAP_SYS_ADMIN. */
|
||||
ret = drop_all_caps();
|
||||
ASSERT_EQ(ret, 0);
|
||||
|
||||
pid = sys_clone3(&args, sizeof(args));
|
||||
ASSERT_EQ(pid, -1);
|
||||
ASSERT_EQ(errno, EPERM);
|
||||
}
|
||||
|
||||
/*
|
||||
* Test that CLONE_PIDFD_AUTOKILL without CLONE_NNP succeeds with
|
||||
* CAP_SYS_ADMIN.
|
||||
*/
|
||||
TEST(autokill_without_nnp_with_cap)
|
||||
{
|
||||
struct __clone_args args = {
|
||||
.flags = CLONE_PIDFD | CLONE_PIDFD_AUTOKILL |
|
||||
CLONE_AUTOREAP,
|
||||
.exit_signal = 0,
|
||||
};
|
||||
struct pidfd_info info = { .mask = PIDFD_INFO_EXIT };
|
||||
int pidfd = -1, ret;
|
||||
struct pollfd pfd;
|
||||
pid_t pid;
|
||||
|
||||
if (geteuid() != 0)
|
||||
SKIP(return, "Need root/CAP_SYS_ADMIN");
|
||||
|
||||
args.pidfd = ptr_to_u64(&pidfd);
|
||||
|
||||
pid = sys_clone3(&args, sizeof(args));
|
||||
if (pid < 0 && errno == EINVAL)
|
||||
SKIP(return, "CLONE_PIDFD_AUTOKILL not supported");
|
||||
ASSERT_GE(pid, 0);
|
||||
|
||||
if (pid == 0)
|
||||
_exit(0);
|
||||
|
||||
ASSERT_GE(pidfd, 0);
|
||||
|
||||
/* Wait for child to exit. */
|
||||
pfd.fd = pidfd;
|
||||
pfd.events = POLLIN;
|
||||
ret = poll(&pfd, 1, 5000);
|
||||
ASSERT_EQ(ret, 1);
|
||||
|
||||
ret = ioctl(pidfd, PIDFD_GET_INFO, &info);
|
||||
ASSERT_EQ(ret, 0);
|
||||
ASSERT_TRUE(info.mask & PIDFD_INFO_EXIT);
|
||||
ASSERT_TRUE(WIFEXITED(info.exit_code));
|
||||
ASSERT_EQ(WEXITSTATUS(info.exit_code), 0);
|
||||
|
||||
close(pidfd);
|
||||
}
|
||||
|
||||
TEST_HARNESS_MAIN
|
||||
@@ -724,6 +724,7 @@ TEST(supported_mask_field)
|
||||
ASSERT_TRUE(!!(info.supported_mask & PIDFD_INFO_COREDUMP));
|
||||
ASSERT_TRUE(!!(info.supported_mask & PIDFD_INFO_SUPPORTED_MASK));
|
||||
ASSERT_TRUE(!!(info.supported_mask & PIDFD_INFO_COREDUMP_SIGNAL));
|
||||
ASSERT_TRUE(!!(info.supported_mask & PIDFD_INFO_COREDUMP_CODE));
|
||||
|
||||
/* Clean up */
|
||||
sys_pidfd_send_signal(pidfd, SIGKILL, NULL, 0);
|
||||
|
||||
Reference in New Issue
Block a user