mirror of
https://github.com/torvalds/linux.git
synced 2026-04-18 06:44:00 -04:00
Merge tag 'landlock-7.1-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/mic/linux
Pull Landlock update from Mickaël Salaün: "This adds a new Landlock access right for pathname UNIX domain sockets thanks to a new LSM hook, and a few fixes" * tag 'landlock-7.1-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/mic/linux: (23 commits) landlock: Document fallocate(2) as another truncation corner case landlock: Document FS access right for pathname UNIX sockets selftests/landlock: Simplify ruleset creation and enforcement in fs_test selftests/landlock: Check that coredump sockets stay unrestricted selftests/landlock: Audit test for LANDLOCK_ACCESS_FS_RESOLVE_UNIX selftests/landlock: Test LANDLOCK_ACCESS_FS_RESOLVE_UNIX selftests/landlock: Replace access_fs_16 with ACCESS_ALL in fs_test samples/landlock: Add support for named UNIX domain socket restrictions landlock: Clarify BUILD_BUG_ON check in scoping logic landlock: Control pathname UNIX domain socket resolution by path landlock: Use mem_is_zero() in is_layer_masks_allowed() lsm: Add LSM hook security_unix_find landlock: Fix kernel-doc warning for pointer-to-array parameters landlock: Fix formatting in tsync.c landlock: Improve kernel-doc "Return:" section consistency landlock: Add missing kernel-doc "Return:" sections selftests/landlock: Fix format warning for __u64 in net_test selftests/landlock: Skip stale records in audit_match_record() selftests/landlock: Drain stale audit records on init selftests/landlock: Fix socket file descriptor leaks in audit helpers ...
This commit is contained in:
@@ -7,7 +7,7 @@ Landlock LSM: kernel documentation
|
||||
==================================
|
||||
|
||||
:Author: Mickaël Salaün
|
||||
:Date: September 2025
|
||||
:Date: March 2026
|
||||
|
||||
Landlock's goal is to create scoped access-control (i.e. sandboxing). To
|
||||
harden a whole system, this feature should be available to any process,
|
||||
@@ -89,6 +89,46 @@ this is required to keep access controls consistent over the whole system, and
|
||||
this avoids unintended bypasses through file descriptor passing (i.e. confused
|
||||
deputy attack).
|
||||
|
||||
.. _scoped-flags-interaction:
|
||||
|
||||
Interaction between scoped flags and other access rights
|
||||
--------------------------------------------------------
|
||||
|
||||
The ``scoped`` flags in &struct landlock_ruleset_attr restrict the
|
||||
use of *outgoing* IPC from the created Landlock domain, while they
|
||||
permit reaching out to IPC endpoints *within* the created Landlock
|
||||
domain.
|
||||
|
||||
In the future, scoped flags *may* interact with other access rights,
|
||||
e.g. so that abstract UNIX sockets can be allow-listed by name, or so
|
||||
that signals can be allow-listed by signal number or target process.
|
||||
|
||||
When introducing ``LANDLOCK_ACCESS_FS_RESOLVE_UNIX``, we defined it to
|
||||
implicitly have the same scoping semantics as a
|
||||
``LANDLOCK_SCOPE_PATHNAME_UNIX_SOCKET`` flag would have: connecting to
|
||||
UNIX sockets within the same domain (where
|
||||
``LANDLOCK_ACCESS_FS_RESOLVE_UNIX`` is used) is unconditionally
|
||||
allowed.
|
||||
|
||||
The reasoning is:
|
||||
|
||||
* Like other IPC mechanisms, connecting to named UNIX sockets in the
|
||||
same domain should be expected and harmless. (If needed, users can
|
||||
further refine their Landlock policies with nested domains or by
|
||||
restricting ``LANDLOCK_ACCESS_FS_MAKE_SOCK``.)
|
||||
* We reserve the option to still introduce
|
||||
``LANDLOCK_SCOPE_PATHNAME_UNIX_SOCKET`` in the future. (This would
|
||||
be useful if we wanted to have a Landlock rule to permit IPC access
|
||||
to other Landlock domains.)
|
||||
* But we can postpone the point in time when users have to deal with
|
||||
two interacting flags visible in the userspace API. (In particular,
|
||||
it is possible that it won't be needed in practice, in which case we
|
||||
can avoid the second flag altogether.)
|
||||
* If we *do* introduce ``LANDLOCK_SCOPE_PATHNAME_UNIX_SOCKET`` in the
|
||||
future, setting this scoped flag in a ruleset does *not reduce* the
|
||||
restrictions, because access within the same scope is already
|
||||
allowed based on ``LANDLOCK_ACCESS_FS_RESOLVE_UNIX``.
|
||||
|
||||
Tests
|
||||
=====
|
||||
|
||||
|
||||
@@ -77,7 +77,8 @@ to be explicit about the denied-by-default access rights.
|
||||
LANDLOCK_ACCESS_FS_MAKE_SYM |
|
||||
LANDLOCK_ACCESS_FS_REFER |
|
||||
LANDLOCK_ACCESS_FS_TRUNCATE |
|
||||
LANDLOCK_ACCESS_FS_IOCTL_DEV,
|
||||
LANDLOCK_ACCESS_FS_IOCTL_DEV |
|
||||
LANDLOCK_ACCESS_FS_RESOLVE_UNIX,
|
||||
.handled_access_net =
|
||||
LANDLOCK_ACCESS_NET_BIND_TCP |
|
||||
LANDLOCK_ACCESS_NET_CONNECT_TCP,
|
||||
@@ -127,6 +128,10 @@ version, and only use the available subset of access rights:
|
||||
/* Removes LANDLOCK_SCOPE_* for ABI < 6 */
|
||||
ruleset_attr.scoped &= ~(LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET |
|
||||
LANDLOCK_SCOPE_SIGNAL);
|
||||
__attribute__((fallthrough));
|
||||
case 6 ... 8:
|
||||
/* Removes LANDLOCK_ACCESS_FS_RESOLVE_UNIX for ABI < 9 */
|
||||
ruleset_attr.handled_access_fs &= ~LANDLOCK_ACCESS_FS_RESOLVE_UNIX;
|
||||
}
|
||||
|
||||
This enables the creation of an inclusive ruleset that will contain our rules.
|
||||
@@ -378,8 +383,8 @@ Truncating files
|
||||
|
||||
The operations covered by ``LANDLOCK_ACCESS_FS_WRITE_FILE`` and
|
||||
``LANDLOCK_ACCESS_FS_TRUNCATE`` both change the contents of a file and sometimes
|
||||
overlap in non-intuitive ways. It is recommended to always specify both of
|
||||
these together.
|
||||
overlap in non-intuitive ways. It is strongly recommended to always specify
|
||||
both of these together (either granting both, or granting none).
|
||||
|
||||
A particularly surprising example is :manpage:`creat(2)`. The name suggests
|
||||
that this system call requires the rights to create and write files. However,
|
||||
@@ -391,6 +396,10 @@ It should also be noted that truncating files does not require the
|
||||
system call, this can also be done through :manpage:`open(2)` with the flags
|
||||
``O_RDONLY | O_TRUNC``.
|
||||
|
||||
At the same time, on some filesystems, :manpage:`fallocate(2)` offers a way to
|
||||
shorten file contents with ``FALLOC_FL_COLLAPSE_RANGE`` when the file is opened
|
||||
for writing, sidestepping the ``LANDLOCK_ACCESS_FS_TRUNCATE`` right.
|
||||
|
||||
The truncate right is associated with the opened file (see below).
|
||||
|
||||
Rights associated with file descriptors
|
||||
@@ -700,6 +709,13 @@ enforce Landlock rulesets across all threads of the calling process
|
||||
using the ``LANDLOCK_RESTRICT_SELF_TSYNC`` flag passed to
|
||||
sys_landlock_restrict_self().
|
||||
|
||||
Pathname UNIX sockets (ABI < 9)
|
||||
-------------------------------
|
||||
|
||||
Starting with the Landlock ABI version 9, it is possible to restrict
|
||||
connections to pathname UNIX domain sockets (:manpage:`unix(7)`) using
|
||||
the new ``LANDLOCK_ACCESS_FS_RESOLVE_UNIX`` right.
|
||||
|
||||
.. _kernel_support:
|
||||
|
||||
Kernel support
|
||||
|
||||
@@ -322,6 +322,11 @@ LSM_HOOK(int, 0, post_notification, const struct cred *w_cred,
|
||||
LSM_HOOK(int, 0, watch_key, struct key *key)
|
||||
#endif /* CONFIG_SECURITY && CONFIG_KEY_NOTIFICATIONS */
|
||||
|
||||
#if defined(CONFIG_SECURITY_NETWORK) && defined(CONFIG_SECURITY_PATH)
|
||||
LSM_HOOK(int, 0, unix_find, const struct path *path, struct sock *other,
|
||||
int flags)
|
||||
#endif /* CONFIG_SECURITY_NETWORK && CONFIG_SECURITY_PATH */
|
||||
|
||||
#ifdef CONFIG_SECURITY_NETWORK
|
||||
LSM_HOOK(int, 0, unix_stream_connect, struct sock *sock, struct sock *other,
|
||||
struct sock *newsk)
|
||||
|
||||
@@ -1954,6 +1954,17 @@ static inline int security_mptcp_add_subflow(struct sock *sk, struct sock *ssk)
|
||||
}
|
||||
#endif /* CONFIG_SECURITY_NETWORK */
|
||||
|
||||
#if defined(CONFIG_SECURITY_NETWORK) && defined(CONFIG_SECURITY_PATH)
|
||||
|
||||
int security_unix_find(const struct path *path, struct sock *other, int flags);
|
||||
|
||||
#else /* CONFIG_SECURITY_NETWORK && CONFIG_SECURITY_PATH */
|
||||
static inline int security_unix_find(const struct path *path, struct sock *other, int flags)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
#endif /* CONFIG_SECURITY_NETWORK && CONFIG_SECURITY_PATH */
|
||||
|
||||
#ifdef CONFIG_SECURITY_INFINIBAND
|
||||
int security_ib_pkey_access(void *sec, u64 subnet_prefix, u16 pkey);
|
||||
int security_ib_endport_manage_subnet(void *sec, const char *name, u8 port_num);
|
||||
|
||||
@@ -116,7 +116,9 @@ struct landlock_ruleset_attr {
|
||||
* ``LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF``, this flag only affects
|
||||
* future nested domains, not the one being created. It can also be used
|
||||
* with a @ruleset_fd value of -1 to mute subdomain logs without creating a
|
||||
* domain.
|
||||
* domain. When combined with %LANDLOCK_RESTRICT_SELF_TSYNC and a
|
||||
* @ruleset_fd value of -1, this configuration is propagated to all threads
|
||||
* of the current process.
|
||||
*
|
||||
* The following flag supports policy enforcement in multithreaded processes:
|
||||
*
|
||||
@@ -248,6 +250,26 @@ struct landlock_net_port_attr {
|
||||
*
|
||||
* This access right is available since the fifth version of the Landlock
|
||||
* ABI.
|
||||
* - %LANDLOCK_ACCESS_FS_RESOLVE_UNIX: Look up pathname UNIX domain sockets
|
||||
* (:manpage:`unix(7)`). On UNIX domain sockets, this restricts both calls to
|
||||
* :manpage:`connect(2)` as well as calls to :manpage:`sendmsg(2)` with an
|
||||
* explicit recipient address.
|
||||
*
|
||||
* This access right only applies to connections to UNIX server sockets which
|
||||
* were created outside of the newly created Landlock domain (e.g. from within
|
||||
* a parent domain or from an unrestricted process). Newly created UNIX
|
||||
* servers within the same Landlock domain continue to be accessible. In this
|
||||
* regard, %LANDLOCK_ACCESS_FS_RESOLVE_UNIX has the same semantics as the
|
||||
* ``LANDLOCK_SCOPE_*`` flags.
|
||||
*
|
||||
* If a resolve attempt is denied, the operation returns an ``EACCES`` error,
|
||||
* in line with other filesystem access rights (but different to denials for
|
||||
* abstract UNIX domain sockets).
|
||||
*
|
||||
* This access right is available since the ninth version of the Landlock ABI.
|
||||
*
|
||||
* The rationale for this design is described in
|
||||
* :ref:`Documentation/security/landlock.rst <scoped-flags-interaction>`.
|
||||
*
|
||||
* Whether an opened file can be truncated with :manpage:`ftruncate(2)` or used
|
||||
* with `ioctl(2)` is determined during :manpage:`open(2)`, in the same way as
|
||||
@@ -333,6 +355,7 @@ struct landlock_net_port_attr {
|
||||
#define LANDLOCK_ACCESS_FS_REFER (1ULL << 13)
|
||||
#define LANDLOCK_ACCESS_FS_TRUNCATE (1ULL << 14)
|
||||
#define LANDLOCK_ACCESS_FS_IOCTL_DEV (1ULL << 15)
|
||||
#define LANDLOCK_ACCESS_FS_RESOLVE_UNIX (1ULL << 16)
|
||||
/* clang-format on */
|
||||
|
||||
/**
|
||||
|
||||
@@ -1231,11 +1231,15 @@ static struct sock *unix_find_bsd(struct sockaddr_un *sunaddr, int addr_len,
|
||||
goto path_put;
|
||||
|
||||
err = -EPROTOTYPE;
|
||||
if (sk->sk_type == type)
|
||||
touch_atime(&path);
|
||||
else
|
||||
if (sk->sk_type != type)
|
||||
goto sock_put;
|
||||
|
||||
err = security_unix_find(&path, sk, flags);
|
||||
if (err)
|
||||
goto sock_put;
|
||||
|
||||
touch_atime(&path);
|
||||
|
||||
path_put(&path);
|
||||
|
||||
return sk;
|
||||
|
||||
@@ -111,7 +111,8 @@ static int parse_path(char *env_path, const char ***const path_list)
|
||||
LANDLOCK_ACCESS_FS_WRITE_FILE | \
|
||||
LANDLOCK_ACCESS_FS_READ_FILE | \
|
||||
LANDLOCK_ACCESS_FS_TRUNCATE | \
|
||||
LANDLOCK_ACCESS_FS_IOCTL_DEV)
|
||||
LANDLOCK_ACCESS_FS_IOCTL_DEV | \
|
||||
LANDLOCK_ACCESS_FS_RESOLVE_UNIX)
|
||||
|
||||
/* clang-format on */
|
||||
|
||||
@@ -295,11 +296,12 @@ out_unset:
|
||||
LANDLOCK_ACCESS_FS_MAKE_SYM | \
|
||||
LANDLOCK_ACCESS_FS_REFER | \
|
||||
LANDLOCK_ACCESS_FS_TRUNCATE | \
|
||||
LANDLOCK_ACCESS_FS_IOCTL_DEV)
|
||||
LANDLOCK_ACCESS_FS_IOCTL_DEV | \
|
||||
LANDLOCK_ACCESS_FS_RESOLVE_UNIX)
|
||||
|
||||
/* clang-format on */
|
||||
|
||||
#define LANDLOCK_ABI_LAST 8
|
||||
#define LANDLOCK_ABI_LAST 9
|
||||
|
||||
#define XSTR(s) #s
|
||||
#define STR(s) XSTR(s)
|
||||
@@ -438,6 +440,10 @@ int main(const int argc, char *const argv[], char *const *const envp)
|
||||
~LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON;
|
||||
__attribute__((fallthrough));
|
||||
case 7:
|
||||
case 8:
|
||||
/* Removes LANDLOCK_ACCESS_FS_RESOLVE_UNIX for ABI < 9 */
|
||||
ruleset_attr.handled_access_fs &=
|
||||
~LANDLOCK_ACCESS_FS_RESOLVE_UNIX;
|
||||
/* Must be printed for any ABI < LANDLOCK_ABI_LAST. */
|
||||
fprintf(stderr,
|
||||
"Hint: You should update the running kernel "
|
||||
|
||||
@@ -34,7 +34,7 @@
|
||||
LANDLOCK_ACCESS_FS_IOCTL_DEV)
|
||||
/* clang-format on */
|
||||
|
||||
typedef u16 access_mask_t;
|
||||
typedef u32 access_mask_t;
|
||||
|
||||
/* Makes sure all filesystem access rights can be stored. */
|
||||
static_assert(BITS_PER_TYPE(access_mask_t) >= LANDLOCK_NUM_ACCESS_FS);
|
||||
@@ -50,7 +50,7 @@ struct access_masks {
|
||||
access_mask_t fs : LANDLOCK_NUM_ACCESS_FS;
|
||||
access_mask_t net : LANDLOCK_NUM_ACCESS_NET;
|
||||
access_mask_t scope : LANDLOCK_NUM_SCOPE;
|
||||
};
|
||||
} __packed __aligned(sizeof(u32));
|
||||
|
||||
union access_masks_all {
|
||||
struct access_masks masks;
|
||||
|
||||
@@ -37,6 +37,7 @@ static const char *const fs_access_strings[] = {
|
||||
[BIT_INDEX(LANDLOCK_ACCESS_FS_REFER)] = "fs.refer",
|
||||
[BIT_INDEX(LANDLOCK_ACCESS_FS_TRUNCATE)] = "fs.truncate",
|
||||
[BIT_INDEX(LANDLOCK_ACCESS_FS_IOCTL_DEV)] = "fs.ioctl_dev",
|
||||
[BIT_INDEX(LANDLOCK_ACCESS_FS_RESOLVE_UNIX)] = "fs.resolve_unix",
|
||||
};
|
||||
|
||||
static_assert(ARRAY_SIZE(fs_access_strings) == LANDLOCK_NUM_ACCESS_FS);
|
||||
|
||||
@@ -22,10 +22,8 @@ static void hook_cred_transfer(struct cred *const new,
|
||||
const struct landlock_cred_security *const old_llcred =
|
||||
landlock_cred(old);
|
||||
|
||||
if (old_llcred->domain) {
|
||||
landlock_get_ruleset(old_llcred->domain);
|
||||
*landlock_cred(new) = *old_llcred;
|
||||
}
|
||||
landlock_get_ruleset(old_llcred->domain);
|
||||
*landlock_cred(new) = *old_llcred;
|
||||
}
|
||||
|
||||
static int hook_cred_prepare(struct cred *const new,
|
||||
|
||||
@@ -115,7 +115,7 @@ static inline bool landlocked(const struct task_struct *const task)
|
||||
* @handle_layer: returned youngest layer handling a subset of @masks. Not set
|
||||
* if the function returns NULL.
|
||||
*
|
||||
* Returns: landlock_cred(@cred) if any access rights specified in @masks is
|
||||
* Return: landlock_cred(@cred) if any access rights specified in @masks is
|
||||
* handled, or NULL otherwise.
|
||||
*/
|
||||
static inline const struct landlock_cred_security *
|
||||
|
||||
@@ -34,7 +34,7 @@
|
||||
* @exe_size: Returned size of @exe_str (including the trailing null
|
||||
* character), if any.
|
||||
*
|
||||
* Returns: A pointer to an allocated buffer where @exe_str point to, %NULL if
|
||||
* Return: A pointer to an allocated buffer where @exe_str points to, %NULL if
|
||||
* there is no executable path, or an error otherwise.
|
||||
*/
|
||||
static const void *get_current_exe(const char **const exe_str,
|
||||
@@ -73,7 +73,7 @@ static const void *get_current_exe(const char **const exe_str,
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns: A newly allocated object describing a domain, or an error
|
||||
* Return: A newly allocated object describing a domain, or an error
|
||||
* otherwise.
|
||||
*/
|
||||
static struct landlock_details *get_current_details(void)
|
||||
@@ -114,6 +114,8 @@ static struct landlock_details *get_current_details(void)
|
||||
* restriction. The subjective credentials must not be in an overridden state.
|
||||
*
|
||||
* @hierarchy->parent and @hierarchy->usage should already be set.
|
||||
*
|
||||
* Return: 0 on success, -errno on failure.
|
||||
*/
|
||||
int landlock_init_hierarchy_log(struct landlock_hierarchy *const hierarchy)
|
||||
{
|
||||
|
||||
@@ -27,6 +27,7 @@
|
||||
#include <linux/lsm_hooks.h>
|
||||
#include <linux/mount.h>
|
||||
#include <linux/namei.h>
|
||||
#include <linux/net.h>
|
||||
#include <linux/path.h>
|
||||
#include <linux/pid.h>
|
||||
#include <linux/rcupdate.h>
|
||||
@@ -36,6 +37,7 @@
|
||||
#include <linux/types.h>
|
||||
#include <linux/wait_bit.h>
|
||||
#include <linux/workqueue.h>
|
||||
#include <net/af_unix.h>
|
||||
#include <uapi/linux/fiemap.h>
|
||||
#include <uapi/linux/landlock.h>
|
||||
|
||||
@@ -119,8 +121,8 @@ static const struct landlock_object_underops landlock_fs_underops = {
|
||||
* Any new IOCTL commands that are implemented in fs/ioctl.c's do_vfs_ioctl()
|
||||
* should be considered for inclusion here.
|
||||
*
|
||||
* Returns: true if the IOCTL @cmd can not be restricted with Landlock for
|
||||
* device files.
|
||||
* Return: True if the IOCTL @cmd can not be restricted with Landlock for
|
||||
* device files, false otherwise.
|
||||
*/
|
||||
static __attribute_const__ bool is_masked_device_ioctl(const unsigned int cmd)
|
||||
{
|
||||
@@ -314,7 +316,8 @@ retry:
|
||||
LANDLOCK_ACCESS_FS_WRITE_FILE | \
|
||||
LANDLOCK_ACCESS_FS_READ_FILE | \
|
||||
LANDLOCK_ACCESS_FS_TRUNCATE | \
|
||||
LANDLOCK_ACCESS_FS_IOCTL_DEV)
|
||||
LANDLOCK_ACCESS_FS_IOCTL_DEV | \
|
||||
LANDLOCK_ACCESS_FS_RESOLVE_UNIX)
|
||||
/* clang-format on */
|
||||
|
||||
/*
|
||||
@@ -428,10 +431,10 @@ static bool may_refer(const struct layer_access_masks *const src_parent,
|
||||
* Check that a destination file hierarchy has more restrictions than a source
|
||||
* file hierarchy. This is only used for link and rename actions.
|
||||
*
|
||||
* Returns: true if child1 may be moved from parent1 to parent2 without
|
||||
* increasing its access rights. If child2 is set, an additional condition is
|
||||
* Return: True if child1 may be moved from parent1 to parent2 without
|
||||
* increasing its access rights (if child2 is set, an additional condition is
|
||||
* that child2 may be used from parent2 to parent1 without increasing its access
|
||||
* rights.
|
||||
* rights), false otherwise.
|
||||
*/
|
||||
static bool no_more_access(const struct layer_access_masks *const parent1,
|
||||
const struct layer_access_masks *const child1,
|
||||
@@ -564,7 +567,7 @@ static void test_no_more_access(struct kunit *const test)
|
||||
|
||||
static bool is_layer_masks_allowed(const struct layer_access_masks *masks)
|
||||
{
|
||||
return !memchr_inv(&masks->access, 0, sizeof(masks->access));
|
||||
return mem_is_zero(&masks->access, sizeof(masks->access));
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -734,9 +737,7 @@ static void test_is_eacces_with_write(struct kunit *const test)
|
||||
* checks that the collected accesses and the remaining ones are enough to
|
||||
* allow the request.
|
||||
*
|
||||
* Returns:
|
||||
* - true if the access request is granted;
|
||||
* - false otherwise.
|
||||
* Return: True if the access request is granted, false otherwise.
|
||||
*/
|
||||
static bool
|
||||
is_access_to_paths_allowed(const struct landlock_ruleset *const domain,
|
||||
@@ -1022,9 +1023,8 @@ static access_mask_t maybe_remove(const struct dentry *const dentry)
|
||||
* only handles walking on the same mount point and only checks one set of
|
||||
* accesses.
|
||||
*
|
||||
* Returns:
|
||||
* - true if all the domain access rights are allowed for @dir;
|
||||
* - false if the walk reached @mnt_root.
|
||||
* Return: True if all the domain access rights are allowed for @dir, false if
|
||||
* the walk reached @mnt_root.
|
||||
*/
|
||||
static bool collect_domain_accesses(const struct landlock_ruleset *const domain,
|
||||
const struct dentry *const mnt_root,
|
||||
@@ -1120,10 +1120,9 @@ static bool collect_domain_accesses(const struct landlock_ruleset *const domain,
|
||||
* ephemeral matrices take some space on the stack, which limits the number of
|
||||
* layers to a deemed reasonable number: 16.
|
||||
*
|
||||
* Returns:
|
||||
* - 0 if access is allowed;
|
||||
* - -EXDEV if @old_dentry would inherit new access rights from @new_dir;
|
||||
* - -EACCES if file removal or creation is denied.
|
||||
* Return: 0 if access is allowed, -EXDEV if @old_dentry would inherit new
|
||||
* access rights from @new_dir, or -EACCES if file removal or creation is
|
||||
* denied.
|
||||
*/
|
||||
static int current_check_refer_path(struct dentry *const old_dentry,
|
||||
const struct path *const new_dir,
|
||||
@@ -1561,6 +1560,133 @@ static int hook_path_truncate(const struct path *const path)
|
||||
return current_check_access_path(path, LANDLOCK_ACCESS_FS_TRUNCATE);
|
||||
}
|
||||
|
||||
/**
|
||||
* unmask_scoped_access - Remove access right bits in @masks in all layers
|
||||
* where @client and @server have the same domain
|
||||
*
|
||||
* This does the same as domain_is_scoped(), but unmasks bits in @masks.
|
||||
* Unlike domain_is_scoped(), it cannot return early.
|
||||
*
|
||||
* A scoped access for a given access right bit is allowed iff, for all layer
|
||||
* depths where the access bit is set, the client and server domain are the
|
||||
* same. This function clears the access rights @access in @masks at all layer
|
||||
* depths where the client and server domain are the same, so that, when they
|
||||
* are all cleared, the access is allowed.
|
||||
*
|
||||
* @client: Client domain
|
||||
* @server: Server domain
|
||||
* @masks: Layer access masks to unmask
|
||||
* @access: Access bits that control scoping
|
||||
*/
|
||||
static void unmask_scoped_access(const struct landlock_ruleset *const client,
|
||||
const struct landlock_ruleset *const server,
|
||||
struct layer_access_masks *const masks,
|
||||
const access_mask_t access)
|
||||
{
|
||||
int client_layer, server_layer;
|
||||
const struct landlock_hierarchy *client_walker, *server_walker;
|
||||
|
||||
/* This should not happen. */
|
||||
if (WARN_ON_ONCE(!client))
|
||||
return;
|
||||
|
||||
/* Server has no Landlock domain; nothing to clear. */
|
||||
if (!server)
|
||||
return;
|
||||
|
||||
/*
|
||||
* client_layer must be able to represent all numbers from
|
||||
* LANDLOCK_MAX_NUM_LAYERS - 1 to -1 for the loop below to terminate.
|
||||
* (It must be large enough, and it must be signed.)
|
||||
*/
|
||||
BUILD_BUG_ON(!is_signed_type(typeof(client_layer)));
|
||||
BUILD_BUG_ON(LANDLOCK_MAX_NUM_LAYERS - 1 >
|
||||
type_max(typeof(client_layer)));
|
||||
|
||||
client_layer = client->num_layers - 1;
|
||||
client_walker = client->hierarchy;
|
||||
server_layer = server->num_layers - 1;
|
||||
server_walker = server->hierarchy;
|
||||
|
||||
/*
|
||||
* Clears the access bits at all layers where the client domain is the
|
||||
* same as the server domain. We start the walk at min(client_layer,
|
||||
* server_layer). The bits of the deeper layers cannot be cleared because
|
||||
* either the client or the server domain is missing there.
|
||||
*/
|
||||
for (; client_layer > server_layer; client_layer--)
|
||||
client_walker = client_walker->parent;
|
||||
|
||||
for (; server_layer > client_layer; server_layer--)
|
||||
server_walker = server_walker->parent;
|
||||
|
||||
for (; client_layer >= 0; client_layer--) {
|
||||
if (masks->access[client_layer] & access &&
|
||||
client_walker == server_walker)
|
||||
masks->access[client_layer] &= ~access;
|
||||
|
||||
client_walker = client_walker->parent;
|
||||
server_walker = server_walker->parent;
|
||||
}
|
||||
}
|
||||
|
||||
static int hook_unix_find(const struct path *const path, struct sock *other,
|
||||
int flags)
|
||||
{
|
||||
const struct landlock_ruleset *dom_other;
|
||||
const struct landlock_cred_security *subject;
|
||||
struct layer_access_masks layer_masks;
|
||||
struct landlock_request request = {};
|
||||
static const struct access_masks fs_resolve_unix = {
|
||||
.fs = LANDLOCK_ACCESS_FS_RESOLVE_UNIX,
|
||||
};
|
||||
|
||||
/* Lookup for the purpose of saving coredumps is OK. */
|
||||
if (unlikely(flags & SOCK_COREDUMP))
|
||||
return 0;
|
||||
|
||||
subject = landlock_get_applicable_subject(current_cred(),
|
||||
fs_resolve_unix, NULL);
|
||||
|
||||
if (!subject)
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* Ignoring return value: whether the domain handles this access right
|
||||
* was already checked by landlock_get_applicable_subject() above.
|
||||
*/
|
||||
landlock_init_layer_masks(subject->domain, fs_resolve_unix.fs,
|
||||
&layer_masks, LANDLOCK_KEY_INODE);
|
||||
|
||||
/* Checks the layers in which we are connecting within the same domain. */
|
||||
unix_state_lock(other);
|
||||
if (unlikely(sock_flag(other, SOCK_DEAD) || !other->sk_socket ||
|
||||
!other->sk_socket->file)) {
|
||||
unix_state_unlock(other);
|
||||
/*
|
||||
* We rely on the caller to catch the (non-reversible) SOCK_DEAD
|
||||
* condition and retry the lookup. If we returned an error
|
||||
* here, the lookup would not get retried.
|
||||
*/
|
||||
return 0;
|
||||
}
|
||||
dom_other = landlock_cred(other->sk_socket->file->f_cred)->domain;
|
||||
|
||||
/* Access to the same (or a lower) domain is always allowed. */
|
||||
unmask_scoped_access(subject->domain, dom_other, &layer_masks,
|
||||
fs_resolve_unix.fs);
|
||||
unix_state_unlock(other);
|
||||
|
||||
/* Checks the connections to allow-listed paths. */
|
||||
if (is_access_to_paths_allowed(subject->domain, path,
|
||||
fs_resolve_unix.fs, &layer_masks,
|
||||
&request, NULL, 0, NULL, NULL, NULL))
|
||||
return 0;
|
||||
|
||||
landlock_log_denial(subject, &request);
|
||||
return -EACCES;
|
||||
}
|
||||
|
||||
/* File hooks */
|
||||
|
||||
/**
|
||||
@@ -1568,7 +1694,7 @@ static int hook_path_truncate(const struct path *const path)
|
||||
*
|
||||
* @file: File being opened.
|
||||
*
|
||||
* Returns the access rights that are required for opening the given file,
|
||||
* Return: The access rights that are required for opening the given file,
|
||||
* depending on the file type and open mode.
|
||||
*/
|
||||
static access_mask_t
|
||||
@@ -1838,6 +1964,7 @@ static struct security_hook_list landlock_hooks[] __ro_after_init = {
|
||||
LSM_HOOK_INIT(path_unlink, hook_path_unlink),
|
||||
LSM_HOOK_INIT(path_rmdir, hook_path_rmdir),
|
||||
LSM_HOOK_INIT(path_truncate, hook_path_truncate),
|
||||
LSM_HOOK_INIT(unix_find, hook_unix_find),
|
||||
|
||||
LSM_HOOK_INIT(file_alloc_security, hook_file_alloc_security),
|
||||
LSM_HOOK_INIT(file_open, hook_file_open),
|
||||
|
||||
@@ -258,7 +258,7 @@ static void test_range2_rand16(struct kunit *const test)
|
||||
*
|
||||
* @number_of_ids: Number of IDs to hold. Must be greater than one.
|
||||
*
|
||||
* Returns: The first ID in the range.
|
||||
* Return: The first ID in the range.
|
||||
*/
|
||||
u64 landlock_get_id_range(size_t number_of_ids)
|
||||
{
|
||||
|
||||
@@ -19,7 +19,7 @@
|
||||
#define LANDLOCK_MAX_NUM_LAYERS 16
|
||||
#define LANDLOCK_MAX_NUM_RULES U32_MAX
|
||||
|
||||
#define LANDLOCK_LAST_ACCESS_FS LANDLOCK_ACCESS_FS_IOCTL_DEV
|
||||
#define LANDLOCK_LAST_ACCESS_FS LANDLOCK_ACCESS_FS_RESOLVE_UNIX
|
||||
#define LANDLOCK_MASK_ACCESS_FS ((LANDLOCK_LAST_ACCESS_FS << 1) - 1)
|
||||
#define LANDLOCK_NUM_ACCESS_FS __const_hweight64(LANDLOCK_MASK_ACCESS_FS)
|
||||
|
||||
|
||||
@@ -107,7 +107,7 @@ static bool is_object_pointer(const enum landlock_key_type key_type)
|
||||
|
||||
static struct landlock_rule *
|
||||
create_rule(const struct landlock_id id,
|
||||
const struct landlock_layer (*const layers)[], const u32 num_layers,
|
||||
const struct landlock_layer (*layers)[], const u32 num_layers,
|
||||
const struct landlock_layer *const new_layer)
|
||||
{
|
||||
struct landlock_rule *new_rule;
|
||||
@@ -201,10 +201,12 @@ static void build_check_ruleset(void)
|
||||
* When merging a ruleset in a domain, or copying a domain, @layers will be
|
||||
* added to @ruleset as new constraints, similarly to a boolean AND between
|
||||
* access rights.
|
||||
*
|
||||
* Return: 0 on success, -errno on failure.
|
||||
*/
|
||||
static int insert_rule(struct landlock_ruleset *const ruleset,
|
||||
const struct landlock_id id,
|
||||
const struct landlock_layer (*const layers)[],
|
||||
const struct landlock_layer (*layers)[],
|
||||
const size_t num_layers)
|
||||
{
|
||||
struct rb_node **walker_node;
|
||||
@@ -530,8 +532,8 @@ void landlock_put_ruleset_deferred(struct landlock_ruleset *const ruleset)
|
||||
* The current task is requesting to be restricted. The subjective credentials
|
||||
* must not be in an overridden state. cf. landlock_init_hierarchy_log().
|
||||
*
|
||||
* Returns the intersection of @parent and @ruleset, or returns @parent if
|
||||
* @ruleset is empty, or returns a duplicate of @ruleset if @parent is empty.
|
||||
* Return: A new domain merging @parent and @ruleset on success, or ERR_PTR()
|
||||
* on failure. If @parent is NULL, the new domain duplicates @ruleset.
|
||||
*/
|
||||
struct landlock_ruleset *
|
||||
landlock_merge_ruleset(struct landlock_ruleset *const parent,
|
||||
@@ -622,7 +624,7 @@ landlock_find_rule(const struct landlock_ruleset *const ruleset,
|
||||
* @rule: A rule that grants a set of access rights for each layer
|
||||
* @masks: A matrix of unfulfilled access rights for each layer
|
||||
*
|
||||
* Returns true if the request is allowed (i.e. the access rights granted all
|
||||
* Return: True if the request is allowed (i.e. the access rights granted all
|
||||
* remaining unfulfilled access rights and masks has no leftover set bits).
|
||||
*/
|
||||
bool landlock_unmask_layers(const struct landlock_rule *const rule,
|
||||
@@ -672,7 +674,7 @@ get_access_mask_t(const struct landlock_ruleset *const ruleset,
|
||||
* @masks: Layer access masks to populate.
|
||||
* @key_type: The key type to switch between access masks of different types.
|
||||
*
|
||||
* Returns: An access mask where each access right bit is set which is handled
|
||||
* Return: An access mask where each access right bit is set which is handled
|
||||
* in any of the active layers in @domain.
|
||||
*/
|
||||
access_mask_t
|
||||
|
||||
@@ -224,7 +224,7 @@ static inline void landlock_get_ruleset(struct landlock_ruleset *const ruleset)
|
||||
*
|
||||
* @domain: Landlock ruleset (used as a domain)
|
||||
*
|
||||
* Returns: an access_masks result of the OR of all the domain's access masks.
|
||||
* Return: An access_masks result of the OR of all the domain's access masks.
|
||||
*/
|
||||
static inline struct access_masks
|
||||
landlock_union_access_masks(const struct landlock_ruleset *const domain)
|
||||
|
||||
@@ -60,6 +60,8 @@ static bool is_initialized(void)
|
||||
* @ksize_min: Minimal required size to be copied.
|
||||
* @src: User space pointer or NULL.
|
||||
* @usize: (Alleged) size of the data pointed to by @src.
|
||||
*
|
||||
* Return: 0 on success, -errno on failure.
|
||||
*/
|
||||
static __always_inline int
|
||||
copy_min_struct_from_user(void *const dst, const size_t ksize,
|
||||
@@ -164,7 +166,7 @@ static const struct file_operations ruleset_fops = {
|
||||
* If the change involves a fix that requires userspace awareness, also update
|
||||
* the errata documentation in Documentation/userspace-api/landlock.rst .
|
||||
*/
|
||||
const int landlock_abi_version = 8;
|
||||
const int landlock_abi_version = 9;
|
||||
|
||||
/**
|
||||
* sys_landlock_create_ruleset - Create a new ruleset
|
||||
@@ -178,16 +180,19 @@ const int landlock_abi_version = 8;
|
||||
* - %LANDLOCK_CREATE_RULESET_VERSION
|
||||
* - %LANDLOCK_CREATE_RULESET_ERRATA
|
||||
*
|
||||
* This system call enables to create a new Landlock ruleset, and returns the
|
||||
* related file descriptor on success.
|
||||
* This system call enables to create a new Landlock ruleset.
|
||||
*
|
||||
* If %LANDLOCK_CREATE_RULESET_VERSION or %LANDLOCK_CREATE_RULESET_ERRATA is
|
||||
* set, then @attr must be NULL and @size must be 0.
|
||||
*
|
||||
* Possible returned errors are:
|
||||
* Return: The ruleset file descriptor on success, the Landlock ABI version if
|
||||
* %LANDLOCK_CREATE_RULESET_VERSION is set, the errata value if
|
||||
* %LANDLOCK_CREATE_RULESET_ERRATA is set, or -errno on failure. Possible
|
||||
* returned errors are:
|
||||
*
|
||||
* - %EOPNOTSUPP: Landlock is supported by the kernel but disabled at boot time;
|
||||
* - %EINVAL: unknown @flags, or unknown access, or unknown scope, or too small @size;
|
||||
* - %EINVAL: unknown @flags, or unknown access, or unknown scope, or too small
|
||||
* @size;
|
||||
* - %E2BIG: @attr or @size inconsistencies;
|
||||
* - %EFAULT: @attr or @size inconsistencies;
|
||||
* - %ENOMSG: empty &landlock_ruleset_attr.handled_access_fs.
|
||||
@@ -398,7 +403,7 @@ static int add_rule_net_port(struct landlock_ruleset *ruleset,
|
||||
* This system call enables to define a new rule and add it to an existing
|
||||
* ruleset.
|
||||
*
|
||||
* Possible returned errors are:
|
||||
* Return: 0 on success, or -errno on failure. Possible returned errors are:
|
||||
*
|
||||
* - %EOPNOTSUPP: Landlock is supported by the kernel but disabled at boot time;
|
||||
* - %EAFNOSUPPORT: @rule_type is %LANDLOCK_RULE_NET_PORT but TCP/IP is not
|
||||
@@ -464,7 +469,7 @@ SYSCALL_DEFINE4(landlock_add_rule, const int, ruleset_fd,
|
||||
* namespace or is running with no_new_privs. This avoids scenarios where
|
||||
* unprivileged tasks can affect the behavior of privileged children.
|
||||
*
|
||||
* Possible returned errors are:
|
||||
* Return: 0 on success, or -errno on failure. Possible returned errors are:
|
||||
*
|
||||
* - %EOPNOTSUPP: Landlock is supported by the kernel but disabled at boot time;
|
||||
* - %EINVAL: @flags contains an unknown bit.
|
||||
@@ -512,10 +517,13 @@ SYSCALL_DEFINE2(landlock_restrict_self, const int, ruleset_fd, const __u32,
|
||||
|
||||
/*
|
||||
* It is allowed to set LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF with
|
||||
* -1 as ruleset_fd, but no other flag must be set.
|
||||
* -1 as ruleset_fd, optionally combined with
|
||||
* LANDLOCK_RESTRICT_SELF_TSYNC to propagate this configuration to all
|
||||
* threads. No other flag must be set.
|
||||
*/
|
||||
if (!(ruleset_fd == -1 &&
|
||||
flags == LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF)) {
|
||||
(flags & ~LANDLOCK_RESTRICT_SELF_TSYNC) ==
|
||||
LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF)) {
|
||||
/* Gets and checks the ruleset. */
|
||||
ruleset = get_ruleset_from_fd(ruleset_fd, FMODE_CAN_READ);
|
||||
if (IS_ERR(ruleset))
|
||||
@@ -537,9 +545,10 @@ SYSCALL_DEFINE2(landlock_restrict_self, const int, ruleset_fd, const __u32,
|
||||
|
||||
/*
|
||||
* The only case when a ruleset may not be set is if
|
||||
* LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF is set and ruleset_fd is -1.
|
||||
* We could optimize this case by not calling commit_creds() if this flag
|
||||
* was already set, but it is not worth the complexity.
|
||||
* LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF is set (optionally with
|
||||
* LANDLOCK_RESTRICT_SELF_TSYNC) and ruleset_fd is -1. We could
|
||||
* optimize this case by not calling commit_creds() if this flag was
|
||||
* already set, but it is not worth the complexity.
|
||||
*/
|
||||
if (ruleset) {
|
||||
/*
|
||||
|
||||
@@ -37,6 +37,9 @@
|
||||
*
|
||||
* Checks if the @parent domain is less or equal to (i.e. an ancestor, which
|
||||
* means a subset of) the @child domain.
|
||||
*
|
||||
* Return: True if @parent is an ancestor of or equal to @child, false
|
||||
* otherwise.
|
||||
*/
|
||||
static bool domain_scope_le(const struct landlock_ruleset *const parent,
|
||||
const struct landlock_ruleset *const child)
|
||||
@@ -79,8 +82,7 @@ static int domain_ptrace(const struct landlock_ruleset *const parent,
|
||||
* If the current task has Landlock rules, then the child must have at least
|
||||
* the same rules. Else denied.
|
||||
*
|
||||
* Determines whether a process may access another, returning 0 if permission
|
||||
* granted, -errno if denied.
|
||||
* Return: 0 if permission is granted, -errno if denied.
|
||||
*/
|
||||
static int hook_ptrace_access_check(struct task_struct *const child,
|
||||
const unsigned int mode)
|
||||
@@ -129,8 +131,7 @@ static int hook_ptrace_access_check(struct task_struct *const child,
|
||||
* If the parent has Landlock rules, then the current task must have the same
|
||||
* or more rules. Else denied.
|
||||
*
|
||||
* Determines whether the nominated task is permitted to trace the current
|
||||
* process, returning 0 if permission is granted, -errno if denied.
|
||||
* Return: 0 if permission is granted, -errno if denied.
|
||||
*/
|
||||
static int hook_ptrace_traceme(struct task_struct *const parent)
|
||||
{
|
||||
@@ -173,8 +174,8 @@ static int hook_ptrace_traceme(struct task_struct *const parent)
|
||||
* @server: IPC receiver domain.
|
||||
* @scope: The scope restriction criteria.
|
||||
*
|
||||
* Returns: True if @server is in a different domain from @client, and @client
|
||||
* is scoped to access @server (i.e. access should be denied).
|
||||
* Return: True if @server is in a different domain from @client and @client
|
||||
* is scoped to access @server (i.e. access should be denied), false otherwise.
|
||||
*/
|
||||
static bool domain_is_scoped(const struct landlock_ruleset *const client,
|
||||
const struct landlock_ruleset *const server,
|
||||
@@ -190,10 +191,13 @@ static bool domain_is_scoped(const struct landlock_ruleset *const client,
|
||||
client_layer = client->num_layers - 1;
|
||||
client_walker = client->hierarchy;
|
||||
/*
|
||||
* client_layer must be a signed integer with greater capacity
|
||||
* than client->num_layers to ensure the following loop stops.
|
||||
* client_layer must be able to represent all numbers from
|
||||
* LANDLOCK_MAX_NUM_LAYERS - 1 to -1 for the loop below to terminate.
|
||||
* (It must be large enough, and it must be signed.)
|
||||
*/
|
||||
BUILD_BUG_ON(sizeof(client_layer) > sizeof(client->num_layers));
|
||||
BUILD_BUG_ON(!is_signed_type(typeof(client_layer)));
|
||||
BUILD_BUG_ON(LANDLOCK_MAX_NUM_LAYERS - 1 >
|
||||
type_max(typeof(client_layer)));
|
||||
|
||||
server_layer = server ? (server->num_layers - 1) : -1;
|
||||
server_walker = server ? server->hierarchy : NULL;
|
||||
|
||||
@@ -85,12 +85,14 @@ static void restrict_one_thread(struct tsync_shared_context *ctx)
|
||||
/*
|
||||
* Switch out old_cred with new_cred, if possible.
|
||||
*
|
||||
* In the common case, where all threads initially point to the same
|
||||
* struct cred, this optimization avoids creating separate redundant
|
||||
* credentials objects for each, which would all have the same contents.
|
||||
* In the common case, where all threads initially point to the
|
||||
* same struct cred, this optimization avoids creating separate
|
||||
* redundant credentials objects for each, which would all have
|
||||
* the same contents.
|
||||
*
|
||||
* Note: We are intentionally dropping the const qualifier here, because
|
||||
* it is required by commit_creds() and abort_creds().
|
||||
* Note: We are intentionally dropping the const qualifier
|
||||
* here, because it is required by commit_creds() and
|
||||
* abort_creds().
|
||||
*/
|
||||
cred = (struct cred *)get_cred(ctx->new_cred);
|
||||
} else {
|
||||
@@ -101,8 +103,8 @@ static void restrict_one_thread(struct tsync_shared_context *ctx)
|
||||
atomic_set(&ctx->preparation_error, -ENOMEM);
|
||||
|
||||
/*
|
||||
* Even on error, we need to adhere to the protocol and coordinate
|
||||
* with concurrently running invocations.
|
||||
* Even on error, we need to adhere to the protocol and
|
||||
* coordinate with concurrently running invocations.
|
||||
*/
|
||||
if (atomic_dec_return(&ctx->num_preparing) == 0)
|
||||
complete_all(&ctx->all_prepared);
|
||||
@@ -135,9 +137,9 @@ static void restrict_one_thread(struct tsync_shared_context *ctx)
|
||||
}
|
||||
|
||||
/*
|
||||
* Make sure that all sibling tasks fulfill the no_new_privs prerequisite.
|
||||
* (This is in line with Seccomp's SECCOMP_FILTER_FLAG_TSYNC logic in
|
||||
* kernel/seccomp.c)
|
||||
* Make sure that all sibling tasks fulfill the no_new_privs
|
||||
* prerequisite. (This is in line with Seccomp's
|
||||
* SECCOMP_FILTER_FLAG_TSYNC logic in kernel/seccomp.c)
|
||||
*/
|
||||
if (ctx->set_no_new_privs)
|
||||
task_set_no_new_privs(current);
|
||||
@@ -183,10 +185,8 @@ struct tsync_works {
|
||||
* capacity. This can legitimately happen if new threads get started after we
|
||||
* grew the capacity.
|
||||
*
|
||||
* Returns:
|
||||
* A pointer to the preallocated context struct, with task filled in.
|
||||
*
|
||||
* NULL, if we ran out of preallocated context structs.
|
||||
* Return: A pointer to the preallocated context struct with task filled in, or
|
||||
* NULL if preallocated context structs ran out.
|
||||
*/
|
||||
static struct tsync_work *tsync_works_provide(struct tsync_works *s,
|
||||
struct task_struct *task)
|
||||
@@ -223,16 +223,17 @@ static void tsync_works_trim(struct tsync_works *s)
|
||||
ctx = s->works[s->size - 1];
|
||||
|
||||
/*
|
||||
* For consistency, remove the task from ctx so that it does not look like
|
||||
* we handed it a task_work.
|
||||
* For consistency, remove the task from ctx so that it does not look
|
||||
* like we handed it a task_work.
|
||||
*/
|
||||
put_task_struct(ctx->task);
|
||||
*ctx = (typeof(*ctx)){};
|
||||
|
||||
/*
|
||||
* Cancel the tsync_works_provide() change to recycle the reserved memory
|
||||
* for the next thread, if any. This also ensures that cancel_tsync_works()
|
||||
* and tsync_works_release() do not see any NULL task pointers.
|
||||
* Cancel the tsync_works_provide() change to recycle the reserved
|
||||
* memory for the next thread, if any. This also ensures that
|
||||
* cancel_tsync_works() and tsync_works_release() do not see any NULL
|
||||
* task pointers.
|
||||
*/
|
||||
s->size--;
|
||||
}
|
||||
@@ -243,11 +244,8 @@ static void tsync_works_trim(struct tsync_works *s)
|
||||
* On a successful return, the subsequent n calls to tsync_works_provide() are
|
||||
* guaranteed to succeed. (size + n <= capacity)
|
||||
*
|
||||
* Returns:
|
||||
* -ENOMEM if the (re)allocation fails
|
||||
|
||||
* 0 if the allocation succeeds, partially succeeds, or no reallocation
|
||||
* was needed
|
||||
* Return: 0 if sufficient space for n more elements could be provided, -ENOMEM
|
||||
* on allocation errors, -EOVERFLOW in case of integer overflow.
|
||||
*/
|
||||
static int tsync_works_grow_by(struct tsync_works *s, size_t n, gfp_t flags)
|
||||
{
|
||||
@@ -363,8 +361,8 @@ static size_t count_additional_threads(const struct tsync_works *works)
|
||||
* For each added task_work, atomically increments shared_ctx->num_preparing and
|
||||
* shared_ctx->num_unfinished.
|
||||
*
|
||||
* Returns:
|
||||
* true, if at least one eligible sibling thread was found
|
||||
* Return: True if at least one eligible sibling thread was found, false
|
||||
* otherwise.
|
||||
*/
|
||||
static bool schedule_task_work(struct tsync_works *works,
|
||||
struct tsync_shared_context *shared_ctx)
|
||||
@@ -393,17 +391,17 @@ static bool schedule_task_work(struct tsync_works *works,
|
||||
continue;
|
||||
|
||||
/*
|
||||
* We found a sibling thread that is not doing its task_work yet, and
|
||||
* which might spawn new threads before our task work runs, so we need
|
||||
* at least one more round in the outer loop.
|
||||
* We found a sibling thread that is not doing its task_work
|
||||
* yet, and which might spawn new threads before our task work
|
||||
* runs, so we need at least one more round in the outer loop.
|
||||
*/
|
||||
found_more_threads = true;
|
||||
|
||||
ctx = tsync_works_provide(works, thread);
|
||||
if (!ctx) {
|
||||
/*
|
||||
* We ran out of preallocated contexts -- we need to try again with
|
||||
* this thread at a later time!
|
||||
* We ran out of preallocated contexts -- we need to
|
||||
* try again with this thread at a later time!
|
||||
* found_more_threads is already true at this point.
|
||||
*/
|
||||
break;
|
||||
@@ -418,10 +416,10 @@ static bool schedule_task_work(struct tsync_works *works,
|
||||
err = task_work_add(thread, &ctx->work, TWA_SIGNAL);
|
||||
if (unlikely(err)) {
|
||||
/*
|
||||
* task_work_add() only fails if the task is about to exit. We
|
||||
* checked that earlier, but it can happen as a race. Resume
|
||||
* without setting an error, as the task is probably gone in the
|
||||
* next loop iteration.
|
||||
* task_work_add() only fails if the task is about to
|
||||
* exit. We checked that earlier, but it can happen as
|
||||
* a race. Resume without setting an error, as the
|
||||
* task is probably gone in the next loop iteration.
|
||||
*/
|
||||
tsync_works_trim(works);
|
||||
|
||||
@@ -512,24 +510,25 @@ int landlock_restrict_sibling_threads(const struct cred *old_cred,
|
||||
* After this barrier is reached, it's safe to read
|
||||
* shared_ctx.preparation_error.
|
||||
*
|
||||
* 4) reads shared_ctx.preparation_error and then either does commit_creds()
|
||||
* or abort_creds().
|
||||
* 4) reads shared_ctx.preparation_error and then either does
|
||||
* commit_creds() or abort_creds().
|
||||
*
|
||||
* 5) signals that it's done altogether (barrier synchronization
|
||||
* "all_finished")
|
||||
*
|
||||
* Unlike seccomp, which modifies sibling tasks directly, we do not need to
|
||||
* acquire the cred_guard_mutex and sighand->siglock:
|
||||
* Unlike seccomp, which modifies sibling tasks directly, we do not
|
||||
* need to acquire the cred_guard_mutex and sighand->siglock:
|
||||
*
|
||||
* - As in our case, all threads are themselves exchanging their own struct
|
||||
* cred through the credentials API, no locks are needed for that.
|
||||
* - As in our case, all threads are themselves exchanging their own
|
||||
* struct cred through the credentials API, no locks are needed for
|
||||
* that.
|
||||
* - Our for_each_thread() loops are protected by RCU.
|
||||
* - We do not acquire a lock to keep the list of sibling threads stable
|
||||
* between our for_each_thread loops. If the list of available sibling
|
||||
* threads changes between these for_each_thread loops, we make up for
|
||||
* that by continuing to look for threads until they are all discovered
|
||||
* and have entered their task_work, where they are unable to spawn new
|
||||
* threads.
|
||||
* - We do not acquire a lock to keep the list of sibling threads
|
||||
* stable between our for_each_thread loops. If the list of
|
||||
* available sibling threads changes between these for_each_thread
|
||||
* loops, we make up for that by continuing to look for threads until
|
||||
* they are all discovered and have entered their task_work, where
|
||||
* they are unable to spawn new threads.
|
||||
*/
|
||||
do {
|
||||
/* In RCU read-lock, count the threads we need. */
|
||||
@@ -546,31 +545,36 @@ int landlock_restrict_sibling_threads(const struct cred *old_cred,
|
||||
}
|
||||
|
||||
/*
|
||||
* The "all_prepared" barrier is used locally to the loop body, this use
|
||||
* of for_each_thread(). We can reset it on each loop iteration because
|
||||
* all previous loop iterations are done with it already.
|
||||
* The "all_prepared" barrier is used locally to the loop body,
|
||||
* this use of for_each_thread(). We can reset it on each loop
|
||||
* iteration because all previous loop iterations are done with
|
||||
* it already.
|
||||
*
|
||||
* num_preparing is initialized to 1 so that the counter can not go to 0
|
||||
* and mark the completion as done before all task works are registered.
|
||||
* We decrement it at the end of the loop body.
|
||||
* num_preparing is initialized to 1 so that the counter can
|
||||
* not go to 0 and mark the completion as done before all task
|
||||
* works are registered. We decrement it at the end of the
|
||||
* loop body.
|
||||
*/
|
||||
atomic_set(&shared_ctx.num_preparing, 1);
|
||||
reinit_completion(&shared_ctx.all_prepared);
|
||||
|
||||
/*
|
||||
* In RCU read-lock, schedule task work on newly discovered sibling
|
||||
* tasks.
|
||||
* In RCU read-lock, schedule task work on newly discovered
|
||||
* sibling tasks.
|
||||
*/
|
||||
found_more_threads = schedule_task_work(&works, &shared_ctx);
|
||||
|
||||
/*
|
||||
* Decrement num_preparing for current, to undo that we initialized it
|
||||
* to 1 a few lines above.
|
||||
* Decrement num_preparing for current, to undo that we
|
||||
* initialized it to 1 a few lines above.
|
||||
*/
|
||||
if (atomic_dec_return(&shared_ctx.num_preparing) > 0) {
|
||||
if (wait_for_completion_interruptible(
|
||||
&shared_ctx.all_prepared)) {
|
||||
/* In case of interruption, we need to retry the system call. */
|
||||
/*
|
||||
* In case of interruption, we need to retry
|
||||
* the system call.
|
||||
*/
|
||||
atomic_set(&shared_ctx.preparation_error,
|
||||
-ERESTARTNOINTR);
|
||||
|
||||
@@ -603,8 +607,8 @@ int landlock_restrict_sibling_threads(const struct cred *old_cred,
|
||||
complete_all(&shared_ctx.ready_to_commit);
|
||||
|
||||
/*
|
||||
* Decrement num_unfinished for current, to undo that we initialized it to 1
|
||||
* at the beginning.
|
||||
* Decrement num_unfinished for current, to undo that we initialized it
|
||||
* to 1 at the beginning.
|
||||
*/
|
||||
if (atomic_dec_return(&shared_ctx.num_unfinished) > 0)
|
||||
wait_for_completion(&shared_ctx.all_finished);
|
||||
|
||||
@@ -4834,6 +4834,26 @@ int security_mptcp_add_subflow(struct sock *sk, struct sock *ssk)
|
||||
|
||||
#endif /* CONFIG_SECURITY_NETWORK */
|
||||
|
||||
#if defined(CONFIG_SECURITY_NETWORK) && defined(CONFIG_SECURITY_PATH)
|
||||
/**
|
||||
* security_unix_find() - Check if a named AF_UNIX socket can connect
|
||||
* @path: path of the socket being connected to
|
||||
* @other: peer sock
|
||||
* @flags: flags associated with the socket
|
||||
*
|
||||
* This hook is called to check permissions before connecting to a named
|
||||
* AF_UNIX socket. The caller does not hold any locks on @other.
|
||||
*
|
||||
* Return: Returns 0 if permission is granted.
|
||||
*/
|
||||
int security_unix_find(const struct path *path, struct sock *other, int flags)
|
||||
{
|
||||
return call_int_hook(unix_find, path, other, flags);
|
||||
}
|
||||
EXPORT_SYMBOL(security_unix_find);
|
||||
|
||||
#endif /* CONFIG_SECURITY_NETWORK && CONFIG_SECURITY_PATH */
|
||||
|
||||
#ifdef CONFIG_SECURITY_INFINIBAND
|
||||
/**
|
||||
* security_ib_pkey_access() - Check if access to an IB pkey is allowed
|
||||
|
||||
@@ -249,9 +249,9 @@ static __maybe_unused char *regex_escape(const char *const src, char *dst,
|
||||
static int audit_match_record(int audit_fd, const __u16 type,
|
||||
const char *const pattern, __u64 *domain_id)
|
||||
{
|
||||
struct audit_message msg;
|
||||
struct audit_message msg, last_mismatch = {};
|
||||
int ret, err = 0;
|
||||
bool matches_record = !type;
|
||||
int num_type_match = 0;
|
||||
regmatch_t matches[2];
|
||||
regex_t regex;
|
||||
|
||||
@@ -259,21 +259,35 @@ static int audit_match_record(int audit_fd, const __u16 type,
|
||||
if (ret)
|
||||
return -EINVAL;
|
||||
|
||||
do {
|
||||
/*
|
||||
* Reads records until one matches both the expected type and the
|
||||
* pattern. Type-matching records with non-matching content are
|
||||
* silently consumed, which handles stale domain deallocation records
|
||||
* from a previous test emitted asynchronously by kworker threads.
|
||||
*/
|
||||
while (true) {
|
||||
memset(&msg, 0, sizeof(msg));
|
||||
err = audit_recv(audit_fd, &msg);
|
||||
if (err)
|
||||
if (err) {
|
||||
if (num_type_match) {
|
||||
printf("DATA: %s\n", last_mismatch.data);
|
||||
printf("ERROR: %d record(s) matched type %u"
|
||||
" but not pattern: %s\n",
|
||||
num_type_match, type, pattern);
|
||||
}
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (msg.header.nlmsg_type == type)
|
||||
matches_record = true;
|
||||
} while (!matches_record);
|
||||
if (type && msg.header.nlmsg_type != type)
|
||||
continue;
|
||||
|
||||
ret = regexec(&regex, msg.data, ARRAY_SIZE(matches), matches, 0);
|
||||
if (ret) {
|
||||
printf("DATA: %s\n", msg.data);
|
||||
printf("ERROR: no match for pattern: %s\n", pattern);
|
||||
err = -ENOENT;
|
||||
ret = regexec(&regex, msg.data, ARRAY_SIZE(matches), matches,
|
||||
0);
|
||||
if (!ret)
|
||||
break;
|
||||
|
||||
num_type_match++;
|
||||
last_mismatch = msg;
|
||||
}
|
||||
|
||||
if (domain_id) {
|
||||
@@ -309,28 +323,56 @@ static int __maybe_unused matches_log_domain_allocated(int audit_fd, pid_t pid,
|
||||
|
||||
log_match_len =
|
||||
snprintf(log_match, sizeof(log_match), log_template, pid);
|
||||
if (log_match_len > sizeof(log_match))
|
||||
if (log_match_len >= sizeof(log_match))
|
||||
return -E2BIG;
|
||||
|
||||
return audit_match_record(audit_fd, AUDIT_LANDLOCK_DOMAIN, log_match,
|
||||
domain_id);
|
||||
}
|
||||
|
||||
static int __maybe_unused matches_log_domain_deallocated(
|
||||
int audit_fd, unsigned int num_denials, __u64 *domain_id)
|
||||
/*
|
||||
* Matches a domain deallocation record. When expected_domain_id is non-zero,
|
||||
* the pattern includes the specific domain ID so that stale deallocation
|
||||
* records from a previous test (with a different domain ID) are skipped by
|
||||
* audit_match_record(), and the socket timeout is temporarily increased to
|
||||
* audit_tv_dom_drop to wait for the asynchronous kworker deallocation.
|
||||
*/
|
||||
static int __maybe_unused
|
||||
matches_log_domain_deallocated(int audit_fd, unsigned int num_denials,
|
||||
__u64 expected_domain_id, __u64 *domain_id)
|
||||
{
|
||||
static const char log_template[] = REGEX_LANDLOCK_PREFIX
|
||||
" status=deallocated denials=%u$";
|
||||
char log_match[sizeof(log_template) + 10];
|
||||
int log_match_len;
|
||||
static const char log_template_with_id[] =
|
||||
"^audit([0-9.:]\\+): domain=\\(%llx\\)"
|
||||
" status=deallocated denials=%u$";
|
||||
char log_match[sizeof(log_template_with_id) + 32];
|
||||
int log_match_len, err;
|
||||
|
||||
log_match_len = snprintf(log_match, sizeof(log_match), log_template,
|
||||
num_denials);
|
||||
if (log_match_len > sizeof(log_match))
|
||||
if (expected_domain_id)
|
||||
log_match_len = snprintf(log_match, sizeof(log_match),
|
||||
log_template_with_id,
|
||||
(unsigned long long)expected_domain_id,
|
||||
num_denials);
|
||||
else
|
||||
log_match_len = snprintf(log_match, sizeof(log_match),
|
||||
log_template, num_denials);
|
||||
|
||||
if (log_match_len >= sizeof(log_match))
|
||||
return -E2BIG;
|
||||
|
||||
return audit_match_record(audit_fd, AUDIT_LANDLOCK_DOMAIN, log_match,
|
||||
domain_id);
|
||||
if (expected_domain_id)
|
||||
setsockopt(audit_fd, SOL_SOCKET, SO_RCVTIMEO,
|
||||
&audit_tv_dom_drop, sizeof(audit_tv_dom_drop));
|
||||
|
||||
err = audit_match_record(audit_fd, AUDIT_LANDLOCK_DOMAIN, log_match,
|
||||
domain_id);
|
||||
|
||||
if (expected_domain_id)
|
||||
setsockopt(audit_fd, SOL_SOCKET, SO_RCVTIMEO, &audit_tv_default,
|
||||
sizeof(audit_tv_default));
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
struct audit_records {
|
||||
@@ -338,6 +380,15 @@ struct audit_records {
|
||||
size_t domain;
|
||||
};
|
||||
|
||||
/*
|
||||
* WARNING: Do not assert records.domain == 0 without a preceding
|
||||
* audit_match_record() call. Domain deallocation records are emitted
|
||||
* asynchronously from kworker threads and can arrive after the drain in
|
||||
* audit_init(), corrupting the domain count. A preceding audit_match_record()
|
||||
* call consumes stale records while scanning, making the assertion safe in
|
||||
* practice because stale deallocation records arrive before the expected access
|
||||
* records.
|
||||
*/
|
||||
static int audit_count_records(int audit_fd, struct audit_records *records)
|
||||
{
|
||||
struct audit_message msg;
|
||||
@@ -379,19 +430,35 @@ static int audit_init(void)
|
||||
|
||||
err = audit_set_status(fd, AUDIT_STATUS_ENABLED, 1);
|
||||
if (err)
|
||||
return err;
|
||||
goto err_close;
|
||||
|
||||
err = audit_set_status(fd, AUDIT_STATUS_PID, getpid());
|
||||
if (err)
|
||||
return err;
|
||||
goto err_close;
|
||||
|
||||
/* Sets a timeout for negative tests. */
|
||||
err = setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &audit_tv_default,
|
||||
sizeof(audit_tv_default));
|
||||
if (err)
|
||||
return -errno;
|
||||
if (err) {
|
||||
err = -errno;
|
||||
goto err_close;
|
||||
}
|
||||
|
||||
/*
|
||||
* Drains stale audit records that accumulated in the kernel backlog
|
||||
* while no audit daemon socket was open. This happens when non-audit
|
||||
* Landlock tests generate records while audit_enabled is non-zero (e.g.
|
||||
* from boot configuration), or when domain deallocation records arrive
|
||||
* asynchronously after a previous test's socket was closed.
|
||||
*/
|
||||
while (audit_recv(fd, NULL) == 0)
|
||||
;
|
||||
|
||||
return fd;
|
||||
|
||||
err_close:
|
||||
close(fd);
|
||||
return err;
|
||||
}
|
||||
|
||||
static int audit_init_filter_exe(struct audit_filter *filter, const char *path)
|
||||
@@ -441,8 +508,10 @@ static int audit_cleanup(int audit_fd, struct audit_filter *filter)
|
||||
|
||||
filter = &new_filter;
|
||||
err = audit_init_filter_exe(filter, NULL);
|
||||
if (err)
|
||||
if (err) {
|
||||
close(audit_fd);
|
||||
return err;
|
||||
}
|
||||
}
|
||||
|
||||
/* Filters might not be in place. */
|
||||
@@ -468,11 +537,15 @@ static int audit_init_with_exe_filter(struct audit_filter *filter)
|
||||
|
||||
err = audit_init_filter_exe(filter, NULL);
|
||||
if (err)
|
||||
return err;
|
||||
goto err_close;
|
||||
|
||||
err = audit_filter_exe(fd, filter, AUDIT_ADD_RULE);
|
||||
if (err)
|
||||
return err;
|
||||
goto err_close;
|
||||
|
||||
return fd;
|
||||
|
||||
err_close:
|
||||
close(fd);
|
||||
return err;
|
||||
}
|
||||
|
||||
@@ -139,29 +139,31 @@ TEST_F(audit, layers)
|
||||
WEXITSTATUS(status) != EXIT_SUCCESS)
|
||||
_metadata->exit_code = KSFT_FAIL;
|
||||
|
||||
/* Purges log from deallocated domains. */
|
||||
EXPECT_EQ(0, setsockopt(self->audit_fd, SOL_SOCKET, SO_RCVTIMEO,
|
||||
&audit_tv_dom_drop, sizeof(audit_tv_dom_drop)));
|
||||
/*
|
||||
* Purges log from deallocated domains. Records arrive in LIFO order
|
||||
* (innermost domain first) because landlock_put_hierarchy() walks the
|
||||
* chain sequentially in a single kworker context.
|
||||
*/
|
||||
for (i = ARRAY_SIZE(*domain_stack) - 1; i >= 0; i--) {
|
||||
__u64 deallocated_dom = 2;
|
||||
|
||||
EXPECT_EQ(0, matches_log_domain_deallocated(self->audit_fd, 1,
|
||||
(*domain_stack)[i],
|
||||
&deallocated_dom));
|
||||
EXPECT_EQ((*domain_stack)[i], deallocated_dom)
|
||||
{
|
||||
TH_LOG("Failed to match domain %llx (#%d)",
|
||||
(*domain_stack)[i], i);
|
||||
(unsigned long long)(*domain_stack)[i], i);
|
||||
}
|
||||
}
|
||||
EXPECT_EQ(0, munmap(domain_stack, sizeof(*domain_stack)));
|
||||
EXPECT_EQ(0, setsockopt(self->audit_fd, SOL_SOCKET, SO_RCVTIMEO,
|
||||
&audit_tv_default, sizeof(audit_tv_default)));
|
||||
EXPECT_EQ(0, close(ruleset_fd));
|
||||
}
|
||||
|
||||
struct thread_data {
|
||||
pid_t parent_pid;
|
||||
int ruleset_fd, pipe_child, pipe_parent;
|
||||
bool mute_subdomains;
|
||||
};
|
||||
|
||||
static void *thread_audit_test(void *arg)
|
||||
@@ -270,13 +272,329 @@ TEST_F(audit, thread)
|
||||
EXPECT_EQ(0, close(pipe_parent[1]));
|
||||
ASSERT_EQ(0, pthread_join(thread, NULL));
|
||||
|
||||
EXPECT_EQ(0, setsockopt(self->audit_fd, SOL_SOCKET, SO_RCVTIMEO,
|
||||
&audit_tv_dom_drop, sizeof(audit_tv_dom_drop)));
|
||||
EXPECT_EQ(0, matches_log_domain_deallocated(self->audit_fd, 1,
|
||||
&deallocated_dom));
|
||||
EXPECT_EQ(0, matches_log_domain_deallocated(
|
||||
self->audit_fd, 1, denial_dom, &deallocated_dom));
|
||||
EXPECT_EQ(denial_dom, deallocated_dom);
|
||||
EXPECT_EQ(0, setsockopt(self->audit_fd, SOL_SOCKET, SO_RCVTIMEO,
|
||||
&audit_tv_default, sizeof(audit_tv_default)));
|
||||
}
|
||||
|
||||
/*
|
||||
* Verifies that log_subdomains_off set via the ruleset_fd=-1 path (without
|
||||
* creating a domain) is inherited by children across fork(). This exercises
|
||||
* the hook_cred_transfer() fix: the Landlock credential blob must be copied
|
||||
* even when the source credential has no domain.
|
||||
*
|
||||
* Phase 1 (baseline): a child without muting creates a domain and triggers a
|
||||
* denial that IS logged.
|
||||
*
|
||||
* Phase 2 (after muting): the parent mutes subdomain logs, forks another child
|
||||
* who creates a domain and triggers a denial that is NOT logged.
|
||||
*/
|
||||
TEST_F(audit, log_subdomains_off_fork)
|
||||
{
|
||||
const struct landlock_ruleset_attr ruleset_attr = {
|
||||
.scoped = LANDLOCK_SCOPE_SIGNAL,
|
||||
};
|
||||
struct audit_records records;
|
||||
int ruleset_fd, status;
|
||||
pid_t child;
|
||||
|
||||
ruleset_fd =
|
||||
landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
|
||||
ASSERT_LE(0, ruleset_fd);
|
||||
|
||||
ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
|
||||
|
||||
/*
|
||||
* Phase 1: forks a child that creates a domain and triggers a denial
|
||||
* before any muting. This proves the audit path works.
|
||||
*/
|
||||
child = fork();
|
||||
ASSERT_LE(0, child);
|
||||
if (child == 0) {
|
||||
ASSERT_EQ(0, landlock_restrict_self(ruleset_fd, 0));
|
||||
ASSERT_EQ(-1, kill(getppid(), 0));
|
||||
ASSERT_EQ(EPERM, errno);
|
||||
_exit(0);
|
||||
return;
|
||||
}
|
||||
|
||||
ASSERT_EQ(child, waitpid(child, &status, 0));
|
||||
ASSERT_EQ(true, WIFEXITED(status));
|
||||
ASSERT_EQ(0, WEXITSTATUS(status));
|
||||
|
||||
/* The denial must be logged (baseline). */
|
||||
EXPECT_EQ(0, matches_log_signal(_metadata, self->audit_fd, getpid(),
|
||||
NULL));
|
||||
|
||||
/* Drains any remaining records (e.g. domain allocation). */
|
||||
EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
|
||||
|
||||
/*
|
||||
* Mutes subdomain logs without creating a domain. The parent's
|
||||
* credential has domain=NULL and log_subdomains_off=1.
|
||||
*/
|
||||
ASSERT_EQ(0, landlock_restrict_self(
|
||||
-1, LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF));
|
||||
|
||||
/*
|
||||
* Phase 2: forks a child that creates a domain and triggers a denial.
|
||||
* Because log_subdomains_off was inherited via fork(), the child's
|
||||
* domain has log_status=LANDLOCK_LOG_DISABLED.
|
||||
*/
|
||||
child = fork();
|
||||
ASSERT_LE(0, child);
|
||||
if (child == 0) {
|
||||
ASSERT_EQ(0, landlock_restrict_self(ruleset_fd, 0));
|
||||
ASSERT_EQ(-1, kill(getppid(), 0));
|
||||
ASSERT_EQ(EPERM, errno);
|
||||
_exit(0);
|
||||
return;
|
||||
}
|
||||
|
||||
ASSERT_EQ(child, waitpid(child, &status, 0));
|
||||
ASSERT_EQ(true, WIFEXITED(status));
|
||||
ASSERT_EQ(0, WEXITSTATUS(status));
|
||||
|
||||
/* No denial record should appear. */
|
||||
EXPECT_EQ(-EAGAIN, matches_log_signal(_metadata, self->audit_fd,
|
||||
getpid(), NULL));
|
||||
|
||||
EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
|
||||
EXPECT_EQ(0, records.access);
|
||||
|
||||
EXPECT_EQ(0, close(ruleset_fd));
|
||||
}
|
||||
|
||||
/*
|
||||
* Thread function: runs two rounds of (create domain, trigger denial, signal
|
||||
* back), waiting for the main thread before each round. When mute_subdomains
|
||||
* is set, phase 1 also mutes subdomain logs via the fd=-1 path before creating
|
||||
* the domain. The ruleset_fd is kept open across both rounds so each
|
||||
* restrict_self call stacks a new domain layer.
|
||||
*/
|
||||
static void *thread_sandbox_deny_twice(void *arg)
|
||||
{
|
||||
const struct thread_data *data = (struct thread_data *)arg;
|
||||
uintptr_t err = 0;
|
||||
char buffer;
|
||||
|
||||
/* Phase 1: optionally mutes, creates a domain, and triggers a denial. */
|
||||
if (read(data->pipe_parent, &buffer, 1) != 1) {
|
||||
err = 1;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (data->mute_subdomains &&
|
||||
landlock_restrict_self(-1,
|
||||
LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF)) {
|
||||
err = 2;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (landlock_restrict_self(data->ruleset_fd, 0)) {
|
||||
err = 3;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (kill(data->parent_pid, 0) != -1 || errno != EPERM) {
|
||||
err = 4;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (write(data->pipe_child, ".", 1) != 1) {
|
||||
err = 5;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* Phase 2: stacks another domain and triggers a denial. */
|
||||
if (read(data->pipe_parent, &buffer, 1) != 1) {
|
||||
err = 6;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (landlock_restrict_self(data->ruleset_fd, 0)) {
|
||||
err = 7;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (kill(data->parent_pid, 0) != -1 || errno != EPERM) {
|
||||
err = 8;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (write(data->pipe_child, ".", 1) != 1) {
|
||||
err = 9;
|
||||
goto out;
|
||||
}
|
||||
|
||||
out:
|
||||
close(data->ruleset_fd);
|
||||
close(data->pipe_child);
|
||||
close(data->pipe_parent);
|
||||
return (void *)err;
|
||||
}
|
||||
|
||||
/*
|
||||
* Verifies that LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF with
|
||||
* LANDLOCK_RESTRICT_SELF_TSYNC and ruleset_fd=-1 propagates log_subdomains_off
|
||||
* to a sibling thread, suppressing audit logging on domains it subsequently
|
||||
* creates.
|
||||
*
|
||||
* Phase 1 (before TSYNC) acts as an inline baseline: the sibling creates a
|
||||
* domain and triggers a denial that IS logged.
|
||||
*
|
||||
* Phase 2 (after TSYNC) verifies suppression: the sibling stacks another domain
|
||||
* and triggers a denial that is NOT logged.
|
||||
*/
|
||||
TEST_F(audit, log_subdomains_off_tsync)
|
||||
{
|
||||
const struct landlock_ruleset_attr ruleset_attr = {
|
||||
.scoped = LANDLOCK_SCOPE_SIGNAL,
|
||||
};
|
||||
struct audit_records records;
|
||||
struct thread_data child_data = {};
|
||||
int pipe_child[2], pipe_parent[2];
|
||||
char buffer;
|
||||
pthread_t thread;
|
||||
void *thread_ret;
|
||||
|
||||
child_data.parent_pid = getppid();
|
||||
ASSERT_EQ(0, pipe2(pipe_child, O_CLOEXEC));
|
||||
child_data.pipe_child = pipe_child[1];
|
||||
ASSERT_EQ(0, pipe2(pipe_parent, O_CLOEXEC));
|
||||
child_data.pipe_parent = pipe_parent[0];
|
||||
child_data.ruleset_fd =
|
||||
landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
|
||||
ASSERT_LE(0, child_data.ruleset_fd);
|
||||
|
||||
ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
|
||||
|
||||
/* Creates the sibling thread. */
|
||||
ASSERT_EQ(0, pthread_create(&thread, NULL, thread_sandbox_deny_twice,
|
||||
&child_data));
|
||||
|
||||
/*
|
||||
* Phase 1: the sibling creates a domain and triggers a denial before
|
||||
* any log muting. This proves the audit path works.
|
||||
*/
|
||||
ASSERT_EQ(1, write(pipe_parent[1], ".", 1));
|
||||
ASSERT_EQ(1, read(pipe_child[0], &buffer, 1));
|
||||
|
||||
/* The denial must be logged. */
|
||||
EXPECT_EQ(0, matches_log_signal(_metadata, self->audit_fd,
|
||||
child_data.parent_pid, NULL));
|
||||
|
||||
/* Drains any remaining records (e.g. domain allocation). */
|
||||
EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
|
||||
|
||||
/*
|
||||
* Mutes subdomain logs and propagates to the sibling thread via TSYNC,
|
||||
* without creating a domain.
|
||||
*/
|
||||
ASSERT_EQ(0, landlock_restrict_self(
|
||||
-1, LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF |
|
||||
LANDLOCK_RESTRICT_SELF_TSYNC));
|
||||
|
||||
/*
|
||||
* Phase 2: the sibling stacks another domain and triggers a denial.
|
||||
* Because log_subdomains_off was propagated via TSYNC, the new domain
|
||||
* has log_status=LANDLOCK_LOG_DISABLED.
|
||||
*/
|
||||
ASSERT_EQ(1, write(pipe_parent[1], ".", 1));
|
||||
ASSERT_EQ(1, read(pipe_child[0], &buffer, 1));
|
||||
|
||||
/* No denial record should appear. */
|
||||
EXPECT_EQ(-EAGAIN, matches_log_signal(_metadata, self->audit_fd,
|
||||
child_data.parent_pid, NULL));
|
||||
|
||||
EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
|
||||
EXPECT_EQ(0, records.access);
|
||||
|
||||
EXPECT_EQ(0, close(pipe_child[0]));
|
||||
EXPECT_EQ(0, close(pipe_parent[1]));
|
||||
ASSERT_EQ(0, pthread_join(thread, &thread_ret));
|
||||
EXPECT_EQ(NULL, thread_ret);
|
||||
}
|
||||
|
||||
/*
|
||||
* Verifies that LANDLOCK_RESTRICT_SELF_TSYNC without
|
||||
* LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF overrides a sibling thread's
|
||||
* log_subdomains_off, re-enabling audit logging on domains the sibling
|
||||
* subsequently creates.
|
||||
*
|
||||
* Phase 1: the sibling sets log_subdomains_off, creates a muted domain, and
|
||||
* triggers a denial that is NOT logged.
|
||||
*
|
||||
* Phase 2 (after TSYNC without LOG_SUBDOMAINS_OFF): the sibling stacks another
|
||||
* domain and triggers a denial that IS logged, proving the muting was
|
||||
* overridden.
|
||||
*/
|
||||
TEST_F(audit, tsync_override_log_subdomains_off)
|
||||
{
|
||||
const struct landlock_ruleset_attr ruleset_attr = {
|
||||
.scoped = LANDLOCK_SCOPE_SIGNAL,
|
||||
};
|
||||
struct audit_records records;
|
||||
struct thread_data child_data = {};
|
||||
int pipe_child[2], pipe_parent[2];
|
||||
char buffer;
|
||||
pthread_t thread;
|
||||
void *thread_ret;
|
||||
|
||||
child_data.parent_pid = getppid();
|
||||
ASSERT_EQ(0, pipe2(pipe_child, O_CLOEXEC));
|
||||
child_data.pipe_child = pipe_child[1];
|
||||
ASSERT_EQ(0, pipe2(pipe_parent, O_CLOEXEC));
|
||||
child_data.pipe_parent = pipe_parent[0];
|
||||
child_data.ruleset_fd =
|
||||
landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
|
||||
ASSERT_LE(0, child_data.ruleset_fd);
|
||||
|
||||
ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
|
||||
|
||||
child_data.mute_subdomains = true;
|
||||
|
||||
/* Creates the sibling thread. */
|
||||
ASSERT_EQ(0, pthread_create(&thread, NULL, thread_sandbox_deny_twice,
|
||||
&child_data));
|
||||
|
||||
/*
|
||||
* Phase 1: the sibling mutes subdomain logs, creates a domain, and
|
||||
* triggers a denial. The denial must not be logged.
|
||||
*/
|
||||
ASSERT_EQ(1, write(pipe_parent[1], ".", 1));
|
||||
ASSERT_EQ(1, read(pipe_child[0], &buffer, 1));
|
||||
|
||||
EXPECT_EQ(-EAGAIN, matches_log_signal(_metadata, self->audit_fd,
|
||||
child_data.parent_pid, NULL));
|
||||
|
||||
/* Drains any remaining records. */
|
||||
EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
|
||||
EXPECT_EQ(0, records.access);
|
||||
|
||||
/*
|
||||
* Overrides the sibling's log_subdomains_off by calling TSYNC without
|
||||
* LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF.
|
||||
*/
|
||||
ASSERT_EQ(0, landlock_restrict_self(child_data.ruleset_fd,
|
||||
LANDLOCK_RESTRICT_SELF_TSYNC));
|
||||
|
||||
/*
|
||||
* Phase 2: the sibling stacks another domain and triggers a denial.
|
||||
* Because TSYNC replaced its log_subdomains_off with 0, the new domain
|
||||
* has log_status=LANDLOCK_LOG_PENDING.
|
||||
*/
|
||||
ASSERT_EQ(1, write(pipe_parent[1], ".", 1));
|
||||
ASSERT_EQ(1, read(pipe_child[0], &buffer, 1));
|
||||
|
||||
/* The denial must be logged. */
|
||||
EXPECT_EQ(0, matches_log_signal(_metadata, self->audit_fd,
|
||||
child_data.parent_pid, NULL));
|
||||
|
||||
EXPECT_EQ(0, close(pipe_child[0]));
|
||||
EXPECT_EQ(0, close(pipe_parent[1]));
|
||||
ASSERT_EQ(0, pthread_join(thread, &thread_ret));
|
||||
EXPECT_EQ(NULL, thread_ret);
|
||||
}
|
||||
|
||||
FIXTURE(audit_flags)
|
||||
@@ -412,7 +730,6 @@ TEST_F(audit_flags, signal)
|
||||
} else {
|
||||
EXPECT_EQ(1, records.access);
|
||||
}
|
||||
EXPECT_EQ(0, records.domain);
|
||||
|
||||
/* Updates filter rules to match the drop record. */
|
||||
set_cap(_metadata, CAP_AUDIT_CONTROL);
|
||||
@@ -433,22 +750,21 @@ TEST_F(audit_flags, signal)
|
||||
|
||||
if (variant->restrict_flags &
|
||||
LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF) {
|
||||
/*
|
||||
* No deallocation record: denials=0 never matches a real
|
||||
* record.
|
||||
*/
|
||||
EXPECT_EQ(-EAGAIN,
|
||||
matches_log_domain_deallocated(self->audit_fd, 0,
|
||||
matches_log_domain_deallocated(self->audit_fd, 0, 0,
|
||||
&deallocated_dom));
|
||||
EXPECT_EQ(deallocated_dom, 2);
|
||||
} else {
|
||||
EXPECT_EQ(0, setsockopt(self->audit_fd, SOL_SOCKET, SO_RCVTIMEO,
|
||||
&audit_tv_dom_drop,
|
||||
sizeof(audit_tv_dom_drop)));
|
||||
EXPECT_EQ(0, matches_log_domain_deallocated(self->audit_fd, 2,
|
||||
*self->domain_id,
|
||||
&deallocated_dom));
|
||||
EXPECT_NE(deallocated_dom, 2);
|
||||
EXPECT_NE(deallocated_dom, 0);
|
||||
EXPECT_EQ(deallocated_dom, *self->domain_id);
|
||||
EXPECT_EQ(0, setsockopt(self->audit_fd, SOL_SOCKET, SO_RCVTIMEO,
|
||||
&audit_tv_default,
|
||||
sizeof(audit_tv_default)));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -601,7 +917,6 @@ TEST_F(audit_exec, signal_and_open)
|
||||
/* Tests that there was no denial until now. */
|
||||
EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
|
||||
EXPECT_EQ(0, records.access);
|
||||
EXPECT_EQ(0, records.domain);
|
||||
|
||||
/*
|
||||
* Wait for the child to do a first denied action by layer1 and
|
||||
|
||||
@@ -76,7 +76,7 @@ TEST(abi_version)
|
||||
const struct landlock_ruleset_attr ruleset_attr = {
|
||||
.handled_access_fs = LANDLOCK_ACCESS_FS_READ_FILE,
|
||||
};
|
||||
ASSERT_EQ(8, landlock_create_ruleset(NULL, 0,
|
||||
ASSERT_EQ(9, landlock_create_ruleset(NULL, 0,
|
||||
LANDLOCK_CREATE_RULESET_VERSION));
|
||||
|
||||
ASSERT_EQ(-1, landlock_create_ruleset(&ruleset_attr, 0,
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1356,7 +1356,7 @@ TEST_F(mini, network_access_rights)
|
||||
&net_port, 0))
|
||||
{
|
||||
TH_LOG("Failed to add rule with access 0x%llx: %s",
|
||||
access, strerror(errno));
|
||||
(unsigned long long)access, strerror(errno));
|
||||
}
|
||||
}
|
||||
EXPECT_EQ(0, close(ruleset_fd));
|
||||
|
||||
@@ -342,7 +342,6 @@ TEST_F(audit, trace)
|
||||
/* Makes sure there is no superfluous logged records. */
|
||||
EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
|
||||
EXPECT_EQ(0, records.access);
|
||||
EXPECT_EQ(0, records.domain);
|
||||
|
||||
yama_ptrace_scope = get_yama_ptrace_scope();
|
||||
ASSERT_LE(0, yama_ptrace_scope);
|
||||
|
||||
@@ -312,7 +312,6 @@ TEST_F(scoped_audit, connect_to_child)
|
||||
/* Makes sure there is no superfluous logged records. */
|
||||
EXPECT_EQ(0, audit_count_records(self->audit_fd, &records));
|
||||
EXPECT_EQ(0, records.access);
|
||||
EXPECT_EQ(0, records.domain);
|
||||
|
||||
ASSERT_EQ(0, pipe2(pipe_child, O_CLOEXEC));
|
||||
ASSERT_EQ(0, pipe2(pipe_parent, O_CLOEXEC));
|
||||
|
||||
@@ -247,4 +247,81 @@ TEST(tsync_interrupt)
|
||||
EXPECT_EQ(0, close(ruleset_fd));
|
||||
}
|
||||
|
||||
/* clang-format off */
|
||||
FIXTURE(tsync_without_ruleset) {};
|
||||
/* clang-format on */
|
||||
|
||||
FIXTURE_VARIANT(tsync_without_ruleset)
|
||||
{
|
||||
const __u32 flags;
|
||||
const int expected_errno;
|
||||
};
|
||||
|
||||
/* clang-format off */
|
||||
FIXTURE_VARIANT_ADD(tsync_without_ruleset, tsync_only) {
|
||||
/* clang-format on */
|
||||
.flags = LANDLOCK_RESTRICT_SELF_TSYNC,
|
||||
.expected_errno = EBADF,
|
||||
};
|
||||
|
||||
/* clang-format off */
|
||||
FIXTURE_VARIANT_ADD(tsync_without_ruleset, subdomains_off_same_exec_off) {
|
||||
/* clang-format on */
|
||||
.flags = LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF |
|
||||
LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF |
|
||||
LANDLOCK_RESTRICT_SELF_TSYNC,
|
||||
.expected_errno = EBADF,
|
||||
};
|
||||
|
||||
/* clang-format off */
|
||||
FIXTURE_VARIANT_ADD(tsync_without_ruleset, subdomains_off_new_exec_on) {
|
||||
/* clang-format on */
|
||||
.flags = LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF |
|
||||
LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON |
|
||||
LANDLOCK_RESTRICT_SELF_TSYNC,
|
||||
.expected_errno = EBADF,
|
||||
};
|
||||
|
||||
/* clang-format off */
|
||||
FIXTURE_VARIANT_ADD(tsync_without_ruleset, all_flags) {
|
||||
/* clang-format on */
|
||||
.flags = LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF |
|
||||
LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON |
|
||||
LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF |
|
||||
LANDLOCK_RESTRICT_SELF_TSYNC,
|
||||
.expected_errno = EBADF,
|
||||
};
|
||||
|
||||
/* clang-format off */
|
||||
FIXTURE_VARIANT_ADD(tsync_without_ruleset, subdomains_off) {
|
||||
/* clang-format on */
|
||||
.flags = LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF |
|
||||
LANDLOCK_RESTRICT_SELF_TSYNC,
|
||||
.expected_errno = 0,
|
||||
};
|
||||
|
||||
FIXTURE_SETUP(tsync_without_ruleset)
|
||||
{
|
||||
disable_caps(_metadata);
|
||||
}
|
||||
|
||||
FIXTURE_TEARDOWN(tsync_without_ruleset)
|
||||
{
|
||||
}
|
||||
|
||||
TEST_F(tsync_without_ruleset, check)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
|
||||
|
||||
ret = landlock_restrict_self(-1, variant->flags);
|
||||
if (variant->expected_errno) {
|
||||
EXPECT_EQ(-1, ret);
|
||||
EXPECT_EQ(variant->expected_errno, errno);
|
||||
} else {
|
||||
EXPECT_EQ(0, ret);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_HARNESS_MAIN
|
||||
|
||||
Reference in New Issue
Block a user