mirror of
https://github.com/torvalds/linux.git
synced 2026-04-18 06:44:00 -04:00
Merge tag 'hardening-v7.1-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux
Pull hardening updates from Kees Cook:

 - randomize_kstack: Improve implementation across arches (Ryan Roberts)
 - lkdtm/fortify: Drop unneeded FORTIFY_STR_OBJECT test
 - refcount: Remove unused __signed_wrap function annotations

* tag 'hardening-v7.1-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux:
  lkdtm/fortify: Drop unneeded FORTIFY_STR_OBJECT test
  refcount: Remove unused __signed_wrap function annotations
  randomize_kstack: Unify random source across arches
  randomize_kstack: Maintain kstack_offset per task
This commit is contained in:
@@ -1518,9 +1518,8 @@ config HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET
|
||||
def_bool n
|
||||
help
|
||||
An arch should select this symbol if it can support kernel stack
|
||||
offset randomization with calls to add_random_kstack_offset()
|
||||
during syscall entry and choose_random_kstack_offset() during
|
||||
syscall exit. Careful removal of -fstack-protector-strong and
|
||||
offset randomization with a call to add_random_kstack_offset()
|
||||
during syscall entry. Careful removal of -fstack-protector-strong and
|
||||
-fstack-protector should also be applied to the entry code and
|
||||
closely examined, as the artificial stack bump looks like an array
|
||||
to the compiler, so it will attempt to add canary checks regardless
|
||||
|
||||
@@ -52,17 +52,6 @@ static void invoke_syscall(struct pt_regs *regs, unsigned int scno,
|
||||
}
|
||||
|
||||
syscall_set_return_value(current, regs, 0, ret);
|
||||
|
||||
/*
|
||||
* This value will get limited by KSTACK_OFFSET_MAX(), which is 10
|
||||
* bits. The actual entropy will be further reduced by the compiler
|
||||
* when applying stack alignment constraints: the AAPCS mandates a
|
||||
* 16-byte aligned SP at function boundaries, which will remove the
|
||||
* 4 low bits from any entropy chosen here.
|
||||
*
|
||||
* The resulting 6 bits of entropy is seen in SP[9:4].
|
||||
*/
|
||||
choose_random_kstack_offset(get_random_u16());
|
||||
}
|
||||
|
||||
static inline bool has_syscall_work(unsigned long flags)
|
||||
|
||||
@@ -79,16 +79,5 @@ void noinstr __no_stack_protector do_syscall(struct pt_regs *regs)
|
||||
regs->regs[7], regs->regs[8], regs->regs[9]);
|
||||
}
|
||||
|
||||
/*
|
||||
* This value will get limited by KSTACK_OFFSET_MAX(), which is 10
|
||||
* bits. The actual entropy will be further reduced by the compiler
|
||||
* when applying stack alignment constraints: 16-bytes (i.e. 4-bits)
|
||||
* aligned, which will remove the 4 low bits from any entropy chosen
|
||||
* here.
|
||||
*
|
||||
* The resulting 6 bits of entropy is seen in SP[9:4].
|
||||
*/
|
||||
choose_random_kstack_offset(get_cycles());
|
||||
|
||||
syscall_exit_to_user_mode(regs);
|
||||
}
|
||||
|
||||
@@ -20,8 +20,6 @@ notrace long system_call_exception(struct pt_regs *regs, unsigned long r0)
|
||||
|
||||
kuap_lock();
|
||||
|
||||
add_random_kstack_offset();
|
||||
|
||||
if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
|
||||
BUG_ON(irq_soft_mask_return() != IRQS_ALL_DISABLED);
|
||||
|
||||
@@ -30,6 +28,8 @@ notrace long system_call_exception(struct pt_regs *regs, unsigned long r0)
|
||||
CT_WARN_ON(ct_state() == CT_STATE_KERNEL);
|
||||
user_exit_irqoff();
|
||||
|
||||
add_random_kstack_offset();
|
||||
|
||||
BUG_ON(regs_is_unrecoverable(regs));
|
||||
BUG_ON(!user_mode(regs));
|
||||
BUG_ON(arch_irq_disabled_regs(regs));
|
||||
@@ -173,17 +173,5 @@ notrace long system_call_exception(struct pt_regs *regs, unsigned long r0)
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Ultimately, this value will get limited by KSTACK_OFFSET_MAX(),
|
||||
* so the maximum stack offset is 1k bytes (10 bits).
|
||||
*
|
||||
* The actual entropy will be further reduced by the compiler when
|
||||
* applying stack alignment constraints: the powerpc architecture
|
||||
* may have two kinds of stack alignment (16-bytes and 8-bytes).
|
||||
*
|
||||
* So the resulting 6 or 7 bits of entropy is seen in SP[9:4] or SP[9:3].
|
||||
*/
|
||||
choose_random_kstack_offset(mftb());
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -344,18 +344,6 @@ void do_trap_ecall_u(struct pt_regs *regs)
|
||||
syscall_handler(regs, syscall);
|
||||
}
|
||||
|
||||
/*
|
||||
* Ultimately, this value will get limited by KSTACK_OFFSET_MAX(),
|
||||
* so the maximum stack offset is 1k bytes (10 bits).
|
||||
*
|
||||
* The actual entropy will be further reduced by the compiler when
|
||||
* applying stack alignment constraints: 16-byte (i.e. 4-bit) aligned
|
||||
* for RV32I or RV64I.
|
||||
*
|
||||
* The resulting 6 bits of entropy is seen in SP[9:4].
|
||||
*/
|
||||
choose_random_kstack_offset(get_random_u16());
|
||||
|
||||
syscall_exit_to_user_mode(regs);
|
||||
} else {
|
||||
irqentry_state_t state = irqentry_nmi_enter(regs);
|
||||
|
||||
@@ -51,14 +51,6 @@ static __always_inline void arch_exit_to_user_mode(void)
|
||||
|
||||
#define arch_exit_to_user_mode arch_exit_to_user_mode
|
||||
|
||||
static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
|
||||
unsigned long ti_work)
|
||||
{
|
||||
choose_random_kstack_offset(get_tod_clock_fast());
|
||||
}
|
||||
|
||||
#define arch_exit_to_user_mode_prepare arch_exit_to_user_mode_prepare
|
||||
|
||||
static __always_inline bool arch_in_rcu_eqs(void)
|
||||
{
|
||||
if (IS_ENABLED(CONFIG_KVM))
|
||||
|
||||
@@ -97,8 +97,8 @@ void noinstr __do_syscall(struct pt_regs *regs, int per_trap)
|
||||
{
|
||||
unsigned long nr;
|
||||
|
||||
add_random_kstack_offset();
|
||||
enter_from_user_mode(regs);
|
||||
add_random_kstack_offset();
|
||||
regs->psw = get_lowcore()->svc_old_psw;
|
||||
regs->int_code = get_lowcore()->svc_int_code;
|
||||
update_timer_sys();
|
||||
|
||||
@@ -247,7 +247,6 @@ __visible noinstr void do_int80_syscall_32(struct pt_regs *regs)
|
||||
{
|
||||
int nr = syscall_32_enter(regs);
|
||||
|
||||
add_random_kstack_offset();
|
||||
/*
|
||||
* Subtlety here: if ptrace pokes something larger than 2^31-1 into
|
||||
* orig_ax, the int return value truncates it. This matches
|
||||
@@ -256,6 +255,7 @@ __visible noinstr void do_int80_syscall_32(struct pt_regs *regs)
|
||||
nr = syscall_enter_from_user_mode(regs, nr);
|
||||
instrumentation_begin();
|
||||
|
||||
add_random_kstack_offset();
|
||||
do_syscall_32_irqs_on(regs, nr);
|
||||
|
||||
instrumentation_end();
|
||||
@@ -268,7 +268,6 @@ static noinstr bool __do_fast_syscall_32(struct pt_regs *regs)
|
||||
int nr = syscall_32_enter(regs);
|
||||
int res;
|
||||
|
||||
add_random_kstack_offset();
|
||||
/*
|
||||
* This cannot use syscall_enter_from_user_mode() as it has to
|
||||
* fetch EBP before invoking any of the syscall entry work
|
||||
@@ -277,6 +276,7 @@ static noinstr bool __do_fast_syscall_32(struct pt_regs *regs)
|
||||
enter_from_user_mode(regs);
|
||||
|
||||
instrumentation_begin();
|
||||
add_random_kstack_offset();
|
||||
local_irq_enable();
|
||||
/* Fetch EBP from where the vDSO stashed it. */
|
||||
if (IS_ENABLED(CONFIG_X86_64)) {
|
||||
|
||||
@@ -86,10 +86,10 @@ static __always_inline bool do_syscall_x32(struct pt_regs *regs, int nr)
|
||||
/* Returns true to return using SYSRET, or false to use IRET */
|
||||
__visible noinstr bool do_syscall_64(struct pt_regs *regs, int nr)
|
||||
{
|
||||
add_random_kstack_offset();
|
||||
nr = syscall_enter_from_user_mode(regs, nr);
|
||||
|
||||
instrumentation_begin();
|
||||
add_random_kstack_offset();
|
||||
|
||||
if (!do_syscall_x64(regs, nr) && !do_syscall_x32(regs, nr) && nr != -1) {
|
||||
/* Invalid system call, but still a system call. */
|
||||
|
||||
@@ -82,18 +82,6 @@ static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
|
||||
current_thread_info()->status &= ~(TS_COMPAT | TS_I386_REGS_POKED);
|
||||
#endif
|
||||
|
||||
/*
|
||||
* This value will get limited by KSTACK_OFFSET_MAX(), which is 10
|
||||
* bits. The actual entropy will be further reduced by the compiler
|
||||
* when applying stack alignment constraints (see cc_stack_align4/8 in
|
||||
* arch/x86/Makefile), which will remove the 3 (x86_64) or 2 (ia32)
|
||||
* low bits from any entropy chosen here.
|
||||
*
|
||||
* Therefore, final stack offset entropy will be 7 (x86_64) or
|
||||
* 8 (ia32) bits.
|
||||
*/
|
||||
choose_random_kstack_offset(rdtsc());
|
||||
|
||||
/* Avoid unnecessary reads of 'x86_ibpb_exit_to_user' */
|
||||
if (cpu_feature_enabled(X86_FEATURE_IBPB_EXIT_TO_USER) &&
|
||||
this_cpu_read(x86_ibpb_exit_to_user)) {
|
||||
|
||||
@@ -10,30 +10,6 @@
|
||||
|
||||
static volatile int fortify_scratch_space;
|
||||
|
||||
static void lkdtm_FORTIFY_STR_OBJECT(void)
|
||||
{
|
||||
struct target {
|
||||
char a[10];
|
||||
int foo;
|
||||
} target[3] = {};
|
||||
/*
|
||||
* Using volatile prevents the compiler from determining the value of
|
||||
* 'size' at compile time. Without that, we would get a compile error
|
||||
* rather than a runtime error.
|
||||
*/
|
||||
volatile int size = 20;
|
||||
|
||||
pr_info("trying to strcmp() past the end of a struct\n");
|
||||
|
||||
strncpy(target[0].a, target[1].a, size);
|
||||
|
||||
/* Store result to global to prevent the code from being eliminated */
|
||||
fortify_scratch_space = target[0].a[3];
|
||||
|
||||
pr_err("FAIL: fortify did not block a strncpy() object write overflow!\n");
|
||||
pr_expected_config(CONFIG_FORTIFY_SOURCE);
|
||||
}
|
||||
|
||||
static void lkdtm_FORTIFY_STR_MEMBER(void)
|
||||
{
|
||||
struct target {
|
||||
@@ -47,22 +23,23 @@ static void lkdtm_FORTIFY_STR_MEMBER(void)
|
||||
if (!src)
|
||||
return;
|
||||
|
||||
/* 15 bytes: past end of a[] but not target. */
|
||||
strscpy(src, "over ten bytes", size);
|
||||
size = strlen(src) + 1;
|
||||
|
||||
pr_info("trying to strncpy() past the end of a struct member...\n");
|
||||
pr_info("trying to strscpy() past the end of a struct member...\n");
|
||||
|
||||
/*
|
||||
* strncpy(target.a, src, 20); will hit a compile error because the
|
||||
* compiler knows at build time that target.a < 20 bytes. Use a
|
||||
* strscpy(target.a, src, 15); will hit a compile error because the
|
||||
* compiler knows at build time that target.a < 15 bytes. Use a
|
||||
* volatile to force a runtime error.
|
||||
*/
|
||||
strncpy(target.a, src, size);
|
||||
strscpy(target.a, src, size);
|
||||
|
||||
/* Store result to global to prevent the code from being eliminated */
|
||||
fortify_scratch_space = target.a[3];
|
||||
|
||||
pr_err("FAIL: fortify did not block a strncpy() struct member write overflow!\n");
|
||||
pr_err("FAIL: fortify did not block a strscpy() struct member write overflow!\n");
|
||||
pr_expected_config(CONFIG_FORTIFY_SOURCE);
|
||||
|
||||
kfree(src);
|
||||
@@ -210,7 +187,6 @@ static void lkdtm_FORTIFY_STRSCPY(void)
|
||||
}
|
||||
|
||||
static struct crashtype crashtypes[] = {
|
||||
CRASHTYPE(FORTIFY_STR_OBJECT),
|
||||
CRASHTYPE(FORTIFY_STR_MEMBER),
|
||||
CRASHTYPE(FORTIFY_MEM_OBJECT),
|
||||
CRASHTYPE(FORTIFY_MEM_MEMBER),
|
||||
|
||||
@@ -432,18 +432,11 @@ struct ftrace_likely_data {
|
||||
#define at_least
|
||||
#endif
|
||||
|
||||
/* Do not trap wrapping arithmetic within an annotated function. */
|
||||
#ifdef CONFIG_UBSAN_INTEGER_WRAP
|
||||
# define __signed_wrap __attribute__((no_sanitize("signed-integer-overflow")))
|
||||
#else
|
||||
# define __signed_wrap
|
||||
#endif
|
||||
|
||||
/* Section for code which can't be instrumented at all */
|
||||
#define __noinstr_section(section) \
|
||||
noinline notrace __attribute((__section__(section))) \
|
||||
__no_kcsan __no_sanitize_address __no_profile __no_sanitize_coverage \
|
||||
__no_sanitize_memory __signed_wrap
|
||||
__no_sanitize_memory
|
||||
|
||||
#define noinstr __noinstr_section(".noinstr.text")
|
||||
|
||||
|
||||
@@ -6,10 +6,10 @@
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/jump_label.h>
|
||||
#include <linux/percpu-defs.h>
|
||||
#include <linux/prandom.h>
|
||||
|
||||
DECLARE_STATIC_KEY_MAYBE(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT,
|
||||
randomize_kstack_offset);
|
||||
DECLARE_PER_CPU(u32, kstack_offset);
|
||||
|
||||
/*
|
||||
* Do not use this anywhere else in the kernel. This is used here because
|
||||
@@ -46,53 +46,39 @@ DECLARE_PER_CPU(u32, kstack_offset);
|
||||
#define KSTACK_OFFSET_MAX(x) ((x) & 0b1111111100)
|
||||
#endif
|
||||
|
||||
DECLARE_PER_CPU(struct rnd_state, kstack_rnd_state);
|
||||
|
||||
/*
 * get_kstack_offset - draw the next raw stack-offset value for this CPU.
 *
 * Takes this CPU's kstack_rnd_state via get_cpu_var(), draws one u32 from
 * it with prandom_u32_state(), and releases it again with put_cpu_var().
 *
 * Returns the full 32-bit value unmodified; add_random_kstack_offset()
 * passes it through KSTACK_OFFSET_MAX() before using it as an allocation
 * size.
 */
static __always_inline u32 get_kstack_offset(void)
|
||||
{
|
||||
struct rnd_state *state;
|
||||
u32 rnd;
|
||||
|
||||
/* NOTE(review): get_cpu_var()/put_cpu_var() presumably keep the task on this CPU for the duration of the draw - confirm. */
state = &get_cpu_var(kstack_rnd_state);
|
||||
rnd = prandom_u32_state(state);
|
||||
put_cpu_var(kstack_rnd_state);
|
||||
|
||||
return rnd;
|
||||
}
|
||||
|
||||
/**
|
||||
* add_random_kstack_offset - Increase stack utilization by previously
|
||||
* chosen random offset
|
||||
* add_random_kstack_offset - Increase stack utilization by a random offset.
|
||||
*
|
||||
* This should be used in the syscall entry path when interrupts and
|
||||
* preempt are disabled, and after user registers have been stored to
|
||||
* the stack. For testing the resulting entropy, please see:
|
||||
* tools/testing/selftests/lkdtm/stack-entropy.sh
|
||||
* This should be used in the syscall entry path after user registers have been
|
||||
* stored to the stack. Preemption may be enabled. For testing the resulting
|
||||
* entropy, please see: tools/testing/selftests/lkdtm/stack-entropy.sh
|
||||
*/
|
||||
#define add_random_kstack_offset() do { \
|
||||
if (static_branch_maybe(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT, \
|
||||
&randomize_kstack_offset)) { \
|
||||
u32 offset = raw_cpu_read(kstack_offset); \
|
||||
u32 offset = get_kstack_offset(); \
|
||||
u8 *ptr = __kstack_alloca(KSTACK_OFFSET_MAX(offset)); \
|
||||
/* Keep allocation even after "ptr" loses scope. */ \
|
||||
asm volatile("" :: "r"(ptr) : "memory"); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
/**
|
||||
* choose_random_kstack_offset - Choose the random offset for the next
|
||||
* add_random_kstack_offset()
|
||||
*
|
||||
* This should only be used during syscall exit when interrupts and
|
||||
* preempt are disabled. This position in the syscall flow is done to
|
||||
* frustrate attacks from userspace attempting to learn the next offset:
|
||||
* - Maximize the timing uncertainty visible from userspace: if the
|
||||
* offset is chosen at syscall entry, userspace has much more control
|
||||
* over the timing between choosing offsets. "How long will we be in
|
||||
* kernel mode?" tends to be more difficult to predict than "how long
|
||||
* will we be in user mode?"
|
||||
* - Reduce the lifetime of the new offset sitting in memory during
|
||||
* kernel mode execution. Exposure of "thread-local" memory content
|
||||
* (e.g. current, percpu, etc) tends to be easier than arbitrary
|
||||
* location memory exposure.
|
||||
*/
|
||||
#define choose_random_kstack_offset(rand) do { \
|
||||
if (static_branch_maybe(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT, \
|
||||
&randomize_kstack_offset)) { \
|
||||
u32 offset = raw_cpu_read(kstack_offset); \
|
||||
offset = ror32(offset, 5) ^ (rand); \
|
||||
raw_cpu_write(kstack_offset, offset); \
|
||||
} \
|
||||
} while (0)
|
||||
#else /* CONFIG_RANDOMIZE_KSTACK_OFFSET */
|
||||
#define add_random_kstack_offset() do { } while (0)
|
||||
#define choose_random_kstack_offset(rand) do { } while (0)
|
||||
#endif /* CONFIG_RANDOMIZE_KSTACK_OFFSET */
|
||||
|
||||
#endif
|
||||
|
||||
@@ -170,7 +170,7 @@ static inline unsigned int refcount_read(const refcount_t *r)
|
||||
return atomic_read(&r->refs);
|
||||
}
|
||||
|
||||
static inline __must_check __signed_wrap
|
||||
static inline __must_check
|
||||
bool __refcount_add_not_zero(int i, refcount_t *r, int *oldp)
|
||||
{
|
||||
int old = refcount_read(r);
|
||||
@@ -212,7 +212,7 @@ static inline __must_check bool refcount_add_not_zero(int i, refcount_t *r)
|
||||
return __refcount_add_not_zero(i, r, NULL);
|
||||
}
|
||||
|
||||
static inline __must_check __signed_wrap
|
||||
static inline __must_check
|
||||
bool __refcount_add_not_zero_limited_acquire(int i, refcount_t *r, int *oldp,
|
||||
int limit)
|
||||
{
|
||||
@@ -244,7 +244,7 @@ __refcount_inc_not_zero_limited_acquire(refcount_t *r, int *oldp, int limit)
|
||||
return __refcount_add_not_zero_limited_acquire(1, r, oldp, limit);
|
||||
}
|
||||
|
||||
static inline __must_check __signed_wrap
|
||||
static inline __must_check
|
||||
bool __refcount_add_not_zero_acquire(int i, refcount_t *r, int *oldp)
|
||||
{
|
||||
return __refcount_add_not_zero_limited_acquire(i, r, oldp, INT_MAX);
|
||||
@@ -277,7 +277,7 @@ static inline __must_check bool refcount_add_not_zero_acquire(int i, refcount_t
|
||||
return __refcount_add_not_zero_acquire(i, r, NULL);
|
||||
}
|
||||
|
||||
static inline __signed_wrap
|
||||
static inline
|
||||
void __refcount_add(int i, refcount_t *r, int *oldp)
|
||||
{
|
||||
int old = atomic_fetch_add_relaxed(i, &r->refs);
|
||||
@@ -383,7 +383,7 @@ static inline void refcount_inc(refcount_t *r)
|
||||
__refcount_inc(r, NULL);
|
||||
}
|
||||
|
||||
static inline __must_check __signed_wrap
|
||||
static inline __must_check
|
||||
bool __refcount_sub_and_test(int i, refcount_t *r, int *oldp)
|
||||
{
|
||||
int old = atomic_fetch_sub_release(i, &r->refs);
|
||||
|
||||
@@ -833,7 +833,14 @@ static inline void initcall_debug_enable(void)
|
||||
#ifdef CONFIG_RANDOMIZE_KSTACK_OFFSET
|
||||
DEFINE_STATIC_KEY_MAYBE_RO(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT,
|
||||
randomize_kstack_offset);
|
||||
DEFINE_PER_CPU(u32, kstack_offset);
|
||||
DEFINE_PER_CPU(struct rnd_state, kstack_rnd_state);
|
||||
|
||||
/*
 * random_kstack_init - seed the per-CPU kstack_rnd_state generators.
 *
 * Hands the per-CPU kstack_rnd_state variable to prandom_seed_full_state()
 * and always returns 0. It is registered with late_initcall() right after
 * this definition.
 */
static int __init random_kstack_init(void)
|
||||
{
|
||||
prandom_seed_full_state(&kstack_rnd_state);
|
||||
return 0;
|
||||
}
|
||||
late_initcall(random_kstack_init);
|
||||
|
||||
static int __init early_randomize_kstack_offset(char *buf)
|
||||
{
|
||||
|
||||
@@ -96,6 +96,7 @@
|
||||
#include <linux/thread_info.h>
|
||||
#include <linux/kstack_erase.h>
|
||||
#include <linux/kasan.h>
|
||||
#include <linux/randomize_kstack.h>
|
||||
#include <linux/scs.h>
|
||||
#include <linux/io_uring.h>
|
||||
#include <linux/io_uring_types.h>
|
||||
|
||||
@@ -82,7 +82,6 @@ STACKLEAK_ERASING OK: the rest of the thread stack is properly erased
|
||||
CFI_FORWARD_PROTO
|
||||
CFI_BACKWARD call trace:|ok: control flow unchanged
|
||||
FORTIFY_STRSCPY detected buffer overflow
|
||||
FORTIFY_STR_OBJECT detected buffer overflow
|
||||
FORTIFY_STR_MEMBER detected buffer overflow
|
||||
FORTIFY_MEM_OBJECT detected buffer overflow
|
||||
FORTIFY_MEM_MEMBER detected field-spanning write
|
||||
|
||||
Reference in New Issue
Block a user