mirror of
https://github.com/torvalds/linux.git
synced 2026-04-18 06:44:00 -04:00
Pull MM updates from Andrew Morton: - "maple_tree: Replace big node with maple copy" (Liam Howlett) Mainly preparatory work for ongoing development but it does reduce stack usage and is an improvement. - "mm, swap: swap table phase III: remove swap_map" (Kairui Song) Offers memory savings by removing the static swap_map. It also yields some CPU savings and implements several cleanups. - "mm: memfd_luo: preserve file seals" (Pratyush Yadav) File seal preservation to LUO's memfd code - "mm: zswap: add per-memcg stat for incompressible pages" (Jiayuan Chen) Additional userspace stats reporting to zswap - "arch, mm: consolidate empty_zero_page" (Mike Rapoport) Some cleanups for our handling of ZERO_PAGE() and zero_pfn - "mm/kmemleak: Improve scan_should_stop() implementation" (Zhongqiu Han) A robustness improvement and some cleanups in the kmemleak code - "Improve khugepaged scan logic" (Vernon Yang) Improve khugepaged scan logic and reduce CPU consumption by prioritizing scanning tasks that access memory frequently - "Make KHO Stateless" (Jason Miu) Simplify Kexec Handover by transitioning KHO from an xarray-based metadata tracking system with serialization to a radix tree data structure that can be passed directly to the next kernel - "mm: vmscan: add PID and cgroup ID to vmscan tracepoints" (Thomas Ballasi and Steven Rostedt) Enhance vmscan's tracepointing - "mm: arch/shstk: Common shadow stack mapping helper and VM_NOHUGEPAGE" (Catalin Marinas) Cleanup for the shadow stack code: remove per-arch code in favour of a generic implementation - "Fix KASAN support for KHO restored vmalloc regions" (Pasha Tatashin) Fix a WARN() which can be emitted when KHO restores a vmalloc area - "mm: Remove stray references to pagevec" (Tal Zussman) Several cleanups, mainly updating references to "struct pagevec", which became folio_batch three years ago - "mm: Eliminate fake head pages from vmemmap optimization" (Kiryl Shutsemau) Simplify the HugeTLB vmemmap optimization (HVO) by 
changing how tail pages encode their relationship to the head page - "mm/damon/core: improve DAMOS quota efficiency for core layer filters" (SeongJae Park) Improve two problematic behaviors of DAMOS that make it less efficient when core layer filters are used - "mm/damon: strictly respect min_nr_regions" (SeongJae Park) Improve DAMON usability by extending the treatment of the min_nr_regions user-settable parameter - "mm/page_alloc: pcp locking cleanup" (Vlastimil Babka) The proper fix for a previously hotfixed SMP=n issue. Code simplifications and cleanups ensued - "mm: cleanups around unmapping / zapping" (David Hildenbrand) A bunch of cleanups around unmapping and zapping. Mostly simplifications, code movements, documentation and renaming of zapping functions - "support batched checking of the young flag for MGLRU" (Baolin Wang) Batched checking of the young flag for MGLRU. It's part cleanups; one benchmark shows large performance benefits for arm64 - "memcg: obj stock and slab stat caching cleanups" (Johannes Weiner) memcg cleanup and robustness improvements - "Allow order zero pages in page reporting" (Yuvraj Sakshith) Enhance free page reporting - it presently and undesirably omits order-0 pages when reporting free memory. 
- "mm: vma flag tweaks" (Lorenzo Stoakes) Cleanup work following from the recent conversion of the VMA flags to a bitmap - "mm/damon: add optional debugging-purpose sanity checks" (SeongJae Park) Add some more developer-facing debug checks into DAMON core - "mm/damon: test and document power-of-2 min_region_sz requirement" (SeongJae Park) An additional DAMON kunit test and makes some adjustments to the addr_unit parameter handling - "mm/damon/core: make passed_sample_intervals comparisons overflow-safe" (SeongJae Park) Fix a hard-to-hit time overflow issue in DAMON core - "mm/damon: improve/fixup/update ratio calculation, test and documentation" (SeongJae Park) A batch of misc/minor improvements and fixups for DAMON - "mm: move vma_(kernel|mmu)_pagesize() out of hugetlb.c" (David Hildenbrand) Fix a possible issue with dax-device when CONFIG_HUGETLB=n. Some code movement was required. - "zram: recompression cleanups and tweaks" (Sergey Senozhatsky) A somewhat random mix of fixups, recompression cleanups and improvements in the zram code - "mm/damon: support multiple goal-based quota tuning algorithms" (SeongJae Park) Extend DAMOS quotas goal auto-tuning to support multiple tuning algorithms that users can select - "mm: thp: reduce unnecessary start_stop_khugepaged()" (Breno Leitao) Fix the khugpaged sysfs handling so we no longer spam the logs with reams of junk when starting/stopping khugepaged - "mm: improve map count checks" (Lorenzo Stoakes) Provide some cleanups and slight fixes in the mremap, mmap and vma code - "mm/damon: support addr_unit on default monitoring targets for modules" (SeongJae Park) Extend the use of DAMON core's addr_unit tunable - "mm: khugepaged cleanups and mTHP prerequisites" (Nico Pache) Cleanups to khugepaged and is a base for Nico's planned khugepaged mTHP support - "mm: memory hot(un)plug and SPARSEMEM cleanups" (David Hildenbrand) Code movement and cleanups in the memhotplug and sparsemem code - "mm: remove 
CONFIG_ARCH_ENABLE_MEMORY_HOTREMOVE and cleanup CONFIG_MIGRATION" (David Hildenbrand) Rationalize some memhotplug Kconfig support - "change young flag check functions to return bool" (Baolin Wang) Cleanups to change all young flag check functions to return bool - "mm/damon/sysfs: fix memory leak and NULL dereference issues" (Josh Law and SeongJae Park) Fix a few potential DAMON bugs - "mm/vma: convert vm_flags_t to vma_flags_t in vma code" (Lorenzo Stoakes) Convert a lot of the existing use of the legacy vm_flags_t data type to the new vma_flags_t type which replaces it. Mainly in the vma code. - "mm: expand mmap_prepare functionality and usage" (Lorenzo Stoakes) Expand the mmap_prepare functionality, which is intended to replace the deprecated f_op->mmap hook which has been the source of bugs and security issues for some time. Cleanups, documentation, extension of mmap_prepare into filesystem drivers - "mm/huge_memory: refactor zap_huge_pmd()" (Lorenzo Stoakes) Simplify and clean up zap_huge_pmd(). Additional cleanups around vm_normal_folio_pmd() and the softleaf functionality are performed. 
* tag 'mm-stable-2026-04-13-21-45' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (369 commits) mm: fix deferred split queue races during migration mm/khugepaged: fix issue with tracking lock mm/huge_memory: add and use has_deposited_pgtable() mm/huge_memory: add and use normal_or_softleaf_folio_pmd() mm: add softleaf_is_valid_pmd_entry(), pmd_to_softleaf_folio() mm/huge_memory: separate out the folio part of zap_huge_pmd() mm/huge_memory: use mm instead of tlb->mm mm/huge_memory: remove unnecessary sanity checks mm/huge_memory: deduplicate zap deposited table call mm/huge_memory: remove unnecessary VM_BUG_ON_PAGE() mm/huge_memory: add a common exit path to zap_huge_pmd() mm/huge_memory: handle buggy PMD entry in zap_huge_pmd() mm/huge_memory: have zap_huge_pmd return a boolean, add kdoc mm/huge: avoid big else branch in zap_huge_pmd() mm/huge_memory: simplify vma_is_specal_huge() mm: on remap assert that input range within the proposed VMA mm: add mmap_action_map_kernel_pages[_full]() uio: replace deprecated mmap hook with mmap_prepare in uio_info drivers: hv: vmbus: replace deprecated mmap hook with mmap_prepare mm: allow handling of stacked mmap_prepare hooks in more drivers ...
532 lines
14 KiB
C
532 lines
14 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
/*
|
|
* Copyright (C) 2024 Rivos, Inc.
|
|
* Deepak Gupta <debug@rivosinc.com>
|
|
*/
|
|
|
|
#include <linux/sched.h>
|
|
#include <linux/bitops.h>
|
|
#include <linux/types.h>
|
|
#include <linux/mm.h>
|
|
#include <linux/mman.h>
|
|
#include <linux/uaccess.h>
|
|
#include <linux/sizes.h>
|
|
#include <linux/user.h>
|
|
#include <linux/syscalls.h>
|
|
#include <linux/prctl.h>
|
|
#include <asm/csr.h>
|
|
#include <asm/usercfi.h>
|
|
|
|
/*
 * Bitmask of user CFI features disabled on the kernel command line via
 * "riscv_nousercfi=" (see setup_global_riscv_enable()).
 */
unsigned long riscv_nousercfi __read_mostly;
|
|
|
|
/* A shadow stack entry is one XLEN-wide pointer */
#define SHSTK_ENTRY_SIZE sizeof(void *)
|
|
|
|
bool is_shstk_enabled(struct task_struct *task)
|
|
{
|
|
return task->thread_info.user_cfi_state.ubcfi_en;
|
|
}
|
|
|
|
bool is_shstk_allocated(struct task_struct *task)
|
|
{
|
|
return task->thread_info.user_cfi_state.shdw_stk_base;
|
|
}
|
|
|
|
bool is_shstk_locked(struct task_struct *task)
|
|
{
|
|
return task->thread_info.user_cfi_state.ubcfi_locked;
|
|
}
|
|
|
|
void set_shstk_base(struct task_struct *task, unsigned long shstk_addr, unsigned long size)
|
|
{
|
|
task->thread_info.user_cfi_state.shdw_stk_base = shstk_addr;
|
|
task->thread_info.user_cfi_state.shdw_stk_size = size;
|
|
}
|
|
|
|
unsigned long get_shstk_base(struct task_struct *task, unsigned long *size)
|
|
{
|
|
if (size)
|
|
*size = task->thread_info.user_cfi_state.shdw_stk_size;
|
|
return task->thread_info.user_cfi_state.shdw_stk_base;
|
|
}
|
|
|
|
void set_active_shstk(struct task_struct *task, unsigned long shstk_addr)
|
|
{
|
|
task->thread_info.user_cfi_state.user_shdw_stk = shstk_addr;
|
|
}
|
|
|
|
unsigned long get_active_shstk(struct task_struct *task)
|
|
{
|
|
return task->thread_info.user_cfi_state.user_shdw_stk;
|
|
}
|
|
|
|
/*
 * Enable or disable shadow stack for @task: update the per-task flag,
 * flip ENVCFG_SSE in the task's envcfg image and write it to CSR_ENVCFG.
 * No-op when shadow stacks are unsupported or disabled on the cmdline.
 */
void set_shstk_status(struct task_struct *task, bool enable)
{
	unsigned long envcfg;

	if (!is_user_shstk_enabled())
		return;

	task->thread_info.user_cfi_state.ubcfi_en = enable ? 1 : 0;

	envcfg = task->thread.envcfg;
	if (enable)
		envcfg |= ENVCFG_SSE;
	else
		envcfg &= ~ENVCFG_SSE;

	task->thread.envcfg = envcfg;
	csr_write(CSR_ENVCFG, envcfg);
}
|
|
|
|
/* Lock (or unlock) @task's shadow stack state against further changes */
void set_shstk_lock(struct task_struct *task, bool lock)
{
	task->thread_info.user_cfi_state.ubcfi_locked = lock ? 1 : 0;
}
|
|
|
|
bool is_indir_lp_enabled(struct task_struct *task)
|
|
{
|
|
return task->thread_info.user_cfi_state.ufcfi_en;
|
|
}
|
|
|
|
bool is_indir_lp_locked(struct task_struct *task)
|
|
{
|
|
return task->thread_info.user_cfi_state.ufcfi_locked;
|
|
}
|
|
|
|
/*
 * Enable or disable indirect-branch landing pads for @task: update the
 * per-task flag, flip ENVCFG_LPE in the task's envcfg image and write it
 * to CSR_ENVCFG. No-op when landing pads are unsupported or disabled on
 * the cmdline.
 */
void set_indir_lp_status(struct task_struct *task, bool enable)
{
	unsigned long envcfg;

	if (!is_user_lpad_enabled())
		return;

	task->thread_info.user_cfi_state.ufcfi_en = enable ? 1 : 0;

	envcfg = task->thread.envcfg;
	if (enable)
		envcfg |= ENVCFG_LPE;
	else
		envcfg &= ~ENVCFG_LPE;

	task->thread.envcfg = envcfg;
	csr_write(CSR_ENVCFG, envcfg);
}
|
|
|
|
/* Lock (or unlock) @task's landing pad state against further changes */
void set_indir_lp_lock(struct task_struct *task, bool lock)
{
	task->thread_info.user_cfi_state.ufcfi_locked = lock ? 1 : 0;
}
|
|
/*
|
|
* If size is 0, then to be compatible with regular stack we want it to be as big as
|
|
* regular stack. Else PAGE_ALIGN it and return back
|
|
*/
|
|
static unsigned long calc_shstk_size(unsigned long size)
|
|
{
|
|
if (size)
|
|
return PAGE_ALIGN(size);
|
|
|
|
return PAGE_ALIGN(min_t(unsigned long long, rlimit(RLIMIT_STACK), SZ_4G));
|
|
}
|
|
|
|
/*
 * Writes on shadow stack can either be `sspush` or `ssamoswap`. `sspush` can happen
 * implicitly on current shadow stack pointed to by CSR_SSP. `ssamoswap` takes pointer to
 * shadow stack. To keep it simple, we plan to use `ssamoswap` to perform writes on shadow
 * stack.
 *
 * Atomically swaps @val into the user shadow stack slot at @addr and returns
 * the previous value, or -1 if the access faulted (the extable entry routes a
 * fault to the `fault` label). User access is enabled only around the asm.
 */
static noinline unsigned long amo_user_shstk(unsigned long __user *addr, unsigned long val)
{
	/*
	 * Never expect -1 on shadow stack. Expect return addresses and zero,
	 * so -1 doubles as the in-band fault sentinel.
	 */
	unsigned long swap = -1;

	__enable_user_access();
	asm goto(".option push\n"
		".option arch, +zicfiss\n"
		"1: ssamoswap.d %[swap], %[val], %[addr]\n"
		_ASM_EXTABLE(1b, %l[fault])
		".option pop\n"
		: [swap] "=r" (swap), [addr] "+A" (*(__force unsigned long *)addr)
		: [val] "r" (val)
		: "memory"
		: fault
		);
	__disable_user_access();
	return swap;
fault:
	/* Faulted inside the asm: drop user access and report failure */
	__disable_user_access();
	return -1;
}
|
|
|
|
/*
|
|
* Create a restore token on the shadow stack. A token is always XLEN wide
|
|
* and aligned to XLEN.
|
|
*/
|
|
static int create_rstor_token(unsigned long ssp, unsigned long *token_addr)
|
|
{
|
|
unsigned long addr;
|
|
|
|
/* Token must be aligned */
|
|
if (!IS_ALIGNED(ssp, SHSTK_ENTRY_SIZE))
|
|
return -EINVAL;
|
|
|
|
/* On RISC-V we're constructing token to be function of address itself */
|
|
addr = ssp - SHSTK_ENTRY_SIZE;
|
|
|
|
if (amo_user_shstk((unsigned long __user *)addr, (unsigned long)ssp) == -1)
|
|
return -EFAULT;
|
|
|
|
if (token_addr)
|
|
*token_addr = addr;
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Save user shadow stack pointer on the shadow stack itself and return a pointer to saved location.
|
|
* Returns -EFAULT if unsuccessful.
|
|
*/
|
|
int save_user_shstk(struct task_struct *tsk, unsigned long *saved_shstk_ptr)
|
|
{
|
|
unsigned long ss_ptr = 0;
|
|
unsigned long token_loc = 0;
|
|
int ret = 0;
|
|
|
|
if (!saved_shstk_ptr)
|
|
return -EINVAL;
|
|
|
|
ss_ptr = get_active_shstk(tsk);
|
|
ret = create_rstor_token(ss_ptr, &token_loc);
|
|
|
|
if (!ret) {
|
|
*saved_shstk_ptr = token_loc;
|
|
set_active_shstk(tsk, token_loc);
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
/*
|
|
* Restores the user shadow stack pointer from the token on the shadow stack for task 'tsk'.
|
|
* Returns -EFAULT if unsuccessful.
|
|
*/
|
|
int restore_user_shstk(struct task_struct *tsk, unsigned long shstk_ptr)
|
|
{
|
|
unsigned long token = 0;
|
|
|
|
token = amo_user_shstk((unsigned long __user *)shstk_ptr, 0);
|
|
|
|
if (token == -1)
|
|
return -EFAULT;
|
|
|
|
/* invalid token, return EINVAL */
|
|
if ((token - shstk_ptr) != SHSTK_ENTRY_SIZE) {
|
|
pr_info_ratelimited("%s[%d]: bad restore token in %s: pc=%p sp=%p, token=%p, shstk_ptr=%p\n",
|
|
tsk->comm, task_pid_nr(tsk), __func__,
|
|
(void *)(task_pt_regs(tsk)->epc),
|
|
(void *)(task_pt_regs(tsk)->sp),
|
|
(void *)token, (void *)shstk_ptr);
|
|
return -EINVAL;
|
|
}
|
|
|
|
/* all checks passed, set active shstk and return success */
|
|
set_active_shstk(tsk, token);
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Map a shadow stack of @size bytes at @addr (or kernel-chosen when 0).
 * When @set_tok is true, additionally place a restore token at
 * @addr + @token_offset; on token failure the mapping is torn down and
 * -EINVAL is returned. Returns the mapped address or an error value.
 */
static unsigned long allocate_shadow_stack(unsigned long addr, unsigned long size,
					   unsigned long token_offset, bool set_tok)
{
	addr = vm_mmap_shadow_stack(addr, size, 0);
	if (IS_ERR_VALUE(addr) || !set_tok)
		return addr;

	if (create_rstor_token(addr + token_offset, NULL)) {
		/* Don't leak the mapping if the token can't be written */
		vm_munmap(addr, size);
		return -EINVAL;
	}

	return addr;
}
|
|
|
|
/*
 * map_shadow_stack() - map a shadow stack for user space.
 * @addr:  requested (page-aligned) address, or 0 for kernel-chosen
 * @size:  requested size in bytes (page-aligned internally)
 * @flags: only SHADOW_STACK_SET_TOKEN is accepted
 */
SYSCALL_DEFINE3(map_shadow_stack, unsigned long, addr, unsigned long, size, unsigned int, flags)
{
	bool set_tok = flags & SHADOW_STACK_SET_TOKEN;
	unsigned long aligned_size;

	if (!is_user_shstk_enabled())
		return -EOPNOTSUPP;

	/* Anything other than the token flag is an invalid parameter */
	if (flags & ~SHADOW_STACK_SET_TOKEN)
		return -EINVAL;

	/*
	 * Unlike other architectures, on RISC-V the SSP pointer is held in
	 * CSR_SSP, an available CSR in all modes. CSR accesses use a 12-bit
	 * index encoded in the instruction itself, which makes register
	 * selection static: writes to CSR_SSP can't be unintentional from the
	 * programmer's perspective. As long as areas that write CSR_SSP are
	 * properly guarded, shadow stack pivoting is not possible. Since
	 * CSR_SSP is writable by user mode, user space could set up a shadow
	 * stack token itself after allocation. However, to stay portable with
	 * other architectures (`map_shadow_stack` is an arch-agnostic
	 * syscall), RISC-V honours the token flag and, when it is set, places
	 * a token at the base.
	 */

	/* If there isn't space for a token */
	if (set_tok && size < SHSTK_ENTRY_SIZE)
		return -ENOSPC;

	/* A caller-specified address must be page aligned */
	if (addr && (addr & (PAGE_SIZE - 1)))
		return -EINVAL;

	aligned_size = PAGE_ALIGN(size);
	if (aligned_size < size)
		return -EOVERFLOW;

	/* Token (if requested) is written just below addr + size */
	return allocate_shadow_stack(addr, aligned_size, size, set_tok);
}
|
|
|
|
/*
|
|
* This gets called during clone/clone3/fork. And is needed to allocate a shadow stack for
|
|
* cases where CLONE_VM is specified and thus a different stack is specified by user. We
|
|
* thus need a separate shadow stack too. How a separate shadow stack is specified by
|
|
* user is still being debated. Once that's settled, remove this part of the comment.
|
|
* This function simply returns 0 if shadow stacks are not supported or if separate shadow
|
|
* stack allocation is not needed (like in case of !CLONE_VM)
|
|
*/
|
|
unsigned long shstk_alloc_thread_stack(struct task_struct *tsk,
|
|
const struct kernel_clone_args *args)
|
|
{
|
|
unsigned long addr, size;
|
|
|
|
/* If shadow stack is not supported, return 0 */
|
|
if (!is_user_shstk_enabled())
|
|
return 0;
|
|
|
|
/*
|
|
* If shadow stack is not enabled on the new thread, skip any
|
|
* switch to a new shadow stack.
|
|
*/
|
|
if (!is_shstk_enabled(tsk))
|
|
return 0;
|
|
|
|
/*
|
|
* For CLONE_VFORK the child will share the parents shadow stack.
|
|
* Set base = 0 and size = 0, this is special means to track this state
|
|
* so the freeing logic run for child knows to leave it alone.
|
|
*/
|
|
if (args->flags & CLONE_VFORK) {
|
|
set_shstk_base(tsk, 0, 0);
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* For !CLONE_VM the child will use a copy of the parents shadow
|
|
* stack.
|
|
*/
|
|
if (!(args->flags & CLONE_VM))
|
|
return 0;
|
|
|
|
/*
|
|
* reaching here means, CLONE_VM was specified and thus a separate shadow
|
|
* stack is needed for new cloned thread. Note: below allocation is happening
|
|
* using current mm.
|
|
*/
|
|
size = calc_shstk_size(args->stack_size);
|
|
addr = allocate_shadow_stack(0, size, 0, false);
|
|
if (IS_ERR_VALUE(addr))
|
|
return addr;
|
|
|
|
set_shstk_base(tsk, addr, size);
|
|
|
|
return addr + size;
|
|
}
|
|
|
|
void shstk_release(struct task_struct *tsk)
|
|
{
|
|
unsigned long base = 0, size = 0;
|
|
/* If shadow stack is not supported or not enabled, nothing to release */
|
|
if (!is_user_shstk_enabled() || !is_shstk_enabled(tsk))
|
|
return;
|
|
|
|
/*
|
|
* When fork() with CLONE_VM fails, the child (tsk) already has a
|
|
* shadow stack allocated, and exit_thread() calls this function to
|
|
* free it. In this case the parent (current) and the child share
|
|
* the same mm struct. Move forward only when they're same.
|
|
*/
|
|
if (!tsk->mm || tsk->mm != current->mm)
|
|
return;
|
|
|
|
/*
|
|
* We know shadow stack is enabled but if base is NULL, then
|
|
* this task is not managing its own shadow stack (CLONE_VFORK). So
|
|
* skip freeing it.
|
|
*/
|
|
base = get_shstk_base(tsk, &size);
|
|
if (!base)
|
|
return;
|
|
|
|
vm_munmap(base, size);
|
|
set_shstk_base(tsk, 0, 0);
|
|
}
|
|
|
|
/*
 * PTRACE/prctl backend: report @t's shadow stack status to user memory.
 * Currently only PR_SHADOW_STACK_ENABLE is reported. Returns -EINVAL when
 * shadow stacks are unsupported, -EFAULT on a failed copy, else 0.
 */
int arch_get_shadow_stack_status(struct task_struct *t, unsigned long __user *status)
{
	unsigned long bcfi_status;

	if (!is_user_shstk_enabled())
		return -EINVAL;

	/* Set the enable bit iff shadow stack is enabled on the task */
	bcfi_status = is_shstk_enabled(t) ? PR_SHADOW_STACK_ENABLE : 0;

	if (copy_to_user(status, &bcfi_status, sizeof(bcfi_status)))
		return -EFAULT;

	return 0;
}
|
|
|
|
/*
 * PTRACE/prctl backend: enable or disable shadow stack for task @t per
 * @status (PR_SHADOW_STACK_* bits). Enabling allocates a fresh shadow
 * stack; disabling releases it (see the caveat on vforked children below).
 * Returns 0 on success, -EINVAL for unsupported/locked/invalid requests,
 * -ENOMEM when allocation fails.
 */
int arch_set_shadow_stack_status(struct task_struct *t, unsigned long status)
{
	unsigned long size = 0, addr = 0;
	bool enable_shstk = false;

	if (!is_user_shstk_enabled())
		return -EINVAL;

	/* Reject unknown flags */
	if (status & ~PR_SHADOW_STACK_SUPPORTED_STATUS_MASK)
		return -EINVAL;

	/* bcfi status is locked and further can't be modified by user */
	if (is_shstk_locked(t))
		return -EINVAL;

	enable_shstk = status & PR_SHADOW_STACK_ENABLE;
	/* Request is to enable shadow stack and shadow stack is not enabled already */
	if (enable_shstk && !is_shstk_enabled(t)) {
		/*
		 * A shadow stack is already allocated but another enable
		 * request arrived; no need to support such a usecase,
		 * return EINVAL.
		 */
		if (is_shstk_allocated(t))
			return -EINVAL;

		/* size 0 means "size it like the regular stack" */
		size = calc_shstk_size(0);
		addr = allocate_shadow_stack(0, size, 0, false);
		if (IS_ERR_VALUE(addr))
			return -ENOMEM;
		set_shstk_base(t, addr, size);
		/* Shadow stack grows down: active pointer starts at the top */
		set_active_shstk(t, addr + size);
	}

	/*
	 * If a request to disable shadow stack happens, let's go ahead and release it.
	 * Although, if a CLONE_VFORKed child did this, then in that case we will end up
	 * not releasing the shadow stack (because it might be needed in the parent),
	 * but we will still disable it for the VFORKed child. And if the VFORKed child
	 * tries to enable again then it'll get an entirely new shadow stack, because
	 * the following conditions are true:
	 * - shadow stack was not enabled for the vforked child
	 * - shadow stack base was anyways pointing to 0
	 * This shouldn't be a big issue because we want the parent to have the shadow
	 * stack available whenever the VFORKed child releases resources via exit or
	 * exec, but at the same time we want the VFORKed child to be able to break away
	 * and establish a new shadow stack if it desires.
	 */
	if (!enable_shstk)
		shstk_release(t);

	/* Must follow shstk_release(): it checks the still-enabled state */
	set_shstk_status(t, enable_shstk);
	return 0;
}
|
|
|
|
int arch_lock_shadow_stack_status(struct task_struct *task,
|
|
unsigned long arg)
|
|
{
|
|
/* If shtstk not supported or not enabled on task, nothing to lock here */
|
|
if (!is_user_shstk_enabled() ||
|
|
!is_shstk_enabled(task) || arg != 0)
|
|
return -EINVAL;
|
|
|
|
set_shstk_lock(task, true);
|
|
|
|
return 0;
|
|
}
|
|
|
|
int arch_prctl_get_branch_landing_pad_state(struct task_struct *t,
|
|
unsigned long __user *state)
|
|
{
|
|
unsigned long fcfi_status = 0;
|
|
|
|
if (!is_user_lpad_enabled())
|
|
return -EINVAL;
|
|
|
|
fcfi_status = (is_indir_lp_enabled(t) ? PR_CFI_ENABLE : PR_CFI_DISABLE);
|
|
fcfi_status |= (is_indir_lp_locked(t) ? PR_CFI_LOCK : 0);
|
|
|
|
return copy_to_user(state, &fcfi_status, sizeof(fcfi_status)) ? -EFAULT : 0;
|
|
}
|
|
|
|
int arch_prctl_set_branch_landing_pad_state(struct task_struct *t, unsigned long state)
|
|
{
|
|
if (!is_user_lpad_enabled())
|
|
return -EINVAL;
|
|
|
|
/* indirect branch tracking is locked and further can't be modified by user */
|
|
if (is_indir_lp_locked(t))
|
|
return -EINVAL;
|
|
|
|
if (!(state & (PR_CFI_ENABLE | PR_CFI_DISABLE)))
|
|
return -EINVAL;
|
|
|
|
if (state & PR_CFI_ENABLE && state & PR_CFI_DISABLE)
|
|
return -EINVAL;
|
|
|
|
set_indir_lp_status(t, !!(state & PR_CFI_ENABLE));
|
|
|
|
return 0;
|
|
}
|
|
|
|
int arch_prctl_lock_branch_landing_pad_state(struct task_struct *task)
|
|
{
|
|
/*
|
|
* If indirect branch tracking is not supported or not enabled on task,
|
|
* nothing to lock here
|
|
*/
|
|
if (!is_user_lpad_enabled() ||
|
|
!is_indir_lp_enabled(task))
|
|
return -EINVAL;
|
|
|
|
set_indir_lp_lock(task, true);
|
|
|
|
return 0;
|
|
}
|
|
|
|
bool is_user_shstk_enabled(void)
|
|
{
|
|
return (cpu_supports_shadow_stack() &&
|
|
!(riscv_nousercfi & CMDLINE_DISABLE_RISCV_USERCFI_BCFI));
|
|
}
|
|
|
|
bool is_user_lpad_enabled(void)
|
|
{
|
|
return (cpu_supports_indirect_br_lp_instr() &&
|
|
!(riscv_nousercfi & CMDLINE_DISABLE_RISCV_USERCFI_FCFI));
|
|
}
|
|
|
|
/*
 * Parse the "riscv_nousercfi=" early parameter. Accepted values:
 *   all  - disable both shadow stack (bcfi) and landing pads (fcfi)
 *   bcfi - disable shadow stack only
 *   fcfi - disable landing pads only
 * The values are mutually exclusive per invocation, so an else-if chain is
 * used; unrecognized values are reported instead of silently ignored.
 * Always returns 1 to mark the parameter as handled.
 */
static int __init setup_global_riscv_enable(char *str)
{
	if (strcmp(str, "all") == 0)
		riscv_nousercfi = CMDLINE_DISABLE_RISCV_USERCFI;
	else if (strcmp(str, "fcfi") == 0)
		riscv_nousercfi |= CMDLINE_DISABLE_RISCV_USERCFI_FCFI;
	else if (strcmp(str, "bcfi") == 0)
		riscv_nousercfi |= CMDLINE_DISABLE_RISCV_USERCFI_BCFI;
	else
		pr_warn("riscv_nousercfi: ignoring unknown value \"%s\"\n", str);

	if (riscv_nousercfi)
		pr_info("RISC-V user CFI disabled via cmdline - shadow stack status : %s, landing pad status : %s\n",
			(riscv_nousercfi & CMDLINE_DISABLE_RISCV_USERCFI_BCFI) ? "disabled" :
			"enabled", (riscv_nousercfi & CMDLINE_DISABLE_RISCV_USERCFI_FCFI) ?
			"disabled" : "enabled");

	return 1;
}
|
|
|
|
/* Register the "riscv_nousercfi=" kernel command line parameter */
__setup("riscv_nousercfi=", setup_global_riscv_enable);
|