linux/kernel/entry/common.c
Linus Torvalds c43267e679 Merge tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux
Pull arm64 updates from Catalin Marinas:
 "The biggest changes are MPAM enablement in drivers/resctrl and new PMU
  support under drivers/perf.

  On the core side, FEAT_LSUI allows futex atomic operations to run with
  EL0 permissions, avoiding PAN toggling (see the sketch after this log
  entry).

  The rest is mostly TLB invalidation refactoring, further generic entry
  work, sysreg updates and a few fixes.

  Core features:

   - Add support for FEAT_LSUI, allowing futex atomic operations without
     toggling Privileged Access Never (PAN)

   - Further refactor the arm64 exception handling code towards the
     generic entry infrastructure

   - Optimise __READ_ONCE() with CONFIG_LTO=y and allow alias analysis
     through it

  Memory management:

   - Refactor the arm64 TLB invalidation API and implementation for
     better control over barrier placement and level-hinted invalidation

   - Enable batched TLB flushes during memory hot-unplug

   - Fix rodata=full block mapping support for realm guests (when
     BBML2_NOABORT is available)

  Perf and PMU:

   - Add support for a whole bunch of system PMUs featured in NVIDIA's
     Tegra410 SoC (cspmu extensions for the fabric and PCIe, new drivers
     for CPU/C2C memory latency PMUs)

   - Clean up iomem resource handling in the Arm CMN driver

   - Fix signedness handling of AA64DFR0.{PMUVer,PerfMon}

  MPAM (Memory Partitioning And Monitoring):

   - Add architecture support for context-switching MPAM state and hide
     the feature from KVM

   - Add interface to allow MPAM to be exposed to user-space using
     resctrl

   - Add errata workaround for some existing platforms

   - Add documentation on how to use MPAM and which kinds of platforms
     can use resctrl

  Miscellaneous:

   - Check DAIF (and PMR, where relevant) at task-switch time

   - Skip TFSR_EL1 checks and barriers in synchronous MTE tag check mode
     (only relevant to asynchronous or asymmetric tag check modes)

   - Remove a duplicate allocation in the kexec code

   - Remove redundant save/restore of SCS SP on entry to/from EL0

   - Generate the KERNEL_HWCAP_ definitions from the arm64 hwcap
     descriptions

   - Add kselftest coverage for cmpbr_sigill()

   - Update sysreg definitions"

* tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux: (109 commits)
  arm64: rsi: use linear-map alias for realm config buffer
  arm64: Kconfig: fix duplicate word in CMDLINE help text
  arm64: mte: Skip TFSR_EL1 checks and barriers in synchronous tag check mode
  arm64/sysreg: Update ID_AA64SMFR0_EL1 description to DDI0601 2025-12
  arm64/sysreg: Update ID_AA64ZFR0_EL1 description to DDI0601 2025-12
  arm64/sysreg: Update ID_AA64FPFR0_EL1 description to DDI0601 2025-12
  arm64/sysreg: Update ID_AA64ISAR2_EL1 description to DDI0601 2025-12
  arm64/sysreg: Update ID_AA64ISAR0_EL1 description to DDI0601 2025-12
  arm64/hwcap: Generate the KERNEL_HWCAP_ definitions for the hwcaps
  arm64: kexec: Remove duplicate allocation for trans_pgd
  ACPI: AGDI: fix missing newline in error message
  arm64: Check DAIF (and PMR) at task-switch time
  arm64: entry: Use split preemption logic
  arm64: entry: Use irqentry_{enter_from,exit_to}_kernel_mode()
  arm64: entry: Consistently prefix arm64-specific wrappers
  arm64: entry: Don't preempt with SError or Debug masked
  entry: Split preemption from irqentry_exit_to_kernel_mode()
  entry: Split kernel mode logic from irqentry_{enter,exit}()
  entry: Move irqentry_enter() prototype later
  entry: Remove local_irq_{enable,disable}_exit_to_user()
  ...
2026-04-14 16:48:56 -07:00
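To illustrate the FEAT_LSUI item above, here is a minimal sketch of the shape of the change, not the kernel's actual futex code: it assumes the arm64 uaccess_enable_privileged()/uaccess_disable_privileged() helpers, and __futex_atomic_op()/__futex_atomic_op_lsui() are invented stand-ins for the real asm bodies.

	/* Before: privileged kernel accesses to user memory need PAN toggled off. */
	static int futex_atomic_op_pan(u32 __user *uaddr, int oparg, int *oval)
	{
		int ret;

		uaccess_enable_privileged();			/* PSTATE.PAN = 0 */
		ret = __futex_atomic_op(uaddr, oparg, oval);	/* hypothetical asm body */
		uaccess_disable_privileged();			/* PSTATE.PAN = 1 */
		return ret;
	}

	/*
	 * After: FEAT_LSUI provides unprivileged load/store and atomic
	 * instructions that access user memory with EL0 permissions, so
	 * the PAN window (and its system register writes) disappears.
	 */
	static int futex_atomic_op_lsui(u32 __user *uaddr, int oparg, int *oval)
	{
		/* hypothetical asm body using the unprivileged LSUI instructions */
		return __futex_atomic_op_lsui(uaddr, oparg, oval);
	}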


// SPDX-License-Identifier: GPL-2.0

#include <linux/irq-entry-common.h>
#include <linux/resume_user_mode.h>
#include <linux/highmem.h>
#include <linux/jump_label.h>
#include <linux/kmsan.h>
#include <linux/livepatch.h>
#include <linux/tick.h>

/* Workaround to allow gradual conversion of architecture code */
void __weak arch_do_signal_or_restart(struct pt_regs *regs) { }
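
/*
 * With the generic TIF bits, rseq work is not handled inside the work
 * loop itself; it is processed once on the way out via
 * rseq_exit_to_user_mode_restart(), so _TIF_RSEQ is masked out of the
 * loop's work set here.
 */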
#ifdef CONFIG_HAVE_GENERIC_TIF_BITS
#define EXIT_TO_USER_MODE_WORK_LOOP	(EXIT_TO_USER_MODE_WORK & ~_TIF_RSEQ)
#else
#define EXIT_TO_USER_MODE_WORK_LOOP	(EXIT_TO_USER_MODE_WORK)
#endif

/* TIF bits which prevent a time slice extension. */
#ifdef CONFIG_PREEMPT_RT
/*
 * Since the rseq slice extension directly adds to the worst case
 * scheduling latency (schedule is delayed after all), only have it affect
 * LAZY reschedules on PREEMPT_RT for now.
 *
 * Since this delay is only applicable to userspace, a value for
 * rseq_slice_extension_nsec that is strictly less than the worst case
 * kernel space preempt_disable() region should mean the scheduling latency
 * is not affected, even for !LAZY.
 *
 * However, since this value depends on the hardware at hand, it cannot be
 * pre-determined in any sensible way. Hence punt on this problem for now.
 */
# define TIF_SLICE_EXT_SCHED	(_TIF_NEED_RESCHED_LAZY)
#else
# define TIF_SLICE_EXT_SCHED	(_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY)
#endif

#define TIF_SLICE_EXT_DENY	(EXIT_TO_USER_MODE_WORK & ~TIF_SLICE_EXT_SCHED)

static __always_inline unsigned long __exit_to_user_mode_loop(struct pt_regs *regs,
							      unsigned long ti_work)
{
	/*
	 * Before returning to user space ensure that all pending work
	 * items have been completed.
	 */
	while (ti_work & EXIT_TO_USER_MODE_WORK_LOOP) {

		local_irq_enable();

		if (ti_work & (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY)) {
			/* Skip schedule() if a time slice extension was granted */
			if (!rseq_grant_slice_extension(ti_work, TIF_SLICE_EXT_DENY))
				schedule();
		}

		if (ti_work & _TIF_UPROBE)
			uprobe_notify_resume(regs);

		if (ti_work & _TIF_PATCH_PENDING)
			klp_update_patch_state(current);

		if (ti_work & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL))
			arch_do_signal_or_restart(regs);

		if (ti_work & _TIF_NOTIFY_RESUME)
			resume_user_mode_work(regs);

		/* Architecture specific TIF work */
		arch_exit_to_user_mode_work(regs, ti_work);

		/*
		 * Disable interrupts and reevaluate the work flags as they
		 * might have changed while interrupts and preemption were
		 * enabled above.
		 */
		local_irq_disable();

		/* Check if any of the above work has queued a deferred wakeup */
		tick_nohz_user_enter_prepare();

		ti_work = read_thread_flags();
	}

	/* Return the latest work state for arch_exit_to_user_mode() */
	return ti_work;
}

/**
 * exit_to_user_mode_loop - do any pending work before leaving to user space
 * @regs: Pointer to pt_regs on entry stack
 * @ti_work: TIF work flags as read by the caller
 */
__always_inline unsigned long exit_to_user_mode_loop(struct pt_regs *regs,
						     unsigned long ti_work)
{
	for (;;) {
		ti_work = __exit_to_user_mode_loop(regs, ti_work);

		if (likely(!rseq_exit_to_user_mode_restart(regs, ti_work)))
			return ti_work;

		/* rseq fixup requires a restart; re-read the work flags and go again */
		ti_work = read_thread_flags();
	}
}
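
/*
 * Both loops above are entered with interrupts disabled; the inner loop
 * enables interrupts only around the individual work items and disables
 * them again before re-reading the work flags.
 */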

noinstr irqentry_state_t irqentry_enter(struct pt_regs *regs)
{
	if (user_mode(regs)) {
		irqentry_state_t ret = {
			.exit_rcu = false,
		};

		irqentry_enter_from_user_mode(regs);
		return ret;
	}

	return irqentry_enter_from_kernel_mode(regs);
}

/**
 * arch_irqentry_exit_need_resched - Architecture specific need resched function
 *
 * Invoked from raw_irqentry_exit_cond_resched() to check if resched is needed.
 * Defaults to returning true.
 *
 * The main purpose is to permit an architecture to avoid preemption of a
 * task from an IRQ.
 */
static inline bool arch_irqentry_exit_need_resched(void);

#ifndef arch_irqentry_exit_need_resched
static inline bool arch_irqentry_exit_need_resched(void) { return true; }
#endif
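
/*
 * A hypothetical override, for illustration only: an architecture defines
 * the macro in its headers to replace the weak default above, e.g. to
 * refuse preemption while some arch-specific exception is still masked:
 *
 *	#define arch_irqentry_exit_need_resched arch_irqentry_exit_need_resched
 *	static inline bool arch_irqentry_exit_need_resched(void)
 *	{
 *		return !arch_exceptions_masked();	// hypothetical helper
 *	}
 */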

void raw_irqentry_exit_cond_resched(void)
{
	if (!preempt_count()) {
		/* Sanity check RCU and thread stack */
		rcu_irq_exit_check_preempt();
		if (IS_ENABLED(CONFIG_DEBUG_ENTRY))
			WARN_ON_ONCE(!on_thread_stack());
		if (need_resched() && arch_irqentry_exit_need_resched())
			preempt_schedule_irq();
	}
}
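
/*
 * With CONFIG_PREEMPT_DYNAMIC the boot-time "preempt=" choice decides
 * whether the cond_resched above runs: architectures with static calls
 * patch the call site directly, the others gate it behind a static key.
 */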
#ifdef CONFIG_PREEMPT_DYNAMIC
#if defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL)
DEFINE_STATIC_CALL(irqentry_exit_cond_resched, raw_irqentry_exit_cond_resched);
#elif defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY)
DEFINE_STATIC_KEY_TRUE(sk_dynamic_irqentry_exit_cond_resched);
void dynamic_irqentry_exit_cond_resched(void)
{
	if (!static_branch_unlikely(&sk_dynamic_irqentry_exit_cond_resched))
		return;
	raw_irqentry_exit_cond_resched();
}
#endif
#endif

noinstr void irqentry_exit(struct pt_regs *regs, irqentry_state_t state)
{
	if (user_mode(regs))
		irqentry_exit_to_user_mode(regs);
	else
		irqentry_exit_to_kernel_mode(regs, state);
}
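
/*
 * The NMI helpers below mirror each other: irqentry_nmi_enter() captures
 * the lockdep hardirq state in irq_state and irqentry_nmi_exit() restores
 * it, unwinding the enter steps in reverse order.
 */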
irqentry_state_t noinstr irqentry_nmi_enter(struct pt_regs *regs)
{
	irqentry_state_t irq_state;

	irq_state.lockdep = lockdep_hardirqs_enabled();

	__nmi_enter();
	lockdep_hardirqs_off(CALLER_ADDR0);
	lockdep_hardirq_enter();
	ct_nmi_enter();

	instrumentation_begin();
	kmsan_unpoison_entry_regs(regs);
	trace_hardirqs_off_finish();
	ftrace_nmi_enter();
	instrumentation_end();

	return irq_state;
}

void noinstr irqentry_nmi_exit(struct pt_regs *regs, irqentry_state_t irq_state)
{
	instrumentation_begin();
	ftrace_nmi_exit();
	if (irq_state.lockdep) {
		trace_hardirqs_on_prepare();
		lockdep_hardirqs_on_prepare();
	}
	instrumentation_end();

	ct_nmi_exit();
	lockdep_hardirq_exit();
	if (irq_state.lockdep)
		lockdep_hardirqs_on(CALLER_ADDR0);
	__nmi_exit();
}