mirror of
https://github.com/torvalds/linux.git
synced 2026-04-18 06:44:00 -04:00
Merge tag 'kvmarm-7.1' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm into HEAD
KVM/arm64 updates for 7.1 * New features: - Add support for tracing in the standalone EL2 hypervisor code, which should help both debugging and performance analysis. This comes with a full infrastructure for 'remote' trace buffers that can be exposed by non-kernel entities such as firmware. - Add support for GICv5 Per Processor Interrupts (PPIs), as the starting point for supporting the new GIC architecture in KVM. - Finally add support for pKVM protected guests, with anonymous memory being used as a backing store. About time! * Improvements and bug fixes: - Rework the dreaded user_mem_abort() function to make it more maintainable, reducing the amount of state being exposed to the various helpers and rendering a substantial amount of state immutable. - Expand the Stage-2 page table dumper to support NV shadow page tables on a per-VM basis. - Tidy up the pKVM PSCI proxy code to be slightly less hard to follow. - Fix both SPE and TRBE in non-VHE configurations so that they do not generate spurious, out of context table walks that ultimately lead to very bad HW lockups. - A small set of patches fixing the Stage-2 MMU freeing in error cases. - Tighten-up accepted SMC immediate value to be only #0 for host SMCCC calls. - The usual cleanups and other selftest churn.
This commit is contained in:
@@ -3247,8 +3247,8 @@ Kernel parameters
|
||||
for the host. To force nVHE on VHE hardware, add
|
||||
"arm64_sw.hvhe=0 id_aa64mmfr1.vh=0" to the
|
||||
command-line.
|
||||
"nested" is experimental and should be used with
|
||||
extreme caution.
|
||||
"nested" and "protected" are experimental and should be
|
||||
used with extreme caution.
|
||||
|
||||
kvm-arm.vgic_v3_group0_trap=
|
||||
[KVM,ARM,EARLY] Trap guest accesses to GICv3 group-0
|
||||
|
||||
@@ -91,6 +91,17 @@ interactions.
|
||||
user_events
|
||||
uprobetracer
|
||||
|
||||
Remote Tracing
|
||||
--------------
|
||||
|
||||
This section covers the framework to read compatible ring-buffers, written by
|
||||
entities outside of the kernel (most likely firmware or hypervisor)
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
|
||||
remotes
|
||||
|
||||
Additional Resources
|
||||
--------------------
|
||||
|
||||
|
||||
66
Documentation/trace/remotes.rst
Normal file
66
Documentation/trace/remotes.rst
Normal file
@@ -0,0 +1,66 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
===============
|
||||
Tracing Remotes
|
||||
===============
|
||||
|
||||
:Author: Vincent Donnefort <vdonnefort@google.com>
|
||||
|
||||
Overview
|
||||
========
|
||||
Firmware and hypervisors are black boxes to the kernel. Having a way to see what
|
||||
they are doing can be useful to debug both. This is where remote tracing buffers
|
||||
come in. A remote tracing buffer is a ring buffer executed by the firmware or
|
||||
hypervisor into memory that is memory mapped to the host kernel. This is similar
|
||||
to how user space memory maps the kernel ring buffer but in this case the kernel
|
||||
is acting like user space and the firmware or hypervisor is the "kernel" side.
|
||||
With a trace remote ring buffer, the firmware and hypervisor can record events
|
||||
for which the host kernel can see and expose to user space.
|
||||
|
||||
Register a remote
|
||||
=================
|
||||
A remote must provide a set of callbacks `struct trace_remote_callbacks` whom
|
||||
description can be found below. Those callbacks allows Tracefs to enable and
|
||||
disable tracing and events, to load and unload a tracing buffer (a set of
|
||||
ring-buffers) and to swap a reader page with the head page, which enables
|
||||
consuming reading.
|
||||
|
||||
.. kernel-doc:: include/linux/trace_remote.h
|
||||
|
||||
Once registered, an instance will appear for this remote in the Tracefs
|
||||
directory **remotes/**. Buffers can then be read using the usual Tracefs files
|
||||
**trace_pipe** and **trace**.
|
||||
|
||||
Declare a remote event
|
||||
======================
|
||||
Macros are provided to ease the declaration of remote events, in a similar
|
||||
fashion to in-kernel events. A declaration must provide an ID, a description of
|
||||
the event arguments and how to print the event:
|
||||
|
||||
.. code-block:: c
|
||||
|
||||
REMOTE_EVENT(foo, EVENT_FOO_ID,
|
||||
RE_STRUCT(
|
||||
re_field(u64, bar)
|
||||
),
|
||||
RE_PRINTK("bar=%lld", __entry->bar)
|
||||
);
|
||||
|
||||
Then those events must be declared in a C file with the following:
|
||||
|
||||
.. code-block:: c
|
||||
|
||||
#define REMOTE_EVENT_INCLUDE_FILE foo_events.h
|
||||
#include <trace/define_remote_events.h>
|
||||
|
||||
This will provide a `struct remote_event remote_event_foo` that can be given to
|
||||
`trace_remote_register`.
|
||||
|
||||
Registered events appear in the remote directory under **events/**.
|
||||
|
||||
Simple ring-buffer
|
||||
==================
|
||||
A simple implementation for a ring-buffer writer can be found in
|
||||
kernel/trace/simple_ring_buffer.c.
|
||||
|
||||
.. kernel-doc:: include/linux/simple_ring_buffer.h
|
||||
@@ -907,10 +907,12 @@ The irq_type field has the following values:
|
||||
- KVM_ARM_IRQ_TYPE_CPU:
|
||||
out-of-kernel GIC: irq_id 0 is IRQ, irq_id 1 is FIQ
|
||||
- KVM_ARM_IRQ_TYPE_SPI:
|
||||
in-kernel GIC: SPI, irq_id between 32 and 1019 (incl.)
|
||||
in-kernel GICv2/GICv3: SPI, irq_id between 32 and 1019 (incl.)
|
||||
(the vcpu_index field is ignored)
|
||||
in-kernel GICv5: SPI, irq_id between 0 and 65535 (incl.)
|
||||
- KVM_ARM_IRQ_TYPE_PPI:
|
||||
in-kernel GIC: PPI, irq_id between 16 and 31 (incl.)
|
||||
in-kernel GICv2/GICv3: PPI, irq_id between 16 and 31 (incl.)
|
||||
in-kernel GICv5: PPI, irq_id between 0 and 127 (incl.)
|
||||
|
||||
(The irq_id field thus corresponds nicely to the IRQ ID in the ARM GIC specs)
|
||||
|
||||
|
||||
@@ -10,6 +10,7 @@ ARM
|
||||
fw-pseudo-registers
|
||||
hyp-abi
|
||||
hypercalls
|
||||
pkvm
|
||||
pvtime
|
||||
ptp_kvm
|
||||
vcpu-features
|
||||
|
||||
106
Documentation/virt/kvm/arm/pkvm.rst
Normal file
106
Documentation/virt/kvm/arm/pkvm.rst
Normal file
@@ -0,0 +1,106 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
====================
|
||||
Protected KVM (pKVM)
|
||||
====================
|
||||
|
||||
**NOTE**: pKVM is currently an experimental, development feature and
|
||||
subject to breaking changes as new isolation features are implemented.
|
||||
Please reach out to the developers at kvmarm@lists.linux.dev if you have
|
||||
any questions.
|
||||
|
||||
Overview
|
||||
========
|
||||
|
||||
Booting a host kernel with '``kvm-arm.mode=protected``' enables
|
||||
"Protected KVM" (pKVM). During boot, pKVM installs a stage-2 identity
|
||||
map page-table for the host and uses it to isolate the hypervisor
|
||||
running at EL2 from the rest of the host running at EL1/0.
|
||||
|
||||
pKVM permits creation of protected virtual machines (pVMs) by passing
|
||||
the ``KVM_VM_TYPE_ARM_PROTECTED`` machine type identifier to the
|
||||
``KVM_CREATE_VM`` ioctl(). The hypervisor isolates pVMs from the host by
|
||||
unmapping pages from the stage-2 identity map as they are accessed by a
|
||||
pVM. Hypercalls are provided for a pVM to share specific regions of its
|
||||
IPA space back with the host, allowing for communication with the VMM.
|
||||
A Linux guest must be configured with ``CONFIG_ARM_PKVM_GUEST=y`` in
|
||||
order to issue these hypercalls.
|
||||
|
||||
See hypercalls.rst for more details.
|
||||
|
||||
Isolation mechanisms
|
||||
====================
|
||||
|
||||
pKVM relies on a number of mechanisms to isolate PVMs from the host:
|
||||
|
||||
CPU memory isolation
|
||||
--------------------
|
||||
|
||||
Status: Isolation of anonymous memory and metadata pages.
|
||||
|
||||
Metadata pages (e.g. page-table pages and '``struct kvm_vcpu``' pages)
|
||||
are donated from the host to the hypervisor during pVM creation and
|
||||
are consequently unmapped from the stage-2 identity map until the pVM is
|
||||
destroyed.
|
||||
|
||||
Similarly to regular KVM, pages are lazily mapped into the guest in
|
||||
response to stage-2 page faults handled by the host. However, when
|
||||
running a pVM, these pages are first pinned and then unmapped from the
|
||||
stage-2 identity map as part of the donation procedure. This gives rise
|
||||
to some user-visible differences when compared to non-protected VMs,
|
||||
largely due to the lack of MMU notifiers:
|
||||
|
||||
* Memslots cannot be moved or deleted once the pVM has started running.
|
||||
* Read-only memslots and dirty logging are not supported.
|
||||
* With the exception of swap, file-backed pages cannot be mapped into a
|
||||
pVM.
|
||||
* Donated pages are accounted against ``RLIMIT_MLOCK`` and so the VMM
|
||||
must have a sufficient resource limit or be granted ``CAP_IPC_LOCK``.
|
||||
The lack of a runtime reclaim mechanism means that memory locked for
|
||||
a pVM will remain locked until the pVM is destroyed.
|
||||
* Changes to the VMM address space (e.g. a ``MAP_FIXED`` mmap() over a
|
||||
mapping associated with a memslot) are not reflected in the guest and
|
||||
may lead to loss of coherency.
|
||||
* Accessing pVM memory that has not been shared back will result in the
|
||||
delivery of a SIGSEGV.
|
||||
* If a system call accesses pVM memory that has not been shared back
|
||||
then it will either return ``-EFAULT`` or forcefully reclaim the
|
||||
memory pages. Reclaimed memory is zeroed by the hypervisor and a
|
||||
subsequent attempt to access it in the pVM will return ``-EFAULT``
|
||||
from the ``VCPU_RUN`` ioctl().
|
||||
|
||||
CPU state isolation
|
||||
-------------------
|
||||
|
||||
Status: **Unimplemented.**
|
||||
|
||||
DMA isolation using an IOMMU
|
||||
----------------------------
|
||||
|
||||
Status: **Unimplemented.**
|
||||
|
||||
Proxying of Trustzone services
|
||||
------------------------------
|
||||
|
||||
Status: FF-A and PSCI calls from the host are proxied by the pKVM
|
||||
hypervisor.
|
||||
|
||||
The FF-A proxy ensures that the host cannot share pVM or hypervisor
|
||||
memory with Trustzone as part of a "confused deputy" attack.
|
||||
|
||||
The PSCI proxy ensures that CPUs always have the stage-2 identity map
|
||||
installed when they are executing in the host.
|
||||
|
||||
Protected VM firmware (pvmfw)
|
||||
-----------------------------
|
||||
|
||||
Status: **Unimplemented.**
|
||||
|
||||
Resources
|
||||
=========
|
||||
|
||||
Quentin Perret's KVM Forum 2022 talk entitled "Protected KVM on arm64: A
|
||||
technical deep dive" remains a good resource for learning more about
|
||||
pKVM, despite some of the details having changed in the meantime:
|
||||
|
||||
https://www.youtube.com/watch?v=9npebeVFbFw
|
||||
50
Documentation/virt/kvm/devices/arm-vgic-v5.rst
Normal file
50
Documentation/virt/kvm/devices/arm-vgic-v5.rst
Normal file
@@ -0,0 +1,50 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
====================================================
|
||||
ARM Virtual Generic Interrupt Controller v5 (VGICv5)
|
||||
====================================================
|
||||
|
||||
|
||||
Device types supported:
|
||||
- KVM_DEV_TYPE_ARM_VGIC_V5 ARM Generic Interrupt Controller v5.0
|
||||
|
||||
Only one VGIC instance may be instantiated through this API. The created VGIC
|
||||
will act as the VM interrupt controller, requiring emulated user-space devices
|
||||
to inject interrupts to the VGIC instead of directly to CPUs.
|
||||
|
||||
Creating a guest GICv5 device requires a host GICv5 host. The current VGICv5
|
||||
device only supports PPI interrupts. These can either be injected from emulated
|
||||
in-kernel devices (such as the Arch Timer, or PMU), or via the KVM_IRQ_LINE
|
||||
ioctl.
|
||||
|
||||
Groups:
|
||||
KVM_DEV_ARM_VGIC_GRP_CTRL
|
||||
Attributes:
|
||||
|
||||
KVM_DEV_ARM_VGIC_CTRL_INIT
|
||||
request the initialization of the VGIC, no additional parameter in
|
||||
kvm_device_attr.addr. Must be called after all VCPUs have been created.
|
||||
|
||||
KVM_DEV_ARM_VGIC_USERPSPACE_PPIs
|
||||
request the mask of userspace-drivable PPIs. Only a subset of the PPIs can
|
||||
be directly driven from userspace with GICv5, and the returned mask
|
||||
informs userspace of which it is allowed to drive via KVM_IRQ_LINE.
|
||||
|
||||
Userspace must allocate and point to __u64[2] of data in
|
||||
kvm_device_attr.addr. When this call returns, the provided memory will be
|
||||
populated with the userspace PPI mask. The lower __u64 contains the mask
|
||||
for the lower 64 PPIS, with the remaining 64 being in the second __u64.
|
||||
|
||||
This is a read-only attribute, and cannot be set. Attempts to set it are
|
||||
rejected.
|
||||
|
||||
Errors:
|
||||
|
||||
======= ========================================================
|
||||
-ENXIO VGIC not properly configured as required prior to calling
|
||||
this attribute
|
||||
-ENODEV no online VCPU
|
||||
-ENOMEM memory shortage when allocating vgic internal data
|
||||
-EFAULT Invalid guest ram access
|
||||
-EBUSY One or more VCPUS are running
|
||||
======= ========================================================
|
||||
@@ -10,6 +10,7 @@ Devices
|
||||
arm-vgic-its
|
||||
arm-vgic
|
||||
arm-vgic-v3
|
||||
arm-vgic-v5
|
||||
mpic
|
||||
s390_flic
|
||||
vcpu
|
||||
|
||||
@@ -37,7 +37,8 @@ Returns:
|
||||
A value describing the PMUv3 (Performance Monitor Unit v3) overflow interrupt
|
||||
number for this vcpu. This interrupt could be a PPI or SPI, but the interrupt
|
||||
type must be same for each vcpu. As a PPI, the interrupt number is the same for
|
||||
all vcpus, while as an SPI it must be a separate number per vcpu.
|
||||
all vcpus, while as an SPI it must be a separate number per vcpu. For
|
||||
GICv5-based guests, the architected PPI (23) must be used.
|
||||
|
||||
1.2 ATTRIBUTE: KVM_ARM_VCPU_PMU_V3_INIT
|
||||
---------------------------------------
|
||||
@@ -50,7 +51,7 @@ Returns:
|
||||
-EEXIST Interrupt number already used
|
||||
-ENODEV PMUv3 not supported or GIC not initialized
|
||||
-ENXIO PMUv3 not supported, missing VCPU feature or interrupt
|
||||
number not set
|
||||
number not set (non-GICv5 guests, only)
|
||||
-EBUSY PMUv3 already initialized
|
||||
======= ======================================================
|
||||
|
||||
|
||||
@@ -50,7 +50,6 @@
|
||||
* effectively VHE-only or not.
|
||||
*/
|
||||
msr_hcr_el2 x0 // Setup HCR_EL2 as nVHE
|
||||
isb
|
||||
mov x1, #1 // Write something to FAR_EL1
|
||||
msr far_el1, x1
|
||||
isb
|
||||
@@ -64,7 +63,6 @@
|
||||
.LnE2H0_\@:
|
||||
orr x0, x0, #HCR_E2H
|
||||
msr_hcr_el2 x0
|
||||
isb
|
||||
.LnVHE_\@:
|
||||
.endm
|
||||
|
||||
@@ -248,6 +246,8 @@
|
||||
ICH_HFGWTR_EL2_ICC_CR0_EL1 | \
|
||||
ICH_HFGWTR_EL2_ICC_APR_EL1)
|
||||
msr_s SYS_ICH_HFGWTR_EL2, x0 // Disable reg write traps
|
||||
mov x0, #(ICH_VCTLR_EL2_En)
|
||||
msr_s SYS_ICH_VCTLR_EL2, x0 // Enable vHPPI selection
|
||||
.Lskip_gicv5_\@:
|
||||
.endm
|
||||
|
||||
|
||||
@@ -51,7 +51,7 @@
|
||||
#include <linux/mm.h>
|
||||
|
||||
enum __kvm_host_smccc_func {
|
||||
/* Hypercalls available only prior to pKVM finalisation */
|
||||
/* Hypercalls that are unavailable once pKVM has finalised. */
|
||||
/* __KVM_HOST_SMCCC_FUNC___kvm_hyp_init */
|
||||
__KVM_HOST_SMCCC_FUNC___pkvm_init = __KVM_HOST_SMCCC_FUNC___kvm_hyp_init + 1,
|
||||
__KVM_HOST_SMCCC_FUNC___pkvm_create_private_mapping,
|
||||
@@ -60,16 +60,9 @@ enum __kvm_host_smccc_func {
|
||||
__KVM_HOST_SMCCC_FUNC___vgic_v3_init_lrs,
|
||||
__KVM_HOST_SMCCC_FUNC___vgic_v3_get_gic_config,
|
||||
__KVM_HOST_SMCCC_FUNC___pkvm_prot_finalize,
|
||||
__KVM_HOST_SMCCC_FUNC_MIN_PKVM = __KVM_HOST_SMCCC_FUNC___pkvm_prot_finalize,
|
||||
|
||||
/* Hypercalls available after pKVM finalisation */
|
||||
__KVM_HOST_SMCCC_FUNC___pkvm_host_share_hyp,
|
||||
__KVM_HOST_SMCCC_FUNC___pkvm_host_unshare_hyp,
|
||||
__KVM_HOST_SMCCC_FUNC___pkvm_host_share_guest,
|
||||
__KVM_HOST_SMCCC_FUNC___pkvm_host_unshare_guest,
|
||||
__KVM_HOST_SMCCC_FUNC___pkvm_host_relax_perms_guest,
|
||||
__KVM_HOST_SMCCC_FUNC___pkvm_host_wrprotect_guest,
|
||||
__KVM_HOST_SMCCC_FUNC___pkvm_host_test_clear_young_guest,
|
||||
__KVM_HOST_SMCCC_FUNC___pkvm_host_mkyoung_guest,
|
||||
/* Hypercalls that are always available and common to [nh]VHE/pKVM. */
|
||||
__KVM_HOST_SMCCC_FUNC___kvm_adjust_pc,
|
||||
__KVM_HOST_SMCCC_FUNC___kvm_vcpu_run,
|
||||
__KVM_HOST_SMCCC_FUNC___kvm_flush_vm_context,
|
||||
@@ -81,14 +74,40 @@ enum __kvm_host_smccc_func {
|
||||
__KVM_HOST_SMCCC_FUNC___kvm_timer_set_cntvoff,
|
||||
__KVM_HOST_SMCCC_FUNC___vgic_v3_save_aprs,
|
||||
__KVM_HOST_SMCCC_FUNC___vgic_v3_restore_vmcr_aprs,
|
||||
__KVM_HOST_SMCCC_FUNC___vgic_v5_save_apr,
|
||||
__KVM_HOST_SMCCC_FUNC___vgic_v5_restore_vmcr_apr,
|
||||
__KVM_HOST_SMCCC_FUNC_MAX_NO_PKVM = __KVM_HOST_SMCCC_FUNC___vgic_v5_restore_vmcr_apr,
|
||||
|
||||
/* Hypercalls that are available only when pKVM has finalised. */
|
||||
__KVM_HOST_SMCCC_FUNC___pkvm_host_share_hyp,
|
||||
__KVM_HOST_SMCCC_FUNC___pkvm_host_unshare_hyp,
|
||||
__KVM_HOST_SMCCC_FUNC___pkvm_host_donate_guest,
|
||||
__KVM_HOST_SMCCC_FUNC___pkvm_host_share_guest,
|
||||
__KVM_HOST_SMCCC_FUNC___pkvm_host_unshare_guest,
|
||||
__KVM_HOST_SMCCC_FUNC___pkvm_host_relax_perms_guest,
|
||||
__KVM_HOST_SMCCC_FUNC___pkvm_host_wrprotect_guest,
|
||||
__KVM_HOST_SMCCC_FUNC___pkvm_host_test_clear_young_guest,
|
||||
__KVM_HOST_SMCCC_FUNC___pkvm_host_mkyoung_guest,
|
||||
__KVM_HOST_SMCCC_FUNC___pkvm_reserve_vm,
|
||||
__KVM_HOST_SMCCC_FUNC___pkvm_unreserve_vm,
|
||||
__KVM_HOST_SMCCC_FUNC___pkvm_init_vm,
|
||||
__KVM_HOST_SMCCC_FUNC___pkvm_init_vcpu,
|
||||
__KVM_HOST_SMCCC_FUNC___pkvm_teardown_vm,
|
||||
__KVM_HOST_SMCCC_FUNC___pkvm_vcpu_in_poison_fault,
|
||||
__KVM_HOST_SMCCC_FUNC___pkvm_force_reclaim_guest_page,
|
||||
__KVM_HOST_SMCCC_FUNC___pkvm_reclaim_dying_guest_page,
|
||||
__KVM_HOST_SMCCC_FUNC___pkvm_start_teardown_vm,
|
||||
__KVM_HOST_SMCCC_FUNC___pkvm_finalize_teardown_vm,
|
||||
__KVM_HOST_SMCCC_FUNC___pkvm_vcpu_load,
|
||||
__KVM_HOST_SMCCC_FUNC___pkvm_vcpu_put,
|
||||
__KVM_HOST_SMCCC_FUNC___pkvm_tlb_flush_vmid,
|
||||
__KVM_HOST_SMCCC_FUNC___tracing_load,
|
||||
__KVM_HOST_SMCCC_FUNC___tracing_unload,
|
||||
__KVM_HOST_SMCCC_FUNC___tracing_enable,
|
||||
__KVM_HOST_SMCCC_FUNC___tracing_swap_reader,
|
||||
__KVM_HOST_SMCCC_FUNC___tracing_update_clock,
|
||||
__KVM_HOST_SMCCC_FUNC___tracing_reset,
|
||||
__KVM_HOST_SMCCC_FUNC___tracing_enable_event,
|
||||
__KVM_HOST_SMCCC_FUNC___tracing_write_event,
|
||||
};
|
||||
|
||||
#define DECLARE_KVM_VHE_SYM(sym) extern char sym[]
|
||||
@@ -291,7 +310,8 @@ asmlinkage void __noreturn hyp_panic_bad_stack(void);
|
||||
asmlinkage void kvm_unexpected_el2_exception(void);
|
||||
struct kvm_cpu_context;
|
||||
void handle_trap(struct kvm_cpu_context *host_ctxt);
|
||||
asmlinkage void __noreturn __kvm_host_psci_cpu_entry(bool is_cpu_on);
|
||||
asmlinkage void __noreturn __kvm_host_psci_cpu_on_entry(void);
|
||||
asmlinkage void __noreturn __kvm_host_psci_cpu_resume_entry(void);
|
||||
void __noreturn __pkvm_init_finalise(void);
|
||||
void kvm_nvhe_prepare_backtrace(unsigned long fp, unsigned long pc);
|
||||
void kvm_patch_vector_branch(struct alt_instr *alt,
|
||||
|
||||
16
arch/arm64/include/asm/kvm_define_hypevents.h
Normal file
16
arch/arm64/include/asm/kvm_define_hypevents.h
Normal file
@@ -0,0 +1,16 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
|
||||
#define REMOTE_EVENT_INCLUDE_FILE arch/arm64/include/asm/kvm_hypevents.h
|
||||
|
||||
#define REMOTE_EVENT_SECTION "_hyp_events"
|
||||
|
||||
#define HE_STRUCT(__args) __args
|
||||
#define HE_PRINTK(__args...) __args
|
||||
#define he_field re_field
|
||||
|
||||
#define HYP_EVENT(__name, __proto, __struct, __assign, __printk) \
|
||||
REMOTE_EVENT(__name, 0, RE_STRUCT(__struct), RE_PRINTK(__printk))
|
||||
|
||||
#define HYP_EVENT_MULTI_READ
|
||||
#include <trace/define_remote_events.h>
|
||||
#undef HYP_EVENT_MULTI_READ
|
||||
@@ -217,6 +217,10 @@ struct kvm_s2_mmu {
|
||||
*/
|
||||
bool nested_stage2_enabled;
|
||||
|
||||
#ifdef CONFIG_PTDUMP_STAGE2_DEBUGFS
|
||||
struct dentry *shadow_pt_debugfs_dentry;
|
||||
#endif
|
||||
|
||||
/*
|
||||
* true when this MMU needs to be unmapped before being used for a new
|
||||
* purpose.
|
||||
@@ -247,7 +251,7 @@ struct kvm_smccc_features {
|
||||
unsigned long vendor_hyp_bmap_2; /* Function numbers 64-127 */
|
||||
};
|
||||
|
||||
typedef unsigned int pkvm_handle_t;
|
||||
typedef u16 pkvm_handle_t;
|
||||
|
||||
struct kvm_protected_vm {
|
||||
pkvm_handle_t handle;
|
||||
@@ -255,6 +259,13 @@ struct kvm_protected_vm {
|
||||
struct kvm_hyp_memcache stage2_teardown_mc;
|
||||
bool is_protected;
|
||||
bool is_created;
|
||||
|
||||
/*
|
||||
* True when the guest is being torn down. When in this state, the
|
||||
* guest's vCPUs can't be loaded anymore, but its pages can be
|
||||
* reclaimed by the host.
|
||||
*/
|
||||
bool is_dying;
|
||||
};
|
||||
|
||||
struct kvm_mpidr_data {
|
||||
@@ -287,6 +298,9 @@ enum fgt_group_id {
|
||||
HDFGRTR2_GROUP,
|
||||
HDFGWTR2_GROUP = HDFGRTR2_GROUP,
|
||||
HFGITR2_GROUP,
|
||||
ICH_HFGRTR_GROUP,
|
||||
ICH_HFGWTR_GROUP = ICH_HFGRTR_GROUP,
|
||||
ICH_HFGITR_GROUP,
|
||||
|
||||
/* Must be last */
|
||||
__NR_FGT_GROUP_IDS__
|
||||
@@ -405,6 +419,11 @@ struct kvm_arch {
|
||||
* the associated pKVM instance in the hypervisor.
|
||||
*/
|
||||
struct kvm_protected_vm pkvm;
|
||||
|
||||
#ifdef CONFIG_PTDUMP_STAGE2_DEBUGFS
|
||||
/* Nested virtualization info */
|
||||
struct dentry *debugfs_nv_dentry;
|
||||
#endif
|
||||
};
|
||||
|
||||
struct kvm_vcpu_fault_info {
|
||||
@@ -620,6 +639,10 @@ enum vcpu_sysreg {
|
||||
VNCR(ICH_HCR_EL2),
|
||||
VNCR(ICH_VMCR_EL2),
|
||||
|
||||
VNCR(ICH_HFGRTR_EL2),
|
||||
VNCR(ICH_HFGWTR_EL2),
|
||||
VNCR(ICH_HFGITR_EL2),
|
||||
|
||||
NR_SYS_REGS /* Nothing after this line! */
|
||||
};
|
||||
|
||||
@@ -675,6 +698,9 @@ extern struct fgt_masks hfgwtr2_masks;
|
||||
extern struct fgt_masks hfgitr2_masks;
|
||||
extern struct fgt_masks hdfgrtr2_masks;
|
||||
extern struct fgt_masks hdfgwtr2_masks;
|
||||
extern struct fgt_masks ich_hfgrtr_masks;
|
||||
extern struct fgt_masks ich_hfgwtr_masks;
|
||||
extern struct fgt_masks ich_hfgitr_masks;
|
||||
|
||||
extern struct fgt_masks kvm_nvhe_sym(hfgrtr_masks);
|
||||
extern struct fgt_masks kvm_nvhe_sym(hfgwtr_masks);
|
||||
@@ -687,6 +713,9 @@ extern struct fgt_masks kvm_nvhe_sym(hfgwtr2_masks);
|
||||
extern struct fgt_masks kvm_nvhe_sym(hfgitr2_masks);
|
||||
extern struct fgt_masks kvm_nvhe_sym(hdfgrtr2_masks);
|
||||
extern struct fgt_masks kvm_nvhe_sym(hdfgwtr2_masks);
|
||||
extern struct fgt_masks kvm_nvhe_sym(ich_hfgrtr_masks);
|
||||
extern struct fgt_masks kvm_nvhe_sym(ich_hfgwtr_masks);
|
||||
extern struct fgt_masks kvm_nvhe_sym(ich_hfgitr_masks);
|
||||
|
||||
struct kvm_cpu_context {
|
||||
struct user_pt_regs regs; /* sp = sp_el0 */
|
||||
@@ -768,8 +797,10 @@ struct kvm_host_data {
|
||||
struct kvm_guest_debug_arch regs;
|
||||
/* Statistical profiling extension */
|
||||
u64 pmscr_el1;
|
||||
u64 pmblimitr_el1;
|
||||
/* Self-hosted trace */
|
||||
u64 trfcr_el1;
|
||||
u64 trblimitr_el1;
|
||||
/* Values of trap registers for the host before guest entry. */
|
||||
u64 mdcr_el2;
|
||||
u64 brbcr_el1;
|
||||
@@ -787,6 +818,14 @@ struct kvm_host_data {
|
||||
|
||||
/* Last vgic_irq part of the AP list recorded in an LR */
|
||||
struct vgic_irq *last_lr_irq;
|
||||
|
||||
/* PPI state tracking for GICv5-based guests */
|
||||
struct {
|
||||
DECLARE_BITMAP(pendr, VGIC_V5_NR_PRIVATE_IRQS);
|
||||
|
||||
/* The saved state of the regs when leaving the guest */
|
||||
DECLARE_BITMAP(activer_exit, VGIC_V5_NR_PRIVATE_IRQS);
|
||||
} vgic_v5_ppi_state;
|
||||
};
|
||||
|
||||
struct kvm_host_psci_config {
|
||||
@@ -923,6 +962,9 @@ struct kvm_vcpu_arch {
|
||||
|
||||
/* Per-vcpu TLB for VNCR_EL2 -- NULL when !NV */
|
||||
struct vncr_tlb *vncr_tlb;
|
||||
|
||||
/* Hyp-readable copy of kvm_vcpu::pid */
|
||||
pid_t pid;
|
||||
};
|
||||
|
||||
/*
|
||||
@@ -1659,6 +1701,11 @@ static __always_inline enum fgt_group_id __fgt_reg_to_group_id(enum vcpu_sysreg
|
||||
case HDFGRTR2_EL2:
|
||||
case HDFGWTR2_EL2:
|
||||
return HDFGRTR2_GROUP;
|
||||
case ICH_HFGRTR_EL2:
|
||||
case ICH_HFGWTR_EL2:
|
||||
return ICH_HFGRTR_GROUP;
|
||||
case ICH_HFGITR_EL2:
|
||||
return ICH_HFGITR_GROUP;
|
||||
default:
|
||||
BUILD_BUG_ON(1);
|
||||
}
|
||||
@@ -1673,6 +1720,7 @@ static __always_inline enum fgt_group_id __fgt_reg_to_group_id(enum vcpu_sysreg
|
||||
case HDFGWTR_EL2: \
|
||||
case HFGWTR2_EL2: \
|
||||
case HDFGWTR2_EL2: \
|
||||
case ICH_HFGWTR_EL2: \
|
||||
p = &(vcpu)->arch.fgt[id].w; \
|
||||
break; \
|
||||
default: \
|
||||
|
||||
@@ -87,6 +87,15 @@ void __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if);
|
||||
void __vgic_v3_restore_vmcr_aprs(struct vgic_v3_cpu_if *cpu_if);
|
||||
int __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu);
|
||||
|
||||
/* GICv5 */
|
||||
void __vgic_v5_save_apr(struct vgic_v5_cpu_if *cpu_if);
|
||||
void __vgic_v5_restore_vmcr_apr(struct vgic_v5_cpu_if *cpu_if);
|
||||
/* No hypercalls for the following */
|
||||
void __vgic_v5_save_ppi_state(struct vgic_v5_cpu_if *cpu_if);
|
||||
void __vgic_v5_restore_ppi_state(struct vgic_v5_cpu_if *cpu_if);
|
||||
void __vgic_v5_save_state(struct vgic_v5_cpu_if *cpu_if);
|
||||
void __vgic_v5_restore_state(struct vgic_v5_cpu_if *cpu_if);
|
||||
|
||||
#ifdef __KVM_NVHE_HYPERVISOR__
|
||||
void __timer_enable_traps(struct kvm_vcpu *vcpu);
|
||||
void __timer_disable_traps(struct kvm_vcpu *vcpu);
|
||||
@@ -129,13 +138,13 @@ void __noreturn __hyp_do_panic(struct kvm_cpu_context *host_ctxt, u64 spsr,
|
||||
#ifdef __KVM_NVHE_HYPERVISOR__
|
||||
void __pkvm_init_switch_pgd(phys_addr_t pgd, unsigned long sp,
|
||||
void (*fn)(void));
|
||||
int __pkvm_init(phys_addr_t phys, unsigned long size, unsigned long nr_cpus,
|
||||
unsigned long *per_cpu_base, u32 hyp_va_bits);
|
||||
int __pkvm_init(phys_addr_t phys, unsigned long size, unsigned long *per_cpu_base, u32 hyp_va_bits);
|
||||
void __noreturn __host_enter(struct kvm_cpu_context *host_ctxt);
|
||||
#endif
|
||||
|
||||
extern u64 kvm_nvhe_sym(id_aa64pfr0_el1_sys_val);
|
||||
extern u64 kvm_nvhe_sym(id_aa64pfr1_el1_sys_val);
|
||||
extern u64 kvm_nvhe_sym(id_aa64pfr2_el1_sys_val);
|
||||
extern u64 kvm_nvhe_sym(id_aa64isar0_el1_sys_val);
|
||||
extern u64 kvm_nvhe_sym(id_aa64isar1_el1_sys_val);
|
||||
extern u64 kvm_nvhe_sym(id_aa64isar2_el1_sys_val);
|
||||
@@ -147,5 +156,6 @@ extern u64 kvm_nvhe_sym(id_aa64smfr0_el1_sys_val);
|
||||
extern unsigned long kvm_nvhe_sym(__icache_flags);
|
||||
extern unsigned int kvm_nvhe_sym(kvm_arm_vmid_bits);
|
||||
extern unsigned int kvm_nvhe_sym(kvm_host_sve_max_vl);
|
||||
extern unsigned long kvm_nvhe_sym(hyp_nr_cpus);
|
||||
|
||||
#endif /* __ARM64_KVM_HYP_H__ */
|
||||
|
||||
60
arch/arm64/include/asm/kvm_hypevents.h
Normal file
60
arch/arm64/include/asm/kvm_hypevents.h
Normal file
@@ -0,0 +1,60 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
|
||||
#if !defined(__ARM64_KVM_HYPEVENTS_H_) || defined(HYP_EVENT_MULTI_READ)
|
||||
#define __ARM64_KVM_HYPEVENTS_H_
|
||||
|
||||
#ifdef __KVM_NVHE_HYPERVISOR__
|
||||
#include <nvhe/trace.h>
|
||||
#endif
|
||||
|
||||
#ifndef __HYP_ENTER_EXIT_REASON
|
||||
#define __HYP_ENTER_EXIT_REASON
|
||||
enum hyp_enter_exit_reason {
|
||||
HYP_REASON_SMC,
|
||||
HYP_REASON_HVC,
|
||||
HYP_REASON_PSCI,
|
||||
HYP_REASON_HOST_ABORT,
|
||||
HYP_REASON_GUEST_EXIT,
|
||||
HYP_REASON_ERET_HOST,
|
||||
HYP_REASON_ERET_GUEST,
|
||||
HYP_REASON_UNKNOWN /* Must be last */
|
||||
};
|
||||
#endif
|
||||
|
||||
HYP_EVENT(hyp_enter,
|
||||
HE_PROTO(struct kvm_cpu_context *host_ctxt, u8 reason),
|
||||
HE_STRUCT(
|
||||
he_field(u8, reason)
|
||||
he_field(pid_t, vcpu)
|
||||
),
|
||||
HE_ASSIGN(
|
||||
__entry->reason = reason;
|
||||
__entry->vcpu = __tracing_get_vcpu_pid(host_ctxt);
|
||||
),
|
||||
HE_PRINTK("reason=%s vcpu=%d", __hyp_enter_exit_reason_str(__entry->reason), __entry->vcpu)
|
||||
);
|
||||
|
||||
HYP_EVENT(hyp_exit,
|
||||
HE_PROTO(struct kvm_cpu_context *host_ctxt, u8 reason),
|
||||
HE_STRUCT(
|
||||
he_field(u8, reason)
|
||||
he_field(pid_t, vcpu)
|
||||
),
|
||||
HE_ASSIGN(
|
||||
__entry->reason = reason;
|
||||
__entry->vcpu = __tracing_get_vcpu_pid(host_ctxt);
|
||||
),
|
||||
HE_PRINTK("reason=%s vcpu=%d", __hyp_enter_exit_reason_str(__entry->reason), __entry->vcpu)
|
||||
);
|
||||
|
||||
HYP_EVENT(selftest,
|
||||
HE_PROTO(u64 id),
|
||||
HE_STRUCT(
|
||||
he_field(u64, id)
|
||||
),
|
||||
HE_ASSIGN(
|
||||
__entry->id = id;
|
||||
),
|
||||
RE_PRINTK("id=%llu", __entry->id)
|
||||
);
|
||||
#endif
|
||||
26
arch/arm64/include/asm/kvm_hyptrace.h
Normal file
26
arch/arm64/include/asm/kvm_hyptrace.h
Normal file
@@ -0,0 +1,26 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0-only */
|
||||
#ifndef __ARM64_KVM_HYPTRACE_H_
|
||||
#define __ARM64_KVM_HYPTRACE_H_
|
||||
|
||||
#include <linux/ring_buffer.h>
|
||||
|
||||
struct hyp_trace_desc {
|
||||
unsigned long bpages_backing_start;
|
||||
size_t bpages_backing_size;
|
||||
struct trace_buffer_desc trace_buffer_desc;
|
||||
|
||||
};
|
||||
|
||||
struct hyp_event_id {
|
||||
unsigned short id;
|
||||
atomic_t enabled;
|
||||
};
|
||||
|
||||
extern struct remote_event __hyp_events_start[];
|
||||
extern struct remote_event __hyp_events_end[];
|
||||
|
||||
/* hyp_event section used by the hypervisor */
|
||||
extern struct hyp_event_id __hyp_event_ids_start[];
|
||||
extern struct hyp_event_id __hyp_event_ids_end[];
|
||||
|
||||
#endif
|
||||
@@ -393,8 +393,12 @@ static inline bool kvm_supports_cacheable_pfnmap(void)
|
||||
|
||||
#ifdef CONFIG_PTDUMP_STAGE2_DEBUGFS
|
||||
void kvm_s2_ptdump_create_debugfs(struct kvm *kvm);
|
||||
void kvm_nested_s2_ptdump_create_debugfs(struct kvm_s2_mmu *mmu);
|
||||
void kvm_nested_s2_ptdump_remove_debugfs(struct kvm_s2_mmu *mmu);
|
||||
#else
|
||||
static inline void kvm_s2_ptdump_create_debugfs(struct kvm *kvm) {}
|
||||
static inline void kvm_nested_s2_ptdump_create_debugfs(struct kvm_s2_mmu *mmu) {}
|
||||
static inline void kvm_nested_s2_ptdump_remove_debugfs(struct kvm_s2_mmu *mmu) {}
|
||||
#endif /* CONFIG_PTDUMP_STAGE2_DEBUGFS */
|
||||
|
||||
#endif /* __ASSEMBLER__ */
|
||||
|
||||
@@ -99,14 +99,30 @@ typedef u64 kvm_pte_t;
|
||||
KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | \
|
||||
KVM_PTE_LEAF_ATTR_HI_S2_XN)
|
||||
|
||||
#define KVM_INVALID_PTE_OWNER_MASK GENMASK(9, 2)
|
||||
#define KVM_MAX_OWNER_ID 1
|
||||
/* pKVM invalid pte encodings */
|
||||
#define KVM_INVALID_PTE_TYPE_MASK GENMASK(63, 60)
|
||||
#define KVM_INVALID_PTE_ANNOT_MASK ~(KVM_PTE_VALID | \
|
||||
KVM_INVALID_PTE_TYPE_MASK)
|
||||
|
||||
/*
|
||||
* Used to indicate a pte for which a 'break-before-make' sequence is in
|
||||
* progress.
|
||||
*/
|
||||
#define KVM_INVALID_PTE_LOCKED BIT(10)
|
||||
enum kvm_invalid_pte_type {
|
||||
/*
|
||||
* Used to indicate a pte for which a 'break-before-make'
|
||||
* sequence is in progress.
|
||||
*/
|
||||
KVM_INVALID_PTE_TYPE_LOCKED = 1,
|
||||
|
||||
/*
|
||||
* pKVM has unmapped the page from the host due to a change of
|
||||
* ownership.
|
||||
*/
|
||||
KVM_HOST_INVALID_PTE_TYPE_DONATION,
|
||||
|
||||
/*
|
||||
* The page has been forcefully reclaimed from the guest by the
|
||||
* host.
|
||||
*/
|
||||
KVM_GUEST_INVALID_PTE_TYPE_POISONED,
|
||||
};
|
||||
|
||||
static inline bool kvm_pte_valid(kvm_pte_t pte)
|
||||
{
|
||||
@@ -658,14 +674,18 @@ int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
|
||||
void *mc, enum kvm_pgtable_walk_flags flags);
|
||||
|
||||
/**
|
||||
* kvm_pgtable_stage2_set_owner() - Unmap and annotate pages in the IPA space to
|
||||
* track ownership.
|
||||
* kvm_pgtable_stage2_annotate() - Unmap and annotate pages in the IPA space
|
||||
* to track ownership (and more).
|
||||
* @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*().
|
||||
* @addr: Base intermediate physical address to annotate.
|
||||
* @size: Size of the annotated range.
|
||||
* @mc: Cache of pre-allocated and zeroed memory from which to allocate
|
||||
* page-table pages.
|
||||
* @owner_id: Unique identifier for the owner of the page.
|
||||
* @type: The type of the annotation, determining its meaning and format.
|
||||
* @annotation: A 59-bit value that will be stored in the page tables.
|
||||
* @annotation[0] and @annotation[63:60] must be 0.
|
||||
* @annotation[59:1] is stored in the page tables, along
|
||||
* with @type.
|
||||
*
|
||||
* By default, all page-tables are owned by identifier 0. This function can be
|
||||
* used to mark portions of the IPA space as owned by other entities. When a
|
||||
@@ -674,8 +694,9 @@ int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
|
||||
*
|
||||
* Return: 0 on success, negative error code on failure.
|
||||
*/
|
||||
int kvm_pgtable_stage2_set_owner(struct kvm_pgtable *pgt, u64 addr, u64 size,
|
||||
void *mc, u8 owner_id);
|
||||
int kvm_pgtable_stage2_annotate(struct kvm_pgtable *pgt, u64 addr, u64 size,
|
||||
void *mc, enum kvm_invalid_pte_type type,
|
||||
kvm_pte_t annotation);
|
||||
|
||||
/**
|
||||
* kvm_pgtable_stage2_unmap() - Remove a mapping from a guest stage-2 page-table.
|
||||
|
||||
@@ -17,7 +17,7 @@
|
||||
|
||||
#define HYP_MEMBLOCK_REGIONS 128
|
||||
|
||||
int pkvm_init_host_vm(struct kvm *kvm);
|
||||
int pkvm_init_host_vm(struct kvm *kvm, unsigned long type);
|
||||
int pkvm_create_hyp_vm(struct kvm *kvm);
|
||||
bool pkvm_hyp_vm_is_created(struct kvm *kvm);
|
||||
void pkvm_destroy_hyp_vm(struct kvm *kvm);
|
||||
@@ -40,8 +40,6 @@ static inline bool kvm_pkvm_ext_allowed(struct kvm *kvm, long ext)
|
||||
case KVM_CAP_MAX_VCPU_ID:
|
||||
case KVM_CAP_MSI_DEVID:
|
||||
case KVM_CAP_ARM_VM_IPA_SIZE:
|
||||
case KVM_CAP_ARM_PMU_V3:
|
||||
case KVM_CAP_ARM_SVE:
|
||||
case KVM_CAP_ARM_PTRAUTH_ADDRESS:
|
||||
case KVM_CAP_ARM_PTRAUTH_GENERIC:
|
||||
return true;
|
||||
|
||||
@@ -1052,6 +1052,7 @@
|
||||
#define GICV5_OP_GIC_CDPRI sys_insn(1, 0, 12, 1, 2)
|
||||
#define GICV5_OP_GIC_CDRCFG sys_insn(1, 0, 12, 1, 5)
|
||||
#define GICV5_OP_GICR_CDIA sys_insn(1, 0, 12, 3, 0)
|
||||
#define GICV5_OP_GICR_CDNMIA sys_insn(1, 0, 12, 3, 1)
|
||||
|
||||
/* Definitions for GIC CDAFF */
|
||||
#define GICV5_GIC_CDAFF_IAFFID_MASK GENMASK_ULL(47, 32)
|
||||
@@ -1098,6 +1099,12 @@
|
||||
#define GICV5_GIC_CDIA_TYPE_MASK GENMASK_ULL(31, 29)
|
||||
#define GICV5_GIC_CDIA_ID_MASK GENMASK_ULL(23, 0)
|
||||
|
||||
/* Definitions for GICR CDNMIA */
|
||||
#define GICV5_GICR_CDNMIA_VALID_MASK BIT_ULL(32)
|
||||
#define GICV5_GICR_CDNMIA_VALID(r) FIELD_GET(GICV5_GICR_CDNMIA_VALID_MASK, r)
|
||||
#define GICV5_GICR_CDNMIA_TYPE_MASK GENMASK_ULL(31, 29)
|
||||
#define GICV5_GICR_CDNMIA_ID_MASK GENMASK_ULL(23, 0)
|
||||
|
||||
#define gicr_insn(insn) read_sysreg_s(GICV5_OP_GICR_##insn)
|
||||
#define gic_insn(v, insn) write_sysreg_s(v, GICV5_OP_GIC_##insn)
|
||||
|
||||
@@ -1114,11 +1121,9 @@
|
||||
.macro msr_hcr_el2, reg
|
||||
#if IS_ENABLED(CONFIG_AMPERE_ERRATUM_AC04_CPU_23)
|
||||
dsb nsh
|
||||
msr hcr_el2, \reg
|
||||
isb
|
||||
#else
|
||||
msr hcr_el2, \reg
|
||||
#endif
|
||||
msr hcr_el2, \reg
|
||||
isb // Required by AMPERE_ERRATUM_AC04_CPU_23
|
||||
.endm
|
||||
#else
|
||||
|
||||
|
||||
@@ -94,6 +94,15 @@ static inline bool is_pkvm_initialized(void)
|
||||
static_branch_likely(&kvm_protected_mode_initialized);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_KVM
|
||||
bool pkvm_force_reclaim_guest_page(phys_addr_t phys);
|
||||
#else
|
||||
static inline bool pkvm_force_reclaim_guest_page(phys_addr_t phys)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Reports the availability of HYP mode */
|
||||
static inline bool is_hyp_mode_available(void)
|
||||
{
|
||||
|
||||
@@ -108,5 +108,8 @@
|
||||
#define VNCR_MPAMVPM5_EL2 0x968
|
||||
#define VNCR_MPAMVPM6_EL2 0x970
|
||||
#define VNCR_MPAMVPM7_EL2 0x978
|
||||
#define VNCR_ICH_HFGITR_EL2 0xB10
|
||||
#define VNCR_ICH_HFGRTR_EL2 0xB18
|
||||
#define VNCR_ICH_HFGWTR_EL2 0xB20
|
||||
|
||||
#endif /* __ARM64_VNCR_MAPPING_H__ */
|
||||
|
||||
@@ -428,6 +428,7 @@ enum {
|
||||
#define KVM_DEV_ARM_ITS_RESTORE_TABLES 2
|
||||
#define KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES 3
|
||||
#define KVM_DEV_ARM_ITS_CTRL_RESET 4
|
||||
#define KVM_DEV_ARM_VGIC_USERSPACE_PPIS 5
|
||||
|
||||
/* Device Control API on vcpu fd */
|
||||
#define KVM_ARM_VCPU_PMU_V3_CTRL 0
|
||||
|
||||
@@ -325,6 +325,7 @@ static const struct arm64_ftr_bits ftr_id_aa64pfr1[] = {
|
||||
|
||||
static const struct arm64_ftr_bits ftr_id_aa64pfr2[] = {
|
||||
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR2_EL1_FPMR_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR2_EL1_GCIE_SHIFT, 4, ID_AA64PFR2_EL1_GCIE_NI),
|
||||
ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR2_EL1_MTEFAR_SHIFT, 4, ID_AA64PFR2_EL1_MTEFAR_NI),
|
||||
ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR2_EL1_MTESTOREONLY_SHIFT, 4, ID_AA64PFR2_EL1_MTESTOREONLY_NI),
|
||||
ARM64_FTR_END,
|
||||
|
||||
@@ -103,7 +103,6 @@ SYM_CODE_START_LOCAL(__finalise_el2)
|
||||
// Engage the VHE magic!
|
||||
mov_q x0, HCR_HOST_VHE_FLAGS
|
||||
msr_hcr_el2 x0
|
||||
isb
|
||||
|
||||
// Use the EL1 allocated stack, per-cpu offset
|
||||
mrs x0, sp_el1
|
||||
|
||||
@@ -138,6 +138,10 @@ KVM_NVHE_ALIAS(__hyp_data_start);
|
||||
KVM_NVHE_ALIAS(__hyp_data_end);
|
||||
KVM_NVHE_ALIAS(__hyp_rodata_start);
|
||||
KVM_NVHE_ALIAS(__hyp_rodata_end);
|
||||
#ifdef CONFIG_NVHE_EL2_TRACING
|
||||
KVM_NVHE_ALIAS(__hyp_event_ids_start);
|
||||
KVM_NVHE_ALIAS(__hyp_event_ids_end);
|
||||
#endif
|
||||
|
||||
/* pKVM static key */
|
||||
KVM_NVHE_ALIAS(kvm_protected_mode_initialized);
|
||||
|
||||
@@ -13,12 +13,23 @@
|
||||
*(__kvm_ex_table) \
|
||||
__stop___kvm_ex_table = .;
|
||||
|
||||
#ifdef CONFIG_NVHE_EL2_TRACING
|
||||
#define HYPERVISOR_EVENT_IDS \
|
||||
. = ALIGN(PAGE_SIZE); \
|
||||
__hyp_event_ids_start = .; \
|
||||
*(HYP_SECTION_NAME(.event_ids)) \
|
||||
__hyp_event_ids_end = .;
|
||||
#else
|
||||
#define HYPERVISOR_EVENT_IDS
|
||||
#endif
|
||||
|
||||
#define HYPERVISOR_RODATA_SECTIONS \
|
||||
HYP_SECTION_NAME(.rodata) : { \
|
||||
. = ALIGN(PAGE_SIZE); \
|
||||
__hyp_rodata_start = .; \
|
||||
*(HYP_SECTION_NAME(.data..ro_after_init)) \
|
||||
*(HYP_SECTION_NAME(.rodata)) \
|
||||
HYPERVISOR_EVENT_IDS \
|
||||
. = ALIGN(PAGE_SIZE); \
|
||||
__hyp_rodata_end = .; \
|
||||
}
|
||||
@@ -308,6 +319,13 @@ SECTIONS
|
||||
|
||||
HYPERVISOR_DATA_SECTION
|
||||
|
||||
#ifdef CONFIG_NVHE_EL2_TRACING
|
||||
.data.hyp_events : {
|
||||
__hyp_events_start = .;
|
||||
*(SORT(_hyp_events.*))
|
||||
__hyp_events_end = .;
|
||||
}
|
||||
#endif
|
||||
/*
|
||||
* Data written with the MMU off but read with the MMU on requires
|
||||
* cache lines to be invalidated, discarding up to a Cache Writeback
|
||||
|
||||
@@ -42,32 +42,10 @@ menuconfig KVM
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
config NVHE_EL2_DEBUG
|
||||
bool "Debug mode for non-VHE EL2 object"
|
||||
depends on KVM
|
||||
help
|
||||
Say Y here to enable the debug mode for the non-VHE KVM EL2 object.
|
||||
Failure reports will BUG() in the hypervisor. This is intended for
|
||||
local EL2 hypervisor development.
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
config PROTECTED_NVHE_STACKTRACE
|
||||
bool "Protected KVM hypervisor stacktraces"
|
||||
depends on NVHE_EL2_DEBUG
|
||||
default n
|
||||
help
|
||||
Say Y here to enable pKVM hypervisor stacktraces on hyp_panic()
|
||||
|
||||
If using protected nVHE mode, but cannot afford the associated
|
||||
memory cost (less than 0.75 page per CPU) of pKVM stacktraces,
|
||||
say N.
|
||||
|
||||
If unsure, or not using protected nVHE (pKVM), say N.
|
||||
if KVM
|
||||
|
||||
config PTDUMP_STAGE2_DEBUGFS
|
||||
bool "Present the stage-2 pagetables to debugfs"
|
||||
depends on KVM
|
||||
depends on DEBUG_KERNEL
|
||||
depends on DEBUG_FS
|
||||
depends on ARCH_HAS_PTDUMP
|
||||
@@ -82,4 +60,48 @@ config PTDUMP_STAGE2_DEBUGFS
|
||||
|
||||
If in doubt, say N.
|
||||
|
||||
config NVHE_EL2_DEBUG
|
||||
bool "Debug mode for non-VHE EL2 object"
|
||||
default n
|
||||
help
|
||||
Say Y here to enable the debug mode for the non-VHE KVM EL2 object.
|
||||
Failure reports will BUG() in the hypervisor. This is intended for
|
||||
local EL2 hypervisor development.
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
if NVHE_EL2_DEBUG
|
||||
|
||||
config NVHE_EL2_TRACING
|
||||
bool
|
||||
depends on TRACING && FTRACE
|
||||
select TRACE_REMOTE
|
||||
default y
|
||||
|
||||
config PKVM_DISABLE_STAGE2_ON_PANIC
|
||||
bool "Disable the host stage-2 on panic"
|
||||
default n
|
||||
help
|
||||
Relax the host stage-2 on hypervisor panic to allow the kernel to
|
||||
unwind and symbolize the hypervisor stacktrace. This however tampers with
|
||||
the system security. This is intended for local EL2 hypervisor
|
||||
development.
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
config PKVM_STACKTRACE
|
||||
bool "Protected KVM hypervisor stacktraces"
|
||||
depends on PKVM_DISABLE_STAGE2_ON_PANIC
|
||||
default y
|
||||
help
|
||||
Say Y here to enable pKVM hypervisor stacktraces on hyp_panic()
|
||||
|
||||
If using protected nVHE mode, but cannot afford the associated
|
||||
memory cost (less than 0.75 page per CPU) of pKVM stacktraces,
|
||||
say N.
|
||||
|
||||
If unsure, or not using protected nVHE (pKVM), say N.
|
||||
|
||||
endif # NVHE_EL2_DEBUG
|
||||
endif # KVM
|
||||
endif # VIRTUALIZATION
|
||||
|
||||
@@ -30,6 +30,8 @@ kvm-$(CONFIG_HW_PERF_EVENTS) += pmu-emul.o pmu.o
|
||||
kvm-$(CONFIG_ARM64_PTR_AUTH) += pauth.o
|
||||
kvm-$(CONFIG_PTDUMP_STAGE2_DEBUGFS) += ptdump.o
|
||||
|
||||
kvm-$(CONFIG_NVHE_EL2_TRACING) += hyp_trace.o
|
||||
|
||||
always-y := hyp_constants.h hyp-constants.s
|
||||
|
||||
define rule_gen_hyp_constants
|
||||
|
||||
@@ -56,6 +56,12 @@ static struct irq_ops arch_timer_irq_ops = {
|
||||
.get_input_level = kvm_arch_timer_get_input_level,
|
||||
};
|
||||
|
||||
static struct irq_ops arch_timer_irq_ops_vgic_v5 = {
|
||||
.get_input_level = kvm_arch_timer_get_input_level,
|
||||
.queue_irq_unlock = vgic_v5_ppi_queue_irq_unlock,
|
||||
.set_direct_injection = vgic_v5_set_ppi_dvi,
|
||||
};
|
||||
|
||||
static int nr_timers(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if (!vcpu_has_nv(vcpu))
|
||||
@@ -447,6 +453,17 @@ static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
|
||||
if (userspace_irqchip(vcpu->kvm))
|
||||
return;
|
||||
|
||||
/* Skip injecting on GICv5 for directly injected (DVI'd) timers */
|
||||
if (vgic_is_v5(vcpu->kvm)) {
|
||||
struct timer_map map;
|
||||
|
||||
get_timer_map(vcpu, &map);
|
||||
|
||||
if (map.direct_ptimer == timer_ctx ||
|
||||
map.direct_vtimer == timer_ctx)
|
||||
return;
|
||||
}
|
||||
|
||||
kvm_vgic_inject_irq(vcpu->kvm, vcpu,
|
||||
timer_irq(timer_ctx),
|
||||
timer_ctx->irq.level,
|
||||
@@ -674,6 +691,7 @@ static void kvm_timer_vcpu_load_gic(struct arch_timer_context *ctx)
|
||||
phys_active = kvm_vgic_map_is_active(vcpu, timer_irq(ctx));
|
||||
|
||||
phys_active |= ctx->irq.level;
|
||||
phys_active |= vgic_is_v5(vcpu->kvm);
|
||||
|
||||
set_timer_irq_phys_active(ctx, phys_active);
|
||||
}
|
||||
@@ -740,13 +758,11 @@ static void kvm_timer_vcpu_load_nested_switch(struct kvm_vcpu *vcpu,
|
||||
|
||||
ret = kvm_vgic_map_phys_irq(vcpu,
|
||||
map->direct_vtimer->host_timer_irq,
|
||||
timer_irq(map->direct_vtimer),
|
||||
&arch_timer_irq_ops);
|
||||
timer_irq(map->direct_vtimer));
|
||||
WARN_ON_ONCE(ret);
|
||||
ret = kvm_vgic_map_phys_irq(vcpu,
|
||||
map->direct_ptimer->host_timer_irq,
|
||||
timer_irq(map->direct_ptimer),
|
||||
&arch_timer_irq_ops);
|
||||
timer_irq(map->direct_ptimer));
|
||||
WARN_ON_ONCE(ret);
|
||||
}
|
||||
}
|
||||
@@ -864,7 +880,8 @@ void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu)
|
||||
get_timer_map(vcpu, &map);
|
||||
|
||||
if (static_branch_likely(&has_gic_active_state)) {
|
||||
if (vcpu_has_nv(vcpu))
|
||||
/* We don't do NV on GICv5, yet */
|
||||
if (vcpu_has_nv(vcpu) && !vgic_is_v5(vcpu->kvm))
|
||||
kvm_timer_vcpu_load_nested_switch(vcpu, &map);
|
||||
|
||||
kvm_timer_vcpu_load_gic(map.direct_vtimer);
|
||||
@@ -934,6 +951,12 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
|
||||
|
||||
if (kvm_vcpu_is_blocking(vcpu))
|
||||
kvm_timer_blocking(vcpu);
|
||||
|
||||
if (vgic_is_v5(vcpu->kvm)) {
|
||||
set_timer_irq_phys_active(map.direct_vtimer, false);
|
||||
if (map.direct_ptimer)
|
||||
set_timer_irq_phys_active(map.direct_ptimer, false);
|
||||
}
|
||||
}
|
||||
|
||||
void kvm_timer_sync_nested(struct kvm_vcpu *vcpu)
|
||||
@@ -1097,10 +1120,19 @@ void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu)
|
||||
HRTIMER_MODE_ABS_HARD);
|
||||
}
|
||||
|
||||
/*
|
||||
* This is always called during kvm_arch_init_vm, but will also be
|
||||
* called from kvm_vgic_create if we have a vGICv5.
|
||||
*/
|
||||
void kvm_timer_init_vm(struct kvm *kvm)
|
||||
{
|
||||
/*
|
||||
* Set up the default PPIs - note that we adjust them based on
|
||||
* the model of the GIC as GICv5 uses a different way of
|
||||
* describing interrupts.
|
||||
*/
|
||||
for (int i = 0; i < NR_KVM_TIMERS; i++)
|
||||
kvm->arch.timer_data.ppi[i] = default_ppi[i];
|
||||
kvm->arch.timer_data.ppi[i] = get_vgic_ppi(kvm, default_ppi[i]);
|
||||
}
|
||||
|
||||
void kvm_timer_cpu_up(void)
|
||||
@@ -1269,7 +1301,15 @@ static int timer_irq_set_irqchip_state(struct irq_data *d,
|
||||
|
||||
static void timer_irq_eoi(struct irq_data *d)
|
||||
{
|
||||
if (!irqd_is_forwarded_to_vcpu(d))
|
||||
/*
|
||||
* On a GICv5 host, we still need to call EOI on the parent for
|
||||
* PPIs. The host driver already handles irqs which are forwarded to
|
||||
* vcpus, and skips the GIC CDDI while still doing the GIC CDEOI. This
|
||||
* is required to emulate the EOIMode=1 on GICv5 hardware. Failure to
|
||||
* call EOI unsurprisingly results in *BAD* lock-ups.
|
||||
*/
|
||||
if (!irqd_is_forwarded_to_vcpu(d) ||
|
||||
kvm_vgic_global_state.type == VGIC_V5)
|
||||
irq_chip_eoi_parent(d);
|
||||
}
|
||||
|
||||
@@ -1333,7 +1373,8 @@ static int kvm_irq_init(struct arch_timer_kvm_info *info)
|
||||
host_vtimer_irq = info->virtual_irq;
|
||||
kvm_irq_fixup_flags(host_vtimer_irq, &host_vtimer_irq_flags);
|
||||
|
||||
if (kvm_vgic_global_state.no_hw_deactivation) {
|
||||
if (kvm_vgic_global_state.no_hw_deactivation ||
|
||||
kvm_vgic_global_state.type == VGIC_V5) {
|
||||
struct fwnode_handle *fwnode;
|
||||
struct irq_data *data;
|
||||
|
||||
@@ -1351,7 +1392,8 @@ static int kvm_irq_init(struct arch_timer_kvm_info *info)
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
arch_timer_irq_ops.flags |= VGIC_IRQ_SW_RESAMPLE;
|
||||
if (kvm_vgic_global_state.no_hw_deactivation)
|
||||
arch_timer_irq_ops.flags |= VGIC_IRQ_SW_RESAMPLE;
|
||||
WARN_ON(irq_domain_push_irq(domain, host_vtimer_irq,
|
||||
(void *)TIMER_VTIMER));
|
||||
}
|
||||
@@ -1501,11 +1543,18 @@ static bool timer_irqs_are_valid(struct kvm_vcpu *vcpu)
|
||||
if (kvm_vgic_set_owner(vcpu, irq, ctx))
|
||||
break;
|
||||
|
||||
/* With GICv5, the default PPI is what you get -- nothing else */
|
||||
if (vgic_is_v5(vcpu->kvm) && irq != get_vgic_ppi(vcpu->kvm, default_ppi[i]))
|
||||
break;
|
||||
|
||||
/*
|
||||
* We know by construction that we only have PPIs, so
|
||||
* all values are less than 32.
|
||||
* We know by construction that we only have PPIs, so all values
|
||||
* are less than 32 for non-GICv5 VGICs. On GICv5, they are
|
||||
* architecturally defined to be under 32 too. However, we mask
|
||||
* off most of the bits as we might be presented with a GICv5
|
||||
* style PPI where the type is encoded in the top-bits.
|
||||
*/
|
||||
ppis |= BIT(irq);
|
||||
ppis |= BIT(irq & 0x1f);
|
||||
}
|
||||
|
||||
valid = hweight32(ppis) == nr_timers(vcpu);
|
||||
@@ -1543,6 +1592,7 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct arch_timer_cpu *timer = vcpu_timer(vcpu);
|
||||
struct timer_map map;
|
||||
struct irq_ops *ops;
|
||||
int ret;
|
||||
|
||||
if (timer->enabled)
|
||||
@@ -1563,20 +1613,22 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu)
|
||||
|
||||
get_timer_map(vcpu, &map);
|
||||
|
||||
ops = vgic_is_v5(vcpu->kvm) ? &arch_timer_irq_ops_vgic_v5 :
|
||||
&arch_timer_irq_ops;
|
||||
|
||||
for (int i = 0; i < nr_timers(vcpu); i++)
|
||||
kvm_vgic_set_irq_ops(vcpu, timer_irq(vcpu_get_timer(vcpu, i)), ops);
|
||||
|
||||
ret = kvm_vgic_map_phys_irq(vcpu,
|
||||
map.direct_vtimer->host_timer_irq,
|
||||
timer_irq(map.direct_vtimer),
|
||||
&arch_timer_irq_ops);
|
||||
timer_irq(map.direct_vtimer));
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (map.direct_ptimer) {
|
||||
if (map.direct_ptimer)
|
||||
ret = kvm_vgic_map_phys_irq(vcpu,
|
||||
map.direct_ptimer->host_timer_irq,
|
||||
timer_irq(map.direct_ptimer),
|
||||
&arch_timer_irq_ops);
|
||||
}
|
||||
|
||||
timer_irq(map.direct_ptimer));
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
@@ -1603,15 +1655,14 @@ int kvm_arm_timer_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
|
||||
if (get_user(irq, uaddr))
|
||||
return -EFAULT;
|
||||
|
||||
if (!(irq_is_ppi(irq)))
|
||||
if (!(irq_is_ppi(vcpu->kvm, irq)))
|
||||
return -EINVAL;
|
||||
|
||||
mutex_lock(&vcpu->kvm->arch.config_lock);
|
||||
guard(mutex)(&vcpu->kvm->arch.config_lock);
|
||||
|
||||
if (test_bit(KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE,
|
||||
&vcpu->kvm->arch.flags)) {
|
||||
ret = -EBUSY;
|
||||
goto out;
|
||||
return -EBUSY;
|
||||
}
|
||||
|
||||
switch (attr->attr) {
|
||||
@@ -1628,8 +1679,7 @@ int kvm_arm_timer_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
|
||||
idx = TIMER_HPTIMER;
|
||||
break;
|
||||
default:
|
||||
ret = -ENXIO;
|
||||
goto out;
|
||||
return -ENXIO;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1639,8 +1689,6 @@ int kvm_arm_timer_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
|
||||
*/
|
||||
vcpu->kvm->arch.timer_data.ppi[idx] = irq;
|
||||
|
||||
out:
|
||||
mutex_unlock(&vcpu->kvm->arch.config_lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
@@ -24,6 +24,7 @@
|
||||
|
||||
#define CREATE_TRACE_POINTS
|
||||
#include "trace_arm.h"
|
||||
#include "hyp_trace.h"
|
||||
|
||||
#include <linux/uaccess.h>
|
||||
#include <asm/ptrace.h>
|
||||
@@ -35,6 +36,7 @@
|
||||
#include <asm/kvm_arm.h>
|
||||
#include <asm/kvm_asm.h>
|
||||
#include <asm/kvm_emulate.h>
|
||||
#include <asm/kvm_hyp.h>
|
||||
#include <asm/kvm_mmu.h>
|
||||
#include <asm/kvm_nested.h>
|
||||
#include <asm/kvm_pkvm.h>
|
||||
@@ -45,6 +47,9 @@
|
||||
#include <kvm/arm_hypercalls.h>
|
||||
#include <kvm/arm_pmu.h>
|
||||
#include <kvm/arm_psci.h>
|
||||
#include <kvm/arm_vgic.h>
|
||||
|
||||
#include <linux/irqchip/arm-gic-v5.h>
|
||||
|
||||
#include "sys_regs.h"
|
||||
|
||||
@@ -203,6 +208,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
|
||||
{
|
||||
int ret;
|
||||
|
||||
if (type & ~KVM_VM_TYPE_ARM_MASK)
|
||||
return -EINVAL;
|
||||
|
||||
mutex_init(&kvm->arch.config_lock);
|
||||
|
||||
#ifdef CONFIG_LOCKDEP
|
||||
@@ -234,9 +242,12 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
|
||||
* If any failures occur after this is successful, make sure to
|
||||
* call __pkvm_unreserve_vm to unreserve the VM in hyp.
|
||||
*/
|
||||
ret = pkvm_init_host_vm(kvm);
|
||||
ret = pkvm_init_host_vm(kvm, type);
|
||||
if (ret)
|
||||
goto err_free_cpumask;
|
||||
goto err_uninit_mmu;
|
||||
} else if (type & KVM_VM_TYPE_ARM_PROTECTED) {
|
||||
ret = -EINVAL;
|
||||
goto err_uninit_mmu;
|
||||
}
|
||||
|
||||
kvm_vgic_early_init(kvm);
|
||||
@@ -252,6 +263,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
|
||||
|
||||
return 0;
|
||||
|
||||
err_uninit_mmu:
|
||||
kvm_uninit_stage2_mmu(kvm);
|
||||
err_free_cpumask:
|
||||
free_cpumask_var(kvm->arch.supported_cpus);
|
||||
err_unshare_kvm:
|
||||
@@ -301,6 +314,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
|
||||
if (is_protected_kvm_enabled())
|
||||
pkvm_destroy_hyp_vm(kvm);
|
||||
|
||||
kvm_uninit_stage2_mmu(kvm);
|
||||
kvm_destroy_mpidr_data(kvm);
|
||||
|
||||
kfree(kvm->arch.sysreg_masks);
|
||||
@@ -613,6 +627,9 @@ static bool kvm_vcpu_should_clear_twi(struct kvm_vcpu *vcpu)
|
||||
if (unlikely(kvm_wfi_trap_policy != KVM_WFX_NOTRAP_SINGLE_TASK))
|
||||
return kvm_wfi_trap_policy == KVM_WFX_NOTRAP;
|
||||
|
||||
if (vgic_is_v5(vcpu->kvm))
|
||||
return single_task_running();
|
||||
|
||||
return single_task_running() &&
|
||||
vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3 &&
|
||||
(atomic_read(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe.vlpi_count) ||
|
||||
@@ -705,6 +722,8 @@ nommu:
|
||||
|
||||
if (!cpumask_test_cpu(cpu, vcpu->kvm->arch.supported_cpus))
|
||||
vcpu_set_on_unsupported_cpu(vcpu);
|
||||
|
||||
vcpu->arch.pid = pid_nr(vcpu->pid);
|
||||
}
|
||||
|
||||
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
|
||||
@@ -934,6 +953,10 @@ int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu)
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = vgic_v5_finalize_ppi_state(kvm);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (is_protected_kvm_enabled()) {
|
||||
ret = pkvm_create_hyp_vm(kvm);
|
||||
if (ret)
|
||||
@@ -1439,10 +1462,11 @@ static int vcpu_interrupt_line(struct kvm_vcpu *vcpu, int number, bool level)
|
||||
int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level,
|
||||
bool line_status)
|
||||
{
|
||||
u32 irq = irq_level->irq;
|
||||
unsigned int irq_type, vcpu_id, irq_num;
|
||||
struct kvm_vcpu *vcpu = NULL;
|
||||
bool level = irq_level->level;
|
||||
u32 irq = irq_level->irq;
|
||||
unsigned long *mask;
|
||||
|
||||
irq_type = (irq >> KVM_ARM_IRQ_TYPE_SHIFT) & KVM_ARM_IRQ_TYPE_MASK;
|
||||
vcpu_id = (irq >> KVM_ARM_IRQ_VCPU_SHIFT) & KVM_ARM_IRQ_VCPU_MASK;
|
||||
@@ -1472,16 +1496,37 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level,
|
||||
if (!vcpu)
|
||||
return -EINVAL;
|
||||
|
||||
if (irq_num < VGIC_NR_SGIS || irq_num >= VGIC_NR_PRIVATE_IRQS)
|
||||
if (vgic_is_v5(kvm)) {
|
||||
if (irq_num >= VGIC_V5_NR_PRIVATE_IRQS)
|
||||
return -EINVAL;
|
||||
|
||||
/*
|
||||
* Only allow PPIs that are explicitly exposed to
|
||||
* usespace to be driven via KVM_IRQ_LINE
|
||||
*/
|
||||
mask = kvm->arch.vgic.gicv5_vm.userspace_ppis;
|
||||
if (!test_bit(irq_num, mask))
|
||||
return -EINVAL;
|
||||
|
||||
/* Build a GICv5-style IntID here */
|
||||
irq_num = vgic_v5_make_ppi(irq_num);
|
||||
} else if (irq_num < VGIC_NR_SGIS ||
|
||||
irq_num >= VGIC_NR_PRIVATE_IRQS) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
return kvm_vgic_inject_irq(kvm, vcpu, irq_num, level, NULL);
|
||||
case KVM_ARM_IRQ_TYPE_SPI:
|
||||
if (!irqchip_in_kernel(kvm))
|
||||
return -ENXIO;
|
||||
|
||||
if (irq_num < VGIC_NR_PRIVATE_IRQS)
|
||||
return -EINVAL;
|
||||
if (vgic_is_v5(kvm)) {
|
||||
/* Build a GICv5-style IntID here */
|
||||
irq_num = vgic_v5_make_spi(irq_num);
|
||||
} else {
|
||||
if (irq_num < VGIC_NR_PRIVATE_IRQS)
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
return kvm_vgic_inject_irq(kvm, NULL, irq_num, level, NULL);
|
||||
}
|
||||
@@ -2414,6 +2459,10 @@ static int __init init_subsystems(void)
|
||||
|
||||
kvm_register_perf_callbacks();
|
||||
|
||||
err = kvm_hyp_trace_init();
|
||||
if (err)
|
||||
kvm_err("Failed to initialize Hyp tracing\n");
|
||||
|
||||
out:
|
||||
if (err)
|
||||
hyp_cpu_pm_exit();
|
||||
@@ -2465,7 +2514,7 @@ static int __init do_pkvm_init(u32 hyp_va_bits)
|
||||
preempt_disable();
|
||||
cpu_hyp_init_context();
|
||||
ret = kvm_call_hyp_nvhe(__pkvm_init, hyp_mem_base, hyp_mem_size,
|
||||
num_possible_cpus(), kern_hyp_va(per_cpu_base),
|
||||
kern_hyp_va(per_cpu_base),
|
||||
hyp_va_bits);
|
||||
cpu_hyp_init_features();
|
||||
|
||||
@@ -2507,6 +2556,7 @@ static void kvm_hyp_init_symbols(void)
|
||||
{
|
||||
kvm_nvhe_sym(id_aa64pfr0_el1_sys_val) = get_hyp_id_aa64pfr0_el1();
|
||||
kvm_nvhe_sym(id_aa64pfr1_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1);
|
||||
kvm_nvhe_sym(id_aa64pfr2_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64PFR2_EL1);
|
||||
kvm_nvhe_sym(id_aa64isar0_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64ISAR0_EL1);
|
||||
kvm_nvhe_sym(id_aa64isar1_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64ISAR1_EL1);
|
||||
kvm_nvhe_sym(id_aa64isar2_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64ISAR2_EL1);
|
||||
@@ -2529,6 +2579,9 @@ static void kvm_hyp_init_symbols(void)
|
||||
kvm_nvhe_sym(hfgitr2_masks) = hfgitr2_masks;
|
||||
kvm_nvhe_sym(hdfgrtr2_masks)= hdfgrtr2_masks;
|
||||
kvm_nvhe_sym(hdfgwtr2_masks)= hdfgwtr2_masks;
|
||||
kvm_nvhe_sym(ich_hfgrtr_masks) = ich_hfgrtr_masks;
|
||||
kvm_nvhe_sym(ich_hfgwtr_masks) = ich_hfgwtr_masks;
|
||||
kvm_nvhe_sym(ich_hfgitr_masks) = ich_hfgitr_masks;
|
||||
|
||||
/*
|
||||
* Flush entire BSS since part of its data containing init symbols is read
|
||||
@@ -2674,6 +2727,8 @@ static int __init init_hyp_mode(void)
|
||||
kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[cpu] = (unsigned long)page_addr;
|
||||
}
|
||||
|
||||
kvm_nvhe_sym(hyp_nr_cpus) = num_possible_cpus();
|
||||
|
||||
/*
|
||||
* Map the Hyp-code called directly from the host
|
||||
*/
|
||||
|
||||
@@ -225,6 +225,7 @@ struct reg_feat_map_desc {
|
||||
#define FEAT_MTPMU ID_AA64DFR0_EL1, MTPMU, IMP
|
||||
#define FEAT_HCX ID_AA64MMFR1_EL1, HCX, IMP
|
||||
#define FEAT_S2PIE ID_AA64MMFR3_EL1, S2PIE, IMP
|
||||
#define FEAT_GCIE ID_AA64PFR2_EL1, GCIE, IMP
|
||||
|
||||
static bool not_feat_aa64el3(struct kvm *kvm)
|
||||
{
|
||||
@@ -1277,6 +1278,58 @@ static const struct reg_bits_to_feat_map vtcr_el2_feat_map[] = {
|
||||
static const DECLARE_FEAT_MAP(vtcr_el2_desc, VTCR_EL2,
|
||||
vtcr_el2_feat_map, FEAT_AA64EL2);
|
||||
|
||||
static const struct reg_bits_to_feat_map ich_hfgrtr_feat_map[] = {
|
||||
NEEDS_FEAT(ICH_HFGRTR_EL2_ICC_APR_EL1 |
|
||||
ICH_HFGRTR_EL2_ICC_IDRn_EL1 |
|
||||
ICH_HFGRTR_EL2_ICC_CR0_EL1 |
|
||||
ICH_HFGRTR_EL2_ICC_HPPIR_EL1 |
|
||||
ICH_HFGRTR_EL2_ICC_PCR_EL1 |
|
||||
ICH_HFGRTR_EL2_ICC_ICSR_EL1 |
|
||||
ICH_HFGRTR_EL2_ICC_IAFFIDR_EL1 |
|
||||
ICH_HFGRTR_EL2_ICC_PPI_HMRn_EL1 |
|
||||
ICH_HFGRTR_EL2_ICC_PPI_ENABLERn_EL1 |
|
||||
ICH_HFGRTR_EL2_ICC_PPI_PENDRn_EL1 |
|
||||
ICH_HFGRTR_EL2_ICC_PPI_PRIORITYRn_EL1 |
|
||||
ICH_HFGRTR_EL2_ICC_PPI_ACTIVERn_EL1,
|
||||
FEAT_GCIE),
|
||||
};
|
||||
|
||||
static const DECLARE_FEAT_MAP_FGT(ich_hfgrtr_desc, ich_hfgrtr_masks,
|
||||
ich_hfgrtr_feat_map, FEAT_GCIE);
|
||||
|
||||
static const struct reg_bits_to_feat_map ich_hfgwtr_feat_map[] = {
|
||||
NEEDS_FEAT(ICH_HFGWTR_EL2_ICC_APR_EL1 |
|
||||
ICH_HFGWTR_EL2_ICC_CR0_EL1 |
|
||||
ICH_HFGWTR_EL2_ICC_PCR_EL1 |
|
||||
ICH_HFGWTR_EL2_ICC_ICSR_EL1 |
|
||||
ICH_HFGWTR_EL2_ICC_PPI_ENABLERn_EL1 |
|
||||
ICH_HFGWTR_EL2_ICC_PPI_PENDRn_EL1 |
|
||||
ICH_HFGWTR_EL2_ICC_PPI_PRIORITYRn_EL1 |
|
||||
ICH_HFGWTR_EL2_ICC_PPI_ACTIVERn_EL1,
|
||||
FEAT_GCIE),
|
||||
};
|
||||
|
||||
static const DECLARE_FEAT_MAP_FGT(ich_hfgwtr_desc, ich_hfgwtr_masks,
|
||||
ich_hfgwtr_feat_map, FEAT_GCIE);
|
||||
|
||||
static const struct reg_bits_to_feat_map ich_hfgitr_feat_map[] = {
|
||||
NEEDS_FEAT(ICH_HFGITR_EL2_GICCDEN |
|
||||
ICH_HFGITR_EL2_GICCDDIS |
|
||||
ICH_HFGITR_EL2_GICCDPRI |
|
||||
ICH_HFGITR_EL2_GICCDAFF |
|
||||
ICH_HFGITR_EL2_GICCDPEND |
|
||||
ICH_HFGITR_EL2_GICCDRCFG |
|
||||
ICH_HFGITR_EL2_GICCDHM |
|
||||
ICH_HFGITR_EL2_GICCDEOI |
|
||||
ICH_HFGITR_EL2_GICCDDI |
|
||||
ICH_HFGITR_EL2_GICRCDIA |
|
||||
ICH_HFGITR_EL2_GICRCDNMIA,
|
||||
FEAT_GCIE),
|
||||
};
|
||||
|
||||
static const DECLARE_FEAT_MAP_FGT(ich_hfgitr_desc, ich_hfgitr_masks,
|
||||
ich_hfgitr_feat_map, FEAT_GCIE);
|
||||
|
||||
static void __init check_feat_map(const struct reg_bits_to_feat_map *map,
|
||||
int map_size, u64 resx, const char *str)
|
||||
{
|
||||
@@ -1328,6 +1381,9 @@ void __init check_feature_map(void)
|
||||
check_reg_desc(&sctlr_el2_desc);
|
||||
check_reg_desc(&mdcr_el2_desc);
|
||||
check_reg_desc(&vtcr_el2_desc);
|
||||
check_reg_desc(&ich_hfgrtr_desc);
|
||||
check_reg_desc(&ich_hfgwtr_desc);
|
||||
check_reg_desc(&ich_hfgitr_desc);
|
||||
}
|
||||
|
||||
static bool idreg_feat_match(struct kvm *kvm, const struct reg_bits_to_feat_map *map)
|
||||
@@ -1460,6 +1516,13 @@ void compute_fgu(struct kvm *kvm, enum fgt_group_id fgt)
|
||||
val |= compute_fgu_bits(kvm, &hdfgrtr2_desc);
|
||||
val |= compute_fgu_bits(kvm, &hdfgwtr2_desc);
|
||||
break;
|
||||
case ICH_HFGRTR_GROUP:
|
||||
val |= compute_fgu_bits(kvm, &ich_hfgrtr_desc);
|
||||
val |= compute_fgu_bits(kvm, &ich_hfgwtr_desc);
|
||||
break;
|
||||
case ICH_HFGITR_GROUP:
|
||||
val |= compute_fgu_bits(kvm, &ich_hfgitr_desc);
|
||||
break;
|
||||
default:
|
||||
BUG();
|
||||
}
|
||||
@@ -1531,6 +1594,15 @@ struct resx get_reg_fixed_bits(struct kvm *kvm, enum vcpu_sysreg reg)
|
||||
case VTCR_EL2:
|
||||
resx = compute_reg_resx_bits(kvm, &vtcr_el2_desc, 0, 0);
|
||||
break;
|
||||
case ICH_HFGRTR_EL2:
|
||||
resx = compute_reg_resx_bits(kvm, &ich_hfgrtr_desc, 0, 0);
|
||||
break;
|
||||
case ICH_HFGWTR_EL2:
|
||||
resx = compute_reg_resx_bits(kvm, &ich_hfgwtr_desc, 0, 0);
|
||||
break;
|
||||
case ICH_HFGITR_EL2:
|
||||
resx = compute_reg_resx_bits(kvm, &ich_hfgitr_desc, 0, 0);
|
||||
break;
|
||||
default:
|
||||
WARN_ON_ONCE(1);
|
||||
resx = (typeof(resx)){};
|
||||
@@ -1565,6 +1637,12 @@ static __always_inline struct fgt_masks *__fgt_reg_to_masks(enum vcpu_sysreg reg
|
||||
return &hdfgrtr2_masks;
|
||||
case HDFGWTR2_EL2:
|
||||
return &hdfgwtr2_masks;
|
||||
case ICH_HFGRTR_EL2:
|
||||
return &ich_hfgrtr_masks;
|
||||
case ICH_HFGWTR_EL2:
|
||||
return &ich_hfgwtr_masks;
|
||||
case ICH_HFGITR_EL2:
|
||||
return &ich_hfgitr_masks;
|
||||
default:
|
||||
BUILD_BUG_ON(1);
|
||||
}
|
||||
@@ -1585,8 +1663,8 @@ static __always_inline void __compute_fgt(struct kvm_vcpu *vcpu, enum vcpu_sysre
|
||||
clear |= ~nested & m->nmask;
|
||||
}
|
||||
|
||||
val |= set;
|
||||
val &= ~clear;
|
||||
val |= set | m->res1;
|
||||
val &= ~(clear | m->res0);
|
||||
*vcpu_fgt(vcpu, reg) = val;
|
||||
}
|
||||
|
||||
@@ -1606,6 +1684,32 @@ static void __compute_hdfgwtr(struct kvm_vcpu *vcpu)
|
||||
*vcpu_fgt(vcpu, HDFGWTR_EL2) |= HDFGWTR_EL2_MDSCR_EL1;
|
||||
}
|
||||
|
||||
static void __compute_ich_hfgrtr(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
__compute_fgt(vcpu, ICH_HFGRTR_EL2);
|
||||
|
||||
/*
|
||||
* ICC_IAFFIDR_EL1 *always* needs to be trapped when running a guest.
|
||||
*
|
||||
* We also trap accesses to ICC_IDR0_EL1 to allow us to completely hide
|
||||
* FEAT_GCIE_LEGACY from the guest, and to (potentially) present fewer
|
||||
* ID bits than the host supports.
|
||||
*/
|
||||
*vcpu_fgt(vcpu, ICH_HFGRTR_EL2) &= ~(ICH_HFGRTR_EL2_ICC_IAFFIDR_EL1 |
|
||||
ICH_HFGRTR_EL2_ICC_IDRn_EL1);
|
||||
}
|
||||
|
||||
static void __compute_ich_hfgwtr(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
__compute_fgt(vcpu, ICH_HFGWTR_EL2);
|
||||
|
||||
/*
|
||||
* We present a different subset of PPIs to the guest from what
|
||||
* exists in real hardware. We only trap writes, not reads.
|
||||
*/
|
||||
*vcpu_fgt(vcpu, ICH_HFGWTR_EL2) &= ~(ICH_HFGWTR_EL2_ICC_PPI_ENABLERn_EL1);
|
||||
}
|
||||
|
||||
void kvm_vcpu_load_fgt(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if (!cpus_have_final_cap(ARM64_HAS_FGT))
|
||||
@@ -1618,12 +1722,17 @@ void kvm_vcpu_load_fgt(struct kvm_vcpu *vcpu)
|
||||
__compute_hdfgwtr(vcpu);
|
||||
__compute_fgt(vcpu, HAFGRTR_EL2);
|
||||
|
||||
if (!cpus_have_final_cap(ARM64_HAS_FGT2))
|
||||
return;
|
||||
if (cpus_have_final_cap(ARM64_HAS_FGT2)) {
|
||||
__compute_fgt(vcpu, HFGRTR2_EL2);
|
||||
__compute_fgt(vcpu, HFGWTR2_EL2);
|
||||
__compute_fgt(vcpu, HFGITR2_EL2);
|
||||
__compute_fgt(vcpu, HDFGRTR2_EL2);
|
||||
__compute_fgt(vcpu, HDFGWTR2_EL2);
|
||||
}
|
||||
|
||||
__compute_fgt(vcpu, HFGRTR2_EL2);
|
||||
__compute_fgt(vcpu, HFGWTR2_EL2);
|
||||
__compute_fgt(vcpu, HFGITR2_EL2);
|
||||
__compute_fgt(vcpu, HDFGRTR2_EL2);
|
||||
__compute_fgt(vcpu, HDFGWTR2_EL2);
|
||||
if (cpus_have_final_cap(ARM64_HAS_GICV5_CPUIF)) {
|
||||
__compute_ich_hfgrtr(vcpu);
|
||||
__compute_ich_hfgwtr(vcpu);
|
||||
__compute_fgt(vcpu, ICH_HFGITR_EL2);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2053,6 +2053,60 @@ static const struct encoding_to_trap_config encoding_to_fgt[] __initconst = {
|
||||
SR_FGT(SYS_AMEVCNTR0_EL0(2), HAFGRTR, AMEVCNTR02_EL0, 1),
|
||||
SR_FGT(SYS_AMEVCNTR0_EL0(1), HAFGRTR, AMEVCNTR01_EL0, 1),
|
||||
SR_FGT(SYS_AMEVCNTR0_EL0(0), HAFGRTR, AMEVCNTR00_EL0, 1),
|
||||
|
||||
/*
|
||||
* ICH_HFGRTR_EL2 & ICH_HFGWTR_EL2
|
||||
*/
|
||||
SR_FGT(SYS_ICC_APR_EL1, ICH_HFGRTR, ICC_APR_EL1, 0),
|
||||
SR_FGT(SYS_ICC_IDR0_EL1, ICH_HFGRTR, ICC_IDRn_EL1, 0),
|
||||
SR_FGT(SYS_ICC_CR0_EL1, ICH_HFGRTR, ICC_CR0_EL1, 0),
|
||||
SR_FGT(SYS_ICC_HPPIR_EL1, ICH_HFGRTR, ICC_HPPIR_EL1, 0),
|
||||
SR_FGT(SYS_ICC_PCR_EL1, ICH_HFGRTR, ICC_PCR_EL1, 0),
|
||||
SR_FGT(SYS_ICC_ICSR_EL1, ICH_HFGRTR, ICC_ICSR_EL1, 0),
|
||||
SR_FGT(SYS_ICC_IAFFIDR_EL1, ICH_HFGRTR, ICC_IAFFIDR_EL1, 0),
|
||||
SR_FGT(SYS_ICC_PPI_HMR0_EL1, ICH_HFGRTR, ICC_PPI_HMRn_EL1, 0),
|
||||
SR_FGT(SYS_ICC_PPI_HMR1_EL1, ICH_HFGRTR, ICC_PPI_HMRn_EL1, 0),
|
||||
SR_FGT(SYS_ICC_PPI_ENABLER0_EL1, ICH_HFGRTR, ICC_PPI_ENABLERn_EL1, 0),
|
||||
SR_FGT(SYS_ICC_PPI_ENABLER1_EL1, ICH_HFGRTR, ICC_PPI_ENABLERn_EL1, 0),
|
||||
SR_FGT(SYS_ICC_PPI_CPENDR0_EL1, ICH_HFGRTR, ICC_PPI_PENDRn_EL1, 0),
|
||||
SR_FGT(SYS_ICC_PPI_CPENDR1_EL1, ICH_HFGRTR, ICC_PPI_PENDRn_EL1, 0),
|
||||
SR_FGT(SYS_ICC_PPI_SPENDR0_EL1, ICH_HFGRTR, ICC_PPI_PENDRn_EL1, 0),
|
||||
SR_FGT(SYS_ICC_PPI_SPENDR1_EL1, ICH_HFGRTR, ICC_PPI_PENDRn_EL1, 0),
|
||||
SR_FGT(SYS_ICC_PPI_PRIORITYR0_EL1, ICH_HFGRTR, ICC_PPI_PRIORITYRn_EL1, 0),
|
||||
SR_FGT(SYS_ICC_PPI_PRIORITYR1_EL1, ICH_HFGRTR, ICC_PPI_PRIORITYRn_EL1, 0),
|
||||
SR_FGT(SYS_ICC_PPI_PRIORITYR2_EL1, ICH_HFGRTR, ICC_PPI_PRIORITYRn_EL1, 0),
|
||||
SR_FGT(SYS_ICC_PPI_PRIORITYR3_EL1, ICH_HFGRTR, ICC_PPI_PRIORITYRn_EL1, 0),
|
||||
SR_FGT(SYS_ICC_PPI_PRIORITYR4_EL1, ICH_HFGRTR, ICC_PPI_PRIORITYRn_EL1, 0),
|
||||
SR_FGT(SYS_ICC_PPI_PRIORITYR5_EL1, ICH_HFGRTR, ICC_PPI_PRIORITYRn_EL1, 0),
|
||||
SR_FGT(SYS_ICC_PPI_PRIORITYR6_EL1, ICH_HFGRTR, ICC_PPI_PRIORITYRn_EL1, 0),
|
||||
SR_FGT(SYS_ICC_PPI_PRIORITYR7_EL1, ICH_HFGRTR, ICC_PPI_PRIORITYRn_EL1, 0),
|
||||
SR_FGT(SYS_ICC_PPI_PRIORITYR8_EL1, ICH_HFGRTR, ICC_PPI_PRIORITYRn_EL1, 0),
|
||||
SR_FGT(SYS_ICC_PPI_PRIORITYR9_EL1, ICH_HFGRTR, ICC_PPI_PRIORITYRn_EL1, 0),
|
||||
SR_FGT(SYS_ICC_PPI_PRIORITYR10_EL1, ICH_HFGRTR, ICC_PPI_PRIORITYRn_EL1, 0),
|
||||
SR_FGT(SYS_ICC_PPI_PRIORITYR11_EL1, ICH_HFGRTR, ICC_PPI_PRIORITYRn_EL1, 0),
|
||||
SR_FGT(SYS_ICC_PPI_PRIORITYR12_EL1, ICH_HFGRTR, ICC_PPI_PRIORITYRn_EL1, 0),
|
||||
SR_FGT(SYS_ICC_PPI_PRIORITYR13_EL1, ICH_HFGRTR, ICC_PPI_PRIORITYRn_EL1, 0),
|
||||
SR_FGT(SYS_ICC_PPI_PRIORITYR14_EL1, ICH_HFGRTR, ICC_PPI_PRIORITYRn_EL1, 0),
|
||||
SR_FGT(SYS_ICC_PPI_PRIORITYR15_EL1, ICH_HFGRTR, ICC_PPI_PRIORITYRn_EL1, 0),
|
||||
SR_FGT(SYS_ICC_PPI_CACTIVER0_EL1, ICH_HFGRTR, ICC_PPI_ACTIVERn_EL1, 0),
|
||||
SR_FGT(SYS_ICC_PPI_CACTIVER1_EL1, ICH_HFGRTR, ICC_PPI_ACTIVERn_EL1, 0),
|
||||
SR_FGT(SYS_ICC_PPI_SACTIVER0_EL1, ICH_HFGRTR, ICC_PPI_ACTIVERn_EL1, 0),
|
||||
SR_FGT(SYS_ICC_PPI_SACTIVER1_EL1, ICH_HFGRTR, ICC_PPI_ACTIVERn_EL1, 0),
|
||||
|
||||
/*
|
||||
* ICH_HFGITR_EL2
|
||||
*/
|
||||
SR_FGT(GICV5_OP_GIC_CDEN, ICH_HFGITR, GICCDEN, 0),
|
||||
SR_FGT(GICV5_OP_GIC_CDDIS, ICH_HFGITR, GICCDDIS, 0),
|
||||
SR_FGT(GICV5_OP_GIC_CDPRI, ICH_HFGITR, GICCDPRI, 0),
|
||||
SR_FGT(GICV5_OP_GIC_CDAFF, ICH_HFGITR, GICCDAFF, 0),
|
||||
SR_FGT(GICV5_OP_GIC_CDPEND, ICH_HFGITR, GICCDPEND, 0),
|
||||
SR_FGT(GICV5_OP_GIC_CDRCFG, ICH_HFGITR, GICCDRCFG, 0),
|
||||
SR_FGT(GICV5_OP_GIC_CDHM, ICH_HFGITR, GICCDHM, 0),
|
||||
SR_FGT(GICV5_OP_GIC_CDEOI, ICH_HFGITR, GICCDEOI, 0),
|
||||
SR_FGT(GICV5_OP_GIC_CDDI, ICH_HFGITR, GICCDDI, 0),
|
||||
SR_FGT(GICV5_OP_GICR_CDIA, ICH_HFGITR, GICRCDIA, 0),
|
||||
SR_FGT(GICV5_OP_GICR_CDNMIA, ICH_HFGITR, GICRCDNMIA, 0),
|
||||
};
|
||||
|
||||
/*
|
||||
@@ -2127,6 +2181,9 @@ FGT_MASKS(hfgwtr2_masks, HFGWTR2_EL2);
|
||||
FGT_MASKS(hfgitr2_masks, HFGITR2_EL2);
|
||||
FGT_MASKS(hdfgrtr2_masks, HDFGRTR2_EL2);
|
||||
FGT_MASKS(hdfgwtr2_masks, HDFGWTR2_EL2);
|
||||
FGT_MASKS(ich_hfgrtr_masks, ICH_HFGRTR_EL2);
|
||||
FGT_MASKS(ich_hfgwtr_masks, ICH_HFGWTR_EL2);
|
||||
FGT_MASKS(ich_hfgitr_masks, ICH_HFGITR_EL2);
|
||||
|
||||
static __init bool aggregate_fgt(union trap_config tc)
|
||||
{
|
||||
@@ -2162,6 +2219,14 @@ static __init bool aggregate_fgt(union trap_config tc)
|
||||
rmasks = &hfgitr2_masks;
|
||||
wmasks = NULL;
|
||||
break;
|
||||
case ICH_HFGRTR_GROUP:
|
||||
rmasks = &ich_hfgrtr_masks;
|
||||
wmasks = &ich_hfgwtr_masks;
|
||||
break;
|
||||
case ICH_HFGITR_GROUP:
|
||||
rmasks = &ich_hfgitr_masks;
|
||||
wmasks = NULL;
|
||||
break;
|
||||
}
|
||||
|
||||
rresx = rmasks->res0 | rmasks->res1;
|
||||
@@ -2232,6 +2297,9 @@ static __init int check_all_fgt_masks(int ret)
|
||||
&hfgitr2_masks,
|
||||
&hdfgrtr2_masks,
|
||||
&hdfgwtr2_masks,
|
||||
&ich_hfgrtr_masks,
|
||||
&ich_hfgwtr_masks,
|
||||
&ich_hfgitr_masks,
|
||||
};
|
||||
int err = 0;
|
||||
|
||||
|
||||
@@ -539,7 +539,7 @@ void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr,
|
||||
|
||||
/* All hyp bugs, including warnings, are treated as fatal. */
|
||||
if (!is_protected_kvm_enabled() ||
|
||||
IS_ENABLED(CONFIG_NVHE_EL2_DEBUG)) {
|
||||
IS_ENABLED(CONFIG_PKVM_DISABLE_STAGE2_ON_PANIC)) {
|
||||
struct bug_entry *bug = find_bug(elr_in_kimg);
|
||||
|
||||
if (bug)
|
||||
|
||||
@@ -233,6 +233,18 @@ static inline void __activate_traps_hfgxtr(struct kvm_vcpu *vcpu)
|
||||
__activate_fgt(hctxt, vcpu, HDFGWTR2_EL2);
|
||||
}
|
||||
|
||||
static inline void __activate_traps_ich_hfgxtr(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_cpu_context *hctxt = host_data_ptr(host_ctxt);
|
||||
|
||||
if (!cpus_have_final_cap(ARM64_HAS_GICV5_CPUIF))
|
||||
return;
|
||||
|
||||
__activate_fgt(hctxt, vcpu, ICH_HFGRTR_EL2);
|
||||
__activate_fgt(hctxt, vcpu, ICH_HFGWTR_EL2);
|
||||
__activate_fgt(hctxt, vcpu, ICH_HFGITR_EL2);
|
||||
}
|
||||
|
||||
#define __deactivate_fgt(htcxt, vcpu, reg) \
|
||||
do { \
|
||||
write_sysreg_s(ctxt_sys_reg(hctxt, reg), \
|
||||
@@ -265,6 +277,19 @@ static inline void __deactivate_traps_hfgxtr(struct kvm_vcpu *vcpu)
|
||||
__deactivate_fgt(hctxt, vcpu, HDFGWTR2_EL2);
|
||||
}
|
||||
|
||||
static inline void __deactivate_traps_ich_hfgxtr(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_cpu_context *hctxt = host_data_ptr(host_ctxt);
|
||||
|
||||
if (!cpus_have_final_cap(ARM64_HAS_GICV5_CPUIF))
|
||||
return;
|
||||
|
||||
__deactivate_fgt(hctxt, vcpu, ICH_HFGRTR_EL2);
|
||||
__deactivate_fgt(hctxt, vcpu, ICH_HFGWTR_EL2);
|
||||
__deactivate_fgt(hctxt, vcpu, ICH_HFGITR_EL2);
|
||||
|
||||
}
|
||||
|
||||
static inline void __activate_traps_mpam(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
u64 r = MPAM2_EL2_TRAPMPAM0EL1 | MPAM2_EL2_TRAPMPAM1EL1;
|
||||
@@ -328,6 +353,7 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu)
|
||||
}
|
||||
|
||||
__activate_traps_hfgxtr(vcpu);
|
||||
__activate_traps_ich_hfgxtr(vcpu);
|
||||
__activate_traps_mpam(vcpu);
|
||||
}
|
||||
|
||||
@@ -345,6 +371,7 @@ static inline void __deactivate_traps_common(struct kvm_vcpu *vcpu)
|
||||
write_sysreg_s(ctxt_sys_reg(hctxt, HCRX_EL2), SYS_HCRX_EL2);
|
||||
|
||||
__deactivate_traps_hfgxtr(vcpu);
|
||||
__deactivate_traps_ich_hfgxtr(vcpu);
|
||||
__deactivate_traps_mpam();
|
||||
}
|
||||
|
||||
|
||||
23
arch/arm64/kvm/hyp/include/nvhe/arm-smccc.h
Normal file
23
arch/arm64/kvm/hyp/include/nvhe/arm-smccc.h
Normal file
@@ -0,0 +1,23 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0-only */
|
||||
#ifndef __ARM64_KVM_HYP_NVHE_ARM_SMCCC_H__
|
||||
#define __ARM64_KVM_HYP_NVHE_ARM_SMCCC_H__
|
||||
|
||||
#include <asm/kvm_hypevents.h>
|
||||
|
||||
#include <linux/arm-smccc.h>
|
||||
|
||||
#define hyp_smccc_1_1_smc(...) \
|
||||
do { \
|
||||
trace_hyp_exit(NULL, HYP_REASON_SMC); \
|
||||
arm_smccc_1_1_smc(__VA_ARGS__); \
|
||||
trace_hyp_enter(NULL, HYP_REASON_SMC); \
|
||||
} while (0)
|
||||
|
||||
#define hyp_smccc_1_2_smc(...) \
|
||||
do { \
|
||||
trace_hyp_exit(NULL, HYP_REASON_SMC); \
|
||||
arm_smccc_1_2_smc(__VA_ARGS__); \
|
||||
trace_hyp_enter(NULL, HYP_REASON_SMC); \
|
||||
} while (0)
|
||||
|
||||
#endif /* __ARM64_KVM_HYP_NVHE_ARM_SMCCC_H__ */
|
||||
16
arch/arm64/kvm/hyp/include/nvhe/clock.h
Normal file
16
arch/arm64/kvm/hyp/include/nvhe/clock.h
Normal file
@@ -0,0 +1,16 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef __ARM64_KVM_HYP_NVHE_CLOCK_H
|
||||
#define __ARM64_KVM_HYP_NVHE_CLOCK_H
|
||||
#include <linux/types.h>
|
||||
|
||||
#include <asm/kvm_hyp.h>
|
||||
|
||||
#ifdef CONFIG_NVHE_EL2_TRACING
|
||||
void trace_clock_update(u32 mult, u32 shift, u64 epoch_ns, u64 epoch_cyc);
|
||||
u64 trace_clock(void);
|
||||
#else
|
||||
static inline void
|
||||
trace_clock_update(u32 mult, u32 shift, u64 epoch_ns, u64 epoch_cyc) { }
|
||||
static inline u64 trace_clock(void) { return 0; }
|
||||
#endif
|
||||
#endif
|
||||
14
arch/arm64/kvm/hyp/include/nvhe/define_events.h
Normal file
14
arch/arm64/kvm/hyp/include/nvhe/define_events.h
Normal file
@@ -0,0 +1,14 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
|
||||
#undef HYP_EVENT
|
||||
#define HYP_EVENT(__name, __proto, __struct, __assign, __printk) \
|
||||
struct hyp_event_id hyp_event_id_##__name \
|
||||
__section(".hyp.event_ids."#__name) = { \
|
||||
.enabled = ATOMIC_INIT(0), \
|
||||
}
|
||||
|
||||
#define HYP_EVENT_MULTI_READ
|
||||
#include <asm/kvm_hypevents.h>
|
||||
#undef HYP_EVENT_MULTI_READ
|
||||
|
||||
#undef HYP_EVENT
|
||||
@@ -27,18 +27,22 @@ extern struct host_mmu host_mmu;
|
||||
enum pkvm_component_id {
|
||||
PKVM_ID_HOST,
|
||||
PKVM_ID_HYP,
|
||||
PKVM_ID_FFA,
|
||||
PKVM_ID_GUEST,
|
||||
};
|
||||
|
||||
extern unsigned long hyp_nr_cpus;
|
||||
|
||||
int __pkvm_prot_finalize(void);
|
||||
int __pkvm_host_share_hyp(u64 pfn);
|
||||
int __pkvm_guest_share_host(struct pkvm_hyp_vcpu *vcpu, u64 gfn);
|
||||
int __pkvm_guest_unshare_host(struct pkvm_hyp_vcpu *vcpu, u64 gfn);
|
||||
int __pkvm_host_unshare_hyp(u64 pfn);
|
||||
int __pkvm_host_donate_hyp(u64 pfn, u64 nr_pages);
|
||||
int __pkvm_hyp_donate_host(u64 pfn, u64 nr_pages);
|
||||
int __pkvm_host_share_ffa(u64 pfn, u64 nr_pages);
|
||||
int __pkvm_host_unshare_ffa(u64 pfn, u64 nr_pages);
|
||||
int __pkvm_host_donate_guest(u64 pfn, u64 gfn, struct pkvm_hyp_vcpu *vcpu);
|
||||
int __pkvm_vcpu_in_poison_fault(struct pkvm_hyp_vcpu *hyp_vcpu);
|
||||
int __pkvm_host_force_reclaim_page_guest(phys_addr_t phys);
|
||||
int __pkvm_host_reclaim_page_guest(u64 gfn, struct pkvm_hyp_vm *vm);
|
||||
int __pkvm_host_share_guest(u64 pfn, u64 gfn, u64 nr_pages, struct pkvm_hyp_vcpu *vcpu,
|
||||
enum kvm_pgtable_prot prot);
|
||||
int __pkvm_host_unshare_guest(u64 gfn, u64 nr_pages, struct pkvm_hyp_vm *hyp_vm);
|
||||
@@ -70,6 +74,8 @@ static __always_inline void __load_host_stage2(void)
|
||||
|
||||
#ifdef CONFIG_NVHE_EL2_DEBUG
|
||||
void pkvm_ownership_selftest(void *base);
|
||||
struct pkvm_hyp_vcpu *init_selftest_vm(void *virt);
|
||||
void teardown_selftest_vm(void);
|
||||
#else
|
||||
static inline void pkvm_ownership_selftest(void *base) { }
|
||||
#endif
|
||||
|
||||
@@ -30,8 +30,14 @@ enum pkvm_page_state {
|
||||
* struct hyp_page.
|
||||
*/
|
||||
PKVM_NOPAGE = BIT(0) | BIT(1),
|
||||
|
||||
/*
|
||||
* 'Meta-states' which aren't encoded directly in the PTE's SW bits (or
|
||||
* the hyp_vmemmap entry for the host)
|
||||
*/
|
||||
PKVM_POISON = BIT(2),
|
||||
};
|
||||
#define PKVM_PAGE_STATE_MASK (BIT(0) | BIT(1))
|
||||
#define PKVM_PAGE_STATE_VMEMMAP_MASK (BIT(0) | BIT(1))
|
||||
|
||||
#define PKVM_PAGE_STATE_PROT_MASK (KVM_PGTABLE_PROT_SW0 | KVM_PGTABLE_PROT_SW1)
|
||||
static inline enum kvm_pgtable_prot pkvm_mkstate(enum kvm_pgtable_prot prot,
|
||||
@@ -108,12 +114,12 @@ static inline void set_host_state(struct hyp_page *p, enum pkvm_page_state state
|
||||
|
||||
static inline enum pkvm_page_state get_hyp_state(struct hyp_page *p)
|
||||
{
|
||||
return p->__hyp_state_comp ^ PKVM_PAGE_STATE_MASK;
|
||||
return p->__hyp_state_comp ^ PKVM_PAGE_STATE_VMEMMAP_MASK;
|
||||
}
|
||||
|
||||
static inline void set_hyp_state(struct hyp_page *p, enum pkvm_page_state state)
|
||||
{
|
||||
p->__hyp_state_comp = state ^ PKVM_PAGE_STATE_MASK;
|
||||
p->__hyp_state_comp = state ^ PKVM_PAGE_STATE_VMEMMAP_MASK;
|
||||
}
|
||||
|
||||
/*
|
||||
|
||||
@@ -73,8 +73,12 @@ int __pkvm_init_vm(struct kvm *host_kvm, unsigned long vm_hva,
|
||||
unsigned long pgd_hva);
|
||||
int __pkvm_init_vcpu(pkvm_handle_t handle, struct kvm_vcpu *host_vcpu,
|
||||
unsigned long vcpu_hva);
|
||||
int __pkvm_teardown_vm(pkvm_handle_t handle);
|
||||
|
||||
int __pkvm_reclaim_dying_guest_page(pkvm_handle_t handle, u64 gfn);
|
||||
int __pkvm_start_teardown_vm(pkvm_handle_t handle);
|
||||
int __pkvm_finalize_teardown_vm(pkvm_handle_t handle);
|
||||
|
||||
struct pkvm_hyp_vm *get_vm_by_handle(pkvm_handle_t handle);
|
||||
struct pkvm_hyp_vcpu *pkvm_load_hyp_vcpu(pkvm_handle_t handle,
|
||||
unsigned int vcpu_idx);
|
||||
void pkvm_put_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu);
|
||||
@@ -84,6 +88,7 @@ struct pkvm_hyp_vm *get_pkvm_hyp_vm(pkvm_handle_t handle);
|
||||
struct pkvm_hyp_vm *get_np_pkvm_hyp_vm(pkvm_handle_t handle);
|
||||
void put_pkvm_hyp_vm(struct pkvm_hyp_vm *hyp_vm);
|
||||
|
||||
bool kvm_handle_pvm_hvc64(struct kvm_vcpu *vcpu, u64 *exit_code);
|
||||
bool kvm_handle_pvm_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code);
|
||||
bool kvm_handle_pvm_restricted(struct kvm_vcpu *vcpu, u64 *exit_code);
|
||||
void kvm_init_pvm_id_regs(struct kvm_vcpu *vcpu);
|
||||
|
||||
70
arch/arm64/kvm/hyp/include/nvhe/trace.h
Normal file
70
arch/arm64/kvm/hyp/include/nvhe/trace.h
Normal file
@@ -0,0 +1,70 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0-only */
|
||||
#ifndef __ARM64_KVM_HYP_NVHE_TRACE_H
|
||||
#define __ARM64_KVM_HYP_NVHE_TRACE_H
|
||||
|
||||
#include <linux/trace_remote_event.h>
|
||||
|
||||
#include <asm/kvm_hyptrace.h>
|
||||
|
||||
static inline pid_t __tracing_get_vcpu_pid(struct kvm_cpu_context *host_ctxt)
|
||||
{
|
||||
struct kvm_vcpu *vcpu;
|
||||
|
||||
if (!host_ctxt)
|
||||
host_ctxt = host_data_ptr(host_ctxt);
|
||||
|
||||
vcpu = host_ctxt->__hyp_running_vcpu;
|
||||
|
||||
return vcpu ? vcpu->arch.pid : 0;
|
||||
}
|
||||
|
||||
#define HE_PROTO(__args...) __args
|
||||
#define HE_ASSIGN(__args...) __args
|
||||
#define HE_STRUCT RE_STRUCT
|
||||
#define he_field re_field
|
||||
|
||||
#ifdef CONFIG_NVHE_EL2_TRACING
|
||||
|
||||
#define HYP_EVENT(__name, __proto, __struct, __assign, __printk) \
|
||||
REMOTE_EVENT_FORMAT(__name, __struct); \
|
||||
extern struct hyp_event_id hyp_event_id_##__name; \
|
||||
static __always_inline void trace_##__name(__proto) \
|
||||
{ \
|
||||
struct remote_event_format_##__name *__entry; \
|
||||
size_t length = sizeof(*__entry); \
|
||||
\
|
||||
if (!atomic_read(&hyp_event_id_##__name.enabled)) \
|
||||
return; \
|
||||
__entry = tracing_reserve_entry(length); \
|
||||
if (!__entry) \
|
||||
return; \
|
||||
__entry->hdr.id = hyp_event_id_##__name.id; \
|
||||
__assign \
|
||||
tracing_commit_entry(); \
|
||||
}
|
||||
|
||||
void *tracing_reserve_entry(unsigned long length);
|
||||
void tracing_commit_entry(void);
|
||||
|
||||
int __tracing_load(unsigned long desc_va, size_t desc_size);
|
||||
void __tracing_unload(void);
|
||||
int __tracing_enable(bool enable);
|
||||
int __tracing_swap_reader(unsigned int cpu);
|
||||
void __tracing_update_clock(u32 mult, u32 shift, u64 epoch_ns, u64 epoch_cyc);
|
||||
int __tracing_reset(unsigned int cpu);
|
||||
int __tracing_enable_event(unsigned short id, bool enable);
|
||||
#else
|
||||
static inline void *tracing_reserve_entry(unsigned long length) { return NULL; }
|
||||
static inline void tracing_commit_entry(void) { }
|
||||
#define HYP_EVENT(__name, __proto, __struct, __assign, __printk) \
|
||||
static inline void trace_##__name(__proto) {}
|
||||
|
||||
static inline int __tracing_load(unsigned long desc_va, size_t desc_size) { return -ENODEV; }
|
||||
static inline void __tracing_unload(void) { }
|
||||
static inline int __tracing_enable(bool enable) { return -ENODEV; }
|
||||
static inline int __tracing_swap_reader(unsigned int cpu) { return -ENODEV; }
|
||||
static inline void __tracing_update_clock(u32 mult, u32 shift, u64 epoch_ns, u64 epoch_cyc) { }
|
||||
static inline int __tracing_reset(unsigned int cpu) { return -ENODEV; }
|
||||
static inline int __tracing_enable_event(unsigned short id, bool enable) { return -ENODEV; }
|
||||
#endif
|
||||
#endif
|
||||
@@ -16,4 +16,6 @@
|
||||
__always_unused int ___check_reg_ ## reg; \
|
||||
type name = (type)cpu_reg(ctxt, (reg))
|
||||
|
||||
void inject_host_exception(u64 esr);
|
||||
|
||||
#endif /* __ARM64_KVM_NVHE_TRAP_HANDLER_H__ */
|
||||
|
||||
@@ -17,7 +17,7 @@ ccflags-y += -fno-stack-protector \
|
||||
hostprogs := gen-hyprel
|
||||
HOST_EXTRACFLAGS += -I$(objtree)/include
|
||||
|
||||
lib-objs := clear_page.o copy_page.o memcpy.o memset.o
|
||||
lib-objs := clear_page.o copy_page.o memcpy.o memset.o tishift.o
|
||||
lib-objs := $(addprefix ../../../lib/, $(lib-objs))
|
||||
|
||||
CFLAGS_switch.nvhe.o += -Wno-override-init
|
||||
@@ -26,11 +26,15 @@ hyp-obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o
|
||||
hyp-main.o hyp-smp.o psci-relay.o early_alloc.o page_alloc.o \
|
||||
cache.o setup.o mm.o mem_protect.o sys_regs.o pkvm.o stacktrace.o ffa.o
|
||||
hyp-obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \
|
||||
../fpsimd.o ../hyp-entry.o ../exception.o ../pgtable.o
|
||||
../fpsimd.o ../hyp-entry.o ../exception.o ../pgtable.o ../vgic-v5-sr.o
|
||||
hyp-obj-y += ../../../kernel/smccc-call.o
|
||||
hyp-obj-$(CONFIG_LIST_HARDENED) += list_debug.o
|
||||
hyp-obj-$(CONFIG_NVHE_EL2_TRACING) += clock.o trace.o events.o
|
||||
hyp-obj-y += $(lib-objs)
|
||||
|
||||
# Path to simple_ring_buffer.c
|
||||
CFLAGS_trace.nvhe.o += -I$(srctree)/kernel/trace/
|
||||
|
||||
##
|
||||
## Build rules for compiling nVHE hyp code
|
||||
## Output of this folder is `kvm_nvhe.o`, a partially linked object
|
||||
|
||||
65
arch/arm64/kvm/hyp/nvhe/clock.c
Normal file
65
arch/arm64/kvm/hyp/nvhe/clock.c
Normal file
@@ -0,0 +1,65 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Copyright (C) 2025 Google LLC
|
||||
* Author: Vincent Donnefort <vdonnefort@google.com>
|
||||
*/
|
||||
|
||||
#include <nvhe/clock.h>
|
||||
|
||||
#include <asm/arch_timer.h>
|
||||
#include <asm/div64.h>
|
||||
|
||||
static struct clock_data {
|
||||
struct {
|
||||
u32 mult;
|
||||
u32 shift;
|
||||
u64 epoch_ns;
|
||||
u64 epoch_cyc;
|
||||
u64 cyc_overflow64;
|
||||
} data[2];
|
||||
u64 cur;
|
||||
} trace_clock_data;
|
||||
|
||||
static u64 __clock_mult_uint128(u64 cyc, u32 mult, u32 shift)
|
||||
{
|
||||
__uint128_t ns = (__uint128_t)cyc * mult;
|
||||
|
||||
ns >>= shift;
|
||||
|
||||
return (u64)ns;
|
||||
}
|
||||
|
||||
/* Does not guarantee no reader on the modified bank. */
|
||||
void trace_clock_update(u32 mult, u32 shift, u64 epoch_ns, u64 epoch_cyc)
|
||||
{
|
||||
struct clock_data *clock = &trace_clock_data;
|
||||
u64 bank = clock->cur ^ 1;
|
||||
|
||||
clock->data[bank].mult = mult;
|
||||
clock->data[bank].shift = shift;
|
||||
clock->data[bank].epoch_ns = epoch_ns;
|
||||
clock->data[bank].epoch_cyc = epoch_cyc;
|
||||
clock->data[bank].cyc_overflow64 = ULONG_MAX / mult;
|
||||
|
||||
smp_store_release(&clock->cur, bank);
|
||||
}
|
||||
|
||||
/* Use untrusted host data */
|
||||
u64 trace_clock(void)
|
||||
{
|
||||
struct clock_data *clock = &trace_clock_data;
|
||||
u64 bank = smp_load_acquire(&clock->cur);
|
||||
u64 cyc, ns;
|
||||
|
||||
cyc = __arch_counter_get_cntvct() - clock->data[bank].epoch_cyc;
|
||||
|
||||
if (likely(cyc < clock->data[bank].cyc_overflow64)) {
|
||||
ns = cyc * clock->data[bank].mult;
|
||||
ns >>= clock->data[bank].shift;
|
||||
} else {
|
||||
ns = __clock_mult_uint128(cyc, clock->data[bank].mult,
|
||||
clock->data[bank].shift);
|
||||
}
|
||||
|
||||
return (u64)ns + clock->data[bank].epoch_ns;
|
||||
}
|
||||
@@ -14,20 +14,20 @@
|
||||
#include <asm/kvm_hyp.h>
|
||||
#include <asm/kvm_mmu.h>
|
||||
|
||||
static void __debug_save_spe(u64 *pmscr_el1)
|
||||
static void __debug_save_spe(void)
|
||||
{
|
||||
u64 reg;
|
||||
u64 *pmscr_el1, *pmblimitr_el1;
|
||||
|
||||
/* Clear pmscr in case of early return */
|
||||
*pmscr_el1 = 0;
|
||||
pmscr_el1 = host_data_ptr(host_debug_state.pmscr_el1);
|
||||
pmblimitr_el1 = host_data_ptr(host_debug_state.pmblimitr_el1);
|
||||
|
||||
/*
|
||||
* At this point, we know that this CPU implements
|
||||
* SPE and is available to the host.
|
||||
* Check if the host is actually using it ?
|
||||
*/
|
||||
reg = read_sysreg_s(SYS_PMBLIMITR_EL1);
|
||||
if (!(reg & BIT(PMBLIMITR_EL1_E_SHIFT)))
|
||||
*pmblimitr_el1 = read_sysreg_s(SYS_PMBLIMITR_EL1);
|
||||
if (!(*pmblimitr_el1 & BIT(PMBLIMITR_EL1_E_SHIFT)))
|
||||
return;
|
||||
|
||||
/* Yes; save the control register and disable data generation */
|
||||
@@ -37,18 +37,29 @@ static void __debug_save_spe(u64 *pmscr_el1)
|
||||
|
||||
/* Now drain all buffered data to memory */
|
||||
psb_csync();
|
||||
dsb(nsh);
|
||||
|
||||
/* And disable the profiling buffer */
|
||||
write_sysreg_s(0, SYS_PMBLIMITR_EL1);
|
||||
isb();
|
||||
}
|
||||
|
||||
static void __debug_restore_spe(u64 pmscr_el1)
|
||||
static void __debug_restore_spe(void)
|
||||
{
|
||||
if (!pmscr_el1)
|
||||
u64 pmblimitr_el1 = *host_data_ptr(host_debug_state.pmblimitr_el1);
|
||||
|
||||
if (!(pmblimitr_el1 & BIT(PMBLIMITR_EL1_E_SHIFT)))
|
||||
return;
|
||||
|
||||
/* The host page table is installed, but not yet synchronised */
|
||||
isb();
|
||||
|
||||
/* Re-enable the profiling buffer. */
|
||||
write_sysreg_s(pmblimitr_el1, SYS_PMBLIMITR_EL1);
|
||||
isb();
|
||||
|
||||
/* Re-enable data generation */
|
||||
write_sysreg_el1(pmscr_el1, SYS_PMSCR);
|
||||
write_sysreg_el1(*host_data_ptr(host_debug_state.pmscr_el1), SYS_PMSCR);
|
||||
}
|
||||
|
||||
static void __trace_do_switch(u64 *saved_trfcr, u64 new_trfcr)
|
||||
@@ -57,12 +68,54 @@ static void __trace_do_switch(u64 *saved_trfcr, u64 new_trfcr)
|
||||
write_sysreg_el1(new_trfcr, SYS_TRFCR);
|
||||
}
|
||||
|
||||
static bool __trace_needs_drain(void)
|
||||
static void __trace_drain_and_disable(void)
|
||||
{
|
||||
if (is_protected_kvm_enabled() && host_data_test_flag(HAS_TRBE))
|
||||
return read_sysreg_s(SYS_TRBLIMITR_EL1) & TRBLIMITR_EL1_E;
|
||||
u64 *trblimitr_el1 = host_data_ptr(host_debug_state.trblimitr_el1);
|
||||
bool needs_drain = is_protected_kvm_enabled() ?
|
||||
host_data_test_flag(HAS_TRBE) :
|
||||
host_data_test_flag(TRBE_ENABLED);
|
||||
|
||||
return host_data_test_flag(TRBE_ENABLED);
|
||||
if (!needs_drain) {
|
||||
*trblimitr_el1 = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
*trblimitr_el1 = read_sysreg_s(SYS_TRBLIMITR_EL1);
|
||||
if (*trblimitr_el1 & TRBLIMITR_EL1_E) {
|
||||
/*
|
||||
* The host has enabled the Trace Buffer Unit so we have
|
||||
* to beat the CPU with a stick until it stops accessing
|
||||
* memory.
|
||||
*/
|
||||
|
||||
/* First, ensure that our prior write to TRFCR has stuck. */
|
||||
isb();
|
||||
|
||||
/* Now synchronise with the trace and drain the buffer. */
|
||||
tsb_csync();
|
||||
dsb(nsh);
|
||||
|
||||
/*
|
||||
* With no more trace being generated, we can disable the
|
||||
* Trace Buffer Unit.
|
||||
*/
|
||||
write_sysreg_s(0, SYS_TRBLIMITR_EL1);
|
||||
if (cpus_have_final_cap(ARM64_WORKAROUND_2064142)) {
|
||||
/*
|
||||
* Some CPUs are so good, we have to drain 'em
|
||||
* twice.
|
||||
*/
|
||||
tsb_csync();
|
||||
dsb(nsh);
|
||||
}
|
||||
|
||||
/*
|
||||
* Ensure that the Trace Buffer Unit is disabled before
|
||||
* we start mucking with the stage-2 and trap
|
||||
* configuration.
|
||||
*/
|
||||
isb();
|
||||
}
|
||||
}
|
||||
|
||||
static bool __trace_needs_switch(void)
|
||||
@@ -79,21 +132,34 @@ static void __trace_switch_to_guest(void)
|
||||
|
||||
__trace_do_switch(host_data_ptr(host_debug_state.trfcr_el1),
|
||||
*host_data_ptr(trfcr_while_in_guest));
|
||||
|
||||
if (__trace_needs_drain()) {
|
||||
isb();
|
||||
tsb_csync();
|
||||
}
|
||||
__trace_drain_and_disable();
|
||||
}
|
||||
|
||||
static void __trace_switch_to_host(void)
|
||||
{
|
||||
u64 trblimitr_el1 = *host_data_ptr(host_debug_state.trblimitr_el1);
|
||||
|
||||
if (trblimitr_el1 & TRBLIMITR_EL1_E) {
|
||||
/* Re-enable the Trace Buffer Unit for the host. */
|
||||
write_sysreg_s(trblimitr_el1, SYS_TRBLIMITR_EL1);
|
||||
isb();
|
||||
if (cpus_have_final_cap(ARM64_WORKAROUND_2038923)) {
|
||||
/*
|
||||
* Make sure the unit is re-enabled before we
|
||||
* poke TRFCR.
|
||||
*/
|
||||
isb();
|
||||
}
|
||||
}
|
||||
|
||||
__trace_do_switch(host_data_ptr(trfcr_while_in_guest),
|
||||
*host_data_ptr(host_debug_state.trfcr_el1));
|
||||
}
|
||||
|
||||
static void __debug_save_brbe(u64 *brbcr_el1)
|
||||
static void __debug_save_brbe(void)
|
||||
{
|
||||
u64 *brbcr_el1 = host_data_ptr(host_debug_state.brbcr_el1);
|
||||
|
||||
*brbcr_el1 = 0;
|
||||
|
||||
/* Check if the BRBE is enabled */
|
||||
@@ -109,8 +175,10 @@ static void __debug_save_brbe(u64 *brbcr_el1)
|
||||
write_sysreg_el1(0, SYS_BRBCR);
|
||||
}
|
||||
|
||||
static void __debug_restore_brbe(u64 brbcr_el1)
|
||||
static void __debug_restore_brbe(void)
|
||||
{
|
||||
u64 brbcr_el1 = *host_data_ptr(host_debug_state.brbcr_el1);
|
||||
|
||||
if (!brbcr_el1)
|
||||
return;
|
||||
|
||||
@@ -122,11 +190,11 @@ void __debug_save_host_buffers_nvhe(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
/* Disable and flush SPE data generation */
|
||||
if (host_data_test_flag(HAS_SPE))
|
||||
__debug_save_spe(host_data_ptr(host_debug_state.pmscr_el1));
|
||||
__debug_save_spe();
|
||||
|
||||
/* Disable BRBE branch records */
|
||||
if (host_data_test_flag(HAS_BRBE))
|
||||
__debug_save_brbe(host_data_ptr(host_debug_state.brbcr_el1));
|
||||
__debug_save_brbe();
|
||||
|
||||
if (__trace_needs_switch())
|
||||
__trace_switch_to_guest();
|
||||
@@ -140,9 +208,9 @@ void __debug_switch_to_guest(struct kvm_vcpu *vcpu)
|
||||
void __debug_restore_host_buffers_nvhe(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if (host_data_test_flag(HAS_SPE))
|
||||
__debug_restore_spe(*host_data_ptr(host_debug_state.pmscr_el1));
|
||||
__debug_restore_spe();
|
||||
if (host_data_test_flag(HAS_BRBE))
|
||||
__debug_restore_brbe(*host_data_ptr(host_debug_state.brbcr_el1));
|
||||
__debug_restore_brbe();
|
||||
if (__trace_needs_switch())
|
||||
__trace_switch_to_host();
|
||||
}
|
||||
|
||||
25
arch/arm64/kvm/hyp/nvhe/events.c
Normal file
25
arch/arm64/kvm/hyp/nvhe/events.c
Normal file
@@ -0,0 +1,25 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* Copyright (C) 2025 Google LLC
|
||||
* Author: Vincent Donnefort <vdonnefort@google.com>
|
||||
*/
|
||||
|
||||
#include <nvhe/mm.h>
|
||||
#include <nvhe/trace.h>
|
||||
|
||||
#include <nvhe/define_events.h>
|
||||
|
||||
int __tracing_enable_event(unsigned short id, bool enable)
|
||||
{
|
||||
struct hyp_event_id *event_id = &__hyp_event_ids_start[id];
|
||||
atomic_t *enabled;
|
||||
|
||||
if (event_id >= __hyp_event_ids_end)
|
||||
return -EINVAL;
|
||||
|
||||
enabled = hyp_fixmap_map(__hyp_pa(&event_id->enabled));
|
||||
atomic_set(enabled, enable);
|
||||
hyp_fixmap_unmap();
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -26,10 +26,10 @@
|
||||
* the duration and are therefore serialised.
|
||||
*/
|
||||
|
||||
#include <linux/arm-smccc.h>
|
||||
#include <linux/arm_ffa.h>
|
||||
#include <asm/kvm_pkvm.h>
|
||||
|
||||
#include <nvhe/arm-smccc.h>
|
||||
#include <nvhe/ffa.h>
|
||||
#include <nvhe/mem_protect.h>
|
||||
#include <nvhe/memory.h>
|
||||
@@ -147,7 +147,7 @@ static int ffa_map_hyp_buffers(u64 ffa_page_count)
|
||||
{
|
||||
struct arm_smccc_1_2_regs res;
|
||||
|
||||
arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) {
|
||||
hyp_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) {
|
||||
.a0 = FFA_FN64_RXTX_MAP,
|
||||
.a1 = hyp_virt_to_phys(hyp_buffers.tx),
|
||||
.a2 = hyp_virt_to_phys(hyp_buffers.rx),
|
||||
@@ -161,7 +161,7 @@ static int ffa_unmap_hyp_buffers(void)
|
||||
{
|
||||
struct arm_smccc_1_2_regs res;
|
||||
|
||||
arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) {
|
||||
hyp_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) {
|
||||
.a0 = FFA_RXTX_UNMAP,
|
||||
.a1 = HOST_FFA_ID,
|
||||
}, &res);
|
||||
@@ -172,7 +172,7 @@ static int ffa_unmap_hyp_buffers(void)
|
||||
static void ffa_mem_frag_tx(struct arm_smccc_1_2_regs *res, u32 handle_lo,
|
||||
u32 handle_hi, u32 fraglen, u32 endpoint_id)
|
||||
{
|
||||
arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) {
|
||||
hyp_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) {
|
||||
.a0 = FFA_MEM_FRAG_TX,
|
||||
.a1 = handle_lo,
|
||||
.a2 = handle_hi,
|
||||
@@ -184,7 +184,7 @@ static void ffa_mem_frag_tx(struct arm_smccc_1_2_regs *res, u32 handle_lo,
|
||||
static void ffa_mem_frag_rx(struct arm_smccc_1_2_regs *res, u32 handle_lo,
|
||||
u32 handle_hi, u32 fragoff)
|
||||
{
|
||||
arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) {
|
||||
hyp_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) {
|
||||
.a0 = FFA_MEM_FRAG_RX,
|
||||
.a1 = handle_lo,
|
||||
.a2 = handle_hi,
|
||||
@@ -196,7 +196,7 @@ static void ffa_mem_frag_rx(struct arm_smccc_1_2_regs *res, u32 handle_lo,
|
||||
static void ffa_mem_xfer(struct arm_smccc_1_2_regs *res, u64 func_id, u32 len,
|
||||
u32 fraglen)
|
||||
{
|
||||
arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) {
|
||||
hyp_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) {
|
||||
.a0 = func_id,
|
||||
.a1 = len,
|
||||
.a2 = fraglen,
|
||||
@@ -206,7 +206,7 @@ static void ffa_mem_xfer(struct arm_smccc_1_2_regs *res, u64 func_id, u32 len,
|
||||
static void ffa_mem_reclaim(struct arm_smccc_1_2_regs *res, u32 handle_lo,
|
||||
u32 handle_hi, u32 flags)
|
||||
{
|
||||
arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) {
|
||||
hyp_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) {
|
||||
.a0 = FFA_MEM_RECLAIM,
|
||||
.a1 = handle_lo,
|
||||
.a2 = handle_hi,
|
||||
@@ -216,7 +216,7 @@ static void ffa_mem_reclaim(struct arm_smccc_1_2_regs *res, u32 handle_lo,
|
||||
|
||||
static void ffa_retrieve_req(struct arm_smccc_1_2_regs *res, u32 len)
|
||||
{
|
||||
arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) {
|
||||
hyp_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) {
|
||||
.a0 = FFA_FN64_MEM_RETRIEVE_REQ,
|
||||
.a1 = len,
|
||||
.a2 = len,
|
||||
@@ -225,7 +225,7 @@ static void ffa_retrieve_req(struct arm_smccc_1_2_regs *res, u32 len)
|
||||
|
||||
static void ffa_rx_release(struct arm_smccc_1_2_regs *res)
|
||||
{
|
||||
arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) {
|
||||
hyp_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) {
|
||||
.a0 = FFA_RX_RELEASE,
|
||||
}, res);
|
||||
}
|
||||
@@ -728,7 +728,7 @@ static int hyp_ffa_post_init(void)
|
||||
size_t min_rxtx_sz;
|
||||
struct arm_smccc_1_2_regs res;
|
||||
|
||||
arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs){
|
||||
hyp_smccc_1_2_smc(&(struct arm_smccc_1_2_regs){
|
||||
.a0 = FFA_ID_GET,
|
||||
}, &res);
|
||||
if (res.a0 != FFA_SUCCESS)
|
||||
@@ -737,7 +737,7 @@ static int hyp_ffa_post_init(void)
|
||||
if (res.a2 != HOST_FFA_ID)
|
||||
return -EINVAL;
|
||||
|
||||
arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs){
|
||||
hyp_smccc_1_2_smc(&(struct arm_smccc_1_2_regs){
|
||||
.a0 = FFA_FEATURES,
|
||||
.a1 = FFA_FN64_RXTX_MAP,
|
||||
}, &res);
|
||||
@@ -788,7 +788,7 @@ static void do_ffa_version(struct arm_smccc_1_2_regs *res,
|
||||
* first if TEE supports it.
|
||||
*/
|
||||
if (FFA_MINOR_VERSION(ffa_req_version) < FFA_MINOR_VERSION(hyp_ffa_version)) {
|
||||
arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) {
|
||||
hyp_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) {
|
||||
.a0 = FFA_VERSION,
|
||||
.a1 = ffa_req_version,
|
||||
}, res);
|
||||
@@ -824,7 +824,7 @@ static void do_ffa_part_get(struct arm_smccc_1_2_regs *res,
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) {
|
||||
hyp_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) {
|
||||
.a0 = FFA_PARTITION_INFO_GET,
|
||||
.a1 = uuid0,
|
||||
.a2 = uuid1,
|
||||
@@ -939,7 +939,7 @@ int hyp_ffa_init(void *pages)
|
||||
if (kvm_host_psci_config.smccc_version < ARM_SMCCC_VERSION_1_2)
|
||||
return 0;
|
||||
|
||||
arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) {
|
||||
hyp_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) {
|
||||
.a0 = FFA_VERSION,
|
||||
.a1 = FFA_VERSION_1_2,
|
||||
}, &res);
|
||||
|
||||
@@ -120,12 +120,11 @@ SYM_FUNC_START(__hyp_do_panic)
|
||||
|
||||
mov x29, x0
|
||||
|
||||
#ifdef CONFIG_NVHE_EL2_DEBUG
|
||||
#ifdef PKVM_DISABLE_STAGE2_ON_PANIC
|
||||
/* Ensure host stage-2 is disabled */
|
||||
mrs x0, hcr_el2
|
||||
bic x0, x0, #HCR_VM
|
||||
msr_hcr_el2 x0
|
||||
isb
|
||||
tlbi vmalls12e1
|
||||
dsb nsh
|
||||
#endif
|
||||
@@ -291,13 +290,3 @@ SYM_CODE_START(__kvm_hyp_host_forward_smc)
|
||||
|
||||
ret
|
||||
SYM_CODE_END(__kvm_hyp_host_forward_smc)
|
||||
|
||||
/*
|
||||
* kvm_host_psci_cpu_entry is called through br instruction, which requires
|
||||
* bti j instruction as compilers (gcc and llvm) doesn't insert bti j for external
|
||||
* functions, but bti c instead.
|
||||
*/
|
||||
SYM_CODE_START(kvm_host_psci_cpu_entry)
|
||||
bti j
|
||||
b __kvm_host_psci_cpu_entry
|
||||
SYM_CODE_END(kvm_host_psci_cpu_entry)
|
||||
|
||||
@@ -173,9 +173,8 @@ SYM_CODE_END(___kvm_hyp_init)
|
||||
* x0: struct kvm_nvhe_init_params PA
|
||||
*/
|
||||
SYM_CODE_START(kvm_hyp_cpu_entry)
|
||||
mov x1, #1 // is_cpu_on = true
|
||||
ldr x29, =__kvm_host_psci_cpu_on_entry
|
||||
b __kvm_hyp_init_cpu
|
||||
SYM_CODE_END(kvm_hyp_cpu_entry)
|
||||
|
||||
/*
|
||||
* PSCI CPU_SUSPEND / SYSTEM_SUSPEND entry point
|
||||
@@ -183,32 +182,17 @@ SYM_CODE_END(kvm_hyp_cpu_entry)
|
||||
* x0: struct kvm_nvhe_init_params PA
|
||||
*/
|
||||
SYM_CODE_START(kvm_hyp_cpu_resume)
|
||||
mov x1, #0 // is_cpu_on = false
|
||||
b __kvm_hyp_init_cpu
|
||||
SYM_CODE_END(kvm_hyp_cpu_resume)
|
||||
ldr x29, =__kvm_host_psci_cpu_resume_entry
|
||||
|
||||
/*
|
||||
* Common code for CPU entry points. Initializes EL2 state and
|
||||
* installs the hypervisor before handing over to a C handler.
|
||||
*
|
||||
* x0: struct kvm_nvhe_init_params PA
|
||||
* x1: bool is_cpu_on
|
||||
*/
|
||||
SYM_CODE_START_LOCAL(__kvm_hyp_init_cpu)
|
||||
SYM_INNER_LABEL(__kvm_hyp_init_cpu, SYM_L_LOCAL)
|
||||
mov x28, x0 // Stash arguments
|
||||
mov x29, x1
|
||||
|
||||
/* Check that the core was booted in EL2. */
|
||||
mrs x0, CurrentEL
|
||||
cmp x0, #CurrentEL_EL2
|
||||
b.eq 2f
|
||||
b.ne 1f
|
||||
|
||||
/* The core booted in EL1. KVM cannot be initialized on it. */
|
||||
1: wfe
|
||||
wfi
|
||||
b 1b
|
||||
|
||||
2: msr SPsel, #1 // We want to use SP_EL{1,2}
|
||||
msr SPsel, #1 // We want to use SP_EL2
|
||||
|
||||
init_el2_hcr 0
|
||||
|
||||
@@ -218,11 +202,16 @@ SYM_CODE_START_LOCAL(__kvm_hyp_init_cpu)
|
||||
mov x0, x28
|
||||
bl ___kvm_hyp_init // Clobbers x0..x2
|
||||
|
||||
/* Leave idmap. */
|
||||
mov x0, x29
|
||||
ldr x1, =kvm_host_psci_cpu_entry
|
||||
br x1
|
||||
SYM_CODE_END(__kvm_hyp_init_cpu)
|
||||
/* Leave idmap -- using BLR is OK, LR is restored from host context */
|
||||
blr x29
|
||||
|
||||
// The core booted in EL1, or the C code unexpectedly returned.
|
||||
// Either way, KVM cannot be initialized on it.
|
||||
1: wfe
|
||||
wfi
|
||||
b 1b
|
||||
SYM_CODE_END(kvm_hyp_cpu_resume)
|
||||
SYM_CODE_END(kvm_hyp_cpu_entry)
|
||||
|
||||
SYM_CODE_START(__kvm_handle_stub_hvc)
|
||||
/*
|
||||
|
||||
@@ -12,12 +12,14 @@
|
||||
#include <asm/kvm_emulate.h>
|
||||
#include <asm/kvm_host.h>
|
||||
#include <asm/kvm_hyp.h>
|
||||
#include <asm/kvm_hypevents.h>
|
||||
#include <asm/kvm_mmu.h>
|
||||
|
||||
#include <nvhe/ffa.h>
|
||||
#include <nvhe/mem_protect.h>
|
||||
#include <nvhe/mm.h>
|
||||
#include <nvhe/pkvm.h>
|
||||
#include <nvhe/trace.h>
|
||||
#include <nvhe/trap_handler.h>
|
||||
|
||||
DEFINE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params);
|
||||
@@ -136,6 +138,8 @@ static void flush_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu)
|
||||
hyp_vcpu->vcpu.arch.vsesr_el2 = host_vcpu->arch.vsesr_el2;
|
||||
|
||||
hyp_vcpu->vcpu.arch.vgic_cpu.vgic_v3 = host_vcpu->arch.vgic_cpu.vgic_v3;
|
||||
|
||||
hyp_vcpu->vcpu.arch.pid = host_vcpu->arch.pid;
|
||||
}
|
||||
|
||||
static void sync_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu)
|
||||
@@ -169,9 +173,6 @@ static void handle___pkvm_vcpu_load(struct kvm_cpu_context *host_ctxt)
|
||||
DECLARE_REG(u64, hcr_el2, host_ctxt, 3);
|
||||
struct pkvm_hyp_vcpu *hyp_vcpu;
|
||||
|
||||
if (!is_protected_kvm_enabled())
|
||||
return;
|
||||
|
||||
hyp_vcpu = pkvm_load_hyp_vcpu(handle, vcpu_idx);
|
||||
if (!hyp_vcpu)
|
||||
return;
|
||||
@@ -188,12 +189,8 @@ static void handle___pkvm_vcpu_load(struct kvm_cpu_context *host_ctxt)
|
||||
|
||||
static void handle___pkvm_vcpu_put(struct kvm_cpu_context *host_ctxt)
|
||||
{
|
||||
struct pkvm_hyp_vcpu *hyp_vcpu;
|
||||
struct pkvm_hyp_vcpu *hyp_vcpu = pkvm_get_loaded_hyp_vcpu();
|
||||
|
||||
if (!is_protected_kvm_enabled())
|
||||
return;
|
||||
|
||||
hyp_vcpu = pkvm_get_loaded_hyp_vcpu();
|
||||
if (hyp_vcpu)
|
||||
pkvm_put_hyp_vcpu(hyp_vcpu);
|
||||
}
|
||||
@@ -248,6 +245,26 @@ static int pkvm_refill_memcache(struct pkvm_hyp_vcpu *hyp_vcpu)
|
||||
&host_vcpu->arch.pkvm_memcache);
|
||||
}
|
||||
|
||||
static void handle___pkvm_host_donate_guest(struct kvm_cpu_context *host_ctxt)
|
||||
{
|
||||
DECLARE_REG(u64, pfn, host_ctxt, 1);
|
||||
DECLARE_REG(u64, gfn, host_ctxt, 2);
|
||||
struct pkvm_hyp_vcpu *hyp_vcpu;
|
||||
int ret = -EINVAL;
|
||||
|
||||
hyp_vcpu = pkvm_get_loaded_hyp_vcpu();
|
||||
if (!hyp_vcpu || !pkvm_hyp_vcpu_is_protected(hyp_vcpu))
|
||||
goto out;
|
||||
|
||||
ret = pkvm_refill_memcache(hyp_vcpu);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
ret = __pkvm_host_donate_guest(pfn, gfn, hyp_vcpu);
|
||||
out:
|
||||
cpu_reg(host_ctxt, 1) = ret;
|
||||
}
|
||||
|
||||
static void handle___pkvm_host_share_guest(struct kvm_cpu_context *host_ctxt)
|
||||
{
|
||||
DECLARE_REG(u64, pfn, host_ctxt, 1);
|
||||
@@ -257,9 +274,6 @@ static void handle___pkvm_host_share_guest(struct kvm_cpu_context *host_ctxt)
|
||||
struct pkvm_hyp_vcpu *hyp_vcpu;
|
||||
int ret = -EINVAL;
|
||||
|
||||
if (!is_protected_kvm_enabled())
|
||||
goto out;
|
||||
|
||||
hyp_vcpu = pkvm_get_loaded_hyp_vcpu();
|
||||
if (!hyp_vcpu || pkvm_hyp_vcpu_is_protected(hyp_vcpu))
|
||||
goto out;
|
||||
@@ -281,9 +295,6 @@ static void handle___pkvm_host_unshare_guest(struct kvm_cpu_context *host_ctxt)
|
||||
struct pkvm_hyp_vm *hyp_vm;
|
||||
int ret = -EINVAL;
|
||||
|
||||
if (!is_protected_kvm_enabled())
|
||||
goto out;
|
||||
|
||||
hyp_vm = get_np_pkvm_hyp_vm(handle);
|
||||
if (!hyp_vm)
|
||||
goto out;
|
||||
@@ -301,9 +312,6 @@ static void handle___pkvm_host_relax_perms_guest(struct kvm_cpu_context *host_ct
|
||||
struct pkvm_hyp_vcpu *hyp_vcpu;
|
||||
int ret = -EINVAL;
|
||||
|
||||
if (!is_protected_kvm_enabled())
|
||||
goto out;
|
||||
|
||||
hyp_vcpu = pkvm_get_loaded_hyp_vcpu();
|
||||
if (!hyp_vcpu || pkvm_hyp_vcpu_is_protected(hyp_vcpu))
|
||||
goto out;
|
||||
@@ -321,9 +329,6 @@ static void handle___pkvm_host_wrprotect_guest(struct kvm_cpu_context *host_ctxt
|
||||
struct pkvm_hyp_vm *hyp_vm;
|
||||
int ret = -EINVAL;
|
||||
|
||||
if (!is_protected_kvm_enabled())
|
||||
goto out;
|
||||
|
||||
hyp_vm = get_np_pkvm_hyp_vm(handle);
|
||||
if (!hyp_vm)
|
||||
goto out;
|
||||
@@ -343,9 +348,6 @@ static void handle___pkvm_host_test_clear_young_guest(struct kvm_cpu_context *ho
|
||||
struct pkvm_hyp_vm *hyp_vm;
|
||||
int ret = -EINVAL;
|
||||
|
||||
if (!is_protected_kvm_enabled())
|
||||
goto out;
|
||||
|
||||
hyp_vm = get_np_pkvm_hyp_vm(handle);
|
||||
if (!hyp_vm)
|
||||
goto out;
|
||||
@@ -362,9 +364,6 @@ static void handle___pkvm_host_mkyoung_guest(struct kvm_cpu_context *host_ctxt)
|
||||
struct pkvm_hyp_vcpu *hyp_vcpu;
|
||||
int ret = -EINVAL;
|
||||
|
||||
if (!is_protected_kvm_enabled())
|
||||
goto out;
|
||||
|
||||
hyp_vcpu = pkvm_get_loaded_hyp_vcpu();
|
||||
if (!hyp_vcpu || pkvm_hyp_vcpu_is_protected(hyp_vcpu))
|
||||
goto out;
|
||||
@@ -424,12 +423,8 @@ static void handle___kvm_tlb_flush_vmid(struct kvm_cpu_context *host_ctxt)
|
||||
static void handle___pkvm_tlb_flush_vmid(struct kvm_cpu_context *host_ctxt)
|
||||
{
|
||||
DECLARE_REG(pkvm_handle_t, handle, host_ctxt, 1);
|
||||
struct pkvm_hyp_vm *hyp_vm;
|
||||
struct pkvm_hyp_vm *hyp_vm = get_np_pkvm_hyp_vm(handle);
|
||||
|
||||
if (!is_protected_kvm_enabled())
|
||||
return;
|
||||
|
||||
hyp_vm = get_np_pkvm_hyp_vm(handle);
|
||||
if (!hyp_vm)
|
||||
return;
|
||||
|
||||
@@ -486,17 +481,15 @@ static void handle___pkvm_init(struct kvm_cpu_context *host_ctxt)
|
||||
{
|
||||
DECLARE_REG(phys_addr_t, phys, host_ctxt, 1);
|
||||
DECLARE_REG(unsigned long, size, host_ctxt, 2);
|
||||
DECLARE_REG(unsigned long, nr_cpus, host_ctxt, 3);
|
||||
DECLARE_REG(unsigned long *, per_cpu_base, host_ctxt, 4);
|
||||
DECLARE_REG(u32, hyp_va_bits, host_ctxt, 5);
|
||||
DECLARE_REG(unsigned long *, per_cpu_base, host_ctxt, 3);
|
||||
DECLARE_REG(u32, hyp_va_bits, host_ctxt, 4);
|
||||
|
||||
/*
|
||||
* __pkvm_init() will return only if an error occurred, otherwise it
|
||||
* will tail-call in __pkvm_init_finalise() which will have to deal
|
||||
* with the host context directly.
|
||||
*/
|
||||
cpu_reg(host_ctxt, 1) = __pkvm_init(phys, size, nr_cpus, per_cpu_base,
|
||||
hyp_va_bits);
|
||||
cpu_reg(host_ctxt, 1) = __pkvm_init(phys, size, per_cpu_base, hyp_va_bits);
|
||||
}
|
||||
|
||||
static void handle___pkvm_cpu_set_vector(struct kvm_cpu_context *host_ctxt)
|
||||
@@ -582,11 +575,115 @@ static void handle___pkvm_init_vcpu(struct kvm_cpu_context *host_ctxt)
|
||||
cpu_reg(host_ctxt, 1) = __pkvm_init_vcpu(handle, host_vcpu, vcpu_hva);
|
||||
}
|
||||
|
||||
static void handle___pkvm_teardown_vm(struct kvm_cpu_context *host_ctxt)
|
||||
static void handle___pkvm_vcpu_in_poison_fault(struct kvm_cpu_context *host_ctxt)
|
||||
{
|
||||
int ret;
|
||||
struct pkvm_hyp_vcpu *hyp_vcpu = pkvm_get_loaded_hyp_vcpu();
|
||||
|
||||
ret = hyp_vcpu ? __pkvm_vcpu_in_poison_fault(hyp_vcpu) : -EINVAL;
|
||||
cpu_reg(host_ctxt, 1) = ret;
|
||||
}
|
||||
|
||||
static void handle___pkvm_force_reclaim_guest_page(struct kvm_cpu_context *host_ctxt)
|
||||
{
|
||||
DECLARE_REG(phys_addr_t, phys, host_ctxt, 1);
|
||||
|
||||
cpu_reg(host_ctxt, 1) = __pkvm_host_force_reclaim_page_guest(phys);
|
||||
}
|
||||
|
||||
static void handle___pkvm_reclaim_dying_guest_page(struct kvm_cpu_context *host_ctxt)
|
||||
{
|
||||
DECLARE_REG(pkvm_handle_t, handle, host_ctxt, 1);
|
||||
DECLARE_REG(u64, gfn, host_ctxt, 2);
|
||||
|
||||
cpu_reg(host_ctxt, 1) = __pkvm_reclaim_dying_guest_page(handle, gfn);
|
||||
}
|
||||
|
||||
static void handle___pkvm_start_teardown_vm(struct kvm_cpu_context *host_ctxt)
|
||||
{
|
||||
DECLARE_REG(pkvm_handle_t, handle, host_ctxt, 1);
|
||||
|
||||
cpu_reg(host_ctxt, 1) = __pkvm_teardown_vm(handle);
|
||||
cpu_reg(host_ctxt, 1) = __pkvm_start_teardown_vm(handle);
|
||||
}
|
||||
|
||||
static void handle___pkvm_finalize_teardown_vm(struct kvm_cpu_context *host_ctxt)
|
||||
{
|
||||
DECLARE_REG(pkvm_handle_t, handle, host_ctxt, 1);
|
||||
|
||||
cpu_reg(host_ctxt, 1) = __pkvm_finalize_teardown_vm(handle);
|
||||
}
|
||||
|
||||
static void handle___tracing_load(struct kvm_cpu_context *host_ctxt)
|
||||
{
|
||||
DECLARE_REG(unsigned long, desc_hva, host_ctxt, 1);
|
||||
DECLARE_REG(size_t, desc_size, host_ctxt, 2);
|
||||
|
||||
cpu_reg(host_ctxt, 1) = __tracing_load(desc_hva, desc_size);
|
||||
}
|
||||
|
||||
static void handle___tracing_unload(struct kvm_cpu_context *host_ctxt)
|
||||
{
|
||||
__tracing_unload();
|
||||
}
|
||||
|
||||
static void handle___tracing_enable(struct kvm_cpu_context *host_ctxt)
|
||||
{
|
||||
DECLARE_REG(bool, enable, host_ctxt, 1);
|
||||
|
||||
cpu_reg(host_ctxt, 1) = __tracing_enable(enable);
|
||||
}
|
||||
|
||||
static void handle___tracing_swap_reader(struct kvm_cpu_context *host_ctxt)
|
||||
{
|
||||
DECLARE_REG(unsigned int, cpu, host_ctxt, 1);
|
||||
|
||||
cpu_reg(host_ctxt, 1) = __tracing_swap_reader(cpu);
|
||||
}
|
||||
|
||||
static void handle___tracing_update_clock(struct kvm_cpu_context *host_ctxt)
|
||||
{
|
||||
DECLARE_REG(u32, mult, host_ctxt, 1);
|
||||
DECLARE_REG(u32, shift, host_ctxt, 2);
|
||||
DECLARE_REG(u64, epoch_ns, host_ctxt, 3);
|
||||
DECLARE_REG(u64, epoch_cyc, host_ctxt, 4);
|
||||
|
||||
__tracing_update_clock(mult, shift, epoch_ns, epoch_cyc);
|
||||
}
|
||||
|
||||
static void handle___tracing_reset(struct kvm_cpu_context *host_ctxt)
|
||||
{
|
||||
DECLARE_REG(unsigned int, cpu, host_ctxt, 1);
|
||||
|
||||
cpu_reg(host_ctxt, 1) = __tracing_reset(cpu);
|
||||
}
|
||||
|
||||
static void handle___tracing_enable_event(struct kvm_cpu_context *host_ctxt)
|
||||
{
|
||||
DECLARE_REG(unsigned short, id, host_ctxt, 1);
|
||||
DECLARE_REG(bool, enable, host_ctxt, 2);
|
||||
|
||||
cpu_reg(host_ctxt, 1) = __tracing_enable_event(id, enable);
|
||||
}
|
||||
|
||||
static void handle___tracing_write_event(struct kvm_cpu_context *host_ctxt)
|
||||
{
|
||||
DECLARE_REG(u64, id, host_ctxt, 1);
|
||||
|
||||
trace_selftest(id);
|
||||
}
|
||||
|
||||
static void handle___vgic_v5_save_apr(struct kvm_cpu_context *host_ctxt)
|
||||
{
|
||||
DECLARE_REG(struct vgic_v5_cpu_if *, cpu_if, host_ctxt, 1);
|
||||
|
||||
__vgic_v5_save_apr(kern_hyp_va(cpu_if));
|
||||
}
|
||||
|
||||
static void handle___vgic_v5_restore_vmcr_apr(struct kvm_cpu_context *host_ctxt)
|
||||
{
|
||||
DECLARE_REG(struct vgic_v5_cpu_if *, cpu_if, host_ctxt, 1);
|
||||
|
||||
__vgic_v5_restore_vmcr_apr(kern_hyp_va(cpu_if));
|
||||
}
|
||||
|
||||
typedef void (*hcall_t)(struct kvm_cpu_context *);
|
||||
@@ -603,14 +700,6 @@ static const hcall_t host_hcall[] = {
|
||||
HANDLE_FUNC(__vgic_v3_get_gic_config),
|
||||
HANDLE_FUNC(__pkvm_prot_finalize),
|
||||
|
||||
HANDLE_FUNC(__pkvm_host_share_hyp),
|
||||
HANDLE_FUNC(__pkvm_host_unshare_hyp),
|
||||
HANDLE_FUNC(__pkvm_host_share_guest),
|
||||
HANDLE_FUNC(__pkvm_host_unshare_guest),
|
||||
HANDLE_FUNC(__pkvm_host_relax_perms_guest),
|
||||
HANDLE_FUNC(__pkvm_host_wrprotect_guest),
|
||||
HANDLE_FUNC(__pkvm_host_test_clear_young_guest),
|
||||
HANDLE_FUNC(__pkvm_host_mkyoung_guest),
|
||||
HANDLE_FUNC(__kvm_adjust_pc),
|
||||
HANDLE_FUNC(__kvm_vcpu_run),
|
||||
HANDLE_FUNC(__kvm_flush_vm_context),
|
||||
@@ -622,20 +711,44 @@ static const hcall_t host_hcall[] = {
|
||||
HANDLE_FUNC(__kvm_timer_set_cntvoff),
|
||||
HANDLE_FUNC(__vgic_v3_save_aprs),
|
||||
HANDLE_FUNC(__vgic_v3_restore_vmcr_aprs),
|
||||
HANDLE_FUNC(__vgic_v5_save_apr),
|
||||
HANDLE_FUNC(__vgic_v5_restore_vmcr_apr),
|
||||
|
||||
HANDLE_FUNC(__pkvm_host_share_hyp),
|
||||
HANDLE_FUNC(__pkvm_host_unshare_hyp),
|
||||
HANDLE_FUNC(__pkvm_host_donate_guest),
|
||||
HANDLE_FUNC(__pkvm_host_share_guest),
|
||||
HANDLE_FUNC(__pkvm_host_unshare_guest),
|
||||
HANDLE_FUNC(__pkvm_host_relax_perms_guest),
|
||||
HANDLE_FUNC(__pkvm_host_wrprotect_guest),
|
||||
HANDLE_FUNC(__pkvm_host_test_clear_young_guest),
|
||||
HANDLE_FUNC(__pkvm_host_mkyoung_guest),
|
||||
HANDLE_FUNC(__pkvm_reserve_vm),
|
||||
HANDLE_FUNC(__pkvm_unreserve_vm),
|
||||
HANDLE_FUNC(__pkvm_init_vm),
|
||||
HANDLE_FUNC(__pkvm_init_vcpu),
|
||||
HANDLE_FUNC(__pkvm_teardown_vm),
|
||||
HANDLE_FUNC(__pkvm_vcpu_in_poison_fault),
|
||||
HANDLE_FUNC(__pkvm_force_reclaim_guest_page),
|
||||
HANDLE_FUNC(__pkvm_reclaim_dying_guest_page),
|
||||
HANDLE_FUNC(__pkvm_start_teardown_vm),
|
||||
HANDLE_FUNC(__pkvm_finalize_teardown_vm),
|
||||
HANDLE_FUNC(__pkvm_vcpu_load),
|
||||
HANDLE_FUNC(__pkvm_vcpu_put),
|
||||
HANDLE_FUNC(__pkvm_tlb_flush_vmid),
|
||||
HANDLE_FUNC(__tracing_load),
|
||||
HANDLE_FUNC(__tracing_unload),
|
||||
HANDLE_FUNC(__tracing_enable),
|
||||
HANDLE_FUNC(__tracing_swap_reader),
|
||||
HANDLE_FUNC(__tracing_update_clock),
|
||||
HANDLE_FUNC(__tracing_reset),
|
||||
HANDLE_FUNC(__tracing_enable_event),
|
||||
HANDLE_FUNC(__tracing_write_event),
|
||||
};
|
||||
|
||||
static void handle_host_hcall(struct kvm_cpu_context *host_ctxt)
|
||||
{
|
||||
DECLARE_REG(unsigned long, id, host_ctxt, 0);
|
||||
unsigned long hcall_min = 0;
|
||||
unsigned long hcall_min = 0, hcall_max = -1;
|
||||
hcall_t hfn;
|
||||
|
||||
/*
|
||||
@@ -647,14 +760,19 @@ static void handle_host_hcall(struct kvm_cpu_context *host_ctxt)
|
||||
* basis. This is all fine, however, since __pkvm_prot_finalize
|
||||
* returns -EPERM after the first call for a given CPU.
|
||||
*/
|
||||
if (static_branch_unlikely(&kvm_protected_mode_initialized))
|
||||
hcall_min = __KVM_HOST_SMCCC_FUNC___pkvm_prot_finalize;
|
||||
if (static_branch_unlikely(&kvm_protected_mode_initialized)) {
|
||||
hcall_min = __KVM_HOST_SMCCC_FUNC_MIN_PKVM;
|
||||
} else {
|
||||
hcall_max = __KVM_HOST_SMCCC_FUNC_MAX_NO_PKVM;
|
||||
}
|
||||
|
||||
id &= ~ARM_SMCCC_CALL_HINTS;
|
||||
id -= KVM_HOST_SMCCC_ID(0);
|
||||
|
||||
if (unlikely(id < hcall_min || id >= ARRAY_SIZE(host_hcall)))
|
||||
if (unlikely(id < hcall_min || id > hcall_max ||
|
||||
id >= ARRAY_SIZE(host_hcall))) {
|
||||
goto inval;
|
||||
}
|
||||
|
||||
hfn = host_hcall[id];
|
||||
if (unlikely(!hfn))
|
||||
@@ -670,14 +788,22 @@ inval:
|
||||
|
||||
static void default_host_smc_handler(struct kvm_cpu_context *host_ctxt)
|
||||
{
|
||||
trace_hyp_exit(host_ctxt, HYP_REASON_SMC);
|
||||
__kvm_hyp_host_forward_smc(host_ctxt);
|
||||
trace_hyp_enter(host_ctxt, HYP_REASON_SMC);
|
||||
}
|
||||
|
||||
static void handle_host_smc(struct kvm_cpu_context *host_ctxt)
|
||||
{
|
||||
DECLARE_REG(u64, func_id, host_ctxt, 0);
|
||||
u64 esr = read_sysreg_el2(SYS_ESR);
|
||||
bool handled;
|
||||
|
||||
if (esr & ESR_ELx_xVC_IMM_MASK) {
|
||||
cpu_reg(host_ctxt, 0) = SMCCC_RET_NOT_SUPPORTED;
|
||||
goto exit_skip_instr;
|
||||
}
|
||||
|
||||
func_id &= ~ARM_SMCCC_CALL_HINTS;
|
||||
|
||||
handled = kvm_host_psci_handler(host_ctxt, func_id);
|
||||
@@ -686,47 +812,57 @@ static void handle_host_smc(struct kvm_cpu_context *host_ctxt)
|
||||
if (!handled)
|
||||
default_host_smc_handler(host_ctxt);
|
||||
|
||||
exit_skip_instr:
|
||||
/* SMC was trapped, move ELR past the current PC. */
|
||||
kvm_skip_host_instr();
|
||||
}
|
||||
|
||||
/*
|
||||
* Inject an Undefined Instruction exception into the host.
|
||||
*
|
||||
* This is open-coded to allow control over PSTATE construction without
|
||||
* complicating the generic exception entry helpers.
|
||||
*/
|
||||
static void inject_undef64(void)
|
||||
void inject_host_exception(u64 esr)
|
||||
{
|
||||
u64 spsr_mask, vbar, sctlr, old_spsr, new_spsr, esr, offset;
|
||||
u64 sctlr, spsr_el1, spsr_el2, exc_offset = except_type_sync;
|
||||
const u64 spsr_mask = PSR_N_BIT | PSR_Z_BIT | PSR_C_BIT |
|
||||
PSR_V_BIT | PSR_DIT_BIT | PSR_PAN_BIT;
|
||||
|
||||
spsr_mask = PSR_N_BIT | PSR_Z_BIT | PSR_C_BIT | PSR_V_BIT | PSR_DIT_BIT | PSR_PAN_BIT;
|
||||
spsr_el1 = spsr_el2 = read_sysreg_el2(SYS_SPSR);
|
||||
switch (spsr_el1 & (PSR_MODE_MASK | PSR_MODE32_BIT)) {
|
||||
case PSR_MODE_EL0t:
|
||||
exc_offset += LOWER_EL_AArch64_VECTOR;
|
||||
break;
|
||||
case PSR_MODE_EL0t | PSR_MODE32_BIT:
|
||||
exc_offset += LOWER_EL_AArch32_VECTOR;
|
||||
break;
|
||||
default:
|
||||
exc_offset += CURRENT_EL_SP_ELx_VECTOR;
|
||||
}
|
||||
|
||||
spsr_el2 &= spsr_mask;
|
||||
spsr_el2 |= PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT |
|
||||
PSR_MODE_EL1h;
|
||||
|
||||
vbar = read_sysreg_el1(SYS_VBAR);
|
||||
sctlr = read_sysreg_el1(SYS_SCTLR);
|
||||
old_spsr = read_sysreg_el2(SYS_SPSR);
|
||||
|
||||
new_spsr = old_spsr & spsr_mask;
|
||||
new_spsr |= PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT;
|
||||
new_spsr |= PSR_MODE_EL1h;
|
||||
|
||||
if (!(sctlr & SCTLR_EL1_SPAN))
|
||||
new_spsr |= PSR_PAN_BIT;
|
||||
spsr_el2 |= PSR_PAN_BIT;
|
||||
|
||||
if (sctlr & SCTLR_ELx_DSSBS)
|
||||
new_spsr |= PSR_SSBS_BIT;
|
||||
spsr_el2 |= PSR_SSBS_BIT;
|
||||
|
||||
if (system_supports_mte())
|
||||
new_spsr |= PSR_TCO_BIT;
|
||||
spsr_el2 |= PSR_TCO_BIT;
|
||||
|
||||
esr = (ESR_ELx_EC_UNKNOWN << ESR_ELx_EC_SHIFT) | ESR_ELx_IL;
|
||||
offset = CURRENT_EL_SP_ELx_VECTOR + except_type_sync;
|
||||
if (esr_fsc_is_translation_fault(esr))
|
||||
write_sysreg_el1(read_sysreg_el2(SYS_FAR), SYS_FAR);
|
||||
|
||||
write_sysreg_el1(esr, SYS_ESR);
|
||||
write_sysreg_el1(read_sysreg_el2(SYS_ELR), SYS_ELR);
|
||||
write_sysreg_el1(old_spsr, SYS_SPSR);
|
||||
write_sysreg_el2(vbar + offset, SYS_ELR);
|
||||
write_sysreg_el2(new_spsr, SYS_SPSR);
|
||||
write_sysreg_el1(spsr_el1, SYS_SPSR);
|
||||
write_sysreg_el2(read_sysreg_el1(SYS_VBAR) + exc_offset, SYS_ELR);
|
||||
write_sysreg_el2(spsr_el2, SYS_SPSR);
|
||||
}
|
||||
|
||||
static void inject_host_undef64(void)
|
||||
{
|
||||
inject_host_exception((ESR_ELx_EC_UNKNOWN << ESR_ELx_EC_SHIFT) |
|
||||
ESR_ELx_IL);
|
||||
}
|
||||
|
||||
static bool handle_host_mte(u64 esr)
|
||||
@@ -749,7 +885,7 @@ static bool handle_host_mte(u64 esr)
|
||||
return false;
|
||||
}
|
||||
|
||||
inject_undef64();
|
||||
inject_host_undef64();
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -757,15 +893,19 @@ void handle_trap(struct kvm_cpu_context *host_ctxt)
|
||||
{
|
||||
u64 esr = read_sysreg_el2(SYS_ESR);
|
||||
|
||||
|
||||
switch (ESR_ELx_EC(esr)) {
|
||||
case ESR_ELx_EC_HVC64:
|
||||
trace_hyp_enter(host_ctxt, HYP_REASON_HVC);
|
||||
handle_host_hcall(host_ctxt);
|
||||
break;
|
||||
case ESR_ELx_EC_SMC64:
|
||||
trace_hyp_enter(host_ctxt, HYP_REASON_SMC);
|
||||
handle_host_smc(host_ctxt);
|
||||
break;
|
||||
case ESR_ELx_EC_IABT_LOW:
|
||||
case ESR_ELx_EC_DABT_LOW:
|
||||
trace_hyp_enter(host_ctxt, HYP_REASON_HOST_ABORT);
|
||||
handle_host_mem_abort(host_ctxt);
|
||||
break;
|
||||
case ESR_ELx_EC_SYS64:
|
||||
@@ -775,4 +915,6 @@ void handle_trap(struct kvm_cpu_context *host_ctxt)
|
||||
default:
|
||||
BUG();
|
||||
}
|
||||
|
||||
trace_hyp_exit(host_ctxt, HYP_REASON_ERET_HOST);
|
||||
}
|
||||
|
||||
@@ -16,6 +16,12 @@ SECTIONS {
|
||||
HYP_SECTION(.text)
|
||||
HYP_SECTION(.data..ro_after_init)
|
||||
HYP_SECTION(.rodata)
|
||||
#ifdef CONFIG_NVHE_EL2_TRACING
|
||||
. = ALIGN(PAGE_SIZE);
|
||||
BEGIN_HYP_SECTION(.event_ids)
|
||||
*(SORT(.hyp.event_ids.*))
|
||||
END_HYP_SECTION
|
||||
#endif
|
||||
|
||||
/*
|
||||
* .hyp..data..percpu needs to be page aligned to maintain the same
|
||||
|
||||
@@ -18,6 +18,7 @@
|
||||
#include <nvhe/memory.h>
|
||||
#include <nvhe/mem_protect.h>
|
||||
#include <nvhe/mm.h>
|
||||
#include <nvhe/trap_handler.h>
|
||||
|
||||
#define KVM_HOST_S2_FLAGS (KVM_PGTABLE_S2_AS_S1 | KVM_PGTABLE_S2_IDMAP)
|
||||
|
||||
@@ -461,8 +462,15 @@ static bool range_is_memory(u64 start, u64 end)
|
||||
static inline int __host_stage2_idmap(u64 start, u64 end,
|
||||
enum kvm_pgtable_prot prot)
|
||||
{
|
||||
/*
|
||||
* We don't make permission changes to the host idmap after
|
||||
* initialisation, so we can squash -EAGAIN to save callers
|
||||
* having to treat it like success in the case that they try to
|
||||
* map something that is already mapped.
|
||||
*/
|
||||
return kvm_pgtable_stage2_map(&host_mmu.pgt, start, end - start, start,
|
||||
prot, &host_s2_pool, 0);
|
||||
prot, &host_s2_pool,
|
||||
KVM_PGTABLE_WALK_IGNORE_EAGAIN);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -504,7 +512,7 @@ static int host_stage2_adjust_range(u64 addr, struct kvm_mem_range *range)
|
||||
return ret;
|
||||
|
||||
if (kvm_pte_valid(pte))
|
||||
return -EAGAIN;
|
||||
return -EEXIST;
|
||||
|
||||
if (pte) {
|
||||
WARN_ON(addr_is_memory(addr) &&
|
||||
@@ -541,24 +549,99 @@ static void __host_update_page_state(phys_addr_t addr, u64 size, enum pkvm_page_
|
||||
set_host_state(page, state);
|
||||
}
|
||||
|
||||
int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, u8 owner_id)
|
||||
#define KVM_HOST_DONATION_PTE_OWNER_MASK GENMASK(3, 1)
|
||||
#define KVM_HOST_DONATION_PTE_EXTRA_MASK GENMASK(59, 4)
|
||||
static int host_stage2_set_owner_metadata_locked(phys_addr_t addr, u64 size,
|
||||
u8 owner_id, u64 meta)
|
||||
{
|
||||
kvm_pte_t annotation;
|
||||
int ret;
|
||||
|
||||
if (owner_id == PKVM_ID_HOST)
|
||||
return -EINVAL;
|
||||
|
||||
if (!range_is_memory(addr, addr + size))
|
||||
return -EPERM;
|
||||
|
||||
ret = host_stage2_try(kvm_pgtable_stage2_set_owner, &host_mmu.pgt,
|
||||
addr, size, &host_s2_pool, owner_id);
|
||||
if (ret)
|
||||
return ret;
|
||||
if (!FIELD_FIT(KVM_HOST_DONATION_PTE_OWNER_MASK, owner_id))
|
||||
return -EINVAL;
|
||||
|
||||
/* Don't forget to update the vmemmap tracking for the host */
|
||||
if (owner_id == PKVM_ID_HOST)
|
||||
__host_update_page_state(addr, size, PKVM_PAGE_OWNED);
|
||||
else
|
||||
if (!FIELD_FIT(KVM_HOST_DONATION_PTE_EXTRA_MASK, meta))
|
||||
return -EINVAL;
|
||||
|
||||
annotation = FIELD_PREP(KVM_HOST_DONATION_PTE_OWNER_MASK, owner_id) |
|
||||
FIELD_PREP(KVM_HOST_DONATION_PTE_EXTRA_MASK, meta);
|
||||
ret = host_stage2_try(kvm_pgtable_stage2_annotate, &host_mmu.pgt,
|
||||
addr, size, &host_s2_pool,
|
||||
KVM_HOST_INVALID_PTE_TYPE_DONATION, annotation);
|
||||
if (!ret)
|
||||
__host_update_page_state(addr, size, PKVM_NOPAGE);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, u8 owner_id)
|
||||
{
|
||||
int ret = -EINVAL;
|
||||
|
||||
switch (owner_id) {
|
||||
case PKVM_ID_HOST:
|
||||
if (!range_is_memory(addr, addr + size))
|
||||
return -EPERM;
|
||||
|
||||
ret = host_stage2_idmap_locked(addr, size, PKVM_HOST_MEM_PROT);
|
||||
if (!ret)
|
||||
__host_update_page_state(addr, size, PKVM_PAGE_OWNED);
|
||||
break;
|
||||
case PKVM_ID_HYP:
|
||||
ret = host_stage2_set_owner_metadata_locked(addr, size,
|
||||
owner_id, 0);
|
||||
break;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
#define KVM_HOST_PTE_OWNER_GUEST_HANDLE_MASK GENMASK(15, 0)
|
||||
/* We need 40 bits for the GFN to cover a 52-bit IPA with 4k pages and LPA2 */
|
||||
#define KVM_HOST_PTE_OWNER_GUEST_GFN_MASK GENMASK(55, 16)
|
||||
static u64 host_stage2_encode_gfn_meta(struct pkvm_hyp_vm *vm, u64 gfn)
|
||||
{
|
||||
pkvm_handle_t handle = vm->kvm.arch.pkvm.handle;
|
||||
|
||||
BUILD_BUG_ON((pkvm_handle_t)-1 > KVM_HOST_PTE_OWNER_GUEST_HANDLE_MASK);
|
||||
WARN_ON(!FIELD_FIT(KVM_HOST_PTE_OWNER_GUEST_GFN_MASK, gfn));
|
||||
|
||||
return FIELD_PREP(KVM_HOST_PTE_OWNER_GUEST_HANDLE_MASK, handle) |
|
||||
FIELD_PREP(KVM_HOST_PTE_OWNER_GUEST_GFN_MASK, gfn);
|
||||
}
|
||||
|
||||
static int host_stage2_decode_gfn_meta(kvm_pte_t pte, struct pkvm_hyp_vm **vm,
|
||||
u64 *gfn)
|
||||
{
|
||||
pkvm_handle_t handle;
|
||||
u64 meta;
|
||||
|
||||
if (WARN_ON(kvm_pte_valid(pte)))
|
||||
return -EINVAL;
|
||||
|
||||
if (FIELD_GET(KVM_INVALID_PTE_TYPE_MASK, pte) !=
|
||||
KVM_HOST_INVALID_PTE_TYPE_DONATION) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (FIELD_GET(KVM_HOST_DONATION_PTE_OWNER_MASK, pte) != PKVM_ID_GUEST)
|
||||
return -EPERM;
|
||||
|
||||
meta = FIELD_GET(KVM_HOST_DONATION_PTE_EXTRA_MASK, pte);
|
||||
handle = FIELD_GET(KVM_HOST_PTE_OWNER_GUEST_HANDLE_MASK, meta);
|
||||
*vm = get_vm_by_handle(handle);
|
||||
if (!*vm) {
|
||||
/* We probably raced with teardown; try again */
|
||||
return -EAGAIN;
|
||||
}
|
||||
|
||||
*gfn = FIELD_GET(KVM_HOST_PTE_OWNER_GUEST_GFN_MASK, meta);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -605,11 +688,43 @@ unlock:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void host_inject_mem_abort(struct kvm_cpu_context *host_ctxt)
|
||||
{
|
||||
u64 ec, esr, spsr;
|
||||
|
||||
esr = read_sysreg_el2(SYS_ESR);
|
||||
spsr = read_sysreg_el2(SYS_SPSR);
|
||||
|
||||
/* Repaint the ESR to report a same-level fault if taken from EL1 */
|
||||
if ((spsr & PSR_MODE_MASK) != PSR_MODE_EL0t) {
|
||||
ec = ESR_ELx_EC(esr);
|
||||
if (ec == ESR_ELx_EC_DABT_LOW)
|
||||
ec = ESR_ELx_EC_DABT_CUR;
|
||||
else if (ec == ESR_ELx_EC_IABT_LOW)
|
||||
ec = ESR_ELx_EC_IABT_CUR;
|
||||
else
|
||||
WARN_ON(1);
|
||||
esr &= ~ESR_ELx_EC_MASK;
|
||||
esr |= ec << ESR_ELx_EC_SHIFT;
|
||||
}
|
||||
|
||||
/*
|
||||
* Since S1PTW should only ever be set for stage-2 faults, we're pretty
|
||||
* much guaranteed that it won't be set in ESR_EL1 by the hardware. So,
|
||||
* let's use that bit to allow the host abort handler to differentiate
|
||||
* this abort from normal userspace faults.
|
||||
*
|
||||
* Note: although S1PTW is RES0 at EL1, it is guaranteed by the
|
||||
* architecture to be backed by flops, so it should be safe to use.
|
||||
*/
|
||||
esr |= ESR_ELx_S1PTW;
|
||||
inject_host_exception(esr);
|
||||
}
|
||||
|
||||
void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt)
|
||||
{
|
||||
struct kvm_vcpu_fault_info fault;
|
||||
u64 esr, addr;
|
||||
int ret = 0;
|
||||
|
||||
esr = read_sysreg_el2(SYS_ESR);
|
||||
if (!__get_fault_info(esr, &fault)) {
|
||||
@@ -628,8 +743,16 @@ void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt)
|
||||
BUG_ON(!(fault.hpfar_el2 & HPFAR_EL2_NS));
|
||||
addr = FIELD_GET(HPFAR_EL2_FIPA, fault.hpfar_el2) << 12;
|
||||
|
||||
ret = host_stage2_idmap(addr);
|
||||
BUG_ON(ret && ret != -EAGAIN);
|
||||
switch (host_stage2_idmap(addr)) {
|
||||
case -EPERM:
|
||||
host_inject_mem_abort(host_ctxt);
|
||||
fallthrough;
|
||||
case -EEXIST:
|
||||
case 0:
|
||||
break;
|
||||
default:
|
||||
BUG();
|
||||
}
|
||||
}
|
||||
|
||||
struct check_walk_data {
|
||||
@@ -707,8 +830,20 @@ static int __hyp_check_page_state_range(phys_addr_t phys, u64 size, enum pkvm_pa
|
||||
return 0;
|
||||
}
|
||||
|
||||
static bool guest_pte_is_poisoned(kvm_pte_t pte)
|
||||
{
|
||||
if (kvm_pte_valid(pte))
|
||||
return false;
|
||||
|
||||
return FIELD_GET(KVM_INVALID_PTE_TYPE_MASK, pte) ==
|
||||
KVM_GUEST_INVALID_PTE_TYPE_POISONED;
|
||||
}
|
||||
|
||||
static enum pkvm_page_state guest_get_page_state(kvm_pte_t pte, u64 addr)
|
||||
{
|
||||
if (guest_pte_is_poisoned(pte))
|
||||
return PKVM_POISON;
|
||||
|
||||
if (!kvm_pte_valid(pte))
|
||||
return PKVM_NOPAGE;
|
||||
|
||||
@@ -727,6 +862,77 @@ static int __guest_check_page_state_range(struct pkvm_hyp_vm *vm, u64 addr,
|
||||
return check_page_state_range(&vm->pgt, addr, size, &d);
|
||||
}
|
||||
|
||||
static int get_valid_guest_pte(struct pkvm_hyp_vm *vm, u64 ipa, kvm_pte_t *ptep, u64 *physp)
|
||||
{
|
||||
kvm_pte_t pte;
|
||||
u64 phys;
|
||||
s8 level;
|
||||
int ret;
|
||||
|
||||
ret = kvm_pgtable_get_leaf(&vm->pgt, ipa, &pte, &level);
|
||||
if (ret)
|
||||
return ret;
|
||||
if (guest_pte_is_poisoned(pte))
|
||||
return -EHWPOISON;
|
||||
if (!kvm_pte_valid(pte))
|
||||
return -ENOENT;
|
||||
if (level != KVM_PGTABLE_LAST_LEVEL)
|
||||
return -E2BIG;
|
||||
|
||||
phys = kvm_pte_to_phys(pte);
|
||||
ret = check_range_allowed_memory(phys, phys + PAGE_SIZE);
|
||||
if (WARN_ON(ret))
|
||||
return ret;
|
||||
|
||||
*ptep = pte;
|
||||
*physp = phys;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int __pkvm_vcpu_in_poison_fault(struct pkvm_hyp_vcpu *hyp_vcpu)
|
||||
{
|
||||
struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(hyp_vcpu);
|
||||
kvm_pte_t pte;
|
||||
s8 level;
|
||||
u64 ipa;
|
||||
int ret;
|
||||
|
||||
switch (kvm_vcpu_trap_get_class(&hyp_vcpu->vcpu)) {
|
||||
case ESR_ELx_EC_DABT_LOW:
|
||||
case ESR_ELx_EC_IABT_LOW:
|
||||
if (kvm_vcpu_trap_is_translation_fault(&hyp_vcpu->vcpu))
|
||||
break;
|
||||
fallthrough;
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/*
|
||||
* The host has the faulting IPA when it calls us from the guest
|
||||
* fault handler but we retrieve it ourselves from the FAR so as
|
||||
* to avoid exposing an "oracle" that could reveal data access
|
||||
* patterns of the guest after initial donation of its pages.
|
||||
*/
|
||||
ipa = kvm_vcpu_get_fault_ipa(&hyp_vcpu->vcpu);
|
||||
ipa |= FAR_TO_FIPA_OFFSET(kvm_vcpu_get_hfar(&hyp_vcpu->vcpu));
|
||||
|
||||
guest_lock_component(vm);
|
||||
ret = kvm_pgtable_get_leaf(&vm->pgt, ipa, &pte, &level);
|
||||
if (ret)
|
||||
goto unlock;
|
||||
|
||||
if (level != KVM_PGTABLE_LAST_LEVEL) {
|
||||
ret = -EINVAL;
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
ret = guest_pte_is_poisoned(pte);
|
||||
unlock:
|
||||
guest_unlock_component(vm);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int __pkvm_host_share_hyp(u64 pfn)
|
||||
{
|
||||
u64 phys = hyp_pfn_to_phys(pfn);
|
||||
@@ -753,6 +959,72 @@ unlock:
|
||||
return ret;
|
||||
}
|
||||
|
||||
int __pkvm_guest_share_host(struct pkvm_hyp_vcpu *vcpu, u64 gfn)
|
||||
{
|
||||
struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
|
||||
u64 phys, ipa = hyp_pfn_to_phys(gfn);
|
||||
kvm_pte_t pte;
|
||||
int ret;
|
||||
|
||||
host_lock_component();
|
||||
guest_lock_component(vm);
|
||||
|
||||
ret = get_valid_guest_pte(vm, ipa, &pte, &phys);
|
||||
if (ret)
|
||||
goto unlock;
|
||||
|
||||
ret = -EPERM;
|
||||
if (pkvm_getstate(kvm_pgtable_stage2_pte_prot(pte)) != PKVM_PAGE_OWNED)
|
||||
goto unlock;
|
||||
if (__host_check_page_state_range(phys, PAGE_SIZE, PKVM_NOPAGE))
|
||||
goto unlock;
|
||||
|
||||
ret = 0;
|
||||
WARN_ON(kvm_pgtable_stage2_map(&vm->pgt, ipa, PAGE_SIZE, phys,
|
||||
pkvm_mkstate(KVM_PGTABLE_PROT_RWX, PKVM_PAGE_SHARED_OWNED),
|
||||
&vcpu->vcpu.arch.pkvm_memcache, 0));
|
||||
WARN_ON(__host_set_page_state_range(phys, PAGE_SIZE, PKVM_PAGE_SHARED_BORROWED));
|
||||
unlock:
|
||||
guest_unlock_component(vm);
|
||||
host_unlock_component();
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int __pkvm_guest_unshare_host(struct pkvm_hyp_vcpu *vcpu, u64 gfn)
|
||||
{
|
||||
struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
|
||||
u64 meta, phys, ipa = hyp_pfn_to_phys(gfn);
|
||||
kvm_pte_t pte;
|
||||
int ret;
|
||||
|
||||
host_lock_component();
|
||||
guest_lock_component(vm);
|
||||
|
||||
ret = get_valid_guest_pte(vm, ipa, &pte, &phys);
|
||||
if (ret)
|
||||
goto unlock;
|
||||
|
||||
ret = -EPERM;
|
||||
if (pkvm_getstate(kvm_pgtable_stage2_pte_prot(pte)) != PKVM_PAGE_SHARED_OWNED)
|
||||
goto unlock;
|
||||
if (__host_check_page_state_range(phys, PAGE_SIZE, PKVM_PAGE_SHARED_BORROWED))
|
||||
goto unlock;
|
||||
|
||||
ret = 0;
|
||||
meta = host_stage2_encode_gfn_meta(vm, gfn);
|
||||
WARN_ON(host_stage2_set_owner_metadata_locked(phys, PAGE_SIZE,
|
||||
PKVM_ID_GUEST, meta));
|
||||
WARN_ON(kvm_pgtable_stage2_map(&vm->pgt, ipa, PAGE_SIZE, phys,
|
||||
pkvm_mkstate(KVM_PGTABLE_PROT_RWX, PKVM_PAGE_OWNED),
|
||||
&vcpu->vcpu.arch.pkvm_memcache, 0));
|
||||
unlock:
|
||||
guest_unlock_component(vm);
|
||||
host_unlock_component();
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int __pkvm_host_unshare_hyp(u64 pfn)
|
||||
{
|
||||
u64 phys = hyp_pfn_to_phys(pfn);
|
||||
@@ -960,6 +1232,176 @@ static int __guest_check_transition_size(u64 phys, u64 ipa, u64 nr_pages, u64 *s
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void hyp_poison_page(phys_addr_t phys)
|
||||
{
|
||||
void *addr = hyp_fixmap_map(phys);
|
||||
|
||||
memset(addr, 0, PAGE_SIZE);
|
||||
/*
|
||||
* Prefer kvm_flush_dcache_to_poc() over __clean_dcache_guest_page()
|
||||
* here as the latter may elide the CMO under the assumption that FWB
|
||||
* will be enabled on CPUs that support it. This is incorrect for the
|
||||
* host stage-2 and would otherwise lead to a malicious host potentially
|
||||
* being able to read the contents of newly reclaimed guest pages.
|
||||
*/
|
||||
kvm_flush_dcache_to_poc(addr, PAGE_SIZE);
|
||||
hyp_fixmap_unmap();
|
||||
}
|
||||
|
||||
static int host_stage2_get_guest_info(phys_addr_t phys, struct pkvm_hyp_vm **vm,
|
||||
u64 *gfn)
|
||||
{
|
||||
enum pkvm_page_state state;
|
||||
kvm_pte_t pte;
|
||||
s8 level;
|
||||
int ret;
|
||||
|
||||
if (!addr_is_memory(phys))
|
||||
return -EFAULT;
|
||||
|
||||
state = get_host_state(hyp_phys_to_page(phys));
|
||||
switch (state) {
|
||||
case PKVM_PAGE_OWNED:
|
||||
case PKVM_PAGE_SHARED_OWNED:
|
||||
case PKVM_PAGE_SHARED_BORROWED:
|
||||
/* The access should no longer fault; try again. */
|
||||
return -EAGAIN;
|
||||
case PKVM_NOPAGE:
|
||||
break;
|
||||
default:
|
||||
return -EPERM;
|
||||
}
|
||||
|
||||
ret = kvm_pgtable_get_leaf(&host_mmu.pgt, phys, &pte, &level);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (WARN_ON(level != KVM_PGTABLE_LAST_LEVEL))
|
||||
return -EINVAL;
|
||||
|
||||
return host_stage2_decode_gfn_meta(pte, vm, gfn);
|
||||
}
|
||||
|
||||
int __pkvm_host_force_reclaim_page_guest(phys_addr_t phys)
|
||||
{
|
||||
struct pkvm_hyp_vm *vm;
|
||||
u64 gfn, ipa, pa;
|
||||
kvm_pte_t pte;
|
||||
int ret;
|
||||
|
||||
phys &= PAGE_MASK;
|
||||
|
||||
hyp_spin_lock(&vm_table_lock);
|
||||
host_lock_component();
|
||||
|
||||
ret = host_stage2_get_guest_info(phys, &vm, &gfn);
|
||||
if (ret)
|
||||
goto unlock_host;
|
||||
|
||||
ipa = hyp_pfn_to_phys(gfn);
|
||||
guest_lock_component(vm);
|
||||
ret = get_valid_guest_pte(vm, ipa, &pte, &pa);
|
||||
if (ret)
|
||||
goto unlock_guest;
|
||||
|
||||
WARN_ON(pa != phys);
|
||||
if (guest_get_page_state(pte, ipa) != PKVM_PAGE_OWNED) {
|
||||
ret = -EPERM;
|
||||
goto unlock_guest;
|
||||
}
|
||||
|
||||
/* We really shouldn't be allocating, so don't pass a memcache */
|
||||
ret = kvm_pgtable_stage2_annotate(&vm->pgt, ipa, PAGE_SIZE, NULL,
|
||||
KVM_GUEST_INVALID_PTE_TYPE_POISONED,
|
||||
0);
|
||||
if (ret)
|
||||
goto unlock_guest;
|
||||
|
||||
hyp_poison_page(phys);
|
||||
WARN_ON(host_stage2_set_owner_locked(phys, PAGE_SIZE, PKVM_ID_HOST));
|
||||
unlock_guest:
|
||||
guest_unlock_component(vm);
|
||||
unlock_host:
|
||||
host_unlock_component();
|
||||
hyp_spin_unlock(&vm_table_lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int __pkvm_host_reclaim_page_guest(u64 gfn, struct pkvm_hyp_vm *vm)
|
||||
{
|
||||
u64 ipa = hyp_pfn_to_phys(gfn);
|
||||
kvm_pte_t pte;
|
||||
u64 phys;
|
||||
int ret;
|
||||
|
||||
host_lock_component();
|
||||
guest_lock_component(vm);
|
||||
|
||||
ret = get_valid_guest_pte(vm, ipa, &pte, &phys);
|
||||
if (ret)
|
||||
goto unlock;
|
||||
|
||||
switch (guest_get_page_state(pte, ipa)) {
|
||||
case PKVM_PAGE_OWNED:
|
||||
WARN_ON(__host_check_page_state_range(phys, PAGE_SIZE, PKVM_NOPAGE));
|
||||
hyp_poison_page(phys);
|
||||
break;
|
||||
case PKVM_PAGE_SHARED_OWNED:
|
||||
WARN_ON(__host_check_page_state_range(phys, PAGE_SIZE, PKVM_PAGE_SHARED_BORROWED));
|
||||
break;
|
||||
default:
|
||||
ret = -EPERM;
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
WARN_ON(kvm_pgtable_stage2_unmap(&vm->pgt, ipa, PAGE_SIZE));
|
||||
WARN_ON(host_stage2_set_owner_locked(phys, PAGE_SIZE, PKVM_ID_HOST));
|
||||
|
||||
unlock:
|
||||
guest_unlock_component(vm);
|
||||
host_unlock_component();
|
||||
|
||||
/*
|
||||
* -EHWPOISON implies that the page was forcefully reclaimed already
|
||||
* so return success for the GUP pin to be dropped.
|
||||
*/
|
||||
return ret && ret != -EHWPOISON ? ret : 0;
|
||||
}
|
||||
|
||||
int __pkvm_host_donate_guest(u64 pfn, u64 gfn, struct pkvm_hyp_vcpu *vcpu)
|
||||
{
|
||||
struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
|
||||
u64 phys = hyp_pfn_to_phys(pfn);
|
||||
u64 ipa = hyp_pfn_to_phys(gfn);
|
||||
u64 meta;
|
||||
int ret;
|
||||
|
||||
host_lock_component();
|
||||
guest_lock_component(vm);
|
||||
|
||||
ret = __host_check_page_state_range(phys, PAGE_SIZE, PKVM_PAGE_OWNED);
|
||||
if (ret)
|
||||
goto unlock;
|
||||
|
||||
ret = __guest_check_page_state_range(vm, ipa, PAGE_SIZE, PKVM_NOPAGE);
|
||||
if (ret)
|
||||
goto unlock;
|
||||
|
||||
meta = host_stage2_encode_gfn_meta(vm, gfn);
|
||||
WARN_ON(host_stage2_set_owner_metadata_locked(phys, PAGE_SIZE,
|
||||
PKVM_ID_GUEST, meta));
|
||||
WARN_ON(kvm_pgtable_stage2_map(&vm->pgt, ipa, PAGE_SIZE, phys,
|
||||
pkvm_mkstate(KVM_PGTABLE_PROT_RWX, PKVM_PAGE_OWNED),
|
||||
&vcpu->vcpu.arch.pkvm_memcache, 0));
|
||||
|
||||
unlock:
|
||||
guest_unlock_component(vm);
|
||||
host_unlock_component();
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int __pkvm_host_share_guest(u64 pfn, u64 gfn, u64 nr_pages, struct pkvm_hyp_vcpu *vcpu,
|
||||
enum kvm_pgtable_prot prot)
|
||||
{
|
||||
@@ -1206,53 +1648,18 @@ struct pkvm_expected_state {
|
||||
|
||||
static struct pkvm_expected_state selftest_state;
|
||||
static struct hyp_page *selftest_page;
|
||||
|
||||
static struct pkvm_hyp_vm selftest_vm = {
|
||||
.kvm = {
|
||||
.arch = {
|
||||
.mmu = {
|
||||
.arch = &selftest_vm.kvm.arch,
|
||||
.pgt = &selftest_vm.pgt,
|
||||
},
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
static struct pkvm_hyp_vcpu selftest_vcpu = {
|
||||
.vcpu = {
|
||||
.arch = {
|
||||
.hw_mmu = &selftest_vm.kvm.arch.mmu,
|
||||
},
|
||||
.kvm = &selftest_vm.kvm,
|
||||
},
|
||||
};
|
||||
|
||||
static void init_selftest_vm(void *virt)
|
||||
{
|
||||
struct hyp_page *p = hyp_virt_to_page(virt);
|
||||
int i;
|
||||
|
||||
selftest_vm.kvm.arch.mmu.vtcr = host_mmu.arch.mmu.vtcr;
|
||||
WARN_ON(kvm_guest_prepare_stage2(&selftest_vm, virt));
|
||||
|
||||
for (i = 0; i < pkvm_selftest_pages(); i++) {
|
||||
if (p[i].refcount)
|
||||
continue;
|
||||
p[i].refcount = 1;
|
||||
hyp_put_page(&selftest_vm.pool, hyp_page_to_virt(&p[i]));
|
||||
}
|
||||
}
|
||||
static struct pkvm_hyp_vcpu *selftest_vcpu;
|
||||
|
||||
static u64 selftest_ipa(void)
|
||||
{
|
||||
return BIT(selftest_vm.pgt.ia_bits - 1);
|
||||
return BIT(selftest_vcpu->vcpu.arch.hw_mmu->pgt->ia_bits - 1);
|
||||
}
|
||||
|
||||
static void assert_page_state(void)
|
||||
{
|
||||
void *virt = hyp_page_to_virt(selftest_page);
|
||||
u64 size = PAGE_SIZE << selftest_page->order;
|
||||
struct pkvm_hyp_vcpu *vcpu = &selftest_vcpu;
|
||||
struct pkvm_hyp_vcpu *vcpu = selftest_vcpu;
|
||||
u64 phys = hyp_virt_to_phys(virt);
|
||||
u64 ipa[2] = { selftest_ipa(), selftest_ipa() + PAGE_SIZE };
|
||||
struct pkvm_hyp_vm *vm;
|
||||
@@ -1267,10 +1674,10 @@ static void assert_page_state(void)
|
||||
WARN_ON(__hyp_check_page_state_range(phys, size, selftest_state.hyp));
|
||||
hyp_unlock_component();
|
||||
|
||||
guest_lock_component(&selftest_vm);
|
||||
guest_lock_component(vm);
|
||||
WARN_ON(__guest_check_page_state_range(vm, ipa[0], size, selftest_state.guest[0]));
|
||||
WARN_ON(__guest_check_page_state_range(vm, ipa[1], size, selftest_state.guest[1]));
|
||||
guest_unlock_component(&selftest_vm);
|
||||
guest_unlock_component(vm);
|
||||
}
|
||||
|
||||
#define assert_transition_res(res, fn, ...) \
|
||||
@@ -1283,14 +1690,15 @@ void pkvm_ownership_selftest(void *base)
|
||||
{
|
||||
enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_RWX;
|
||||
void *virt = hyp_alloc_pages(&host_s2_pool, 0);
|
||||
struct pkvm_hyp_vcpu *vcpu = &selftest_vcpu;
|
||||
struct pkvm_hyp_vm *vm = &selftest_vm;
|
||||
struct pkvm_hyp_vcpu *vcpu;
|
||||
u64 phys, size, pfn, gfn;
|
||||
struct pkvm_hyp_vm *vm;
|
||||
|
||||
WARN_ON(!virt);
|
||||
selftest_page = hyp_virt_to_page(virt);
|
||||
selftest_page->refcount = 0;
|
||||
init_selftest_vm(base);
|
||||
selftest_vcpu = vcpu = init_selftest_vm(base);
|
||||
vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
|
||||
|
||||
size = PAGE_SIZE << selftest_page->order;
|
||||
phys = hyp_virt_to_phys(virt);
|
||||
@@ -1309,6 +1717,7 @@ void pkvm_ownership_selftest(void *base)
|
||||
assert_transition_res(-EPERM, hyp_pin_shared_mem, virt, virt + size);
|
||||
assert_transition_res(-EPERM, __pkvm_host_share_guest, pfn, gfn, 1, vcpu, prot);
|
||||
assert_transition_res(-ENOENT, __pkvm_host_unshare_guest, gfn, 1, vm);
|
||||
assert_transition_res(-EPERM, __pkvm_host_donate_guest, pfn, gfn, vcpu);
|
||||
|
||||
selftest_state.host = PKVM_PAGE_OWNED;
|
||||
selftest_state.hyp = PKVM_NOPAGE;
|
||||
@@ -1328,6 +1737,7 @@ void pkvm_ownership_selftest(void *base)
|
||||
assert_transition_res(-EPERM, __pkvm_hyp_donate_host, pfn, 1);
|
||||
assert_transition_res(-EPERM, __pkvm_host_share_guest, pfn, gfn, 1, vcpu, prot);
|
||||
assert_transition_res(-ENOENT, __pkvm_host_unshare_guest, gfn, 1, vm);
|
||||
assert_transition_res(-EPERM, __pkvm_host_donate_guest, pfn, gfn, vcpu);
|
||||
|
||||
assert_transition_res(0, hyp_pin_shared_mem, virt, virt + size);
|
||||
assert_transition_res(0, hyp_pin_shared_mem, virt, virt + size);
|
||||
@@ -1340,6 +1750,7 @@ void pkvm_ownership_selftest(void *base)
|
||||
assert_transition_res(-EPERM, __pkvm_hyp_donate_host, pfn, 1);
|
||||
assert_transition_res(-EPERM, __pkvm_host_share_guest, pfn, gfn, 1, vcpu, prot);
|
||||
assert_transition_res(-ENOENT, __pkvm_host_unshare_guest, gfn, 1, vm);
|
||||
assert_transition_res(-EPERM, __pkvm_host_donate_guest, pfn, gfn, vcpu);
|
||||
|
||||
hyp_unpin_shared_mem(virt, virt + size);
|
||||
assert_page_state();
|
||||
@@ -1359,6 +1770,7 @@ void pkvm_ownership_selftest(void *base)
|
||||
assert_transition_res(-EPERM, __pkvm_hyp_donate_host, pfn, 1);
|
||||
assert_transition_res(-EPERM, __pkvm_host_share_guest, pfn, gfn, 1, vcpu, prot);
|
||||
assert_transition_res(-ENOENT, __pkvm_host_unshare_guest, gfn, 1, vm);
|
||||
assert_transition_res(-EPERM, __pkvm_host_donate_guest, pfn, gfn, vcpu);
|
||||
assert_transition_res(-EPERM, hyp_pin_shared_mem, virt, virt + size);
|
||||
|
||||
selftest_state.host = PKVM_PAGE_OWNED;
|
||||
@@ -1375,6 +1787,7 @@ void pkvm_ownership_selftest(void *base)
|
||||
assert_transition_res(-EPERM, __pkvm_host_share_hyp, pfn);
|
||||
assert_transition_res(-EPERM, __pkvm_host_unshare_hyp, pfn);
|
||||
assert_transition_res(-EPERM, __pkvm_hyp_donate_host, pfn, 1);
|
||||
assert_transition_res(-EPERM, __pkvm_host_donate_guest, pfn, gfn, vcpu);
|
||||
assert_transition_res(-EPERM, hyp_pin_shared_mem, virt, virt + size);
|
||||
|
||||
selftest_state.guest[1] = PKVM_PAGE_SHARED_BORROWED;
|
||||
@@ -1388,10 +1801,70 @@ void pkvm_ownership_selftest(void *base)
|
||||
selftest_state.host = PKVM_PAGE_OWNED;
|
||||
assert_transition_res(0, __pkvm_host_unshare_guest, gfn + 1, 1, vm);
|
||||
|
||||
selftest_state.host = PKVM_NOPAGE;
|
||||
selftest_state.guest[0] = PKVM_PAGE_OWNED;
|
||||
assert_transition_res(0, __pkvm_host_donate_guest, pfn, gfn, vcpu);
|
||||
assert_transition_res(-EPERM, __pkvm_host_donate_guest, pfn, gfn, vcpu);
|
||||
assert_transition_res(-EPERM, __pkvm_host_donate_guest, pfn, gfn + 1, vcpu);
|
||||
assert_transition_res(-EPERM, __pkvm_host_share_guest, pfn, gfn, 1, vcpu, prot);
|
||||
assert_transition_res(-EPERM, __pkvm_host_share_guest, pfn, gfn + 1, 1, vcpu, prot);
|
||||
assert_transition_res(-EPERM, __pkvm_host_share_ffa, pfn, 1);
|
||||
assert_transition_res(-EPERM, __pkvm_host_donate_hyp, pfn, 1);
|
||||
assert_transition_res(-EPERM, __pkvm_host_share_hyp, pfn);
|
||||
assert_transition_res(-EPERM, __pkvm_host_unshare_hyp, pfn);
|
||||
assert_transition_res(-EPERM, __pkvm_hyp_donate_host, pfn, 1);
|
||||
|
||||
selftest_state.host = PKVM_PAGE_SHARED_BORROWED;
|
||||
selftest_state.guest[0] = PKVM_PAGE_SHARED_OWNED;
|
||||
assert_transition_res(0, __pkvm_guest_share_host, vcpu, gfn);
|
||||
assert_transition_res(-EPERM, __pkvm_guest_share_host, vcpu, gfn);
|
||||
assert_transition_res(-EPERM, __pkvm_host_donate_guest, pfn, gfn, vcpu);
|
||||
assert_transition_res(-EPERM, __pkvm_host_donate_guest, pfn, gfn + 1, vcpu);
|
||||
assert_transition_res(-EPERM, __pkvm_host_share_guest, pfn, gfn, 1, vcpu, prot);
|
||||
assert_transition_res(-EPERM, __pkvm_host_share_guest, pfn, gfn + 1, 1, vcpu, prot);
|
||||
assert_transition_res(-EPERM, __pkvm_host_share_ffa, pfn, 1);
|
||||
assert_transition_res(-EPERM, __pkvm_host_donate_hyp, pfn, 1);
|
||||
assert_transition_res(-EPERM, __pkvm_host_share_hyp, pfn);
|
||||
assert_transition_res(-EPERM, __pkvm_host_unshare_hyp, pfn);
|
||||
assert_transition_res(-EPERM, __pkvm_hyp_donate_host, pfn, 1);
|
||||
|
||||
selftest_state.host = PKVM_NOPAGE;
|
||||
selftest_state.guest[0] = PKVM_PAGE_OWNED;
|
||||
assert_transition_res(0, __pkvm_guest_unshare_host, vcpu, gfn);
|
||||
assert_transition_res(-EPERM, __pkvm_guest_unshare_host, vcpu, gfn);
|
||||
assert_transition_res(-EPERM, __pkvm_host_donate_guest, pfn, gfn, vcpu);
|
||||
assert_transition_res(-EPERM, __pkvm_host_donate_guest, pfn, gfn + 1, vcpu);
|
||||
assert_transition_res(-EPERM, __pkvm_host_share_guest, pfn, gfn, 1, vcpu, prot);
|
||||
assert_transition_res(-EPERM, __pkvm_host_share_guest, pfn, gfn + 1, 1, vcpu, prot);
|
||||
assert_transition_res(-EPERM, __pkvm_host_share_ffa, pfn, 1);
|
||||
assert_transition_res(-EPERM, __pkvm_host_donate_hyp, pfn, 1);
|
||||
assert_transition_res(-EPERM, __pkvm_host_share_hyp, pfn);
|
||||
assert_transition_res(-EPERM, __pkvm_host_unshare_hyp, pfn);
|
||||
assert_transition_res(-EPERM, __pkvm_hyp_donate_host, pfn, 1);
|
||||
|
||||
selftest_state.host = PKVM_PAGE_OWNED;
|
||||
selftest_state.guest[0] = PKVM_POISON;
|
||||
assert_transition_res(0, __pkvm_host_force_reclaim_page_guest, phys);
|
||||
assert_transition_res(-EPERM, __pkvm_host_donate_guest, pfn, gfn, vcpu);
|
||||
assert_transition_res(-EPERM, __pkvm_host_share_guest, pfn, gfn, 1, vcpu, prot);
|
||||
assert_transition_res(-EHWPOISON, __pkvm_guest_share_host, vcpu, gfn);
|
||||
assert_transition_res(-EHWPOISON, __pkvm_guest_unshare_host, vcpu, gfn);
|
||||
|
||||
selftest_state.host = PKVM_NOPAGE;
|
||||
selftest_state.guest[1] = PKVM_PAGE_OWNED;
|
||||
assert_transition_res(0, __pkvm_host_donate_guest, pfn, gfn + 1, vcpu);
|
||||
|
||||
selftest_state.host = PKVM_PAGE_OWNED;
|
||||
selftest_state.guest[1] = PKVM_NOPAGE;
|
||||
assert_transition_res(0, __pkvm_host_reclaim_page_guest, gfn + 1, vm);
|
||||
assert_transition_res(-EPERM, __pkvm_host_donate_guest, pfn, gfn, vcpu);
|
||||
assert_transition_res(-EPERM, __pkvm_host_share_guest, pfn, gfn, 1, vcpu, prot);
|
||||
|
||||
selftest_state.host = PKVM_NOPAGE;
|
||||
selftest_state.hyp = PKVM_PAGE_OWNED;
|
||||
assert_transition_res(0, __pkvm_host_donate_hyp, pfn, 1);
|
||||
|
||||
teardown_selftest_vm();
|
||||
selftest_page->refcount = 1;
|
||||
hyp_put_page(&host_s2_pool, virt);
|
||||
}
|
||||
|
||||
@@ -244,7 +244,7 @@ static void *fixmap_map_slot(struct hyp_fixmap_slot *slot, phys_addr_t phys)
|
||||
|
||||
void *hyp_fixmap_map(phys_addr_t phys)
|
||||
{
|
||||
return fixmap_map_slot(this_cpu_ptr(&fixmap_slots), phys);
|
||||
return fixmap_map_slot(this_cpu_ptr(&fixmap_slots), phys) + offset_in_page(phys);
|
||||
}
|
||||
|
||||
static void fixmap_clear_slot(struct hyp_fixmap_slot *slot)
|
||||
@@ -366,7 +366,7 @@ void *hyp_fixblock_map(phys_addr_t phys, size_t *size)
|
||||
#ifdef HAS_FIXBLOCK
|
||||
*size = PMD_SIZE;
|
||||
hyp_spin_lock(&hyp_fixblock_lock);
|
||||
return fixmap_map_slot(&hyp_fixblock_slot, phys);
|
||||
return fixmap_map_slot(&hyp_fixblock_slot, phys) + offset_in_page(phys);
|
||||
#else
|
||||
*size = PAGE_SIZE;
|
||||
return hyp_fixmap_map(phys);
|
||||
|
||||
@@ -4,6 +4,8 @@
|
||||
* Author: Fuad Tabba <tabba@google.com>
|
||||
*/
|
||||
|
||||
#include <kvm/arm_hypercalls.h>
|
||||
|
||||
#include <linux/kvm_host.h>
|
||||
#include <linux/mm.h>
|
||||
|
||||
@@ -222,6 +224,7 @@ static struct pkvm_hyp_vm **vm_table;
|
||||
|
||||
void pkvm_hyp_vm_table_init(void *tbl)
|
||||
{
|
||||
BUILD_BUG_ON((u64)HANDLE_OFFSET + KVM_MAX_PVMS > (pkvm_handle_t)-1);
|
||||
WARN_ON(vm_table);
|
||||
vm_table = tbl;
|
||||
}
|
||||
@@ -229,10 +232,12 @@ void pkvm_hyp_vm_table_init(void *tbl)
|
||||
/*
|
||||
* Return the hyp vm structure corresponding to the handle.
|
||||
*/
|
||||
static struct pkvm_hyp_vm *get_vm_by_handle(pkvm_handle_t handle)
|
||||
struct pkvm_hyp_vm *get_vm_by_handle(pkvm_handle_t handle)
|
||||
{
|
||||
unsigned int idx = vm_handle_to_idx(handle);
|
||||
|
||||
hyp_assert_lock_held(&vm_table_lock);
|
||||
|
||||
if (unlikely(idx >= KVM_MAX_PVMS))
|
||||
return NULL;
|
||||
|
||||
@@ -255,7 +260,10 @@ struct pkvm_hyp_vcpu *pkvm_load_hyp_vcpu(pkvm_handle_t handle,
|
||||
|
||||
hyp_spin_lock(&vm_table_lock);
|
||||
hyp_vm = get_vm_by_handle(handle);
|
||||
if (!hyp_vm || hyp_vm->kvm.created_vcpus <= vcpu_idx)
|
||||
if (!hyp_vm || hyp_vm->kvm.arch.pkvm.is_dying)
|
||||
goto unlock;
|
||||
|
||||
if (hyp_vm->kvm.created_vcpus <= vcpu_idx)
|
||||
goto unlock;
|
||||
|
||||
hyp_vcpu = hyp_vm->vcpus[vcpu_idx];
|
||||
@@ -719,6 +727,55 @@ void __pkvm_unreserve_vm(pkvm_handle_t handle)
|
||||
hyp_spin_unlock(&vm_table_lock);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NVHE_EL2_DEBUG
|
||||
static struct pkvm_hyp_vm selftest_vm = {
|
||||
.kvm = {
|
||||
.arch = {
|
||||
.mmu = {
|
||||
.arch = &selftest_vm.kvm.arch,
|
||||
.pgt = &selftest_vm.pgt,
|
||||
},
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
static struct pkvm_hyp_vcpu selftest_vcpu = {
|
||||
.vcpu = {
|
||||
.arch = {
|
||||
.hw_mmu = &selftest_vm.kvm.arch.mmu,
|
||||
},
|
||||
.kvm = &selftest_vm.kvm,
|
||||
},
|
||||
};
|
||||
|
||||
struct pkvm_hyp_vcpu *init_selftest_vm(void *virt)
|
||||
{
|
||||
struct hyp_page *p = hyp_virt_to_page(virt);
|
||||
int i;
|
||||
|
||||
selftest_vm.kvm.arch.mmu.vtcr = host_mmu.arch.mmu.vtcr;
|
||||
WARN_ON(kvm_guest_prepare_stage2(&selftest_vm, virt));
|
||||
|
||||
for (i = 0; i < pkvm_selftest_pages(); i++) {
|
||||
if (p[i].refcount)
|
||||
continue;
|
||||
p[i].refcount = 1;
|
||||
hyp_put_page(&selftest_vm.pool, hyp_page_to_virt(&p[i]));
|
||||
}
|
||||
|
||||
selftest_vm.kvm.arch.pkvm.handle = __pkvm_reserve_vm();
|
||||
insert_vm_table_entry(selftest_vm.kvm.arch.pkvm.handle, &selftest_vm);
|
||||
return &selftest_vcpu;
|
||||
}
|
||||
|
||||
void teardown_selftest_vm(void)
|
||||
{
|
||||
hyp_spin_lock(&vm_table_lock);
|
||||
remove_vm_table_entry(selftest_vm.kvm.arch.pkvm.handle);
|
||||
hyp_spin_unlock(&vm_table_lock);
|
||||
}
|
||||
#endif /* CONFIG_NVHE_EL2_DEBUG */
|
||||
|
||||
/*
|
||||
* Initialize the hypervisor copy of the VM state using host-donated memory.
|
||||
*
|
||||
@@ -859,7 +916,54 @@ teardown_donated_memory(struct kvm_hyp_memcache *mc, void *addr, size_t size)
|
||||
unmap_donated_memory_noclear(addr, size);
|
||||
}
|
||||
|
||||
int __pkvm_teardown_vm(pkvm_handle_t handle)
|
||||
int __pkvm_reclaim_dying_guest_page(pkvm_handle_t handle, u64 gfn)
|
||||
{
|
||||
struct pkvm_hyp_vm *hyp_vm = get_pkvm_hyp_vm(handle);
|
||||
int ret = -EINVAL;
|
||||
|
||||
if (!hyp_vm)
|
||||
return ret;
|
||||
|
||||
if (hyp_vm->kvm.arch.pkvm.is_dying)
|
||||
ret = __pkvm_host_reclaim_page_guest(gfn, hyp_vm);
|
||||
|
||||
put_pkvm_hyp_vm(hyp_vm);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static struct pkvm_hyp_vm *get_pkvm_unref_hyp_vm_locked(pkvm_handle_t handle)
|
||||
{
|
||||
struct pkvm_hyp_vm *hyp_vm;
|
||||
|
||||
hyp_assert_lock_held(&vm_table_lock);
|
||||
|
||||
hyp_vm = get_vm_by_handle(handle);
|
||||
if (!hyp_vm || hyp_page_count(hyp_vm))
|
||||
return NULL;
|
||||
|
||||
return hyp_vm;
|
||||
}
|
||||
|
||||
int __pkvm_start_teardown_vm(pkvm_handle_t handle)
|
||||
{
|
||||
struct pkvm_hyp_vm *hyp_vm;
|
||||
int ret = 0;
|
||||
|
||||
hyp_spin_lock(&vm_table_lock);
|
||||
hyp_vm = get_pkvm_unref_hyp_vm_locked(handle);
|
||||
if (!hyp_vm || hyp_vm->kvm.arch.pkvm.is_dying) {
|
||||
ret = -EINVAL;
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
hyp_vm->kvm.arch.pkvm.is_dying = true;
|
||||
unlock:
|
||||
hyp_spin_unlock(&vm_table_lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int __pkvm_finalize_teardown_vm(pkvm_handle_t handle)
|
||||
{
|
||||
struct kvm_hyp_memcache *mc, *stage2_mc;
|
||||
struct pkvm_hyp_vm *hyp_vm;
|
||||
@@ -869,14 +973,9 @@ int __pkvm_teardown_vm(pkvm_handle_t handle)
|
||||
int err;
|
||||
|
||||
hyp_spin_lock(&vm_table_lock);
|
||||
hyp_vm = get_vm_by_handle(handle);
|
||||
if (!hyp_vm) {
|
||||
err = -ENOENT;
|
||||
goto err_unlock;
|
||||
}
|
||||
|
||||
if (WARN_ON(hyp_page_count(hyp_vm))) {
|
||||
err = -EBUSY;
|
||||
hyp_vm = get_pkvm_unref_hyp_vm_locked(handle);
|
||||
if (!hyp_vm || !hyp_vm->kvm.arch.pkvm.is_dying) {
|
||||
err = -EINVAL;
|
||||
goto err_unlock;
|
||||
}
|
||||
|
||||
@@ -922,3 +1021,121 @@ err_unlock:
|
||||
hyp_spin_unlock(&vm_table_lock);
|
||||
return err;
|
||||
}
|
||||
|
||||
static u64 __pkvm_memshare_page_req(struct kvm_vcpu *vcpu, u64 ipa)
|
||||
{
|
||||
u64 elr;
|
||||
|
||||
/* Fake up a data abort (level 3 translation fault on write) */
|
||||
vcpu->arch.fault.esr_el2 = (ESR_ELx_EC_DABT_LOW << ESR_ELx_EC_SHIFT) |
|
||||
ESR_ELx_WNR | ESR_ELx_FSC_FAULT |
|
||||
FIELD_PREP(ESR_ELx_FSC_LEVEL, 3);
|
||||
|
||||
/* Shuffle the IPA around into the HPFAR */
|
||||
vcpu->arch.fault.hpfar_el2 = (HPFAR_EL2_NS | (ipa >> 8)) & HPFAR_MASK;
|
||||
|
||||
/* This is a virtual address. 0's good. Let's go with 0. */
|
||||
vcpu->arch.fault.far_el2 = 0;
|
||||
|
||||
/* Rewind the ELR so we return to the HVC once the IPA is mapped */
|
||||
elr = read_sysreg(elr_el2);
|
||||
elr -= 4;
|
||||
write_sysreg(elr, elr_el2);
|
||||
|
||||
return ARM_EXCEPTION_TRAP;
|
||||
}
|
||||
|
||||
static bool pkvm_memshare_call(u64 *ret, struct kvm_vcpu *vcpu, u64 *exit_code)
|
||||
{
|
||||
struct pkvm_hyp_vcpu *hyp_vcpu;
|
||||
u64 ipa = smccc_get_arg1(vcpu);
|
||||
|
||||
if (!PAGE_ALIGNED(ipa))
|
||||
goto out_guest;
|
||||
|
||||
hyp_vcpu = container_of(vcpu, struct pkvm_hyp_vcpu, vcpu);
|
||||
switch (__pkvm_guest_share_host(hyp_vcpu, hyp_phys_to_pfn(ipa))) {
|
||||
case 0:
|
||||
ret[0] = SMCCC_RET_SUCCESS;
|
||||
goto out_guest;
|
||||
case -ENOENT:
|
||||
/*
|
||||
* Convert the exception into a data abort so that the page
|
||||
* being shared is mapped into the guest next time.
|
||||
*/
|
||||
*exit_code = __pkvm_memshare_page_req(vcpu, ipa);
|
||||
goto out_host;
|
||||
}
|
||||
|
||||
out_guest:
|
||||
return true;
|
||||
out_host:
|
||||
return false;
|
||||
}
|
||||
|
||||
static void pkvm_memunshare_call(u64 *ret, struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct pkvm_hyp_vcpu *hyp_vcpu;
|
||||
u64 ipa = smccc_get_arg1(vcpu);
|
||||
|
||||
if (!PAGE_ALIGNED(ipa))
|
||||
return;
|
||||
|
||||
hyp_vcpu = container_of(vcpu, struct pkvm_hyp_vcpu, vcpu);
|
||||
if (!__pkvm_guest_unshare_host(hyp_vcpu, hyp_phys_to_pfn(ipa)))
|
||||
ret[0] = SMCCC_RET_SUCCESS;
|
||||
}
|
||||
|
||||
/*
|
||||
* Handler for protected VM HVC calls.
|
||||
*
|
||||
* Returns true if the hypervisor has handled the exit (and control
|
||||
* should return to the guest) or false if it hasn't (and the handling
|
||||
* should be performed by the host).
|
||||
*/
|
||||
bool kvm_handle_pvm_hvc64(struct kvm_vcpu *vcpu, u64 *exit_code)
|
||||
{
|
||||
u64 val[4] = { SMCCC_RET_INVALID_PARAMETER };
|
||||
bool handled = true;
|
||||
|
||||
switch (smccc_get_function(vcpu)) {
|
||||
case ARM_SMCCC_VENDOR_HYP_KVM_FEATURES_FUNC_ID:
|
||||
val[0] = BIT(ARM_SMCCC_KVM_FUNC_FEATURES);
|
||||
val[0] |= BIT(ARM_SMCCC_KVM_FUNC_HYP_MEMINFO);
|
||||
val[0] |= BIT(ARM_SMCCC_KVM_FUNC_MEM_SHARE);
|
||||
val[0] |= BIT(ARM_SMCCC_KVM_FUNC_MEM_UNSHARE);
|
||||
break;
|
||||
case ARM_SMCCC_VENDOR_HYP_KVM_HYP_MEMINFO_FUNC_ID:
|
||||
if (smccc_get_arg1(vcpu) ||
|
||||
smccc_get_arg2(vcpu) ||
|
||||
smccc_get_arg3(vcpu)) {
|
||||
break;
|
||||
}
|
||||
|
||||
val[0] = PAGE_SIZE;
|
||||
break;
|
||||
case ARM_SMCCC_VENDOR_HYP_KVM_MEM_SHARE_FUNC_ID:
|
||||
if (smccc_get_arg2(vcpu) ||
|
||||
smccc_get_arg3(vcpu)) {
|
||||
break;
|
||||
}
|
||||
|
||||
handled = pkvm_memshare_call(val, vcpu, exit_code);
|
||||
break;
|
||||
case ARM_SMCCC_VENDOR_HYP_KVM_MEM_UNSHARE_FUNC_ID:
|
||||
if (smccc_get_arg2(vcpu) ||
|
||||
smccc_get_arg3(vcpu)) {
|
||||
break;
|
||||
}
|
||||
|
||||
pkvm_memunshare_call(val, vcpu);
|
||||
break;
|
||||
default:
|
||||
/* Punt everything else back to the host, for now. */
|
||||
handled = false;
|
||||
}
|
||||
|
||||
if (handled)
|
||||
smccc_set_retval(vcpu, val[0], val[1], val[2], val[3]);
|
||||
return handled;
|
||||
}
|
||||
|
||||
@@ -6,11 +6,12 @@
|
||||
|
||||
#include <asm/kvm_asm.h>
|
||||
#include <asm/kvm_hyp.h>
|
||||
#include <asm/kvm_hypevents.h>
|
||||
#include <asm/kvm_mmu.h>
|
||||
#include <linux/arm-smccc.h>
|
||||
#include <linux/kvm_host.h>
|
||||
#include <uapi/linux/psci.h>
|
||||
|
||||
#include <nvhe/arm-smccc.h>
|
||||
#include <nvhe/memory.h>
|
||||
#include <nvhe/trap_handler.h>
|
||||
|
||||
@@ -65,7 +66,7 @@ static unsigned long psci_call(unsigned long fn, unsigned long arg0,
|
||||
{
|
||||
struct arm_smccc_res res;
|
||||
|
||||
arm_smccc_1_1_smc(fn, arg0, arg1, arg2, &res);
|
||||
hyp_smccc_1_1_smc(fn, arg0, arg1, arg2, &res);
|
||||
return res.a0;
|
||||
}
|
||||
|
||||
@@ -200,30 +201,42 @@ static int psci_system_suspend(u64 func_id, struct kvm_cpu_context *host_ctxt)
|
||||
__hyp_pa(init_params), 0);
|
||||
}
|
||||
|
||||
asmlinkage void __noreturn __kvm_host_psci_cpu_entry(bool is_cpu_on)
|
||||
static void __noreturn __kvm_host_psci_cpu_entry(unsigned long pc, unsigned long r0)
|
||||
{
|
||||
struct psci_boot_args *boot_args;
|
||||
struct kvm_cpu_context *host_ctxt;
|
||||
struct kvm_cpu_context *host_ctxt = host_data_ptr(host_ctxt);
|
||||
|
||||
host_ctxt = host_data_ptr(host_ctxt);
|
||||
trace_hyp_enter(host_ctxt, HYP_REASON_PSCI);
|
||||
|
||||
if (is_cpu_on)
|
||||
boot_args = this_cpu_ptr(&cpu_on_args);
|
||||
else
|
||||
boot_args = this_cpu_ptr(&suspend_args);
|
||||
|
||||
cpu_reg(host_ctxt, 0) = boot_args->r0;
|
||||
write_sysreg_el2(boot_args->pc, SYS_ELR);
|
||||
|
||||
if (is_cpu_on)
|
||||
release_boot_args(boot_args);
|
||||
cpu_reg(host_ctxt, 0) = r0;
|
||||
write_sysreg_el2(pc, SYS_ELR);
|
||||
|
||||
write_sysreg_el1(INIT_SCTLR_EL1_MMU_OFF, SYS_SCTLR);
|
||||
write_sysreg(INIT_PSTATE_EL1, SPSR_EL2);
|
||||
|
||||
trace_hyp_exit(host_ctxt, HYP_REASON_PSCI);
|
||||
__host_enter(host_ctxt);
|
||||
}
|
||||
|
||||
asmlinkage void __noreturn __kvm_host_psci_cpu_on_entry(void)
|
||||
{
|
||||
struct psci_boot_args *boot_args = this_cpu_ptr(&cpu_on_args);
|
||||
unsigned long pc, r0;
|
||||
|
||||
pc = READ_ONCE(boot_args->pc);
|
||||
r0 = READ_ONCE(boot_args->r0);
|
||||
|
||||
release_boot_args(boot_args);
|
||||
|
||||
__kvm_host_psci_cpu_entry(pc, r0);
|
||||
}
|
||||
|
||||
asmlinkage void __noreturn __kvm_host_psci_cpu_resume_entry(void)
|
||||
{
|
||||
struct psci_boot_args *boot_args = this_cpu_ptr(&suspend_args);
|
||||
|
||||
__kvm_host_psci_cpu_entry(boot_args->pc, boot_args->r0);
|
||||
}
|
||||
|
||||
static unsigned long psci_0_1_handler(u64 func_id, struct kvm_cpu_context *host_ctxt)
|
||||
{
|
||||
if (is_psci_0_1(cpu_off, func_id) || is_psci_0_1(migrate, func_id))
|
||||
|
||||
@@ -341,8 +341,7 @@ out:
|
||||
__host_enter(host_ctxt);
|
||||
}
|
||||
|
||||
int __pkvm_init(phys_addr_t phys, unsigned long size, unsigned long nr_cpus,
|
||||
unsigned long *per_cpu_base, u32 hyp_va_bits)
|
||||
int __pkvm_init(phys_addr_t phys, unsigned long size, unsigned long *per_cpu_base, u32 hyp_va_bits)
|
||||
{
|
||||
struct kvm_nvhe_init_params *params;
|
||||
void *virt = hyp_phys_to_virt(phys);
|
||||
@@ -355,7 +354,6 @@ int __pkvm_init(phys_addr_t phys, unsigned long size, unsigned long nr_cpus,
|
||||
return -EINVAL;
|
||||
|
||||
hyp_spin_lock_init(&pkvm_pgd_lock);
|
||||
hyp_nr_cpus = nr_cpus;
|
||||
|
||||
ret = divide_memory_pool(virt, size);
|
||||
if (ret)
|
||||
|
||||
@@ -34,7 +34,7 @@ static void hyp_prepare_backtrace(unsigned long fp, unsigned long pc)
|
||||
stacktrace_info->pc = pc;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_PROTECTED_NVHE_STACKTRACE
|
||||
#ifdef CONFIG_PKVM_STACKTRACE
|
||||
#include <asm/stacktrace/nvhe.h>
|
||||
|
||||
DEFINE_PER_CPU(unsigned long [NVHE_STACKTRACE_SIZE/sizeof(long)], pkvm_stacktrace);
|
||||
@@ -134,11 +134,11 @@ static void pkvm_save_backtrace(unsigned long fp, unsigned long pc)
|
||||
|
||||
unwind(&state, pkvm_save_backtrace_entry, &idx);
|
||||
}
|
||||
#else /* !CONFIG_PROTECTED_NVHE_STACKTRACE */
|
||||
#else /* !CONFIG_PKVM_STACKTRACE */
|
||||
static void pkvm_save_backtrace(unsigned long fp, unsigned long pc)
|
||||
{
|
||||
}
|
||||
#endif /* CONFIG_PROTECTED_NVHE_STACKTRACE */
|
||||
#endif /* CONFIG_PKVM_STACKTRACE */
|
||||
|
||||
/*
|
||||
* kvm_nvhe_prepare_backtrace - prepare to dump the nVHE backtrace
|
||||
|
||||
@@ -7,7 +7,6 @@
|
||||
#include <hyp/switch.h>
|
||||
#include <hyp/sysreg-sr.h>
|
||||
|
||||
#include <linux/arm-smccc.h>
|
||||
#include <linux/kvm_host.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/jump_label.h>
|
||||
@@ -21,6 +20,7 @@
|
||||
#include <asm/kvm_asm.h>
|
||||
#include <asm/kvm_emulate.h>
|
||||
#include <asm/kvm_hyp.h>
|
||||
#include <asm/kvm_hypevents.h>
|
||||
#include <asm/kvm_mmu.h>
|
||||
#include <asm/fpsimd.h>
|
||||
#include <asm/debug-monitors.h>
|
||||
@@ -44,6 +44,9 @@ struct fgt_masks hfgwtr2_masks;
|
||||
struct fgt_masks hfgitr2_masks;
|
||||
struct fgt_masks hdfgrtr2_masks;
|
||||
struct fgt_masks hdfgwtr2_masks;
|
||||
struct fgt_masks ich_hfgrtr_masks;
|
||||
struct fgt_masks ich_hfgwtr_masks;
|
||||
struct fgt_masks ich_hfgitr_masks;
|
||||
|
||||
extern void kvm_nvhe_prepare_backtrace(unsigned long fp, unsigned long pc);
|
||||
|
||||
@@ -110,6 +113,12 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu)
|
||||
/* Save VGICv3 state on non-VHE systems */
|
||||
static void __hyp_vgic_save_state(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if (vgic_is_v5(kern_hyp_va(vcpu->kvm))) {
|
||||
__vgic_v5_save_state(&vcpu->arch.vgic_cpu.vgic_v5);
|
||||
__vgic_v5_save_ppi_state(&vcpu->arch.vgic_cpu.vgic_v5);
|
||||
return;
|
||||
}
|
||||
|
||||
if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) {
|
||||
__vgic_v3_save_state(&vcpu->arch.vgic_cpu.vgic_v3);
|
||||
__vgic_v3_deactivate_traps(&vcpu->arch.vgic_cpu.vgic_v3);
|
||||
@@ -119,6 +128,12 @@ static void __hyp_vgic_save_state(struct kvm_vcpu *vcpu)
|
||||
/* Restore VGICv3 state on non-VHE systems */
|
||||
static void __hyp_vgic_restore_state(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if (vgic_is_v5(kern_hyp_va(vcpu->kvm))) {
|
||||
__vgic_v5_restore_state(&vcpu->arch.vgic_cpu.vgic_v5);
|
||||
__vgic_v5_restore_ppi_state(&vcpu->arch.vgic_cpu.vgic_v5);
|
||||
return;
|
||||
}
|
||||
|
||||
if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) {
|
||||
__vgic_v3_activate_traps(&vcpu->arch.vgic_cpu.vgic_v3);
|
||||
__vgic_v3_restore_state(&vcpu->arch.vgic_cpu.vgic_v3);
|
||||
@@ -190,6 +205,7 @@ static const exit_handler_fn hyp_exit_handlers[] = {
|
||||
|
||||
static const exit_handler_fn pvm_exit_handlers[] = {
|
||||
[0 ... ESR_ELx_EC_MAX] = NULL,
|
||||
[ESR_ELx_EC_HVC64] = kvm_handle_pvm_hvc64,
|
||||
[ESR_ELx_EC_SYS64] = kvm_handle_pvm_sys64,
|
||||
[ESR_ELx_EC_SVE] = kvm_handle_pvm_restricted,
|
||||
[ESR_ELx_EC_FP_ASIMD] = kvm_hyp_handle_fpsimd,
|
||||
@@ -278,7 +294,7 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
|
||||
* We're about to restore some new MMU state. Make sure
|
||||
* ongoing page-table walks that have started before we
|
||||
* trapped to EL2 have completed. This also synchronises the
|
||||
* above disabling of BRBE, SPE and TRBE.
|
||||
* above disabling of BRBE.
|
||||
*
|
||||
* See DDI0487I.a D8.1.5 "Out-of-context translation regimes",
|
||||
* rule R_LFHQG and subsequent information statements.
|
||||
@@ -308,10 +324,13 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
|
||||
__debug_switch_to_guest(vcpu);
|
||||
|
||||
do {
|
||||
trace_hyp_exit(host_ctxt, HYP_REASON_ERET_GUEST);
|
||||
|
||||
/* Jump in the fire! */
|
||||
exit_code = __guest_enter(vcpu);
|
||||
|
||||
/* And we're baaack! */
|
||||
trace_hyp_enter(host_ctxt, HYP_REASON_GUEST_EXIT);
|
||||
} while (fixup_guest_exit(vcpu, &exit_code));
|
||||
|
||||
__sysreg_save_state_nvhe(guest_ctxt);
|
||||
|
||||
@@ -20,6 +20,7 @@
|
||||
*/
|
||||
u64 id_aa64pfr0_el1_sys_val;
|
||||
u64 id_aa64pfr1_el1_sys_val;
|
||||
u64 id_aa64pfr2_el1_sys_val;
|
||||
u64 id_aa64isar0_el1_sys_val;
|
||||
u64 id_aa64isar1_el1_sys_val;
|
||||
u64 id_aa64isar2_el1_sys_val;
|
||||
@@ -108,6 +109,11 @@ static const struct pvm_ftr_bits pvmid_aa64pfr1[] = {
|
||||
FEAT_END
|
||||
};
|
||||
|
||||
static const struct pvm_ftr_bits pvmid_aa64pfr2[] = {
|
||||
MAX_FEAT(ID_AA64PFR2_EL1, GCIE, NI),
|
||||
FEAT_END
|
||||
};
|
||||
|
||||
static const struct pvm_ftr_bits pvmid_aa64mmfr0[] = {
|
||||
MAX_FEAT_ENUM(ID_AA64MMFR0_EL1, PARANGE, 40),
|
||||
MAX_FEAT_ENUM(ID_AA64MMFR0_EL1, ASIDBITS, 16),
|
||||
@@ -221,6 +227,8 @@ static u64 pvm_calc_id_reg(const struct kvm_vcpu *vcpu, u32 id)
|
||||
return get_restricted_features(vcpu, id_aa64pfr0_el1_sys_val, pvmid_aa64pfr0);
|
||||
case SYS_ID_AA64PFR1_EL1:
|
||||
return get_restricted_features(vcpu, id_aa64pfr1_el1_sys_val, pvmid_aa64pfr1);
|
||||
case SYS_ID_AA64PFR2_EL1:
|
||||
return get_restricted_features(vcpu, id_aa64pfr2_el1_sys_val, pvmid_aa64pfr2);
|
||||
case SYS_ID_AA64ISAR0_EL1:
|
||||
return id_aa64isar0_el1_sys_val;
|
||||
case SYS_ID_AA64ISAR1_EL1:
|
||||
@@ -392,6 +400,14 @@ static const struct sys_reg_desc pvm_sys_reg_descs[] = {
|
||||
/* Cache maintenance by set/way operations are restricted. */
|
||||
|
||||
/* Debug and Trace Registers are restricted. */
|
||||
RAZ_WI(SYS_DBGBVRn_EL1(0)),
|
||||
RAZ_WI(SYS_DBGBCRn_EL1(0)),
|
||||
RAZ_WI(SYS_DBGWVRn_EL1(0)),
|
||||
RAZ_WI(SYS_DBGWCRn_EL1(0)),
|
||||
RAZ_WI(SYS_MDSCR_EL1),
|
||||
RAZ_WI(SYS_OSLAR_EL1),
|
||||
RAZ_WI(SYS_OSLSR_EL1),
|
||||
RAZ_WI(SYS_OSDLR_EL1),
|
||||
|
||||
/* Group 1 ID registers */
|
||||
HOST_HANDLED(SYS_REVIDR_EL1),
|
||||
@@ -431,7 +447,7 @@ static const struct sys_reg_desc pvm_sys_reg_descs[] = {
|
||||
/* CRm=4 */
|
||||
AARCH64(SYS_ID_AA64PFR0_EL1),
|
||||
AARCH64(SYS_ID_AA64PFR1_EL1),
|
||||
ID_UNALLOCATED(4,2),
|
||||
AARCH64(SYS_ID_AA64PFR2_EL1),
|
||||
ID_UNALLOCATED(4,3),
|
||||
AARCH64(SYS_ID_AA64ZFR0_EL1),
|
||||
ID_UNALLOCATED(4,5),
|
||||
|
||||
306
arch/arm64/kvm/hyp/nvhe/trace.c
Normal file
306
arch/arm64/kvm/hyp/nvhe/trace.c
Normal file
@@ -0,0 +1,306 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* Copyright (C) 2025 Google LLC
|
||||
* Author: Vincent Donnefort <vdonnefort@google.com>
|
||||
*/
|
||||
|
||||
#include <nvhe/clock.h>
|
||||
#include <nvhe/mem_protect.h>
|
||||
#include <nvhe/mm.h>
|
||||
#include <nvhe/trace.h>
|
||||
|
||||
#include <asm/percpu.h>
|
||||
#include <asm/kvm_mmu.h>
|
||||
#include <asm/local.h>
|
||||
|
||||
#include "simple_ring_buffer.c"
|
||||
|
||||
static DEFINE_PER_CPU(struct simple_rb_per_cpu, __simple_rbs);
|
||||
|
||||
static struct hyp_trace_buffer {
|
||||
struct simple_rb_per_cpu __percpu *simple_rbs;
|
||||
void *bpages_backing_start;
|
||||
size_t bpages_backing_size;
|
||||
hyp_spinlock_t lock;
|
||||
} trace_buffer = {
|
||||
.simple_rbs = &__simple_rbs,
|
||||
.lock = __HYP_SPIN_LOCK_UNLOCKED,
|
||||
};
|
||||
|
||||
static bool hyp_trace_buffer_loaded(struct hyp_trace_buffer *trace_buffer)
|
||||
{
|
||||
return trace_buffer->bpages_backing_size > 0;
|
||||
}
|
||||
|
||||
void *tracing_reserve_entry(unsigned long length)
|
||||
{
|
||||
return simple_ring_buffer_reserve(this_cpu_ptr(trace_buffer.simple_rbs), length,
|
||||
trace_clock());
|
||||
}
|
||||
|
||||
void tracing_commit_entry(void)
|
||||
{
|
||||
simple_ring_buffer_commit(this_cpu_ptr(trace_buffer.simple_rbs));
|
||||
}
|
||||
|
||||
static int __admit_host_mem(void *start, u64 size)
|
||||
{
|
||||
if (!PAGE_ALIGNED(start) || !PAGE_ALIGNED(size) || !size)
|
||||
return -EINVAL;
|
||||
|
||||
if (!is_protected_kvm_enabled())
|
||||
return 0;
|
||||
|
||||
return __pkvm_host_donate_hyp(hyp_virt_to_pfn(start), size >> PAGE_SHIFT);
|
||||
}
|
||||
|
||||
static void __release_host_mem(void *start, u64 size)
|
||||
{
|
||||
if (!is_protected_kvm_enabled())
|
||||
return;
|
||||
|
||||
WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn(start), size >> PAGE_SHIFT));
|
||||
}
|
||||
|
||||
static int hyp_trace_buffer_load_bpage_backing(struct hyp_trace_buffer *trace_buffer,
|
||||
struct hyp_trace_desc *desc)
|
||||
{
|
||||
void *start = (void *)kern_hyp_va(desc->bpages_backing_start);
|
||||
size_t size = desc->bpages_backing_size;
|
||||
int ret;
|
||||
|
||||
ret = __admit_host_mem(start, size);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
memset(start, 0, size);
|
||||
|
||||
trace_buffer->bpages_backing_start = start;
|
||||
trace_buffer->bpages_backing_size = size;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void hyp_trace_buffer_unload_bpage_backing(struct hyp_trace_buffer *trace_buffer)
|
||||
{
|
||||
void *start = trace_buffer->bpages_backing_start;
|
||||
size_t size = trace_buffer->bpages_backing_size;
|
||||
|
||||
if (!size)
|
||||
return;
|
||||
|
||||
memset(start, 0, size);
|
||||
|
||||
__release_host_mem(start, size);
|
||||
|
||||
trace_buffer->bpages_backing_start = 0;
|
||||
trace_buffer->bpages_backing_size = 0;
|
||||
}
|
||||
|
||||
static void *__pin_shared_page(unsigned long kern_va)
|
||||
{
|
||||
void *va = kern_hyp_va((void *)kern_va);
|
||||
|
||||
if (!is_protected_kvm_enabled())
|
||||
return va;
|
||||
|
||||
return hyp_pin_shared_mem(va, va + PAGE_SIZE) ? NULL : va;
|
||||
}
|
||||
|
||||
static void __unpin_shared_page(void *va)
|
||||
{
|
||||
if (!is_protected_kvm_enabled())
|
||||
return;
|
||||
|
||||
hyp_unpin_shared_mem(va, va + PAGE_SIZE);
|
||||
}
|
||||
|
||||
static void hyp_trace_buffer_unload(struct hyp_trace_buffer *trace_buffer)
|
||||
{
|
||||
int cpu;
|
||||
|
||||
hyp_assert_lock_held(&trace_buffer->lock);
|
||||
|
||||
if (!hyp_trace_buffer_loaded(trace_buffer))
|
||||
return;
|
||||
|
||||
for (cpu = 0; cpu < hyp_nr_cpus; cpu++)
|
||||
simple_ring_buffer_unload_mm(per_cpu_ptr(trace_buffer->simple_rbs, cpu),
|
||||
__unpin_shared_page);
|
||||
|
||||
hyp_trace_buffer_unload_bpage_backing(trace_buffer);
|
||||
}
|
||||
|
||||
static int hyp_trace_buffer_load(struct hyp_trace_buffer *trace_buffer,
|
||||
struct hyp_trace_desc *desc)
|
||||
{
|
||||
struct simple_buffer_page *bpages;
|
||||
struct ring_buffer_desc *rb_desc;
|
||||
int ret, cpu;
|
||||
|
||||
hyp_assert_lock_held(&trace_buffer->lock);
|
||||
|
||||
if (hyp_trace_buffer_loaded(trace_buffer))
|
||||
return -EINVAL;
|
||||
|
||||
ret = hyp_trace_buffer_load_bpage_backing(trace_buffer, desc);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
bpages = trace_buffer->bpages_backing_start;
|
||||
for_each_ring_buffer_desc(rb_desc, cpu, &desc->trace_buffer_desc) {
|
||||
ret = simple_ring_buffer_init_mm(per_cpu_ptr(trace_buffer->simple_rbs, cpu),
|
||||
bpages, rb_desc, __pin_shared_page,
|
||||
__unpin_shared_page);
|
||||
if (ret)
|
||||
break;
|
||||
|
||||
bpages += rb_desc->nr_page_va;
|
||||
}
|
||||
|
||||
if (ret)
|
||||
hyp_trace_buffer_unload(trace_buffer);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static bool hyp_trace_desc_validate(struct hyp_trace_desc *desc, size_t desc_size)
|
||||
{
|
||||
struct ring_buffer_desc *rb_desc;
|
||||
unsigned int cpu;
|
||||
size_t nr_bpages;
|
||||
void *desc_end;
|
||||
|
||||
/*
|
||||
* Both desc_size and bpages_backing_size are untrusted host-provided
|
||||
* values. We rely on __pkvm_host_donate_hyp() to enforce their validity.
|
||||
*/
|
||||
desc_end = (void *)desc + desc_size;
|
||||
nr_bpages = desc->bpages_backing_size / sizeof(struct simple_buffer_page);
|
||||
|
||||
for_each_ring_buffer_desc(rb_desc, cpu, &desc->trace_buffer_desc) {
|
||||
/* Can we read nr_page_va? */
|
||||
if ((void *)rb_desc + struct_size(rb_desc, page_va, 0) > desc_end)
|
||||
return false;
|
||||
|
||||
/* Overflow desc? */
|
||||
if ((void *)rb_desc + struct_size(rb_desc, page_va, rb_desc->nr_page_va) > desc_end)
|
||||
return false;
|
||||
|
||||
/* Overflow bpages backing memory? */
|
||||
if (nr_bpages < rb_desc->nr_page_va)
|
||||
return false;
|
||||
|
||||
if (cpu >= hyp_nr_cpus)
|
||||
return false;
|
||||
|
||||
if (cpu != rb_desc->cpu)
|
||||
return false;
|
||||
|
||||
nr_bpages -= rb_desc->nr_page_va;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
int __tracing_load(unsigned long desc_hva, size_t desc_size)
|
||||
{
|
||||
struct hyp_trace_desc *desc = (struct hyp_trace_desc *)kern_hyp_va(desc_hva);
|
||||
int ret;
|
||||
|
||||
ret = __admit_host_mem(desc, desc_size);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (!hyp_trace_desc_validate(desc, desc_size))
|
||||
goto err_release_desc;
|
||||
|
||||
hyp_spin_lock(&trace_buffer.lock);
|
||||
|
||||
ret = hyp_trace_buffer_load(&trace_buffer, desc);
|
||||
|
||||
hyp_spin_unlock(&trace_buffer.lock);
|
||||
|
||||
err_release_desc:
|
||||
__release_host_mem(desc, desc_size);
|
||||
return ret;
|
||||
}
|
||||
|
||||
void __tracing_unload(void)
|
||||
{
|
||||
hyp_spin_lock(&trace_buffer.lock);
|
||||
hyp_trace_buffer_unload(&trace_buffer);
|
||||
hyp_spin_unlock(&trace_buffer.lock);
|
||||
}
|
||||
|
||||
int __tracing_enable(bool enable)
|
||||
{
|
||||
int cpu, ret = enable ? -EINVAL : 0;
|
||||
|
||||
hyp_spin_lock(&trace_buffer.lock);
|
||||
|
||||
if (!hyp_trace_buffer_loaded(&trace_buffer))
|
||||
goto unlock;
|
||||
|
||||
for (cpu = 0; cpu < hyp_nr_cpus; cpu++)
|
||||
simple_ring_buffer_enable_tracing(per_cpu_ptr(trace_buffer.simple_rbs, cpu),
|
||||
enable);
|
||||
|
||||
ret = 0;
|
||||
|
||||
unlock:
|
||||
hyp_spin_unlock(&trace_buffer.lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int __tracing_swap_reader(unsigned int cpu)
|
||||
{
|
||||
int ret = -ENODEV;
|
||||
|
||||
if (cpu >= hyp_nr_cpus)
|
||||
return -EINVAL;
|
||||
|
||||
hyp_spin_lock(&trace_buffer.lock);
|
||||
|
||||
if (hyp_trace_buffer_loaded(&trace_buffer))
|
||||
ret = simple_ring_buffer_swap_reader_page(
|
||||
per_cpu_ptr(trace_buffer.simple_rbs, cpu));
|
||||
|
||||
hyp_spin_unlock(&trace_buffer.lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
void __tracing_update_clock(u32 mult, u32 shift, u64 epoch_ns, u64 epoch_cyc)
|
||||
{
|
||||
int cpu;
|
||||
|
||||
/* After this loop, all CPUs are observing the new bank... */
|
||||
for (cpu = 0; cpu < hyp_nr_cpus; cpu++) {
|
||||
struct simple_rb_per_cpu *simple_rb = per_cpu_ptr(trace_buffer.simple_rbs, cpu);
|
||||
|
||||
while (READ_ONCE(simple_rb->status) == SIMPLE_RB_WRITING)
|
||||
;
|
||||
}
|
||||
|
||||
/* ...we can now override the old one and swap. */
|
||||
trace_clock_update(mult, shift, epoch_ns, epoch_cyc);
|
||||
}
|
||||
|
||||
int __tracing_reset(unsigned int cpu)
|
||||
{
|
||||
int ret = -ENODEV;
|
||||
|
||||
if (cpu >= hyp_nr_cpus)
|
||||
return -EINVAL;
|
||||
|
||||
hyp_spin_lock(&trace_buffer.lock);
|
||||
|
||||
if (hyp_trace_buffer_loaded(&trace_buffer))
|
||||
ret = simple_ring_buffer_reset(per_cpu_ptr(trace_buffer.simple_rbs, cpu));
|
||||
|
||||
hyp_spin_unlock(&trace_buffer.lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
@@ -114,11 +114,6 @@ static kvm_pte_t kvm_init_valid_leaf_pte(u64 pa, kvm_pte_t attr, s8 level)
|
||||
return pte;
|
||||
}
|
||||
|
||||
static kvm_pte_t kvm_init_invalid_leaf_owner(u8 owner_id)
|
||||
{
|
||||
return FIELD_PREP(KVM_INVALID_PTE_OWNER_MASK, owner_id);
|
||||
}
|
||||
|
||||
static int kvm_pgtable_visitor_cb(struct kvm_pgtable_walk_data *data,
|
||||
const struct kvm_pgtable_visit_ctx *ctx,
|
||||
enum kvm_pgtable_walk_flags visit)
|
||||
@@ -581,7 +576,7 @@ void kvm_pgtable_hyp_destroy(struct kvm_pgtable *pgt)
|
||||
struct stage2_map_data {
|
||||
const u64 phys;
|
||||
kvm_pte_t attr;
|
||||
u8 owner_id;
|
||||
kvm_pte_t pte_annot;
|
||||
|
||||
kvm_pte_t *anchor;
|
||||
kvm_pte_t *childp;
|
||||
@@ -798,7 +793,11 @@ static bool stage2_pte_is_counted(kvm_pte_t pte)
|
||||
|
||||
static bool stage2_pte_is_locked(kvm_pte_t pte)
|
||||
{
|
||||
return !kvm_pte_valid(pte) && (pte & KVM_INVALID_PTE_LOCKED);
|
||||
if (kvm_pte_valid(pte))
|
||||
return false;
|
||||
|
||||
return FIELD_GET(KVM_INVALID_PTE_TYPE_MASK, pte) ==
|
||||
KVM_INVALID_PTE_TYPE_LOCKED;
|
||||
}
|
||||
|
||||
static bool stage2_try_set_pte(const struct kvm_pgtable_visit_ctx *ctx, kvm_pte_t new)
|
||||
@@ -829,6 +828,7 @@ static bool stage2_try_break_pte(const struct kvm_pgtable_visit_ctx *ctx,
|
||||
struct kvm_s2_mmu *mmu)
|
||||
{
|
||||
struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops;
|
||||
kvm_pte_t locked_pte;
|
||||
|
||||
if (stage2_pte_is_locked(ctx->old)) {
|
||||
/*
|
||||
@@ -839,7 +839,9 @@ static bool stage2_try_break_pte(const struct kvm_pgtable_visit_ctx *ctx,
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!stage2_try_set_pte(ctx, KVM_INVALID_PTE_LOCKED))
|
||||
locked_pte = FIELD_PREP(KVM_INVALID_PTE_TYPE_MASK,
|
||||
KVM_INVALID_PTE_TYPE_LOCKED);
|
||||
if (!stage2_try_set_pte(ctx, locked_pte))
|
||||
return false;
|
||||
|
||||
if (!kvm_pgtable_walk_skip_bbm_tlbi(ctx)) {
|
||||
@@ -964,7 +966,7 @@ static int stage2_map_walker_try_leaf(const struct kvm_pgtable_visit_ctx *ctx,
|
||||
if (!data->annotation)
|
||||
new = kvm_init_valid_leaf_pte(phys, data->attr, ctx->level);
|
||||
else
|
||||
new = kvm_init_invalid_leaf_owner(data->owner_id);
|
||||
new = data->pte_annot;
|
||||
|
||||
/*
|
||||
* Skip updating the PTE if we are trying to recreate the exact
|
||||
@@ -1118,16 +1120,18 @@ int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
|
||||
return ret;
|
||||
}
|
||||
|
||||
int kvm_pgtable_stage2_set_owner(struct kvm_pgtable *pgt, u64 addr, u64 size,
|
||||
void *mc, u8 owner_id)
|
||||
int kvm_pgtable_stage2_annotate(struct kvm_pgtable *pgt, u64 addr, u64 size,
|
||||
void *mc, enum kvm_invalid_pte_type type,
|
||||
kvm_pte_t pte_annot)
|
||||
{
|
||||
int ret;
|
||||
struct stage2_map_data map_data = {
|
||||
.mmu = pgt->mmu,
|
||||
.memcache = mc,
|
||||
.owner_id = owner_id,
|
||||
.force_pte = true,
|
||||
.annotation = true,
|
||||
.pte_annot = pte_annot |
|
||||
FIELD_PREP(KVM_INVALID_PTE_TYPE_MASK, type),
|
||||
};
|
||||
struct kvm_pgtable_walker walker = {
|
||||
.cb = stage2_map_walker,
|
||||
@@ -1136,7 +1140,10 @@ int kvm_pgtable_stage2_set_owner(struct kvm_pgtable *pgt, u64 addr, u64 size,
|
||||
.arg = &map_data,
|
||||
};
|
||||
|
||||
if (owner_id > KVM_MAX_OWNER_ID)
|
||||
if (pte_annot & ~KVM_INVALID_PTE_ANNOT_MASK)
|
||||
return -EINVAL;
|
||||
|
||||
if (!type || type == KVM_INVALID_PTE_TYPE_LOCKED)
|
||||
return -EINVAL;
|
||||
|
||||
ret = kvm_pgtable_walk(pgt, addr, size, &walker);
|
||||
|
||||
166
arch/arm64/kvm/hyp/vgic-v5-sr.c
Normal file
166
arch/arm64/kvm/hyp/vgic-v5-sr.c
Normal file
@@ -0,0 +1,166 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* Copyright (C) 2025, 2026 - Arm Ltd
|
||||
*/
|
||||
|
||||
#include <linux/irqchip/arm-gic-v5.h>
|
||||
|
||||
#include <asm/kvm_hyp.h>
|
||||
|
||||
void __vgic_v5_save_apr(struct vgic_v5_cpu_if *cpu_if)
|
||||
{
|
||||
cpu_if->vgic_apr = read_sysreg_s(SYS_ICH_APR_EL2);
|
||||
}
|
||||
|
||||
static void __vgic_v5_compat_mode_disable(void)
|
||||
{
|
||||
sysreg_clear_set_s(SYS_ICH_VCTLR_EL2, ICH_VCTLR_EL2_V3, 0);
|
||||
isb();
|
||||
}
|
||||
|
||||
void __vgic_v5_restore_vmcr_apr(struct vgic_v5_cpu_if *cpu_if)
|
||||
{
|
||||
__vgic_v5_compat_mode_disable();
|
||||
|
||||
write_sysreg_s(cpu_if->vgic_vmcr, SYS_ICH_VMCR_EL2);
|
||||
write_sysreg_s(cpu_if->vgic_apr, SYS_ICH_APR_EL2);
|
||||
}
|
||||
|
||||
void __vgic_v5_save_ppi_state(struct vgic_v5_cpu_if *cpu_if)
|
||||
{
|
||||
/*
|
||||
* The following code assumes that the bitmap storage that we have for
|
||||
* PPIs is either 64 (architected PPIs, only) or 128 bits (architected &
|
||||
* impdef PPIs).
|
||||
*/
|
||||
BUILD_BUG_ON(VGIC_V5_NR_PRIVATE_IRQS % 64);
|
||||
|
||||
bitmap_write(host_data_ptr(vgic_v5_ppi_state)->activer_exit,
|
||||
read_sysreg_s(SYS_ICH_PPI_ACTIVER0_EL2), 0, 64);
|
||||
bitmap_write(host_data_ptr(vgic_v5_ppi_state)->pendr,
|
||||
read_sysreg_s(SYS_ICH_PPI_PENDR0_EL2), 0, 64);
|
||||
|
||||
cpu_if->vgic_ppi_priorityr[0] = read_sysreg_s(SYS_ICH_PPI_PRIORITYR0_EL2);
|
||||
cpu_if->vgic_ppi_priorityr[1] = read_sysreg_s(SYS_ICH_PPI_PRIORITYR1_EL2);
|
||||
cpu_if->vgic_ppi_priorityr[2] = read_sysreg_s(SYS_ICH_PPI_PRIORITYR2_EL2);
|
||||
cpu_if->vgic_ppi_priorityr[3] = read_sysreg_s(SYS_ICH_PPI_PRIORITYR3_EL2);
|
||||
cpu_if->vgic_ppi_priorityr[4] = read_sysreg_s(SYS_ICH_PPI_PRIORITYR4_EL2);
|
||||
cpu_if->vgic_ppi_priorityr[5] = read_sysreg_s(SYS_ICH_PPI_PRIORITYR5_EL2);
|
||||
cpu_if->vgic_ppi_priorityr[6] = read_sysreg_s(SYS_ICH_PPI_PRIORITYR6_EL2);
|
||||
cpu_if->vgic_ppi_priorityr[7] = read_sysreg_s(SYS_ICH_PPI_PRIORITYR7_EL2);
|
||||
|
||||
if (VGIC_V5_NR_PRIVATE_IRQS == 128) {
|
||||
bitmap_write(host_data_ptr(vgic_v5_ppi_state)->activer_exit,
|
||||
read_sysreg_s(SYS_ICH_PPI_ACTIVER1_EL2), 64, 64);
|
||||
bitmap_write(host_data_ptr(vgic_v5_ppi_state)->pendr,
|
||||
read_sysreg_s(SYS_ICH_PPI_PENDR1_EL2), 64, 64);
|
||||
|
||||
cpu_if->vgic_ppi_priorityr[8] = read_sysreg_s(SYS_ICH_PPI_PRIORITYR8_EL2);
|
||||
cpu_if->vgic_ppi_priorityr[9] = read_sysreg_s(SYS_ICH_PPI_PRIORITYR9_EL2);
|
||||
cpu_if->vgic_ppi_priorityr[10] = read_sysreg_s(SYS_ICH_PPI_PRIORITYR10_EL2);
|
||||
cpu_if->vgic_ppi_priorityr[11] = read_sysreg_s(SYS_ICH_PPI_PRIORITYR11_EL2);
|
||||
cpu_if->vgic_ppi_priorityr[12] = read_sysreg_s(SYS_ICH_PPI_PRIORITYR12_EL2);
|
||||
cpu_if->vgic_ppi_priorityr[13] = read_sysreg_s(SYS_ICH_PPI_PRIORITYR13_EL2);
|
||||
cpu_if->vgic_ppi_priorityr[14] = read_sysreg_s(SYS_ICH_PPI_PRIORITYR14_EL2);
|
||||
cpu_if->vgic_ppi_priorityr[15] = read_sysreg_s(SYS_ICH_PPI_PRIORITYR15_EL2);
|
||||
}
|
||||
|
||||
/* Now that we are done, disable DVI */
|
||||
write_sysreg_s(0, SYS_ICH_PPI_DVIR0_EL2);
|
||||
write_sysreg_s(0, SYS_ICH_PPI_DVIR1_EL2);
|
||||
}
|
||||
|
||||
void __vgic_v5_restore_ppi_state(struct vgic_v5_cpu_if *cpu_if)
|
||||
{
|
||||
DECLARE_BITMAP(pendr, VGIC_V5_NR_PRIVATE_IRQS);
|
||||
|
||||
/* We assume 64 or 128 PPIs - see above comment */
|
||||
BUILD_BUG_ON(VGIC_V5_NR_PRIVATE_IRQS % 64);
|
||||
|
||||
/* Enable DVI so that the guest's interrupt config takes over */
|
||||
write_sysreg_s(bitmap_read(cpu_if->vgic_ppi_dvir, 0, 64),
|
||||
SYS_ICH_PPI_DVIR0_EL2);
|
||||
|
||||
write_sysreg_s(bitmap_read(cpu_if->vgic_ppi_activer, 0, 64),
|
||||
SYS_ICH_PPI_ACTIVER0_EL2);
|
||||
write_sysreg_s(bitmap_read(cpu_if->vgic_ppi_enabler, 0, 64),
|
||||
SYS_ICH_PPI_ENABLER0_EL2);
|
||||
|
||||
/* Update the pending state of the NON-DVI'd PPIs, only */
|
||||
bitmap_andnot(pendr, host_data_ptr(vgic_v5_ppi_state)->pendr,
|
||||
cpu_if->vgic_ppi_dvir, VGIC_V5_NR_PRIVATE_IRQS);
|
||||
write_sysreg_s(bitmap_read(pendr, 0, 64), SYS_ICH_PPI_PENDR0_EL2);
|
||||
|
||||
write_sysreg_s(cpu_if->vgic_ppi_priorityr[0],
|
||||
SYS_ICH_PPI_PRIORITYR0_EL2);
|
||||
write_sysreg_s(cpu_if->vgic_ppi_priorityr[1],
|
||||
SYS_ICH_PPI_PRIORITYR1_EL2);
|
||||
write_sysreg_s(cpu_if->vgic_ppi_priorityr[2],
|
||||
SYS_ICH_PPI_PRIORITYR2_EL2);
|
||||
write_sysreg_s(cpu_if->vgic_ppi_priorityr[3],
|
||||
SYS_ICH_PPI_PRIORITYR3_EL2);
|
||||
write_sysreg_s(cpu_if->vgic_ppi_priorityr[4],
|
||||
SYS_ICH_PPI_PRIORITYR4_EL2);
|
||||
write_sysreg_s(cpu_if->vgic_ppi_priorityr[5],
|
||||
SYS_ICH_PPI_PRIORITYR5_EL2);
|
||||
write_sysreg_s(cpu_if->vgic_ppi_priorityr[6],
|
||||
SYS_ICH_PPI_PRIORITYR6_EL2);
|
||||
write_sysreg_s(cpu_if->vgic_ppi_priorityr[7],
|
||||
SYS_ICH_PPI_PRIORITYR7_EL2);
|
||||
|
||||
if (VGIC_V5_NR_PRIVATE_IRQS == 128) {
|
||||
/* Enable DVI so that the guest's interrupt config takes over */
|
||||
write_sysreg_s(bitmap_read(cpu_if->vgic_ppi_dvir, 64, 64),
|
||||
SYS_ICH_PPI_DVIR1_EL2);
|
||||
|
||||
write_sysreg_s(bitmap_read(cpu_if->vgic_ppi_activer, 64, 64),
|
||||
SYS_ICH_PPI_ACTIVER1_EL2);
|
||||
write_sysreg_s(bitmap_read(cpu_if->vgic_ppi_enabler, 64, 64),
|
||||
SYS_ICH_PPI_ENABLER1_EL2);
|
||||
write_sysreg_s(bitmap_read(pendr, 64, 64),
|
||||
SYS_ICH_PPI_PENDR1_EL2);
|
||||
|
||||
write_sysreg_s(cpu_if->vgic_ppi_priorityr[8],
|
||||
SYS_ICH_PPI_PRIORITYR8_EL2);
|
||||
write_sysreg_s(cpu_if->vgic_ppi_priorityr[9],
|
||||
SYS_ICH_PPI_PRIORITYR9_EL2);
|
||||
write_sysreg_s(cpu_if->vgic_ppi_priorityr[10],
|
||||
SYS_ICH_PPI_PRIORITYR10_EL2);
|
||||
write_sysreg_s(cpu_if->vgic_ppi_priorityr[11],
|
||||
SYS_ICH_PPI_PRIORITYR11_EL2);
|
||||
write_sysreg_s(cpu_if->vgic_ppi_priorityr[12],
|
||||
SYS_ICH_PPI_PRIORITYR12_EL2);
|
||||
write_sysreg_s(cpu_if->vgic_ppi_priorityr[13],
|
||||
SYS_ICH_PPI_PRIORITYR13_EL2);
|
||||
write_sysreg_s(cpu_if->vgic_ppi_priorityr[14],
|
||||
SYS_ICH_PPI_PRIORITYR14_EL2);
|
||||
write_sysreg_s(cpu_if->vgic_ppi_priorityr[15],
|
||||
SYS_ICH_PPI_PRIORITYR15_EL2);
|
||||
} else {
|
||||
write_sysreg_s(0, SYS_ICH_PPI_DVIR1_EL2);
|
||||
|
||||
write_sysreg_s(0, SYS_ICH_PPI_ACTIVER1_EL2);
|
||||
write_sysreg_s(0, SYS_ICH_PPI_ENABLER1_EL2);
|
||||
write_sysreg_s(0, SYS_ICH_PPI_PENDR1_EL2);
|
||||
|
||||
write_sysreg_s(0, SYS_ICH_PPI_PRIORITYR8_EL2);
|
||||
write_sysreg_s(0, SYS_ICH_PPI_PRIORITYR9_EL2);
|
||||
write_sysreg_s(0, SYS_ICH_PPI_PRIORITYR10_EL2);
|
||||
write_sysreg_s(0, SYS_ICH_PPI_PRIORITYR11_EL2);
|
||||
write_sysreg_s(0, SYS_ICH_PPI_PRIORITYR12_EL2);
|
||||
write_sysreg_s(0, SYS_ICH_PPI_PRIORITYR13_EL2);
|
||||
write_sysreg_s(0, SYS_ICH_PPI_PRIORITYR14_EL2);
|
||||
write_sysreg_s(0, SYS_ICH_PPI_PRIORITYR15_EL2);
|
||||
}
|
||||
}
|
||||
|
||||
void __vgic_v5_save_state(struct vgic_v5_cpu_if *cpu_if)
|
||||
{
|
||||
cpu_if->vgic_vmcr = read_sysreg_s(SYS_ICH_VMCR_EL2);
|
||||
cpu_if->vgic_icsr = read_sysreg_s(SYS_ICC_ICSR_EL1);
|
||||
}
|
||||
|
||||
void __vgic_v5_restore_state(struct vgic_v5_cpu_if *cpu_if)
|
||||
{
|
||||
write_sysreg_s(cpu_if->vgic_icsr, SYS_ICC_ICSR_EL1);
|
||||
}
|
||||
@@ -10,4 +10,4 @@ CFLAGS_switch.o += -Wno-override-init
|
||||
|
||||
obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o
|
||||
obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \
|
||||
../fpsimd.o ../hyp-entry.o ../exception.o
|
||||
../fpsimd.o ../hyp-entry.o ../exception.o ../vgic-v5-sr.o
|
||||
|
||||
442
arch/arm64/kvm/hyp_trace.c
Normal file
442
arch/arm64/kvm/hyp_trace.c
Normal file
@@ -0,0 +1,442 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* Copyright (C) 2025 Google LLC
|
||||
* Author: Vincent Donnefort <vdonnefort@google.com>
|
||||
*/
|
||||
|
||||
#include <linux/cpumask.h>
|
||||
#include <linux/trace_remote.h>
|
||||
#include <linux/tracefs.h>
|
||||
#include <linux/simple_ring_buffer.h>
|
||||
|
||||
#include <asm/arch_timer.h>
|
||||
#include <asm/kvm_host.h>
|
||||
#include <asm/kvm_hyptrace.h>
|
||||
#include <asm/kvm_mmu.h>
|
||||
|
||||
#include "hyp_trace.h"
|
||||
|
||||
/* Same 10min used by clocksource when width is more than 32-bits */
|
||||
#define CLOCK_MAX_CONVERSION_S 600
|
||||
/*
|
||||
* Time to give for the clock init. Long enough to get a good mult/shift
|
||||
* estimation. Short enough to not delay the tracing start too much.
|
||||
*/
|
||||
#define CLOCK_INIT_MS 100
|
||||
/*
|
||||
* Time between clock checks. Must be small enough to catch clock deviation when
|
||||
* it is still tiny.
|
||||
*/
|
||||
#define CLOCK_UPDATE_MS 500
|
||||
|
||||
static struct hyp_trace_clock {
|
||||
u64 cycles;
|
||||
u64 cyc_overflow64;
|
||||
u64 boot;
|
||||
u32 mult;
|
||||
u32 shift;
|
||||
struct delayed_work work;
|
||||
struct completion ready;
|
||||
struct mutex lock;
|
||||
bool running;
|
||||
} hyp_clock;
|
||||
|
||||
static void __hyp_clock_work(struct work_struct *work)
|
||||
{
|
||||
struct delayed_work *dwork = to_delayed_work(work);
|
||||
struct hyp_trace_clock *hyp_clock;
|
||||
struct system_time_snapshot snap;
|
||||
u64 rate, delta_cycles;
|
||||
u64 boot, delta_boot;
|
||||
|
||||
hyp_clock = container_of(dwork, struct hyp_trace_clock, work);
|
||||
|
||||
ktime_get_snapshot(&snap);
|
||||
boot = ktime_to_ns(snap.boot);
|
||||
|
||||
delta_boot = boot - hyp_clock->boot;
|
||||
delta_cycles = snap.cycles - hyp_clock->cycles;
|
||||
|
||||
/* Compare hyp clock with the kernel boot clock */
|
||||
if (hyp_clock->mult) {
|
||||
u64 err, cur = delta_cycles;
|
||||
|
||||
if (WARN_ON_ONCE(cur >= hyp_clock->cyc_overflow64)) {
|
||||
__uint128_t tmp = (__uint128_t)cur * hyp_clock->mult;
|
||||
|
||||
cur = tmp >> hyp_clock->shift;
|
||||
} else {
|
||||
cur *= hyp_clock->mult;
|
||||
cur >>= hyp_clock->shift;
|
||||
}
|
||||
cur += hyp_clock->boot;
|
||||
|
||||
err = abs_diff(cur, boot);
|
||||
/* No deviation, only update epoch if necessary */
|
||||
if (!err) {
|
||||
if (delta_cycles >= (hyp_clock->cyc_overflow64 >> 1))
|
||||
goto fast_forward;
|
||||
|
||||
goto resched;
|
||||
}
|
||||
|
||||
/* Warn if the error is above tracing precision (1us) */
|
||||
if (err > NSEC_PER_USEC)
|
||||
pr_warn_ratelimited("hyp trace clock off by %lluus\n",
|
||||
err / NSEC_PER_USEC);
|
||||
}
|
||||
|
||||
rate = div64_u64(delta_cycles * NSEC_PER_SEC, delta_boot);
|
||||
|
||||
clocks_calc_mult_shift(&hyp_clock->mult, &hyp_clock->shift,
|
||||
rate, NSEC_PER_SEC, CLOCK_MAX_CONVERSION_S);
|
||||
|
||||
/* Add a comfortable 50% margin */
|
||||
hyp_clock->cyc_overflow64 = (U64_MAX / hyp_clock->mult) >> 1;
|
||||
|
||||
fast_forward:
|
||||
hyp_clock->cycles = snap.cycles;
|
||||
hyp_clock->boot = boot;
|
||||
kvm_call_hyp_nvhe(__tracing_update_clock, hyp_clock->mult,
|
||||
hyp_clock->shift, hyp_clock->boot, hyp_clock->cycles);
|
||||
complete(&hyp_clock->ready);
|
||||
|
||||
resched:
|
||||
schedule_delayed_work(&hyp_clock->work,
|
||||
msecs_to_jiffies(CLOCK_UPDATE_MS));
|
||||
}
|
||||
|
||||
static void hyp_trace_clock_enable(struct hyp_trace_clock *hyp_clock, bool enable)
|
||||
{
|
||||
struct system_time_snapshot snap;
|
||||
|
||||
if (hyp_clock->running == enable)
|
||||
return;
|
||||
|
||||
if (!enable) {
|
||||
cancel_delayed_work_sync(&hyp_clock->work);
|
||||
hyp_clock->running = false;
|
||||
}
|
||||
|
||||
ktime_get_snapshot(&snap);
|
||||
|
||||
hyp_clock->boot = ktime_to_ns(snap.boot);
|
||||
hyp_clock->cycles = snap.cycles;
|
||||
hyp_clock->mult = 0;
|
||||
|
||||
init_completion(&hyp_clock->ready);
|
||||
INIT_DELAYED_WORK(&hyp_clock->work, __hyp_clock_work);
|
||||
schedule_delayed_work(&hyp_clock->work, msecs_to_jiffies(CLOCK_INIT_MS));
|
||||
wait_for_completion(&hyp_clock->ready);
|
||||
hyp_clock->running = true;
|
||||
}
|
||||
|
||||
/* Access to this struct within the trace_remote_callbacks are protected by the trace_remote lock */
|
||||
static struct hyp_trace_buffer {
|
||||
struct hyp_trace_desc *desc;
|
||||
size_t desc_size;
|
||||
} trace_buffer;
|
||||
|
||||
static int __map_hyp(void *start, size_t size)
|
||||
{
|
||||
if (is_protected_kvm_enabled())
|
||||
return 0;
|
||||
|
||||
return create_hyp_mappings(start, start + size, PAGE_HYP);
|
||||
}
|
||||
|
||||
static int __share_page(unsigned long va)
|
||||
{
|
||||
return kvm_share_hyp((void *)va, (void *)va + 1);
|
||||
}
|
||||
|
||||
static void __unshare_page(unsigned long va)
|
||||
{
|
||||
kvm_unshare_hyp((void *)va, (void *)va + 1);
|
||||
}
|
||||
|
||||
static int hyp_trace_buffer_alloc_bpages_backing(struct hyp_trace_buffer *trace_buffer, size_t size)
|
||||
{
|
||||
int nr_bpages = (PAGE_ALIGN(size) / PAGE_SIZE) + 1;
|
||||
size_t backing_size;
|
||||
void *start;
|
||||
|
||||
backing_size = PAGE_ALIGN(sizeof(struct simple_buffer_page) * nr_bpages *
|
||||
num_possible_cpus());
|
||||
|
||||
start = alloc_pages_exact(backing_size, GFP_KERNEL_ACCOUNT);
|
||||
if (!start)
|
||||
return -ENOMEM;
|
||||
|
||||
trace_buffer->desc->bpages_backing_start = (unsigned long)start;
|
||||
trace_buffer->desc->bpages_backing_size = backing_size;
|
||||
|
||||
return __map_hyp(start, backing_size);
|
||||
}
|
||||
|
||||
static void hyp_trace_buffer_free_bpages_backing(struct hyp_trace_buffer *trace_buffer)
|
||||
{
|
||||
free_pages_exact((void *)trace_buffer->desc->bpages_backing_start,
|
||||
trace_buffer->desc->bpages_backing_size);
|
||||
}
|
||||
|
||||
static void hyp_trace_buffer_unshare_hyp(struct hyp_trace_buffer *trace_buffer, int last_cpu)
|
||||
{
|
||||
struct ring_buffer_desc *rb_desc;
|
||||
int cpu, p;
|
||||
|
||||
for_each_ring_buffer_desc(rb_desc, cpu, &trace_buffer->desc->trace_buffer_desc) {
|
||||
if (cpu > last_cpu)
|
||||
break;
|
||||
|
||||
__share_page(rb_desc->meta_va);
|
||||
for (p = 0; p < rb_desc->nr_page_va; p++)
|
||||
__unshare_page(rb_desc->page_va[p]);
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Share every per-CPU ring-buffer page (meta page + data pages) with EL2 so
 * the hypervisor can write trace data into them.
 *
 * On failure, already-shared pages are rolled back: the inner cleanup
 * unshares the data pages shared so far for the failing CPU, then
 * hyp_trace_buffer_unshare_hyp() walks the CPUs again.
 *
 * Returns 0 on success or the first sharing error.
 */
static int hyp_trace_buffer_share_hyp(struct hyp_trace_buffer *trace_buffer)
{
	struct ring_buffer_desc *rb_desc;
	int cpu, p, ret = 0;

	for_each_ring_buffer_desc(rb_desc, cpu, &trace_buffer->desc->trace_buffer_desc) {
		ret = __share_page(rb_desc->meta_va);
		if (ret)
			break;

		for (p = 0; p < rb_desc->nr_page_va; p++) {
			ret = __share_page(rb_desc->page_va[p]);
			if (ret)
				break;
		}

		if (ret) {
			/* Roll back this CPU's partially shared data pages */
			for (p--; p >= 0; p--)
				__unshare_page(rb_desc->page_va[p]);
			break;
		}
	}

	if (ret)
		/*
		 * NOTE(review): the post-decrement is dead -- this passes the
		 * failing CPU itself as last_cpu, so its pages (already rolled
		 * back above) get unshared a second time. Confirm whether
		 * "cpu - 1" was intended.
		 */
		hyp_trace_buffer_unshare_hyp(trace_buffer, cpu--);

	return ret;
}
|
||||
|
||||
static struct trace_buffer_desc *hyp_trace_load(unsigned long size, void *priv)
|
||||
{
|
||||
struct hyp_trace_buffer *trace_buffer = priv;
|
||||
struct hyp_trace_desc *desc;
|
||||
size_t desc_size;
|
||||
int ret;
|
||||
|
||||
if (WARN_ON(trace_buffer->desc))
|
||||
return ERR_PTR(-EINVAL);
|
||||
|
||||
desc_size = trace_buffer_desc_size(size, num_possible_cpus());
|
||||
if (desc_size == SIZE_MAX)
|
||||
return ERR_PTR(-E2BIG);
|
||||
|
||||
desc_size = PAGE_ALIGN(desc_size);
|
||||
desc = (struct hyp_trace_desc *)alloc_pages_exact(desc_size, GFP_KERNEL);
|
||||
if (!desc)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
ret = __map_hyp(desc, desc_size);
|
||||
if (ret)
|
||||
goto err_free_desc;
|
||||
|
||||
trace_buffer->desc = desc;
|
||||
|
||||
ret = hyp_trace_buffer_alloc_bpages_backing(trace_buffer, size);
|
||||
if (ret)
|
||||
goto err_free_desc;
|
||||
|
||||
ret = trace_remote_alloc_buffer(&desc->trace_buffer_desc, desc_size, size,
|
||||
cpu_possible_mask);
|
||||
if (ret)
|
||||
goto err_free_backing;
|
||||
|
||||
ret = hyp_trace_buffer_share_hyp(trace_buffer);
|
||||
if (ret)
|
||||
goto err_free_buffer;
|
||||
|
||||
ret = kvm_call_hyp_nvhe(__tracing_load, (unsigned long)desc, desc_size);
|
||||
if (ret)
|
||||
goto err_unload_pages;
|
||||
|
||||
return &desc->trace_buffer_desc;
|
||||
|
||||
err_unload_pages:
|
||||
hyp_trace_buffer_unshare_hyp(trace_buffer, INT_MAX);
|
||||
|
||||
err_free_buffer:
|
||||
trace_remote_free_buffer(&desc->trace_buffer_desc);
|
||||
|
||||
err_free_backing:
|
||||
hyp_trace_buffer_free_bpages_backing(trace_buffer);
|
||||
|
||||
err_free_desc:
|
||||
free_pages_exact(desc, desc_size);
|
||||
trace_buffer->desc = NULL;
|
||||
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
|
||||
/*
 * trace_remote "unload" callback: tear down everything hyp_trace_load() set
 * up, in reverse order -- hypercall first so EL2 stops writing, then unshare
 * the pages, free the ring buffers, the bpage backing and the descriptor.
 */
static void hyp_trace_unload(struct trace_buffer_desc *desc, void *priv)
{
	struct hyp_trace_buffer *trace_buffer = priv;

	/* We only ever manage a single descriptor */
	if (WARN_ON(desc != &trace_buffer->desc->trace_buffer_desc))
		return;

	kvm_call_hyp_nvhe(__tracing_unload);
	hyp_trace_buffer_unshare_hyp(trace_buffer, INT_MAX);
	trace_remote_free_buffer(desc);
	hyp_trace_buffer_free_bpages_backing(trace_buffer);
	free_pages_exact(trace_buffer->desc, trace_buffer->desc_size);
	trace_buffer->desc = NULL;
}
|
||||
|
||||
/*
 * trace_remote "enable_tracing" callback: keep the hyp clock machinery in
 * sync with the tracing state, then flip tracing on/off at EL2.
 */
static int hyp_trace_enable_tracing(bool enable, void *priv)
{
	hyp_trace_clock_enable(&hyp_clock, enable);

	return kvm_call_hyp_nvhe(__tracing_enable, enable);
}
|
||||
|
||||
/* Ask EL2 to swap in a fresh reader page for @cpu's ring buffer. */
static int hyp_trace_swap_reader_page(unsigned int cpu, void *priv)
{
	return kvm_call_hyp_nvhe(__tracing_swap_reader, cpu);
}
|
||||
|
||||
/* Ask EL2 to reset @cpu's ring buffer. */
static int hyp_trace_reset(unsigned int cpu, void *priv)
{
	return kvm_call_hyp_nvhe(__tracing_reset, cpu);
}
|
||||
|
||||
/*
 * trace_remote "enable_event" callback.
 *
 * Under pKVM the kernel cannot touch hypervisor memory, so the flag is
 * flipped via a hypercall. Otherwise the event's enable word lives in the
 * hyp section -- presumably not writable through the linear map, hence the
 * temporary writable vmap() alias (TODO confirm the mapping constraint).
 */
static int hyp_trace_enable_event(unsigned short id, bool enable, void *priv)
{
	struct hyp_event_id *event_id = lm_alias(&__hyp_event_ids_start[id]);
	struct page *page;
	atomic_t *enabled;
	void *map;

	if (is_protected_kvm_enabled())
		return kvm_call_hyp_nvhe(__tracing_enable_event, id, enable);

	enabled = &event_id->enabled;
	page = virt_to_page(enabled);
	map = vmap(&page, 1, VM_MAP, PAGE_KERNEL);
	if (!map)
		return -ENOMEM;

	/* Write through the alias at the same offset within the page */
	enabled = map + offset_in_page(enabled);
	atomic_set(enabled, enable);

	vunmap(map);

	return 0;
}
|
||||
|
||||
/* "trace_clock" read handler: the hypervisor trace clock is always [boot]. */
static int hyp_trace_clock_show(struct seq_file *m, void *v)
{
	seq_puts(m, "[boot]\n");

	return 0;
}
DEFINE_SHOW_ATTRIBUTE(hyp_trace_clock);
|
||||
|
||||
/*
 * "write_event" write handler: parse a decimal event id from userspace and
 * ask the hypervisor to emit that event (presumably a test/debug aid --
 * TODO confirm). The hypercall result is ignored; a successful parse always
 * consumes @cnt.
 */
static ssize_t hyp_trace_write_event_write(struct file *f, const char __user *ubuf,
					   size_t cnt, loff_t *pos)
{
	unsigned long val;
	int ret;

	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
	if (ret)
		return ret;

	kvm_call_hyp_nvhe(__tracing_write_event, val);

	return cnt;
}
|
||||
|
||||
/* Write-only file; see hyp_trace_write_event_write(). */
static const struct file_operations hyp_trace_write_event_fops = {
	.write = hyp_trace_write_event_write,
};
|
||||
|
||||
static int hyp_trace_init_tracefs(struct dentry *d, void *priv)
|
||||
{
|
||||
if (!tracefs_create_file("write_event", 0200, d, NULL, &hyp_trace_write_event_fops))
|
||||
return -ENOMEM;
|
||||
|
||||
return tracefs_create_file("trace_clock", 0440, d, NULL, &hyp_trace_clock_fops) ?
|
||||
0 : -ENOMEM;
|
||||
}
|
||||
|
||||
/* Glue between the generic trace_remote core and the nVHE hypervisor. */
static struct trace_remote_callbacks trace_remote_callbacks = {
	.init = hyp_trace_init_tracefs,
	.load_trace_buffer = hyp_trace_load,
	.unload_trace_buffer = hyp_trace_unload,
	.enable_tracing = hyp_trace_enable_tracing,
	.swap_reader_page = hyp_trace_swap_reader_page,
	.reset = hyp_trace_reset,
	.enable_event = hyp_trace_enable_event,
};
|
||||
|
||||
static const char *__hyp_enter_exit_reason_str(u8 reason);
|
||||
|
||||
#include <asm/kvm_define_hypevents.h>
|
||||
|
||||
/*
 * Map a hyp enter/exit reason code to a printable name. Out-of-range values
 * clamp to "unknown". NOTE(review): table order is assumed to match the
 * HYP_REASON_* enum (defined elsewhere), with HYP_REASON_UNKNOWN last.
 */
static const char *__hyp_enter_exit_reason_str(u8 reason)
{
	/* 12 bytes fits the longest entry ("guest_exit") plus the NUL */
	static const char strs[][12] = {
		"smc",
		"hvc",
		"psci",
		"host_abort",
		"guest_exit",
		"eret_host",
		"eret_guest",
		"unknown",
	};

	return strs[min(reason, HYP_REASON_UNKNOWN)];
}
|
||||
|
||||
/*
 * Assign matching sequential ids to the kernel-side and hyp-side event
 * tables. Both tables are sorted identically, so walking them in lockstep
 * keeps the ids consistent across the two worlds.
 */
static void __init hyp_trace_init_events(void)
{
	struct hyp_event_id *hyp_event_id = __hyp_event_ids_start;
	struct remote_event *event = __hyp_events_start;
	int id = 0;

	while (event < __hyp_events_end) {
		event->id = hyp_event_id->id = id;
		event++;
		hyp_event_id++;
		id++;
	}
}
|
||||
|
||||
/*
 * Register the EL2 hypervisor as a trace remote named "hypervisor".
 *
 * Nothing to do under VHE, where the kernel itself runs at EL2. Refuse to
 * register when any CPU relies on a CNTVCT read workaround, which the hyp
 * tracing path cannot apply (see pr_warn below).
 *
 * Returns 0 on success or a negative errno.
 */
int __init kvm_hyp_trace_init(void)
{
	int cpu;

	if (is_kernel_in_hyp_mode())
		return 0;

	for_each_possible_cpu(cpu) {
		const struct arch_timer_erratum_workaround *wa =
			per_cpu(timer_unstable_counter_workaround, cpu);

		if (IS_ENABLED(CONFIG_ARM_ARCH_TIMER_OOL_WORKAROUND) &&
		    wa && wa->read_cntvct_el0) {
			pr_warn("hyp trace can't handle CNTVCT workaround '%s'\n", wa->desc);
			return -EOPNOTSUPP;
		}
	}

	/* Give matching ids to the kernel- and hyp-side event tables */
	hyp_trace_init_events();

	return trace_remote_register("hypervisor", &trace_remote_callbacks, &trace_buffer,
				     __hyp_events_start, __hyp_events_end - __hyp_events_start);
}
|
||||
11
arch/arm64/kvm/hyp_trace.h
Normal file
11
arch/arm64/kvm/hyp_trace.h
Normal file
@@ -0,0 +1,11 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */

#ifndef __ARM64_KVM_HYP_TRACE_H__
#define __ARM64_KVM_HYP_TRACE_H__

#ifdef CONFIG_NVHE_EL2_TRACING
/* Register the nVHE hypervisor as a trace remote; 0 on success. */
int kvm_hyp_trace_init(void);
#else
/* Tracing support compiled out: succeed without doing anything. */
static inline int kvm_hyp_trace_init(void) { return 0; }
#endif
#endif
|
||||
@@ -340,6 +340,9 @@ static void __unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64
|
||||
/*
 * Unmap a range from a stage-2 page table. No-op for protected (pKVM) VMs,
 * whose stage-2 the kernel must not modify directly.
 */
void kvm_stage2_unmap_range(struct kvm_s2_mmu *mmu, phys_addr_t start,
			    u64 size, bool may_block)
{
	if (kvm_vm_is_protected(kvm_s2_mmu_to_kvm(mmu)))
		return;

	__unmap_stage2_range(mmu, start, size, may_block);
}
|
||||
|
||||
@@ -878,9 +881,6 @@ static int kvm_init_ipa_range(struct kvm_s2_mmu *mmu, unsigned long type)
|
||||
u64 mmfr0, mmfr1;
|
||||
u32 phys_shift;
|
||||
|
||||
if (type & ~KVM_VM_TYPE_ARM_IPA_SIZE_MASK)
|
||||
return -EINVAL;
|
||||
|
||||
phys_shift = KVM_VM_TYPE_ARM_IPA_SIZE(type);
|
||||
if (is_protected_kvm_enabled()) {
|
||||
phys_shift = kvm_ipa_limit;
|
||||
@@ -1013,6 +1013,7 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long t
|
||||
|
||||
out_destroy_pgtable:
|
||||
kvm_stage2_destroy(pgt);
|
||||
mmu->pgt = NULL;
|
||||
out_free_pgtable:
|
||||
kfree(pgt);
|
||||
return err;
|
||||
@@ -1400,10 +1401,10 @@ static bool fault_supports_stage2_huge_mapping(struct kvm_memory_slot *memslot,
|
||||
*/
|
||||
static long
|
||||
transparent_hugepage_adjust(struct kvm *kvm, struct kvm_memory_slot *memslot,
|
||||
unsigned long hva, kvm_pfn_t *pfnp,
|
||||
phys_addr_t *ipap)
|
||||
unsigned long hva, kvm_pfn_t *pfnp, gfn_t *gfnp)
|
||||
{
|
||||
kvm_pfn_t pfn = *pfnp;
|
||||
gfn_t gfn = *gfnp;
|
||||
|
||||
/*
|
||||
* Make sure the adjustment is done only for THP pages. Also make
|
||||
@@ -1419,7 +1420,8 @@ transparent_hugepage_adjust(struct kvm *kvm, struct kvm_memory_slot *memslot,
|
||||
if (sz < PMD_SIZE)
|
||||
return PAGE_SIZE;
|
||||
|
||||
*ipap &= PMD_MASK;
|
||||
gfn &= ~(PTRS_PER_PMD - 1);
|
||||
*gfnp = gfn;
|
||||
pfn &= ~(PTRS_PER_PMD - 1);
|
||||
*pfnp = pfn;
|
||||
|
||||
@@ -1512,25 +1514,22 @@ static bool kvm_vma_is_cacheable(struct vm_area_struct *vma)
|
||||
}
|
||||
}
|
||||
|
||||
static int prepare_mmu_memcache(struct kvm_vcpu *vcpu, bool topup_memcache,
|
||||
void **memcache)
|
||||
static void *get_mmu_memcache(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
int min_pages;
|
||||
|
||||
if (!is_protected_kvm_enabled())
|
||||
*memcache = &vcpu->arch.mmu_page_cache;
|
||||
return &vcpu->arch.mmu_page_cache;
|
||||
else
|
||||
*memcache = &vcpu->arch.pkvm_memcache;
|
||||
return &vcpu->arch.pkvm_memcache;
|
||||
}
|
||||
|
||||
if (!topup_memcache)
|
||||
return 0;
|
||||
|
||||
min_pages = kvm_mmu_cache_min_pages(vcpu->arch.hw_mmu);
|
||||
static int topup_mmu_memcache(struct kvm_vcpu *vcpu, void *memcache)
|
||||
{
|
||||
int min_pages = kvm_mmu_cache_min_pages(vcpu->arch.hw_mmu);
|
||||
|
||||
if (!is_protected_kvm_enabled())
|
||||
return kvm_mmu_topup_memory_cache(*memcache, min_pages);
|
||||
return kvm_mmu_topup_memory_cache(memcache, min_pages);
|
||||
|
||||
return topup_hyp_memcache(*memcache, min_pages);
|
||||
return topup_hyp_memcache(memcache, min_pages);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1543,54 +1542,63 @@ static int prepare_mmu_memcache(struct kvm_vcpu *vcpu, bool topup_memcache,
|
||||
* TLB invalidation from the guest and used to limit the invalidation scope if a
|
||||
* TTL hint or a range isn't provided.
|
||||
*/
|
||||
static void adjust_nested_fault_perms(struct kvm_s2_trans *nested,
|
||||
enum kvm_pgtable_prot *prot,
|
||||
bool *writable)
|
||||
static enum kvm_pgtable_prot adjust_nested_fault_perms(struct kvm_s2_trans *nested,
|
||||
enum kvm_pgtable_prot prot)
|
||||
{
|
||||
*writable &= kvm_s2_trans_writable(nested);
|
||||
if (!kvm_s2_trans_writable(nested))
|
||||
prot &= ~KVM_PGTABLE_PROT_W;
|
||||
if (!kvm_s2_trans_readable(nested))
|
||||
*prot &= ~KVM_PGTABLE_PROT_R;
|
||||
prot &= ~KVM_PGTABLE_PROT_R;
|
||||
|
||||
*prot |= kvm_encode_nested_level(nested);
|
||||
return prot | kvm_encode_nested_level(nested);
|
||||
}
|
||||
|
||||
static void adjust_nested_exec_perms(struct kvm *kvm,
|
||||
struct kvm_s2_trans *nested,
|
||||
enum kvm_pgtable_prot *prot)
|
||||
static enum kvm_pgtable_prot adjust_nested_exec_perms(struct kvm *kvm,
|
||||
struct kvm_s2_trans *nested,
|
||||
enum kvm_pgtable_prot prot)
|
||||
{
|
||||
if (!kvm_s2_trans_exec_el0(kvm, nested))
|
||||
*prot &= ~KVM_PGTABLE_PROT_UX;
|
||||
prot &= ~KVM_PGTABLE_PROT_UX;
|
||||
if (!kvm_s2_trans_exec_el1(kvm, nested))
|
||||
*prot &= ~KVM_PGTABLE_PROT_PX;
|
||||
prot &= ~KVM_PGTABLE_PROT_PX;
|
||||
|
||||
return prot;
|
||||
}
|
||||
|
||||
static int gmem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
|
||||
struct kvm_s2_trans *nested,
|
||||
struct kvm_memory_slot *memslot, bool is_perm)
|
||||
/* Immutable description of a stage-2 fault, passed to the fault handlers. */
struct kvm_s2_fault_desc {
	struct kvm_vcpu *vcpu;
	phys_addr_t fault_ipa;			/* faulting IPA */
	struct kvm_s2_trans *nested;		/* guest stage-2 walk result, or NULL */
	struct kvm_memory_slot *memslot;
	unsigned long hva;			/* userspace address backing the fault */
};
|
||||
|
||||
static int gmem_abort(const struct kvm_s2_fault_desc *s2fd)
|
||||
{
|
||||
bool write_fault, exec_fault, writable;
|
||||
bool write_fault, exec_fault;
|
||||
enum kvm_pgtable_walk_flags flags = KVM_PGTABLE_WALK_SHARED;
|
||||
enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R;
|
||||
struct kvm_pgtable *pgt = vcpu->arch.hw_mmu->pgt;
|
||||
struct kvm_pgtable *pgt = s2fd->vcpu->arch.hw_mmu->pgt;
|
||||
unsigned long mmu_seq;
|
||||
struct page *page;
|
||||
struct kvm *kvm = vcpu->kvm;
|
||||
struct kvm *kvm = s2fd->vcpu->kvm;
|
||||
void *memcache;
|
||||
kvm_pfn_t pfn;
|
||||
gfn_t gfn;
|
||||
int ret;
|
||||
|
||||
ret = prepare_mmu_memcache(vcpu, true, &memcache);
|
||||
memcache = get_mmu_memcache(s2fd->vcpu);
|
||||
ret = topup_mmu_memcache(s2fd->vcpu, memcache);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (nested)
|
||||
gfn = kvm_s2_trans_output(nested) >> PAGE_SHIFT;
|
||||
if (s2fd->nested)
|
||||
gfn = kvm_s2_trans_output(s2fd->nested) >> PAGE_SHIFT;
|
||||
else
|
||||
gfn = fault_ipa >> PAGE_SHIFT;
|
||||
gfn = s2fd->fault_ipa >> PAGE_SHIFT;
|
||||
|
||||
write_fault = kvm_is_write_fault(vcpu);
|
||||
exec_fault = kvm_vcpu_trap_is_exec_fault(vcpu);
|
||||
write_fault = kvm_is_write_fault(s2fd->vcpu);
|
||||
exec_fault = kvm_vcpu_trap_is_exec_fault(s2fd->vcpu);
|
||||
|
||||
VM_WARN_ON_ONCE(write_fault && exec_fault);
|
||||
|
||||
@@ -1598,26 +1606,24 @@ static int gmem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
|
||||
/* Pairs with the smp_wmb() in kvm_mmu_invalidate_end(). */
|
||||
smp_rmb();
|
||||
|
||||
ret = kvm_gmem_get_pfn(kvm, memslot, gfn, &pfn, &page, NULL);
|
||||
ret = kvm_gmem_get_pfn(kvm, s2fd->memslot, gfn, &pfn, &page, NULL);
|
||||
if (ret) {
|
||||
kvm_prepare_memory_fault_exit(vcpu, fault_ipa, PAGE_SIZE,
|
||||
kvm_prepare_memory_fault_exit(s2fd->vcpu, s2fd->fault_ipa, PAGE_SIZE,
|
||||
write_fault, exec_fault, false);
|
||||
return ret;
|
||||
}
|
||||
|
||||
writable = !(memslot->flags & KVM_MEM_READONLY);
|
||||
|
||||
if (nested)
|
||||
adjust_nested_fault_perms(nested, &prot, &writable);
|
||||
|
||||
if (writable)
|
||||
if (!(s2fd->memslot->flags & KVM_MEM_READONLY))
|
||||
prot |= KVM_PGTABLE_PROT_W;
|
||||
|
||||
if (s2fd->nested)
|
||||
prot = adjust_nested_fault_perms(s2fd->nested, prot);
|
||||
|
||||
if (exec_fault || cpus_have_final_cap(ARM64_HAS_CACHE_DIC))
|
||||
prot |= KVM_PGTABLE_PROT_X;
|
||||
|
||||
if (nested)
|
||||
adjust_nested_exec_perms(kvm, nested, &prot);
|
||||
if (s2fd->nested)
|
||||
prot = adjust_nested_exec_perms(kvm, s2fd->nested, prot);
|
||||
|
||||
kvm_fault_lock(kvm);
|
||||
if (mmu_invalidate_retry(kvm, mmu_seq)) {
|
||||
@@ -1625,85 +1631,122 @@ static int gmem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
ret = KVM_PGT_FN(kvm_pgtable_stage2_map)(pgt, fault_ipa, PAGE_SIZE,
|
||||
ret = KVM_PGT_FN(kvm_pgtable_stage2_map)(pgt, s2fd->fault_ipa, PAGE_SIZE,
|
||||
__pfn_to_phys(pfn), prot,
|
||||
memcache, flags);
|
||||
|
||||
out_unlock:
|
||||
kvm_release_faultin_page(kvm, page, !!ret, writable);
|
||||
kvm_release_faultin_page(kvm, page, !!ret, prot & KVM_PGTABLE_PROT_W);
|
||||
kvm_fault_unlock(kvm);
|
||||
|
||||
if (writable && !ret)
|
||||
mark_page_dirty_in_slot(kvm, memslot, gfn);
|
||||
if ((prot & KVM_PGTABLE_PROT_W) && !ret)
|
||||
mark_page_dirty_in_slot(kvm, s2fd->memslot, gfn);
|
||||
|
||||
return ret != -EAGAIN ? ret : 0;
|
||||
}
|
||||
|
||||
static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
|
||||
struct kvm_s2_trans *nested,
|
||||
struct kvm_memory_slot *memslot, unsigned long hva,
|
||||
bool fault_is_perm)
|
||||
/*
 * VMA-derived state for a stage-2 fault, snapshotted while mmap_lock is
 * held; the VMA itself must not be used once the lock is dropped.
 */
struct kvm_s2_fault_vma_info {
	unsigned long mmu_seq;		/* kvm->mmu_invalidate_seq at snapshot time */
	long vma_pagesize;
	vm_flags_t vm_flags;
	unsigned long max_map_size;	/* upper bound on the stage-2 mapping size */
	struct page *page;
	kvm_pfn_t pfn;
	gfn_t gfn;
	bool device;
	bool mte_allowed;
	bool is_vma_cacheable;
	bool map_writable;
	bool map_non_cacheable;
};
|
||||
|
||||
static int pkvm_mem_abort(const struct kvm_s2_fault_desc *s2fd)
|
||||
{
|
||||
int ret = 0;
|
||||
bool topup_memcache;
|
||||
bool write_fault, writable;
|
||||
bool exec_fault, mte_allowed, is_vma_cacheable;
|
||||
bool s2_force_noncacheable = false, vfio_allow_any_uc = false;
|
||||
unsigned long mmu_seq;
|
||||
phys_addr_t ipa = fault_ipa;
|
||||
unsigned int flags = FOLL_HWPOISON | FOLL_LONGTERM | FOLL_WRITE;
|
||||
struct kvm_vcpu *vcpu = s2fd->vcpu;
|
||||
struct kvm_pgtable *pgt = vcpu->arch.hw_mmu->pgt;
|
||||
struct mm_struct *mm = current->mm;
|
||||
struct kvm *kvm = vcpu->kvm;
|
||||
struct vm_area_struct *vma;
|
||||
short vma_shift;
|
||||
void *memcache;
|
||||
gfn_t gfn;
|
||||
kvm_pfn_t pfn;
|
||||
bool logging_active = memslot_is_logging(memslot);
|
||||
bool force_pte = logging_active;
|
||||
long vma_pagesize, fault_granule;
|
||||
enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R;
|
||||
struct kvm_pgtable *pgt;
|
||||
void *hyp_memcache;
|
||||
struct page *page;
|
||||
vm_flags_t vm_flags;
|
||||
enum kvm_pgtable_walk_flags flags = KVM_PGTABLE_WALK_SHARED;
|
||||
int ret;
|
||||
|
||||
if (fault_is_perm)
|
||||
fault_granule = kvm_vcpu_trap_get_perm_fault_granule(vcpu);
|
||||
write_fault = kvm_is_write_fault(vcpu);
|
||||
exec_fault = kvm_vcpu_trap_is_exec_fault(vcpu);
|
||||
VM_WARN_ON_ONCE(write_fault && exec_fault);
|
||||
hyp_memcache = get_mmu_memcache(vcpu);
|
||||
ret = topup_mmu_memcache(vcpu, hyp_memcache);
|
||||
if (ret)
|
||||
return -ENOMEM;
|
||||
|
||||
/*
|
||||
* Permission faults just need to update the existing leaf entry,
|
||||
* and so normally don't require allocations from the memcache. The
|
||||
* only exception to this is when dirty logging is enabled at runtime
|
||||
* and a write fault needs to collapse a block entry into a table.
|
||||
*/
|
||||
topup_memcache = !fault_is_perm || (logging_active && write_fault);
|
||||
ret = prepare_mmu_memcache(vcpu, topup_memcache, &memcache);
|
||||
ret = account_locked_vm(mm, 1, true);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/*
|
||||
* Let's check if we will get back a huge page backed by hugetlbfs, or
|
||||
* get block mapping for device MMIO region.
|
||||
*/
|
||||
mmap_read_lock(current->mm);
|
||||
vma = vma_lookup(current->mm, hva);
|
||||
if (unlikely(!vma)) {
|
||||
kvm_err("Failed to find VMA for hva 0x%lx\n", hva);
|
||||
mmap_read_unlock(current->mm);
|
||||
return -EFAULT;
|
||||
mmap_read_lock(mm);
|
||||
ret = pin_user_pages(s2fd->hva, 1, flags, &page);
|
||||
mmap_read_unlock(mm);
|
||||
|
||||
if (ret == -EHWPOISON) {
|
||||
kvm_send_hwpoison_signal(s2fd->hva, PAGE_SHIFT);
|
||||
ret = 0;
|
||||
goto dec_account;
|
||||
} else if (ret != 1) {
|
||||
ret = -EFAULT;
|
||||
goto dec_account;
|
||||
} else if (!folio_test_swapbacked(page_folio(page))) {
|
||||
/*
|
||||
* We really can't deal with page-cache pages returned by GUP
|
||||
* because (a) we may trigger writeback of a page for which we
|
||||
* no longer have access and (b) page_mkclean() won't find the
|
||||
* stage-2 mapping in the rmap so we can get out-of-whack with
|
||||
* the filesystem when marking the page dirty during unpinning
|
||||
* (see cc5095747edf ("ext4: don't BUG if someone dirty pages
|
||||
* without asking ext4 first")).
|
||||
*
|
||||
* Ideally we'd just restrict ourselves to anonymous pages, but
|
||||
* we also want to allow memfd (i.e. shmem) pages, so check for
|
||||
* pages backed by swap in the knowledge that the GUP pin will
|
||||
* prevent try_to_unmap() from succeeding.
|
||||
*/
|
||||
ret = -EIO;
|
||||
goto unpin;
|
||||
}
|
||||
|
||||
if (force_pte)
|
||||
write_lock(&kvm->mmu_lock);
|
||||
ret = pkvm_pgtable_stage2_map(pgt, s2fd->fault_ipa, PAGE_SIZE,
|
||||
page_to_phys(page), KVM_PGTABLE_PROT_RWX,
|
||||
hyp_memcache, 0);
|
||||
write_unlock(&kvm->mmu_lock);
|
||||
if (ret) {
|
||||
if (ret == -EAGAIN)
|
||||
ret = 0;
|
||||
goto unpin;
|
||||
}
|
||||
|
||||
return 0;
|
||||
unpin:
|
||||
unpin_user_pages(&page, 1);
|
||||
dec_account:
|
||||
account_locked_vm(mm, 1, false);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static short kvm_s2_resolve_vma_size(const struct kvm_s2_fault_desc *s2fd,
|
||||
struct kvm_s2_fault_vma_info *s2vi,
|
||||
struct vm_area_struct *vma)
|
||||
{
|
||||
short vma_shift;
|
||||
|
||||
if (memslot_is_logging(s2fd->memslot)) {
|
||||
s2vi->max_map_size = PAGE_SIZE;
|
||||
vma_shift = PAGE_SHIFT;
|
||||
else
|
||||
vma_shift = get_vma_page_shift(vma, hva);
|
||||
} else {
|
||||
s2vi->max_map_size = PUD_SIZE;
|
||||
vma_shift = get_vma_page_shift(vma, s2fd->hva);
|
||||
}
|
||||
|
||||
switch (vma_shift) {
|
||||
#ifndef __PAGETABLE_PMD_FOLDED
|
||||
case PUD_SHIFT:
|
||||
if (fault_supports_stage2_huge_mapping(memslot, hva, PUD_SIZE))
|
||||
if (fault_supports_stage2_huge_mapping(s2fd->memslot, s2fd->hva, PUD_SIZE))
|
||||
break;
|
||||
fallthrough;
|
||||
#endif
|
||||
@@ -1711,12 +1754,12 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
|
||||
vma_shift = PMD_SHIFT;
|
||||
fallthrough;
|
||||
case PMD_SHIFT:
|
||||
if (fault_supports_stage2_huge_mapping(memslot, hva, PMD_SIZE))
|
||||
if (fault_supports_stage2_huge_mapping(s2fd->memslot, s2fd->hva, PMD_SIZE))
|
||||
break;
|
||||
fallthrough;
|
||||
case CONT_PTE_SHIFT:
|
||||
vma_shift = PAGE_SHIFT;
|
||||
force_pte = true;
|
||||
s2vi->max_map_size = PAGE_SIZE;
|
||||
fallthrough;
|
||||
case PAGE_SHIFT:
|
||||
break;
|
||||
@@ -1724,21 +1767,17 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
|
||||
WARN_ONCE(1, "Unknown vma_shift %d", vma_shift);
|
||||
}
|
||||
|
||||
vma_pagesize = 1UL << vma_shift;
|
||||
|
||||
if (nested) {
|
||||
if (s2fd->nested) {
|
||||
unsigned long max_map_size;
|
||||
|
||||
max_map_size = force_pte ? PAGE_SIZE : PUD_SIZE;
|
||||
|
||||
ipa = kvm_s2_trans_output(nested);
|
||||
max_map_size = min(s2vi->max_map_size, PUD_SIZE);
|
||||
|
||||
/*
|
||||
* If we're about to create a shadow stage 2 entry, then we
|
||||
* can only create a block mapping if the guest stage 2 page
|
||||
* table uses at least as big a mapping.
|
||||
*/
|
||||
max_map_size = min(kvm_s2_trans_size(nested), max_map_size);
|
||||
max_map_size = min(kvm_s2_trans_size(s2fd->nested), max_map_size);
|
||||
|
||||
/*
|
||||
* Be careful that if the mapping size falls between
|
||||
@@ -1749,30 +1788,46 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
|
||||
else if (max_map_size >= PAGE_SIZE && max_map_size < PMD_SIZE)
|
||||
max_map_size = PAGE_SIZE;
|
||||
|
||||
force_pte = (max_map_size == PAGE_SIZE);
|
||||
vma_pagesize = min_t(long, vma_pagesize, max_map_size);
|
||||
vma_shift = __ffs(vma_pagesize);
|
||||
s2vi->max_map_size = max_map_size;
|
||||
vma_shift = min_t(short, vma_shift, __ffs(max_map_size));
|
||||
}
|
||||
|
||||
return vma_shift;
|
||||
}
|
||||
|
||||
/* True when the current trap is a stage-2 permission fault. */
static bool kvm_s2_fault_is_perm(const struct kvm_s2_fault_desc *s2fd)
{
	return kvm_vcpu_trap_is_permission_fault(s2fd->vcpu);
}
|
||||
|
||||
static int kvm_s2_fault_get_vma_info(const struct kvm_s2_fault_desc *s2fd,
|
||||
struct kvm_s2_fault_vma_info *s2vi)
|
||||
{
|
||||
struct vm_area_struct *vma;
|
||||
struct kvm *kvm = s2fd->vcpu->kvm;
|
||||
|
||||
mmap_read_lock(current->mm);
|
||||
vma = vma_lookup(current->mm, s2fd->hva);
|
||||
if (unlikely(!vma)) {
|
||||
kvm_err("Failed to find VMA for hva 0x%lx\n", s2fd->hva);
|
||||
mmap_read_unlock(current->mm);
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
s2vi->vma_pagesize = BIT(kvm_s2_resolve_vma_size(s2fd, s2vi, vma));
|
||||
|
||||
/*
|
||||
* Both the canonical IPA and fault IPA must be aligned to the
|
||||
* mapping size to ensure we find the right PFN and lay down the
|
||||
* mapping in the right place.
|
||||
*/
|
||||
fault_ipa = ALIGN_DOWN(fault_ipa, vma_pagesize);
|
||||
ipa = ALIGN_DOWN(ipa, vma_pagesize);
|
||||
s2vi->gfn = ALIGN_DOWN(s2fd->fault_ipa, s2vi->vma_pagesize) >> PAGE_SHIFT;
|
||||
|
||||
gfn = ipa >> PAGE_SHIFT;
|
||||
mte_allowed = kvm_vma_mte_allowed(vma);
|
||||
s2vi->mte_allowed = kvm_vma_mte_allowed(vma);
|
||||
|
||||
vfio_allow_any_uc = vma->vm_flags & VM_ALLOW_ANY_UNCACHED;
|
||||
s2vi->vm_flags = vma->vm_flags;
|
||||
|
||||
vm_flags = vma->vm_flags;
|
||||
|
||||
is_vma_cacheable = kvm_vma_is_cacheable(vma);
|
||||
|
||||
/* Don't use the VMA after the unlock -- it may have vanished */
|
||||
vma = NULL;
|
||||
s2vi->is_vma_cacheable = kvm_vma_is_cacheable(vma);
|
||||
|
||||
/*
|
||||
* Read mmu_invalidate_seq so that KVM can detect if the results of
|
||||
@@ -1782,24 +1837,50 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
|
||||
* Rely on mmap_read_unlock() for an implicit smp_rmb(), which pairs
|
||||
* with the smp_wmb() in kvm_mmu_invalidate_end().
|
||||
*/
|
||||
mmu_seq = kvm->mmu_invalidate_seq;
|
||||
s2vi->mmu_seq = kvm->mmu_invalidate_seq;
|
||||
mmap_read_unlock(current->mm);
|
||||
|
||||
pfn = __kvm_faultin_pfn(memslot, gfn, write_fault ? FOLL_WRITE : 0,
|
||||
&writable, &page);
|
||||
if (pfn == KVM_PFN_ERR_HWPOISON) {
|
||||
kvm_send_hwpoison_signal(hva, vma_shift);
|
||||
return 0;
|
||||
}
|
||||
if (is_error_noslot_pfn(pfn))
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Return the gfn to fault in: the precomputed gfn when there is no nested
 * walk, otherwise the output address of the guest's own stage-2 translation,
 * aligned down to the chosen mapping size.
 */
static gfn_t get_canonical_gfn(const struct kvm_s2_fault_desc *s2fd,
			       const struct kvm_s2_fault_vma_info *s2vi)
{
	phys_addr_t ipa;

	if (!s2fd->nested)
		return s2vi->gfn;

	ipa = kvm_s2_trans_output(s2fd->nested);
	return ALIGN_DOWN(ipa, s2vi->vma_pagesize) >> PAGE_SHIFT;
}
|
||||
|
||||
static int kvm_s2_fault_pin_pfn(const struct kvm_s2_fault_desc *s2fd,
|
||||
struct kvm_s2_fault_vma_info *s2vi)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = kvm_s2_fault_get_vma_info(s2fd, s2vi);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
s2vi->pfn = __kvm_faultin_pfn(s2fd->memslot, get_canonical_gfn(s2fd, s2vi),
|
||||
kvm_is_write_fault(s2fd->vcpu) ? FOLL_WRITE : 0,
|
||||
&s2vi->map_writable, &s2vi->page);
|
||||
if (unlikely(is_error_noslot_pfn(s2vi->pfn))) {
|
||||
if (s2vi->pfn == KVM_PFN_ERR_HWPOISON) {
|
||||
kvm_send_hwpoison_signal(s2fd->hva, __ffs(s2vi->vma_pagesize));
|
||||
return 0;
|
||||
}
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check if this is non-struct page memory PFN, and cannot support
|
||||
* CMOs. It could potentially be unsafe to access as cacheable.
|
||||
*/
|
||||
if (vm_flags & (VM_PFNMAP | VM_MIXEDMAP) && !pfn_is_map_memory(pfn)) {
|
||||
if (is_vma_cacheable) {
|
||||
if (s2vi->vm_flags & (VM_PFNMAP | VM_MIXEDMAP) && !pfn_is_map_memory(s2vi->pfn)) {
|
||||
if (s2vi->is_vma_cacheable) {
|
||||
/*
|
||||
* Whilst the VMA owner expects cacheable mapping to this
|
||||
* PFN, hardware also has to support the FWB and CACHE DIC
|
||||
@@ -1812,8 +1893,10 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
|
||||
* S2FWB and CACHE DIC are mandatory to avoid the need for
|
||||
* cache maintenance.
|
||||
*/
|
||||
if (!kvm_supports_cacheable_pfnmap())
|
||||
ret = -EFAULT;
|
||||
if (!kvm_supports_cacheable_pfnmap()) {
|
||||
kvm_release_faultin_page(s2fd->vcpu->kvm, s2vi->page, true, false);
|
||||
return -EFAULT;
|
||||
}
|
||||
} else {
|
||||
/*
|
||||
* If the page was identified as device early by looking at
|
||||
@@ -1825,21 +1908,23 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
|
||||
* In both cases, we don't let transparent_hugepage_adjust()
|
||||
* change things at the last minute.
|
||||
*/
|
||||
s2_force_noncacheable = true;
|
||||
s2vi->map_non_cacheable = true;
|
||||
}
|
||||
} else if (logging_active && !write_fault) {
|
||||
/*
|
||||
* Only actually map the page as writable if this was a write
|
||||
* fault.
|
||||
*/
|
||||
writable = false;
|
||||
|
||||
s2vi->device = true;
|
||||
}
|
||||
|
||||
if (exec_fault && s2_force_noncacheable)
|
||||
ret = -ENOEXEC;
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (ret)
|
||||
goto out_put_page;
|
||||
static int kvm_s2_fault_compute_prot(const struct kvm_s2_fault_desc *s2fd,
|
||||
const struct kvm_s2_fault_vma_info *s2vi,
|
||||
enum kvm_pgtable_prot *prot)
|
||||
{
|
||||
struct kvm *kvm = s2fd->vcpu->kvm;
|
||||
|
||||
if (kvm_vcpu_trap_is_exec_fault(s2fd->vcpu) && s2vi->map_non_cacheable)
|
||||
return -ENOEXEC;
|
||||
|
||||
/*
|
||||
* Guest performs atomic/exclusive operations on memory with unsupported
|
||||
@@ -1847,99 +1932,167 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
|
||||
* and trigger the exception here. Since the memslot is valid, inject
|
||||
* the fault back to the guest.
|
||||
*/
|
||||
if (esr_fsc_is_excl_atomic_fault(kvm_vcpu_get_esr(vcpu))) {
|
||||
kvm_inject_dabt_excl_atomic(vcpu, kvm_vcpu_get_hfar(vcpu));
|
||||
ret = 1;
|
||||
goto out_put_page;
|
||||
if (esr_fsc_is_excl_atomic_fault(kvm_vcpu_get_esr(s2fd->vcpu))) {
|
||||
kvm_inject_dabt_excl_atomic(s2fd->vcpu, kvm_vcpu_get_hfar(s2fd->vcpu));
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (nested)
|
||||
adjust_nested_fault_perms(nested, &prot, &writable);
|
||||
*prot = KVM_PGTABLE_PROT_R;
|
||||
|
||||
if (s2vi->map_writable && (s2vi->device ||
|
||||
!memslot_is_logging(s2fd->memslot) ||
|
||||
kvm_is_write_fault(s2fd->vcpu)))
|
||||
*prot |= KVM_PGTABLE_PROT_W;
|
||||
|
||||
if (s2fd->nested)
|
||||
*prot = adjust_nested_fault_perms(s2fd->nested, *prot);
|
||||
|
||||
if (kvm_vcpu_trap_is_exec_fault(s2fd->vcpu))
|
||||
*prot |= KVM_PGTABLE_PROT_X;
|
||||
|
||||
if (s2vi->map_non_cacheable)
|
||||
*prot |= (s2vi->vm_flags & VM_ALLOW_ANY_UNCACHED) ?
|
||||
KVM_PGTABLE_PROT_NORMAL_NC : KVM_PGTABLE_PROT_DEVICE;
|
||||
else if (cpus_have_final_cap(ARM64_HAS_CACHE_DIC))
|
||||
*prot |= KVM_PGTABLE_PROT_X;
|
||||
|
||||
if (s2fd->nested)
|
||||
*prot = adjust_nested_exec_perms(kvm, s2fd->nested, *prot);
|
||||
|
||||
if (!kvm_s2_fault_is_perm(s2fd) && !s2vi->map_non_cacheable && kvm_has_mte(kvm)) {
|
||||
/* Check the VMM hasn't introduced a new disallowed VMA */
|
||||
if (!s2vi->mte_allowed)
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int kvm_s2_fault_map(const struct kvm_s2_fault_desc *s2fd,
|
||||
const struct kvm_s2_fault_vma_info *s2vi,
|
||||
enum kvm_pgtable_prot prot,
|
||||
void *memcache)
|
||||
{
|
||||
enum kvm_pgtable_walk_flags flags = KVM_PGTABLE_WALK_SHARED;
|
||||
bool writable = prot & KVM_PGTABLE_PROT_W;
|
||||
struct kvm *kvm = s2fd->vcpu->kvm;
|
||||
struct kvm_pgtable *pgt;
|
||||
long perm_fault_granule;
|
||||
long mapping_size;
|
||||
kvm_pfn_t pfn;
|
||||
gfn_t gfn;
|
||||
int ret;
|
||||
|
||||
kvm_fault_lock(kvm);
|
||||
pgt = vcpu->arch.hw_mmu->pgt;
|
||||
if (mmu_invalidate_retry(kvm, mmu_seq)) {
|
||||
ret = -EAGAIN;
|
||||
pgt = s2fd->vcpu->arch.hw_mmu->pgt;
|
||||
ret = -EAGAIN;
|
||||
if (mmu_invalidate_retry(kvm, s2vi->mmu_seq))
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
perm_fault_granule = (kvm_s2_fault_is_perm(s2fd) ?
|
||||
kvm_vcpu_trap_get_perm_fault_granule(s2fd->vcpu) : 0);
|
||||
mapping_size = s2vi->vma_pagesize;
|
||||
pfn = s2vi->pfn;
|
||||
gfn = s2vi->gfn;
|
||||
|
||||
/*
|
||||
* If we are not forced to use page mapping, check if we are
|
||||
* backed by a THP and thus use block mapping if possible.
|
||||
*/
|
||||
if (vma_pagesize == PAGE_SIZE && !(force_pte || s2_force_noncacheable)) {
|
||||
if (fault_is_perm && fault_granule > PAGE_SIZE)
|
||||
vma_pagesize = fault_granule;
|
||||
else
|
||||
vma_pagesize = transparent_hugepage_adjust(kvm, memslot,
|
||||
hva, &pfn,
|
||||
&fault_ipa);
|
||||
|
||||
if (vma_pagesize < 0) {
|
||||
ret = vma_pagesize;
|
||||
goto out_unlock;
|
||||
}
|
||||
}
|
||||
|
||||
if (!fault_is_perm && !s2_force_noncacheable && kvm_has_mte(kvm)) {
|
||||
/* Check the VMM hasn't introduced a new disallowed VMA */
|
||||
if (mte_allowed) {
|
||||
sanitise_mte_tags(kvm, pfn, vma_pagesize);
|
||||
if (mapping_size == PAGE_SIZE &&
|
||||
!(s2vi->max_map_size == PAGE_SIZE || s2vi->map_non_cacheable)) {
|
||||
if (perm_fault_granule > PAGE_SIZE) {
|
||||
mapping_size = perm_fault_granule;
|
||||
} else {
|
||||
ret = -EFAULT;
|
||||
goto out_unlock;
|
||||
mapping_size = transparent_hugepage_adjust(kvm, s2fd->memslot,
|
||||
s2fd->hva, &pfn,
|
||||
&gfn);
|
||||
if (mapping_size < 0) {
|
||||
ret = mapping_size;
|
||||
goto out_unlock;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (writable)
|
||||
prot |= KVM_PGTABLE_PROT_W;
|
||||
|
||||
if (exec_fault)
|
||||
prot |= KVM_PGTABLE_PROT_X;
|
||||
|
||||
if (s2_force_noncacheable) {
|
||||
if (vfio_allow_any_uc)
|
||||
prot |= KVM_PGTABLE_PROT_NORMAL_NC;
|
||||
else
|
||||
prot |= KVM_PGTABLE_PROT_DEVICE;
|
||||
} else if (cpus_have_final_cap(ARM64_HAS_CACHE_DIC)) {
|
||||
prot |= KVM_PGTABLE_PROT_X;
|
||||
}
|
||||
|
||||
if (nested)
|
||||
adjust_nested_exec_perms(kvm, nested, &prot);
|
||||
if (!perm_fault_granule && !s2vi->map_non_cacheable && kvm_has_mte(kvm))
|
||||
sanitise_mte_tags(kvm, pfn, mapping_size);
|
||||
|
||||
/*
|
||||
* Under the premise of getting a FSC_PERM fault, we just need to relax
|
||||
* permissions only if vma_pagesize equals fault_granule. Otherwise,
|
||||
* permissions only if mapping_size equals perm_fault_granule. Otherwise,
|
||||
* kvm_pgtable_stage2_map() should be called to change block size.
|
||||
*/
|
||||
if (fault_is_perm && vma_pagesize == fault_granule) {
|
||||
if (mapping_size == perm_fault_granule) {
|
||||
/*
|
||||
* Drop the SW bits in favour of those stored in the
|
||||
* PTE, which will be preserved.
|
||||
*/
|
||||
prot &= ~KVM_NV_GUEST_MAP_SZ;
|
||||
ret = KVM_PGT_FN(kvm_pgtable_stage2_relax_perms)(pgt, fault_ipa, prot, flags);
|
||||
ret = KVM_PGT_FN(kvm_pgtable_stage2_relax_perms)(pgt, gfn_to_gpa(gfn),
|
||||
prot, flags);
|
||||
} else {
|
||||
ret = KVM_PGT_FN(kvm_pgtable_stage2_map)(pgt, fault_ipa, vma_pagesize,
|
||||
__pfn_to_phys(pfn), prot,
|
||||
memcache, flags);
|
||||
ret = KVM_PGT_FN(kvm_pgtable_stage2_map)(pgt, gfn_to_gpa(gfn), mapping_size,
|
||||
__pfn_to_phys(pfn), prot,
|
||||
memcache, flags);
|
||||
}
|
||||
|
||||
out_unlock:
|
||||
kvm_release_faultin_page(kvm, page, !!ret, writable);
|
||||
kvm_release_faultin_page(kvm, s2vi->page, !!ret, writable);
|
||||
kvm_fault_unlock(kvm);
|
||||
|
||||
/* Mark the page dirty only if the fault is handled successfully */
|
||||
if (writable && !ret)
|
||||
mark_page_dirty_in_slot(kvm, memslot, gfn);
|
||||
/*
|
||||
* Mark the page dirty only if the fault is handled successfully,
|
||||
* making sure we adjust the canonical IPA if the mapping size has
|
||||
* been updated (via a THP upgrade, for example).
|
||||
*/
|
||||
if (writable && !ret) {
|
||||
phys_addr_t ipa = gfn_to_gpa(get_canonical_gfn(s2fd, s2vi));
|
||||
ipa &= ~(mapping_size - 1);
|
||||
mark_page_dirty_in_slot(kvm, s2fd->memslot, gpa_to_gfn(ipa));
|
||||
}
|
||||
|
||||
return ret != -EAGAIN ? ret : 0;
|
||||
if (ret != -EAGAIN)
|
||||
return ret;
|
||||
return 0;
|
||||
}
|
||||
|
||||
out_put_page:
|
||||
kvm_release_page_unused(page);
|
||||
return ret;
|
||||
static int user_mem_abort(const struct kvm_s2_fault_desc *s2fd)
|
||||
{
|
||||
bool perm_fault = kvm_vcpu_trap_is_permission_fault(s2fd->vcpu);
|
||||
struct kvm_s2_fault_vma_info s2vi = {};
|
||||
enum kvm_pgtable_prot prot;
|
||||
void *memcache;
|
||||
int ret;
|
||||
|
||||
/*
|
||||
* Permission faults just need to update the existing leaf entry,
|
||||
* and so normally don't require allocations from the memcache. The
|
||||
* only exception to this is when dirty logging is enabled at runtime
|
||||
* and a write fault needs to collapse a block entry into a table.
|
||||
*/
|
||||
memcache = get_mmu_memcache(s2fd->vcpu);
|
||||
if (!perm_fault || (memslot_is_logging(s2fd->memslot) &&
|
||||
kvm_is_write_fault(s2fd->vcpu))) {
|
||||
ret = topup_mmu_memcache(s2fd->vcpu, memcache);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Let's check if we will get back a huge page backed by hugetlbfs, or
|
||||
* get block mapping for device MMIO region.
|
||||
*/
|
||||
ret = kvm_s2_fault_pin_pfn(s2fd, &s2vi);
|
||||
if (ret != 1)
|
||||
return ret;
|
||||
|
||||
ret = kvm_s2_fault_compute_prot(s2fd, &s2vi, &prot);
|
||||
if (ret) {
|
||||
kvm_release_page_unused(s2vi.page);
|
||||
return ret;
|
||||
}
|
||||
|
||||
return kvm_s2_fault_map(s2fd, &s2vi, prot, memcache);
|
||||
}
|
||||
|
||||
/* Resolve the access fault by making the page young again. */
|
||||
@@ -2202,15 +2355,27 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
VM_WARN_ON_ONCE(kvm_vcpu_trap_is_permission_fault(vcpu) &&
|
||||
!write_fault && !kvm_vcpu_trap_is_exec_fault(vcpu));
|
||||
const struct kvm_s2_fault_desc s2fd = {
|
||||
.vcpu = vcpu,
|
||||
.fault_ipa = fault_ipa,
|
||||
.nested = nested,
|
||||
.memslot = memslot,
|
||||
.hva = hva,
|
||||
};
|
||||
|
||||
if (kvm_vm_is_protected(vcpu->kvm)) {
|
||||
ret = pkvm_mem_abort(&s2fd);
|
||||
} else {
|
||||
VM_WARN_ON_ONCE(kvm_vcpu_trap_is_permission_fault(vcpu) &&
|
||||
!write_fault &&
|
||||
!kvm_vcpu_trap_is_exec_fault(vcpu));
|
||||
|
||||
if (kvm_slot_has_gmem(memslot))
|
||||
ret = gmem_abort(&s2fd);
|
||||
else
|
||||
ret = user_mem_abort(&s2fd);
|
||||
}
|
||||
|
||||
if (kvm_slot_has_gmem(memslot))
|
||||
ret = gmem_abort(vcpu, fault_ipa, nested, memslot,
|
||||
esr_fsc_is_permission_fault(esr));
|
||||
else
|
||||
ret = user_mem_abort(vcpu, fault_ipa, nested, memslot, hva,
|
||||
esr_fsc_is_permission_fault(esr));
|
||||
if (ret == 0)
|
||||
ret = 1;
|
||||
out:
|
||||
@@ -2223,7 +2388,7 @@ out_unlock:
|
||||
|
||||
bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
|
||||
{
|
||||
if (!kvm->arch.mmu.pgt)
|
||||
if (!kvm->arch.mmu.pgt || kvm_vm_is_protected(kvm))
|
||||
return false;
|
||||
|
||||
__unmap_stage2_range(&kvm->arch.mmu, range->start << PAGE_SHIFT,
|
||||
@@ -2238,7 +2403,7 @@ bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
|
||||
{
|
||||
u64 size = (range->end - range->start) << PAGE_SHIFT;
|
||||
|
||||
if (!kvm->arch.mmu.pgt)
|
||||
if (!kvm->arch.mmu.pgt || kvm_vm_is_protected(kvm))
|
||||
return false;
|
||||
|
||||
return KVM_PGT_FN(kvm_pgtable_stage2_test_clear_young)(kvm->arch.mmu.pgt,
|
||||
@@ -2254,7 +2419,7 @@ bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
|
||||
{
|
||||
u64 size = (range->end - range->start) << PAGE_SHIFT;
|
||||
|
||||
if (!kvm->arch.mmu.pgt)
|
||||
if (!kvm->arch.mmu.pgt || kvm_vm_is_protected(kvm))
|
||||
return false;
|
||||
|
||||
return KVM_PGT_FN(kvm_pgtable_stage2_test_clear_young)(kvm->arch.mmu.pgt,
|
||||
@@ -2411,6 +2576,19 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
|
||||
hva_t hva, reg_end;
|
||||
int ret = 0;
|
||||
|
||||
if (kvm_vm_is_protected(kvm)) {
|
||||
/* Cannot modify memslots once a pVM has run. */
|
||||
if (pkvm_hyp_vm_is_created(kvm) &&
|
||||
(change == KVM_MR_DELETE || change == KVM_MR_MOVE)) {
|
||||
return -EPERM;
|
||||
}
|
||||
|
||||
if (new &&
|
||||
new->flags & (KVM_MEM_LOG_DIRTY_PAGES | KVM_MEM_READONLY)) {
|
||||
return -EPERM;
|
||||
}
|
||||
}
|
||||
|
||||
if (change != KVM_MR_CREATE && change != KVM_MR_MOVE &&
|
||||
change != KVM_MR_FLAGS_ONLY)
|
||||
return 0;
|
||||
|
||||
@@ -735,8 +735,10 @@ static struct kvm_s2_mmu *get_s2_mmu_nested(struct kvm_vcpu *vcpu)
|
||||
kvm->arch.nested_mmus_next = (i + 1) % kvm->arch.nested_mmus_size;
|
||||
|
||||
/* Make sure we don't forget to do the laundry */
|
||||
if (kvm_s2_mmu_valid(s2_mmu))
|
||||
if (kvm_s2_mmu_valid(s2_mmu)) {
|
||||
kvm_nested_s2_ptdump_remove_debugfs(s2_mmu);
|
||||
s2_mmu->pending_unmap = true;
|
||||
}
|
||||
|
||||
/*
|
||||
* The virtual VMID (modulo CnP) will be used as a key when matching
|
||||
@@ -750,6 +752,8 @@ static struct kvm_s2_mmu *get_s2_mmu_nested(struct kvm_vcpu *vcpu)
|
||||
s2_mmu->tlb_vtcr = vcpu_read_sys_reg(vcpu, VTCR_EL2);
|
||||
s2_mmu->nested_stage2_enabled = vcpu_read_sys_reg(vcpu, HCR_EL2) & HCR_VM;
|
||||
|
||||
kvm_nested_s2_ptdump_create_debugfs(s2_mmu);
|
||||
|
||||
out:
|
||||
atomic_inc(&s2_mmu->refcnt);
|
||||
|
||||
@@ -1558,6 +1562,11 @@ u64 limit_nv_id_reg(struct kvm *kvm, u32 reg, u64 val)
|
||||
ID_AA64PFR1_EL1_MTE);
|
||||
break;
|
||||
|
||||
case SYS_ID_AA64PFR2_EL1:
|
||||
/* GICv5 is not yet supported for NV */
|
||||
val &= ~ID_AA64PFR2_EL1_GCIE;
|
||||
break;
|
||||
|
||||
case SYS_ID_AA64MMFR0_EL1:
|
||||
/* Hide ExS, Secure Memory */
|
||||
val &= ~(ID_AA64MMFR0_EL1_EXS |
|
||||
|
||||
@@ -88,7 +88,7 @@ void __init kvm_hyp_reserve(void)
|
||||
static void __pkvm_destroy_hyp_vm(struct kvm *kvm)
|
||||
{
|
||||
if (pkvm_hyp_vm_is_created(kvm)) {
|
||||
WARN_ON(kvm_call_hyp_nvhe(__pkvm_teardown_vm,
|
||||
WARN_ON(kvm_call_hyp_nvhe(__pkvm_finalize_teardown_vm,
|
||||
kvm->arch.pkvm.handle));
|
||||
} else if (kvm->arch.pkvm.handle) {
|
||||
/*
|
||||
@@ -192,10 +192,16 @@ int pkvm_create_hyp_vm(struct kvm *kvm)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
/*
|
||||
* Synchronise with kvm_arch_prepare_memory_region(), as we
|
||||
* prevent memslot modifications on a pVM that has been run.
|
||||
*/
|
||||
mutex_lock(&kvm->slots_lock);
|
||||
mutex_lock(&kvm->arch.config_lock);
|
||||
if (!pkvm_hyp_vm_is_created(kvm))
|
||||
ret = __pkvm_create_hyp_vm(kvm);
|
||||
mutex_unlock(&kvm->arch.config_lock);
|
||||
mutex_unlock(&kvm->slots_lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
@@ -219,9 +225,10 @@ void pkvm_destroy_hyp_vm(struct kvm *kvm)
|
||||
mutex_unlock(&kvm->arch.config_lock);
|
||||
}
|
||||
|
||||
int pkvm_init_host_vm(struct kvm *kvm)
|
||||
int pkvm_init_host_vm(struct kvm *kvm, unsigned long type)
|
||||
{
|
||||
int ret;
|
||||
bool protected = type & KVM_VM_TYPE_ARM_PROTECTED;
|
||||
|
||||
if (pkvm_hyp_vm_is_created(kvm))
|
||||
return -EINVAL;
|
||||
@@ -236,6 +243,11 @@ int pkvm_init_host_vm(struct kvm *kvm)
|
||||
return ret;
|
||||
|
||||
kvm->arch.pkvm.handle = ret;
|
||||
kvm->arch.pkvm.is_protected = protected;
|
||||
if (protected) {
|
||||
pr_warn_once("kvm: protected VMs are experimental and for development only, tainting kernel\n");
|
||||
add_taint(TAINT_USER, LOCKDEP_STILL_OK);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -322,15 +334,38 @@ int pkvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int __pkvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 start, u64 end)
|
||||
static int __pkvm_pgtable_stage2_reclaim(struct kvm_pgtable *pgt, u64 start, u64 end)
|
||||
{
|
||||
struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
|
||||
pkvm_handle_t handle = kvm->arch.pkvm.handle;
|
||||
struct pkvm_mapping *mapping;
|
||||
int ret;
|
||||
|
||||
if (!handle)
|
||||
return 0;
|
||||
for_each_mapping_in_range_safe(pgt, start, end, mapping) {
|
||||
struct page *page;
|
||||
|
||||
ret = kvm_call_hyp_nvhe(__pkvm_reclaim_dying_guest_page,
|
||||
handle, mapping->gfn);
|
||||
if (WARN_ON(ret))
|
||||
continue;
|
||||
|
||||
page = pfn_to_page(mapping->pfn);
|
||||
WARN_ON_ONCE(mapping->nr_pages != 1);
|
||||
unpin_user_pages_dirty_lock(&page, 1, true);
|
||||
account_locked_vm(current->mm, 1, false);
|
||||
pkvm_mapping_remove(mapping, &pgt->pkvm_mappings);
|
||||
kfree(mapping);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int __pkvm_pgtable_stage2_unshare(struct kvm_pgtable *pgt, u64 start, u64 end)
|
||||
{
|
||||
struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
|
||||
pkvm_handle_t handle = kvm->arch.pkvm.handle;
|
||||
struct pkvm_mapping *mapping;
|
||||
int ret;
|
||||
|
||||
for_each_mapping_in_range_safe(pgt, start, end, mapping) {
|
||||
ret = kvm_call_hyp_nvhe(__pkvm_host_unshare_guest, handle, mapping->gfn,
|
||||
@@ -347,7 +382,21 @@ static int __pkvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 start, u64 e
|
||||
void pkvm_pgtable_stage2_destroy_range(struct kvm_pgtable *pgt,
|
||||
u64 addr, u64 size)
|
||||
{
|
||||
__pkvm_pgtable_stage2_unmap(pgt, addr, addr + size);
|
||||
struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
|
||||
pkvm_handle_t handle = kvm->arch.pkvm.handle;
|
||||
|
||||
if (!handle)
|
||||
return;
|
||||
|
||||
if (pkvm_hyp_vm_is_created(kvm) && !kvm->arch.pkvm.is_dying) {
|
||||
WARN_ON(kvm_call_hyp_nvhe(__pkvm_start_teardown_vm, handle));
|
||||
kvm->arch.pkvm.is_dying = true;
|
||||
}
|
||||
|
||||
if (kvm_vm_is_protected(kvm))
|
||||
__pkvm_pgtable_stage2_reclaim(pgt, addr, addr + size);
|
||||
else
|
||||
__pkvm_pgtable_stage2_unshare(pgt, addr, addr + size);
|
||||
}
|
||||
|
||||
void pkvm_pgtable_stage2_destroy_pgd(struct kvm_pgtable *pgt)
|
||||
@@ -365,31 +414,58 @@ int pkvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
|
||||
struct kvm_hyp_memcache *cache = mc;
|
||||
u64 gfn = addr >> PAGE_SHIFT;
|
||||
u64 pfn = phys >> PAGE_SHIFT;
|
||||
u64 end = addr + size;
|
||||
int ret;
|
||||
|
||||
if (size != PAGE_SIZE && size != PMD_SIZE)
|
||||
return -EINVAL;
|
||||
|
||||
lockdep_assert_held_write(&kvm->mmu_lock);
|
||||
mapping = pkvm_mapping_iter_first(&pgt->pkvm_mappings, addr, end - 1);
|
||||
|
||||
/*
|
||||
* Calling stage2_map() on top of existing mappings is either happening because of a race
|
||||
* with another vCPU, or because we're changing between page and block mappings. As per
|
||||
* user_mem_abort(), same-size permission faults are handled in the relax_perms() path.
|
||||
*/
|
||||
mapping = pkvm_mapping_iter_first(&pgt->pkvm_mappings, addr, addr + size - 1);
|
||||
if (mapping) {
|
||||
if (size == (mapping->nr_pages * PAGE_SIZE))
|
||||
return -EAGAIN;
|
||||
if (kvm_vm_is_protected(kvm)) {
|
||||
/* Protected VMs are mapped using RWX page-granular mappings */
|
||||
if (WARN_ON_ONCE(size != PAGE_SIZE))
|
||||
return -EINVAL;
|
||||
|
||||
/* Remove _any_ pkvm_mapping overlapping with the range, bigger or smaller. */
|
||||
ret = __pkvm_pgtable_stage2_unmap(pgt, addr, addr + size);
|
||||
if (ret)
|
||||
return ret;
|
||||
mapping = NULL;
|
||||
if (WARN_ON_ONCE(prot != KVM_PGTABLE_PROT_RWX))
|
||||
return -EINVAL;
|
||||
|
||||
/*
|
||||
* We either raced with another vCPU or the guest PTE
|
||||
* has been poisoned by an erroneous host access.
|
||||
*/
|
||||
if (mapping) {
|
||||
ret = kvm_call_hyp_nvhe(__pkvm_vcpu_in_poison_fault);
|
||||
return ret ? -EFAULT : -EAGAIN;
|
||||
}
|
||||
|
||||
ret = kvm_call_hyp_nvhe(__pkvm_host_donate_guest, pfn, gfn);
|
||||
} else {
|
||||
if (WARN_ON_ONCE(size != PAGE_SIZE && size != PMD_SIZE))
|
||||
return -EINVAL;
|
||||
|
||||
/*
|
||||
* We either raced with another vCPU or we're changing between
|
||||
* page and block mappings. As per user_mem_abort(), same-size
|
||||
* permission faults are handled in the relax_perms() path.
|
||||
*/
|
||||
if (mapping) {
|
||||
if (size == (mapping->nr_pages * PAGE_SIZE))
|
||||
return -EAGAIN;
|
||||
|
||||
/*
|
||||
* Remove _any_ pkvm_mapping overlapping with the range,
|
||||
* bigger or smaller.
|
||||
*/
|
||||
ret = __pkvm_pgtable_stage2_unshare(pgt, addr, end);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
mapping = NULL;
|
||||
}
|
||||
|
||||
ret = kvm_call_hyp_nvhe(__pkvm_host_share_guest, pfn, gfn,
|
||||
size / PAGE_SIZE, prot);
|
||||
}
|
||||
|
||||
ret = kvm_call_hyp_nvhe(__pkvm_host_share_guest, pfn, gfn, size / PAGE_SIZE, prot);
|
||||
if (WARN_ON(ret))
|
||||
return ret;
|
||||
|
||||
@@ -404,9 +480,14 @@ int pkvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
|
||||
|
||||
int pkvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size)
|
||||
{
|
||||
lockdep_assert_held_write(&kvm_s2_mmu_to_kvm(pgt->mmu)->mmu_lock);
|
||||
struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
|
||||
|
||||
return __pkvm_pgtable_stage2_unmap(pgt, addr, addr + size);
|
||||
if (WARN_ON(kvm_vm_is_protected(kvm)))
|
||||
return -EPERM;
|
||||
|
||||
lockdep_assert_held_write(&kvm->mmu_lock);
|
||||
|
||||
return __pkvm_pgtable_stage2_unshare(pgt, addr, addr + size);
|
||||
}
|
||||
|
||||
int pkvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size)
|
||||
@@ -416,6 +497,9 @@ int pkvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size)
|
||||
struct pkvm_mapping *mapping;
|
||||
int ret = 0;
|
||||
|
||||
if (WARN_ON(kvm_vm_is_protected(kvm)))
|
||||
return -EPERM;
|
||||
|
||||
lockdep_assert_held(&kvm->mmu_lock);
|
||||
for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping) {
|
||||
ret = kvm_call_hyp_nvhe(__pkvm_host_wrprotect_guest, handle, mapping->gfn,
|
||||
@@ -447,6 +531,9 @@ bool pkvm_pgtable_stage2_test_clear_young(struct kvm_pgtable *pgt, u64 addr, u64
|
||||
struct pkvm_mapping *mapping;
|
||||
bool young = false;
|
||||
|
||||
if (WARN_ON(kvm_vm_is_protected(kvm)))
|
||||
return false;
|
||||
|
||||
lockdep_assert_held(&kvm->mmu_lock);
|
||||
for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping)
|
||||
young |= kvm_call_hyp_nvhe(__pkvm_host_test_clear_young_guest, handle, mapping->gfn,
|
||||
@@ -458,12 +545,18 @@ bool pkvm_pgtable_stage2_test_clear_young(struct kvm_pgtable *pgt, u64 addr, u64
|
||||
int pkvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr, enum kvm_pgtable_prot prot,
|
||||
enum kvm_pgtable_walk_flags flags)
|
||||
{
|
||||
if (WARN_ON(kvm_vm_is_protected(kvm_s2_mmu_to_kvm(pgt->mmu))))
|
||||
return -EPERM;
|
||||
|
||||
return kvm_call_hyp_nvhe(__pkvm_host_relax_perms_guest, addr >> PAGE_SHIFT, prot);
|
||||
}
|
||||
|
||||
void pkvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr,
|
||||
enum kvm_pgtable_walk_flags flags)
|
||||
{
|
||||
if (WARN_ON(kvm_vm_is_protected(kvm_s2_mmu_to_kvm(pgt->mmu))))
|
||||
return;
|
||||
|
||||
WARN_ON(kvm_call_hyp_nvhe(__pkvm_host_mkyoung_guest, addr >> PAGE_SHIFT));
|
||||
}
|
||||
|
||||
@@ -485,3 +578,15 @@ int pkvm_pgtable_stage2_split(struct kvm_pgtable *pgt, u64 addr, u64 size,
|
||||
WARN_ON_ONCE(1);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Forcefully reclaim a page from the guest, zeroing its contents and
|
||||
* poisoning the stage-2 pte so that pages can no longer be mapped at
|
||||
* the same IPA. The page remains pinned until the guest is destroyed.
|
||||
*/
|
||||
bool pkvm_force_reclaim_guest_page(phys_addr_t phys)
|
||||
{
|
||||
int ret = kvm_call_hyp_nvhe(__pkvm_force_reclaim_guest_page, phys);
|
||||
|
||||
return !ret || ret == -EAGAIN;
|
||||
}
|
||||
|
||||
@@ -939,7 +939,8 @@ int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu)
|
||||
* number against the dimensions of the vgic and make sure
|
||||
* it's valid.
|
||||
*/
|
||||
if (!irq_is_ppi(irq) && !vgic_valid_spi(vcpu->kvm, irq))
|
||||
if (!irq_is_ppi(vcpu->kvm, irq) &&
|
||||
!vgic_valid_spi(vcpu->kvm, irq))
|
||||
return -EINVAL;
|
||||
} else if (kvm_arm_pmu_irq_initialized(vcpu)) {
|
||||
return -EINVAL;
|
||||
@@ -961,8 +962,13 @@ static int kvm_arm_pmu_v3_init(struct kvm_vcpu *vcpu)
|
||||
if (!vgic_initialized(vcpu->kvm))
|
||||
return -ENODEV;
|
||||
|
||||
if (!kvm_arm_pmu_irq_initialized(vcpu))
|
||||
return -ENXIO;
|
||||
if (!kvm_arm_pmu_irq_initialized(vcpu)) {
|
||||
if (!vgic_is_v5(vcpu->kvm))
|
||||
return -ENXIO;
|
||||
|
||||
/* Use the architected irq number for GICv5. */
|
||||
vcpu->arch.pmu.irq_num = KVM_ARMV8_PMU_GICV5_IRQ;
|
||||
}
|
||||
|
||||
ret = kvm_vgic_set_owner(vcpu, vcpu->arch.pmu.irq_num,
|
||||
&vcpu->arch.pmu);
|
||||
@@ -987,11 +993,15 @@ static bool pmu_irq_is_valid(struct kvm *kvm, int irq)
|
||||
unsigned long i;
|
||||
struct kvm_vcpu *vcpu;
|
||||
|
||||
/* On GICv5, the PMUIRQ is architecturally mandated to be PPI 23 */
|
||||
if (vgic_is_v5(kvm) && irq != KVM_ARMV8_PMU_GICV5_IRQ)
|
||||
return false;
|
||||
|
||||
kvm_for_each_vcpu(i, vcpu, kvm) {
|
||||
if (!kvm_arm_pmu_irq_initialized(vcpu))
|
||||
continue;
|
||||
|
||||
if (irq_is_ppi(irq)) {
|
||||
if (irq_is_ppi(vcpu->kvm, irq)) {
|
||||
if (vcpu->arch.pmu.irq_num != irq)
|
||||
return false;
|
||||
} else {
|
||||
@@ -1142,7 +1152,7 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
|
||||
return -EFAULT;
|
||||
|
||||
/* The PMU overflow interrupt can be a PPI or a valid SPI. */
|
||||
if (!(irq_is_ppi(irq) || irq_is_spi(irq)))
|
||||
if (!(irq_is_ppi(vcpu->kvm, irq) || irq_is_spi(vcpu->kvm, irq)))
|
||||
return -EINVAL;
|
||||
|
||||
if (!pmu_irq_is_valid(kvm, irq))
|
||||
|
||||
@@ -10,19 +10,20 @@
|
||||
#include <linux/kvm_host.h>
|
||||
#include <linux/seq_file.h>
|
||||
|
||||
#include <asm/cpufeature.h>
|
||||
#include <asm/kvm_mmu.h>
|
||||
#include <asm/kvm_pgtable.h>
|
||||
#include <asm/ptdump.h>
|
||||
|
||||
#define MARKERS_LEN 2
|
||||
#define KVM_PGTABLE_MAX_LEVELS (KVM_PGTABLE_LAST_LEVEL + 1)
|
||||
#define S2FNAMESZ sizeof("0x0123456789abcdef-0x0123456789abcdef-s2-disabled")
|
||||
|
||||
struct kvm_ptdump_guest_state {
|
||||
struct kvm *kvm;
|
||||
struct kvm_s2_mmu *mmu;
|
||||
struct ptdump_pg_state parser_state;
|
||||
struct addr_marker ipa_marker[MARKERS_LEN];
|
||||
struct ptdump_pg_level level[KVM_PGTABLE_MAX_LEVELS];
|
||||
struct ptdump_range range[MARKERS_LEN];
|
||||
};
|
||||
|
||||
static const struct ptdump_prot_bits stage2_pte_bits[] = {
|
||||
@@ -112,10 +113,9 @@ static int kvm_ptdump_build_levels(struct ptdump_pg_level *level, u32 start_lvl)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct kvm_ptdump_guest_state *kvm_ptdump_parser_create(struct kvm *kvm)
|
||||
static struct kvm_ptdump_guest_state *kvm_ptdump_parser_create(struct kvm_s2_mmu *mmu)
|
||||
{
|
||||
struct kvm_ptdump_guest_state *st;
|
||||
struct kvm_s2_mmu *mmu = &kvm->arch.mmu;
|
||||
struct kvm_pgtable *pgtable = mmu->pgt;
|
||||
int ret;
|
||||
|
||||
@@ -131,17 +131,8 @@ static struct kvm_ptdump_guest_state *kvm_ptdump_parser_create(struct kvm *kvm)
|
||||
|
||||
st->ipa_marker[0].name = "Guest IPA";
|
||||
st->ipa_marker[1].start_address = BIT(pgtable->ia_bits);
|
||||
st->range[0].end = BIT(pgtable->ia_bits);
|
||||
|
||||
st->kvm = kvm;
|
||||
st->parser_state = (struct ptdump_pg_state) {
|
||||
.marker = &st->ipa_marker[0],
|
||||
.level = -1,
|
||||
.pg_level = &st->level[0],
|
||||
.ptdump.range = &st->range[0],
|
||||
.start_address = 0,
|
||||
};
|
||||
|
||||
st->mmu = mmu;
|
||||
return st;
|
||||
}
|
||||
|
||||
@@ -149,16 +140,20 @@ static int kvm_ptdump_guest_show(struct seq_file *m, void *unused)
|
||||
{
|
||||
int ret;
|
||||
struct kvm_ptdump_guest_state *st = m->private;
|
||||
struct kvm *kvm = st->kvm;
|
||||
struct kvm_s2_mmu *mmu = &kvm->arch.mmu;
|
||||
struct ptdump_pg_state *parser_state = &st->parser_state;
|
||||
struct kvm_s2_mmu *mmu = st->mmu;
|
||||
struct kvm *kvm = kvm_s2_mmu_to_kvm(mmu);
|
||||
struct kvm_pgtable_walker walker = (struct kvm_pgtable_walker) {
|
||||
.cb = kvm_ptdump_visitor,
|
||||
.arg = parser_state,
|
||||
.arg = &st->parser_state,
|
||||
.flags = KVM_PGTABLE_WALK_LEAF,
|
||||
};
|
||||
|
||||
parser_state->seq = m;
|
||||
st->parser_state = (struct ptdump_pg_state) {
|
||||
.marker = &st->ipa_marker[0],
|
||||
.level = -1,
|
||||
.pg_level = &st->level[0],
|
||||
.seq = m,
|
||||
};
|
||||
|
||||
write_lock(&kvm->mmu_lock);
|
||||
ret = kvm_pgtable_walk(mmu->pgt, 0, BIT(mmu->pgt->ia_bits), &walker);
|
||||
@@ -169,14 +164,15 @@ static int kvm_ptdump_guest_show(struct seq_file *m, void *unused)
|
||||
|
||||
static int kvm_ptdump_guest_open(struct inode *m, struct file *file)
|
||||
{
|
||||
struct kvm *kvm = m->i_private;
|
||||
struct kvm_s2_mmu *mmu = m->i_private;
|
||||
struct kvm *kvm = kvm_s2_mmu_to_kvm(mmu);
|
||||
struct kvm_ptdump_guest_state *st;
|
||||
int ret;
|
||||
|
||||
if (!kvm_get_kvm_safe(kvm))
|
||||
return -ENOENT;
|
||||
|
||||
st = kvm_ptdump_parser_create(kvm);
|
||||
st = kvm_ptdump_parser_create(mmu);
|
||||
if (IS_ERR(st)) {
|
||||
ret = PTR_ERR(st);
|
||||
goto err_with_kvm_ref;
|
||||
@@ -194,7 +190,7 @@ err_with_kvm_ref:
|
||||
|
||||
static int kvm_ptdump_guest_close(struct inode *m, struct file *file)
|
||||
{
|
||||
struct kvm *kvm = m->i_private;
|
||||
struct kvm *kvm = kvm_s2_mmu_to_kvm(m->i_private);
|
||||
void *st = ((struct seq_file *)file->private_data)->private;
|
||||
|
||||
kfree(st);
|
||||
@@ -229,14 +225,15 @@ static int kvm_pgtable_levels_show(struct seq_file *m, void *unused)
|
||||
static int kvm_pgtable_debugfs_open(struct inode *m, struct file *file,
|
||||
int (*show)(struct seq_file *, void *))
|
||||
{
|
||||
struct kvm *kvm = m->i_private;
|
||||
struct kvm_s2_mmu *mmu = m->i_private;
|
||||
struct kvm *kvm = kvm_s2_mmu_to_kvm(mmu);
|
||||
struct kvm_pgtable *pgtable;
|
||||
int ret;
|
||||
|
||||
if (!kvm_get_kvm_safe(kvm))
|
||||
return -ENOENT;
|
||||
|
||||
pgtable = kvm->arch.mmu.pgt;
|
||||
pgtable = mmu->pgt;
|
||||
|
||||
ret = single_open(file, show, pgtable);
|
||||
if (ret < 0)
|
||||
@@ -256,7 +253,7 @@ static int kvm_pgtable_levels_open(struct inode *m, struct file *file)
|
||||
|
||||
static int kvm_pgtable_debugfs_close(struct inode *m, struct file *file)
|
||||
{
|
||||
struct kvm *kvm = m->i_private;
|
||||
struct kvm *kvm = kvm_s2_mmu_to_kvm(m->i_private);
|
||||
|
||||
kvm_put_kvm(kvm);
|
||||
return single_release(m, file);
|
||||
@@ -276,12 +273,36 @@ static const struct file_operations kvm_pgtable_levels_fops = {
|
||||
.release = kvm_pgtable_debugfs_close,
|
||||
};
|
||||
|
||||
void kvm_nested_s2_ptdump_create_debugfs(struct kvm_s2_mmu *mmu)
|
||||
{
|
||||
struct dentry *dent;
|
||||
char file_name[S2FNAMESZ];
|
||||
|
||||
snprintf(file_name, sizeof(file_name), "0x%016llx-0x%016llx-s2-%sabled",
|
||||
mmu->tlb_vttbr,
|
||||
mmu->tlb_vtcr,
|
||||
mmu->nested_stage2_enabled ? "en" : "dis");
|
||||
|
||||
dent = debugfs_create_file(file_name, 0400,
|
||||
mmu->arch->debugfs_nv_dentry, mmu,
|
||||
&kvm_ptdump_guest_fops);
|
||||
|
||||
mmu->shadow_pt_debugfs_dentry = dent;
|
||||
}
|
||||
|
||||
void kvm_nested_s2_ptdump_remove_debugfs(struct kvm_s2_mmu *mmu)
|
||||
{
|
||||
debugfs_remove(mmu->shadow_pt_debugfs_dentry);
|
||||
}
|
||||
|
||||
void kvm_s2_ptdump_create_debugfs(struct kvm *kvm)
|
||||
{
|
||||
debugfs_create_file("stage2_page_tables", 0400, kvm->debugfs_dentry,
|
||||
kvm, &kvm_ptdump_guest_fops);
|
||||
debugfs_create_file("ipa_range", 0400, kvm->debugfs_dentry, kvm,
|
||||
&kvm_pgtable_range_fops);
|
||||
&kvm->arch.mmu, &kvm_ptdump_guest_fops);
|
||||
debugfs_create_file("ipa_range", 0400, kvm->debugfs_dentry,
|
||||
&kvm->arch.mmu, &kvm_pgtable_range_fops);
|
||||
debugfs_create_file("stage2_levels", 0400, kvm->debugfs_dentry,
|
||||
kvm, &kvm_pgtable_levels_fops);
|
||||
&kvm->arch.mmu, &kvm_pgtable_levels_fops);
|
||||
if (cpus_have_final_cap(ARM64_HAS_NESTED_VIRT))
|
||||
kvm->arch.debugfs_nv_dentry = debugfs_create_dir("nested", kvm->debugfs_dentry);
|
||||
}
|
||||
|
||||
@@ -197,7 +197,7 @@ static void hyp_dump_backtrace(unsigned long hyp_offset)
|
||||
kvm_nvhe_dump_backtrace_end();
|
||||
}
|
||||
|
||||
#ifdef CONFIG_PROTECTED_NVHE_STACKTRACE
|
||||
#ifdef CONFIG_PKVM_STACKTRACE
|
||||
DECLARE_KVM_NVHE_PER_CPU(unsigned long [NVHE_STACKTRACE_SIZE/sizeof(long)],
|
||||
pkvm_stacktrace);
|
||||
|
||||
@@ -225,12 +225,12 @@ static void pkvm_dump_backtrace(unsigned long hyp_offset)
|
||||
kvm_nvhe_dump_backtrace_entry((void *)hyp_offset, stacktrace[i]);
|
||||
kvm_nvhe_dump_backtrace_end();
|
||||
}
|
||||
#else /* !CONFIG_PROTECTED_NVHE_STACKTRACE */
|
||||
#else /* !CONFIG_PKVM_STACKTRACE */
|
||||
static void pkvm_dump_backtrace(unsigned long hyp_offset)
|
||||
{
|
||||
kvm_err("Cannot dump pKVM nVHE stacktrace: !CONFIG_PROTECTED_NVHE_STACKTRACE\n");
|
||||
kvm_err("Cannot dump pKVM nVHE stacktrace: !CONFIG_PKVM_STACKTRACE\n");
|
||||
}
|
||||
#endif /* CONFIG_PROTECTED_NVHE_STACKTRACE */
|
||||
#endif /* CONFIG_PKVM_STACKTRACE */
|
||||
|
||||
/*
|
||||
* kvm_nvhe_dump_backtrace - Dump KVM nVHE hypervisor backtrace.
|
||||
|
||||
@@ -681,6 +681,91 @@ static bool access_gic_dir(struct kvm_vcpu *vcpu,
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool access_gicv5_idr0(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
|
||||
const struct sys_reg_desc *r)
|
||||
{
|
||||
if (p->is_write)
|
||||
return undef_access(vcpu, p, r);
|
||||
|
||||
/*
|
||||
* Expose KVM's priority- and ID-bits to the guest, but not GCIE_LEGACY.
|
||||
*
|
||||
* Note: for GICv5 the mimic the way that the num_pri_bits and
|
||||
* num_id_bits fields are used with GICv3:
|
||||
* - num_pri_bits stores the actual number of priority bits, whereas the
|
||||
* register field stores num_pri_bits - 1.
|
||||
* - num_id_bits stores the raw field value, which is 0b0000 for 16 bits
|
||||
* and 0b0001 for 24 bits.
|
||||
*/
|
||||
p->regval = FIELD_PREP(ICC_IDR0_EL1_PRI_BITS, vcpu->arch.vgic_cpu.num_pri_bits - 1) |
|
||||
FIELD_PREP(ICC_IDR0_EL1_ID_BITS, vcpu->arch.vgic_cpu.num_id_bits);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool access_gicv5_iaffid(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
|
||||
const struct sys_reg_desc *r)
|
||||
{
|
||||
if (p->is_write)
|
||||
return undef_access(vcpu, p, r);
|
||||
|
||||
/*
|
||||
* For GICv5 VMs, the IAFFID value is the same as the VPE ID. The VPE ID
|
||||
* is the same as the VCPU's ID.
|
||||
*/
|
||||
p->regval = FIELD_PREP(ICC_IAFFIDR_EL1_IAFFID, vcpu->vcpu_id);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool access_gicv5_ppi_enabler(struct kvm_vcpu *vcpu,
|
||||
struct sys_reg_params *p,
|
||||
const struct sys_reg_desc *r)
|
||||
{
|
||||
unsigned long *mask = vcpu->kvm->arch.vgic.gicv5_vm.vgic_ppi_mask;
|
||||
struct vgic_v5_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v5;
|
||||
int i;
|
||||
|
||||
/* We never expect to get here with a read! */
|
||||
if (WARN_ON_ONCE(!p->is_write))
|
||||
return undef_access(vcpu, p, r);
|
||||
|
||||
/*
|
||||
* If we're only handling architected PPIs and the guest writes to the
|
||||
* enable for the non-architected PPIs, we just return as there's
|
||||
* nothing to do at all. We don't even allocate the storage for them in
|
||||
* this case.
|
||||
*/
|
||||
if (VGIC_V5_NR_PRIVATE_IRQS == 64 && p->Op2 % 2)
|
||||
return true;
|
||||
|
||||
/*
|
||||
* Merge the raw guest write into out bitmap at an offset of either 0 or
|
||||
* 64, then and it with our PPI mask.
|
||||
*/
|
||||
bitmap_write(cpu_if->vgic_ppi_enabler, p->regval, 64 * (p->Op2 % 2), 64);
|
||||
bitmap_and(cpu_if->vgic_ppi_enabler, cpu_if->vgic_ppi_enabler, mask,
|
||||
VGIC_V5_NR_PRIVATE_IRQS);
|
||||
|
||||
/*
|
||||
* Sync the change in enable states to the vgic_irqs. We consider all
|
||||
* PPIs as we don't expose many to the guest.
|
||||
*/
|
||||
for_each_set_bit(i, mask, VGIC_V5_NR_PRIVATE_IRQS) {
|
||||
u32 intid = vgic_v5_make_ppi(i);
|
||||
struct vgic_irq *irq;
|
||||
|
||||
irq = vgic_get_vcpu_irq(vcpu, intid);
|
||||
|
||||
scoped_guard(raw_spinlock_irqsave, &irq->irq_lock)
|
||||
irq->enabled = test_bit(i, cpu_if->vgic_ppi_enabler);
|
||||
|
||||
vgic_put_irq(vcpu->kvm, irq);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool trap_raz_wi(struct kvm_vcpu *vcpu,
|
||||
struct sys_reg_params *p,
|
||||
const struct sys_reg_desc *r)
|
||||
@@ -1758,6 +1843,7 @@ static u8 pmuver_to_perfmon(u8 pmuver)
|
||||
|
||||
static u64 sanitise_id_aa64pfr0_el1(const struct kvm_vcpu *vcpu, u64 val);
|
||||
static u64 sanitise_id_aa64pfr1_el1(const struct kvm_vcpu *vcpu, u64 val);
|
||||
static u64 sanitise_id_aa64pfr2_el1(const struct kvm_vcpu *vcpu, u64 val);
|
||||
static u64 sanitise_id_aa64dfr0_el1(const struct kvm_vcpu *vcpu, u64 val);
|
||||
|
||||
/* Read a sanitised cpufeature ID register by sys_reg_desc */
|
||||
@@ -1783,10 +1869,7 @@ static u64 __kvm_read_sanitised_id_reg(const struct kvm_vcpu *vcpu,
|
||||
val = sanitise_id_aa64pfr1_el1(vcpu, val);
|
||||
break;
|
||||
case SYS_ID_AA64PFR2_EL1:
|
||||
val &= ID_AA64PFR2_EL1_FPMR |
|
||||
(kvm_has_mte(vcpu->kvm) ?
|
||||
ID_AA64PFR2_EL1_MTEFAR | ID_AA64PFR2_EL1_MTESTOREONLY :
|
||||
0);
|
||||
val = sanitise_id_aa64pfr2_el1(vcpu, val);
|
||||
break;
|
||||
case SYS_ID_AA64ISAR1_EL1:
|
||||
if (!vcpu_has_ptrauth(vcpu))
|
||||
@@ -1985,7 +2068,7 @@ static u64 sanitise_id_aa64pfr0_el1(const struct kvm_vcpu *vcpu, u64 val)
|
||||
val |= SYS_FIELD_PREP_ENUM(ID_AA64PFR0_EL1, CSV3, IMP);
|
||||
}
|
||||
|
||||
if (vgic_is_v3(vcpu->kvm)) {
|
||||
if (vgic_host_has_gicv3()) {
|
||||
val &= ~ID_AA64PFR0_EL1_GIC_MASK;
|
||||
val |= SYS_FIELD_PREP_ENUM(ID_AA64PFR0_EL1, GIC, IMP);
|
||||
}
|
||||
@@ -2027,6 +2110,23 @@ static u64 sanitise_id_aa64pfr1_el1(const struct kvm_vcpu *vcpu, u64 val)
|
||||
return val;
|
||||
}
|
||||
|
||||
static u64 sanitise_id_aa64pfr2_el1(const struct kvm_vcpu *vcpu, u64 val)
|
||||
{
|
||||
val &= ID_AA64PFR2_EL1_FPMR |
|
||||
ID_AA64PFR2_EL1_MTEFAR |
|
||||
ID_AA64PFR2_EL1_MTESTOREONLY;
|
||||
|
||||
if (!kvm_has_mte(vcpu->kvm)) {
|
||||
val &= ~ID_AA64PFR2_EL1_MTEFAR;
|
||||
val &= ~ID_AA64PFR2_EL1_MTESTOREONLY;
|
||||
}
|
||||
|
||||
if (vgic_host_has_gicv5())
|
||||
val |= SYS_FIELD_PREP_ENUM(ID_AA64PFR2_EL1, GCIE, IMP);
|
||||
|
||||
return val;
|
||||
}
|
||||
|
||||
static u64 sanitise_id_aa64dfr0_el1(const struct kvm_vcpu *vcpu, u64 val)
|
||||
{
|
||||
val = ID_REG_LIMIT_FIELD_ENUM(val, ID_AA64DFR0_EL1, DebugVer, V8P8);
|
||||
@@ -2177,14 +2277,6 @@ static int set_id_aa64pfr0_el1(struct kvm_vcpu *vcpu,
|
||||
(vcpu_has_nv(vcpu) && !FIELD_GET(ID_AA64PFR0_EL1_EL2, user_val)))
|
||||
return -EINVAL;
|
||||
|
||||
/*
|
||||
* If we are running on a GICv5 host and support FEAT_GCIE_LEGACY, then
|
||||
* we support GICv3. Fail attempts to do anything but set that to IMP.
|
||||
*/
|
||||
if (vgic_is_v3_compat(vcpu->kvm) &&
|
||||
FIELD_GET(ID_AA64PFR0_EL1_GIC_MASK, user_val) != ID_AA64PFR0_EL1_GIC_IMP)
|
||||
return -EINVAL;
|
||||
|
||||
return set_id_reg(vcpu, rd, user_val);
|
||||
}
|
||||
|
||||
@@ -2224,6 +2316,12 @@ static int set_id_aa64pfr1_el1(struct kvm_vcpu *vcpu,
|
||||
return set_id_reg(vcpu, rd, user_val);
|
||||
}
|
||||
|
||||
static int set_id_aa64pfr2_el1(struct kvm_vcpu *vcpu,
|
||||
const struct sys_reg_desc *rd, u64 user_val)
|
||||
{
|
||||
return set_id_reg(vcpu, rd, user_val);
|
||||
}
|
||||
|
||||
/*
|
||||
* Allow userspace to de-feature a stage-2 translation granule but prevent it
|
||||
* from claiming the impossible.
|
||||
@@ -3205,10 +3303,11 @@ static const struct sys_reg_desc sys_reg_descs[] = {
|
||||
ID_AA64PFR1_EL1_RES0 |
|
||||
ID_AA64PFR1_EL1_MPAM_frac |
|
||||
ID_AA64PFR1_EL1_MTE)),
|
||||
ID_WRITABLE(ID_AA64PFR2_EL1,
|
||||
ID_AA64PFR2_EL1_FPMR |
|
||||
ID_AA64PFR2_EL1_MTEFAR |
|
||||
ID_AA64PFR2_EL1_MTESTOREONLY),
|
||||
ID_FILTERED(ID_AA64PFR2_EL1, id_aa64pfr2_el1,
|
||||
(ID_AA64PFR2_EL1_FPMR |
|
||||
ID_AA64PFR2_EL1_MTEFAR |
|
||||
ID_AA64PFR2_EL1_MTESTOREONLY |
|
||||
ID_AA64PFR2_EL1_GCIE)),
|
||||
ID_UNALLOCATED(4,3),
|
||||
ID_WRITABLE(ID_AA64ZFR0_EL1, ~ID_AA64ZFR0_EL1_RES0),
|
||||
ID_HIDDEN(ID_AA64SMFR0_EL1),
|
||||
@@ -3391,6 +3490,10 @@ static const struct sys_reg_desc sys_reg_descs[] = {
|
||||
{ SYS_DESC(SYS_ICC_AP1R1_EL1), undef_access },
|
||||
{ SYS_DESC(SYS_ICC_AP1R2_EL1), undef_access },
|
||||
{ SYS_DESC(SYS_ICC_AP1R3_EL1), undef_access },
|
||||
{ SYS_DESC(SYS_ICC_IDR0_EL1), access_gicv5_idr0 },
|
||||
{ SYS_DESC(SYS_ICC_IAFFIDR_EL1), access_gicv5_iaffid },
|
||||
{ SYS_DESC(SYS_ICC_PPI_ENABLER0_EL1), access_gicv5_ppi_enabler },
|
||||
{ SYS_DESC(SYS_ICC_PPI_ENABLER1_EL1), access_gicv5_ppi_enabler },
|
||||
{ SYS_DESC(SYS_ICC_DIR_EL1), access_gic_dir },
|
||||
{ SYS_DESC(SYS_ICC_RPR_EL1), undef_access },
|
||||
{ SYS_DESC(SYS_ICC_SGI1R_EL1), access_gic_sgi },
|
||||
@@ -5647,6 +5750,8 @@ void kvm_calculate_traps(struct kvm_vcpu *vcpu)
|
||||
compute_fgu(kvm, HFGRTR2_GROUP);
|
||||
compute_fgu(kvm, HFGITR2_GROUP);
|
||||
compute_fgu(kvm, HDFGRTR2_GROUP);
|
||||
compute_fgu(kvm, ICH_HFGRTR_GROUP);
|
||||
compute_fgu(kvm, ICH_HFGITR_GROUP);
|
||||
|
||||
set_bit(KVM_ARCH_FLAG_FGU_INITIALIZED, &kvm->arch.flags);
|
||||
out:
|
||||
@@ -5667,25 +5772,60 @@ int kvm_finalize_sys_regs(struct kvm_vcpu *vcpu)
|
||||
|
||||
guard(mutex)(&kvm->arch.config_lock);
|
||||
|
||||
/*
|
||||
* This hacks into the ID registers, so only perform it when the
|
||||
* first vcpu runs, or the kvm_set_vm_id_reg() helper will scream.
|
||||
*/
|
||||
if (!irqchip_in_kernel(kvm) && !kvm_vm_has_ran_once(kvm)) {
|
||||
u64 val;
|
||||
|
||||
val = kvm_read_vm_id_reg(kvm, SYS_ID_AA64PFR0_EL1) & ~ID_AA64PFR0_EL1_GIC;
|
||||
kvm_set_vm_id_reg(kvm, SYS_ID_AA64PFR0_EL1, val);
|
||||
val = kvm_read_vm_id_reg(kvm, SYS_ID_PFR1_EL1) & ~ID_PFR1_EL1_GIC;
|
||||
kvm_set_vm_id_reg(kvm, SYS_ID_PFR1_EL1, val);
|
||||
}
|
||||
|
||||
if (vcpu_has_nv(vcpu)) {
|
||||
int ret = kvm_init_nv_sysregs(vcpu);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (kvm_vm_has_ran_once(kvm))
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* This hacks into the ID registers, so only perform it when the
|
||||
* first vcpu runs, or the kvm_set_vm_id_reg() helper will scream.
|
||||
*/
|
||||
if (!irqchip_in_kernel(kvm)) {
|
||||
u64 val;
|
||||
|
||||
val = kvm_read_vm_id_reg(kvm, SYS_ID_AA64PFR0_EL1) & ~ID_AA64PFR0_EL1_GIC;
|
||||
kvm_set_vm_id_reg(kvm, SYS_ID_AA64PFR0_EL1, val);
|
||||
val = kvm_read_vm_id_reg(kvm, SYS_ID_AA64PFR2_EL1) & ~ID_AA64PFR2_EL1_GCIE;
|
||||
kvm_set_vm_id_reg(kvm, SYS_ID_AA64PFR2_EL1, val);
|
||||
val = kvm_read_vm_id_reg(kvm, SYS_ID_PFR1_EL1) & ~ID_PFR1_EL1_GIC;
|
||||
kvm_set_vm_id_reg(kvm, SYS_ID_PFR1_EL1, val);
|
||||
} else {
|
||||
/*
|
||||
* Certain userspace software - QEMU - samples the system
|
||||
* register state without creating an irqchip, then blindly
|
||||
* restores the state prior to running the final guest. This
|
||||
* means that it restores the virtualization & emulation
|
||||
* capabilities of the host system, rather than something that
|
||||
* reflects the final guest state. Moreover, it checks that the
|
||||
* state was "correctly" restored (i.e., verbatim), bailing if
|
||||
* it isn't, so masking off invalid state isn't an option.
|
||||
*
|
||||
* On GICv5 hardware that supports FEAT_GCIE_LEGACY we can run
|
||||
* both GICv3- and GICv5-based guests. Therefore, we initially
|
||||
* present both ID_AA64PFR0.GIC and ID_AA64PFR2.GCIE as IMP to
|
||||
* reflect that userspace can create EITHER a vGICv3 or a
|
||||
* vGICv5. This is an architecturally invalid combination, of
|
||||
* course. Once an in-kernel GIC is created, the sysreg state is
|
||||
* updated to reflect the actual, valid configuration.
|
||||
*
|
||||
* Setting both the GIC and GCIE features to IMP unsurprisingly
|
||||
* results in guests falling over, and hence we need to fix up
|
||||
* this mess in KVM. Before running for the first time we yet
|
||||
* again ensure that the GIC and GCIE fields accurately reflect
|
||||
* the actual hardware the guest should see.
|
||||
*
|
||||
* This hack allows legacy QEMU-based GICv3 guests to run
|
||||
* unmodified on compatible GICv5 hosts, and avoids the inverse
|
||||
* problem for GICv5-based guests in the future.
|
||||
*/
|
||||
kvm_vgic_finalize_idregs(kvm);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
@@ -66,12 +66,11 @@ static int vgic_allocate_private_irqs_locked(struct kvm_vcpu *vcpu, u32 type);
|
||||
* or through the generic KVM_CREATE_DEVICE API ioctl.
|
||||
* irqchip_in_kernel() tells you if this function succeeded or not.
|
||||
* @kvm: kvm struct pointer
|
||||
* @type: KVM_DEV_TYPE_ARM_VGIC_V[23]
|
||||
* @type: KVM_DEV_TYPE_ARM_VGIC_V[235]
|
||||
*/
|
||||
int kvm_vgic_create(struct kvm *kvm, u32 type)
|
||||
{
|
||||
struct kvm_vcpu *vcpu;
|
||||
u64 aa64pfr0, pfr1;
|
||||
unsigned long i;
|
||||
int ret;
|
||||
|
||||
@@ -132,8 +131,11 @@ int kvm_vgic_create(struct kvm *kvm, u32 type)
|
||||
|
||||
if (type == KVM_DEV_TYPE_ARM_VGIC_V2)
|
||||
kvm->max_vcpus = VGIC_V2_MAX_CPUS;
|
||||
else
|
||||
else if (type == KVM_DEV_TYPE_ARM_VGIC_V3)
|
||||
kvm->max_vcpus = VGIC_V3_MAX_CPUS;
|
||||
else if (type == KVM_DEV_TYPE_ARM_VGIC_V5)
|
||||
kvm->max_vcpus = min(VGIC_V5_MAX_CPUS,
|
||||
kvm_vgic_global_state.max_gic_vcpus);
|
||||
|
||||
if (atomic_read(&kvm->online_vcpus) > kvm->max_vcpus) {
|
||||
ret = -E2BIG;
|
||||
@@ -145,19 +147,20 @@ int kvm_vgic_create(struct kvm *kvm, u32 type)
|
||||
kvm->arch.vgic.implementation_rev = KVM_VGIC_IMP_REV_LATEST;
|
||||
kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF;
|
||||
|
||||
aa64pfr0 = kvm_read_vm_id_reg(kvm, SYS_ID_AA64PFR0_EL1) & ~ID_AA64PFR0_EL1_GIC;
|
||||
pfr1 = kvm_read_vm_id_reg(kvm, SYS_ID_PFR1_EL1) & ~ID_PFR1_EL1_GIC;
|
||||
|
||||
if (type == KVM_DEV_TYPE_ARM_VGIC_V2) {
|
||||
switch (type) {
|
||||
case KVM_DEV_TYPE_ARM_VGIC_V2:
|
||||
kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF;
|
||||
} else {
|
||||
break;
|
||||
case KVM_DEV_TYPE_ARM_VGIC_V3:
|
||||
INIT_LIST_HEAD(&kvm->arch.vgic.rd_regions);
|
||||
aa64pfr0 |= SYS_FIELD_PREP_ENUM(ID_AA64PFR0_EL1, GIC, IMP);
|
||||
pfr1 |= SYS_FIELD_PREP_ENUM(ID_PFR1_EL1, GIC, GICv3);
|
||||
break;
|
||||
}
|
||||
|
||||
kvm_set_vm_id_reg(kvm, SYS_ID_AA64PFR0_EL1, aa64pfr0);
|
||||
kvm_set_vm_id_reg(kvm, SYS_ID_PFR1_EL1, pfr1);
|
||||
/*
|
||||
* We've now created the GIC. Update the system register state
|
||||
* to accurately reflect what we've created.
|
||||
*/
|
||||
kvm_vgic_finalize_idregs(kvm);
|
||||
|
||||
kvm_for_each_vcpu(i, vcpu, kvm) {
|
||||
ret = vgic_allocate_private_irqs_locked(vcpu, type);
|
||||
@@ -179,6 +182,15 @@ int kvm_vgic_create(struct kvm *kvm, u32 type)
|
||||
if (type == KVM_DEV_TYPE_ARM_VGIC_V3)
|
||||
kvm->arch.vgic.nassgicap = system_supports_direct_sgis();
|
||||
|
||||
/*
|
||||
* We now know that we have a GICv5. The Arch Timer PPI interrupts may
|
||||
* have been initialised at this stage, but will have done so assuming
|
||||
* that we have an older GIC, meaning that the IntIDs won't be
|
||||
* correct. We init them again, and this time they will be correct.
|
||||
*/
|
||||
if (type == KVM_DEV_TYPE_ARM_VGIC_V5)
|
||||
kvm_timer_init_vm(kvm);
|
||||
|
||||
out_unlock:
|
||||
mutex_unlock(&kvm->arch.config_lock);
|
||||
kvm_unlock_all_vcpus(kvm);
|
||||
@@ -259,9 +271,65 @@ int kvm_vgic_vcpu_nv_init(struct kvm_vcpu *vcpu)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void vgic_allocate_private_irq(struct kvm_vcpu *vcpu, int i, u32 type)
|
||||
{
|
||||
struct vgic_irq *irq = &vcpu->arch.vgic_cpu.private_irqs[i];
|
||||
|
||||
INIT_LIST_HEAD(&irq->ap_list);
|
||||
raw_spin_lock_init(&irq->irq_lock);
|
||||
irq->vcpu = NULL;
|
||||
irq->target_vcpu = vcpu;
|
||||
refcount_set(&irq->refcount, 0);
|
||||
|
||||
irq->intid = i;
|
||||
if (vgic_irq_is_sgi(i)) {
|
||||
/* SGIs */
|
||||
irq->enabled = 1;
|
||||
irq->config = VGIC_CONFIG_EDGE;
|
||||
} else {
|
||||
/* PPIs */
|
||||
irq->config = VGIC_CONFIG_LEVEL;
|
||||
}
|
||||
|
||||
switch (type) {
|
||||
case KVM_DEV_TYPE_ARM_VGIC_V3:
|
||||
irq->group = 1;
|
||||
irq->mpidr = kvm_vcpu_get_mpidr_aff(vcpu);
|
||||
break;
|
||||
case KVM_DEV_TYPE_ARM_VGIC_V2:
|
||||
irq->group = 0;
|
||||
irq->targets = BIT(vcpu->vcpu_id);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static void vgic_v5_allocate_private_irq(struct kvm_vcpu *vcpu, int i, u32 type)
|
||||
{
|
||||
struct vgic_irq *irq = &vcpu->arch.vgic_cpu.private_irqs[i];
|
||||
u32 intid = vgic_v5_make_ppi(i);
|
||||
|
||||
INIT_LIST_HEAD(&irq->ap_list);
|
||||
raw_spin_lock_init(&irq->irq_lock);
|
||||
irq->vcpu = NULL;
|
||||
irq->target_vcpu = vcpu;
|
||||
refcount_set(&irq->refcount, 0);
|
||||
|
||||
irq->intid = intid;
|
||||
|
||||
/* The only Edge architected PPI is the SW_PPI */
|
||||
if (i == GICV5_ARCH_PPI_SW_PPI)
|
||||
irq->config = VGIC_CONFIG_EDGE;
|
||||
else
|
||||
irq->config = VGIC_CONFIG_LEVEL;
|
||||
|
||||
/* Register the GICv5-specific PPI ops */
|
||||
vgic_v5_set_ppi_ops(vcpu, intid);
|
||||
}
|
||||
|
||||
static int vgic_allocate_private_irqs_locked(struct kvm_vcpu *vcpu, u32 type)
|
||||
{
|
||||
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
|
||||
u32 num_private_irqs;
|
||||
int i;
|
||||
|
||||
lockdep_assert_held(&vcpu->kvm->arch.config_lock);
|
||||
@@ -269,8 +337,13 @@ static int vgic_allocate_private_irqs_locked(struct kvm_vcpu *vcpu, u32 type)
|
||||
if (vgic_cpu->private_irqs)
|
||||
return 0;
|
||||
|
||||
if (vgic_is_v5(vcpu->kvm))
|
||||
num_private_irqs = VGIC_V5_NR_PRIVATE_IRQS;
|
||||
else
|
||||
num_private_irqs = VGIC_NR_PRIVATE_IRQS;
|
||||
|
||||
vgic_cpu->private_irqs = kzalloc_objs(struct vgic_irq,
|
||||
VGIC_NR_PRIVATE_IRQS,
|
||||
num_private_irqs,
|
||||
GFP_KERNEL_ACCOUNT);
|
||||
|
||||
if (!vgic_cpu->private_irqs)
|
||||
@@ -280,34 +353,11 @@ static int vgic_allocate_private_irqs_locked(struct kvm_vcpu *vcpu, u32 type)
|
||||
* Enable and configure all SGIs to be edge-triggered and
|
||||
* configure all PPIs as level-triggered.
|
||||
*/
|
||||
for (i = 0; i < VGIC_NR_PRIVATE_IRQS; i++) {
|
||||
struct vgic_irq *irq = &vgic_cpu->private_irqs[i];
|
||||
|
||||
INIT_LIST_HEAD(&irq->ap_list);
|
||||
raw_spin_lock_init(&irq->irq_lock);
|
||||
irq->intid = i;
|
||||
irq->vcpu = NULL;
|
||||
irq->target_vcpu = vcpu;
|
||||
refcount_set(&irq->refcount, 0);
|
||||
if (vgic_irq_is_sgi(i)) {
|
||||
/* SGIs */
|
||||
irq->enabled = 1;
|
||||
irq->config = VGIC_CONFIG_EDGE;
|
||||
} else {
|
||||
/* PPIs */
|
||||
irq->config = VGIC_CONFIG_LEVEL;
|
||||
}
|
||||
|
||||
switch (type) {
|
||||
case KVM_DEV_TYPE_ARM_VGIC_V3:
|
||||
irq->group = 1;
|
||||
irq->mpidr = kvm_vcpu_get_mpidr_aff(vcpu);
|
||||
break;
|
||||
case KVM_DEV_TYPE_ARM_VGIC_V2:
|
||||
irq->group = 0;
|
||||
irq->targets = BIT(vcpu->vcpu_id);
|
||||
break;
|
||||
}
|
||||
for (i = 0; i < num_private_irqs; i++) {
|
||||
if (vgic_is_v5(vcpu->kvm))
|
||||
vgic_v5_allocate_private_irq(vcpu, i, type);
|
||||
else
|
||||
vgic_allocate_private_irq(vcpu, i, type);
|
||||
}
|
||||
|
||||
return 0;
|
||||
@@ -366,7 +416,11 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
|
||||
|
||||
static void kvm_vgic_vcpu_reset(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if (kvm_vgic_global_state.type == VGIC_V2)
|
||||
const struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
|
||||
|
||||
if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V5)
|
||||
vgic_v5_reset(vcpu);
|
||||
else if (kvm_vgic_global_state.type == VGIC_V2)
|
||||
vgic_v2_reset(vcpu);
|
||||
else
|
||||
vgic_v3_reset(vcpu);
|
||||
@@ -397,22 +451,28 @@ int vgic_init(struct kvm *kvm)
|
||||
if (kvm->created_vcpus != atomic_read(&kvm->online_vcpus))
|
||||
return -EBUSY;
|
||||
|
||||
/* freeze the number of spis */
|
||||
if (!dist->nr_spis)
|
||||
dist->nr_spis = VGIC_NR_IRQS_LEGACY - VGIC_NR_PRIVATE_IRQS;
|
||||
if (!vgic_is_v5(kvm)) {
|
||||
/* freeze the number of spis */
|
||||
if (!dist->nr_spis)
|
||||
dist->nr_spis = VGIC_NR_IRQS_LEGACY - VGIC_NR_PRIVATE_IRQS;
|
||||
|
||||
ret = kvm_vgic_dist_init(kvm, dist->nr_spis);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
/*
|
||||
* Ensure vPEs are allocated if direct IRQ injection (e.g. vSGIs,
|
||||
* vLPIs) is supported.
|
||||
*/
|
||||
if (vgic_supports_direct_irqs(kvm)) {
|
||||
ret = vgic_v4_init(kvm);
|
||||
ret = kvm_vgic_dist_init(kvm, dist->nr_spis);
|
||||
if (ret)
|
||||
goto out;
|
||||
return ret;
|
||||
|
||||
/*
|
||||
* Ensure vPEs are allocated if direct IRQ injection (e.g. vSGIs,
|
||||
* vLPIs) is supported.
|
||||
*/
|
||||
if (vgic_supports_direct_irqs(kvm)) {
|
||||
ret = vgic_v4_init(kvm);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
} else {
|
||||
ret = vgic_v5_init(kvm);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
kvm_for_each_vcpu(idx, vcpu, kvm)
|
||||
@@ -420,12 +480,12 @@ int vgic_init(struct kvm *kvm)
|
||||
|
||||
ret = kvm_vgic_setup_default_irq_routing(kvm);
|
||||
if (ret)
|
||||
goto out;
|
||||
return ret;
|
||||
|
||||
vgic_debug_init(kvm);
|
||||
dist->initialized = true;
|
||||
out:
|
||||
return ret;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void kvm_vgic_dist_destroy(struct kvm *kvm)
|
||||
@@ -569,6 +629,7 @@ int vgic_lazy_init(struct kvm *kvm)
|
||||
int kvm_vgic_map_resources(struct kvm *kvm)
|
||||
{
|
||||
struct vgic_dist *dist = &kvm->arch.vgic;
|
||||
bool needs_dist = true;
|
||||
enum vgic_type type;
|
||||
gpa_t dist_base;
|
||||
int ret = 0;
|
||||
@@ -587,21 +648,29 @@ int kvm_vgic_map_resources(struct kvm *kvm)
|
||||
if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2) {
|
||||
ret = vgic_v2_map_resources(kvm);
|
||||
type = VGIC_V2;
|
||||
} else {
|
||||
} else if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) {
|
||||
ret = vgic_v3_map_resources(kvm);
|
||||
type = VGIC_V3;
|
||||
} else {
|
||||
ret = vgic_v5_map_resources(kvm);
|
||||
type = VGIC_V5;
|
||||
needs_dist = false;
|
||||
}
|
||||
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
dist_base = dist->vgic_dist_base;
|
||||
mutex_unlock(&kvm->arch.config_lock);
|
||||
if (needs_dist) {
|
||||
dist_base = dist->vgic_dist_base;
|
||||
mutex_unlock(&kvm->arch.config_lock);
|
||||
|
||||
ret = vgic_register_dist_iodev(kvm, dist_base, type);
|
||||
if (ret) {
|
||||
kvm_err("Unable to register VGIC dist MMIO regions\n");
|
||||
goto out_slots;
|
||||
ret = vgic_register_dist_iodev(kvm, dist_base, type);
|
||||
if (ret) {
|
||||
kvm_err("Unable to register VGIC dist MMIO regions\n");
|
||||
goto out_slots;
|
||||
}
|
||||
} else {
|
||||
mutex_unlock(&kvm->arch.config_lock);
|
||||
}
|
||||
|
||||
smp_store_release(&dist->ready, true);
|
||||
@@ -617,6 +686,35 @@ out_slots:
|
||||
return ret;
|
||||
}
|
||||
|
||||
void kvm_vgic_finalize_idregs(struct kvm *kvm)
|
||||
{
|
||||
u32 type = kvm->arch.vgic.vgic_model;
|
||||
u64 aa64pfr0, aa64pfr2, pfr1;
|
||||
|
||||
aa64pfr0 = kvm_read_vm_id_reg(kvm, SYS_ID_AA64PFR0_EL1) & ~ID_AA64PFR0_EL1_GIC;
|
||||
aa64pfr2 = kvm_read_vm_id_reg(kvm, SYS_ID_AA64PFR2_EL1) & ~ID_AA64PFR2_EL1_GCIE;
|
||||
pfr1 = kvm_read_vm_id_reg(kvm, SYS_ID_PFR1_EL1) & ~ID_PFR1_EL1_GIC;
|
||||
|
||||
switch (type) {
|
||||
case KVM_DEV_TYPE_ARM_VGIC_V2:
|
||||
break;
|
||||
case KVM_DEV_TYPE_ARM_VGIC_V3:
|
||||
aa64pfr0 |= SYS_FIELD_PREP_ENUM(ID_AA64PFR0_EL1, GIC, IMP);
|
||||
if (kvm_supports_32bit_el0())
|
||||
pfr1 |= SYS_FIELD_PREP_ENUM(ID_PFR1_EL1, GIC, GICv3);
|
||||
break;
|
||||
case KVM_DEV_TYPE_ARM_VGIC_V5:
|
||||
aa64pfr2 |= SYS_FIELD_PREP_ENUM(ID_AA64PFR2_EL1, GCIE, IMP);
|
||||
break;
|
||||
default:
|
||||
WARN_ONCE(1, "Unknown VGIC type!!!\n");
|
||||
}
|
||||
|
||||
kvm_set_vm_id_reg(kvm, SYS_ID_AA64PFR0_EL1, aa64pfr0);
|
||||
kvm_set_vm_id_reg(kvm, SYS_ID_AA64PFR2_EL1, aa64pfr2);
|
||||
kvm_set_vm_id_reg(kvm, SYS_ID_PFR1_EL1, pfr1);
|
||||
}
|
||||
|
||||
/* GENERIC PROBE */
|
||||
|
||||
void kvm_vgic_cpu_up(void)
|
||||
|
||||
@@ -336,6 +336,10 @@ int kvm_register_vgic_device(unsigned long type)
|
||||
break;
|
||||
ret = kvm_vgic_register_its_device();
|
||||
break;
|
||||
case KVM_DEV_TYPE_ARM_VGIC_V5:
|
||||
ret = kvm_register_device_ops(&kvm_arm_vgic_v5_ops,
|
||||
KVM_DEV_TYPE_ARM_VGIC_V5);
|
||||
break;
|
||||
}
|
||||
|
||||
return ret;
|
||||
@@ -639,7 +643,7 @@ static int vgic_v3_set_attr(struct kvm_device *dev,
|
||||
if (vgic_initialized(dev->kvm))
|
||||
return -EBUSY;
|
||||
|
||||
if (!irq_is_ppi(val))
|
||||
if (!irq_is_ppi(dev->kvm, val))
|
||||
return -EINVAL;
|
||||
|
||||
dev->kvm->arch.vgic.mi_intid = val;
|
||||
@@ -715,3 +719,104 @@ struct kvm_device_ops kvm_arm_vgic_v3_ops = {
|
||||
.get_attr = vgic_v3_get_attr,
|
||||
.has_attr = vgic_v3_has_attr,
|
||||
};
|
||||
|
||||
static int vgic_v5_get_userspace_ppis(struct kvm_device *dev,
|
||||
struct kvm_device_attr *attr)
|
||||
{
|
||||
struct vgic_v5_vm *gicv5_vm = &dev->kvm->arch.vgic.gicv5_vm;
|
||||
u64 __user *uaddr = (u64 __user *)(long)attr->addr;
|
||||
int ret;
|
||||
|
||||
guard(mutex)(&dev->kvm->arch.config_lock);
|
||||
|
||||
/*
|
||||
* We either support 64 or 128 PPIs. In the former case, we need to
|
||||
* return 0s for the second 64 bits as we have no storage backing those.
|
||||
*/
|
||||
ret = put_user(bitmap_read(gicv5_vm->userspace_ppis, 0, 64), uaddr);
|
||||
if (ret)
|
||||
return ret;
|
||||
uaddr++;
|
||||
|
||||
if (VGIC_V5_NR_PRIVATE_IRQS == 128)
|
||||
ret = put_user(bitmap_read(gicv5_vm->userspace_ppis, 64, 128), uaddr);
|
||||
else
|
||||
ret = put_user(0, uaddr);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int vgic_v5_set_attr(struct kvm_device *dev,
|
||||
struct kvm_device_attr *attr)
|
||||
{
|
||||
switch (attr->group) {
|
||||
case KVM_DEV_ARM_VGIC_GRP_ADDR:
|
||||
case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS:
|
||||
case KVM_DEV_ARM_VGIC_GRP_NR_IRQS:
|
||||
return -ENXIO;
|
||||
case KVM_DEV_ARM_VGIC_GRP_CTRL:
|
||||
switch (attr->attr) {
|
||||
case KVM_DEV_ARM_VGIC_CTRL_INIT:
|
||||
return vgic_set_common_attr(dev, attr);
|
||||
case KVM_DEV_ARM_VGIC_USERSPACE_PPIS:
|
||||
default:
|
||||
return -ENXIO;
|
||||
}
|
||||
default:
|
||||
return -ENXIO;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
static int vgic_v5_get_attr(struct kvm_device *dev,
|
||||
struct kvm_device_attr *attr)
|
||||
{
|
||||
switch (attr->group) {
|
||||
case KVM_DEV_ARM_VGIC_GRP_ADDR:
|
||||
case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS:
|
||||
case KVM_DEV_ARM_VGIC_GRP_NR_IRQS:
|
||||
return -ENXIO;
|
||||
case KVM_DEV_ARM_VGIC_GRP_CTRL:
|
||||
switch (attr->attr) {
|
||||
case KVM_DEV_ARM_VGIC_CTRL_INIT:
|
||||
return vgic_get_common_attr(dev, attr);
|
||||
case KVM_DEV_ARM_VGIC_USERSPACE_PPIS:
|
||||
return vgic_v5_get_userspace_ppis(dev, attr);
|
||||
default:
|
||||
return -ENXIO;
|
||||
}
|
||||
default:
|
||||
return -ENXIO;
|
||||
}
|
||||
}
|
||||
|
||||
static int vgic_v5_has_attr(struct kvm_device *dev,
|
||||
struct kvm_device_attr *attr)
|
||||
{
|
||||
switch (attr->group) {
|
||||
case KVM_DEV_ARM_VGIC_GRP_ADDR:
|
||||
case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS:
|
||||
case KVM_DEV_ARM_VGIC_GRP_NR_IRQS:
|
||||
return -ENXIO;
|
||||
case KVM_DEV_ARM_VGIC_GRP_CTRL:
|
||||
switch (attr->attr) {
|
||||
case KVM_DEV_ARM_VGIC_CTRL_INIT:
|
||||
return 0;
|
||||
case KVM_DEV_ARM_VGIC_USERSPACE_PPIS:
|
||||
return 0;
|
||||
default:
|
||||
return -ENXIO;
|
||||
}
|
||||
default:
|
||||
return -ENXIO;
|
||||
}
|
||||
}
|
||||
|
||||
struct kvm_device_ops kvm_arm_vgic_v5_ops = {
|
||||
.name = "kvm-arm-vgic-v5",
|
||||
.create = vgic_create,
|
||||
.destroy = vgic_destroy,
|
||||
.set_attr = vgic_v5_set_attr,
|
||||
.get_attr = vgic_v5_get_attr,
|
||||
.has_attr = vgic_v5_has_attr,
|
||||
};
|
||||
|
||||
@@ -842,18 +842,46 @@ vgic_find_mmio_region(const struct vgic_register_region *regions,
|
||||
|
||||
void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr)
|
||||
{
|
||||
if (kvm_vgic_global_state.type == VGIC_V2)
|
||||
vgic_v2_set_vmcr(vcpu, vmcr);
|
||||
else
|
||||
const struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
|
||||
|
||||
switch (dist->vgic_model) {
|
||||
case KVM_DEV_TYPE_ARM_VGIC_V5:
|
||||
vgic_v5_set_vmcr(vcpu, vmcr);
|
||||
break;
|
||||
case KVM_DEV_TYPE_ARM_VGIC_V3:
|
||||
vgic_v3_set_vmcr(vcpu, vmcr);
|
||||
break;
|
||||
case KVM_DEV_TYPE_ARM_VGIC_V2:
|
||||
if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
|
||||
vgic_v3_set_vmcr(vcpu, vmcr);
|
||||
else
|
||||
vgic_v2_set_vmcr(vcpu, vmcr);
|
||||
break;
|
||||
default:
|
||||
BUG();
|
||||
}
|
||||
}
|
||||
|
||||
void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr)
|
||||
{
|
||||
if (kvm_vgic_global_state.type == VGIC_V2)
|
||||
vgic_v2_get_vmcr(vcpu, vmcr);
|
||||
else
|
||||
const struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
|
||||
|
||||
switch (dist->vgic_model) {
|
||||
case KVM_DEV_TYPE_ARM_VGIC_V5:
|
||||
vgic_v5_get_vmcr(vcpu, vmcr);
|
||||
break;
|
||||
case KVM_DEV_TYPE_ARM_VGIC_V3:
|
||||
vgic_v3_get_vmcr(vcpu, vmcr);
|
||||
break;
|
||||
case KVM_DEV_TYPE_ARM_VGIC_V2:
|
||||
if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
|
||||
vgic_v3_get_vmcr(vcpu, vmcr);
|
||||
else
|
||||
vgic_v2_get_vmcr(vcpu, vmcr);
|
||||
break;
|
||||
default:
|
||||
BUG();
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
|
||||
@@ -499,7 +499,7 @@ void vcpu_set_ich_hcr(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vgic_v3_cpu_if *vgic_v3 = &vcpu->arch.vgic_cpu.vgic_v3;
|
||||
|
||||
if (!vgic_is_v3(vcpu->kvm))
|
||||
if (!vgic_host_has_gicv3())
|
||||
return;
|
||||
|
||||
/* Hide GICv3 sysreg if necessary */
|
||||
|
||||
@@ -1,28 +1,52 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* Copyright (C) 2025, 2026 Arm Ltd.
|
||||
*/
|
||||
|
||||
#include <kvm/arm_vgic.h>
|
||||
|
||||
#include <linux/bitops.h>
|
||||
#include <linux/irqchip/arm-vgic-info.h>
|
||||
|
||||
#include "vgic.h"
|
||||
|
||||
static struct vgic_v5_ppi_caps ppi_caps;
|
||||
|
||||
/*
|
||||
* Not all PPIs are guaranteed to be implemented for GICv5. Deterermine which
|
||||
* ones are, and generate a mask.
|
||||
*/
|
||||
static void vgic_v5_get_implemented_ppis(void)
|
||||
{
|
||||
if (!cpus_have_final_cap(ARM64_HAS_GICV5_CPUIF))
|
||||
return;
|
||||
|
||||
/*
|
||||
* If we have KVM, we have EL2, which means that we have support for the
|
||||
* EL1 and EL2 Physical & Virtual timers.
|
||||
*/
|
||||
__assign_bit(GICV5_ARCH_PPI_CNTHP, ppi_caps.impl_ppi_mask, 1);
|
||||
__assign_bit(GICV5_ARCH_PPI_CNTV, ppi_caps.impl_ppi_mask, 1);
|
||||
__assign_bit(GICV5_ARCH_PPI_CNTHV, ppi_caps.impl_ppi_mask, 1);
|
||||
__assign_bit(GICV5_ARCH_PPI_CNTP, ppi_caps.impl_ppi_mask, 1);
|
||||
|
||||
/* The SW_PPI should be available */
|
||||
__assign_bit(GICV5_ARCH_PPI_SW_PPI, ppi_caps.impl_ppi_mask, 1);
|
||||
|
||||
/* The PMUIRQ is available if we have the PMU */
|
||||
__assign_bit(GICV5_ARCH_PPI_PMUIRQ, ppi_caps.impl_ppi_mask, system_supports_pmuv3());
|
||||
}
|
||||
|
||||
/*
|
||||
* Probe for a vGICv5 compatible interrupt controller, returning 0 on success.
|
||||
* Currently only supports GICv3-based VMs on a GICv5 host, and hence only
|
||||
* registers a VGIC_V3 device.
|
||||
*/
|
||||
int vgic_v5_probe(const struct gic_kvm_info *info)
|
||||
{
|
||||
bool v5_registered = false;
|
||||
u64 ich_vtr_el2;
|
||||
int ret;
|
||||
|
||||
if (!cpus_have_final_cap(ARM64_HAS_GICV5_LEGACY))
|
||||
return -ENODEV;
|
||||
|
||||
kvm_vgic_global_state.type = VGIC_V5;
|
||||
kvm_vgic_global_state.has_gcie_v3_compat = true;
|
||||
|
||||
/* We only support v3 compat mode - use vGICv3 limits */
|
||||
kvm_vgic_global_state.max_gic_vcpus = VGIC_V3_MAX_CPUS;
|
||||
|
||||
kvm_vgic_global_state.vcpu_base = 0;
|
||||
kvm_vgic_global_state.vctrl_base = NULL;
|
||||
@@ -30,6 +54,38 @@ int vgic_v5_probe(const struct gic_kvm_info *info)
|
||||
kvm_vgic_global_state.has_gicv4 = false;
|
||||
kvm_vgic_global_state.has_gicv4_1 = false;
|
||||
|
||||
/*
|
||||
* GICv5 is currently not supported in Protected mode. Skip the
|
||||
* registration of GICv5 completely to make sure no guests can create a
|
||||
* GICv5-based guest.
|
||||
*/
|
||||
if (is_protected_kvm_enabled()) {
|
||||
kvm_info("GICv5-based guests are not supported with pKVM\n");
|
||||
goto skip_v5;
|
||||
}
|
||||
|
||||
kvm_vgic_global_state.max_gic_vcpus = VGIC_V5_MAX_CPUS;
|
||||
|
||||
vgic_v5_get_implemented_ppis();
|
||||
|
||||
ret = kvm_register_vgic_device(KVM_DEV_TYPE_ARM_VGIC_V5);
|
||||
if (ret) {
|
||||
kvm_err("Cannot register GICv5 KVM device.\n");
|
||||
goto skip_v5;
|
||||
}
|
||||
|
||||
v5_registered = true;
|
||||
kvm_info("GCIE system register CPU interface\n");
|
||||
|
||||
skip_v5:
|
||||
/* If we don't support the GICv3 compat mode we're done. */
|
||||
if (!cpus_have_final_cap(ARM64_HAS_GICV5_LEGACY)) {
|
||||
if (!v5_registered)
|
||||
return -ENODEV;
|
||||
return 0;
|
||||
}
|
||||
|
||||
kvm_vgic_global_state.has_gcie_v3_compat = true;
|
||||
ich_vtr_el2 = kvm_call_hyp_ret(__vgic_v3_get_gic_config);
|
||||
kvm_vgic_global_state.ich_vtr_el2 = (u32)ich_vtr_el2;
|
||||
|
||||
@@ -45,6 +101,10 @@ int vgic_v5_probe(const struct gic_kvm_info *info)
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* We potentially limit the max VCPUs further than we need to here */
|
||||
kvm_vgic_global_state.max_gic_vcpus = min(VGIC_V3_MAX_CPUS,
|
||||
VGIC_V5_MAX_CPUS);
|
||||
|
||||
static_branch_enable(&kvm_vgic_global_state.gicv3_cpuif);
|
||||
kvm_info("GCIE legacy system register CPU interface\n");
|
||||
|
||||
@@ -52,3 +112,424 @@ int vgic_v5_probe(const struct gic_kvm_info *info)
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void vgic_v5_reset(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
/*
|
||||
* We always present 16-bits of ID space to the guest, irrespective of
|
||||
* the host allowing more.
|
||||
*/
|
||||
vcpu->arch.vgic_cpu.num_id_bits = ICC_IDR0_EL1_ID_BITS_16BITS;
|
||||
|
||||
/*
|
||||
* The GICv5 architeture only supports 5-bits of priority in the
|
||||
* CPUIF (but potentially fewer in the IRS).
|
||||
*/
|
||||
vcpu->arch.vgic_cpu.num_pri_bits = 5;
|
||||
}
|
||||
|
||||
int vgic_v5_init(struct kvm *kvm)
|
||||
{
|
||||
struct kvm_vcpu *vcpu;
|
||||
unsigned long idx;
|
||||
|
||||
if (vgic_initialized(kvm))
|
||||
return 0;
|
||||
|
||||
kvm_for_each_vcpu(idx, vcpu, kvm) {
|
||||
if (vcpu_has_nv(vcpu)) {
|
||||
kvm_err("Nested GICv5 VMs are currently unsupported\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
|
||||
/* We only allow userspace to drive the SW_PPI, if it is implemented. */
|
||||
bitmap_zero(kvm->arch.vgic.gicv5_vm.userspace_ppis,
|
||||
VGIC_V5_NR_PRIVATE_IRQS);
|
||||
__assign_bit(GICV5_ARCH_PPI_SW_PPI,
|
||||
kvm->arch.vgic.gicv5_vm.userspace_ppis,
|
||||
VGIC_V5_NR_PRIVATE_IRQS);
|
||||
bitmap_and(kvm->arch.vgic.gicv5_vm.userspace_ppis,
|
||||
kvm->arch.vgic.gicv5_vm.userspace_ppis,
|
||||
ppi_caps.impl_ppi_mask, VGIC_V5_NR_PRIVATE_IRQS);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int vgic_v5_map_resources(struct kvm *kvm)
|
||||
{
|
||||
if (!vgic_initialized(kvm))
|
||||
return -EBUSY;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int vgic_v5_finalize_ppi_state(struct kvm *kvm)
|
||||
{
|
||||
struct kvm_vcpu *vcpu0;
|
||||
int i;
|
||||
|
||||
if (!vgic_is_v5(kvm))
|
||||
return 0;
|
||||
|
||||
guard(mutex)(&kvm->arch.config_lock);
|
||||
|
||||
/*
|
||||
* If SW_PPI has been advertised, then we know we already
|
||||
* initialised the whole thing, and we can return early. Yes,
|
||||
* this is pretty hackish as far as state tracking goes...
|
||||
*/
|
||||
if (test_bit(GICV5_ARCH_PPI_SW_PPI, kvm->arch.vgic.gicv5_vm.vgic_ppi_mask))
|
||||
return 0;
|
||||
|
||||
/* The PPI state for all VCPUs should be the same. Pick the first. */
|
||||
vcpu0 = kvm_get_vcpu(kvm, 0);
|
||||
|
||||
bitmap_zero(kvm->arch.vgic.gicv5_vm.vgic_ppi_mask, VGIC_V5_NR_PRIVATE_IRQS);
|
||||
bitmap_zero(kvm->arch.vgic.gicv5_vm.vgic_ppi_hmr, VGIC_V5_NR_PRIVATE_IRQS);
|
||||
|
||||
for_each_set_bit(i, ppi_caps.impl_ppi_mask, VGIC_V5_NR_PRIVATE_IRQS) {
|
||||
const u32 intid = vgic_v5_make_ppi(i);
|
||||
struct vgic_irq *irq;
|
||||
|
||||
irq = vgic_get_vcpu_irq(vcpu0, intid);
|
||||
|
||||
/* Expose PPIs with an owner or the SW_PPI, only */
|
||||
scoped_guard(raw_spinlock_irqsave, &irq->irq_lock) {
|
||||
if (irq->owner || i == GICV5_ARCH_PPI_SW_PPI) {
|
||||
__assign_bit(i, kvm->arch.vgic.gicv5_vm.vgic_ppi_mask, 1);
|
||||
__assign_bit(i, kvm->arch.vgic.gicv5_vm.vgic_ppi_hmr,
|
||||
irq->config == VGIC_CONFIG_LEVEL);
|
||||
}
|
||||
}
|
||||
|
||||
vgic_put_irq(vcpu0->kvm, irq);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static u32 vgic_v5_get_effective_priority_mask(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vgic_v5_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v5;
|
||||
u32 highest_ap, priority_mask, apr;
|
||||
|
||||
/*
|
||||
* If the guest's CPU has not opted to receive interrupts, then the
|
||||
* effective running priority is the highest priority. Just return 0
|
||||
* (the highest priority).
|
||||
*/
|
||||
if (!FIELD_GET(FEAT_GCIE_ICH_VMCR_EL2_EN, cpu_if->vgic_vmcr))
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* Counting the number of trailing zeros gives the current active
|
||||
* priority. Explicitly use the 32-bit version here as we have 32
|
||||
* priorities. 32 then means that there are no active priorities.
|
||||
*/
|
||||
apr = cpu_if->vgic_apr;
|
||||
highest_ap = apr ? __builtin_ctz(apr) : 32;
|
||||
|
||||
/*
|
||||
* An interrupt is of sufficient priority if it is equal to or
|
||||
* greater than the priority mask. Add 1 to the priority mask
|
||||
* (i.e., lower priority) to match the APR logic before taking
|
||||
* the min. This gives us the lowest priority that is masked.
|
||||
*/
|
||||
priority_mask = FIELD_GET(FEAT_GCIE_ICH_VMCR_EL2_VPMR, cpu_if->vgic_vmcr);
|
||||
|
||||
return min(highest_ap, priority_mask + 1);
|
||||
}
|
||||
|
||||
/*
|
||||
* For GICv5, the PPIs are mostly directly managed by the hardware. We (the
|
||||
* hypervisor) handle the pending, active, enable state save/restore, but don't
|
||||
* need the PPIs to be queued on a per-VCPU AP list. Therefore, sanity check the
|
||||
* state, unlock, and return.
|
||||
*/
|
||||
bool vgic_v5_ppi_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq,
|
||||
unsigned long flags)
|
||||
__releases(&irq->irq_lock)
|
||||
{
|
||||
struct kvm_vcpu *vcpu;
|
||||
|
||||
lockdep_assert_held(&irq->irq_lock);
|
||||
|
||||
if (WARN_ON_ONCE(!__irq_is_ppi(KVM_DEV_TYPE_ARM_VGIC_V5, irq->intid)))
|
||||
goto out_unlock_fail;
|
||||
|
||||
vcpu = irq->target_vcpu;
|
||||
if (WARN_ON_ONCE(!vcpu))
|
||||
goto out_unlock_fail;
|
||||
|
||||
raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
|
||||
|
||||
/* Directly kick the target VCPU to make sure it sees the IRQ */
|
||||
kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
|
||||
kvm_vcpu_kick(vcpu);
|
||||
|
||||
return true;
|
||||
|
||||
out_unlock_fail:
|
||||
raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* Sets/clears the corresponding bit in the ICH_PPI_DVIR register.
|
||||
*/
|
||||
void vgic_v5_set_ppi_dvi(struct kvm_vcpu *vcpu, struct vgic_irq *irq, bool dvi)
|
||||
{
|
||||
struct vgic_v5_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v5;
|
||||
u32 ppi;
|
||||
|
||||
lockdep_assert_held(&irq->irq_lock);
|
||||
|
||||
ppi = vgic_v5_get_hwirq_id(irq->intid);
|
||||
__assign_bit(ppi, cpu_if->vgic_ppi_dvir, dvi);
|
||||
}
|
||||
|
||||
static struct irq_ops vgic_v5_ppi_irq_ops = {
|
||||
.queue_irq_unlock = vgic_v5_ppi_queue_irq_unlock,
|
||||
.set_direct_injection = vgic_v5_set_ppi_dvi,
|
||||
};
|
||||
|
||||
void vgic_v5_set_ppi_ops(struct kvm_vcpu *vcpu, u32 vintid)
|
||||
{
|
||||
kvm_vgic_set_irq_ops(vcpu, vintid, &vgic_v5_ppi_irq_ops);
|
||||
}
|
||||
|
||||
/*
|
||||
* Sync back the PPI priorities to the vgic_irq shadow state for any interrupts
|
||||
* exposed to the guest (skipping all others).
|
||||
*/
|
||||
static void vgic_v5_sync_ppi_priorities(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vgic_v5_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v5;
|
||||
u64 priorityr;
|
||||
int i;
|
||||
|
||||
/*
|
||||
* We have up to 16 PPI Priority regs, but only have a few interrupts
|
||||
* that the guest is allowed to use. Limit our sync of PPI priorities to
|
||||
* those actually exposed to the guest by first iterating over the mask
|
||||
* of exposed PPIs.
|
||||
*/
|
||||
for_each_set_bit(i, vcpu->kvm->arch.vgic.gicv5_vm.vgic_ppi_mask, VGIC_V5_NR_PRIVATE_IRQS) {
|
||||
u32 intid = vgic_v5_make_ppi(i);
|
||||
struct vgic_irq *irq;
|
||||
int pri_idx, pri_reg, pri_bit;
|
||||
u8 priority;
|
||||
|
||||
/*
|
||||
* Determine which priority register and the field within it to
|
||||
* extract.
|
||||
*/
|
||||
pri_reg = i / 8;
|
||||
pri_idx = i % 8;
|
||||
pri_bit = pri_idx * 8;
|
||||
|
||||
priorityr = cpu_if->vgic_ppi_priorityr[pri_reg];
|
||||
priority = field_get(GENMASK(pri_bit + 4, pri_bit), priorityr);
|
||||
|
||||
irq = vgic_get_vcpu_irq(vcpu, intid);
|
||||
|
||||
scoped_guard(raw_spinlock_irqsave, &irq->irq_lock)
|
||||
irq->priority = priority;
|
||||
|
||||
vgic_put_irq(vcpu->kvm, irq);
|
||||
}
|
||||
}
|
||||
|
||||
bool vgic_v5_has_pending_ppi(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
unsigned int priority_mask;
|
||||
int i;
|
||||
|
||||
priority_mask = vgic_v5_get_effective_priority_mask(vcpu);
|
||||
|
||||
/*
|
||||
* If the combined priority mask is 0, nothing can be signalled! In the
|
||||
* case where the guest has disabled interrupt delivery for the vcpu
|
||||
* (via ICV_CR0_EL1.EN->ICH_VMCR_EL2.EN), we calculate the priority mask
|
||||
* as 0 too (the highest possible priority).
|
||||
*/
|
||||
if (!priority_mask)
|
||||
return false;
|
||||
|
||||
for_each_set_bit(i, vcpu->kvm->arch.vgic.gicv5_vm.vgic_ppi_mask, VGIC_V5_NR_PRIVATE_IRQS) {
|
||||
u32 intid = vgic_v5_make_ppi(i);
|
||||
bool has_pending = false;
|
||||
struct vgic_irq *irq;
|
||||
|
||||
irq = vgic_get_vcpu_irq(vcpu, intid);
|
||||
|
||||
scoped_guard(raw_spinlock_irqsave, &irq->irq_lock)
|
||||
if (irq->enabled && irq->priority < priority_mask)
|
||||
has_pending = irq->hw ? vgic_get_phys_line_level(irq) : irq_is_pending(irq);
|
||||
|
||||
vgic_put_irq(vcpu->kvm, irq);
|
||||
|
||||
if (has_pending)
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* Detect any PPIs state changes, and propagate the state with KVM's
|
||||
* shadow structures.
|
||||
*/
|
||||
void vgic_v5_fold_ppi_state(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vgic_v5_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v5;
|
||||
unsigned long *activer, *pendr;
|
||||
int i;
|
||||
|
||||
activer = host_data_ptr(vgic_v5_ppi_state)->activer_exit;
|
||||
pendr = host_data_ptr(vgic_v5_ppi_state)->pendr;
|
||||
|
||||
for_each_set_bit(i, vcpu->kvm->arch.vgic.gicv5_vm.vgic_ppi_mask,
|
||||
VGIC_V5_NR_PRIVATE_IRQS) {
|
||||
u32 intid = vgic_v5_make_ppi(i);
|
||||
struct vgic_irq *irq;
|
||||
|
||||
irq = vgic_get_vcpu_irq(vcpu, intid);
|
||||
|
||||
scoped_guard(raw_spinlock_irqsave, &irq->irq_lock) {
|
||||
irq->active = test_bit(i, activer);
|
||||
|
||||
/* This is an OR to avoid losing incoming edges! */
|
||||
if (irq->config == VGIC_CONFIG_EDGE)
|
||||
irq->pending_latch |= test_bit(i, pendr);
|
||||
}
|
||||
|
||||
vgic_put_irq(vcpu->kvm, irq);
|
||||
}
|
||||
|
||||
/*
|
||||
* Re-inject the exit state as entry state next time!
|
||||
*
|
||||
* Note that the write of the Enable state is trapped, and hence there
|
||||
* is nothing to explcitly sync back here as we already have the latest
|
||||
* copy by definition.
|
||||
*/
|
||||
bitmap_copy(cpu_if->vgic_ppi_activer, activer, VGIC_V5_NR_PRIVATE_IRQS);
|
||||
}
|
||||
|
||||
void vgic_v5_flush_ppi_state(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
DECLARE_BITMAP(pendr, VGIC_V5_NR_PRIVATE_IRQS);
|
||||
int i;
|
||||
|
||||
/*
|
||||
* Time to enter the guest - we first need to build the guest's
|
||||
* ICC_PPI_PENDRx_EL1, however.
|
||||
*/
|
||||
bitmap_zero(pendr, VGIC_V5_NR_PRIVATE_IRQS);
|
||||
for_each_set_bit(i, vcpu->kvm->arch.vgic.gicv5_vm.vgic_ppi_mask,
|
||||
VGIC_V5_NR_PRIVATE_IRQS) {
|
||||
u32 intid = vgic_v5_make_ppi(i);
|
||||
struct vgic_irq *irq;
|
||||
|
||||
irq = vgic_get_vcpu_irq(vcpu, intid);
|
||||
|
||||
scoped_guard(raw_spinlock_irqsave, &irq->irq_lock) {
|
||||
__assign_bit(i, pendr, irq_is_pending(irq));
|
||||
if (irq->config == VGIC_CONFIG_EDGE)
|
||||
irq->pending_latch = false;
|
||||
}
|
||||
|
||||
vgic_put_irq(vcpu->kvm, irq);
|
||||
}
|
||||
|
||||
/*
|
||||
* Copy the shadow state to the pending reg that will be written to the
|
||||
* ICH_PPI_PENDRx_EL2 regs. While the guest is running we track any
|
||||
* incoming changes to the pending state in the vgic_irq structures. The
|
||||
* incoming changes are merged with the outgoing changes on the return
|
||||
* path.
|
||||
*/
|
||||
bitmap_copy(host_data_ptr(vgic_v5_ppi_state)->pendr, pendr,
|
||||
VGIC_V5_NR_PRIVATE_IRQS);
|
||||
}
|
||||
|
||||
void vgic_v5_load(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vgic_v5_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v5;
|
||||
|
||||
/*
|
||||
* On the WFI path, vgic_load is called a second time. The first is when
|
||||
* scheduling in the vcpu thread again, and the second is when leaving
|
||||
* WFI. Skip the second instance as it serves no purpose and just
|
||||
* restores the same state again.
|
||||
*/
|
||||
if (cpu_if->gicv5_vpe.resident)
|
||||
return;
|
||||
|
||||
kvm_call_hyp(__vgic_v5_restore_vmcr_apr, cpu_if);
|
||||
|
||||
cpu_if->gicv5_vpe.resident = true;
|
||||
}
|
||||
|
||||
void vgic_v5_put(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vgic_v5_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v5;
|
||||
|
||||
/*
|
||||
* Do nothing if we're not resident. This can happen in the WFI path
|
||||
* where we do a vgic_put in the WFI path and again later when
|
||||
* descheduling the thread. We risk losing VMCR state if we sync it
|
||||
* twice, so instead return early in this case.
|
||||
*/
|
||||
if (!cpu_if->gicv5_vpe.resident)
|
||||
return;
|
||||
|
||||
kvm_call_hyp(__vgic_v5_save_apr, cpu_if);
|
||||
|
||||
cpu_if->gicv5_vpe.resident = false;
|
||||
|
||||
/* The shadow priority is only updated on entering WFI */
|
||||
if (vcpu_get_flag(vcpu, IN_WFI))
|
||||
vgic_v5_sync_ppi_priorities(vcpu);
|
||||
}
|
||||
|
||||
void vgic_v5_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
|
||||
{
|
||||
struct vgic_v5_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v5;
|
||||
u64 vmcr = cpu_if->vgic_vmcr;
|
||||
|
||||
vmcrp->en = FIELD_GET(FEAT_GCIE_ICH_VMCR_EL2_EN, vmcr);
|
||||
vmcrp->pmr = FIELD_GET(FEAT_GCIE_ICH_VMCR_EL2_VPMR, vmcr);
|
||||
}
|
||||
|
||||
void vgic_v5_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
|
||||
{
|
||||
struct vgic_v5_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v5;
|
||||
u64 vmcr;
|
||||
|
||||
vmcr = FIELD_PREP(FEAT_GCIE_ICH_VMCR_EL2_VPMR, vmcrp->pmr) |
|
||||
FIELD_PREP(FEAT_GCIE_ICH_VMCR_EL2_EN, vmcrp->en);
|
||||
|
||||
cpu_if->vgic_vmcr = vmcr;
|
||||
}
|
||||
|
||||
void vgic_v5_restore_state(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vgic_v5_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v5;
|
||||
|
||||
__vgic_v5_restore_state(cpu_if);
|
||||
__vgic_v5_restore_ppi_state(cpu_if);
|
||||
dsb(sy);
|
||||
}
|
||||
|
||||
void vgic_v5_save_state(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vgic_v5_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v5;
|
||||
|
||||
__vgic_v5_save_state(cpu_if);
|
||||
__vgic_v5_save_ppi_state(cpu_if);
|
||||
dsb(sy);
|
||||
}
|
||||
|
||||
@@ -86,6 +86,10 @@ static struct vgic_irq *vgic_get_lpi(struct kvm *kvm, u32 intid)
|
||||
*/
|
||||
struct vgic_irq *vgic_get_irq(struct kvm *kvm, u32 intid)
|
||||
{
|
||||
/* Non-private IRQs are not yet implemented for GICv5 */
|
||||
if (vgic_is_v5(kvm))
|
||||
return NULL;
|
||||
|
||||
/* SPIs */
|
||||
if (intid >= VGIC_NR_PRIVATE_IRQS &&
|
||||
intid < (kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS)) {
|
||||
@@ -94,7 +98,7 @@ struct vgic_irq *vgic_get_irq(struct kvm *kvm, u32 intid)
|
||||
}
|
||||
|
||||
/* LPIs */
|
||||
if (intid >= VGIC_MIN_LPI)
|
||||
if (irq_is_lpi(kvm, intid))
|
||||
return vgic_get_lpi(kvm, intid);
|
||||
|
||||
return NULL;
|
||||
@@ -105,6 +109,18 @@ struct vgic_irq *vgic_get_vcpu_irq(struct kvm_vcpu *vcpu, u32 intid)
|
||||
if (WARN_ON(!vcpu))
|
||||
return NULL;
|
||||
|
||||
if (vgic_is_v5(vcpu->kvm)) {
|
||||
u32 int_num, hwirq_id;
|
||||
|
||||
if (!__irq_is_ppi(KVM_DEV_TYPE_ARM_VGIC_V5, intid))
|
||||
return NULL;
|
||||
|
||||
hwirq_id = FIELD_GET(GICV5_HWIRQ_ID, intid);
|
||||
int_num = array_index_nospec(hwirq_id, VGIC_V5_NR_PRIVATE_IRQS);
|
||||
|
||||
return &vcpu->arch.vgic_cpu.private_irqs[int_num];
|
||||
}
|
||||
|
||||
/* SGIs and PPIs */
|
||||
if (intid < VGIC_NR_PRIVATE_IRQS) {
|
||||
intid = array_index_nospec(intid, VGIC_NR_PRIVATE_IRQS);
|
||||
@@ -123,7 +139,7 @@ static void vgic_release_lpi_locked(struct vgic_dist *dist, struct vgic_irq *irq
|
||||
|
||||
static __must_check bool __vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq)
|
||||
{
|
||||
if (irq->intid < VGIC_MIN_LPI)
|
||||
if (!irq_is_lpi(kvm, irq->intid))
|
||||
return false;
|
||||
|
||||
return refcount_dec_and_test(&irq->refcount);
|
||||
@@ -148,7 +164,7 @@ void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq)
|
||||
* Acquire/release it early on lockdep kernels to make locking issues
|
||||
* in rare release paths a bit more obvious.
|
||||
*/
|
||||
if (IS_ENABLED(CONFIG_LOCKDEP) && irq->intid >= VGIC_MIN_LPI) {
|
||||
if (IS_ENABLED(CONFIG_LOCKDEP) && irq_is_lpi(kvm, irq->intid)) {
|
||||
guard(spinlock_irqsave)(&dist->lpi_xa.xa_lock);
|
||||
}
|
||||
|
||||
@@ -186,7 +202,7 @@ void vgic_flush_pending_lpis(struct kvm_vcpu *vcpu)
|
||||
raw_spin_lock_irqsave(&vgic_cpu->ap_list_lock, flags);
|
||||
|
||||
list_for_each_entry_safe(irq, tmp, &vgic_cpu->ap_list_head, ap_list) {
|
||||
if (irq->intid >= VGIC_MIN_LPI) {
|
||||
if (irq_is_lpi(vcpu->kvm, irq->intid)) {
|
||||
raw_spin_lock(&irq->irq_lock);
|
||||
list_del(&irq->ap_list);
|
||||
irq->vcpu = NULL;
|
||||
@@ -404,6 +420,9 @@ bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq,
|
||||
|
||||
lockdep_assert_held(&irq->irq_lock);
|
||||
|
||||
if (irq->ops && irq->ops->queue_irq_unlock)
|
||||
return irq->ops->queue_irq_unlock(kvm, irq, flags);
|
||||
|
||||
retry:
|
||||
vcpu = vgic_target_oracle(irq);
|
||||
if (irq->vcpu || !vcpu) {
|
||||
@@ -521,12 +540,12 @@ int kvm_vgic_inject_irq(struct kvm *kvm, struct kvm_vcpu *vcpu,
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (!vcpu && intid < VGIC_NR_PRIVATE_IRQS)
|
||||
if (!vcpu && irq_is_private(kvm, intid))
|
||||
return -EINVAL;
|
||||
|
||||
trace_vgic_update_irq_pending(vcpu ? vcpu->vcpu_idx : 0, intid, level);
|
||||
|
||||
if (intid < VGIC_NR_PRIVATE_IRQS)
|
||||
if (irq_is_private(kvm, intid))
|
||||
irq = vgic_get_vcpu_irq(vcpu, intid);
|
||||
else
|
||||
irq = vgic_get_irq(kvm, intid);
|
||||
@@ -553,10 +572,27 @@ int kvm_vgic_inject_irq(struct kvm *kvm, struct kvm_vcpu *vcpu,
|
||||
return 0;
|
||||
}
|
||||
|
||||
void kvm_vgic_set_irq_ops(struct kvm_vcpu *vcpu, u32 vintid,
|
||||
struct irq_ops *ops)
|
||||
{
|
||||
struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, vintid);
|
||||
|
||||
BUG_ON(!irq);
|
||||
|
||||
scoped_guard(raw_spinlock_irqsave, &irq->irq_lock)
|
||||
irq->ops = ops;
|
||||
|
||||
vgic_put_irq(vcpu->kvm, irq);
|
||||
}
|
||||
|
||||
void kvm_vgic_clear_irq_ops(struct kvm_vcpu *vcpu, u32 vintid)
|
||||
{
|
||||
kvm_vgic_set_irq_ops(vcpu, vintid, NULL);
|
||||
}
|
||||
|
||||
/* @irq->irq_lock must be held */
|
||||
static int kvm_vgic_map_irq(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
|
||||
unsigned int host_irq,
|
||||
struct irq_ops *ops)
|
||||
unsigned int host_irq)
|
||||
{
|
||||
struct irq_desc *desc;
|
||||
struct irq_data *data;
|
||||
@@ -576,20 +612,25 @@ static int kvm_vgic_map_irq(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
|
||||
irq->hw = true;
|
||||
irq->host_irq = host_irq;
|
||||
irq->hwintid = data->hwirq;
|
||||
irq->ops = ops;
|
||||
|
||||
if (irq->ops && irq->ops->set_direct_injection)
|
||||
irq->ops->set_direct_injection(vcpu, irq, true);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* @irq->irq_lock must be held */
|
||||
static inline void kvm_vgic_unmap_irq(struct vgic_irq *irq)
|
||||
{
|
||||
if (irq->ops && irq->ops->set_direct_injection)
|
||||
irq->ops->set_direct_injection(irq->target_vcpu, irq, false);
|
||||
|
||||
irq->hw = false;
|
||||
irq->hwintid = 0;
|
||||
irq->ops = NULL;
|
||||
}
|
||||
|
||||
int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq,
|
||||
u32 vintid, struct irq_ops *ops)
|
||||
u32 vintid)
|
||||
{
|
||||
struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, vintid);
|
||||
unsigned long flags;
|
||||
@@ -598,7 +639,7 @@ int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq,
|
||||
BUG_ON(!irq);
|
||||
|
||||
raw_spin_lock_irqsave(&irq->irq_lock, flags);
|
||||
ret = kvm_vgic_map_irq(vcpu, irq, host_irq, ops);
|
||||
ret = kvm_vgic_map_irq(vcpu, irq, host_irq);
|
||||
raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
|
||||
vgic_put_irq(vcpu->kvm, irq);
|
||||
|
||||
@@ -685,7 +726,7 @@ int kvm_vgic_set_owner(struct kvm_vcpu *vcpu, unsigned int intid, void *owner)
|
||||
return -EAGAIN;
|
||||
|
||||
/* SGIs and LPIs cannot be wired up to any device */
|
||||
if (!irq_is_ppi(intid) && !vgic_valid_spi(vcpu->kvm, intid))
|
||||
if (!irq_is_ppi(vcpu->kvm, intid) && !vgic_valid_spi(vcpu->kvm, intid))
|
||||
return -EINVAL;
|
||||
|
||||
irq = vgic_get_vcpu_irq(vcpu, intid);
|
||||
@@ -812,8 +853,13 @@ retry:
|
||||
vgic_release_deleted_lpis(vcpu->kvm);
|
||||
}
|
||||
|
||||
static inline void vgic_fold_lr_state(struct kvm_vcpu *vcpu)
|
||||
static void vgic_fold_state(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if (vgic_is_v5(vcpu->kvm)) {
|
||||
vgic_v5_fold_ppi_state(vcpu);
|
||||
return;
|
||||
}
|
||||
|
||||
if (!*host_data_ptr(last_lr_irq))
|
||||
return;
|
||||
|
||||
@@ -1002,7 +1048,10 @@ static inline bool can_access_vgic_from_kernel(void)
|
||||
|
||||
static inline void vgic_save_state(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
|
||||
/* No switch statement here. See comment in vgic_restore_state() */
|
||||
if (vgic_is_v5(vcpu->kvm))
|
||||
vgic_v5_save_state(vcpu);
|
||||
else if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
|
||||
vgic_v2_save_state(vcpu);
|
||||
else
|
||||
__vgic_v3_save_state(&vcpu->arch.vgic_cpu.vgic_v3);
|
||||
@@ -1011,20 +1060,24 @@ static inline void vgic_save_state(struct kvm_vcpu *vcpu)
|
||||
/* Sync back the hardware VGIC state into our emulation after a guest's run. */
|
||||
void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
/* If nesting, emulate the HW effect from L0 to L1 */
|
||||
if (vgic_state_is_nested(vcpu)) {
|
||||
vgic_v3_sync_nested(vcpu);
|
||||
return;
|
||||
}
|
||||
if (vgic_is_v3(vcpu->kvm)) {
|
||||
/* If nesting, emulate the HW effect from L0 to L1 */
|
||||
if (vgic_state_is_nested(vcpu)) {
|
||||
vgic_v3_sync_nested(vcpu);
|
||||
return;
|
||||
}
|
||||
|
||||
if (vcpu_has_nv(vcpu))
|
||||
vgic_v3_nested_update_mi(vcpu);
|
||||
if (vcpu_has_nv(vcpu))
|
||||
vgic_v3_nested_update_mi(vcpu);
|
||||
}
|
||||
|
||||
if (can_access_vgic_from_kernel())
|
||||
vgic_save_state(vcpu);
|
||||
|
||||
vgic_fold_lr_state(vcpu);
|
||||
vgic_prune_ap_list(vcpu);
|
||||
vgic_fold_state(vcpu);
|
||||
|
||||
if (!vgic_is_v5(vcpu->kvm))
|
||||
vgic_prune_ap_list(vcpu);
|
||||
}
|
||||
|
||||
/* Sync interrupts that were deactivated through a DIR trap */
|
||||
@@ -1040,12 +1093,34 @@ void kvm_vgic_process_async_update(struct kvm_vcpu *vcpu)
|
||||
|
||||
static inline void vgic_restore_state(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
|
||||
/*
|
||||
* As nice as it would be to restructure this code into a switch
|
||||
* statement as can be found elsewhere, the logic quickly gets ugly.
|
||||
*
|
||||
* __vgic_v3_restore_state() is doing a lot of heavy lifting here. It is
|
||||
* required for GICv3-on-GICv3, GICv2-on-GICv3, GICv3-on-GICv5, and the
|
||||
* no-in-kernel-irqchip case on GICv3 hardware. Hence, adding a switch
|
||||
* here results in much more complex code.
|
||||
*/
|
||||
if (vgic_is_v5(vcpu->kvm))
|
||||
vgic_v5_restore_state(vcpu);
|
||||
else if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
|
||||
vgic_v2_restore_state(vcpu);
|
||||
else
|
||||
__vgic_v3_restore_state(&vcpu->arch.vgic_cpu.vgic_v3);
|
||||
}
|
||||
|
||||
static void vgic_flush_state(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if (vgic_is_v5(vcpu->kvm)) {
|
||||
vgic_v5_flush_ppi_state(vcpu);
|
||||
return;
|
||||
}
|
||||
|
||||
scoped_guard(raw_spinlock, &vcpu->arch.vgic_cpu.ap_list_lock)
|
||||
vgic_flush_lr_state(vcpu);
|
||||
}
|
||||
|
||||
/* Flush our emulation state into the GIC hardware before entering the guest. */
|
||||
void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
@@ -1082,42 +1157,69 @@ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
|
||||
|
||||
DEBUG_SPINLOCK_BUG_ON(!irqs_disabled());
|
||||
|
||||
scoped_guard(raw_spinlock, &vcpu->arch.vgic_cpu.ap_list_lock)
|
||||
vgic_flush_lr_state(vcpu);
|
||||
vgic_flush_state(vcpu);
|
||||
|
||||
if (can_access_vgic_from_kernel())
|
||||
vgic_restore_state(vcpu);
|
||||
|
||||
if (vgic_supports_direct_irqs(vcpu->kvm))
|
||||
if (vgic_supports_direct_irqs(vcpu->kvm) && kvm_vgic_global_state.has_gicv4)
|
||||
vgic_v4_commit(vcpu);
|
||||
}
|
||||
|
||||
void kvm_vgic_load(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
const struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
|
||||
|
||||
if (unlikely(!irqchip_in_kernel(vcpu->kvm) || !vgic_initialized(vcpu->kvm))) {
|
||||
if (has_vhe() && static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
|
||||
__vgic_v3_activate_traps(&vcpu->arch.vgic_cpu.vgic_v3);
|
||||
return;
|
||||
}
|
||||
|
||||
if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
|
||||
vgic_v2_load(vcpu);
|
||||
else
|
||||
switch (dist->vgic_model) {
|
||||
case KVM_DEV_TYPE_ARM_VGIC_V5:
|
||||
vgic_v5_load(vcpu);
|
||||
break;
|
||||
case KVM_DEV_TYPE_ARM_VGIC_V3:
|
||||
vgic_v3_load(vcpu);
|
||||
break;
|
||||
case KVM_DEV_TYPE_ARM_VGIC_V2:
|
||||
if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
|
||||
vgic_v3_load(vcpu);
|
||||
else
|
||||
vgic_v2_load(vcpu);
|
||||
break;
|
||||
default:
|
||||
BUG();
|
||||
}
|
||||
}
|
||||
|
||||
void kvm_vgic_put(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
const struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
|
||||
|
||||
if (unlikely(!irqchip_in_kernel(vcpu->kvm) || !vgic_initialized(vcpu->kvm))) {
|
||||
if (has_vhe() && static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
|
||||
__vgic_v3_deactivate_traps(&vcpu->arch.vgic_cpu.vgic_v3);
|
||||
return;
|
||||
}
|
||||
|
||||
if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
|
||||
vgic_v2_put(vcpu);
|
||||
else
|
||||
switch (dist->vgic_model) {
|
||||
case KVM_DEV_TYPE_ARM_VGIC_V5:
|
||||
vgic_v5_put(vcpu);
|
||||
break;
|
||||
case KVM_DEV_TYPE_ARM_VGIC_V3:
|
||||
vgic_v3_put(vcpu);
|
||||
break;
|
||||
case KVM_DEV_TYPE_ARM_VGIC_V2:
|
||||
if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
|
||||
vgic_v3_put(vcpu);
|
||||
else
|
||||
vgic_v2_put(vcpu);
|
||||
break;
|
||||
default:
|
||||
BUG();
|
||||
}
|
||||
}
|
||||
|
||||
int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
|
||||
@@ -1128,6 +1230,9 @@ int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
|
||||
unsigned long flags;
|
||||
struct vgic_vmcr vmcr;
|
||||
|
||||
if (vgic_is_v5(vcpu->kvm))
|
||||
return vgic_v5_has_pending_ppi(vcpu);
|
||||
|
||||
if (!vcpu->kvm->arch.vgic.enabled)
|
||||
return false;
|
||||
|
||||
|
||||
@@ -187,6 +187,7 @@ static inline u64 vgic_ich_hcr_trap_bits(void)
|
||||
* registers regardless of the hardware backed GIC used.
|
||||
*/
|
||||
struct vgic_vmcr {
|
||||
u32 en; /* GICv5-specific */
|
||||
u32 grpen0;
|
||||
u32 grpen1;
|
||||
|
||||
@@ -363,6 +364,19 @@ void vgic_debug_init(struct kvm *kvm);
|
||||
void vgic_debug_destroy(struct kvm *kvm);
|
||||
|
||||
int vgic_v5_probe(const struct gic_kvm_info *info);
|
||||
void vgic_v5_reset(struct kvm_vcpu *vcpu);
|
||||
int vgic_v5_init(struct kvm *kvm);
|
||||
int vgic_v5_map_resources(struct kvm *kvm);
|
||||
void vgic_v5_set_ppi_ops(struct kvm_vcpu *vcpu, u32 vintid);
|
||||
bool vgic_v5_has_pending_ppi(struct kvm_vcpu *vcpu);
|
||||
void vgic_v5_flush_ppi_state(struct kvm_vcpu *vcpu);
|
||||
void vgic_v5_fold_ppi_state(struct kvm_vcpu *vcpu);
|
||||
void vgic_v5_load(struct kvm_vcpu *vcpu);
|
||||
void vgic_v5_put(struct kvm_vcpu *vcpu);
|
||||
void vgic_v5_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
|
||||
void vgic_v5_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
|
||||
void vgic_v5_restore_state(struct kvm_vcpu *vcpu);
|
||||
void vgic_v5_save_state(struct kvm_vcpu *vcpu);
|
||||
|
||||
static inline int vgic_v3_max_apr_idx(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
@@ -425,15 +439,6 @@ void vgic_its_invalidate_all_caches(struct kvm *kvm);
|
||||
int vgic_its_inv_lpi(struct kvm *kvm, struct vgic_irq *irq);
|
||||
int vgic_its_invall(struct kvm_vcpu *vcpu);
|
||||
|
||||
bool system_supports_direct_sgis(void);
|
||||
bool vgic_supports_direct_msis(struct kvm *kvm);
|
||||
bool vgic_supports_direct_sgis(struct kvm *kvm);
|
||||
|
||||
static inline bool vgic_supports_direct_irqs(struct kvm *kvm)
|
||||
{
|
||||
return vgic_supports_direct_msis(kvm) || vgic_supports_direct_sgis(kvm);
|
||||
}
|
||||
|
||||
int vgic_v4_init(struct kvm *kvm);
|
||||
void vgic_v4_teardown(struct kvm *kvm);
|
||||
void vgic_v4_configure_vsgis(struct kvm *kvm);
|
||||
@@ -447,6 +452,11 @@ static inline bool kvm_has_gicv3(struct kvm *kvm)
|
||||
return kvm_has_feat(kvm, ID_AA64PFR0_EL1, GIC, IMP);
|
||||
}
|
||||
|
||||
static inline bool kvm_has_gicv5(struct kvm *kvm)
|
||||
{
|
||||
return kvm_has_feat(kvm, ID_AA64PFR2_EL1, GCIE, IMP);
|
||||
}
|
||||
|
||||
void vgic_v3_flush_nested(struct kvm_vcpu *vcpu);
|
||||
void vgic_v3_sync_nested(struct kvm_vcpu *vcpu);
|
||||
void vgic_v3_load_nested(struct kvm_vcpu *vcpu);
|
||||
@@ -454,15 +464,32 @@ void vgic_v3_put_nested(struct kvm_vcpu *vcpu);
|
||||
void vgic_v3_handle_nested_maint_irq(struct kvm_vcpu *vcpu);
|
||||
void vgic_v3_nested_update_mi(struct kvm_vcpu *vcpu);
|
||||
|
||||
static inline bool vgic_is_v3_compat(struct kvm *kvm)
|
||||
static inline bool vgic_host_has_gicv3(void)
|
||||
{
|
||||
return cpus_have_final_cap(ARM64_HAS_GICV5_CPUIF) &&
|
||||
/*
|
||||
* Either the host is a native GICv3, or it is GICv5 with
|
||||
* FEAT_GCIE_LEGACY.
|
||||
*/
|
||||
return kvm_vgic_global_state.type == VGIC_V3 ||
|
||||
kvm_vgic_global_state.has_gcie_v3_compat;
|
||||
}
|
||||
|
||||
static inline bool vgic_is_v3(struct kvm *kvm)
|
||||
static inline bool vgic_host_has_gicv5(void)
|
||||
{
|
||||
return kvm_vgic_global_state.type == VGIC_V3 || vgic_is_v3_compat(kvm);
|
||||
return kvm_vgic_global_state.type == VGIC_V5;
|
||||
}
|
||||
|
||||
bool system_supports_direct_sgis(void);
|
||||
bool vgic_supports_direct_msis(struct kvm *kvm);
|
||||
bool vgic_supports_direct_sgis(struct kvm *kvm);
|
||||
|
||||
static inline bool vgic_supports_direct_irqs(struct kvm *kvm)
|
||||
{
|
||||
/* GICv5 always supports direct IRQs */
|
||||
if (vgic_is_v5(kvm))
|
||||
return true;
|
||||
|
||||
return vgic_supports_direct_msis(kvm) || vgic_supports_direct_sgis(kvm);
|
||||
}
|
||||
|
||||
int vgic_its_debug_init(struct kvm_device *dev);
|
||||
|
||||
@@ -43,6 +43,7 @@
|
||||
#include <asm/system_misc.h>
|
||||
#include <asm/tlbflush.h>
|
||||
#include <asm/traps.h>
|
||||
#include <asm/virt.h>
|
||||
|
||||
struct fault_info {
|
||||
int (*fn)(unsigned long far, unsigned long esr,
|
||||
@@ -269,6 +270,15 @@ static inline bool is_el1_permission_fault(unsigned long addr, unsigned long esr
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool is_pkvm_stage2_abort(unsigned int esr)
|
||||
{
|
||||
/*
|
||||
* S1PTW should only ever be set in ESR_EL1 if the pkvm hypervisor
|
||||
* injected a stage-2 abort -- see host_inject_mem_abort().
|
||||
*/
|
||||
return is_pkvm_initialized() && (esr & ESR_ELx_S1PTW);
|
||||
}
|
||||
|
||||
static bool __kprobes is_spurious_el1_translation_fault(unsigned long addr,
|
||||
unsigned long esr,
|
||||
struct pt_regs *regs)
|
||||
@@ -289,8 +299,14 @@ static bool __kprobes is_spurious_el1_translation_fault(unsigned long addr,
|
||||
* If we now have a valid translation, treat the translation fault as
|
||||
* spurious.
|
||||
*/
|
||||
if (!(par & SYS_PAR_EL1_F))
|
||||
if (!(par & SYS_PAR_EL1_F)) {
|
||||
if (is_pkvm_stage2_abort(esr)) {
|
||||
par &= SYS_PAR_EL1_PA;
|
||||
return pkvm_force_reclaim_guest_page(par);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* If we got a different type of fault from the AT instruction,
|
||||
@@ -376,9 +392,11 @@ static void __do_kernel_fault(unsigned long addr, unsigned long esr,
|
||||
if (!is_el1_instruction_abort(esr) && fixup_exception(regs, esr))
|
||||
return;
|
||||
|
||||
if (WARN_RATELIMIT(is_spurious_el1_translation_fault(addr, esr, regs),
|
||||
"Ignoring spurious kernel translation fault at virtual address %016lx\n", addr))
|
||||
if (is_spurious_el1_translation_fault(addr, esr, regs)) {
|
||||
WARN_RATELIMIT(!is_pkvm_stage2_abort(esr),
|
||||
"Ignoring spurious kernel translation fault at virtual address %016lx\n", addr);
|
||||
return;
|
||||
}
|
||||
|
||||
if (is_el1_mte_sync_tag_check_fault(esr)) {
|
||||
do_tag_recovery(addr, esr, regs);
|
||||
@@ -395,6 +413,8 @@ static void __do_kernel_fault(unsigned long addr, unsigned long esr,
|
||||
msg = "read from unreadable memory";
|
||||
} else if (addr < PAGE_SIZE) {
|
||||
msg = "NULL pointer dereference";
|
||||
} else if (is_pkvm_stage2_abort(esr)) {
|
||||
msg = "access to hypervisor-protected memory";
|
||||
} else {
|
||||
if (esr_fsc_is_translation_fault(esr) &&
|
||||
kfence_handle_page_fault(addr, esr & ESR_ELx_WNR, regs))
|
||||
@@ -621,6 +641,13 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
|
||||
addr, esr, regs);
|
||||
}
|
||||
|
||||
if (is_pkvm_stage2_abort(esr)) {
|
||||
if (!user_mode(regs))
|
||||
goto no_context;
|
||||
arm64_force_sig_fault(SIGSEGV, SEGV_ACCERR, far, "stage-2 fault");
|
||||
return 0;
|
||||
}
|
||||
|
||||
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
|
||||
|
||||
if (!(mm_flags & FAULT_FLAG_USER))
|
||||
|
||||
@@ -3243,6 +3243,14 @@ UnsignedEnum 3:0 ID_BITS
|
||||
EndEnum
|
||||
EndSysreg
|
||||
|
||||
Sysreg ICC_HPPIR_EL1 3 0 12 10 3
|
||||
Res0 63:33
|
||||
Field 32 HPPIV
|
||||
Field 31:29 TYPE
|
||||
Res0 28:24
|
||||
Field 23:0 ID
|
||||
EndSysreg
|
||||
|
||||
Sysreg ICC_ICSR_EL1 3 0 12 10 4
|
||||
Res0 63:48
|
||||
Field 47:32 IAFFID
|
||||
@@ -3257,6 +3265,11 @@ Field 1 Enabled
|
||||
Field 0 F
|
||||
EndSysreg
|
||||
|
||||
Sysreg ICC_IAFFIDR_EL1 3 0 12 10 5
|
||||
Res0 63:16
|
||||
Field 15:0 IAFFID
|
||||
EndSysreg
|
||||
|
||||
SysregFields ICC_PPI_ENABLERx_EL1
|
||||
Field 63 EN63
|
||||
Field 62 EN62
|
||||
@@ -3663,6 +3676,42 @@ Res0 14:12
|
||||
Field 11:0 AFFINITY
|
||||
EndSysreg
|
||||
|
||||
Sysreg ICC_APR_EL1 3 1 12 0 0
|
||||
Res0 63:32
|
||||
Field 31 P31
|
||||
Field 30 P30
|
||||
Field 29 P29
|
||||
Field 28 P28
|
||||
Field 27 P27
|
||||
Field 26 P26
|
||||
Field 25 P25
|
||||
Field 24 P24
|
||||
Field 23 P23
|
||||
Field 22 P22
|
||||
Field 21 P21
|
||||
Field 20 P20
|
||||
Field 19 P19
|
||||
Field 18 P18
|
||||
Field 17 P17
|
||||
Field 16 P16
|
||||
Field 15 P15
|
||||
Field 14 P14
|
||||
Field 13 P13
|
||||
Field 12 P12
|
||||
Field 11 P11
|
||||
Field 10 P10
|
||||
Field 9 P9
|
||||
Field 8 P8
|
||||
Field 7 P7
|
||||
Field 6 P6
|
||||
Field 5 P5
|
||||
Field 4 P4
|
||||
Field 3 P3
|
||||
Field 2 P2
|
||||
Field 1 P1
|
||||
Field 0 P0
|
||||
EndSysreg
|
||||
|
||||
Sysreg ICC_CR0_EL1 3 1 12 0 1
|
||||
Res0 63:39
|
||||
Field 38 PID
|
||||
@@ -4687,6 +4736,42 @@ Field 31:16 PhyPARTID29
|
||||
Field 15:0 PhyPARTID28
|
||||
EndSysreg
|
||||
|
||||
Sysreg ICH_APR_EL2 3 4 12 8 4
|
||||
Res0 63:32
|
||||
Field 31 P31
|
||||
Field 30 P30
|
||||
Field 29 P29
|
||||
Field 28 P28
|
||||
Field 27 P27
|
||||
Field 26 P26
|
||||
Field 25 P25
|
||||
Field 24 P24
|
||||
Field 23 P23
|
||||
Field 22 P22
|
||||
Field 21 P21
|
||||
Field 20 P20
|
||||
Field 19 P19
|
||||
Field 18 P18
|
||||
Field 17 P17
|
||||
Field 16 P16
|
||||
Field 15 P15
|
||||
Field 14 P14
|
||||
Field 13 P13
|
||||
Field 12 P12
|
||||
Field 11 P11
|
||||
Field 10 P10
|
||||
Field 9 P9
|
||||
Field 8 P8
|
||||
Field 7 P7
|
||||
Field 6 P6
|
||||
Field 5 P5
|
||||
Field 4 P4
|
||||
Field 3 P3
|
||||
Field 2 P2
|
||||
Field 1 P1
|
||||
Field 0 P0
|
||||
EndSysreg
|
||||
|
||||
Sysreg ICH_HFGRTR_EL2 3 4 12 9 4
|
||||
Res0 63:21
|
||||
Field 20 ICC_PPI_ACTIVERn_EL1
|
||||
@@ -4735,6 +4820,306 @@ Field 1 GICCDDIS
|
||||
Field 0 GICCDEN
|
||||
EndSysreg
|
||||
|
||||
SysregFields ICH_PPI_DVIRx_EL2
|
||||
Field 63 DVI63
|
||||
Field 62 DVI62
|
||||
Field 61 DVI61
|
||||
Field 60 DVI60
|
||||
Field 59 DVI59
|
||||
Field 58 DVI58
|
||||
Field 57 DVI57
|
||||
Field 56 DVI56
|
||||
Field 55 DVI55
|
||||
Field 54 DVI54
|
||||
Field 53 DVI53
|
||||
Field 52 DVI52
|
||||
Field 51 DVI51
|
||||
Field 50 DVI50
|
||||
Field 49 DVI49
|
||||
Field 48 DVI48
|
||||
Field 47 DVI47
|
||||
Field 46 DVI46
|
||||
Field 45 DVI45
|
||||
Field 44 DVI44
|
||||
Field 43 DVI43
|
||||
Field 42 DVI42
|
||||
Field 41 DVI41
|
||||
Field 40 DVI40
|
||||
Field 39 DVI39
|
||||
Field 38 DVI38
|
||||
Field 37 DVI37
|
||||
Field 36 DVI36
|
||||
Field 35 DVI35
|
||||
Field 34 DVI34
|
||||
Field 33 DVI33
|
||||
Field 32 DVI32
|
||||
Field 31 DVI31
|
||||
Field 30 DVI30
|
||||
Field 29 DVI29
|
||||
Field 28 DVI28
|
||||
Field 27 DVI27
|
||||
Field 26 DVI26
|
||||
Field 25 DVI25
|
||||
Field 24 DVI24
|
||||
Field 23 DVI23
|
||||
Field 22 DVI22
|
||||
Field 21 DVI21
|
||||
Field 20 DVI20
|
||||
Field 19 DVI19
|
||||
Field 18 DVI18
|
||||
Field 17 DVI17
|
||||
Field 16 DVI16
|
||||
Field 15 DVI15
|
||||
Field 14 DVI14
|
||||
Field 13 DVI13
|
||||
Field 12 DVI12
|
||||
Field 11 DVI11
|
||||
Field 10 DVI10
|
||||
Field 9 DVI9
|
||||
Field 8 DVI8
|
||||
Field 7 DVI7
|
||||
Field 6 DVI6
|
||||
Field 5 DVI5
|
||||
Field 4 DVI4
|
||||
Field 3 DVI3
|
||||
Field 2 DVI2
|
||||
Field 1 DVI1
|
||||
Field 0 DVI0
|
||||
EndSysregFields
|
||||
|
||||
Sysreg ICH_PPI_DVIR0_EL2 3 4 12 10 0
|
||||
Fields ICH_PPI_DVIRx_EL2
|
||||
EndSysreg
|
||||
|
||||
Sysreg ICH_PPI_DVIR1_EL2 3 4 12 10 1
|
||||
Fields ICH_PPI_DVIRx_EL2
|
||||
EndSysreg
|
||||
|
||||
SysregFields ICH_PPI_ENABLERx_EL2
|
||||
Field 63 EN63
|
||||
Field 62 EN62
|
||||
Field 61 EN61
|
||||
Field 60 EN60
|
||||
Field 59 EN59
|
||||
Field 58 EN58
|
||||
Field 57 EN57
|
||||
Field 56 EN56
|
||||
Field 55 EN55
|
||||
Field 54 EN54
|
||||
Field 53 EN53
|
||||
Field 52 EN52
|
||||
Field 51 EN51
|
||||
Field 50 EN50
|
||||
Field 49 EN49
|
||||
Field 48 EN48
|
||||
Field 47 EN47
|
||||
Field 46 EN46
|
||||
Field 45 EN45
|
||||
Field 44 EN44
|
||||
Field 43 EN43
|
||||
Field 42 EN42
|
||||
Field 41 EN41
|
||||
Field 40 EN40
|
||||
Field 39 EN39
|
||||
Field 38 EN38
|
||||
Field 37 EN37
|
||||
Field 36 EN36
|
||||
Field 35 EN35
|
||||
Field 34 EN34
|
||||
Field 33 EN33
|
||||
Field 32 EN32
|
||||
Field 31 EN31
|
||||
Field 30 EN30
|
||||
Field 29 EN29
|
||||
Field 28 EN28
|
||||
Field 27 EN27
|
||||
Field 26 EN26
|
||||
Field 25 EN25
|
||||
Field 24 EN24
|
||||
Field 23 EN23
|
||||
Field 22 EN22
|
||||
Field 21 EN21
|
||||
Field 20 EN20
|
||||
Field 19 EN19
|
||||
Field 18 EN18
|
||||
Field 17 EN17
|
||||
Field 16 EN16
|
||||
Field 15 EN15
|
||||
Field 14 EN14
|
||||
Field 13 EN13
|
||||
Field 12 EN12
|
||||
Field 11 EN11
|
||||
Field 10 EN10
|
||||
Field 9 EN9
|
||||
Field 8 EN8
|
||||
Field 7 EN7
|
||||
Field 6 EN6
|
||||
Field 5 EN5
|
||||
Field 4 EN4
|
||||
Field 3 EN3
|
||||
Field 2 EN2
|
||||
Field 1 EN1
|
||||
Field 0 EN0
|
||||
EndSysregFields
|
||||
|
||||
Sysreg ICH_PPI_ENABLER0_EL2 3 4 12 10 2
|
||||
Fields ICH_PPI_ENABLERx_EL2
|
||||
EndSysreg
|
||||
|
||||
Sysreg ICH_PPI_ENABLER1_EL2 3 4 12 10 3
|
||||
Fields ICH_PPI_ENABLERx_EL2
|
||||
EndSysreg
|
||||
|
||||
SysregFields ICH_PPI_PENDRx_EL2
|
||||
Field 63 PEND63
|
||||
Field 62 PEND62
|
||||
Field 61 PEND61
|
||||
Field 60 PEND60
|
||||
Field 59 PEND59
|
||||
Field 58 PEND58
|
||||
Field 57 PEND57
|
||||
Field 56 PEND56
|
||||
Field 55 PEND55
|
||||
Field 54 PEND54
|
||||
Field 53 PEND53
|
||||
Field 52 PEND52
|
||||
Field 51 PEND51
|
||||
Field 50 PEND50
|
||||
Field 49 PEND49
|
||||
Field 48 PEND48
|
||||
Field 47 PEND47
|
||||
Field 46 PEND46
|
||||
Field 45 PEND45
|
||||
Field 44 PEND44
|
||||
Field 43 PEND43
|
||||
Field 42 PEND42
|
||||
Field 41 PEND41
|
||||
Field 40 PEND40
|
||||
Field 39 PEND39
|
||||
Field 38 PEND38
|
||||
Field 37 PEND37
|
||||
Field 36 PEND36
|
||||
Field 35 PEND35
|
||||
Field 34 PEND34
|
||||
Field 33 PEND33
|
||||
Field 32 PEND32
|
||||
Field 31 PEND31
|
||||
Field 30 PEND30
|
||||
Field 29 PEND29
|
||||
Field 28 PEND28
|
||||
Field 27 PEND27
|
||||
Field 26 PEND26
|
||||
Field 25 PEND25
|
||||
Field 24 PEND24
|
||||
Field 23 PEND23
|
||||
Field 22 PEND22
|
||||
Field 21 PEND21
|
||||
Field 20 PEND20
|
||||
Field 19 PEND19
|
||||
Field 18 PEND18
|
||||
Field 17 PEND17
|
||||
Field 16 PEND16
|
||||
Field 15 PEND15
|
||||
Field 14 PEND14
|
||||
Field 13 PEND13
|
||||
Field 12 PEND12
|
||||
Field 11 PEND11
|
||||
Field 10 PEND10
|
||||
Field 9 PEND9
|
||||
Field 8 PEND8
|
||||
Field 7 PEND7
|
||||
Field 6 PEND6
|
||||
Field 5 PEND5
|
||||
Field 4 PEND4
|
||||
Field 3 PEND3
|
||||
Field 2 PEND2
|
||||
Field 1 PEND1
|
||||
Field 0 PEND0
|
||||
EndSysregFields
|
||||
|
||||
Sysreg ICH_PPI_PENDR0_EL2 3 4 12 10 4
|
||||
Fields ICH_PPI_PENDRx_EL2
|
||||
EndSysreg
|
||||
|
||||
Sysreg ICH_PPI_PENDR1_EL2 3 4 12 10 5
|
||||
Fields ICH_PPI_PENDRx_EL2
|
||||
EndSysreg
|
||||
|
||||
SysregFields ICH_PPI_ACTIVERx_EL2
|
||||
Field 63 ACTIVE63
|
||||
Field 62 ACTIVE62
|
||||
Field 61 ACTIVE61
|
||||
Field 60 ACTIVE60
|
||||
Field 59 ACTIVE59
|
||||
Field 58 ACTIVE58
|
||||
Field 57 ACTIVE57
|
||||
Field 56 ACTIVE56
|
||||
Field 55 ACTIVE55
|
||||
Field 54 ACTIVE54
|
||||
Field 53 ACTIVE53
|
||||
Field 52 ACTIVE52
|
||||
Field 51 ACTIVE51
|
||||
Field 50 ACTIVE50
|
||||
Field 49 ACTIVE49
|
||||
Field 48 ACTIVE48
|
||||
Field 47 ACTIVE47
|
||||
Field 46 ACTIVE46
|
||||
Field 45 ACTIVE45
|
||||
Field 44 ACTIVE44
|
||||
Field 43 ACTIVE43
|
||||
Field 42 ACTIVE42
|
||||
Field 41 ACTIVE41
|
||||
Field 40 ACTIVE40
|
||||
Field 39 ACTIVE39
|
||||
Field 38 ACTIVE38
|
||||
Field 37 ACTIVE37
|
||||
Field 36 ACTIVE36
|
||||
Field 35 ACTIVE35
|
||||
Field 34 ACTIVE34
|
||||
Field 33 ACTIVE33
|
||||
Field 32 ACTIVE32
|
||||
Field 31 ACTIVE31
|
||||
Field 30 ACTIVE30
|
||||
Field 29 ACTIVE29
|
||||
Field 28 ACTIVE28
|
||||
Field 27 ACTIVE27
|
||||
Field 26 ACTIVE26
|
||||
Field 25 ACTIVE25
|
||||
Field 24 ACTIVE24
|
||||
Field 23 ACTIVE23
|
||||
Field 22 ACTIVE22
|
||||
Field 21 ACTIVE21
|
||||
Field 20 ACTIVE20
|
||||
Field 19 ACTIVE19
|
||||
Field 18 ACTIVE18
|
||||
Field 17 ACTIVE17
|
||||
Field 16 ACTIVE16
|
||||
Field 15 ACTIVE15
|
||||
Field 14 ACTIVE14
|
||||
Field 13 ACTIVE13
|
||||
Field 12 ACTIVE12
|
||||
Field 11 ACTIVE11
|
||||
Field 10 ACTIVE10
|
||||
Field 9 ACTIVE9
|
||||
Field 8 ACTIVE8
|
||||
Field 7 ACTIVE7
|
||||
Field 6 ACTIVE6
|
||||
Field 5 ACTIVE5
|
||||
Field 4 ACTIVE4
|
||||
Field 3 ACTIVE3
|
||||
Field 2 ACTIVE2
|
||||
Field 1 ACTIVE1
|
||||
Field 0 ACTIVE0
|
||||
EndSysregFields
|
||||
|
||||
Sysreg ICH_PPI_ACTIVER0_EL2 3 4 12 10 6
|
||||
Fields ICH_PPI_ACTIVERx_EL2
|
||||
EndSysreg
|
||||
|
||||
Sysreg ICH_PPI_ACTIVER1_EL2 3 4 12 10 7
|
||||
Fields ICH_PPI_ACTIVERx_EL2
|
||||
EndSysreg
|
||||
|
||||
Sysreg ICH_HCR_EL2 3 4 12 11 0
|
||||
Res0 63:32
|
||||
Field 31:27 EOIcount
|
||||
@@ -4789,6 +5174,18 @@ Field 1 V3
|
||||
Field 0 En
|
||||
EndSysreg
|
||||
|
||||
Sysreg ICH_CONTEXTR_EL2 3 4 12 11 6
|
||||
Field 63 V
|
||||
Field 62 F
|
||||
Field 61 IRICHPPIDIS
|
||||
Field 60 DB
|
||||
Field 59:55 DBPM
|
||||
Res0 54:48
|
||||
Field 47:32 VPE
|
||||
Res0 31:16
|
||||
Field 15:0 VM
|
||||
EndSysreg
|
||||
|
||||
Sysreg ICH_VMCR_EL2 3 4 12 11 7
|
||||
Prefix FEAT_GCIE
|
||||
Res0 63:32
|
||||
@@ -4810,6 +5207,89 @@ Field 1 VENG1
|
||||
Field 0 VENG0
|
||||
EndSysreg
|
||||
|
||||
SysregFields ICH_PPI_PRIORITYRx_EL2
|
||||
Res0 63:61
|
||||
Field 60:56 Priority7
|
||||
Res0 55:53
|
||||
Field 52:48 Priority6
|
||||
Res0 47:45
|
||||
Field 44:40 Priority5
|
||||
Res0 39:37
|
||||
Field 36:32 Priority4
|
||||
Res0 31:29
|
||||
Field 28:24 Priority3
|
||||
Res0 23:21
|
||||
Field 20:16 Priority2
|
||||
Res0 15:13
|
||||
Field 12:8 Priority1
|
||||
Res0 7:5
|
||||
Field 4:0 Priority0
|
||||
EndSysregFields
|
||||
|
||||
Sysreg ICH_PPI_PRIORITYR0_EL2 3 4 12 14 0
|
||||
Fields ICH_PPI_PRIORITYRx_EL2
|
||||
EndSysreg
|
||||
|
||||
Sysreg ICH_PPI_PRIORITYR1_EL2 3 4 12 14 1
|
||||
Fields ICH_PPI_PRIORITYRx_EL2
|
||||
EndSysreg
|
||||
|
||||
Sysreg ICH_PPI_PRIORITYR2_EL2 3 4 12 14 2
|
||||
Fields ICH_PPI_PRIORITYRx_EL2
|
||||
EndSysreg
|
||||
|
||||
Sysreg ICH_PPI_PRIORITYR3_EL2 3 4 12 14 3
|
||||
Fields ICH_PPI_PRIORITYRx_EL2
|
||||
EndSysreg
|
||||
|
||||
Sysreg ICH_PPI_PRIORITYR4_EL2 3 4 12 14 4
|
||||
Fields ICH_PPI_PRIORITYRx_EL2
|
||||
EndSysreg
|
||||
|
||||
Sysreg ICH_PPI_PRIORITYR5_EL2 3 4 12 14 5
|
||||
Fields ICH_PPI_PRIORITYRx_EL2
|
||||
EndSysreg
|
||||
|
||||
Sysreg ICH_PPI_PRIORITYR6_EL2 3 4 12 14 6
|
||||
Fields ICH_PPI_PRIORITYRx_EL2
|
||||
EndSysreg
|
||||
|
||||
Sysreg ICH_PPI_PRIORITYR7_EL2 3 4 12 14 7
|
||||
Fields ICH_PPI_PRIORITYRx_EL2
|
||||
EndSysreg
|
||||
|
||||
Sysreg ICH_PPI_PRIORITYR8_EL2 3 4 12 15 0
|
||||
Fields ICH_PPI_PRIORITYRx_EL2
|
||||
EndSysreg
|
||||
|
||||
Sysreg ICH_PPI_PRIORITYR9_EL2 3 4 12 15 1
|
||||
Fields ICH_PPI_PRIORITYRx_EL2
|
||||
EndSysreg
|
||||
|
||||
Sysreg ICH_PPI_PRIORITYR10_EL2 3 4 12 15 2
|
||||
Fields ICH_PPI_PRIORITYRx_EL2
|
||||
EndSysreg
|
||||
|
||||
Sysreg ICH_PPI_PRIORITYR11_EL2 3 4 12 15 3
|
||||
Fields ICH_PPI_PRIORITYRx_EL2
|
||||
EndSysreg
|
||||
|
||||
Sysreg ICH_PPI_PRIORITYR12_EL2 3 4 12 15 4
|
||||
Fields ICH_PPI_PRIORITYRx_EL2
|
||||
EndSysreg
|
||||
|
||||
Sysreg ICH_PPI_PRIORITYR13_EL2 3 4 12 15 5
|
||||
Fields ICH_PPI_PRIORITYRx_EL2
|
||||
EndSysreg
|
||||
|
||||
Sysreg ICH_PPI_PRIORITYR14_EL2 3 4 12 15 6
|
||||
Fields ICH_PPI_PRIORITYRx_EL2
|
||||
EndSysreg
|
||||
|
||||
Sysreg ICH_PPI_PRIORITYR15_EL2 3 4 12 15 7
|
||||
Fields ICH_PPI_PRIORITYRx_EL2
|
||||
EndSysreg
|
||||
|
||||
Sysreg CONTEXTIDR_EL2 3 4 13 0 1
|
||||
Fields CONTEXTIDR_ELx
|
||||
EndSysreg
|
||||
|
||||
@@ -511,6 +511,23 @@ static bool gicv5_ppi_irq_is_level(irq_hw_number_t hwirq)
|
||||
return !!(read_ppi_sysreg_s(hwirq, PPI_HM) & bit);
|
||||
}
|
||||
|
||||
static int gicv5_ppi_irq_set_type(struct irq_data *d, unsigned int type)
|
||||
{
|
||||
/*
|
||||
* GICv5's PPIs do not have a configurable trigger or handling
|
||||
* mode. Check that the attempt to set a type matches what the
|
||||
* hardware reports in the HMR, and error on a mismatch.
|
||||
*/
|
||||
|
||||
if (type & IRQ_TYPE_EDGE_BOTH && gicv5_ppi_irq_is_level(d->hwirq))
|
||||
return -EINVAL;
|
||||
|
||||
if (type & IRQ_TYPE_LEVEL_MASK && !gicv5_ppi_irq_is_level(d->hwirq))
|
||||
return -EINVAL;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int gicv5_ppi_irq_set_vcpu_affinity(struct irq_data *d, void *vcpu)
|
||||
{
|
||||
if (vcpu)
|
||||
@@ -526,6 +543,7 @@ static const struct irq_chip gicv5_ppi_irq_chip = {
|
||||
.irq_mask = gicv5_ppi_irq_mask,
|
||||
.irq_unmask = gicv5_ppi_irq_unmask,
|
||||
.irq_eoi = gicv5_ppi_irq_eoi,
|
||||
.irq_set_type = gicv5_ppi_irq_set_type,
|
||||
.irq_get_irqchip_state = gicv5_ppi_irq_get_irqchip_state,
|
||||
.irq_set_irqchip_state = gicv5_ppi_irq_set_irqchip_state,
|
||||
.irq_set_vcpu_affinity = gicv5_ppi_irq_set_vcpu_affinity,
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
config ARM_PKVM_GUEST
|
||||
bool "Arm pKVM protected guest driver"
|
||||
depends on ARM64
|
||||
depends on ARM64 && DMA_RESTRICTED_POOL
|
||||
help
|
||||
Protected guests running under the pKVM hypervisor on arm64
|
||||
are isolated from the host and must issue hypercalls to enable
|
||||
|
||||
@@ -664,6 +664,7 @@ struct dentry *tracefs_create_file(const char *name, umode_t mode,
|
||||
fsnotify_create(d_inode(dentry->d_parent), dentry);
|
||||
return tracefs_end_creating(dentry);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(tracefs_create_file);
|
||||
|
||||
static struct dentry *__create_dir(const char *name, struct dentry *parent,
|
||||
const struct inode_operations *ops)
|
||||
|
||||
@@ -10,6 +10,8 @@
|
||||
#include <linux/clocksource.h>
|
||||
#include <linux/hrtimer.h>
|
||||
|
||||
#include <linux/irqchip/arm-gic-v5.h>
|
||||
|
||||
enum kvm_arch_timers {
|
||||
TIMER_PTIMER,
|
||||
TIMER_VTIMER,
|
||||
@@ -47,7 +49,7 @@ struct arch_timer_vm_data {
|
||||
u64 poffset;
|
||||
|
||||
/* The PPI for each timer, global to the VM */
|
||||
u8 ppi[NR_KVM_TIMERS];
|
||||
u32 ppi[NR_KVM_TIMERS];
|
||||
};
|
||||
|
||||
struct arch_timer_context {
|
||||
@@ -130,6 +132,10 @@ void kvm_timer_init_vhe(void);
|
||||
#define timer_vm_data(ctx) (&(timer_context_to_vcpu(ctx)->kvm->arch.timer_data))
|
||||
#define timer_irq(ctx) (timer_vm_data(ctx)->ppi[arch_timer_ctx_index(ctx)])
|
||||
|
||||
#define get_vgic_ppi(k, i) (((k)->arch.vgic.vgic_model != KVM_DEV_TYPE_ARM_VGIC_V5) ? \
|
||||
(i) : (FIELD_PREP(GICV5_HWIRQ_ID, i) | \
|
||||
FIELD_PREP(GICV5_HWIRQ_TYPE, GICV5_HWIRQ_TYPE_PPI)))
|
||||
|
||||
u64 kvm_arm_timer_read_sysreg(struct kvm_vcpu *vcpu,
|
||||
enum kvm_arch_timers tmr,
|
||||
enum kvm_arch_timer_regs treg);
|
||||
|
||||
@@ -12,6 +12,9 @@
|
||||
|
||||
#define KVM_ARMV8_PMU_MAX_COUNTERS 32
|
||||
|
||||
/* PPI #23 - architecturally specified for GICv5 */
|
||||
#define KVM_ARMV8_PMU_GICV5_IRQ 0x20000017
|
||||
|
||||
#if IS_ENABLED(CONFIG_HW_PERF_EVENTS) && IS_ENABLED(CONFIG_KVM)
|
||||
struct kvm_pmc {
|
||||
u8 idx; /* index into the pmu->pmc array */
|
||||
@@ -38,7 +41,7 @@ struct arm_pmu_entry {
|
||||
};
|
||||
|
||||
bool kvm_supports_guest_pmuv3(void);
|
||||
#define kvm_arm_pmu_irq_initialized(v) ((v)->arch.pmu.irq_num >= VGIC_NR_SGIS)
|
||||
#define kvm_arm_pmu_irq_initialized(v) ((v)->arch.pmu.irq_num != 0)
|
||||
u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx);
|
||||
void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val);
|
||||
void kvm_pmu_set_counter_value_user(struct kvm_vcpu *vcpu, u64 select_idx, u64 val);
|
||||
|
||||
@@ -19,7 +19,9 @@
|
||||
#include <linux/jump_label.h>
|
||||
|
||||
#include <linux/irqchip/arm-gic-v4.h>
|
||||
#include <linux/irqchip/arm-gic-v5.h>
|
||||
|
||||
#define VGIC_V5_MAX_CPUS 512
|
||||
#define VGIC_V3_MAX_CPUS 512
|
||||
#define VGIC_V2_MAX_CPUS 8
|
||||
#define VGIC_NR_IRQS_LEGACY 256
|
||||
@@ -31,9 +33,96 @@
|
||||
#define VGIC_MIN_LPI 8192
|
||||
#define KVM_IRQCHIP_NUM_PINS (1020 - 32)
|
||||
|
||||
#define irq_is_ppi(irq) ((irq) >= VGIC_NR_SGIS && (irq) < VGIC_NR_PRIVATE_IRQS)
|
||||
#define irq_is_spi(irq) ((irq) >= VGIC_NR_PRIVATE_IRQS && \
|
||||
(irq) <= VGIC_MAX_SPI)
|
||||
/*
|
||||
* GICv5 supports 128 PPIs, but only the first 64 are architected. We only
|
||||
* support the timers and PMU in KVM, both of which are architected. Rather than
|
||||
* handling twice the state, we instead opt to only support the architected set
|
||||
* in KVM for now. At a future stage, this can be bumped up to 128, if required.
|
||||
*/
|
||||
#define VGIC_V5_NR_PRIVATE_IRQS 64
|
||||
|
||||
#define is_v5_type(t, i) (FIELD_GET(GICV5_HWIRQ_TYPE, (i)) == (t))
|
||||
|
||||
#define __irq_is_sgi(t, i) \
|
||||
({ \
|
||||
bool __ret; \
|
||||
\
|
||||
switch (t) { \
|
||||
case KVM_DEV_TYPE_ARM_VGIC_V5: \
|
||||
__ret = false; \
|
||||
break; \
|
||||
default: \
|
||||
__ret = (i) < VGIC_NR_SGIS; \
|
||||
} \
|
||||
\
|
||||
__ret; \
|
||||
})
|
||||
|
||||
#define __irq_is_ppi(t, i) \
|
||||
({ \
|
||||
bool __ret; \
|
||||
\
|
||||
switch (t) { \
|
||||
case KVM_DEV_TYPE_ARM_VGIC_V5: \
|
||||
__ret = is_v5_type(GICV5_HWIRQ_TYPE_PPI, (i)); \
|
||||
break; \
|
||||
default: \
|
||||
__ret = (i) >= VGIC_NR_SGIS; \
|
||||
__ret &= (i) < VGIC_NR_PRIVATE_IRQS; \
|
||||
} \
|
||||
\
|
||||
__ret; \
|
||||
})
|
||||
|
||||
#define __irq_is_spi(t, i) \
|
||||
({ \
|
||||
bool __ret; \
|
||||
\
|
||||
switch (t) { \
|
||||
case KVM_DEV_TYPE_ARM_VGIC_V5: \
|
||||
__ret = is_v5_type(GICV5_HWIRQ_TYPE_SPI, (i)); \
|
||||
break; \
|
||||
default: \
|
||||
__ret = (i) <= VGIC_MAX_SPI; \
|
||||
__ret &= (i) >= VGIC_NR_PRIVATE_IRQS; \
|
||||
} \
|
||||
\
|
||||
__ret; \
|
||||
})
|
||||
|
||||
#define __irq_is_lpi(t, i) \
|
||||
({ \
|
||||
bool __ret; \
|
||||
\
|
||||
switch (t) { \
|
||||
case KVM_DEV_TYPE_ARM_VGIC_V5: \
|
||||
__ret = is_v5_type(GICV5_HWIRQ_TYPE_LPI, (i)); \
|
||||
break; \
|
||||
default: \
|
||||
__ret = (i) >= 8192; \
|
||||
} \
|
||||
\
|
||||
__ret; \
|
||||
})
|
||||
|
||||
#define irq_is_sgi(k, i) __irq_is_sgi((k)->arch.vgic.vgic_model, i)
|
||||
#define irq_is_ppi(k, i) __irq_is_ppi((k)->arch.vgic.vgic_model, i)
|
||||
#define irq_is_spi(k, i) __irq_is_spi((k)->arch.vgic.vgic_model, i)
|
||||
#define irq_is_lpi(k, i) __irq_is_lpi((k)->arch.vgic.vgic_model, i)
|
||||
|
||||
#define irq_is_private(k, i) (irq_is_ppi(k, i) || irq_is_sgi(k, i))
|
||||
|
||||
#define vgic_v5_get_hwirq_id(x) FIELD_GET(GICV5_HWIRQ_ID, (x))
|
||||
#define vgic_v5_set_hwirq_id(x) FIELD_PREP(GICV5_HWIRQ_ID, (x))
|
||||
|
||||
#define __vgic_v5_set_type(t) (FIELD_PREP(GICV5_HWIRQ_TYPE, GICV5_HWIRQ_TYPE_##t))
|
||||
#define vgic_v5_make_ppi(x) (__vgic_v5_set_type(PPI) | vgic_v5_set_hwirq_id(x))
|
||||
#define vgic_v5_make_spi(x) (__vgic_v5_set_type(SPI) | vgic_v5_set_hwirq_id(x))
|
||||
#define vgic_v5_make_lpi(x) (__vgic_v5_set_type(LPI) | vgic_v5_set_hwirq_id(x))
|
||||
|
||||
#define __vgic_is_v(k, v) ((k)->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V##v)
|
||||
#define vgic_is_v3(k) (__vgic_is_v(k, 3))
|
||||
#define vgic_is_v5(k) (__vgic_is_v(k, 5))
|
||||
|
||||
enum vgic_type {
|
||||
VGIC_V2, /* Good ol' GICv2 */
|
||||
@@ -101,6 +190,8 @@ enum vgic_irq_config {
|
||||
VGIC_CONFIG_LEVEL
|
||||
};
|
||||
|
||||
struct vgic_irq;
|
||||
|
||||
/*
|
||||
* Per-irq ops overriding some common behavious.
|
||||
*
|
||||
@@ -119,6 +210,19 @@ struct irq_ops {
|
||||
* peaking into the physical GIC.
|
||||
*/
|
||||
bool (*get_input_level)(int vintid);
|
||||
|
||||
/*
|
||||
* Function pointer to override the queuing of an IRQ.
|
||||
*/
|
||||
bool (*queue_irq_unlock)(struct kvm *kvm, struct vgic_irq *irq,
|
||||
unsigned long flags) __releases(&irq->irq_lock);
|
||||
|
||||
/*
|
||||
* Callback function pointer to either enable or disable direct
|
||||
* injection for a mapped interrupt.
|
||||
*/
|
||||
void (*set_direct_injection)(struct kvm_vcpu *vcpu,
|
||||
struct vgic_irq *irq, bool direct);
|
||||
};
|
||||
|
||||
struct vgic_irq {
|
||||
@@ -238,6 +342,26 @@ struct vgic_redist_region {
|
||||
struct list_head list;
|
||||
};
|
||||
|
||||
struct vgic_v5_vm {
|
||||
/*
|
||||
* We only expose a subset of PPIs to the guest. This subset is a
|
||||
* combination of the PPIs that are actually implemented and what we
|
||||
* actually choose to expose.
|
||||
*/
|
||||
DECLARE_BITMAP(vgic_ppi_mask, VGIC_V5_NR_PRIVATE_IRQS);
|
||||
|
||||
/* A mask of the PPIs that are exposed for userspace to drive. */
|
||||
DECLARE_BITMAP(userspace_ppis, VGIC_V5_NR_PRIVATE_IRQS);
|
||||
|
||||
/*
|
||||
* The HMR itself is handled by the hardware, but we still need to have
|
||||
* a mask that we can use when merging in pending state (only the state
|
||||
* of Edge PPIs is merged back in from the guest an the HMR provides a
|
||||
* convenient way to do that).
|
||||
*/
|
||||
DECLARE_BITMAP(vgic_ppi_hmr, VGIC_V5_NR_PRIVATE_IRQS);
|
||||
};
|
||||
|
||||
struct vgic_dist {
|
||||
bool in_kernel;
|
||||
bool ready;
|
||||
@@ -310,6 +434,11 @@ struct vgic_dist {
|
||||
* else.
|
||||
*/
|
||||
struct its_vm its_vm;
|
||||
|
||||
/*
|
||||
* GICv5 per-VM data.
|
||||
*/
|
||||
struct vgic_v5_vm gicv5_vm;
|
||||
};
|
||||
|
||||
struct vgic_v2_cpu_if {
|
||||
@@ -340,11 +469,40 @@ struct vgic_v3_cpu_if {
|
||||
unsigned int used_lrs;
|
||||
};
|
||||
|
||||
struct vgic_v5_cpu_if {
|
||||
u64 vgic_apr;
|
||||
u64 vgic_vmcr;
|
||||
|
||||
/* PPI register state */
|
||||
DECLARE_BITMAP(vgic_ppi_dvir, VGIC_V5_NR_PRIVATE_IRQS);
|
||||
DECLARE_BITMAP(vgic_ppi_activer, VGIC_V5_NR_PRIVATE_IRQS);
|
||||
DECLARE_BITMAP(vgic_ppi_enabler, VGIC_V5_NR_PRIVATE_IRQS);
|
||||
/* We have one byte (of which 5 bits are used) per PPI for priority */
|
||||
u64 vgic_ppi_priorityr[VGIC_V5_NR_PRIVATE_IRQS / 8];
|
||||
|
||||
/*
|
||||
* The ICSR is re-used across host and guest, and hence it needs to be
|
||||
* saved/restored. Only one copy is required as the host should block
|
||||
* preemption between executing GIC CDRCFG and acccessing the
|
||||
* ICC_ICSR_EL1. A guest, of course, can never guarantee this, and hence
|
||||
* it is the hyp's responsibility to keep the state constistent.
|
||||
*/
|
||||
u64 vgic_icsr;
|
||||
|
||||
struct gicv5_vpe gicv5_vpe;
|
||||
};
|
||||
|
||||
/* What PPI capabilities does a GICv5 host have */
|
||||
struct vgic_v5_ppi_caps {
|
||||
DECLARE_BITMAP(impl_ppi_mask, VGIC_V5_NR_PRIVATE_IRQS);
|
||||
};
|
||||
|
||||
struct vgic_cpu {
|
||||
/* CPU vif control registers for world switch */
|
||||
union {
|
||||
struct vgic_v2_cpu_if vgic_v2;
|
||||
struct vgic_v3_cpu_if vgic_v3;
|
||||
struct vgic_v5_cpu_if vgic_v5;
|
||||
};
|
||||
|
||||
struct vgic_irq *private_irqs;
|
||||
@@ -392,13 +550,17 @@ int kvm_vgic_create(struct kvm *kvm, u32 type);
|
||||
void kvm_vgic_destroy(struct kvm *kvm);
|
||||
void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu);
|
||||
int kvm_vgic_map_resources(struct kvm *kvm);
|
||||
void kvm_vgic_finalize_idregs(struct kvm *kvm);
|
||||
int kvm_vgic_hyp_init(void);
|
||||
void kvm_vgic_init_cpu_hardware(void);
|
||||
|
||||
int kvm_vgic_inject_irq(struct kvm *kvm, struct kvm_vcpu *vcpu,
|
||||
unsigned int intid, bool level, void *owner);
|
||||
void kvm_vgic_set_irq_ops(struct kvm_vcpu *vcpu, u32 vintid,
|
||||
struct irq_ops *ops);
|
||||
void kvm_vgic_clear_irq_ops(struct kvm_vcpu *vcpu, u32 vintid);
|
||||
int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq,
|
||||
u32 vintid, struct irq_ops *ops);
|
||||
u32 vintid);
|
||||
int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int vintid);
|
||||
int kvm_vgic_get_map(struct kvm_vcpu *vcpu, unsigned int vintid);
|
||||
bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int vintid);
|
||||
@@ -414,8 +576,20 @@ u64 vgic_v3_get_misr(struct kvm_vcpu *vcpu);
|
||||
|
||||
#define irqchip_in_kernel(k) (!!((k)->arch.vgic.in_kernel))
|
||||
#define vgic_initialized(k) ((k)->arch.vgic.initialized)
|
||||
#define vgic_valid_spi(k, i) (((i) >= VGIC_NR_PRIVATE_IRQS) && \
|
||||
((i) < (k)->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS))
|
||||
#define vgic_valid_spi(k, i) \
|
||||
({ \
|
||||
bool __ret = irq_is_spi(k, i); \
|
||||
\
|
||||
switch ((k)->arch.vgic.vgic_model) { \
|
||||
case KVM_DEV_TYPE_ARM_VGIC_V5: \
|
||||
__ret &= FIELD_GET(GICV5_HWIRQ_ID, i) < (k)->arch.vgic.nr_spis; \
|
||||
break; \
|
||||
default: \
|
||||
__ret &= (i) < ((k)->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS); \
|
||||
} \
|
||||
\
|
||||
__ret; \
|
||||
})
|
||||
|
||||
bool kvm_vcpu_has_pending_irqs(struct kvm_vcpu *vcpu);
|
||||
void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu);
|
||||
@@ -455,6 +629,11 @@ int vgic_v4_load(struct kvm_vcpu *vcpu);
|
||||
void vgic_v4_commit(struct kvm_vcpu *vcpu);
|
||||
int vgic_v4_put(struct kvm_vcpu *vcpu);
|
||||
|
||||
int vgic_v5_finalize_ppi_state(struct kvm *kvm);
|
||||
bool vgic_v5_ppi_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq,
|
||||
unsigned long flags);
|
||||
void vgic_v5_set_ppi_dvi(struct kvm_vcpu *vcpu, struct vgic_irq *irq, bool dvi);
|
||||
|
||||
bool vgic_state_is_nested(struct kvm_vcpu *vcpu);
|
||||
|
||||
/* CPU HP callbacks */
|
||||
|
||||
@@ -24,6 +24,28 @@
|
||||
#define GICV5_HWIRQ_TYPE_LPI UL(0x2)
|
||||
#define GICV5_HWIRQ_TYPE_SPI UL(0x3)
|
||||
|
||||
/*
|
||||
* Architected PPIs
|
||||
*/
|
||||
#define GICV5_ARCH_PPI_S_DB_PPI 0x0
|
||||
#define GICV5_ARCH_PPI_RL_DB_PPI 0x1
|
||||
#define GICV5_ARCH_PPI_NS_DB_PPI 0x2
|
||||
#define GICV5_ARCH_PPI_SW_PPI 0x3
|
||||
#define GICV5_ARCH_PPI_HACDBSIRQ 0xf
|
||||
#define GICV5_ARCH_PPI_CNTHVS 0x13
|
||||
#define GICV5_ARCH_PPI_CNTHPS 0x14
|
||||
#define GICV5_ARCH_PPI_PMBIRQ 0x15
|
||||
#define GICV5_ARCH_PPI_COMMIRQ 0x16
|
||||
#define GICV5_ARCH_PPI_PMUIRQ 0x17
|
||||
#define GICV5_ARCH_PPI_CTIIRQ 0x18
|
||||
#define GICV5_ARCH_PPI_GICMNT 0x19
|
||||
#define GICV5_ARCH_PPI_CNTHP 0x1a
|
||||
#define GICV5_ARCH_PPI_CNTV 0x1b
|
||||
#define GICV5_ARCH_PPI_CNTHV 0x1c
|
||||
#define GICV5_ARCH_PPI_CNTPS 0x1d
|
||||
#define GICV5_ARCH_PPI_CNTP 0x1e
|
||||
#define GICV5_ARCH_PPI_TRBIRQ 0x1f
|
||||
|
||||
/*
|
||||
* Tables attributes
|
||||
*/
|
||||
@@ -365,6 +387,11 @@ int gicv5_spi_irq_set_type(struct irq_data *d, unsigned int type);
|
||||
int gicv5_irs_iste_alloc(u32 lpi);
|
||||
void gicv5_irs_syncr(void);
|
||||
|
||||
/* Embedded in kvm.arch */
|
||||
struct gicv5_vpe {
|
||||
bool resident;
|
||||
};
|
||||
|
||||
struct gicv5_its_devtab_cfg {
|
||||
union {
|
||||
struct {
|
||||
|
||||
@@ -2366,6 +2366,7 @@ void kvm_unregister_device_ops(u32 type);
|
||||
extern struct kvm_device_ops kvm_mpic_ops;
|
||||
extern struct kvm_device_ops kvm_arm_vgic_v2_ops;
|
||||
extern struct kvm_device_ops kvm_arm_vgic_v3_ops;
|
||||
extern struct kvm_device_ops kvm_arm_vgic_v5_ops;
|
||||
|
||||
#ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
|
||||
|
||||
|
||||
@@ -251,4 +251,62 @@ int ring_buffer_map(struct trace_buffer *buffer, int cpu,
|
||||
void ring_buffer_map_dup(struct trace_buffer *buffer, int cpu);
|
||||
int ring_buffer_unmap(struct trace_buffer *buffer, int cpu);
|
||||
int ring_buffer_map_get_reader(struct trace_buffer *buffer, int cpu);
|
||||
|
||||
struct ring_buffer_desc {
|
||||
int cpu;
|
||||
unsigned int nr_page_va; /* excludes the meta page */
|
||||
unsigned long meta_va;
|
||||
unsigned long page_va[] __counted_by(nr_page_va);
|
||||
};
|
||||
|
||||
struct trace_buffer_desc {
|
||||
int nr_cpus;
|
||||
size_t struct_len;
|
||||
char __data[]; /* list of ring_buffer_desc */
|
||||
};
|
||||
|
||||
static inline struct ring_buffer_desc *__next_ring_buffer_desc(struct ring_buffer_desc *desc)
|
||||
{
|
||||
size_t len = struct_size(desc, page_va, desc->nr_page_va);
|
||||
|
||||
return (struct ring_buffer_desc *)((void *)desc + len);
|
||||
}
|
||||
|
||||
static inline struct ring_buffer_desc *__first_ring_buffer_desc(struct trace_buffer_desc *desc)
|
||||
{
|
||||
return (struct ring_buffer_desc *)(&desc->__data[0]);
|
||||
}
|
||||
|
||||
static inline size_t trace_buffer_desc_size(size_t buffer_size, unsigned int nr_cpus)
|
||||
{
|
||||
unsigned int nr_pages = max(DIV_ROUND_UP(buffer_size, PAGE_SIZE), 2UL) + 1;
|
||||
struct ring_buffer_desc *rbdesc;
|
||||
|
||||
return size_add(offsetof(struct trace_buffer_desc, __data),
|
||||
size_mul(nr_cpus, struct_size(rbdesc, page_va, nr_pages)));
|
||||
}
|
||||
|
||||
#define for_each_ring_buffer_desc(__pdesc, __cpu, __trace_pdesc) \
|
||||
for (__pdesc = __first_ring_buffer_desc(__trace_pdesc), __cpu = 0; \
|
||||
(__cpu) < (__trace_pdesc)->nr_cpus; \
|
||||
(__cpu)++, __pdesc = __next_ring_buffer_desc(__pdesc))
|
||||
|
||||
struct ring_buffer_remote {
|
||||
struct trace_buffer_desc *desc;
|
||||
int (*swap_reader_page)(unsigned int cpu, void *priv);
|
||||
int (*reset)(unsigned int cpu, void *priv);
|
||||
void *priv;
|
||||
};
|
||||
|
||||
int ring_buffer_poll_remote(struct trace_buffer *buffer, int cpu);
|
||||
|
||||
struct trace_buffer *
|
||||
__ring_buffer_alloc_remote(struct ring_buffer_remote *remote,
|
||||
struct lock_class_key *key);
|
||||
|
||||
#define ring_buffer_alloc_remote(remote) \
|
||||
({ \
|
||||
static struct lock_class_key __key; \
|
||||
__ring_buffer_alloc_remote(remote, &__key); \
|
||||
})
|
||||
#endif /* _LINUX_RING_BUFFER_H */
|
||||
|
||||
41
include/linux/ring_buffer_types.h
Normal file
41
include/linux/ring_buffer_types.h
Normal file
@@ -0,0 +1,41 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef _LINUX_RING_BUFFER_TYPES_H
|
||||
#define _LINUX_RING_BUFFER_TYPES_H
|
||||
|
||||
#include <asm/local.h>
|
||||
|
||||
#define TS_SHIFT 27
|
||||
#define TS_MASK ((1ULL << TS_SHIFT) - 1)
|
||||
#define TS_DELTA_TEST (~TS_MASK)
|
||||
|
||||
/*
|
||||
* We need to fit the time_stamp delta into 27 bits.
|
||||
*/
|
||||
static inline bool test_time_stamp(u64 delta)
|
||||
{
|
||||
return !!(delta & TS_DELTA_TEST);
|
||||
}
|
||||
|
||||
#define BUF_PAGE_HDR_SIZE offsetof(struct buffer_data_page, data)
|
||||
|
||||
#define RB_EVNT_HDR_SIZE (offsetof(struct ring_buffer_event, array))
|
||||
#define RB_ALIGNMENT 4U
|
||||
#define RB_MAX_SMALL_DATA (RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
|
||||
#define RB_EVNT_MIN_SIZE 8U /* two 32bit words */
|
||||
|
||||
#ifndef CONFIG_HAVE_64BIT_ALIGNED_ACCESS
|
||||
# define RB_FORCE_8BYTE_ALIGNMENT 0
|
||||
# define RB_ARCH_ALIGNMENT RB_ALIGNMENT
|
||||
#else
|
||||
# define RB_FORCE_8BYTE_ALIGNMENT 1
|
||||
# define RB_ARCH_ALIGNMENT 8U
|
||||
#endif
|
||||
|
||||
#define RB_ALIGN_DATA __aligned(RB_ARCH_ALIGNMENT)
|
||||
|
||||
struct buffer_data_page {
|
||||
u64 time_stamp; /* page time stamp */
|
||||
local_t commit; /* write committed index */
|
||||
unsigned char data[] RB_ALIGN_DATA; /* data of buffer page */
|
||||
};
|
||||
#endif
|
||||
65
include/linux/simple_ring_buffer.h
Normal file
65
include/linux/simple_ring_buffer.h
Normal file
@@ -0,0 +1,65 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef _LINUX_SIMPLE_RING_BUFFER_H
|
||||
#define _LINUX_SIMPLE_RING_BUFFER_H
|
||||
|
||||
#include <linux/list.h>
|
||||
#include <linux/ring_buffer.h>
|
||||
#include <linux/ring_buffer_types.h>
|
||||
#include <linux/types.h>
|
||||
|
||||
/*
|
||||
* Ideally those struct would stay private but the caller needs to know
|
||||
* the allocation size for simple_ring_buffer_init().
|
||||
*/
|
||||
struct simple_buffer_page {
|
||||
struct list_head link;
|
||||
struct buffer_data_page *page;
|
||||
u64 entries;
|
||||
u32 write;
|
||||
u32 id;
|
||||
};
|
||||
|
||||
struct simple_rb_per_cpu {
|
||||
struct simple_buffer_page *tail_page;
|
||||
struct simple_buffer_page *reader_page;
|
||||
struct simple_buffer_page *head_page;
|
||||
struct simple_buffer_page *bpages;
|
||||
struct trace_buffer_meta *meta;
|
||||
u32 nr_pages;
|
||||
|
||||
#define SIMPLE_RB_UNAVAILABLE 0
|
||||
#define SIMPLE_RB_READY 1
|
||||
#define SIMPLE_RB_WRITING 2
|
||||
u32 status;
|
||||
|
||||
u64 last_overrun;
|
||||
u64 write_stamp;
|
||||
|
||||
struct simple_rb_cbs *cbs;
|
||||
};
|
||||
|
||||
int simple_ring_buffer_init(struct simple_rb_per_cpu *cpu_buffer, struct simple_buffer_page *bpages,
|
||||
const struct ring_buffer_desc *desc);
|
||||
|
||||
void simple_ring_buffer_unload(struct simple_rb_per_cpu *cpu_buffer);
|
||||
|
||||
void *simple_ring_buffer_reserve(struct simple_rb_per_cpu *cpu_buffer, unsigned long length,
|
||||
u64 timestamp);
|
||||
|
||||
void simple_ring_buffer_commit(struct simple_rb_per_cpu *cpu_buffer);
|
||||
|
||||
int simple_ring_buffer_enable_tracing(struct simple_rb_per_cpu *cpu_buffer, bool enable);
|
||||
|
||||
int simple_ring_buffer_reset(struct simple_rb_per_cpu *cpu_buffer);
|
||||
|
||||
int simple_ring_buffer_swap_reader_page(struct simple_rb_per_cpu *cpu_buffer);
|
||||
|
||||
int simple_ring_buffer_init_mm(struct simple_rb_per_cpu *cpu_buffer,
|
||||
struct simple_buffer_page *bpages,
|
||||
const struct ring_buffer_desc *desc,
|
||||
void *(*load_page)(unsigned long va),
|
||||
void (*unload_page)(void *va));
|
||||
|
||||
void simple_ring_buffer_unload_mm(struct simple_rb_per_cpu *cpu_buffer,
|
||||
void (*unload_page)(void *));
|
||||
#endif
|
||||
48
include/linux/trace_remote.h
Normal file
48
include/linux/trace_remote.h
Normal file
@@ -0,0 +1,48 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
|
||||
#ifndef _LINUX_TRACE_REMOTE_H
|
||||
#define _LINUX_TRACE_REMOTE_H
|
||||
|
||||
#include <linux/dcache.h>
|
||||
#include <linux/ring_buffer.h>
|
||||
#include <linux/trace_remote_event.h>
|
||||
|
||||
/**
|
||||
* struct trace_remote_callbacks - Callbacks used by Tracefs to control the remote
|
||||
* @init: Called once the remote has been registered. Allows the
|
||||
* caller to extend the Tracefs remote directory
|
||||
* @load_trace_buffer: Called before Tracefs accesses the trace buffer for the first
|
||||
* time. Must return a &trace_buffer_desc
|
||||
* (most likely filled with trace_remote_alloc_buffer())
|
||||
* @unload_trace_buffer:
|
||||
* Called once Tracefs has no use for the trace buffer
|
||||
* (most likely call trace_remote_free_buffer())
|
||||
* @enable_tracing: Called on Tracefs tracing_on. It is expected from the
|
||||
* remote to allow writing.
|
||||
* @swap_reader_page: Called when Tracefs consumes a new page from a
|
||||
* ring-buffer. It is expected from the remote to isolate a
|
||||
* @reset: Called on `echo 0 > trace`. It is expected from the
|
||||
* remote to reset all ring-buffer pages.
|
||||
* new reader-page from the @cpu ring-buffer.
|
||||
* @enable_event: Called on events/event_name/enable. It is expected from
|
||||
* the remote to allow the writing event @id.
|
||||
*/
|
||||
struct trace_remote_callbacks {
|
||||
int (*init)(struct dentry *d, void *priv);
|
||||
struct trace_buffer_desc *(*load_trace_buffer)(unsigned long size, void *priv);
|
||||
void (*unload_trace_buffer)(struct trace_buffer_desc *desc, void *priv);
|
||||
int (*enable_tracing)(bool enable, void *priv);
|
||||
int (*swap_reader_page)(unsigned int cpu, void *priv);
|
||||
int (*reset)(unsigned int cpu, void *priv);
|
||||
int (*enable_event)(unsigned short id, bool enable, void *priv);
|
||||
};
|
||||
|
||||
int trace_remote_register(const char *name, struct trace_remote_callbacks *cbs, void *priv,
|
||||
struct remote_event *events, size_t nr_events);
|
||||
|
||||
int trace_remote_alloc_buffer(struct trace_buffer_desc *desc, size_t desc_size, size_t buffer_size,
|
||||
const struct cpumask *cpumask);
|
||||
|
||||
void trace_remote_free_buffer(struct trace_buffer_desc *desc);
|
||||
|
||||
#endif
|
||||
33
include/linux/trace_remote_event.h
Normal file
33
include/linux/trace_remote_event.h
Normal file
@@ -0,0 +1,33 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
|
||||
#ifndef _LINUX_TRACE_REMOTE_EVENTS_H
|
||||
#define _LINUX_TRACE_REMOTE_EVENTS_H
|
||||
|
||||
struct trace_remote;
|
||||
struct trace_event_fields;
|
||||
struct trace_seq;
|
||||
|
||||
struct remote_event_hdr {
|
||||
unsigned short id;
|
||||
};
|
||||
|
||||
#define REMOTE_EVENT_NAME_MAX 30
|
||||
struct remote_event {
|
||||
char name[REMOTE_EVENT_NAME_MAX];
|
||||
unsigned short id;
|
||||
bool enabled;
|
||||
struct trace_remote *remote;
|
||||
struct trace_event_fields *fields;
|
||||
char *print_fmt;
|
||||
void (*print)(void *evt, struct trace_seq *seq);
|
||||
};
|
||||
|
||||
#define RE_STRUCT(__args...) __args
|
||||
#define re_field(__type, __field) __type __field;
|
||||
|
||||
#define REMOTE_EVENT_FORMAT(__name, __struct) \
|
||||
struct remote_event_format_##__name { \
|
||||
struct remote_event_hdr hdr; \
|
||||
__struct \
|
||||
}
|
||||
#endif
|
||||
73
include/trace/define_remote_events.h
Normal file
73
include/trace/define_remote_events.h
Normal file
@@ -0,0 +1,73 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
|
||||
#include <linux/trace_events.h>
|
||||
#include <linux/trace_remote_event.h>
|
||||
#include <linux/trace_seq.h>
|
||||
#include <linux/stringify.h>
|
||||
|
||||
#define REMOTE_EVENT_INCLUDE(__file) __stringify(../../__file)
|
||||
|
||||
#ifdef REMOTE_EVENT_SECTION
|
||||
# define __REMOTE_EVENT_SECTION(__name) __used __section(REMOTE_EVENT_SECTION"."#__name)
|
||||
#else
|
||||
# define __REMOTE_EVENT_SECTION(__name)
|
||||
#endif
|
||||
|
||||
#define REMOTE_PRINTK_COUNT_ARGS(__args...) \
|
||||
__COUNT_ARGS(, ##__args, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 0)
|
||||
|
||||
#define __remote_printk0() \
|
||||
trace_seq_putc(seq, '\n')
|
||||
|
||||
#define __remote_printk1(__fmt) \
|
||||
trace_seq_puts(seq, " " __fmt "\n") \
|
||||
|
||||
#define __remote_printk2(__fmt, __args...) \
|
||||
do { \
|
||||
trace_seq_putc(seq, ' '); \
|
||||
trace_seq_printf(seq, __fmt, __args); \
|
||||
trace_seq_putc(seq, '\n'); \
|
||||
} while (0)
|
||||
|
||||
/* Apply the appropriate trace_seq sequence according to the number of arguments */
|
||||
#define remote_printk(__args...) \
|
||||
CONCATENATE(__remote_printk, REMOTE_PRINTK_COUNT_ARGS(__args))(__args)
|
||||
|
||||
#define RE_PRINTK(__args...) __args
|
||||
|
||||
#define REMOTE_EVENT(__name, __id, __struct, __printk) \
|
||||
REMOTE_EVENT_FORMAT(__name, __struct); \
|
||||
static void remote_event_print_##__name(void *evt, struct trace_seq *seq) \
|
||||
{ \
|
||||
struct remote_event_format_##__name __maybe_unused *__entry = evt; \
|
||||
trace_seq_puts(seq, #__name); \
|
||||
remote_printk(__printk); \
|
||||
}
|
||||
#include REMOTE_EVENT_INCLUDE(REMOTE_EVENT_INCLUDE_FILE)
|
||||
|
||||
#undef REMOTE_EVENT
|
||||
#undef RE_PRINTK
|
||||
#undef re_field
|
||||
#define re_field(__type, __field) \
|
||||
{ \
|
||||
.type = #__type, .name = #__field, \
|
||||
.size = sizeof(__type), .align = __alignof__(__type), \
|
||||
.is_signed = is_signed_type(__type), \
|
||||
},
|
||||
#define __entry REC
|
||||
#define RE_PRINTK(__fmt, __args...) "\"" __fmt "\", " __stringify(__args)
|
||||
#define REMOTE_EVENT(__name, __id, __struct, __printk) \
|
||||
static struct trace_event_fields remote_event_fields_##__name[] = { \
|
||||
__struct \
|
||||
{} \
|
||||
}; \
|
||||
static char remote_event_print_fmt_##__name[] = __printk; \
|
||||
static struct remote_event __REMOTE_EVENT_SECTION(__name) \
|
||||
remote_event_##__name = { \
|
||||
.name = #__name, \
|
||||
.id = __id, \
|
||||
.fields = remote_event_fields_##__name, \
|
||||
.print_fmt = remote_event_print_fmt_##__name, \
|
||||
.print = remote_event_print_##__name, \
|
||||
}
|
||||
#include REMOTE_EVENT_INCLUDE(REMOTE_EVENT_INCLUDE_FILE)
|
||||
@@ -704,6 +704,11 @@ struct kvm_enable_cap {
|
||||
#define KVM_VM_TYPE_ARM_IPA_SIZE_MASK 0xffULL
|
||||
#define KVM_VM_TYPE_ARM_IPA_SIZE(x) \
|
||||
((x) & KVM_VM_TYPE_ARM_IPA_SIZE_MASK)
|
||||
|
||||
#define KVM_VM_TYPE_ARM_PROTECTED (1UL << 31)
|
||||
#define KVM_VM_TYPE_ARM_MASK (KVM_VM_TYPE_ARM_IPA_SIZE_MASK | \
|
||||
KVM_VM_TYPE_ARM_PROTECTED)
|
||||
|
||||
/*
|
||||
* ioctls for /dev/kvm fds:
|
||||
*/
|
||||
@@ -1227,6 +1232,8 @@ enum kvm_device_type {
|
||||
#define KVM_DEV_TYPE_LOONGARCH_PCHPIC KVM_DEV_TYPE_LOONGARCH_PCHPIC
|
||||
KVM_DEV_TYPE_LOONGARCH_DMSINTC,
|
||||
#define KVM_DEV_TYPE_LOONGARCH_DMSINTC KVM_DEV_TYPE_LOONGARCH_DMSINTC
|
||||
KVM_DEV_TYPE_ARM_VGIC_V5,
|
||||
#define KVM_DEV_TYPE_ARM_VGIC_V5 KVM_DEV_TYPE_ARM_VGIC_V5
|
||||
|
||||
KVM_DEV_TYPE_MAX,
|
||||
|
||||
|
||||
@@ -17,8 +17,8 @@
|
||||
* @entries: Number of entries in the ring-buffer.
|
||||
* @overrun: Number of entries lost in the ring-buffer.
|
||||
* @read: Number of entries that have been read.
|
||||
* @Reserved1: Internal use only.
|
||||
* @Reserved2: Internal use only.
|
||||
* @pages_lost: Number of pages overwritten by the writer.
|
||||
* @pages_touched: Number of pages written by the writer.
|
||||
*/
|
||||
struct trace_buffer_meta {
|
||||
__u32 meta_page_size;
|
||||
@@ -39,8 +39,8 @@ struct trace_buffer_meta {
|
||||
__u64 overrun;
|
||||
__u64 read;
|
||||
|
||||
__u64 Reserved1;
|
||||
__u64 Reserved2;
|
||||
__u64 pages_lost;
|
||||
__u64 pages_touched;
|
||||
};
|
||||
|
||||
#define TRACE_MMAP_IOCTL_GET_READER _IO('R', 0x20)
|
||||
|
||||
@@ -1281,4 +1281,18 @@ config HIST_TRIGGERS_DEBUG
|
||||
|
||||
source "kernel/trace/rv/Kconfig"
|
||||
|
||||
config TRACE_REMOTE
|
||||
bool
|
||||
|
||||
config SIMPLE_RING_BUFFER
|
||||
bool
|
||||
|
||||
config TRACE_REMOTE_TEST
|
||||
tristate "Test module for remote tracing"
|
||||
select TRACE_REMOTE
|
||||
select SIMPLE_RING_BUFFER
|
||||
help
|
||||
This trace remote includes a ring-buffer writer implementation using
|
||||
"simple_ring_buffer". This is solely intending for testing.
|
||||
|
||||
endif # FTRACE
|
||||
|
||||
@@ -128,4 +128,63 @@ obj-$(CONFIG_FPROBE_EVENTS) += trace_fprobe.o
|
||||
obj-$(CONFIG_TRACEPOINT_BENCHMARK) += trace_benchmark.o
|
||||
obj-$(CONFIG_RV) += rv/
|
||||
|
||||
obj-$(CONFIG_TRACE_REMOTE) += trace_remote.o
|
||||
obj-$(CONFIG_SIMPLE_RING_BUFFER) += simple_ring_buffer.o
|
||||
obj-$(CONFIG_TRACE_REMOTE_TEST) += remote_test.o
|
||||
|
||||
#
|
||||
# simple_ring_buffer is used by the pKVM hypervisor which does not have access
|
||||
# to all kernel symbols. Fail the build if forbidden symbols are found.
|
||||
#
|
||||
# undefsyms_base generates a set of compiler and tooling-generated symbols that can
|
||||
# safely be ignored for simple_ring_buffer.
|
||||
#
|
||||
filechk_undefsyms_base = \
|
||||
echo '$(pound)include <linux/atomic.h>'; \
|
||||
echo '$(pound)include <linux/string.h>'; \
|
||||
echo '$(pound)include <asm/page.h>'; \
|
||||
echo 'static char page[PAGE_SIZE] __aligned(PAGE_SIZE);'; \
|
||||
echo 'void undefsyms_base(void *p, int n);'; \
|
||||
echo 'void undefsyms_base(void *p, int n) {'; \
|
||||
echo ' char buffer[256] = { 0 };'; \
|
||||
echo ' u32 u = 0;'; \
|
||||
echo ' memset((char * volatile)page, 8, PAGE_SIZE);'; \
|
||||
echo ' memset((char * volatile)buffer, 8, sizeof(buffer));'; \
|
||||
echo ' memcpy((void * volatile)p, buffer, sizeof(buffer));'; \
|
||||
echo ' cmpxchg((u32 * volatile)&u, 0, 8);'; \
|
||||
echo ' WARN_ON(n == 0xdeadbeef);'; \
|
||||
echo '}'
|
||||
|
||||
$(obj)/undefsyms_base.c: FORCE
|
||||
$(call filechk,undefsyms_base)
|
||||
|
||||
clean-files += undefsyms_base.c
|
||||
|
||||
$(obj)/undefsyms_base.o: $(obj)/undefsyms_base.c
|
||||
|
||||
targets += undefsyms_base.o
|
||||
|
||||
# Ensure KASAN is enabled to avoid logic that may disable FORTIFY_SOURCE when
|
||||
# KASAN is not enabled. undefsyms_base.o does not automatically get KASAN flags
|
||||
# because it is not linked into vmlinux.
|
||||
KASAN_SANITIZE_undefsyms_base.o := y
|
||||
|
||||
UNDEFINED_ALLOWLIST = __asan __gcov __kasan __kcsan __hwasan __sancov __sanitizer __tsan __ubsan __x86_indirect_thunk \
|
||||
__msan simple_ring_buffer \
|
||||
$(shell $(NM) -u $(obj)/undefsyms_base.o 2>/dev/null | awk '{print $$2}')
|
||||
|
||||
quiet_cmd_check_undefined = NM $<
|
||||
cmd_check_undefined = \
|
||||
undefsyms=$$($(NM) -u $< | grep -v $(addprefix -e , $(UNDEFINED_ALLOWLIST)) || true); \
|
||||
if [ -n "$$undefsyms" ]; then \
|
||||
echo "Unexpected symbols in $<:" >&2; \
|
||||
echo "$$undefsyms" >&2; \
|
||||
false; \
|
||||
fi
|
||||
|
||||
$(obj)/%.o.checked: $(obj)/%.o $(obj)/undefsyms_base.o FORCE
|
||||
$(call if_changed,check_undefined)
|
||||
|
||||
always-$(CONFIG_SIMPLE_RING_BUFFER) += simple_ring_buffer.o.checked
|
||||
|
||||
libftrace-y := ftrace.o
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user