From dc31124379b69a758af740bbd981e9e9f04a61d5 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Mon, 24 Nov 2025 11:01:43 -0800 Subject: [PATCH 01/22] arm64: Detect FEAT_XNX Detect the feature in anticipation of using it in KVM. Reviewed-by: Marc Zyngier Tested-by: Marc Zyngier Link: https://msgid.link/20251124190158.177318-2-oupton@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/kernel/cpufeature.c | 7 +++++++ arch/arm64/tools/cpucaps | 1 + 2 files changed, 8 insertions(+) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 5ed401ff79e3..aa3ecae252d3 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -3088,6 +3088,13 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .capability = ARM64_HAS_GICV5_LEGACY, .matches = test_has_gicv5_legacy, }, + { + .desc = "XNX", + .capability = ARM64_HAS_XNX, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .matches = has_cpuid_feature, + ARM64_CPUID_FIELDS(ID_AA64MMFR1_EL1, XNX, IMP) + }, {}, }; diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps index 1b32c1232d28..ee74199107d3 100644 --- a/arch/arm64/tools/cpucaps +++ b/arch/arm64/tools/cpucaps @@ -64,6 +64,7 @@ HAS_TLB_RANGE HAS_VA52 HAS_VIRT_HOST_EXTN HAS_WFXT +HAS_XNX HAFT HW_DBM KVM_HVHE From 2608563b466b9192a9356b18463005da6e138bf9 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Mon, 24 Nov 2025 11:01:44 -0800 Subject: [PATCH 02/22] KVM: arm64: Add support for FEAT_XNX stage-2 permissions FEAT_XNX adds support for encoding separate execute permissions for EL0 and EL1 at stage-2. Add support for this to the page table library, hiding the unintuitive encoding scheme behind generic pX and uX permission flags. Reviewed-by: Marc Zyngier Tested-by: Marc Zyngier Link: https://msgid.link/20251124190158.177318-3-oupton@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/include/asm/kvm_pgtable.h | 15 ++++--- arch/arm64/kvm/hyp/pgtable.c | 58 ++++++++++++++++++++++++---- 2 files changed, 59 insertions(+), 14 deletions(-) diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index 2888b5d03757..c72149a607d6 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -89,7 +89,7 @@ typedef u64 kvm_pte_t; #define KVM_PTE_LEAF_ATTR_HI_S1_XN BIT(54) -#define KVM_PTE_LEAF_ATTR_HI_S2_XN BIT(54) +#define KVM_PTE_LEAF_ATTR_HI_S2_XN GENMASK(54, 53) #define KVM_PTE_LEAF_ATTR_HI_S1_GP BIT(50) @@ -251,12 +251,15 @@ enum kvm_pgtable_stage2_flags { * @KVM_PGTABLE_PROT_SW3: Software bit 3. 
*/ enum kvm_pgtable_prot { - KVM_PGTABLE_PROT_X = BIT(0), - KVM_PGTABLE_PROT_W = BIT(1), - KVM_PGTABLE_PROT_R = BIT(2), + KVM_PGTABLE_PROT_PX = BIT(0), + KVM_PGTABLE_PROT_UX = BIT(1), + KVM_PGTABLE_PROT_X = KVM_PGTABLE_PROT_PX | + KVM_PGTABLE_PROT_UX, + KVM_PGTABLE_PROT_W = BIT(2), + KVM_PGTABLE_PROT_R = BIT(3), - KVM_PGTABLE_PROT_DEVICE = BIT(3), - KVM_PGTABLE_PROT_NORMAL_NC = BIT(4), + KVM_PGTABLE_PROT_DEVICE = BIT(4), + KVM_PGTABLE_PROT_NORMAL_NC = BIT(5), KVM_PGTABLE_PROT_SW0 = BIT(55), KVM_PGTABLE_PROT_SW1 = BIT(56), diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index c351b4abd5db..e1d75f965027 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -661,11 +661,37 @@ void kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu, #define KVM_S2_MEMATTR(pgt, attr) PAGE_S2_MEMATTR(attr, stage2_has_fwb(pgt)) +static int stage2_set_xn_attr(enum kvm_pgtable_prot prot, kvm_pte_t *attr) +{ + bool px, ux; + u8 xn; + + px = prot & KVM_PGTABLE_PROT_PX; + ux = prot & KVM_PGTABLE_PROT_UX; + + if (!cpus_have_final_cap(ARM64_HAS_XNX) && px != ux) + return -EINVAL; + + if (px && ux) + xn = 0b00; + else if (!px && ux) + xn = 0b01; + else if (!px && !ux) + xn = 0b10; + else + xn = 0b11; + + *attr &= ~KVM_PTE_LEAF_ATTR_HI_S2_XN; + *attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_HI_S2_XN, xn); + return 0; +} + static int stage2_set_prot_attr(struct kvm_pgtable *pgt, enum kvm_pgtable_prot prot, kvm_pte_t *ptep) { kvm_pte_t attr; u32 sh = KVM_PTE_LEAF_ATTR_LO_S2_SH_IS; + int r; switch (prot & (KVM_PGTABLE_PROT_DEVICE | KVM_PGTABLE_PROT_NORMAL_NC)) { @@ -685,8 +711,9 @@ static int stage2_set_prot_attr(struct kvm_pgtable *pgt, enum kvm_pgtable_prot p attr = KVM_S2_MEMATTR(pgt, NORMAL); } - if (!(prot & KVM_PGTABLE_PROT_X)) - attr |= KVM_PTE_LEAF_ATTR_HI_S2_XN; + r = stage2_set_xn_attr(prot, &attr); + if (r) + return r; if (prot & KVM_PGTABLE_PROT_R) attr |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R; @@ -715,8 +742,19 @@ enum kvm_pgtable_prot kvm_pgtable_stage2_pte_prot(kvm_pte_t pte) prot |= KVM_PGTABLE_PROT_R; if (pte & KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W) prot |= KVM_PGTABLE_PROT_W; - if (!(pte & KVM_PTE_LEAF_ATTR_HI_S2_XN)) - prot |= KVM_PGTABLE_PROT_X; + + switch (FIELD_GET(KVM_PTE_LEAF_ATTR_HI_S2_XN, pte)) { + case 0b00: + prot |= KVM_PGTABLE_PROT_PX | KVM_PGTABLE_PROT_UX; + break; + case 0b01: + prot |= KVM_PGTABLE_PROT_UX; + break; + case 0b11: + prot |= KVM_PGTABLE_PROT_PX; + break; + default: + } return prot; } @@ -1290,9 +1328,9 @@ bool kvm_pgtable_stage2_test_clear_young(struct kvm_pgtable *pgt, u64 addr, int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr, enum kvm_pgtable_prot prot, enum kvm_pgtable_walk_flags flags) { - int ret; + kvm_pte_t xn = 0, set = 0, clr = 0; s8 level; - kvm_pte_t set = 0, clr = 0; + int ret; if (prot & KVM_PTE_LEAF_ATTR_HI_SW) return -EINVAL; @@ -1303,8 +1341,12 @@ int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr, if (prot & KVM_PGTABLE_PROT_W) set |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W; - if (prot & KVM_PGTABLE_PROT_X) - clr |= KVM_PTE_LEAF_ATTR_HI_S2_XN; + ret = stage2_set_xn_attr(prot, &xn); + if (ret) + return ret; + + set |= xn & KVM_PTE_LEAF_ATTR_HI_S2_XN; + clr |= ~xn & KVM_PTE_LEAF_ATTR_HI_S2_XN; ret = stage2_update_leaf_attrs(pgt, addr, 1, set, clr, NULL, &level, flags); if (!ret || ret == -EAGAIN) From d93febe2ed2e0491af9d47f0ee6d4b01918877f4 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Mon, 24 Nov 2025 11:01:45 -0800 Subject: [PATCH 03/22] KVM: arm64: nv: Forward FEAT_XNX permissions to the shadow stage-2 
Add support for FEAT_XNX to shadow stage-2 MMUs, being careful to only evaluate XN[0] when the feature is actually exposed to the VM. Restructure the layering of permissions in the fault handler to assume pX and uX, then restrict based on the guest's stage-2 afterwards. Reviewed-by: Marc Zyngier Tested-by: Marc Zyngier Link: https://msgid.link/20251124190158.177318-4-oupton@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/include/asm/kvm_nested.h | 37 +++++++++++++++++++++++++++-- arch/arm64/kvm/mmu.c | 23 ++++++++++++++---- arch/arm64/kvm/nested.c | 5 +++- 3 files changed, 57 insertions(+), 8 deletions(-) diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h index f7c06a840963..5d967b60414c 100644 --- a/arch/arm64/include/asm/kvm_nested.h +++ b/arch/arm64/include/asm/kvm_nested.h @@ -120,9 +120,42 @@ static inline bool kvm_s2_trans_writable(struct kvm_s2_trans *trans) return trans->writable; } -static inline bool kvm_s2_trans_executable(struct kvm_s2_trans *trans) +static inline bool kvm_has_xnx(struct kvm *kvm) { - return !(trans->desc & BIT(54)); + return cpus_have_final_cap(ARM64_HAS_XNX) && + kvm_has_feat(kvm, ID_AA64MMFR1_EL1, XNX, IMP); +} + +static inline bool kvm_s2_trans_exec_el0(struct kvm *kvm, struct kvm_s2_trans *trans) +{ + u8 xn = FIELD_GET(KVM_PTE_LEAF_ATTR_HI_S2_XN, trans->desc); + + if (!kvm_has_xnx(kvm)) + xn &= FIELD_PREP(KVM_PTE_LEAF_ATTR_HI_S2_XN, 0b10); + + switch (xn) { + case 0b00: + case 0b01: + return true; + default: + return false; + } +} + +static inline bool kvm_s2_trans_exec_el1(struct kvm *kvm, struct kvm_s2_trans *trans) +{ + u8 xn = FIELD_GET(KVM_PTE_LEAF_ATTR_HI_S2_XN, trans->desc); + + if (!kvm_has_xnx(kvm)) + xn &= FIELD_PREP(KVM_PTE_LEAF_ATTR_HI_S2_XN, 0b10); + + switch (xn) { + case 0b00: + case 0b11: + return true; + default: + return false; + } } extern int kvm_walk_nested_s2(struct kvm_vcpu *vcpu, phys_addr_t gipa, diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 7cc964af8d30..96f1786c72fe 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -1521,6 +1521,16 @@ static void adjust_nested_fault_perms(struct kvm_s2_trans *nested, *prot |= kvm_encode_nested_level(nested); } +static void adjust_nested_exec_perms(struct kvm *kvm, + struct kvm_s2_trans *nested, + enum kvm_pgtable_prot *prot) +{ + if (!kvm_s2_trans_exec_el0(kvm, nested)) + *prot &= ~KVM_PGTABLE_PROT_UX; + if (!kvm_s2_trans_exec_el1(kvm, nested)) + *prot &= ~KVM_PGTABLE_PROT_PX; +} + #define KVM_PGTABLE_WALK_MEMABORT_FLAGS (KVM_PGTABLE_WALK_HANDLE_FAULT | KVM_PGTABLE_WALK_SHARED) static int gmem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, @@ -1572,11 +1582,12 @@ static int gmem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, if (writable) prot |= KVM_PGTABLE_PROT_W; - if (exec_fault || - (cpus_have_final_cap(ARM64_HAS_CACHE_DIC) && - (!nested || kvm_s2_trans_executable(nested)))) + if (exec_fault || cpus_have_final_cap(ARM64_HAS_CACHE_DIC)) prot |= KVM_PGTABLE_PROT_X; + if (nested) + adjust_nested_exec_perms(kvm, nested, &prot); + kvm_fault_lock(kvm); if (mmu_invalidate_retry(kvm, mmu_seq)) { ret = -EAGAIN; @@ -1851,11 +1862,13 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, prot |= KVM_PGTABLE_PROT_NORMAL_NC; else prot |= KVM_PGTABLE_PROT_DEVICE; - } else if (cpus_have_final_cap(ARM64_HAS_CACHE_DIC) && - (!nested || kvm_s2_trans_executable(nested))) { + } else if (cpus_have_final_cap(ARM64_HAS_CACHE_DIC)) { prot |= KVM_PGTABLE_PROT_X; } + if (nested) + adjust_nested_exec_perms(kvm, 
nested, &prot); + /* * Under the premise of getting a FSC_PERM fault, we just need to relax * permissions only if vma_pagesize equals fault_granule. Otherwise, diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c index f04cda40545b..92b2a69f0b89 100644 --- a/arch/arm64/kvm/nested.c +++ b/arch/arm64/kvm/nested.c @@ -788,7 +788,10 @@ int kvm_s2_handle_perm_fault(struct kvm_vcpu *vcpu, struct kvm_s2_trans *trans) return 0; if (kvm_vcpu_trap_is_iabt(vcpu)) { - forward_fault = !kvm_s2_trans_executable(trans); + if (vcpu_mode_priv(vcpu)) + forward_fault = !kvm_s2_trans_exec_el1(vcpu->kvm, trans); + else + forward_fault = !kvm_s2_trans_exec_el0(vcpu->kvm, trans); } else { bool write_fault = kvm_is_write_fault(vcpu); From 6b49f70022ed607bab66da60c7b332f39cda4ff1 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Mon, 24 Nov 2025 11:01:46 -0800 Subject: [PATCH 04/22] KVM: arm64: Teach ptdump about FEAT_XNX permissions Although KVM doesn't make direct use of the feature, guest hypervisors can use FEAT_XNX which influences the permissions of the shadow stage-2. Update ptdump to separately print the privileged and unprivileged execute permissions. Reviewed-by: Marc Zyngier Tested-by: Marc Zyngier Link: https://msgid.link/20251124190158.177318-5-oupton@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/kvm/ptdump.c | 35 +++++++++++++++++++++++++++-------- 1 file changed, 27 insertions(+), 8 deletions(-) diff --git a/arch/arm64/kvm/ptdump.c b/arch/arm64/kvm/ptdump.c index dc5acfb00af9..6cbe018fd6fd 100644 --- a/arch/arm64/kvm/ptdump.c +++ b/arch/arm64/kvm/ptdump.c @@ -31,27 +31,46 @@ static const struct ptdump_prot_bits stage2_pte_bits[] = { .val = PTE_VALID, .set = " ", .clear = "F", - }, { + }, + { .mask = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R, .val = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R, .set = "R", .clear = " ", - }, { + }, + { .mask = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W, .val = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W, .set = "W", .clear = " ", - }, { + }, + { .mask = KVM_PTE_LEAF_ATTR_HI_S2_XN, - .val = KVM_PTE_LEAF_ATTR_HI_S2_XN, - .set = "NX", - .clear = "x ", - }, { + .val = 0b00UL << __bf_shf(KVM_PTE_LEAF_ATTR_HI_S2_XN), + .set = "px ux ", + }, + { + .mask = KVM_PTE_LEAF_ATTR_HI_S2_XN, + .val = 0b01UL << __bf_shf(KVM_PTE_LEAF_ATTR_HI_S2_XN), + .set = "PXNux ", + }, + { + .mask = KVM_PTE_LEAF_ATTR_HI_S2_XN, + .val = 0b10UL << __bf_shf(KVM_PTE_LEAF_ATTR_HI_S2_XN), + .set = "PXNUXN", + }, + { + .mask = KVM_PTE_LEAF_ATTR_HI_S2_XN, + .val = 0b11UL << __bf_shf(KVM_PTE_LEAF_ATTR_HI_S2_XN), + .set = "px UXN", + }, + { .mask = KVM_PTE_LEAF_ATTR_LO_S2_AF, .val = KVM_PTE_LEAF_ATTR_LO_S2_AF, .set = "AF", .clear = " ", - }, { + }, + { .mask = PMD_TYPE_MASK, .val = PMD_TYPE_SECT, .set = "BLK", From 692650bd7b12532798c2022cb53869a07a288fbe Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Mon, 24 Nov 2025 11:01:47 -0800 Subject: [PATCH 05/22] KVM: arm64: nv: Advertise support for FEAT_XNX Everything is in place to support FEAT_XNX, advertise support. 
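For reference, the XN[1:0] encoding that the pX/uX flags map onto, reduced to a standalone sketch (this mirrors stage2_set_xn_attr() from earlier in the series; the function below is illustrative, not kernel code). Note that 0b10 is the only encoding that exists without FEAT_XNX (the legacy XN bit 54), which is why asymmetric pX/uX requests are rejected on non-XNX hardware, and why the shadow stage-2 helpers mask the field down to XN[1] when the feature isn't exposed to the VM:

  #include <stdbool.h>

  /* Encode a privileged/unprivileged execute pair as stage-2 XN[1:0]. */
  static unsigned int s2_xn_encode(bool px, bool ux)
  {
      if (px && ux)
          return 0b00;    /* executable at EL1 and EL0 */
      if (!px && ux)
          return 0b01;    /* executable at EL0 only */
      if (!px && !ux)
          return 0b10;    /* executable at neither EL (legacy XN) */
      return 0b11;        /* executable at EL1 only */
  }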
Reviewed-by: Marc Zyngier Tested-by: Marc Zyngier Link: https://msgid.link/20251124190158.177318-6-oupton@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/kvm/nested.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c index 92b2a69f0b89..08839a320a45 100644 --- a/arch/arm64/kvm/nested.c +++ b/arch/arm64/kvm/nested.c @@ -1559,7 +1559,6 @@ u64 limit_nv_id_reg(struct kvm *kvm, u32 reg, u64 val) val &= ~(ID_AA64MMFR1_EL1_CMOW | ID_AA64MMFR1_EL1_nTLBPA | ID_AA64MMFR1_EL1_ETS | - ID_AA64MMFR1_EL1_XNX | ID_AA64MMFR1_EL1_HAFDBS); /* FEAT_E2H0 implies no VHE */ if (test_bit(KVM_ARM_VCPU_HAS_EL2_E2H0, kvm->arch.vcpu_features)) From cdba9da34b145eb2f3c502279a454f9a1a8346c1 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Mon, 24 Nov 2025 11:01:48 -0800 Subject: [PATCH 06/22] KVM: arm64: Call helper for reading descriptors directly Going through a function pointer doesn't serve much purpose when there's only one implementation. Reviewed-by: Marc Zyngier Tested-by: Marc Zyngier Link: https://msgid.link/20251124190158.177318-7-oupton@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/kvm/nested.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c index 08839a320a45..e4928a6a3672 100644 --- a/arch/arm64/kvm/nested.c +++ b/arch/arm64/kvm/nested.c @@ -124,7 +124,6 @@ int kvm_vcpu_init_nested(struct kvm_vcpu *vcpu) } struct s2_walk_info { - int (*read_desc)(phys_addr_t pa, u64 *desc, void *data); void *data; u64 baddr; unsigned int max_oa_bits; @@ -199,6 +198,13 @@ static int check_output_size(struct s2_walk_info *wi, phys_addr_t output) return 0; } +static int read_guest_s2_desc(phys_addr_t pa, u64 *desc, void *data) +{ + struct kvm_vcpu *vcpu = data; + + return kvm_read_guest(vcpu->kvm, pa, desc, sizeof(*desc)); +} + /* * This is essentially a C-version of the pseudo code from the ARM ARM * AArch64.TranslationTableWalk function. I strongly recommend looking at @@ -257,7 +263,7 @@ static int walk_nested_s2_pgd(phys_addr_t ipa, >> (addr_bottom - 3); paddr = base_addr | index; - ret = wi->read_desc(paddr, &desc, wi->data); + ret = read_guest_s2_desc(paddr, &desc, wi->data); if (ret < 0) return ret; @@ -325,13 +331,6 @@ static int walk_nested_s2_pgd(phys_addr_t ipa, return 0; } -static int read_guest_s2_desc(phys_addr_t pa, u64 *desc, void *data) -{ - struct kvm_vcpu *vcpu = data; - - return kvm_read_guest(vcpu->kvm, pa, desc, sizeof(*desc)); -} - static void vtcr_to_walk_info(u64 vtcr, struct s2_walk_info *wi) { wi->t0sz = vtcr & TCR_EL2_T0SZ_MASK; @@ -364,7 +363,6 @@ int kvm_walk_nested_s2(struct kvm_vcpu *vcpu, phys_addr_t gipa, if (!vcpu_has_nv(vcpu)) return 0; - wi.read_desc = read_guest_s2_desc; wi.data = vcpu; wi.baddr = vcpu_read_sys_reg(vcpu, VTTBR_EL2); From 977d1bf15c5179276d93468abbc00a224908ff72 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Mon, 24 Nov 2025 11:01:49 -0800 Subject: [PATCH 07/22] KVM: arm64: nv: Stop passing vCPU through void ptr in S2 PTW The stage-2 table walker passes down the vCPU as a void pointer. That might've made sense if the walker was generic although at this point it is clear this will only ever be used in the context of a vCPU. 
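The signature change in isolation (both prototypes are taken from the diff below): dropping the opaque context lets the compiler type-check the one real caller instead of laundering the vCPU through void *:

  /* Before: a 'generic' walker, context recovered with a cast. */
  static int read_guest_s2_desc(phys_addr_t pa, u64 *desc, void *data);

  /* After: there is exactly one user, so name it. */
  static int read_guest_s2_desc(struct kvm_vcpu *vcpu, phys_addr_t pa, u64 *desc);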
Suggested-by: Marc Zyngier Reviewed-by: Marc Zyngier Tested-by: Marc Zyngier Link: https://msgid.link/20251124190158.177318-8-oupton@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/kvm/nested.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c index e4928a6a3672..94eff8307aad 100644 --- a/arch/arm64/kvm/nested.c +++ b/arch/arm64/kvm/nested.c @@ -124,7 +124,6 @@ int kvm_vcpu_init_nested(struct kvm_vcpu *vcpu) } struct s2_walk_info { - void *data; u64 baddr; unsigned int max_oa_bits; unsigned int pgshift; @@ -198,10 +197,8 @@ static int check_output_size(struct s2_walk_info *wi, phys_addr_t output) return 0; } -static int read_guest_s2_desc(phys_addr_t pa, u64 *desc, void *data) +static int read_guest_s2_desc(struct kvm_vcpu *vcpu, phys_addr_t pa, u64 *desc) { - struct kvm_vcpu *vcpu = data; - return kvm_read_guest(vcpu->kvm, pa, desc, sizeof(*desc)); } @@ -212,7 +209,7 @@ static int read_guest_s2_desc(phys_addr_t pa, u64 *desc, void *data) * * Must be called with the kvm->srcu read lock held */ -static int walk_nested_s2_pgd(phys_addr_t ipa, +static int walk_nested_s2_pgd(struct kvm_vcpu *vcpu, phys_addr_t ipa, struct s2_walk_info *wi, struct kvm_s2_trans *out) { int first_block_level, level, stride, input_size, base_lower_bound; @@ -263,7 +260,7 @@ static int walk_nested_s2_pgd(phys_addr_t ipa, >> (addr_bottom - 3); paddr = base_addr | index; - ret = read_guest_s2_desc(paddr, &desc, wi->data); + ret = read_guest_s2_desc(vcpu, paddr, &desc); if (ret < 0) return ret; @@ -363,14 +360,13 @@ int kvm_walk_nested_s2(struct kvm_vcpu *vcpu, phys_addr_t gipa, if (!vcpu_has_nv(vcpu)) return 0; - wi.data = vcpu; wi.baddr = vcpu_read_sys_reg(vcpu, VTTBR_EL2); vtcr_to_walk_info(vtcr, &wi); wi.be = vcpu_read_sys_reg(vcpu, SCTLR_EL2) & SCTLR_ELx_EE; - ret = walk_nested_s2_pgd(gipa, &wi, result); + ret = walk_nested_s2_pgd(vcpu, gipa, &wi, result); if (ret) result->esr |= (kvm_vcpu_get_esr(vcpu) & ~ESR_ELx_FSC); From fabf321cba4be0d0dcbb39e97c3deb572fec2f8d Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Mon, 24 Nov 2025 11:01:50 -0800 Subject: [PATCH 08/22] KVM: arm64: Handle endianness in read helper for emulated PTW Implementing FEAT_HAFDBS means adding another descriptor accessor that needs to deal with the guest-configured endianness. Prepare by moving the endianness handling into the read accessor and out of the main body of the S1/S2 PTWs. 
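A userspace model of the centralized fixup, with the glibc byte-order helpers standing in for the kernel's be64_to_cpu()/le64_to_cpu() (illustrative only): the doubleword fetched from guest memory is in the endianness the guest hypervisor configured via SCTLR_ELx.EE, not the host's, so every accessor has to normalize it before the walker interprets the bits:

  #include <endian.h>
  #include <stdbool.h>
  #include <stdint.h>

  static uint64_t desc_to_cpu(uint64_t raw, bool guest_big_endian)
  {
      return guest_big_endian ? be64toh(raw) : le64toh(raw);
  }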
Reviewed-by: Marc Zyngier Tested-by: Marc Zyngier Link: https://msgid.link/20251124190158.177318-9-oupton@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/kvm/at.c | 25 +++++++++++++++++++------ arch/arm64/kvm/nested.c | 32 ++++++++++++++++++++------------ 2 files changed, 39 insertions(+), 18 deletions(-) diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c index be26d5aa668c..a295a37dd3b1 100644 --- a/arch/arm64/kvm/at.c +++ b/arch/arm64/kvm/at.c @@ -362,6 +362,24 @@ transfault: return -EFAULT; } +static int kvm_read_s1_desc(struct kvm_vcpu *vcpu, u64 pa, u64 *desc, + struct s1_walk_info *wi) +{ + u64 val; + int r; + + r = kvm_read_guest(vcpu->kvm, pa, &val, sizeof(val)); + if (r) + return r; + + if (wi->be) + *desc = be64_to_cpu((__force __be64)val); + else + *desc = le64_to_cpu((__force __le64)val); + + return 0; +} + static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, struct s1_walk_result *wr, u64 va) { @@ -414,17 +432,12 @@ static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, return ret; } - ret = kvm_read_guest(vcpu->kvm, ipa, &desc, sizeof(desc)); + ret = kvm_read_s1_desc(vcpu, ipa, &desc, wi); if (ret) { fail_s1_walk(wr, ESR_ELx_FSC_SEA_TTW(level), false); return ret; } - if (wi->be) - desc = be64_to_cpu((__force __be64)desc); - else - desc = le64_to_cpu((__force __le64)desc); - /* Invalid descriptor */ if (!(desc & BIT(0))) goto transfault; diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c index 94eff8307aad..75d26c0ba3e0 100644 --- a/arch/arm64/kvm/nested.c +++ b/arch/arm64/kvm/nested.c @@ -197,9 +197,26 @@ static int check_output_size(struct s2_walk_info *wi, phys_addr_t output) return 0; } -static int read_guest_s2_desc(struct kvm_vcpu *vcpu, phys_addr_t pa, u64 *desc) +static int read_guest_s2_desc(struct kvm_vcpu *vcpu, phys_addr_t pa, u64 *desc, + struct s2_walk_info *wi) { - return kvm_read_guest(vcpu->kvm, pa, desc, sizeof(*desc)); + u64 val; + int r; + + r = kvm_read_guest(vcpu->kvm, pa, &val, sizeof(val)); + if (r) + return r; + + /* + * Handle reversedescriptors if endianness differs between the + * host and the guest hypervisor. + */ + if (wi->be) + *desc = be64_to_cpu((__force __be64)val); + else + *desc = le64_to_cpu((__force __le64)val); + + return 0; } /* @@ -260,19 +277,10 @@ static int walk_nested_s2_pgd(struct kvm_vcpu *vcpu, phys_addr_t ipa, >> (addr_bottom - 3); paddr = base_addr | index; - ret = read_guest_s2_desc(vcpu, paddr, &desc); + ret = read_guest_s2_desc(vcpu, paddr, &desc, wi); if (ret < 0) return ret; - /* - * Handle reversedescriptors if endianness differs between the - * host and the guest hypervisor. - */ - if (wi->be) - desc = be64_to_cpu((__force __be64)desc); - else - desc = le64_to_cpu((__force __le64)desc); - /* Check for valid descriptor at this point */ if (!(desc & 1) || ((desc & 3) == 1 && level == 3)) { out->esr = compute_fsc(level, ESR_ELx_FSC_FAULT); From 590e694820bfd70e3de78fcb98b16c98a905230e Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Mon, 24 Nov 2025 11:01:51 -0800 Subject: [PATCH 09/22] KVM: arm64: nv: Use pgtable definitions in stage-2 walk Use the existing page table definitions instead of magic numbers for the stage-2 table walk. 
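The flavor of the cleanup, lifted from the hunk below. The values are unchanged (S2AP occupies bits [7:6] and AF is bit 10 of a stage-2 descriptor); only the encoding knowledge moves out of the walker and into the named definitions:

  /* Before: */
  out->readable = desc & (0b01 << 6);
  out->writable = desc & (0b10 << 6);

  /* After: */
  out->readable = desc & KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R;
  out->writable = desc & KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W;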
Reviewed-by: Marc Zyngier Tested-by: Marc Zyngier Link: https://msgid.link/20251124190158.177318-10-oupton@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/kvm/nested.c | 21 +++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c index 75d26c0ba3e0..a096766c6ec3 100644 --- a/arch/arm64/kvm/nested.c +++ b/arch/arm64/kvm/nested.c @@ -282,14 +282,23 @@ static int walk_nested_s2_pgd(struct kvm_vcpu *vcpu, phys_addr_t ipa, return ret; /* Check for valid descriptor at this point */ - if (!(desc & 1) || ((desc & 3) == 1 && level == 3)) { + if (!(desc & KVM_PTE_VALID)) { out->esr = compute_fsc(level, ESR_ELx_FSC_FAULT); out->desc = desc; return 1; } - /* We're at the final level or block translation level */ - if ((desc & 3) == 1 || level == 3) + if (FIELD_GET(KVM_PTE_TYPE, desc) == KVM_PTE_TYPE_BLOCK) { + if (level < 3) + break; + + out->esr = compute_fsc(level, ESR_ELx_FSC_FAULT); + out->desc = desc; + return 1; + } + + /* We're at the final level */ + if (level == 3) break; if (check_output_size(wi, desc)) { @@ -316,7 +325,7 @@ static int walk_nested_s2_pgd(struct kvm_vcpu *vcpu, phys_addr_t ipa, return 1; } - if (!(desc & BIT(10))) { + if (!(desc & KVM_PTE_LEAF_ATTR_LO_S2_AF)) { out->esr = compute_fsc(level, ESR_ELx_FSC_ACCESS); out->desc = desc; return 1; @@ -329,8 +338,8 @@ static int walk_nested_s2_pgd(struct kvm_vcpu *vcpu, phys_addr_t ipa, (ipa & GENMASK_ULL(addr_bottom - 1, 0)); out->output = paddr; out->block_size = 1UL << ((3 - level) * stride + wi->pgshift); - out->readable = desc & (0b01 << 6); - out->writable = desc & (0b10 << 6); + out->readable = desc & KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R; + out->writable = desc & KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W; out->level = level; out->desc = desc; return 0; From f6927b41d57390c597a126063e2e518911976878 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Mon, 24 Nov 2025 11:01:52 -0800 Subject: [PATCH 10/22] KVM: arm64: Add helper for swapping guest descriptor Implementing FEAT_HAFDBS in KVM's software PTWs requires the ability to CAS a descriptor to update the in-memory value. Add an accessor to do exactly that, coping with the fact that guest descriptors are in user memory (duh). While FEAT_LSE is required on any system that implements NV, KVM now uses the stage-1 PTW for non-nested use cases, meaning an LL/SC implementation is necessary as well. 
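The helper's contract, modeled in plain C11 atomics (a sketch of what the LSE CAS and LL/SC loop below implement, minus the uaccess window and exception fixups; the memory ordering shown is illustrative, not taken from the patch):

  #include <errno.h>
  #include <stdatomic.h>
  #include <stdint.h>

  /* Install @new only if the descriptor still reads as @old. */
  static int swap_desc_model(_Atomic uint64_t *ptep, uint64_t old, uint64_t new)
  {
      if (!atomic_compare_exchange_strong(ptep, &old, new))
          return -EAGAIN; /* lost the race; the caller restarts the walk */

      return 0;
  }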
Reviewed-by: Marc Zyngier Tested-by: Marc Zyngier Link: https://msgid.link/20251124190158.177318-11-oupton@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/include/asm/kvm_nested.h | 2 + arch/arm64/kvm/at.c | 87 +++++++++++++++++++++++++++++ 2 files changed, 89 insertions(+) diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h index 5d967b60414c..6dbc2908aed9 100644 --- a/arch/arm64/include/asm/kvm_nested.h +++ b/arch/arm64/include/asm/kvm_nested.h @@ -403,4 +403,6 @@ void kvm_handle_s1e2_tlbi(struct kvm_vcpu *vcpu, u32 inst, u64 val); (FIX_VNCR - __c); \ }) +int __kvm_at_swap_desc(struct kvm *kvm, gpa_t ipa, u64 old, u64 new); + #endif /* __ARM64_KVM_NESTED_H */ diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c index a295a37dd3b1..581c4c49d9cd 100644 --- a/arch/arm64/kvm/at.c +++ b/arch/arm64/kvm/at.c @@ -1650,3 +1650,90 @@ int __kvm_find_s1_desc_level(struct kvm_vcpu *vcpu, u64 va, u64 ipa, int *level) return ret; } } + +static int __lse_swap_desc(u64 __user *ptep, u64 old, u64 new) + { + u64 tmp = old; + int ret = 0; + + uaccess_enable_privileged(); + + asm volatile(__LSE_PREAMBLE + "1: cas %[old], %[new], %[addr]\n" + "2:\n" + _ASM_EXTABLE_UACCESS_ERR(1b, 2b, %w[ret]) + : [old] "+r" (old), [addr] "+Q" (*ptep), [ret] "+r" (ret) + : [new] "r" (new) + : "memory"); + + uaccess_disable_privileged(); + + if (ret) + return ret; + if (tmp != old) + return -EAGAIN; + + return ret; +} + +static int __llsc_swap_desc(u64 __user *ptep, u64 old, u64 new) +{ + int ret = 1; + u64 tmp; + + uaccess_enable_privileged(); + + asm volatile("prfm pstl1strm, %[addr]\n" + "1: ldxr %[tmp], %[addr]\n" + "sub %[tmp], %[tmp], %[old]\n" + "cbnz %[tmp], 3f\n" + "2: stlxr %w[ret], %[new], %[addr]\n" + "3:\n" + _ASM_EXTABLE_UACCESS_ERR(1b, 3b, %w[ret]) + _ASM_EXTABLE_UACCESS_ERR(2b, 3b, %w[ret]) + : [ret] "+r" (ret), [addr] "+Q" (*ptep), [tmp] "=&r" (tmp) + : [old] "r" (old), [new] "r" (new) + : "memory"); + + uaccess_disable_privileged(); + + /* STLXR didn't update the descriptor, or the compare failed */ + if (ret == 1) + return -EAGAIN; + + return ret; +} + +int __kvm_at_swap_desc(struct kvm *kvm, gpa_t ipa, u64 old, u64 new) +{ + struct kvm_memory_slot *slot; + unsigned long hva; + u64 __user *ptep; + bool writable; + int offset; + gfn_t gfn; + int r; + + lockdep_assert(srcu_read_lock_held(&kvm->srcu)); + + gfn = ipa >> PAGE_SHIFT; + offset = offset_in_page(ipa); + slot = gfn_to_memslot(kvm, gfn); + hva = gfn_to_hva_memslot_prot(slot, gfn, &writable); + if (kvm_is_error_hva(hva)) + return -EINVAL; + if (!writable) + return -EPERM; + + ptep = (u64 __user *)(hva + offset); + if (cpus_have_final_cap(ARM64_HAS_LSE_ATOMICS)) + r = __lse_swap_desc(ptep, old, new); + else + r = __llsc_swap_desc(ptep, old, new); + + if (r < 0) + return r; + + mark_page_dirty_in_slot(kvm, slot, gfn); + return 0; +} From 92c6443222ca4289191d797ac79176c560886998 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Mon, 24 Nov 2025 11:01:53 -0800 Subject: [PATCH 11/22] KVM: arm64: Propagate PTW errors up to AT emulation KVM's software PTW will soon support 'hardware' updates to the access flag. Similar to fault handling, races to update the descriptor will be handled by restarting the instruction. Prepare for this by propagating errors up to the AT emulation, only retiring the instruction if the walk succeeds. 
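The resulting contract, as a toy model: -EAGAIN must reach the trap handler before PAR_EL1 is written, so that reporting the access as unhandled leaves the PC on the AT instruction and the guest replays it (in sys_regs.c, an access handler returning false skips the kvm_incr_pc() that would otherwise retire the instruction):

  #include <errno.h>
  #include <stdbool.h>

  /* Returns true if the trapped AT retired and the PC may advance. */
  static bool at_trap_handled(int walk_ret)
  {
      if (walk_ret == -EAGAIN)
          return false;   /* replay: PC stays on the AT instruction */

      return true;        /* PAR_EL1 updated; retire the instruction */
  }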
Reviewed-by: Marc Zyngier Tested-by: Marc Zyngier Link: https://msgid.link/20251124190158.177318-12-oupton@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/include/asm/kvm_asm.h | 6 ++--- arch/arm64/kvm/at.c | 43 ++++++++++++++++++++++---------- arch/arm64/kvm/sys_regs.c | 9 ++++--- 3 files changed, 39 insertions(+), 19 deletions(-) diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 9da54d4ee49e..090f7b740bdc 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -246,9 +246,9 @@ extern void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu); extern int __kvm_tlbi_s1e2(struct kvm_s2_mmu *mmu, u64 va, u64 sys_encoding); extern void __kvm_timer_set_cntvoff(u64 cntvoff); -extern void __kvm_at_s1e01(struct kvm_vcpu *vcpu, u32 op, u64 vaddr); -extern void __kvm_at_s1e2(struct kvm_vcpu *vcpu, u32 op, u64 vaddr); -extern void __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr); +extern int __kvm_at_s1e01(struct kvm_vcpu *vcpu, u32 op, u64 vaddr); +extern int __kvm_at_s1e2(struct kvm_vcpu *vcpu, u32 op, u64 vaddr); +extern int __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr); extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu); diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c index 581c4c49d9cd..2a99380ada6f 100644 --- a/arch/arm64/kvm/at.c +++ b/arch/arm64/kvm/at.c @@ -1234,7 +1234,7 @@ static void compute_s1_permissions(struct kvm_vcpu *vcpu, wr->pr &= !pan; } -static u64 handle_at_slow(struct kvm_vcpu *vcpu, u32 op, u64 vaddr) +static int handle_at_slow(struct kvm_vcpu *vcpu, u32 op, u64 vaddr, u64 *par) { struct s1_walk_result wr = {}; struct s1_walk_info wi = {}; @@ -1259,6 +1259,11 @@ static u64 handle_at_slow(struct kvm_vcpu *vcpu, u32 op, u64 vaddr) srcu_read_unlock(&vcpu->kvm->srcu, idx); + /* + * Race to update a descriptor -- restart the walk. + */ + if (ret == -EAGAIN) + return ret; if (ret) goto compute_par; @@ -1292,7 +1297,8 @@ static u64 handle_at_slow(struct kvm_vcpu *vcpu, u32 op, u64 vaddr) fail_s1_walk(&wr, ESR_ELx_FSC_PERM_L(wr.level), false); compute_par: - return compute_par_s1(vcpu, &wi, &wr); + *par = compute_par_s1(vcpu, &wi, &wr); + return 0; } /* @@ -1420,9 +1426,10 @@ static bool par_check_s1_access_fault(u64 par) !(par & SYS_PAR_EL1_S)); } -void __kvm_at_s1e01(struct kvm_vcpu *vcpu, u32 op, u64 vaddr) +int __kvm_at_s1e01(struct kvm_vcpu *vcpu, u32 op, u64 vaddr) { u64 par = __kvm_at_s1e01_fast(vcpu, op, vaddr); + int ret; /* * If PAR_EL1 reports that AT failed on a S1 permission or access @@ -1434,15 +1441,20 @@ void __kvm_at_s1e01(struct kvm_vcpu *vcpu, u32 op, u64 vaddr) */ if ((par & SYS_PAR_EL1_F) && !par_check_s1_perm_fault(par) && - !par_check_s1_access_fault(par)) - par = handle_at_slow(vcpu, op, vaddr); + !par_check_s1_access_fault(par)) { + ret = handle_at_slow(vcpu, op, vaddr, &par); + if (ret) + return ret; + } vcpu_write_sys_reg(vcpu, par, PAR_EL1); + return 0; } -void __kvm_at_s1e2(struct kvm_vcpu *vcpu, u32 op, u64 vaddr) +int __kvm_at_s1e2(struct kvm_vcpu *vcpu, u32 op, u64 vaddr) { u64 par; + int ret; /* * We've trapped, so everything is live on the CPU. 
As we will be @@ -1489,13 +1501,17 @@ void __kvm_at_s1e2(struct kvm_vcpu *vcpu, u32 op, u64 vaddr) } /* We failed the translation, let's replay it in slow motion */ - if ((par & SYS_PAR_EL1_F) && !par_check_s1_perm_fault(par)) - par = handle_at_slow(vcpu, op, vaddr); + if ((par & SYS_PAR_EL1_F) && !par_check_s1_perm_fault(par)) { + ret = handle_at_slow(vcpu, op, vaddr, &par); + if (ret) + return ret; + } vcpu_write_sys_reg(vcpu, par, PAR_EL1); + return 0; } -void __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr) +int __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr) { struct kvm_s2_trans out = {}; u64 ipa, par; @@ -1522,13 +1538,13 @@ void __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr) break; default: WARN_ON_ONCE(1); - return; + return 0; } __kvm_at_s1e01(vcpu, op, vaddr); par = vcpu_read_sys_reg(vcpu, PAR_EL1); if (par & SYS_PAR_EL1_F) - return; + return 0; /* * If we only have a single stage of translation (EL2&0), exit @@ -1536,14 +1552,14 @@ void __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr) */ if (compute_translation_regime(vcpu, op) == TR_EL20 || !(vcpu_read_sys_reg(vcpu, HCR_EL2) & (HCR_VM | HCR_DC))) - return; + return 0; /* Do the stage-2 translation */ ipa = (par & GENMASK_ULL(47, 12)) | (vaddr & GENMASK_ULL(11, 0)); out.esr = 0; ret = kvm_walk_nested_s2(vcpu, ipa, &out); if (ret < 0) - return; + return ret; /* Check the access permission */ if (!out.esr && @@ -1552,6 +1568,7 @@ void __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr) par = compute_par_s12(vcpu, par, &out); vcpu_write_sys_reg(vcpu, par, PAR_EL1); + return 0; } /* diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index e67eb39ddc11..61830eb3607c 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -3767,7 +3767,8 @@ static bool handle_at_s1e01(struct kvm_vcpu *vcpu, struct sys_reg_params *p, { u32 op = sys_insn(p->Op0, p->Op1, p->CRn, p->CRm, p->Op2); - __kvm_at_s1e01(vcpu, op, p->regval); + if (__kvm_at_s1e01(vcpu, op, p->regval)) + return false; return true; } @@ -3784,7 +3785,8 @@ static bool handle_at_s1e2(struct kvm_vcpu *vcpu, struct sys_reg_params *p, return false; } - __kvm_at_s1e2(vcpu, op, p->regval); + if (__kvm_at_s1e2(vcpu, op, p->regval)) + return false; return true; } @@ -3794,7 +3796,8 @@ static bool handle_at_s12(struct kvm_vcpu *vcpu, struct sys_reg_params *p, { u32 op = sys_insn(p->Op0, p->Op1, p->CRn, p->CRm, p->Op2); - __kvm_at_s12(vcpu, op, p->regval); + if (__kvm_at_s12(vcpu, op, p->regval)) + return false; return true; } From bff8aa213dee742b09151a34494418050afed948 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Mon, 24 Nov 2025 11:01:54 -0800 Subject: [PATCH 12/22] KVM: arm64: Implement HW access flag management in stage-1 SW PTW Atomically update the Access flag at stage-1 when the guest has configured the MMU to do so. Make the implementation choice (and liberal interpretation of speculation) that any access type updates the Access flag, including AT and CMO instructions. Restart the entire walk by returning to the exception-generating instruction in the case of a failed Access flag update. 
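The new walk step, reduced to a standalone model (C11 atomics in place of the uaccess CAS helper; illustrative only): when TCR_ELx.HA is set, AF is set in a copy of the descriptor and installed atomically, and a lost race restarts the whole walk:

  #include <errno.h>
  #include <stdatomic.h>
  #include <stdbool.h>
  #include <stdint.h>

  #define PTE_AF (UINT64_C(1) << 10)

  static int visit_desc_model(_Atomic uint64_t *ptep, bool ha)
  {
      uint64_t desc = atomic_load(ptep), new_desc = desc;

      if (ha)
          new_desc |= PTE_AF;

      if (new_desc != desc &&
          !atomic_compare_exchange_strong(ptep, &desc, new_desc))
          return -EAGAIN;     /* raced with another update; restart */

      if (!(new_desc & PTE_AF))
          return -EACCES;     /* Access flag fault, as before */

      return 0;
  }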
Reviewed-by: Marc Zyngier Tested-by: Marc Zyngier Link: https://msgid.link/20251124190158.177318-13-oupton@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/include/asm/kvm_nested.h | 1 + arch/arm64/kvm/at.c | 33 +++++++++++++++++++++++++++-- 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h index 6dbc2908aed9..905c658057a4 100644 --- a/arch/arm64/include/asm/kvm_nested.h +++ b/arch/arm64/include/asm/kvm_nested.h @@ -353,6 +353,7 @@ struct s1_walk_info { bool be; bool s2; bool pa52bit; + bool ha; }; struct s1_walk_result { diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c index 2a99380ada6f..e39f814d247f 100644 --- a/arch/arm64/kvm/at.c +++ b/arch/arm64/kvm/at.c @@ -346,6 +346,8 @@ static int setup_s1_walk(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, wi->baddr &= GENMASK_ULL(wi->max_oa_bits - 1, x); + wi->ha = tcr & TCR_HA; + return 0; addrsz: @@ -380,10 +382,24 @@ static int kvm_read_s1_desc(struct kvm_vcpu *vcpu, u64 pa, u64 *desc, return 0; } +static int kvm_swap_s1_desc(struct kvm_vcpu *vcpu, u64 pa, u64 old, u64 new, + struct s1_walk_info *wi) +{ + if (wi->be) { + old = cpu_to_be64(old); + new = cpu_to_be64(new); + } else { + old = cpu_to_le64(old); + new = cpu_to_le64(new); + } + + return __kvm_at_swap_desc(vcpu->kvm, pa, old, new); +} + static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, struct s1_walk_result *wr, u64 va) { - u64 va_top, va_bottom, baddr, desc; + u64 va_top, va_bottom, baddr, desc, new_desc, ipa; int level, stride, ret; level = wi->sl; @@ -393,7 +409,7 @@ static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, va_top = get_ia_size(wi) - 1; while (1) { - u64 index, ipa; + u64 index; va_bottom = (3 - level) * stride + wi->pgshift; index = (va & GENMASK_ULL(va_top, va_bottom)) >> (va_bottom - 3); @@ -438,6 +454,8 @@ static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, return ret; } + new_desc = desc; + /* Invalid descriptor */ if (!(desc & BIT(0))) goto transfault; @@ -490,6 +508,17 @@ static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, if (check_output_size(baddr & GENMASK(52, va_bottom), wi)) goto addrsz; + if (wi->ha) + new_desc |= PTE_AF; + + if (new_desc != desc) { + ret = kvm_swap_s1_desc(vcpu, ipa, desc, new_desc, wi); + if (ret) + return ret; + + desc = new_desc; + } + if (!(desc & PTE_AF)) { fail_s1_walk(wr, ESR_ELx_FSC_ACCESS_L(level), false); return -EACCES; From e4c7dfac2f1ab848ffd356b3e76827ea404bbd94 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Mon, 24 Nov 2025 11:01:55 -0800 Subject: [PATCH 13/22] KVM: arm64: nv: Implement HW access flag management in stage-2 SW PTW Give the stage-2 walk similar treatment to stage-1: update the access flag during the table walk and do so for any walk context. 
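For context, a sketch (not the actual handler) of the run-loop convention the mmu.c hunk relies on: a positive return from kvm_handle_guest_abort() resumes the guest, so mapping -EAGAIN to 1 simply replays the faulting access after the racing AF update, with no fault injected:

  #include <errno.h>
  #include <stdbool.h>

  /* KVM run loop: > 0 resume guest, 0 exit to userspace, < 0 error. */
  static int abort_handler_model(int walk_ret, bool *forward_to_vel2)
  {
      *forward_to_vel2 = false;

      if (walk_ret == -EAGAIN)
          return 1;                /* re-enter guest, replay the access */
      if (walk_ret < 0)
          return walk_ret;         /* real error */
      if (walk_ret > 0)
          *forward_to_vel2 = true; /* the guest's own stage-2 faulted */

      return 1;                    /* walk done; normal handling elided */
  }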
Reviewed-by: Marc Zyngier Tested-by: Marc Zyngier Link: https://msgid.link/20251124190158.177318-14-oupton@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/kvm/mmu.c | 5 +++++ arch/arm64/kvm/nested.c | 44 ++++++++++++++++++++++++++++++++++------- 2 files changed, 42 insertions(+), 7 deletions(-) diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 96f1786c72fe..b9aebca90f59 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -2012,6 +2012,11 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu) u32 esr; ret = kvm_walk_nested_s2(vcpu, fault_ipa, &nested_trans); + if (ret == -EAGAIN) { + ret = 1; + goto out_unlock; + } + if (ret) { esr = kvm_s2_trans_esr(&nested_trans); kvm_inject_s2_fault(vcpu, esr); diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c index a096766c6ec3..6495442f400a 100644 --- a/arch/arm64/kvm/nested.c +++ b/arch/arm64/kvm/nested.c @@ -124,12 +124,13 @@ int kvm_vcpu_init_nested(struct kvm_vcpu *vcpu) } struct s2_walk_info { - u64 baddr; - unsigned int max_oa_bits; - unsigned int pgshift; - unsigned int sl; - unsigned int t0sz; - bool be; + u64 baddr; + unsigned int max_oa_bits; + unsigned int pgshift; + unsigned int sl; + unsigned int t0sz; + bool be; + bool ha; }; static u32 compute_fsc(int level, u32 fsc) @@ -219,6 +220,20 @@ static int read_guest_s2_desc(struct kvm_vcpu *vcpu, phys_addr_t pa, u64 *desc, return 0; } +static int swap_guest_s2_desc(struct kvm_vcpu *vcpu, phys_addr_t pa, u64 old, u64 new, + struct s2_walk_info *wi) +{ + if (wi->be) { + old = cpu_to_be64(old); + new = cpu_to_be64(new); + } else { + old = cpu_to_le64(old); + new = cpu_to_le64(new); + } + + return __kvm_at_swap_desc(vcpu->kvm, pa, old, new); +} + /* * This is essentially a C-version of the pseudo code from the ARM ARM * AArch64.TranslationTableWalk function. I strongly recommend looking at @@ -232,7 +247,7 @@ static int walk_nested_s2_pgd(struct kvm_vcpu *vcpu, phys_addr_t ipa, int first_block_level, level, stride, input_size, base_lower_bound; phys_addr_t base_addr; unsigned int addr_top, addr_bottom; - u64 desc; /* page table entry */ + u64 desc, new_desc; /* page table entry */ int ret; phys_addr_t paddr; @@ -281,6 +296,8 @@ static int walk_nested_s2_pgd(struct kvm_vcpu *vcpu, phys_addr_t ipa, if (ret < 0) return ret; + new_desc = desc; + /* Check for valid descriptor at this point */ if (!(desc & KVM_PTE_VALID)) { out->esr = compute_fsc(level, ESR_ELx_FSC_FAULT); @@ -325,6 +342,17 @@ static int walk_nested_s2_pgd(struct kvm_vcpu *vcpu, phys_addr_t ipa, return 1; } + if (wi->ha) + new_desc |= KVM_PTE_LEAF_ATTR_LO_S2_AF; + + if (new_desc != desc) { + ret = swap_guest_s2_desc(vcpu, paddr, desc, new_desc, wi); + if (ret) + return ret; + + desc = new_desc; + } + if (!(desc & KVM_PTE_LEAF_ATTR_LO_S2_AF)) { out->esr = compute_fsc(level, ESR_ELx_FSC_ACCESS); out->desc = desc; @@ -363,6 +391,8 @@ static void vtcr_to_walk_info(u64 vtcr, struct s2_walk_info *wi) /* Global limit for now, should eventually be per-VM */ wi->max_oa_bits = min(get_kvm_ipa_limit(), ps_to_output_size(FIELD_GET(VTCR_EL2_PS_MASK, vtcr), false)); + + wi->ha = vtcr & VTCR_EL2_HA; } int kvm_walk_nested_s2(struct kvm_vcpu *vcpu, phys_addr_t gipa, From d5bbb76f447420681abdcfa4ad32344d11188d00 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Mon, 24 Nov 2025 11:01:56 -0800 Subject: [PATCH 14/22] KVM: arm64: nv: Expose hardware access flag management to NV guests Everything is in place to update the access flag at S1 and S2. Expose support for the access flag flavor of FEAT_HAFDBS to NV guests. 
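What limiting the field amounts to, as a standalone sketch (HAFDBS is ID_AA64MMFR1_EL1 bits [3:0]; 0 = none, 1 = AF, 2 = AF+DB per ARM DDI 0487): only the access-flag half is emulated by the software walkers, so anything stronger reported by hardware is clamped before the guest sees it:

  #include <stdint.h>

  #define HAFDBS_MASK UINT64_C(0xf) /* ID_AA64MMFR1_EL1.HAFDBS, bits [3:0] */
  #define HAFDBS_AF   UINT64_C(1)   /* hardware Access flag updates only */

  static uint64_t limit_hafdbs(uint64_t mmfr1)
  {
      uint64_t fld = mmfr1 & HAFDBS_MASK;

      if (fld > HAFDBS_AF)
          fld = HAFDBS_AF;

      return (mmfr1 & ~HAFDBS_MASK) | fld;
  }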
Reviewed-by: Marc Zyngier Tested-by: Marc Zyngier Link: https://msgid.link/20251124190158.177318-15-oupton@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/kvm/nested.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c index 6495442f400a..88d7dfb44410 100644 --- a/arch/arm64/kvm/nested.c +++ b/arch/arm64/kvm/nested.c @@ -1599,11 +1599,13 @@ u64 limit_nv_id_reg(struct kvm *kvm, u32 reg, u64 val) case SYS_ID_AA64MMFR1_EL1: val &= ~(ID_AA64MMFR1_EL1_CMOW | ID_AA64MMFR1_EL1_nTLBPA | - ID_AA64MMFR1_EL1_ETS | - ID_AA64MMFR1_EL1_HAFDBS); + ID_AA64MMFR1_EL1_ETS); + /* FEAT_E2H0 implies no VHE */ if (test_bit(KVM_ARM_VCPU_HAS_EL2_E2H0, kvm->arch.vcpu_features)) val &= ~ID_AA64MMFR1_EL1_VH; + + val = ID_REG_LIMIT_FIELD_ENUM(val, ID_AA64MMFR1_EL1, HAFDBS, AF); break; case SYS_ID_AA64MMFR2_EL1: From 66f188858385d640163fbf866d9c11b7741da91a Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Mon, 24 Nov 2025 11:01:57 -0800 Subject: [PATCH 15/22] KVM: arm64: selftests: Add test for AT emulation Add a basic test for AT emulation in the EL2&0 and EL1&0 translation regimes. Reviewed-by: Marc Zyngier Tested-by: Marc Zyngier Link: https://msgid.link/20251124190158.177318-16-oupton@kernel.org Signed-off-by: Oliver Upton --- tools/testing/selftests/kvm/Makefile.kvm | 1 + tools/testing/selftests/kvm/arm64/at.c | 166 ++++++++++++++++++ .../testing/selftests/kvm/include/kvm_util.h | 1 + tools/testing/selftests/kvm/lib/kvm_util.c | 10 ++ 4 files changed, 178 insertions(+) create mode 100644 tools/testing/selftests/kvm/arm64/at.c diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm index 148d427ff24b..81b3aa54678a 100644 --- a/tools/testing/selftests/kvm/Makefile.kvm +++ b/tools/testing/selftests/kvm/Makefile.kvm @@ -156,6 +156,7 @@ TEST_GEN_PROGS_EXTENDED_x86 += x86/nx_huge_pages_test TEST_GEN_PROGS_arm64 = $(TEST_GEN_PROGS_COMMON) TEST_GEN_PROGS_arm64 += arm64/aarch32_id_regs TEST_GEN_PROGS_arm64 += arm64/arch_timer_edge_cases +TEST_GEN_PROGS_arm64 += arm64/at TEST_GEN_PROGS_arm64 += arm64/debug-exceptions TEST_GEN_PROGS_arm64 += arm64/hello_el2 TEST_GEN_PROGS_arm64 += arm64/host_sve diff --git a/tools/testing/selftests/kvm/arm64/at.c b/tools/testing/selftests/kvm/arm64/at.c new file mode 100644 index 000000000000..acecb6ab5071 --- /dev/null +++ b/tools/testing/selftests/kvm/arm64/at.c @@ -0,0 +1,166 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * at - Test for KVM's AT emulation in the EL2&0 and EL1&0 translation regimes. + */ +#include "kvm_util.h" +#include "processor.h" +#include "test_util.h" +#include "ucall.h" + +#include + +#define TEST_ADDR 0x80000000 + +enum { + CLEAR_ACCESS_FLAG, + TEST_ACCESS_FLAG, +}; + +static u64 *ptep_hva; + +#define copy_el2_to_el1(reg) \ + write_sysreg_s(read_sysreg_s(SYS_##reg##_EL1), SYS_##reg##_EL12) + +/* Yes, this is an ugly hack */ +#define __at(op, addr) write_sysreg_s(addr, op) + +#define test_at_insn(op, expect_fault) \ +do { \ + u64 par, fsc; \ + bool fault; \ + \ + GUEST_SYNC(CLEAR_ACCESS_FLAG); \ + \ + __at(OP_AT_##op, TEST_ADDR); \ + isb(); \ + par = read_sysreg(par_el1); \ + \ + fault = par & SYS_PAR_EL1_F; \ + fsc = FIELD_GET(SYS_PAR_EL1_FST, par); \ + \ + __GUEST_ASSERT((expect_fault) == fault, \ + "AT "#op": %sexpected fault (par: %lx)1", \ + (expect_fault) ? 
"" : "un", par); \ + if ((expect_fault)) { \ + __GUEST_ASSERT(fsc == ESR_ELx_FSC_ACCESS_L(3), \ + "AT "#op": expected access flag fault (par: %lx)", \ + par); \ + } else { \ + GUEST_ASSERT_EQ(FIELD_GET(SYS_PAR_EL1_ATTR, par), MAIR_ATTR_NORMAL); \ + GUEST_ASSERT_EQ(FIELD_GET(SYS_PAR_EL1_SH, par), PTE_SHARED >> 8); \ + GUEST_ASSERT_EQ(par & SYS_PAR_EL1_PA, TEST_ADDR); \ + GUEST_SYNC(TEST_ACCESS_FLAG); \ + } \ +} while (0) + +static void test_at(bool expect_fault) +{ + test_at_insn(S1E2R, expect_fault); + test_at_insn(S1E2W, expect_fault); + + /* Reuse the stage-1 MMU context from EL2 at EL1 */ + copy_el2_to_el1(SCTLR); + copy_el2_to_el1(MAIR); + copy_el2_to_el1(TCR); + copy_el2_to_el1(TTBR0); + copy_el2_to_el1(TTBR1); + + /* Disable stage-2 translation and enter a non-host context */ + write_sysreg(0, vtcr_el2); + write_sysreg(0, vttbr_el2); + sysreg_clear_set(hcr_el2, HCR_EL2_TGE | HCR_EL2_VM, 0); + isb(); + + test_at_insn(S1E1R, expect_fault); + test_at_insn(S1E1W, expect_fault); +} + +static void guest_code(void) +{ + sysreg_clear_set(tcr_el1, TCR_HA, 0); + isb(); + + test_at(true); + + if (!SYS_FIELD_GET(ID_AA64MMFR1_EL1, HAFDBS, read_sysreg(id_aa64mmfr1_el1))) + GUEST_DONE(); + + /* + * KVM's software PTW makes the implementation choice that the AT + * instruction sets the access flag. + */ + sysreg_clear_set(tcr_el1, 0, TCR_HA); + isb(); + test_at(false); + + GUEST_DONE(); +} + +static void handle_sync(struct kvm_vcpu *vcpu, struct ucall *uc) +{ + switch (uc->args[1]) { + case CLEAR_ACCESS_FLAG: + /* + * Delete + reinstall the memslot to invalidate stage-2 + * mappings of the stage-1 page tables, forcing KVM to + * use the 'slow' AT emulation path. + * + * This and clearing the access flag from host userspace + * ensures that the access flag cannot be set speculatively + * and is reliably cleared at the time of the AT instruction. 
+ */ + clear_bit(__ffs(PTE_AF), ptep_hva); + vm_mem_region_reload(vcpu->vm, vcpu->vm->memslots[MEM_REGION_PT]); + break; + case TEST_ACCESS_FLAG: + TEST_ASSERT(test_bit(__ffs(PTE_AF), ptep_hva), + "Expected access flag to be set (desc: %lu)", *ptep_hva); + break; + default: + TEST_FAIL("Unexpected SYNC arg: %lu", uc->args[1]); + } +} + +static void run_test(struct kvm_vcpu *vcpu) +{ + struct ucall uc; + + while (true) { + vcpu_run(vcpu); + switch (get_ucall(vcpu, &uc)) { + case UCALL_DONE: + return; + case UCALL_SYNC: + handle_sync(vcpu, &uc); + continue; + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + return; + default: + TEST_FAIL("Unexpeced ucall: %lu", uc.cmd); + } + } +} + +int main(void) +{ + struct kvm_vcpu_init init; + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + + TEST_REQUIRE(kvm_check_cap(KVM_CAP_ARM_EL2)); + + vm = vm_create(1); + + kvm_get_default_vcpu_target(vm, &init); + init.features[0] |= BIT(KVM_ARM_VCPU_HAS_EL2); + vcpu = aarch64_vcpu_add(vm, 0, &init, guest_code); + kvm_arch_vm_finalize_vcpus(vm); + + virt_map(vm, TEST_ADDR, TEST_ADDR, 1); + ptep_hva = virt_get_pte_hva_at_level(vm, TEST_ADDR, 3); + run_test(vcpu); + + kvm_vm_free(vm); + return 0; +} diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index d3f3e455c031..41467dad9178 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -715,6 +715,7 @@ static inline bool vm_arch_has_protected_memory(struct kvm_vm *vm) #endif void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags); +void vm_mem_region_reload(struct kvm_vm *vm, uint32_t slot); void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa); void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot); struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id); diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 1a93d6361671..d6538bb17740 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -1201,6 +1201,16 @@ void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags) ret, errno, slot, flags); } +void vm_mem_region_reload(struct kvm_vm *vm, uint32_t slot) +{ + struct userspace_mem_region *region = memslot2region(vm, slot); + struct kvm_userspace_memory_region2 tmp = region->region; + + tmp.memory_size = 0; + vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, &tmp); + vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, ®ion->region); +} + /* * VM Memory Region Move * From 36fe022f884bb936d911a0d2e93819aba11daceb Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Mon, 24 Nov 2025 15:54:09 -0800 Subject: [PATCH 16/22] KVM: arm64: Fix compilation when CONFIG_ARM64_USE_LSE_ATOMICS=n __lse_swap_desc() is compiled unconditionally, even if LSE is disabled using the config option. Align with the spirit of the config option and fix some build errors due to __LSE_PREAMBLE being undefined with the application of some ifdeffery. 
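The shape of the fix, generically (CONFIG_FAST_CAS is a made-up option for illustration): provide a stub under the same name so call sites stay free of #ifdefs, with the real body compiled only when the option is enabled:

  #include <errno.h>

  #ifdef CONFIG_FAST_CAS
  static int fast_cas(unsigned long *p, unsigned long old, unsigned long new)
  {
      return __atomic_compare_exchange_n(p, &old, new, 0,
                                         __ATOMIC_SEQ_CST,
                                         __ATOMIC_SEQ_CST) ? 0 : -EAGAIN;
  }
  #else
  static int fast_cas(unsigned long *p, unsigned long old, unsigned long new)
  {
      return -EINVAL; /* fallback path (here, LL/SC) is used instead */
  }
  #endif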
Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202511250700.kAutzJFm-lkp@intel.com/ Link: https://msgid.link/20251124235409.1731253-1-oupton@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/kvm/at.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c index e39f814d247f..f774a02d9393 100644 --- a/arch/arm64/kvm/at.c +++ b/arch/arm64/kvm/at.c @@ -1697,6 +1697,7 @@ int __kvm_find_s1_desc_level(struct kvm_vcpu *vcpu, u64 va, u64 ipa, int *level) } } +#ifdef CONFIG_ARM64_LSE_ATOMICS static int __lse_swap_desc(u64 __user *ptep, u64 old, u64 new) { u64 tmp = old; @@ -1721,6 +1722,12 @@ static int __lse_swap_desc(u64 __user *ptep, u64 old, u64 new) return ret; } +#else +static int __lse_swap_desc(u64 __user *ptep, u64 old, u64 new) +{ + return -EINVAL; +} +#endif static int __llsc_swap_desc(u64 __user *ptep, u64 old, u64 new) { From b0fc8329ec98ea891a5e47821db6aee1d564bff6 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 25 Nov 2025 20:48:48 +0000 Subject: [PATCH 17/22] KVM: arm64: Add endian casting to kvm_swap_s[12]_desc() Keep sparse quiet by explicitly casting endianness conversion when swapping S1 and S2 descriptors. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202511260246.JQDGsQKa-lkp@intel.com/ Closes: https://lore.kernel.org/oe-kbuild-all/202511260344.9XehvH5Q-lkp@intel.com/ Fixes: c59ca4b5b0c3f ("KVM: arm64: Implement HW access flag management in stage-1 SW PTW") Fixes: 39db933ba67f8 ("KVM: arm64: nv: Implement HW access flag management in stage-2 SW PTW") Signed-off-by: Marc Zyngier Link: https://msgid.link/20251125204848.1136383-1-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/kvm/at.c | 8 ++++---- arch/arm64/kvm/nested.c | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c index f774a02d9393..d25fef0f66e2 100644 --- a/arch/arm64/kvm/at.c +++ b/arch/arm64/kvm/at.c @@ -386,11 +386,11 @@ static int kvm_swap_s1_desc(struct kvm_vcpu *vcpu, u64 pa, u64 old, u64 new, struct s1_walk_info *wi) { if (wi->be) { - old = cpu_to_be64(old); - new = cpu_to_be64(new); + old = (__force u64)cpu_to_be64(old); + new = (__force u64)cpu_to_be64(new); } else { - old = cpu_to_le64(old); - new = cpu_to_le64(new); + old = (__force u64)cpu_to_le64(old); + new = (__force u64)cpu_to_le64(new); } return __kvm_at_swap_desc(vcpu->kvm, pa, old, new); diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c index 88d7dfb44410..911fc99ed99d 100644 --- a/arch/arm64/kvm/nested.c +++ b/arch/arm64/kvm/nested.c @@ -224,11 +224,11 @@ static int swap_guest_s2_desc(struct kvm_vcpu *vcpu, phys_addr_t pa, u64 old, u6 struct s2_walk_info *wi) { if (wi->be) { - old = cpu_to_be64(old); - new = cpu_to_be64(new); + old = (__force u64)cpu_to_be64(old); + new = (__force u64)cpu_to_be64(new); } else { - old = cpu_to_le64(old); - new = cpu_to_le64(new); + old = (__force u64)cpu_to_le64(old); + new = (__force u64)cpu_to_le64(new); } return __kvm_at_swap_desc(vcpu->kvm, pa, old, new); From d98a04dc190672bbd29654a159f7621e0c63adcf Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 25 Nov 2025 10:59:15 -0700 Subject: [PATCH 18/22] KVM: arm64: Add break to default case in kvm_pgtable_stage2_pte_prot() Clang warns (or errors with CONFIG_WERROR=y / W=e): arch/arm64/kvm/hyp/pgtable.c:757:2: error: label at end of compound statement is a C23 extension [-Werror,-Wc23-extensions] 757 | } | ^ With older versions of clang (15 and older) and GCC 
(at least the minimum supported, 8.1), this is an unconditional hard error: arch/arm64/kvm/hyp/pgtable.c: In function 'kvm_pgtable_stage2_pte_prot': arch/arm64/kvm/hyp/pgtable.c:756:2: error: label at end of compound statement default: ^~~~~~~ arch/arm64/kvm/hyp/pgtable.c:756:10: error: label at end of compound statement: expected statement default: ^ ; Add a break statement to this default case to clear up the error/warning. Fixes: 2608563b466b ("KVM: arm64: Add support for FEAT_XNX stage-2 permissions") Signed-off-by: Nathan Chancellor Acked-by: Marc Zyngier Link: https://msgid.link/20251125-arm64-kvm-hyp-pgtable-fix-c23-ext-warn-v1-1-98b506ddefbf@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/kvm/hyp/pgtable.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index e1d75f965027..5a1cf42df4c5 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -754,6 +754,7 @@ enum kvm_pgtable_prot kvm_pgtable_stage2_pte_prot(kvm_pte_t pte) prot |= KVM_PGTABLE_PROT_PX; break; default: + break; } return prot; From 05474b7bc75d215a147b44b339ba4e9638b74382 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 28 Nov 2025 17:51:24 +0000 Subject: [PATCH 19/22] KVM: arm64: Fix spelling mistake "Unexpeced" -> "Unexpected" There is a spelling mistake in a TEST_FAIL message. Fix it. Signed-off-by: Colin Ian King Link: https://msgid.link/20251128175124.319094-1-colin.i.king@gmail.com Signed-off-by: Oliver Upton --- tools/testing/selftests/kvm/arm64/at.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/arm64/at.c b/tools/testing/selftests/kvm/arm64/at.c index acecb6ab5071..c8ee6f520734 100644 --- a/tools/testing/selftests/kvm/arm64/at.c +++ b/tools/testing/selftests/kvm/arm64/at.c @@ -137,7 +137,7 @@ static void run_test(struct kvm_vcpu *vcpu) REPORT_GUEST_ASSERT(uc); return; default: - TEST_FAIL("Unexpeced ucall: %lu", uc.cmd); + TEST_FAIL("Unexpected ucall: %lu", uc.cmd); } } } From 93e8d997812b315bbec946e874171a8b7d785eaf Mon Sep 17 00:00:00 2001 From: Alexandru Elisei Date: Fri, 28 Nov 2025 10:09:43 +0000 Subject: [PATCH 20/22] KVM: arm64: Document KVM_PGTABLE_PROT_{UX,PX} Commit 2608563b466b ("KVM: arm64: Add support for FEAT_XNX stage-2 permissions") added the KVM_PGTABLE_PROT_{UX,PX} permissions to stage 2 and to EL2 translation regimes, but left them undocumented. Let's fix that. Signed-off-by: Alexandru Elisei Link: https://msgid.link/20251128100946.74210-2-alexandru.elisei@arm.com Signed-off-by: Oliver Upton --- arch/arm64/include/asm/kvm_pgtable.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index c72149a607d6..611e62331763 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -240,7 +240,9 @@ enum kvm_pgtable_stage2_flags { /** * enum kvm_pgtable_prot - Page-table permissions and attributes. - * @KVM_PGTABLE_PROT_X: Execute permission. + * @KVM_PGTABLE_PROT_UX: Unprivileged execute permission. + * @KVM_PGTABLE_PROT_PX: Privileged execute permission. + * @KVM_PGTABLE_PROT_X: Privileged and unprivileged execute permission. * @KVM_PGTABLE_PROT_W: Write permission. * @KVM_PGTABLE_PROT_R: Read permission. * @KVM_PGTABLE_PROT_DEVICE: Device attributes. 
From e88d60c0aa0abbaade177b84f54a868c67231cd7 Mon Sep 17 00:00:00 2001 From: Alexandru Elisei Date: Fri, 28 Nov 2025 10:09:44 +0000 Subject: [PATCH 21/22] KVM: arm64: at: Use correct HA bit in TCR_EL2 when regime is EL2 According to ARM DDI 0487L.b, the HA bit in TCR_EL2 when the translation regime is EL2 (or !ELIsInHost(EL2)) is bit 21, not 39. Fixes: c59ca4b5b0c3 ("KVM: arm64: Implement HW access flag management in stage-1 SW PTW") Signed-off-by: Alexandru Elisei Link: https://msgid.link/20251128100946.74210-3-alexandru.elisei@arm.com Signed-off-by: Oliver Upton --- arch/arm64/include/asm/kvm_arm.h | 1 + arch/arm64/kvm/at.c | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 1da290aeedce..e500600e4b9b 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -111,6 +111,7 @@ #define TCR_EL2_DS (1UL << 32) #define TCR_EL2_RES1 ((1U << 31) | (1 << 23)) #define TCR_EL2_HPD (1 << 24) +#define TCR_EL2_HA (1 << 21) #define TCR_EL2_TBI (1 << 20) #define TCR_EL2_PS_SHIFT 16 #define TCR_EL2_PS_MASK (7 << TCR_EL2_PS_SHIFT) diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c index d25fef0f66e2..6d41a95f6c60 100644 --- a/arch/arm64/kvm/at.c +++ b/arch/arm64/kvm/at.c @@ -346,7 +346,9 @@ static int setup_s1_walk(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, wi->baddr &= GENMASK_ULL(wi->max_oa_bits - 1, x); - wi->ha = tcr & TCR_HA; + wi->ha = (wi->regime == TR_EL2 ? + FIELD_GET(TCR_EL2_HA, tcr) : + FIELD_GET(TCR_HA, tcr)); return 0; From d52aca1635654805a682afef176473793a63b588 Mon Sep 17 00:00:00 2001 From: Alexandru Elisei Date: Fri, 28 Nov 2025 10:09:46 +0000 Subject: [PATCH 22/22] KVM: arm64: at: Update AF on software walk only if VM has FEAT_HAFDBS A guest can write 1 to TCR_ELx.HA, making the KVM software walker update the access flag in a table descriptor even if FEAT_HAFDBS is not present. Avoid this by making wi->ha depend on FEAT_HAFDBS being enabled in the VM, similar to how the software walker treats FEAT_HPDS. This is not needed for VTCR_EL2.HA, since a guest will always write to the in-memory copy of the register, where the HA bit is masked (set to 0) by KVM if the VM doesn't have FEAT_HAFDBS. Fixes: c59ca4b5b0c3 ("KVM: arm64: Implement HW access flag management in stage-1 SW PTW") Signed-off-by: Alexandru Elisei Link: https://msgid.link/20251128100946.74210-5-alexandru.elisei@arm.com Signed-off-by: Oliver Upton --- arch/arm64/kvm/at.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c index 6d41a95f6c60..53bf70126f81 100644 --- a/arch/arm64/kvm/at.c +++ b/arch/arm64/kvm/at.c @@ -346,7 +346,8 @@ static int setup_s1_walk(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, wi->baddr &= GENMASK_ULL(wi->max_oa_bits - 1, x); - wi->ha = (wi->regime == TR_EL2 ? + wi->ha = kvm_has_feat(vcpu->kvm, ID_AA64MMFR1_EL1, HAFDBS, AF); + wi->ha &= (wi->regime == TR_EL2 ? FIELD_GET(TCR_EL2_HA, tcr) : FIELD_GET(TCR_HA, tcr));
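Taken together, the last two fixes leave the stage-1 walker's HA gating equivalent to the following sketch (standalone and illustrative, not kernel code; bit positions per ARM DDI 0487): AF updates happen only when the VM is allowed FEAT_HAFDBS and the guest enabled HA in the TCR of the regime being walked, using the layout-correct bit:

  #include <stdbool.h>
  #include <stdint.h>

  #define TCR_HA     (UINT64_C(1) << 39) /* TCR_EL1 layout (also TCR_EL2 when E2H is in effect) */
  #define TCR_EL2_HA (UINT64_C(1) << 21) /* TCR_EL2 layout for the EL2 regime */

  static bool walker_updates_af(bool vm_has_hafdbs, bool regime_is_el2, uint64_t tcr)
  {
      if (!vm_has_hafdbs) /* guest-written HA is ignored without FEAT_HAFDBS */
          return false;

      return tcr & (regime_is_el2 ? TCR_EL2_HA : TCR_HA);
  }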