KVM: arm64: gic-v5: Enlighten arch timer for GICv5

Now that GICv5 has arrived, the arch timer requires some TLC to address some of the key differences introduced with GICv5. For PPIs on GICv5, the queue_irq_unlock irq_op is used as AP lists are not required at all for GICv5. The arch timer also provides an irq_op of its own - get_input_level. Extend the arch-timer-provided irq_ops to include the PPI op for vgic_v5 guests. When possible, DVI (Direct Virtual Interrupt) is set for PPIs when using a vgic_v5, which directly injects the pending state into the guest. This means that the host never sees these interrupts on behalf of the guest. This has three impacts. * First of all, the kvm_cpu_has_pending_timer check is updated to explicitly check if the timers are expected to fire. * Secondly, mapped timers (which use DVI) must be prevented from firing on the host while a GICv5 guest is running: the host interrupt is forced active prior to entering a GICv5 guest, and its active state is cleared on the return path. This is handled via set_timer_irq_phys_active. * Thirdly, it makes zero sense to attempt to inject state for a DVI'd interrupt. Track which timers are direct, and skip the call to kvm_vgic_inject_irq() for these. The final, but rather important, change is that the architected PPIs for the timers are made mandatory for a GICv5 guest. Attempts to set them to anything else are actively rejected. Once a vgic_v5 is initialised, the arch timer PPIs are also explicitly reinitialised to ensure the correct GICv5-compatible PPIs are used - this also adds the GICv5 PPI type to the IntID. Signed-off-by: Sascha Bischoff <sascha.bischoff@arm.com> Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com> Link: https://patch.msgid.link/20260319154937.3619520-32-sascha.bischoff@arm.com Signed-off-by: Marc Zyngier <maz@kernel.org>
2026-04-18 06:44:00 -04:00 · 2026-03-19 15:57:45 +00:00
parent 91d940cd67
commit 9491c63b6c
5 changed files with 94 additions and 22 deletions
--- a/arch/arm64/kvm/arch_timer.c
+++ b/arch/arm64/kvm/arch_timer.c
@@ -56,6 +56,12 @@ static struct irq_ops arch_timer_irq_ops = {
 	.get_input_level = kvm_arch_timer_get_input_level,
 };

+static struct irq_ops arch_timer_irq_ops_vgic_v5 = {
+	.get_input_level = kvm_arch_timer_get_input_level,
+	.queue_irq_unlock = vgic_v5_ppi_queue_irq_unlock,
+	.set_direct_injection = vgic_v5_set_ppi_dvi,
+};
+
 static int nr_timers(struct kvm_vcpu *vcpu)
 {
 	if (!vcpu_has_nv(vcpu))
@@ -177,6 +183,10 @@ void get_timer_map(struct kvm_vcpu *vcpu, struct timer_map *map)
 		map->emul_ptimer = vcpu_ptimer(vcpu);
 	}

+	map->direct_vtimer->direct = true;
+	if (map->direct_ptimer)
+		map->direct_ptimer->direct = true;
+
 	trace_kvm_get_timer_map(vcpu->vcpu_id, map);
 }

@@ -396,7 +406,11 @@ static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx)

 int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
 {
-	return vcpu_has_wfit_active(vcpu) && wfit_delay_ns(vcpu) == 0;
+	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
+	struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
+
+	return kvm_timer_should_fire(vtimer) || kvm_timer_should_fire(ptimer) ||
+	       (vcpu_has_wfit_active(vcpu) && wfit_delay_ns(vcpu) == 0);
 }

 /*
@@ -447,6 +461,10 @@ static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
 	if (userspace_irqchip(vcpu->kvm))
 		return;

+	/* Skip injecting on GICv5 for directly injected (DVI'd) timers */
+	if (vgic_is_v5(vcpu->kvm) && timer_ctx->direct)
+		return;
+
 	kvm_vgic_inject_irq(vcpu->kvm, vcpu,
 			    timer_irq(timer_ctx),
 			    timer_ctx->irq.level,
@@ -674,6 +692,7 @@ static void kvm_timer_vcpu_load_gic(struct arch_timer_context *ctx)
 		phys_active = kvm_vgic_map_is_active(vcpu, timer_irq(ctx));

 	phys_active |= ctx->irq.level;
+	phys_active |= vgic_is_v5(vcpu->kvm);

 	set_timer_irq_phys_active(ctx, phys_active);
 }
@@ -862,7 +881,8 @@ void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu)
 	get_timer_map(vcpu, &map);

 	if (static_branch_likely(&has_gic_active_state)) {
-		if (vcpu_has_nv(vcpu))
+		/* We don't do NV on GICv5, yet */
+		if (vcpu_has_nv(vcpu) && !vgic_is_v5(vcpu->kvm))
 			kvm_timer_vcpu_load_nested_switch(vcpu, &map);

 		kvm_timer_vcpu_load_gic(map.direct_vtimer);
@@ -932,6 +952,12 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)

 	if (kvm_vcpu_is_blocking(vcpu))
 		kvm_timer_blocking(vcpu);
+
+	if (vgic_is_v5(vcpu->kvm)) {
+		set_timer_irq_phys_active(map.direct_vtimer, false);
+		if (map.direct_ptimer)
+			set_timer_irq_phys_active(map.direct_ptimer, false);
+	}
 }

 void kvm_timer_sync_nested(struct kvm_vcpu *vcpu)
@@ -1095,10 +1121,19 @@ void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu)
 		      HRTIMER_MODE_ABS_HARD);
 }

+/*
+ * This is always called during kvm_arch_init_vm, but will also be
+ * called from kvm_vgic_create if we have a vGICv5.
+ */
 void kvm_timer_init_vm(struct kvm *kvm)
 {
+	/*
+	 * Set up the default PPIs - note that we adjust them based on
+	 * the model of the GIC as GICv5 uses a different way of
+	 * describing interrupts.
+	 */
 	for (int i = 0; i < NR_KVM_TIMERS; i++)
-		kvm->arch.timer_data.ppi[i] = default_ppi[i];
+		kvm->arch.timer_data.ppi[i] = get_vgic_ppi(kvm, default_ppi[i]);
 }

 void kvm_timer_cpu_up(void)
@@ -1267,7 +1302,15 @@ static int timer_irq_set_irqchip_state(struct irq_data *d,

 static void timer_irq_eoi(struct irq_data *d)
 {
-	if (!irqd_is_forwarded_to_vcpu(d))
+	/*
+	 * On a GICv5 host, we still need to call EOI on the parent for
+	 * PPIs. The host driver already handles irqs which are forwarded to
+	 * vcpus, and skips the GIC CDDI while still doing the GIC CDEOI. This
+	 * is required to emulate the EOIMode=1 on GICv5 hardware. Failure to
+	 * call EOI unsurprisingly results in *BAD* lock-ups.
+	 */
+	if (!irqd_is_forwarded_to_vcpu(d) ||
+	    kvm_vgic_global_state.type == VGIC_V5)
 		irq_chip_eoi_parent(d);
 }

@@ -1331,7 +1374,8 @@ static int kvm_irq_init(struct arch_timer_kvm_info *info)
 	host_vtimer_irq = info->virtual_irq;
 	kvm_irq_fixup_flags(host_vtimer_irq, &host_vtimer_irq_flags);

-	if (kvm_vgic_global_state.no_hw_deactivation) {
+	if (kvm_vgic_global_state.no_hw_deactivation ||
+	    kvm_vgic_global_state.type == VGIC_V5) {
 		struct fwnode_handle *fwnode;
 		struct irq_data *data;

@@ -1349,7 +1393,8 @@ static int kvm_irq_init(struct arch_timer_kvm_info *info)
 			return -ENOMEM;
 		}

-		arch_timer_irq_ops.flags |= VGIC_IRQ_SW_RESAMPLE;
+		if (kvm_vgic_global_state.no_hw_deactivation)
+			arch_timer_irq_ops.flags |= VGIC_IRQ_SW_RESAMPLE;
 		WARN_ON(irq_domain_push_irq(domain, host_vtimer_irq,
 					    (void *)TIMER_VTIMER));
 	}
@@ -1500,10 +1545,13 @@ static bool timer_irqs_are_valid(struct kvm_vcpu *vcpu)
 			break;

 		/*
-		 * We know by construction that we only have PPIs, so
-		 * all values are less than 32.
+		 * We know by construction that we only have PPIs, so all values
+		 * are less than 32 for non-GICv5 VGICs. On GICv5, they are
+		 * architecturally defined to be under 32 too. However, we mask
+		 * off most of the bits as we might be presented with a GICv5
+		 * style PPI where the type is encoded in the top-bits.
 		 */
-		ppis |= BIT(irq);
+		ppis |= BIT(irq & 0x1f);
 	}

 	valid = hweight32(ppis) == nr_timers(vcpu);
@@ -1562,7 +1610,8 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu)

 	get_timer_map(vcpu, &map);

-	ops = &arch_timer_irq_ops;
+	ops = vgic_is_v5(vcpu->kvm) ? &arch_timer_irq_ops_vgic_v5 :
+				      &arch_timer_irq_ops;

 	for (int i = 0; i < nr_timers(vcpu); i++)
 		kvm_vgic_set_irq_ops(vcpu, timer_irq(vcpu_get_timer(vcpu, i)), ops);
@@ -1606,12 +1655,11 @@ int kvm_arm_timer_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
 	if (!(irq_is_ppi(vcpu->kvm, irq)))
 		return -EINVAL;

-	mutex_lock(&vcpu->kvm->arch.config_lock);
+	guard(mutex)(&vcpu->kvm->arch.config_lock);

 	if (test_bit(KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE,
 		     &vcpu->kvm->arch.flags)) {
-		ret = -EBUSY;
-		goto out;
+		return -EBUSY;
 	}

 	switch (attr->attr) {
@@ -1628,10 +1676,16 @@ int kvm_arm_timer_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
 		idx = TIMER_HPTIMER;
 		break;
 	default:
-		ret = -ENXIO;
-		goto out;
+		return -ENXIO;
 	}

+	/*
+	 * The PPIs for the Arch Timers are architecturally defined for
+	 * GICv5. Reject anything that changes them from the specified value.
+	 */
+	if (vgic_is_v5(vcpu->kvm) && vcpu->kvm->arch.timer_data.ppi[idx] != irq)
+		return -EINVAL;
+
 	/*
 	 * We cannot validate the IRQ unicity before we run, so take it at
 	 * face value. The verdict will be given on first vcpu run, for each
@@ -1639,8 +1693,6 @@ int kvm_arm_timer_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
 	 */
 	vcpu->kvm->arch.timer_data.ppi[idx] = irq;

-out:
-	mutex_unlock(&vcpu->kvm->arch.config_lock);
 	return ret;
 }

--- a/arch/arm64/kvm/vgic/vgic-init.c
+++ b/arch/arm64/kvm/vgic/vgic-init.c
@@ -173,6 +173,15 @@ int kvm_vgic_create(struct kvm *kvm, u32 type)
 	if (type == KVM_DEV_TYPE_ARM_VGIC_V3)
 		kvm->arch.vgic.nassgicap = system_supports_direct_sgis();

+	/*
+	 * We now know that we have a GICv5. The Arch Timer PPI interrupts may
+	 * have been initialised at this stage, but will have done so assuming
+	 * that we have an older GIC, meaning that the IntIDs won't be
+	 * correct. We init them again, and this time they will be correct.
+	 */
+	if (type == KVM_DEV_TYPE_ARM_VGIC_V5)
+		kvm_timer_init_vm(kvm);
+
 out_unlock:
 	mutex_unlock(&kvm->arch.config_lock);
 	kvm_unlock_all_vcpus(kvm);
--- a/arch/arm64/kvm/vgic/vgic-v5.c
+++ b/arch/arm64/kvm/vgic/vgic-v5.c
@@ -200,8 +200,8 @@ static u32 vgic_v5_get_effective_priority_mask(struct kvm_vcpu *vcpu)
 * need the PPIs to be queued on a per-VCPU AP list. Therefore, sanity check the
 * state, unlock, and return.
 */
-static bool vgic_v5_ppi_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq,
-					 unsigned long flags)
+bool vgic_v5_ppi_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq,
+				  unsigned long flags)
 	__releases(&irq->irq_lock)
 {
 	struct kvm_vcpu *vcpu;
@@ -232,8 +232,7 @@ out_unlock_fail:
 /*
 * Sets/clears the corresponding bit in the ICH_PPI_DVIR register.
 */
-static void vgic_v5_set_ppi_dvi(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
-				bool dvi)
+void vgic_v5_set_ppi_dvi(struct kvm_vcpu *vcpu, struct vgic_irq *irq, bool dvi)
 {
 	struct vgic_v5_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v5;
 	u32 ppi;
--- a/include/kvm/arm_arch_timer.h
+++ b/include/kvm/arm_arch_timer.h
@@ -10,6 +10,8 @@
 #include <linux/clocksource.h>
 #include <linux/hrtimer.h>

+#include <linux/irqchip/arm-gic-v5.h>
+
 enum kvm_arch_timers {
 	TIMER_PTIMER,
 	TIMER_VTIMER,
@@ -47,7 +49,7 @@ struct arch_timer_vm_data {
 	u64	poffset;

 	/* The PPI for each timer, global to the VM */
-	u8	ppi[NR_KVM_TIMERS];
+	u32	ppi[NR_KVM_TIMERS];
 };

 struct arch_timer_context {
@@ -74,6 +76,9 @@ struct arch_timer_context {

 	/* Duplicated state from arch_timer.c for convenience */
 	u32				host_timer_irq;
+
+	/* Is this a direct timer? */
+	bool				direct;
 };

 struct timer_map {
@@ -130,6 +135,10 @@ void kvm_timer_init_vhe(void);
 #define timer_vm_data(ctx)		(&(timer_context_to_vcpu(ctx)->kvm->arch.timer_data))
 #define timer_irq(ctx)			(timer_vm_data(ctx)->ppi[arch_timer_ctx_index(ctx)])

+#define get_vgic_ppi(k, i) (((k)->arch.vgic.vgic_model != KVM_DEV_TYPE_ARM_VGIC_V5) ? \
+			    (i) : (FIELD_PREP(GICV5_HWIRQ_ID, i) |	\
+				   FIELD_PREP(GICV5_HWIRQ_TYPE, GICV5_HWIRQ_TYPE_PPI)))
+
 u64 kvm_arm_timer_read_sysreg(struct kvm_vcpu *vcpu,
 			      enum kvm_arch_timers tmr,
 			      enum kvm_arch_timer_regs treg);
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -627,6 +627,9 @@ void vgic_v4_commit(struct kvm_vcpu *vcpu);
 int vgic_v4_put(struct kvm_vcpu *vcpu);

 int vgic_v5_finalize_ppi_state(struct kvm *kvm);
+bool vgic_v5_ppi_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq,
+				  unsigned long flags);
+void vgic_v5_set_ppi_dvi(struct kvm_vcpu *vcpu, struct vgic_irq *irq, bool dvi);

 bool vgic_state_is_nested(struct kvm_vcpu *vcpu);