linux/include/kvm/arm_vgic.h

/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2015, 2016 ARM Ltd.
 */
#ifndef __KVM_ARM_VGIC_H
#define __KVM_ARM_VGIC_H

#include <linux/bits.h>
#include <linux/kvm.h>
#include <linux/irqreturn.h>
#include <linux/mutex.h>
#include <linux/refcount.h>
#include <linux/spinlock.h>
#include <linux/static_key.h>
#include <linux/types.h>
#include <linux/xarray.h>
#include <kvm/iodev.h>
#include <linux/list.h>
#include <linux/jump_label.h>

#include <linux/irqchip/arm-gic-v4.h>
#include <linux/irqchip/arm-gic-v5.h>

#define VGIC_V5_MAX_CPUS	512
#define VGIC_V3_MAX_CPUS	512
#define VGIC_V2_MAX_CPUS	8
#define VGIC_NR_IRQS_LEGACY     256
#define VGIC_NR_SGIS		16
#define VGIC_NR_PPIS		16
#define VGIC_NR_PRIVATE_IRQS	(VGIC_NR_SGIS + VGIC_NR_PPIS)
#define VGIC_MAX_SPI		1019
#define VGIC_MAX_RESERVED	1023
#define VGIC_MIN_LPI		8192
#define KVM_IRQCHIP_NUM_PINS	(1020 - 32)

/*
 * GICv5 supports 128 PPIs, but only the first 64 are architected. We only
 * support the timers and PMU in KVM, both of which are architected. Rather than
 * handling twice the state, we instead opt to only support the architected set
 * in KVM for now. At a future stage, this can be bumped up to 128, if required.
 */
#define VGIC_V5_NR_PRIVATE_IRQS	64

#define is_v5_type(t, i)	(FIELD_GET(GICV5_HWIRQ_TYPE, (i)) == (t))

#define __irq_is_sgi(t, i)						\
	({								\
		bool __ret;						\
									\
		switch (t) {						\
		case KVM_DEV_TYPE_ARM_VGIC_V5:				\
			__ret = false;					\
			break;						\
		default:						\
			__ret  = (i) < VGIC_NR_SGIS;			\
		}							\
									\
		__ret;							\
	})

#define __irq_is_ppi(t, i)						\
	({								\
		bool __ret;						\
									\
		switch (t) {						\
		case KVM_DEV_TYPE_ARM_VGIC_V5:				\
			__ret = is_v5_type(GICV5_HWIRQ_TYPE_PPI, (i));	\
			break;						\
		default:						\
			__ret  = (i) >= VGIC_NR_SGIS;			\
			__ret &= (i) < VGIC_NR_PRIVATE_IRQS;		\
		}							\
									\
		__ret;							\
	})

#define __irq_is_spi(t, i)						\
	({								\
		bool __ret;						\
									\
		switch (t) {						\
		case KVM_DEV_TYPE_ARM_VGIC_V5:				\
			__ret = is_v5_type(GICV5_HWIRQ_TYPE_SPI, (i));	\
			break;						\
		default:						\
			__ret  = (i) <= VGIC_MAX_SPI;			\
			__ret &= (i) >= VGIC_NR_PRIVATE_IRQS;		\
		}							\
									\
		__ret;							\
	})

#define __irq_is_lpi(t, i)						\
	({								\
		bool __ret;						\
									\
		switch (t) {						\
		case KVM_DEV_TYPE_ARM_VGIC_V5:				\
			__ret = is_v5_type(GICV5_HWIRQ_TYPE_LPI, (i));	\
			break;						\
		default:						\
			__ret  = (i) >= 8192;				\
		}							\
									\
		__ret;							\
	})

#define irq_is_sgi(k, i) __irq_is_sgi((k)->arch.vgic.vgic_model, i)
#define irq_is_ppi(k, i) __irq_is_ppi((k)->arch.vgic.vgic_model, i)
#define irq_is_spi(k, i) __irq_is_spi((k)->arch.vgic.vgic_model, i)
#define irq_is_lpi(k, i) __irq_is_lpi((k)->arch.vgic.vgic_model, i)

#define irq_is_private(k, i) (irq_is_ppi(k, i) || irq_is_sgi(k, i))

#define vgic_v5_get_hwirq_id(x) FIELD_GET(GICV5_HWIRQ_ID, (x))
#define vgic_v5_set_hwirq_id(x) FIELD_PREP(GICV5_HWIRQ_ID, (x))

#define __vgic_v5_set_type(t) (FIELD_PREP(GICV5_HWIRQ_TYPE, GICV5_HWIRQ_TYPE_##t))
#define vgic_v5_make_ppi(x) (__vgic_v5_set_type(PPI) | vgic_v5_set_hwirq_id(x))
#define vgic_v5_make_spi(x) (__vgic_v5_set_type(SPI) | vgic_v5_set_hwirq_id(x))
#define vgic_v5_make_lpi(x) (__vgic_v5_set_type(LPI) | vgic_v5_set_hwirq_id(x))

#define __vgic_is_v(k, v) ((k)->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V##v)
#define vgic_is_v3(k) (__vgic_is_v(k, 3))
#define vgic_is_v5(k) (__vgic_is_v(k, 5))

enum vgic_type {
	VGIC_V2,		/* Good ol' GICv2 */
	VGIC_V3,		/* New fancy GICv3 */
	VGIC_V5,		/* Newer, fancier GICv5 */
};

/* same for all guests, as depending only on the _host's_ GIC model */
struct vgic_global {
	/* type of the host GIC */
	enum vgic_type		type;

	/* Physical address of vgic virtual cpu interface */
	phys_addr_t		vcpu_base;

	/* GICV mapping, kernel VA */
	void __iomem		*vcpu_base_va;
	/* GICV mapping, HYP VA */
	void __iomem		*vcpu_hyp_va;

	/* virtual control interface mapping, kernel VA */
	void __iomem		*vctrl_base;
	/* virtual control interface mapping, HYP VA */
	void __iomem		*vctrl_hyp;

	/* Physical CPU interface, kernel VA */
	void __iomem		*gicc_base;

	/* Number of implemented list registers */
	int			nr_lr;

	/* Maintenance IRQ number */
	unsigned int		maint_irq;

	/* maximum number of VCPUs allowed (GICv2 limits us to 8) */
	int			max_gic_vcpus;

	/* Only needed for the legacy KVM_CREATE_IRQCHIP */
	bool			can_emulate_gicv2;

	/* Hardware has GICv4? */
	bool			has_gicv4;
	bool			has_gicv4_1;

	/* Pseudo GICv3 from outer space */
	bool			no_hw_deactivation;

	/* GICv3 system register CPU interface */
	struct static_key_false gicv3_cpuif;

	/* GICv3 compat mode on a GICv5 host */
	bool			has_gcie_v3_compat;

	u32			ich_vtr_el2;
};

extern struct vgic_global kvm_vgic_global_state;

#define VGIC_V2_MAX_LRS		(1 << 6)
#define VGIC_V3_MAX_LRS		16
#define VGIC_V3_LR_INDEX(lr)	(VGIC_V3_MAX_LRS - 1 - lr)

enum vgic_irq_config {
	VGIC_CONFIG_EDGE = 0,
	VGIC_CONFIG_LEVEL
};

struct vgic_irq;

/*
 * Per-irq ops overriding some common behavious.
 *
 * Always called in non-preemptible section and the functions can use
 * kvm_arm_get_running_vcpu() to get the vcpu pointer for private IRQs.
 */
struct irq_ops {
	/* Per interrupt flags for special-cased interrupts */
	unsigned long flags;

#define VGIC_IRQ_SW_RESAMPLE	BIT(0)	/* Clear the active state for resampling */

	/*
	 * Callback function pointer to in-kernel devices that can tell us the
	 * state of the input level of mapped level-triggered IRQ faster than
	 * peaking into the physical GIC.
	 */
	bool (*get_input_level)(int vintid);

	/*
	 * Function pointer to override the queuing of an IRQ.
	 */
	bool (*queue_irq_unlock)(struct kvm *kvm, struct vgic_irq *irq,
				unsigned long flags) __releases(&irq->irq_lock);

	/*
	 * Callback function pointer to either enable or disable direct
	 * injection for a mapped interrupt.
	 */
	void (*set_direct_injection)(struct kvm_vcpu *vcpu,
				     struct vgic_irq *irq, bool direct);
};

struct vgic_irq {
	raw_spinlock_t irq_lock;	/* Protects the content of the struct */
	u32 intid;			/* Guest visible INTID */
	struct rcu_head rcu;
	struct list_head ap_list;

	struct kvm_vcpu *vcpu;		/* SGIs and PPIs: The VCPU
					 * SPIs and LPIs: The VCPU whose ap_list
					 * this is queued on.
					 */

	struct kvm_vcpu *target_vcpu;	/* The VCPU that this interrupt should
					 * be sent to, as a result of the
					 * targets reg (v2) or the
					 * affinity reg (v3).
					 */

	bool pending_release:1;		/* Used for LPIs only, unreferenced IRQ
					 * pending a release */

	bool pending_latch:1;		/* The pending latch state used to calculate
					 * the pending state for both level
					 * and edge triggered IRQs. */
	enum vgic_irq_config config:1;	/* Level or edge */
	bool line_level:1;		/* Level only */
	bool enabled:1;
	bool active:1;
	bool hw:1;			/* Tied to HW IRQ */
	bool on_lr:1;			/* Present in a CPU LR */
	refcount_t refcount;		/* Used for LPIs */
	u32 hwintid;			/* HW INTID number */
	unsigned int host_irq;		/* linux irq corresponding to hwintid */
	union {
		u8 targets;			/* GICv2 target VCPUs mask */
		u32 mpidr;			/* GICv3 target VCPU */
	};
	u8 source;			/* GICv2 SGIs only */
	u8 active_source;		/* GICv2 SGIs only */
	u8 priority;
	u8 group;			/* 0 == group 0, 1 == group 1 */

	struct irq_ops *ops;

	void *owner;			/* Opaque pointer to reserve an interrupt
					   for in-kernel devices. */
};

static inline bool vgic_irq_needs_resampling(struct vgic_irq *irq)
{
	return irq->ops && (irq->ops->flags & VGIC_IRQ_SW_RESAMPLE);
}

struct vgic_register_region;
struct vgic_its;

enum iodev_type {
	IODEV_CPUIF,
	IODEV_DIST,
	IODEV_REDIST,
	IODEV_ITS
};

struct vgic_io_device {
	gpa_t base_addr;
	union {
		struct kvm_vcpu *redist_vcpu;
		struct vgic_its *its;
	};
	const struct vgic_register_region *regions;
	enum iodev_type iodev_type;
	int nr_regions;
	struct kvm_io_device dev;
};

struct vgic_its {
	/* The base address of the ITS control register frame */
	gpa_t			vgic_its_base;

	bool			enabled;
	struct vgic_io_device	iodev;
	struct kvm_device	*dev;

	/* These registers correspond to GITS_BASER{0,1} */
	u64			baser_device_table;
	u64			baser_coll_table;

	/* Protects the command queue */
	struct mutex		cmd_lock;
	u64			cbaser;
	u32			creadr;
	u32			cwriter;

	/* migration ABI revision in use */
	u32			abi_rev;

	/* Protects the device and collection lists */
	struct mutex		its_lock;
	struct list_head	device_list;
	struct list_head	collection_list;

	/*
	 * Caches the (device_id, event_id) -> vgic_irq translation for
	 * LPIs that are mapped and enabled.
	 */
	struct xarray		translation_cache;
};

struct vgic_state_iter;

struct vgic_redist_region {
	u32 index;
	gpa_t base;
	u32 count; /* number of redistributors or 0 if single region */
	u32 free_index; /* index of the next free redistributor */
	struct list_head list;
};

struct vgic_v5_vm {
	/*
	 * We only expose a subset of PPIs to the guest. This subset is a
	 * combination of the PPIs that are actually implemented and what we
	 * actually choose to expose.
	 */
	DECLARE_BITMAP(vgic_ppi_mask, VGIC_V5_NR_PRIVATE_IRQS);

	/* A mask of the PPIs that are exposed for userspace to drive. */
	DECLARE_BITMAP(userspace_ppis, VGIC_V5_NR_PRIVATE_IRQS);

	/*
	 * The HMR itself is handled by the hardware, but we still need to have
	 * a mask that we can use when merging in pending state (only the state
	 * of Edge PPIs is merged back in from the guest an the HMR provides a
	 * convenient way to do that).
	 */
	DECLARE_BITMAP(vgic_ppi_hmr, VGIC_V5_NR_PRIVATE_IRQS);
};

struct vgic_dist {
	bool			in_kernel;
	bool			ready;
	bool			initialized;

	/* vGIC model the kernel emulates for the guest (GICv2 or GICv3) */
	u32			vgic_model;

	/* Implementation revision as reported in the GICD_IIDR */
	u32			implementation_rev;
#define KVM_VGIC_IMP_REV_2	2 /* GICv2 restorable groups */
#define KVM_VGIC_IMP_REV_3	3 /* GICv3 GICR_CTLR.{IW,CES,RWP} */
#define KVM_VGIC_IMP_REV_LATEST	KVM_VGIC_IMP_REV_3

	/* Userspace can write to GICv2 IGROUPR */
	bool			v2_groups_user_writable;

	/* Do injected MSIs require an additional device ID? */
	bool			msis_require_devid;

	int			nr_spis;

	/* The GIC maintenance IRQ for nested hypervisors. */
	u32			mi_intid;

	/* Track the number of in-flight active SPIs */
	atomic_t		active_spis;

	/* base addresses in guest physical address space: */
	gpa_t			vgic_dist_base;		/* distributor */
	union {
		/* either a GICv2 CPU interface */
		gpa_t			vgic_cpu_base;
		/* or a number of GICv3 redistributor regions */
		struct list_head rd_regions;
	};

	/* distributor enabled */
	bool			enabled;

	/* Supports SGIs without active state */
	bool			nassgicap;

	/* Wants SGIs without active state */
	bool			nassgireq;

	struct vgic_irq		*spis;

	struct vgic_io_device	dist_iodev;
	struct vgic_io_device	cpuif_iodev;

	bool			has_its;
	bool			table_write_in_progress;

	/*
	 * Contains the attributes and gpa of the LPI configuration table.
	 * Since we report GICR_TYPER.CommonLPIAff as 0b00, we can share
	 * one address across all redistributors.
	 * GICv3 spec: IHI 0069E 6.1.1 "LPI Configuration tables"
	 */
	u64			propbaser;

	struct xarray		lpi_xa;

	/*
	 * GICv4 ITS per-VM data, containing the IRQ domain, the VPE
	 * array, the property table pointer as well as allocation
	 * data. This essentially ties the Linux IRQ core and ITS
	 * together, and avoids leaking KVM's data structures anywhere
	 * else.
	 */
	struct its_vm		its_vm;

	/*
	 * GICv5 per-VM data.
	 */
	struct vgic_v5_vm	gicv5_vm;
};

struct vgic_v2_cpu_if {
	u32		vgic_hcr;
	u32		vgic_vmcr;
	u32		vgic_apr;
	u32		vgic_lr[VGIC_V2_MAX_LRS];

	unsigned int used_lrs;
};

struct vgic_v3_cpu_if {
	u32		vgic_hcr;
	u32		vgic_vmcr;
	u32		vgic_sre;	/* Restored only, change ignored */
	u32		vgic_ap0r[4];
	u32		vgic_ap1r[4];
	u64		vgic_lr[VGIC_V3_MAX_LRS];

	/*
	 * GICv4 ITS per-VPE data, containing the doorbell IRQ, the
	 * pending table pointer, the its_vm pointer and a few other
	 * HW specific things. As for the its_vm structure, this is
	 * linking the Linux IRQ subsystem and the ITS together.
	 */
	struct its_vpe	its_vpe;

	unsigned int used_lrs;
};

struct vgic_v5_cpu_if {
	u64	vgic_apr;
	u64	vgic_vmcr;

	/* PPI register state */
	DECLARE_BITMAP(vgic_ppi_dvir, VGIC_V5_NR_PRIVATE_IRQS);
	DECLARE_BITMAP(vgic_ppi_activer, VGIC_V5_NR_PRIVATE_IRQS);
	DECLARE_BITMAP(vgic_ppi_enabler, VGIC_V5_NR_PRIVATE_IRQS);
	/* We have one byte (of which 5 bits are used) per PPI for priority */
	u64	vgic_ppi_priorityr[VGIC_V5_NR_PRIVATE_IRQS / 8];

	/*
	 * The ICSR is re-used across host and guest, and hence it needs to be
	 * saved/restored. Only one copy is required as the host should block
	 * preemption between executing GIC CDRCFG and acccessing the
	 * ICC_ICSR_EL1. A guest, of course, can never guarantee this, and hence
	 * it is the hyp's responsibility to keep the state constistent.
	 */
	u64	vgic_icsr;

	struct gicv5_vpe gicv5_vpe;
};

/* What PPI capabilities does a GICv5 host have */
struct vgic_v5_ppi_caps {
	DECLARE_BITMAP(impl_ppi_mask, VGIC_V5_NR_PRIVATE_IRQS);
};

struct vgic_cpu {
	/* CPU vif control registers for world switch */
	union {
		struct vgic_v2_cpu_if	vgic_v2;
		struct vgic_v3_cpu_if	vgic_v3;
		struct vgic_v5_cpu_if	vgic_v5;
	};

	struct vgic_irq *private_irqs;

	raw_spinlock_t ap_list_lock;	/* Protects the ap_list */

	/*
	 * List of IRQs that this VCPU should consider because they are either
	 * Active or Pending (hence the name; AP list), or because they recently
	 * were one of the two and need to be migrated off this list to another
	 * VCPU.
	 */
	struct list_head ap_list_head;

	/*
	 * Members below are used with GICv3 emulation only and represent
	 * parts of the redistributor.
	 */
	struct vgic_io_device	rd_iodev;
	struct vgic_redist_region *rdreg;
	u32 rdreg_index;
	atomic_t syncr_busy;

	/* Contains the attributes and gpa of the LPI pending tables. */
	u64 pendbaser;
	/* GICR_CTLR.{ENABLE_LPIS,RWP} */
	atomic_t ctlr;

	/* Cache guest priority bits */
	u32 num_pri_bits;

	/* Cache guest interrupt ID bits */
	u32 num_id_bits;
};

extern struct static_key_false vgic_v2_cpuif_trap;
extern struct static_key_false vgic_v3_cpuif_trap;
extern struct static_key_false vgic_v3_has_v2_compat;

int kvm_set_legacy_vgic_v2_addr(struct kvm *kvm, struct kvm_arm_device_addr *dev_addr);
void kvm_vgic_early_init(struct kvm *kvm);
int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu);
int kvm_vgic_vcpu_nv_init(struct kvm_vcpu *vcpu);
int kvm_vgic_create(struct kvm *kvm, u32 type);
void kvm_vgic_destroy(struct kvm *kvm);
void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu);
int kvm_vgic_map_resources(struct kvm *kvm);
void kvm_vgic_finalize_idregs(struct kvm *kvm);
int kvm_vgic_hyp_init(void);
void kvm_vgic_init_cpu_hardware(void);

int kvm_vgic_inject_irq(struct kvm *kvm, struct kvm_vcpu *vcpu,
			unsigned int intid, bool level, void *owner);
void kvm_vgic_set_irq_ops(struct kvm_vcpu *vcpu, u32 vintid,
			  struct irq_ops *ops);
void kvm_vgic_clear_irq_ops(struct kvm_vcpu *vcpu, u32 vintid);
int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq,
			  u32 vintid);
int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int vintid);
int kvm_vgic_get_map(struct kvm_vcpu *vcpu, unsigned int vintid);
bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int vintid);

int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu);

void kvm_vgic_load(struct kvm_vcpu *vcpu);
void kvm_vgic_put(struct kvm_vcpu *vcpu);

u16 vgic_v3_get_eisr(struct kvm_vcpu *vcpu);
u16 vgic_v3_get_elrsr(struct kvm_vcpu *vcpu);
u64 vgic_v3_get_misr(struct kvm_vcpu *vcpu);

#define irqchip_in_kernel(k)	(!!((k)->arch.vgic.in_kernel))
#define vgic_initialized(k)	((k)->arch.vgic.initialized)
#define vgic_valid_spi(k, i)						\
	({								\
		bool __ret = irq_is_spi(k, i);				\
									\
		switch ((k)->arch.vgic.vgic_model) {			\
		case KVM_DEV_TYPE_ARM_VGIC_V5:				\
			__ret &= FIELD_GET(GICV5_HWIRQ_ID, i) < (k)->arch.vgic.nr_spis; \
			break;						\
		default:						\
			__ret &= (i) < ((k)->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS); \
		}							\
									\
		__ret;							\
	})

bool kvm_vcpu_has_pending_irqs(struct kvm_vcpu *vcpu);
void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu);
void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu);
void kvm_vgic_reset_mapped_irq(struct kvm_vcpu *vcpu, u32 vintid);
void kvm_vgic_process_async_update(struct kvm_vcpu *vcpu);

void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg, bool allow_group1);

/**
 * kvm_vgic_get_max_vcpus - Get the maximum number of VCPUs allowed by HW
 *
 * The host's GIC naturally limits the maximum amount of VCPUs a guest
 * can use.
 */
static inline int kvm_vgic_get_max_vcpus(void)
{
	return kvm_vgic_global_state.max_gic_vcpus;
}

/**
 * kvm_vgic_setup_default_irq_routing:
 * Setup a default flat gsi routing table mapping all SPIs
 */
int kvm_vgic_setup_default_irq_routing(struct kvm *kvm);

int kvm_vgic_set_owner(struct kvm_vcpu *vcpu, unsigned int intid, void *owner);

struct kvm_kernel_irq_routing_entry;

int kvm_vgic_v4_set_forwarding(struct kvm *kvm, int irq,
			       struct kvm_kernel_irq_routing_entry *irq_entry);

void kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int host_irq);

int vgic_v4_load(struct kvm_vcpu *vcpu);
void vgic_v4_commit(struct kvm_vcpu *vcpu);
int vgic_v4_put(struct kvm_vcpu *vcpu);

int vgic_v5_finalize_ppi_state(struct kvm *kvm);
bool vgic_v5_ppi_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq,
				  unsigned long flags);
void vgic_v5_set_ppi_dvi(struct kvm_vcpu *vcpu, struct vgic_irq *irq, bool dvi);

bool vgic_state_is_nested(struct kvm_vcpu *vcpu);

/* CPU HP callbacks */
void kvm_vgic_cpu_up(void);
void kvm_vgic_cpu_down(void);

#endif /* __KVM_ARM_VGIC_H */