KVM: x86/tdx: Do VMXON and TDX-Module initialization during subsys init

Now that VMXON can be done without bouncing through KVM, do TDX-Module
initialization during subsys init (specifically before module_init() so
that it runs before KVM when both are built-in).  Aside from the obvious
benefits of separating core TDX code from KVM, this will allow tagging a
pile of TDX functions and globals as being __init and __ro_after_init.

Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Reviewed-by: Chao Gao <chao.gao@intel.com>
Acked-by: Dave Hansen <dave.hansen@linux.intel.com>
Tested-by: Chao Gao <chao.gao@intel.com>
Tested-by: Sagi Shahar <sagis@google.com>
Link: https://patch.msgid.link/20260214012702.2368778-12-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
This commit is contained in:
Sean Christopherson
2026-02-13 17:26:57 -08:00
parent 0efe5dc161
commit 165e773538
5 changed files with 130 additions and 234 deletions

View File

@@ -60,44 +60,18 @@ Besides initializing the TDX module, a per-cpu initialization SEAMCALL
must be done on one cpu before any other SEAMCALLs can be made on that
cpu.
The kernel provides two functions, tdx_enable() and tdx_cpu_enable() to
allow the user of TDX to enable the TDX module and enable TDX on local
cpu respectively.
Making SEAMCALL requires VMXON has been done on that CPU. Currently only
KVM implements VMXON. For now both tdx_enable() and tdx_cpu_enable()
don't do VMXON internally (not trivial), but depend on the caller to
guarantee that.
To enable TDX, the caller of TDX should: 1) temporarily disable CPU
hotplug; 2) do VMXON and tdx_cpu_enable() on all online cpus; 3) call
tdx_enable(). For example::
cpus_read_lock();
on_each_cpu(vmxon_and_tdx_cpu_enable());
ret = tdx_enable();
cpus_read_unlock();
if (ret)
goto no_tdx;
// TDX is ready to use
And the caller of TDX must guarantee the tdx_cpu_enable() has been
successfully done on any cpu before it wants to run any other SEAMCALL.
A typical usage is to do both VMXON and tdx_cpu_enable() in CPU hotplug
online callback, and refuse to online if tdx_cpu_enable() fails.
User can consult dmesg to see whether the TDX module has been initialized.
If the TDX module is initialized successfully, dmesg shows something
like below::
[..] virt/tdx: 262668 KBs allocated for PAMT
[..] virt/tdx: module initialized
[..] virt/tdx: TDX-Module initialized
If the TDX module failed to initialize, dmesg also shows it failed to
initialize::
[..] virt/tdx: module initialization failed ...
[..] virt/tdx: TDX-Module initialization failed ...
TDX Interaction to Other Kernel Components
------------------------------------------
@@ -129,9 +103,9 @@ CPU Hotplug
~~~~~~~~~~~
TDX module requires the per-cpu initialization SEAMCALL must be done on
one cpu before any other SEAMCALLs can be made on that cpu. The kernel
provides tdx_cpu_enable() to let the user of TDX do it when the user
wants to use a new cpu for TDX task.
one cpu before any other SEAMCALLs can be made on that cpu. The kernel,
via the CPU hotplug framework, performs the necessary initialization when
a CPU is first brought online.
TDX doesn't support physical (ACPI) CPU hotplug. During machine boot,
TDX verifies all boot-time present logical CPUs are TDX compatible before

View File

@@ -145,8 +145,6 @@ static __always_inline u64 sc_retry(sc_func_t func, u64 fn,
#define seamcall(_fn, _args) sc_retry(__seamcall, (_fn), (_args))
#define seamcall_ret(_fn, _args) sc_retry(__seamcall_ret, (_fn), (_args))
#define seamcall_saved_ret(_fn, _args) sc_retry(__seamcall_saved_ret, (_fn), (_args))
int tdx_cpu_enable(void);
int tdx_enable(void);
const char *tdx_dump_mce_info(struct mce *m);
const struct tdx_sys_info *tdx_get_sysinfo(void);
@@ -223,8 +221,6 @@ u64 tdh_phymem_page_wbinvd_tdr(struct tdx_td *td);
u64 tdh_phymem_page_wbinvd_hkid(u64 hkid, struct page *page);
#else
static inline void tdx_init(void) { }
static inline int tdx_cpu_enable(void) { return -ENODEV; }
static inline int tdx_enable(void) { return -ENODEV; }
static inline u32 tdx_get_nr_guest_keyids(void) { return 0; }
static inline const char *tdx_dump_mce_info(struct mce *m) { return NULL; }
static inline const struct tdx_sys_info *tdx_get_sysinfo(void) { return NULL; }

View File

@@ -59,7 +59,7 @@ module_param_named(tdx, enable_tdx, bool, 0444);
#define TDX_SHARED_BIT_PWL_5 gpa_to_gfn(BIT_ULL(51))
#define TDX_SHARED_BIT_PWL_4 gpa_to_gfn(BIT_ULL(47))
static enum cpuhp_state tdx_cpuhp_state;
static enum cpuhp_state tdx_cpuhp_state __ro_after_init;
static const struct tdx_sys_info *tdx_sysinfo;
@@ -3293,10 +3293,7 @@ int tdx_gmem_max_mapping_level(struct kvm *kvm, kvm_pfn_t pfn, bool is_private)
static int tdx_online_cpu(unsigned int cpu)
{
/* Sanity check CPU is already in post-VMXON */
WARN_ON_ONCE(!(cr4_read_shadow() & X86_CR4_VMXE));
return tdx_cpu_enable();
return 0;
}
static int tdx_offline_cpu(unsigned int cpu)
@@ -3335,51 +3332,6 @@ static int tdx_offline_cpu(unsigned int cpu)
return -EBUSY;
}
static void __do_tdx_cleanup(void)
{
/*
* Once TDX module is initialized, it cannot be disabled and
* re-initialized again w/o runtime update (which isn't
* supported by kernel). Only need to remove the cpuhp here.
* The TDX host core code tracks TDX status and can handle
* 'multiple enabling' scenario.
*/
WARN_ON_ONCE(!tdx_cpuhp_state);
cpuhp_remove_state_nocalls_cpuslocked(tdx_cpuhp_state);
tdx_cpuhp_state = 0;
}
static void __tdx_cleanup(void)
{
cpus_read_lock();
__do_tdx_cleanup();
cpus_read_unlock();
}
static int __init __do_tdx_bringup(void)
{
int r;
/*
* TDX-specific cpuhp callback to call tdx_cpu_enable() on all
* online CPUs before calling tdx_enable(), and on any new
* going-online CPU to make sure it is ready for TDX guest.
*/
r = cpuhp_setup_state_cpuslocked(CPUHP_AP_ONLINE_DYN,
"kvm/cpu/tdx:online",
tdx_online_cpu, tdx_offline_cpu);
if (r < 0)
return r;
tdx_cpuhp_state = r;
r = tdx_enable();
if (r)
__do_tdx_cleanup();
return r;
}
static int __init __tdx_bringup(void)
{
const struct tdx_sys_info_td_conf *td_conf;
@@ -3399,34 +3351,18 @@ static int __init __tdx_bringup(void)
}
}
/*
* Enabling TDX requires enabling hardware virtualization first,
* as making SEAMCALLs requires CPU being in post-VMXON state.
*/
r = kvm_enable_virtualization();
if (r)
return r;
cpus_read_lock();
r = __do_tdx_bringup();
cpus_read_unlock();
if (r)
goto tdx_bringup_err;
r = -EINVAL;
/* Get TDX global information for later use */
tdx_sysinfo = tdx_get_sysinfo();
if (WARN_ON_ONCE(!tdx_sysinfo))
goto get_sysinfo_err;
if (!tdx_sysinfo)
return -ENODEV;
/* Check TDX module and KVM capabilities */
if (!tdx_get_supported_attrs(&tdx_sysinfo->td_conf) ||
!tdx_get_supported_xfam(&tdx_sysinfo->td_conf))
goto get_sysinfo_err;
return -EINVAL;
if (!(tdx_sysinfo->features.tdx_features0 & MD_FIELD_ID_FEATURES0_TOPOLOGY_ENUM))
goto get_sysinfo_err;
return -EINVAL;
/*
* TDX has its own limit of maximum vCPUs it can support for all
@@ -3461,34 +3397,31 @@ static int __init __tdx_bringup(void)
if (td_conf->max_vcpus_per_td < num_present_cpus()) {
pr_err("Disable TDX: MAX_VCPU_PER_TD (%u) smaller than number of logical CPUs (%u).\n",
td_conf->max_vcpus_per_td, num_present_cpus());
goto get_sysinfo_err;
return -EINVAL;
}
if (misc_cg_set_capacity(MISC_CG_RES_TDX, tdx_get_nr_guest_keyids()))
goto get_sysinfo_err;
return -EINVAL;
/*
* Leave hardware virtualization enabled after TDX is enabled
* successfully. TDX CPU hotplug depends on this.
* TDX-specific cpuhp callback to disallow offlining the last CPU in a
package while KVM is running one or more TDs. Reclaiming HKIDs
* requires doing PAGE.WBINVD on every package, i.e. offlining all CPUs
* of a package would prevent reclaiming the HKID.
*/
r = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "kvm/cpu/tdx:online",
tdx_online_cpu, tdx_offline_cpu);
if (r < 0)
goto err_cpuhup;
tdx_cpuhp_state = r;
return 0;
get_sysinfo_err:
__tdx_cleanup();
tdx_bringup_err:
kvm_disable_virtualization();
err_cpuhup:
misc_cg_set_capacity(MISC_CG_RES_TDX, 0);
return r;
}
void tdx_cleanup(void)
{
if (enable_tdx) {
misc_cg_set_capacity(MISC_CG_RES_TDX, 0);
__tdx_cleanup();
kvm_disable_virtualization();
}
}
int __init tdx_bringup(void)
{
int r, i;
@@ -3520,39 +3453,11 @@ int __init tdx_bringup(void)
goto success_disable_tdx;
}
if (!cpu_feature_enabled(X86_FEATURE_MOVDIR64B)) {
pr_err("tdx: MOVDIR64B is required for TDX\n");
goto success_disable_tdx;
}
if (!cpu_feature_enabled(X86_FEATURE_SELFSNOOP)) {
pr_err("Self-snoop is required for TDX\n");
goto success_disable_tdx;
}
if (!cpu_feature_enabled(X86_FEATURE_TDX_HOST_PLATFORM)) {
pr_err("tdx: no TDX private KeyIDs available\n");
pr_err("TDX not supported by the host platform\n");
goto success_disable_tdx;
}
if (!enable_virt_at_load) {
pr_err("tdx: tdx requires kvm.enable_virt_at_load=1\n");
goto success_disable_tdx;
}
/*
* Ideally KVM should probe whether TDX module has been loaded
* first and then try to bring it up. But TDX needs to use SEAMCALL
* to probe whether the module is loaded (there is no CPUID or MSR
* for that), and making SEAMCALL requires enabling virtualization
* first, just like the rest steps of bringing up TDX module.
*
* So, for simplicity do everything in __tdx_bringup(); the first
* SEAMCALL will return -ENODEV when the module is not loaded. The
* only complication is having to make sure that initialization
* SEAMCALLs don't return TDX_SEAMCALL_VMFAILINVALID in other
* cases.
*/
r = __tdx_bringup();
if (r) {
/*
@@ -3567,8 +3472,6 @@ int __init tdx_bringup(void)
*/
if (r == -ENODEV)
goto success_disable_tdx;
enable_tdx = 0;
}
return r;
@@ -3578,6 +3481,15 @@ success_disable_tdx:
return 0;
}
void tdx_cleanup(void)
{
if (!enable_tdx)
return;
misc_cg_set_capacity(MISC_CG_RES_TDX, 0);
cpuhp_remove_state(tdx_cpuhp_state);
}
void __init tdx_hardware_setup(void)
{
KVM_SANITY_CHECK_VM_STRUCT_SIZE(kvm_tdx);

View File

@@ -28,6 +28,7 @@
#include <linux/log2.h>
#include <linux/acpi.h>
#include <linux/suspend.h>
#include <linux/syscore_ops.h>
#include <linux/idr.h>
#include <linux/kvm_types.h>
#include <asm/page.h>
@@ -39,6 +40,7 @@
#include <asm/cpu_device_id.h>
#include <asm/processor.h>
#include <asm/mce.h>
#include <asm/virt.h>
#include "tdx.h"
static u32 tdx_global_keyid __ro_after_init;
@@ -51,13 +53,11 @@ static DEFINE_PER_CPU(bool, tdx_lp_initialized);
static struct tdmr_info_list tdx_tdmr_list;
static enum tdx_module_status_t tdx_module_status;
static DEFINE_MUTEX(tdx_module_lock);
/* All TDX-usable memory regions. Protected by mem_hotplug_lock. */
static LIST_HEAD(tdx_memlist);
static struct tdx_sys_info tdx_sysinfo;
static bool tdx_module_initialized;
typedef void (*sc_err_func_t)(u64 fn, u64 err, struct tdx_module_args *args);
@@ -139,22 +139,15 @@ out:
}
/**
* tdx_cpu_enable - Enable TDX on local cpu
*
* Do one-time TDX module per-cpu initialization SEAMCALL (and TDX module
* global initialization SEAMCALL if not done) on local cpu to make this
* cpu be ready to run any other SEAMCALLs.
*
* Return 0 on success, otherwise errors.
* Enable VMXON and then do one-time TDX module per-cpu initialization SEAMCALL
* (and TDX module global initialization SEAMCALL if not done) on local cpu to
* make this cpu be ready to run any other SEAMCALLs.
*/
int tdx_cpu_enable(void)
static int tdx_cpu_enable(void)
{
struct tdx_module_args args = {};
int ret;
if (!boot_cpu_has(X86_FEATURE_TDX_HOST_PLATFORM))
return -ENODEV;
if (__this_cpu_read(tdx_lp_initialized))
return 0;
@@ -175,7 +168,58 @@ int tdx_cpu_enable(void)
return 0;
}
EXPORT_SYMBOL_FOR_KVM(tdx_cpu_enable);
static int tdx_online_cpu(unsigned int cpu)
{
int ret;
ret = x86_virt_get_ref(X86_FEATURE_VMX);
if (ret)
return ret;
ret = tdx_cpu_enable();
if (ret)
x86_virt_put_ref(X86_FEATURE_VMX);
return ret;
}
static int tdx_offline_cpu(unsigned int cpu)
{
x86_virt_put_ref(X86_FEATURE_VMX);
return 0;
}
static void tdx_shutdown_cpu(void *ign)
{
x86_virt_put_ref(X86_FEATURE_VMX);
}
static void tdx_shutdown(void *ign)
{
on_each_cpu(tdx_shutdown_cpu, NULL, 1);
}
static int tdx_suspend(void *ign)
{
x86_virt_put_ref(X86_FEATURE_VMX);
return 0;
}
static void tdx_resume(void *ign)
{
WARN_ON_ONCE(x86_virt_get_ref(X86_FEATURE_VMX));
}
static const struct syscore_ops tdx_syscore_ops = {
.suspend = tdx_suspend,
.resume = tdx_resume,
.shutdown = tdx_shutdown,
};
static struct syscore tdx_syscore = {
.ops = &tdx_syscore_ops,
};
/*
* Add a memory region as a TDX memory block. The caller must make sure
@@ -1150,67 +1194,50 @@ err_free_tdxmem:
goto out_put_tdxmem;
}
static int __tdx_enable(void)
static int tdx_enable(void)
{
enum cpuhp_state state;
int ret;
if (!cpu_feature_enabled(X86_FEATURE_TDX_HOST_PLATFORM)) {
pr_err("TDX not supported by the host platform\n");
return -ENODEV;
}
if (!cpu_feature_enabled(X86_FEATURE_XSAVE)) {
pr_err("XSAVE is required for TDX\n");
return -EINVAL;
}
if (!cpu_feature_enabled(X86_FEATURE_MOVDIR64B)) {
pr_err("MOVDIR64B is required for TDX\n");
return -EINVAL;
}
if (!cpu_feature_enabled(X86_FEATURE_SELFSNOOP)) {
pr_err("Self-snoop is required for TDX\n");
return -ENODEV;
}
state = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "virt/tdx:online",
tdx_online_cpu, tdx_offline_cpu);
if (state < 0)
return state;
ret = init_tdx_module();
if (ret) {
pr_err("module initialization failed (%d)\n", ret);
tdx_module_status = TDX_MODULE_ERROR;
pr_err("TDX-Module initialization failed (%d)\n", ret);
cpuhp_remove_state(state);
return ret;
}
pr_info("module initialized\n");
tdx_module_status = TDX_MODULE_INITIALIZED;
register_syscore(&tdx_syscore);
tdx_module_initialized = true;
pr_info("TDX-Module initialized\n");
return 0;
}
/**
* tdx_enable - Enable TDX module to make it ready to run TDX guests
*
* This function assumes the caller has: 1) held read lock of CPU hotplug
* lock to prevent any new cpu from becoming online; 2) done both VMXON
* and tdx_cpu_enable() on all online cpus.
*
* This function requires there's at least one online cpu for each CPU
* package to succeed.
*
* This function can be called in parallel by multiple callers.
*
* Return 0 if TDX is enabled successfully, otherwise error.
*/
int tdx_enable(void)
{
int ret;
if (!boot_cpu_has(X86_FEATURE_TDX_HOST_PLATFORM))
return -ENODEV;
lockdep_assert_cpus_held();
mutex_lock(&tdx_module_lock);
switch (tdx_module_status) {
case TDX_MODULE_UNINITIALIZED:
ret = __tdx_enable();
break;
case TDX_MODULE_INITIALIZED:
/* Already initialized, great, tell the caller. */
ret = 0;
break;
default:
/* Failed to initialize in the previous attempts */
ret = -EINVAL;
break;
}
mutex_unlock(&tdx_module_lock);
return ret;
}
EXPORT_SYMBOL_FOR_KVM(tdx_enable);
subsys_initcall(tdx_enable);
static bool is_pamt_page(unsigned long phys)
{
@@ -1461,15 +1488,10 @@ void __init tdx_init(void)
const struct tdx_sys_info *tdx_get_sysinfo(void)
{
const struct tdx_sys_info *p = NULL;
if (!tdx_module_initialized)
return NULL;
/* Make sure all fields in @tdx_sysinfo have been populated */
mutex_lock(&tdx_module_lock);
if (tdx_module_status == TDX_MODULE_INITIALIZED)
p = (const struct tdx_sys_info *)&tdx_sysinfo;
mutex_unlock(&tdx_module_lock);
return p;
return (const struct tdx_sys_info *)&tdx_sysinfo;
}
EXPORT_SYMBOL_FOR_KVM(tdx_get_sysinfo);

View File

@@ -91,14 +91,6 @@ struct tdmr_info {
* Do not put any hardware-defined TDX structure representations below
* this comment!
*/
/* Kernel defined TDX module status during module initialization. */
enum tdx_module_status_t {
TDX_MODULE_UNINITIALIZED,
TDX_MODULE_INITIALIZED,
TDX_MODULE_ERROR
};
struct tdx_memblock {
struct list_head list;
unsigned long start_pfn;