@@ -780,6 +780,8 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w,
/* Support RDFSBASE/RDGSBASE/WRFSBASE/WRGSBASE */
#define CPUID_7_0_EBX_FSGSBASE (1U << 0)
+/* Support for TSC adjustment MSR 0x3B */
+#define CPUID_7_0_EBX_TSC_ADJUST (1U << 1)
/* Support SGX */
#define CPUID_7_0_EBX_SGX (1U << 2)
/* 1st Group of Advanced Bit Manipulation Extensions */
@@ -798,8 +800,12 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w,
#define CPUID_7_0_EBX_INVPCID (1U << 10)
/* Restricted Transactional Memory */
#define CPUID_7_0_EBX_RTM (1U << 11)
+/* Cache QoS Monitoring */
+#define CPUID_7_0_EBX_PQM (1U << 12)
/* Memory Protection Extension */
#define CPUID_7_0_EBX_MPX (1U << 14)
+/* Resource Director Technology Allocation */
+#define CPUID_7_0_EBX_RDT_A (1U << 15)
/* AVX-512 Foundation */
#define CPUID_7_0_EBX_AVX512F (1U << 16)
/* AVX-512 Doubleword & Quadword Instruction */
@@ -855,10 +861,16 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w,
#define CPUID_7_0_ECX_AVX512VNNI (1U << 11)
/* Support for VPOPCNT[B,W] and VPSHUFBITQMB */
#define CPUID_7_0_ECX_AVX512BITALG (1U << 12)
+/* Intel Total Memory Encryption */
+#define CPUID_7_0_ECX_TME (1U << 13)
/* POPCNT for vectors of DW/QW */
#define CPUID_7_0_ECX_AVX512_VPOPCNTDQ (1U << 14)
+/* Placeholder for bit 15 */
+#define CPUID_7_0_ECX_FZM (1U << 15)
/* 5-level Page Tables */
#define CPUID_7_0_ECX_LA57 (1U << 16)
+/* MAWAU for MPX */
+#define CPUID_7_0_ECX_MAWAU (31U << 17)
/* Read Processor ID */
#define CPUID_7_0_ECX_RDPID (1U << 22)
/* Bus Lock Debug Exception */
@@ -869,6 +881,8 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w,
#define CPUID_7_0_ECX_MOVDIRI (1U << 27)
/* Move 64 Bytes as Direct Store Instruction */
#define CPUID_7_0_ECX_MOVDIR64B (1U << 28)
+/* ENQCMD and ENQCMDS instructions */
+#define CPUID_7_0_ECX_ENQCMD (1U << 29)
/* Support SGX Launch Control */
#define CPUID_7_0_ECX_SGX_LC (1U << 30)
/* Protection Keys for Supervisor-mode Pages */
@@ -886,6 +900,8 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w,
#define CPUID_7_0_EDX_SERIALIZE (1U << 14)
/* TSX Suspend Load Address Tracking instruction */
#define CPUID_7_0_EDX_TSX_LDTRK (1U << 16)
+/* PCONFIG instruction */
+#define CPUID_7_0_EDX_PCONFIG (1U << 18)
/* Architectural LBRs */
#define CPUID_7_0_EDX_ARCH_LBR (1U << 19)
/* AMX_BF16 instruction */
@@ -521,6 +521,10 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function,
ret |= 1U << KVM_HINTS_REALTIME;
}
+ if (is_tdx_vm()) {
+ tdx_get_supported_cpuid(function, index, reg, &ret);
+ }
+
return ret;
}
@@ -15,11 +15,129 @@
#include "qemu/error-report.h"
#include "qapi/error.h"
#include "qom/object_interfaces.h"
+#include "standard-headers/asm-x86/kvm_para.h"
#include "sysemu/kvm.h"
+#include "sysemu/sysemu.h"
#include "hw/i386/x86.h"
#include "kvm_i386.h"
#include "tdx.h"
+#include "../cpu-internal.h"
+
+#define TDX_SUPPORTED_KVM_FEATURES ((1U << KVM_FEATURE_NOP_IO_DELAY) | \
+ (1U << KVM_FEATURE_PV_UNHALT) | \
+ (1U << KVM_FEATURE_PV_TLB_FLUSH) | \
+ (1U << KVM_FEATURE_PV_SEND_IPI) | \
+ (1U << KVM_FEATURE_POLL_CONTROL) | \
+ (1U << KVM_FEATURE_PV_SCHED_YIELD) | \
+ (1U << KVM_FEATURE_MSI_EXT_DEST_ID))
+
+typedef struct KvmTdxCpuidLookup {
+ uint32_t tdx_fixed0;
+ uint32_t tdx_fixed1;
+
+ /*
+ * The CPUID bits that are configurable from the view of TDX module
+ * but require VMM's support when wanting to enable them.
+ *
+ * For those bits, they cannot be enabled if VMM (KVM/QEMU) doesn't support
+ * them.
+ */
+ uint32_t depends_on_vmm_cap;
+
+ bool inducing_ve;
+ /*
+ * The maximum supported feature set for given inducing-#VE leaf.
+ * It's valid only when .inducing_ve is true.
+ */
+ uint32_t supported_value_on_ve;
+} KvmTdxCpuidLookup;
+
+ /*
+ * QEMU maintained TDX CPUID lookup tables, which reflects how CPUIDs are
+ * virtualized for guest TDs based on "CPUID virtualization" of TDX spec.
+ *
+ * Note:
+ *
+ * This table will be updated runtime by tdx_caps reported by KVM.
+ *
+ */
+static KvmTdxCpuidLookup tdx_cpuid_lookup[FEATURE_WORDS] = {
+ [FEAT_1_EDX] = {
+ .tdx_fixed0 =
+ BIT(10) /* Reserved */ | BIT(20) /* Reserved */ | CPUID_IA64,
+ .tdx_fixed1 =
+ CPUID_MSR | CPUID_PAE | CPUID_MCE | CPUID_APIC |
+ CPUID_MTRR | CPUID_MCA | CPUID_CLFLUSH | CPUID_DTS,
+ .depends_on_vmm_cap =
+ CPUID_ACPI | CPUID_PBE,
+ },
+ [FEAT_1_ECX] = {
+ .tdx_fixed0 =
+ CPUID_EXT_VMX | CPUID_EXT_SMX | BIT(16) /* Reserved */,
+ .tdx_fixed1 =
+ CPUID_EXT_CX16 | CPUID_EXT_PDCM | CPUID_EXT_X2APIC |
+ CPUID_EXT_AES | CPUID_EXT_XSAVE | CPUID_EXT_RDRAND |
+ CPUID_EXT_HYPERVISOR,
+ .depends_on_vmm_cap =
+ CPUID_EXT_EST | CPUID_EXT_TM2 | CPUID_EXT_XTPR | CPUID_EXT_DCA,
+ },
+ [FEAT_8000_0001_EDX] = {
+ .tdx_fixed1 =
+ CPUID_EXT2_NX | CPUID_EXT2_PDPE1GB | CPUID_EXT2_RDTSCP |
+ CPUID_EXT2_LM,
+ },
+ [FEAT_7_0_EBX] = {
+ .tdx_fixed0 =
+ CPUID_7_0_EBX_TSC_ADJUST | CPUID_7_0_EBX_SGX | CPUID_7_0_EBX_MPX,
+ .tdx_fixed1 =
+ CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_RTM |
+ CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_SMAP |
+ CPUID_7_0_EBX_CLFLUSHOPT | CPUID_7_0_EBX_CLWB |
+ CPUID_7_0_EBX_SHA_NI,
+ .depends_on_vmm_cap =
+ CPUID_7_0_EBX_PQM | CPUID_7_0_EBX_RDT_A,
+ },
+ [FEAT_7_0_ECX] = {
+ .tdx_fixed0 =
+ CPUID_7_0_ECX_FZM | CPUID_7_0_ECX_MAWAU |
+ CPUID_7_0_ECX_ENQCMD | CPUID_7_0_ECX_SGX_LC,
+ .tdx_fixed1 =
+ CPUID_7_0_ECX_MOVDIR64B | CPUID_7_0_ECX_BUS_LOCK_DETECT,
+ .depends_on_vmm_cap =
+ CPUID_7_0_ECX_TME,
+ },
+ [FEAT_7_0_EDX] = {
+ .tdx_fixed1 =
+ CPUID_7_0_EDX_SPEC_CTRL | CPUID_7_0_EDX_ARCH_CAPABILITIES |
+ CPUID_7_0_EDX_CORE_CAPABILITY | CPUID_7_0_EDX_SPEC_CTRL_SSBD,
+ .depends_on_vmm_cap =
+ CPUID_7_0_EDX_PCONFIG,
+ },
+ [FEAT_8000_0008_EBX] = {
+ .tdx_fixed0 =
+ ~CPUID_8000_0008_EBX_WBNOINVD,
+ .tdx_fixed1 =
+ CPUID_8000_0008_EBX_WBNOINVD,
+ },
+ [FEAT_XSAVE] = {
+ .tdx_fixed1 =
+ CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC |
+ CPUID_XSAVE_XSAVES,
+ },
+ [FEAT_6_EAX] = {
+ .inducing_ve = true,
+ .supported_value_on_ve = CPUID_6_EAX_ARAT,
+ },
+ [FEAT_8000_0007_EDX] = {
+ .inducing_ve = true,
+ .supported_value_on_ve = -1U,
+ },
+ [FEAT_KVM] = {
+ .inducing_ve = true,
+ .supported_value_on_ve = TDX_SUPPORTED_KVM_FEATURES,
+ },
+};
static TdxGuest *tdx_guest;
@@ -31,6 +149,151 @@ bool is_tdx_vm(void)
return !!tdx_guest;
}
+static inline uint32_t host_cpuid_reg(uint32_t function,
+ uint32_t index, int reg)
+{
+ uint32_t eax, ebx, ecx, edx;
+ uint32_t ret = 0;
+
+ host_cpuid(function, index, &eax, &ebx, &ecx, &edx);
+
+ switch (reg) {
+ case R_EAX:
+ ret = eax;
+ break;
+ case R_EBX:
+ ret = ebx;
+ break;
+ case R_ECX:
+ ret = ecx;
+ break;
+ case R_EDX:
+ ret = edx;
+ break;
+ }
+ return ret;
+}
+
+/*
+ * get the configurable cpuid bits (can be set to 0 or 1) reported by TDX module
+ * from tdx_caps.
+ */
+static inline uint32_t tdx_cap_cpuid_config(uint32_t function,
+ uint32_t index, int reg)
+{
+ struct kvm_tdx_cpuid_config *cpuid_c;
+ int ret = 0;
+ int i;
+
+ if (tdx_caps->nr_cpuid_configs <= 0) {
+ return ret;
+ }
+
+ for (i = 0; i < tdx_caps->nr_cpuid_configs; i++) {
+ cpuid_c = &tdx_caps->cpuid_configs[i];
+ /* 0xffffffff in sub_leaf means the leaf doesn't require a sublesf */
+ if (cpuid_c->leaf == function &&
+ (cpuid_c->sub_leaf == 0xffffffff || cpuid_c->sub_leaf == index)) {
+ switch (reg) {
+ case R_EAX:
+ ret = cpuid_c->eax;
+ break;
+ case R_EBX:
+ ret = cpuid_c->ebx;
+ break;
+ case R_ECX:
+ ret = cpuid_c->ecx;
+ break;
+ case R_EDX:
+ ret = cpuid_c->edx;
+ break;
+ default:
+ return 0;
+ }
+ }
+ }
+ return ret;
+}
+
+static FeatureWord get_cpuid_featureword_index(uint32_t function,
+ uint32_t index, int reg)
+{
+ FeatureWord w;
+
+ for (w = 0; w < FEATURE_WORDS; w++) {
+ FeatureWordInfo *f = &feature_word_info[w];
+
+ if (f->type == MSR_FEATURE_WORD || f->cpuid.eax != function ||
+ f->cpuid.reg != reg ||
+ (f->cpuid.needs_ecx && f->cpuid.ecx != index)) {
+ continue;
+ }
+
+ return w;
+ }
+
+ return w;
+}
+
+/*
+ * TDX supported CPUID varies from what KVM reports. Adjust the result by
+ * applying the TDX restrictions.
+ */
+void tdx_get_supported_cpuid(uint32_t function, uint32_t index, int reg,
+ uint32_t *ret)
+{
+ /*
+ * it's KVMM + QEMU 's capabilities of what CPUID bits is supported or
+ * can be emulated as supported.
+ */
+ uint32_t vmm_cap = *ret;
+ FeatureWord w;
+
+ /* Only handle features leaves that recognized by feature_word_info[] */
+ w = get_cpuid_featureword_index(function, index, reg);
+ if (w == FEATURE_WORDS) {
+ return;
+ }
+
+ if (tdx_cpuid_lookup[w].inducing_ve) {
+ *ret &= tdx_cpuid_lookup[w].supported_value_on_ve;
+ return;
+ }
+
+ /*
+ * Include all the native bits as first step. It covers types
+ * - As configured (if native)
+ * - Native
+ * - XFAM related and Attributes realted
+ *
+ * It also has side effect to enable unsupported bits, e.g., the
+ * bits of "fixed0" type while present natively. It's safe because
+ * the unsupported bits will be masked off by .fixed0 later.
+ */
+ *ret |= host_cpuid_reg(function, index, reg);
+
+ /* Adjust according to "fixed" type in tdx_cpuid_lookup. */
+ *ret |= tdx_cpuid_lookup[w].tdx_fixed1;
+ *ret &= ~tdx_cpuid_lookup[w].tdx_fixed0;
+
+ /*
+ * Configurable cpuids are supported unconditionally. It's mainly to
+ * include those configurable regardless of native existence.
+ */
+ *ret |= tdx_cap_cpuid_config(function, index, reg);
+
+ /*
+ * clear the configurable bits that require VMM emulation and VMM doesn't
+ * report the support.
+ */
+ *ret &= ~(tdx_cpuid_lookup[w].depends_on_vmm_cap & ~vmm_cap);
+
+ /* special handling */
+ if (function == 1 && reg == R_ECX && !enable_cpu_pm) {
+ *ret &= ~CPUID_EXT_MONITOR;
+ }
+}
+
enum tdx_ioctl_level{
TDX_VM_IOCTL,
TDX_VCPU_IOCTL,
@@ -27,5 +27,7 @@ bool is_tdx_vm(void);
#endif /* CONFIG_TDX */
int tdx_kvm_init(MachineState *ms, Error **errp);
+void tdx_get_supported_cpuid(uint32_t function, uint32_t index, int reg,
+ uint32_t *ret);
#endif /* QEMU_I386_TDX_H */