Skip to content

Commit 29f040d

Browse files
vishals4ghgregkh
authored andcommitted
x86/tdx: Fix arch_safe_halt() execution for TDX VMs
commit 9f98a4f upstream. Direct HLT instruction execution causes #VEs for TDX VMs which is routed to hypervisor via TDCALL. If HLT is executed in STI-shadow, resulting #VE handler will enable interrupts before TDCALL is routed to hypervisor leading to missed wakeup events, as current TDX spec doesn't expose interruptibility state information to allow #VE handler to selectively enable interrupts. Commit bfe6ed0 ("x86/tdx: Add HLT support for TDX guests") prevented the idle routines from executing HLT instruction in STI-shadow. But it missed the paravirt routine which can be reached via this path as an example: kvm_wait() => safe_halt() => raw_safe_halt() => arch_safe_halt() => irq.safe_halt() => pv_native_safe_halt() To reliably handle arch_safe_halt() for TDX VMs, introduce explicit dependency on CONFIG_PARAVIRT and override paravirt halt()/safe_halt() routines with TDX-safe versions that execute direct TDCALL and needed interrupt flag updates. Executing direct TDCALL brings in additional benefit of avoiding HLT related #VEs altogether. As tested by Ryan Afranji: "Tested with the specjbb2015 benchmark. It has heavy lock contention which leads to many halt calls. TDX VMs suffered a poor score before this patchset. Verified the major performance improvement with this patchset applied." Fixes: bfe6ed0 ("x86/tdx: Add HLT support for TDX guests") Signed-off-by: Vishal Annapurve <[email protected]> Signed-off-by: Ingo Molnar <[email protected]> Reviewed-by: Kirill A. Shutemov <[email protected]> Tested-by: Ryan Afranji <[email protected]> Cc: Andy Lutomirski <[email protected]> Cc: Brian Gerst <[email protected]> Cc: Juergen Gross <[email protected]> Cc: H. Peter Anvin <[email protected]> Cc: Linus Torvalds <[email protected]> Cc: Josh Poimboeuf <[email protected]> Cc: [email protected] Link: https://lore.kernel.org/r/[email protected] Signed-off-by: Greg Kroah-Hartman <[email protected]>
1 parent e5f0581 commit 29f040d

File tree

4 files changed

+29
-4
lines changed

4 files changed

+29
-4
lines changed

arch/x86/Kconfig

+1
Original file line numberDiff line numberDiff line change
@@ -881,6 +881,7 @@ config INTEL_TDX_GUEST
881881
depends on X86_64 && CPU_SUP_INTEL
882882
depends on X86_X2APIC
883883
depends on EFI_STUB
884+
depends on PARAVIRT
884885
select ARCH_HAS_CC_PLATFORM
885886
select X86_MEM_ENCRYPT
886887
select X86_MCE

arch/x86/coco/tdx/tdx.c

+25-1
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#include <asm/ia32.h>
1414
#include <asm/insn.h>
1515
#include <asm/insn-eval.h>
16+
#include <asm/paravirt_types.h>
1617
#include <asm/pgtable.h>
1718
#include <asm/traps.h>
1819

@@ -334,7 +335,7 @@ static int handle_halt(struct ve_info *ve)
334335
return ve_instr_len(ve);
335336
}
336337

337-
void __cpuidle tdx_safe_halt(void)
338+
void __cpuidle tdx_halt(void)
338339
{
339340
const bool irq_disabled = false;
340341

@@ -345,6 +346,16 @@ void __cpuidle tdx_safe_halt(void)
345346
WARN_ONCE(1, "HLT instruction emulation failed\n");
346347
}
347348

349+
static void __cpuidle tdx_safe_halt(void)
350+
{
351+
tdx_halt();
352+
/*
353+
* "__cpuidle" section doesn't support instrumentation, so stick
354+
* with raw_* variant that avoids tracing hooks.
355+
*/
356+
raw_local_irq_enable();
357+
}
358+
348359
static int read_msr(struct pt_regs *regs, struct ve_info *ve)
349360
{
350361
struct tdx_hypercall_args args = {
@@ -888,6 +899,19 @@ void __init tdx_early_init(void)
888899
x86_platform.guest.enc_cache_flush_required = tdx_cache_flush_required;
889900
x86_platform.guest.enc_tlb_flush_required = tdx_tlb_flush_required;
890901

902+
/*
903+
* Avoid "sti;hlt" execution in TDX guests as HLT induces a #VE that
904+
* will enable interrupts before HLT TDCALL invocation if executed
905+
* in STI-shadow, possibly resulting in missed wakeup events.
906+
*
907+
* Modify all possible HLT execution paths to use TDX specific routines
908+
* that directly execute TDCALL and toggle the interrupt state as
909+
* needed after TDCALL completion. This also reduces HLT related #VEs
910+
* in addition to having a reliable halt logic execution.
911+
*/
912+
pv_ops.irq.safe_halt = tdx_safe_halt;
913+
pv_ops.irq.halt = tdx_halt;
914+
891915
/*
892916
* TDX intercepts the RDMSR to read the X2APIC ID in the parallel
893917
* bringup low level code. That raises #VE which cannot be handled

arch/x86/include/asm/tdx.h

+2-2
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ void tdx_get_ve_info(struct ve_info *ve);
4646

4747
bool tdx_handle_virt_exception(struct pt_regs *regs, struct ve_info *ve);
4848

49-
void tdx_safe_halt(void);
49+
void tdx_halt(void);
5050

5151
bool tdx_early_handle_ve(struct pt_regs *regs);
5252

@@ -55,7 +55,7 @@ int tdx_mcall_get_report0(u8 *reportdata, u8 *tdreport);
5555
#else
5656

5757
static inline void tdx_early_init(void) { };
58-
static inline void tdx_safe_halt(void) { };
58+
static inline void tdx_halt(void) { };
5959

6060
static inline bool tdx_early_handle_ve(struct pt_regs *regs) { return false; }
6161

arch/x86/kernel/process.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -955,7 +955,7 @@ void select_idle_routine(const struct cpuinfo_x86 *c)
955955
static_call_update(x86_idle, mwait_idle);
956956
} else if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) {
957957
pr_info("using TDX aware idle routine\n");
958-
static_call_update(x86_idle, tdx_safe_halt);
958+
static_call_update(x86_idle, tdx_halt);
959959
} else
960960
static_call_update(x86_idle, default_idle);
961961
}

0 commit comments

Comments
 (0)