KVM: x86: Support APX feature for guests

[PATCH RFC v1 14/20] KVM: x86: Emulate REX2-prefixed 64-bit absolute jump

Posted by Chang S. Bae 3 months ago

Add support for the new absolute jump, previously unimplemented.

This instruction has an unusual quirk: the REX2.W bit uses inverted
polarity. Unlike normal REX or REX2 semantics (where W=1 indicates a
64-bit operand size), this instruction uses W=0 to select an 8-byte
operand size.

Add the new InvertedWidthPolarity flag and a helper that interprets the
W bit correctly, avoiding special-case hacks in the emulator logic.

Since ctxt->op_bytes now depends on the instruction flags, determine the
operand size after the opcode lookup rather than before it.

Signed-off-by: Chang S. Bae <chang.seok.bae@intel.com>
---
 arch/x86/kvm/emulate.c | 27 ++++++++++++++++++++-------
 1 file changed, 20 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 58879a31abcd..03f8e007b14e 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -179,6 +179,7 @@
 #define TwoMemOp    ((u64)1 << 55)  /* Instruction has two memory operand */
 #define IsBranch    ((u64)1 << 56)  /* Instruction is considered a branch. */
 #define ShadowStack ((u64)1 << 57)  /* Instruction affects Shadow Stacks. */
+#define InvertedWidthPolarity ((u64)1 << 58) /* Instruction uses inverted REX2.W polarity */
 
 #define DstXacc     (DstAccLo | SrcAccHi | SrcWrite)
 
@@ -993,6 +994,16 @@ EM_ASM_2W(btc);
 
 EM_ASM_2R(cmp, cmp_r);
 
+static inline bool is_64bit_operand_size(struct x86_emulate_ctxt *ctxt)
+{
+	/*
+	 * Most instructions interpret REX.W=1 as 64-bit operand size.
+	 * Some REX2 opcodes invert this logic.
+	 */
+	return ctxt->d & InvertedWidthPolarity ?
+	       ctxt->rex.bits.w == 0 : ctxt->rex.bits.w == 1;
+}
+
 static int em_bsf_c(struct x86_emulate_ctxt *ctxt)
 {
 	/* If src is zero, do not writeback, but update flags */
@@ -2472,7 +2483,7 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
 
 	setup_syscalls_segments(&cs, &ss);
 
-	if (ctxt->rex.bits.w)
+	if (is_64bit_operand_size(ctxt))
 		usermode = X86EMUL_MODE_PROT64;
 	else
 		usermode = X86EMUL_MODE_PROT32;
@@ -4486,7 +4497,8 @@ static struct opcode rex2_opcode_table[256]  __ro_after_init;
 static struct opcode rex2_twobyte_table[256] __ro_after_init;
 
 static const struct opcode undefined = D(Undefined);
-static const struct opcode notimpl   = N;
+static const struct opcode pfx_d5_a1 = I(SrcImm64 | NearBranch | IsBranch | InvertedWidthPolarity, \
+					 em_jmp_abs);
 
 #undef D
 #undef N
@@ -4543,6 +4555,7 @@ static bool is_ibt_instruction(struct x86_emulate_ctxt *ctxt)
 		return true;
 	case SrcNone:
 	case SrcImm:
+	case SrcImm64:
 	case SrcImmByte:
 	/*
 	 * Note, ImmU16 is used only for the stack adjustment operand on ENTER
@@ -4895,9 +4908,6 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len, int
 
 done_prefixes:
 
-	if (ctxt->rex.bits.w)
-		ctxt->op_bytes = 8;
-
 	/* Determine opcode byte(s): */
 	if (ctxt->rex_prefix == REX2_INVALID) {
 		/*
@@ -4936,6 +4946,9 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len, int
 	}
 	ctxt->d = opcode.flags;
 
+	if (is_64bit_operand_size(ctxt))
+		ctxt->op_bytes = 8;
+
 	if (ctxt->d & ModRM)
 		ctxt->modrm = insn_fetch(u8, ctxt);
 
@@ -5594,6 +5607,6 @@ void __init kvm_init_rex2_opcode_table(void)
 	undefine_row(&rex2_twobyte_table[0x30]);
 	undefine_row(&rex2_twobyte_table[0x80]);
 
-	/* Mark opcode not yet implemented: */
-	rex2_opcode_table[0xa1] = notimpl;
+	/* Define the REX2-specific absolute jump (0xA1) opcode */
+	rex2_opcode_table[0xa1] = pfx_d5_a1;
 }
-- 
2.51.0

Re: [PATCH RFC v1 14/20] KVM: x86: Emulate REX2-prefixed 64-bit absolute jump

Posted by Paolo Bonzini 2 months, 4 weeks ago

On 11/10/25 19:01, Chang S. Bae wrote:
> Add support for the new absolute jump, previously unimplemented.
> 
> This instruction has an unusual quirk: the REX2.W bit uses inverted
> polarity. Unlike normal REX or REX2 semantics (where W=1 indicates a
> 64-bit operand size), this instruction uses W=0 to select an 8-byte
> operand size.
> 
> Add the new InvertedWidthPolarity flag and a helper that interprets the
> W bit correctly, avoiding special-case hacks in the emulator logic.
> 
> Since the ctxt->op_bytes depends on the instruction flags, the size
> should be determined after the instruction lookup.
> 
> Signed-off-by: Chang S. Bae <chang.seok.bae@intel.com>

I think this is not needed.  Emulation of non-memory operations, in 
practice, is only needed to support big real mode on very old processors.

We can just add a NoRex bit and apply it to the six rows you touch in
patch 13.

Paolo

> ---
>   arch/x86/kvm/emulate.c | 27 ++++++++++++++++++++-------
>   1 file changed, 20 insertions(+), 7 deletions(-)
> 
> diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
> index 58879a31abcd..03f8e007b14e 100644
> --- a/arch/x86/kvm/emulate.c
> +++ b/arch/x86/kvm/emulate.c
> @@ -179,6 +179,7 @@
>   #define TwoMemOp    ((u64)1 << 55)  /* Instruction has two memory operand */
>   #define IsBranch    ((u64)1 << 56)  /* Instruction is considered a branch. */
>   #define ShadowStack ((u64)1 << 57)  /* Instruction affects Shadow Stacks. */
> +#define InvertedWidthPolarity ((u64)1 << 58) /* Instruction uses inverted REX2.W polarity */
>   
>   #define DstXacc     (DstAccLo | SrcAccHi | SrcWrite)
>   
> @@ -993,6 +994,16 @@ EM_ASM_2W(btc);
>   
>   EM_ASM_2R(cmp, cmp_r);
>   
> +static inline bool is_64bit_operand_size(struct x86_emulate_ctxt *ctxt)
> +{
> +	/*
> +	 * Most instructions interpret REX.W=1 as 64-bit operand size.
> +	 * Some REX2 opcodes invert this logic.
> +	 */
> +	return ctxt->d & InvertedWidthPolarity ?
> +	       ctxt->rex.bits.w == 0 : ctxt->rex.bits.w == 1;
> +}
> +
>   static int em_bsf_c(struct x86_emulate_ctxt *ctxt)
>   {
>   	/* If src is zero, do not writeback, but update flags */
> @@ -2472,7 +2483,7 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
>   
>   	setup_syscalls_segments(&cs, &ss);
>   
> -	if (ctxt->rex.bits.w)
> +	if (is_64bit_operand_size(ctxt))
>   		usermode = X86EMUL_MODE_PROT64;
>   	else
>   		usermode = X86EMUL_MODE_PROT32;
> @@ -4486,7 +4497,8 @@ static struct opcode rex2_opcode_table[256]  __ro_after_init;
>   static struct opcode rex2_twobyte_table[256] __ro_after_init;
>   
>   static const struct opcode undefined = D(Undefined);
> -static const struct opcode notimpl   = N;
> +static const struct opcode pfx_d5_a1 = I(SrcImm64 | NearBranch | IsBranch | InvertedWidthPolarity, \
> +					 em_jmp_abs);
>   
>   #undef D
>   #undef N
> @@ -4543,6 +4555,7 @@ static bool is_ibt_instruction(struct x86_emulate_ctxt *ctxt)
>   		return true;
>   	case SrcNone:
>   	case SrcImm:
> +	case SrcImm64:
>   	case SrcImmByte:
>   	/*
>   	 * Note, ImmU16 is used only for the stack adjustment operand on ENTER
> @@ -4895,9 +4908,6 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len, int
>   
>   done_prefixes:
>   
> -	if (ctxt->rex.bits.w)
> -		ctxt->op_bytes = 8;
> -
>   	/* Determine opcode byte(s): */
>   	if (ctxt->rex_prefix == REX2_INVALID) {
>   		/*
> @@ -4936,6 +4946,9 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len, int
>   	}
>   	ctxt->d = opcode.flags;
>   
> +	if (is_64bit_operand_size(ctxt))
> +		ctxt->op_bytes = 8;
> +
>   	if (ctxt->d & ModRM)
>   		ctxt->modrm = insn_fetch(u8, ctxt);
>   
> @@ -5594,6 +5607,6 @@ void __init kvm_init_rex2_opcode_table(void)
>   	undefine_row(&rex2_twobyte_table[0x30]);
>   	undefine_row(&rex2_twobyte_table[0x80]);
>   
> -	/* Mark opcode not yet implemented: */
> -	rex2_opcode_table[0xa1] = notimpl;
> +	/* Define the REX2-specific absolute jump (0xA1) opcode */
> +	rex2_opcode_table[0xa1] = pfx_d5_a1;
>   }

Re: [PATCH RFC v1 14/20] KVM: x86: Emulate REX2-prefixed 64-bit absolute jump

Posted by Chang S. Bae 2 months, 3 weeks ago

On 11/11/2025 8:39 AM, Paolo Bonzini wrote:
> 
> We can just add a NoRex bit and apply it to the six rows you touch in
> patch 13.

Yeah, I think that is much simpler and better. Attached is the diff for
this:
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index b2490e56cb71..5f36dbcec484 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -179,6 +179,7 @@
 #define TwoMemOp    ((u64)1 << 55)  /* Instruction has two memory operand */
 #define IsBranch    ((u64)1 << 56)  /* Instruction is considered a branch. */
 #define ShadowStack ((u64)1 << 57)  /* Instruction affects Shadow Stacks. */
+#define NoRex2      ((u64)1 << 58)  /* Instruction has no use of REX2 prefix */
 
 #define DstXacc     (DstAccLo | SrcAccHi | SrcWrite)
 
@@ -4247,7 +4248,7 @@ static const struct opcode opcode_table[256] = {
 	/* 0x38 - 0x3F */
 	I6ALU(NoWrite, em_cmp), N, N,
 	/* 0x40 - 0x4F */
-	X8(I(DstReg, em_inc)), X8(I(DstReg, em_dec)),
+	X8(I(DstReg | NoRex2, em_inc)), X8(I(DstReg | NoRex2, em_dec)),
 	/* 0x50 - 0x57 */
 	X8(I(SrcReg | Stack, em_push)),
 	/* 0x58 - 0x5F */
@@ -4265,7 +4266,7 @@ static const struct opcode opcode_table[256] = {
 	I2bvIP(DstDI | SrcDX | Mov | String | Unaligned, em_in, ins, check_perm_in), /* insb, insw/insd */
 	I2bvIP(SrcSI | DstDX | String, em_out, outs, check_perm_out), /* outsb, outsw/outsd */
 	/* 0x70 - 0x7F */
-	X16(D(SrcImmByte | NearBranch | IsBranch)),
+	X16(D(SrcImmByte | NearBranch | IsBranch | NoRex2)),
 	/* 0x80 - 0x87 */
 	G(ByteOp | DstMem | SrcImm, group1),
 	G(DstMem | SrcImm, group1),
@@ -4289,15 +4290,15 @@ static const struct opcode opcode_table[256] = {
 	II(ImplicitOps | Stack, em_popf, popf),
 	I(ImplicitOps, em_sahf), I(ImplicitOps, em_lahf),
 	/* 0xA0 - 0xA7 */
-	I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov),
-	I2bv(DstMem | SrcAcc | Mov | MemAbs | PageTable, em_mov),
-	I2bv(SrcSI | DstDI | Mov | String | TwoMemOp, em_mov),
-	I2bv(SrcSI | DstDI | String | NoWrite | TwoMemOp, em_cmp_r),
+	I2bv(DstAcc | SrcMem | Mov | MemAbs | NoRex2, em_mov),
+	I2bv(DstMem | SrcAcc | Mov | MemAbs | PageTable | NoRex2, em_mov),
+	I2bv(SrcSI | DstDI | Mov | String | TwoMemOp | NoRex2, em_mov),
+	I2bv(SrcSI | DstDI | String | NoWrite | TwoMemOp | NoRex2, em_cmp_r),
 	/* 0xA8 - 0xAF */
-	I2bv(DstAcc | SrcImm | NoWrite, em_test),
-	I2bv(SrcAcc | DstDI | Mov | String, em_mov),
-	I2bv(SrcSI | DstAcc | Mov | String, em_mov),
-	I2bv(SrcAcc | DstDI | String | NoWrite, em_cmp_r),
+	I2bv(DstAcc | SrcImm | NoWrite | NoRex2, em_test),
+	I2bv(SrcAcc | DstDI | Mov | String | NoRex2, em_mov),
+	I2bv(SrcSI | DstAcc | Mov | String | NoRex2, em_mov),
+	I2bv(SrcAcc | DstDI | String | NoWrite | NoRex2, em_cmp_r),
 	/* 0xB0 - 0xB7 */
 	X8(I(ByteOp | DstReg | SrcImm | Mov, em_mov)),
 	/* 0xB8 - 0xBF */
@@ -4327,17 +4328,17 @@ static const struct opcode opcode_table[256] = {
 	/* 0xD8 - 0xDF */
 	N, E(0, &escape_d9), N, E(0, &escape_db), N, E(0, &escape_dd), N, N,
 	/* 0xE0 - 0xE7 */
-	X3(I(SrcImmByte | NearBranch | IsBranch, em_loop)),
-	I(SrcImmByte | NearBranch | IsBranch, em_jcxz),
-	I2bvIP(SrcImmUByte | DstAcc, em_in,  in,  check_perm_in),
-	I2bvIP(SrcAcc | DstImmUByte, em_out, out, check_perm_out),
+	X3(I(SrcImmByte | NearBranch | IsBranch | NoRex2, em_loop)),
+	I(SrcImmByte | NearBranch | IsBranch | NoRex2, em_jcxz),
+	I2bvIP(SrcImmUByte | DstAcc | NoRex2, em_in,  in,  check_perm_in),
+	I2bvIP(SrcAcc | DstImmUByte | NoRex2, em_out, out, check_perm_out),
 	/* 0xE8 - 0xEF */
-	I(SrcImm | NearBranch | IsBranch | ShadowStack, em_call),
-	D(SrcImm | ImplicitOps | NearBranch | IsBranch),
-	I(SrcImmFAddr | No64 | IsBranch, em_jmp_far),
-	D(SrcImmByte | ImplicitOps | NearBranch | IsBranch),
-	I2bvIP(SrcDX | DstAcc, em_in,  in,  check_perm_in),
-	I2bvIP(SrcAcc | DstDX, em_out, out, check_perm_out),
+	I(SrcImm | NearBranch | IsBranch | ShadowStack | NoRex2, em_call),
+	D(SrcImm | ImplicitOps | NearBranch | IsBranch | NoRex2),
+	I(SrcImmFAddr | No64 | IsBranch | NoRex2, em_jmp_far),
+	D(SrcImmByte | ImplicitOps | NearBranch | IsBranch | NoRex2),
+	I2bvIP(SrcDX | DstAcc | NoRex2, em_in,  in,  check_perm_in),
+	I2bvIP(SrcAcc | DstDX | NoRex2, em_out, out, check_perm_out),
 	/* 0xF0 - 0xF7 */
 	N, DI(ImplicitOps, icebp), N, N,
 	DI(ImplicitOps | Priv, hlt), D(ImplicitOps),
@@ -4378,12 +4379,12 @@ static const struct opcode twobyte_table[256] = {
 	N, GP(ModRM | DstMem | SrcReg | Mov | Sse, &pfx_0f_2b),
 	N, N, N, N,
 	/* 0x30 - 0x3F */
-	II(ImplicitOps | Priv, em_wrmsr, wrmsr),
-	IIP(ImplicitOps, em_rdtsc, rdtsc, check_rdtsc),
-	II(ImplicitOps | Priv, em_rdmsr, rdmsr),
-	IIP(ImplicitOps, em_rdpmc, rdpmc, check_rdpmc),
-	I(ImplicitOps | EmulateOnUD | IsBranch | ShadowStack, em_sysenter),
-	I(ImplicitOps | Priv | EmulateOnUD | IsBranch | ShadowStack, em_sysexit),
+	II(ImplicitOps | Priv | NoRex2, em_wrmsr, wrmsr),
+	IIP(ImplicitOps | NoRex2, em_rdtsc, rdtsc, check_rdtsc),
+	II(ImplicitOps | Priv | NoRex2, em_rdmsr, rdmsr),
+	IIP(ImplicitOps | NoRex2, em_rdpmc, rdpmc, check_rdpmc),
+	I(ImplicitOps | EmulateOnUD | IsBranch | ShadowStack | NoRex2, em_sysenter),
+	I(ImplicitOps | Priv | EmulateOnUD | IsBranch | ShadowStack | NoRex2, em_sysexit),
 	N, N,
 	N, N, N, N, N, N, N, N,
 	/* 0x40 - 0x4F */
@@ -4401,7 +4402,7 @@ static const struct opcode twobyte_table[256] = {
 	N, N, N, N,
 	N, N, N, GP(SrcReg | DstMem | ModRM | Mov, &pfx_0f_6f_0f_7f),
 	/* 0x80 - 0x8F */
-	X16(D(SrcImm | NearBranch | IsBranch)),
+	X16(D(SrcImm | NearBranch | IsBranch | NoRex2)),
 	/* 0x90 - 0x9F */
 	X16(D(ByteOp | DstMem | SrcNone | ModRM| Mov)),
 	/* 0xA0 - 0xA7 */
@@ -4888,7 +4889,7 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len, int
 	if (ctxt->b == 0x0f) {
 		/* Escape byte: start two-byte opcode sequence */
 		ctxt->b = insn_fetch(u8, ctxt);
-		if (ctxt->b == 0x38) {
+		if (ctxt->b == 0x38 && ctxt->rex_prefix != REX2_PREFIX) {
 			/* Three-byte opcode */
 			ctxt->opcode_len = 3;
 			ctxt->b = insn_fetch(u8, ctxt);
@@ -4905,6 +4906,9 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len, int
 	}
 	ctxt->d = opcode.flags;
 
+	if (ctxt->d & NoRex2 && ctxt->rex_prefix == REX2_PREFIX)
+		ctxt->d = Undefined;
+
 	if (ctxt->d & ModRM)
 		ctxt->modrm = insn_fetch(u8, ctxt);