Add support for the new absolute jump, previously unimplemented.
This instruction has an unusual quirk: the REX2.W bit uses inverted
polarity. Unlike normal REX or REX2 semantics (where W=1 indicates a
64-bit operand size), this instruction uses W=0 to select an 8-byte
operand size.
Add the new InvertedWidthPolarity flag and a helper,
is_64bit_operand_size(), to interpret the W bit correctly, avoiding
special-case hacks in the emulator logic.
Since ctxt->op_bytes now depends on the instruction flags, determine
the operand size after the opcode lookup rather than before it.
Signed-off-by: Chang S. Bae <chang.seok.bae@intel.com>
---
arch/x86/kvm/emulate.c | 27 ++++++++++++++++++++-------
1 file changed, 20 insertions(+), 7 deletions(-)
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 58879a31abcd..03f8e007b14e 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -179,6 +179,7 @@
#define TwoMemOp ((u64)1 << 55) /* Instruction has two memory operand */
#define IsBranch ((u64)1 << 56) /* Instruction is considered a branch. */
#define ShadowStack ((u64)1 << 57) /* Instruction affects Shadow Stacks. */
+#define InvertedWidthPolarity ((u64)1 << 58) /* Instruction uses inverted REX2.W polarity */
#define DstXacc (DstAccLo | SrcAccHi | SrcWrite)
@@ -993,6 +994,16 @@ EM_ASM_2W(btc);
EM_ASM_2R(cmp, cmp_r);
+static inline bool is_64bit_operand_size(struct x86_emulate_ctxt *ctxt)
+{
+ /*
+ * Most instructions interpret REX.W=1 as 64-bit operand size.
+ * Some REX2 opcodes invert this logic.
+ */
+ return ctxt->d & InvertedWidthPolarity ?
+ ctxt->rex.bits.w == 0 : ctxt->rex.bits.w == 1;
+}
+
static int em_bsf_c(struct x86_emulate_ctxt *ctxt)
{
/* If src is zero, do not writeback, but update flags */
@@ -2472,7 +2483,7 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
setup_syscalls_segments(&cs, &ss);
- if (ctxt->rex.bits.w)
+ if (is_64bit_operand_size(ctxt))
usermode = X86EMUL_MODE_PROT64;
else
usermode = X86EMUL_MODE_PROT32;
@@ -4486,7 +4497,8 @@ static struct opcode rex2_opcode_table[256] __ro_after_init;
static struct opcode rex2_twobyte_table[256] __ro_after_init;
static const struct opcode undefined = D(Undefined);
-static const struct opcode notimpl = N;
+static const struct opcode pfx_d5_a1 = I(SrcImm64 | NearBranch | IsBranch | InvertedWidthPolarity, \
+ em_jmp_abs);
#undef D
#undef N
@@ -4543,6 +4555,7 @@ static bool is_ibt_instruction(struct x86_emulate_ctxt *ctxt)
return true;
case SrcNone:
case SrcImm:
+ case SrcImm64:
case SrcImmByte:
/*
* Note, ImmU16 is used only for the stack adjustment operand on ENTER
@@ -4895,9 +4908,6 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len, int
done_prefixes:
- if (ctxt->rex.bits.w)
- ctxt->op_bytes = 8;
-
/* Determine opcode byte(s): */
if (ctxt->rex_prefix == REX2_INVALID) {
/*
@@ -4936,6 +4946,9 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len, int
}
ctxt->d = opcode.flags;
+ if (is_64bit_operand_size(ctxt))
+ ctxt->op_bytes = 8;
+
if (ctxt->d & ModRM)
ctxt->modrm = insn_fetch(u8, ctxt);
@@ -5594,6 +5607,6 @@ void __init kvm_init_rex2_opcode_table(void)
undefine_row(&rex2_twobyte_table[0x30]);
undefine_row(&rex2_twobyte_table[0x80]);
- /* Mark opcode not yet implemented: */
- rex2_opcode_table[0xa1] = notimpl;
+ /* Define the REX2-specific absolute jump (0xA1) opcode */
+ rex2_opcode_table[0xa1] = pfx_d5_a1;
}
--
2.51.0
On 11/10/25 19:01, Chang S. Bae wrote:
> Add support for the new absolute jump, previously unimplemented.
>
> This instruction has an unusual quirk: the REX2.W bit uses inverted
> polarity. Unlike normal REX or REX2 semantics (where W=1 indicates a
> 64-bit operand size), this instruction uses W=0 to select an 8-byte
> operand size.
>
> The new InvertedWidthPolarity flag and its helper to interpret the
> W bit correctly, avoiding special-case hacks in the emulator logic.
>
> Since the ctxt->op_bytes depends on the instruction flags, the size
> should be determined after the instruction lookup.
>
> Signed-off-by: Chang S. Bae <chang.seok.bae@intel.com>
I think this is not needed. Emulation of non-memory operations, in
practice, is only needed to support big real mode on very old processors.
We can just add a NoRex bit and apply it to the six rows you touch in
patch 13.
Paolo
> ---
> arch/x86/kvm/emulate.c | 27 ++++++++++++++++++++-------
> 1 file changed, 20 insertions(+), 7 deletions(-)
>
> diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
> index 58879a31abcd..03f8e007b14e 100644
> --- a/arch/x86/kvm/emulate.c
> +++ b/arch/x86/kvm/emulate.c
> @@ -179,6 +179,7 @@
> #define TwoMemOp ((u64)1 << 55) /* Instruction has two memory operand */
> #define IsBranch ((u64)1 << 56) /* Instruction is considered a branch. */
> #define ShadowStack ((u64)1 << 57) /* Instruction affects Shadow Stacks. */
> +#define InvertedWidthPolarity ((u64)1 << 58) /* Instruction uses inverted REX2.W polarity */
>
> #define DstXacc (DstAccLo | SrcAccHi | SrcWrite)
>
> @@ -993,6 +994,16 @@ EM_ASM_2W(btc);
>
> EM_ASM_2R(cmp, cmp_r);
>
> +static inline bool is_64bit_operand_size(struct x86_emulate_ctxt *ctxt)
> +{
> + /*
> + * Most instructions interpret REX.W=1 as 64-bit operand size.
> + * Some REX2 opcodes invert this logic.
> + */
> + return ctxt->d & InvertedWidthPolarity ?
> + ctxt->rex.bits.w == 0 : ctxt->rex.bits.w == 1;
> +}
> +
> static int em_bsf_c(struct x86_emulate_ctxt *ctxt)
> {
> /* If src is zero, do not writeback, but update flags */
> @@ -2472,7 +2483,7 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
>
> setup_syscalls_segments(&cs, &ss);
>
> - if (ctxt->rex.bits.w)
> + if (is_64bit_operand_size(ctxt))
> usermode = X86EMUL_MODE_PROT64;
> else
> usermode = X86EMUL_MODE_PROT32;
> @@ -4486,7 +4497,8 @@ static struct opcode rex2_opcode_table[256] __ro_after_init;
> static struct opcode rex2_twobyte_table[256] __ro_after_init;
>
> static const struct opcode undefined = D(Undefined);
> -static const struct opcode notimpl = N;
> +static const struct opcode pfx_d5_a1 = I(SrcImm64 | NearBranch | IsBranch | InvertedWidthPolarity, \
> + em_jmp_abs);
>
> #undef D
> #undef N
> @@ -4543,6 +4555,7 @@ static bool is_ibt_instruction(struct x86_emulate_ctxt *ctxt)
> return true;
> case SrcNone:
> case SrcImm:
> + case SrcImm64:
> case SrcImmByte:
> /*
> * Note, ImmU16 is used only for the stack adjustment operand on ENTER
> @@ -4895,9 +4908,6 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len, int
>
> done_prefixes:
>
> - if (ctxt->rex.bits.w)
> - ctxt->op_bytes = 8;
> -
> /* Determine opcode byte(s): */
> if (ctxt->rex_prefix == REX2_INVALID) {
> /*
> @@ -4936,6 +4946,9 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len, int
> }
> ctxt->d = opcode.flags;
>
> + if (is_64bit_operand_size(ctxt))
> + ctxt->op_bytes = 8;
> +
> if (ctxt->d & ModRM)
> ctxt->modrm = insn_fetch(u8, ctxt);
>
> @@ -5594,6 +5607,6 @@ void __init kvm_init_rex2_opcode_table(void)
> undefine_row(&rex2_twobyte_table[0x30]);
> undefine_row(&rex2_twobyte_table[0x80]);
>
> - /* Mark opcode not yet implemented: */
> - rex2_opcode_table[0xa1] = notimpl;
> + /* Define the REX2-specific absolute jump (0xA1) opcode */
> + rex2_opcode_table[0xa1] = pfx_d5_a1;
> }
On 11/11/2025 8:39 AM, Paolo Bonzini wrote:
>
> We can just add a NoRex bit and apply it to the six reows you touch in
> patch 13.
Yeah, I think that is much simpler and better. Attached is the diff for
this:
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index b2490e56cb71..5f36dbcec484 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -179,6 +179,7 @@
#define TwoMemOp ((u64)1 << 55) /* Instruction has two memory operand */
#define IsBranch ((u64)1 << 56) /* Instruction is considered a branch. */
#define ShadowStack ((u64)1 << 57) /* Instruction affects Shadow Stacks. */
+#define NoRex2 ((u64)1 << 58) /* Instruction has no use of REX2 prefix */
#define DstXacc (DstAccLo | SrcAccHi | SrcWrite)
@@ -4247,7 +4248,7 @@ static const struct opcode opcode_table[256] = {
/* 0x38 - 0x3F */
I6ALU(NoWrite, em_cmp), N, N,
/* 0x40 - 0x4F */
- X8(I(DstReg, em_inc)), X8(I(DstReg, em_dec)),
+ X8(I(DstReg | NoRex2, em_inc)), X8(I(DstReg | NoRex2, em_dec)),
/* 0x50 - 0x57 */
X8(I(SrcReg | Stack, em_push)),
/* 0x58 - 0x5F */
@@ -4265,7 +4266,7 @@ static const struct opcode opcode_table[256] = {
I2bvIP(DstDI | SrcDX | Mov | String | Unaligned, em_in, ins, check_perm_in), /* insb, insw/insd */
I2bvIP(SrcSI | DstDX | String, em_out, outs, check_perm_out), /* outsb, outsw/outsd */
/* 0x70 - 0x7F */
- X16(D(SrcImmByte | NearBranch | IsBranch)),
+ X16(D(SrcImmByte | NearBranch | IsBranch | NoRex2)),
/* 0x80 - 0x87 */
G(ByteOp | DstMem | SrcImm, group1),
G(DstMem | SrcImm, group1),
@@ -4289,15 +4290,15 @@ static const struct opcode opcode_table[256] = {
II(ImplicitOps | Stack, em_popf, popf),
I(ImplicitOps, em_sahf), I(ImplicitOps, em_lahf),
/* 0xA0 - 0xA7 */
- I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov),
- I2bv(DstMem | SrcAcc | Mov | MemAbs | PageTable, em_mov),
- I2bv(SrcSI | DstDI | Mov | String | TwoMemOp, em_mov),
- I2bv(SrcSI | DstDI | String | NoWrite | TwoMemOp, em_cmp_r),
+ I2bv(DstAcc | SrcMem | Mov | MemAbs | NoRex2, em_mov),
+ I2bv(DstMem | SrcAcc | Mov | MemAbs | PageTable | NoRex2, em_mov),
+ I2bv(SrcSI | DstDI | Mov | String | TwoMemOp | NoRex2, em_mov),
+ I2bv(SrcSI | DstDI | String | NoWrite | TwoMemOp | NoRex2, em_cmp_r),
/* 0xA8 - 0xAF */
- I2bv(DstAcc | SrcImm | NoWrite, em_test),
- I2bv(SrcAcc | DstDI | Mov | String, em_mov),
- I2bv(SrcSI | DstAcc | Mov | String, em_mov),
- I2bv(SrcAcc | DstDI | String | NoWrite, em_cmp_r),
+ I2bv(DstAcc | SrcImm | NoWrite | NoRex2, em_test),
+ I2bv(SrcAcc | DstDI | Mov | String | NoRex2, em_mov),
+ I2bv(SrcSI | DstAcc | Mov | String | NoRex2, em_mov),
+ I2bv(SrcAcc | DstDI | String | NoWrite | NoRex2, em_cmp_r),
/* 0xB0 - 0xB7 */
X8(I(ByteOp | DstReg | SrcImm | Mov, em_mov)),
/* 0xB8 - 0xBF */
@@ -4327,17 +4328,17 @@ static const struct opcode opcode_table[256] = {
/* 0xD8 - 0xDF */
N, E(0, &escape_d9), N, E(0, &escape_db), N, E(0, &escape_dd), N, N,
/* 0xE0 - 0xE7 */
- X3(I(SrcImmByte | NearBranch | IsBranch, em_loop)),
- I(SrcImmByte | NearBranch | IsBranch, em_jcxz),
- I2bvIP(SrcImmUByte | DstAcc, em_in, in, check_perm_in),
- I2bvIP(SrcAcc | DstImmUByte, em_out, out, check_perm_out),
+ X3(I(SrcImmByte | NearBranch | IsBranch | NoRex2, em_loop)),
+ I(SrcImmByte | NearBranch | IsBranch | NoRex2, em_jcxz),
+ I2bvIP(SrcImmUByte | DstAcc | NoRex2, em_in, in, check_perm_in),
+ I2bvIP(SrcAcc | DstImmUByte | NoRex2, em_out, out, check_perm_out),
/* 0xE8 - 0xEF */
- I(SrcImm | NearBranch | IsBranch | ShadowStack, em_call),
- D(SrcImm | ImplicitOps | NearBranch | IsBranch),
- I(SrcImmFAddr | No64 | IsBranch, em_jmp_far),
- D(SrcImmByte | ImplicitOps | NearBranch | IsBranch),
- I2bvIP(SrcDX | DstAcc, em_in, in, check_perm_in),
- I2bvIP(SrcAcc | DstDX, em_out, out, check_perm_out),
+ I(SrcImm | NearBranch | IsBranch | ShadowStack | NoRex2, em_call),
+ D(SrcImm | ImplicitOps | NearBranch | IsBranch | NoRex2),
+ I(SrcImmFAddr | No64 | IsBranch | NoRex2, em_jmp_far),
+ D(SrcImmByte | ImplicitOps | NearBranch | IsBranch | NoRex2),
+ I2bvIP(SrcDX | DstAcc | NoRex2, em_in, in, check_perm_in),
+ I2bvIP(SrcAcc | DstDX | NoRex2, em_out, out, check_perm_out),
/* 0xF0 - 0xF7 */
N, DI(ImplicitOps, icebp), N, N,
DI(ImplicitOps | Priv, hlt), D(ImplicitOps),
@@ -4378,12 +4379,12 @@ static const struct opcode twobyte_table[256] = {
N, GP(ModRM | DstMem | SrcReg | Mov | Sse, &pfx_0f_2b),
N, N, N, N,
/* 0x30 - 0x3F */
- II(ImplicitOps | Priv, em_wrmsr, wrmsr),
- IIP(ImplicitOps, em_rdtsc, rdtsc, check_rdtsc),
- II(ImplicitOps | Priv, em_rdmsr, rdmsr),
- IIP(ImplicitOps, em_rdpmc, rdpmc, check_rdpmc),
- I(ImplicitOps | EmulateOnUD | IsBranch | ShadowStack, em_sysenter),
- I(ImplicitOps | Priv | EmulateOnUD | IsBranch | ShadowStack, em_sysexit),
+ II(ImplicitOps | Priv | NoRex2, em_wrmsr, wrmsr),
+ IIP(ImplicitOps | NoRex2, em_rdtsc, rdtsc, check_rdtsc),
+ II(ImplicitOps | Priv | NoRex2, em_rdmsr, rdmsr),
+ IIP(ImplicitOps | NoRex2, em_rdpmc, rdpmc, check_rdpmc),
+ I(ImplicitOps | EmulateOnUD | IsBranch | ShadowStack | NoRex2, em_sysenter),
+ I(ImplicitOps | Priv | EmulateOnUD | IsBranch | ShadowStack | NoRex2, em_sysexit),
N, N,
N, N, N, N, N, N, N, N,
/* 0x40 - 0x4F */
@@ -4401,7 +4402,7 @@ static const struct opcode twobyte_table[256] = {
N, N, N, N,
N, N, N, GP(SrcReg | DstMem | ModRM | Mov, &pfx_0f_6f_0f_7f),
/* 0x80 - 0x8F */
- X16(D(SrcImm | NearBranch | IsBranch)),
+ X16(D(SrcImm | NearBranch | IsBranch | NoRex2)),
/* 0x90 - 0x9F */
X16(D(ByteOp | DstMem | SrcNone | ModRM| Mov)),
/* 0xA0 - 0xA7 */
@@ -4888,7 +4889,7 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len, int
if (ctxt->b == 0x0f) {
/* Escape byte: start two-byte opcode sequence */
ctxt->b = insn_fetch(u8, ctxt);
- if (ctxt->b == 0x38) {
+ if (ctxt->b == 0x38 && ctxt->rex_prefix != REX2_PREFIX) {
/* Three-byte opcode */
ctxt->opcode_len = 3;
ctxt->b = insn_fetch(u8, ctxt);
@@ -4905,6 +4906,9 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len, int
}
ctxt->d = opcode.flags;
+ if (ctxt->d & NoRex2 && ctxt->rex_prefix == REX2_PREFIX)
+ ctxt->d = Undefined;
+
if (ctxt->d & ModRM)
ctxt->modrm = insn_fetch(u8, ctxt);
© 2016 - 2026 Red Hat, Inc.