[PATCH RFC v1 10/20] KVM: x86: Refactor REX prefix handling in instruction emulation

Chang S. Bae posted 20 patches 3 months ago
[PATCH RFC v1 10/20] KVM: x86: Refactor REX prefix handling in instruction emulation
Posted by Chang S. Bae 3 months ago
Restructure how to represent and interpret REX fields. Specifically,

 * Repurpose the existing rex_prefix field to identify the prefix type
 * Introduce a new union to hold both REX and REX2 bitfields
 * Update decoder logic to interpret the unified data type

Historically, REX used the upper four bits of a signle byte as a fixed
identifier, with the lower bits encoded. REX2 extends this to two bytes.
The first byte identifies the prefix, and the second encodes additional
bits, preserving compatibility with legacy REX encoding.

Previously, the emulator stored the REX byte as-is, which cannot capture
REX2 semantics. This refactor prepares for REX2 decoding while preserving
current behavior.

No functional changes intended.

Signed-off-by: Chang S. Bae <chang.seok.bae@intel.com>
---
 arch/x86/kvm/emulate.c     | 33 ++++++++++++++++++---------------
 arch/x86/kvm/kvm_emulate.h | 31 ++++++++++++++++++++++++++++++-
 2 files changed, 48 insertions(+), 16 deletions(-)

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 4e3da5b497b8..763fbd139242 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -924,7 +924,7 @@ static void *decode_register(struct x86_emulate_ctxt *ctxt, u8 modrm_reg,
 			     int byteop)
 {
 	void *p;
-	int highbyte_regs = (ctxt->rex_prefix == 0) && byteop;
+	int highbyte_regs = (ctxt->rex_prefix == REX_NONE) && byteop;
 
 	if (highbyte_regs && modrm_reg >= 4 && modrm_reg < 8)
 		p = (unsigned char *)reg_rmw(ctxt, modrm_reg & 3) + 1;
@@ -1080,10 +1080,12 @@ static void decode_register_operand(struct x86_emulate_ctxt *ctxt,
 {
 	unsigned int reg;
 
-	if (ctxt->d & ModRM)
+	if (ctxt->d & ModRM) {
 		reg = ctxt->modrm_reg;
-	else
-		reg = (ctxt->b & 7) | ((ctxt->rex_prefix & 1) << 3);
+	} else {
+		reg = (ctxt->b & 7) |
+		      (ctxt->rex.bits.b3 * BIT(3));
+	}
 
 	if (ctxt->d & Sse) {
 		op->type = OP_XMM;
@@ -1122,9 +1124,9 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
 	int rc = X86EMUL_CONTINUE;
 	ulong modrm_ea = 0;
 
-	ctxt->modrm_reg = ((ctxt->rex_prefix << 1) & 8); /* REX.R */
-	index_reg = (ctxt->rex_prefix << 2) & 8; /* REX.X */
-	base_reg = (ctxt->rex_prefix << 3) & 8; /* REX.B */
+	ctxt->modrm_reg = ctxt->rex.bits.r3 * BIT(3);
+	index_reg       = ctxt->rex.bits.x3 * BIT(3);
+	base_reg        = ctxt->rex.bits.b3 * BIT(3);
 
 	ctxt->modrm_mod = (ctxt->modrm & 0xc0) >> 6;
 	ctxt->modrm_reg |= (ctxt->modrm & 0x38) >> 3;
@@ -2466,7 +2468,7 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
 
 	setup_syscalls_segments(&cs, &ss);
 
-	if ((ctxt->rex_prefix & 0x8) != 0x0)
+	if (ctxt->rex.bits.w)
 		usermode = X86EMUL_MODE_PROT64;
 	else
 		usermode = X86EMUL_MODE_PROT32;
@@ -4851,7 +4853,8 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len, int
 		case 0x40 ... 0x4f: /* REX */
 			if (mode != X86EMUL_MODE_PROT64)
 				goto done_prefixes;
-			ctxt->rex_prefix = ctxt->b;
+			ctxt->rex_prefix = REX_PREFIX;
+			ctxt->rex.raw    = 0x0f & ctxt->b;
 			continue;
 		case 0xf0:	/* LOCK */
 			ctxt->lock_prefix = 1;
@@ -4865,15 +4868,14 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len, int
 		}
 
 		/* Any legacy prefix after a REX prefix nullifies its effect. */
-
-		ctxt->rex_prefix = 0;
+		ctxt->rex_prefix = REX_NONE;
+		ctxt->rex.raw = 0;
 	}
 
 done_prefixes:
 
-	/* REX prefix. */
-	if (ctxt->rex_prefix & 8)
-		ctxt->op_bytes = 8;	/* REX.W */
+	if (ctxt->rex.bits.w)
+		ctxt->op_bytes = 8;
 
 	/* Opcode byte(s). */
 	opcode = opcode_table[ctxt->b];
@@ -5137,7 +5139,8 @@ void init_decode_cache(struct x86_emulate_ctxt *ctxt)
 {
 	/* Clear fields that are set conditionally but read without a guard. */
 	ctxt->rip_relative = false;
-	ctxt->rex_prefix = 0;
+	ctxt->rex_prefix = REX_NONE;
+	ctxt->rex.raw = 0;
 	ctxt->lock_prefix = 0;
 	ctxt->rep_prefix = 0;
 	ctxt->regs_valid = 0;
diff --git a/arch/x86/kvm/kvm_emulate.h b/arch/x86/kvm/kvm_emulate.h
index 153c70ea5561..b285299ebfa4 100644
--- a/arch/x86/kvm/kvm_emulate.h
+++ b/arch/x86/kvm/kvm_emulate.h
@@ -317,6 +317,32 @@ typedef void (*fastop_t)(struct fastop *);
 #define NR_EMULATOR_GPRS	8
 #endif
 
+/*
+ * REX prefix type to distinguish between no prefix, legacy REX, REX2,
+ * or an invalid REX2 sequence.
+ */
+enum rex_type {
+	REX_NONE,
+	REX_PREFIX,
+	REX2_PREFIX,
+	REX2_INVALID
+};
+
+/* Unified representation for REX/REX2 prefix bits */
+union rex_field {
+	struct {
+		u8 b3 :1, /* REX2.B3 or REX.B */
+		   x3 :1, /* REX2.X3 or REX.X */
+		   r3 :1, /* REX2.R3 or REX.R */
+		   w  :1, /* REX2.W  or REX.W */
+		   b4 :1, /* REX2.B4 */
+		   x4 :1, /* REX2.X4 */
+		   r4 :1, /* REX2.R4 */
+		   m0 :1; /* REX2.M0 */
+	} bits;
+	u8 raw;
+};
+
 struct x86_emulate_ctxt {
 	void *vcpu;
 	const struct x86_emulate_ops *ops;
@@ -357,7 +383,10 @@ struct x86_emulate_ctxt {
 	int (*check_perm)(struct x86_emulate_ctxt *ctxt);
 
 	bool rip_relative;
-	u8 rex_prefix;
+	/* Type of REX prefix (none, REX, REX2) */
+	enum rex_type rex_prefix;
+	/* Rex bits */
+	union rex_field rex;
 	u8 lock_prefix;
 	u8 rep_prefix;
 	/* bitmaps of registers in _regs[] that can be read */
-- 
2.51.0
Re: [PATCH RFC v1 10/20] KVM: x86: Refactor REX prefix handling in instruction emulation
Posted by Paolo Bonzini 2 months, 4 weeks ago
On 11/10/25 19:01, Chang S. Bae wrote:
> Restructure how to represent and interpret REX fields. Specifically,
> 
>   * Repurpose the existing rex_prefix field to identify the prefix type
>   * Introduce a new union to hold both REX and REX2 bitfields
>   * Update decoder logic to interpret the unified data type
> 
> Historically, REX used the upper four bits of a signle byte as a fixed
> identifier, with the lower bits encoded. REX2 extends this to two bytes.
> The first byte identifies the prefix, and the second encodes additional
> bits, preserving compatibility with legacy REX encoding.
> 
> Previously, the emulator stored the REX byte as-is, which cannot capture
> REX2 semantics. This refactor prepares for REX2 decoding while preserving
> current behavior.
> 
> No functional changes intended.
> 
> Signed-off-by: Chang S. Bae <chang.seok.bae@intel.com>

Good idea; having to add 0x40 to rex_prefix when decoding VEX was too
mysterious, I'll include something like this in my VEX patches.  Here
is the commit message I came up with:

commit fc8aa5c45d558393069a1c89b7a64e059b8f9418
Author: Chang S. Bae <chang.seok.bae@intel.com>
Date:   Mon Nov 10 18:01:21 2025 +0000

     KVM: x86: Refactor REX prefix handling in instruction emulation
     
     Restructure how to represent and interpret REX fields, preparing
     for handling of VEX and REX2.
     
     REX uses the upper four bits of a single byte as a fixed identifier,
     and the lower four bits containing the data. VEX and REX2 extend this so
     that the first byte identifies the prefix and the rest encode additional
     bits; and while VEX only has the same four data bits as REX, eight zero
     bits are a valid value for the data bits of REX2.  So, stop storing the
     REX byte as-is.  Instead, store only the low bits of the REX prefix and
     track separately whether a REX-like prefix wasused.
     
     No functional changes intended.
     
     Signed-off-by: Chang S. Bae <chang.seok.bae@intel.com>
     Message-ID: <20251110180131.28264-11-chang.seok.bae@intel.com>
     [Extracted from APX series; removed bitfields and REX2-specific
      definitions. - Paolo]
     Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>

Paolo

> ---
>   arch/x86/kvm/emulate.c     | 33 ++++++++++++++++++---------------
>   arch/x86/kvm/kvm_emulate.h | 31 ++++++++++++++++++++++++++++++-
>   2 files changed, 48 insertions(+), 16 deletions(-)
> 
> diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
> index 4e3da5b497b8..763fbd139242 100644
> --- a/arch/x86/kvm/emulate.c
> +++ b/arch/x86/kvm/emulate.c
> @@ -924,7 +924,7 @@ static void *decode_register(struct x86_emulate_ctxt *ctxt, u8 modrm_reg,
>   			     int byteop)
>   {
>   	void *p;
> -	int highbyte_regs = (ctxt->rex_prefix == 0) && byteop;
> +	int highbyte_regs = (ctxt->rex_prefix == REX_NONE) && byteop;
>   
>   	if (highbyte_regs && modrm_reg >= 4 && modrm_reg < 8)
>   		p = (unsigned char *)reg_rmw(ctxt, modrm_reg & 3) + 1;
> @@ -1080,10 +1080,12 @@ static void decode_register_operand(struct x86_emulate_ctxt *ctxt,
>   {
>   	unsigned int reg;
>   
> -	if (ctxt->d & ModRM)
> +	if (ctxt->d & ModRM) {
>   		reg = ctxt->modrm_reg;
> -	else
> -		reg = (ctxt->b & 7) | ((ctxt->rex_prefix & 1) << 3);
> +	} else {
> +		reg = (ctxt->b & 7) |
> +		      (ctxt->rex.bits.b3 * BIT(3));
> +	}
>   
>   	if (ctxt->d & Sse) {
>   		op->type = OP_XMM;
> @@ -1122,9 +1124,9 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
>   	int rc = X86EMUL_CONTINUE;
>   	ulong modrm_ea = 0;
>   
> -	ctxt->modrm_reg = ((ctxt->rex_prefix << 1) & 8); /* REX.R */
> -	index_reg = (ctxt->rex_prefix << 2) & 8; /* REX.X */
> -	base_reg = (ctxt->rex_prefix << 3) & 8; /* REX.B */
> +	ctxt->modrm_reg = ctxt->rex.bits.r3 * BIT(3);
> +	index_reg       = ctxt->rex.bits.x3 * BIT(3);
> +	base_reg        = ctxt->rex.bits.b3 * BIT(3);
>   
>   	ctxt->modrm_mod = (ctxt->modrm & 0xc0) >> 6;
>   	ctxt->modrm_reg |= (ctxt->modrm & 0x38) >> 3;
> @@ -2466,7 +2468,7 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
>   
>   	setup_syscalls_segments(&cs, &ss);
>   
> -	if ((ctxt->rex_prefix & 0x8) != 0x0)
> +	if (ctxt->rex.bits.w)
>   		usermode = X86EMUL_MODE_PROT64;
>   	else
>   		usermode = X86EMUL_MODE_PROT32;
> @@ -4851,7 +4853,8 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len, int
>   		case 0x40 ... 0x4f: /* REX */
>   			if (mode != X86EMUL_MODE_PROT64)
>   				goto done_prefixes;
> -			ctxt->rex_prefix = ctxt->b;
> +			ctxt->rex_prefix = REX_PREFIX;
> +			ctxt->rex.raw    = 0x0f & ctxt->b;
>   			continue;
>   		case 0xf0:	/* LOCK */
>   			ctxt->lock_prefix = 1;
> @@ -4865,15 +4868,14 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len, int
>   		}
>   
>   		/* Any legacy prefix after a REX prefix nullifies its effect. */
> -
> -		ctxt->rex_prefix = 0;
> +		ctxt->rex_prefix = REX_NONE;
> +		ctxt->rex.raw = 0;
>   	}
>   
>   done_prefixes:
>   
> -	/* REX prefix. */
> -	if (ctxt->rex_prefix & 8)
> -		ctxt->op_bytes = 8;	/* REX.W */
> +	if (ctxt->rex.bits.w)
> +		ctxt->op_bytes = 8;
>   
>   	/* Opcode byte(s). */
>   	opcode = opcode_table[ctxt->b];
> @@ -5137,7 +5139,8 @@ void init_decode_cache(struct x86_emulate_ctxt *ctxt)
>   {
>   	/* Clear fields that are set conditionally but read without a guard. */
>   	ctxt->rip_relative = false;
> -	ctxt->rex_prefix = 0;
> +	ctxt->rex_prefix = REX_NONE;
> +	ctxt->rex.raw = 0;
>   	ctxt->lock_prefix = 0;
>   	ctxt->rep_prefix = 0;
>   	ctxt->regs_valid = 0;
> diff --git a/arch/x86/kvm/kvm_emulate.h b/arch/x86/kvm/kvm_emulate.h
> index 153c70ea5561..b285299ebfa4 100644
> --- a/arch/x86/kvm/kvm_emulate.h
> +++ b/arch/x86/kvm/kvm_emulate.h
> @@ -317,6 +317,32 @@ typedef void (*fastop_t)(struct fastop *);
>   #define NR_EMULATOR_GPRS	8
>   #endif
>   
> +/*
> + * REX prefix type to distinguish between no prefix, legacy REX, REX2,
> + * or an invalid REX2 sequence.
> + */
> +enum rex_type {
> +	REX_NONE,
> +	REX_PREFIX,
> +	REX2_PREFIX,
> +	REX2_INVALID
> +};
> +
> +/* Unified representation for REX/REX2 prefix bits */
> +union rex_field {
> +	struct {
> +		u8 b3 :1, /* REX2.B3 or REX.B */
> +		   x3 :1, /* REX2.X3 or REX.X */
> +		   r3 :1, /* REX2.R3 or REX.R */
> +		   w  :1, /* REX2.W  or REX.W */
> +		   b4 :1, /* REX2.B4 */
> +		   x4 :1, /* REX2.X4 */
> +		   r4 :1, /* REX2.R4 */
> +		   m0 :1; /* REX2.M0 */
> +	} bits;
> +	u8 raw;
> +};
> +
>   struct x86_emulate_ctxt {
>   	void *vcpu;
>   	const struct x86_emulate_ops *ops;
> @@ -357,7 +383,10 @@ struct x86_emulate_ctxt {
>   	int (*check_perm)(struct x86_emulate_ctxt *ctxt);
>   
>   	bool rip_relative;
> -	u8 rex_prefix;
> +	/* Type of REX prefix (none, REX, REX2) */
> +	enum rex_type rex_prefix;
> +	/* Rex bits */
> +	union rex_field rex;
>   	u8 lock_prefix;
>   	u8 rep_prefix;
>   	/* bitmaps of registers in _regs[] that can be read */
Re: [PATCH RFC v1 10/20] KVM: x86: Refactor REX prefix handling in instruction emulation
Posted by Chang S. Bae 2 months, 3 weeks ago
On 11/11/2025 10:17 AM, Paolo Bonzini wrote:
> 
> commit fc8aa5c45d558393069a1c89b7a64e059b8f9418
> Author: Chang S. Bae <chang.seok.bae@intel.com>
> Date:   Mon Nov 10 18:01:21 2025 +0000
> 
>      KVM: x86: Refactor REX prefix handling in instruction emulation
>      Restructure how to represent and interpret REX fields, preparing
>      for handling of VEX and REX2.
>      REX uses the upper four bits of a single byte as a fixed identifier,
>      and the lower four bits containing the data. VEX and REX2 extend 
> this so
>      that the first byte identifies the prefix and the rest encode 
> additional
>      bits; and while VEX only has the same four data bits as REX, eight 
> zero
>      bits are a valid value for the data bits of REX2.  So, stop storing 
> the
>      REX byte as-is.  Instead, store only the low bits of the REX prefix 
> and
>      track separately whether a REX-like prefix wasused.
>      No functional changes intended.
>      Signed-off-by: Chang S. Bae <chang.seok.bae@intel.com>
>      Message-ID: <20251110180131.28264-11-chang.seok.bae@intel.com>
>      [Extracted from APX series; removed bitfields and REX2-specific
>       definitions. - Paolo]
>      Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>

Sure, I think it's good to have consistent handling like this for these
extended prefixes across the board. Thanks!