[PATCH v2 02/27] target/i386/emulate: Allow instruction decoding from stream

Magnus Kulke posted 27 patches 4 months, 2 weeks ago
Maintainers: Paolo Bonzini <pbonzini@redhat.com>, Richard Henderson <richard.henderson@linaro.org>, "Alex Bennée" <alex.bennee@linaro.org>, "Daniel P. Berrangé" <berrange@redhat.com>, Thomas Huth <thuth@redhat.com>, Markus Armbruster <armbru@redhat.com>, "Philippe Mathieu-Daudé" <philmd@linaro.org>, "Michael S. Tsirkin" <mst@redhat.com>, Cornelia Huck <cohuck@redhat.com>, "Marc-André Lureau" <marcandre.lureau@redhat.com>, Zhao Liu <zhao1.liu@intel.com>, Cameron Esfahani <dirty@apple.com>, Roman Bolshakov <rbolshakov@ddn.com>, Phil Dennis-Jordan <phil@philjordan.eu>, Wei Liu <wei.liu@kernel.org>
There is a newer version of this series
[PATCH v2 02/27] target/i386/emulate: Allow instruction decoding from stream
Posted by Magnus Kulke 4 months, 2 weeks ago
Introduce a new helper function to decode x86 instructions from a
raw instruction byte stream. MSHV delivers an instruction stream in a
buffer of the vm_exit message. It can be used to speed up MMIO
emulation, since instructions do not have to be fetched and translated.

Add a "fetch_instruction()" op to struct x86_emul_ops to improve
traceability.

Signed-off-by: Magnus Kulke <magnuskulke@linux.microsoft.com>
---
 target/i386/emulate/x86_decode.c | 31 +++++++++++++++++++++++++++----
 target/i386/emulate/x86_decode.h | 10 ++++++++++
 target/i386/emulate/x86_emu.c    |  3 ++-
 target/i386/emulate/x86_emu.h    |  1 +
 4 files changed, 40 insertions(+), 5 deletions(-)

diff --git a/target/i386/emulate/x86_decode.c b/target/i386/emulate/x86_decode.c
index 2eca39802e..133065b50a 100644
--- a/target/i386/emulate/x86_decode.c
+++ b/target/i386/emulate/x86_decode.c
@@ -60,6 +60,7 @@ static inline uint64_t decode_bytes(CPUX86State *env, struct x86_decode *decode,
                                     int size)
 {
     uint64_t val = 0;
+    target_ulong va;
 
     switch (size) {
     case 1:
@@ -71,10 +72,17 @@ static inline uint64_t decode_bytes(CPUX86State *env, struct x86_decode *decode,
         VM_PANIC_EX("%s invalid size %d\n", __func__, size);
         break;
     }
-    target_ulong va  = linear_rip(env_cpu(env), env->eip) + decode->len;
-    emul_ops->read_mem(env_cpu(env), &val, va, size);
+
+    /* copy the bytes from the instruction stream, if available */
+    if (decode->stream && decode->len + size <= decode->stream->len) {
+        memcpy(&val, decode->stream->bytes + decode->len, size);
+    } else {
+        va = linear_rip(env_cpu(env), env->eip) + decode->len;
+        emul_ops->fetch_instruction(env_cpu(env), &val, va, size);
+    }
     decode->len += size;
-    
+
+
     return val;
 }
 
@@ -2076,9 +2084,10 @@ static void decode_opcodes(CPUX86State *env, struct x86_decode *decode)
     }
 }
 
-uint32_t decode_instruction(CPUX86State *env, struct x86_decode *decode)
+static uint32_t decode_opcode(CPUX86State *env, struct x86_decode *decode)
 {
     memset(decode, 0, sizeof(*decode));
+
     decode_prefix(env, decode);
     set_addressing_size(env, decode);
     set_operand_size(env, decode);
@@ -2088,6 +2097,20 @@ uint32_t decode_instruction(CPUX86State *env, struct x86_decode *decode)
     return decode->len;
 }
 
+uint32_t decode_instruction(CPUX86State *env, struct x86_decode *decode)
+{
+    return decode_opcode(env, decode);
+}
+
+uint32_t decode_instruction_stream(CPUX86State *env, struct x86_decode *decode,
+                                   struct x86_insn_stream *stream)
+{
+    if (stream != NULL) {
+        decode->stream = stream;
+    }
+    return decode_opcode(env, decode);
+}
+
 void init_decoder(void)
 {
     int i;
diff --git a/target/i386/emulate/x86_decode.h b/target/i386/emulate/x86_decode.h
index 927645af1a..f5e9738914 100644
--- a/target/i386/emulate/x86_decode.h
+++ b/target/i386/emulate/x86_decode.h
@@ -272,6 +272,11 @@ typedef struct x86_decode_op {
     };
 } x86_decode_op;
 
+typedef struct x86_insn_stream {
+    const uint8_t *bytes;
+    size_t len;
+} x86_insn_stream;
+
 typedef struct x86_decode {
     int len;
     uint8_t opcode[4];
@@ -298,11 +303,16 @@ typedef struct x86_decode {
     struct x86_modrm modrm;
     struct x86_decode_op op[4];
     bool is_fpu;
+
+	x86_insn_stream *stream;
 } x86_decode;
 
 uint64_t sign(uint64_t val, int size);
 
 uint32_t decode_instruction(CPUX86State *env, struct x86_decode *decode);
+uint32_t decode_instruction_stream(CPUX86State *env,
+								   struct x86_decode *decode,
+		                           struct x86_insn_stream *stream);
 
 void *get_reg_ref(CPUX86State *env, int reg, int rex_present,
                   int is_extended, int size);
diff --git a/target/i386/emulate/x86_emu.c b/target/i386/emulate/x86_emu.c
index db7a7f7437..f7e6bf01bc 100644
--- a/target/i386/emulate/x86_emu.c
+++ b/target/i386/emulate/x86_emu.c
@@ -1246,7 +1246,8 @@ static void init_cmd_handler(void)
 bool exec_instruction(CPUX86State *env, struct x86_decode *ins)
 {
     if (!_cmd_handler[ins->cmd].handler) {
-        printf("Unimplemented handler (" TARGET_FMT_lx ") for %d (%x %x) \n", env->eip,
+        printf("Unimplemented handler (" TARGET_FMT_lx ") for %d (%x %x) \n",
+                env->eip,
                 ins->cmd, ins->opcode[0],
                 ins->opcode_len > 1 ? ins->opcode[1] : 0);
         env->eip += ins->len;
diff --git a/target/i386/emulate/x86_emu.h b/target/i386/emulate/x86_emu.h
index a1a961284b..f1680c41f4 100644
--- a/target/i386/emulate/x86_emu.h
+++ b/target/i386/emulate/x86_emu.h
@@ -24,6 +24,7 @@
 #include "cpu.h"
 
 struct x86_emul_ops {
+    void (*fetch_instruction)(CPUState *cpu, void *data, target_ulong addr, int bytes);
     void (*read_mem)(CPUState *cpu, void *data, target_ulong addr, int bytes);
     void (*write_mem)(CPUState *cpu, void *data, target_ulong addr, int bytes);
     void (*read_segment_descriptor)(CPUState *cpu, struct x86_segment_descriptor *desc,
-- 
2.34.1
Re: [PATCH v2 02/27] target/i386/emulate: Allow instruction decoding from stream
Posted by Wei Liu 4 months, 2 weeks ago
On Tue, Jul 01, 2025 at 07:28:09PM +0200, Magnus Kulke wrote:
> Introduce a new helper function to decode x86 instructions from a
> raw instruction byte stream. MSHV delivers an instruction stream in a
> buffer of the vm_exit message. It can be used to speed up MMIO
> emulation, since instructions do not have to be fetched and translated.
> 
> Add a "fetch_instruction()" op to struct x86_emul_ops to improve
> traceability.
> 
> Signed-off-by: Magnus Kulke <magnuskulke@linux.microsoft.com>
> ---
>  target/i386/emulate/x86_decode.c | 31 +++++++++++++++++++++++++++----
>  target/i386/emulate/x86_decode.h | 10 ++++++++++
>  target/i386/emulate/x86_emu.c    |  3 ++-
>  target/i386/emulate/x86_emu.h    |  1 +
>  4 files changed, 40 insertions(+), 5 deletions(-)
> 
> diff --git a/target/i386/emulate/x86_decode.c b/target/i386/emulate/x86_decode.c
> index 2eca39802e..133065b50a 100644
> --- a/target/i386/emulate/x86_decode.c
> +++ b/target/i386/emulate/x86_decode.c
> @@ -60,6 +60,7 @@ static inline uint64_t decode_bytes(CPUX86State *env, struct x86_decode *decode,
>                                      int size)
>  {
>      uint64_t val = 0;
> +    target_ulong va;

This can be moved inside the else branch to limit the scope of the
variable.

>  
>      switch (size) {
>      case 1:
> @@ -71,10 +72,17 @@ static inline uint64_t decode_bytes(CPUX86State *env, struct x86_decode *decode,
>          VM_PANIC_EX("%s invalid size %d\n", __func__, size);
>          break;
>      }
> -    target_ulong va  = linear_rip(env_cpu(env), env->eip) + decode->len;
> -    emul_ops->read_mem(env_cpu(env), &val, va, size);
> +
> +    /* copy the bytes from the instruction stream, if available */
> +    if (decode->stream && decode->len + size <= decode->stream->len) {
> +        memcpy(&val, decode->stream->bytes + decode->len, size);
> +    } else {
> +        va = linear_rip(env_cpu(env), env->eip) + decode->len;

           target_ulong va = linear_rip(env_cpu(env), env->eip) + decode->len;

> +        emul_ops->fetch_instruction(env_cpu(env), &val, va, size);
> +    }
>      decode->len += size;
> -    
> +
> +

Extraneous blank line here.

>      return val;
>  }
>  
> @@ -2076,9 +2084,10 @@ static void decode_opcodes(CPUX86State *env, struct x86_decode *decode)
>      }
>  }
>  
> -uint32_t decode_instruction(CPUX86State *env, struct x86_decode *decode)
> +static uint32_t decode_opcode(CPUX86State *env, struct x86_decode *decode)
>  {
>      memset(decode, 0, sizeof(*decode));
> +
>      decode_prefix(env, decode);
>      set_addressing_size(env, decode);
>      set_operand_size(env, decode);
> @@ -2088,6 +2097,20 @@ uint32_t decode_instruction(CPUX86State *env, struct x86_decode *decode)
>      return decode->len;
>  }
>  
> +uint32_t decode_instruction(CPUX86State *env, struct x86_decode *decode)
> +{
> +    return decode_opcode(env, decode);
> +}
> +
> +uint32_t decode_instruction_stream(CPUX86State *env, struct x86_decode *decode,
> +                                   struct x86_insn_stream *stream)
> +{
> +    if (stream != NULL) {
> +        decode->stream = stream;
> +    }

This can be simplified as

       decode->stream = stream;

> +    return decode_opcode(env, decode);
> +}
> +
>  void init_decoder(void)
>  {
>      int i;
> diff --git a/target/i386/emulate/x86_decode.h b/target/i386/emulate/x86_decode.h
> index 927645af1a..f5e9738914 100644
> --- a/target/i386/emulate/x86_decode.h
> +++ b/target/i386/emulate/x86_decode.h
> @@ -272,6 +272,11 @@ typedef struct x86_decode_op {
>      };
>  } x86_decode_op;
>  
> +typedef struct x86_insn_stream {
> +    const uint8_t *bytes;
> +    size_t len;
> +} x86_insn_stream;
> +
>  typedef struct x86_decode {
>      int len;
>      uint8_t opcode[4];
> @@ -298,11 +303,16 @@ typedef struct x86_decode {
>      struct x86_modrm modrm;
>      struct x86_decode_op op[4];
>      bool is_fpu;
> +
> +	x86_insn_stream *stream;

Tab here.

>  } x86_decode;
>  
>  uint64_t sign(uint64_t val, int size);
>  
>  uint32_t decode_instruction(CPUX86State *env, struct x86_decode *decode);
> +uint32_t decode_instruction_stream(CPUX86State *env,
> +								   struct x86_decode *decode,
> +		                           struct x86_insn_stream *stream);

Mixing spaces and tabs.

Wei