[PATCH] target/riscv,disas/riscv: add insn len encodings > 64 bit

Daniel Henrique Barboza posted 1 patch 1 week, 2 days ago
Patches applied successfully (tree, apply log)
git fetch https://github.com/patchew-project/qemu tags/patchew/20260520173522.1794433-1-daniel.barboza@oss.qualcomm.com
Maintainers: Palmer Dabbelt <palmer@dabbelt.com>, Alistair Francis <Alistair.Francis@wdc.com>, Daniel Henrique Barboza <daniel.barboza@oss.qualcomm.com>, Weiwei Li <liwei1518@gmail.com>, Liu Zhiwei <zhiwei_liu@linux.alibaba.com>, Chao Liu <chao.liu.zevorn@gmail.com>
disas/riscv.c            | 50 +++++++++++++++++++++++++++++-----------
target/riscv/internals.h | 33 ++++++++++++++++++++++++--
target/riscv/translate.c |  7 +++++-
3 files changed, 73 insertions(+), 17 deletions(-)
[PATCH] target/riscv,disas/riscv: add insn len encodings > 64 bit
Posted by Daniel Henrique Barboza 1 week, 2 days ago
We're missing the "Expanded Instruction-Length Encoding" logic from the
unpriv isa, meaning we're not detecting missing insn len > 8 bytes.
This doesn't have impact in regular insn emulation because most (if not
all) of the existing insns are 2 or 4 bytes.

However, running with "-d in_asm" will cause QEMU to run an infinite
loop because our disas code can't handle the expanded length encoding,
causing inst_length() to return '0' and target_disas() to loop forever
since it's using the len to decrease the loop counter.

Fixing just the disas code isn't enough.  target_disas() will do a

"if (size < count) {"

check, where size = the insn length from the translator and count = the
insn length from disas, and will warn about it during disassembling.  We
need both places to use the same insn length logic.

[1] https://gitlab.com/qemu-project/qemu/-/work_items/3479

Resolves: https://gitlab.com/qemu-project/qemu/-/work_items/3479
Signed-off-by: Daniel Henrique Barboza <daniel.barboza@oss.qualcomm.com>
---
 disas/riscv.c            | 50 +++++++++++++++++++++++++++++-----------
 target/riscv/internals.h | 33 ++++++++++++++++++++++++--
 target/riscv/translate.c |  7 +++++-
 3 files changed, 73 insertions(+), 17 deletions(-)

diff --git a/disas/riscv.c b/disas/riscv.c
index d416a4d6b3..d32661c857 100644
--- a/disas/riscv.c
+++ b/disas/riscv.c
@@ -5057,26 +5057,48 @@ static bool check_constraints(rv_decode *dec, const rvc_constraint *c)
     return true;
 }
 
-/* instruction length */
-
+/*
+ * Note: basically a carbon copy of insn_len() from
+ * target/riscv/internals.h.
+ */
 static size_t inst_length(rv_inst inst)
 {
-    /* NOTE: supports maximum instruction size of 64-bits */
-
     /*
-     * instruction length coding
+     * "Expanded Instruction-Length Encoding" as in
+     * unpriv isa section 1.5.1.
      *
-     *      aa - 16 bit aa != 11
-     *   bbb11 - 32 bit bbb != 111
-     *  011111 - 48 bit
-     * 0111111 - 64 bit
+     *               aa - 16 bit aa != 11
+     *            bbb11 - 32 bit bbb != 111
+     *           011111 - 48 bit
+     *          0111111 - 64 bit
+     * xnnnxxxxx1111111 - (80 + 16*nnn) bits, if nnn != 111
+     * x111xxxxx1111111 - 192 bits
      */
+    if ((inst & 0b11) != 0b11) {
+        return 2;
+    } else if ((inst & 0b11100) != 0b11100) {
+        return 4;
+    } else if ((inst & 0b111111) == 0b011111) {
+        return 6;
+    } else if ((inst & 0b1111111) == 0b0111111) {
+        return 8;
+    } else if ((inst & 0b1111111) == 0b1111111) {
+        uint32_t nnn = (inst >> 12) & 0b0111;
+
+        if (nnn == 0b111) {
+            return 24;
+        }
+        return (16 * nnn + 80) / 8;
+    }
 
-    return (inst &      0b11) != 0b11      ? 2
-         : (inst &   0b11100) != 0b11100   ? 4
-         : (inst &  0b111111) == 0b011111  ? 6
-         : (inst & 0b1111111) == 0b0111111 ? 8
-         : 0;
+    /*
+     * Returning 0 if we don't find the right insn length will
+     * cause an infinite loop in target_disas().  Return an
+     * unrealistic length value instead, making target_disas()
+     * trigger the "Disassembler disagrees with translator over
+     * instruction" error path.
+     */
+    return 1024;
 }
 
 /* format instruction */
diff --git a/target/riscv/internals.h b/target/riscv/internals.h
index 8c24af0d85..80c2f8f5f8 100644
--- a/target/riscv/internals.h
+++ b/target/riscv/internals.h
@@ -245,9 +245,38 @@ static inline target_ulong adjust_addr_virt(CPURISCVState *env,
     return adjust_addr_body(env, addr, true);
 }
 
-static inline int insn_len(uint16_t first_word)
+static inline int insn_len(uint32_t opcode)
 {
-    return (first_word & 3) == 3 ? 4 : 2;
+    /*
+     * "Expanded Instruction-Length Encoding" as in
+     * unpriv isa section 1.5.1.
+     *
+     *               aa - 16 bit aa != 11
+     *            bbb11 - 32 bit bbb != 111
+     *           011111 - 48 bit
+     *          0111111 - 64 bit
+     * xnnnxxxxx1111111 - (80 + 16*nnn) bits, if nnn != 111
+     * x111xxxxx1111111 - 192 bits
+     */
+    if ((opcode & 0b11) != 0b11) {
+        return 2;
+    } else if ((opcode & 0b11100) != 0b11100) {
+        return 4;
+    } else if ((opcode & 0b111111) == 0b011111) {
+        return 6;
+    } else if ((opcode & 0b1111111) == 0b0111111) {
+        return 8;
+    } else if ((opcode & 0b1111111) == 0b1111111) {
+        uint32_t nnn = (opcode >> 12) & 0b0111;
+
+        if (nnn == 0b111) {
+            return 24;
+        }
+        return (16 * nnn + 80) / 8;
+    }
+
+    /* Shouldn't happen, but ... */
+    return -1;
 }
 
 int riscv_monitor_get_register_legacy(CPUState *cs, const char *name,
diff --git a/target/riscv/translate.c b/target/riscv/translate.c
index 1e4f340256..bd28ed3fcb 100644
--- a/target/riscv/translate.c
+++ b/target/riscv/translate.c
@@ -1255,6 +1255,7 @@ static void decode_opc(CPURISCVState *env, DisasContext *ctx)
 {
     uint32_t opcode;
     bool pc_is_4byte_align = ((ctx->base.pc_next % 4) == 0);
+    int insn_length;
 
     ctx->virt_inst_excp = false;
     if (pc_is_4byte_align) {
@@ -1279,7 +1280,11 @@ static void decode_opc(CPURISCVState *env, DisasContext *ctx)
     }
     ctx->ol = ctx->xl;
 
-    ctx->cur_insn_len = insn_len((uint16_t)opcode);
+    insn_length = insn_len(opcode);
+    if (insn_length < 0) {
+        gen_exception_illegal(ctx);
+    }
+    ctx->cur_insn_len = insn_length;
     /* Check for compressed insn */
     if (ctx->cur_insn_len == 2) {
         ctx->opcode = (uint16_t)opcode;
-- 
2.43.0
Re: [PATCH] target/riscv, disas/riscv: add insn len encodings > 64 bit
Posted by Alistair Francis via qemu development 3 days, 16 hours ago
On Wed, 2026-05-20 at 14:35 -0300, Daniel Henrique Barboza wrote:
> We're missing the "Expanded Instruction-Length Encoding" logic from
> the
> unpriv isa, meaning we're not detecting missing insn len > 8 bytes.
> This doesn't have impact in regular insn emulation because most (if
> not
> all) of the existing insns are 2 or 4 bytes.
> 
> However, running with "-d in_asm" will cause QEMU to run an infinite
> loop because our disas code can't handle the expanded length
> encoding,
> causing inst_length() to return '0' and target_disas() to loop
> forever
> since it's using the len to decrease the loop counter.
> 
> Fixing just the disas code isn't enough.  target_disas() will do a
> 
> "if (size < count) {"
> 
> check, where size = the insn length from the translator and count =
> the
> insn length from disas, and will warn about it during disassembling. 
> We
> need both places to use the same insn length logic.
> 
> [1] https://gitlab.com/qemu-project/qemu/-/work_items/3479
> 
> Resolves: https://gitlab.com/qemu-project/qemu/-/work_items/3479
> Signed-off-by: Daniel Henrique Barboza
> <daniel.barboza@oss.qualcomm.com>
> ---
>  disas/riscv.c            | 50 +++++++++++++++++++++++++++++---------
> --
>  target/riscv/internals.h | 33 ++++++++++++++++++++++++--
>  target/riscv/translate.c |  7 +++++-
>  3 files changed, 73 insertions(+), 17 deletions(-)
> 
> diff --git a/disas/riscv.c b/disas/riscv.c
> index d416a4d6b3..d32661c857 100644
> --- a/disas/riscv.c
> +++ b/disas/riscv.c
> @@ -5057,26 +5057,48 @@ static bool check_constraints(rv_decode *dec,
> const rvc_constraint *c)
>      return true;
>  }
>  
> -/* instruction length */
> -
> +/*
> + * Note: basically a carbon copy of insn_len() from
> + * target/riscv/internals.h.
> + */
>  static size_t inst_length(rv_inst inst)
>  {
> -    /* NOTE: supports maximum instruction size of 64-bits */
> -
>      /*
> -     * instruction length coding
> +     * "Expanded Instruction-Length Encoding" as in
> +     * unpriv isa section 1.5.1.

This section was removed from the manual with commit

commit 4f05ffef4987213ddfb78b3e3aaf91b67892a664
Author: Andrew Waterman <andrew@sifive.com>
Date:   Tue Apr 1 18:02:36 2025 -0700

    Remove proposal for longer instruction encodings
    
    This is pursuant to making the manual contain only ratified
content.


So I don't think we should reference it. We should probably drop
support for these as who knows what the ratified version will look like

Alistair

>       *
> -     *      aa - 16 bit aa != 11
> -     *   bbb11 - 32 bit bbb != 111
> -     *  011111 - 48 bit
> -     * 0111111 - 64 bit
> +     *               aa - 16 bit aa != 11
> +     *            bbb11 - 32 bit bbb != 111
> +     *           011111 - 48 bit
> +     *          0111111 - 64 bit
> +     * xnnnxxxxx1111111 - (80 + 16*nnn) bits, if nnn != 111
> +     * x111xxxxx1111111 - 192 bits
>       */
> +    if ((inst & 0b11) != 0b11) {
> +        return 2;
> +    } else if ((inst & 0b11100) != 0b11100) {
> +        return 4;
> +    } else if ((inst & 0b111111) == 0b011111) {
> +        return 6;
> +    } else if ((inst & 0b1111111) == 0b0111111) {
> +        return 8;
> +    } else if ((inst & 0b1111111) == 0b1111111) {
> +        uint32_t nnn = (inst >> 12) & 0b0111;
> +
> +        if (nnn == 0b111) {
> +            return 24;
> +        }
> +        return (16 * nnn + 80) / 8;
> +    }
>  
> -    return (inst &      0b11) != 0b11      ? 2
> -         : (inst &   0b11100) != 0b11100   ? 4
> -         : (inst &  0b111111) == 0b011111  ? 6
> -         : (inst & 0b1111111) == 0b0111111 ? 8
> -         : 0;
> +    /*
> +     * Returning 0 if we don't find the right insn length will
> +     * cause an infinite loop in target_disas().  Return an
> +     * unrealistic length value instead, making target_disas()
> +     * trigger the "Disassembler disagrees with translator over
> +     * instruction" error path.
> +     */
> +    return 1024;
>  }
>  
>  /* format instruction */
> diff --git a/target/riscv/internals.h b/target/riscv/internals.h
> index 8c24af0d85..80c2f8f5f8 100644
> --- a/target/riscv/internals.h
> +++ b/target/riscv/internals.h
> @@ -245,9 +245,38 @@ static inline target_ulong
> adjust_addr_virt(CPURISCVState *env,
>      return adjust_addr_body(env, addr, true);
>  }
>  
> -static inline int insn_len(uint16_t first_word)
> +static inline int insn_len(uint32_t opcode)
>  {
> -    return (first_word & 3) == 3 ? 4 : 2;
> +    /*
> +     * "Expanded Instruction-Length Encoding" as in
> +     * unpriv isa section 1.5.1.
> +     *
> +     *               aa - 16 bit aa != 11
> +     *            bbb11 - 32 bit bbb != 111
> +     *           011111 - 48 bit
> +     *          0111111 - 64 bit
> +     * xnnnxxxxx1111111 - (80 + 16*nnn) bits, if nnn != 111
> +     * x111xxxxx1111111 - 192 bits
> +     */
> +    if ((opcode & 0b11) != 0b11) {
> +        return 2;
> +    } else if ((opcode & 0b11100) != 0b11100) {
> +        return 4;
> +    } else if ((opcode & 0b111111) == 0b011111) {
> +        return 6;
> +    } else if ((opcode & 0b1111111) == 0b0111111) {
> +        return 8;
> +    } else if ((opcode & 0b1111111) == 0b1111111) {
> +        uint32_t nnn = (opcode >> 12) & 0b0111;
> +
> +        if (nnn == 0b111) {
> +            return 24;
> +        }
> +        return (16 * nnn + 80) / 8;
> +    }
> +
> +    /* Shouldn't happen, but ... */
> +    return -1;
>  }
>  
>  int riscv_monitor_get_register_legacy(CPUState *cs, const char
> *name,
> diff --git a/target/riscv/translate.c b/target/riscv/translate.c
> index 1e4f340256..bd28ed3fcb 100644
> --- a/target/riscv/translate.c
> +++ b/target/riscv/translate.c
> @@ -1255,6 +1255,7 @@ static void decode_opc(CPURISCVState *env,
> DisasContext *ctx)
>  {
>      uint32_t opcode;
>      bool pc_is_4byte_align = ((ctx->base.pc_next % 4) == 0);
> +    int insn_length;
>  
>      ctx->virt_inst_excp = false;
>      if (pc_is_4byte_align) {
> @@ -1279,7 +1280,11 @@ static void decode_opc(CPURISCVState *env,
> DisasContext *ctx)
>      }
>      ctx->ol = ctx->xl;
>  
> -    ctx->cur_insn_len = insn_len((uint16_t)opcode);
> +    insn_length = insn_len(opcode);
> +    if (insn_length < 0) {
> +        gen_exception_illegal(ctx);
> +    }
> +    ctx->cur_insn_len = insn_length;
>      /* Check for compressed insn */
>      if (ctx->cur_insn_len == 2) {
>          ctx->opcode = (uint16_t)opcode;
Re: [PATCH] target/riscv, disas/riscv: add insn len encodings > 64 bit
Posted by Daniel Henrique Barboza via qemu development 2 days, 22 hours ago

On 5/26/2026 9:35 PM, Alistair Francis wrote:
> On Wed, 2026-05-20 at 14:35 -0300, Daniel Henrique Barboza wrote:
>> We're missing the "Expanded Instruction-Length Encoding" logic from
>> the
>> unpriv isa, meaning we're not detecting missing insn len > 8 bytes.
>> This doesn't have impact in regular insn emulation because most (if
>> not
>> all) of the existing insns are 2 or 4 bytes.
>>
>> However, running with "-d in_asm" will cause QEMU to run an infinite
>> loop because our disas code can't handle the expanded length
>> encoding,
>> causing inst_length() to return '0' and target_disas() to loop
>> forever
>> since it's using the len to decrease the loop counter.
>>
>> Fixing just the disas code isn't enough.  target_disas() will do a
>>
>> "if (size < count) {"
>>
>> check, where size = the insn length from the translator and count =
>> the
>> insn length from disas, and will warn about it during disassembling.
>> We
>> need both places to use the same insn length logic.
>>
>> [1] https://gitlab.com/qemu-project/qemu/-/work_items/3479
>>
>> Resolves: https://gitlab.com/qemu-project/qemu/-/work_items/3479
>> Signed-off-by: Daniel Henrique Barboza
>> <daniel.barboza@oss.qualcomm.com>
>> ---
>>   disas/riscv.c            | 50 +++++++++++++++++++++++++++++---------
>> --
>>   target/riscv/internals.h | 33 ++++++++++++++++++++++++--
>>   target/riscv/translate.c |  7 +++++-
>>   3 files changed, 73 insertions(+), 17 deletions(-)
>>
>> diff --git a/disas/riscv.c b/disas/riscv.c
>> index d416a4d6b3..d32661c857 100644
>> --- a/disas/riscv.c
>> +++ b/disas/riscv.c
>> @@ -5057,26 +5057,48 @@ static bool check_constraints(rv_decode *dec,
>> const rvc_constraint *c)
>>       return true;
>>   }
>>   
>> -/* instruction length */
>> -
>> +/*
>> + * Note: basically a carbon copy of insn_len() from
>> + * target/riscv/internals.h.
>> + */
>>   static size_t inst_length(rv_inst inst)
>>   {
>> -    /* NOTE: supports maximum instruction size of 64-bits */
>> -
>>       /*
>> -     * instruction length coding
>> +     * "Expanded Instruction-Length Encoding" as in
>> +     * unpriv isa section 1.5.1.
> 
> This section was removed from the manual with commit
> 
> commit 4f05ffef4987213ddfb78b3e3aaf91b67892a664
> Author: Andrew Waterman <andrew@sifive.com>
> Date:   Tue Apr 1 18:02:36 2025 -0700
> 
>      Remove proposal for longer instruction encodings
>      
>      This is pursuant to making the manual contain only ratified
> content.
> 
> 
> So I don't think we should reference it. We should probably drop
> support for these as who knows what the ratified version will look like


Ooohhh good call. In the official RVI link:


https://docs.riscv.org/reference/isa/unpriv/intro.html#1-1-5-base-instruction-length-encoding


"All the 32-bit instructions in the base ISA have their lowest two bits set to 11.
The optional compressed 16-bit instruction-set extensions have their lowest two
bits equal to 00, 01, or 10."

So we'll support either 16 or 32 bits lengths only.  I'll send a v2.


Thanks,
Daniel


> 
> Alistair
> 
>>        *
>> -     *      aa - 16 bit aa != 11
>> -     *   bbb11 - 32 bit bbb != 111
>> -     *  011111 - 48 bit
>> -     * 0111111 - 64 bit
>> +     *               aa - 16 bit aa != 11
>> +     *            bbb11 - 32 bit bbb != 111
>> +     *           011111 - 48 bit
>> +     *          0111111 - 64 bit
>> +     * xnnnxxxxx1111111 - (80 + 16*nnn) bits, if nnn != 111
>> +     * x111xxxxx1111111 - 192 bits
>>        */
>> +    if ((inst & 0b11) != 0b11) {
>> +        return 2;
>> +    } else if ((inst & 0b11100) != 0b11100) {
>> +        return 4;
>> +    } else if ((inst & 0b111111) == 0b011111) {
>> +        return 6;
>> +    } else if ((inst & 0b1111111) == 0b0111111) {
>> +        return 8;
>> +    } else if ((inst & 0b1111111) == 0b1111111) {
>> +        uint32_t nnn = (inst >> 12) & 0b0111;
>> +
>> +        if (nnn == 0b111) {
>> +            return 24;
>> +        }
>> +        return (16 * nnn + 80) / 8;
>> +    }
>>   
>> -    return (inst &      0b11) != 0b11      ? 2
>> -         : (inst &   0b11100) != 0b11100   ? 4
>> -         : (inst &  0b111111) == 0b011111  ? 6
>> -         : (inst & 0b1111111) == 0b0111111 ? 8
>> -         : 0;
>> +    /*
>> +     * Returning 0 if we don't find the right insn length will
>> +     * cause an infinite loop in target_disas().  Return an
>> +     * unrealistic length value instead, making target_disas()
>> +     * trigger the "Disassembler disagrees with translator over
>> +     * instruction" error path.
>> +     */
>> +    return 1024;
>>   }
>>   
>>   /* format instruction */
>> diff --git a/target/riscv/internals.h b/target/riscv/internals.h
>> index 8c24af0d85..80c2f8f5f8 100644
>> --- a/target/riscv/internals.h
>> +++ b/target/riscv/internals.h
>> @@ -245,9 +245,38 @@ static inline target_ulong
>> adjust_addr_virt(CPURISCVState *env,
>>       return adjust_addr_body(env, addr, true);
>>   }
>>   
>> -static inline int insn_len(uint16_t first_word)
>> +static inline int insn_len(uint32_t opcode)
>>   {
>> -    return (first_word & 3) == 3 ? 4 : 2;
>> +    /*
>> +     * "Expanded Instruction-Length Encoding" as in
>> +     * unpriv isa section 1.5.1.
>> +     *
>> +     *               aa - 16 bit aa != 11
>> +     *            bbb11 - 32 bit bbb != 111
>> +     *           011111 - 48 bit
>> +     *          0111111 - 64 bit
>> +     * xnnnxxxxx1111111 - (80 + 16*nnn) bits, if nnn != 111
>> +     * x111xxxxx1111111 - 192 bits
>> +     */
>> +    if ((opcode & 0b11) != 0b11) {
>> +        return 2;
>> +    } else if ((opcode & 0b11100) != 0b11100) {
>> +        return 4;
>> +    } else if ((opcode & 0b111111) == 0b011111) {
>> +        return 6;
>> +    } else if ((opcode & 0b1111111) == 0b0111111) {
>> +        return 8;
>> +    } else if ((opcode & 0b1111111) == 0b1111111) {
>> +        uint32_t nnn = (opcode >> 12) & 0b0111;
>> +
>> +        if (nnn == 0b111) {
>> +            return 24;
>> +        }
>> +        return (16 * nnn + 80) / 8;
>> +    }
>> +
>> +    /* Shouldn't happen, but ... */
>> +    return -1;
>>   }
>>   
>>   int riscv_monitor_get_register_legacy(CPUState *cs, const char
>> *name,
>> diff --git a/target/riscv/translate.c b/target/riscv/translate.c
>> index 1e4f340256..bd28ed3fcb 100644
>> --- a/target/riscv/translate.c
>> +++ b/target/riscv/translate.c
>> @@ -1255,6 +1255,7 @@ static void decode_opc(CPURISCVState *env,
>> DisasContext *ctx)
>>   {
>>       uint32_t opcode;
>>       bool pc_is_4byte_align = ((ctx->base.pc_next % 4) == 0);
>> +    int insn_length;
>>   
>>       ctx->virt_inst_excp = false;
>>       if (pc_is_4byte_align) {
>> @@ -1279,7 +1280,11 @@ static void decode_opc(CPURISCVState *env,
>> DisasContext *ctx)
>>       }
>>       ctx->ol = ctx->xl;
>>   
>> -    ctx->cur_insn_len = insn_len((uint16_t)opcode);
>> +    insn_length = insn_len(opcode);
>> +    if (insn_length < 0) {
>> +        gen_exception_illegal(ctx);
>> +    }
>> +    ctx->cur_insn_len = insn_length;
>>       /* Check for compressed insn */
>>       if (ctx->cur_insn_len == 2) {
>>           ctx->opcode = (uint16_t)opcode;