disas/riscv.c | 50 +++++++++++++++++++++++++++++----------- target/riscv/internals.h | 33 ++++++++++++++++++++++++-- target/riscv/translate.c | 7 +++++- 3 files changed, 73 insertions(+), 17 deletions(-)
We're missing the "Expanded Instruction-Length Encoding" logic from the
unpriv isa, meaning we're not detecting missing insn len > 8 bytes.
This doesn't have impact in regular insn emulation because most (if not
all) of the existing insns are 2 or 4 bytes.
However, running with "-d in_asm" will cause QEMU to run an infinite
loop because our disas code can't handle the expanded length encoding,
causing inst_length() to return '0' and target_disas() to loop forever
since it's using the len to decrease the loop counter.
Fixing just the disas code isn't enough. target_disas() will do a
"if (size < count) {"
check, where size = the insn length from the translator and count = the
insn length from disas, and will warn about it during disassembling. We
need both places to use the same insn length logic.
[1] https://gitlab.com/qemu-project/qemu/-/work_items/3479
Resolves: https://gitlab.com/qemu-project/qemu/-/work_items/3479
Signed-off-by: Daniel Henrique Barboza <daniel.barboza@oss.qualcomm.com>
---
disas/riscv.c | 50 +++++++++++++++++++++++++++++-----------
target/riscv/internals.h | 33 ++++++++++++++++++++++++--
target/riscv/translate.c | 7 +++++-
3 files changed, 73 insertions(+), 17 deletions(-)
diff --git a/disas/riscv.c b/disas/riscv.c
index d416a4d6b3..d32661c857 100644
--- a/disas/riscv.c
+++ b/disas/riscv.c
@@ -5057,26 +5057,48 @@ static bool check_constraints(rv_decode *dec, const rvc_constraint *c)
return true;
}
-/* instruction length */
-
+/*
+ * Note: basically a carbon copy of insn_len() from
+ * target/riscv/internals.h.
+ */
static size_t inst_length(rv_inst inst)
{
- /* NOTE: supports maximum instruction size of 64-bits */
-
/*
- * instruction length coding
+ * "Expanded Instruction-Length Encoding" as in
+ * unpriv isa section 1.5.1.
*
- * aa - 16 bit aa != 11
- * bbb11 - 32 bit bbb != 111
- * 011111 - 48 bit
- * 0111111 - 64 bit
+ * aa - 16 bit aa != 11
+ * bbb11 - 32 bit bbb != 111
+ * 011111 - 48 bit
+ * 0111111 - 64 bit
+ * xnnnxxxxx1111111 - (80 + 16*nnn) bits, if nnn != 111
+ * x111xxxxx1111111 - 192 bits
*/
+ if ((inst & 0b11) != 0b11) {
+ return 2;
+ } else if ((inst & 0b11100) != 0b11100) {
+ return 4;
+ } else if ((inst & 0b111111) == 0b011111) {
+ return 6;
+ } else if ((inst & 0b1111111) == 0b0111111) {
+ return 8;
+ } else if ((inst & 0b1111111) == 0b1111111) {
+ uint32_t nnn = (inst >> 12) & 0b0111;
+
+ if (nnn == 0b111) {
+ return 24;
+ }
+ return (16 * nnn + 80) / 8;
+ }
- return (inst & 0b11) != 0b11 ? 2
- : (inst & 0b11100) != 0b11100 ? 4
- : (inst & 0b111111) == 0b011111 ? 6
- : (inst & 0b1111111) == 0b0111111 ? 8
- : 0;
+ /*
+ * Returning 0 if we don't find the right insn length will
+ * cause an infinite loop in target_disas(). Return an
+ * unrealistic length value instead, making target_disas()
+ * trigger the "Disassembler disagrees with translator over
+ * instruction" error path.
+ */
+ return 1024;
}
/* format instruction */
diff --git a/target/riscv/internals.h b/target/riscv/internals.h
index 8c24af0d85..80c2f8f5f8 100644
--- a/target/riscv/internals.h
+++ b/target/riscv/internals.h
@@ -245,9 +245,38 @@ static inline target_ulong adjust_addr_virt(CPURISCVState *env,
return adjust_addr_body(env, addr, true);
}
-static inline int insn_len(uint16_t first_word)
+static inline int insn_len(uint32_t opcode)
{
- return (first_word & 3) == 3 ? 4 : 2;
+ /*
+ * "Expanded Instruction-Length Encoding" as in
+ * unpriv isa section 1.5.1.
+ *
+ * aa - 16 bit aa != 11
+ * bbb11 - 32 bit bbb != 111
+ * 011111 - 48 bit
+ * 0111111 - 64 bit
+ * xnnnxxxxx1111111 - (80 + 16*nnn) bits, if nnn != 111
+ * x111xxxxx1111111 - 192 bits
+ */
+ if ((opcode & 0b11) != 0b11) {
+ return 2;
+ } else if ((opcode & 0b11100) != 0b11100) {
+ return 4;
+ } else if ((opcode & 0b111111) == 0b011111) {
+ return 6;
+ } else if ((opcode & 0b1111111) == 0b0111111) {
+ return 8;
+ } else if ((opcode & 0b1111111) == 0b1111111) {
+ uint32_t nnn = (opcode >> 12) & 0b0111;
+
+ if (nnn == 0b111) {
+ return 24;
+ }
+ return (16 * nnn + 80) / 8;
+ }
+
+ /* Shouldn't happen, but ... */
+ return -1;
}
int riscv_monitor_get_register_legacy(CPUState *cs, const char *name,
diff --git a/target/riscv/translate.c b/target/riscv/translate.c
index 1e4f340256..bd28ed3fcb 100644
--- a/target/riscv/translate.c
+++ b/target/riscv/translate.c
@@ -1255,6 +1255,7 @@ static void decode_opc(CPURISCVState *env, DisasContext *ctx)
{
uint32_t opcode;
bool pc_is_4byte_align = ((ctx->base.pc_next % 4) == 0);
+ int insn_length;
ctx->virt_inst_excp = false;
if (pc_is_4byte_align) {
@@ -1279,7 +1280,11 @@ static void decode_opc(CPURISCVState *env, DisasContext *ctx)
}
ctx->ol = ctx->xl;
- ctx->cur_insn_len = insn_len((uint16_t)opcode);
+ insn_length = insn_len(opcode);
+ if (insn_length < 0) {
+ gen_exception_illegal(ctx);
+ }
+ ctx->cur_insn_len = insn_length;
/* Check for compressed insn */
if (ctx->cur_insn_len == 2) {
ctx->opcode = (uint16_t)opcode;
--
2.43.0
On Wed, 2026-05-20 at 14:35 -0300, Daniel Henrique Barboza wrote:
> We're missing the "Expanded Instruction-Length Encoding" logic from
> the
> unpriv isa, meaning we're not detecting missing insn len > 8 bytes.
> This doesn't have impact in regular insn emulation because most (if
> not
> all) of the existing insns are 2 or 4 bytes.
>
> However, running with "-d in_asm" will cause QEMU to run an infinite
> loop because our disas code can't handle the expanded length
> encoding,
> causing inst_length() to return '0' and target_disas() to loop
> forever
> since it's using the len to decrease the loop counter.
>
> Fixing just the disas code isn't enough. target_disas() will do a
>
> "if (size < count) {"
>
> check, where size = the insn length from the translator and count =
> the
> insn length from disas, and will warn about it during disassembling.
> We
> need both places to use the same insn length logic.
>
> [1] https://gitlab.com/qemu-project/qemu/-/work_items/3479
>
> Resolves: https://gitlab.com/qemu-project/qemu/-/work_items/3479
> Signed-off-by: Daniel Henrique Barboza
> <daniel.barboza@oss.qualcomm.com>
> ---
> disas/riscv.c | 50 +++++++++++++++++++++++++++++---------
> --
> target/riscv/internals.h | 33 ++++++++++++++++++++++++--
> target/riscv/translate.c | 7 +++++-
> 3 files changed, 73 insertions(+), 17 deletions(-)
>
> diff --git a/disas/riscv.c b/disas/riscv.c
> index d416a4d6b3..d32661c857 100644
> --- a/disas/riscv.c
> +++ b/disas/riscv.c
> @@ -5057,26 +5057,48 @@ static bool check_constraints(rv_decode *dec,
> const rvc_constraint *c)
> return true;
> }
>
> -/* instruction length */
> -
> +/*
> + * Note: basically a carbon copy of insn_len() from
> + * target/riscv/internals.h.
> + */
> static size_t inst_length(rv_inst inst)
> {
> - /* NOTE: supports maximum instruction size of 64-bits */
> -
> /*
> - * instruction length coding
> + * "Expanded Instruction-Length Encoding" as in
> + * unpriv isa section 1.5.1.
This section was removed from the manual with commit
commit 4f05ffef4987213ddfb78b3e3aaf91b67892a664
Author: Andrew Waterman <andrew@sifive.com>
Date: Tue Apr 1 18:02:36 2025 -0700
Remove proposal for longer instruction encodings
This is pursuant to making the manual contain only ratified
content.
So I don't think we should reference it. We should probably drop
support for these as who knows what the ratified version will look like
Alistair
> *
> - * aa - 16 bit aa != 11
> - * bbb11 - 32 bit bbb != 111
> - * 011111 - 48 bit
> - * 0111111 - 64 bit
> + * aa - 16 bit aa != 11
> + * bbb11 - 32 bit bbb != 111
> + * 011111 - 48 bit
> + * 0111111 - 64 bit
> + * xnnnxxxxx1111111 - (80 + 16*nnn) bits, if nnn != 111
> + * x111xxxxx1111111 - 192 bits
> */
> + if ((inst & 0b11) != 0b11) {
> + return 2;
> + } else if ((inst & 0b11100) != 0b11100) {
> + return 4;
> + } else if ((inst & 0b111111) == 0b011111) {
> + return 6;
> + } else if ((inst & 0b1111111) == 0b0111111) {
> + return 8;
> + } else if ((inst & 0b1111111) == 0b1111111) {
> + uint32_t nnn = (inst >> 12) & 0b0111;
> +
> + if (nnn == 0b111) {
> + return 24;
> + }
> + return (16 * nnn + 80) / 8;
> + }
>
> - return (inst & 0b11) != 0b11 ? 2
> - : (inst & 0b11100) != 0b11100 ? 4
> - : (inst & 0b111111) == 0b011111 ? 6
> - : (inst & 0b1111111) == 0b0111111 ? 8
> - : 0;
> + /*
> + * Returning 0 if we don't find the right insn length will
> + * cause an infinite loop in target_disas(). Return an
> + * unrealistic length value instead, making target_disas()
> + * trigger the "Disassembler disagrees with translator over
> + * instruction" error path.
> + */
> + return 1024;
> }
>
> /* format instruction */
> diff --git a/target/riscv/internals.h b/target/riscv/internals.h
> index 8c24af0d85..80c2f8f5f8 100644
> --- a/target/riscv/internals.h
> +++ b/target/riscv/internals.h
> @@ -245,9 +245,38 @@ static inline target_ulong
> adjust_addr_virt(CPURISCVState *env,
> return adjust_addr_body(env, addr, true);
> }
>
> -static inline int insn_len(uint16_t first_word)
> +static inline int insn_len(uint32_t opcode)
> {
> - return (first_word & 3) == 3 ? 4 : 2;
> + /*
> + * "Expanded Instruction-Length Encoding" as in
> + * unpriv isa section 1.5.1.
> + *
> + * aa - 16 bit aa != 11
> + * bbb11 - 32 bit bbb != 111
> + * 011111 - 48 bit
> + * 0111111 - 64 bit
> + * xnnnxxxxx1111111 - (80 + 16*nnn) bits, if nnn != 111
> + * x111xxxxx1111111 - 192 bits
> + */
> + if ((opcode & 0b11) != 0b11) {
> + return 2;
> + } else if ((opcode & 0b11100) != 0b11100) {
> + return 4;
> + } else if ((opcode & 0b111111) == 0b011111) {
> + return 6;
> + } else if ((opcode & 0b1111111) == 0b0111111) {
> + return 8;
> + } else if ((opcode & 0b1111111) == 0b1111111) {
> + uint32_t nnn = (opcode >> 12) & 0b0111;
> +
> + if (nnn == 0b111) {
> + return 24;
> + }
> + return (16 * nnn + 80) / 8;
> + }
> +
> + /* Shouldn't happen, but ... */
> + return -1;
> }
>
> int riscv_monitor_get_register_legacy(CPUState *cs, const char
> *name,
> diff --git a/target/riscv/translate.c b/target/riscv/translate.c
> index 1e4f340256..bd28ed3fcb 100644
> --- a/target/riscv/translate.c
> +++ b/target/riscv/translate.c
> @@ -1255,6 +1255,7 @@ static void decode_opc(CPURISCVState *env,
> DisasContext *ctx)
> {
> uint32_t opcode;
> bool pc_is_4byte_align = ((ctx->base.pc_next % 4) == 0);
> + int insn_length;
>
> ctx->virt_inst_excp = false;
> if (pc_is_4byte_align) {
> @@ -1279,7 +1280,11 @@ static void decode_opc(CPURISCVState *env,
> DisasContext *ctx)
> }
> ctx->ol = ctx->xl;
>
> - ctx->cur_insn_len = insn_len((uint16_t)opcode);
> + insn_length = insn_len(opcode);
> + if (insn_length < 0) {
> + gen_exception_illegal(ctx);
> + }
> + ctx->cur_insn_len = insn_length;
> /* Check for compressed insn */
> if (ctx->cur_insn_len == 2) {
> ctx->opcode = (uint16_t)opcode;
On 5/26/2026 9:35 PM, Alistair Francis wrote:
> On Wed, 2026-05-20 at 14:35 -0300, Daniel Henrique Barboza wrote:
>> We're missing the "Expanded Instruction-Length Encoding" logic from
>> the
>> unpriv isa, meaning we're not detecting missing insn len > 8 bytes.
>> This doesn't have impact in regular insn emulation because most (if
>> not
>> all) of the existing insns are 2 or 4 bytes.
>>
>> However, running with "-d in_asm" will cause QEMU to run an infinite
>> loop because our disas code can't handle the expanded length
>> encoding,
>> causing inst_length() to return '0' and target_disas() to loop
>> forever
>> since it's using the len to decrease the loop counter.
>>
>> Fixing just the disas code isn't enough. target_disas() will do a
>>
>> "if (size < count) {"
>>
>> check, where size = the insn length from the translator and count =
>> the
>> insn length from disas, and will warn about it during disassembling.
>> We
>> need both places to use the same insn length logic.
>>
>> [1] https://gitlab.com/qemu-project/qemu/-/work_items/3479
>>
>> Resolves: https://gitlab.com/qemu-project/qemu/-/work_items/3479
>> Signed-off-by: Daniel Henrique Barboza
>> <daniel.barboza@oss.qualcomm.com>
>> ---
>> disas/riscv.c | 50 +++++++++++++++++++++++++++++---------
>> --
>> target/riscv/internals.h | 33 ++++++++++++++++++++++++--
>> target/riscv/translate.c | 7 +++++-
>> 3 files changed, 73 insertions(+), 17 deletions(-)
>>
>> diff --git a/disas/riscv.c b/disas/riscv.c
>> index d416a4d6b3..d32661c857 100644
>> --- a/disas/riscv.c
>> +++ b/disas/riscv.c
>> @@ -5057,26 +5057,48 @@ static bool check_constraints(rv_decode *dec,
>> const rvc_constraint *c)
>> return true;
>> }
>>
>> -/* instruction length */
>> -
>> +/*
>> + * Note: basically a carbon copy of insn_len() from
>> + * target/riscv/internals.h.
>> + */
>> static size_t inst_length(rv_inst inst)
>> {
>> - /* NOTE: supports maximum instruction size of 64-bits */
>> -
>> /*
>> - * instruction length coding
>> + * "Expanded Instruction-Length Encoding" as in
>> + * unpriv isa section 1.5.1.
>
> This section was removed from the manual with commit
>
> commit 4f05ffef4987213ddfb78b3e3aaf91b67892a664
> Author: Andrew Waterman <andrew@sifive.com>
> Date: Tue Apr 1 18:02:36 2025 -0700
>
> Remove proposal for longer instruction encodings
>
> This is pursuant to making the manual contain only ratified
> content.
>
>
> So I don't think we should reference it. We should probably drop
> support for these as who knows what the ratified version will look like
Ooohhh good call. In the official RVI link:
https://docs.riscv.org/reference/isa/unpriv/intro.html#1-1-5-base-instruction-length-encoding
"All the 32-bit instructions in the base ISA have their lowest two bits set to 11.
The optional compressed 16-bit instruction-set extensions have their lowest two
bits equal to 00, 01, or 10."
So we'll support either 16 or 32 bits lengths only. I'll send a v2.
Thanks,
Daniel
>
> Alistair
>
>> *
>> - * aa - 16 bit aa != 11
>> - * bbb11 - 32 bit bbb != 111
>> - * 011111 - 48 bit
>> - * 0111111 - 64 bit
>> + * aa - 16 bit aa != 11
>> + * bbb11 - 32 bit bbb != 111
>> + * 011111 - 48 bit
>> + * 0111111 - 64 bit
>> + * xnnnxxxxx1111111 - (80 + 16*nnn) bits, if nnn != 111
>> + * x111xxxxx1111111 - 192 bits
>> */
>> + if ((inst & 0b11) != 0b11) {
>> + return 2;
>> + } else if ((inst & 0b11100) != 0b11100) {
>> + return 4;
>> + } else if ((inst & 0b111111) == 0b011111) {
>> + return 6;
>> + } else if ((inst & 0b1111111) == 0b0111111) {
>> + return 8;
>> + } else if ((inst & 0b1111111) == 0b1111111) {
>> + uint32_t nnn = (inst >> 12) & 0b0111;
>> +
>> + if (nnn == 0b111) {
>> + return 24;
>> + }
>> + return (16 * nnn + 80) / 8;
>> + }
>>
>> - return (inst & 0b11) != 0b11 ? 2
>> - : (inst & 0b11100) != 0b11100 ? 4
>> - : (inst & 0b111111) == 0b011111 ? 6
>> - : (inst & 0b1111111) == 0b0111111 ? 8
>> - : 0;
>> + /*
>> + * Returning 0 if we don't find the right insn length will
>> + * cause an infinite loop in target_disas(). Return an
>> + * unrealistic length value instead, making target_disas()
>> + * trigger the "Disassembler disagrees with translator over
>> + * instruction" error path.
>> + */
>> + return 1024;
>> }
>>
>> /* format instruction */
>> diff --git a/target/riscv/internals.h b/target/riscv/internals.h
>> index 8c24af0d85..80c2f8f5f8 100644
>> --- a/target/riscv/internals.h
>> +++ b/target/riscv/internals.h
>> @@ -245,9 +245,38 @@ static inline target_ulong
>> adjust_addr_virt(CPURISCVState *env,
>> return adjust_addr_body(env, addr, true);
>> }
>>
>> -static inline int insn_len(uint16_t first_word)
>> +static inline int insn_len(uint32_t opcode)
>> {
>> - return (first_word & 3) == 3 ? 4 : 2;
>> + /*
>> + * "Expanded Instruction-Length Encoding" as in
>> + * unpriv isa section 1.5.1.
>> + *
>> + * aa - 16 bit aa != 11
>> + * bbb11 - 32 bit bbb != 111
>> + * 011111 - 48 bit
>> + * 0111111 - 64 bit
>> + * xnnnxxxxx1111111 - (80 + 16*nnn) bits, if nnn != 111
>> + * x111xxxxx1111111 - 192 bits
>> + */
>> + if ((opcode & 0b11) != 0b11) {
>> + return 2;
>> + } else if ((opcode & 0b11100) != 0b11100) {
>> + return 4;
>> + } else if ((opcode & 0b111111) == 0b011111) {
>> + return 6;
>> + } else if ((opcode & 0b1111111) == 0b0111111) {
>> + return 8;
>> + } else if ((opcode & 0b1111111) == 0b1111111) {
>> + uint32_t nnn = (opcode >> 12) & 0b0111;
>> +
>> + if (nnn == 0b111) {
>> + return 24;
>> + }
>> + return (16 * nnn + 80) / 8;
>> + }
>> +
>> + /* Shouldn't happen, but ... */
>> + return -1;
>> }
>>
>> int riscv_monitor_get_register_legacy(CPUState *cs, const char
>> *name,
>> diff --git a/target/riscv/translate.c b/target/riscv/translate.c
>> index 1e4f340256..bd28ed3fcb 100644
>> --- a/target/riscv/translate.c
>> +++ b/target/riscv/translate.c
>> @@ -1255,6 +1255,7 @@ static void decode_opc(CPURISCVState *env,
>> DisasContext *ctx)
>> {
>> uint32_t opcode;
>> bool pc_is_4byte_align = ((ctx->base.pc_next % 4) == 0);
>> + int insn_length;
>>
>> ctx->virt_inst_excp = false;
>> if (pc_is_4byte_align) {
>> @@ -1279,7 +1280,11 @@ static void decode_opc(CPURISCVState *env,
>> DisasContext *ctx)
>> }
>> ctx->ol = ctx->xl;
>>
>> - ctx->cur_insn_len = insn_len((uint16_t)opcode);
>> + insn_length = insn_len(opcode);
>> + if (insn_length < 0) {
>> + gen_exception_illegal(ctx);
>> + }
>> + ctx->cur_insn_len = insn_length;
>> /* Check for compressed insn */
>> if (ctx->cur_insn_len == 2) {
>> ctx->opcode = (uint16_t)opcode;
© 2016 - 2026 Red Hat, Inc.