1 | The following changes since commit 95d1fbabae0cd44156ac4b96d512d143ca7dfd5e: | 1 | The following changes since commit 15df33ceb73cb6bb3c6736cf4d2cff51129ed4b4: |
---|---|---|---|
2 | 2 | ||
3 | Merge remote-tracking branch 'remotes/kraxel/tags/fixes-20200716-pull-request' into staging (2020-07-16 18:50:51 +0100) | 3 | Merge remote-tracking branch 'remotes/quic/tags/pull-hex-20220312-1' into staging (2022-03-13 17:29:18 +0000) |
4 | 4 | ||
5 | are available in the Git repository at: | 5 | are available in the Git repository at: |
6 | 6 | ||
7 | https://github.com/rth7680/qemu.git tags/pull-tcg-20200717 | 7 | https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20220314 |
8 | 8 | ||
9 | for you to fetch changes up to ba3c35d9c4026361fd380b269dc6def9510b7166: | 9 | for you to fetch changes up to 76cff100beeae8d3676bb658cccd45ef5ced8aa9: |
10 | 10 | ||
11 | tcg/cpu-exec: precise single-stepping after an interrupt (2020-07-17 11:09:34 -0700) | 11 | tcg/arm: Don't emit UNPREDICTABLE LDRD with Rm == Rt or Rt+1 (2022-03-14 10:31:51 -0700) |
12 | 12 | ||
13 | ---------------------------------------------------------------- | 13 | ---------------------------------------------------------------- |
14 | Fix vector min/max fallback expansion | 14 | Fixes for s390x host vectors |
15 | Fix singlestep from exception and interrupt | 15 | Fix for arm ldrd unpredictable case |
16 | 16 | ||
17 | ---------------------------------------------------------------- | 17 | ---------------------------------------------------------------- |
18 | Luc Michel (1): | 18 | Richard Henderson (4): |
19 | tcg/cpu-exec: precise single-stepping after an exception | 19 | tcg/s390x: Fix tcg_out_dupi_vec vs VGM |
20 | tcg/s390x: Fix INDEX_op_bitsel_vec vs VSEL | ||
21 | tcg/s390x: Fix tcg_out_dup_vec vs general registers | ||
22 | tcg/arm: Don't emit UNPREDICTABLE LDRD with Rm == Rt or Rt+1 | ||
20 | 23 | ||
21 | Richard Henderson (2): | 24 | tcg/arm/tcg-target.c.inc | 17 +++++++++++++++-- |
22 | tcg: Save/restore vecop_list around minmax fallback | 25 | tcg/s390x/tcg-target.c.inc | 7 ++++--- |
23 | tcg/cpu-exec: precise single-stepping after an interrupt | 26 | 2 files changed, 19 insertions(+), 5 deletions(-) |
24 | |||
25 | accel/tcg/cpu-exec.c | 19 ++++++++++++++++++- | ||
26 | tcg/tcg-op-vec.c | 2 ++ | ||
27 | 2 files changed, 20 insertions(+), 1 deletion(-) | ||
28 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | The immediate operands to VGM were in the wrong order, | ||
2 | producing an inverse mask. | ||
1 | 3 | ||
4 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
5 | --- | ||
6 | tcg/s390x/tcg-target.c.inc | 4 ++-- | ||
7 | 1 file changed, 2 insertions(+), 2 deletions(-) | ||
8 | |||
9 | diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc | ||
10 | index XXXXXXX..XXXXXXX 100644 | ||
11 | --- a/tcg/s390x/tcg-target.c.inc | ||
12 | +++ b/tcg/s390x/tcg-target.c.inc | ||
13 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece, | ||
14 | msb = clz32(val); | ||
15 | lsb = 31 - ctz32(val); | ||
16 | } | ||
17 | - tcg_out_insn(s, VRIb, VGM, dst, lsb, msb, MO_32); | ||
18 | + tcg_out_insn(s, VRIb, VGM, dst, msb, lsb, MO_32); | ||
19 | return; | ||
20 | } | ||
21 | } else { | ||
22 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece, | ||
23 | msb = clz64(val); | ||
24 | lsb = 63 - ctz64(val); | ||
25 | } | ||
26 | - tcg_out_insn(s, VRIb, VGM, dst, lsb, msb, MO_64); | ||
27 | + tcg_out_insn(s, VRIb, VGM, dst, msb, lsb, MO_64); | ||
28 | return; | ||
29 | } | ||
30 | } | ||
31 | -- | ||
32 | 2.25.1 | diff view generated by jsdifflib |
1 | When single-stepping with a debugger attached to QEMU, and when an | 1 | The operands are output in the wrong order: the tcg selector |
---|---|---|---|
2 | interrupt is raised, the debugger misses the first instruction after | 2 | argument is first, whereas the s390x selector argument is last. |
3 | the interrupt. | ||
4 | 3 | ||
5 | Tested-by: Luc Michel <luc.michel@greensocs.com> | 4 | Tested-by: Thomas Huth <thuth@redhat.com> |
6 | Reviewed-by: Luc Michel <luc.michel@greensocs.com> | 5 | Resolves: https://gitlab.com/qemu-project/qemu/-/issues/898 |
7 | Buglink: https://bugs.launchpad.net/qemu/+bug/757702 | 6 | Fixes: 9bca986df88 ("tcg/s390x: Implement TCG_TARGET_HAS_bitsel_vec") |
8 | Message-Id: <20200717163029.2737546-1-richard.henderson@linaro.org> | ||
9 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 7 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
10 | --- | 8 | --- |
11 | accel/tcg/cpu-exec.c | 8 +++++++- | 9 | tcg/s390x/tcg-target.c.inc | 2 +- |
12 | 1 file changed, 7 insertions(+), 1 deletion(-) | 10 | 1 file changed, 1 insertion(+), 1 deletion(-) |
13 | 11 | ||
14 | diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c | 12 | diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc |
15 | index XXXXXXX..XXXXXXX 100644 | 13 | index XXXXXXX..XXXXXXX 100644 |
16 | --- a/accel/tcg/cpu-exec.c | 14 | --- a/tcg/s390x/tcg-target.c.inc |
17 | +++ b/accel/tcg/cpu-exec.c | 15 | +++ b/tcg/s390x/tcg-target.c.inc |
18 | @@ -XXX,XX +XXX,XX @@ static inline bool cpu_handle_interrupt(CPUState *cpu, | 16 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, |
19 | else { | 17 | break; |
20 | if (cc->cpu_exec_interrupt(cpu, interrupt_request)) { | 18 | |
21 | replay_interrupt(); | 19 | case INDEX_op_bitsel_vec: |
22 | - cpu->exception_index = -1; | 20 | - tcg_out_insn(s, VRRe, VSEL, a0, a1, a2, args[3]); |
23 | + /* | 21 | + tcg_out_insn(s, VRRe, VSEL, a0, a2, args[3], a1); |
24 | + * After processing the interrupt, ensure an EXCP_DEBUG is | 22 | break; |
25 | + * raised when single-stepping so that GDB doesn't miss the | 23 | |
26 | + * next instruction. | 24 | case INDEX_op_cmp_vec: |
27 | + */ | ||
28 | + cpu->exception_index = | ||
29 | + (cpu->singlestep_enabled ? EXCP_DEBUG : -1); | ||
30 | *last_tb = NULL; | ||
31 | } | ||
32 | /* The target hook may have updated the 'cpu->interrupt_request'; | ||
33 | -- | 25 | -- |
34 | 2.25.1 | 26 | 2.25.1 |
35 | |||
36 | diff view generated by jsdifflib |
1 | From: Luc Michel <luc.michel@greensocs.com> | 1 | We copied the data from the general register input to the |
---|---|---|---|
2 | vector register output, but have not yet replicated it. | ||
3 | We intended to fall through into the vector-vector case, | ||
4 | but failed to redirect the input register. | ||
2 | 5 | ||
3 | When single-stepping with a debugger attached to QEMU, and when an | 6 | This is caught by an assertion failure in tcg_out_insn_VRIc, |
4 | exception is raised, the debugger misses the first instruction after the | 7 | which diagnosed the incorrect register class. |
5 | exception: | ||
6 | 8 | ||
7 | $ qemu-system-aarch64 -M virt -display none -cpu cortex-a53 -s -S | ||
8 | |||
9 | $ aarch64-linux-gnu-gdb | ||
10 | GNU gdb (GDB) 9.2 | ||
11 | [...] | ||
12 | (gdb) tar rem :1234 | ||
13 | Remote debugging using :1234 | ||
14 | warning: No executable has been specified and target does not support | ||
15 | determining executable automatically. Try using the "file" command. | ||
16 | 0x0000000000000000 in ?? () | ||
17 | (gdb) # writing nop insns to 0x200 and 0x204 | ||
18 | (gdb) set *0x200 = 0xd503201f | ||
19 | (gdb) set *0x204 = 0xd503201f | ||
20 | (gdb) # 0x0 address contains 0 which is an invalid opcode. | ||
21 | (gdb) # The CPU should raise an exception and jump to 0x200 | ||
22 | (gdb) si | ||
23 | 0x0000000000000204 in ?? () | ||
24 | |||
25 | With this commit, the same run steps correctly on the first instruction | ||
26 | of the exception vector: | ||
27 | |||
28 | (gdb) si | ||
29 | 0x0000000000000200 in ?? () | ||
30 | |||
31 | Buglink: https://bugs.launchpad.net/qemu/+bug/757702 | ||
32 | Signed-off-by: Luc Michel <luc.michel@greensocs.com> | ||
33 | Message-Id: <20200716193947.3058389-1-luc.michel@greensocs.com> | ||
34 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 9 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
35 | --- | 10 | --- |
36 | accel/tcg/cpu-exec.c | 11 +++++++++++ | 11 | tcg/s390x/tcg-target.c.inc | 1 + |
37 | 1 file changed, 11 insertions(+) | 12 | 1 file changed, 1 insertion(+) |
38 | 13 | ||
39 | diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c | 14 | diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc |
40 | index XXXXXXX..XXXXXXX 100644 | 15 | index XXXXXXX..XXXXXXX 100644 |
41 | --- a/accel/tcg/cpu-exec.c | 16 | --- a/tcg/s390x/tcg-target.c.inc |
42 | +++ b/accel/tcg/cpu-exec.c | 17 | +++ b/tcg/s390x/tcg-target.c.inc |
43 | @@ -XXX,XX +XXX,XX @@ static inline bool cpu_handle_exception(CPUState *cpu, int *ret) | 18 | @@ -XXX,XX +XXX,XX @@ static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece, |
44 | cc->do_interrupt(cpu); | 19 | if (vece == MO_64) { |
45 | qemu_mutex_unlock_iothread(); | 20 | return true; |
46 | cpu->exception_index = -1; | 21 | } |
47 | + | 22 | + src = dst; |
48 | + if (unlikely(cpu->singlestep_enabled)) { | 23 | } |
49 | + /* | 24 | |
50 | + * After processing the exception, ensure an EXCP_DEBUG is | 25 | /* |
51 | + * raised when single-stepping so that GDB doesn't miss the | ||
52 | + * next instruction. | ||
53 | + */ | ||
54 | + *ret = EXCP_DEBUG; | ||
55 | + cpu_handle_debug_exception(cpu); | ||
56 | + return true; | ||
57 | + } | ||
58 | } else if (!replay_has_interrupt()) { | ||
59 | /* give a chance to iothread in replay mode */ | ||
60 | *ret = EXCP_INTERRUPT; | ||
61 | -- | 26 | -- |
62 | 2.25.1 | 27 | 2.25.1 |
63 | |||
64 | diff view generated by jsdifflib |
1 | Failing to save/restore vecop_list trips an assertion when tcg_gen_cmp_vec is called from | 1 | The LDRD (register) instruction is UNPREDICTABLE if the Rm register |
---|---|---|---|
2 | within tcg_gen_cmpsel_vec. | 2 | is the same as either Rt or Rt+1 (the two registers being loaded to). |
3 | We weren't making sure we avoided this, with the result that on some | ||
4 | host CPUs like the Cortex-A7 we would get a SIGILL because the CPU | ||
5 | chooses to UNDEF for this particular UNPREDICTABLE case. | ||
3 | 6 | ||
4 | Fixes: 72b4c792c7a | 7 | Since we've already checked that datalo is aligned, we can simplify |
8 | the test vs the Rm operand by aligning it before comparison. Check | ||
9 | for the two orderings before falling back to two ldr instructions. | ||
10 | |||
11 | We don't bother to do anything similar for tcg_out_ldrd_rwb(), | ||
12 | because it is only used in tcg_out_tlb_read() with a fixed set of | ||
13 | registers which don't overlap. | ||
14 | |||
15 | There is no equivalent UNPREDICTABLE case for STRD. | ||
16 | |||
17 | Reviewed-by: Alex Bennée <alex.bennee@linaro.org> | ||
18 | Resolves: https://gitlab.com/qemu-project/qemu/-/issues/896 | ||
5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 19 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
6 | --- | 20 | --- |
7 | tcg/tcg-op-vec.c | 2 ++ | 21 | tcg/arm/tcg-target.c.inc | 17 +++++++++++++++-- |
8 | 1 file changed, 2 insertions(+) | 22 | 1 file changed, 15 insertions(+), 2 deletions(-) |
9 | 23 | ||
10 | diff --git a/tcg/tcg-op-vec.c b/tcg/tcg-op-vec.c | 24 | diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc |
11 | index XXXXXXX..XXXXXXX 100644 | 25 | index XXXXXXX..XXXXXXX 100644 |
12 | --- a/tcg/tcg-op-vec.c | 26 | --- a/tcg/arm/tcg-target.c.inc |
13 | +++ b/tcg/tcg-op-vec.c | 27 | +++ b/tcg/arm/tcg-target.c.inc |
14 | @@ -XXX,XX +XXX,XX @@ static void do_minmax(unsigned vece, TCGv_vec r, TCGv_vec a, | 28 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld_index(TCGContext *s, MemOp opc, |
15 | TCGv_vec b, TCGOpcode opc, TCGCond cond) | 29 | /* LDRD requires alignment; double-check that. */ |
16 | { | 30 | if (get_alignment_bits(opc) >= MO_64 |
17 | if (!do_op3(vece, r, a, b, opc)) { | 31 | && (datalo & 1) == 0 && datahi == datalo + 1) { |
18 | + const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL); | 32 | - tcg_out_ldrd_r(s, COND_AL, datalo, addrlo, addend); |
19 | tcg_gen_cmpsel_vec(cond, vece, r, a, b, a, b); | 33 | - } else if (scratch_addend) { |
20 | + tcg_swap_vecop_list(hold_list); | 34 | + /* |
21 | } | 35 | + * Rm (the second address op) must not overlap Rt or Rt + 1. |
22 | } | 36 | + * Since datalo is aligned, we can simplify the test via alignment. |
23 | 37 | + * Flip the two address arguments if that works. | |
38 | + */ | ||
39 | + if ((addend & ~1) != datalo) { | ||
40 | + tcg_out_ldrd_r(s, COND_AL, datalo, addrlo, addend); | ||
41 | + break; | ||
42 | + } | ||
43 | + if ((addrlo & ~1) != datalo) { | ||
44 | + tcg_out_ldrd_r(s, COND_AL, datalo, addend, addrlo); | ||
45 | + break; | ||
46 | + } | ||
47 | + } | ||
48 | + if (scratch_addend) { | ||
49 | tcg_out_ld32_rwb(s, COND_AL, datalo, addend, addrlo); | ||
50 | tcg_out_ld32_12(s, COND_AL, datahi, addend, 4); | ||
51 | } else { | ||
24 | -- | 52 | -- |
25 | 2.25.1 | 53 | 2.25.1 |
26 | 54 | ||
27 | 55 | diff view generated by jsdifflib |