configure | 6 +++--- cpu-exec.c | 6 ++---- include/exec/exec-all.h | 2 ++ include/exec/tb-hash.h | 12 +++++++++++ include/qemu/atomic.h | 34 ++++++++++++++++++++++-------- target/alpha/translate.c | 49 ++++++++++++++++++++++++++++++++------------ target/arm/translate.c | 21 ++++++++++++++----- target/arm/translate.h | 4 ++++ target/i386/translate.c | 43 ++++++++++++++++++++++++++++++-------- target/nios2/translate.c | 2 +- tcg-runtime.c | 24 ++++++++++++++++++++++ tcg/README | 8 ++++++++ tcg/aarch64/tcg-target.h | 1 + tcg/aarch64/tcg-target.inc.c | 22 ++++++++++++++++++-- tcg/arm/tcg-target.h | 1 + tcg/i386/tcg-target.h | 1 + tcg/i386/tcg-target.inc.c | 24 ++++++++++++++++++++-- tcg/ia64/tcg-target.h | 1 + tcg/mips/tcg-target.h | 1 + tcg/ppc/tcg-target.h | 1 + tcg/ppc/tcg-target.inc.c | 7 +++++++ tcg/s390/tcg-target.h | 1 + tcg/s390/tcg-target.inc.c | 24 +++++++++++++++++++--- tcg/sparc/tcg-target.h | 1 + tcg/sparc/tcg-target.inc.c | 11 +++++++++- tcg/tcg-op.c | 13 ++++++++++++ tcg/tcg-op.h | 11 ++++++++++ tcg/tcg-opc.h | 1 + tcg/tcg-runtime.h | 2 ++ tcg/tcg.h | 1 + tcg/tci/tcg-target.h | 1 + 31 files changed, 285 insertions(+), 51 deletions(-)
Changes since Emilio's v4: * Fold tcg/i386 exit_tb 0 to the epilogue we created for goto_ptr. * Drop gen_jr in favor of DISAS_EXIT for target/arm. * Backend support for ppc, aarch64, sparc, s390. * Fix 3 build failures that appear on sparc v8plus (64-bit ilp32). I attempted to throw together an x32 environment to validate x86_64 ilp32, which ought to have had the same problems as sparc, but my patience was exhausted by gentoo misconfigury. I may try that again later, but not now. r~ Emilio G. Cota (11): exec-all: export tb_htable_lookup tcg-runtime: add lookup_tb_ptr helper tcg: introduce goto_ptr opcode tcg: export tcg_gen_lookup_and_goto_ptr target/arm: optimize cross-page direct jumps in softmmu target/arm: optimize indirect branches target/i386: introduce gen_jr helper to generate lookup_and_goto_ptr target/i386: optimize cross-page direct jumps in softmmu target/i386: optimize indirect branches tb-hash: improve tb_jmp_cache hash function in user mode tcg/i386: implement goto_ptr Richard Henderson (8): target/nios2: Fix 64-bit ilp32 compilation tcg/sparc: Use the proper compilation flags for 32-bit qemu/atomic: Loosen restrictions for 64-bit ILP32 hosts target/alpha: Use tcg_gen_goto_ptr tcg/ppc: Implement goto_ptr tcg/aarch64: Implement goto_ptr tcg/sparc: Implement goto_ptr tcg/s390: Implement goto_ptr configure | 6 +++--- cpu-exec.c | 6 ++---- include/exec/exec-all.h | 2 ++ include/exec/tb-hash.h | 12 +++++++++++ include/qemu/atomic.h | 34 ++++++++++++++++++++++-------- target/alpha/translate.c | 49 ++++++++++++++++++++++++++++++++------------ target/arm/translate.c | 21 ++++++++++++++----- target/arm/translate.h | 4 ++++ target/i386/translate.c | 43 ++++++++++++++++++++++++++++++-------- target/nios2/translate.c | 2 +- tcg-runtime.c | 24 ++++++++++++++++++++++ tcg/README | 8 ++++++++ tcg/aarch64/tcg-target.h | 1 + tcg/aarch64/tcg-target.inc.c | 22 ++++++++++++++++++-- tcg/arm/tcg-target.h | 1 + tcg/i386/tcg-target.h | 1 + tcg/i386/tcg-target.inc.c | 24 ++++++++++++++++++++-- tcg/ia64/tcg-target.h | 1 + tcg/mips/tcg-target.h | 1 + tcg/ppc/tcg-target.h | 1 + tcg/ppc/tcg-target.inc.c | 7 +++++++ tcg/s390/tcg-target.h | 1 + tcg/s390/tcg-target.inc.c | 24 +++++++++++++++++++--- tcg/sparc/tcg-target.h | 1 + tcg/sparc/tcg-target.inc.c | 11 +++++++++- tcg/tcg-op.c | 13 ++++++++++++ tcg/tcg-op.h | 11 ++++++++++ tcg/tcg-opc.h | 1 + tcg/tcg-runtime.h | 2 ++ tcg/tcg.h | 1 + tcg/tci/tcg-target.h | 1 + 31 files changed, 285 insertions(+), 51 deletions(-) -- 2.9.3
Hi,
This series seems to have some coding style problems. See output below for
more information:
Subject: [Qemu-devel] [PATCH v5 00/19] TCG cross-tb optimizations
Message-id: 20170427120006.20564-1-rth@twiddle.net
Type: series
=== TEST SCRIPT BEGIN ===
#!/bin/bash
BASE=base
n=1
total=$(git log --oneline $BASE.. | wc -l)
failed=0
# Useful git options
git config --local diff.renamelimit 0
git config --local diff.renames True
commits="$(git log --format=%H --reverse $BASE..)"
for c in $commits; do
echo "Checking PATCH $n/$total: $(git log -n 1 --format=%s $c)..."
if ! git show $c --format=email | ./scripts/checkpatch.pl --mailback -; then
failed=1
echo
fi
n=$((n+1))
done
exit $failed
=== TEST SCRIPT END ===
Updating 3c8cf5a9c21ff8782164d1def7f44bd888713384
Switched to a new branch 'test'
dd00ff3 tcg/s390: Implement goto_ptr
7a3bb3b tcg/sparc: Implement goto_ptr
d0faf3d tcg/aarch64: Implement goto_ptr
3e4a3c8 tcg/ppc: Implement goto_ptr
4203286 tcg/i386: implement goto_ptr
1ab1bc0 target/alpha: Use tcg_gen_goto_ptr
aa74c2b tb-hash: improve tb_jmp_cache hash function in user mode
623c464 target/i386: optimize indirect branches
8265f15 target/i386: optimize cross-page direct jumps in softmmu
e57b419 target/i386: introduce gen_jr helper to generate lookup_and_goto_ptr
f6f46f5 target/arm: optimize indirect branches
e3a959e target/arm: optimize cross-page direct jumps in softmmu
580b0ef tcg: export tcg_gen_lookup_and_goto_ptr
12ab749 tcg: introduce goto_ptr opcode
0bbf965 tcg-runtime: add lookup_tb_ptr helper
6e2d49c exec-all: export tb_htable_lookup
b0be14c qemu/atomic: Loosen restrictions for 64-bit ILP32 hosts
99bd77b tcg/sparc: Use the proper compilation flags for 32-bit
f0e5c62 target/nios2: Fix 64-bit ilp32 compilation
=== OUTPUT BEGIN ===
Checking PATCH 1/19: target/nios2: Fix 64-bit ilp32 compilation...
Checking PATCH 2/19: tcg/sparc: Use the proper compilation flags for 32-bit...
Checking PATCH 3/19: qemu/atomic: Loosen restrictions for 64-bit ILP32 hosts...
WARNING: architecture specific defines should be avoided
#33: FILE: include/qemu/atomic.h:104:
+#if defined(__x86_64__) || defined(__sparc__)
total: 0 errors, 1 warnings, 87 lines checked
Your patch has style problems, please review. If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.
Checking PATCH 4/19: exec-all: export tb_htable_lookup...
Checking PATCH 5/19: tcg-runtime: add lookup_tb_ptr helper...
Checking PATCH 6/19: tcg: introduce goto_ptr opcode...
Checking PATCH 7/19: tcg: export tcg_gen_lookup_and_goto_ptr...
Checking PATCH 8/19: target/arm: optimize cross-page direct jumps in softmmu...
Checking PATCH 9/19: target/arm: optimize indirect branches...
Checking PATCH 10/19: target/i386: introduce gen_jr helper to generate lookup_and_goto_ptr...
Checking PATCH 11/19: target/i386: optimize cross-page direct jumps in softmmu...
Checking PATCH 12/19: target/i386: optimize indirect branches...
Checking PATCH 13/19: tb-hash: improve tb_jmp_cache hash function in user mode...
Checking PATCH 14/19: target/alpha: Use tcg_gen_goto_ptr...
ERROR: return is not a function, parentheses are not required
#31: FILE: target/alpha/translate.c:465:
+ return ((ctx->tb->cflags & CF_LAST_IO)
total: 1 errors, 0 warnings, 113 lines checked
Your patch has style problems, please review. If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.
Checking PATCH 15/19: tcg/i386: implement goto_ptr...
Checking PATCH 16/19: tcg/ppc: Implement goto_ptr...
Checking PATCH 17/19: tcg/aarch64: Implement goto_ptr...
Checking PATCH 18/19: tcg/sparc: Implement goto_ptr...
Checking PATCH 19/19: tcg/s390: Implement goto_ptr...
=== OUTPUT END ===
Test command exited with code: 1
---
Email generated automatically by Patchew [http://patchew.org/].
Please send your feedback to patchew-devel@freelists.org
While at it, we might want to include these two for aarch64. They apply on top Richard's tcg-next branch, which includes v5. Thanks, Emilio
Perf numbers in next commit's log.
Signed-off-by: Emilio G. Cota <cota@braap.org>
---
target/arm/translate-a64.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index 24de30d..5b691fc 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -373,8 +373,7 @@ static inline void gen_goto_tb(DisasContext *s, int n, uint64_t dest)
} else if (s->singlestep_enabled) {
gen_exception_internal(EXCP_DEBUG);
} else {
- tcg_gen_exit_tb(0);
- s->is_jmp = DISAS_TB_JUMP;
+ tcg_gen_lookup_and_goto_ptr(cpu_pc);
}
}
}
--
2.7.4
On Fri, Apr 28, 2017 at 15:17:24 -0400, Emilio G. Cota wrote:
> Perf numbers in next commit's log.
>
> Signed-off-by: Emilio G. Cota <cota@braap.org>
> ---
> target/arm/translate-a64.c | 3 +--
> 1 file changed, 1 insertion(+), 2 deletions(-)
>
> diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
> index 24de30d..5b691fc 100644
> --- a/target/arm/translate-a64.c
> +++ b/target/arm/translate-a64.c
> @@ -373,8 +373,7 @@ static inline void gen_goto_tb(DisasContext *s, int n, uint64_t dest)
> } else if (s->singlestep_enabled) {
> gen_exception_internal(EXCP_DEBUG);
> } else {
> - tcg_gen_exit_tb(0);
> - s->is_jmp = DISAS_TB_JUMP;
I'm not sure about removing this line though. Would it be better to leave it?
I can't see how TB_JUMP ends up doing anything in the rest of the file.
Thanks,
E.
On 04/28/2017 09:22 PM, Emilio G. Cota wrote:
> On Fri, Apr 28, 2017 at 15:17:24 -0400, Emilio G. Cota wrote:
>> Perf numbers in next commit's log.
>>
>> Signed-off-by: Emilio G. Cota <cota@braap.org>
>> ---
>> target/arm/translate-a64.c | 3 +--
>> 1 file changed, 1 insertion(+), 2 deletions(-)
>>
>> diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
>> index 24de30d..5b691fc 100644
>> --- a/target/arm/translate-a64.c
>> +++ b/target/arm/translate-a64.c
>> @@ -373,8 +373,7 @@ static inline void gen_goto_tb(DisasContext *s, int n, uint64_t dest)
>> } else if (s->singlestep_enabled) {
>> gen_exception_internal(EXCP_DEBUG);
>> } else {
>> - tcg_gen_exit_tb(0);
>> - s->is_jmp = DISAS_TB_JUMP;
>
> I'm not sure about removing this line though. Would it be better to leave it?
> I can't see how TB_JUMP ends up doing anything in the rest of the file.
Why not just replace this with
s->is_jmp = DISAS_JUMP
and not emit the lookup_and_goto_ptr here at all?
r~
On Sat, Apr 29, 2017 at 12:30:08 +0200, Richard Henderson wrote:
> On 04/28/2017 09:22 PM, Emilio G. Cota wrote:
> >On Fri, Apr 28, 2017 at 15:17:24 -0400, Emilio G. Cota wrote:
> >>+++ b/target/arm/translate-a64.c
> >>@@ -373,8 +373,7 @@ static inline void gen_goto_tb(DisasContext *s, int n, uint64_t dest)
> >> } else if (s->singlestep_enabled) {
> >> gen_exception_internal(EXCP_DEBUG);
> >> } else {
> >>- tcg_gen_exit_tb(0);
> >>- s->is_jmp = DISAS_TB_JUMP;
> >
> >I'm not sure about removing this line though. Would it be better to leave it?
> >I can't see how TB_JUMP ends up doing anything in the rest of the file.
>
> Why not just replace this with
>
> s->is_jmp = DISAS_JUMP
>
> and not emit the lookup_and_goto_ptr here at all?
If we don't emit anything here, we get the error you reported
in the other message (icount whatever in cpu-exec.c:599).
I think this is due to callers assuming get_goto_tb does indeed
generate code, instead of deferring it via is_jmp. For example:
if (cond < 0x0e) {
/* genuinely conditional branches */
TCGLabel *label_match = gen_new_label();
arm_gen_test_cc(cond, label_match);
gen_goto_tb(s, 0, s->pc);
gen_set_label(label_match);
gen_goto_tb(s, 1, addr);
} else { [...]
So the simplest solution here seems to just emit the goto_ptr helper
in gen_goto_tb().
Regarding the setting of is_jmp to DISAS_TB_JUMP, after having
looked at the code more closely, I think it shouldn't
be removed, since this is the way we break out of the loop in
gen_intermediate_code(), thereby marking this instruction as the
last of the current TB.
I have updated patch 1/2 accordingly. You can cherry-pick it from:
https://github.com/cota/qemu/tree/tcg-next-v5+
Thanks,
Emilio
Measurements:
[Baseline performance is that before applying this and the previous commit]
- NBench, aarch64-softmmu. Host: Intel i7-4790K @ 4.00GHz
1.7x +-+--------------------------------------------------------------------------------------------------------------+-+
| |
| cross |
1.6x +cross+jr.................................................####...................................................+-+
| #++# |
| # # |
1.5x +-+...................................................*****..#...................................................+-+
| *+++* # |
| * * # |
1.4x +-+...................................................*...*..#...................................................+-+
| * * # |
| ##### * * # |
1.3x +-+................................****+++#...........*...*..#...................................................+-+
| *++* # * * # |
| * * # * * # |
1.2x +-+................................*..*...#...........*...*..#...................................................+-+
| * * # * * # |
| #### * * # * * # |
1.1x +-+.......................+++#..#..*..*...#...........*...*..#...................................................+-+
| **** # * * # * * # ****#### |
| * * # * * # * * # ****### +++#### ****### * * # |
1x +-++-++++++-++++****###++-*++*++#++*++*+-+#++****+++++*+++*++#++*++*-+#++*****++#++****###-++*++*-+#++*+-*+++#+-++-+
| *****### * * # * * # * * # *++*### * * # * * # * * # * *++# * * # * * # |
| * *++# * * # * * # * * # * * # * * # * * # * * # * * # * * # * * # |
0.9x +-+---*****###--****###---****###--****####--****###--*****###--****###--*****###--****###---****###--****####---+-+
ASSIGNMENT BITFIELD FOURFP EMULATION HUFFMAN LU DECOMPOSITIONNEURAL NUMERIC SORSTRING SORT hmean
png: http://imgur.com/qO9ubtk
NB. cross here represents the previous commit.
- SPECint06 (test set), x86_64-linux-user. Host: Intel i7-4790K @ 4.00GHz
1.5x +-+--------------------------------------------------------------------------------------------------------------+-+
| ***** |
| *+++* jr |
| * * |
1.4x +-+.....................................................................*...*.....................+++............+-+
| * * | |
| ***** * * | |
| * * * * ***** |
1.3x +-+....................................*...*............................*...*....................*.|.*...........+-+
| +++ * * * * * | * |
| ***** * * * * *+++* |
| * * * * * * * * |
1.2x +-+....................*...*...........*...*............................*...*...........*****....*...*...........+-+
| ***** * * * * * * * * * * +++ |
| * * * * * * * * * * * * ***** |
| * * * * ***** * * * * * * * * * * |
1.1x +-+...*...*............*...*...*...*...*...*............................*...*....+++....*...*....*...*...*...*...+-+
| * * * * * * * * * * ***** * * * * * * |
| * * * * * * * * ***** * * * * * * * * * * |
| * * ***** * * * * * * * * ****** * * * * * * * * * * |
1x +-++-+*+++*-++*+++*++++*+-+*+++*-++*+++*-++*+++*+++*++-*++++*-++*****+++*++-*+++*++-*+++*+-+*++++*+++*++-*+++*+-++-+
| * * * * * * * * * * * * * * *+++* * * * * * * * * * * |
| * * * * * * * * * * * * * * * * * * * * * * * * * * |
| * * * * * * * * * * * * * * * * * * * * * * * * * * |
0.9x +-+---*****---*****----*****---*****---*****---*****---******---*****---*****---*****---*****----*****---*****---+-+
astar bzip2 gcc gobmk h264ref hmmlibquantum mcf omnetpperlbench sjengxalancbmk hmean
png: http://imgur.com/R0FXKxP
- SPECint06 (train set), x86_64-linux-user. Host: Intel i7-4790K @ 4.00GHz
1.7x +-+--------------------------------------------------------------------------------------------------------------+-+
| |
| jr |
1.6x +-+...............................................................................................+++............+-+
| ***** |
| *+++* |
| * * |
1.5x +-+..............................................................................................*...*...........+-+
| +++ * * |
| ***** * * |
1.4x +-+.....................................................................*+++*....................*...*...........+-+
| * * * * |
| ***** * * * * |
| * * * * ***** * * |
1.3x +-+....................................*...*............................*...*...*...*............*...*...........+-+
| +++ * * * * * * * * |
| ***** * * * * * * ***** * * |
1.2x +-+....................*...*...........*...*............................*...*...*...*...*+++*....*...*...*****...+-+
| * * * * * * * * * * * * *+++* |
| ***** * * ***** * * * * * * * * * * * * |
| * * * * *+++* * * * * * * * * * * * * |
1.1x +-+...*...*............*...*...*...*...*...*............................*...*...*...*...*...*....*...*...*...*...+-+
| * * ***** * * * * * * ***** * * * * * * * * * * |
| * * * * * * * * * * +++ ****** *+++* * * * * * * * * * * |
1x +-+---*****---*****----*****---*****---*****---*****---******---*****---*****---*****---*****----*****---*****---+-+
astar bzip2 gcc gobmk h264ref hmmlibquantum mcf omnetpperlbench sjengxalancbmk hmean
png: http://imgur.com/DXzwyLP
Signed-off-by: Emilio G. Cota <cota@braap.org>
---
target/arm/translate-a64.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index 5b691fc..46cb6c5 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -11360,8 +11360,7 @@ void gen_intermediate_code_a64(ARMCPU *cpu, TranslationBlock *tb)
gen_a64_set_pc_im(dc->pc);
/* fall through */
case DISAS_JUMP:
- /* indicate that the hash table must be used to find the next TB */
- tcg_gen_exit_tb(0);
+ tcg_gen_lookup_and_goto_ptr(cpu_pc);
break;
case DISAS_TB_JUMP:
case DISAS_EXC:
--
2.7.4
On Fri, Apr 28, 2017 at 15:17:25 -0400, Emilio G. Cota wrote: > Measurements: (snip) > - SPECint06 (test set), x86_64-linux-user. Host: Intel i7-4790K @ 4.00GHz (snip) > - SPECint06 (train set), x86_64-linux-user. Host: Intel i7-4790K @ 4.00GHz s/x86_64/aarch64/ , obviously. E.
These aarch64 patches fail for me like so:
$ ../bld/aarch64-softmmu/qemu-system-aarch64 -M virt -cpu cortex-a57 \
-m 1024 -nographic -kernel ./aarch64-linux-3.15rc2-buildroot.img \
-append console=ttyAMA0
qemu-system-aarch64: /home/rth/work/qemu/qemu/cpu-exec.c:599: cpu_loop_exec_tb:
Assertion `use_icount' failed.
r~
On 04/30/2017 11:47 AM, Richard Henderson wrote: > These aarch64 patches fail for me like so: > > $ ../bld/aarch64-softmmu/qemu-system-aarch64 -M virt -cpu cortex-a57 \ > -m 1024 -nographic -kernel ./aarch64-linux-3.15rc2-buildroot.img \ > -append console=ttyAMA0 > qemu-system-aarch64: /home/rth/work/qemu/qemu/cpu-exec.c:599: cpu_loop_exec_tb: > Assertion `use_icount' failed. Bah. Nevermind, this is my fault. r~
Please find patches to support cross-tb optimizations on MIPS hosts and to implement cross-tb optimizations for MIPS target. Aurelien Jarno (3): tcg/mips: implement goto_ptr target/mips: optimize cross-page direct jumps in softmmu target/mips: optimize indirect branches target/mips/translate.c | 4 ++-- tcg/mips/tcg-target.h | 2 +- tcg/mips/tcg-target.inc.c | 13 +++++++++++++ 3 files changed, 16 insertions(+), 3 deletions(-) -- 2.11.0
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
tcg/mips/tcg-target.h | 2 +-
tcg/mips/tcg-target.inc.c | 13 +++++++++++++
2 files changed, 14 insertions(+), 1 deletion(-)
diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h
index e3240cfba7..d75cb63ed3 100644
--- a/tcg/mips/tcg-target.h
+++ b/tcg/mips/tcg-target.h
@@ -130,7 +130,7 @@ extern bool use_mips32r2_instructions;
#define TCG_TARGET_HAS_muluh_i32 1
#define TCG_TARGET_HAS_mulsh_i32 1
#define TCG_TARGET_HAS_bswap32_i32 1
-#define TCG_TARGET_HAS_goto_ptr 0
+#define TCG_TARGET_HAS_goto_ptr 1
#if TCG_TARGET_REG_BITS == 64
#define TCG_TARGET_HAS_add2_i32 0
diff --git a/tcg/mips/tcg-target.inc.c b/tcg/mips/tcg-target.inc.c
index 01ac7b2c81..9e5b9f42da 100644
--- a/tcg/mips/tcg-target.inc.c
+++ b/tcg/mips/tcg-target.inc.c
@@ -1747,6 +1747,11 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
tcg_out_nop(s);
s->tb_jmp_reset_offset[a0] = tcg_current_code_size(s);
break;
+ case INDEX_op_goto_ptr:
+ /* jmp to the given host address (could be epilogue) */
+ tcg_out_opc_reg(s, OPC_JR, 0, a0, 0);
+ tcg_out_nop(s);
+ break;
case INDEX_op_br:
tcg_out_brcond(s, TCG_COND_EQ, TCG_REG_ZERO, TCG_REG_ZERO,
arg_label(a0));
@@ -2160,6 +2165,7 @@ static const TCGTargetOpDef mips_op_defs[] = {
{ INDEX_op_exit_tb, { } },
{ INDEX_op_goto_tb, { } },
{ INDEX_op_br, { } },
+ { INDEX_op_goto_ptr, { "r" } },
{ INDEX_op_ld8u_i32, { "r", "r" } },
{ INDEX_op_ld8s_i32, { "r", "r" } },
@@ -2451,6 +2457,13 @@ static void tcg_target_qemu_prologue(TCGContext *s)
/* delay slot */
tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
+ /*
+ * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
+ * and fall through to the rest of the epilogue.
+ */
+ s->code_gen_epilogue = s->code_ptr;
+ tcg_out_mov(s, TCG_TYPE_REG, TCG_REG_V0, TCG_REG_ZERO);
+
/* TB epilogue */
tb_ret_addr = s->code_ptr;
for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
--
2.11.0
On 04/30/2017 11:52 AM, Aurelien Jarno wrote:
> Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
> ---
> tcg/mips/tcg-target.h | 2 +-
> tcg/mips/tcg-target.inc.c | 13 +++++++++++++
> 2 files changed, 14 insertions(+), 1 deletion(-)
>
> diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h
> index e3240cfba7..d75cb63ed3 100644
> --- a/tcg/mips/tcg-target.h
> +++ b/tcg/mips/tcg-target.h
> @@ -130,7 +130,7 @@ extern bool use_mips32r2_instructions;
> #define TCG_TARGET_HAS_muluh_i32 1
> #define TCG_TARGET_HAS_mulsh_i32 1
> #define TCG_TARGET_HAS_bswap32_i32 1
> -#define TCG_TARGET_HAS_goto_ptr 0
> +#define TCG_TARGET_HAS_goto_ptr 1
>
> #if TCG_TARGET_REG_BITS == 64
> #define TCG_TARGET_HAS_add2_i32 0
> diff --git a/tcg/mips/tcg-target.inc.c b/tcg/mips/tcg-target.inc.c
> index 01ac7b2c81..9e5b9f42da 100644
> --- a/tcg/mips/tcg-target.inc.c
> +++ b/tcg/mips/tcg-target.inc.c
> @@ -1747,6 +1747,11 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
> tcg_out_nop(s);
> s->tb_jmp_reset_offset[a0] = tcg_current_code_size(s);
> break;
> + case INDEX_op_goto_ptr:
> + /* jmp to the given host address (could be epilogue) */
> + tcg_out_opc_reg(s, OPC_JR, 0, a0, 0);
> + tcg_out_nop(s);
> + break;
> case INDEX_op_br:
> tcg_out_brcond(s, TCG_COND_EQ, TCG_REG_ZERO, TCG_REG_ZERO,
> arg_label(a0));
> @@ -2160,6 +2165,7 @@ static const TCGTargetOpDef mips_op_defs[] = {
> { INDEX_op_exit_tb, { } },
> { INDEX_op_goto_tb, { } },
> { INDEX_op_br, { } },
> + { INDEX_op_goto_ptr, { "r" } },
>
> { INDEX_op_ld8u_i32, { "r", "r" } },
> { INDEX_op_ld8s_i32, { "r", "r" } },
> @@ -2451,6 +2457,13 @@ static void tcg_target_qemu_prologue(TCGContext *s)
> /* delay slot */
> tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
>
> + /*
> + * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
> + * and fall through to the rest of the epilogue.
> + */
> + s->code_gen_epilogue = s->code_ptr;
> + tcg_out_mov(s, TCG_TYPE_REG, TCG_REG_V0, TCG_REG_ZERO);
> +
> /* TB epilogue */
> tb_ret_addr = s->code_ptr;
> for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
>
On 04/30/2017 04:52 PM, Aurelien Jarno wrote: > + /* jmp to the given host address (could be epilogue) */ > + tcg_out_opc_reg(s, OPC_JR, 0, a0, 0); > + tcg_out_nop(s); Any particular reason not to do the zeroing in the delay slot... > + s->code_gen_epilogue = s->code_ptr; > + tcg_out_mov(s, TCG_TYPE_REG, TCG_REG_V0, TCG_REG_ZERO); ... instead of here? r~
On 2017-05-02 18:21, Richard Henderson wrote: > On 04/30/2017 04:52 PM, Aurelien Jarno wrote: > > + /* jmp to the given host address (could be epilogue) */ > > + tcg_out_opc_reg(s, OPC_JR, 0, a0, 0); > > + tcg_out_nop(s); > > Any particular reason not to do the zeroing in the delay slot... > > > + s->code_gen_epilogue = s->code_ptr; > > + tcg_out_mov(s, TCG_TYPE_REG, TCG_REG_V0, TCG_REG_ZERO); > > ... instead of here? There is no particular reason in the current usage of goto_ptr. It's just that in the future we might want to use code_gen_epilogue for other reasons or use the tcg_out_opc_reg to do other things. It's probably better to have a consistent behaviour across all TCG targets. That said if you prefer, I am find sending a v2 with the zeroing moved to the delay slot. -- Aurelien Jarno GPG: 4096R/1DDD8C9B aurelien@aurel32.net http://www.aurel32.net
Cc: Yongbok Kim <yongbok.kim@imgtec.com>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
target/mips/translate.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/target/mips/translate.c b/target/mips/translate.c
index 3022f349cb..1a7ac07c67 100644
--- a/target/mips/translate.c
+++ b/target/mips/translate.c
@@ -4233,7 +4233,7 @@ static inline void gen_goto_tb(DisasContext *ctx, int n, target_ulong dest)
save_cpu_state(ctx, 0);
gen_helper_raise_exception_debug(cpu_env);
}
- tcg_gen_exit_tb(0);
+ tcg_gen_lookup_and_goto_ptr(cpu_PC);
}
}
--
2.11.0
Cc: Yongbok Kim <yongbok.kim@imgtec.com>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
target/mips/translate.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/target/mips/translate.c b/target/mips/translate.c
index 1a7ac07c67..559f8fed89 100644
--- a/target/mips/translate.c
+++ b/target/mips/translate.c
@@ -10725,7 +10725,7 @@ static void gen_branch(DisasContext *ctx, int insn_bytes)
save_cpu_state(ctx, 0);
gen_helper_raise_exception_debug(cpu_env);
}
- tcg_gen_exit_tb(0);
+ tcg_gen_lookup_and_goto_ptr(cpu_PC);
break;
default:
fprintf(stderr, "unknown branch 0x%x\n", proc_hflags);
--
2.11.0
© 2016 - 2026 Red Hat, Inc.