1 | I don't have anything else queued up at the moment, so this is just | 1 | Hi; here's a target-arm pullreq. Mostly this is some decodetree |
---|---|---|---|
2 | Richard's SME patches. | 2 | conversion patches from me, plus a scattering of other bug fixes. |
3 | 3 | ||
4 | thanks | ||
4 | -- PMM | 5 | -- PMM |
5 | 6 | ||
6 | The following changes since commit 63b38f6c85acd312c2cab68554abf33adf4ee2b3: | 7 | The following changes since commit e3660cc1e3cb136af50c0eaaeac27943c2438d1d: |
7 | 8 | ||
8 | Merge tag 'pull-target-arm-20220707' of https://git.linaro.org/people/pmaydell/qemu-arm into staging (2022-07-08 06:17:11 +0530) | 9 | Merge tag 'pull-loongarch-20230616' of https://gitlab.com/gaosong/qemu into staging (2023-06-16 12:30:16 +0200) |
9 | 10 | ||
10 | are available in the Git repository at: | 11 | are available in the Git repository at: |
11 | 12 | ||
12 | https://git.linaro.org/people/pmaydell/qemu-arm.git tags/pull-target-arm-20220711 | 13 | https://git.linaro.org/people/pmaydell/qemu-arm.git tags/pull-target-arm-20230619 |
13 | 14 | ||
14 | for you to fetch changes up to f9982ceaf26df27d15547a3a7990a95019e9e3a8: | 15 | for you to fetch changes up to 074259c0f2ac40042dce766d870318cc22f388eb: |
15 | 16 | ||
16 | linux-user/aarch64: Add SME related hwcap entries (2022-07-11 13:43:52 +0100) | 17 | hw/misc/bcm2835_property: Handle CORE_CLK_ID firmware property (2023-06-19 15:27:21 +0100) |
17 | 18 | ||
18 | ---------------------------------------------------------------- | 19 | ---------------------------------------------------------------- |
19 | target-arm: | 20 | target-arm queue: |
20 | * Implement SME emulation, for both system and linux-user | 21 | * Fix return value from LDSMIN/LDSMAX 8/16 bit atomics |
22 | * Return correct result for LDG when ATA=0 | ||
23 | * Conversion of system insns, loads and stores to decodetree | ||
24 | * hw/intc/allwinner-a10-pic: Handle IRQ levels other than 0 or 1 | ||
25 | * hw/sd/allwinner-sdhost: Don't send non-boolean IRQ line levels | ||
26 | * hw/timer/nrf51_timer: Don't lose time when timer is queried in tight loop | ||
27 | * hw/arm/Kconfig: sbsa-ref uses Bochs display | ||
28 | * imx_serial: set wake bit when we receive a data byte | ||
29 | * docs: sbsa: document board to firmware interface | ||
30 | * hw/misc/bcm2835_property: avoid hard-coded constants | ||
21 | 31 | ||
22 | ---------------------------------------------------------------- | 32 | ---------------------------------------------------------------- |
23 | Richard Henderson (45): | 33 | Marcin Juszkiewicz (2): |
24 | target/arm: Handle SME in aarch64_cpu_dump_state | 34 | hw/arm/Kconfig: sbsa-ref uses Bochs display |
25 | target/arm: Add infrastructure for disas_sme | 35 | docs: sbsa: document board to firmware interface |
26 | target/arm: Trap non-streaming usage when Streaming SVE is active | ||
27 | target/arm: Mark ADR as non-streaming | ||
28 | target/arm: Mark RDFFR, WRFFR, SETFFR as non-streaming | ||
29 | target/arm: Mark BDEP, BEXT, BGRP, COMPACT, FEXPA, FTSSEL as non-streaming | ||
30 | target/arm: Mark PMULL, FMMLA as non-streaming | ||
31 | target/arm: Mark FTSMUL, FTMAD, FADDA as non-streaming | ||
32 | target/arm: Mark SMMLA, UMMLA, USMMLA as non-streaming | ||
33 | target/arm: Mark string/histo/crypto as non-streaming | ||
34 | target/arm: Mark gather/scatter load/store as non-streaming | ||
35 | target/arm: Mark gather prefetch as non-streaming | ||
36 | target/arm: Mark LDFF1 and LDNF1 as non-streaming | ||
37 | target/arm: Mark LD1RO as non-streaming | ||
38 | target/arm: Add SME enablement checks | ||
39 | target/arm: Handle SME in sve_access_check | ||
40 | target/arm: Implement SME RDSVL, ADDSVL, ADDSPL | ||
41 | target/arm: Implement SME ZERO | ||
42 | target/arm: Implement SME MOVA | ||
43 | target/arm: Implement SME LD1, ST1 | ||
44 | target/arm: Export unpredicated ld/st from translate-sve.c | ||
45 | target/arm: Implement SME LDR, STR | ||
46 | target/arm: Implement SME ADDHA, ADDVA | ||
47 | target/arm: Implement FMOPA, FMOPS (non-widening) | ||
48 | target/arm: Implement BFMOPA, BFMOPS | ||
49 | target/arm: Implement FMOPA, FMOPS (widening) | ||
50 | target/arm: Implement SME integer outer product | ||
51 | target/arm: Implement PSEL | ||
52 | target/arm: Implement REVD | ||
53 | target/arm: Implement SCLAMP, UCLAMP | ||
54 | target/arm: Reset streaming sve state on exception boundaries | ||
55 | target/arm: Enable SME for -cpu max | ||
56 | linux-user/aarch64: Clear tpidr2_el0 if CLONE_SETTLS | ||
57 | linux-user/aarch64: Reset PSTATE.SM on syscalls | ||
58 | linux-user/aarch64: Add SM bit to SVE signal context | ||
59 | linux-user/aarch64: Tidy target_restore_sigframe error return | ||
60 | linux-user/aarch64: Do not allow duplicate or short sve records | ||
61 | linux-user/aarch64: Verify extra record lock succeeded | ||
62 | linux-user/aarch64: Move sve record checks into restore | ||
63 | linux-user/aarch64: Implement SME signal handling | ||
64 | linux-user: Rename sve prctls | ||
65 | linux-user/aarch64: Implement PR_SME_GET_VL, PR_SME_SET_VL | ||
66 | target/arm: Only set ZEN in reset if SVE present | ||
67 | target/arm: Enable SME for user-only | ||
68 | linux-user/aarch64: Add SME related hwcap entries | ||
69 | 36 | ||
70 | docs/system/arm/emulation.rst | 4 + | 37 | Martin Kaiser (1): |
71 | linux-user/aarch64/target_cpu.h | 5 +- | 38 | imx_serial: set wake bit when we receive a data byte |
72 | linux-user/aarch64/target_prctl.h | 62 +- | 39 | |
73 | target/arm/cpu.h | 7 + | 40 | Peter Maydell (26): |
74 | target/arm/helper-sme.h | 126 ++++ | 41 | target/arm: Fix return value from LDSMIN/LDSMAX 8/16 bit atomics |
75 | target/arm/helper-sve.h | 4 + | 42 | target/arm: Return correct result for LDG when ATA=0 |
76 | target/arm/helper.h | 18 + | 43 | target/arm: Pass memop to gen_mte_check1_mmuidx() in reg_imm9 decode |
77 | target/arm/translate-a64.h | 45 ++ | 44 | target/arm: Consistently use finalize_memop_asimd() for ASIMD loads/stores |
78 | target/arm/translate.h | 16 + | 45 | target/arm: Convert hint instruction space to decodetree |
79 | target/arm/sme-fa64.decode | 60 ++ | 46 | target/arm: Convert barrier insns to decodetree |
80 | target/arm/sme.decode | 88 +++ | 47 | target/arm: Convert CFINV, XAFLAG and AXFLAG to decodetree |
81 | target/arm/sve.decode | 41 +- | 48 | target/arm: Convert MSR (immediate) to decodetree |
82 | linux-user/aarch64/cpu_loop.c | 9 + | 49 | target/arm: Convert MSR (reg), MRS, SYS, SYSL to decodetree |
83 | linux-user/aarch64/signal.c | 243 ++++++-- | 50 | target/arm: Convert exception generation instructions to decodetree |
84 | linux-user/elfload.c | 20 + | 51 | target/arm: Convert load/store exclusive and ordered to decodetree |
85 | linux-user/syscall.c | 28 +- | 52 | target/arm: Convert LDXP, STXP, CASP, CAS to decodetree |
86 | target/arm/cpu.c | 35 +- | 53 | target/arm: Convert load reg (literal) group to decodetree |
87 | target/arm/cpu64.c | 11 + | 54 | target/arm: Convert load/store-pair to decodetree |
88 | target/arm/helper.c | 56 +- | 55 | target/arm: Convert ld/st reg+imm9 insns to decodetree |
89 | target/arm/sme_helper.c | 1140 +++++++++++++++++++++++++++++++++++++ | 56 | target/arm: Convert LDR/STR with 12-bit immediate to decodetree |
90 | target/arm/sve_helper.c | 28 + | 57 | target/arm: Convert LDR/STR reg+reg to decodetree |
91 | target/arm/translate-a64.c | 103 +++- | 58 | target/arm: Convert atomic memory ops to decodetree |
92 | target/arm/translate-sme.c | 373 ++++++++++++ | 59 | target/arm: Convert load (pointer auth) insns to decodetree |
93 | target/arm/translate-sve.c | 393 ++++++++++--- | 60 | target/arm: Convert LDAPR/STLR (imm) to decodetree |
94 | target/arm/translate-vfp.c | 12 + | 61 | target/arm: Convert load/store (multiple structures) to decodetree |
95 | target/arm/translate.c | 2 + | 62 | target/arm: Convert load/store single structure to decodetree |
96 | target/arm/vec_helper.c | 24 + | 63 | target/arm: Convert load/store tags insns to decodetree |
97 | target/arm/meson.build | 3 + | 64 | hw/intc/allwinner-a10-pic: Handle IRQ levels other than 0 or 1 |
98 | 28 files changed, 2821 insertions(+), 135 deletions(-) | 65 | hw/sd/allwinner-sdhost: Don't send non-boolean IRQ line levels |
99 | create mode 100644 target/arm/sme-fa64.decode | 66 | hw/timer/nrf51_timer: Don't lose time when timer is queried in tight loop |
100 | create mode 100644 target/arm/sme.decode | 67 | |
101 | create mode 100644 target/arm/translate-sme.c | 68 | Sergey Kambalin (4): |
69 | hw/arm/raspi: Import Linux raspi definitions as 'raspberrypi-fw-defs.h' | ||
70 | hw/misc/bcm2835_property: Use 'raspberrypi-fw-defs.h' definitions | ||
71 | hw/misc/bcm2835_property: Replace magic frequency values by definitions | ||
72 | hw/misc/bcm2835_property: Handle CORE_CLK_ID firmware property | ||
73 | |||
74 | docs/system/arm/sbsa.rst | 38 +- | ||
75 | include/hw/arm/raspi_platform.h | 10 + | ||
76 | include/hw/char/imx_serial.h | 1 + | ||
77 | include/hw/misc/raspberrypi-fw-defs.h | 163 ++ | ||
78 | target/arm/tcg/a64.decode | 403 ++++ | ||
79 | hw/char/imx_serial.c | 5 +- | ||
80 | hw/intc/allwinner-a10-pic.c | 2 +- | ||
81 | hw/misc/bcm2835_property.c | 112 +- | ||
82 | hw/sd/allwinner-sdhost.c | 2 +- | ||
83 | hw/timer/nrf51_timer.c | 7 +- | ||
84 | target/arm/tcg/translate-a64.c | 3319 +++++++++++++++------------------ | ||
85 | hw/arm/Kconfig | 1 + | ||
86 | 12 files changed, 2157 insertions(+), 1906 deletions(-) | ||
87 | create mode 100644 include/hw/misc/raspberrypi-fw-defs.h | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Richard Henderson <richard.henderson@linaro.org> | ||
2 | 1 | ||
3 | Dump SVCR, plus use the correct access check for Streaming Mode. | ||
4 | |||
5 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | ||
6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | Message-id: 20220708151540.18136-2-richard.henderson@linaro.org | ||
8 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
9 | --- | ||
10 | target/arm/cpu.c | 17 ++++++++++++++++- | ||
11 | 1 file changed, 16 insertions(+), 1 deletion(-) | ||
12 | |||
13 | diff --git a/target/arm/cpu.c b/target/arm/cpu.c | ||
14 | index XXXXXXX..XXXXXXX 100644 | ||
15 | --- a/target/arm/cpu.c | ||
16 | +++ b/target/arm/cpu.c | ||
17 | @@ -XXX,XX +XXX,XX @@ static void aarch64_cpu_dump_state(CPUState *cs, FILE *f, int flags) | ||
18 | int i; | ||
19 | int el = arm_current_el(env); | ||
20 | const char *ns_status; | ||
21 | + bool sve; | ||
22 | |||
23 | qemu_fprintf(f, " PC=%016" PRIx64 " ", env->pc); | ||
24 | for (i = 0; i < 32; i++) { | ||
25 | @@ -XXX,XX +XXX,XX @@ static void aarch64_cpu_dump_state(CPUState *cs, FILE *f, int flags) | ||
26 | el, | ||
27 | psr & PSTATE_SP ? 'h' : 't'); | ||
28 | |||
29 | + if (cpu_isar_feature(aa64_sme, cpu)) { | ||
30 | + qemu_fprintf(f, " SVCR=%08" PRIx64 " %c%c", | ||
31 | + env->svcr, | ||
32 | + (FIELD_EX64(env->svcr, SVCR, ZA) ? 'Z' : '-'), | ||
33 | + (FIELD_EX64(env->svcr, SVCR, SM) ? 'S' : '-')); | ||
34 | + } | ||
35 | if (cpu_isar_feature(aa64_bti, cpu)) { | ||
36 | qemu_fprintf(f, " BTYPE=%d", (psr & PSTATE_BTYPE) >> 10); | ||
37 | } | ||
38 | @@ -XXX,XX +XXX,XX @@ static void aarch64_cpu_dump_state(CPUState *cs, FILE *f, int flags) | ||
39 | qemu_fprintf(f, " FPCR=%08x FPSR=%08x\n", | ||
40 | vfp_get_fpcr(env), vfp_get_fpsr(env)); | ||
41 | |||
42 | - if (cpu_isar_feature(aa64_sve, cpu) && sve_exception_el(env, el) == 0) { | ||
43 | + if (cpu_isar_feature(aa64_sme, cpu) && FIELD_EX64(env->svcr, SVCR, SM)) { | ||
44 | + sve = sme_exception_el(env, el) == 0; | ||
45 | + } else if (cpu_isar_feature(aa64_sve, cpu)) { | ||
46 | + sve = sve_exception_el(env, el) == 0; | ||
47 | + } else { | ||
48 | + sve = false; | ||
49 | + } | ||
50 | + | ||
51 | + if (sve) { | ||
52 | int j, zcr_len = sve_vqm1_for_el(env, el); | ||
53 | |||
54 | for (i = 0; i <= FFR_PRED_NUM; i++) { | ||
55 | -- | ||
56 | 2.25.1 | diff view generated by jsdifflib |
1 | From: Richard Henderson <richard.henderson@linaro.org> | 1 | The atomic memory operations are supposed to return the old memory |
---|---|---|---|
2 | data value in the destination register. This value is not | ||
3 | sign-extended, even if the operation is the signed minimum or | ||
4 | maximum. (In the pseudocode for the instructions the returned data | ||
5 | value is passed to ZeroExtend() to create the value in the register.) | ||
2 | 6 | ||
3 | Fold the return value setting into the goto, so each | 7 | We got this wrong because we were doing a 32-to-64 zero extend on the |
4 | point of failure need not do both. | 8 | result for 8 and 16 bit data values, rather than the correct amount |
9 | of zero extension. | ||
5 | 10 | ||
6 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | 11 | Fix the bug by using ext8u and ext16u for the MO_8 and MO_16 data |
7 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 12 | sizes rather than ext32u. |
8 | Message-id: 20220708151540.18136-37-richard.henderson@linaro.org | 13 | |
14 | Cc: qemu-stable@nongnu.org | ||
9 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 15 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
16 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
17 | Message-id: 20230602155223.2040685-2-peter.maydell@linaro.org | ||
10 | --- | 18 | --- |
11 | linux-user/aarch64/signal.c | 26 +++++++++++--------------- | 19 | target/arm/tcg/translate-a64.c | 18 ++++++++++++++++-- |
12 | 1 file changed, 11 insertions(+), 15 deletions(-) | 20 | 1 file changed, 16 insertions(+), 2 deletions(-) |
13 | 21 | ||
14 | diff --git a/linux-user/aarch64/signal.c b/linux-user/aarch64/signal.c | 22 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c |
15 | index XXXXXXX..XXXXXXX 100644 | 23 | index XXXXXXX..XXXXXXX 100644 |
16 | --- a/linux-user/aarch64/signal.c | 24 | --- a/target/arm/tcg/translate-a64.c |
17 | +++ b/linux-user/aarch64/signal.c | 25 | +++ b/target/arm/tcg/translate-a64.c |
18 | @@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env, | 26 | @@ -XXX,XX +XXX,XX @@ static void disas_ldst_atomic(DisasContext *s, uint32_t insn, |
19 | struct target_sve_context *sve = NULL; | 27 | */ |
20 | uint64_t extra_datap = 0; | 28 | fn(tcg_rt, clean_addr, tcg_rs, get_mem_index(s), mop); |
21 | bool used_extra = false; | 29 | |
22 | - bool err = false; | 30 | - if ((mop & MO_SIGN) && size != MO_64) { |
23 | int vq = 0, sve_size = 0; | 31 | - tcg_gen_ext32u_i64(tcg_rt, tcg_rt); |
24 | 32 | + if (mop & MO_SIGN) { | |
25 | target_restore_general_frame(env, sf); | 33 | + switch (size) { |
26 | @@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env, | 34 | + case MO_8: |
27 | switch (magic) { | 35 | + tcg_gen_ext8u_i64(tcg_rt, tcg_rt); |
28 | case 0: | 36 | + break; |
29 | if (size != 0) { | 37 | + case MO_16: |
30 | - err = true; | 38 | + tcg_gen_ext16u_i64(tcg_rt, tcg_rt); |
31 | - goto exit; | 39 | + break; |
32 | + goto err; | 40 | + case MO_32: |
33 | } | 41 | + tcg_gen_ext32u_i64(tcg_rt, tcg_rt); |
34 | if (used_extra) { | 42 | + break; |
35 | ctx = NULL; | 43 | + case MO_64: |
36 | @@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env, | 44 | + break; |
37 | 45 | + default: | |
38 | case TARGET_FPSIMD_MAGIC: | 46 | + g_assert_not_reached(); |
39 | if (fpsimd || size != sizeof(struct target_fpsimd_context)) { | 47 | + } |
40 | - err = true; | ||
41 | - goto exit; | ||
42 | + goto err; | ||
43 | } | ||
44 | fpsimd = (struct target_fpsimd_context *)ctx; | ||
45 | break; | ||
46 | @@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env, | ||
47 | break; | ||
48 | } | ||
49 | } | ||
50 | - err = true; | ||
51 | - goto exit; | ||
52 | + goto err; | ||
53 | |||
54 | case TARGET_EXTRA_MAGIC: | ||
55 | if (extra || size != sizeof(struct target_extra_context)) { | ||
56 | - err = true; | ||
57 | - goto exit; | ||
58 | + goto err; | ||
59 | } | ||
60 | __get_user(extra_datap, | ||
61 | &((struct target_extra_context *)ctx)->datap); | ||
62 | @@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env, | ||
63 | /* Unknown record -- we certainly didn't generate it. | ||
64 | * Did we in fact get out of sync? | ||
65 | */ | ||
66 | - err = true; | ||
67 | - goto exit; | ||
68 | + goto err; | ||
69 | } | ||
70 | ctx = (void *)ctx + size; | ||
71 | } | 48 | } |
72 | @@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env, | ||
73 | if (fpsimd) { | ||
74 | target_restore_fpsimd_record(env, fpsimd); | ||
75 | } else { | ||
76 | - err = true; | ||
77 | + goto err; | ||
78 | } | ||
79 | |||
80 | /* SVE data, if present, overwrites FPSIMD data. */ | ||
81 | if (sve) { | ||
82 | target_restore_sve_record(env, sve, vq); | ||
83 | } | ||
84 | - | ||
85 | - exit: | ||
86 | unlock_user(extra, extra_datap, 0); | ||
87 | - return err; | ||
88 | + return 0; | ||
89 | + | ||
90 | + err: | ||
91 | + unlock_user(extra, extra_datap, 0); | ||
92 | + return 1; | ||
93 | } | 49 | } |
94 | 50 | ||
95 | static abi_ulong get_sigframe(struct target_sigaction *ka, | ||
96 | -- | 51 | -- |
97 | 2.25.1 | 52 | 2.34.1 | diff view generated by jsdifflib |
1 | From: Richard Henderson <richard.henderson@linaro.org> | 1 | The LDG instruction loads the tag from a memory address (identified |
---|---|---|---|
2 | by [Xn + offset]), and then merges that tag into the destination | ||
3 | register Xt. We implemented this correctly for the case when | ||
4 | allocation tags are enabled, but didn't get it right when ATA=0: | ||
5 | instead of merging the tag bits into Xt, we merged them into the | ||
6 | memory address [Xn + offset] and then set Xt to that. | ||
2 | 7 | ||
3 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | 8 | Merge the tag bits into the old Xt value, as they should be. |
4 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 9 | |
5 | Message-id: 20220708151540.18136-46-richard.henderson@linaro.org | 10 | Cc: qemu-stable@nongnu.org |
11 | Fixes: c15294c1e36a7dd9b25 ("target/arm: Implement LDG, STG, ST2G instructions") | ||
12 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 13 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
7 | --- | 14 | --- |
8 | linux-user/elfload.c | 20 ++++++++++++++++++++ | 15 | target/arm/tcg/translate-a64.c | 6 +++++- |
9 | 1 file changed, 20 insertions(+) | 16 | 1 file changed, 5 insertions(+), 1 deletion(-) |
10 | 17 | ||
11 | diff --git a/linux-user/elfload.c b/linux-user/elfload.c | 18 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c |
12 | index XXXXXXX..XXXXXXX 100644 | 19 | index XXXXXXX..XXXXXXX 100644 |
13 | --- a/linux-user/elfload.c | 20 | --- a/target/arm/tcg/translate-a64.c |
14 | +++ b/linux-user/elfload.c | 21 | +++ b/target/arm/tcg/translate-a64.c |
15 | @@ -XXX,XX +XXX,XX @@ enum { | 22 | @@ -XXX,XX +XXX,XX @@ static void disas_ldst_tag(DisasContext *s, uint32_t insn) |
16 | ARM_HWCAP2_A64_RNG = 1 << 16, | 23 | if (s->ata) { |
17 | ARM_HWCAP2_A64_BTI = 1 << 17, | 24 | gen_helper_ldg(tcg_rt, cpu_env, addr, tcg_rt); |
18 | ARM_HWCAP2_A64_MTE = 1 << 18, | 25 | } else { |
19 | + ARM_HWCAP2_A64_ECV = 1 << 19, | 26 | + /* |
20 | + ARM_HWCAP2_A64_AFP = 1 << 20, | 27 | + * Tag access disabled: we must check for aborts on the load |
21 | + ARM_HWCAP2_A64_RPRES = 1 << 21, | 28 | + * Tag access disabled: we must check for aborts on the |
22 | + ARM_HWCAP2_A64_MTE3 = 1 << 22, | 29 | + */ |
23 | + ARM_HWCAP2_A64_SME = 1 << 23, | 30 | clean_addr = clean_data_tbi(s, addr); |
24 | + ARM_HWCAP2_A64_SME_I16I64 = 1 << 24, | 31 | gen_probe_access(s, clean_addr, MMU_DATA_LOAD, MO_8); |
25 | + ARM_HWCAP2_A64_SME_F64F64 = 1 << 25, | 32 | - gen_address_with_allocation_tag0(tcg_rt, addr); |
26 | + ARM_HWCAP2_A64_SME_I8I32 = 1 << 26, | 33 | + gen_address_with_allocation_tag0(tcg_rt, tcg_rt); |
27 | + ARM_HWCAP2_A64_SME_F16F32 = 1 << 27, | 34 | } |
28 | + ARM_HWCAP2_A64_SME_B16F32 = 1 << 28, | 35 | } else { |
29 | + ARM_HWCAP2_A64_SME_F32F32 = 1 << 29, | 36 | tcg_rt = cpu_reg_sp(s, rt); |
30 | + ARM_HWCAP2_A64_SME_FA64 = 1 << 30, | ||
31 | }; | ||
32 | |||
33 | #define ELF_HWCAP get_elf_hwcap() | ||
34 | @@ -XXX,XX +XXX,XX @@ static uint32_t get_elf_hwcap2(void) | ||
35 | GET_FEATURE_ID(aa64_rndr, ARM_HWCAP2_A64_RNG); | ||
36 | GET_FEATURE_ID(aa64_bti, ARM_HWCAP2_A64_BTI); | ||
37 | GET_FEATURE_ID(aa64_mte, ARM_HWCAP2_A64_MTE); | ||
38 | + GET_FEATURE_ID(aa64_sme, (ARM_HWCAP2_A64_SME | | ||
39 | + ARM_HWCAP2_A64_SME_F32F32 | | ||
40 | + ARM_HWCAP2_A64_SME_B16F32 | | ||
41 | + ARM_HWCAP2_A64_SME_F16F32 | | ||
42 | + ARM_HWCAP2_A64_SME_I8I32)); | ||
43 | + GET_FEATURE_ID(aa64_sme_f64f64, ARM_HWCAP2_A64_SME_F64F64); | ||
44 | + GET_FEATURE_ID(aa64_sme_i16i64, ARM_HWCAP2_A64_SME_I16I64); | ||
45 | + GET_FEATURE_ID(aa64_sme_fa64, ARM_HWCAP2_A64_SME_FA64); | ||
46 | |||
47 | return hwcaps; | ||
48 | } | ||
49 | -- | 37 | -- |
50 | 2.25.1 | 38 | 2.34.1 | diff view generated by jsdifflib |
1 | From: Richard Henderson <richard.henderson@linaro.org> | 1 | In disas_ldst_reg_imm9() we missed one place where a call to |
---|---|---|---|
2 | a gen_mte_check* function should now be passed the memop we | ||
3 | have created rather than just being passed the size. Fix this. | ||
2 | 4 | ||
3 | Enable SME, TPIDR2_EL0, and FA64 if supported by the cpu. | 5 | Fixes: 0a9091424d ("target/arm: Pass memop to gen_mte_check1*") |
6 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
7 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
8 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
9 | --- | ||
10 | target/arm/tcg/translate-a64.c | 2 +- | ||
11 | 1 file changed, 1 insertion(+), 1 deletion(-) | ||
4 | 12 | ||
5 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | 13 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c |
6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 14 | index XXXXXXX..XXXXXXX 100644 |
7 | Message-id: 20220708151540.18136-45-richard.henderson@linaro.org | 15 | --- a/target/arm/tcg/translate-a64.c |
8 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 16 | +++ b/target/arm/tcg/translate-a64.c |
9 | --- | 17 | @@ -XXX,XX +XXX,XX @@ static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn, |
10 | target/arm/cpu.c | 11 +++++++++++ | 18 | |
11 | 1 file changed, 11 insertions(+) | 19 | clean_addr = gen_mte_check1_mmuidx(s, dirty_addr, is_store, |
20 | writeback || rn != 31, | ||
21 | - size, is_unpriv, memidx); | ||
22 | + memop, is_unpriv, memidx); | ||
23 | |||
24 | if (is_vector) { | ||
25 | if (is_store) { | ||
26 | -- | ||
27 | 2.34.1 | ||
12 | 28 | ||
13 | diff --git a/target/arm/cpu.c b/target/arm/cpu.c | 29 | |
14 | index XXXXXXX..XXXXXXX 100644 | ||
15 | --- a/target/arm/cpu.c | ||
16 | +++ b/target/arm/cpu.c | ||
17 | @@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset(DeviceState *dev) | ||
18 | CPACR_EL1, ZEN, 3); | ||
19 | env->vfp.zcr_el[1] = cpu->sve_default_vq - 1; | ||
20 | } | ||
21 | + /* and for SME instructions, with default vector length, and TPIDR2 */ | ||
22 | + if (cpu_isar_feature(aa64_sme, cpu)) { | ||
23 | + env->cp15.sctlr_el[1] |= SCTLR_EnTP2; | ||
24 | + env->cp15.cpacr_el1 = FIELD_DP64(env->cp15.cpacr_el1, | ||
25 | + CPACR_EL1, SMEN, 3); | ||
26 | + env->vfp.smcr_el[1] = cpu->sme_default_vq - 1; | ||
27 | + if (cpu_isar_feature(aa64_sme_fa64, cpu)) { | ||
28 | + env->vfp.smcr_el[1] = FIELD_DP64(env->vfp.smcr_el[1], | ||
29 | + SMCR, FA64, 1); | ||
30 | + } | ||
31 | + } | ||
32 | /* | ||
33 | * Enable 48-bit address space (TODO: take reserved_va into account). | ||
34 | * Enable TBI0 but not TBI1. | ||
35 | -- | ||
36 | 2.25.1 | diff view generated by jsdifflib |
1 | From: Richard Henderson <richard.henderson@linaro.org> | 1 | In the recent refactoring we missed a few places which should be |
---|---|---|---|
2 | calling finalize_memop_asimd() for ASIMD loads and stores but | ||
3 | instead are just calling finalize_memop(); fix these. | ||
2 | 4 | ||
3 | We can handle both exception entry and exception return by | 5 | For the disas_ldst_single_struct() and disas_ldst_multiple_struct() |
4 | hooking into aarch64_sve_change_el. | 6 | cases, this is not a behaviour change because there the size |
7 | is never MO_128 and the two finalize functions do the same thing. | ||
5 | 8 | ||
6 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | ||
7 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
8 | Message-id: 20220708151540.18136-32-richard.henderson@linaro.org | ||
9 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 9 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
10 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
10 | --- | 11 | --- |
11 | target/arm/helper.c | 15 +++++++++++++-- | 12 | target/arm/tcg/translate-a64.c | 10 ++++++---- |
12 | 1 file changed, 13 insertions(+), 2 deletions(-) | 13 | 1 file changed, 6 insertions(+), 4 deletions(-) |
13 | 14 | ||
14 | diff --git a/target/arm/helper.c b/target/arm/helper.c | 15 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c |
15 | index XXXXXXX..XXXXXXX 100644 | 16 | index XXXXXXX..XXXXXXX 100644 |
16 | --- a/target/arm/helper.c | 17 | --- a/target/arm/tcg/translate-a64.c |
17 | +++ b/target/arm/helper.c | 18 | +++ b/target/arm/tcg/translate-a64.c |
18 | @@ -XXX,XX +XXX,XX @@ void aarch64_sve_change_el(CPUARMState *env, int old_el, | 19 | @@ -XXX,XX +XXX,XX @@ static void disas_ldst_reg_roffset(DisasContext *s, uint32_t insn, |
19 | return; | 20 | if (!fp_access_check(s)) { |
21 | return; | ||
22 | } | ||
23 | + memop = finalize_memop_asimd(s, size); | ||
24 | } else { | ||
25 | if (size == 3 && opc == 2) { | ||
26 | /* PRFM - prefetch */ | ||
27 | @@ -XXX,XX +XXX,XX @@ static void disas_ldst_reg_roffset(DisasContext *s, uint32_t insn, | ||
28 | is_store = (opc == 0); | ||
29 | is_signed = !is_store && extract32(opc, 1, 1); | ||
30 | is_extended = (size < 3) && extract32(opc, 0, 1); | ||
31 | + memop = finalize_memop(s, size + is_signed * MO_SIGN); | ||
20 | } | 32 | } |
21 | 33 | ||
22 | + old_a64 = old_el ? arm_el_is_aa64(env, old_el) : el0_a64; | 34 | if (rn == 31) { |
23 | + new_a64 = new_el ? arm_el_is_aa64(env, new_el) : el0_a64; | 35 | @@ -XXX,XX +XXX,XX @@ static void disas_ldst_reg_roffset(DisasContext *s, uint32_t insn, |
24 | + | 36 | |
25 | + /* | 37 | tcg_gen_add_i64(dirty_addr, dirty_addr, tcg_rm); |
26 | + * Both AArch64.TakeException and AArch64.ExceptionReturn | 38 | |
27 | + * invoke ResetSVEState when taking an exception from, or | 39 | - memop = finalize_memop(s, size + is_signed * MO_SIGN); |
28 | + * returning to, AArch32 state when PSTATE.SM is enabled. | 40 | clean_addr = gen_mte_check1(s, dirty_addr, is_store, true, memop); |
29 | + */ | 41 | |
30 | + if (old_a64 != new_a64 && FIELD_EX64(env->svcr, SVCR, SM)) { | 42 | if (is_vector) { |
31 | + arm_reset_sve_state(env); | 43 | @@ -XXX,XX +XXX,XX @@ static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn, |
32 | + return; | 44 | if (!fp_access_check(s)) { |
33 | + } | 45 | return; |
34 | + | 46 | } |
47 | + memop = finalize_memop_asimd(s, size); | ||
48 | } else { | ||
49 | if (size == 3 && opc == 2) { | ||
50 | /* PRFM - prefetch */ | ||
51 | @@ -XXX,XX +XXX,XX @@ static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn, | ||
52 | is_store = (opc == 0); | ||
53 | is_signed = !is_store && extract32(opc, 1, 1); | ||
54 | is_extended = (size < 3) && extract32(opc, 0, 1); | ||
55 | + memop = finalize_memop(s, size + is_signed * MO_SIGN); | ||
56 | } | ||
57 | |||
58 | if (rn == 31) { | ||
59 | @@ -XXX,XX +XXX,XX @@ static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn, | ||
60 | offset = imm12 << size; | ||
61 | tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); | ||
62 | |||
63 | - memop = finalize_memop(s, size + is_signed * MO_SIGN); | ||
64 | clean_addr = gen_mte_check1(s, dirty_addr, is_store, rn != 31, memop); | ||
65 | |||
66 | if (is_vector) { | ||
67 | @@ -XXX,XX +XXX,XX @@ static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn) | ||
68 | * promote consecutive little-endian elements below. | ||
69 | */ | ||
70 | clean_addr = gen_mte_checkN(s, tcg_rn, is_store, is_postidx || rn != 31, | ||
71 | - total, finalize_memop(s, size)); | ||
72 | + total, finalize_memop_asimd(s, size)); | ||
73 | |||
35 | /* | 74 | /* |
36 | * DDI0584A.d sec 3.2: "If SVE instructions are disabled or trapped | 75 | * Consecutive little-endian elements from a single register |
37 | * at ELx, or not available because the EL is in AArch32 state, then | 76 | @@ -XXX,XX +XXX,XX @@ static void disas_ldst_single_struct(DisasContext *s, uint32_t insn) |
38 | @@ -XXX,XX +XXX,XX @@ void aarch64_sve_change_el(CPUARMState *env, int old_el, | 77 | total = selem << scale; |
39 | * we already have the correct register contents when encountering the | 78 | tcg_rn = cpu_reg_sp(s, rn); |
40 | * vq0->vq0 transition between EL0->EL1. | 79 | |
41 | */ | 80 | - mop = finalize_memop(s, scale); |
42 | - old_a64 = old_el ? arm_el_is_aa64(env, old_el) : el0_a64; | 81 | + mop = finalize_memop_asimd(s, scale); |
43 | old_len = (old_a64 && !sve_exception_el(env, old_el) | 82 | |
44 | ? sve_vqm1_for_el(env, old_el) : 0); | 83 | clean_addr = gen_mte_checkN(s, tcg_rn, !is_load, is_postidx || rn != 31, |
45 | - new_a64 = new_el ? arm_el_is_aa64(env, new_el) : el0_a64; | 84 | total, mop); |
46 | new_len = (new_a64 && !sve_exception_el(env, new_el) | ||
47 | ? sve_vqm1_for_el(env, new_el) : 0); | ||
48 | |||
49 | -- | 85 | -- |
50 | 2.25.1 | 86 | 2.34.1 | diff view generated by jsdifflib |
1 | From: Richard Henderson <richard.henderson@linaro.org> | 1 | Convert the various instructions in the hint instruction space |
---|---|---|---|
2 | to decodetree. | ||
2 | 3 | ||
3 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
4 | Message-id: 20220708151540.18136-27-richard.henderson@linaro.org | ||
5 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 4 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
6 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | 5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
6 | Message-id: 20230602155223.2040685-3-peter.maydell@linaro.org | ||
7 | --- | 7 | --- |
8 | target/arm/helper-sme.h | 2 ++ | 8 | target/arm/tcg/a64.decode | 31 ++++ |
9 | target/arm/sme.decode | 1 + | 9 | target/arm/tcg/translate-a64.c | 277 ++++++++++++++++++--------------- |
10 | target/arm/sme_helper.c | 74 ++++++++++++++++++++++++++++++++++++++ | 10 | 2 files changed, 185 insertions(+), 123 deletions(-) |
11 | target/arm/translate-sme.c | 1 + | ||
12 | 4 files changed, 78 insertions(+) | ||
13 | 11 | ||
14 | diff --git a/target/arm/helper-sme.h b/target/arm/helper-sme.h | 12 | diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode |
15 | index XXXXXXX..XXXXXXX 100644 | 13 | index XXXXXXX..XXXXXXX 100644 |
16 | --- a/target/arm/helper-sme.h | 14 | --- a/target/arm/tcg/a64.decode |
17 | +++ b/target/arm/helper-sme.h | 15 | +++ b/target/arm/tcg/a64.decode |
18 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(sme_addva_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) | 16 | @@ -XXX,XX +XXX,XX @@ ERETA 1101011 0100 11111 00001 m:1 11111 11111 &reta # ERETAA, ERETAB |
19 | DEF_HELPER_FLAGS_5(sme_addha_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) | 17 | # the processor is in halting debug state (which we don't implement). |
20 | DEF_HELPER_FLAGS_5(sme_addva_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) | 18 | # The pattern is listed here as documentation. |
21 | 19 | # DRPS 1101011 0101 11111 000000 11111 00000 | |
22 | +DEF_HELPER_FLAGS_7(sme_fmopa_h, TCG_CALL_NO_RWG, | 20 | + |
23 | + void, ptr, ptr, ptr, ptr, ptr, ptr, i32) | 21 | +# Hint instruction group |
24 | DEF_HELPER_FLAGS_7(sme_fmopa_s, TCG_CALL_NO_RWG, | 22 | +{ |
25 | void, ptr, ptr, ptr, ptr, ptr, ptr, i32) | 23 | + [ |
26 | DEF_HELPER_FLAGS_7(sme_fmopa_d, TCG_CALL_NO_RWG, | 24 | + YIELD 1101 0101 0000 0011 0010 0000 001 11111 |
27 | diff --git a/target/arm/sme.decode b/target/arm/sme.decode | 25 | + WFE 1101 0101 0000 0011 0010 0000 010 11111 |
26 | + WFI 1101 0101 0000 0011 0010 0000 011 11111 | ||
27 | + # We implement WFE to never block, so our SEV/SEVL are NOPs | ||
28 | + # SEV 1101 0101 0000 0011 0010 0000 100 11111 | ||
29 | + # SEVL 1101 0101 0000 0011 0010 0000 101 11111 | ||
30 | + # Our DGH is a NOP because we don't merge memory accesses anyway. | |
31 | + # DGH 1101 0101 0000 0011 0010 0000 110 11111 | |
32 | + XPACLRI 1101 0101 0000 0011 0010 0000 111 11111 | ||
33 | + PACIA1716 1101 0101 0000 0011 0010 0001 000 11111 | ||
34 | + PACIB1716 1101 0101 0000 0011 0010 0001 010 11111 | ||
35 | + AUTIA1716 1101 0101 0000 0011 0010 0001 100 11111 | ||
36 | + AUTIB1716 1101 0101 0000 0011 0010 0001 110 11111 | ||
37 | + ESB 1101 0101 0000 0011 0010 0010 000 11111 | ||
38 | + PACIAZ 1101 0101 0000 0011 0010 0011 000 11111 | ||
39 | + PACIASP 1101 0101 0000 0011 0010 0011 001 11111 | ||
40 | + PACIBZ 1101 0101 0000 0011 0010 0011 010 11111 | ||
41 | + PACIBSP 1101 0101 0000 0011 0010 0011 011 11111 | ||
42 | + AUTIAZ 1101 0101 0000 0011 0010 0011 100 11111 | ||
43 | + AUTIASP 1101 0101 0000 0011 0010 0011 101 11111 | ||
44 | + AUTIBZ 1101 0101 0000 0011 0010 0011 110 11111 | ||
45 | + AUTIBSP 1101 0101 0000 0011 0010 0011 111 11111 | ||
46 | + ] | ||
47 | + # The canonical NOP has CRm == op2 == 0, but all of the space | ||
48 | + # that isn't specifically allocated to an instruction must NOP | ||
49 | + NOP 1101 0101 0000 0011 0010 ---- --- 11111 | ||
50 | +} | ||
51 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
28 | index XXXXXXX..XXXXXXX 100644 | 52 | index XXXXXXX..XXXXXXX 100644 |
29 | --- a/target/arm/sme.decode | 53 | --- a/target/arm/tcg/translate-a64.c |
30 | +++ b/target/arm/sme.decode | 54 | +++ b/target/arm/tcg/translate-a64.c |
31 | @@ -XXX,XX +XXX,XX @@ FMOPA_s 10000000 100 ..... ... ... ..... . 00 .. @op_32 | 55 | @@ -XXX,XX +XXX,XX @@ static bool trans_ERETA(DisasContext *s, arg_reta *a) |
32 | FMOPA_d 10000000 110 ..... ... ... ..... . 0 ... @op_64 | ||
33 | |||
34 | BFMOPA 10000001 100 ..... ... ... ..... . 00 .. @op_32 | ||
35 | +FMOPA_h 10000001 101 ..... ... ... ..... . 00 .. @op_32 | ||
36 | diff --git a/target/arm/sme_helper.c b/target/arm/sme_helper.c | ||
37 | index XXXXXXX..XXXXXXX 100644 | ||
38 | --- a/target/arm/sme_helper.c | ||
39 | +++ b/target/arm/sme_helper.c | ||
40 | @@ -XXX,XX +XXX,XX @@ static inline uint32_t f16mop_adj_pair(uint32_t pair, uint32_t pg, uint32_t neg) | ||
41 | return pair; | ||
42 | } | ||
43 | |||
44 | +static float32 f16_dotadd(float32 sum, uint32_t e1, uint32_t e2, | ||
45 | + float_status *s_std, float_status *s_odd) | ||
46 | +{ | ||
47 | + float64 e1r = float16_to_float64(e1 & 0xffff, true, s_std); | ||
48 | + float64 e1c = float16_to_float64(e1 >> 16, true, s_std); | ||
49 | + float64 e2r = float16_to_float64(e2 & 0xffff, true, s_std); | ||
50 | + float64 e2c = float16_to_float64(e2 >> 16, true, s_std); | ||
51 | + float64 t64; | ||
52 | + float32 t32; | ||
53 | + | ||
54 | + /* | ||
55 | + * The ARM pseudocode function FPDot performs both multiplies | ||
56 | + * and the add with a single rounding operation. Emulate this | ||
57 | + * by performing the first multiply in round-to-odd, then doing | ||
58 | + * the second multiply as fused multiply-add, and rounding to | ||
59 | + * float32 all in one step. | ||
60 | + */ | ||
61 | + t64 = float64_mul(e1r, e2r, s_odd); | ||
62 | + t64 = float64r32_muladd(e1c, e2c, t64, 0, s_std); | ||
63 | + | ||
64 | + /* This conversion is exact, because we've already rounded. */ | ||
65 | + t32 = float64_to_float32(t64, s_std); | ||
66 | + | ||
67 | + /* The final accumulation step is not fused. */ | ||
68 | + return float32_add(sum, t32, s_std); | ||
69 | +} | ||
70 | + | ||
71 | +void HELPER(sme_fmopa_h)(void *vza, void *vzn, void *vzm, void *vpn, | ||
72 | + void *vpm, void *vst, uint32_t desc) | ||
73 | +{ | ||
74 | + intptr_t row, col, oprsz = simd_maxsz(desc); | ||
75 | + uint32_t neg = simd_data(desc) * 0x80008000u; | ||
76 | + uint16_t *pn = vpn, *pm = vpm; | ||
77 | + float_status fpst_odd, fpst_std; | ||
78 | + | ||
79 | + /* | ||
80 | + * Make a copy of float_status because this operation does not | ||
81 | + * update the cumulative fp exception status. It also produces | ||
82 | + * default nans. Make a second copy with round-to-odd -- see above. | ||
83 | + */ | ||
84 | + fpst_std = *(float_status *)vst; | ||
85 | + set_default_nan_mode(true, &fpst_std); | ||
86 | + fpst_odd = fpst_std; | ||
87 | + set_float_rounding_mode(float_round_to_odd, &fpst_odd); | ||
88 | + | ||
89 | + for (row = 0; row < oprsz; ) { | ||
90 | + uint16_t prow = pn[H2(row >> 4)]; | ||
91 | + do { | ||
92 | + void *vza_row = vza + tile_vslice_offset(row); | ||
93 | + uint32_t n = *(uint32_t *)(vzn + H1_4(row)); | ||
94 | + | ||
95 | + n = f16mop_adj_pair(n, prow, neg); | ||
96 | + | ||
97 | + for (col = 0; col < oprsz; ) { | ||
98 | + uint16_t pcol = pm[H2(col >> 4)]; | ||
99 | + do { | ||
100 | + if (prow & pcol & 0b0101) { | ||
101 | + uint32_t *a = vza_row + H1_4(col); | ||
102 | + uint32_t m = *(uint32_t *)(vzm + H1_4(col)); | ||
103 | + | ||
104 | + m = f16mop_adj_pair(m, pcol, 0); | ||
105 | + *a = f16_dotadd(*a, n, m, &fpst_std, &fpst_odd); | ||
106 | + | ||
107 | + col += 4; | ||
108 | + pcol >>= 4; | ||
109 | + } | ||
110 | + } while (col & 15); | ||
111 | + } | ||
112 | + row += 4; | ||
113 | + prow >>= 4; | ||
114 | + } while (row & 15); | ||
115 | + } | ||
116 | +} | ||
117 | + | ||
118 | void HELPER(sme_bfmopa)(void *vza, void *vzn, void *vzm, void *vpn, | ||
119 | void *vpm, uint32_t desc) | ||
120 | { | ||
121 | diff --git a/target/arm/translate-sme.c b/target/arm/translate-sme.c | ||
122 | index XXXXXXX..XXXXXXX 100644 | ||
123 | --- a/target/arm/translate-sme.c | ||
124 | +++ b/target/arm/translate-sme.c | ||
125 | @@ -XXX,XX +XXX,XX @@ static bool do_outprod_fpst(DisasContext *s, arg_op *a, MemOp esz, | ||
126 | return true; | 56 | return true; |
127 | } | 57 | } |
128 | 58 | ||
129 | +TRANS_FEAT(FMOPA_h, aa64_sme, do_outprod_fpst, a, MO_32, gen_helper_sme_fmopa_h) | 59 | -/* HINT instruction group, including various allocated HINTs */ |
130 | TRANS_FEAT(FMOPA_s, aa64_sme, do_outprod_fpst, a, MO_32, gen_helper_sme_fmopa_s) | 60 | -static void handle_hint(DisasContext *s, uint32_t insn, |
131 | TRANS_FEAT(FMOPA_d, aa64_sme_f64f64, do_outprod_fpst, a, MO_64, gen_helper_sme_fmopa_d) | 61 | - unsigned int op1, unsigned int op2, unsigned int crm) |
132 | 62 | +static bool trans_NOP(DisasContext *s, arg_NOP *a) | |
63 | { | ||
64 | - unsigned int selector = crm << 3 | op2; | ||
65 | + return true; | ||
66 | +} | ||
67 | |||
68 | - if (op1 != 3) { | ||
69 | - unallocated_encoding(s); | ||
70 | - return; | ||
71 | +static bool trans_YIELD(DisasContext *s, arg_YIELD *a) | ||
72 | +{ | ||
73 | + /* | ||
74 | + * When running in MTTCG we don't generate jumps to the yield and | ||
75 | + * WFE helpers as it won't affect the scheduling of other vCPUs. | ||
76 | + * If we wanted to more completely model WFE/SEV so we don't busy | ||
77 | + * spin unnecessarily we would need to do something more involved. | ||
78 | + */ | ||
79 | + if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) { | ||
80 | + s->base.is_jmp = DISAS_YIELD; | ||
81 | } | ||
82 | + return true; | ||
83 | +} | ||
84 | |||
85 | - switch (selector) { | ||
86 | - case 0b00000: /* NOP */ | ||
87 | - break; | ||
88 | - case 0b00011: /* WFI */ | ||
89 | - s->base.is_jmp = DISAS_WFI; | ||
90 | - break; | ||
91 | - case 0b00001: /* YIELD */ | ||
92 | - /* When running in MTTCG we don't generate jumps to the yield and | ||
93 | - * WFE helpers as it won't affect the scheduling of other vCPUs. | ||
94 | - * If we wanted to more completely model WFE/SEV so we don't busy | ||
95 | - * spin unnecessarily we would need to do something more involved. | ||
96 | +static bool trans_WFI(DisasContext *s, arg_WFI *a) | ||
97 | +{ | ||
98 | + s->base.is_jmp = DISAS_WFI; | ||
99 | + return true; | ||
100 | +} | ||
101 | + | ||
102 | +static bool trans_WFE(DisasContext *s, arg_WFI *a) | ||
103 | +{ | ||
104 | + /* | ||
105 | + * When running in MTTCG we don't generate jumps to the yield and | ||
106 | + * WFE helpers as it won't affect the scheduling of other vCPUs. | ||
107 | + * If we wanted to more completely model WFE/SEV so we don't busy | ||
108 | + * spin unnecessarily we would need to do something more involved. | ||
109 | + */ | ||
110 | + if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) { | ||
111 | + s->base.is_jmp = DISAS_WFE; | ||
112 | + } | ||
113 | + return true; | ||
114 | +} | ||
115 | + | ||
116 | +static bool trans_XPACLRI(DisasContext *s, arg_XPACLRI *a) | ||
117 | +{ | ||
118 | + if (s->pauth_active) { | ||
119 | + gen_helper_xpaci(cpu_X[30], cpu_env, cpu_X[30]); | ||
120 | + } | ||
121 | + return true; | ||
122 | +} | ||
123 | + | ||
124 | +static bool trans_PACIA1716(DisasContext *s, arg_PACIA1716 *a) | ||
125 | +{ | ||
126 | + if (s->pauth_active) { | ||
127 | + gen_helper_pacia(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]); | ||
128 | + } | ||
129 | + return true; | ||
130 | +} | ||
131 | + | ||
132 | +static bool trans_PACIB1716(DisasContext *s, arg_PACIB1716 *a) | ||
133 | +{ | ||
134 | + if (s->pauth_active) { | ||
135 | + gen_helper_pacib(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]); | ||
136 | + } | ||
137 | + return true; | ||
138 | +} | ||
139 | + | ||
140 | +static bool trans_AUTIA1716(DisasContext *s, arg_AUTIA1716 *a) | ||
141 | +{ | ||
142 | + if (s->pauth_active) { | ||
143 | + gen_helper_autia(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]); | ||
144 | + } | ||
145 | + return true; | ||
146 | +} | ||
147 | + | ||
148 | +static bool trans_AUTIB1716(DisasContext *s, arg_AUTIB1716 *a) | ||
149 | +{ | ||
150 | + if (s->pauth_active) { | ||
151 | + gen_helper_autib(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]); | ||
152 | + } | ||
153 | + return true; | ||
154 | +} | ||
155 | + | ||
156 | +static bool trans_ESB(DisasContext *s, arg_ESB *a) | ||
157 | +{ | ||
158 | + /* Without RAS, we must implement this as NOP. */ | ||
159 | + if (dc_isar_feature(aa64_ras, s)) { | ||
160 | + /* | ||
161 | + * QEMU does not have a source of physical SErrors, | ||
162 | + * so we are only concerned with virtual SErrors. | ||
163 | + * The pseudocode in the ARM for this case is | ||
164 | + * if PSTATE.EL IN {EL0, EL1} && EL2Enabled() then | ||
165 | + * AArch64.vESBOperation(); | ||
166 | + * Most of the condition can be evaluated at translation time. | ||
167 | + * Test for EL2 present, and defer test for SEL2 to runtime. | ||
168 | */ | ||
169 | - if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) { | ||
170 | - s->base.is_jmp = DISAS_YIELD; | ||
171 | + if (s->current_el <= 1 && arm_dc_feature(s, ARM_FEATURE_EL2)) { | ||
172 | + gen_helper_vesb(cpu_env); | ||
173 | } | ||
174 | - break; | ||
175 | - case 0b00010: /* WFE */ | ||
176 | - if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) { | ||
177 | - s->base.is_jmp = DISAS_WFE; | ||
178 | - } | ||
179 | - break; | ||
180 | - case 0b00100: /* SEV */ | ||
181 | - case 0b00101: /* SEVL */ | ||
182 | - case 0b00110: /* DGH */ | ||
183 | - /* we treat all as NOP at least for now */ | ||
184 | - break; | ||
185 | - case 0b00111: /* XPACLRI */ | ||
186 | - if (s->pauth_active) { | ||
187 | - gen_helper_xpaci(cpu_X[30], cpu_env, cpu_X[30]); | ||
188 | - } | ||
189 | - break; | ||
190 | - case 0b01000: /* PACIA1716 */ | ||
191 | - if (s->pauth_active) { | ||
192 | - gen_helper_pacia(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]); | ||
193 | - } | ||
194 | - break; | ||
195 | - case 0b01010: /* PACIB1716 */ | ||
196 | - if (s->pauth_active) { | ||
197 | - gen_helper_pacib(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]); | ||
198 | - } | ||
199 | - break; | ||
200 | - case 0b01100: /* AUTIA1716 */ | ||
201 | - if (s->pauth_active) { | ||
202 | - gen_helper_autia(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]); | ||
203 | - } | ||
204 | - break; | ||
205 | - case 0b01110: /* AUTIB1716 */ | ||
206 | - if (s->pauth_active) { | ||
207 | - gen_helper_autib(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]); | ||
208 | - } | ||
209 | - break; | ||
210 | - case 0b10000: /* ESB */ | ||
211 | - /* Without RAS, we must implement this as NOP. */ | ||
212 | - if (dc_isar_feature(aa64_ras, s)) { | ||
213 | - /* | ||
214 | - * QEMU does not have a source of physical SErrors, | ||
215 | - * so we are only concerned with virtual SErrors. | ||
216 | - * The pseudocode in the ARM for this case is | ||
217 | - * if PSTATE.EL IN {EL0, EL1} && EL2Enabled() then | ||
218 | - * AArch64.vESBOperation(); | ||
219 | - * Most of the condition can be evaluated at translation time. | ||
220 | - * Test for EL2 present, and defer test for SEL2 to runtime. | ||
221 | - */ | ||
222 | - if (s->current_el <= 1 && arm_dc_feature(s, ARM_FEATURE_EL2)) { | ||
223 | - gen_helper_vesb(cpu_env); | ||
224 | - } | ||
225 | - } | ||
226 | - break; | ||
227 | - case 0b11000: /* PACIAZ */ | ||
228 | - if (s->pauth_active) { | ||
229 | - gen_helper_pacia(cpu_X[30], cpu_env, cpu_X[30], | ||
230 | - tcg_constant_i64(0)); | ||
231 | - } | ||
232 | - break; | ||
233 | - case 0b11001: /* PACIASP */ | ||
234 | - if (s->pauth_active) { | ||
235 | - gen_helper_pacia(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]); | ||
236 | - } | ||
237 | - break; | ||
238 | - case 0b11010: /* PACIBZ */ | ||
239 | - if (s->pauth_active) { | ||
240 | - gen_helper_pacib(cpu_X[30], cpu_env, cpu_X[30], | ||
241 | - tcg_constant_i64(0)); | ||
242 | - } | ||
243 | - break; | ||
244 | - case 0b11011: /* PACIBSP */ | ||
245 | - if (s->pauth_active) { | ||
246 | - gen_helper_pacib(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]); | ||
247 | - } | ||
248 | - break; | ||
249 | - case 0b11100: /* AUTIAZ */ | ||
250 | - if (s->pauth_active) { | ||
251 | - gen_helper_autia(cpu_X[30], cpu_env, cpu_X[30], | ||
252 | - tcg_constant_i64(0)); | ||
253 | - } | ||
254 | - break; | ||
255 | - case 0b11101: /* AUTIASP */ | ||
256 | - if (s->pauth_active) { | ||
257 | - gen_helper_autia(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]); | ||
258 | - } | ||
259 | - break; | ||
260 | - case 0b11110: /* AUTIBZ */ | ||
261 | - if (s->pauth_active) { | ||
262 | - gen_helper_autib(cpu_X[30], cpu_env, cpu_X[30], | ||
263 | - tcg_constant_i64(0)); | ||
264 | - } | ||
265 | - break; | ||
266 | - case 0b11111: /* AUTIBSP */ | ||
267 | - if (s->pauth_active) { | ||
268 | - gen_helper_autib(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]); | ||
269 | - } | ||
270 | - break; | ||
271 | - default: | ||
272 | - /* default specified as NOP equivalent */ | ||
273 | - break; | ||
274 | } | ||
275 | + return true; | ||
276 | +} | ||
277 | + | ||
278 | +static bool trans_PACIAZ(DisasContext *s, arg_PACIAZ *a) | ||
279 | +{ | ||
280 | + if (s->pauth_active) { | ||
281 | + gen_helper_pacia(cpu_X[30], cpu_env, cpu_X[30], tcg_constant_i64(0)); | ||
282 | + } | ||
283 | + return true; | ||
284 | +} | ||
285 | + | ||
286 | +static bool trans_PACIASP(DisasContext *s, arg_PACIASP *a) | ||
287 | +{ | ||
288 | + if (s->pauth_active) { | ||
289 | + gen_helper_pacia(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]); | ||
290 | + } | ||
291 | + return true; | ||
292 | +} | ||
293 | + | ||
294 | +static bool trans_PACIBZ(DisasContext *s, arg_PACIBZ *a) | ||
295 | +{ | ||
296 | + if (s->pauth_active) { | ||
297 | + gen_helper_pacib(cpu_X[30], cpu_env, cpu_X[30], tcg_constant_i64(0)); | ||
298 | + } | ||
299 | + return true; | ||
300 | +} | ||
301 | + | ||
302 | +static bool trans_PACIBSP(DisasContext *s, arg_PACIBSP *a) | ||
303 | +{ | ||
304 | + if (s->pauth_active) { | ||
305 | + gen_helper_pacib(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]); | ||
306 | + } | ||
307 | + return true; | ||
308 | +} | ||
309 | + | ||
310 | +static bool trans_AUTIAZ(DisasContext *s, arg_AUTIAZ *a) | ||
311 | +{ | ||
312 | + if (s->pauth_active) { | ||
313 | + gen_helper_autia(cpu_X[30], cpu_env, cpu_X[30], tcg_constant_i64(0)); | ||
314 | + } | ||
315 | + return true; | ||
316 | +} | ||
317 | + | ||
318 | +static bool trans_AUTIASP(DisasContext *s, arg_AUTIASP *a) | ||
319 | +{ | ||
320 | + if (s->pauth_active) { | ||
321 | + gen_helper_autia(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]); | ||
322 | + } | ||
323 | + return true; | ||
324 | +} | ||
325 | + | ||
326 | +static bool trans_AUTIBZ(DisasContext *s, arg_AUTIBZ *a) | ||
327 | +{ | ||
328 | + if (s->pauth_active) { | ||
329 | + gen_helper_autib(cpu_X[30], cpu_env, cpu_X[30], tcg_constant_i64(0)); | ||
330 | + } | ||
331 | + return true; | ||
332 | +} | ||
333 | + | ||
334 | +static bool trans_AUTIBSP(DisasContext *s, arg_AUTIBSP *a) | ||
335 | +{ | ||
336 | + if (s->pauth_active) { | ||
337 | + gen_helper_autib(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]); | ||
338 | + } | ||
339 | + return true; | ||
340 | } | ||
341 | |||
342 | static void gen_clrex(DisasContext *s, uint32_t insn) | ||
343 | @@ -XXX,XX +XXX,XX @@ static void disas_system(DisasContext *s, uint32_t insn) | ||
344 | return; | ||
345 | } | ||
346 | switch (crn) { | ||
347 | - case 2: /* HINT (including allocated hints like NOP, YIELD, etc) */ | ||
348 | - handle_hint(s, insn, op1, op2, crm); | ||
349 | - break; | ||
350 | case 3: /* CLREX, DSB, DMB, ISB */ | ||
351 | handle_sync(s, insn, op1, op2, crm); | ||
352 | break; | ||
133 | -- | 353 | -- |
134 | 2.25.1 | 354 | 2.34.1 | diff view generated by jsdifflib |
1 | From: Richard Henderson <richard.henderson@linaro.org> | 1 | Convert the insns in the "Barriers" instruction class to |
---|---|---|---|
2 | decodetree: CLREX, DSB, DMB, ISB and SB. | ||
2 | 3 | ||
3 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
4 | Message-id: 20220708151540.18136-26-richard.henderson@linaro.org | ||
5 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 4 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
6 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | 5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
6 | Message-id: 20230602155223.2040685-4-peter.maydell@linaro.org | ||
7 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
7 | --- | 8 | --- |
8 | target/arm/helper-sme.h | 2 ++ | 9 | target/arm/tcg/a64.decode | 7 +++ |
9 | target/arm/sme.decode | 2 ++ | 10 | target/arm/tcg/translate-a64.c | 92 ++++++++++++++-------------------- |
10 | target/arm/sme_helper.c | 56 ++++++++++++++++++++++++++++++++++++++ | 11 | 2 files changed, 46 insertions(+), 53 deletions(-) |
11 | target/arm/translate-sme.c | 30 ++++++++++++++++++++ | ||
12 | 4 files changed, 90 insertions(+) | ||
13 | 12 | ||
14 | diff --git a/target/arm/helper-sme.h b/target/arm/helper-sme.h | 13 | diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode |
15 | index XXXXXXX..XXXXXXX 100644 | 14 | index XXXXXXX..XXXXXXX 100644 |
16 | --- a/target/arm/helper-sme.h | 15 | --- a/target/arm/tcg/a64.decode |
17 | +++ b/target/arm/helper-sme.h | 16 | +++ b/target/arm/tcg/a64.decode |
18 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_7(sme_fmopa_s, TCG_CALL_NO_RWG, | 17 | @@ -XXX,XX +XXX,XX @@ ERETA 1101011 0100 11111 00001 m:1 11111 11111 &reta # ERETAA, ERETAB |
19 | void, ptr, ptr, ptr, ptr, ptr, ptr, i32) | 18 | # that isn't specifically allocated to an instruction must NOP |
20 | DEF_HELPER_FLAGS_7(sme_fmopa_d, TCG_CALL_NO_RWG, | 19 | NOP 1101 0101 0000 0011 0010 ---- --- 11111 |
21 | void, ptr, ptr, ptr, ptr, ptr, ptr, i32) | ||
22 | +DEF_HELPER_FLAGS_6(sme_bfmopa, TCG_CALL_NO_RWG, | ||
23 | + void, ptr, ptr, ptr, ptr, ptr, i32) | ||
24 | diff --git a/target/arm/sme.decode b/target/arm/sme.decode | ||
25 | index XXXXXXX..XXXXXXX 100644 | ||
26 | --- a/target/arm/sme.decode | ||
27 | +++ b/target/arm/sme.decode | ||
28 | @@ -XXX,XX +XXX,XX @@ ADDVA_d 11000000 11 01000 1 ... ... ..... 00 ... @adda_64 | ||
29 | |||
30 | FMOPA_s 10000000 100 ..... ... ... ..... . 00 .. @op_32 | ||
31 | FMOPA_d 10000000 110 ..... ... ... ..... . 0 ... @op_64 | ||
32 | + | ||
33 | +BFMOPA 10000001 100 ..... ... ... ..... . 00 .. @op_32 | ||
34 | diff --git a/target/arm/sme_helper.c b/target/arm/sme_helper.c | ||
35 | index XXXXXXX..XXXXXXX 100644 | ||
36 | --- a/target/arm/sme_helper.c | ||
37 | +++ b/target/arm/sme_helper.c | ||
38 | @@ -XXX,XX +XXX,XX @@ void HELPER(sme_fmopa_d)(void *vza, void *vzn, void *vzm, void *vpn, | ||
39 | } | ||
40 | } | ||
41 | } | 20 | } |
42 | + | 21 | + |
43 | +/* | 22 | +# Barriers |
44 | + * Alter PAIR as needed for controlling predicates being false, | 23 | + |
45 | + * and for NEG on an enabled row element. | 24 | +CLREX 1101 0101 0000 0011 0011 ---- 010 11111 |
46 | + */ | 25 | +DSB_DMB 1101 0101 0000 0011 0011 domain:2 types:2 10- 11111 |
47 | +static inline uint32_t f16mop_adj_pair(uint32_t pair, uint32_t pg, uint32_t neg) | 26 | +ISB 1101 0101 0000 0011 0011 ---- 110 11111 |
27 | +SB 1101 0101 0000 0011 0011 0000 111 11111 | ||
28 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
29 | index XXXXXXX..XXXXXXX 100644 | ||
30 | --- a/target/arm/tcg/translate-a64.c | ||
31 | +++ b/target/arm/tcg/translate-a64.c | ||
32 | @@ -XXX,XX +XXX,XX @@ static bool trans_AUTIBSP(DisasContext *s, arg_AUTIBSP *a) | ||
33 | return true; | ||
34 | } | ||
35 | |||
36 | -static void gen_clrex(DisasContext *s, uint32_t insn) | ||
37 | +static bool trans_CLREX(DisasContext *s, arg_CLREX *a) | ||
38 | { | ||
39 | tcg_gen_movi_i64(cpu_exclusive_addr, -1); | ||
40 | + return true; | ||
41 | } | ||
42 | |||
43 | -/* CLREX, DSB, DMB, ISB */ | ||
44 | -static void handle_sync(DisasContext *s, uint32_t insn, | ||
45 | - unsigned int op1, unsigned int op2, unsigned int crm) | ||
46 | +static bool trans_DSB_DMB(DisasContext *s, arg_DSB_DMB *a) | ||
47 | { | ||
48 | + /* We handle DSB and DMB the same way */ | ||
49 | TCGBar bar; | ||
50 | |||
51 | - if (op1 != 3) { | ||
52 | - unallocated_encoding(s); | ||
53 | - return; | ||
54 | + switch (a->types) { | ||
55 | + case 1: /* MBReqTypes_Reads */ | ||
56 | + bar = TCG_BAR_SC | TCG_MO_LD_LD | TCG_MO_LD_ST; | ||
57 | + break; | ||
58 | + case 2: /* MBReqTypes_Writes */ | ||
59 | + bar = TCG_BAR_SC | TCG_MO_ST_ST; | ||
60 | + break; | ||
61 | + default: /* MBReqTypes_All */ | ||
62 | + bar = TCG_BAR_SC | TCG_MO_ALL; | ||
63 | + break; | ||
64 | } | ||
65 | + tcg_gen_mb(bar); | ||
66 | + return true; | ||
67 | +} | ||
68 | |||
69 | - switch (op2) { | ||
70 | - case 2: /* CLREX */ | ||
71 | - gen_clrex(s, insn); | ||
72 | - return; | ||
73 | - case 4: /* DSB */ | ||
74 | - case 5: /* DMB */ | ||
75 | - switch (crm & 3) { | ||
76 | - case 1: /* MBReqTypes_Reads */ | ||
77 | - bar = TCG_BAR_SC | TCG_MO_LD_LD | TCG_MO_LD_ST; | ||
78 | - break; | ||
79 | - case 2: /* MBReqTypes_Writes */ | ||
80 | - bar = TCG_BAR_SC | TCG_MO_ST_ST; | ||
81 | - break; | ||
82 | - default: /* MBReqTypes_All */ | ||
83 | - bar = TCG_BAR_SC | TCG_MO_ALL; | ||
84 | - break; | ||
85 | - } | ||
86 | - tcg_gen_mb(bar); | ||
87 | - return; | ||
88 | - case 6: /* ISB */ | ||
89 | - /* We need to break the TB after this insn to execute | ||
90 | - * a self-modified code correctly and also to take | ||
91 | - * any pending interrupts immediately. | ||
92 | - */ | ||
93 | - reset_btype(s); | ||
94 | - gen_goto_tb(s, 0, 4); | ||
95 | - return; | ||
96 | +static bool trans_ISB(DisasContext *s, arg_ISB *a) | ||
48 | +{ | 97 | +{ |
49 | + /* | 98 | + /* |
50 | + * The pseudocode uses a conditional negate after the conditional zero. | 99 | + * We need to break the TB after this insn to execute |
51 | + * It is simpler here to unconditionally negate before conditional zero. | 100 | + * self-modifying code correctly and also to take |
101 | + * any pending interrupts immediately. | ||
52 | + */ | 102 | + */ |
53 | + pair ^= neg; | 103 | + reset_btype(s); |
54 | + if (!(pg & 1)) { | 104 | + gen_goto_tb(s, 0, 4); |
55 | + pair &= 0xffff0000u; | ||
56 | + } | ||
57 | + if (!(pg & 4)) { | ||
58 | + pair &= 0x0000ffffu; | ||
59 | + } | ||
60 | + return pair; | ||
61 | +} | ||
62 | + | ||
63 | +void HELPER(sme_bfmopa)(void *vza, void *vzn, void *vzm, void *vpn, | ||
64 | + void *vpm, uint32_t desc) | ||
65 | +{ | ||
66 | + intptr_t row, col, oprsz = simd_maxsz(desc); | ||
67 | + uint32_t neg = simd_data(desc) * 0x80008000u; | ||
68 | + uint16_t *pn = vpn, *pm = vpm; | ||
69 | + | ||
70 | + for (row = 0; row < oprsz; ) { | ||
71 | + uint16_t prow = pn[H2(row >> 4)]; | ||
72 | + do { | ||
73 | + void *vza_row = vza + tile_vslice_offset(row); | ||
74 | + uint32_t n = *(uint32_t *)(vzn + H1_4(row)); | ||
75 | + | ||
76 | + n = f16mop_adj_pair(n, prow, neg); | ||
77 | + | ||
78 | + for (col = 0; col < oprsz; ) { | ||
79 | + uint16_t pcol = pm[H2(col >> 4)]; | ||
80 | + do { | ||
81 | + if (prow & pcol & 0b0101) { | ||
82 | + uint32_t *a = vza_row + H1_4(col); | ||
83 | + uint32_t m = *(uint32_t *)(vzm + H1_4(col)); | ||
84 | + | ||
85 | + m = f16mop_adj_pair(m, pcol, 0); | ||
86 | + *a = bfdotadd(*a, n, m); | ||
87 | + | ||
88 | + col += 4; | ||
89 | + pcol >>= 4; | ||
90 | + } | ||
91 | + } while (col & 15); | ||
92 | + } | ||
93 | + row += 4; | ||
94 | + prow >>= 4; | ||
95 | + } while (row & 15); | ||
96 | + } | ||
97 | +} | ||
98 | diff --git a/target/arm/translate-sme.c b/target/arm/translate-sme.c | ||
99 | index XXXXXXX..XXXXXXX 100644 | ||
100 | --- a/target/arm/translate-sme.c | ||
101 | +++ b/target/arm/translate-sme.c | ||
102 | @@ -XXX,XX +XXX,XX @@ TRANS_FEAT(ADDVA_s, aa64_sme, do_adda, a, MO_32, gen_helper_sme_addva_s) | ||
103 | TRANS_FEAT(ADDHA_d, aa64_sme_i16i64, do_adda, a, MO_64, gen_helper_sme_addha_d) | ||
104 | TRANS_FEAT(ADDVA_d, aa64_sme_i16i64, do_adda, a, MO_64, gen_helper_sme_addva_d) | ||
105 | |||
106 | +static bool do_outprod(DisasContext *s, arg_op *a, MemOp esz, | ||
107 | + gen_helper_gvec_5 *fn) | ||
108 | +{ | ||
109 | + int svl = streaming_vec_reg_size(s); | ||
110 | + uint32_t desc = simd_desc(svl, svl, a->sub); | ||
111 | + TCGv_ptr za, zn, zm, pn, pm; | ||
112 | + | ||
113 | + if (!sme_smza_enabled_check(s)) { | ||
114 | + return true; | ||
115 | + } | ||
116 | + | ||
117 | + /* Sum XZR+zad to find ZAd. */ | ||
118 | + za = get_tile_rowcol(s, esz, 31, a->zad, false); | ||
119 | + zn = vec_full_reg_ptr(s, a->zn); | ||
120 | + zm = vec_full_reg_ptr(s, a->zm); | ||
121 | + pn = pred_full_reg_ptr(s, a->pn); | ||
122 | + pm = pred_full_reg_ptr(s, a->pm); | ||
123 | + | ||
124 | + fn(za, zn, zm, pn, pm, tcg_constant_i32(desc)); | ||
125 | + | ||
126 | + tcg_temp_free_ptr(za); | ||
127 | + tcg_temp_free_ptr(zn); | ||
128 | + tcg_temp_free_ptr(pn); | ||
129 | + tcg_temp_free_ptr(pm); | ||
130 | + return true; | 105 | + return true; |
131 | +} | 106 | +} |
132 | + | 107 | |
133 | static bool do_outprod_fpst(DisasContext *s, arg_op *a, MemOp esz, | 108 | - case 7: /* SB */ |
134 | gen_helper_gvec_5_ptr *fn) | 109 | - if (crm != 0 || !dc_isar_feature(aa64_sb, s)) { |
135 | { | 110 | - goto do_unallocated; |
136 | @@ -XXX,XX +XXX,XX @@ static bool do_outprod_fpst(DisasContext *s, arg_op *a, MemOp esz, | 111 | - } |
137 | 112 | - /* | |
138 | TRANS_FEAT(FMOPA_s, aa64_sme, do_outprod_fpst, a, MO_32, gen_helper_sme_fmopa_s) | 113 | - * TODO: There is no speculation barrier opcode for TCG; |
139 | TRANS_FEAT(FMOPA_d, aa64_sme_f64f64, do_outprod_fpst, a, MO_64, gen_helper_sme_fmopa_d) | 114 | - * MB and end the TB instead. |
140 | + | 115 | - */ |
141 | +/* TODO: FEAT_EBF16 */ | 116 | - tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC); |
142 | +TRANS_FEAT(BFMOPA, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_bfmopa) | 117 | - gen_goto_tb(s, 0, 4); |
118 | - return; | ||
119 | - | ||
120 | - default: | ||
121 | - do_unallocated: | ||
122 | - unallocated_encoding(s); | ||
123 | - return; | ||
124 | +static bool trans_SB(DisasContext *s, arg_SB *a) | ||
125 | +{ | ||
126 | + if (!dc_isar_feature(aa64_sb, s)) { | ||
127 | + return false; | ||
128 | } | ||
129 | + /* | ||
130 | + * TODO: There is no speculation barrier opcode for TCG; | ||
131 | + * MB and end the TB instead. | ||
132 | + */ | ||
133 | + tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC); | ||
134 | + gen_goto_tb(s, 0, 4); | ||
135 | + return true; | ||
136 | } | ||
137 | |||
138 | static void gen_xaflag(void) | ||
139 | @@ -XXX,XX +XXX,XX @@ static void disas_system(DisasContext *s, uint32_t insn) | ||
140 | return; | ||
141 | } | ||
142 | switch (crn) { | ||
143 | - case 3: /* CLREX, DSB, DMB, ISB */ | ||
144 | - handle_sync(s, insn, op1, op2, crm); | ||
145 | - break; | ||
146 | case 4: /* MSR (immediate) */ | ||
147 | handle_msr_i(s, insn, op1, op2, crm); | ||
148 | break; | ||
143 | -- | 149 | -- |
144 | 2.25.1 | 150 | 2.34.1 |
151 | |||
152 | diff view generated by jsdifflib |
1 | From: Richard Henderson <richard.henderson@linaro.org> | 1 | Convert the CFINV, XAFLAG and AXFLAG insns to decodetree. |
---|---|---|---|
2 | The old decoder handles these in handle_msr_i(), but | ||
3 | the architecture defines them as separate instructions | ||
4 | from MSR (immediate). | ||
2 | 5 | ||
3 | Mark these as non-streaming instructions, which should trap if full | 6 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
4 | a64 support is not enabled in streaming mode. In this case, introduce | 7 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
5 | PRF_ns (prefetch non-streaming) to handle the checks. | 8 | Message-id: 20230602155223.2040685-5-peter.maydell@linaro.org |
9 | --- | ||
10 | target/arm/tcg/a64.decode | 6 ++++ | ||
11 | target/arm/tcg/translate-a64.c | 53 +++++++++++++++++----------------- | ||
12 | 2 files changed, 32 insertions(+), 27 deletions(-) | ||
6 | 13 | ||
7 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | 14 | diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode |
8 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
9 | Message-id: 20220708151540.18136-13-richard.henderson@linaro.org | ||
10 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
11 | --- | ||
12 | target/arm/sme-fa64.decode | 3 --- | ||
13 | target/arm/sve.decode | 10 +++++----- | ||
14 | target/arm/translate-sve.c | 11 +++++++++++ | ||
15 | 3 files changed, 16 insertions(+), 8 deletions(-) | ||
16 | |||
17 | diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode | ||
18 | index XXXXXXX..XXXXXXX 100644 | 15 | index XXXXXXX..XXXXXXX 100644 |
19 | --- a/target/arm/sme-fa64.decode | 16 | --- a/target/arm/tcg/a64.decode |
20 | +++ b/target/arm/sme-fa64.decode | 17 | +++ b/target/arm/tcg/a64.decode |
21 | @@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS | 18 | @@ -XXX,XX +XXX,XX @@ CLREX 1101 0101 0000 0011 0011 ---- 010 11111 |
22 | # --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset) | 19 | DSB_DMB 1101 0101 0000 0011 0011 domain:2 types:2 10- 11111 |
23 | # --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm) | 20 | ISB 1101 0101 0000 0011 0011 ---- 110 11111 |
24 | 21 | SB 1101 0101 0000 0011 0011 0000 111 11111 | |
25 | -FAIL 1000 010- -00- ---- 111- ---- ---- ---- # SVE 32-bit gather prefetch (vector+imm) | 22 | + |
26 | -FAIL 1000 0100 0-1- ---- 0--- ---- ---- ---- # SVE 32-bit gather prefetch (scalar+vector) | 23 | +# PSTATE |
27 | FAIL 1010 010- ---- ---- 011- ---- ---- ---- # SVE contiguous FF load (scalar+scalar) | 24 | + |
28 | FAIL 1010 010- ---1 ---- 101- ---- ---- ---- # SVE contiguous NF load (scalar+imm) | 25 | +CFINV 1101 0101 0000 0 000 0100 0000 000 11111 |
29 | FAIL 1010 010- -01- ---- 000- ---- ---- ---- # SVE load & replicate 32 bytes (scalar+scalar) | 26 | +XAFLAG 1101 0101 0000 0 000 0100 0000 001 11111 |
30 | FAIL 1010 010- -010 ---- 001- ---- ---- ---- # SVE load & replicate 32 bytes (scalar+imm) | 27 | +AXFLAG 1101 0101 0000 0 000 0100 0000 010 11111 |
31 | -FAIL 1100 010- ---- ---- ---- ---- ---- ---- # SVE 64-bit gather load/prefetch | 28 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c |
32 | diff --git a/target/arm/sve.decode b/target/arm/sve.decode | ||
33 | index XXXXXXX..XXXXXXX 100644 | 29 | index XXXXXXX..XXXXXXX 100644 |
34 | --- a/target/arm/sve.decode | 30 | --- a/target/arm/tcg/translate-a64.c |
35 | +++ b/target/arm/sve.decode | 31 | +++ b/target/arm/tcg/translate-a64.c |
36 | @@ -XXX,XX +XXX,XX @@ LD1RO_zpri 1010010 .. 01 0.... 001 ... ..... ..... \ | 32 | @@ -XXX,XX +XXX,XX @@ static bool trans_SB(DisasContext *s, arg_SB *a) |
37 | @rpri_load_msz nreg=0 | ||
38 | |||
39 | # SVE 32-bit gather prefetch (scalar plus 32-bit scaled offsets) | ||
40 | -PRF 1000010 00 -1 ----- 0-- --- ----- 0 ---- | ||
41 | +PRF_ns 1000010 00 -1 ----- 0-- --- ----- 0 ---- | ||
42 | |||
43 | # SVE 32-bit gather prefetch (vector plus immediate) | ||
44 | -PRF 1000010 -- 00 ----- 111 --- ----- 0 ---- | ||
45 | +PRF_ns 1000010 -- 00 ----- 111 --- ----- 0 ---- | ||
46 | |||
47 | # SVE contiguous prefetch (scalar plus immediate) | ||
48 | PRF 1000010 11 1- ----- 0-- --- ----- 0 ---- | ||
49 | @@ -XXX,XX +XXX,XX @@ LD1_zpiz 1100010 .. 01 ..... 1.. ... ..... ..... \ | ||
50 | @rpri_g_load esz=3 | ||
51 | |||
52 | # SVE 64-bit gather prefetch (scalar plus 64-bit scaled offsets) | ||
53 | -PRF 1100010 00 11 ----- 1-- --- ----- 0 ---- | ||
54 | +PRF_ns 1100010 00 11 ----- 1-- --- ----- 0 ---- | ||
55 | |||
56 | # SVE 64-bit gather prefetch (scalar plus unpacked 32-bit scaled offsets) | ||
57 | -PRF 1100010 00 -1 ----- 0-- --- ----- 0 ---- | ||
58 | +PRF_ns 1100010 00 -1 ----- 0-- --- ----- 0 ---- | ||
59 | |||
60 | # SVE 64-bit gather prefetch (vector plus immediate) | ||
61 | -PRF 1100010 -- 00 ----- 111 --- ----- 0 ---- | ||
62 | +PRF_ns 1100010 -- 00 ----- 111 --- ----- 0 ---- | ||
63 | |||
64 | ### SVE Memory Store Group | ||
65 | |||
66 | diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c | ||
67 | index XXXXXXX..XXXXXXX 100644 | ||
68 | --- a/target/arm/translate-sve.c | ||
69 | +++ b/target/arm/translate-sve.c | ||
70 | @@ -XXX,XX +XXX,XX @@ static bool trans_PRF_rr(DisasContext *s, arg_PRF_rr *a) | ||
71 | return true; | 33 | return true; |
72 | } | 34 | } |
73 | 35 | ||
74 | +static bool trans_PRF_ns(DisasContext *s, arg_PRF_ns *a) | 36 | -static void gen_xaflag(void) |
75 | +{ | 37 | +static bool trans_CFINV(DisasContext *s, arg_CFINV *a) |
76 | + if (!dc_isar_feature(aa64_sve, s)) { | 38 | { |
39 | - TCGv_i32 z = tcg_temp_new_i32(); | ||
40 | + if (!dc_isar_feature(aa64_condm_4, s)) { | ||
77 | + return false; | 41 | + return false; |
78 | + } | 42 | + } |
79 | + /* Prefetch is a nop within QEMU. */ | 43 | + tcg_gen_xori_i32(cpu_CF, cpu_CF, 1); |
80 | + s->is_nonstreaming = true; | ||
81 | + (void)sve_access_check(s); | ||
82 | + return true; | 44 | + return true; |
83 | +} | 45 | +} |
84 | + | 46 | + |
85 | /* | 47 | +static bool trans_XAFLAG(DisasContext *s, arg_XAFLAG *a) |
86 | * Move Prefix | 48 | +{ |
87 | * | 49 | + TCGv_i32 z; |
50 | + | ||
51 | + if (!dc_isar_feature(aa64_condm_5, s)) { | ||
52 | + return false; | ||
53 | + } | ||
54 | + | ||
55 | + z = tcg_temp_new_i32(); | ||
56 | |||
57 | tcg_gen_setcondi_i32(TCG_COND_EQ, z, cpu_ZF, 0); | ||
58 | |||
59 | @@ -XXX,XX +XXX,XX @@ static void gen_xaflag(void) | ||
60 | |||
61 | /* C | Z */ | ||
62 | tcg_gen_or_i32(cpu_CF, cpu_CF, z); | ||
63 | + | ||
64 | + return true; | ||
65 | } | ||
66 | |||
67 | -static void gen_axflag(void) | ||
68 | +static bool trans_AXFLAG(DisasContext *s, arg_AXFLAG *a) | ||
69 | { | ||
70 | + if (!dc_isar_feature(aa64_condm_5, s)) { | ||
71 | + return false; | ||
72 | + } | ||
73 | + | ||
74 | tcg_gen_sari_i32(cpu_VF, cpu_VF, 31); /* V ? -1 : 0 */ | ||
75 | tcg_gen_andc_i32(cpu_CF, cpu_CF, cpu_VF); /* C & !V */ | ||
76 | |||
77 | @@ -XXX,XX +XXX,XX @@ static void gen_axflag(void) | ||
78 | |||
79 | tcg_gen_movi_i32(cpu_NF, 0); | ||
80 | tcg_gen_movi_i32(cpu_VF, 0); | ||
81 | + | ||
82 | + return true; | ||
83 | } | ||
84 | |||
85 | /* MSR (immediate) - move immediate to processor state field */ | ||
86 | @@ -XXX,XX +XXX,XX @@ static void handle_msr_i(DisasContext *s, uint32_t insn, | ||
87 | s->base.is_jmp = DISAS_TOO_MANY; | ||
88 | |||
89 | switch (op) { | ||
90 | - case 0x00: /* CFINV */ | ||
91 | - if (crm != 0 || !dc_isar_feature(aa64_condm_4, s)) { | ||
92 | - goto do_unallocated; | ||
93 | - } | ||
94 | - tcg_gen_xori_i32(cpu_CF, cpu_CF, 1); | ||
95 | - s->base.is_jmp = DISAS_NEXT; | ||
96 | - break; | ||
97 | - | ||
98 | - case 0x01: /* XAFlag */ | ||
99 | - if (crm != 0 || !dc_isar_feature(aa64_condm_5, s)) { | ||
100 | - goto do_unallocated; | ||
101 | - } | ||
102 | - gen_xaflag(); | ||
103 | - s->base.is_jmp = DISAS_NEXT; | ||
104 | - break; | ||
105 | - | ||
106 | - case 0x02: /* AXFlag */ | ||
107 | - if (crm != 0 || !dc_isar_feature(aa64_condm_5, s)) { | ||
108 | - goto do_unallocated; | ||
109 | - } | ||
110 | - gen_axflag(); | ||
111 | - s->base.is_jmp = DISAS_NEXT; | ||
112 | - break; | ||
113 | - | ||
114 | case 0x03: /* UAO */ | ||
115 | if (!dc_isar_feature(aa64_uao, s) || s->current_el == 0) { | ||
116 | goto do_unallocated; | ||
88 | -- | 117 | -- |
89 | 2.25.1 | 118 | 2.34.1 | diff view generated by jsdifflib |
1 | From: Richard Henderson <richard.henderson@linaro.org> | 1 | Convert the MSR (immediate) insn to decodetree. Our implementation |
---|---|---|---|
2 | has basically no commonality between the different destinations, | ||
3 | so we decode the destination register in a64.decode. | ||
2 | 4 | ||
3 | This is an SVE instruction that operates using the SVE vector | 5 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
4 | length but that is present only if SME is implemented. | 6 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
7 | Message-id: 20230602155223.2040685-6-peter.maydell@linaro.org | ||
8 | --- | ||
9 | target/arm/tcg/a64.decode | 13 ++ | ||
10 | target/arm/tcg/translate-a64.c | 251 ++++++++++++++++----------------- | ||
11 | 2 files changed, 136 insertions(+), 128 deletions(-) | ||
5 | 12 | ||
6 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | 13 | diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode |
7 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
8 | Message-id: 20220708151540.18136-31-richard.henderson@linaro.org | ||
9 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
10 | --- | ||
11 | target/arm/helper.h | 18 +++++++ | ||
12 | target/arm/sve.decode | 5 ++ | ||
13 | target/arm/translate-sve.c | 102 +++++++++++++++++++++++++++++++++++++ | ||
14 | target/arm/vec_helper.c | 24 +++++++++ | ||
15 | 4 files changed, 149 insertions(+) | ||
16 | |||
17 | diff --git a/target/arm/helper.h b/target/arm/helper.h | ||
18 | index XXXXXXX..XXXXXXX 100644 | 14 | index XXXXXXX..XXXXXXX 100644 |
19 | --- a/target/arm/helper.h | 15 | --- a/target/arm/tcg/a64.decode |
20 | +++ b/target/arm/helper.h | 16 | +++ b/target/arm/tcg/a64.decode |
21 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_6(gvec_bfmlal, TCG_CALL_NO_RWG, | 17 | @@ -XXX,XX +XXX,XX @@ SB 1101 0101 0000 0011 0011 0000 111 11111 |
22 | DEF_HELPER_FLAGS_6(gvec_bfmlal_idx, TCG_CALL_NO_RWG, | 18 | CFINV 1101 0101 0000 0 000 0100 0000 000 11111 |
23 | void, ptr, ptr, ptr, ptr, ptr, i32) | 19 | XAFLAG 1101 0101 0000 0 000 0100 0000 001 11111 |
24 | 20 | AXFLAG 1101 0101 0000 0 000 0100 0000 010 11111 | |
25 | +DEF_HELPER_FLAGS_5(gvec_sclamp_b, TCG_CALL_NO_RWG, | 21 | + |
26 | + void, ptr, ptr, ptr, ptr, i32) | 22 | +# These are architecturally all "MSR (immediate)"; we decode the destination |
27 | +DEF_HELPER_FLAGS_5(gvec_sclamp_h, TCG_CALL_NO_RWG, | 23 | +# register too because there is no commonality in our implementation. |
28 | + void, ptr, ptr, ptr, ptr, i32) | 24 | +@msr_i .... .... .... . ... .... imm:4 ... ..... |
29 | +DEF_HELPER_FLAGS_5(gvec_sclamp_s, TCG_CALL_NO_RWG, | 25 | +MSR_i_UAO 1101 0101 0000 0 000 0100 .... 011 11111 @msr_i |
30 | + void, ptr, ptr, ptr, ptr, i32) | 26 | +MSR_i_PAN 1101 0101 0000 0 000 0100 .... 100 11111 @msr_i |
31 | +DEF_HELPER_FLAGS_5(gvec_sclamp_d, TCG_CALL_NO_RWG, | 27 | +MSR_i_SPSEL 1101 0101 0000 0 000 0100 .... 101 11111 @msr_i |
32 | + void, ptr, ptr, ptr, ptr, i32) | 28 | +MSR_i_SBSS 1101 0101 0000 0 011 0100 .... 001 11111 @msr_i |
33 | + | 29 | +MSR_i_DIT 1101 0101 0000 0 011 0100 .... 010 11111 @msr_i |
34 | +DEF_HELPER_FLAGS_5(gvec_uclamp_b, TCG_CALL_NO_RWG, | 30 | +MSR_i_TCO 1101 0101 0000 0 011 0100 .... 100 11111 @msr_i |
35 | + void, ptr, ptr, ptr, ptr, i32) | 31 | +MSR_i_DAIFSET 1101 0101 0000 0 011 0100 .... 110 11111 @msr_i |
36 | +DEF_HELPER_FLAGS_5(gvec_uclamp_h, TCG_CALL_NO_RWG, | 32 | +MSR_i_DAIFCLEAR 1101 0101 0000 0 011 0100 .... 111 11111 @msr_i |
37 | + void, ptr, ptr, ptr, ptr, i32) | 33 | +MSR_i_SVCR 1101 0101 0000 0 011 0100 0 mask:2 imm:1 011 11111 |
38 | +DEF_HELPER_FLAGS_5(gvec_uclamp_s, TCG_CALL_NO_RWG, | 34 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c |
39 | + void, ptr, ptr, ptr, ptr, i32) | ||
40 | +DEF_HELPER_FLAGS_5(gvec_uclamp_d, TCG_CALL_NO_RWG, | ||
41 | + void, ptr, ptr, ptr, ptr, i32) | ||
42 | + | ||
43 | #ifdef TARGET_AARCH64 | ||
44 | #include "helper-a64.h" | ||
45 | #include "helper-sve.h" | ||
46 | diff --git a/target/arm/sve.decode b/target/arm/sve.decode | ||
47 | index XXXXXXX..XXXXXXX 100644 | 35 | index XXXXXXX..XXXXXXX 100644 |
48 | --- a/target/arm/sve.decode | 36 | --- a/target/arm/tcg/translate-a64.c |
49 | +++ b/target/arm/sve.decode | 37 | +++ b/target/arm/tcg/translate-a64.c |
50 | @@ -XXX,XX +XXX,XX @@ PSEL 00100101 .. 1 100 .. 01 .... 0 .... 0 .... \ | 38 | @@ -XXX,XX +XXX,XX @@ static bool trans_AXFLAG(DisasContext *s, arg_AXFLAG *a) |
51 | @psel esz=2 imm=%psel_imm_s | ||
52 | PSEL 00100101 .1 1 000 .. 01 .... 0 .... 0 .... \ | ||
53 | @psel esz=3 imm=%psel_imm_d | ||
54 | + | ||
55 | +### SVE clamp | ||
56 | + | ||
57 | +SCLAMP 01000100 .. 0 ..... 110000 ..... ..... @rda_rn_rm | ||
58 | +UCLAMP 01000100 .. 0 ..... 110001 ..... ..... @rda_rn_rm | ||
59 | diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c | ||
60 | index XXXXXXX..XXXXXXX 100644 | ||
61 | --- a/target/arm/translate-sve.c | ||
62 | +++ b/target/arm/translate-sve.c | ||
63 | @@ -XXX,XX +XXX,XX @@ static bool trans_PSEL(DisasContext *s, arg_psel *a) | ||
64 | tcg_temp_free_ptr(ptr); | ||
65 | return true; | 39 | return true; |
66 | } | 40 | } |
67 | + | 41 | |
68 | +static void gen_sclamp_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_i32 a) | 42 | -/* MSR (immediate) - move immediate to processor state field */ |
69 | +{ | 43 | -static void handle_msr_i(DisasContext *s, uint32_t insn, |
70 | + tcg_gen_smax_i32(d, a, n); | 44 | - unsigned int op1, unsigned int op2, unsigned int crm) |
71 | + tcg_gen_smin_i32(d, d, m); | 45 | +static bool trans_MSR_i_UAO(DisasContext *s, arg_i *a) |
72 | +} | 46 | { |
73 | + | 47 | - int op = op1 << 3 | op2; |
74 | +static void gen_sclamp_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 a) | 48 | - |
75 | +{ | 49 | - /* End the TB by default, chaining is ok. */ |
76 | + tcg_gen_smax_i64(d, a, n); | 50 | - s->base.is_jmp = DISAS_TOO_MANY; |
77 | + tcg_gen_smin_i64(d, d, m); | 51 | - |
78 | +} | 52 | - switch (op) { |
79 | + | 53 | - case 0x03: /* UAO */ |
80 | +static void gen_sclamp_vec(unsigned vece, TCGv_vec d, TCGv_vec n, | 54 | - if (!dc_isar_feature(aa64_uao, s) || s->current_el == 0) { |
81 | + TCGv_vec m, TCGv_vec a) | 55 | - goto do_unallocated; |
82 | +{ | 56 | - } |
83 | + tcg_gen_smax_vec(vece, d, a, n); | 57 | - if (crm & 1) { |
84 | + tcg_gen_smin_vec(vece, d, d, m); | 58 | - set_pstate_bits(PSTATE_UAO); |
85 | +} | 59 | - } else { |
86 | + | 60 | - clear_pstate_bits(PSTATE_UAO); |
87 | +static void gen_sclamp(unsigned vece, uint32_t d, uint32_t n, uint32_t m, | 61 | - } |
88 | + uint32_t a, uint32_t oprsz, uint32_t maxsz) | 62 | - gen_rebuild_hflags(s); |
89 | +{ | 63 | - break; |
90 | + static const TCGOpcode vecop[] = { | 64 | - |
91 | + INDEX_op_smin_vec, INDEX_op_smax_vec, 0 | 65 | - case 0x04: /* PAN */ |
92 | + }; | 66 | - if (!dc_isar_feature(aa64_pan, s) || s->current_el == 0) { |
93 | + static const GVecGen4 ops[4] = { | 67 | - goto do_unallocated; |
94 | + { .fniv = gen_sclamp_vec, | 68 | - } |
95 | + .fno = gen_helper_gvec_sclamp_b, | 69 | - if (crm & 1) { |
96 | + .opt_opc = vecop, | 70 | - set_pstate_bits(PSTATE_PAN); |
97 | + .vece = MO_8 }, | 71 | - } else { |
98 | + { .fniv = gen_sclamp_vec, | 72 | - clear_pstate_bits(PSTATE_PAN); |
99 | + .fno = gen_helper_gvec_sclamp_h, | 73 | - } |
100 | + .opt_opc = vecop, | 74 | - gen_rebuild_hflags(s); |
101 | + .vece = MO_16 }, | 75 | - break; |
102 | + { .fni4 = gen_sclamp_i32, | 76 | - |
103 | + .fniv = gen_sclamp_vec, | 77 | - case 0x05: /* SPSel */ |
104 | + .fno = gen_helper_gvec_sclamp_s, | 78 | - if (s->current_el == 0) { |
105 | + .opt_opc = vecop, | 79 | - goto do_unallocated; |
106 | + .vece = MO_32 }, | 80 | - } |
107 | + { .fni8 = gen_sclamp_i64, | 81 | - gen_helper_msr_i_spsel(cpu_env, tcg_constant_i32(crm & PSTATE_SP)); |
108 | + .fniv = gen_sclamp_vec, | 82 | - break; |
109 | + .fno = gen_helper_gvec_sclamp_d, | 83 | - |
110 | + .opt_opc = vecop, | 84 | - case 0x19: /* SSBS */ |
111 | + .vece = MO_64, | 85 | - if (!dc_isar_feature(aa64_ssbs, s)) { |
112 | + .prefer_i64 = TCG_TARGET_REG_BITS == 64 } | 86 | - goto do_unallocated; |
113 | + }; | 87 | - } |
114 | + tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &ops[vece]); | 88 | - if (crm & 1) { |
115 | +} | 89 | - set_pstate_bits(PSTATE_SSBS); |
116 | + | 90 | - } else { |
117 | +TRANS_FEAT(SCLAMP, aa64_sme, gen_gvec_fn_arg_zzzz, gen_sclamp, a) | 91 | - clear_pstate_bits(PSTATE_SSBS); |
118 | + | 92 | - } |
119 | +static void gen_uclamp_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_i32 a) | 93 | - /* Don't need to rebuild hflags since SSBS is a nop */ |
120 | +{ | 94 | - break; |
121 | + tcg_gen_umax_i32(d, a, n); | 95 | - |
122 | + tcg_gen_umin_i32(d, d, m); | 96 | - case 0x1a: /* DIT */ |
123 | +} | 97 | - if (!dc_isar_feature(aa64_dit, s)) { |
124 | + | 98 | - goto do_unallocated; |
125 | +static void gen_uclamp_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 a) | 99 | - } |
126 | +{ | 100 | - if (crm & 1) { |
127 | + tcg_gen_umax_i64(d, a, n); | 101 | - set_pstate_bits(PSTATE_DIT); |
128 | + tcg_gen_umin_i64(d, d, m); | 102 | - } else { |
129 | +} | 103 | - clear_pstate_bits(PSTATE_DIT); |
130 | + | 104 | - } |
131 | +static void gen_uclamp_vec(unsigned vece, TCGv_vec d, TCGv_vec n, | 105 | - /* There's no need to rebuild hflags because DIT is a nop */ |
132 | + TCGv_vec m, TCGv_vec a) | 106 | - break; |
133 | +{ | 107 | - |
134 | + tcg_gen_umax_vec(vece, d, a, n); | 108 | - case 0x1e: /* DAIFSet */ |
135 | + tcg_gen_umin_vec(vece, d, d, m); | 109 | - gen_helper_msr_i_daifset(cpu_env, tcg_constant_i32(crm)); |
136 | +} | 110 | - break; |
137 | + | 111 | - |
138 | +static void gen_uclamp(unsigned vece, uint32_t d, uint32_t n, uint32_t m, | 112 | - case 0x1f: /* DAIFClear */ |
139 | + uint32_t a, uint32_t oprsz, uint32_t maxsz) | 113 | - gen_helper_msr_i_daifclear(cpu_env, tcg_constant_i32(crm)); |
140 | +{ | 114 | - /* For DAIFClear, exit the cpu loop to re-evaluate pending IRQs. */ |
141 | + static const TCGOpcode vecop[] = { | 115 | - s->base.is_jmp = DISAS_UPDATE_EXIT; |
142 | + INDEX_op_umin_vec, INDEX_op_umax_vec, 0 | 116 | - break; |
143 | + }; | 117 | - |
144 | + static const GVecGen4 ops[4] = { | 118 | - case 0x1c: /* TCO */ |
145 | + { .fniv = gen_uclamp_vec, | 119 | - if (dc_isar_feature(aa64_mte, s)) { |
146 | + .fno = gen_helper_gvec_uclamp_b, | 120 | - /* Full MTE is enabled -- set the TCO bit as directed. */ |
147 | + .opt_opc = vecop, | 121 | - if (crm & 1) { |
148 | + .vece = MO_8 }, | 122 | - set_pstate_bits(PSTATE_TCO); |
149 | + { .fniv = gen_uclamp_vec, | 123 | - } else { |
150 | + .fno = gen_helper_gvec_uclamp_h, | 124 | - clear_pstate_bits(PSTATE_TCO); |
151 | + .opt_opc = vecop, | 125 | - } |
152 | + .vece = MO_16 }, | 126 | - gen_rebuild_hflags(s); |
153 | + { .fni4 = gen_uclamp_i32, | 127 | - /* Many factors, including TCO, go into MTE_ACTIVE. */ |
154 | + .fniv = gen_uclamp_vec, | 128 | - s->base.is_jmp = DISAS_UPDATE_NOCHAIN; |
155 | + .fno = gen_helper_gvec_uclamp_s, | 129 | - } else if (dc_isar_feature(aa64_mte_insn_reg, s)) { |
156 | + .opt_opc = vecop, | 130 | - /* Only "instructions accessible at EL0" -- PSTATE.TCO is WI. */ |
157 | + .vece = MO_32 }, | 131 | - s->base.is_jmp = DISAS_NEXT; |
158 | + { .fni8 = gen_uclamp_i64, | 132 | - } else { |
159 | + .fniv = gen_uclamp_vec, | 133 | - goto do_unallocated; |
160 | + .fno = gen_helper_gvec_uclamp_d, | 134 | - } |
161 | + .opt_opc = vecop, | 135 | - break; |
162 | + .vece = MO_64, | 136 | - |
163 | + .prefer_i64 = TCG_TARGET_REG_BITS == 64 } | 137 | - case 0x1b: /* SVCR* */ |
164 | + }; | 138 | - if (!dc_isar_feature(aa64_sme, s) || crm < 2 || crm > 7) { |
165 | + tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &ops[vece]); | 139 | - goto do_unallocated; |
166 | +} | 140 | - } |
167 | + | 141 | - if (sme_access_check(s)) { |
168 | +TRANS_FEAT(UCLAMP, aa64_sme, gen_gvec_fn_arg_zzzz, gen_uclamp, a) | 142 | - int old = s->pstate_sm | (s->pstate_za << 1); |
169 | diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c | 143 | - int new = (crm & 1) * 3; |
170 | index XXXXXXX..XXXXXXX 100644 | 144 | - int msk = (crm >> 1) & 3; |
171 | --- a/target/arm/vec_helper.c | 145 | - |
172 | +++ b/target/arm/vec_helper.c | 146 | - if ((old ^ new) & msk) { |
173 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_bfmlal_idx)(void *vd, void *vn, void *vm, | 147 | - /* At least one bit changes. */ |
148 | - gen_helper_set_svcr(cpu_env, tcg_constant_i32(new), | ||
149 | - tcg_constant_i32(msk)); | ||
150 | - } else { | ||
151 | - s->base.is_jmp = DISAS_NEXT; | ||
152 | - } | ||
153 | - } | ||
154 | - break; | ||
155 | - | ||
156 | - default: | ||
157 | - do_unallocated: | ||
158 | - unallocated_encoding(s); | ||
159 | - return; | ||
160 | + if (!dc_isar_feature(aa64_uao, s) || s->current_el == 0) { | ||
161 | + return false; | ||
174 | } | 162 | } |
175 | clear_tail(d, opr_sz, simd_maxsz(desc)); | 163 | + if (a->imm & 1) { |
164 | + set_pstate_bits(PSTATE_UAO); | ||
165 | + } else { | ||
166 | + clear_pstate_bits(PSTATE_UAO); | ||
167 | + } | ||
168 | + gen_rebuild_hflags(s); | ||
169 | + s->base.is_jmp = DISAS_TOO_MANY; | ||
170 | + return true; | ||
171 | +} | ||
172 | + | ||
173 | +static bool trans_MSR_i_PAN(DisasContext *s, arg_i *a) | ||
174 | +{ | ||
175 | + if (!dc_isar_feature(aa64_pan, s) || s->current_el == 0) { | ||
176 | + return false; | ||
177 | + } | ||
178 | + if (a->imm & 1) { | ||
179 | + set_pstate_bits(PSTATE_PAN); | ||
180 | + } else { | ||
181 | + clear_pstate_bits(PSTATE_PAN); | ||
182 | + } | ||
183 | + gen_rebuild_hflags(s); | ||
184 | + s->base.is_jmp = DISAS_TOO_MANY; | ||
185 | + return true; | ||
186 | +} | ||
187 | + | ||
188 | +static bool trans_MSR_i_SPSEL(DisasContext *s, arg_i *a) | ||
189 | +{ | ||
190 | + if (s->current_el == 0) { | ||
191 | + return false; | ||
192 | + } | ||
193 | + gen_helper_msr_i_spsel(cpu_env, tcg_constant_i32(a->imm & PSTATE_SP)); | ||
194 | + s->base.is_jmp = DISAS_TOO_MANY; | ||
195 | + return true; | ||
196 | +} | ||
197 | + | ||
198 | +static bool trans_MSR_i_SBSS(DisasContext *s, arg_i *a) | ||
199 | +{ | ||
200 | + if (!dc_isar_feature(aa64_ssbs, s)) { | ||
201 | + return false; | ||
202 | + } | ||
203 | + if (a->imm & 1) { | ||
204 | + set_pstate_bits(PSTATE_SSBS); | ||
205 | + } else { | ||
206 | + clear_pstate_bits(PSTATE_SSBS); | ||
207 | + } | ||
208 | + /* Don't need to rebuild hflags since SSBS is a nop */ | ||
209 | + s->base.is_jmp = DISAS_TOO_MANY; | ||
210 | + return true; | ||
211 | +} | ||
212 | + | ||
213 | +static bool trans_MSR_i_DIT(DisasContext *s, arg_i *a) | ||
214 | +{ | ||
215 | + if (!dc_isar_feature(aa64_dit, s)) { | ||
216 | + return false; | ||
217 | + } | ||
218 | + if (a->imm & 1) { | ||
219 | + set_pstate_bits(PSTATE_DIT); | ||
220 | + } else { | ||
221 | + clear_pstate_bits(PSTATE_DIT); | ||
222 | + } | ||
223 | + /* There's no need to rebuild hflags because DIT is a nop */ | ||
224 | + s->base.is_jmp = DISAS_TOO_MANY; | ||
225 | + return true; | ||
226 | +} | ||
227 | + | ||
228 | +static bool trans_MSR_i_TCO(DisasContext *s, arg_i *a) | ||
229 | +{ | ||
230 | + if (dc_isar_feature(aa64_mte, s)) { | ||
231 | + /* Full MTE is enabled -- set the TCO bit as directed. */ | ||
232 | + if (a->imm & 1) { | ||
233 | + set_pstate_bits(PSTATE_TCO); | ||
234 | + } else { | ||
235 | + clear_pstate_bits(PSTATE_TCO); | ||
236 | + } | ||
237 | + gen_rebuild_hflags(s); | ||
238 | + /* Many factors, including TCO, go into MTE_ACTIVE. */ | ||
239 | + s->base.is_jmp = DISAS_UPDATE_NOCHAIN; | ||
240 | + return true; | ||
241 | + } else if (dc_isar_feature(aa64_mte_insn_reg, s)) { | ||
242 | + /* Only "instructions accessible at EL0" -- PSTATE.TCO is WI. */ | ||
243 | + return true; | ||
244 | + } else { | ||
245 | + /* Insn not present */ | ||
246 | + return false; | ||
247 | + } | ||
248 | +} | ||
249 | + | ||
250 | +static bool trans_MSR_i_DAIFSET(DisasContext *s, arg_i *a) | ||
251 | +{ | ||
252 | + gen_helper_msr_i_daifset(cpu_env, tcg_constant_i32(a->imm)); | ||
253 | + s->base.is_jmp = DISAS_TOO_MANY; | ||
254 | + return true; | ||
255 | +} | ||
256 | + | ||
257 | +static bool trans_MSR_i_DAIFCLEAR(DisasContext *s, arg_i *a) | ||
258 | +{ | ||
259 | + gen_helper_msr_i_daifclear(cpu_env, tcg_constant_i32(a->imm)); | ||
260 | + /* Exit the cpu loop to re-evaluate pending IRQs. */ | ||
261 | + s->base.is_jmp = DISAS_UPDATE_EXIT; | ||
262 | + return true; | ||
263 | +} | ||
264 | + | ||
265 | +static bool trans_MSR_i_SVCR(DisasContext *s, arg_MSR_i_SVCR *a) | ||
266 | +{ | ||
267 | + if (!dc_isar_feature(aa64_sme, s) || a->mask == 0) { | ||
268 | + return false; | ||
269 | + } | ||
270 | + if (sme_access_check(s)) { | ||
271 | + int old = s->pstate_sm | (s->pstate_za << 1); | ||
272 | + int new = a->imm * 3; | ||
273 | + | ||
274 | + if ((old ^ new) & a->mask) { | ||
275 | + /* At least one bit changes. */ | ||
276 | + gen_helper_set_svcr(cpu_env, tcg_constant_i32(new), | ||
277 | + tcg_constant_i32(a->mask)); | ||
278 | + s->base.is_jmp = DISAS_TOO_MANY; | ||
279 | + } | ||
280 | + } | ||
281 | + return true; | ||
176 | } | 282 | } |
177 | + | 283 | |
178 | +#define DO_CLAMP(NAME, TYPE) \ | 284 | static void gen_get_nzcv(TCGv_i64 tcg_rt) |
179 | +void HELPER(NAME)(void *d, void *n, void *m, void *a, uint32_t desc) \ | 285 | @@ -XXX,XX +XXX,XX @@ static void disas_system(DisasContext *s, uint32_t insn) |
180 | +{ \ | 286 | rt = extract32(insn, 0, 5); |
181 | + intptr_t i, opr_sz = simd_oprsz(desc); \ | 287 | |
182 | + for (i = 0; i < opr_sz; i += sizeof(TYPE)) { \ | 288 | if (op0 == 0) { |
183 | + TYPE aa = *(TYPE *)(a + i); \ | 289 | - if (l || rt != 31) { |
184 | + TYPE nn = *(TYPE *)(n + i); \ | 290 | - unallocated_encoding(s); |
185 | + TYPE mm = *(TYPE *)(m + i); \ | 291 | - return; |
186 | + TYPE dd = MIN(MAX(aa, nn), mm); \ | 292 | - } |
187 | + *(TYPE *)(d + i) = dd; \ | 293 | - switch (crn) { |
188 | + } \ | 294 | - case 4: /* MSR (immediate) */ |
189 | + clear_tail(d, opr_sz, simd_maxsz(desc)); \ | 295 | - handle_msr_i(s, insn, op1, op2, crm); |
190 | +} | 296 | - break; |
191 | + | 297 | - default: |
192 | +DO_CLAMP(gvec_sclamp_b, int8_t) | 298 | - unallocated_encoding(s); |
193 | +DO_CLAMP(gvec_sclamp_h, int16_t) | 299 | - break; |
194 | +DO_CLAMP(gvec_sclamp_s, int32_t) | 300 | - } |
195 | +DO_CLAMP(gvec_sclamp_d, int64_t) | 301 | + unallocated_encoding(s); |
196 | + | 302 | return; |
197 | +DO_CLAMP(gvec_uclamp_b, uint8_t) | 303 | } |
198 | +DO_CLAMP(gvec_uclamp_h, uint16_t) | 304 | handle_sys(s, insn, l, op0, op1, op2, crn, crm, rt); |
199 | +DO_CLAMP(gvec_uclamp_s, uint32_t) | ||
200 | +DO_CLAMP(gvec_uclamp_d, uint64_t) | ||
201 | -- | 305 | -- |
202 | 2.25.1 | 306 | 2.34.1 | diff view generated by jsdifflib |
1 | From: Richard Henderson <richard.henderson@linaro.org> | 1 | Convert MSR (reg), MRS, SYS, SYSL to decodetree. For QEMU these are |
---|---|---|---|
2 | all essentially the same instruction (system register access). | ||
2 | 3 | ||
3 | This is SMOPA, SUMOPA, USMOPA, UMOPA, for both Int8 and Int16. | 4 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | Message-id: 20230602155223.2040685-7-peter.maydell@linaro.org | ||
7 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
8 | --- | ||
9 | target/arm/tcg/a64.decode | 8 ++++++++ | ||
10 | target/arm/tcg/translate-a64.c | 32 +++++--------------------------- | ||
11 | 2 files changed, 13 insertions(+), 27 deletions(-) | ||
4 | 12 | ||
5 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | 13 | diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode |
6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | Message-id: 20220708151540.18136-28-richard.henderson@linaro.org | ||
8 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
9 | --- | ||
10 | target/arm/helper-sme.h | 16 ++++++++ | ||
11 | target/arm/sme.decode | 10 +++++ | ||
12 | target/arm/sme_helper.c | 82 ++++++++++++++++++++++++++++++++++++++ | ||
13 | target/arm/translate-sme.c | 10 +++++ | ||
14 | 4 files changed, 118 insertions(+) | ||
15 | |||
16 | diff --git a/target/arm/helper-sme.h b/target/arm/helper-sme.h | ||
17 | index XXXXXXX..XXXXXXX 100644 | 14 | index XXXXXXX..XXXXXXX 100644 |
18 | --- a/target/arm/helper-sme.h | 15 | --- a/target/arm/tcg/a64.decode |
19 | +++ b/target/arm/helper-sme.h | 16 | +++ b/target/arm/tcg/a64.decode |
20 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_7(sme_fmopa_d, TCG_CALL_NO_RWG, | 17 | @@ -XXX,XX +XXX,XX @@ MSR_i_TCO 1101 0101 0000 0 011 0100 .... 100 11111 @msr_i |
21 | void, ptr, ptr, ptr, ptr, ptr, ptr, i32) | 18 | MSR_i_DAIFSET 1101 0101 0000 0 011 0100 .... 110 11111 @msr_i |
22 | DEF_HELPER_FLAGS_6(sme_bfmopa, TCG_CALL_NO_RWG, | 19 | MSR_i_DAIFCLEAR 1101 0101 0000 0 011 0100 .... 111 11111 @msr_i |
23 | void, ptr, ptr, ptr, ptr, ptr, i32) | 20 | MSR_i_SVCR 1101 0101 0000 0 011 0100 0 mask:2 imm:1 011 11111 |
24 | +DEF_HELPER_FLAGS_6(sme_smopa_s, TCG_CALL_NO_RWG, | 21 | + |
25 | + void, ptr, ptr, ptr, ptr, ptr, i32) | 22 | +# MRS, MSR (register), SYS, SYSL. These are all essentially the |
26 | +DEF_HELPER_FLAGS_6(sme_umopa_s, TCG_CALL_NO_RWG, | 23 | +# same instruction as far as QEMU is concerned. |
27 | + void, ptr, ptr, ptr, ptr, ptr, i32) | 24 | +# NB: op0 is bits [20:19], but op0=0b00 is other insns, so we have |
28 | +DEF_HELPER_FLAGS_6(sme_sumopa_s, TCG_CALL_NO_RWG, | 25 | +# to hand-decode it. |
29 | + void, ptr, ptr, ptr, ptr, ptr, i32) | 26 | +SYS 1101 0101 00 l:1 01 op1:3 crn:4 crm:4 op2:3 rt:5 op0=1 |
30 | +DEF_HELPER_FLAGS_6(sme_usmopa_s, TCG_CALL_NO_RWG, | 27 | +SYS 1101 0101 00 l:1 10 op1:3 crn:4 crm:4 op2:3 rt:5 op0=2 |
31 | + void, ptr, ptr, ptr, ptr, ptr, i32) | 28 | +SYS 1101 0101 00 l:1 11 op1:3 crn:4 crm:4 op2:3 rt:5 op0=3 |
32 | +DEF_HELPER_FLAGS_6(sme_smopa_d, TCG_CALL_NO_RWG, | 29 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c |
33 | + void, ptr, ptr, ptr, ptr, ptr, i32) | ||
34 | +DEF_HELPER_FLAGS_6(sme_umopa_d, TCG_CALL_NO_RWG, | ||
35 | + void, ptr, ptr, ptr, ptr, ptr, i32) | ||
36 | +DEF_HELPER_FLAGS_6(sme_sumopa_d, TCG_CALL_NO_RWG, | ||
37 | + void, ptr, ptr, ptr, ptr, ptr, i32) | ||
38 | +DEF_HELPER_FLAGS_6(sme_usmopa_d, TCG_CALL_NO_RWG, | ||
39 | + void, ptr, ptr, ptr, ptr, ptr, i32) | ||
40 | diff --git a/target/arm/sme.decode b/target/arm/sme.decode | ||
41 | index XXXXXXX..XXXXXXX 100644 | 30 | index XXXXXXX..XXXXXXX 100644 |
42 | --- a/target/arm/sme.decode | 31 | --- a/target/arm/tcg/translate-a64.c |
43 | +++ b/target/arm/sme.decode | 32 | +++ b/target/arm/tcg/translate-a64.c |
44 | @@ -XXX,XX +XXX,XX @@ FMOPA_d 10000000 110 ..... ... ... ..... . 0 ... @op_64 | 33 | @@ -XXX,XX +XXX,XX @@ static void gen_sysreg_undef(DisasContext *s, bool isread, |
45 | 34 | * These are all essentially the same insn in 'read' and 'write' | |
46 | BFMOPA 10000001 100 ..... ... ... ..... . 00 .. @op_32 | 35 | * versions, with varying op0 fields. |
47 | FMOPA_h 10000001 101 ..... ... ... ..... . 00 .. @op_32 | 36 | */ |
48 | + | 37 | -static void handle_sys(DisasContext *s, uint32_t insn, bool isread, |
49 | +SMOPA_s 1010000 0 10 0 ..... ... ... ..... . 00 .. @op_32 | 38 | +static void handle_sys(DisasContext *s, bool isread, |
50 | +SUMOPA_s 1010000 0 10 1 ..... ... ... ..... . 00 .. @op_32 | 39 | unsigned int op0, unsigned int op1, unsigned int op2, |
51 | +USMOPA_s 1010000 1 10 0 ..... ... ... ..... . 00 .. @op_32 | 40 | unsigned int crn, unsigned int crm, unsigned int rt) |
52 | +UMOPA_s 1010000 1 10 1 ..... ... ... ..... . 00 .. @op_32 | 41 | { |
53 | + | 42 | @@ -XXX,XX +XXX,XX @@ static void handle_sys(DisasContext *s, uint32_t insn, bool isread, |
54 | +SMOPA_d 1010000 0 11 0 ..... ... ... ..... . 0 ... @op_64 | ||
55 | +SUMOPA_d 1010000 0 11 1 ..... ... ... ..... . 0 ... @op_64 | ||
56 | +USMOPA_d 1010000 1 11 0 ..... ... ... ..... . 0 ... @op_64 | ||
57 | +UMOPA_d 1010000 1 11 1 ..... ... ... ..... . 0 ... @op_64 | ||
58 | diff --git a/target/arm/sme_helper.c b/target/arm/sme_helper.c | ||
59 | index XXXXXXX..XXXXXXX 100644 | ||
60 | --- a/target/arm/sme_helper.c | ||
61 | +++ b/target/arm/sme_helper.c | ||
62 | @@ -XXX,XX +XXX,XX @@ void HELPER(sme_bfmopa)(void *vza, void *vzn, void *vzm, void *vpn, | ||
63 | } while (row & 15); | ||
64 | } | 43 | } |
65 | } | 44 | } |
66 | + | 45 | |
67 | +typedef uint64_t IMOPFn(uint64_t, uint64_t, uint64_t, uint8_t, bool); | 46 | -/* System |
68 | + | 47 | - * 31 22 21 20 19 18 16 15 12 11 8 7 5 4 0 |
69 | +static inline void do_imopa(uint64_t *za, uint64_t *zn, uint64_t *zm, | 48 | - * +---------------------+---+-----+-----+-------+-------+-----+------+ |
70 | + uint8_t *pn, uint8_t *pm, | 49 | - * | 1 1 0 1 0 1 0 1 0 0 | L | op0 | op1 | CRn | CRm | op2 | Rt | |
71 | + uint32_t desc, IMOPFn *fn) | 50 | - * +---------------------+---+-----+-----+-------+-------+-----+------+ |
72 | +{ | 51 | - */ |
73 | + intptr_t row, col, oprsz = simd_oprsz(desc) / 8; | 52 | -static void disas_system(DisasContext *s, uint32_t insn) |
74 | + bool neg = simd_data(desc); | 53 | +static bool trans_SYS(DisasContext *s, arg_SYS *a) |
75 | + | 54 | { |
76 | + for (row = 0; row < oprsz; ++row) { | 55 | - unsigned int l, op0, op1, crn, crm, op2, rt; |
77 | + uint8_t pa = pn[H1(row)]; | 56 | - l = extract32(insn, 21, 1); |
78 | + uint64_t *za_row = &za[tile_vslice_index(row)]; | 57 | - op0 = extract32(insn, 19, 2); |
79 | + uint64_t n = zn[row]; | 58 | - op1 = extract32(insn, 16, 3); |
80 | + | 59 | - crn = extract32(insn, 12, 4); |
81 | + for (col = 0; col < oprsz; ++col) { | 60 | - crm = extract32(insn, 8, 4); |
82 | + uint8_t pb = pm[H1(col)]; | 61 | - op2 = extract32(insn, 5, 3); |
83 | + uint64_t *a = &za_row[col]; | 62 | - rt = extract32(insn, 0, 5); |
84 | + | 63 | - |
85 | + *a = fn(n, zm[col], *a, pa & pb, neg); | 64 | - if (op0 == 0) { |
86 | + } | 65 | - unallocated_encoding(s); |
87 | + } | 66 | - return; |
88 | +} | 67 | - } |
89 | + | 68 | - handle_sys(s, insn, l, op0, op1, op2, crn, crm, rt); |
90 | +#define DEF_IMOP_32(NAME, NTYPE, MTYPE) \ | 69 | + handle_sys(s, a->l, a->op0, a->op1, a->op2, a->crn, a->crm, a->rt); |
91 | +static uint64_t NAME(uint64_t n, uint64_t m, uint64_t a, uint8_t p, bool neg) \ | 70 | + return true; |
92 | +{ \ | 71 | } |
93 | + uint32_t sum0 = 0, sum1 = 0; \ | 72 | |
94 | + /* Apply P to N as a mask, making the inactive elements 0. */ \ | 73 | /* Exception generation |
95 | + n &= expand_pred_b(p); \ | 74 | @@ -XXX,XX +XXX,XX @@ static void disas_b_exc_sys(DisasContext *s, uint32_t insn) |
96 | + sum0 += (NTYPE)(n >> 0) * (MTYPE)(m >> 0); \ | 75 | switch (extract32(insn, 25, 7)) { |
97 | + sum0 += (NTYPE)(n >> 8) * (MTYPE)(m >> 8); \ | 76 | case 0x6a: /* Exception generation / System */ |
98 | + sum0 += (NTYPE)(n >> 16) * (MTYPE)(m >> 16); \ | 77 | if (insn & (1 << 24)) { |
99 | + sum0 += (NTYPE)(n >> 24) * (MTYPE)(m >> 24); \ | 78 | - if (extract32(insn, 22, 2) == 0) { |
100 | + sum1 += (NTYPE)(n >> 32) * (MTYPE)(m >> 32); \ | 79 | - disas_system(s, insn); |
101 | + sum1 += (NTYPE)(n >> 40) * (MTYPE)(m >> 40); \ | 80 | - } else { |
102 | + sum1 += (NTYPE)(n >> 48) * (MTYPE)(m >> 48); \ | 81 | - unallocated_encoding(s); |
103 | + sum1 += (NTYPE)(n >> 56) * (MTYPE)(m >> 56); \ | 82 | - } |
104 | + if (neg) { \ | 83 | + unallocated_encoding(s); |
105 | + sum0 = (uint32_t)a - sum0, sum1 = (uint32_t)(a >> 32) - sum1; \ | 84 | } else { |
106 | + } else { \ | 85 | disas_exc(s, insn); |
107 | + sum0 = (uint32_t)a + sum0, sum1 = (uint32_t)(a >> 32) + sum1; \ | 86 | } |
108 | + } \ | ||
109 | + return ((uint64_t)sum1 << 32) | sum0; \ | ||
110 | +} | ||
111 | + | ||
112 | +#define DEF_IMOP_64(NAME, NTYPE, MTYPE) \ | ||
113 | +static uint64_t NAME(uint64_t n, uint64_t m, uint64_t a, uint8_t p, bool neg) \ | ||
114 | +{ \ | ||
115 | + uint64_t sum = 0; \ | ||
116 | + /* Apply P to N as a mask, making the inactive elements 0. */ \ | ||
117 | + n &= expand_pred_h(p); \ | ||
118 | + sum += (NTYPE)(n >> 0) * (MTYPE)(m >> 0); \ | ||
119 | + sum += (NTYPE)(n >> 16) * (MTYPE)(m >> 16); \ | ||
120 | + sum += (NTYPE)(n >> 32) * (MTYPE)(m >> 32); \ | ||
121 | + sum += (NTYPE)(n >> 48) * (MTYPE)(m >> 48); \ | ||
122 | + return neg ? a - sum : a + sum; \ | ||
123 | +} | ||
124 | + | ||
125 | +DEF_IMOP_32(smopa_s, int8_t, int8_t) | ||
126 | +DEF_IMOP_32(umopa_s, uint8_t, uint8_t) | ||
127 | +DEF_IMOP_32(sumopa_s, int8_t, uint8_t) | ||
128 | +DEF_IMOP_32(usmopa_s, uint8_t, int8_t) | ||
129 | + | ||
130 | +DEF_IMOP_64(smopa_d, int16_t, int16_t) | ||
131 | +DEF_IMOP_64(umopa_d, uint16_t, uint16_t) | ||
132 | +DEF_IMOP_64(sumopa_d, int16_t, uint16_t) | ||
133 | +DEF_IMOP_64(usmopa_d, uint16_t, int16_t) | ||
134 | + | ||
135 | +#define DEF_IMOPH(NAME) \ | ||
136 | + void HELPER(sme_##NAME)(void *vza, void *vzn, void *vzm, void *vpn, \ | ||
137 | + void *vpm, uint32_t desc) \ | ||
138 | + { do_imopa(vza, vzn, vzm, vpn, vpm, desc, NAME); } | ||
139 | + | ||
140 | +DEF_IMOPH(smopa_s) | ||
141 | +DEF_IMOPH(umopa_s) | ||
142 | +DEF_IMOPH(sumopa_s) | ||
143 | +DEF_IMOPH(usmopa_s) | ||
144 | +DEF_IMOPH(smopa_d) | ||
145 | +DEF_IMOPH(umopa_d) | ||
146 | +DEF_IMOPH(sumopa_d) | ||
147 | +DEF_IMOPH(usmopa_d) | ||
148 | diff --git a/target/arm/translate-sme.c b/target/arm/translate-sme.c | ||
149 | index XXXXXXX..XXXXXXX 100644 | ||
150 | --- a/target/arm/translate-sme.c | ||
151 | +++ b/target/arm/translate-sme.c | ||
152 | @@ -XXX,XX +XXX,XX @@ TRANS_FEAT(FMOPA_d, aa64_sme_f64f64, do_outprod_fpst, a, MO_64, gen_helper_sme_f | ||
153 | |||
154 | /* TODO: FEAT_EBF16 */ | ||
155 | TRANS_FEAT(BFMOPA, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_bfmopa) | ||
156 | + | ||
157 | +TRANS_FEAT(SMOPA_s, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_smopa_s) | ||
158 | +TRANS_FEAT(UMOPA_s, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_umopa_s) | ||
159 | +TRANS_FEAT(SUMOPA_s, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_sumopa_s) | ||
160 | +TRANS_FEAT(USMOPA_s, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_usmopa_s) | ||
161 | + | ||
162 | +TRANS_FEAT(SMOPA_d, aa64_sme_i16i64, do_outprod, a, MO_64, gen_helper_sme_smopa_d) | ||
163 | +TRANS_FEAT(UMOPA_d, aa64_sme_i16i64, do_outprod, a, MO_64, gen_helper_sme_umopa_d) | ||
164 | +TRANS_FEAT(SUMOPA_d, aa64_sme_i16i64, do_outprod, a, MO_64, gen_helper_sme_sumopa_d) | ||
165 | +TRANS_FEAT(USMOPA_d, aa64_sme_i16i64, do_outprod, a, MO_64, gen_helper_sme_usmopa_d) | ||
166 | -- | 87 | -- |
167 | 2.25.1 | 88 | 2.34.1 |
89 | |||
90 | diff view generated by jsdifflib |
1 | From: Richard Henderson <richard.henderson@linaro.org> | 1 | Convert the exception generation instructions SVC, HVC, SMC, BRK and |
---|---|---|---|
2 | HLT to decodetree. | ||
2 | 3 | ||
3 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | 4 | The old decoder decoded the halting-debug insns DCPS1, DCPS2 and |
4 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 5 | DCPS3 just in order to then make them UNDEF; as with DRPS, we don't |
5 | Message-id: 20220708151540.18136-24-richard.henderson@linaro.org | 6 | bother to decode them, but document the patterns in a64.decode. |
7 | |||
6 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 8 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
9 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
10 | Message-id: 20230602155223.2040685-8-peter.maydell@linaro.org | ||
7 | --- | 11 | --- |
8 | target/arm/helper-sme.h | 5 +++ | 12 | target/arm/tcg/a64.decode | 15 +++ |
9 | target/arm/sme.decode | 11 +++++ | 13 | target/arm/tcg/translate-a64.c | 173 ++++++++++++--------------------- |
10 | target/arm/sme_helper.c | 90 ++++++++++++++++++++++++++++++++++++++ | 14 | 2 files changed, 79 insertions(+), 109 deletions(-) |
11 | target/arm/translate-sme.c | 31 +++++++++++++ | ||
12 | 4 files changed, 137 insertions(+) | ||
13 | 15 | ||
14 | diff --git a/target/arm/helper-sme.h b/target/arm/helper-sme.h | 16 | diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode |
15 | index XXXXXXX..XXXXXXX 100644 | 17 | index XXXXXXX..XXXXXXX 100644 |
16 | --- a/target/arm/helper-sme.h | 18 | --- a/target/arm/tcg/a64.decode |
17 | +++ b/target/arm/helper-sme.h | 19 | +++ b/target/arm/tcg/a64.decode |
18 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(sme_st1q_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i | 20 | @@ -XXX,XX +XXX,XX @@ MSR_i_SVCR 1101 0101 0000 0 011 0100 0 mask:2 imm:1 011 11111 |
19 | DEF_HELPER_FLAGS_5(sme_st1q_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) | 21 | SYS 1101 0101 00 l:1 01 op1:3 crn:4 crm:4 op2:3 rt:5 op0=1 |
20 | DEF_HELPER_FLAGS_5(sme_st1q_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) | 22 | SYS 1101 0101 00 l:1 10 op1:3 crn:4 crm:4 op2:3 rt:5 op0=2 |
21 | DEF_HELPER_FLAGS_5(sme_st1q_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) | 23 | SYS 1101 0101 00 l:1 11 op1:3 crn:4 crm:4 op2:3 rt:5 op0=3 |
22 | + | 24 | + |
23 | +DEF_HELPER_FLAGS_5(sme_addha_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) | 25 | +# Exception generation |
24 | +DEF_HELPER_FLAGS_5(sme_addva_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) | 26 | + |
25 | +DEF_HELPER_FLAGS_5(sme_addha_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) | 27 | +@i16 .... .... ... imm:16 ... .. &i |
26 | +DEF_HELPER_FLAGS_5(sme_addva_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) | 28 | +SVC 1101 0100 000 ................ 000 01 @i16 |
27 | diff --git a/target/arm/sme.decode b/target/arm/sme.decode | 29 | +HVC 1101 0100 000 ................ 000 10 @i16 |
30 | +SMC 1101 0100 000 ................ 000 11 @i16 | ||
31 | +BRK 1101 0100 001 ................ 000 00 @i16 | ||
32 | +HLT 1101 0100 010 ................ 000 00 @i16 | ||
33 | +# These insns always UNDEF unless in halting debug state, which | ||
34 | +# we don't implement. So we don't need to decode them. The patterns | ||
35 | +# are listed here as documentation. | ||
36 | +# DCPS1 1101 0100 101 ................ 000 01 @i16 | ||
37 | +# DCPS2 1101 0100 101 ................ 000 10 @i16 | ||
38 | +# DCPS3 1101 0100 101 ................ 000 11 @i16 | ||
39 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
28 | index XXXXXXX..XXXXXXX 100644 | 40 | index XXXXXXX..XXXXXXX 100644 |
29 | --- a/target/arm/sme.decode | 41 | --- a/target/arm/tcg/translate-a64.c |
30 | +++ b/target/arm/sme.decode | 42 | +++ b/target/arm/tcg/translate-a64.c |
31 | @@ -XXX,XX +XXX,XX @@ LDST1 1110000 111 st:1 rm:5 v:1 .. pg:3 rn:5 0 za_imm:4 \ | 43 | @@ -XXX,XX +XXX,XX @@ static bool trans_SYS(DisasContext *s, arg_SYS *a) |
32 | 44 | return true; | |
33 | LDR 1110000 100 0 000000 .. 000 ..... 0 .... @ldstr | 45 | } |
34 | STR 1110000 100 1 000000 .. 000 ..... 0 .... @ldstr | 46 | |
35 | + | 47 | -/* Exception generation |
36 | +### SME Add Vector to Array | 48 | - * |
37 | + | 49 | - * 31 24 23 21 20 5 4 2 1 0 |
38 | +&adda zad zn pm pn | 50 | - * +-----------------+-----+------------------------+-----+----+ |
39 | +@adda_32 ........ .. ..... . pm:3 pn:3 zn:5 ... zad:2 &adda | 51 | - * | 1 1 0 1 0 1 0 0 | opc | imm16 | op2 | LL | |
40 | +@adda_64 ........ .. ..... . pm:3 pn:3 zn:5 .. zad:3 &adda | 52 | - * +-----------------------+------------------------+----------+ |
41 | + | 53 | - */ |
42 | +ADDHA_s 11000000 10 01000 0 ... ... ..... 000 .. @adda_32 | 54 | -static void disas_exc(DisasContext *s, uint32_t insn) |
43 | +ADDVA_s 11000000 10 01000 1 ... ... ..... 000 .. @adda_32 | 55 | +static bool trans_SVC(DisasContext *s, arg_i *a) |
44 | +ADDHA_d 11000000 11 01000 0 ... ... ..... 00 ... @adda_64 | 56 | { |
45 | +ADDVA_d 11000000 11 01000 1 ... ... ..... 00 ... @adda_64 | 57 | - int opc = extract32(insn, 21, 3); |
46 | diff --git a/target/arm/sme_helper.c b/target/arm/sme_helper.c | 58 | - int op2_ll = extract32(insn, 0, 5); |
47 | index XXXXXXX..XXXXXXX 100644 | 59 | - int imm16 = extract32(insn, 5, 16); |
48 | --- a/target/arm/sme_helper.c | 60 | - uint32_t syndrome; |
49 | +++ b/target/arm/sme_helper.c | 61 | - |
50 | @@ -XXX,XX +XXX,XX @@ DO_ST(q, _be, MO_128) | 62 | - switch (opc) { |
51 | DO_ST(q, _le, MO_128) | 63 | - case 0: |
52 | 64 | - /* For SVC, HVC and SMC we advance the single-step state | |
53 | #undef DO_ST | 65 | - * machine before taking the exception. This is architecturally |
54 | + | 66 | - * mandated, to ensure that single-stepping a system call |
55 | +void HELPER(sme_addha_s)(void *vzda, void *vzn, void *vpn, | 67 | - * instruction works properly. |
56 | + void *vpm, uint32_t desc) | 68 | - */ |
69 | - switch (op2_ll) { | ||
70 | - case 1: /* SVC */ | ||
71 | - syndrome = syn_aa64_svc(imm16); | ||
72 | - if (s->fgt_svc) { | ||
73 | - gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2); | ||
74 | - break; | ||
75 | - } | ||
76 | - gen_ss_advance(s); | ||
77 | - gen_exception_insn(s, 4, EXCP_SWI, syndrome); | ||
78 | - break; | ||
79 | - case 2: /* HVC */ | ||
80 | - if (s->current_el == 0) { | ||
81 | - unallocated_encoding(s); | ||
82 | - break; | ||
83 | - } | ||
84 | - /* The pre HVC helper handles cases when HVC gets trapped | ||
85 | - * as an undefined insn by runtime configuration. | ||
86 | - */ | ||
87 | - gen_a64_update_pc(s, 0); | ||
88 | - gen_helper_pre_hvc(cpu_env); | ||
89 | - gen_ss_advance(s); | ||
90 | - gen_exception_insn_el(s, 4, EXCP_HVC, syn_aa64_hvc(imm16), 2); | ||
91 | - break; | ||
92 | - case 3: /* SMC */ | ||
93 | - if (s->current_el == 0) { | ||
94 | - unallocated_encoding(s); | ||
95 | - break; | ||
96 | - } | ||
97 | - gen_a64_update_pc(s, 0); | ||
98 | - gen_helper_pre_smc(cpu_env, tcg_constant_i32(syn_aa64_smc(imm16))); | ||
99 | - gen_ss_advance(s); | ||
100 | - gen_exception_insn_el(s, 4, EXCP_SMC, syn_aa64_smc(imm16), 3); | ||
101 | - break; | ||
102 | - default: | ||
103 | - unallocated_encoding(s); | ||
104 | - break; | ||
105 | - } | ||
106 | - break; | ||
107 | - case 1: | ||
108 | - if (op2_ll != 0) { | ||
109 | - unallocated_encoding(s); | ||
110 | - break; | ||
111 | - } | ||
112 | - /* BRK */ | ||
113 | - gen_exception_bkpt_insn(s, syn_aa64_bkpt(imm16)); | ||
114 | - break; | ||
115 | - case 2: | ||
116 | - if (op2_ll != 0) { | ||
117 | - unallocated_encoding(s); | ||
118 | - break; | ||
119 | - } | ||
120 | - /* HLT. This has two purposes. | ||
121 | - * Architecturally, it is an external halting debug instruction. | ||
122 | - * Since QEMU doesn't implement external debug, we treat this as | ||
123 | - * it is required for halting debug disabled: it will UNDEF. | ||
124 | - * Secondly, "HLT 0xf000" is the A64 semihosting syscall instruction. | ||
125 | - */ | ||
126 | - if (semihosting_enabled(s->current_el == 0) && imm16 == 0xf000) { | ||
127 | - gen_exception_internal_insn(s, EXCP_SEMIHOST); | ||
128 | - } else { | ||
129 | - unallocated_encoding(s); | ||
130 | - } | ||
131 | - break; | ||
132 | - case 5: | ||
133 | - if (op2_ll < 1 || op2_ll > 3) { | ||
134 | - unallocated_encoding(s); | ||
135 | - break; | ||
136 | - } | ||
137 | - /* DCPS1, DCPS2, DCPS3 */ | ||
138 | - unallocated_encoding(s); | ||
139 | - break; | ||
140 | - default: | ||
141 | - unallocated_encoding(s); | ||
142 | - break; | ||
143 | + /* | ||
144 | + * For SVC, HVC and SMC we advance the single-step state | ||
145 | + * machine before taking the exception. This is architecturally | ||
146 | + * mandated, to ensure that single-stepping a system call | ||
147 | + * instruction works properly. | ||
148 | + */ | ||
149 | + uint32_t syndrome = syn_aa64_svc(a->imm); | ||
150 | + if (s->fgt_svc) { | ||
151 | + gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2); | ||
152 | + return true; | ||
153 | } | ||
154 | + gen_ss_advance(s); | ||
155 | + gen_exception_insn(s, 4, EXCP_SWI, syndrome); | ||
156 | + return true; | ||
157 | } | ||
158 | |||
159 | -/* Branches, exception generating and system instructions */ | ||
160 | -static void disas_b_exc_sys(DisasContext *s, uint32_t insn) | ||
161 | +static bool trans_HVC(DisasContext *s, arg_i *a) | ||
162 | { | ||
163 | - switch (extract32(insn, 25, 7)) { | ||
164 | - case 0x6a: /* Exception generation / System */ | ||
165 | - if (insn & (1 << 24)) { | ||
166 | - unallocated_encoding(s); | ||
167 | - } else { | ||
168 | - disas_exc(s, insn); | ||
169 | - } | ||
170 | - break; | ||
171 | - default: | ||
172 | + if (s->current_el == 0) { | ||
173 | unallocated_encoding(s); | ||
174 | - break; | ||
175 | + return true; | ||
176 | } | ||
177 | + /* | ||
178 | + * The pre HVC helper handles cases when HVC gets trapped | ||
179 | + * as an undefined insn by runtime configuration. | ||
180 | + */ | ||
181 | + gen_a64_update_pc(s, 0); | ||
182 | + gen_helper_pre_hvc(cpu_env); | ||
183 | + /* Architecture requires ss advance before we do the actual work */ | ||
184 | + gen_ss_advance(s); | ||
185 | + gen_exception_insn_el(s, 4, EXCP_HVC, syn_aa64_hvc(a->imm), 2); | ||
186 | + return true; | ||
187 | +} | ||
188 | + | ||
189 | +static bool trans_SMC(DisasContext *s, arg_i *a) | ||
57 | +{ | 190 | +{ |
58 | + intptr_t row, col, oprsz = simd_oprsz(desc) / 4; | 191 | + if (s->current_el == 0) { |
59 | + uint64_t *pn = vpn, *pm = vpm; | 192 | + unallocated_encoding(s); |
60 | + uint32_t *zda = vzda, *zn = vzn; | ||
61 | + | ||
62 | + for (row = 0; row < oprsz; ) { | ||
63 | + uint64_t pa = pn[row >> 4]; | ||
64 | + do { | ||
65 | + if (pa & 1) { | ||
66 | + for (col = 0; col < oprsz; ) { | ||
67 | + uint64_t pb = pm[col >> 4]; | ||
68 | + do { | ||
69 | + if (pb & 1) { | ||
70 | + zda[tile_vslice_index(row) + H4(col)] += zn[H4(col)]; | ||
71 | + } | ||
72 | + pb >>= 4; | ||
73 | + } while (++col & 15); | ||
74 | + } | ||
75 | + } | ||
76 | + pa >>= 4; | ||
77 | + } while (++row & 15); | ||
78 | + } | ||
79 | +} | ||
80 | + | ||
81 | +void HELPER(sme_addha_d)(void *vzda, void *vzn, void *vpn, | ||
82 | + void *vpm, uint32_t desc) | ||
83 | +{ | ||
84 | + intptr_t row, col, oprsz = simd_oprsz(desc) / 8; | ||
85 | + uint8_t *pn = vpn, *pm = vpm; | ||
86 | + uint64_t *zda = vzda, *zn = vzn; | ||
87 | + | ||
88 | + for (row = 0; row < oprsz; ++row) { | ||
89 | + if (pn[H1(row)] & 1) { | ||
90 | + for (col = 0; col < oprsz; ++col) { | ||
91 | + if (pm[H1(col)] & 1) { | ||
92 | + zda[tile_vslice_index(row) + col] += zn[col]; | ||
93 | + } | ||
94 | + } | ||
95 | + } | ||
96 | + } | ||
97 | +} | ||
98 | + | ||
99 | +void HELPER(sme_addva_s)(void *vzda, void *vzn, void *vpn, | ||
100 | + void *vpm, uint32_t desc) | ||
101 | +{ | ||
102 | + intptr_t row, col, oprsz = simd_oprsz(desc) / 4; | ||
103 | + uint64_t *pn = vpn, *pm = vpm; | ||
104 | + uint32_t *zda = vzda, *zn = vzn; | ||
105 | + | ||
106 | + for (row = 0; row < oprsz; ) { | ||
107 | + uint64_t pa = pn[row >> 4]; | ||
108 | + do { | ||
109 | + if (pa & 1) { | ||
110 | + uint32_t zn_row = zn[H4(row)]; | ||
111 | + for (col = 0; col < oprsz; ) { | ||
112 | + uint64_t pb = pm[col >> 4]; | ||
113 | + do { | ||
114 | + if (pb & 1) { | ||
115 | + zda[tile_vslice_index(row) + H4(col)] += zn_row; | ||
116 | + } | ||
117 | + pb >>= 4; | ||
118 | + } while (++col & 15); | ||
119 | + } | ||
120 | + } | ||
121 | + pa >>= 4; | ||
122 | + } while (++row & 15); | ||
123 | + } | ||
124 | +} | ||
125 | + | ||
126 | +void HELPER(sme_addva_d)(void *vzda, void *vzn, void *vpn, | ||
127 | + void *vpm, uint32_t desc) | ||
128 | +{ | ||
129 | + intptr_t row, col, oprsz = simd_oprsz(desc) / 8; | ||
130 | + uint8_t *pn = vpn, *pm = vpm; | ||
131 | + uint64_t *zda = vzda, *zn = vzn; | ||
132 | + | ||
133 | + for (row = 0; row < oprsz; ++row) { | ||
134 | + if (pn[H1(row)] & 1) { | ||
135 | + uint64_t zn_row = zn[row]; | ||
136 | + for (col = 0; col < oprsz; ++col) { | ||
137 | + if (pm[H1(col)] & 1) { | ||
138 | + zda[tile_vslice_index(row) + col] += zn_row; | ||
139 | + } | ||
140 | + } | ||
141 | + } | ||
142 | + } | ||
143 | +} | ||
144 | diff --git a/target/arm/translate-sme.c b/target/arm/translate-sme.c | ||
145 | index XXXXXXX..XXXXXXX 100644 | ||
146 | --- a/target/arm/translate-sme.c | ||
147 | +++ b/target/arm/translate-sme.c | ||
148 | @@ -XXX,XX +XXX,XX @@ static bool do_ldst_r(DisasContext *s, arg_ldstr *a, GenLdStR *fn) | ||
149 | |||
150 | TRANS_FEAT(LDR, aa64_sme, do_ldst_r, a, gen_sve_ldr) | ||
151 | TRANS_FEAT(STR, aa64_sme, do_ldst_r, a, gen_sve_str) | ||
152 | + | ||
153 | +static bool do_adda(DisasContext *s, arg_adda *a, MemOp esz, | ||
154 | + gen_helper_gvec_4 *fn) | ||
155 | +{ | ||
156 | + int svl = streaming_vec_reg_size(s); | ||
157 | + uint32_t desc = simd_desc(svl, svl, 0); | ||
158 | + TCGv_ptr za, zn, pn, pm; | ||
159 | + | ||
160 | + if (!sme_smza_enabled_check(s)) { | ||
161 | + return true; | 193 | + return true; |
162 | + } | 194 | + } |
163 | + | 195 | + gen_a64_update_pc(s, 0); |
164 | + /* Sum XZR+zad to find ZAd. */ | 196 | + gen_helper_pre_smc(cpu_env, tcg_constant_i32(syn_aa64_smc(a->imm))); |
165 | + za = get_tile_rowcol(s, esz, 31, a->zad, false); | 197 | + /* Architecture requires ss advance before we do the actual work */ |
166 | + zn = vec_full_reg_ptr(s, a->zn); | 198 | + gen_ss_advance(s); |
167 | + pn = pred_full_reg_ptr(s, a->pn); | 199 | + gen_exception_insn_el(s, 4, EXCP_SMC, syn_aa64_smc(a->imm), 3); |
168 | + pm = pred_full_reg_ptr(s, a->pm); | ||
169 | + | ||
170 | + fn(za, zn, pn, pm, tcg_constant_i32(desc)); | ||
171 | + | ||
172 | + tcg_temp_free_ptr(za); | ||
173 | + tcg_temp_free_ptr(zn); | ||
174 | + tcg_temp_free_ptr(pn); | ||
175 | + tcg_temp_free_ptr(pm); | ||
176 | + return true; | 200 | + return true; |
177 | +} | 201 | +} |
178 | + | 202 | + |
179 | +TRANS_FEAT(ADDHA_s, aa64_sme, do_adda, a, MO_32, gen_helper_sme_addha_s) | 203 | +static bool trans_BRK(DisasContext *s, arg_i *a) |
180 | +TRANS_FEAT(ADDVA_s, aa64_sme, do_adda, a, MO_32, gen_helper_sme_addva_s) | 204 | +{ |
181 | +TRANS_FEAT(ADDHA_d, aa64_sme_i16i64, do_adda, a, MO_64, gen_helper_sme_addha_d) | 205 | + gen_exception_bkpt_insn(s, syn_aa64_bkpt(a->imm)); |
182 | +TRANS_FEAT(ADDVA_d, aa64_sme_i16i64, do_adda, a, MO_64, gen_helper_sme_addva_d) | 206 | + return true; |
207 | +} | ||
208 | + | ||
209 | +static bool trans_HLT(DisasContext *s, arg_i *a) | ||
210 | +{ | ||
211 | + /* | ||
212 | + * HLT. This has two purposes. | ||
213 | + * Architecturally, it is an external halting debug instruction. | ||
214 | + * Since QEMU doesn't implement external debug, we treat this as | ||
215 | + * it is required for halting debug disabled: it will UNDEF. | ||
216 | + * Secondly, "HLT 0xf000" is the A64 semihosting syscall instruction. | ||
217 | + */ | ||
218 | + if (semihosting_enabled(s->current_el == 0) && a->imm == 0xf000) { | ||
219 | + gen_exception_internal_insn(s, EXCP_SEMIHOST); | ||
220 | + } else { | ||
221 | + unallocated_encoding(s); | ||
222 | + } | ||
223 | + return true; | ||
224 | } | ||
225 | |||
226 | /* | ||
227 | @@ -XXX,XX +XXX,XX @@ static bool btype_destination_ok(uint32_t insn, bool bt, int btype) | ||
228 | static void disas_a64_legacy(DisasContext *s, uint32_t insn) | ||
229 | { | ||
230 | switch (extract32(insn, 25, 4)) { | ||
231 | - case 0xa: case 0xb: /* Branch, exception generation and system insns */ | ||
232 | - disas_b_exc_sys(s, insn); | ||
233 | - break; | ||
234 | case 0x4: | ||
235 | case 0x6: | ||
236 | case 0xc: | ||
183 | -- | 237 | -- |
184 | 2.25.1 | 238 | 2.34.1 | diff view generated by jsdifflib |
1 | From: Richard Henderson <richard.henderson@linaro.org> | 1 | Convert the instructions in the load/store exclusive (STXR, |
---|---|---|---|
2 | 2 | STLXR, LDXR, LDAXR) and load/store ordered (STLR, STLLR, | |
3 | This is an SVE instruction that operates using the SVE vector | 3 | LDAR, LDLAR) to decodetree. |
4 | length but that is present only if SME is implemented. | 4 | |
5 | 5 | Note that for STLR, STLLR, LDAR, LDLAR this fixes an under-decoding | |
6 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | 6 | in the legacy decoder where we were not checking that the RES1 bits |
7 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 7 | in the Rs and Rt2 fields were set. |
8 | Message-id: 20220708151540.18136-29-richard.henderson@linaro.org | 8 | |
9 | The new function ldst_iss_sf() is equivalent to the existing | ||
10 | disas_ldst_compute_iss_sf(), but it takes the pre-decoded 'ext' field | ||
11 | rather than taking an undecoded two-bit opc field and extracting | ||
12 | 'ext' from it. Once all the loads and stores have been converted | ||
13 | to decodetree disas_ldst_compute_iss_sf() will be unused and | ||
14 | can be deleted. | ||
15 | |||
9 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 16 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
17 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
18 | Message-id: 20230602155223.2040685-9-peter.maydell@linaro.org | ||
10 | --- | 19 | --- |
11 | target/arm/sve.decode | 20 +++++++++++++ | 20 | target/arm/tcg/a64.decode | 11 +++ |
12 | target/arm/translate-sve.c | 57 ++++++++++++++++++++++++++++++++++++++ | 21 | target/arm/tcg/translate-a64.c | 154 ++++++++++++++++++++------------- |
13 | 2 files changed, 77 insertions(+) | 22 | 2 files changed, 103 insertions(+), 62 deletions(-) |
14 | 23 | ||
15 | diff --git a/target/arm/sve.decode b/target/arm/sve.decode | 24 | diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode |
16 | index XXXXXXX..XXXXXXX 100644 | 25 | index XXXXXXX..XXXXXXX 100644 |
17 | --- a/target/arm/sve.decode | 26 | --- a/target/arm/tcg/a64.decode |
18 | +++ b/target/arm/sve.decode | 27 | +++ b/target/arm/tcg/a64.decode |
19 | @@ -XXX,XX +XXX,XX @@ BFMLALT_zzxw 01100100 11 1 ..... 0100.1 ..... ..... @rrxr_3a esz=2 | 28 | @@ -XXX,XX +XXX,XX @@ HLT 1101 0100 010 ................ 000 00 @i16 |
20 | 29 | # DCPS1 1101 0100 101 ................ 000 01 @i16 | |
21 | ### SVE2 floating-point bfloat16 dot-product (indexed) | 30 | # DCPS2 1101 0100 101 ................ 000 10 @i16 |
22 | BFDOT_zzxz 01100100 01 1 ..... 010000 ..... ..... @rrxr_2 esz=2 | 31 | # DCPS3 1101 0100 101 ................ 000 11 @i16 |
23 | + | 32 | + |
24 | +### SVE broadcast predicate element | 33 | +# Loads and stores |
25 | + | 34 | + |
26 | +&psel esz pd pn pm rv imm | 35 | +&stxr rn rt rt2 rs sz lasr |
27 | +%psel_rv 16:2 !function=plus_12 | 36 | +&stlr rn rt sz lasr |
28 | +%psel_imm_b 22:2 19:2 | 37 | +@stxr sz:2 ...... ... rs:5 lasr:1 rt2:5 rn:5 rt:5 &stxr |
29 | +%psel_imm_h 22:2 20:1 | 38 | +@stlr sz:2 ...... ... ..... lasr:1 ..... rn:5 rt:5 &stlr |
30 | +%psel_imm_s 22:2 | 39 | +STXR .. 001000 000 ..... . ..... ..... ..... @stxr # inc STLXR |
31 | +%psel_imm_d 23:1 | 40 | +LDXR .. 001000 010 ..... . ..... ..... ..... @stxr # inc LDAXR |
32 | +@psel ........ .. . ... .. .. pn:4 . pm:4 . pd:4 \ | 41 | +STLR .. 001000 100 11111 . 11111 ..... ..... @stlr # inc STLLR |
33 | + &psel rv=%psel_rv | 42 | +LDAR .. 001000 110 11111 . 11111 ..... ..... @stlr # inc LDLAR |
34 | + | 43 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c |
35 | +PSEL 00100101 .. 1 ..1 .. 01 .... 0 .... 0 .... \ | ||
36 | + @psel esz=0 imm=%psel_imm_b | ||
37 | +PSEL 00100101 .. 1 .10 .. 01 .... 0 .... 0 .... \ | ||
38 | + @psel esz=1 imm=%psel_imm_h | ||
39 | +PSEL 00100101 .. 1 100 .. 01 .... 0 .... 0 .... \ | ||
40 | + @psel esz=2 imm=%psel_imm_s | ||
41 | +PSEL 00100101 .1 1 000 .. 01 .... 0 .... 0 .... \ | ||
42 | + @psel esz=3 imm=%psel_imm_d | ||
43 | diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c | ||
44 | index XXXXXXX..XXXXXXX 100644 | 44 | index XXXXXXX..XXXXXXX 100644 |
45 | --- a/target/arm/translate-sve.c | 45 | --- a/target/arm/tcg/translate-a64.c |
46 | +++ b/target/arm/translate-sve.c | 46 | +++ b/target/arm/tcg/translate-a64.c |
47 | @@ -XXX,XX +XXX,XX @@ static bool do_BFMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sel) | 47 | @@ -XXX,XX +XXX,XX @@ static bool disas_ldst_compute_iss_sf(int size, bool is_signed, int opc) |
48 | 48 | return regsize == 64; | |
49 | TRANS_FEAT(BFMLALB_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, false) | 49 | } |
50 | TRANS_FEAT(BFMLALT_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, true) | 50 | |
51 | + | 51 | +static bool ldst_iss_sf(int size, bool sign, bool ext) |
52 | +static bool trans_PSEL(DisasContext *s, arg_psel *a) | 52 | +{ |
53 | +{ | 53 | + |
54 | + int vl = vec_full_reg_size(s); | 54 | + if (sign) { |
55 | + int pl = pred_gvec_reg_size(s); | 55 | + /* |
56 | + int elements = vl >> a->esz; | 56 | + * Signed loads are 64 bit results if we are not going to |
57 | + TCGv_i64 tmp, didx, dbit; | 57 | + * do a zero-extend from 32 to 64 after the load. |
58 | + TCGv_ptr ptr; | 58 | + * (For a store, sign and ext are always false.) |
59 | + | 59 | + */ |
60 | + if (!dc_isar_feature(aa64_sme, s)) { | 60 | + return !ext; |
61 | + } else { | ||
62 | + /* Unsigned loads/stores work at the specified size */ | ||
63 | + return size == MO_64; | ||
64 | + } | ||
65 | +} | ||
66 | + | ||
67 | +static bool trans_STXR(DisasContext *s, arg_stxr *a) | ||
68 | +{ | ||
69 | + if (a->rn == 31) { | ||
70 | + gen_check_sp_alignment(s); | ||
71 | + } | ||
72 | + if (a->lasr) { | ||
73 | + tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); | ||
74 | + } | ||
75 | + gen_store_exclusive(s, a->rs, a->rt, a->rt2, a->rn, a->sz, false); | ||
76 | + return true; | ||
77 | +} | ||
78 | + | ||
79 | +static bool trans_LDXR(DisasContext *s, arg_stxr *a) | ||
80 | +{ | ||
81 | + if (a->rn == 31) { | ||
82 | + gen_check_sp_alignment(s); | ||
83 | + } | ||
84 | + gen_load_exclusive(s, a->rt, a->rt2, a->rn, a->sz, false); | ||
85 | + if (a->lasr) { | ||
86 | + tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); | ||
87 | + } | ||
88 | + return true; | ||
89 | +} | ||
90 | + | ||
91 | +static bool trans_STLR(DisasContext *s, arg_stlr *a) | ||
92 | +{ | ||
93 | + TCGv_i64 clean_addr; | ||
94 | + MemOp memop; | ||
95 | + bool iss_sf = ldst_iss_sf(a->sz, false, false); | ||
96 | + | ||
97 | + /* | ||
98 | + * StoreLORelease is the same as Store-Release for QEMU, but | ||
99 | + * needs the feature-test. | ||
100 | + */ | ||
101 | + if (!a->lasr && !dc_isar_feature(aa64_lor, s)) { | ||
61 | + return false; | 102 | + return false; |
62 | + } | 103 | + } |
63 | + if (!sve_access_check(s)) { | 104 | + /* Generate ISS for non-exclusive accesses including LASR. */ |
64 | + return true; | 105 | + if (a->rn == 31) { |
65 | + } | 106 | + gen_check_sp_alignment(s); |
66 | + | 107 | + } |
67 | + tmp = tcg_temp_new_i64(); | 108 | + tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); |
68 | + dbit = tcg_temp_new_i64(); | 109 | + memop = check_ordered_align(s, a->rn, 0, true, a->sz); |
69 | + didx = tcg_temp_new_i64(); | 110 | + clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), |
70 | + ptr = tcg_temp_new_ptr(); | 111 | + true, a->rn != 31, memop); |
71 | + | 112 | + do_gpr_st(s, cpu_reg(s, a->rt), clean_addr, memop, true, a->rt, |
72 | + /* Compute the predicate element. */ | 113 | + iss_sf, a->lasr); |
73 | + tcg_gen_addi_i64(tmp, cpu_reg(s, a->rv), a->imm); | 114 | + return true; |
74 | + if (is_power_of_2(elements)) { | 115 | +} |
75 | + tcg_gen_andi_i64(tmp, tmp, elements - 1); | 116 | + |
76 | + } else { | 117 | +static bool trans_LDAR(DisasContext *s, arg_stlr *a) |
77 | + tcg_gen_remu_i64(tmp, tmp, tcg_constant_i64(elements)); | 118 | +{ |
78 | + } | 119 | + TCGv_i64 clean_addr; |
79 | + | 120 | + MemOp memop; |
80 | + /* Extract the predicate byte and bit indices. */ | 121 | + bool iss_sf = ldst_iss_sf(a->sz, false, false); |
81 | + tcg_gen_shli_i64(tmp, tmp, a->esz); | 122 | + |
82 | + tcg_gen_andi_i64(dbit, tmp, 7); | 123 | + /* LoadLOAcquire is the same as Load-Acquire for QEMU. */ |
83 | + tcg_gen_shri_i64(didx, tmp, 3); | 124 | + if (!a->lasr && !dc_isar_feature(aa64_lor, s)) { |
84 | + if (HOST_BIG_ENDIAN) { | 125 | + return false; |
85 | + tcg_gen_xori_i64(didx, didx, 7); | 126 | + } |
86 | + } | 127 | + /* Generate ISS for non-exclusive accesses including LASR. */ |
87 | + | 128 | + if (a->rn == 31) { |
88 | + /* Load the predicate word. */ | 129 | + gen_check_sp_alignment(s); |
89 | + tcg_gen_trunc_i64_ptr(ptr, didx); | 130 | + } |
90 | + tcg_gen_add_ptr(ptr, ptr, cpu_env); | 131 | + memop = check_ordered_align(s, a->rn, 0, false, a->sz); |
91 | + tcg_gen_ld8u_i64(tmp, ptr, pred_full_reg_offset(s, a->pm)); | 132 | + clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), |
92 | + | 133 | + false, a->rn != 31, memop); |
93 | + /* Extract the predicate bit and replicate to MO_64. */ | 134 | + do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, memop, false, true, |
94 | + tcg_gen_shr_i64(tmp, tmp, dbit); | 135 | + a->rt, iss_sf, a->lasr); |
95 | + tcg_gen_andi_i64(tmp, tmp, 1); | 136 | + tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); |
96 | + tcg_gen_neg_i64(tmp, tmp); | 137 | + return true; |
97 | + | 138 | +} |
98 | + /* Apply to either copy the source, or write zeros. */ | 139 | + |
99 | + tcg_gen_gvec_ands(MO_64, pred_full_reg_offset(s, a->pd), | 140 | /* Load/store exclusive |
100 | + pred_full_reg_offset(s, a->pn), tmp, pl, pl); | 141 | * |
101 | + | 142 | * 31 30 29 24 23 22 21 20 16 15 14 10 9 5 4 0 |
102 | + tcg_temp_free_i64(tmp); | 143 | @@ -XXX,XX +XXX,XX @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn) |
103 | + tcg_temp_free_i64(dbit); | 144 | int is_lasr = extract32(insn, 15, 1); |
104 | + tcg_temp_free_i64(didx); | 145 | int o2_L_o1_o0 = extract32(insn, 21, 3) * 2 | is_lasr; |
105 | + tcg_temp_free_ptr(ptr); | 146 | int size = extract32(insn, 30, 2); |
106 | + return true; | 147 | - TCGv_i64 clean_addr; |
107 | +} | 148 | - MemOp memop; |
149 | |||
150 | switch (o2_L_o1_o0) { | ||
151 | - case 0x0: /* STXR */ | ||
152 | - case 0x1: /* STLXR */ | ||
153 | - if (rn == 31) { | ||
154 | - gen_check_sp_alignment(s); | ||
155 | - } | ||
156 | - if (is_lasr) { | ||
157 | - tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); | ||
158 | - } | ||
159 | - gen_store_exclusive(s, rs, rt, rt2, rn, size, false); | ||
160 | - return; | ||
161 | - | ||
162 | - case 0x4: /* LDXR */ | ||
163 | - case 0x5: /* LDAXR */ | ||
164 | - if (rn == 31) { | ||
165 | - gen_check_sp_alignment(s); | ||
166 | - } | ||
167 | - gen_load_exclusive(s, rt, rt2, rn, size, false); | ||
168 | - if (is_lasr) { | ||
169 | - tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); | ||
170 | - } | ||
171 | - return; | ||
172 | - | ||
173 | - case 0x8: /* STLLR */ | ||
174 | - if (!dc_isar_feature(aa64_lor, s)) { | ||
175 | - break; | ||
176 | - } | ||
177 | - /* StoreLORelease is the same as Store-Release for QEMU. */ | ||
178 | - /* fall through */ | ||
179 | - case 0x9: /* STLR */ | ||
180 | - /* Generate ISS for non-exclusive accesses including LASR. */ | ||
181 | - if (rn == 31) { | ||
182 | - gen_check_sp_alignment(s); | ||
183 | - } | ||
184 | - tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); | ||
185 | - memop = check_ordered_align(s, rn, 0, true, size); | ||
186 | - clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), | ||
187 | - true, rn != 31, memop); | ||
188 | - do_gpr_st(s, cpu_reg(s, rt), clean_addr, memop, true, rt, | ||
189 | - disas_ldst_compute_iss_sf(size, false, 0), is_lasr); | ||
190 | - return; | ||
191 | - | ||
192 | - case 0xc: /* LDLAR */ | ||
193 | - if (!dc_isar_feature(aa64_lor, s)) { | ||
194 | - break; | ||
195 | - } | ||
196 | - /* LoadLOAcquire is the same as Load-Acquire for QEMU. */ | ||
197 | - /* fall through */ | ||
198 | - case 0xd: /* LDAR */ | ||
199 | - /* Generate ISS for non-exclusive accesses including LASR. */ | ||
200 | - if (rn == 31) { | ||
201 | - gen_check_sp_alignment(s); | ||
202 | - } | ||
203 | - memop = check_ordered_align(s, rn, 0, false, size); | ||
204 | - clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), | ||
205 | - false, rn != 31, memop); | ||
206 | - do_gpr_ld(s, cpu_reg(s, rt), clean_addr, memop, false, true, | ||
207 | - rt, disas_ldst_compute_iss_sf(size, false, 0), is_lasr); | ||
208 | - tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); | ||
209 | - return; | ||
210 | - | ||
211 | case 0x2: case 0x3: /* CASP / STXP */ | ||
212 | if (size & 2) { /* STXP / STLXP */ | ||
213 | if (rn == 31) { | ||
214 | @@ -XXX,XX +XXX,XX @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn) | ||
215 | return; | ||
216 | } | ||
217 | break; | ||
218 | + default: | ||
219 | + /* Handled in decodetree */ | ||
220 | + break; | ||
221 | } | ||
222 | unallocated_encoding(s); | ||
223 | } | ||
108 | -- | 224 | -- |
109 | 2.25.1 | 225 | 2.34.1 | diff view generated by jsdifflib |
1 | From: Richard Henderson <richard.henderson@linaro.org> | 1 | Convert the load/store exclusive pair (LDXP, STXP, LDAXP, STLXP), |
---|---|---|---|
2 | compare-and-swap pair (CASP, CASPA, CASPAL, CASPL), and compare-and- | ||
3 | swap (CAS, CASA, CASAL, CASL) instructions to decodetree. | ||
2 | 4 | ||
3 | We cannot reuse the SVE functions for LD[1-4] and ST[1-4], | 5 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
4 | because those functions accept only a Zreg register number. | 6 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
5 | For SME, we want to pass a pointer into ZA storage. | 7 | Message-id: 20230602155223.2040685-10-peter.maydell@linaro.org |
8 | --- | ||
9 | target/arm/tcg/a64.decode | 11 +++ | ||
10 | target/arm/tcg/translate-a64.c | 121 ++++++++++++--------------------- | ||
11 | 2 files changed, 53 insertions(+), 79 deletions(-) | ||
6 | 12 | ||
7 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | 13 | diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode |
8 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
9 | Message-id: 20220708151540.18136-21-richard.henderson@linaro.org | ||
10 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
11 | --- | ||
12 | target/arm/helper-sme.h | 82 +++++ | ||
13 | target/arm/sme.decode | 9 + | ||
14 | target/arm/sme_helper.c | 595 +++++++++++++++++++++++++++++++++++++ | ||
15 | target/arm/translate-sme.c | 70 +++++ | ||
16 | 4 files changed, 756 insertions(+) | ||
17 | |||
18 | diff --git a/target/arm/helper-sme.h b/target/arm/helper-sme.h | ||
19 | index XXXXXXX..XXXXXXX 100644 | 14 | index XXXXXXX..XXXXXXX 100644 |
20 | --- a/target/arm/helper-sme.h | 15 | --- a/target/arm/tcg/a64.decode |
21 | +++ b/target/arm/helper-sme.h | 16 | +++ b/target/arm/tcg/a64.decode |
22 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(sme_mova_cz_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | 17 | @@ -XXX,XX +XXX,XX @@ HLT 1101 0100 010 ................ 000 00 @i16 |
23 | DEF_HELPER_FLAGS_4(sme_mova_zc_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | 18 | &stlr rn rt sz lasr |
24 | DEF_HELPER_FLAGS_4(sme_mova_cz_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | 19 | @stxr sz:2 ...... ... rs:5 lasr:1 rt2:5 rn:5 rt:5 &stxr |
25 | DEF_HELPER_FLAGS_4(sme_mova_zc_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | 20 | @stlr sz:2 ...... ... ..... lasr:1 ..... rn:5 rt:5 &stlr |
21 | +%imm1_30_p2 30:1 !function=plus_2 | ||
22 | +@stxp .. ...... ... rs:5 lasr:1 rt2:5 rn:5 rt:5 &stxr sz=%imm1_30_p2 | ||
23 | STXR .. 001000 000 ..... . ..... ..... ..... @stxr # inc STLXR | ||
24 | LDXR .. 001000 010 ..... . ..... ..... ..... @stxr # inc LDAXR | ||
25 | STLR .. 001000 100 11111 . 11111 ..... ..... @stlr # inc STLLR | ||
26 | LDAR .. 001000 110 11111 . 11111 ..... ..... @stlr # inc LDLAR | ||
26 | + | 27 | + |
27 | +DEF_HELPER_FLAGS_5(sme_ld1b_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) | 28 | +STXP 1 . 001000 001 ..... . ..... ..... ..... @stxp # inc STLXP |
28 | +DEF_HELPER_FLAGS_5(sme_ld1b_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) | 29 | +LDXP 1 . 001000 011 ..... . ..... ..... ..... @stxp # inc LDAXP |
29 | +DEF_HELPER_FLAGS_5(sme_ld1b_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) | ||
30 | +DEF_HELPER_FLAGS_5(sme_ld1b_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) | ||
31 | + | 30 | + |
32 | +DEF_HELPER_FLAGS_5(sme_ld1h_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) | 31 | +# CASP, CASPA, CASPAL, CASPL (we don't decode the bits that determine |
33 | +DEF_HELPER_FLAGS_5(sme_ld1h_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) | 32 | +# acquire/release semantics because QEMU's cmpxchg always has those) |
34 | +DEF_HELPER_FLAGS_5(sme_ld1h_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) | 33 | +CASP 0 . 001000 0 - 1 rs:5 - 11111 rn:5 rt:5 sz=%imm1_30_p2 |
35 | +DEF_HELPER_FLAGS_5(sme_ld1h_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) | 34 | +# CAS, CASA, CASAL, CASL |
36 | +DEF_HELPER_FLAGS_5(sme_ld1h_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) | 35 | +CAS sz:2 001000 1 - 1 rs:5 - 11111 rn:5 rt:5 |
37 | +DEF_HELPER_FLAGS_5(sme_ld1h_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) | 36 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c |
38 | +DEF_HELPER_FLAGS_5(sme_ld1h_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) | ||
39 | +DEF_HELPER_FLAGS_5(sme_ld1h_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) | ||
40 | + | ||
41 | +DEF_HELPER_FLAGS_5(sme_ld1s_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) | ||
42 | +DEF_HELPER_FLAGS_5(sme_ld1s_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) | ||
43 | +DEF_HELPER_FLAGS_5(sme_ld1s_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) | ||
44 | +DEF_HELPER_FLAGS_5(sme_ld1s_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) | ||
45 | +DEF_HELPER_FLAGS_5(sme_ld1s_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) | ||
46 | +DEF_HELPER_FLAGS_5(sme_ld1s_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) | ||
47 | +DEF_HELPER_FLAGS_5(sme_ld1s_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) | ||
48 | +DEF_HELPER_FLAGS_5(sme_ld1s_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) | ||
49 | + | ||
50 | +DEF_HELPER_FLAGS_5(sme_ld1d_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) | ||
51 | +DEF_HELPER_FLAGS_5(sme_ld1d_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) | ||
52 | +DEF_HELPER_FLAGS_5(sme_ld1d_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) | ||
53 | +DEF_HELPER_FLAGS_5(sme_ld1d_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) | ||
54 | +DEF_HELPER_FLAGS_5(sme_ld1d_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) | ||
55 | +DEF_HELPER_FLAGS_5(sme_ld1d_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) | ||
56 | +DEF_HELPER_FLAGS_5(sme_ld1d_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) | ||
57 | +DEF_HELPER_FLAGS_5(sme_ld1d_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) | ||
58 | + | ||
59 | +DEF_HELPER_FLAGS_5(sme_ld1q_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) | ||
60 | +DEF_HELPER_FLAGS_5(sme_ld1q_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) | ||
61 | +DEF_HELPER_FLAGS_5(sme_ld1q_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) | ||
62 | +DEF_HELPER_FLAGS_5(sme_ld1q_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) | ||
63 | +DEF_HELPER_FLAGS_5(sme_ld1q_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) | ||
64 | +DEF_HELPER_FLAGS_5(sme_ld1q_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) | ||
65 | +DEF_HELPER_FLAGS_5(sme_ld1q_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) | ||
66 | +DEF_HELPER_FLAGS_5(sme_ld1q_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) | ||
67 | + | ||
68 | +DEF_HELPER_FLAGS_5(sme_st1b_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) | ||
69 | +DEF_HELPER_FLAGS_5(sme_st1b_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) | ||
70 | +DEF_HELPER_FLAGS_5(sme_st1b_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) | ||
71 | +DEF_HELPER_FLAGS_5(sme_st1b_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) | ||
72 | + | ||
73 | +DEF_HELPER_FLAGS_5(sme_st1h_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) | ||
74 | +DEF_HELPER_FLAGS_5(sme_st1h_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) | ||
75 | +DEF_HELPER_FLAGS_5(sme_st1h_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) | ||
76 | +DEF_HELPER_FLAGS_5(sme_st1h_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) | ||
77 | +DEF_HELPER_FLAGS_5(sme_st1h_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) | ||
78 | +DEF_HELPER_FLAGS_5(sme_st1h_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) | ||
79 | +DEF_HELPER_FLAGS_5(sme_st1h_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) | ||
80 | +DEF_HELPER_FLAGS_5(sme_st1h_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) | ||
81 | + | ||
82 | +DEF_HELPER_FLAGS_5(sme_st1s_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) | ||
83 | +DEF_HELPER_FLAGS_5(sme_st1s_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) | ||
84 | +DEF_HELPER_FLAGS_5(sme_st1s_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) | ||
85 | +DEF_HELPER_FLAGS_5(sme_st1s_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) | ||
86 | +DEF_HELPER_FLAGS_5(sme_st1s_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) | ||
87 | +DEF_HELPER_FLAGS_5(sme_st1s_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) | ||
88 | +DEF_HELPER_FLAGS_5(sme_st1s_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) | ||
89 | +DEF_HELPER_FLAGS_5(sme_st1s_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) | ||
90 | + | ||
91 | +DEF_HELPER_FLAGS_5(sme_st1d_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) | ||
92 | +DEF_HELPER_FLAGS_5(sme_st1d_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) | ||
93 | +DEF_HELPER_FLAGS_5(sme_st1d_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) | ||
94 | +DEF_HELPER_FLAGS_5(sme_st1d_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) | ||
95 | +DEF_HELPER_FLAGS_5(sme_st1d_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) | ||
96 | +DEF_HELPER_FLAGS_5(sme_st1d_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) | ||
97 | +DEF_HELPER_FLAGS_5(sme_st1d_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) | ||
98 | +DEF_HELPER_FLAGS_5(sme_st1d_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) | ||
99 | + | ||
100 | +DEF_HELPER_FLAGS_5(sme_st1q_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) | ||
101 | +DEF_HELPER_FLAGS_5(sme_st1q_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) | ||
102 | +DEF_HELPER_FLAGS_5(sme_st1q_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) | ||
103 | +DEF_HELPER_FLAGS_5(sme_st1q_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) | ||
104 | +DEF_HELPER_FLAGS_5(sme_st1q_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) | ||
105 | +DEF_HELPER_FLAGS_5(sme_st1q_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) | ||
106 | +DEF_HELPER_FLAGS_5(sme_st1q_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) | ||
107 | +DEF_HELPER_FLAGS_5(sme_st1q_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) | ||
108 | diff --git a/target/arm/sme.decode b/target/arm/sme.decode | ||
109 | index XXXXXXX..XXXXXXX 100644 | 37 | index XXXXXXX..XXXXXXX 100644 |
110 | --- a/target/arm/sme.decode | 38 | --- a/target/arm/tcg/translate-a64.c |
111 | +++ b/target/arm/sme.decode | 39 | +++ b/target/arm/tcg/translate-a64.c |
112 | @@ -XXX,XX +XXX,XX @@ MOVA 11000000 esz:2 00001 0 v:1 .. pg:3 0 za_imm:4 zr:5 \ | 40 | @@ -XXX,XX +XXX,XX @@ static bool trans_LDAR(DisasContext *s, arg_stlr *a) |
113 | &mova to_vec=1 rs=%mova_rs | 41 | return true; |
114 | MOVA 11000000 11 00001 1 v:1 .. pg:3 0 za_imm:4 zr:5 \ | ||
115 | &mova to_vec=1 rs=%mova_rs esz=4 | ||
116 | + | ||
117 | +### SME Memory | ||
118 | + | ||
119 | +&ldst esz rs pg rn rm za_imm v:bool st:bool | ||
120 | + | ||
121 | +LDST1 1110000 0 esz:2 st:1 rm:5 v:1 .. pg:3 rn:5 0 za_imm:4 \ | ||
122 | + &ldst rs=%mova_rs | ||
123 | +LDST1 1110000 111 st:1 rm:5 v:1 .. pg:3 rn:5 0 za_imm:4 \ | ||
124 | + &ldst esz=4 rs=%mova_rs | ||
125 | diff --git a/target/arm/sme_helper.c b/target/arm/sme_helper.c | ||
126 | index XXXXXXX..XXXXXXX 100644 | ||
127 | --- a/target/arm/sme_helper.c | ||
128 | +++ b/target/arm/sme_helper.c | ||
129 | @@ -XXX,XX +XXX,XX @@ | ||
130 | |||
131 | #include "qemu/osdep.h" | ||
132 | #include "cpu.h" | ||
133 | +#include "internals.h" | ||
134 | #include "tcg/tcg-gvec-desc.h" | ||
135 | #include "exec/helper-proto.h" | ||
136 | +#include "exec/cpu_ldst.h" | ||
137 | +#include "exec/exec-all.h" | ||
138 | #include "qemu/int128.h" | ||
139 | #include "vec_internal.h" | ||
140 | +#include "sve_ldst_internal.h" | ||
141 | |||
142 | /* ResetSVEState */ | ||
143 | void arm_reset_sve_state(CPUARMState *env) | ||
144 | @@ -XXX,XX +XXX,XX @@ void HELPER(sme_mova_zc_q)(void *vd, void *za, void *vg, uint32_t desc) | ||
145 | } | 42 | } |
146 | 43 | ||
147 | #undef DO_MOVA_Z | 44 | -/* Load/store exclusive |
148 | + | 45 | - * |
149 | +/* | 46 | - * 31 30 29 24 23 22 21 20 16 15 14 10 9 5 4 0 |
150 | + * Clear elements in a tile slice comprising len bytes. | 47 | - * +-----+-------------+----+---+----+------+----+-------+------+------+ |
151 | + */ | 48 | - * | sz | 0 0 1 0 0 0 | o2 | L | o1 | Rs | o0 | Rt2 | Rn | Rt | |
152 | + | 49 | - * +-----+-------------+----+---+----+------+----+-------+------+------+ |
153 | +typedef void ClearFn(void *ptr, size_t off, size_t len); | 50 | - * |
154 | + | 51 | - * sz: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64 bit |
155 | +static void clear_horizontal(void *ptr, size_t off, size_t len) | 52 | - * L: 0 -> store, 1 -> load |
156 | +{ | 53 | - * o2: 0 -> exclusive, 1 -> not |
157 | + memset(ptr + off, 0, len); | 54 | - * o1: 0 -> single register, 1 -> register pair |
55 | - * o0: 1 -> load-acquire/store-release, 0 -> not | ||
56 | - */ | ||
57 | -static void disas_ldst_excl(DisasContext *s, uint32_t insn) | ||
58 | +static bool trans_STXP(DisasContext *s, arg_stxr *a) | ||
59 | { | ||
60 | - int rt = extract32(insn, 0, 5); | ||
61 | - int rn = extract32(insn, 5, 5); | ||
62 | - int rt2 = extract32(insn, 10, 5); | ||
63 | - int rs = extract32(insn, 16, 5); | ||
64 | - int is_lasr = extract32(insn, 15, 1); | ||
65 | - int o2_L_o1_o0 = extract32(insn, 21, 3) * 2 | is_lasr; | ||
66 | - int size = extract32(insn, 30, 2); | ||
67 | - | ||
68 | - switch (o2_L_o1_o0) { | ||
69 | - case 0x2: case 0x3: /* CASP / STXP */ | ||
70 | - if (size & 2) { /* STXP / STLXP */ | ||
71 | - if (rn == 31) { | ||
72 | - gen_check_sp_alignment(s); | ||
73 | - } | ||
74 | - if (is_lasr) { | ||
75 | - tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); | ||
76 | - } | ||
77 | - gen_store_exclusive(s, rs, rt, rt2, rn, size, true); | ||
78 | - return; | ||
79 | - } | ||
80 | - if (rt2 == 31 | ||
81 | - && ((rt | rs) & 1) == 0 | ||
82 | - && dc_isar_feature(aa64_atomics, s)) { | ||
83 | - /* CASP / CASPL */ | ||
84 | - gen_compare_and_swap_pair(s, rs, rt, rn, size | 2); | ||
85 | - return; | ||
86 | - } | ||
87 | - break; | ||
88 | - | ||
89 | - case 0x6: case 0x7: /* CASPA / LDXP */ | ||
90 | - if (size & 2) { /* LDXP / LDAXP */ | ||
91 | - if (rn == 31) { | ||
92 | - gen_check_sp_alignment(s); | ||
93 | - } | ||
94 | - gen_load_exclusive(s, rt, rt2, rn, size, true); | ||
95 | - if (is_lasr) { | ||
96 | - tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); | ||
97 | - } | ||
98 | - return; | ||
99 | - } | ||
100 | - if (rt2 == 31 | ||
101 | - && ((rt | rs) & 1) == 0 | ||
102 | - && dc_isar_feature(aa64_atomics, s)) { | ||
103 | - /* CASPA / CASPAL */ | ||
104 | - gen_compare_and_swap_pair(s, rs, rt, rn, size | 2); | ||
105 | - return; | ||
106 | - } | ||
107 | - break; | ||
108 | - | ||
109 | - case 0xa: /* CAS */ | ||
110 | - case 0xb: /* CASL */ | ||
111 | - case 0xe: /* CASA */ | ||
112 | - case 0xf: /* CASAL */ | ||
113 | - if (rt2 == 31 && dc_isar_feature(aa64_atomics, s)) { | ||
114 | - gen_compare_and_swap(s, rs, rt, rn, size); | ||
115 | - return; | ||
116 | - } | ||
117 | - break; | ||
118 | - default: | ||
119 | - /* Handled in decodetree */ | ||
120 | - break; | ||
121 | + if (a->rn == 31) { | ||
122 | + gen_check_sp_alignment(s); | ||
123 | } | ||
124 | - unallocated_encoding(s); | ||
125 | + if (a->lasr) { | ||
126 | + tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); | ||
127 | + } | ||
128 | + gen_store_exclusive(s, a->rs, a->rt, a->rt2, a->rn, a->sz, true); | ||
129 | + return true; | ||
158 | +} | 130 | +} |
159 | + | 131 | + |
160 | +static void clear_vertical_b(void *vptr, size_t off, size_t len) | 132 | +static bool trans_LDXP(DisasContext *s, arg_stxr *a) |
161 | +{ | 133 | +{ |
162 | + for (size_t i = 0; i < len; ++i) { | 134 | + if (a->rn == 31) { |
163 | + *(uint8_t *)(vptr + tile_vslice_offset(i + off)) = 0; | 135 | + gen_check_sp_alignment(s); |
164 | + } | 136 | + } |
137 | + gen_load_exclusive(s, a->rt, a->rt2, a->rn, a->sz, true); | ||
138 | + if (a->lasr) { | ||
139 | + tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); | ||
140 | + } | ||
141 | + return true; | ||
165 | +} | 142 | +} |
166 | + | 143 | + |
167 | +static void clear_vertical_h(void *vptr, size_t off, size_t len) | 144 | +static bool trans_CASP(DisasContext *s, arg_CASP *a) |
168 | +{ | 145 | +{ |
169 | + for (size_t i = 0; i < len; i += 2) { | 146 | + if (!dc_isar_feature(aa64_atomics, s)) { |
170 | + *(uint16_t *)(vptr + tile_vslice_offset(i + off)) = 0; | 147 | + return false; |
171 | + } | 148 | + } |
149 | + if (((a->rt | a->rs) & 1) != 0) { | ||
150 | + return false; | ||
151 | + } | ||
152 | + | ||
153 | + gen_compare_and_swap_pair(s, a->rs, a->rt, a->rn, a->sz); | ||
154 | + return true; | ||
172 | +} | 155 | +} |
173 | + | 156 | + |
174 | +static void clear_vertical_s(void *vptr, size_t off, size_t len) | 157 | +static bool trans_CAS(DisasContext *s, arg_CAS *a) |
175 | +{ | 158 | +{ |
176 | + for (size_t i = 0; i < len; i += 4) { | 159 | + if (!dc_isar_feature(aa64_atomics, s)) { |
177 | + *(uint32_t *)(vptr + tile_vslice_offset(i + off)) = 0; | ||
178 | + } | ||
179 | +} | ||
180 | + | ||
181 | +static void clear_vertical_d(void *vptr, size_t off, size_t len) | ||
182 | +{ | ||
183 | + for (size_t i = 0; i < len; i += 8) { | ||
184 | + *(uint64_t *)(vptr + tile_vslice_offset(i + off)) = 0; | ||
185 | + } | ||
186 | +} | ||
187 | + | ||
188 | +static void clear_vertical_q(void *vptr, size_t off, size_t len) | ||
189 | +{ | ||
190 | + for (size_t i = 0; i < len; i += 16) { | ||
191 | + memset(vptr + tile_vslice_offset(i + off), 0, 16); | ||
192 | + } | ||
193 | +} | ||
194 | + | ||
195 | +/* | ||
196 | + * Copy elements from an array into a tile slice comprising len bytes. | ||
197 | + */ | ||
198 | + | ||
199 | +typedef void CopyFn(void *dst, const void *src, size_t len); | ||
200 | + | ||
201 | +static void copy_horizontal(void *dst, const void *src, size_t len) | ||
202 | +{ | ||
203 | + memcpy(dst, src, len); | ||
204 | +} | ||
205 | + | ||
206 | +static void copy_vertical_b(void *vdst, const void *vsrc, size_t len) | ||
207 | +{ | ||
208 | + const uint8_t *src = vsrc; | ||
209 | + uint8_t *dst = vdst; | ||
210 | + size_t i; | ||
211 | + | ||
212 | + for (i = 0; i < len; ++i) { | ||
213 | + dst[tile_vslice_index(i)] = src[i]; | ||
214 | + } | ||
215 | +} | ||
216 | + | ||
217 | +static void copy_vertical_h(void *vdst, const void *vsrc, size_t len) | ||
218 | +{ | ||
219 | + const uint16_t *src = vsrc; | ||
220 | + uint16_t *dst = vdst; | ||
221 | + size_t i; | ||
222 | + | ||
223 | + for (i = 0; i < len / 2; ++i) { | ||
224 | + dst[tile_vslice_index(i)] = src[i]; | ||
225 | + } | ||
226 | +} | ||
227 | + | ||
228 | +static void copy_vertical_s(void *vdst, const void *vsrc, size_t len) | ||
229 | +{ | ||
230 | + const uint32_t *src = vsrc; | ||
231 | + uint32_t *dst = vdst; | ||
232 | + size_t i; | ||
233 | + | ||
234 | + for (i = 0; i < len / 4; ++i) { | ||
235 | + dst[tile_vslice_index(i)] = src[i]; | ||
236 | + } | ||
237 | +} | ||
238 | + | ||
239 | +static void copy_vertical_d(void *vdst, const void *vsrc, size_t len) | ||
240 | +{ | ||
241 | + const uint64_t *src = vsrc; | ||
242 | + uint64_t *dst = vdst; | ||
243 | + size_t i; | ||
244 | + | ||
245 | + for (i = 0; i < len / 8; ++i) { | ||
246 | + dst[tile_vslice_index(i)] = src[i]; | ||
247 | + } | ||
248 | +} | ||
249 | + | ||
250 | +static void copy_vertical_q(void *vdst, const void *vsrc, size_t len) | ||
251 | +{ | ||
252 | + for (size_t i = 0; i < len; i += 16) { | ||
253 | + memcpy(vdst + tile_vslice_offset(i), vsrc + i, 16); | ||
254 | + } | ||
255 | +} | ||
256 | + | ||
257 | +/* | ||
258 | + * Host and TLB primitives for vertical tile slice addressing. | ||
259 | + */ | ||
260 | + | ||
261 | +#define DO_LD(NAME, TYPE, HOST, TLB) \ | ||
262 | +static inline void sme_##NAME##_v_host(void *za, intptr_t off, void *host) \ | ||
263 | +{ \ | ||
264 | + TYPE val = HOST(host); \ | ||
265 | + *(TYPE *)(za + tile_vslice_offset(off)) = val; \ | ||
266 | +} \ | ||
267 | +static inline void sme_##NAME##_v_tlb(CPUARMState *env, void *za, \ | ||
268 | + intptr_t off, target_ulong addr, uintptr_t ra) \ | ||
269 | +{ \ | ||
270 | + TYPE val = TLB(env, useronly_clean_ptr(addr), ra); \ | ||
271 | + *(TYPE *)(za + tile_vslice_offset(off)) = val; \ | ||
272 | +} | ||
273 | + | ||
274 | +#define DO_ST(NAME, TYPE, HOST, TLB) \ | ||
275 | +static inline void sme_##NAME##_v_host(void *za, intptr_t off, void *host) \ | ||
276 | +{ \ | ||
277 | + TYPE val = *(TYPE *)(za + tile_vslice_offset(off)); \ | ||
278 | + HOST(host, val); \ | ||
279 | +} \ | ||
280 | +static inline void sme_##NAME##_v_tlb(CPUARMState *env, void *za, \ | ||
281 | + intptr_t off, target_ulong addr, uintptr_t ra) \ | ||
282 | +{ \ | ||
283 | + TYPE val = *(TYPE *)(za + tile_vslice_offset(off)); \ | ||
284 | + TLB(env, useronly_clean_ptr(addr), val, ra); \ | ||
285 | +} | ||
286 | + | ||
287 | +/* | ||
288 | + * The ARMVectorReg elements are stored in host-endian 64-bit units. | ||
289 | + * For 128-bit quantities, the sequence defined by the Elem[] pseudocode | ||
290 | + * corresponds to storing the two 64-bit pieces in little-endian order. | ||
291 | + */ | ||
292 | +#define DO_LDQ(HNAME, VNAME, BE, HOST, TLB) \ | ||
293 | +static inline void HNAME##_host(void *za, intptr_t off, void *host) \ | ||
294 | +{ \ | ||
295 | + uint64_t val0 = HOST(host), val1 = HOST(host + 8); \ | ||
296 | + uint64_t *ptr = za + off; \ | ||
297 | + ptr[0] = BE ? val1 : val0, ptr[1] = BE ? val0 : val1; \ | ||
298 | +} \ | ||
299 | +static inline void VNAME##_v_host(void *za, intptr_t off, void *host) \ | ||
300 | +{ \ | ||
301 | + HNAME##_host(za, tile_vslice_offset(off), host); \ | ||
302 | +} \ | ||
303 | +static inline void HNAME##_tlb(CPUARMState *env, void *za, intptr_t off, \ | ||
304 | + target_ulong addr, uintptr_t ra) \ | ||
305 | +{ \ | ||
306 | + uint64_t val0 = TLB(env, useronly_clean_ptr(addr), ra); \ | ||
307 | + uint64_t val1 = TLB(env, useronly_clean_ptr(addr + 8), ra); \ | ||
308 | + uint64_t *ptr = za + off; \ | ||
309 | + ptr[0] = BE ? val1 : val0, ptr[1] = BE ? val0 : val1; \ | ||
310 | +} \ | ||
311 | +static inline void VNAME##_v_tlb(CPUARMState *env, void *za, intptr_t off, \ | ||
312 | + target_ulong addr, uintptr_t ra) \ | ||
313 | +{ \ | ||
314 | + HNAME##_tlb(env, za, tile_vslice_offset(off), addr, ra); \ | ||
315 | +} | ||
316 | + | ||
317 | +#define DO_STQ(HNAME, VNAME, BE, HOST, TLB) \ | ||
318 | +static inline void HNAME##_host(void *za, intptr_t off, void *host) \ | ||
319 | +{ \ | ||
320 | + uint64_t *ptr = za + off; \ | ||
321 | + HOST(host, ptr[BE]); \ | ||
322 | + HOST(host + 8, ptr[!BE]); \ | ||
323 | +} \ | ||
324 | +static inline void VNAME##_v_host(void *za, intptr_t off, void *host) \ | ||
325 | +{ \ | ||
326 | + HNAME##_host(za, tile_vslice_offset(off), host); \ | ||
327 | +} \ | ||
328 | +static inline void HNAME##_tlb(CPUARMState *env, void *za, intptr_t off, \ | ||
329 | + target_ulong addr, uintptr_t ra) \ | ||
330 | +{ \ | ||
331 | + uint64_t *ptr = za + off; \ | ||
332 | + TLB(env, useronly_clean_ptr(addr), ptr[BE], ra); \ | ||
333 | + TLB(env, useronly_clean_ptr(addr + 8), ptr[!BE], ra); \ | ||
334 | +} \ | ||
335 | +static inline void VNAME##_v_tlb(CPUARMState *env, void *za, intptr_t off, \ | ||
336 | + target_ulong addr, uintptr_t ra) \ | ||
337 | +{ \ | ||
338 | + HNAME##_tlb(env, za, tile_vslice_offset(off), addr, ra); \ | ||
339 | +} | ||
340 | + | ||
341 | +DO_LD(ld1b, uint8_t, ldub_p, cpu_ldub_data_ra) | ||
342 | +DO_LD(ld1h_be, uint16_t, lduw_be_p, cpu_lduw_be_data_ra) | ||
343 | +DO_LD(ld1h_le, uint16_t, lduw_le_p, cpu_lduw_le_data_ra) | ||
344 | +DO_LD(ld1s_be, uint32_t, ldl_be_p, cpu_ldl_be_data_ra) | ||
345 | +DO_LD(ld1s_le, uint32_t, ldl_le_p, cpu_ldl_le_data_ra) | ||
346 | +DO_LD(ld1d_be, uint64_t, ldq_be_p, cpu_ldq_be_data_ra) | ||
347 | +DO_LD(ld1d_le, uint64_t, ldq_le_p, cpu_ldq_le_data_ra) | ||
348 | + | ||
349 | +DO_LDQ(sve_ld1qq_be, sme_ld1q_be, 1, ldq_be_p, cpu_ldq_be_data_ra) | ||
350 | +DO_LDQ(sve_ld1qq_le, sme_ld1q_le, 0, ldq_le_p, cpu_ldq_le_data_ra) | ||
351 | + | ||
352 | +DO_ST(st1b, uint8_t, stb_p, cpu_stb_data_ra) | ||
353 | +DO_ST(st1h_be, uint16_t, stw_be_p, cpu_stw_be_data_ra) | ||
354 | +DO_ST(st1h_le, uint16_t, stw_le_p, cpu_stw_le_data_ra) | ||
355 | +DO_ST(st1s_be, uint32_t, stl_be_p, cpu_stl_be_data_ra) | ||
356 | +DO_ST(st1s_le, uint32_t, stl_le_p, cpu_stl_le_data_ra) | ||
357 | +DO_ST(st1d_be, uint64_t, stq_be_p, cpu_stq_be_data_ra) | ||
358 | +DO_ST(st1d_le, uint64_t, stq_le_p, cpu_stq_le_data_ra) | ||
359 | + | ||
360 | +DO_STQ(sve_st1qq_be, sme_st1q_be, 1, stq_be_p, cpu_stq_be_data_ra) | ||
361 | +DO_STQ(sve_st1qq_le, sme_st1q_le, 0, stq_le_p, cpu_stq_le_data_ra) | ||
362 | + | ||
363 | +#undef DO_LD | ||
364 | +#undef DO_ST | ||
365 | +#undef DO_LDQ | ||
366 | +#undef DO_STQ | ||
367 | + | ||
368 | +/* | ||
369 | + * Common helper for all contiguous predicated loads. | ||
370 | + */ | ||
371 | + | ||
372 | +static inline QEMU_ALWAYS_INLINE | ||
373 | +void sme_ld1(CPUARMState *env, void *za, uint64_t *vg, | ||
374 | + const target_ulong addr, uint32_t desc, const uintptr_t ra, | ||
375 | + const int esz, uint32_t mtedesc, bool vertical, | ||
376 | + sve_ldst1_host_fn *host_fn, | ||
377 | + sve_ldst1_tlb_fn *tlb_fn, | ||
378 | + ClearFn *clr_fn, | ||
379 | + CopyFn *cpy_fn) | ||
380 | +{ | ||
381 | + const intptr_t reg_max = simd_oprsz(desc); | ||
382 | + const intptr_t esize = 1 << esz; | ||
383 | + intptr_t reg_off, reg_last; | ||
384 | + SVEContLdSt info; | ||
385 | + void *host; | ||
386 | + int flags; | ||
387 | + | ||
388 | + /* Find the active elements. */ | ||
389 | + if (!sve_cont_ldst_elements(&info, addr, vg, reg_max, esz, esize)) { | ||
390 | + /* The entire predicate was false; no load occurs. */ | ||
391 | + clr_fn(za, 0, reg_max); | ||
392 | + return; | ||
393 | + } | ||
394 | + | ||
395 | + /* Probe the page(s). Exit with exception for any invalid page. */ | ||
396 | + sve_cont_ldst_pages(&info, FAULT_ALL, env, addr, MMU_DATA_LOAD, ra); | ||
397 | + | ||
398 | + /* Handle watchpoints for all active elements. */ | ||
399 | + sve_cont_ldst_watchpoints(&info, env, vg, addr, esize, esize, | ||
400 | + BP_MEM_READ, ra); | ||
401 | + | ||
402 | + /* | ||
403 | + * Handle mte checks for all active elements. | ||
404 | + * Since TBI must be set for MTE, !mtedesc => !mte_active. | ||
405 | + */ | ||
406 | + if (mtedesc) { | ||
407 | + sve_cont_ldst_mte_check(&info, env, vg, addr, esize, esize, | ||
408 | + mtedesc, ra); | ||
409 | + } | ||
410 | + | ||
411 | + flags = info.page[0].flags | info.page[1].flags; | ||
412 | + if (unlikely(flags != 0)) { | ||
413 | +#ifdef CONFIG_USER_ONLY | ||
414 | + g_assert_not_reached(); | ||
415 | +#else | ||
416 | + /* | ||
417 | + * At least one page includes MMIO. | ||
418 | + * Any bus operation can fail with cpu_transaction_failed, | ||
419 | + * which for ARM will raise SyncExternal. Perform the load | ||
420 | + * into scratch memory to preserve register state until the end. | ||
421 | + */ | ||
422 | + ARMVectorReg scratch = { }; | ||
423 | + | ||
424 | + reg_off = info.reg_off_first[0]; | ||
425 | + reg_last = info.reg_off_last[1]; | ||
426 | + if (reg_last < 0) { | ||
427 | + reg_last = info.reg_off_split; | ||
428 | + if (reg_last < 0) { | ||
429 | + reg_last = info.reg_off_last[0]; | ||
430 | + } | ||
431 | + } | ||
432 | + | ||
433 | + do { | ||
434 | + uint64_t pg = vg[reg_off >> 6]; | ||
435 | + do { | ||
436 | + if ((pg >> (reg_off & 63)) & 1) { | ||
437 | + tlb_fn(env, &scratch, reg_off, addr + reg_off, ra); | ||
438 | + } | ||
439 | + reg_off += esize; | ||
440 | + } while (reg_off & 63); | ||
441 | + } while (reg_off <= reg_last); | ||
442 | + | ||
443 | + cpy_fn(za, &scratch, reg_max); | ||
444 | + return; | ||
445 | +#endif | ||
446 | + } | ||
447 | + | ||
448 | + /* The entire operation is in RAM, on valid pages. */ | ||
449 | + | ||
450 | + reg_off = info.reg_off_first[0]; | ||
451 | + reg_last = info.reg_off_last[0]; | ||
452 | + host = info.page[0].host; | ||
453 | + | ||
454 | + if (!vertical) { | ||
455 | + memset(za, 0, reg_max); | ||
456 | + } else if (reg_off) { | ||
457 | + clr_fn(za, 0, reg_off); | ||
458 | + } | ||
459 | + | ||
460 | + while (reg_off <= reg_last) { | ||
461 | + uint64_t pg = vg[reg_off >> 6]; | ||
462 | + do { | ||
463 | + if ((pg >> (reg_off & 63)) & 1) { | ||
464 | + host_fn(za, reg_off, host + reg_off); | ||
465 | + } else if (vertical) { | ||
466 | + clr_fn(za, reg_off, esize); | ||
467 | + } | ||
468 | + reg_off += esize; | ||
469 | + } while (reg_off <= reg_last && (reg_off & 63)); | ||
470 | + } | ||
471 | + | ||
472 | + /* | ||
473 | + * Use the slow path to manage the cross-page misalignment. | ||
474 | + * But we know this is RAM and cannot trap. | ||
475 | + */ | ||
476 | + reg_off = info.reg_off_split; | ||
477 | + if (unlikely(reg_off >= 0)) { | ||
478 | + tlb_fn(env, za, reg_off, addr + reg_off, ra); | ||
479 | + } | ||
480 | + | ||
481 | + reg_off = info.reg_off_first[1]; | ||
482 | + if (unlikely(reg_off >= 0)) { | ||
483 | + reg_last = info.reg_off_last[1]; | ||
484 | + host = info.page[1].host; | ||
485 | + | ||
486 | + do { | ||
487 | + uint64_t pg = vg[reg_off >> 6]; | ||
488 | + do { | ||
489 | + if ((pg >> (reg_off & 63)) & 1) { | ||
490 | + host_fn(za, reg_off, host + reg_off); | ||
491 | + } else if (vertical) { | ||
492 | + clr_fn(za, reg_off, esize); | ||
493 | + } | ||
494 | + reg_off += esize; | ||
495 | + } while (reg_off & 63); | ||
496 | + } while (reg_off <= reg_last); | ||
497 | + } | ||
498 | +} | ||
499 | + | ||
500 | +static inline QEMU_ALWAYS_INLINE | ||
501 | +void sme_ld1_mte(CPUARMState *env, void *za, uint64_t *vg, | ||
502 | + target_ulong addr, uint32_t desc, uintptr_t ra, | ||
503 | + const int esz, bool vertical, | ||
504 | + sve_ldst1_host_fn *host_fn, | ||
505 | + sve_ldst1_tlb_fn *tlb_fn, | ||
506 | + ClearFn *clr_fn, | ||
507 | + CopyFn *cpy_fn) | ||
508 | +{ | ||
509 | + uint32_t mtedesc = desc >> (SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); | ||
510 | + int bit55 = extract64(addr, 55, 1); | ||
511 | + | ||
512 | + /* Remove mtedesc from the normal sve descriptor. */ | ||
513 | + desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); | ||
514 | + | ||
515 | + /* Perform gross MTE suppression early. */ | ||
516 | + if (!tbi_check(desc, bit55) || | ||
517 | + tcma_check(desc, bit55, allocation_tag_from_addr(addr))) { | ||
518 | + mtedesc = 0; | ||
519 | + } | ||
520 | + | ||
521 | + sme_ld1(env, za, vg, addr, desc, ra, esz, mtedesc, vertical, | ||
522 | + host_fn, tlb_fn, clr_fn, cpy_fn); | ||
523 | +} | ||
524 | + | ||
525 | +#define DO_LD(L, END, ESZ) \ | ||
526 | +void HELPER(sme_ld1##L##END##_h)(CPUARMState *env, void *za, void *vg, \ | ||
527 | + target_ulong addr, uint32_t desc) \ | ||
528 | +{ \ | ||
529 | + sme_ld1(env, za, vg, addr, desc, GETPC(), ESZ, 0, false, \ | ||
530 | + sve_ld1##L##L##END##_host, sve_ld1##L##L##END##_tlb, \ | ||
531 | + clear_horizontal, copy_horizontal); \ | ||
532 | +} \ | ||
533 | +void HELPER(sme_ld1##L##END##_v)(CPUARMState *env, void *za, void *vg, \ | ||
534 | + target_ulong addr, uint32_t desc) \ | ||
535 | +{ \ | ||
536 | + sme_ld1(env, za, vg, addr, desc, GETPC(), ESZ, 0, true, \ | ||
537 | + sme_ld1##L##END##_v_host, sme_ld1##L##END##_v_tlb, \ | ||
538 | + clear_vertical_##L, copy_vertical_##L); \ | ||
539 | +} \ | ||
540 | +void HELPER(sme_ld1##L##END##_h_mte)(CPUARMState *env, void *za, void *vg, \ | ||
541 | + target_ulong addr, uint32_t desc) \ | ||
542 | +{ \ | ||
543 | + sme_ld1_mte(env, za, vg, addr, desc, GETPC(), ESZ, false, \ | ||
544 | + sve_ld1##L##L##END##_host, sve_ld1##L##L##END##_tlb, \ | ||
545 | + clear_horizontal, copy_horizontal); \ | ||
546 | +} \ | ||
547 | +void HELPER(sme_ld1##L##END##_v_mte)(CPUARMState *env, void *za, void *vg, \ | ||
548 | + target_ulong addr, uint32_t desc) \ | ||
549 | +{ \ | ||
550 | + sme_ld1_mte(env, za, vg, addr, desc, GETPC(), ESZ, true, \ | ||
551 | + sme_ld1##L##END##_v_host, sme_ld1##L##END##_v_tlb, \ | ||
552 | + clear_vertical_##L, copy_vertical_##L); \ | ||
553 | +} | ||
554 | + | ||
555 | +DO_LD(b, , MO_8) | ||
556 | +DO_LD(h, _be, MO_16) | ||
557 | +DO_LD(h, _le, MO_16) | ||
558 | +DO_LD(s, _be, MO_32) | ||
559 | +DO_LD(s, _le, MO_32) | ||
560 | +DO_LD(d, _be, MO_64) | ||
561 | +DO_LD(d, _le, MO_64) | ||
562 | +DO_LD(q, _be, MO_128) | ||
563 | +DO_LD(q, _le, MO_128) | ||
564 | + | ||
565 | +#undef DO_LD | ||
566 | + | ||
567 | +/* | ||
568 | + * Common helper for all contiguous predicated stores. | ||
569 | + */ | ||
570 | + | ||
571 | +static inline QEMU_ALWAYS_INLINE | ||
572 | +void sme_st1(CPUARMState *env, void *za, uint64_t *vg, | ||
573 | + const target_ulong addr, uint32_t desc, const uintptr_t ra, | ||
574 | + const int esz, uint32_t mtedesc, bool vertical, | ||
575 | + sve_ldst1_host_fn *host_fn, | ||
576 | + sve_ldst1_tlb_fn *tlb_fn) | ||
577 | +{ | ||
578 | + const intptr_t reg_max = simd_oprsz(desc); | ||
579 | + const intptr_t esize = 1 << esz; | ||
580 | + intptr_t reg_off, reg_last; | ||
581 | + SVEContLdSt info; | ||
582 | + void *host; | ||
583 | + int flags; | ||
584 | + | ||
585 | + /* Find the active elements. */ | ||
586 | + if (!sve_cont_ldst_elements(&info, addr, vg, reg_max, esz, esize)) { | ||
587 | + /* The entire predicate was false; no store occurs. */ | ||
588 | + return; | ||
589 | + } | ||
590 | + | ||
591 | + /* Probe the page(s). Exit with exception for any invalid page. */ | ||
592 | + sve_cont_ldst_pages(&info, FAULT_ALL, env, addr, MMU_DATA_STORE, ra); | ||
593 | + | ||
594 | + /* Handle watchpoints for all active elements. */ | ||
595 | + sve_cont_ldst_watchpoints(&info, env, vg, addr, esize, esize, | ||
596 | + BP_MEM_WRITE, ra); | ||
597 | + | ||
598 | + /* | ||
599 | + * Handle mte checks for all active elements. | ||
600 | + * Since TBI must be set for MTE, !mtedesc => !mte_active. | ||
601 | + */ | ||
602 | + if (mtedesc) { | ||
603 | + sve_cont_ldst_mte_check(&info, env, vg, addr, esize, esize, | ||
604 | + mtedesc, ra); | ||
605 | + } | ||
606 | + | ||
607 | + flags = info.page[0].flags | info.page[1].flags; | ||
608 | + if (unlikely(flags != 0)) { | ||
609 | +#ifdef CONFIG_USER_ONLY | ||
610 | + g_assert_not_reached(); | ||
611 | +#else | ||
612 | + /* | ||
613 | + * At least one page includes MMIO. | ||
614 | + * Any bus operation can fail with cpu_transaction_failed, | ||
615 | + * which for ARM will raise SyncExternal. We cannot avoid | ||
616 | + * this fault and will leave with the store incomplete. | ||
617 | + */ | ||
618 | + reg_off = info.reg_off_first[0]; | ||
619 | + reg_last = info.reg_off_last[1]; | ||
620 | + if (reg_last < 0) { | ||
621 | + reg_last = info.reg_off_split; | ||
622 | + if (reg_last < 0) { | ||
623 | + reg_last = info.reg_off_last[0]; | ||
624 | + } | ||
625 | + } | ||
626 | + | ||
627 | + do { | ||
628 | + uint64_t pg = vg[reg_off >> 6]; | ||
629 | + do { | ||
630 | + if ((pg >> (reg_off & 63)) & 1) { | ||
631 | + tlb_fn(env, za, reg_off, addr + reg_off, ra); | ||
632 | + } | ||
633 | + reg_off += esize; | ||
634 | + } while (reg_off & 63); | ||
635 | + } while (reg_off <= reg_last); | ||
636 | + return; | ||
637 | +#endif | ||
638 | + } | ||
639 | + | ||
640 | + reg_off = info.reg_off_first[0]; | ||
641 | + reg_last = info.reg_off_last[0]; | ||
642 | + host = info.page[0].host; | ||
643 | + | ||
644 | + while (reg_off <= reg_last) { | ||
645 | + uint64_t pg = vg[reg_off >> 6]; | ||
646 | + do { | ||
647 | + if ((pg >> (reg_off & 63)) & 1) { | ||
648 | + host_fn(za, reg_off, host + reg_off); | ||
649 | + } | ||
650 | + reg_off += 1 << esz; | ||
651 | + } while (reg_off <= reg_last && (reg_off & 63)); | ||
652 | + } | ||
653 | + | ||
654 | + /* | ||
655 | + * Use the slow path to manage the cross-page misalignment. | ||
656 | + * But we know this is RAM and cannot trap. | ||
657 | + */ | ||
658 | + reg_off = info.reg_off_split; | ||
659 | + if (unlikely(reg_off >= 0)) { | ||
660 | + tlb_fn(env, za, reg_off, addr + reg_off, ra); | ||
661 | + } | ||
662 | + | ||
663 | + reg_off = info.reg_off_first[1]; | ||
664 | + if (unlikely(reg_off >= 0)) { | ||
665 | + reg_last = info.reg_off_last[1]; | ||
666 | + host = info.page[1].host; | ||
667 | + | ||
668 | + do { | ||
669 | + uint64_t pg = vg[reg_off >> 6]; | ||
670 | + do { | ||
671 | + if ((pg >> (reg_off & 63)) & 1) { | ||
672 | + host_fn(za, reg_off, host + reg_off); | ||
673 | + } | ||
674 | + reg_off += 1 << esz; | ||
675 | + } while (reg_off & 63); | ||
676 | + } while (reg_off <= reg_last); | ||
677 | + } | ||
678 | +} | ||
679 | + | ||
680 | +static inline QEMU_ALWAYS_INLINE | ||
681 | +void sme_st1_mte(CPUARMState *env, void *za, uint64_t *vg, target_ulong addr, | ||
682 | + uint32_t desc, uintptr_t ra, int esz, bool vertical, | ||
683 | + sve_ldst1_host_fn *host_fn, | ||
684 | + sve_ldst1_tlb_fn *tlb_fn) | ||
685 | +{ | ||
686 | + uint32_t mtedesc = desc >> (SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); | ||
687 | + int bit55 = extract64(addr, 55, 1); | ||
688 | + | ||
689 | + /* Remove mtedesc from the normal sve descriptor. */ | ||
690 | + desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); | ||
691 | + | ||
692 | + /* Perform gross MTE suppression early. */ | ||
693 | + if (!tbi_check(desc, bit55) || | ||
694 | + tcma_check(desc, bit55, allocation_tag_from_addr(addr))) { | ||
695 | + mtedesc = 0; | ||
696 | + } | ||
697 | + | ||
698 | + sme_st1(env, za, vg, addr, desc, ra, esz, mtedesc, | ||
699 | + vertical, host_fn, tlb_fn); | ||
700 | +} | ||
701 | + | ||
702 | +#define DO_ST(L, END, ESZ) \ | ||
703 | +void HELPER(sme_st1##L##END##_h)(CPUARMState *env, void *za, void *vg, \ | ||
704 | + target_ulong addr, uint32_t desc) \ | ||
705 | +{ \ | ||
706 | + sme_st1(env, za, vg, addr, desc, GETPC(), ESZ, 0, false, \ | ||
707 | + sve_st1##L##L##END##_host, sve_st1##L##L##END##_tlb); \ | ||
708 | +} \ | ||
709 | +void HELPER(sme_st1##L##END##_v)(CPUARMState *env, void *za, void *vg, \ | ||
710 | + target_ulong addr, uint32_t desc) \ | ||
711 | +{ \ | ||
712 | + sme_st1(env, za, vg, addr, desc, GETPC(), ESZ, 0, true, \ | ||
713 | + sme_st1##L##END##_v_host, sme_st1##L##END##_v_tlb); \ | ||
714 | +} \ | ||
715 | +void HELPER(sme_st1##L##END##_h_mte)(CPUARMState *env, void *za, void *vg, \ | ||
716 | + target_ulong addr, uint32_t desc) \ | ||
717 | +{ \ | ||
718 | + sme_st1_mte(env, za, vg, addr, desc, GETPC(), ESZ, false, \ | ||
719 | + sve_st1##L##L##END##_host, sve_st1##L##L##END##_tlb); \ | ||
720 | +} \ | ||
721 | +void HELPER(sme_st1##L##END##_v_mte)(CPUARMState *env, void *za, void *vg, \ | ||
722 | + target_ulong addr, uint32_t desc) \ | ||
723 | +{ \ | ||
724 | + sme_st1_mte(env, za, vg, addr, desc, GETPC(), ESZ, true, \ | ||
725 | + sme_st1##L##END##_v_host, sme_st1##L##END##_v_tlb); \ | ||
726 | +} | ||
727 | + | ||
728 | +DO_ST(b, , MO_8) | ||
729 | +DO_ST(h, _be, MO_16) | ||
730 | +DO_ST(h, _le, MO_16) | ||
731 | +DO_ST(s, _be, MO_32) | ||
732 | +DO_ST(s, _le, MO_32) | ||
733 | +DO_ST(d, _be, MO_64) | ||
734 | +DO_ST(d, _le, MO_64) | ||
735 | +DO_ST(q, _be, MO_128) | ||
736 | +DO_ST(q, _le, MO_128) | ||
737 | + | ||
738 | +#undef DO_ST | ||
739 | diff --git a/target/arm/translate-sme.c b/target/arm/translate-sme.c | ||
740 | index XXXXXXX..XXXXXXX 100644 | ||
741 | --- a/target/arm/translate-sme.c | ||
742 | +++ b/target/arm/translate-sme.c | ||
743 | @@ -XXX,XX +XXX,XX @@ static bool trans_MOVA(DisasContext *s, arg_MOVA *a) | ||
744 | |||
745 | return true; | ||
746 | } | ||
747 | + | ||
748 | +static bool trans_LDST1(DisasContext *s, arg_LDST1 *a) | ||
749 | +{ | ||
750 | + typedef void GenLdSt1(TCGv_env, TCGv_ptr, TCGv_ptr, TCGv, TCGv_i32); | ||
751 | + | ||
752 | + /* | ||
753 | + * Indexed by [esz][be][v][mte][st], which is (except for load/store) | ||
754 | + * also the order in which the elements appear in the function names, | ||
755 | + * and so how we must concatenate the pieces. | ||
756 | + */ | ||
757 | + | ||
758 | +#define FN_LS(F) { gen_helper_sme_ld1##F, gen_helper_sme_st1##F } | ||
759 | +#define FN_MTE(F) { FN_LS(F), FN_LS(F##_mte) } | ||
760 | +#define FN_HV(F) { FN_MTE(F##_h), FN_MTE(F##_v) } | ||
761 | +#define FN_END(L, B) { FN_HV(L), FN_HV(B) } | ||
762 | + | ||
763 | + static GenLdSt1 * const fns[5][2][2][2][2] = { | ||
764 | + FN_END(b, b), | ||
765 | + FN_END(h_le, h_be), | ||
766 | + FN_END(s_le, s_be), | ||
767 | + FN_END(d_le, d_be), | ||
768 | + FN_END(q_le, q_be), | ||
769 | + }; | ||
770 | + | ||
771 | +#undef FN_LS | ||
772 | +#undef FN_MTE | ||
773 | +#undef FN_HV | ||
774 | +#undef FN_END | ||
775 | + | ||
776 | + TCGv_ptr t_za, t_pg; | ||
777 | + TCGv_i64 addr; | ||
778 | + int svl, desc = 0; | ||
779 | + bool be = s->be_data == MO_BE; | ||
780 | + bool mte = s->mte_active[0]; | ||
781 | + | ||
782 | + if (!dc_isar_feature(aa64_sme, s)) { | ||
783 | + return false; | 160 | + return false; |
784 | + } | 161 | + } |
785 | + if (!sme_smza_enabled_check(s)) { | 162 | + gen_compare_and_swap(s, a->rs, a->rt, a->rn, a->sz); |
786 | + return true; | ||
787 | + } | ||
788 | + | ||
789 | + t_za = get_tile_rowcol(s, a->esz, a->rs, a->za_imm, a->v); | ||
790 | + t_pg = pred_full_reg_ptr(s, a->pg); | ||
791 | + addr = tcg_temp_new_i64(); | ||
792 | + | ||
793 | + tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->esz); | ||
794 | + tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn)); | ||
795 | + | ||
796 | + if (mte) { | ||
797 | + desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s)); | ||
798 | + desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); | ||
799 | + desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); | ||
800 | + desc = FIELD_DP32(desc, MTEDESC, WRITE, a->st); | ||
801 | + desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (1 << a->esz) - 1); | ||
802 | + desc <<= SVE_MTEDESC_SHIFT; | ||
803 | + } else { | ||
804 | + addr = clean_data_tbi(s, addr); | ||
805 | + } | ||
806 | + svl = streaming_vec_reg_size(s); | ||
807 | + desc = simd_desc(svl, svl, desc); | ||
808 | + | ||
809 | + fns[a->esz][be][a->v][mte][a->st](cpu_env, t_za, t_pg, addr, | ||
810 | + tcg_constant_i32(desc)); | ||
811 | + | ||
812 | + tcg_temp_free_ptr(t_za); | ||
813 | + tcg_temp_free_ptr(t_pg); | ||
814 | + tcg_temp_free_i64(addr); | ||
815 | + return true; | 163 | + return true; |
816 | +} | 164 | } |
165 | |||
166 | /* | ||
167 | @@ -XXX,XX +XXX,XX @@ static void disas_ldst_tag(DisasContext *s, uint32_t insn) | ||
168 | static void disas_ldst(DisasContext *s, uint32_t insn) | ||
169 | { | ||
170 | switch (extract32(insn, 24, 6)) { | ||
171 | - case 0x08: /* Load/store exclusive */ | ||
172 | - disas_ldst_excl(s, insn); | ||
173 | - break; | ||
174 | case 0x18: case 0x1c: /* Load register (literal) */ | ||
175 | disas_ld_lit(s, insn); | ||
176 | break; | ||
817 | -- | 177 | -- |
818 | 2.25.1 | 178 | 2.34.1 | diff view generated by jsdifflib |
1 | From: Richard Henderson <richard.henderson@linaro.org> | 1 | Convert the "Load register (literal)" instruction class to |
---|---|---|---|
2 | decodetree. | ||
2 | 3 | ||
3 | Mark ADR as a non-streaming instruction, which should trap | 4 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
4 | if full a64 support is not enabled in streaming mode. | 5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
6 | Message-id: 20230602155223.2040685-11-peter.maydell@linaro.org | ||
7 | --- | ||
8 | target/arm/tcg/a64.decode | 13 ++++++ | ||
9 | target/arm/tcg/translate-a64.c | 76 ++++++++++------------------------ | ||
10 | 2 files changed, 35 insertions(+), 54 deletions(-) | ||
5 | 11 | ||
6 | Removing entries from sme-fa64.decode is an easy way to see | 12 | diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode |
7 | what remains to be done. | ||
8 | |||
9 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | ||
10 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
11 | Message-id: 20220708151540.18136-5-richard.henderson@linaro.org | ||
12 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
13 | --- | ||
14 | target/arm/translate.h | 7 +++++++ | ||
15 | target/arm/sme-fa64.decode | 1 - | ||
16 | target/arm/translate-sve.c | 8 ++++---- | ||
17 | 3 files changed, 11 insertions(+), 5 deletions(-) | ||
18 | |||
19 | diff --git a/target/arm/translate.h b/target/arm/translate.h | ||
20 | index XXXXXXX..XXXXXXX 100644 | 13 | index XXXXXXX..XXXXXXX 100644 |
21 | --- a/target/arm/translate.h | 14 | --- a/target/arm/tcg/a64.decode |
22 | +++ b/target/arm/translate.h | 15 | +++ b/target/arm/tcg/a64.decode |
23 | @@ -XXX,XX +XXX,XX @@ uint64_t asimd_imm_const(uint32_t imm, int cmode, int op); | 16 | @@ -XXX,XX +XXX,XX @@ LDXP 1 . 001000 011 ..... . ..... ..... ..... @stxp # inc LDAXP |
24 | static bool trans_##NAME(DisasContext *s, arg_##NAME *a) \ | 17 | CASP 0 . 001000 0 - 1 rs:5 - 11111 rn:5 rt:5 sz=%imm1_30_p2 |
25 | { return dc_isar_feature(FEAT, s) && FUNC(s, __VA_ARGS__); } | 18 | # CAS, CASA, CASAL, CASL |
26 | 19 | CAS sz:2 001000 1 - 1 rs:5 - 11111 rn:5 rt:5 | |
27 | +#define TRANS_FEAT_NONSTREAMING(NAME, FEAT, FUNC, ...) \ | ||
28 | + static bool trans_##NAME(DisasContext *s, arg_##NAME *a) \ | ||
29 | + { \ | ||
30 | + s->is_nonstreaming = true; \ | ||
31 | + return dc_isar_feature(FEAT, s) && FUNC(s, __VA_ARGS__); \ | ||
32 | + } | ||
33 | + | 20 | + |
34 | #endif /* TARGET_ARM_TRANSLATE_H */ | 21 | +&ldlit rt imm sz sign |
35 | diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode | 22 | +@ldlit .. ... . .. ................... rt:5 &ldlit imm=%imm19 |
23 | + | ||
24 | +LD_lit 00 011 0 00 ................... ..... @ldlit sz=2 sign=0 | ||
25 | +LD_lit 01 011 0 00 ................... ..... @ldlit sz=3 sign=0 | ||
26 | +LD_lit 10 011 0 00 ................... ..... @ldlit sz=2 sign=1 | ||
27 | +LD_lit_v 00 011 1 00 ................... ..... @ldlit sz=2 sign=0 | ||
28 | +LD_lit_v 01 011 1 00 ................... ..... @ldlit sz=3 sign=0 | ||
29 | +LD_lit_v 10 011 1 00 ................... ..... @ldlit sz=4 sign=0 | ||
30 | + | ||
31 | +# PRFM | ||
32 | +NOP 11 011 0 00 ------------------- ----- | ||
33 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
36 | index XXXXXXX..XXXXXXX 100644 | 34 | index XXXXXXX..XXXXXXX 100644 |
37 | --- a/target/arm/sme-fa64.decode | 35 | --- a/target/arm/tcg/translate-a64.c |
38 | +++ b/target/arm/sme-fa64.decode | 36 | +++ b/target/arm/tcg/translate-a64.c |
39 | @@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS | 37 | @@ -XXX,XX +XXX,XX @@ static bool trans_CAS(DisasContext *s, arg_CAS *a) |
40 | # --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset) | 38 | return true; |
41 | # --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm) | ||
42 | |||
43 | -FAIL 0000 0100 --1- ---- 1010 ---- ---- ---- # ADR | ||
44 | FAIL 0000 0100 --1- ---- 1011 -0-- ---- ---- # FTSSEL, FEXPA | ||
45 | FAIL 0000 0101 --10 0001 100- ---- ---- ---- # COMPACT | ||
46 | FAIL 0010 0101 --01 100- 1111 000- ---0 ---- # RDFFR, RDFFRS | ||
47 | diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c | ||
48 | index XXXXXXX..XXXXXXX 100644 | ||
49 | --- a/target/arm/translate-sve.c | ||
50 | +++ b/target/arm/translate-sve.c | ||
51 | @@ -XXX,XX +XXX,XX @@ static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn) | ||
52 | return gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, a->imm); | ||
53 | } | 39 | } |
54 | 40 | ||
55 | -TRANS_FEAT(ADR_p32, aa64_sve, do_adr, a, gen_helper_sve_adr_p32) | 41 | -/* |
56 | -TRANS_FEAT(ADR_p64, aa64_sve, do_adr, a, gen_helper_sve_adr_p64) | 42 | - * Load register (literal) |
57 | -TRANS_FEAT(ADR_s32, aa64_sve, do_adr, a, gen_helper_sve_adr_s32) | 43 | - * |
58 | -TRANS_FEAT(ADR_u32, aa64_sve, do_adr, a, gen_helper_sve_adr_u32) | 44 | - * 31 30 29 27 26 25 24 23 5 4 0 |
59 | +TRANS_FEAT_NONSTREAMING(ADR_p32, aa64_sve, do_adr, a, gen_helper_sve_adr_p32) | 45 | - * +-----+-------+---+-----+-------------------+-------+ |
60 | +TRANS_FEAT_NONSTREAMING(ADR_p64, aa64_sve, do_adr, a, gen_helper_sve_adr_p64) | 46 | - * | opc | 0 1 1 | V | 0 0 | imm19 | Rt | |
61 | +TRANS_FEAT_NONSTREAMING(ADR_s32, aa64_sve, do_adr, a, gen_helper_sve_adr_s32) | 47 | - * +-----+-------+---+-----+-------------------+-------+ |
62 | +TRANS_FEAT_NONSTREAMING(ADR_u32, aa64_sve, do_adr, a, gen_helper_sve_adr_u32) | 48 | - * |
49 | - * V: 1 -> vector (simd/fp) | ||
50 | - * opc (non-vector): 00 -> 32 bit, 01 -> 64 bit, | ||
51 | - * 10-> 32 bit signed, 11 -> prefetch | ||
52 | - * opc (vector): 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit (11 unallocated) | ||
53 | - */ | ||
54 | -static void disas_ld_lit(DisasContext *s, uint32_t insn) | ||
55 | +static bool trans_LD_lit(DisasContext *s, arg_ldlit *a) | ||
56 | { | ||
57 | - int rt = extract32(insn, 0, 5); | ||
58 | - int64_t imm = sextract32(insn, 5, 19) << 2; | ||
59 | - bool is_vector = extract32(insn, 26, 1); | ||
60 | - int opc = extract32(insn, 30, 2); | ||
61 | - bool is_signed = false; | ||
62 | - int size = 2; | ||
63 | - TCGv_i64 tcg_rt, clean_addr; | ||
64 | + bool iss_sf = ldst_iss_sf(a->sz, a->sign, false); | ||
65 | + TCGv_i64 tcg_rt = cpu_reg(s, a->rt); | ||
66 | + TCGv_i64 clean_addr = tcg_temp_new_i64(); | ||
67 | + MemOp memop = finalize_memop(s, a->sz + a->sign * MO_SIGN); | ||
68 | + | ||
69 | + gen_pc_plus_diff(s, clean_addr, a->imm); | ||
70 | + do_gpr_ld(s, tcg_rt, clean_addr, memop, | ||
71 | + false, true, a->rt, iss_sf, false); | ||
72 | + return true; | ||
73 | +} | ||
74 | + | ||
75 | +static bool trans_LD_lit_v(DisasContext *s, arg_ldlit *a) | ||
76 | +{ | ||
77 | + /* Load register (literal), vector version */ | ||
78 | + TCGv_i64 clean_addr; | ||
79 | MemOp memop; | ||
80 | |||
81 | - if (is_vector) { | ||
82 | - if (opc == 3) { | ||
83 | - unallocated_encoding(s); | ||
84 | - return; | ||
85 | - } | ||
86 | - size = 2 + opc; | ||
87 | - if (!fp_access_check(s)) { | ||
88 | - return; | ||
89 | - } | ||
90 | - memop = finalize_memop_asimd(s, size); | ||
91 | - } else { | ||
92 | - if (opc == 3) { | ||
93 | - /* PRFM (literal) : prefetch */ | ||
94 | - return; | ||
95 | - } | ||
96 | - size = 2 + extract32(opc, 0, 1); | ||
97 | - is_signed = extract32(opc, 1, 1); | ||
98 | - memop = finalize_memop(s, size + is_signed * MO_SIGN); | ||
99 | + if (!fp_access_check(s)) { | ||
100 | + return true; | ||
101 | } | ||
102 | - | ||
103 | - tcg_rt = cpu_reg(s, rt); | ||
104 | - | ||
105 | + memop = finalize_memop_asimd(s, a->sz); | ||
106 | clean_addr = tcg_temp_new_i64(); | ||
107 | - gen_pc_plus_diff(s, clean_addr, imm); | ||
108 | - | ||
109 | - if (is_vector) { | ||
110 | - do_fp_ld(s, rt, clean_addr, memop); | ||
111 | - } else { | ||
112 | - /* Only unsigned 32bit loads target 32bit registers. */ | ||
113 | - bool iss_sf = opc != 0; | ||
114 | - do_gpr_ld(s, tcg_rt, clean_addr, memop, false, true, rt, iss_sf, false); | ||
115 | - } | ||
116 | + gen_pc_plus_diff(s, clean_addr, a->imm); | ||
117 | + do_fp_ld(s, a->rt, clean_addr, memop); | ||
118 | + return true; | ||
119 | } | ||
63 | 120 | ||
64 | /* | 121 | /* |
65 | *** SVE Integer Misc - Unpredicated Group | 122 | @@ -XXX,XX +XXX,XX @@ static void disas_ldst_tag(DisasContext *s, uint32_t insn) |
123 | static void disas_ldst(DisasContext *s, uint32_t insn) | ||
124 | { | ||
125 | switch (extract32(insn, 24, 6)) { | ||
126 | - case 0x18: case 0x1c: /* Load register (literal) */ | ||
127 | - disas_ld_lit(s, insn); | ||
128 | - break; | ||
129 | case 0x28: case 0x29: | ||
130 | case 0x2c: case 0x2d: /* Load/store pair (all forms) */ | ||
131 | disas_ldst_pair(s, insn); | ||
66 | -- | 132 | -- |
67 | 2.25.1 | 133 | 2.34.1 | diff view generated by jsdifflib |
1 | From: Richard Henderson <richard.henderson@linaro.org> | 1 | Convert the load/store register pair insns (LDP, STP, |
---|---|---|---|
2 | LDNP, STNP, LDPSW, STGP) to decodetree. | ||
2 | 3 | ||
3 | This new behaviour is in the ARM pseudocode function | 4 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
4 | AArch64.CheckFPAdvSIMDEnabled, which applies to AArch32 | 5 | Message-id: 20230602155223.2040685-12-peter.maydell@linaro.org |
5 | via AArch32.CheckAdvSIMDOrFPEnabled when the EL to which | 6 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
6 | the trap would be delivered is in AArch64 mode. | 7 | --- |
8 | target/arm/tcg/a64.decode | 61 +++++ | ||
9 | target/arm/tcg/translate-a64.c | 422 ++++++++++++++++----------------- | ||
10 | 2 files changed, 268 insertions(+), 215 deletions(-) | ||
7 | 11 | ||
8 | Given that ARMv9 drops support for AArch32 outside EL0, the trap EL | 12 | diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode |
9 | detection ought to be trivially true, but the pseudocode still contains | ||
10 | a number of conditions, and QEMU has not yet committed to dropping A32 | ||
11 | support for EL[12] when v9 features are present. | ||
12 | |||
13 | Since the computation of SME_TRAP_NONSTREAMING is necessarily different | ||
14 | for the two modes, we might as well preserve bits within TBFLAG_ANY and | ||
15 | allocate separate bits within TBFLAG_A32 and TBFLAG_A64 instead. | ||
16 | |||
17 | Note that DDI0616A.a has typos for bits [22:21] of LD1RO in the table | ||
18 | of instructions illegal in streaming mode. | ||
19 | |||
20 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | ||
21 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
22 | Message-id: 20220708151540.18136-4-richard.henderson@linaro.org | ||
23 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
24 | --- | ||
25 | target/arm/cpu.h | 7 +++ | ||
26 | target/arm/translate.h | 4 ++ | ||
27 | target/arm/sme-fa64.decode | 90 ++++++++++++++++++++++++++++++++++++++ | ||
28 | target/arm/helper.c | 41 +++++++++++++++++ | ||
29 | target/arm/translate-a64.c | 40 ++++++++++++++++- | ||
30 | target/arm/translate-vfp.c | 12 +++++ | ||
31 | target/arm/translate.c | 2 + | ||
32 | target/arm/meson.build | 1 + | ||
33 | 8 files changed, 195 insertions(+), 2 deletions(-) | ||
34 | create mode 100644 target/arm/sme-fa64.decode | ||
35 | |||
36 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h | ||
37 | index XXXXXXX..XXXXXXX 100644 | 13 | index XXXXXXX..XXXXXXX 100644 |
38 | --- a/target/arm/cpu.h | 14 | --- a/target/arm/tcg/a64.decode |
39 | +++ b/target/arm/cpu.h | 15 | +++ b/target/arm/tcg/a64.decode |
40 | @@ -XXX,XX +XXX,XX @@ FIELD(TBFLAG_A32, HSTR_ACTIVE, 9, 1) | 16 | @@ -XXX,XX +XXX,XX @@ LD_lit_v 10 011 1 00 ................... ..... @ldlit sz=4 sign=0 |
41 | * the same thing as the current security state of the processor! | 17 | |
42 | */ | 18 | # PRFM |
43 | FIELD(TBFLAG_A32, NS, 10, 1) | 19 | NOP 11 011 0 00 ------------------- ----- |
44 | +/* | 20 | + |
45 | + * Indicates that SME Streaming mode is active, and SMCR_ELx.FA64 is not. | 21 | +&ldstpair rt2 rt rn imm sz sign w p |
46 | + * This requires an SME trap from AArch32 mode when using NEON. | 22 | +@ldstpair .. ... . ... . imm:s7 rt2:5 rn:5 rt:5 &ldstpair |
47 | + */ | 23 | + |
48 | +FIELD(TBFLAG_A32, SME_TRAP_NONSTREAMING, 11, 1) | 24 | +# STNP, LDNP: Signed offset, non-temporal hint. We don't emulate caches |
49 | 25 | +# so we ignore hints about data access patterns, and handle these like | |
50 | /* | 26 | +# plain signed offset. |
51 | * Bit usage when in AArch32 state, for M-profile only. | 27 | +STP 00 101 0 000 0 ....... ..... ..... ..... @ldstpair sz=2 sign=0 p=0 w=0 |
52 | @@ -XXX,XX +XXX,XX @@ FIELD(TBFLAG_A64, SMEEXC_EL, 20, 2) | 28 | +LDP 00 101 0 000 1 ....... ..... ..... ..... @ldstpair sz=2 sign=0 p=0 w=0 |
53 | FIELD(TBFLAG_A64, PSTATE_SM, 22, 1) | 29 | +STP 10 101 0 000 0 ....... ..... ..... ..... @ldstpair sz=3 sign=0 p=0 w=0 |
54 | FIELD(TBFLAG_A64, PSTATE_ZA, 23, 1) | 30 | +LDP 10 101 0 000 1 ....... ..... ..... ..... @ldstpair sz=3 sign=0 p=0 w=0 |
55 | FIELD(TBFLAG_A64, SVL, 24, 4) | 31 | +STP_v 00 101 1 000 0 ....... ..... ..... ..... @ldstpair sz=2 sign=0 p=0 w=0 |
56 | +/* Indicates that SME Streaming mode is active, and SMCR_ELx.FA64 is not. */ | 32 | +LDP_v 00 101 1 000 1 ....... ..... ..... ..... @ldstpair sz=2 sign=0 p=0 w=0 |
57 | +FIELD(TBFLAG_A64, SME_TRAP_NONSTREAMING, 28, 1) | 33 | +STP_v 01 101 1 000 0 ....... ..... ..... ..... @ldstpair sz=3 sign=0 p=0 w=0 |
58 | 34 | +LDP_v 01 101 1 000 1 ....... ..... ..... ..... @ldstpair sz=3 sign=0 p=0 w=0 | |
59 | /* | 35 | +STP_v 10 101 1 000 0 ....... ..... ..... ..... @ldstpair sz=4 sign=0 p=0 w=0 |
60 | * Helpers for using the above. | 36 | +LDP_v 10 101 1 000 1 ....... ..... ..... ..... @ldstpair sz=4 sign=0 p=0 w=0 |
61 | diff --git a/target/arm/translate.h b/target/arm/translate.h | 37 | + |
38 | +# STP and LDP: post-indexed | ||
39 | +STP 00 101 0 001 0 ....... ..... ..... ..... @ldstpair sz=2 sign=0 p=1 w=1 | ||
40 | +LDP 00 101 0 001 1 ....... ..... ..... ..... @ldstpair sz=2 sign=0 p=1 w=1 | ||
41 | +LDP 01 101 0 001 1 ....... ..... ..... ..... @ldstpair sz=2 sign=1 p=1 w=1 | ||
42 | +STP 10 101 0 001 0 ....... ..... ..... ..... @ldstpair sz=3 sign=0 p=1 w=1 | ||
43 | +LDP 10 101 0 001 1 ....... ..... ..... ..... @ldstpair sz=3 sign=0 p=1 w=1 | ||
44 | +STP_v 00 101 1 001 0 ....... ..... ..... ..... @ldstpair sz=2 sign=0 p=1 w=1 | ||
45 | +LDP_v 00 101 1 001 1 ....... ..... ..... ..... @ldstpair sz=2 sign=0 p=1 w=1 | ||
46 | +STP_v 01 101 1 001 0 ....... ..... ..... ..... @ldstpair sz=3 sign=0 p=1 w=1 | ||
47 | +LDP_v 01 101 1 001 1 ....... ..... ..... ..... @ldstpair sz=3 sign=0 p=1 w=1 | ||
48 | +STP_v 10 101 1 001 0 ....... ..... ..... ..... @ldstpair sz=4 sign=0 p=1 w=1 | ||
49 | +LDP_v 10 101 1 001 1 ....... ..... ..... ..... @ldstpair sz=4 sign=0 p=1 w=1 | ||
50 | + | ||
51 | +# STP and LDP: offset | ||
52 | +STP 00 101 0 010 0 ....... ..... ..... ..... @ldstpair sz=2 sign=0 p=0 w=0 | ||
53 | +LDP 00 101 0 010 1 ....... ..... ..... ..... @ldstpair sz=2 sign=0 p=0 w=0 | ||
54 | +LDP 01 101 0 010 1 ....... ..... ..... ..... @ldstpair sz=2 sign=1 p=0 w=0 | ||
55 | +STP 10 101 0 010 0 ....... ..... ..... ..... @ldstpair sz=3 sign=0 p=0 w=0 | ||
56 | +LDP 10 101 0 010 1 ....... ..... ..... ..... @ldstpair sz=3 sign=0 p=0 w=0 | ||
57 | +STP_v 00 101 1 010 0 ....... ..... ..... ..... @ldstpair sz=2 sign=0 p=0 w=0 | ||
58 | +LDP_v 00 101 1 010 1 ....... ..... ..... ..... @ldstpair sz=2 sign=0 p=0 w=0 | ||
59 | +STP_v 01 101 1 010 0 ....... ..... ..... ..... @ldstpair sz=3 sign=0 p=0 w=0 | ||
60 | +LDP_v 01 101 1 010 1 ....... ..... ..... ..... @ldstpair sz=3 sign=0 p=0 w=0 | ||
61 | +STP_v 10 101 1 010 0 ....... ..... ..... ..... @ldstpair sz=4 sign=0 p=0 w=0 | ||
62 | +LDP_v 10 101 1 010 1 ....... ..... ..... ..... @ldstpair sz=4 sign=0 p=0 w=0 | ||
63 | + | ||
64 | +# STP and LDP: pre-indexed | ||
65 | +STP 00 101 0 011 0 ....... ..... ..... ..... @ldstpair sz=2 sign=0 p=0 w=1 | ||
66 | +LDP 00 101 0 011 1 ....... ..... ..... ..... @ldstpair sz=2 sign=0 p=0 w=1 | ||
67 | +LDP 01 101 0 011 1 ....... ..... ..... ..... @ldstpair sz=2 sign=1 p=0 w=1 | ||
68 | +STP 10 101 0 011 0 ....... ..... ..... ..... @ldstpair sz=3 sign=0 p=0 w=1 | ||
69 | +LDP 10 101 0 011 1 ....... ..... ..... ..... @ldstpair sz=3 sign=0 p=0 w=1 | ||
70 | +STP_v 00 101 1 011 0 ....... ..... ..... ..... @ldstpair sz=2 sign=0 p=0 w=1 | ||
71 | +LDP_v 00 101 1 011 1 ....... ..... ..... ..... @ldstpair sz=2 sign=0 p=0 w=1 | ||
72 | +STP_v 01 101 1 011 0 ....... ..... ..... ..... @ldstpair sz=3 sign=0 p=0 w=1 | ||
73 | +LDP_v 01 101 1 011 1 ....... ..... ..... ..... @ldstpair sz=3 sign=0 p=0 w=1 | ||
74 | +STP_v 10 101 1 011 0 ....... ..... ..... ..... @ldstpair sz=4 sign=0 p=0 w=1 | ||
75 | +LDP_v 10 101 1 011 1 ....... ..... ..... ..... @ldstpair sz=4 sign=0 p=0 w=1 | ||
76 | + | ||
77 | +# STGP: store tag and pair | ||
78 | +STGP 01 101 0 001 0 ....... ..... ..... ..... @ldstpair sz=3 sign=0 p=1 w=1 | ||
79 | +STGP 01 101 0 010 0 ....... ..... ..... ..... @ldstpair sz=3 sign=0 p=0 w=0 | ||
80 | +STGP 01 101 0 011 0 ....... ..... ..... ..... @ldstpair sz=3 sign=0 p=0 w=1 | ||
81 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
62 | index XXXXXXX..XXXXXXX 100644 | 82 | index XXXXXXX..XXXXXXX 100644 |
63 | --- a/target/arm/translate.h | 83 | --- a/target/arm/tcg/translate-a64.c |
64 | +++ b/target/arm/translate.h | 84 | +++ b/target/arm/tcg/translate-a64.c |
65 | @@ -XXX,XX +XXX,XX @@ typedef struct DisasContext { | 85 | @@ -XXX,XX +XXX,XX @@ static bool trans_LD_lit_v(DisasContext *s, arg_ldlit *a) |
66 | bool pstate_sm; | 86 | return true; |
67 | /* True if PSTATE.ZA is set. */ | 87 | } |
68 | bool pstate_za; | 88 | |
69 | + /* True if non-streaming insns should raise an SME Streaming exception. */ | 89 | -/* |
70 | + bool sme_trap_nonstreaming; | 90 | - * LDNP (Load Pair - non-temporal hint) |
71 | + /* True if the current instruction is non-streaming. */ | 91 | - * LDP (Load Pair - non vector) |
72 | + bool is_nonstreaming; | 92 | - * LDPSW (Load Pair Signed Word - non vector) |
73 | /* True if MVE insns are definitely not predicated by VPR or LTPSIZE */ | 93 | - * STNP (Store Pair - non-temporal hint) |
74 | bool mve_no_pred; | 94 | - * STP (Store Pair - non vector) |
75 | /* | 95 | - * LDNP (Load Pair of SIMD&FP - non-temporal hint) |
76 | diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode | 96 | - * LDP (Load Pair of SIMD&FP) |
77 | new file mode 100644 | 97 | - * STNP (Store Pair of SIMD&FP - non-temporal hint) |
78 | index XXXXXXX..XXXXXXX | 98 | - * STP (Store Pair of SIMD&FP) |
79 | --- /dev/null | 99 | - * |
80 | +++ b/target/arm/sme-fa64.decode | 100 | - * 31 30 29 27 26 25 24 23 22 21 15 14 10 9 5 4 0 |
81 | @@ -XXX,XX +XXX,XX @@ | 101 | - * +-----+-------+---+---+-------+---+-----------------------------+ |
82 | +# AArch64 SME allowed instruction decoding | 102 | - * | opc | 1 0 1 | V | 0 | index | L | imm7 | Rt2 | Rn | Rt | |
83 | +# | 103 | - * +-----+-------+---+---+-------+---+-------+-------+------+------+ |
84 | +# Copyright (c) 2022 Linaro, Ltd | 104 | - * |
85 | +# | 105 | - * opc: LDP/STP/LDNP/STNP 00 -> 32 bit, 10 -> 64 bit |
86 | +# This library is free software; you can redistribute it and/or | 106 | - * LDPSW/STGP 01 |
87 | +# modify it under the terms of the GNU Lesser General Public | 107 | - * LDP/STP/LDNP/STNP (SIMD) 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit |
88 | +# License as published by the Free Software Foundation; either | 108 | - * V: 0 -> GPR, 1 -> Vector |
89 | +# version 2.1 of the License, or (at your option) any later version. | 109 | - * idx: 00 -> signed offset with non-temporal hint, 01 -> post-index, |
90 | +# | 110 | - * 10 -> signed offset, 11 -> pre-index |
91 | +# This library is distributed in the hope that it will be useful, | 111 | - * L: 0 -> Store 1 -> Load |
92 | +# but WITHOUT ANY WARRANTY; without even the implied warranty of | 112 | - * |
93 | +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | 113 | - * Rt, Rt2 = GPR or SIMD registers to be stored |
94 | +# Lesser General Public License for more details. | 114 | - * Rn = general purpose register containing address |
95 | +# | 115 | - * imm7 = signed offset (multiple of 4 or 8 depending on size) |
96 | +# You should have received a copy of the GNU Lesser General Public | 116 | - */ |
97 | +# License along with this library; if not, see <http://www.gnu.org/licenses/>. | 117 | -static void disas_ldst_pair(DisasContext *s, uint32_t insn) |
98 | + | 118 | +static void op_addr_ldstpair_pre(DisasContext *s, arg_ldstpair *a, |
99 | +# | 119 | + TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr, |
100 | +# This file is processed by scripts/decodetree.py | 120 | + uint64_t offset, bool is_store, MemOp mop) |
101 | +# | 121 | { |
102 | + | 122 | - int rt = extract32(insn, 0, 5); |
103 | +# These patterns are taken from Appendix E1.1 of DDI0616 A.a, | 123 | - int rn = extract32(insn, 5, 5); |
104 | +# Arm Architecture Reference Manual Supplement, | 124 | - int rt2 = extract32(insn, 10, 5); |
105 | +# The Scalable Matrix Extension (SME), for Armv9-A | 125 | - uint64_t offset = sextract64(insn, 15, 7); |
106 | + | 126 | - int index = extract32(insn, 23, 2); |
127 | - bool is_vector = extract32(insn, 26, 1); | ||
128 | - bool is_load = extract32(insn, 22, 1); | ||
129 | - int opc = extract32(insn, 30, 2); | ||
130 | - bool is_signed = false; | ||
131 | - bool postindex = false; | ||
132 | - bool wback = false; | ||
133 | - bool set_tag = false; | ||
134 | - TCGv_i64 clean_addr, dirty_addr; | ||
135 | - MemOp mop; | ||
136 | - int size; | ||
137 | - | ||
138 | - if (opc == 3) { | ||
139 | - unallocated_encoding(s); | ||
140 | - return; | ||
141 | - } | ||
142 | - | ||
143 | - if (is_vector) { | ||
144 | - size = 2 + opc; | ||
145 | - } else if (opc == 1 && !is_load) { | ||
146 | - /* STGP */ | ||
147 | - if (!dc_isar_feature(aa64_mte_insn_reg, s) || index == 0) { | ||
148 | - unallocated_encoding(s); | ||
149 | - return; | ||
150 | - } | ||
151 | - size = 3; | ||
152 | - set_tag = true; | ||
153 | - } else { | ||
154 | - size = 2 + extract32(opc, 1, 1); | ||
155 | - is_signed = extract32(opc, 0, 1); | ||
156 | - if (!is_load && is_signed) { | ||
157 | - unallocated_encoding(s); | ||
158 | - return; | ||
159 | - } | ||
160 | - } | ||
161 | - | ||
162 | - switch (index) { | ||
163 | - case 1: /* post-index */ | ||
164 | - postindex = true; | ||
165 | - wback = true; | ||
166 | - break; | ||
167 | - case 0: | ||
168 | - /* signed offset with "non-temporal" hint. Since we don't emulate | ||
169 | - * caches we don't care about hints to the cache system about | ||
170 | - * data access patterns, and handle this identically to plain | ||
171 | - * signed offset. | ||
172 | - */ | ||
173 | - if (is_signed) { | ||
174 | - /* There is no non-temporal-hint version of LDPSW */ | ||
175 | - unallocated_encoding(s); | ||
176 | - return; | ||
177 | - } | ||
178 | - postindex = false; | ||
179 | - break; | ||
180 | - case 2: /* signed offset, rn not updated */ | ||
181 | - postindex = false; | ||
182 | - break; | ||
183 | - case 3: /* pre-index */ | ||
184 | - postindex = false; | ||
185 | - wback = true; | ||
186 | - break; | ||
187 | - } | ||
188 | - | ||
189 | - if (is_vector && !fp_access_check(s)) { | ||
190 | - return; | ||
191 | - } | ||
192 | - | ||
193 | - offset <<= (set_tag ? LOG2_TAG_GRANULE : size); | ||
194 | - | ||
195 | - if (rn == 31) { | ||
196 | + if (a->rn == 31) { | ||
197 | gen_check_sp_alignment(s); | ||
198 | } | ||
199 | |||
200 | - dirty_addr = read_cpu_reg_sp(s, rn, 1); | ||
201 | - if (!postindex) { | ||
202 | + *dirty_addr = read_cpu_reg_sp(s, a->rn, 1); | ||
203 | + if (!a->p) { | ||
204 | + tcg_gen_addi_i64(*dirty_addr, *dirty_addr, offset); | ||
205 | + } | ||
206 | + | ||
207 | + *clean_addr = gen_mte_checkN(s, *dirty_addr, is_store, | ||
208 | + (a->w || a->rn != 31), 2 << a->sz, mop); | ||
209 | +} | ||
210 | + | ||
211 | +static void op_addr_ldstpair_post(DisasContext *s, arg_ldstpair *a, | ||
212 | + TCGv_i64 dirty_addr, uint64_t offset) | ||
107 | +{ | 213 | +{ |
108 | + [ | 214 | + if (a->w) { |
109 | + OK 0-00 1110 0000 0001 0010 11-- ---- ---- # SMOV W|Xd,Vn.B[0] | 215 | + if (a->p) { |
110 | + OK 0-00 1110 0000 0010 0010 11-- ---- ---- # SMOV W|Xd,Vn.H[0] | 216 | + tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); |
111 | + OK 0100 1110 0000 0100 0010 11-- ---- ---- # SMOV Xd,Vn.S[0] | 217 | + } |
112 | + OK 0000 1110 0000 0001 0011 11-- ---- ---- # UMOV Wd,Vn.B[0] | 218 | + tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr); |
113 | + OK 0000 1110 0000 0010 0011 11-- ---- ---- # UMOV Wd,Vn.H[0] | 219 | + } |
114 | + OK 0000 1110 0000 0100 0011 11-- ---- ---- # UMOV Wd,Vn.S[0] | ||
115 | + OK 0100 1110 0000 1000 0011 11-- ---- ---- # UMOV Xd,Vn.D[0] | ||
116 | + ] | ||
117 | + FAIL 0--0 111- ---- ---- ---- ---- ---- ---- # Advanced SIMD vector operations | ||
118 | +} | 220 | +} |
119 | + | 221 | + |
222 | +static bool trans_STP(DisasContext *s, arg_ldstpair *a) | ||
120 | +{ | 223 | +{ |
121 | + [ | 224 | + uint64_t offset = a->imm << a->sz; |
122 | + OK 0101 1110 --1- ---- 11-1 11-- ---- ---- # FMULX/FRECPS/FRSQRTS (scalar) | 225 | + TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2; |
123 | + OK 0101 1110 -10- ---- 00-1 11-- ---- ---- # FMULX/FRECPS/FRSQRTS (scalar, FP16) | 226 | + MemOp mop = finalize_memop(s, a->sz); |
124 | + OK 01-1 1110 1-10 0001 11-1 10-- ---- ---- # FRECPE/FRSQRTE/FRECPX (scalar) | 227 | + |
125 | + OK 01-1 1110 1111 1001 11-1 10-- ---- ---- # FRECPE/FRSQRTE/FRECPX (scalar, FP16) | 228 | + op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, true, mop); |
126 | + ] | 229 | + tcg_rt = cpu_reg(s, a->rt); |
127 | + FAIL 01-1 111- ---- ---- ---- ---- ---- ---- # Advanced SIMD single-element operations | 230 | + tcg_rt2 = cpu_reg(s, a->rt2); |
128 | +} | 231 | + /* |
129 | + | 232 | + * We built mop above for the single logical access -- rebuild it |
130 | +FAIL 0-00 110- ---- ---- ---- ---- ---- ---- # Advanced SIMD structure load/store | 233 | + * now for the paired operation. |
131 | +FAIL 1100 1110 ---- ---- ---- ---- ---- ---- # Advanced SIMD cryptography extensions | 234 | + * |
132 | +FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS | 235 | + * With LSE2, non-sign-extending pairs are treated atomically if |
133 | + | 236 | + * aligned, and if unaligned one of the pair will be completely |
134 | +# These are the "avoidance of doubt" final table of Illegal Advanced SIMD instructions | 237 | + * within a 16-byte block and that element will be atomic. |
135 | +# We don't actually need to include these, as the default is OK. | 238 | + * Otherwise each element is separately atomic. |
136 | +# -001 111- ---- ---- ---- ---- ---- ---- # Scalar floating-point operations | 239 | + * In all cases, issue one operation with the correct atomicity. |
137 | +# --10 110- ---- ---- ---- ---- ---- ---- # Load/store pair of FP registers | 240 | + */ |
138 | +# --01 1100 ---- ---- ---- ---- ---- ---- # Load FP register (PC-relative literal) | 241 | + mop = a->sz + 1; |
139 | +# --11 1100 --0- ---- ---- ---- ---- ---- # Load/store FP register (unscaled imm) | 242 | + if (s->align_mem) { |
140 | +# --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset) | 243 | + mop |= (a->sz == 2 ? MO_ALIGN_4 : MO_ALIGN_8); |
141 | +# --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm) | 244 | + } |
142 | + | 245 | + mop = finalize_memop_pair(s, mop); |
143 | +FAIL 0000 0100 --1- ---- 1010 ---- ---- ---- # ADR | 246 | + if (a->sz == 2) { |
144 | +FAIL 0000 0100 --1- ---- 1011 -0-- ---- ---- # FTSSEL, FEXPA | 247 | + TCGv_i64 tmp = tcg_temp_new_i64(); |
145 | +FAIL 0000 0101 --10 0001 100- ---- ---- ---- # COMPACT | 248 | + |
146 | +FAIL 0010 0101 --01 100- 1111 000- ---0 ---- # RDFFR, RDFFRS | 249 | + if (s->be_data == MO_LE) { |
147 | +FAIL 0010 0101 --10 1--- 1001 ---- ---- ---- # WRFFR, SETFFR | 250 | + tcg_gen_concat32_i64(tmp, tcg_rt, tcg_rt2); |
148 | +FAIL 0100 0101 --0- ---- 1011 ---- ---- ---- # BDEP, BEXT, BGRP | 251 | + } else { |
149 | +FAIL 0100 0101 000- ---- 0110 1--- ---- ---- # PMULLB, PMULLT (128b result) | 252 | + tcg_gen_concat32_i64(tmp, tcg_rt2, tcg_rt); |
150 | +FAIL 0110 0100 --1- ---- 1110 01-- ---- ---- # FMMLA, BFMMLA | ||
151 | +FAIL 0110 0101 --0- ---- 0000 11-- ---- ---- # FTSMUL | ||
152 | +FAIL 0110 0101 --01 0--- 100- ---- ---- ---- # FTMAD | ||
153 | +FAIL 0110 0101 --01 1--- 001- ---- ---- ---- # FADDA | ||
154 | +FAIL 0100 0101 --0- ---- 1001 10-- ---- ---- # SMMLA, UMMLA, USMMLA | ||
155 | +FAIL 0100 0101 --1- ---- 1--- ---- ---- ---- # SVE2 string/histo/crypto instructions | ||
156 | +FAIL 1000 010- -00- ---- 10-- ---- ---- ---- # SVE2 32-bit gather NT load (vector+scalar) | ||
157 | +FAIL 1000 010- -00- ---- 111- ---- ---- ---- # SVE 32-bit gather prefetch (vector+imm) | ||
158 | +FAIL 1000 0100 0-1- ---- 0--- ---- ---- ---- # SVE 32-bit gather prefetch (scalar+vector) | ||
159 | +FAIL 1000 010- -01- ---- 1--- ---- ---- ---- # SVE 32-bit gather load (vector+imm) | ||
160 | +FAIL 1000 0100 0-0- ---- 0--- ---- ---- ---- # SVE 32-bit gather load byte (scalar+vector) | ||
161 | +FAIL 1000 0100 1--- ---- 0--- ---- ---- ---- # SVE 32-bit gather load half (scalar+vector) | ||
162 | +FAIL 1000 0101 0--- ---- 0--- ---- ---- ---- # SVE 32-bit gather load word (scalar+vector) | ||
163 | +FAIL 1010 010- ---- ---- 011- ---- ---- ---- # SVE contiguous FF load (scalar+scalar) | ||
164 | +FAIL 1010 010- ---1 ---- 101- ---- ---- ---- # SVE contiguous NF load (scalar+imm) | ||
165 | +FAIL 1010 010- -01- ---- 000- ---- ---- ---- # SVE load & replicate 32 bytes (scalar+scalar) | ||
166 | +FAIL 1010 010- -010 ---- 001- ---- ---- ---- # SVE load & replicate 32 bytes (scalar+imm) | ||
167 | +FAIL 1100 010- ---- ---- ---- ---- ---- ---- # SVE 64-bit gather load/prefetch | ||
168 | +FAIL 1110 010- -00- ---- 001- ---- ---- ---- # SVE2 64-bit scatter NT store (vector+scalar) | ||
169 | +FAIL 1110 010- -10- ---- 001- ---- ---- ---- # SVE2 32-bit scatter NT store (vector+scalar) | ||
170 | +FAIL 1110 010- ---- ---- 1-0- ---- ---- ---- # SVE scatter store (scalar+32-bit vector) | ||
171 | +FAIL 1110 010- ---- ---- 101- ---- ---- ---- # SVE scatter store (misc) | ||
172 | diff --git a/target/arm/helper.c b/target/arm/helper.c | ||
173 | index XXXXXXX..XXXXXXX 100644 | ||
174 | --- a/target/arm/helper.c | ||
175 | +++ b/target/arm/helper.c | ||
176 | @@ -XXX,XX +XXX,XX @@ int sme_exception_el(CPUARMState *env, int el) | ||
177 | return 0; | ||
178 | } | ||
179 | |||
180 | +/* This corresponds to the ARM pseudocode function IsFullA64Enabled(). */ | ||
181 | +static bool sme_fa64(CPUARMState *env, int el) | ||
182 | +{ | ||
183 | + if (!cpu_isar_feature(aa64_sme_fa64, env_archcpu(env))) { | ||
184 | + return false; | ||
185 | + } | ||
186 | + | ||
187 | + if (el <= 1 && !el_is_in_host(env, el)) { | ||
188 | + if (!FIELD_EX64(env->vfp.smcr_el[1], SMCR, FA64)) { | ||
189 | + return false; | ||
190 | + } | 253 | + } |
191 | + } | 254 | + tcg_gen_qemu_st_i64(tmp, clean_addr, get_mem_index(s), mop); |
192 | + if (el <= 2 && arm_is_el2_enabled(env)) { | 255 | + } else { |
193 | + if (!FIELD_EX64(env->vfp.smcr_el[2], SMCR, FA64)) { | 256 | + TCGv_i128 tmp = tcg_temp_new_i128(); |
194 | + return false; | 257 | + |
258 | + if (s->be_data == MO_LE) { | ||
259 | + tcg_gen_concat_i64_i128(tmp, tcg_rt, tcg_rt2); | ||
260 | + } else { | ||
261 | + tcg_gen_concat_i64_i128(tmp, tcg_rt2, tcg_rt); | ||
195 | + } | 262 | + } |
196 | + } | 263 | + tcg_gen_qemu_st_i128(tmp, clean_addr, get_mem_index(s), mop); |
197 | + if (arm_feature(env, ARM_FEATURE_EL3)) { | 264 | + } |
198 | + if (!FIELD_EX64(env->vfp.smcr_el[3], SMCR, FA64)) { | 265 | + op_addr_ldstpair_post(s, a, dirty_addr, offset); |
199 | + return false; | ||
200 | + } | ||
201 | + } | ||
202 | + | ||
203 | + return true; | 266 | + return true; |
204 | +} | 267 | +} |
205 | + | 268 | + |
206 | /* | 269 | +static bool trans_LDP(DisasContext *s, arg_ldstpair *a) |
207 | * Given that SVE is enabled, return the vector length for EL. | 270 | +{ |
208 | */ | 271 | + uint64_t offset = a->imm << a->sz; |
209 | @@ -XXX,XX +XXX,XX @@ static CPUARMTBFlags rebuild_hflags_a32(CPUARMState *env, int fp_el, | 272 | + TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2; |
210 | DP_TBFLAG_ANY(flags, PSTATE__IL, 1); | 273 | + MemOp mop = finalize_memop(s, a->sz); |
211 | } | 274 | + |
212 | 275 | + op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, false, mop); | |
276 | + tcg_rt = cpu_reg(s, a->rt); | ||
277 | + tcg_rt2 = cpu_reg(s, a->rt2); | ||
278 | + | ||
213 | + /* | 279 | + /* |
214 | + * The SME exception we are testing for is raised via | 280 | + * We built mop above for the single logical access -- rebuild it |
215 | + * AArch64.CheckFPAdvSIMDEnabled(), as called from | 281 | + * now for the paired operation. |
216 | + * AArch32.CheckAdvSIMDOrFPEnabled(). | 282 | + * |
283 | + * With LSE2, non-sign-extending pairs are treated atomically if | ||
284 | + * aligned, and if unaligned one of the pair will be completely | ||
285 | + * within a 16-byte block and that element will be atomic. | ||
286 | + * Otherwise each element is separately atomic. | ||
287 | + * In all cases, issue one operation with the correct atomicity. | ||
288 | + * | ||
289 | + * This treats sign-extending loads like zero-extending loads, | ||
290 | + * since that reuses the most code below. | ||
217 | + */ | 291 | + */ |
218 | + if (el == 0 | 292 | + mop = a->sz + 1; |
219 | + && FIELD_EX64(env->svcr, SVCR, SM) | 293 | + if (s->align_mem) { |
220 | + && (!arm_is_el2_enabled(env) | 294 | + mop |= (a->sz == 2 ? MO_ALIGN_4 : MO_ALIGN_8); |
221 | + || (arm_el_is_aa64(env, 2) && !(env->cp15.hcr_el2 & HCR_TGE))) | 295 | + } |
222 | + && arm_el_is_aa64(env, 1) | 296 | + mop = finalize_memop_pair(s, mop); |
223 | + && !sme_fa64(env, el)) { | 297 | + if (a->sz == 2) { |
224 | + DP_TBFLAG_A32(flags, SME_TRAP_NONSTREAMING, 1); | 298 | + int o2 = s->be_data == MO_LE ? 32 : 0; |
225 | + } | 299 | + int o1 = o2 ^ 32; |
226 | + | 300 | + |
227 | return rebuild_hflags_common_32(env, fp_el, mmu_idx, flags); | 301 | + tcg_gen_qemu_ld_i64(tcg_rt, clean_addr, get_mem_index(s), mop); |
228 | } | 302 | + if (a->sign) { |
229 | 303 | + tcg_gen_sextract_i64(tcg_rt2, tcg_rt, o2, 32); | |
230 | @@ -XXX,XX +XXX,XX @@ static CPUARMTBFlags rebuild_hflags_a64(CPUARMState *env, int el, int fp_el, | 304 | + tcg_gen_sextract_i64(tcg_rt, tcg_rt, o1, 32); |
231 | } | 305 | + } else { |
232 | if (FIELD_EX64(env->svcr, SVCR, SM)) { | 306 | + tcg_gen_extract_i64(tcg_rt2, tcg_rt, o2, 32); |
233 | DP_TBFLAG_A64(flags, PSTATE_SM, 1); | 307 | + tcg_gen_extract_i64(tcg_rt, tcg_rt, o1, 32); |
234 | + DP_TBFLAG_A64(flags, SME_TRAP_NONSTREAMING, !sme_fa64(env, el)); | 308 | + } |
235 | } | 309 | + } else { |
236 | DP_TBFLAG_A64(flags, PSTATE_ZA, FIELD_EX64(env->svcr, SVCR, ZA)); | 310 | + TCGv_i128 tmp = tcg_temp_new_i128(); |
237 | } | 311 | + |
238 | diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c | 312 | + tcg_gen_qemu_ld_i128(tmp, clean_addr, get_mem_index(s), mop); |
239 | index XXXXXXX..XXXXXXX 100644 | 313 | + if (s->be_data == MO_LE) { |
240 | --- a/target/arm/translate-a64.c | 314 | + tcg_gen_extr_i128_i64(tcg_rt, tcg_rt2, tmp); |
241 | +++ b/target/arm/translate-a64.c | 315 | + } else { |
242 | @@ -XXX,XX +XXX,XX @@ static void do_vec_ld(DisasContext *s, int destidx, int element, | 316 | + tcg_gen_extr_i128_i64(tcg_rt2, tcg_rt, tmp); |
243 | * unallocated-encoding checks (otherwise the syndrome information | 317 | + } |
244 | * for the resulting exception will be incorrect). | 318 | + } |
245 | */ | 319 | + op_addr_ldstpair_post(s, a, dirty_addr, offset); |
246 | -static bool fp_access_check(DisasContext *s) | ||
247 | +static bool fp_access_check_only(DisasContext *s) | ||
248 | { | ||
249 | if (s->fp_excp_el) { | ||
250 | assert(!s->fp_access_checked); | ||
251 | @@ -XXX,XX +XXX,XX @@ static bool fp_access_check(DisasContext *s) | ||
252 | return true; | ||
253 | } | ||
254 | |||
255 | +static bool fp_access_check(DisasContext *s) | ||
256 | +{ | ||
257 | + if (!fp_access_check_only(s)) { | ||
258 | + return false; | ||
259 | + } | ||
260 | + if (s->sme_trap_nonstreaming && s->is_nonstreaming) { | ||
261 | + gen_exception_insn(s, s->pc_curr, EXCP_UDEF, | ||
262 | + syn_smetrap(SME_ET_Streaming, false)); | ||
263 | + return false; | ||
264 | + } | ||
265 | + return true; | 320 | + return true; |
266 | +} | 321 | +} |
267 | + | 322 | + |
268 | /* Check that SVE access is enabled. If it is, return true. | 323 | +static bool trans_STP_v(DisasContext *s, arg_ldstpair *a) |
269 | * If not, emit code to generate an appropriate exception and return false. | ||
270 | */ | ||
271 | @@ -XXX,XX +XXX,XX @@ static void handle_sys(DisasContext *s, uint32_t insn, bool isread, | ||
272 | default: | ||
273 | g_assert_not_reached(); | ||
274 | } | ||
275 | - if ((ri->type & ARM_CP_FPU) && !fp_access_check(s)) { | ||
276 | + if ((ri->type & ARM_CP_FPU) && !fp_access_check_only(s)) { | ||
277 | return; | ||
278 | } else if ((ri->type & ARM_CP_SVE) && !sve_access_check(s)) { | ||
279 | return; | ||
280 | @@ -XXX,XX +XXX,XX @@ static void disas_data_proc_simd_fp(DisasContext *s, uint32_t insn) | ||
281 | } | ||
282 | } | ||
283 | |||
284 | +/* | ||
285 | + * Include the generated SME FA64 decoder. | ||
286 | + */ | ||
287 | + | ||
288 | +#include "decode-sme-fa64.c.inc" | ||
289 | + | ||
290 | +static bool trans_OK(DisasContext *s, arg_OK *a) | ||
291 | +{ | 324 | +{ |
325 | + uint64_t offset = a->imm << a->sz; | ||
326 | + TCGv_i64 clean_addr, dirty_addr; | ||
327 | + MemOp mop; | ||
328 | + | ||
329 | + if (!fp_access_check(s)) { | ||
330 | + return true; | ||
331 | + } | ||
332 | + | ||
333 | + /* LSE2 does not merge FP pairs; leave these as separate operations. */ | ||
334 | + mop = finalize_memop_asimd(s, a->sz); | ||
335 | + op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, true, mop); | ||
336 | + do_fp_st(s, a->rt, clean_addr, mop); | ||
337 | + tcg_gen_addi_i64(clean_addr, clean_addr, 1 << a->sz); | ||
338 | + do_fp_st(s, a->rt2, clean_addr, mop); | ||
339 | + op_addr_ldstpair_post(s, a, dirty_addr, offset); | ||
292 | + return true; | 340 | + return true; |
293 | +} | 341 | +} |
294 | + | 342 | + |
295 | +static bool trans_FAIL(DisasContext *s, arg_OK *a) | 343 | +static bool trans_LDP_v(DisasContext *s, arg_ldstpair *a) |
296 | +{ | 344 | +{ |
297 | + s->is_nonstreaming = true; | 345 | + uint64_t offset = a->imm << a->sz; |
346 | + TCGv_i64 clean_addr, dirty_addr; | ||
347 | + MemOp mop; | ||
348 | + | ||
349 | + if (!fp_access_check(s)) { | ||
350 | + return true; | ||
351 | + } | ||
352 | + | ||
353 | + /* LSE2 does not merge FP pairs; leave these as separate operations. */ | ||
354 | + mop = finalize_memop_asimd(s, a->sz); | ||
355 | + op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, false, mop); | ||
356 | + do_fp_ld(s, a->rt, clean_addr, mop); | ||
357 | + tcg_gen_addi_i64(clean_addr, clean_addr, 1 << a->sz); | ||
358 | + do_fp_ld(s, a->rt2, clean_addr, mop); | ||
359 | + op_addr_ldstpair_post(s, a, dirty_addr, offset); | ||
298 | + return true; | 360 | + return true; |
299 | +} | 361 | +} |
300 | + | 362 | + |
301 | /** | 363 | +static bool trans_STGP(DisasContext *s, arg_ldstpair *a) |
302 | * is_guarded_page: | 364 | +{ |
303 | * @env: The cpu environment | 365 | + TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2; |
304 | @@ -XXX,XX +XXX,XX @@ static void aarch64_tr_init_disas_context(DisasContextBase *dcbase, | 366 | + uint64_t offset = a->imm << LOG2_TAG_GRANULE; |
305 | dc->mte_active[1] = EX_TBFLAG_A64(tb_flags, MTE0_ACTIVE); | 367 | + MemOp mop; |
306 | dc->pstate_sm = EX_TBFLAG_A64(tb_flags, PSTATE_SM); | 368 | + TCGv_i128 tmp; |
307 | dc->pstate_za = EX_TBFLAG_A64(tb_flags, PSTATE_ZA); | 369 | + |
308 | + dc->sme_trap_nonstreaming = EX_TBFLAG_A64(tb_flags, SME_TRAP_NONSTREAMING); | 370 | + if (!dc_isar_feature(aa64_mte_insn_reg, s)) { |
309 | dc->vec_len = 0; | 371 | + return false; |
310 | dc->vec_stride = 0; | 372 | + } |
311 | dc->cp_regs = arm_cpu->cp_regs; | 373 | + |
312 | @@ -XXX,XX +XXX,XX @@ static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu) | 374 | + if (a->rn == 31) { |
313 | } | 375 | + gen_check_sp_alignment(s); |
376 | + } | ||
377 | + | ||
378 | + dirty_addr = read_cpu_reg_sp(s, a->rn, 1); | ||
379 | + if (!a->p) { | ||
380 | tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); | ||
314 | } | 381 | } |
315 | 382 | ||
316 | + s->is_nonstreaming = false; | 383 | - if (set_tag) { |
317 | + if (s->sme_trap_nonstreaming) { | 384 | - if (!s->ata) { |
318 | + disas_sme_fa64(s, insn); | 385 | - /* |
319 | + } | 386 | - * TODO: We could rely on the stores below, at least for |
320 | + | 387 | - * system mode, if we arrange to add MO_ALIGN_16. |
321 | switch (extract32(insn, 25, 4)) { | 388 | - */ |
322 | case 0x0: | 389 | - gen_helper_stg_stub(cpu_env, dirty_addr); |
323 | if (!extract32(insn, 31, 1) || !disas_sme(s, insn)) { | 390 | - } else if (tb_cflags(s->base.tb) & CF_PARALLEL) { |
324 | diff --git a/target/arm/translate-vfp.c b/target/arm/translate-vfp.c | 391 | - gen_helper_stg_parallel(cpu_env, dirty_addr, dirty_addr); |
325 | index XXXXXXX..XXXXXXX 100644 | 392 | - } else { |
326 | --- a/target/arm/translate-vfp.c | 393 | - gen_helper_stg(cpu_env, dirty_addr, dirty_addr); |
327 | +++ b/target/arm/translate-vfp.c | 394 | - } |
328 | @@ -XXX,XX +XXX,XX @@ static bool vfp_access_check_a(DisasContext *s, bool ignore_vfp_enabled) | 395 | - } |
329 | return false; | 396 | - |
397 | - if (is_vector) { | ||
398 | - mop = finalize_memop_asimd(s, size); | ||
399 | - } else { | ||
400 | - mop = finalize_memop(s, size); | ||
401 | - } | ||
402 | - clean_addr = gen_mte_checkN(s, dirty_addr, !is_load, | ||
403 | - (wback || rn != 31) && !set_tag, | ||
404 | - 2 << size, mop); | ||
405 | - | ||
406 | - if (is_vector) { | ||
407 | - /* LSE2 does not merge FP pairs; leave these as separate operations. */ | ||
408 | - if (is_load) { | ||
409 | - do_fp_ld(s, rt, clean_addr, mop); | ||
410 | - } else { | ||
411 | - do_fp_st(s, rt, clean_addr, mop); | ||
412 | - } | ||
413 | - tcg_gen_addi_i64(clean_addr, clean_addr, 1 << size); | ||
414 | - if (is_load) { | ||
415 | - do_fp_ld(s, rt2, clean_addr, mop); | ||
416 | - } else { | ||
417 | - do_fp_st(s, rt2, clean_addr, mop); | ||
418 | - } | ||
419 | - } else { | ||
420 | - TCGv_i64 tcg_rt = cpu_reg(s, rt); | ||
421 | - TCGv_i64 tcg_rt2 = cpu_reg(s, rt2); | ||
422 | - | ||
423 | + if (!s->ata) { | ||
424 | /* | ||
425 | - * We built mop above for the single logical access -- rebuild it | ||
426 | - * now for the paired operation. | ||
427 | - * | ||
428 | - * With LSE2, non-sign-extending pairs are treated atomically if | ||
429 | - * aligned, and if unaligned one of the pair will be completely | ||
430 | - * within a 16-byte block and that element will be atomic. | ||
431 | - * Otherwise each element is separately atomic. | ||
432 | - * In all cases, issue one operation with the correct atomicity. | ||
433 | - * | ||
434 | - * This treats sign-extending loads like zero-extending loads, | ||
435 | - * since that reuses the most code below. | ||
436 | + * TODO: We could rely on the stores below, at least for | ||
437 | + * system mode, if we arrange to add MO_ALIGN_16. | ||
438 | */ | ||
439 | - mop = size + 1; | ||
440 | - if (s->align_mem) { | ||
441 | - mop |= (size == 2 ? MO_ALIGN_4 : MO_ALIGN_8); | ||
442 | - } | ||
443 | - mop = finalize_memop_pair(s, mop); | ||
444 | - | ||
445 | - if (is_load) { | ||
446 | - if (size == 2) { | ||
447 | - int o2 = s->be_data == MO_LE ? 32 : 0; | ||
448 | - int o1 = o2 ^ 32; | ||
449 | - | ||
450 | - tcg_gen_qemu_ld_i64(tcg_rt, clean_addr, get_mem_index(s), mop); | ||
451 | - if (is_signed) { | ||
452 | - tcg_gen_sextract_i64(tcg_rt2, tcg_rt, o2, 32); | ||
453 | - tcg_gen_sextract_i64(tcg_rt, tcg_rt, o1, 32); | ||
454 | - } else { | ||
455 | - tcg_gen_extract_i64(tcg_rt2, tcg_rt, o2, 32); | ||
456 | - tcg_gen_extract_i64(tcg_rt, tcg_rt, o1, 32); | ||
457 | - } | ||
458 | - } else { | ||
459 | - TCGv_i128 tmp = tcg_temp_new_i128(); | ||
460 | - | ||
461 | - tcg_gen_qemu_ld_i128(tmp, clean_addr, get_mem_index(s), mop); | ||
462 | - if (s->be_data == MO_LE) { | ||
463 | - tcg_gen_extr_i128_i64(tcg_rt, tcg_rt2, tmp); | ||
464 | - } else { | ||
465 | - tcg_gen_extr_i128_i64(tcg_rt2, tcg_rt, tmp); | ||
466 | - } | ||
467 | - } | ||
468 | - } else { | ||
469 | - if (size == 2) { | ||
470 | - TCGv_i64 tmp = tcg_temp_new_i64(); | ||
471 | - | ||
472 | - if (s->be_data == MO_LE) { | ||
473 | - tcg_gen_concat32_i64(tmp, tcg_rt, tcg_rt2); | ||
474 | - } else { | ||
475 | - tcg_gen_concat32_i64(tmp, tcg_rt2, tcg_rt); | ||
476 | - } | ||
477 | - tcg_gen_qemu_st_i64(tmp, clean_addr, get_mem_index(s), mop); | ||
478 | - } else { | ||
479 | - TCGv_i128 tmp = tcg_temp_new_i128(); | ||
480 | - | ||
481 | - if (s->be_data == MO_LE) { | ||
482 | - tcg_gen_concat_i64_i128(tmp, tcg_rt, tcg_rt2); | ||
483 | - } else { | ||
484 | - tcg_gen_concat_i64_i128(tmp, tcg_rt2, tcg_rt); | ||
485 | - } | ||
486 | - tcg_gen_qemu_st_i128(tmp, clean_addr, get_mem_index(s), mop); | ||
487 | - } | ||
488 | - } | ||
489 | + gen_helper_stg_stub(cpu_env, dirty_addr); | ||
490 | + } else if (tb_cflags(s->base.tb) & CF_PARALLEL) { | ||
491 | + gen_helper_stg_parallel(cpu_env, dirty_addr, dirty_addr); | ||
492 | + } else { | ||
493 | + gen_helper_stg(cpu_env, dirty_addr, dirty_addr); | ||
330 | } | 494 | } |
331 | 495 | ||
332 | + /* | 496 | - if (wback) { |
333 | + * Note that rebuild_hflags_a32 has already accounted for being in EL0 | 497 | - if (postindex) { |
334 | + * and the higher EL in A64 mode, etc. Unlike A64 mode, there do not | 498 | - tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); |
335 | + * appear to be any insns which touch VFP which are allowed. | 499 | - } |
336 | + */ | 500 | - tcg_gen_mov_i64(cpu_reg_sp(s, rn), dirty_addr); |
337 | + if (s->sme_trap_nonstreaming) { | 501 | + mop = finalize_memop(s, a->sz); |
338 | + gen_exception_insn(s, s->pc_curr, EXCP_UDEF, | 502 | + clean_addr = gen_mte_checkN(s, dirty_addr, true, false, 2 << a->sz, mop); |
339 | + syn_smetrap(SME_ET_Streaming, | 503 | + |
340 | + s->base.pc_next - s->pc_curr == 2)); | 504 | + tcg_rt = cpu_reg(s, a->rt); |
341 | + return false; | 505 | + tcg_rt2 = cpu_reg(s, a->rt2); |
342 | + } | 506 | + |
343 | + | 507 | + assert(a->sz == 3); |
344 | if (!s->vfp_enabled && !ignore_vfp_enabled) { | 508 | + |
345 | assert(!arm_dc_feature(s, ARM_FEATURE_M)); | 509 | + tmp = tcg_temp_new_i128(); |
346 | unallocated_encoding(s); | 510 | + if (s->be_data == MO_LE) { |
347 | diff --git a/target/arm/translate.c b/target/arm/translate.c | 511 | + tcg_gen_concat_i64_i128(tmp, tcg_rt, tcg_rt2); |
348 | index XXXXXXX..XXXXXXX 100644 | 512 | + } else { |
349 | --- a/target/arm/translate.c | 513 | + tcg_gen_concat_i64_i128(tmp, tcg_rt2, tcg_rt); |
350 | +++ b/target/arm/translate.c | ||
351 | @@ -XXX,XX +XXX,XX @@ static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) | ||
352 | dc->vec_len = EX_TBFLAG_A32(tb_flags, VECLEN); | ||
353 | dc->vec_stride = EX_TBFLAG_A32(tb_flags, VECSTRIDE); | ||
354 | } | ||
355 | + dc->sme_trap_nonstreaming = | ||
356 | + EX_TBFLAG_A32(tb_flags, SME_TRAP_NONSTREAMING); | ||
357 | } | 514 | } |
358 | dc->cp_regs = cpu->cp_regs; | 515 | + tcg_gen_qemu_st_i128(tmp, clean_addr, get_mem_index(s), mop); |
359 | dc->features = env->features; | 516 | + |
360 | diff --git a/target/arm/meson.build b/target/arm/meson.build | 517 | + op_addr_ldstpair_post(s, a, dirty_addr, offset); |
361 | index XXXXXXX..XXXXXXX 100644 | 518 | + return true; |
362 | --- a/target/arm/meson.build | 519 | } |
363 | +++ b/target/arm/meson.build | 520 | |
364 | @@ -XXX,XX +XXX,XX @@ | 521 | /* |
365 | gen = [ | 522 | @@ -XXX,XX +XXX,XX @@ static void disas_ldst_tag(DisasContext *s, uint32_t insn) |
366 | decodetree.process('sve.decode', extra_args: '--decode=disas_sve'), | 523 | static void disas_ldst(DisasContext *s, uint32_t insn) |
367 | decodetree.process('sme.decode', extra_args: '--decode=disas_sme'), | 524 | { |
368 | + decodetree.process('sme-fa64.decode', extra_args: '--static-decode=disas_sme_fa64'), | 525 | switch (extract32(insn, 24, 6)) { |
369 | decodetree.process('neon-shared.decode', extra_args: '--decode=disas_neon_shared'), | 526 | - case 0x28: case 0x29: |
370 | decodetree.process('neon-dp.decode', extra_args: '--decode=disas_neon_dp'), | 527 | - case 0x2c: case 0x2d: /* Load/store pair (all forms) */ |
371 | decodetree.process('neon-ls.decode', extra_args: '--decode=disas_neon_ls'), | 528 | - disas_ldst_pair(s, insn); |
529 | - break; | ||
530 | case 0x38: case 0x39: | ||
531 | case 0x3c: case 0x3d: /* Load/store register (all forms) */ | ||
532 | disas_ldst_reg(s, insn); | ||
372 | -- | 533 | -- |
373 | 2.25.1 | 534 | 2.34.1 | diff view generated by jsdifflib |
1 | From: Richard Henderson <richard.henderson@linaro.org> | 1 | Convert the load and store instructions which use a 9-bit |
---|---|---|---|
2 | immediate offset to decodetree. | ||
2 | 3 | ||
3 | The pseudocode for CheckSVEEnabled gains a check for Streaming | 4 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
4 | SVE mode, and for SME present but SVE absent. | 5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
6 | Message-id: 20230602155223.2040685-13-peter.maydell@linaro.org | ||
7 | --- | ||
8 | target/arm/tcg/a64.decode | 69 +++++++++++ | ||
9 | target/arm/tcg/translate-a64.c | 206 ++++++++++++++------------------- | ||
10 | 2 files changed, 153 insertions(+), 122 deletions(-) | ||
5 | 11 | ||
6 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | 12 | diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode |
7 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
8 | Message-id: 20220708151540.18136-17-richard.henderson@linaro.org | ||
9 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
10 | --- | ||
11 | target/arm/translate-a64.c | 22 ++++++++++++++++------ | ||
12 | 1 file changed, 16 insertions(+), 6 deletions(-) | ||
13 | |||
14 | diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c | ||
15 | index XXXXXXX..XXXXXXX 100644 | 13 | index XXXXXXX..XXXXXXX 100644 |
16 | --- a/target/arm/translate-a64.c | 14 | --- a/target/arm/tcg/a64.decode |
17 | +++ b/target/arm/translate-a64.c | 15 | +++ b/target/arm/tcg/a64.decode |
18 | @@ -XXX,XX +XXX,XX @@ static bool fp_access_check(DisasContext *s) | 16 | @@ -XXX,XX +XXX,XX @@ LDP_v 10 101 1 011 1 ....... ..... ..... ..... @ldstpair sz=4 sign=0 p |
17 | STGP 01 101 0 001 0 ....... ..... ..... ..... @ldstpair sz=3 sign=0 p=1 w=1 | ||
18 | STGP 01 101 0 010 0 ....... ..... ..... ..... @ldstpair sz=3 sign=0 p=0 w=0 | ||
19 | STGP 01 101 0 011 0 ....... ..... ..... ..... @ldstpair sz=3 sign=0 p=0 w=1 | ||
20 | + | ||
21 | +# Load/store register (unscaled immediate) | ||
22 | +&ldst_imm rt rn imm sz sign w p unpriv ext | ||
23 | +@ldst_imm .. ... . .. .. . imm:s9 .. rn:5 rt:5 &ldst_imm unpriv=0 p=0 w=0 | ||
24 | +@ldst_imm_pre .. ... . .. .. . imm:s9 .. rn:5 rt:5 &ldst_imm unpriv=0 p=0 w=1 | ||
25 | +@ldst_imm_post .. ... . .. .. . imm:s9 .. rn:5 rt:5 &ldst_imm unpriv=0 p=1 w=1 | ||
26 | +@ldst_imm_user .. ... . .. .. . imm:s9 .. rn:5 rt:5 &ldst_imm unpriv=1 p=0 w=0 | ||
27 | + | ||
28 | +STR_i sz:2 111 0 00 00 0 ......... 00 ..... ..... @ldst_imm sign=0 ext=0 | ||
29 | +LDR_i 00 111 0 00 01 0 ......... 00 ..... ..... @ldst_imm sign=0 ext=1 sz=0 | ||
30 | +LDR_i 01 111 0 00 01 0 ......... 00 ..... ..... @ldst_imm sign=0 ext=1 sz=1 | ||
31 | +LDR_i 10 111 0 00 01 0 ......... 00 ..... ..... @ldst_imm sign=0 ext=1 sz=2 | ||
32 | +LDR_i 11 111 0 00 01 0 ......... 00 ..... ..... @ldst_imm sign=0 ext=0 sz=3 | ||
33 | +LDR_i 00 111 0 00 10 0 ......... 00 ..... ..... @ldst_imm sign=1 ext=0 sz=0 | ||
34 | +LDR_i 01 111 0 00 10 0 ......... 00 ..... ..... @ldst_imm sign=1 ext=0 sz=1 | ||
35 | +LDR_i 10 111 0 00 10 0 ......... 00 ..... ..... @ldst_imm sign=1 ext=0 sz=2 | ||
36 | +LDR_i 00 111 0 00 11 0 ......... 00 ..... ..... @ldst_imm sign=1 ext=1 sz=0 | ||
37 | +LDR_i 01 111 0 00 11 0 ......... 00 ..... ..... @ldst_imm sign=1 ext=1 sz=1 | ||
38 | + | ||
39 | +STR_i sz:2 111 0 00 00 0 ......... 01 ..... ..... @ldst_imm_post sign=0 ext=0 | ||
40 | +LDR_i 00 111 0 00 01 0 ......... 01 ..... ..... @ldst_imm_post sign=0 ext=1 sz=0 | ||
41 | +LDR_i 01 111 0 00 01 0 ......... 01 ..... ..... @ldst_imm_post sign=0 ext=1 sz=1 | ||
42 | +LDR_i 10 111 0 00 01 0 ......... 01 ..... ..... @ldst_imm_post sign=0 ext=1 sz=2 | ||
43 | +LDR_i 11 111 0 00 01 0 ......... 01 ..... ..... @ldst_imm_post sign=0 ext=0 sz=3 | ||
44 | +LDR_i 00 111 0 00 10 0 ......... 01 ..... ..... @ldst_imm_post sign=1 ext=0 sz=0 | ||
45 | +LDR_i 01 111 0 00 10 0 ......... 01 ..... ..... @ldst_imm_post sign=1 ext=0 sz=1 | ||
46 | +LDR_i 10 111 0 00 10 0 ......... 01 ..... ..... @ldst_imm_post sign=1 ext=0 sz=2 | ||
47 | +LDR_i 00 111 0 00 11 0 ......... 01 ..... ..... @ldst_imm_post sign=1 ext=1 sz=0 | ||
48 | +LDR_i 01 111 0 00 11 0 ......... 01 ..... ..... @ldst_imm_post sign=1 ext=1 sz=1 | ||
49 | + | ||
50 | +STR_i sz:2 111 0 00 00 0 ......... 10 ..... ..... @ldst_imm_user sign=0 ext=0 | ||
51 | +LDR_i 00 111 0 00 01 0 ......... 10 ..... ..... @ldst_imm_user sign=0 ext=1 sz=0 | ||
52 | +LDR_i 01 111 0 00 01 0 ......... 10 ..... ..... @ldst_imm_user sign=0 ext=1 sz=1 | ||
53 | +LDR_i 10 111 0 00 01 0 ......... 10 ..... ..... @ldst_imm_user sign=0 ext=1 sz=2 | ||
54 | +LDR_i 11 111 0 00 01 0 ......... 10 ..... ..... @ldst_imm_user sign=0 ext=0 sz=3 | ||
55 | +LDR_i 00 111 0 00 10 0 ......... 10 ..... ..... @ldst_imm_user sign=1 ext=0 sz=0 | ||
56 | +LDR_i 01 111 0 00 10 0 ......... 10 ..... ..... @ldst_imm_user sign=1 ext=0 sz=1 | ||
57 | +LDR_i 10 111 0 00 10 0 ......... 10 ..... ..... @ldst_imm_user sign=1 ext=0 sz=2 | ||
58 | +LDR_i 00 111 0 00 11 0 ......... 10 ..... ..... @ldst_imm_user sign=1 ext=1 sz=0 | ||
59 | +LDR_i 01 111 0 00 11 0 ......... 10 ..... ..... @ldst_imm_user sign=1 ext=1 sz=1 | ||
60 | + | ||
61 | +STR_i sz:2 111 0 00 00 0 ......... 11 ..... ..... @ldst_imm_pre sign=0 ext=0 | ||
62 | +LDR_i 00 111 0 00 01 0 ......... 11 ..... ..... @ldst_imm_pre sign=0 ext=1 sz=0 | ||
63 | +LDR_i 01 111 0 00 01 0 ......... 11 ..... ..... @ldst_imm_pre sign=0 ext=1 sz=1 | ||
64 | +LDR_i 10 111 0 00 01 0 ......... 11 ..... ..... @ldst_imm_pre sign=0 ext=1 sz=2 | ||
65 | +LDR_i 11 111 0 00 01 0 ......... 11 ..... ..... @ldst_imm_pre sign=0 ext=0 sz=3 | ||
66 | +LDR_i 00 111 0 00 10 0 ......... 11 ..... ..... @ldst_imm_pre sign=1 ext=0 sz=0 | ||
67 | +LDR_i 01 111 0 00 10 0 ......... 11 ..... ..... @ldst_imm_pre sign=1 ext=0 sz=1 | ||
68 | +LDR_i 10 111 0 00 10 0 ......... 11 ..... ..... @ldst_imm_pre sign=1 ext=0 sz=2 | ||
69 | +LDR_i 00 111 0 00 11 0 ......... 11 ..... ..... @ldst_imm_pre sign=1 ext=1 sz=0 | ||
70 | +LDR_i 01 111 0 00 11 0 ......... 11 ..... ..... @ldst_imm_pre sign=1 ext=1 sz=1 | ||
71 | + | ||
72 | +# PRFM : prefetch memory: a no-op for QEMU | ||
73 | +NOP 11 111 0 00 10 0 --------- 00 ----- ----- | ||
74 | + | ||
75 | +STR_v_i sz:2 111 1 00 00 0 ......... 00 ..... ..... @ldst_imm sign=0 ext=0 | ||
76 | +STR_v_i 00 111 1 00 10 0 ......... 00 ..... ..... @ldst_imm sign=0 ext=0 sz=4 | ||
77 | +LDR_v_i sz:2 111 1 00 01 0 ......... 00 ..... ..... @ldst_imm sign=0 ext=0 | ||
78 | +LDR_v_i 00 111 1 00 11 0 ......... 00 ..... ..... @ldst_imm sign=0 ext=0 sz=4 | ||
79 | + | ||
80 | +STR_v_i sz:2 111 1 00 00 0 ......... 01 ..... ..... @ldst_imm_post sign=0 ext=0 | ||
81 | +STR_v_i 00 111 1 00 10 0 ......... 01 ..... ..... @ldst_imm_post sign=0 ext=0 sz=4 | ||
82 | +LDR_v_i sz:2 111 1 00 01 0 ......... 01 ..... ..... @ldst_imm_post sign=0 ext=0 | ||
83 | +LDR_v_i 00 111 1 00 11 0 ......... 01 ..... ..... @ldst_imm_post sign=0 ext=0 sz=4 | ||
84 | + | ||
85 | +STR_v_i sz:2 111 1 00 00 0 ......... 11 ..... ..... @ldst_imm_pre sign=0 ext=0 | ||
86 | +STR_v_i 00 111 1 00 10 0 ......... 11 ..... ..... @ldst_imm_pre sign=0 ext=0 sz=4 | ||
87 | +LDR_v_i sz:2 111 1 00 01 0 ......... 11 ..... ..... @ldst_imm_pre sign=0 ext=0 | ||
88 | +LDR_v_i 00 111 1 00 11 0 ......... 11 ..... ..... @ldst_imm_pre sign=0 ext=0 sz=4 | ||
89 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
90 | index XXXXXXX..XXXXXXX 100644 | ||
91 | --- a/target/arm/tcg/translate-a64.c | ||
92 | +++ b/target/arm/tcg/translate-a64.c | ||
93 | @@ -XXX,XX +XXX,XX @@ static bool trans_STGP(DisasContext *s, arg_ldstpair *a) | ||
19 | return true; | 94 | return true; |
20 | } | 95 | } |
21 | 96 | ||
22 | -/* Check that SVE access is enabled. If it is, return true. | 97 | -/* |
23 | +/* | 98 | - * Load/store (immediate post-indexed) |
24 | + * Check that SVE access is enabled. If it is, return true. | 99 | - * Load/store (immediate pre-indexed) |
25 | * If not, emit code to generate an appropriate exception and return false. | 100 | - * Load/store (unscaled immediate) |
26 | + * This function corresponds to CheckSVEEnabled(). | 101 | - * |
27 | */ | 102 | - * 31 30 29 27 26 25 24 23 22 21 20 12 11 10 9 5 4 0 |
28 | bool sve_access_check(DisasContext *s) | 103 | - * +----+-------+---+-----+-----+---+--------+-----+------+------+ |
104 | - * |size| 1 1 1 | V | 0 0 | opc | 0 | imm9 | idx | Rn | Rt | | ||
105 | - * +----+-------+---+-----+-----+---+--------+-----+------+------+ | ||
106 | - * | ||
107 | - * idx = 01 -> post-indexed, 11 pre-indexed, 00 unscaled imm. (no writeback) | ||
108 | - 10 -> unprivileged | ||
109 | - * V = 0 -> non-vector | ||
110 | - * size: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64bit | ||
111 | - * opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32 | ||
112 | - */ | ||
113 | -static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn, | ||
114 | - int opc, | ||
115 | - int size, | ||
116 | - int rt, | ||
117 | - bool is_vector) | ||
118 | +static void op_addr_ldst_imm_pre(DisasContext *s, arg_ldst_imm *a, | ||
119 | + TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr, | ||
120 | + uint64_t offset, bool is_store, MemOp mop) | ||
29 | { | 121 | { |
30 | - if (s->sve_excp_el) { | 122 | - int rn = extract32(insn, 5, 5); |
31 | - assert(!s->sve_access_checked); | 123 | - int imm9 = sextract32(insn, 12, 9); |
32 | - s->sve_access_checked = true; | 124 | - int idx = extract32(insn, 10, 2); |
33 | - | 125 | - bool is_signed = false; |
34 | + if (s->pstate_sm || !dc_isar_feature(aa64_sve, s)) { | 126 | - bool is_store = false; |
35 | + assert(dc_isar_feature(aa64_sme, s)); | 127 | - bool is_extended = false; |
36 | + if (!sme_sm_enabled_check(s)) { | 128 | - bool is_unpriv = (idx == 2); |
37 | + goto fail_exit; | 129 | - bool iss_valid; |
38 | + } | 130 | - bool post_index; |
39 | + } else if (s->sve_excp_el) { | 131 | - bool writeback; |
40 | gen_exception_insn_el(s, s->pc_curr, EXCP_UDEF, | 132 | int memidx; |
41 | syn_sve_access_trap(), s->sve_excp_el); | 133 | - MemOp memop; |
42 | - return false; | 134 | - TCGv_i64 clean_addr, dirty_addr; |
43 | + goto fail_exit; | 135 | |
136 | - if (is_vector) { | ||
137 | - size |= (opc & 2) << 1; | ||
138 | - if (size > 4 || is_unpriv) { | ||
139 | - unallocated_encoding(s); | ||
140 | - return; | ||
141 | - } | ||
142 | - is_store = ((opc & 1) == 0); | ||
143 | - if (!fp_access_check(s)) { | ||
144 | - return; | ||
145 | - } | ||
146 | - memop = finalize_memop_asimd(s, size); | ||
147 | - } else { | ||
148 | - if (size == 3 && opc == 2) { | ||
149 | - /* PRFM - prefetch */ | ||
150 | - if (idx != 0) { | ||
151 | - unallocated_encoding(s); | ||
152 | - return; | ||
153 | - } | ||
154 | - return; | ||
155 | - } | ||
156 | - if (opc == 3 && size > 1) { | ||
157 | - unallocated_encoding(s); | ||
158 | - return; | ||
159 | - } | ||
160 | - is_store = (opc == 0); | ||
161 | - is_signed = !is_store && extract32(opc, 1, 1); | ||
162 | - is_extended = (size < 3) && extract32(opc, 0, 1); | ||
163 | - memop = finalize_memop(s, size + is_signed * MO_SIGN); | ||
164 | - } | ||
165 | - | ||
166 | - switch (idx) { | ||
167 | - case 0: | ||
168 | - case 2: | ||
169 | - post_index = false; | ||
170 | - writeback = false; | ||
171 | - break; | ||
172 | - case 1: | ||
173 | - post_index = true; | ||
174 | - writeback = true; | ||
175 | - break; | ||
176 | - case 3: | ||
177 | - post_index = false; | ||
178 | - writeback = true; | ||
179 | - break; | ||
180 | - default: | ||
181 | - g_assert_not_reached(); | ||
182 | - } | ||
183 | - | ||
184 | - iss_valid = !is_vector && !writeback; | ||
185 | - | ||
186 | - if (rn == 31) { | ||
187 | + if (a->rn == 31) { | ||
188 | gen_check_sp_alignment(s); | ||
44 | } | 189 | } |
45 | s->sve_access_checked = true; | 190 | |
46 | return fp_access_check(s); | 191 | - dirty_addr = read_cpu_reg_sp(s, rn, 1); |
47 | + | 192 | - if (!post_index) { |
48 | + fail_exit: | 193 | - tcg_gen_addi_i64(dirty_addr, dirty_addr, imm9); |
49 | + /* Assert that we only raise one exception per instruction. */ | 194 | + *dirty_addr = read_cpu_reg_sp(s, a->rn, 1); |
50 | + assert(!s->sve_access_checked); | 195 | + if (!a->p) { |
51 | + s->sve_access_checked = true; | 196 | + tcg_gen_addi_i64(*dirty_addr, *dirty_addr, offset); |
52 | + return false; | 197 | } |
198 | + memidx = a->unpriv ? get_a64_user_mem_index(s) : get_mem_index(s); | ||
199 | + *clean_addr = gen_mte_check1_mmuidx(s, *dirty_addr, is_store, | ||
200 | + a->w || a->rn != 31, | ||
201 | + mop, a->unpriv, memidx); | ||
202 | +} | ||
203 | |||
204 | - memidx = is_unpriv ? get_a64_user_mem_index(s) : get_mem_index(s); | ||
205 | - | ||
206 | - clean_addr = gen_mte_check1_mmuidx(s, dirty_addr, is_store, | ||
207 | - writeback || rn != 31, | ||
208 | - memop, is_unpriv, memidx); | ||
209 | - | ||
210 | - if (is_vector) { | ||
211 | - if (is_store) { | ||
212 | - do_fp_st(s, rt, clean_addr, memop); | ||
213 | - } else { | ||
214 | - do_fp_ld(s, rt, clean_addr, memop); | ||
215 | - } | ||
216 | - } else { | ||
217 | - TCGv_i64 tcg_rt = cpu_reg(s, rt); | ||
218 | - bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc); | ||
219 | - | ||
220 | - if (is_store) { | ||
221 | - do_gpr_st_memidx(s, tcg_rt, clean_addr, memop, memidx, | ||
222 | - iss_valid, rt, iss_sf, false); | ||
223 | - } else { | ||
224 | - do_gpr_ld_memidx(s, tcg_rt, clean_addr, memop, | ||
225 | - is_extended, memidx, | ||
226 | - iss_valid, rt, iss_sf, false); | ||
227 | +static void op_addr_ldst_imm_post(DisasContext *s, arg_ldst_imm *a, | ||
228 | + TCGv_i64 dirty_addr, uint64_t offset) | ||
229 | +{ | ||
230 | + if (a->w) { | ||
231 | + if (a->p) { | ||
232 | + tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); | ||
233 | } | ||
234 | + tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr); | ||
235 | } | ||
236 | +} | ||
237 | |||
238 | - if (writeback) { | ||
239 | - TCGv_i64 tcg_rn = cpu_reg_sp(s, rn); | ||
240 | - if (post_index) { | ||
241 | - tcg_gen_addi_i64(dirty_addr, dirty_addr, imm9); | ||
242 | - } | ||
243 | - tcg_gen_mov_i64(tcg_rn, dirty_addr); | ||
244 | +static bool trans_STR_i(DisasContext *s, arg_ldst_imm *a) | ||
245 | +{ | ||
246 | + bool iss_sf, iss_valid = !a->w; | ||
247 | + TCGv_i64 clean_addr, dirty_addr, tcg_rt; | ||
248 | + int memidx = a->unpriv ? get_a64_user_mem_index(s) : get_mem_index(s); | ||
249 | + MemOp mop = finalize_memop(s, a->sz + a->sign * MO_SIGN); | ||
250 | + | ||
251 | + op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, true, mop); | ||
252 | + | ||
253 | + tcg_rt = cpu_reg(s, a->rt); | ||
254 | + iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); | ||
255 | + | ||
256 | + do_gpr_st_memidx(s, tcg_rt, clean_addr, mop, memidx, | ||
257 | + iss_valid, a->rt, iss_sf, false); | ||
258 | + op_addr_ldst_imm_post(s, a, dirty_addr, a->imm); | ||
259 | + return true; | ||
260 | +} | ||
261 | + | ||
262 | +static bool trans_LDR_i(DisasContext *s, arg_ldst_imm *a) | ||
263 | +{ | ||
264 | + bool iss_sf, iss_valid = !a->w; | ||
265 | + TCGv_i64 clean_addr, dirty_addr, tcg_rt; | ||
266 | + int memidx = a->unpriv ? get_a64_user_mem_index(s) : get_mem_index(s); | ||
267 | + MemOp mop = finalize_memop(s, a->sz + a->sign * MO_SIGN); | ||
268 | + | ||
269 | + op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, false, mop); | ||
270 | + | ||
271 | + tcg_rt = cpu_reg(s, a->rt); | ||
272 | + iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); | ||
273 | + | ||
274 | + do_gpr_ld_memidx(s, tcg_rt, clean_addr, mop, | ||
275 | + a->ext, memidx, iss_valid, a->rt, iss_sf, false); | ||
276 | + op_addr_ldst_imm_post(s, a, dirty_addr, a->imm); | ||
277 | + return true; | ||
278 | +} | ||
279 | + | ||
280 | +static bool trans_STR_v_i(DisasContext *s, arg_ldst_imm *a) | ||
281 | +{ | ||
282 | + TCGv_i64 clean_addr, dirty_addr; | ||
283 | + MemOp mop; | ||
284 | + | ||
285 | + if (!fp_access_check(s)) { | ||
286 | + return true; | ||
287 | } | ||
288 | + mop = finalize_memop_asimd(s, a->sz); | ||
289 | + op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, true, mop); | ||
290 | + do_fp_st(s, a->rt, clean_addr, mop); | ||
291 | + op_addr_ldst_imm_post(s, a, dirty_addr, a->imm); | ||
292 | + return true; | ||
293 | +} | ||
294 | + | ||
295 | +static bool trans_LDR_v_i(DisasContext *s, arg_ldst_imm *a) | ||
296 | +{ | ||
297 | + TCGv_i64 clean_addr, dirty_addr; | ||
298 | + MemOp mop; | ||
299 | + | ||
300 | + if (!fp_access_check(s)) { | ||
301 | + return true; | ||
302 | + } | ||
303 | + mop = finalize_memop_asimd(s, a->sz); | ||
304 | + op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, false, mop); | ||
305 | + do_fp_ld(s, a->rt, clean_addr, mop); | ||
306 | + op_addr_ldst_imm_post(s, a, dirty_addr, a->imm); | ||
307 | + return true; | ||
53 | } | 308 | } |
54 | 309 | ||
55 | /* | 310 | /* |
311 | @@ -XXX,XX +XXX,XX @@ static void disas_ldst_reg(DisasContext *s, uint32_t insn) | ||
312 | switch (extract32(insn, 24, 2)) { | ||
313 | case 0: | ||
314 | if (extract32(insn, 21, 1) == 0) { | ||
315 | - /* Load/store register (unscaled immediate) | ||
316 | - * Load/store immediate pre/post-indexed | ||
317 | - * Load/store register unprivileged | ||
318 | - */ | ||
319 | - disas_ldst_reg_imm9(s, insn, opc, size, rt, is_vector); | ||
320 | - return; | ||
321 | + break; | ||
322 | } | ||
323 | switch (extract32(insn, 10, 2)) { | ||
324 | case 0: | ||
56 | -- | 325 | -- |
57 | 2.25.1 | 326 | 2.34.1 | diff view generated by jsdifflib |
1 | From: Richard Henderson <richard.henderson@linaro.org> | 1 | Convert the LDR and STR instructions which use a 12-bit immediate |
---|---|---|---|
2 | offset to decodetree. We can reuse the existing LDR and STR | ||
3 | trans functions for these. | ||
2 | 4 | ||
3 | Add a TCGv_ptr base argument, which will be cpu_env for SVE. | 5 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
4 | We will reuse this for SME save and restore array insns. | 6 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
7 | Message-id: 20230602155223.2040685-14-peter.maydell@linaro.org | ||
8 | --- | ||
9 | target/arm/tcg/a64.decode | 25 ++++++++ | ||
10 | target/arm/tcg/translate-a64.c | 104 +++++---------------------------- | ||
11 | 2 files changed, 41 insertions(+), 88 deletions(-) | ||
5 | 12 | ||
6 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | 13 | diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode |
7 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
8 | Message-id: 20220708151540.18136-22-richard.henderson@linaro.org | ||
9 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
10 | --- | ||
11 | target/arm/translate-a64.h | 3 +++ | ||
12 | target/arm/translate-sve.c | 48 ++++++++++++++++++++++++++++---------- | ||
13 | 2 files changed, 39 insertions(+), 12 deletions(-) | ||
14 | |||
15 | diff --git a/target/arm/translate-a64.h b/target/arm/translate-a64.h | ||
16 | index XXXXXXX..XXXXXXX 100644 | 14 | index XXXXXXX..XXXXXXX 100644 |
17 | --- a/target/arm/translate-a64.h | 15 | --- a/target/arm/tcg/a64.decode |
18 | +++ b/target/arm/translate-a64.h | 16 | +++ b/target/arm/tcg/a64.decode |
19 | @@ -XXX,XX +XXX,XX @@ void gen_gvec_xar(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, | 17 | @@ -XXX,XX +XXX,XX @@ STR_v_i sz:2 111 1 00 00 0 ......... 11 ..... ..... @ldst_imm_pre sign=0 |
20 | uint32_t rm_ofs, int64_t shift, | 18 | STR_v_i 00 111 1 00 10 0 ......... 11 ..... ..... @ldst_imm_pre sign=0 ext=0 sz=4 |
21 | uint32_t opr_sz, uint32_t max_sz); | 19 | LDR_v_i sz:2 111 1 00 01 0 ......... 11 ..... ..... @ldst_imm_pre sign=0 ext=0 |
22 | 20 | LDR_v_i 00 111 1 00 11 0 ......... 11 ..... ..... @ldst_imm_pre sign=0 ext=0 sz=4 | |
23 | +void gen_sve_ldr(DisasContext *s, TCGv_ptr, int vofs, int len, int rn, int imm); | ||
24 | +void gen_sve_str(DisasContext *s, TCGv_ptr, int vofs, int len, int rn, int imm); | ||
25 | + | 21 | + |
26 | #endif /* TARGET_ARM_TRANSLATE_A64_H */ | 22 | +# Load/store with an unsigned 12 bit immediate, which is scaled by the |
27 | diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c | 23 | +# element size. The function gets the sz:imm and returns the scaled immediate. |
24 | +%uimm_scaled 10:12 sz:3 !function=uimm_scaled | ||
25 | + | ||
26 | +@ldst_uimm .. ... . .. .. ............ rn:5 rt:5 &ldst_imm unpriv=0 p=0 w=0 imm=%uimm_scaled | ||
27 | + | ||
28 | +STR_i sz:2 111 0 01 00 ............ ..... ..... @ldst_uimm sign=0 ext=0 | ||
29 | +LDR_i 00 111 0 01 01 ............ ..... ..... @ldst_uimm sign=0 ext=1 sz=0 | ||
30 | +LDR_i 01 111 0 01 01 ............ ..... ..... @ldst_uimm sign=0 ext=1 sz=1 | ||
31 | +LDR_i 10 111 0 01 01 ............ ..... ..... @ldst_uimm sign=0 ext=1 sz=2 | ||
32 | +LDR_i 11 111 0 01 01 ............ ..... ..... @ldst_uimm sign=0 ext=0 sz=3 | ||
33 | +LDR_i 00 111 0 01 10 ............ ..... ..... @ldst_uimm sign=1 ext=0 sz=0 | ||
34 | +LDR_i 01 111 0 01 10 ............ ..... ..... @ldst_uimm sign=1 ext=0 sz=1 | ||
35 | +LDR_i 10 111 0 01 10 ............ ..... ..... @ldst_uimm sign=1 ext=0 sz=2 | ||
36 | +LDR_i 00 111 0 01 11 ............ ..... ..... @ldst_uimm sign=1 ext=1 sz=0 | ||
37 | +LDR_i 01 111 0 01 11 ............ ..... ..... @ldst_uimm sign=1 ext=1 sz=1 | ||
38 | + | ||
39 | +# PRFM | ||
40 | +NOP 11 111 0 01 10 ------------ ----- ----- | ||
41 | + | ||
42 | +STR_v_i sz:2 111 1 01 00 ............ ..... ..... @ldst_uimm sign=0 ext=0 | ||
43 | +STR_v_i 00 111 1 01 10 ............ ..... ..... @ldst_uimm sign=0 ext=0 sz=4 | ||
44 | +LDR_v_i sz:2 111 1 01 01 ............ ..... ..... @ldst_uimm sign=0 ext=0 | ||
45 | +LDR_v_i 00 111 1 01 11 ............ ..... ..... @ldst_uimm sign=0 ext=0 sz=4 | ||
46 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
28 | index XXXXXXX..XXXXXXX 100644 | 47 | index XXXXXXX..XXXXXXX 100644 |
29 | --- a/target/arm/translate-sve.c | 48 | --- a/target/arm/tcg/translate-a64.c |
30 | +++ b/target/arm/translate-sve.c | 49 | +++ b/target/arm/tcg/translate-a64.c |
31 | @@ -XXX,XX +XXX,XX @@ TRANS_FEAT(UCVTF_dd, aa64_sve, gen_gvec_fpst_arg_zpz, | 50 | @@ -XXX,XX +XXX,XX @@ enum a64_shift_type { |
32 | * The load should begin at the address Rn + IMM. | 51 | A64_SHIFT_TYPE_ROR = 3 |
52 | }; | ||
53 | |||
54 | +/* | ||
55 | + * Helpers for extracting complex instruction fields | ||
56 | + */ | ||
57 | + | ||
58 | +/* | ||
59 | + * For load/store with an unsigned 12 bit immediate scaled by the element | ||
60 | + * size. The input has the immediate field in bits [14:3] and the element | ||
61 | + * size in [2:0]. | ||
62 | + */ | ||
63 | +static int uimm_scaled(DisasContext *s, int x) | ||
64 | +{ | ||
65 | + unsigned imm = x >> 3; | ||
66 | + unsigned scale = extract32(x, 0, 3); | ||
67 | + return imm << scale; | ||
68 | +} | ||
69 | + | ||
70 | /* | ||
71 | * Include the generated decoders. | ||
33 | */ | 72 | */ |
34 | 73 | @@ -XXX,XX +XXX,XX @@ static void disas_ldst_reg_roffset(DisasContext *s, uint32_t insn, | |
35 | -static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm) | ||
36 | +void gen_sve_ldr(DisasContext *s, TCGv_ptr base, int vofs, | ||
37 | + int len, int rn, int imm) | ||
38 | { | ||
39 | int len_align = QEMU_ALIGN_DOWN(len, 8); | ||
40 | int len_remain = len % 8; | ||
41 | @@ -XXX,XX +XXX,XX @@ static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm) | ||
42 | t0 = tcg_temp_new_i64(); | ||
43 | for (i = 0; i < len_align; i += 8) { | ||
44 | tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUQ); | ||
45 | - tcg_gen_st_i64(t0, cpu_env, vofs + i); | ||
46 | + tcg_gen_st_i64(t0, base, vofs + i); | ||
47 | tcg_gen_addi_i64(clean_addr, clean_addr, 8); | ||
48 | } | ||
49 | tcg_temp_free_i64(t0); | ||
50 | @@ -XXX,XX +XXX,XX @@ static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm) | ||
51 | clean_addr = new_tmp_a64_local(s); | ||
52 | tcg_gen_mov_i64(clean_addr, t0); | ||
53 | |||
54 | + if (base != cpu_env) { | ||
55 | + TCGv_ptr b = tcg_temp_local_new_ptr(); | ||
56 | + tcg_gen_mov_ptr(b, base); | ||
57 | + base = b; | ||
58 | + } | ||
59 | + | ||
60 | gen_set_label(loop); | ||
61 | |||
62 | t0 = tcg_temp_new_i64(); | ||
63 | @@ -XXX,XX +XXX,XX @@ static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm) | ||
64 | tcg_gen_addi_i64(clean_addr, clean_addr, 8); | ||
65 | |||
66 | tp = tcg_temp_new_ptr(); | ||
67 | - tcg_gen_add_ptr(tp, cpu_env, i); | ||
68 | + tcg_gen_add_ptr(tp, base, i); | ||
69 | tcg_gen_addi_ptr(i, i, 8); | ||
70 | tcg_gen_st_i64(t0, tp, vofs); | ||
71 | tcg_temp_free_ptr(tp); | ||
72 | @@ -XXX,XX +XXX,XX @@ static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm) | ||
73 | |||
74 | tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop); | ||
75 | tcg_temp_free_ptr(i); | ||
76 | + | ||
77 | + if (base != cpu_env) { | ||
78 | + tcg_temp_free_ptr(base); | ||
79 | + assert(len_remain == 0); | ||
80 | + } | ||
81 | } | ||
82 | |||
83 | /* | ||
84 | @@ -XXX,XX +XXX,XX @@ static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm) | ||
85 | default: | ||
86 | g_assert_not_reached(); | ||
87 | } | ||
88 | - tcg_gen_st_i64(t0, cpu_env, vofs + len_align); | ||
89 | + tcg_gen_st_i64(t0, base, vofs + len_align); | ||
90 | tcg_temp_free_i64(t0); | ||
91 | } | 74 | } |
92 | } | 75 | } |
93 | 76 | ||
94 | /* Similarly for stores. */ | 77 | -/* |
95 | -static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm) | 78 | - * Load/store (unsigned immediate) |
96 | +void gen_sve_str(DisasContext *s, TCGv_ptr base, int vofs, | 79 | - * |
97 | + int len, int rn, int imm) | 80 | - * 31 30 29 27 26 25 24 23 22 21 10 9 5 |
98 | { | 81 | - * +----+-------+---+-----+-----+------------+-------+------+ |
99 | int len_align = QEMU_ALIGN_DOWN(len, 8); | 82 | - * |size| 1 1 1 | V | 0 1 | opc | imm12 | Rn | Rt | |
100 | int len_remain = len % 8; | 83 | - * +----+-------+---+-----+-----+------------+-------+------+ |
101 | @@ -XXX,XX +XXX,XX @@ static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm) | 84 | - * |
102 | 85 | - * For non-vector: | |
103 | t0 = tcg_temp_new_i64(); | 86 | - * size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit |
104 | for (i = 0; i < len_align; i += 8) { | 87 | - * opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32 |
105 | - tcg_gen_ld_i64(t0, cpu_env, vofs + i); | 88 | - * For vector: |
106 | + tcg_gen_ld_i64(t0, base, vofs + i); | 89 | - * size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated |
107 | tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUQ); | 90 | - * opc<0>: 0 -> store, 1 -> load |
108 | tcg_gen_addi_i64(clean_addr, clean_addr, 8); | 91 | - * Rn: base address register (inc SP) |
92 | - * Rt: target register | ||
93 | - */ | ||
94 | -static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn, | ||
95 | - int opc, | ||
96 | - int size, | ||
97 | - int rt, | ||
98 | - bool is_vector) | ||
99 | -{ | ||
100 | - int rn = extract32(insn, 5, 5); | ||
101 | - unsigned int imm12 = extract32(insn, 10, 12); | ||
102 | - unsigned int offset; | ||
103 | - TCGv_i64 clean_addr, dirty_addr; | ||
104 | - bool is_store; | ||
105 | - bool is_signed = false; | ||
106 | - bool is_extended = false; | ||
107 | - MemOp memop; | ||
108 | - | ||
109 | - if (is_vector) { | ||
110 | - size |= (opc & 2) << 1; | ||
111 | - if (size > 4) { | ||
112 | - unallocated_encoding(s); | ||
113 | - return; | ||
114 | - } | ||
115 | - is_store = !extract32(opc, 0, 1); | ||
116 | - if (!fp_access_check(s)) { | ||
117 | - return; | ||
118 | - } | ||
119 | - memop = finalize_memop_asimd(s, size); | ||
120 | - } else { | ||
121 | - if (size == 3 && opc == 2) { | ||
122 | - /* PRFM - prefetch */ | ||
123 | - return; | ||
124 | - } | ||
125 | - if (opc == 3 && size > 1) { | ||
126 | - unallocated_encoding(s); | ||
127 | - return; | ||
128 | - } | ||
129 | - is_store = (opc == 0); | ||
130 | - is_signed = !is_store && extract32(opc, 1, 1); | ||
131 | - is_extended = (size < 3) && extract32(opc, 0, 1); | ||
132 | - memop = finalize_memop(s, size + is_signed * MO_SIGN); | ||
133 | - } | ||
134 | - | ||
135 | - if (rn == 31) { | ||
136 | - gen_check_sp_alignment(s); | ||
137 | - } | ||
138 | - dirty_addr = read_cpu_reg_sp(s, rn, 1); | ||
139 | - offset = imm12 << size; | ||
140 | - tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); | ||
141 | - | ||
142 | - clean_addr = gen_mte_check1(s, dirty_addr, is_store, rn != 31, memop); | ||
143 | - | ||
144 | - if (is_vector) { | ||
145 | - if (is_store) { | ||
146 | - do_fp_st(s, rt, clean_addr, memop); | ||
147 | - } else { | ||
148 | - do_fp_ld(s, rt, clean_addr, memop); | ||
149 | - } | ||
150 | - } else { | ||
151 | - TCGv_i64 tcg_rt = cpu_reg(s, rt); | ||
152 | - bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc); | ||
153 | - if (is_store) { | ||
154 | - do_gpr_st(s, tcg_rt, clean_addr, memop, true, rt, iss_sf, false); | ||
155 | - } else { | ||
156 | - do_gpr_ld(s, tcg_rt, clean_addr, memop, | ||
157 | - is_extended, true, rt, iss_sf, false); | ||
158 | - } | ||
159 | - } | ||
160 | -} | ||
161 | - | ||
162 | /* Atomic memory operations | ||
163 | * | ||
164 | * 31 30 27 26 24 22 21 16 15 12 10 5 0 | ||
165 | @@ -XXX,XX +XXX,XX @@ static void disas_ldst_reg(DisasContext *s, uint32_t insn) | ||
166 | return; | ||
109 | } | 167 | } |
110 | @@ -XXX,XX +XXX,XX @@ static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm) | 168 | break; |
111 | clean_addr = new_tmp_a64_local(s); | 169 | - case 1: |
112 | tcg_gen_mov_i64(clean_addr, t0); | 170 | - disas_ldst_reg_unsigned_imm(s, insn, opc, size, rt, is_vector); |
113 | 171 | - return; | |
114 | + if (base != cpu_env) { | ||
115 | + TCGv_ptr b = tcg_temp_local_new_ptr(); | ||
116 | + tcg_gen_mov_ptr(b, base); | ||
117 | + base = b; | ||
118 | + } | ||
119 | + | ||
120 | gen_set_label(loop); | ||
121 | |||
122 | t0 = tcg_temp_new_i64(); | ||
123 | tp = tcg_temp_new_ptr(); | ||
124 | - tcg_gen_add_ptr(tp, cpu_env, i); | ||
125 | + tcg_gen_add_ptr(tp, base, i); | ||
126 | tcg_gen_ld_i64(t0, tp, vofs); | ||
127 | tcg_gen_addi_ptr(i, i, 8); | ||
128 | tcg_temp_free_ptr(tp); | ||
129 | @@ -XXX,XX +XXX,XX @@ static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm) | ||
130 | |||
131 | tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop); | ||
132 | tcg_temp_free_ptr(i); | ||
133 | + | ||
134 | + if (base != cpu_env) { | ||
135 | + tcg_temp_free_ptr(base); | ||
136 | + assert(len_remain == 0); | ||
137 | + } | ||
138 | } | 172 | } |
139 | 173 | unallocated_encoding(s); | |
140 | /* Predicate register stores can be any multiple of 2. */ | ||
141 | if (len_remain) { | ||
142 | t0 = tcg_temp_new_i64(); | ||
143 | - tcg_gen_ld_i64(t0, cpu_env, vofs + len_align); | ||
144 | + tcg_gen_ld_i64(t0, base, vofs + len_align); | ||
145 | |||
146 | switch (len_remain) { | ||
147 | case 2: | ||
148 | @@ -XXX,XX +XXX,XX @@ static bool trans_LDR_zri(DisasContext *s, arg_rri *a) | ||
149 | if (sve_access_check(s)) { | ||
150 | int size = vec_full_reg_size(s); | ||
151 | int off = vec_full_reg_offset(s, a->rd); | ||
152 | - do_ldr(s, off, size, a->rn, a->imm * size); | ||
153 | + gen_sve_ldr(s, cpu_env, off, size, a->rn, a->imm * size); | ||
154 | } | ||
155 | return true; | ||
156 | } | ||
157 | @@ -XXX,XX +XXX,XX @@ static bool trans_LDR_pri(DisasContext *s, arg_rri *a) | ||
158 | if (sve_access_check(s)) { | ||
159 | int size = pred_full_reg_size(s); | ||
160 | int off = pred_full_reg_offset(s, a->rd); | ||
161 | - do_ldr(s, off, size, a->rn, a->imm * size); | ||
162 | + gen_sve_ldr(s, cpu_env, off, size, a->rn, a->imm * size); | ||
163 | } | ||
164 | return true; | ||
165 | } | ||
166 | @@ -XXX,XX +XXX,XX @@ static bool trans_STR_zri(DisasContext *s, arg_rri *a) | ||
167 | if (sve_access_check(s)) { | ||
168 | int size = vec_full_reg_size(s); | ||
169 | int off = vec_full_reg_offset(s, a->rd); | ||
170 | - do_str(s, off, size, a->rn, a->imm * size); | ||
171 | + gen_sve_str(s, cpu_env, off, size, a->rn, a->imm * size); | ||
172 | } | ||
173 | return true; | ||
174 | } | ||
175 | @@ -XXX,XX +XXX,XX @@ static bool trans_STR_pri(DisasContext *s, arg_rri *a) | ||
176 | if (sve_access_check(s)) { | ||
177 | int size = pred_full_reg_size(s); | ||
178 | int off = pred_full_reg_offset(s, a->rd); | ||
179 | - do_str(s, off, size, a->rn, a->imm * size); | ||
180 | + gen_sve_str(s, cpu_env, off, size, a->rn, a->imm * size); | ||
181 | } | ||
182 | return true; | ||
183 | } | 174 | } |
184 | -- | 175 | -- |
185 | 2.25.1 | 176 | 2.34.1 | diff view generated by jsdifflib |
1 | From: Richard Henderson <richard.henderson@linaro.org> | 1 | Convert the LDR and STR instructions which take a register |
---|---|---|---|
2 | plus register offset to decodetree. | ||
2 | 3 | ||
3 | We can reuse the SVE functions for LDR and STR, passing in the | 4 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
4 | base of the ZA vector and a zero offset. | 5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
6 | Message-id: 20230602155223.2040685-15-peter.maydell@linaro.org | ||
7 | --- | ||
8 | target/arm/tcg/a64.decode | 22 +++++ | ||
9 | target/arm/tcg/translate-a64.c | 173 +++++++++++++++------------------ | ||
10 | 2 files changed, 103 insertions(+), 92 deletions(-) | ||
5 | 11 | ||
6 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | 12 | diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode |
7 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
8 | Message-id: 20220708151540.18136-23-richard.henderson@linaro.org | ||
9 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
10 | --- | ||
11 | target/arm/sme.decode | 7 +++++++ | ||
12 | target/arm/translate-sme.c | 24 ++++++++++++++++++++++++ | ||
13 | 2 files changed, 31 insertions(+) | ||
14 | |||
15 | diff --git a/target/arm/sme.decode b/target/arm/sme.decode | ||
16 | index XXXXXXX..XXXXXXX 100644 | 13 | index XXXXXXX..XXXXXXX 100644 |
17 | --- a/target/arm/sme.decode | 14 | --- a/target/arm/tcg/a64.decode |
18 | +++ b/target/arm/sme.decode | 15 | +++ b/target/arm/tcg/a64.decode |
19 | @@ -XXX,XX +XXX,XX @@ LDST1 1110000 0 esz:2 st:1 rm:5 v:1 .. pg:3 rn:5 0 za_imm:4 \ | 16 | @@ -XXX,XX +XXX,XX @@ STR_v_i sz:2 111 1 01 00 ............ ..... ..... @ldst_uimm sign=0 ext= |
20 | &ldst rs=%mova_rs | 17 | STR_v_i 00 111 1 01 10 ............ ..... ..... @ldst_uimm sign=0 ext=0 sz=4 |
21 | LDST1 1110000 111 st:1 rm:5 v:1 .. pg:3 rn:5 0 za_imm:4 \ | 18 | LDR_v_i sz:2 111 1 01 01 ............ ..... ..... @ldst_uimm sign=0 ext=0 |
22 | &ldst esz=4 rs=%mova_rs | 19 | LDR_v_i 00 111 1 01 11 ............ ..... ..... @ldst_uimm sign=0 ext=0 sz=4 |
23 | + | 20 | + |
24 | +&ldstr rv rn imm | 21 | +# Load/store with register offset |
25 | +@ldstr ....... ... . ...... .. ... rn:5 . imm:4 \ | 22 | +&ldst rm rn rt sign ext sz opt s |
26 | + &ldstr rv=%mova_rs | 23 | +@ldst .. ... . .. .. . rm:5 opt:3 s:1 .. rn:5 rt:5 &ldst |
27 | + | 24 | +STR sz:2 111 0 00 00 1 ..... ... . 10 ..... ..... @ldst sign=0 ext=0 |
28 | +LDR 1110000 100 0 000000 .. 000 ..... 0 .... @ldstr | 25 | +LDR 00 111 0 00 01 1 ..... ... . 10 ..... ..... @ldst sign=0 ext=1 sz=0 |
29 | +STR 1110000 100 1 000000 .. 000 ..... 0 .... @ldstr | 26 | +LDR 01 111 0 00 01 1 ..... ... . 10 ..... ..... @ldst sign=0 ext=1 sz=1 |
30 | diff --git a/target/arm/translate-sme.c b/target/arm/translate-sme.c | 27 | +LDR 10 111 0 00 01 1 ..... ... . 10 ..... ..... @ldst sign=0 ext=1 sz=2 |
28 | +LDR 11 111 0 00 01 1 ..... ... . 10 ..... ..... @ldst sign=0 ext=0 sz=3 | ||
29 | +LDR 00 111 0 00 10 1 ..... ... . 10 ..... ..... @ldst sign=1 ext=0 sz=0 | ||
30 | +LDR 01 111 0 00 10 1 ..... ... . 10 ..... ..... @ldst sign=1 ext=0 sz=1 | ||
31 | +LDR 10 111 0 00 10 1 ..... ... . 10 ..... ..... @ldst sign=1 ext=0 sz=2 | ||
32 | +LDR 00 111 0 00 11 1 ..... ... . 10 ..... ..... @ldst sign=1 ext=1 sz=0 | ||
33 | +LDR 01 111 0 00 11 1 ..... ... . 10 ..... ..... @ldst sign=1 ext=1 sz=1 | ||
34 | + | ||
35 | +# PRFM | ||
36 | +NOP 11 111 0 00 10 1 ----- -1- - 10 ----- ----- | ||
37 | + | ||
38 | +STR_v sz:2 111 1 00 00 1 ..... ... . 10 ..... ..... @ldst sign=0 ext=0 | ||
39 | +STR_v 00 111 1 00 10 1 ..... ... . 10 ..... ..... @ldst sign=0 ext=0 sz=4 | ||
40 | +LDR_v sz:2 111 1 00 01 1 ..... ... . 10 ..... ..... @ldst sign=0 ext=0 | ||
41 | +LDR_v 00 111 1 00 11 1 ..... ... . 10 ..... ..... @ldst sign=0 ext=0 sz=4 | ||
42 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
31 | index XXXXXXX..XXXXXXX 100644 | 43 | index XXXXXXX..XXXXXXX 100644 |
32 | --- a/target/arm/translate-sme.c | 44 | --- a/target/arm/tcg/translate-a64.c |
33 | +++ b/target/arm/translate-sme.c | 45 | +++ b/target/arm/tcg/translate-a64.c |
34 | @@ -XXX,XX +XXX,XX @@ static bool trans_LDST1(DisasContext *s, arg_LDST1 *a) | 46 | @@ -XXX,XX +XXX,XX @@ static bool trans_LDR_v_i(DisasContext *s, arg_ldst_imm *a) |
35 | tcg_temp_free_i64(addr); | ||
36 | return true; | 47 | return true; |
37 | } | 48 | } |
38 | + | 49 | |
39 | +typedef void GenLdStR(DisasContext *, TCGv_ptr, int, int, int, int); | 50 | -/* |
40 | + | 51 | - * Load/store (register offset) |
41 | +static bool do_ldst_r(DisasContext *s, arg_ldstr *a, GenLdStR *fn) | 52 | - * |
42 | +{ | 53 | - * 31 30 29 27 26 25 24 23 22 21 20 16 15 13 12 11 10 9 5 4 0 |
43 | + int svl = streaming_vec_reg_size(s); | 54 | - * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+ |
44 | + int imm = a->imm; | 55 | - * |size| 1 1 1 | V | 0 0 | opc | 1 | Rm | opt | S| 1 0 | Rn | Rt | |
45 | + TCGv_ptr base; | 56 | - * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+ |
46 | + | 57 | - * |
47 | + if (!sme_za_enabled_check(s)) { | 58 | - * For non-vector: |
59 | - * size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit | ||
60 | - * opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32 | ||
61 | - * For vector: | ||
62 | - * size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated | ||
63 | - * opc<0>: 0 -> store, 1 -> load | ||
64 | - * V: 1 -> vector/simd | ||
65 | - * opt: extend encoding (see DecodeRegExtend) | ||
66 | - * S: if S=1 then scale (essentially index by sizeof(size)) | ||
67 | - * Rt: register to transfer into/out of | ||
68 | - * Rn: address register or SP for base | ||
69 | - * Rm: offset register or ZR for offset | ||
70 | - */ | ||
71 | -static void disas_ldst_reg_roffset(DisasContext *s, uint32_t insn, | ||
72 | - int opc, | ||
73 | - int size, | ||
74 | - int rt, | ||
75 | - bool is_vector) | ||
76 | +static void op_addr_ldst_pre(DisasContext *s, arg_ldst *a, | ||
77 | + TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr, | ||
78 | + bool is_store, MemOp memop) | ||
79 | { | ||
80 | - int rn = extract32(insn, 5, 5); | ||
81 | - int shift = extract32(insn, 12, 1); | ||
82 | - int rm = extract32(insn, 16, 5); | ||
83 | - int opt = extract32(insn, 13, 3); | ||
84 | - bool is_signed = false; | ||
85 | - bool is_store = false; | ||
86 | - bool is_extended = false; | ||
87 | - TCGv_i64 tcg_rm, clean_addr, dirty_addr; | ||
88 | - MemOp memop; | ||
89 | + TCGv_i64 tcg_rm; | ||
90 | |||
91 | - if (extract32(opt, 1, 1) == 0) { | ||
92 | - unallocated_encoding(s); | ||
93 | - return; | ||
94 | - } | ||
95 | - | ||
96 | - if (is_vector) { | ||
97 | - size |= (opc & 2) << 1; | ||
98 | - if (size > 4) { | ||
99 | - unallocated_encoding(s); | ||
100 | - return; | ||
101 | - } | ||
102 | - is_store = !extract32(opc, 0, 1); | ||
103 | - if (!fp_access_check(s)) { | ||
104 | - return; | ||
105 | - } | ||
106 | - memop = finalize_memop_asimd(s, size); | ||
107 | - } else { | ||
108 | - if (size == 3 && opc == 2) { | ||
109 | - /* PRFM - prefetch */ | ||
110 | - return; | ||
111 | - } | ||
112 | - if (opc == 3 && size > 1) { | ||
113 | - unallocated_encoding(s); | ||
114 | - return; | ||
115 | - } | ||
116 | - is_store = (opc == 0); | ||
117 | - is_signed = !is_store && extract32(opc, 1, 1); | ||
118 | - is_extended = (size < 3) && extract32(opc, 0, 1); | ||
119 | - memop = finalize_memop(s, size + is_signed * MO_SIGN); | ||
120 | - } | ||
121 | - | ||
122 | - if (rn == 31) { | ||
123 | + if (a->rn == 31) { | ||
124 | gen_check_sp_alignment(s); | ||
125 | } | ||
126 | - dirty_addr = read_cpu_reg_sp(s, rn, 1); | ||
127 | + *dirty_addr = read_cpu_reg_sp(s, a->rn, 1); | ||
128 | |||
129 | - tcg_rm = read_cpu_reg(s, rm, 1); | ||
130 | - ext_and_shift_reg(tcg_rm, tcg_rm, opt, shift ? size : 0); | ||
131 | + tcg_rm = read_cpu_reg(s, a->rm, 1); | ||
132 | + ext_and_shift_reg(tcg_rm, tcg_rm, a->opt, a->s ? a->sz : 0); | ||
133 | |||
134 | - tcg_gen_add_i64(dirty_addr, dirty_addr, tcg_rm); | ||
135 | + tcg_gen_add_i64(*dirty_addr, *dirty_addr, tcg_rm); | ||
136 | + *clean_addr = gen_mte_check1(s, *dirty_addr, is_store, true, memop); | ||
137 | +} | ||
138 | |||
139 | - clean_addr = gen_mte_check1(s, dirty_addr, is_store, true, memop); | ||
140 | +static bool trans_LDR(DisasContext *s, arg_ldst *a) | ||
141 | +{ | ||
142 | + TCGv_i64 clean_addr, dirty_addr, tcg_rt; | ||
143 | + bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); | ||
144 | + MemOp memop; | ||
145 | |||
146 | - if (is_vector) { | ||
147 | - if (is_store) { | ||
148 | - do_fp_st(s, rt, clean_addr, memop); | ||
149 | - } else { | ||
150 | - do_fp_ld(s, rt, clean_addr, memop); | ||
151 | - } | ||
152 | - } else { | ||
153 | - TCGv_i64 tcg_rt = cpu_reg(s, rt); | ||
154 | - bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc); | ||
155 | - | ||
156 | - if (is_store) { | ||
157 | - do_gpr_st(s, tcg_rt, clean_addr, memop, | ||
158 | - true, rt, iss_sf, false); | ||
159 | - } else { | ||
160 | - do_gpr_ld(s, tcg_rt, clean_addr, memop, | ||
161 | - is_extended, true, rt, iss_sf, false); | ||
162 | - } | ||
163 | + if (extract32(a->opt, 1, 1) == 0) { | ||
164 | + return false; | ||
165 | } | ||
166 | + | ||
167 | + memop = finalize_memop(s, a->sz + a->sign * MO_SIGN); | ||
168 | + op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, false, memop); | ||
169 | + tcg_rt = cpu_reg(s, a->rt); | ||
170 | + do_gpr_ld(s, tcg_rt, clean_addr, memop, | ||
171 | + a->ext, true, a->rt, iss_sf, false); | ||
172 | + return true; | ||
173 | +} | ||
174 | + | ||
175 | +static bool trans_STR(DisasContext *s, arg_ldst *a) | ||
176 | +{ | ||
177 | + TCGv_i64 clean_addr, dirty_addr, tcg_rt; | ||
178 | + bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); | ||
179 | + MemOp memop; | ||
180 | + | ||
181 | + if (extract32(a->opt, 1, 1) == 0) { | ||
182 | + return false; | ||
183 | + } | ||
184 | + | ||
185 | + memop = finalize_memop(s, a->sz); | ||
186 | + op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, true, memop); | ||
187 | + tcg_rt = cpu_reg(s, a->rt); | ||
188 | + do_gpr_st(s, tcg_rt, clean_addr, memop, true, a->rt, iss_sf, false); | ||
189 | + return true; | ||
190 | +} | ||
191 | + | ||
192 | +static bool trans_LDR_v(DisasContext *s, arg_ldst *a) | ||
193 | +{ | ||
194 | + TCGv_i64 clean_addr, dirty_addr; | ||
195 | + MemOp memop; | ||
196 | + | ||
197 | + if (extract32(a->opt, 1, 1) == 0) { | ||
198 | + return false; | ||
199 | + } | ||
200 | + | ||
201 | + if (!fp_access_check(s)) { | ||
48 | + return true; | 202 | + return true; |
49 | + } | 203 | + } |
50 | + | 204 | + |
51 | + /* ZA[n] equates to ZA0H.B[n]. */ | 205 | + memop = finalize_memop_asimd(s, a->sz); |
52 | + base = get_tile_rowcol(s, MO_8, a->rv, imm, false); | 206 | + op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, false, memop); |
53 | + | 207 | + do_fp_ld(s, a->rt, clean_addr, memop); |
54 | + fn(s, base, 0, svl, a->rn, imm * svl); | 208 | + return true; |
55 | + | 209 | +} |
56 | + tcg_temp_free_ptr(base); | 210 | + |
57 | + return true; | 211 | +static bool trans_STR_v(DisasContext *s, arg_ldst *a) |
58 | +} | 212 | +{ |
59 | + | 213 | + TCGv_i64 clean_addr, dirty_addr; |
60 | +TRANS_FEAT(LDR, aa64_sme, do_ldst_r, a, gen_sve_ldr) | 214 | + MemOp memop; |
61 | +TRANS_FEAT(STR, aa64_sme, do_ldst_r, a, gen_sve_str) | 215 | + |
216 | + if (extract32(a->opt, 1, 1) == 0) { | ||
217 | + return false; | ||
218 | + } | ||
219 | + | ||
220 | + if (!fp_access_check(s)) { | ||
221 | + return true; | ||
222 | + } | ||
223 | + | ||
224 | + memop = finalize_memop_asimd(s, a->sz); | ||
225 | + op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, true, memop); | ||
226 | + do_fp_st(s, a->rt, clean_addr, memop); | ||
227 | + return true; | ||
228 | } | ||
229 | |||
230 | /* Atomic memory operations | ||
231 | @@ -XXX,XX +XXX,XX @@ static void disas_ldst_ldapr_stlr(DisasContext *s, uint32_t insn) | ||
232 | static void disas_ldst_reg(DisasContext *s, uint32_t insn) | ||
233 | { | ||
234 | int rt = extract32(insn, 0, 5); | ||
235 | - int opc = extract32(insn, 22, 2); | ||
236 | bool is_vector = extract32(insn, 26, 1); | ||
237 | int size = extract32(insn, 30, 2); | ||
238 | |||
239 | @@ -XXX,XX +XXX,XX @@ static void disas_ldst_reg(DisasContext *s, uint32_t insn) | ||
240 | disas_ldst_atomic(s, insn, size, rt, is_vector); | ||
241 | return; | ||
242 | case 2: | ||
243 | - disas_ldst_reg_roffset(s, insn, opc, size, rt, is_vector); | ||
244 | - return; | ||
245 | + break; | ||
246 | default: | ||
247 | disas_ldst_pac(s, insn, size, rt, is_vector); | ||
248 | return; | ||
62 | -- | 249 | -- |
63 | 2.25.1 | 250 | 2.34.1 | diff view generated by jsdifflib |
1 | From: Richard Henderson <richard.henderson@linaro.org> | 1 | Convert the insns in the atomic memory operations group to |
---|---|---|---|
2 | decodetree. | ||
2 | 3 | ||
3 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
4 | Message-id: 20220708151540.18136-25-richard.henderson@linaro.org | ||
5 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 4 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
6 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | 5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
6 | Message-id: 20230602155223.2040685-16-peter.maydell@linaro.org | ||
7 | --- | 7 | --- |
8 | target/arm/helper-sme.h | 5 +++ | 8 | target/arm/tcg/a64.decode | 15 ++++ |
9 | target/arm/sme.decode | 9 +++++ | 9 | target/arm/tcg/translate-a64.c | 153 ++++++++++++--------------------- |
10 | target/arm/sme_helper.c | 69 ++++++++++++++++++++++++++++++++++++++ | 10 | 2 files changed, 70 insertions(+), 98 deletions(-) |
11 | target/arm/translate-sme.c | 32 ++++++++++++++++++ | ||
12 | 4 files changed, 115 insertions(+) | ||
13 | 11 | ||
14 | diff --git a/target/arm/helper-sme.h b/target/arm/helper-sme.h | 12 | diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode |
15 | index XXXXXXX..XXXXXXX 100644 | 13 | index XXXXXXX..XXXXXXX 100644 |
16 | --- a/target/arm/helper-sme.h | 14 | --- a/target/arm/tcg/a64.decode |
17 | +++ b/target/arm/helper-sme.h | 15 | +++ b/target/arm/tcg/a64.decode |
18 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(sme_addha_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) | 16 | @@ -XXX,XX +XXX,XX @@ STR_v sz:2 111 1 00 00 1 ..... ... . 10 ..... ..... @ldst sign=0 ext=0 |
19 | DEF_HELPER_FLAGS_5(sme_addva_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) | 17 | STR_v 00 111 1 00 10 1 ..... ... . 10 ..... ..... @ldst sign=0 ext=0 sz=4 |
20 | DEF_HELPER_FLAGS_5(sme_addha_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) | 18 | LDR_v sz:2 111 1 00 01 1 ..... ... . 10 ..... ..... @ldst sign=0 ext=0 |
21 | DEF_HELPER_FLAGS_5(sme_addva_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) | 19 | LDR_v 00 111 1 00 11 1 ..... ... . 10 ..... ..... @ldst sign=0 ext=0 sz=4 |
22 | + | 20 | + |
23 | +DEF_HELPER_FLAGS_7(sme_fmopa_s, TCG_CALL_NO_RWG, | 21 | +# Atomic memory operations |
24 | + void, ptr, ptr, ptr, ptr, ptr, ptr, i32) | 22 | +&atomic rs rn rt a r sz |
25 | +DEF_HELPER_FLAGS_7(sme_fmopa_d, TCG_CALL_NO_RWG, | 23 | +@atomic sz:2 ... . .. a:1 r:1 . rs:5 . ... .. rn:5 rt:5 &atomic |
26 | + void, ptr, ptr, ptr, ptr, ptr, ptr, i32) | 24 | +LDADD .. 111 0 00 . . 1 ..... 0000 00 ..... ..... @atomic |
27 | diff --git a/target/arm/sme.decode b/target/arm/sme.decode | 25 | +LDCLR .. 111 0 00 . . 1 ..... 0001 00 ..... ..... @atomic |
26 | +LDEOR .. 111 0 00 . . 1 ..... 0010 00 ..... ..... @atomic | ||
27 | +LDSET .. 111 0 00 . . 1 ..... 0011 00 ..... ..... @atomic | ||
28 | +LDSMAX .. 111 0 00 . . 1 ..... 0100 00 ..... ..... @atomic | ||
29 | +LDSMIN .. 111 0 00 . . 1 ..... 0101 00 ..... ..... @atomic | ||
30 | +LDUMAX .. 111 0 00 . . 1 ..... 0110 00 ..... ..... @atomic | ||
31 | +LDUMIN .. 111 0 00 . . 1 ..... 0111 00 ..... ..... @atomic | ||
32 | +SWP .. 111 0 00 . . 1 ..... 1000 00 ..... ..... @atomic | ||
33 | + | ||
34 | +LDAPR sz:2 111 0 00 1 0 1 11111 1100 00 rn:5 rt:5 | ||
35 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
28 | index XXXXXXX..XXXXXXX 100644 | 36 | index XXXXXXX..XXXXXXX 100644 |
29 | --- a/target/arm/sme.decode | 37 | --- a/target/arm/tcg/translate-a64.c |
30 | +++ b/target/arm/sme.decode | 38 | +++ b/target/arm/tcg/translate-a64.c |
31 | @@ -XXX,XX +XXX,XX @@ ADDHA_s 11000000 10 01000 0 ... ... ..... 000 .. @adda_32 | 39 | @@ -XXX,XX +XXX,XX @@ static bool trans_STR_v(DisasContext *s, arg_ldst *a) |
32 | ADDVA_s 11000000 10 01000 1 ... ... ..... 000 .. @adda_32 | 40 | return true; |
33 | ADDHA_d 11000000 11 01000 0 ... ... ..... 00 ... @adda_64 | 41 | } |
34 | ADDVA_d 11000000 11 01000 1 ... ... ..... 00 ... @adda_64 | 42 | |
35 | + | 43 | -/* Atomic memory operations |
36 | +### SME Outer Product | 44 | - * |
37 | + | 45 | - * 31 30 27 26 24 22 21 16 15 12 10 5 0 |
38 | +&op zad zn zm pm pn sub:bool | 46 | - * +------+-------+---+-----+-----+---+----+----+-----+-----+----+-----+ |
39 | +@op_32 ........ ... zm:5 pm:3 pn:3 zn:5 sub:1 .. zad:2 &op | 47 | - * | size | 1 1 1 | V | 0 0 | A R | 1 | Rs | o3 | opc | 0 0 | Rn | Rt | |
40 | +@op_64 ........ ... zm:5 pm:3 pn:3 zn:5 sub:1 . zad:3 &op | 48 | - * +------+-------+---+-----+-----+--------+----+-----+-----+----+-----+ |
41 | + | 49 | - * |
42 | +FMOPA_s 10000000 100 ..... ... ... ..... . 00 .. @op_32 | 50 | - * Rt: the result register |
43 | +FMOPA_d 10000000 110 ..... ... ... ..... . 0 ... @op_64 | 51 | - * Rn: base address or SP |
44 | diff --git a/target/arm/sme_helper.c b/target/arm/sme_helper.c | 52 | - * Rs: the source register for the operation |
45 | index XXXXXXX..XXXXXXX 100644 | 53 | - * V: vector flag (always 0 as of v8.3) |
46 | --- a/target/arm/sme_helper.c | 54 | - * A: acquire flag |
47 | +++ b/target/arm/sme_helper.c | 55 | - * R: release flag |
48 | @@ -XXX,XX +XXX,XX @@ | 56 | - */ |
49 | #include "exec/cpu_ldst.h" | 57 | -static void disas_ldst_atomic(DisasContext *s, uint32_t insn, |
50 | #include "exec/exec-all.h" | 58 | - int size, int rt, bool is_vector) |
51 | #include "qemu/int128.h" | 59 | + |
52 | +#include "fpu/softfloat.h" | 60 | +static bool do_atomic_ld(DisasContext *s, arg_atomic *a, AtomicThreeOpFn *fn, |
53 | #include "vec_internal.h" | 61 | + int sign, bool invert) |
54 | #include "sve_ldst_internal.h" | 62 | { |
55 | 63 | - int rs = extract32(insn, 16, 5); | |
56 | @@ -XXX,XX +XXX,XX @@ void HELPER(sme_addva_d)(void *vzda, void *vzn, void *vpn, | 64 | - int rn = extract32(insn, 5, 5); |
65 | - int o3_opc = extract32(insn, 12, 4); | ||
66 | - bool r = extract32(insn, 22, 1); | ||
67 | - bool a = extract32(insn, 23, 1); | ||
68 | - TCGv_i64 tcg_rs, tcg_rt, clean_addr; | ||
69 | - AtomicThreeOpFn *fn = NULL; | ||
70 | - MemOp mop = size; | ||
71 | + MemOp mop = a->sz | sign; | ||
72 | + TCGv_i64 clean_addr, tcg_rs, tcg_rt; | ||
73 | |||
74 | - if (is_vector || !dc_isar_feature(aa64_atomics, s)) { | ||
75 | - unallocated_encoding(s); | ||
76 | - return; | ||
77 | - } | ||
78 | - switch (o3_opc) { | ||
79 | - case 000: /* LDADD */ | ||
80 | - fn = tcg_gen_atomic_fetch_add_i64; | ||
81 | - break; | ||
82 | - case 001: /* LDCLR */ | ||
83 | - fn = tcg_gen_atomic_fetch_and_i64; | ||
84 | - break; | ||
85 | - case 002: /* LDEOR */ | ||
86 | - fn = tcg_gen_atomic_fetch_xor_i64; | ||
87 | - break; | ||
88 | - case 003: /* LDSET */ | ||
89 | - fn = tcg_gen_atomic_fetch_or_i64; | ||
90 | - break; | ||
91 | - case 004: /* LDSMAX */ | ||
92 | - fn = tcg_gen_atomic_fetch_smax_i64; | ||
93 | - mop |= MO_SIGN; | ||
94 | - break; | ||
95 | - case 005: /* LDSMIN */ | ||
96 | - fn = tcg_gen_atomic_fetch_smin_i64; | ||
97 | - mop |= MO_SIGN; | ||
98 | - break; | ||
99 | - case 006: /* LDUMAX */ | ||
100 | - fn = tcg_gen_atomic_fetch_umax_i64; | ||
101 | - break; | ||
102 | - case 007: /* LDUMIN */ | ||
103 | - fn = tcg_gen_atomic_fetch_umin_i64; | ||
104 | - break; | ||
105 | - case 010: /* SWP */ | ||
106 | - fn = tcg_gen_atomic_xchg_i64; | ||
107 | - break; | ||
108 | - case 014: /* LDAPR, LDAPRH, LDAPRB */ | ||
109 | - if (!dc_isar_feature(aa64_rcpc_8_3, s) || | ||
110 | - rs != 31 || a != 1 || r != 0) { | ||
111 | - unallocated_encoding(s); | ||
112 | - return; | ||
113 | - } | ||
114 | - break; | ||
115 | - default: | ||
116 | - unallocated_encoding(s); | ||
117 | - return; | ||
118 | - } | ||
119 | - | ||
120 | - if (rn == 31) { | ||
121 | + if (a->rn == 31) { | ||
122 | gen_check_sp_alignment(s); | ||
123 | } | ||
124 | - | ||
125 | - mop = check_atomic_align(s, rn, mop); | ||
126 | - clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), false, rn != 31, mop); | ||
127 | - | ||
128 | - if (o3_opc == 014) { | ||
129 | - /* | ||
130 | - * LDAPR* are a special case because they are a simple load, not a | ||
131 | - * fetch-and-do-something op. | ||
132 | - * The architectural consistency requirements here are weaker than | ||
133 | - * full load-acquire (we only need "load-acquire processor consistent"), | ||
134 | - * but we choose to implement them as full LDAQ. | ||
135 | - */ | ||
136 | - do_gpr_ld(s, cpu_reg(s, rt), clean_addr, mop, false, | ||
137 | - true, rt, disas_ldst_compute_iss_sf(size, false, 0), true); | ||
138 | - tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); | ||
139 | - return; | ||
140 | - } | ||
141 | - | ||
142 | - tcg_rs = read_cpu_reg(s, rs, true); | ||
143 | - tcg_rt = cpu_reg(s, rt); | ||
144 | - | ||
145 | - if (o3_opc == 1) { /* LDCLR */ | ||
146 | + mop = check_atomic_align(s, a->rn, mop); | ||
147 | + clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), false, | ||
148 | + a->rn != 31, mop); | ||
149 | + tcg_rs = read_cpu_reg(s, a->rs, true); | ||
150 | + tcg_rt = cpu_reg(s, a->rt); | ||
151 | + if (invert) { | ||
152 | tcg_gen_not_i64(tcg_rs, tcg_rs); | ||
153 | } | ||
154 | - | ||
155 | - /* The tcg atomic primitives are all full barriers. Therefore we | ||
156 | + /* | ||
157 | + * The tcg atomic primitives are all full barriers. Therefore we | ||
158 | * can ignore the Acquire and Release bits of this instruction. | ||
159 | */ | ||
160 | fn(tcg_rt, clean_addr, tcg_rs, get_mem_index(s), mop); | ||
161 | |||
162 | if (mop & MO_SIGN) { | ||
163 | - switch (size) { | ||
164 | + switch (a->sz) { | ||
165 | case MO_8: | ||
166 | tcg_gen_ext8u_i64(tcg_rt, tcg_rt); | ||
167 | break; | ||
168 | @@ -XXX,XX +XXX,XX @@ static void disas_ldst_atomic(DisasContext *s, uint32_t insn, | ||
169 | g_assert_not_reached(); | ||
57 | } | 170 | } |
58 | } | 171 | } |
59 | } | ||
60 | + | ||
61 | +void HELPER(sme_fmopa_s)(void *vza, void *vzn, void *vzm, void *vpn, | ||
62 | + void *vpm, void *vst, uint32_t desc) | ||
63 | +{ | ||
64 | + intptr_t row, col, oprsz = simd_maxsz(desc); | ||
65 | + uint32_t neg = simd_data(desc) << 31; | ||
66 | + uint16_t *pn = vpn, *pm = vpm; | ||
67 | + float_status fpst; | ||
68 | + | ||
69 | + /* | ||
70 | + * Make a copy of float_status because this operation does not | ||
71 | + * update the cumulative fp exception status. It also produces | ||
72 | + * default nans. | ||
73 | + */ | ||
74 | + fpst = *(float_status *)vst; | ||
75 | + set_default_nan_mode(true, &fpst); | ||
76 | + | ||
77 | + for (row = 0; row < oprsz; ) { | ||
78 | + uint16_t pa = pn[H2(row >> 4)]; | ||
79 | + do { | ||
80 | + if (pa & 1) { | ||
81 | + void *vza_row = vza + tile_vslice_offset(row); | ||
82 | + uint32_t n = *(uint32_t *)(vzn + H1_4(row)) ^ neg; | ||
83 | + | ||
84 | + for (col = 0; col < oprsz; ) { | ||
85 | + uint16_t pb = pm[H2(col >> 4)]; | ||
86 | + do { | ||
87 | + if (pb & 1) { | ||
88 | + uint32_t *a = vza_row + H1_4(col); | ||
89 | + uint32_t *m = vzm + H1_4(col); | ||
90 | + *a = float32_muladd(n, *m, *a, 0, vst); | ||
91 | + } | ||
92 | + col += 4; | ||
93 | + pb >>= 4; | ||
94 | + } while (col & 15); | ||
95 | + } | ||
96 | + } | ||
97 | + row += 4; | ||
98 | + pa >>= 4; | ||
99 | + } while (row & 15); | ||
100 | + } | ||
101 | +} | ||
102 | + | ||
103 | +void HELPER(sme_fmopa_d)(void *vza, void *vzn, void *vzm, void *vpn, | ||
104 | + void *vpm, void *vst, uint32_t desc) | ||
105 | +{ | ||
106 | + intptr_t row, col, oprsz = simd_oprsz(desc) / 8; | ||
107 | + uint64_t neg = (uint64_t)simd_data(desc) << 63; | ||
108 | + uint64_t *za = vza, *zn = vzn, *zm = vzm; | ||
109 | + uint8_t *pn = vpn, *pm = vpm; | ||
110 | + float_status fpst = *(float_status *)vst; | ||
111 | + | ||
112 | + set_default_nan_mode(true, &fpst); | ||
113 | + | ||
114 | + for (row = 0; row < oprsz; ++row) { | ||
115 | + if (pn[H1(row)] & 1) { | ||
116 | + uint64_t *za_row = &za[tile_vslice_index(row)]; | ||
117 | + uint64_t n = zn[row] ^ neg; | ||
118 | + | ||
119 | + for (col = 0; col < oprsz; ++col) { | ||
120 | + if (pm[H1(col)] & 1) { | ||
121 | + uint64_t *a = &za_row[col]; | ||
122 | + *a = float64_muladd(n, zm[col], *a, 0, &fpst); | ||
123 | + } | ||
124 | + } | ||
125 | + } | ||
126 | + } | ||
127 | +} | ||
128 | diff --git a/target/arm/translate-sme.c b/target/arm/translate-sme.c | ||
129 | index XXXXXXX..XXXXXXX 100644 | ||
130 | --- a/target/arm/translate-sme.c | ||
131 | +++ b/target/arm/translate-sme.c | ||
132 | @@ -XXX,XX +XXX,XX @@ TRANS_FEAT(ADDHA_s, aa64_sme, do_adda, a, MO_32, gen_helper_sme_addha_s) | ||
133 | TRANS_FEAT(ADDVA_s, aa64_sme, do_adda, a, MO_32, gen_helper_sme_addva_s) | ||
134 | TRANS_FEAT(ADDHA_d, aa64_sme_i16i64, do_adda, a, MO_64, gen_helper_sme_addha_d) | ||
135 | TRANS_FEAT(ADDVA_d, aa64_sme_i16i64, do_adda, a, MO_64, gen_helper_sme_addva_d) | ||
136 | + | ||
137 | +static bool do_outprod_fpst(DisasContext *s, arg_op *a, MemOp esz, | ||
138 | + gen_helper_gvec_5_ptr *fn) | ||
139 | +{ | ||
140 | + int svl = streaming_vec_reg_size(s); | ||
141 | + uint32_t desc = simd_desc(svl, svl, a->sub); | ||
142 | + TCGv_ptr za, zn, zm, pn, pm, fpst; | ||
143 | + | ||
144 | + if (!sme_smza_enabled_check(s)) { | ||
145 | + return true; | ||
146 | + } | ||
147 | + | ||
148 | + /* Sum XZR+zad to find ZAd. */ | ||
149 | + za = get_tile_rowcol(s, esz, 31, a->zad, false); | ||
150 | + zn = vec_full_reg_ptr(s, a->zn); | ||
151 | + zm = vec_full_reg_ptr(s, a->zm); | ||
152 | + pn = pred_full_reg_ptr(s, a->pn); | ||
153 | + pm = pred_full_reg_ptr(s, a->pm); | ||
154 | + fpst = fpstatus_ptr(FPST_FPCR); | ||
155 | + | ||
156 | + fn(za, zn, zm, pn, pm, fpst, tcg_constant_i32(desc)); | ||
157 | + | ||
158 | + tcg_temp_free_ptr(za); | ||
159 | + tcg_temp_free_ptr(zn); | ||
160 | + tcg_temp_free_ptr(pn); | ||
161 | + tcg_temp_free_ptr(pm); | ||
162 | + tcg_temp_free_ptr(fpst); | ||
163 | + return true; | 172 | + return true; |
164 | +} | 173 | +} |
165 | + | 174 | + |
166 | +TRANS_FEAT(FMOPA_s, aa64_sme, do_outprod_fpst, a, MO_32, gen_helper_sme_fmopa_s) | 175 | +TRANS_FEAT(LDADD, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_add_i64, 0, false) |
167 | +TRANS_FEAT(FMOPA_d, aa64_sme_f64f64, do_outprod_fpst, a, MO_64, gen_helper_sme_fmopa_d) | 176 | +TRANS_FEAT(LDCLR, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_and_i64, 0, true) |
177 | +TRANS_FEAT(LDEOR, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_xor_i64, 0, false) | ||
178 | +TRANS_FEAT(LDSET, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_or_i64, 0, false) | ||
179 | +TRANS_FEAT(LDSMAX, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_smax_i64, MO_SIGN, false) | ||
180 | +TRANS_FEAT(LDSMIN, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_smin_i64, MO_SIGN, false) | ||
181 | +TRANS_FEAT(LDUMAX, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_umax_i64, 0, false) | ||
182 | +TRANS_FEAT(LDUMIN, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_umin_i64, 0, false) | ||
183 | +TRANS_FEAT(SWP, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_xchg_i64, 0, false) | ||
184 | + | ||
185 | +static bool trans_LDAPR(DisasContext *s, arg_LDAPR *a) | ||
186 | +{ | ||
187 | + bool iss_sf = ldst_iss_sf(a->sz, false, false); | ||
188 | + TCGv_i64 clean_addr; | ||
189 | + MemOp mop; | ||
190 | + | ||
191 | + if (!dc_isar_feature(aa64_atomics, s) || | ||
192 | + !dc_isar_feature(aa64_rcpc_8_3, s)) { | ||
193 | + return false; | ||
194 | + } | ||
195 | + if (a->rn == 31) { | ||
196 | + gen_check_sp_alignment(s); | ||
197 | + } | ||
198 | + mop = check_atomic_align(s, a->rn, a->sz); | ||
199 | + clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), false, | ||
200 | + a->rn != 31, mop); | ||
201 | + /* | ||
202 | + * LDAPR* are a special case because they are a simple load, not a | ||
203 | + * fetch-and-do-something op. | ||
204 | + * The architectural consistency requirements here are weaker than | ||
205 | + * full load-acquire (we only need "load-acquire processor consistent"), | ||
206 | + * but we choose to implement them as full LDAQ. | ||
207 | + */ | ||
208 | + do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, mop, false, | ||
209 | + true, a->rt, iss_sf, true); | ||
210 | + tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); | ||
211 | + return true; | ||
212 | } | ||
213 | |||
214 | /* | ||
215 | @@ -XXX,XX +XXX,XX @@ static void disas_ldst_reg(DisasContext *s, uint32_t insn) | ||
216 | } | ||
217 | switch (extract32(insn, 10, 2)) { | ||
218 | case 0: | ||
219 | - disas_ldst_atomic(s, insn, size, rt, is_vector); | ||
220 | - return; | ||
221 | case 2: | ||
222 | break; | ||
223 | default: | ||
168 | -- | 224 | -- |
169 | 2.25.1 | 225 | 2.34.1 | diff view generated by jsdifflib |
1 | From: Richard Henderson <richard.henderson@linaro.org> | 1 | Convert the instructions in the load/store register (pointer |
---|---|---|---|
2 | authentication) group to decodetree: LDRAA, LDRAB. | ||
2 | 3 | ||
3 | We can reuse the SVE functions for implementing moves to/from | 4 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
4 | horizontal tile slices, but we need new ones for moves to/from | 5 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> |
5 | vertical tile slices. | 6 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
7 | Message-id: 20230602155223.2040685-17-peter.maydell@linaro.org | ||
8 | --- | ||
9 | target/arm/tcg/a64.decode | 7 +++ | ||
10 | target/arm/tcg/translate-a64.c | 83 +++++++--------------------------- | ||
11 | 2 files changed, 23 insertions(+), 67 deletions(-) | ||
6 | 12 | ||
7 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | 13 | diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode |
8 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
9 | Message-id: 20220708151540.18136-20-richard.henderson@linaro.org | ||
10 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
11 | --- | ||
12 | target/arm/helper-sme.h | 12 +++ | ||
13 | target/arm/helper-sve.h | 2 + | ||
14 | target/arm/translate-a64.h | 8 ++ | ||
15 | target/arm/translate.h | 5 ++ | ||
16 | target/arm/sme.decode | 15 ++++ | ||
17 | target/arm/sme_helper.c | 151 ++++++++++++++++++++++++++++++++++++- | ||
18 | target/arm/sve_helper.c | 12 +++ | ||
19 | target/arm/translate-sme.c | 127 +++++++++++++++++++++++++++++++ | ||
20 | 8 files changed, 331 insertions(+), 1 deletion(-) | ||
21 | |||
22 | diff --git a/target/arm/helper-sme.h b/target/arm/helper-sme.h | ||
23 | index XXXXXXX..XXXXXXX 100644 | 14 | index XXXXXXX..XXXXXXX 100644 |
24 | --- a/target/arm/helper-sme.h | 15 | --- a/target/arm/tcg/a64.decode |
25 | +++ b/target/arm/helper-sme.h | 16 | +++ b/target/arm/tcg/a64.decode |
26 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_2(set_pstate_sm, TCG_CALL_NO_RWG, void, env, i32) | 17 | @@ -XXX,XX +XXX,XX @@ LDUMIN .. 111 0 00 . . 1 ..... 0111 00 ..... ..... @atomic |
27 | DEF_HELPER_FLAGS_2(set_pstate_za, TCG_CALL_NO_RWG, void, env, i32) | 18 | SWP .. 111 0 00 . . 1 ..... 1000 00 ..... ..... @atomic |
28 | 19 | ||
29 | DEF_HELPER_FLAGS_3(sme_zero, TCG_CALL_NO_RWG, void, env, i32, i32) | 20 | LDAPR sz:2 111 0 00 1 0 1 11111 1100 00 rn:5 rt:5 |
30 | + | 21 | + |
31 | +/* Move to/from vertical array slices, i.e. columns, so 'c'. */ | 22 | +# Load/store register (pointer authentication) |
32 | +DEF_HELPER_FLAGS_4(sme_mova_cz_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | 23 | + |
33 | +DEF_HELPER_FLAGS_4(sme_mova_zc_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | 24 | +# LDRA immediate is 10 bits signed and scaled, but the bits aren't all contiguous |
34 | +DEF_HELPER_FLAGS_4(sme_mova_cz_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | 25 | +%ldra_imm 22:s1 12:9 !function=times_2 |
35 | +DEF_HELPER_FLAGS_4(sme_mova_zc_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | 26 | + |
36 | +DEF_HELPER_FLAGS_4(sme_mova_cz_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | 27 | +LDRA 11 111 0 00 m:1 . 1 ......... w:1 1 rn:5 rt:5 imm=%ldra_imm |
37 | +DEF_HELPER_FLAGS_4(sme_mova_zc_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | 28 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c |
38 | +DEF_HELPER_FLAGS_4(sme_mova_cz_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
39 | +DEF_HELPER_FLAGS_4(sme_mova_zc_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
40 | +DEF_HELPER_FLAGS_4(sme_mova_cz_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
41 | +DEF_HELPER_FLAGS_4(sme_mova_zc_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
42 | diff --git a/target/arm/helper-sve.h b/target/arm/helper-sve.h | ||
43 | index XXXXXXX..XXXXXXX 100644 | 29 | index XXXXXXX..XXXXXXX 100644 |
44 | --- a/target/arm/helper-sve.h | 30 | --- a/target/arm/tcg/translate-a64.c |
45 | +++ b/target/arm/helper-sve.h | 31 | +++ b/target/arm/tcg/translate-a64.c |
46 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(sve_sel_zpzz_s, TCG_CALL_NO_RWG, | 32 | @@ -XXX,XX +XXX,XX @@ static bool trans_LDAPR(DisasContext *s, arg_LDAPR *a) |
47 | void, ptr, ptr, ptr, ptr, i32) | 33 | return true; |
48 | DEF_HELPER_FLAGS_5(sve_sel_zpzz_d, TCG_CALL_NO_RWG, | ||
49 | void, ptr, ptr, ptr, ptr, i32) | ||
50 | +DEF_HELPER_FLAGS_5(sve_sel_zpzz_q, TCG_CALL_NO_RWG, | ||
51 | + void, ptr, ptr, ptr, ptr, i32) | ||
52 | |||
53 | DEF_HELPER_FLAGS_5(sve2_addp_zpzz_b, TCG_CALL_NO_RWG, | ||
54 | void, ptr, ptr, ptr, ptr, i32) | ||
55 | diff --git a/target/arm/translate-a64.h b/target/arm/translate-a64.h | ||
56 | index XXXXXXX..XXXXXXX 100644 | ||
57 | --- a/target/arm/translate-a64.h | ||
58 | +++ b/target/arm/translate-a64.h | ||
59 | @@ -XXX,XX +XXX,XX @@ static inline int pred_gvec_reg_size(DisasContext *s) | ||
60 | return size_for_gvec(pred_full_reg_size(s)); | ||
61 | } | 34 | } |
62 | 35 | ||
63 | +/* Return a newly allocated pointer to the predicate register. */ | 36 | -/* |
64 | +static inline TCGv_ptr pred_full_reg_ptr(DisasContext *s, int regno) | 37 | - * PAC memory operations |
65 | +{ | 38 | - * |
66 | + TCGv_ptr ret = tcg_temp_new_ptr(); | 39 | - * 31 30 27 26 24 22 21 12 11 10 5 0 |
67 | + tcg_gen_addi_ptr(ret, cpu_env, pred_full_reg_offset(s, regno)); | 40 | - * +------+-------+---+-----+-----+---+--------+---+---+----+-----+ |
68 | + return ret; | 41 | - * | size | 1 1 1 | V | 0 0 | M S | 1 | imm9 | W | 1 | Rn | Rt | |
69 | +} | 42 | - * +------+-------+---+-----+-----+---+--------+---+---+----+-----+ |
70 | + | 43 | - * |
71 | bool disas_sve(DisasContext *, uint32_t); | 44 | - * Rt: the result register |
72 | bool disas_sme(DisasContext *, uint32_t); | 45 | - * Rn: base address or SP |
73 | 46 | - * V: vector flag (always 0 as of v8.3) | |
74 | diff --git a/target/arm/translate.h b/target/arm/translate.h | 47 | - * M: clear for key DA, set for key DB |
75 | index XXXXXXX..XXXXXXX 100644 | 48 | - * W: pre-indexing flag |
76 | --- a/target/arm/translate.h | 49 | - * S: sign for imm9. |
77 | +++ b/target/arm/translate.h | 50 | - */ |
78 | @@ -XXX,XX +XXX,XX @@ static inline int plus_2(DisasContext *s, int x) | 51 | -static void disas_ldst_pac(DisasContext *s, uint32_t insn, |
79 | return x + 2; | 52 | - int size, int rt, bool is_vector) |
80 | } | 53 | +static bool trans_LDRA(DisasContext *s, arg_LDRA *a) |
81 | |||
82 | +static inline int plus_12(DisasContext *s, int x) | ||
83 | +{ | ||
84 | + return x + 12; | ||
85 | +} | ||
86 | + | ||
87 | static inline int times_2(DisasContext *s, int x) | ||
88 | { | 54 | { |
89 | return x * 2; | 55 | - int rn = extract32(insn, 5, 5); |
90 | diff --git a/target/arm/sme.decode b/target/arm/sme.decode | 56 | - bool is_wback = extract32(insn, 11, 1); |
91 | index XXXXXXX..XXXXXXX 100644 | 57 | - bool use_key_a = !extract32(insn, 23, 1); |
92 | --- a/target/arm/sme.decode | 58 | - int offset; |
93 | +++ b/target/arm/sme.decode | 59 | TCGv_i64 clean_addr, dirty_addr, tcg_rt; |
94 | @@ -XXX,XX +XXX,XX @@ | 60 | MemOp memop; |
95 | ### SME Misc | 61 | |
96 | 62 | - if (size != 3 || is_vector || !dc_isar_feature(aa64_pauth, s)) { | |
97 | ZERO 11000000 00 001 00000000000 imm:8 | 63 | - unallocated_encoding(s); |
98 | + | 64 | - return; |
99 | +### SME Move into/from Array | 65 | + /* Load with pointer authentication */ |
100 | + | 66 | + if (!dc_isar_feature(aa64_pauth, s)) { |
101 | +%mova_rs 13:2 !function=plus_12 | 67 | + return false; |
102 | +&mova esz rs pg zr za_imm v:bool to_vec:bool | 68 | } |
103 | + | 69 | |
104 | +MOVA 11000000 esz:2 00000 0 v:1 .. pg:3 zr:5 0 za_imm:4 \ | 70 | - if (rn == 31) { |
105 | + &mova to_vec=0 rs=%mova_rs | 71 | + if (a->rn == 31) { |
106 | +MOVA 11000000 11 00000 1 v:1 .. pg:3 zr:5 0 za_imm:4 \ | 72 | gen_check_sp_alignment(s); |
107 | + &mova to_vec=0 rs=%mova_rs esz=4 | 73 | } |
108 | + | 74 | - dirty_addr = read_cpu_reg_sp(s, rn, 1); |
109 | +MOVA 11000000 esz:2 00001 0 v:1 .. pg:3 0 za_imm:4 zr:5 \ | 75 | + dirty_addr = read_cpu_reg_sp(s, a->rn, 1); |
110 | + &mova to_vec=1 rs=%mova_rs | 76 | |
111 | +MOVA 11000000 11 00001 1 v:1 .. pg:3 0 za_imm:4 zr:5 \ | 77 | if (s->pauth_active) { |
112 | + &mova to_vec=1 rs=%mova_rs esz=4 | 78 | - if (use_key_a) { |
113 | diff --git a/target/arm/sme_helper.c b/target/arm/sme_helper.c | 79 | + if (!a->m) { |
114 | index XXXXXXX..XXXXXXX 100644 | 80 | gen_helper_autda(dirty_addr, cpu_env, dirty_addr, |
115 | --- a/target/arm/sme_helper.c | 81 | tcg_constant_i64(0)); |
116 | +++ b/target/arm/sme_helper.c | 82 | } else { |
117 | @@ -XXX,XX +XXX,XX @@ | 83 | @@ -XXX,XX +XXX,XX @@ static void disas_ldst_pac(DisasContext *s, uint32_t insn, |
118 | |||
119 | #include "qemu/osdep.h" | ||
120 | #include "cpu.h" | ||
121 | -#include "internals.h" | ||
122 | +#include "tcg/tcg-gvec-desc.h" | ||
123 | #include "exec/helper-proto.h" | ||
124 | +#include "qemu/int128.h" | ||
125 | +#include "vec_internal.h" | ||
126 | |||
127 | /* ResetSVEState */ | ||
128 | void arm_reset_sve_state(CPUARMState *env) | ||
129 | @@ -XXX,XX +XXX,XX @@ void helper_sme_zero(CPUARMState *env, uint32_t imm, uint32_t svl) | ||
130 | } | 84 | } |
131 | } | 85 | } |
86 | |||
87 | - /* Form the 10-bit signed, scaled offset. */ | ||
88 | - offset = (extract32(insn, 22, 1) << 9) | extract32(insn, 12, 9); | ||
89 | - offset = sextract32(offset << size, 0, 10 + size); | ||
90 | - tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); | ||
91 | + tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm); | ||
92 | |||
93 | - memop = finalize_memop(s, size); | ||
94 | + memop = finalize_memop(s, MO_64); | ||
95 | |||
96 | /* Note that "clean" and "dirty" here refer to TBI not PAC. */ | ||
97 | clean_addr = gen_mte_check1(s, dirty_addr, false, | ||
98 | - is_wback || rn != 31, memop); | ||
99 | + a->w || a->rn != 31, memop); | ||
100 | |||
101 | - tcg_rt = cpu_reg(s, rt); | ||
102 | + tcg_rt = cpu_reg(s, a->rt); | ||
103 | do_gpr_ld(s, tcg_rt, clean_addr, memop, | ||
104 | - /* extend */ false, /* iss_valid */ !is_wback, | ||
105 | - /* iss_srt */ rt, /* iss_sf */ true, /* iss_ar */ false); | ||
106 | + /* extend */ false, /* iss_valid */ !a->w, | ||
107 | + /* iss_srt */ a->rt, /* iss_sf */ true, /* iss_ar */ false); | ||
108 | |||
109 | - if (is_wback) { | ||
110 | - tcg_gen_mov_i64(cpu_reg_sp(s, rn), dirty_addr); | ||
111 | + if (a->w) { | ||
112 | + tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr); | ||
113 | } | ||
114 | + return true; | ||
132 | } | 115 | } |
133 | + | 116 | |
134 | + | 117 | /* |
135 | +/* | 118 | @@ -XXX,XX +XXX,XX @@ static void disas_ldst_ldapr_stlr(DisasContext *s, uint32_t insn) |
136 | + * When considering the ZA storage as an array of elements of | ||
137 | + * type T, the index within that array of the Nth element of | ||
138 | + * a vertical slice of a tile can be calculated like this, | ||
139 | + * regardless of the size of type T. This is because the tiles | ||
140 | + * are interleaved, so if type T is size N bytes then row 1 of | ||
141 | + * the tile is N rows away from row 0. The division by N to | ||
142 | + * convert a byte offset into an array index and the multiplication | ||
143 | + * by N to convert from vslice-index-within-the-tile to | ||
144 | + * the index within the ZA storage cancel out. | ||
145 | + */ | ||
146 | +#define tile_vslice_index(i) ((i) * sizeof(ARMVectorReg)) | ||
147 | + | ||
148 | +/* | ||
149 | + * When doing byte arithmetic on the ZA storage, the element | ||
150 | + * byteoff bytes away in a tile vertical slice is always this | ||
151 | + * many bytes away in the ZA storage, regardless of the | ||
152 | + * size of the tile element, assuming that byteoff is a multiple | ||
153 | + * of the element size. Again this is because of the interleaving | ||
154 | + * of the tiles. For instance if we have 1 byte per element then | ||
155 | + * each row of the ZA storage has one byte of the vslice data, | ||
156 | + * and (counting from 0) byte 8 goes in row 8 of the storage | ||
157 | + * at offset (8 * row-size-in-bytes). | ||
158 | + * If we have 8 bytes per element then each row of the ZA storage | ||
159 | + * has 8 bytes of the data, but there are 8 interleaved tiles and | ||
160 | + * so byte 8 of the data goes into row 1 of the tile, | ||
161 | + * which is again row 8 of the storage, so the offset is still | ||
162 | + * (8 * row-size-in-bytes). Similarly for other element sizes. | ||
163 | + */ | ||
164 | +#define tile_vslice_offset(byteoff) ((byteoff) * sizeof(ARMVectorReg)) | ||
165 | + | ||
166 | + | ||
167 | +/* | ||
168 | + * Move Zreg vector to ZArray column. | ||
169 | + */ | ||
170 | +#define DO_MOVA_C(NAME, TYPE, H) \ | ||
171 | +void HELPER(NAME)(void *za, void *vn, void *vg, uint32_t desc) \ | ||
172 | +{ \ | ||
173 | + int i, oprsz = simd_oprsz(desc); \ | ||
174 | + for (i = 0; i < oprsz; ) { \ | ||
175 | + uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); \ | ||
176 | + do { \ | ||
177 | + if (pg & 1) { \ | ||
178 | + *(TYPE *)(za + tile_vslice_offset(i)) = *(TYPE *)(vn + H(i)); \ | ||
179 | + } \ | ||
180 | + i += sizeof(TYPE); \ | ||
181 | + pg >>= sizeof(TYPE); \ | ||
182 | + } while (i & 15); \ | ||
183 | + } \ | ||
184 | +} | ||
185 | + | ||
186 | +DO_MOVA_C(sme_mova_cz_b, uint8_t, H1) | ||
187 | +DO_MOVA_C(sme_mova_cz_h, uint16_t, H1_2) | ||
188 | +DO_MOVA_C(sme_mova_cz_s, uint32_t, H1_4) | ||
189 | + | ||
190 | +void HELPER(sme_mova_cz_d)(void *za, void *vn, void *vg, uint32_t desc) | ||
191 | +{ | ||
192 | + int i, oprsz = simd_oprsz(desc) / 8; | ||
193 | + uint8_t *pg = vg; | ||
194 | + uint64_t *n = vn; | ||
195 | + uint64_t *a = za; | ||
196 | + | ||
197 | + for (i = 0; i < oprsz; i++) { | ||
198 | + if (pg[H1(i)] & 1) { | ||
199 | + a[tile_vslice_index(i)] = n[i]; | ||
200 | + } | ||
201 | + } | ||
202 | +} | ||
203 | + | ||
204 | +void HELPER(sme_mova_cz_q)(void *za, void *vn, void *vg, uint32_t desc) | ||
205 | +{ | ||
206 | + int i, oprsz = simd_oprsz(desc) / 16; | ||
207 | + uint16_t *pg = vg; | ||
208 | + Int128 *n = vn; | ||
209 | + Int128 *a = za; | ||
210 | + | ||
211 | + /* | ||
212 | + * Int128 is used here simply to copy 16 bytes, and to simplify | ||
213 | + * the address arithmetic. | ||
214 | + */ | ||
215 | + for (i = 0; i < oprsz; i++) { | ||
216 | + if (pg[H2(i)] & 1) { | ||
217 | + a[tile_vslice_index(i)] = n[i]; | ||
218 | + } | ||
219 | + } | ||
220 | +} | ||
221 | + | ||
222 | +#undef DO_MOVA_C | ||
223 | + | ||
224 | +/* | ||
225 | + * Move ZArray column to Zreg vector. | ||
226 | + */ | ||
227 | +#define DO_MOVA_Z(NAME, TYPE, H) \ | ||
228 | +void HELPER(NAME)(void *vd, void *za, void *vg, uint32_t desc) \ | ||
229 | +{ \ | ||
230 | + int i, oprsz = simd_oprsz(desc); \ | ||
231 | + for (i = 0; i < oprsz; ) { \ | ||
232 | + uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); \ | ||
233 | + do { \ | ||
234 | + if (pg & 1) { \ | ||
235 | + *(TYPE *)(vd + H(i)) = *(TYPE *)(za + tile_vslice_offset(i)); \ | ||
236 | + } \ | ||
237 | + i += sizeof(TYPE); \ | ||
238 | + pg >>= sizeof(TYPE); \ | ||
239 | + } while (i & 15); \ | ||
240 | + } \ | ||
241 | +} | ||
242 | + | ||
243 | +DO_MOVA_Z(sme_mova_zc_b, uint8_t, H1) | ||
244 | +DO_MOVA_Z(sme_mova_zc_h, uint16_t, H1_2) | ||
245 | +DO_MOVA_Z(sme_mova_zc_s, uint32_t, H1_4) | ||
246 | + | ||
247 | +void HELPER(sme_mova_zc_d)(void *vd, void *za, void *vg, uint32_t desc) | ||
248 | +{ | ||
249 | + int i, oprsz = simd_oprsz(desc) / 8; | ||
250 | + uint8_t *pg = vg; | ||
251 | + uint64_t *d = vd; | ||
252 | + uint64_t *a = za; | ||
253 | + | ||
254 | + for (i = 0; i < oprsz; i++) { | ||
255 | + if (pg[H1(i)] & 1) { | ||
256 | + d[i] = a[tile_vslice_index(i)]; | ||
257 | + } | ||
258 | + } | ||
259 | +} | ||
260 | + | ||
261 | +void HELPER(sme_mova_zc_q)(void *vd, void *za, void *vg, uint32_t desc) | ||
262 | +{ | ||
263 | + int i, oprsz = simd_oprsz(desc) / 16; | ||
264 | + uint16_t *pg = vg; | ||
265 | + Int128 *d = vd; | ||
266 | + Int128 *a = za; | ||
267 | + | ||
268 | + /* | ||
269 | + * Int128 is used here simply to copy 16 bytes, and to simplify | ||
270 | + * the address arithmetic. | ||
271 | + */ | ||
272 | + for (i = 0; i < oprsz; i++, za += sizeof(ARMVectorReg)) { | ||
273 | + if (pg[H2(i)] & 1) { | ||
274 | + d[i] = a[tile_vslice_index(i)]; | ||
275 | + } | ||
276 | + } | ||
277 | +} | ||
278 | + | ||
279 | +#undef DO_MOVA_Z | ||
280 | diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c | ||
281 | index XXXXXXX..XXXXXXX 100644 | ||
282 | --- a/target/arm/sve_helper.c | ||
283 | +++ b/target/arm/sve_helper.c | ||
284 | @@ -XXX,XX +XXX,XX @@ void HELPER(sve_sel_zpzz_d)(void *vd, void *vn, void *vm, | ||
285 | } | 119 | } |
286 | } | 120 | } |
287 | 121 | ||
288 | +void HELPER(sve_sel_zpzz_q)(void *vd, void *vn, void *vm, | 122 | -/* Load/store register (all forms) */ |
289 | + void *vg, uint32_t desc) | 123 | -static void disas_ldst_reg(DisasContext *s, uint32_t insn) |
290 | +{ | 124 | -{ |
291 | + intptr_t i, opr_sz = simd_oprsz(desc) / 16; | 125 | - int rt = extract32(insn, 0, 5); |
292 | + Int128 *d = vd, *n = vn, *m = vm; | 126 | - bool is_vector = extract32(insn, 26, 1); |
293 | + uint16_t *pg = vg; | 127 | - int size = extract32(insn, 30, 2); |
294 | + | 128 | - |
295 | + for (i = 0; i < opr_sz; i += 1) { | 129 | - switch (extract32(insn, 24, 2)) { |
296 | + d[i] = (pg[H2(i)] & 1 ? n : m)[i]; | 130 | - case 0: |
297 | + } | 131 | - if (extract32(insn, 21, 1) == 0) { |
298 | +} | 132 | - break; |
299 | + | 133 | - } |
300 | /* Two operand comparison controlled by a predicate. | 134 | - switch (extract32(insn, 10, 2)) { |
301 | * ??? It is very tempting to want to be able to expand this inline | 135 | - case 0: |
302 | * with x86 instructions, e.g. | 136 | - case 2: |
303 | diff --git a/target/arm/translate-sme.c b/target/arm/translate-sme.c | 137 | - break; |
304 | index XXXXXXX..XXXXXXX 100644 | 138 | - default: |
305 | --- a/target/arm/translate-sme.c | 139 | - disas_ldst_pac(s, insn, size, rt, is_vector); |
306 | +++ b/target/arm/translate-sme.c | 140 | - return; |
307 | @@ -XXX,XX +XXX,XX @@ | 141 | - } |
308 | #include "decode-sme.c.inc" | 142 | - break; |
309 | 143 | - } | |
310 | 144 | - unallocated_encoding(s); | |
311 | +/* | 145 | -} |
312 | + * Resolve tile.size[index] to a host pointer, where tile and index | 146 | - |
313 | + * are always decoded together, dependent on the element size. | 147 | /* AdvSIMD load/store multiple structures |
314 | + */ | 148 | * |
315 | +static TCGv_ptr get_tile_rowcol(DisasContext *s, int esz, int rs, | 149 | * 31 30 29 23 22 21 16 15 12 11 10 9 5 4 0 |
316 | + int tile_index, bool vertical) | 150 | @@ -XXX,XX +XXX,XX @@ static void disas_ldst_tag(DisasContext *s, uint32_t insn) |
317 | +{ | 151 | static void disas_ldst(DisasContext *s, uint32_t insn) |
318 | + int tile = tile_index >> (4 - esz); | ||
319 | + int index = esz == MO_128 ? 0 : extract32(tile_index, 0, 4 - esz); | ||
320 | + int pos, len, offset; | ||
321 | + TCGv_i32 tmp; | ||
322 | + TCGv_ptr addr; | ||
323 | + | ||
324 | + /* Compute the final index, which is Rs+imm. */ | ||
325 | + tmp = tcg_temp_new_i32(); | ||
326 | + tcg_gen_trunc_tl_i32(tmp, cpu_reg(s, rs)); | ||
327 | + tcg_gen_addi_i32(tmp, tmp, index); | ||
328 | + | ||
329 | + /* Prepare a power-of-two modulo via extraction of @len bits. */ | ||
330 | + len = ctz32(streaming_vec_reg_size(s)) - esz; | ||
331 | + | ||
332 | + if (vertical) { | ||
333 | + /* | ||
334 | + * Compute the byte offset of the index within the tile: | ||
335 | + * (index % (svl / size)) * size | ||
336 | + * = (index % (svl >> esz)) << esz | ||
337 | + * Perform the power-of-two modulo via extraction of the low @len bits. | ||
338 | + * Perform the multiply by shifting left by @pos bits. | ||
339 | + * Perform these operations simultaneously via deposit into zero. | ||
340 | + */ | ||
341 | + pos = esz; | ||
342 | + tcg_gen_deposit_z_i32(tmp, tmp, pos, len); | ||
343 | + | ||
344 | + /* | ||
345 | + * For big-endian, adjust the indexed column byte offset within | ||
346 | + * the uint64_t host words that make up env->zarray[]. | ||
347 | + */ | ||
348 | + if (HOST_BIG_ENDIAN && esz < MO_64) { | ||
349 | + tcg_gen_xori_i32(tmp, tmp, 8 - (1 << esz)); | ||
350 | + } | ||
351 | + } else { | ||
352 | + /* | ||
353 | + * Compute the byte offset of the index within the tile: | ||
354 | + * (index % (svl / size)) * (size * sizeof(row)) | ||
355 | + * = (index % (svl >> esz)) << (esz + log2(sizeof(row))) | ||
356 | + */ | ||
357 | + pos = esz + ctz32(sizeof(ARMVectorReg)); | ||
358 | + tcg_gen_deposit_z_i32(tmp, tmp, pos, len); | ||
359 | + | ||
360 | + /* Row slices are always aligned and need no endian adjustment. */ | ||
361 | + } | ||
362 | + | ||
363 | + /* The tile byte offset within env->zarray is the row. */ | ||
364 | + offset = tile * sizeof(ARMVectorReg); | ||
365 | + | ||
366 | + /* Include the byte offset of zarray to make this relative to env. */ | ||
367 | + offset += offsetof(CPUARMState, zarray); | ||
368 | + tcg_gen_addi_i32(tmp, tmp, offset); | ||
369 | + | ||
370 | + /* Add the byte offset to env to produce the final pointer. */ | ||
371 | + addr = tcg_temp_new_ptr(); | ||
372 | + tcg_gen_ext_i32_ptr(addr, tmp); | ||
373 | + tcg_temp_free_i32(tmp); | ||
374 | + tcg_gen_add_ptr(addr, addr, cpu_env); | ||
375 | + | ||
376 | + return addr; | ||
377 | +} | ||
378 | + | ||
379 | static bool trans_ZERO(DisasContext *s, arg_ZERO *a) | ||
380 | { | 152 | { |
381 | if (!dc_isar_feature(aa64_sme, s)) { | 153 | switch (extract32(insn, 24, 6)) { |
382 | @@ -XXX,XX +XXX,XX @@ static bool trans_ZERO(DisasContext *s, arg_ZERO *a) | 154 | - case 0x38: case 0x39: |
383 | } | 155 | - case 0x3c: case 0x3d: /* Load/store register (all forms) */ |
384 | return true; | 156 | - disas_ldst_reg(s, insn); |
385 | } | 157 | - break; |
386 | + | 158 | case 0x0c: /* AdvSIMD load/store multiple structures */ |
387 | +static bool trans_MOVA(DisasContext *s, arg_MOVA *a) | 159 | disas_ldst_multiple_struct(s, insn); |
388 | +{ | 160 | break; |
389 | + static gen_helper_gvec_4 * const h_fns[5] = { | ||
390 | + gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h, | ||
391 | + gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d, | ||
392 | + gen_helper_sve_sel_zpzz_q | ||
393 | + }; | ||
394 | + static gen_helper_gvec_3 * const cz_fns[5] = { | ||
395 | + gen_helper_sme_mova_cz_b, gen_helper_sme_mova_cz_h, | ||
396 | + gen_helper_sme_mova_cz_s, gen_helper_sme_mova_cz_d, | ||
397 | + gen_helper_sme_mova_cz_q, | ||
398 | + }; | ||
399 | + static gen_helper_gvec_3 * const zc_fns[5] = { | ||
400 | + gen_helper_sme_mova_zc_b, gen_helper_sme_mova_zc_h, | ||
401 | + gen_helper_sme_mova_zc_s, gen_helper_sme_mova_zc_d, | ||
402 | + gen_helper_sme_mova_zc_q, | ||
403 | + }; | ||
404 | + | ||
405 | + TCGv_ptr t_za, t_zr, t_pg; | ||
406 | + TCGv_i32 t_desc; | ||
407 | + int svl; | ||
408 | + | ||
409 | + if (!dc_isar_feature(aa64_sme, s)) { | ||
410 | + return false; | ||
411 | + } | ||
412 | + if (!sme_smza_enabled_check(s)) { | ||
413 | + return true; | ||
414 | + } | ||
415 | + | ||
416 | + t_za = get_tile_rowcol(s, a->esz, a->rs, a->za_imm, a->v); | ||
417 | + t_zr = vec_full_reg_ptr(s, a->zr); | ||
418 | + t_pg = pred_full_reg_ptr(s, a->pg); | ||
419 | + | ||
420 | + svl = streaming_vec_reg_size(s); | ||
421 | + t_desc = tcg_constant_i32(simd_desc(svl, svl, 0)); | ||
422 | + | ||
423 | + if (a->v) { | ||
424 | + /* Vertical slice -- use sme mova helpers. */ | ||
425 | + if (a->to_vec) { | ||
426 | + zc_fns[a->esz](t_zr, t_za, t_pg, t_desc); | ||
427 | + } else { | ||
428 | + cz_fns[a->esz](t_za, t_zr, t_pg, t_desc); | ||
429 | + } | ||
430 | + } else { | ||
431 | + /* Horizontal slice -- reuse sve sel helpers. */ | ||
432 | + if (a->to_vec) { | ||
433 | + h_fns[a->esz](t_zr, t_za, t_zr, t_pg, t_desc); | ||
434 | + } else { | ||
435 | + h_fns[a->esz](t_za, t_zr, t_za, t_pg, t_desc); | ||
436 | + } | ||
437 | + } | ||
438 | + | ||
439 | + tcg_temp_free_ptr(t_za); | ||
440 | + tcg_temp_free_ptr(t_zr); | ||
441 | + tcg_temp_free_ptr(t_pg); | ||
442 | + | ||
443 | + return true; | ||
444 | +} | ||
445 | -- | 161 | -- |
446 | 2.25.1 | 162 | 2.34.1 |
163 | |||
164 | diff view generated by jsdifflib |
1 | From: Richard Henderson <richard.henderson@linaro.org> | 1 | Convert the instructions in the LDAPR/STLR (unscaled immediate) |
---|---|---|---|
2 | group to decodetree. | ||
2 | 3 | ||
3 | Set the SM bit in the SVE record on signal delivery, create the ZA record. | 4 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
4 | Restore SM and ZA state according to the records present on return. | 5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
6 | Message-id: 20230602155223.2040685-18-peter.maydell@linaro.org | ||
7 | --- | ||
8 | target/arm/tcg/a64.decode | 10 +++ | ||
9 | target/arm/tcg/translate-a64.c | 132 ++++++++++++--------------------- | ||
10 | 2 files changed, 56 insertions(+), 86 deletions(-) | ||
5 | 11 | ||
6 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | 12 | diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode |
7 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
8 | Message-id: 20220708151540.18136-41-richard.henderson@linaro.org | ||
9 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
10 | --- | ||
11 | linux-user/aarch64/signal.c | 167 +++++++++++++++++++++++++++++++++--- | ||
12 | 1 file changed, 154 insertions(+), 13 deletions(-) | ||
13 | |||
14 | diff --git a/linux-user/aarch64/signal.c b/linux-user/aarch64/signal.c | ||
15 | index XXXXXXX..XXXXXXX 100644 | 13 | index XXXXXXX..XXXXXXX 100644 |
16 | --- a/linux-user/aarch64/signal.c | 14 | --- a/target/arm/tcg/a64.decode |
17 | +++ b/linux-user/aarch64/signal.c | 15 | +++ b/target/arm/tcg/a64.decode |
18 | @@ -XXX,XX +XXX,XX @@ struct target_sve_context { | 16 | @@ -XXX,XX +XXX,XX @@ LDAPR sz:2 111 0 00 1 0 1 11111 1100 00 rn:5 rt:5 |
19 | 17 | %ldra_imm 22:s1 12:9 !function=times_2 | |
20 | #define TARGET_SVE_SIG_FLAG_SM 1 | 18 | |
21 | 19 | LDRA 11 111 0 00 m:1 . 1 ......... w:1 1 rn:5 rt:5 imm=%ldra_imm | |
22 | +#define TARGET_ZA_MAGIC 0x54366345 | 20 | + |
23 | + | 21 | +&ldapr_stlr_i rn rt imm sz sign ext |
24 | +struct target_za_context { | 22 | +@ldapr_stlr_i .. ...... .. . imm:9 .. rn:5 rt:5 &ldapr_stlr_i |
25 | + struct target_aarch64_ctx head; | 23 | +STLR_i sz:2 011001 00 0 ......... 00 ..... ..... @ldapr_stlr_i sign=0 ext=0 |
26 | + uint16_t vl; | 24 | +LDAPR_i sz:2 011001 01 0 ......... 00 ..... ..... @ldapr_stlr_i sign=0 ext=0 |
27 | + uint16_t reserved[3]; | 25 | +LDAPR_i 00 011001 10 0 ......... 00 ..... ..... @ldapr_stlr_i sign=1 ext=0 sz=0 |
28 | + /* The actual ZA data immediately follows. */ | 26 | +LDAPR_i 01 011001 10 0 ......... 00 ..... ..... @ldapr_stlr_i sign=1 ext=0 sz=1 |
29 | +}; | 27 | +LDAPR_i 10 011001 10 0 ......... 00 ..... ..... @ldapr_stlr_i sign=1 ext=0 sz=2 |
30 | + | 28 | +LDAPR_i 00 011001 11 0 ......... 00 ..... ..... @ldapr_stlr_i sign=1 ext=1 sz=0 |
31 | +#define TARGET_ZA_SIG_REGS_OFFSET \ | 29 | +LDAPR_i 01 011001 11 0 ......... 00 ..... ..... @ldapr_stlr_i sign=1 ext=1 sz=1 |
32 | + QEMU_ALIGN_UP(sizeof(struct target_za_context), TARGET_SVE_VQ_BYTES) | 30 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c |
33 | +#define TARGET_ZA_SIG_ZAV_OFFSET(VQ, N) \ | 31 | index XXXXXXX..XXXXXXX 100644 |
34 | + (TARGET_ZA_SIG_REGS_OFFSET + (VQ) * TARGET_SVE_VQ_BYTES * (N)) | 32 | --- a/target/arm/tcg/translate-a64.c |
35 | +#define TARGET_ZA_SIG_CONTEXT_SIZE(VQ) \ | 33 | +++ b/target/arm/tcg/translate-a64.c |
36 | + TARGET_ZA_SIG_ZAV_OFFSET(VQ, VQ * TARGET_SVE_VQ_BYTES) | 34 | @@ -XXX,XX +XXX,XX @@ static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt, |
37 | + | 35 | } |
38 | struct target_rt_sigframe { | ||
39 | struct target_siginfo info; | ||
40 | struct target_ucontext uc; | ||
41 | @@ -XXX,XX +XXX,XX @@ static void target_setup_end_record(struct target_aarch64_ctx *end) | ||
42 | } | 36 | } |
43 | 37 | ||
44 | static void target_setup_sve_record(struct target_sve_context *sve, | 38 | -/* Update the Sixty-Four bit (SF) registersize. This logic is derived |
45 | - CPUARMState *env, int vq, int size) | 39 | +/* |
46 | + CPUARMState *env, int size) | 40 | + * Compute the ISS.SF bit for syndrome information if an exception |
41 | + * is taken on a load or store. This indicates whether the instruction | ||
42 | + * is accessing a 32-bit or 64-bit register. This logic is derived | ||
43 | * from the ARMv8 specs for LDR (Shared decode for all encodings). | ||
44 | */ | ||
45 | -static bool disas_ldst_compute_iss_sf(int size, bool is_signed, int opc) | ||
46 | -{ | ||
47 | - int opc0 = extract32(opc, 0, 1); | ||
48 | - int regsize; | ||
49 | - | ||
50 | - if (is_signed) { | ||
51 | - regsize = opc0 ? 32 : 64; | ||
52 | - } else { | ||
53 | - regsize = size == 3 ? 64 : 32; | ||
54 | - } | ||
55 | - return regsize == 64; | ||
56 | -} | ||
57 | - | ||
58 | static bool ldst_iss_sf(int size, bool sign, bool ext) | ||
47 | { | 59 | { |
48 | - int i, j; | 60 | |
49 | + int i, j, vq = sve_vq(env); | 61 | @@ -XXX,XX +XXX,XX @@ static bool trans_LDRA(DisasContext *s, arg_LDRA *a) |
50 | |||
51 | memset(sve, 0, sizeof(*sve)); | ||
52 | __put_user(TARGET_SVE_MAGIC, &sve->head.magic); | ||
53 | @@ -XXX,XX +XXX,XX @@ static void target_setup_sve_record(struct target_sve_context *sve, | ||
54 | } | ||
55 | } | ||
56 | |||
57 | +static void target_setup_za_record(struct target_za_context *za, | ||
58 | + CPUARMState *env, int size) | ||
59 | +{ | ||
60 | + int vq = sme_vq(env); | ||
61 | + int vl = vq * TARGET_SVE_VQ_BYTES; | ||
62 | + int i, j; | ||
63 | + | ||
64 | + memset(za, 0, sizeof(*za)); | ||
65 | + __put_user(TARGET_ZA_MAGIC, &za->head.magic); | ||
66 | + __put_user(size, &za->head.size); | ||
67 | + __put_user(vl, &za->vl); | ||
68 | + | ||
69 | + if (size == TARGET_ZA_SIG_CONTEXT_SIZE(0)) { | ||
70 | + return; | ||
71 | + } | ||
72 | + assert(size == TARGET_ZA_SIG_CONTEXT_SIZE(vq)); | ||
73 | + | ||
74 | + /* | ||
75 | + * Note that ZA vectors are stored as a byte stream, | ||
76 | + * with each byte element at a subsequent address. | ||
77 | + */ | ||
78 | + for (i = 0; i < vl; ++i) { | ||
79 | + uint64_t *z = (void *)za + TARGET_ZA_SIG_ZAV_OFFSET(vq, i); | ||
80 | + for (j = 0; j < vq * 2; ++j) { | ||
81 | + __put_user_e(env->zarray[i].d[j], z + j, le); | ||
82 | + } | ||
83 | + } | ||
84 | +} | ||
85 | + | ||
86 | static void target_restore_general_frame(CPUARMState *env, | ||
87 | struct target_rt_sigframe *sf) | ||
88 | { | ||
89 | @@ -XXX,XX +XXX,XX @@ static void target_restore_fpsimd_record(CPUARMState *env, | ||
90 | |||
91 | static bool target_restore_sve_record(CPUARMState *env, | ||
92 | struct target_sve_context *sve, | ||
93 | - int size) | ||
94 | + int size, int *svcr) | ||
95 | { | ||
96 | - int i, j, vl, vq; | ||
97 | + int i, j, vl, vq, flags; | ||
98 | + bool sm; | ||
99 | |||
100 | - if (!cpu_isar_feature(aa64_sve, env_archcpu(env))) { | ||
101 | + __get_user(vl, &sve->vl); | ||
102 | + __get_user(flags, &sve->flags); | ||
103 | + | ||
104 | + sm = flags & TARGET_SVE_SIG_FLAG_SM; | ||
105 | + | ||
106 | + /* The cpu must support Streaming or Non-streaming SVE. */ | ||
107 | + if (sm | ||
108 | + ? !cpu_isar_feature(aa64_sme, env_archcpu(env)) | ||
109 | + : !cpu_isar_feature(aa64_sve, env_archcpu(env))) { | ||
110 | return false; | ||
111 | } | ||
112 | |||
113 | - __get_user(vl, &sve->vl); | ||
114 | - vq = sve_vq(env); | ||
115 | + /* | ||
116 | + * Note that we cannot use sve_vq() because that depends on the | ||
117 | + * current setting of PSTATE.SM, not the state to be restored. | ||
118 | + */ | ||
119 | + vq = sve_vqm1_for_el_sm(env, 0, sm) + 1; | ||
120 | |||
121 | /* Reject mismatched VL. */ | ||
122 | if (vl != vq * TARGET_SVE_VQ_BYTES) { | ||
123 | @@ -XXX,XX +XXX,XX @@ static bool target_restore_sve_record(CPUARMState *env, | ||
124 | return false; | ||
125 | } | ||
126 | |||
127 | + *svcr = FIELD_DP64(*svcr, SVCR, SM, sm); | ||
128 | + | ||
129 | /* | ||
130 | * Note that SVE regs are stored as a byte stream, with each byte element | ||
131 | * at a subsequent address. This corresponds to a little-endian load | ||
132 | @@ -XXX,XX +XXX,XX @@ static bool target_restore_sve_record(CPUARMState *env, | ||
133 | return true; | 62 | return true; |
134 | } | 63 | } |
135 | 64 | ||
136 | +static bool target_restore_za_record(CPUARMState *env, | 65 | -/* |
137 | + struct target_za_context *za, | 66 | - * LDAPR/STLR (unscaled immediate) |
138 | + int size, int *svcr) | 67 | - * |
139 | +{ | 68 | - * 31 30 24 22 21 12 10 5 0 |
140 | + int i, j, vl, vq; | 69 | - * +------+-------------+-----+---+--------+-----+----+-----+ |
141 | + | 70 | - * | size | 0 1 1 0 0 1 | opc | 0 | imm9 | 0 0 | Rn | Rt | |
142 | + if (!cpu_isar_feature(aa64_sme, env_archcpu(env))) { | 71 | - * +------+-------------+-----+---+--------+-----+----+-----+ |
72 | - * | ||
73 | - * Rt: source or destination register | ||
74 | - * Rn: base register | ||
75 | - * imm9: unscaled immediate offset | ||
76 | - * opc: 00: STLUR*, 01/10/11: various LDAPUR* | ||
77 | - * size: size of load/store | ||
78 | - */ | ||
79 | -static void disas_ldst_ldapr_stlr(DisasContext *s, uint32_t insn) | ||
80 | +static bool trans_LDAPR_i(DisasContext *s, arg_ldapr_stlr_i *a) | ||
81 | { | ||
82 | - int rt = extract32(insn, 0, 5); | ||
83 | - int rn = extract32(insn, 5, 5); | ||
84 | - int offset = sextract32(insn, 12, 9); | ||
85 | - int opc = extract32(insn, 22, 2); | ||
86 | - int size = extract32(insn, 30, 2); | ||
87 | TCGv_i64 clean_addr, dirty_addr; | ||
88 | - bool is_store = false; | ||
89 | - bool extend = false; | ||
90 | - bool iss_sf; | ||
91 | - MemOp mop = size; | ||
92 | + MemOp mop = a->sz | (a->sign ? MO_SIGN : 0); | ||
93 | + bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); | ||
94 | |||
95 | if (!dc_isar_feature(aa64_rcpc_8_4, s)) { | ||
96 | - unallocated_encoding(s); | ||
97 | - return; | ||
143 | + return false; | 98 | + return false; |
144 | + } | 99 | } |
145 | + | 100 | |
146 | + __get_user(vl, &za->vl); | 101 | - switch (opc) { |
147 | + vq = sme_vq(env); | 102 | - case 0: /* STLURB */ |
148 | + | 103 | - is_store = true; |
149 | + /* Reject mismatched VL. */ | 104 | - break; |
150 | + if (vl != vq * TARGET_SVE_VQ_BYTES) { | 105 | - case 1: /* LDAPUR* */ |
151 | + return false; | 106 | - break; |
152 | + } | 107 | - case 2: /* LDAPURS* 64-bit variant */ |
153 | + | 108 | - if (size == 3) { |
154 | + /* Accept empty record -- used to clear PSTATE.ZA. */ | 109 | - unallocated_encoding(s); |
155 | + if (size <= TARGET_ZA_SIG_CONTEXT_SIZE(0)) { | 110 | - return; |
156 | + return true; | 111 | - } |
157 | + } | 112 | - mop |= MO_SIGN; |
158 | + | 113 | - break; |
159 | + /* Reject non-empty but incomplete record. */ | 114 | - case 3: /* LDAPURS* 32-bit variant */ |
160 | + if (size < TARGET_ZA_SIG_CONTEXT_SIZE(vq)) { | 115 | - if (size > 1) { |
161 | + return false; | 116 | - unallocated_encoding(s); |
162 | + } | 117 | - return; |
163 | + | 118 | - } |
164 | + *svcr = FIELD_DP64(*svcr, SVCR, ZA, 1); | 119 | - mop |= MO_SIGN; |
165 | + | 120 | - extend = true; /* zero-extend 32->64 after signed load */ |
166 | + for (i = 0; i < vl; ++i) { | 121 | - break; |
167 | + uint64_t *z = (void *)za + TARGET_ZA_SIG_ZAV_OFFSET(vq, i); | 122 | - default: |
168 | + for (j = 0; j < vq * 2; ++j) { | 123 | - g_assert_not_reached(); |
169 | + __get_user_e(env->zarray[i].d[j], z + j, le); | 124 | - } |
170 | + } | 125 | - |
171 | + } | 126 | - iss_sf = disas_ldst_compute_iss_sf(size, (mop & MO_SIGN) != 0, opc); |
127 | - | ||
128 | - if (rn == 31) { | ||
129 | + if (a->rn == 31) { | ||
130 | gen_check_sp_alignment(s); | ||
131 | } | ||
132 | |||
133 | - mop = check_ordered_align(s, rn, offset, is_store, mop); | ||
134 | - | ||
135 | - dirty_addr = read_cpu_reg_sp(s, rn, 1); | ||
136 | - tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); | ||
137 | + mop = check_ordered_align(s, a->rn, a->imm, false, mop); | ||
138 | + dirty_addr = read_cpu_reg_sp(s, a->rn, 1); | ||
139 | + tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm); | ||
140 | clean_addr = clean_data_tbi(s, dirty_addr); | ||
141 | |||
142 | - if (is_store) { | ||
143 | - /* Store-Release semantics */ | ||
144 | - tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); | ||
145 | - do_gpr_st(s, cpu_reg(s, rt), clean_addr, mop, true, rt, iss_sf, true); | ||
146 | - } else { | ||
147 | - /* | ||
148 | - * Load-AcquirePC semantics; we implement as the slightly more | ||
149 | - * restrictive Load-Acquire. | ||
150 | - */ | ||
151 | - do_gpr_ld(s, cpu_reg(s, rt), clean_addr, mop, | ||
152 | - extend, true, rt, iss_sf, true); | ||
153 | - tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); | ||
154 | + /* | ||
155 | + * Load-AcquirePC semantics; we implement as the slightly more | ||
156 | + * restrictive Load-Acquire. | ||
157 | + */ | ||
158 | + do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, mop, a->ext, true, | ||
159 | + a->rt, iss_sf, true); | ||
160 | + tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); | ||
172 | + return true; | 161 | + return true; |
173 | +} | 162 | +} |
174 | + | 163 | + |
175 | static int target_restore_sigframe(CPUARMState *env, | 164 | +static bool trans_STLR_i(DisasContext *s, arg_ldapr_stlr_i *a) |
176 | struct target_rt_sigframe *sf) | 165 | +{ |
177 | { | 166 | + TCGv_i64 clean_addr, dirty_addr; |
178 | struct target_aarch64_ctx *ctx, *extra = NULL; | 167 | + MemOp mop = a->sz; |
179 | struct target_fpsimd_context *fpsimd = NULL; | 168 | + bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); |
180 | struct target_sve_context *sve = NULL; | 169 | + |
181 | + struct target_za_context *za = NULL; | 170 | + if (!dc_isar_feature(aa64_rcpc_8_4, s)) { |
182 | uint64_t extra_datap = 0; | 171 | + return false; |
183 | bool used_extra = false; | 172 | } |
184 | int sve_size = 0; | 173 | + |
185 | + int za_size = 0; | 174 | + /* TODO: ARMv8.4-LSE SCTLR.nAA */ |
186 | + int svcr = 0; | 175 | + |
187 | 176 | + if (a->rn == 31) { | |
188 | target_restore_general_frame(env, sf); | 177 | + gen_check_sp_alignment(s); |
189 | |||
190 | @@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env, | ||
191 | sve_size = size; | ||
192 | break; | ||
193 | |||
194 | + case TARGET_ZA_MAGIC: | ||
195 | + if (za || size < sizeof(struct target_za_context)) { | ||
196 | + goto err; | ||
197 | + } | ||
198 | + za = (struct target_za_context *)ctx; | ||
199 | + za_size = size; | ||
200 | + break; | ||
201 | + | ||
202 | case TARGET_EXTRA_MAGIC: | ||
203 | if (extra || size != sizeof(struct target_extra_context)) { | ||
204 | goto err; | ||
205 | @@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env, | ||
206 | } | ||
207 | |||
208 | /* SVE data, if present, overwrites FPSIMD data. */ | ||
209 | - if (sve && !target_restore_sve_record(env, sve, sve_size)) { | ||
210 | + if (sve && !target_restore_sve_record(env, sve, sve_size, &svcr)) { | ||
211 | goto err; | ||
212 | } | ||
213 | + if (za && !target_restore_za_record(env, za, za_size, &svcr)) { | ||
214 | + goto err; | ||
215 | + } | 178 | + } |
216 | + if (env->svcr != svcr) { | 179 | + |
217 | + env->svcr = svcr; | 180 | + mop = check_ordered_align(s, a->rn, a->imm, true, mop); |
218 | + arm_rebuild_hflags(env); | 181 | + dirty_addr = read_cpu_reg_sp(s, a->rn, 1); |
219 | + } | 182 | + tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm); |
220 | unlock_user(extra, extra_datap, 0); | 183 | + clean_addr = clean_data_tbi(s, dirty_addr); |
221 | return 0; | 184 | + |
222 | 185 | + /* Store-Release semantics */ | |
223 | @@ -XXX,XX +XXX,XX @@ static void target_setup_frame(int usig, struct target_sigaction *ka, | 186 | + tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); |
224 | .total_size = offsetof(struct target_rt_sigframe, | 187 | + do_gpr_st(s, cpu_reg(s, a->rt), clean_addr, mop, true, a->rt, iss_sf, true); |
225 | uc.tuc_mcontext.__reserved), | 188 | + return true; |
226 | }; | 189 | } |
227 | - int fpsimd_ofs, fr_ofs, sve_ofs = 0, vq = 0, sve_size = 0; | 190 | |
228 | + int fpsimd_ofs, fr_ofs, sve_ofs = 0, za_ofs = 0; | 191 | /* AdvSIMD load/store multiple structures |
229 | + int sve_size = 0, za_size = 0; | 192 | @@ -XXX,XX +XXX,XX @@ static void disas_ldst(DisasContext *s, uint32_t insn) |
230 | struct target_rt_sigframe *frame; | 193 | case 0x19: |
231 | struct target_rt_frame_record *fr; | 194 | if (extract32(insn, 21, 1) != 0) { |
232 | abi_ulong frame_addr, return_addr; | 195 | disas_ldst_tag(s, insn); |
233 | @@ -XXX,XX +XXX,XX @@ static void target_setup_frame(int usig, struct target_sigaction *ka, | 196 | - } else if (extract32(insn, 10, 2) == 0) { |
234 | &layout); | 197 | - disas_ldst_ldapr_stlr(s, insn); |
235 | 198 | } else { | |
236 | /* SVE state needs saving only if it exists. */ | 199 | unallocated_encoding(s); |
237 | - if (cpu_isar_feature(aa64_sve, env_archcpu(env))) { | 200 | } |
238 | - vq = sve_vq(env); | ||
239 | - sve_size = QEMU_ALIGN_UP(TARGET_SVE_SIG_CONTEXT_SIZE(vq), 16); | ||
240 | + if (cpu_isar_feature(aa64_sve, env_archcpu(env)) || | ||
241 | + cpu_isar_feature(aa64_sme, env_archcpu(env))) { | ||
242 | + sve_size = QEMU_ALIGN_UP(TARGET_SVE_SIG_CONTEXT_SIZE(sve_vq(env)), 16); | ||
243 | sve_ofs = alloc_sigframe_space(sve_size, &layout); | ||
244 | } | ||
245 | + if (cpu_isar_feature(aa64_sme, env_archcpu(env))) { | ||
246 | + /* ZA state needs saving only if it is enabled. */ | ||
247 | + if (FIELD_EX64(env->svcr, SVCR, ZA)) { | ||
248 | + za_size = TARGET_ZA_SIG_CONTEXT_SIZE(sme_vq(env)); | ||
249 | + } else { | ||
250 | + za_size = TARGET_ZA_SIG_CONTEXT_SIZE(0); | ||
251 | + } | ||
252 | + za_ofs = alloc_sigframe_space(za_size, &layout); | ||
253 | + } | ||
254 | |||
255 | if (layout.extra_ofs) { | ||
256 | /* Reserve space for the extra end marker. The standard end marker | ||
257 | @@ -XXX,XX +XXX,XX @@ static void target_setup_frame(int usig, struct target_sigaction *ka, | ||
258 | target_setup_end_record((void *)frame + layout.extra_end_ofs); | ||
259 | } | ||
260 | if (sve_ofs) { | ||
261 | - target_setup_sve_record((void *)frame + sve_ofs, env, vq, sve_size); | ||
262 | + target_setup_sve_record((void *)frame + sve_ofs, env, sve_size); | ||
263 | + } | ||
264 | + if (za_ofs) { | ||
265 | + target_setup_za_record((void *)frame + za_ofs, env, za_size); | ||
266 | } | ||
267 | |||
268 | /* Set up the stack frame for unwinding. */ | ||
269 | @@ -XXX,XX +XXX,XX @@ static void target_setup_frame(int usig, struct target_sigaction *ka, | ||
270 | env->btype = 2; | ||
271 | } | ||
272 | |||
273 | + /* | ||
274 | + * Invoke the signal handler with both SM and ZA disabled. | ||
275 | + * When clearing SM, ResetSVEState, per SMSTOP. | ||
276 | + */ | ||
277 | + if (FIELD_EX64(env->svcr, SVCR, SM)) { | ||
278 | + arm_reset_sve_state(env); | ||
279 | + } | ||
280 | + if (env->svcr) { | ||
281 | + env->svcr = 0; | ||
282 | + arm_rebuild_hflags(env); | ||
283 | + } | ||
284 | + | ||
285 | if (info) { | ||
286 | tswap_siginfo(&frame->info, info); | ||
287 | env->xregs[1] = frame_addr + offsetof(struct target_rt_sigframe, info); | ||
288 | -- | 201 | -- |
289 | 2.25.1 | 202 | 2.34.1 | diff view generated by jsdifflib |
1 | From: Richard Henderson <richard.henderson@linaro.org> | 1 | Convert the instructions in the ASIMD load/store multiple structures |
---|---|---|---|
2 | instruction classes to decodetree. | ||
2 | 3 | ||
3 | These functions will be used to verify that the cpu | 4 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
4 | is in the correct state for a given instruction. | 5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
6 | Message-id: 20230602155223.2040685-19-peter.maydell@linaro.org | ||
7 | --- | ||
8 | target/arm/tcg/a64.decode | 20 +++ | ||
9 | target/arm/tcg/translate-a64.c | 222 ++++++++++++++++----------------- | ||
10 | 2 files changed, 131 insertions(+), 111 deletions(-) | ||
5 | 11 | ||
6 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | 12 | diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode |
7 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
8 | Message-id: 20220708151540.18136-16-richard.henderson@linaro.org | ||
9 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
10 | --- | ||
11 | target/arm/translate-a64.h | 21 +++++++++++++++++++++ | ||
12 | target/arm/translate-a64.c | 34 ++++++++++++++++++++++++++++++++++ | ||
13 | 2 files changed, 55 insertions(+) | ||
14 | |||
15 | diff --git a/target/arm/translate-a64.h b/target/arm/translate-a64.h | ||
16 | index XXXXXXX..XXXXXXX 100644 | 13 | index XXXXXXX..XXXXXXX 100644 |
17 | --- a/target/arm/translate-a64.h | 14 | --- a/target/arm/tcg/a64.decode |
18 | +++ b/target/arm/translate-a64.h | 15 | +++ b/target/arm/tcg/a64.decode |
19 | @@ -XXX,XX +XXX,XX @@ void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v); | 16 | @@ -XXX,XX +XXX,XX @@ LDAPR_i 01 011001 10 0 ......... 00 ..... ..... @ldapr_stlr_i sign=1 ext |
20 | bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn, | 17 | LDAPR_i 10 011001 10 0 ......... 00 ..... ..... @ldapr_stlr_i sign=1 ext=0 sz=2 |
21 | unsigned int imms, unsigned int immr); | 18 | LDAPR_i 00 011001 11 0 ......... 00 ..... ..... @ldapr_stlr_i sign=1 ext=1 sz=0 |
22 | bool sve_access_check(DisasContext *s); | 19 | LDAPR_i 01 011001 11 0 ......... 00 ..... ..... @ldapr_stlr_i sign=1 ext=1 sz=1 |
23 | +bool sme_enabled_check(DisasContext *s); | 20 | + |
24 | +bool sme_enabled_check_with_svcr(DisasContext *s, unsigned); | 21 | +# Load/store multiple structures |
25 | + | 22 | +# The 4-bit opcode in [15:12] encodes repeat count and structure elements |
26 | +/* This function corresponds to CheckStreamingSVEEnabled. */ | 23 | +&ldst_mult rm rn rt sz q p rpt selem |
27 | +static inline bool sme_sm_enabled_check(DisasContext *s) | 24 | +@ldst_mult . q:1 ...... p:1 . . rm:5 .... sz:2 rn:5 rt:5 &ldst_mult |
28 | +{ | 25 | +ST_mult 0 . 001100 . 0 0 ..... 0000 .. ..... ..... @ldst_mult rpt=1 selem=4 |
29 | + return sme_enabled_check_with_svcr(s, R_SVCR_SM_MASK); | 26 | +ST_mult 0 . 001100 . 0 0 ..... 0010 .. ..... ..... @ldst_mult rpt=4 selem=1 |
30 | +} | 27 | +ST_mult 0 . 001100 . 0 0 ..... 0100 .. ..... ..... @ldst_mult rpt=1 selem=3 |
31 | + | 28 | +ST_mult 0 . 001100 . 0 0 ..... 0110 .. ..... ..... @ldst_mult rpt=3 selem=1 |
32 | +/* This function corresponds to CheckSMEAndZAEnabled. */ | 29 | +ST_mult 0 . 001100 . 0 0 ..... 0111 .. ..... ..... @ldst_mult rpt=1 selem=1 |
33 | +static inline bool sme_za_enabled_check(DisasContext *s) | 30 | +ST_mult 0 . 001100 . 0 0 ..... 1000 .. ..... ..... @ldst_mult rpt=1 selem=2 |
34 | +{ | 31 | +ST_mult 0 . 001100 . 0 0 ..... 1010 .. ..... ..... @ldst_mult rpt=2 selem=1 |
35 | + return sme_enabled_check_with_svcr(s, R_SVCR_ZA_MASK); | 32 | + |
36 | +} | 33 | +LD_mult 0 . 001100 . 1 0 ..... 0000 .. ..... ..... @ldst_mult rpt=1 selem=4 |
37 | + | 34 | +LD_mult 0 . 001100 . 1 0 ..... 0010 .. ..... ..... @ldst_mult rpt=4 selem=1 |
38 | +/* Note that this function corresponds to CheckStreamingSVEAndZAEnabled. */ | 35 | +LD_mult 0 . 001100 . 1 0 ..... 0100 .. ..... ..... @ldst_mult rpt=1 selem=3 |
39 | +static inline bool sme_smza_enabled_check(DisasContext *s) | 36 | +LD_mult 0 . 001100 . 1 0 ..... 0110 .. ..... ..... @ldst_mult rpt=3 selem=1 |
40 | +{ | 37 | +LD_mult 0 . 001100 . 1 0 ..... 0111 .. ..... ..... @ldst_mult rpt=1 selem=1 |
41 | + return sme_enabled_check_with_svcr(s, R_SVCR_SM_MASK | R_SVCR_ZA_MASK); | 38 | +LD_mult 0 . 001100 . 1 0 ..... 1000 .. ..... ..... @ldst_mult rpt=1 selem=2 |
42 | +} | 39 | +LD_mult 0 . 001100 . 1 0 ..... 1010 .. ..... ..... @ldst_mult rpt=2 selem=1 |
43 | + | 40 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c |
44 | TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr); | ||
45 | TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write, | ||
46 | bool tag_checked, int log2_size); | ||
47 | diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c | ||
48 | index XXXXXXX..XXXXXXX 100644 | 41 | index XXXXXXX..XXXXXXX 100644 |
49 | --- a/target/arm/translate-a64.c | 42 | --- a/target/arm/tcg/translate-a64.c |
50 | +++ b/target/arm/translate-a64.c | 43 | +++ b/target/arm/tcg/translate-a64.c |
51 | @@ -XXX,XX +XXX,XX @@ static bool sme_access_check(DisasContext *s) | 44 | @@ -XXX,XX +XXX,XX @@ static bool trans_STLR_i(DisasContext *s, arg_ldapr_stlr_i *a) |
52 | return true; | 45 | return true; |
53 | } | 46 | } |
54 | 47 | ||
55 | +/* This function corresponds to CheckSMEEnabled. */ | 48 | -/* AdvSIMD load/store multiple structures |
56 | +bool sme_enabled_check(DisasContext *s) | 49 | - * |
57 | +{ | 50 | - * 31 30 29 23 22 21 16 15 12 11 10 9 5 4 0 |
51 | - * +---+---+---------------+---+-------------+--------+------+------+------+ | ||
52 | - * | 0 | Q | 0 0 1 1 0 0 0 | L | 0 0 0 0 0 0 | opcode | size | Rn | Rt | | ||
53 | - * +---+---+---------------+---+-------------+--------+------+------+------+ | ||
54 | - * | ||
55 | - * AdvSIMD load/store multiple structures (post-indexed) | ||
56 | - * | ||
57 | - * 31 30 29 23 22 21 20 16 15 12 11 10 9 5 4 0 | ||
58 | - * +---+---+---------------+---+---+---------+--------+------+------+------+ | ||
59 | - * | 0 | Q | 0 0 1 1 0 0 1 | L | 0 | Rm | opcode | size | Rn | Rt | | ||
60 | - * +---+---+---------------+---+---+---------+--------+------+------+------+ | ||
61 | - * | ||
62 | - * Rt: first (or only) SIMD&FP register to be transferred | ||
63 | - * Rn: base address or SP | ||
64 | - * Rm (post-index only): post-index register (when !31) or size dependent #imm | ||
65 | - */ | ||
66 | -static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn) | ||
67 | +static bool trans_LD_mult(DisasContext *s, arg_ldst_mult *a) | ||
68 | { | ||
69 | - int rt = extract32(insn, 0, 5); | ||
70 | - int rn = extract32(insn, 5, 5); | ||
71 | - int rm = extract32(insn, 16, 5); | ||
72 | - int size = extract32(insn, 10, 2); | ||
73 | - int opcode = extract32(insn, 12, 4); | ||
74 | - bool is_store = !extract32(insn, 22, 1); | ||
75 | - bool is_postidx = extract32(insn, 23, 1); | ||
76 | - bool is_q = extract32(insn, 30, 1); | ||
77 | TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; | ||
78 | MemOp endian, align, mop; | ||
79 | |||
80 | int total; /* total bytes */ | ||
81 | int elements; /* elements per vector */ | ||
82 | - int rpt; /* num iterations */ | ||
83 | - int selem; /* structure elements */ | ||
84 | int r; | ||
85 | + int size = a->sz; | ||
86 | |||
87 | - if (extract32(insn, 31, 1) || extract32(insn, 21, 1)) { | ||
88 | - unallocated_encoding(s); | ||
89 | - return; | ||
90 | + if (!a->p && a->rm != 0) { | ||
91 | + /* For non-postindexed accesses the Rm field must be 0 */ | ||
92 | + return false; | ||
93 | } | ||
94 | - | ||
95 | - if (!is_postidx && rm != 0) { | ||
96 | - unallocated_encoding(s); | ||
97 | - return; | ||
98 | + if (size == 3 && !a->q && a->selem != 1) { | ||
99 | + return false; | ||
100 | } | ||
101 | - | ||
102 | - /* From the shared decode logic */ | ||
103 | - switch (opcode) { | ||
104 | - case 0x0: | ||
105 | - rpt = 1; | ||
106 | - selem = 4; | ||
107 | - break; | ||
108 | - case 0x2: | ||
109 | - rpt = 4; | ||
110 | - selem = 1; | ||
111 | - break; | ||
112 | - case 0x4: | ||
113 | - rpt = 1; | ||
114 | - selem = 3; | ||
115 | - break; | ||
116 | - case 0x6: | ||
117 | - rpt = 3; | ||
118 | - selem = 1; | ||
119 | - break; | ||
120 | - case 0x7: | ||
121 | - rpt = 1; | ||
122 | - selem = 1; | ||
123 | - break; | ||
124 | - case 0x8: | ||
125 | - rpt = 1; | ||
126 | - selem = 2; | ||
127 | - break; | ||
128 | - case 0xa: | ||
129 | - rpt = 2; | ||
130 | - selem = 1; | ||
131 | - break; | ||
132 | - default: | ||
133 | - unallocated_encoding(s); | ||
134 | - return; | ||
135 | - } | ||
136 | - | ||
137 | - if (size == 3 && !is_q && selem != 1) { | ||
138 | - /* reserved */ | ||
139 | - unallocated_encoding(s); | ||
140 | - return; | ||
141 | - } | ||
142 | - | ||
143 | if (!fp_access_check(s)) { | ||
144 | - return; | ||
145 | + return true; | ||
146 | } | ||
147 | |||
148 | - if (rn == 31) { | ||
149 | + if (a->rn == 31) { | ||
150 | gen_check_sp_alignment(s); | ||
151 | } | ||
152 | |||
153 | @@ -XXX,XX +XXX,XX @@ static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn) | ||
154 | endian = MO_LE; | ||
155 | } | ||
156 | |||
157 | - total = rpt * selem * (is_q ? 16 : 8); | ||
158 | - tcg_rn = cpu_reg_sp(s, rn); | ||
159 | + total = a->rpt * a->selem * (a->q ? 16 : 8); | ||
160 | + tcg_rn = cpu_reg_sp(s, a->rn); | ||
161 | |||
162 | /* | ||
163 | * Issue the MTE check vs the logical repeat count, before we | ||
164 | * promote consecutive little-endian elements below. | ||
165 | */ | ||
166 | - clean_addr = gen_mte_checkN(s, tcg_rn, is_store, is_postidx || rn != 31, | ||
167 | - total, finalize_memop_asimd(s, size)); | ||
168 | + clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31, total, | ||
169 | + finalize_memop_asimd(s, size)); | ||
170 | |||
171 | /* | ||
172 | * Consecutive little-endian elements from a single register | ||
173 | * can be promoted to a larger little-endian operation. | ||
174 | */ | ||
175 | align = MO_ALIGN; | ||
176 | - if (selem == 1 && endian == MO_LE) { | ||
177 | + if (a->selem == 1 && endian == MO_LE) { | ||
178 | align = pow2_align(size); | ||
179 | size = 3; | ||
180 | } | ||
181 | @@ -XXX,XX +XXX,XX @@ static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn) | ||
182 | } | ||
183 | mop = endian | size | align; | ||
184 | |||
185 | - elements = (is_q ? 16 : 8) >> size; | ||
186 | + elements = (a->q ? 16 : 8) >> size; | ||
187 | tcg_ebytes = tcg_constant_i64(1 << size); | ||
188 | - for (r = 0; r < rpt; r++) { | ||
189 | + for (r = 0; r < a->rpt; r++) { | ||
190 | int e; | ||
191 | for (e = 0; e < elements; e++) { | ||
192 | int xs; | ||
193 | - for (xs = 0; xs < selem; xs++) { | ||
194 | - int tt = (rt + r + xs) % 32; | ||
195 | - if (is_store) { | ||
196 | - do_vec_st(s, tt, e, clean_addr, mop); | ||
197 | - } else { | ||
198 | - do_vec_ld(s, tt, e, clean_addr, mop); | ||
199 | - } | ||
200 | + for (xs = 0; xs < a->selem; xs++) { | ||
201 | + int tt = (a->rt + r + xs) % 32; | ||
202 | + do_vec_ld(s, tt, e, clean_addr, mop); | ||
203 | tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); | ||
204 | } | ||
205 | } | ||
206 | } | ||
207 | |||
208 | - if (!is_store) { | ||
209 | - /* For non-quad operations, setting a slice of the low | ||
210 | - * 64 bits of the register clears the high 64 bits (in | ||
211 | - * the ARM ARM pseudocode this is implicit in the fact | ||
212 | - * that 'rval' is a 64 bit wide variable). | ||
213 | - * For quad operations, we might still need to zero the | ||
214 | - * high bits of SVE. | ||
215 | - */ | ||
216 | - for (r = 0; r < rpt * selem; r++) { | ||
217 | - int tt = (rt + r) % 32; | ||
218 | - clear_vec_high(s, is_q, tt); | ||
58 | + /* | 219 | + /* |
59 | + * Note that unlike sve_excp_el, we have not constrained sme_excp_el | 220 | + * For non-quad operations, setting a slice of the low 64 bits of |
60 | + * to be zero when fp_excp_el has priority. This is because we need | 221 | + * the register clears the high 64 bits (in the ARM ARM pseudocode |
61 | + * sme_excp_el by itself for cpregs access checks. | 222 | + * this is implicit in the fact that 'rval' is a 64 bit wide |
223 | + * variable). For quad operations, we might still need to zero | ||
224 | + * the high bits of SVE. | ||
62 | + */ | 225 | + */ |
63 | + if (!s->fp_excp_el || s->sme_excp_el < s->fp_excp_el) { | 226 | + for (r = 0; r < a->rpt * a->selem; r++) { |
64 | + s->fp_access_checked = true; | 227 | + int tt = (a->rt + r) % 32; |
65 | + return sme_access_check(s); | 228 | + clear_vec_high(s, a->q, tt); |
66 | + } | 229 | + } |
67 | + return fp_access_check_only(s); | 230 | + |
68 | +} | 231 | + if (a->p) { |
69 | + | 232 | + if (a->rm == 31) { |
70 | +/* Common subroutine for CheckSMEAnd*Enabled. */ | 233 | + tcg_gen_addi_i64(tcg_rn, tcg_rn, total); |
71 | +bool sme_enabled_check_with_svcr(DisasContext *s, unsigned req) | 234 | + } else { |
72 | +{ | 235 | + tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm)); |
73 | + if (!sme_enabled_check(s)) { | 236 | + } |
74 | + return false; | ||
75 | + } | ||
76 | + if (FIELD_EX64(req, SVCR, SM) && !s->pstate_sm) { | ||
77 | + gen_exception_insn(s, s->pc_curr, EXCP_UDEF, | ||
78 | + syn_smetrap(SME_ET_NotStreaming, false)); | ||
79 | + return false; | ||
80 | + } | ||
81 | + if (FIELD_EX64(req, SVCR, ZA) && !s->pstate_za) { | ||
82 | + gen_exception_insn(s, s->pc_curr, EXCP_UDEF, | ||
83 | + syn_smetrap(SME_ET_InactiveZA, false)); | ||
84 | + return false; | ||
85 | + } | 237 | + } |
86 | + return true; | 238 | + return true; |
87 | +} | 239 | +} |
88 | + | 240 | + |
89 | /* | 241 | +static bool trans_ST_mult(DisasContext *s, arg_ldst_mult *a) |
90 | * This utility function is for doing register extension with an | 242 | +{ |
91 | * optional shift. You will likely want to pass a temporary for the | 243 | + TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; |
244 | + MemOp endian, align, mop; | ||
245 | + | ||
246 | + int total; /* total bytes */ | ||
247 | + int elements; /* elements per vector */ | ||
248 | + int r; | ||
249 | + int size = a->sz; | ||
250 | + | ||
251 | + if (!a->p && a->rm != 0) { | ||
252 | + /* For non-postindexed accesses the Rm field must be 0 */ | ||
253 | + return false; | ||
254 | + } | ||
255 | + if (size == 3 && !a->q && a->selem != 1) { | ||
256 | + return false; | ||
257 | + } | ||
258 | + if (!fp_access_check(s)) { | ||
259 | + return true; | ||
260 | + } | ||
261 | + | ||
262 | + if (a->rn == 31) { | ||
263 | + gen_check_sp_alignment(s); | ||
264 | + } | ||
265 | + | ||
266 | + /* For our purposes, bytes are always little-endian. */ | ||
267 | + endian = s->be_data; | ||
268 | + if (size == 0) { | ||
269 | + endian = MO_LE; | ||
270 | + } | ||
271 | + | ||
272 | + total = a->rpt * a->selem * (a->q ? 16 : 8); | ||
273 | + tcg_rn = cpu_reg_sp(s, a->rn); | ||
274 | + | ||
275 | + /* | ||
276 | + * Issue the MTE check vs the logical repeat count, before we | ||
277 | + * promote consecutive little-endian elements below. | ||
278 | + */ | ||
279 | + clean_addr = gen_mte_checkN(s, tcg_rn, true, a->p || a->rn != 31, total, | ||
280 | + finalize_memop_asimd(s, size)); | ||
281 | + | ||
282 | + /* | ||
283 | + * Consecutive little-endian elements from a single register | ||
284 | + * can be promoted to a larger little-endian operation. | ||
285 | + */ | ||
286 | + align = MO_ALIGN; | ||
287 | + if (a->selem == 1 && endian == MO_LE) { | ||
288 | + align = pow2_align(size); | ||
289 | + size = 3; | ||
290 | + } | ||
291 | + if (!s->align_mem) { | ||
292 | + align = 0; | ||
293 | + } | ||
294 | + mop = endian | size | align; | ||
295 | + | ||
296 | + elements = (a->q ? 16 : 8) >> size; | ||
297 | + tcg_ebytes = tcg_constant_i64(1 << size); | ||
298 | + for (r = 0; r < a->rpt; r++) { | ||
299 | + int e; | ||
300 | + for (e = 0; e < elements; e++) { | ||
301 | + int xs; | ||
302 | + for (xs = 0; xs < a->selem; xs++) { | ||
303 | + int tt = (a->rt + r + xs) % 32; | ||
304 | + do_vec_st(s, tt, e, clean_addr, mop); | ||
305 | + tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); | ||
306 | + } | ||
307 | } | ||
308 | } | ||
309 | |||
310 | - if (is_postidx) { | ||
311 | - if (rm == 31) { | ||
312 | + if (a->p) { | ||
313 | + if (a->rm == 31) { | ||
314 | tcg_gen_addi_i64(tcg_rn, tcg_rn, total); | ||
315 | } else { | ||
316 | - tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm)); | ||
317 | + tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm)); | ||
318 | } | ||
319 | } | ||
320 | + return true; | ||
321 | } | ||
322 | |||
323 | /* AdvSIMD load/store single structure | ||
324 | @@ -XXX,XX +XXX,XX @@ static void disas_ldst_tag(DisasContext *s, uint32_t insn) | ||
325 | static void disas_ldst(DisasContext *s, uint32_t insn) | ||
326 | { | ||
327 | switch (extract32(insn, 24, 6)) { | ||
328 | - case 0x0c: /* AdvSIMD load/store multiple structures */ | ||
329 | - disas_ldst_multiple_struct(s, insn); | ||
330 | - break; | ||
331 | case 0x0d: /* AdvSIMD load/store single structure */ | ||
332 | disas_ldst_single_struct(s, insn); | ||
333 | break; | ||
92 | -- | 334 | -- |
93 | 2.25.1 | 335 | 2.34.1 | diff view generated by jsdifflib |
1 | From: Richard Henderson <richard.henderson@linaro.org> | 1 | Convert the ASIMD load/store single structure insns to decodetree. |
---|---|---|---|
2 | 2 | ||
3 | Move the checks out of the parsing loop and into the | 3 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
4 | restore function. This more closely mirrors the code | 4 | Message-id: 20230602155223.2040685-20-peter.maydell@linaro.org |
5 | structure in the kernel, and is slightly clearer. | 5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
6 | --- | ||
7 | target/arm/tcg/a64.decode | 34 +++++ | ||
8 | target/arm/tcg/translate-a64.c | 219 +++++++++++++++------------------ | ||
9 | 2 files changed, 136 insertions(+), 117 deletions(-) | ||
6 | 10 | ||
7 | Reject rather than silently skip incorrect VL and SVE record sizes, | 11 | diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode |
8 | bringing our checks in to line with those the kernel does. | ||
9 | |||
10 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | ||
11 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
12 | Message-id: 20220708151540.18136-40-richard.henderson@linaro.org | ||
13 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
14 | --- | ||
15 | linux-user/aarch64/signal.c | 51 +++++++++++++++++++++++++------------ | ||
16 | 1 file changed, 35 insertions(+), 16 deletions(-) | ||
17 | |||
18 | diff --git a/linux-user/aarch64/signal.c b/linux-user/aarch64/signal.c | ||
19 | index XXXXXXX..XXXXXXX 100644 | 12 | index XXXXXXX..XXXXXXX 100644 |
20 | --- a/linux-user/aarch64/signal.c | 13 | --- a/target/arm/tcg/a64.decode |
21 | +++ b/linux-user/aarch64/signal.c | 14 | +++ b/target/arm/tcg/a64.decode |
22 | @@ -XXX,XX +XXX,XX @@ static void target_restore_fpsimd_record(CPUARMState *env, | 15 | @@ -XXX,XX +XXX,XX @@ LD_mult 0 . 001100 . 1 0 ..... 0110 .. ..... ..... @ldst_mult rpt=3 sele |
23 | } | 16 | LD_mult 0 . 001100 . 1 0 ..... 0111 .. ..... ..... @ldst_mult rpt=1 selem=1 |
17 | LD_mult 0 . 001100 . 1 0 ..... 1000 .. ..... ..... @ldst_mult rpt=1 selem=2 | ||
18 | LD_mult 0 . 001100 . 1 0 ..... 1010 .. ..... ..... @ldst_mult rpt=2 selem=1 | ||
19 | + | ||
20 | +# Load/store single structure | ||
21 | +&ldst_single rm rn rt p selem index scale | ||
22 | + | ||
23 | +%ldst_single_selem 13:1 21:1 !function=plus_1 | ||
24 | + | ||
25 | +%ldst_single_index_b 30:1 10:3 | ||
26 | +%ldst_single_index_h 30:1 11:2 | ||
27 | +%ldst_single_index_s 30:1 12:1 | ||
28 | + | ||
29 | +@ldst_single_b .. ...... p:1 .. rm:5 ...... rn:5 rt:5 \ | ||
30 | + &ldst_single scale=0 selem=%ldst_single_selem \ | ||
31 | + index=%ldst_single_index_b | ||
32 | +@ldst_single_h .. ...... p:1 .. rm:5 ...... rn:5 rt:5 \ | ||
33 | + &ldst_single scale=1 selem=%ldst_single_selem \ | ||
34 | + index=%ldst_single_index_h | ||
35 | +@ldst_single_s .. ...... p:1 .. rm:5 ...... rn:5 rt:5 \ | ||
36 | + &ldst_single scale=2 selem=%ldst_single_selem \ | ||
37 | + index=%ldst_single_index_s | ||
38 | +@ldst_single_d . index:1 ...... p:1 .. rm:5 ...... rn:5 rt:5 \ | ||
39 | + &ldst_single scale=3 selem=%ldst_single_selem | ||
40 | + | ||
41 | +ST_single 0 . 001101 . 0 . ..... 00 . ... ..... ..... @ldst_single_b | ||
42 | +ST_single 0 . 001101 . 0 . ..... 01 . ..0 ..... ..... @ldst_single_h | ||
43 | +ST_single 0 . 001101 . 0 . ..... 10 . .00 ..... ..... @ldst_single_s | ||
44 | +ST_single 0 . 001101 . 0 . ..... 10 . 001 ..... ..... @ldst_single_d | ||
45 | + | ||
46 | +LD_single 0 . 001101 . 1 . ..... 00 . ... ..... ..... @ldst_single_b | ||
47 | +LD_single 0 . 001101 . 1 . ..... 01 . ..0 ..... ..... @ldst_single_h | ||
48 | +LD_single 0 . 001101 . 1 . ..... 10 . .00 ..... ..... @ldst_single_s | ||
49 | +LD_single 0 . 001101 . 1 . ..... 10 . 001 ..... ..... @ldst_single_d | ||
50 | + | ||
51 | +# Replicating load case | ||
52 | +LD_single_repl 0 q:1 001101 p:1 1 . rm:5 11 . 0 scale:2 rn:5 rt:5 selem=%ldst_single_selem | ||
53 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
54 | index XXXXXXX..XXXXXXX 100644 | ||
55 | --- a/target/arm/tcg/translate-a64.c | ||
56 | +++ b/target/arm/tcg/translate-a64.c | ||
57 | @@ -XXX,XX +XXX,XX @@ static bool trans_ST_mult(DisasContext *s, arg_ldst_mult *a) | ||
58 | return true; | ||
24 | } | 59 | } |
25 | 60 | ||
26 | -static void target_restore_sve_record(CPUARMState *env, | 61 | -/* AdvSIMD load/store single structure |
27 | - struct target_sve_context *sve, int vq) | 62 | - * |
28 | +static bool target_restore_sve_record(CPUARMState *env, | 63 | - * 31 30 29 23 22 21 20 16 15 13 12 11 10 9 5 4 0 |
29 | + struct target_sve_context *sve, | 64 | - * +---+---+---------------+-----+-----------+-----+---+------+------+------+ |
30 | + int size) | 65 | - * | 0 | Q | 0 0 1 1 0 1 0 | L R | 0 0 0 0 0 | opc | S | size | Rn | Rt | |
66 | - * +---+---+---------------+-----+-----------+-----+---+------+------+------+ | ||
67 | - * | ||
68 | - * AdvSIMD load/store single structure (post-indexed) | ||
69 | - * | ||
70 | - * 31 30 29 23 22 21 20 16 15 13 12 11 10 9 5 4 0 | ||
71 | - * +---+---+---------------+-----+-----------+-----+---+------+------+------+ | ||
72 | - * | 0 | Q | 0 0 1 1 0 1 1 | L R | Rm | opc | S | size | Rn | Rt | | ||
73 | - * +---+---+---------------+-----+-----------+-----+---+------+------+------+ | ||
74 | - * | ||
75 | - * Rt: first (or only) SIMD&FP register to be transferred | ||
76 | - * Rn: base address or SP | ||
77 | - * Rm (post-index only): post-index register (when !31) or size dependent #imm | ||
78 | - * index = encoded in Q:S:size dependent on size | ||
79 | - * | ||
80 | - * lane_size = encoded in R, opc | ||
81 | - * transfer width = encoded in opc, S, size | ||
82 | - */ | ||
83 | -static void disas_ldst_single_struct(DisasContext *s, uint32_t insn) | ||
84 | +static bool trans_ST_single(DisasContext *s, arg_ldst_single *a) | ||
31 | { | 85 | { |
32 | - int i, j; | 86 | - int rt = extract32(insn, 0, 5); |
33 | + int i, j, vl, vq; | 87 | - int rn = extract32(insn, 5, 5); |
34 | 88 | - int rm = extract32(insn, 16, 5); | |
35 | - /* Note that SVE regs are stored as a byte stream, with each byte element | 89 | - int size = extract32(insn, 10, 2); |
36 | + if (!cpu_isar_feature(aa64_sve, env_archcpu(env))) { | 90 | - int S = extract32(insn, 12, 1); |
91 | - int opc = extract32(insn, 13, 3); | ||
92 | - int R = extract32(insn, 21, 1); | ||
93 | - int is_load = extract32(insn, 22, 1); | ||
94 | - int is_postidx = extract32(insn, 23, 1); | ||
95 | - int is_q = extract32(insn, 30, 1); | ||
96 | - | ||
97 | - int scale = extract32(opc, 1, 2); | ||
98 | - int selem = (extract32(opc, 0, 1) << 1 | R) + 1; | ||
99 | - bool replicate = false; | ||
100 | - int index = is_q << 3 | S << 2 | size; | ||
101 | - int xs, total; | ||
102 | + int xs, total, rt; | ||
103 | TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; | ||
104 | MemOp mop; | ||
105 | |||
106 | - if (extract32(insn, 31, 1)) { | ||
107 | - unallocated_encoding(s); | ||
108 | - return; | ||
109 | + if (!a->p && a->rm != 0) { | ||
37 | + return false; | 110 | + return false; |
38 | + } | 111 | } |
39 | + | 112 | - if (!is_postidx && rm != 0) { |
40 | + __get_user(vl, &sve->vl); | 113 | - unallocated_encoding(s); |
41 | + vq = sve_vq(env); | 114 | - return; |
42 | + | 115 | - } |
43 | + /* Reject mismatched VL. */ | 116 | - |
44 | + if (vl != vq * TARGET_SVE_VQ_BYTES) { | 117 | - switch (scale) { |
118 | - case 3: | ||
119 | - if (!is_load || S) { | ||
120 | - unallocated_encoding(s); | ||
121 | - return; | ||
122 | - } | ||
123 | - scale = size; | ||
124 | - replicate = true; | ||
125 | - break; | ||
126 | - case 0: | ||
127 | - break; | ||
128 | - case 1: | ||
129 | - if (extract32(size, 0, 1)) { | ||
130 | - unallocated_encoding(s); | ||
131 | - return; | ||
132 | - } | ||
133 | - index >>= 1; | ||
134 | - break; | ||
135 | - case 2: | ||
136 | - if (extract32(size, 1, 1)) { | ||
137 | - unallocated_encoding(s); | ||
138 | - return; | ||
139 | - } | ||
140 | - if (!extract32(size, 0, 1)) { | ||
141 | - index >>= 2; | ||
142 | - } else { | ||
143 | - if (S) { | ||
144 | - unallocated_encoding(s); | ||
145 | - return; | ||
146 | - } | ||
147 | - index >>= 3; | ||
148 | - scale = 3; | ||
149 | - } | ||
150 | - break; | ||
151 | - default: | ||
152 | - g_assert_not_reached(); | ||
153 | - } | ||
154 | - | ||
155 | if (!fp_access_check(s)) { | ||
156 | - return; | ||
157 | + return true; | ||
158 | } | ||
159 | |||
160 | - if (rn == 31) { | ||
161 | + if (a->rn == 31) { | ||
162 | gen_check_sp_alignment(s); | ||
163 | } | ||
164 | |||
165 | - total = selem << scale; | ||
166 | - tcg_rn = cpu_reg_sp(s, rn); | ||
167 | + total = a->selem << a->scale; | ||
168 | + tcg_rn = cpu_reg_sp(s, a->rn); | ||
169 | |||
170 | - mop = finalize_memop_asimd(s, scale); | ||
171 | - | ||
172 | - clean_addr = gen_mte_checkN(s, tcg_rn, !is_load, is_postidx || rn != 31, | ||
173 | + mop = finalize_memop_asimd(s, a->scale); | ||
174 | + clean_addr = gen_mte_checkN(s, tcg_rn, true, a->p || a->rn != 31, | ||
175 | total, mop); | ||
176 | |||
177 | - tcg_ebytes = tcg_constant_i64(1 << scale); | ||
178 | - for (xs = 0; xs < selem; xs++) { | ||
179 | - if (replicate) { | ||
180 | - /* Load and replicate to all elements */ | ||
181 | - TCGv_i64 tcg_tmp = tcg_temp_new_i64(); | ||
182 | - | ||
183 | - tcg_gen_qemu_ld_i64(tcg_tmp, clean_addr, get_mem_index(s), mop); | ||
184 | - tcg_gen_gvec_dup_i64(scale, vec_full_reg_offset(s, rt), | ||
185 | - (is_q + 1) * 8, vec_full_reg_size(s), | ||
186 | - tcg_tmp); | ||
187 | - } else { | ||
188 | - /* Load/store one element per register */ | ||
189 | - if (is_load) { | ||
190 | - do_vec_ld(s, rt, index, clean_addr, mop); | ||
191 | - } else { | ||
192 | - do_vec_st(s, rt, index, clean_addr, mop); | ||
193 | - } | ||
194 | - } | ||
195 | + tcg_ebytes = tcg_constant_i64(1 << a->scale); | ||
196 | + for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) { | ||
197 | + do_vec_st(s, rt, a->index, clean_addr, mop); | ||
198 | tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); | ||
199 | - rt = (rt + 1) % 32; | ||
200 | } | ||
201 | |||
202 | - if (is_postidx) { | ||
203 | - if (rm == 31) { | ||
204 | + if (a->p) { | ||
205 | + if (a->rm == 31) { | ||
206 | tcg_gen_addi_i64(tcg_rn, tcg_rn, total); | ||
207 | } else { | ||
208 | - tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm)); | ||
209 | + tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm)); | ||
210 | } | ||
211 | } | ||
212 | + return true; | ||
213 | +} | ||
214 | + | ||
215 | +static bool trans_LD_single(DisasContext *s, arg_ldst_single *a) | ||
216 | +{ | ||
217 | + int xs, total, rt; | ||
218 | + TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; | ||
219 | + MemOp mop; | ||
220 | + | ||
221 | + if (!a->p && a->rm != 0) { | ||
45 | + return false; | 222 | + return false; |
46 | + } | 223 | + } |
47 | + | 224 | + if (!fp_access_check(s)) { |
48 | + /* Accept empty record -- used to clear PSTATE.SM. */ | ||
49 | + if (size <= sizeof(*sve)) { | ||
50 | + return true; | 225 | + return true; |
51 | + } | 226 | + } |
52 | + | 227 | + |
53 | + /* Reject non-empty but incomplete record. */ | 228 | + if (a->rn == 31) { |
54 | + if (size < TARGET_SVE_SIG_CONTEXT_SIZE(vq)) { | 229 | + gen_check_sp_alignment(s); |
230 | + } | ||
231 | + | ||
232 | + total = a->selem << a->scale; | ||
233 | + tcg_rn = cpu_reg_sp(s, a->rn); | ||
234 | + | ||
235 | + mop = finalize_memop_asimd(s, a->scale); | ||
236 | + clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31, | ||
237 | + total, mop); | ||
238 | + | ||
239 | + tcg_ebytes = tcg_constant_i64(1 << a->scale); | ||
240 | + for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) { | ||
241 | + do_vec_ld(s, rt, a->index, clean_addr, mop); | ||
242 | + tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); | ||
243 | + } | ||
244 | + | ||
245 | + if (a->p) { | ||
246 | + if (a->rm == 31) { | ||
247 | + tcg_gen_addi_i64(tcg_rn, tcg_rn, total); | ||
248 | + } else { | ||
249 | + tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm)); | ||
250 | + } | ||
251 | + } | ||
252 | + return true; | ||
253 | +} | ||
254 | + | ||
255 | +static bool trans_LD_single_repl(DisasContext *s, arg_LD_single_repl *a) | ||
256 | +{ | ||
257 | + int xs, total, rt; | ||
258 | + TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; | ||
259 | + MemOp mop; | ||
260 | + | ||
261 | + if (!a->p && a->rm != 0) { | ||
55 | + return false; | 262 | + return false; |
56 | + } | 263 | + } |
57 | + | 264 | + if (!fp_access_check(s)) { |
58 | + /* | 265 | + return true; |
59 | + * Note that SVE regs are stored as a byte stream, with each byte element | 266 | + } |
60 | * at a subsequent address. This corresponds to a little-endian load | 267 | + |
61 | * of our 64-bit hunks. | 268 | + if (a->rn == 31) { |
62 | */ | 269 | + gen_check_sp_alignment(s); |
63 | @@ -XXX,XX +XXX,XX @@ static void target_restore_sve_record(CPUARMState *env, | 270 | + } |
64 | } | 271 | + |
65 | } | 272 | + total = a->selem << a->scale; |
66 | } | 273 | + tcg_rn = cpu_reg_sp(s, a->rn); |
274 | + | ||
275 | + mop = finalize_memop_asimd(s, a->scale); | ||
276 | + clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31, | ||
277 | + total, mop); | ||
278 | + | ||
279 | + tcg_ebytes = tcg_constant_i64(1 << a->scale); | ||
280 | + for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) { | ||
281 | + /* Load and replicate to all elements */ | ||
282 | + TCGv_i64 tcg_tmp = tcg_temp_new_i64(); | ||
283 | + | ||
284 | + tcg_gen_qemu_ld_i64(tcg_tmp, clean_addr, get_mem_index(s), mop); | ||
285 | + tcg_gen_gvec_dup_i64(a->scale, vec_full_reg_offset(s, rt), | ||
286 | + (a->q + 1) * 8, vec_full_reg_size(s), tcg_tmp); | ||
287 | + tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); | ||
288 | + } | ||
289 | + | ||
290 | + if (a->p) { | ||
291 | + if (a->rm == 31) { | ||
292 | + tcg_gen_addi_i64(tcg_rn, tcg_rn, total); | ||
293 | + } else { | ||
294 | + tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm)); | ||
295 | + } | ||
296 | + } | ||
67 | + return true; | 297 | + return true; |
68 | } | 298 | } |
69 | 299 | ||
70 | static int target_restore_sigframe(CPUARMState *env, | 300 | /* |
71 | @@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env, | 301 | @@ -XXX,XX +XXX,XX @@ static void disas_ldst_tag(DisasContext *s, uint32_t insn) |
72 | struct target_sve_context *sve = NULL; | 302 | static void disas_ldst(DisasContext *s, uint32_t insn) |
73 | uint64_t extra_datap = 0; | 303 | { |
74 | bool used_extra = false; | 304 | switch (extract32(insn, 24, 6)) { |
75 | - int vq = 0, sve_size = 0; | 305 | - case 0x0d: /* AdvSIMD load/store single structure */ |
76 | + int sve_size = 0; | 306 | - disas_ldst_single_struct(s, insn); |
77 | 307 | - break; | |
78 | target_restore_general_frame(env, sf); | 308 | case 0x19: |
79 | 309 | if (extract32(insn, 21, 1) != 0) { | |
80 | @@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env, | 310 | disas_ldst_tag(s, insn); |
81 | if (sve || size < sizeof(struct target_sve_context)) { | ||
82 | goto err; | ||
83 | } | ||
84 | - if (cpu_isar_feature(aa64_sve, env_archcpu(env))) { | ||
85 | - vq = sve_vq(env); | ||
86 | - sve_size = QEMU_ALIGN_UP(TARGET_SVE_SIG_CONTEXT_SIZE(vq), 16); | ||
87 | - if (size == sve_size) { | ||
88 | - sve = (struct target_sve_context *)ctx; | ||
89 | - break; | ||
90 | - } | ||
91 | - } | ||
92 | - goto err; | ||
93 | + sve = (struct target_sve_context *)ctx; | ||
94 | + sve_size = size; | ||
95 | + break; | ||
96 | |||
97 | case TARGET_EXTRA_MAGIC: | ||
98 | if (extra || size != sizeof(struct target_extra_context)) { | ||
99 | @@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env, | ||
100 | } | ||
101 | |||
102 | /* SVE data, if present, overwrites FPSIMD data. */ | ||
103 | - if (sve) { | ||
104 | - target_restore_sve_record(env, sve, vq); | ||
105 | + if (sve && !target_restore_sve_record(env, sve, sve_size)) { | ||
106 | + goto err; | ||
107 | } | ||
108 | unlock_user(extra, extra_datap, 0); | ||
109 | return 0; | ||
110 | -- | 311 | -- |
111 | 2.25.1 | 312 | 2.34.1 | diff view generated by jsdifflib |
1 | From: Richard Henderson <richard.henderson@linaro.org> | 1 | Convert the instructions in the load/store memory tags instruction |
---|---|---|---|
2 | group to decodetree. | ||
2 | 3 | ||
3 | These SME instructions are nominally within the SVE decode space, | 4 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
4 | so we add them to sve.decode and translate-sve.c. | 5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
6 | Message-id: 20230602155223.2040685-21-peter.maydell@linaro.org | ||
7 | --- | ||
8 | target/arm/tcg/a64.decode | 25 +++ | ||
9 | target/arm/tcg/translate-a64.c | 360 ++++++++++++++++----------------- | ||
10 | 2 files changed, 199 insertions(+), 186 deletions(-) | ||
5 | 11 | ||
6 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | 12 | diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode |
7 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
8 | Message-id: 20220708151540.18136-18-richard.henderson@linaro.org | ||
9 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
10 | --- | ||
11 | target/arm/translate-a64.h | 12 ++++++++++++ | ||
12 | target/arm/sve.decode | 5 ++++- | ||
13 | target/arm/translate-sve.c | 38 ++++++++++++++++++++++++++++++++++++++ | ||
14 | 3 files changed, 54 insertions(+), 1 deletion(-) | ||
15 | |||
16 | diff --git a/target/arm/translate-a64.h b/target/arm/translate-a64.h | ||
17 | index XXXXXXX..XXXXXXX 100644 | 13 | index XXXXXXX..XXXXXXX 100644 |
18 | --- a/target/arm/translate-a64.h | 14 | --- a/target/arm/tcg/a64.decode |
19 | +++ b/target/arm/translate-a64.h | 15 | +++ b/target/arm/tcg/a64.decode |
20 | @@ -XXX,XX +XXX,XX @@ static inline int vec_full_reg_size(DisasContext *s) | 16 | @@ -XXX,XX +XXX,XX @@ LD_single 0 . 001101 . 1 . ..... 10 . 001 ..... ..... @ldst_single_d |
21 | return s->vl; | 17 | |
18 | # Replicating load case | ||
19 | LD_single_repl 0 q:1 001101 p:1 1 . rm:5 11 . 0 scale:2 rn:5 rt:5 selem=%ldst_single_selem | ||
20 | + | ||
21 | +%tag_offset 12:s9 !function=scale_by_log2_tag_granule | ||
22 | +&ldst_tag rn rt imm p w | ||
23 | +@ldst_tag ........ .. . ......... .. rn:5 rt:5 &ldst_tag imm=%tag_offset | ||
24 | +@ldst_tag_mult ........ .. . 000000000 .. rn:5 rt:5 &ldst_tag imm=0 | ||
25 | + | ||
26 | +STZGM 11011001 00 1 ......... 00 ..... ..... @ldst_tag_mult p=0 w=0 | ||
27 | +STG 11011001 00 1 ......... 01 ..... ..... @ldst_tag p=1 w=1 | ||
28 | +STG 11011001 00 1 ......... 10 ..... ..... @ldst_tag p=0 w=0 | ||
29 | +STG 11011001 00 1 ......... 11 ..... ..... @ldst_tag p=0 w=1 | ||
30 | + | ||
31 | +LDG 11011001 01 1 ......... 00 ..... ..... @ldst_tag p=0 w=0 | ||
32 | +STZG 11011001 01 1 ......... 01 ..... ..... @ldst_tag p=1 w=1 | ||
33 | +STZG 11011001 01 1 ......... 10 ..... ..... @ldst_tag p=0 w=0 | ||
34 | +STZG 11011001 01 1 ......... 11 ..... ..... @ldst_tag p=0 w=1 | ||
35 | + | ||
36 | +STGM 11011001 10 1 ......... 00 ..... ..... @ldst_tag_mult p=0 w=0 | ||
37 | +ST2G 11011001 10 1 ......... 01 ..... ..... @ldst_tag p=1 w=1 | ||
38 | +ST2G 11011001 10 1 ......... 10 ..... ..... @ldst_tag p=0 w=0 | ||
39 | +ST2G 11011001 10 1 ......... 11 ..... ..... @ldst_tag p=0 w=1 | ||
40 | + | ||
41 | +LDGM 11011001 11 1 ......... 00 ..... ..... @ldst_tag_mult p=0 w=0 | ||
42 | +STZ2G 11011001 11 1 ......... 01 ..... ..... @ldst_tag p=1 w=1 | ||
43 | +STZ2G 11011001 11 1 ......... 10 ..... ..... @ldst_tag p=0 w=0 | ||
44 | +STZ2G 11011001 11 1 ......... 11 ..... ..... @ldst_tag p=0 w=1 | ||
45 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
46 | index XXXXXXX..XXXXXXX 100644 | ||
47 | --- a/target/arm/tcg/translate-a64.c | ||
48 | +++ b/target/arm/tcg/translate-a64.c | ||
49 | @@ -XXX,XX +XXX,XX @@ static int uimm_scaled(DisasContext *s, int x) | ||
50 | return imm << scale; | ||
22 | } | 51 | } |
23 | 52 | ||
24 | +/* Return the byte size of the vector register, SVL / 8. */ | 53 | +/* For load/store memory tags: scale offset by LOG2_TAG_GRANULE */ |
25 | +static inline int streaming_vec_reg_size(DisasContext *s) | 54 | +static int scale_by_log2_tag_granule(DisasContext *s, int x) |
26 | +{ | 55 | +{ |
27 | + return s->svl; | 56 | + return x << LOG2_TAG_GRANULE; |
28 | +} | 57 | +} |
29 | + | 58 | + |
30 | /* | 59 | /* |
31 | * Return the offset into CPUARMState of the predicate vector register Pn. | 60 | * Include the generated decoders. |
32 | * Note for this purpose, FFR is P16. | 61 | */ |
33 | @@ -XXX,XX +XXX,XX @@ static inline int pred_full_reg_size(DisasContext *s) | 62 | @@ -XXX,XX +XXX,XX @@ static bool trans_LD_single_repl(DisasContext *s, arg_LD_single_repl *a) |
34 | return s->vl >> 3; | ||
35 | } | ||
36 | |||
37 | +/* Return the byte size of the predicate register, SVL / 64. */ | ||
38 | +static inline int streaming_pred_reg_size(DisasContext *s) | ||
39 | +{ | ||
40 | + return s->svl >> 3; | ||
41 | +} | ||
42 | + | ||
43 | /* | ||
44 | * Round up the size of a register to a size allowed by | ||
45 | * the tcg vector infrastructure. Any operation which uses this | ||
46 | diff --git a/target/arm/sve.decode b/target/arm/sve.decode | ||
47 | index XXXXXXX..XXXXXXX 100644 | ||
48 | --- a/target/arm/sve.decode | ||
49 | +++ b/target/arm/sve.decode | ||
50 | @@ -XXX,XX +XXX,XX @@ INDEX_ri 00000100 esz:2 1 imm:s5 010001 rn:5 rd:5 | ||
51 | # SVE index generation (register start, register increment) | ||
52 | INDEX_rr 00000100 .. 1 ..... 010011 ..... ..... @rd_rn_rm | ||
53 | |||
54 | -### SVE Stack Allocation Group | ||
55 | +### SVE / Streaming SVE Stack Allocation Group | ||
56 | |||
57 | # SVE stack frame adjustment | ||
58 | ADDVL 00000100 001 ..... 01010 ...... ..... @rd_rn_i6 | ||
59 | +ADDSVL 00000100 001 ..... 01011 ...... ..... @rd_rn_i6 | ||
60 | ADDPL 00000100 011 ..... 01010 ...... ..... @rd_rn_i6 | ||
61 | +ADDSPL 00000100 011 ..... 01011 ...... ..... @rd_rn_i6 | ||
62 | |||
63 | # SVE stack frame size | ||
64 | RDVL 00000100 101 11111 01010 imm:s6 rd:5 | ||
65 | +RDSVL 00000100 101 11111 01011 imm:s6 rd:5 | ||
66 | |||
67 | ### SVE Bitwise Shift - Unpredicated Group | ||
68 | |||
69 | diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c | ||
70 | index XXXXXXX..XXXXXXX 100644 | ||
71 | --- a/target/arm/translate-sve.c | ||
72 | +++ b/target/arm/translate-sve.c | ||
73 | @@ -XXX,XX +XXX,XX @@ static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a) | ||
74 | return true; | 63 | return true; |
75 | } | 64 | } |
76 | 65 | ||
77 | +static bool trans_ADDSVL(DisasContext *s, arg_ADDSVL *a) | 66 | -/* |
78 | +{ | 67 | - * Load/Store memory tags |
79 | + if (!dc_isar_feature(aa64_sme, s)) { | 68 | - * |
80 | + return false; | 69 | - * 31 30 29 24 22 21 12 10 5 0 |
81 | + } | 70 | - * +-----+-------------+-----+---+------+-----+------+------+ |
82 | + if (sme_enabled_check(s)) { | 71 | - * | 1 1 | 0 1 1 0 0 1 | op1 | 1 | imm9 | op2 | Rn | Rt | |
83 | + TCGv_i64 rd = cpu_reg_sp(s, a->rd); | 72 | - * +-----+-------------+-----+---+------+-----+------+------+ |
84 | + TCGv_i64 rn = cpu_reg_sp(s, a->rn); | 73 | - */ |
85 | + tcg_gen_addi_i64(rd, rn, a->imm * streaming_vec_reg_size(s)); | 74 | -static void disas_ldst_tag(DisasContext *s, uint32_t insn) |
86 | + } | 75 | +static bool trans_STZGM(DisasContext *s, arg_ldst_tag *a) |
76 | { | ||
77 | - int rt = extract32(insn, 0, 5); | ||
78 | - int rn = extract32(insn, 5, 5); | ||
79 | - uint64_t offset = sextract64(insn, 12, 9) << LOG2_TAG_GRANULE; | ||
80 | - int op2 = extract32(insn, 10, 2); | ||
81 | - int op1 = extract32(insn, 22, 2); | ||
82 | - bool is_load = false, is_pair = false, is_zero = false, is_mult = false; | ||
83 | - int index = 0; | ||
84 | TCGv_i64 addr, clean_addr, tcg_rt; | ||
85 | + int size = 4 << s->dcz_blocksize; | ||
86 | |||
87 | - /* We checked insn bits [29:24,21] in the caller. */ | ||
88 | - if (extract32(insn, 30, 2) != 3) { | ||
89 | - goto do_unallocated; | ||
90 | + if (!dc_isar_feature(aa64_mte, s)) { | ||
91 | + return false; | ||
92 | + } | ||
93 | + if (s->current_el == 0) { | ||
94 | + return false; | ||
95 | } | ||
96 | |||
97 | - /* | ||
98 | - * @index is a tri-state variable which has 3 states: | ||
99 | - * < 0 : post-index, writeback | ||
100 | - * = 0 : signed offset | ||
101 | - * > 0 : pre-index, writeback | ||
102 | - */ | ||
103 | - switch (op1) { | ||
104 | - case 0: | ||
105 | - if (op2 != 0) { | ||
106 | - /* STG */ | ||
107 | - index = op2 - 2; | ||
108 | - } else { | ||
109 | - /* STZGM */ | ||
110 | - if (s->current_el == 0 || offset != 0) { | ||
111 | - goto do_unallocated; | ||
112 | - } | ||
113 | - is_mult = is_zero = true; | ||
114 | - } | ||
115 | - break; | ||
116 | - case 1: | ||
117 | - if (op2 != 0) { | ||
118 | - /* STZG */ | ||
119 | - is_zero = true; | ||
120 | - index = op2 - 2; | ||
121 | - } else { | ||
122 | - /* LDG */ | ||
123 | - is_load = true; | ||
124 | - } | ||
125 | - break; | ||
126 | - case 2: | ||
127 | - if (op2 != 0) { | ||
128 | - /* ST2G */ | ||
129 | - is_pair = true; | ||
130 | - index = op2 - 2; | ||
131 | - } else { | ||
132 | - /* STGM */ | ||
133 | - if (s->current_el == 0 || offset != 0) { | ||
134 | - goto do_unallocated; | ||
135 | - } | ||
136 | - is_mult = true; | ||
137 | - } | ||
138 | - break; | ||
139 | - case 3: | ||
140 | - if (op2 != 0) { | ||
141 | - /* STZ2G */ | ||
142 | - is_pair = is_zero = true; | ||
143 | - index = op2 - 2; | ||
144 | - } else { | ||
145 | - /* LDGM */ | ||
146 | - if (s->current_el == 0 || offset != 0) { | ||
147 | - goto do_unallocated; | ||
148 | - } | ||
149 | - is_mult = is_load = true; | ||
150 | - } | ||
151 | - break; | ||
152 | - | ||
153 | - default: | ||
154 | - do_unallocated: | ||
155 | - unallocated_encoding(s); | ||
156 | - return; | ||
157 | - } | ||
158 | - | ||
159 | - if (is_mult | ||
160 | - ? !dc_isar_feature(aa64_mte, s) | ||
161 | - : !dc_isar_feature(aa64_mte_insn_reg, s)) { | ||
162 | - goto do_unallocated; | ||
163 | - } | ||
164 | - | ||
165 | - if (rn == 31) { | ||
166 | + if (a->rn == 31) { | ||
167 | gen_check_sp_alignment(s); | ||
168 | } | ||
169 | |||
170 | - addr = read_cpu_reg_sp(s, rn, true); | ||
171 | - if (index >= 0) { | ||
172 | + addr = read_cpu_reg_sp(s, a->rn, true); | ||
173 | + tcg_gen_addi_i64(addr, addr, a->imm); | ||
174 | + tcg_rt = cpu_reg(s, a->rt); | ||
175 | + | ||
176 | + if (s->ata) { | ||
177 | + gen_helper_stzgm_tags(cpu_env, addr, tcg_rt); | ||
178 | + } | ||
179 | + /* | ||
180 | + * The non-tags portion of STZGM is mostly like DC_ZVA, | ||
181 | + * except the alignment happens before the access. | ||
182 | + */ | ||
183 | + clean_addr = clean_data_tbi(s, addr); | ||
184 | + tcg_gen_andi_i64(clean_addr, clean_addr, -size); | ||
185 | + gen_helper_dc_zva(cpu_env, clean_addr); | ||
87 | + return true; | 186 | + return true; |
88 | +} | 187 | +} |
89 | + | 188 | + |
90 | static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a) | 189 | +static bool trans_STGM(DisasContext *s, arg_ldst_tag *a) |
91 | { | ||
92 | if (!dc_isar_feature(aa64_sve, s)) { | ||
93 | @@ -XXX,XX +XXX,XX @@ static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a) | ||
94 | return true; | ||
95 | } | ||
96 | |||
97 | +static bool trans_ADDSPL(DisasContext *s, arg_ADDSPL *a) | ||
98 | +{ | 190 | +{ |
99 | + if (!dc_isar_feature(aa64_sme, s)) { | 191 | + TCGv_i64 addr, clean_addr, tcg_rt; |
100 | + return false; | 192 | + |
101 | + } | 193 | + if (!dc_isar_feature(aa64_mte, s)) { |
102 | + if (sme_enabled_check(s)) { | 194 | + return false; |
103 | + TCGv_i64 rd = cpu_reg_sp(s, a->rd); | 195 | + } |
104 | + TCGv_i64 rn = cpu_reg_sp(s, a->rn); | 196 | + if (s->current_el == 0) { |
105 | + tcg_gen_addi_i64(rd, rn, a->imm * streaming_pred_reg_size(s)); | 197 | + return false; |
198 | + } | ||
199 | + | ||
200 | + if (a->rn == 31) { | ||
201 | + gen_check_sp_alignment(s); | ||
202 | + } | ||
203 | + | ||
204 | + addr = read_cpu_reg_sp(s, a->rn, true); | ||
205 | + tcg_gen_addi_i64(addr, addr, a->imm); | ||
206 | + tcg_rt = cpu_reg(s, a->rt); | ||
207 | + | ||
208 | + if (s->ata) { | ||
209 | + gen_helper_stgm(cpu_env, addr, tcg_rt); | ||
210 | + } else { | ||
211 | + MMUAccessType acc = MMU_DATA_STORE; | ||
212 | + int size = 4 << GMID_EL1_BS; | ||
213 | + | ||
214 | + clean_addr = clean_data_tbi(s, addr); | ||
215 | + tcg_gen_andi_i64(clean_addr, clean_addr, -size); | ||
216 | + gen_probe_access(s, clean_addr, acc, size); | ||
106 | + } | 217 | + } |
107 | + return true; | 218 | + return true; |
108 | +} | 219 | +} |
109 | + | 220 | + |
110 | static bool trans_RDVL(DisasContext *s, arg_RDVL *a) | 221 | +static bool trans_LDGM(DisasContext *s, arg_ldst_tag *a) |
111 | { | ||
112 | if (!dc_isar_feature(aa64_sve, s)) { | ||
113 | @@ -XXX,XX +XXX,XX @@ static bool trans_RDVL(DisasContext *s, arg_RDVL *a) | ||
114 | return true; | ||
115 | } | ||
116 | |||
117 | +static bool trans_RDSVL(DisasContext *s, arg_RDSVL *a) | ||
118 | +{ | 222 | +{ |
119 | + if (!dc_isar_feature(aa64_sme, s)) { | 223 | + TCGv_i64 addr, clean_addr, tcg_rt; |
120 | + return false; | 224 | + |
121 | + } | 225 | + if (!dc_isar_feature(aa64_mte, s)) { |
122 | + if (sme_enabled_check(s)) { | 226 | + return false; |
123 | + TCGv_i64 reg = cpu_reg(s, a->rd); | 227 | + } |
124 | + tcg_gen_movi_i64(reg, a->imm * streaming_vec_reg_size(s)); | 228 | + if (s->current_el == 0) { |
229 | + return false; | ||
230 | + } | ||
231 | + | ||
232 | + if (a->rn == 31) { | ||
233 | + gen_check_sp_alignment(s); | ||
234 | + } | ||
235 | + | ||
236 | + addr = read_cpu_reg_sp(s, a->rn, true); | ||
237 | + tcg_gen_addi_i64(addr, addr, a->imm); | ||
238 | + tcg_rt = cpu_reg(s, a->rt); | ||
239 | + | ||
240 | + if (s->ata) { | ||
241 | + gen_helper_ldgm(tcg_rt, cpu_env, addr); | ||
242 | + } else { | ||
243 | + MMUAccessType acc = MMU_DATA_LOAD; | ||
244 | + int size = 4 << GMID_EL1_BS; | ||
245 | + | ||
246 | + clean_addr = clean_data_tbi(s, addr); | ||
247 | + tcg_gen_andi_i64(clean_addr, clean_addr, -size); | ||
248 | + gen_probe_access(s, clean_addr, acc, size); | ||
249 | + /* The result tags are zeros. */ | ||
250 | + tcg_gen_movi_i64(tcg_rt, 0); | ||
125 | + } | 251 | + } |
126 | + return true; | 252 | + return true; |
127 | +} | 253 | +} |
128 | + | 254 | + |
129 | /* | 255 | +static bool trans_LDG(DisasContext *s, arg_ldst_tag *a) |
130 | *** SVE Compute Vector Address Group | 256 | +{ |
131 | */ | 257 | + TCGv_i64 addr, clean_addr, tcg_rt; |
258 | + | ||
259 | + if (!dc_isar_feature(aa64_mte_insn_reg, s)) { | ||
260 | + return false; | ||
261 | + } | ||
262 | + | ||
263 | + if (a->rn == 31) { | ||
264 | + gen_check_sp_alignment(s); | ||
265 | + } | ||
266 | + | ||
267 | + addr = read_cpu_reg_sp(s, a->rn, true); | ||
268 | + if (!a->p) { | ||
269 | /* pre-index or signed offset */ | ||
270 | - tcg_gen_addi_i64(addr, addr, offset); | ||
271 | + tcg_gen_addi_i64(addr, addr, a->imm); | ||
272 | } | ||
273 | |||
274 | - if (is_mult) { | ||
275 | - tcg_rt = cpu_reg(s, rt); | ||
276 | + tcg_gen_andi_i64(addr, addr, -TAG_GRANULE); | ||
277 | + tcg_rt = cpu_reg(s, a->rt); | ||
278 | + if (s->ata) { | ||
279 | + gen_helper_ldg(tcg_rt, cpu_env, addr, tcg_rt); | ||
280 | + } else { | ||
281 | + /* | ||
282 | + * Tag access disabled: we must check for aborts on the load | ||
283 | + * load from [rn+offset], and then insert a 0 tag into rt. | ||
284 | + */ | ||
285 | + clean_addr = clean_data_tbi(s, addr); | ||
286 | + gen_probe_access(s, clean_addr, MMU_DATA_LOAD, MO_8); | ||
287 | + gen_address_with_allocation_tag0(tcg_rt, tcg_rt); | ||
288 | + } | ||
289 | |||
290 | - if (is_zero) { | ||
291 | - int size = 4 << s->dcz_blocksize; | ||
292 | - | ||
293 | - if (s->ata) { | ||
294 | - gen_helper_stzgm_tags(cpu_env, addr, tcg_rt); | ||
295 | - } | ||
296 | - /* | ||
297 | - * The non-tags portion of STZGM is mostly like DC_ZVA, | ||
298 | - * except the alignment happens before the access. | ||
299 | - */ | ||
300 | - clean_addr = clean_data_tbi(s, addr); | ||
301 | - tcg_gen_andi_i64(clean_addr, clean_addr, -size); | ||
302 | - gen_helper_dc_zva(cpu_env, clean_addr); | ||
303 | - } else if (s->ata) { | ||
304 | - if (is_load) { | ||
305 | - gen_helper_ldgm(tcg_rt, cpu_env, addr); | ||
306 | - } else { | ||
307 | - gen_helper_stgm(cpu_env, addr, tcg_rt); | ||
308 | - } | ||
309 | - } else { | ||
310 | - MMUAccessType acc = is_load ? MMU_DATA_LOAD : MMU_DATA_STORE; | ||
311 | - int size = 4 << GMID_EL1_BS; | ||
312 | - | ||
313 | - clean_addr = clean_data_tbi(s, addr); | ||
314 | - tcg_gen_andi_i64(clean_addr, clean_addr, -size); | ||
315 | - gen_probe_access(s, clean_addr, acc, size); | ||
316 | - | ||
317 | - if (is_load) { | ||
318 | - /* The result tags are zeros. */ | ||
319 | - tcg_gen_movi_i64(tcg_rt, 0); | ||
320 | - } | ||
321 | + if (a->w) { | ||
322 | + /* pre-index or post-index */ | ||
323 | + if (a->p) { | ||
324 | + /* post-index */ | ||
325 | + tcg_gen_addi_i64(addr, addr, a->imm); | ||
326 | } | ||
327 | - return; | ||
328 | + tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), addr); | ||
329 | + } | ||
330 | + return true; | ||
331 | +} | ||
332 | + | ||
333 | +static bool do_STG(DisasContext *s, arg_ldst_tag *a, bool is_zero, bool is_pair) | ||
334 | +{ | ||
335 | + TCGv_i64 addr, tcg_rt; | ||
336 | + | ||
337 | + if (a->rn == 31) { | ||
338 | + gen_check_sp_alignment(s); | ||
339 | } | ||
340 | |||
341 | - if (is_load) { | ||
342 | - tcg_gen_andi_i64(addr, addr, -TAG_GRANULE); | ||
343 | - tcg_rt = cpu_reg(s, rt); | ||
344 | - if (s->ata) { | ||
345 | - gen_helper_ldg(tcg_rt, cpu_env, addr, tcg_rt); | ||
346 | + addr = read_cpu_reg_sp(s, a->rn, true); | ||
347 | + if (!a->p) { | ||
348 | + /* pre-index or signed offset */ | ||
349 | + tcg_gen_addi_i64(addr, addr, a->imm); | ||
350 | + } | ||
351 | + tcg_rt = cpu_reg_sp(s, a->rt); | ||
352 | + if (!s->ata) { | ||
353 | + /* | ||
354 | + * For STG and ST2G, we need to check alignment and probe memory. | ||
355 | + * TODO: For STZG and STZ2G, we could rely on the stores below, | ||
356 | + * at least for system mode; user-only won't enforce alignment. | ||
357 | + */ | ||
358 | + if (is_pair) { | ||
359 | + gen_helper_st2g_stub(cpu_env, addr); | ||
360 | } else { | ||
361 | - /* | ||
362 | - * Tag access disabled: we must check for aborts on the load | ||
363 | - * load from [rn+offset], and then insert a 0 tag into rt. | ||
364 | - */ | ||
365 | - clean_addr = clean_data_tbi(s, addr); | ||
366 | - gen_probe_access(s, clean_addr, MMU_DATA_LOAD, MO_8); | ||
367 | - gen_address_with_allocation_tag0(tcg_rt, tcg_rt); | ||
368 | + gen_helper_stg_stub(cpu_env, addr); | ||
369 | + } | ||
370 | + } else if (tb_cflags(s->base.tb) & CF_PARALLEL) { | ||
371 | + if (is_pair) { | ||
372 | + gen_helper_st2g_parallel(cpu_env, addr, tcg_rt); | ||
373 | + } else { | ||
374 | + gen_helper_stg_parallel(cpu_env, addr, tcg_rt); | ||
375 | } | ||
376 | } else { | ||
377 | - tcg_rt = cpu_reg_sp(s, rt); | ||
378 | - if (!s->ata) { | ||
379 | - /* | ||
380 | - * For STG and ST2G, we need to check alignment and probe memory. | ||
381 | - * TODO: For STZG and STZ2G, we could rely on the stores below, | ||
382 | - * at least for system mode; user-only won't enforce alignment. | ||
383 | - */ | ||
384 | - if (is_pair) { | ||
385 | - gen_helper_st2g_stub(cpu_env, addr); | ||
386 | - } else { | ||
387 | - gen_helper_stg_stub(cpu_env, addr); | ||
388 | - } | ||
389 | - } else if (tb_cflags(s->base.tb) & CF_PARALLEL) { | ||
390 | - if (is_pair) { | ||
391 | - gen_helper_st2g_parallel(cpu_env, addr, tcg_rt); | ||
392 | - } else { | ||
393 | - gen_helper_stg_parallel(cpu_env, addr, tcg_rt); | ||
394 | - } | ||
395 | + if (is_pair) { | ||
396 | + gen_helper_st2g(cpu_env, addr, tcg_rt); | ||
397 | } else { | ||
398 | - if (is_pair) { | ||
399 | - gen_helper_st2g(cpu_env, addr, tcg_rt); | ||
400 | - } else { | ||
401 | - gen_helper_stg(cpu_env, addr, tcg_rt); | ||
402 | - } | ||
403 | + gen_helper_stg(cpu_env, addr, tcg_rt); | ||
404 | } | ||
405 | } | ||
406 | |||
407 | @@ -XXX,XX +XXX,XX @@ static void disas_ldst_tag(DisasContext *s, uint32_t insn) | ||
408 | } | ||
409 | } | ||
410 | |||
411 | - if (index != 0) { | ||
412 | + if (a->w) { | ||
413 | /* pre-index or post-index */ | ||
414 | - if (index < 0) { | ||
415 | + if (a->p) { | ||
416 | /* post-index */ | ||
417 | - tcg_gen_addi_i64(addr, addr, offset); | ||
418 | + tcg_gen_addi_i64(addr, addr, a->imm); | ||
419 | } | ||
420 | - tcg_gen_mov_i64(cpu_reg_sp(s, rn), addr); | ||
421 | + tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), addr); | ||
422 | } | ||
423 | + return true; | ||
424 | } | ||
425 | |||
426 | -/* Loads and stores */ | ||
427 | -static void disas_ldst(DisasContext *s, uint32_t insn) | ||
428 | -{ | ||
429 | - switch (extract32(insn, 24, 6)) { | ||
430 | - case 0x19: | ||
431 | - if (extract32(insn, 21, 1) != 0) { | ||
432 | - disas_ldst_tag(s, insn); | ||
433 | - } else { | ||
434 | - unallocated_encoding(s); | ||
435 | - } | ||
436 | - break; | ||
437 | - default: | ||
438 | - unallocated_encoding(s); | ||
439 | - break; | ||
440 | - } | ||
441 | -} | ||
442 | +TRANS_FEAT(STG, aa64_mte_insn_reg, do_STG, a, false, false) | ||
443 | +TRANS_FEAT(STZG, aa64_mte_insn_reg, do_STG, a, true, false) | ||
444 | +TRANS_FEAT(ST2G, aa64_mte_insn_reg, do_STG, a, false, true) | ||
445 | +TRANS_FEAT(STZ2G, aa64_mte_insn_reg, do_STG, a, true, true) | ||
446 | |||
447 | typedef void ArithTwoOp(TCGv_i64, TCGv_i64, TCGv_i64); | ||
448 | |||
449 | @@ -XXX,XX +XXX,XX @@ static bool btype_destination_ok(uint32_t insn, bool bt, int btype) | ||
450 | static void disas_a64_legacy(DisasContext *s, uint32_t insn) | ||
451 | { | ||
452 | switch (extract32(insn, 25, 4)) { | ||
453 | - case 0x4: | ||
454 | - case 0x6: | ||
455 | - case 0xc: | ||
456 | - case 0xe: /* Loads and stores */ | ||
457 | - disas_ldst(s, insn); | ||
458 | - break; | ||
459 | case 0x5: | ||
460 | case 0xd: /* Data processing - register */ | ||
461 | disas_data_proc_reg(s, insn); | ||
132 | -- | 462 | -- |
133 | 2.25.1 | 463 | 2.34.1 | diff view generated by jsdifflib |
1 | From: Richard Henderson <richard.henderson@linaro.org> | 1 | In commit 2c5fa0778c3b430 we fixed an endianness bug in the Allwinner |
---|---|---|---|
2 | A10 PIC model; however in the process we introduced a regression. | ||
3 | This is because the old code was robust against the incoming 'level' | ||
4 | argument being something other than 0 or 1, whereas the new code was | ||
5 | not. | ||
2 | 6 | ||
3 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | 7 | In particular, the allwinner-sdhost code treats its IRQ line |
4 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 8 | as 0-vs-non-0 rather than 0-vs-1, so when the SD controller |
5 | Message-id: 20220708151540.18136-34-richard.henderson@linaro.org | 9 | set its IRQ line for any reason other than transmit, the |
10 | interrupt controller would ignore it. The observed effect | ||
11 | was a guest timeout when rebooting the guest kernel. | ||
12 | |||
13 | Handle level values other than 0 or 1, to restore the old | ||
14 | behaviour. | ||
15 | |||
16 | Fixes: 2c5fa0778c3b430 ("hw/intc/allwinner-a10-pic: Don't use set_bit()/clear_bit()") | ||
17 | Cc: qemu-stable@nongnu.org | ||
6 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 18 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
19 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
20 | Tested-by: Guenter Roeck <linux@roeck-us.net> | ||
21 | Message-id: 20230606104609.3692557-2-peter.maydell@linaro.org | ||
7 | --- | 22 | --- |
8 | linux-user/aarch64/target_cpu.h | 5 ++++- | 23 | hw/intc/allwinner-a10-pic.c | 2 +- |
9 | 1 file changed, 4 insertions(+), 1 deletion(-) | 24 | 1 file changed, 1 insertion(+), 1 deletion(-) |
10 | 25 | ||
11 | diff --git a/linux-user/aarch64/target_cpu.h b/linux-user/aarch64/target_cpu.h | 26 | diff --git a/hw/intc/allwinner-a10-pic.c b/hw/intc/allwinner-a10-pic.c |
12 | index XXXXXXX..XXXXXXX 100644 | 27 | index XXXXXXX..XXXXXXX 100644 |
13 | --- a/linux-user/aarch64/target_cpu.h | 28 | --- a/hw/intc/allwinner-a10-pic.c |
14 | +++ b/linux-user/aarch64/target_cpu.h | 29 | +++ b/hw/intc/allwinner-a10-pic.c |
15 | @@ -XXX,XX +XXX,XX @@ static inline void cpu_clone_regs_parent(CPUARMState *env, unsigned flags) | 30 | @@ -XXX,XX +XXX,XX @@ static void aw_a10_pic_set_irq(void *opaque, int irq, int level) |
16 | 31 | AwA10PICState *s = opaque; | |
17 | static inline void cpu_set_tls(CPUARMState *env, target_ulong newtls) | 32 | uint32_t *pending_reg = &s->irq_pending[irq / 32]; |
18 | { | 33 | |
19 | - /* Note that AArch64 Linux keeps the TLS pointer in TPIDR; this is | 34 | - *pending_reg = deposit32(*pending_reg, irq % 32, 1, level); |
20 | + /* | 35 | + *pending_reg = deposit32(*pending_reg, irq % 32, 1, !!level); |
21 | + * Note that AArch64 Linux keeps the TLS pointer in TPIDR; this is | 36 | aw_a10_pic_update(s); |
22 | * different from AArch32 Linux, which uses TPIDRRO. | ||
23 | */ | ||
24 | env->cp15.tpidr_el[0] = newtls; | ||
25 | + /* TPIDR2_EL0 is cleared with CLONE_SETTLS. */ | ||
26 | + env->cp15.tpidr2_el0 = 0; | ||
27 | } | 37 | } |
28 | 38 | ||
29 | static inline abi_ulong get_sp_from_cpustate(CPUARMState *state) | ||
30 | -- | 39 | -- |
31 | 2.25.1 | 40 | 2.34.1 |
41 | |||
42 | diff view generated by jsdifflib |
1 | From: Richard Henderson <richard.henderson@linaro.org> | 1 | QEMU allows qemu_irq lines to transfer arbitrary integers. However |
---|---|---|---|
2 | the convention is that for a simple IRQ line the values transferred | ||
3 | are always 0 and 1. The A10 SD controller device instead assumes a | ||
4 | 0-vs-non-0 convention, which happens to work with the interrupt | ||
5 | controller it is wired up to. | ||
2 | 6 | ||
3 | Mark these as non-streaming instructions, which should trap | 7 | Coerce the value to boolean to follow our usual convention. |
4 | if full a64 support is not enabled in streaming mode. | ||
5 | 8 | ||
6 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | ||
7 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
8 | Message-id: 20220708151540.18136-6-richard.henderson@linaro.org | ||
9 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 9 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
10 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
11 | Tested-by: Guenter Roeck <linux@roeck-us.net> | ||
12 | Message-id: 20230606104609.3692557-3-peter.maydell@linaro.org | ||
10 | --- | 13 | --- |
11 | target/arm/sme-fa64.decode | 2 -- | 14 | hw/sd/allwinner-sdhost.c | 2 +- |
12 | target/arm/translate-sve.c | 9 ++++++--- | 15 | 1 file changed, 1 insertion(+), 1 deletion(-) |
13 | 2 files changed, 6 insertions(+), 5 deletions(-) | ||
14 | 16 | ||
15 | diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode | 17 | diff --git a/hw/sd/allwinner-sdhost.c b/hw/sd/allwinner-sdhost.c |
16 | index XXXXXXX..XXXXXXX 100644 | 18 | index XXXXXXX..XXXXXXX 100644 |
17 | --- a/target/arm/sme-fa64.decode | 19 | --- a/hw/sd/allwinner-sdhost.c |
18 | +++ b/target/arm/sme-fa64.decode | 20 | +++ b/hw/sd/allwinner-sdhost.c |
19 | @@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS | 21 | @@ -XXX,XX +XXX,XX @@ static void allwinner_sdhost_update_irq(AwSdHostState *s) |
20 | 22 | } | |
21 | FAIL 0000 0100 --1- ---- 1011 -0-- ---- ---- # FTSSEL, FEXPA | 23 | |
22 | FAIL 0000 0101 --10 0001 100- ---- ---- ---- # COMPACT | 24 | trace_allwinner_sdhost_update_irq(irq); |
23 | -FAIL 0010 0101 --01 100- 1111 000- ---0 ---- # RDFFR, RDFFRS | 25 | - qemu_set_irq(s->irq, irq); |
24 | -FAIL 0010 0101 --10 1--- 1001 ---- ---- ---- # WRFFR, SETFFR | 26 | + qemu_set_irq(s->irq, !!irq); |
25 | FAIL 0100 0101 --0- ---- 1011 ---- ---- ---- # BDEP, BEXT, BGRP | ||
26 | FAIL 0100 0101 000- ---- 0110 1--- ---- ---- # PMULLB, PMULLT (128b result) | ||
27 | FAIL 0110 0100 --1- ---- 1110 01-- ---- ---- # FMMLA, BFMMLA | ||
28 | diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c | ||
29 | index XXXXXXX..XXXXXXX 100644 | ||
30 | --- a/target/arm/translate-sve.c | ||
31 | +++ b/target/arm/translate-sve.c | ||
32 | @@ -XXX,XX +XXX,XX @@ static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag) | ||
33 | TRANS_FEAT(PTRUE, aa64_sve, do_predset, a->esz, a->rd, a->pat, a->s) | ||
34 | |||
35 | /* Note pat == 31 is #all, to set all elements. */ | ||
36 | -TRANS_FEAT(SETFFR, aa64_sve, do_predset, 0, FFR_PRED_NUM, 31, false) | ||
37 | +TRANS_FEAT_NONSTREAMING(SETFFR, aa64_sve, | ||
38 | + do_predset, 0, FFR_PRED_NUM, 31, false) | ||
39 | |||
40 | /* Note pat == 32 is #unimp, to set no elements. */ | ||
41 | TRANS_FEAT(PFALSE, aa64_sve, do_predset, 0, a->rd, 32, false) | ||
42 | @@ -XXX,XX +XXX,XX @@ static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a) | ||
43 | .rd = a->rd, .pg = a->pg, .s = a->s, | ||
44 | .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM, | ||
45 | }; | ||
46 | + | ||
47 | + s->is_nonstreaming = true; | ||
48 | return trans_AND_pppp(s, &alt_a); | ||
49 | } | 27 | } |
50 | 28 | ||
51 | -TRANS_FEAT(RDFFR, aa64_sve, do_mov_p, a->rd, FFR_PRED_NUM) | 29 | static void allwinner_sdhost_update_transfer_cnt(AwSdHostState *s, |
52 | -TRANS_FEAT(WRFFR, aa64_sve, do_mov_p, FFR_PRED_NUM, a->rn) | ||
53 | +TRANS_FEAT_NONSTREAMING(RDFFR, aa64_sve, do_mov_p, a->rd, FFR_PRED_NUM) | ||
54 | +TRANS_FEAT_NONSTREAMING(WRFFR, aa64_sve, do_mov_p, FFR_PRED_NUM, a->rn) | ||
55 | |||
56 | static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a, | ||
57 | void (*gen_fn)(TCGv_i32, TCGv_ptr, | ||
58 | -- | 30 | -- |
59 | 2.25.1 | 31 | 2.34.1 |
32 | |||
33 | diff view generated by jsdifflib |
1 | From: Richard Henderson <richard.henderson@linaro.org> | 1 | The nrf51_timer has a free-running counter which we implement using |
---|---|---|---|
2 | the pattern of using two fields (update_counter_ns, counter) to track | ||
3 | the last point at which we calculated the counter value, and the | ||
4 | counter value at that time. Then we can find the current counter | ||
5 | value by converting the difference in wall-clock time between then | ||
6 | and now to a tick count that we need to add to the counter value. | ||
2 | 7 | ||
3 | Note that SME remains effectively disabled for user-only, | 8 | Unfortunately the nrf51_timer's implementation of this has a bug |
4 | because we do not yet set CPACR_EL1.SMEN. This needs to | 9 | which means it loses time every time update_counter() is called. |
5 | wait until the kernel ABI is implemented. | 10 | After updating s->counter it always sets s->update_counter_ns to |
11 | 'now', even though the actual point when s->counter hit the new value | ||
12 | will be some point in the past (half a tick, say). In the worst case | ||
13 | (guest code in a tight loop reading the counter, icount mode) the | ||
14 | counter is continually queried less than a tick after it was last | ||
15 | read, so s->counter never advances but s->update_counter_ns does, and | ||
16 | the guest never makes forward progress. | ||
6 | 17 | ||
7 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | 18 | The fix for this is to only advance update_counter_ns to the |
8 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 19 | timestamp of the last tick, not all the way to 'now'. (This is the |
9 | Message-id: 20220708151540.18136-33-richard.henderson@linaro.org | 20 | pattern used in hw/misc/mps2-fpgaio.c's counter.) |
21 | |||
22 | Cc: qemu-stable@nongnu.org | ||
10 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 23 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
24 | Reviewed-by: Joel Stanley <joel@jms.id.au> | ||
25 | Message-id: 20230606134917.3782215-1-peter.maydell@linaro.org | ||
11 | --- | 26 | --- |
12 | docs/system/arm/emulation.rst | 4 ++++ | 27 | hw/timer/nrf51_timer.c | 7 ++++++- |
13 | target/arm/cpu64.c | 11 +++++++++++ | 28 | 1 file changed, 6 insertions(+), 1 deletion(-) |
14 | 2 files changed, 15 insertions(+) | ||
15 | 29 | ||
16 | diff --git a/docs/system/arm/emulation.rst b/docs/system/arm/emulation.rst | 30 | diff --git a/hw/timer/nrf51_timer.c b/hw/timer/nrf51_timer.c |
17 | index XXXXXXX..XXXXXXX 100644 | 31 | index XXXXXXX..XXXXXXX 100644 |
18 | --- a/docs/system/arm/emulation.rst | 32 | --- a/hw/timer/nrf51_timer.c |
19 | +++ b/docs/system/arm/emulation.rst | 33 | +++ b/hw/timer/nrf51_timer.c |
20 | @@ -XXX,XX +XXX,XX @@ the following architecture extensions: | 34 | @@ -XXX,XX +XXX,XX @@ static uint32_t update_counter(NRF51TimerState *s, int64_t now) |
21 | - FEAT_SHA512 (Advanced SIMD SHA512 instructions) | 35 | uint32_t ticks = ns_to_ticks(s, now - s->update_counter_ns); |
22 | - FEAT_SM3 (Advanced SIMD SM3 instructions) | 36 | |
23 | - FEAT_SM4 (Advanced SIMD SM4 instructions) | 37 | s->counter = (s->counter + ticks) % BIT(bitwidths[s->bitmode]); |
24 | +- FEAT_SME (Scalable Matrix Extension) | 38 | - s->update_counter_ns = now; |
25 | +- FEAT_SME_FA64 (Full A64 instruction set in Streaming SVE mode) | 39 | + /* |
26 | +- FEAT_SME_F64F64 (Double-precision floating-point outer product instructions) | 40 | + * Only advance the sync time to the timestamp of the last tick, |
27 | +- FEAT_SME_I16I64 (16-bit to 64-bit integer widening outer product instructions) | 41 | + * not all the way to 'now', so we don't lose time if we do |
28 | - FEAT_SPECRES (Speculation restriction instructions) | 42 | + * multiple resyncs in a single tick. |
29 | - FEAT_SSBS (Speculative Store Bypass Safe) | 43 | + */ |
30 | - FEAT_TLBIOS (TLB invalidate instructions in Outer Shareable domain) | 44 | + s->update_counter_ns += ticks_to_ns(s, ticks); |
31 | diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c | 45 | return ticks; |
32 | index XXXXXXX..XXXXXXX 100644 | 46 | } |
33 | --- a/target/arm/cpu64.c | ||
34 | +++ b/target/arm/cpu64.c | ||
35 | @@ -XXX,XX +XXX,XX @@ static void aarch64_max_initfn(Object *obj) | ||
36 | */ | ||
37 | t = FIELD_DP64(t, ID_AA64PFR1, MTE, 3); /* FEAT_MTE3 */ | ||
38 | t = FIELD_DP64(t, ID_AA64PFR1, RAS_FRAC, 0); /* FEAT_RASv1p1 + FEAT_DoubleFault */ | ||
39 | + t = FIELD_DP64(t, ID_AA64PFR1, SME, 1); /* FEAT_SME */ | ||
40 | t = FIELD_DP64(t, ID_AA64PFR1, CSV2_FRAC, 0); /* FEAT_CSV2_2 */ | ||
41 | cpu->isar.id_aa64pfr1 = t; | ||
42 | |||
43 | @@ -XXX,XX +XXX,XX @@ static void aarch64_max_initfn(Object *obj) | ||
44 | t = FIELD_DP64(t, ID_AA64DFR0, PMUVER, 5); /* FEAT_PMUv3p4 */ | ||
45 | cpu->isar.id_aa64dfr0 = t; | ||
46 | |||
47 | + t = cpu->isar.id_aa64smfr0; | ||
48 | + t = FIELD_DP64(t, ID_AA64SMFR0, F32F32, 1); /* FEAT_SME */ | ||
49 | + t = FIELD_DP64(t, ID_AA64SMFR0, B16F32, 1); /* FEAT_SME */ | ||
50 | + t = FIELD_DP64(t, ID_AA64SMFR0, F16F32, 1); /* FEAT_SME */ | ||
51 | + t = FIELD_DP64(t, ID_AA64SMFR0, I8I32, 0xf); /* FEAT_SME */ | ||
52 | + t = FIELD_DP64(t, ID_AA64SMFR0, F64F64, 1); /* FEAT_SME_F64F64 */ | ||
53 | + t = FIELD_DP64(t, ID_AA64SMFR0, I16I64, 0xf); /* FEAT_SME_I16I64 */ | ||
54 | + t = FIELD_DP64(t, ID_AA64SMFR0, FA64, 1); /* FEAT_SME_FA64 */ | ||
55 | + cpu->isar.id_aa64smfr0 = t; | ||
56 | + | ||
57 | /* Replicate the same data to the 32-bit id registers. */ | ||
58 | aa32_max_features(cpu); | ||
59 | 47 | ||
60 | -- | 48 | -- |
61 | 2.25.1 | 49 | 2.34.1 | diff view generated by jsdifflib |
1 | From: Richard Henderson <richard.henderson@linaro.org> | 1 | From: Marcin Juszkiewicz <marcin.juszkiewicz@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | There's no reason to set CPACR_EL1.ZEN if SVE is disabled. | 3 | Signed-off-by: Marcin Juszkiewicz <marcin.juszkiewicz@linaro.org> |
4 | 4 | Reviewed-by: Thomas Huth <thuth@redhat.com> | |
5 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | 5 | Message-id: 20230607092112.655098-1-marcin.juszkiewicz@linaro.org |
6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | Message-id: 20220708151540.18136-44-richard.henderson@linaro.org | ||
8 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 6 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
9 | --- | 7 | --- |
10 | target/arm/cpu.c | 7 +++---- | 8 | hw/arm/Kconfig | 1 + |
11 | 1 file changed, 3 insertions(+), 4 deletions(-) | 9 | 1 file changed, 1 insertion(+) |
12 | 10 | ||
13 | diff --git a/target/arm/cpu.c b/target/arm/cpu.c | 11 | diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig |
14 | index XXXXXXX..XXXXXXX 100644 | 12 | index XXXXXXX..XXXXXXX 100644 |
15 | --- a/target/arm/cpu.c | 13 | --- a/hw/arm/Kconfig |
16 | +++ b/target/arm/cpu.c | 14 | +++ b/hw/arm/Kconfig |
17 | @@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset(DeviceState *dev) | 15 | @@ -XXX,XX +XXX,XX @@ config SBSA_REF |
18 | /* and to the FP/Neon instructions */ | 16 | select PL061 # GPIO |
19 | env->cp15.cpacr_el1 = FIELD_DP64(env->cp15.cpacr_el1, | 17 | select USB_EHCI_SYSBUS |
20 | CPACR_EL1, FPEN, 3); | 18 | select WDT_SBSA |
21 | - /* and to the SVE instructions */ | 19 | + select BOCHS_DISPLAY |
22 | - env->cp15.cpacr_el1 = FIELD_DP64(env->cp15.cpacr_el1, | 20 | |
23 | - CPACR_EL1, ZEN, 3); | 21 | config SABRELITE |
24 | - /* with reasonable vector length */ | 22 | bool |
25 | + /* and to the SVE instructions, with default vector length */ | ||
26 | if (cpu_isar_feature(aa64_sve, cpu)) { | ||
27 | + env->cp15.cpacr_el1 = FIELD_DP64(env->cp15.cpacr_el1, | ||
28 | + CPACR_EL1, ZEN, 3); | ||
29 | env->vfp.zcr_el[1] = cpu->sve_default_vq - 1; | ||
30 | } | ||
31 | /* | ||
32 | -- | 23 | -- |
33 | 2.25.1 | 24 | 2.34.1 | diff view generated by jsdifflib |
1 | From: Richard Henderson <richard.henderson@linaro.org> | 1 | From: Martin Kaiser <martin@kaiser.cx> |
---|---|---|---|
2 | 2 | ||
3 | This is an SVE instruction that operates using the SVE vector | 3 | The Linux kernel added a flood check for RX data recently in commit |
4 | length but that is present only if SME is implemented. | 4 | 496a4471b7c3 ("serial: imx: work-around for hardware RX flood"). This |
5 | check uses the wake bit in the UART status register 2. The wake bit | ||
6 | indicates that the receiver detected a start bit on the RX line. If the | ||
7 | kernel sees a number of RX interrupts without the wake bit being set, it | ||
8 | treats this as spurious data and resets the UART port. imx_serial | ||
9 | never sets the wake bit and triggers the kernel's flood check. | ||
5 | 10 | ||
6 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | 11 | This patch adds support for the wake bit. wake is set when we receive a |
7 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 12 | new character (it's not set for break events). It seems that wake is |
8 | Message-id: 20220708151540.18136-30-richard.henderson@linaro.org | 13 | cleared by the kernel driver, the hardware does not have to clear it |
14 | automatically after data was read. | ||
15 | |||
16 | The wake bit can be configured as an interrupt source. Support this | ||
17 | mechanism as well. | ||
18 | |||
19 | Co-developed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
20 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
21 | Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
22 | Signed-off-by: Martin Kaiser <martin@kaiser.cx> | ||
9 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 23 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
10 | --- | 24 | --- |
11 | target/arm/helper-sve.h | 2 ++ | 25 | include/hw/char/imx_serial.h | 1 + |
12 | target/arm/sve.decode | 1 + | 26 | hw/char/imx_serial.c | 5 ++++- |
13 | target/arm/sve_helper.c | 16 ++++++++++++++++ | 27 | 2 files changed, 5 insertions(+), 1 deletion(-) |
14 | target/arm/translate-sve.c | 2 ++ | ||
15 | 4 files changed, 21 insertions(+) | ||
16 | 28 | ||
17 | diff --git a/target/arm/helper-sve.h b/target/arm/helper-sve.h | 29 | diff --git a/include/hw/char/imx_serial.h b/include/hw/char/imx_serial.h |
18 | index XXXXXXX..XXXXXXX 100644 | 30 | index XXXXXXX..XXXXXXX 100644 |
19 | --- a/target/arm/helper-sve.h | 31 | --- a/include/hw/char/imx_serial.h |
20 | +++ b/target/arm/helper-sve.h | 32 | +++ b/include/hw/char/imx_serial.h |
21 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(sve_revh_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | 33 | @@ -XXX,XX +XXX,XX @@ OBJECT_DECLARE_SIMPLE_TYPE(IMXSerialState, IMX_SERIAL) |
22 | 34 | ||
23 | DEF_HELPER_FLAGS_4(sve_revw_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | 35 | #define UCR4_DREN BIT(0) /* Receive Data Ready interrupt enable */ |
24 | 36 | #define UCR4_TCEN BIT(3) /* TX complete interrupt enable */ | |
25 | +DEF_HELPER_FLAGS_4(sme_revd_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | 37 | +#define UCR4_WKEN BIT(7) /* WAKE interrupt enable */ |
38 | |||
39 | #define UTS1_TXEMPTY (1<<6) | ||
40 | #define UTS1_RXEMPTY (1<<5) | ||
41 | diff --git a/hw/char/imx_serial.c b/hw/char/imx_serial.c | ||
42 | index XXXXXXX..XXXXXXX 100644 | ||
43 | --- a/hw/char/imx_serial.c | ||
44 | +++ b/hw/char/imx_serial.c | ||
45 | @@ -XXX,XX +XXX,XX @@ static void imx_update(IMXSerialState *s) | ||
46 | * TCEN and TXDC are both bit 3 | ||
47 | * RDR and DREN are both bit 0 | ||
48 | */ | ||
49 | - mask |= s->ucr4 & (UCR4_TCEN | UCR4_DREN); | ||
50 | + mask |= s->ucr4 & (UCR4_WKEN | UCR4_TCEN | UCR4_DREN); | ||
51 | |||
52 | usr2 = s->usr2 & mask; | ||
53 | |||
54 | @@ -XXX,XX +XXX,XX @@ static void imx_put_data(void *opaque, uint32_t value) | ||
55 | |||
56 | static void imx_receive(void *opaque, const uint8_t *buf, int size) | ||
57 | { | ||
58 | + IMXSerialState *s = (IMXSerialState *)opaque; | ||
26 | + | 59 | + |
27 | DEF_HELPER_FLAGS_4(sve_rbit_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | 60 | + s->usr2 |= USR2_WAKE; |
28 | DEF_HELPER_FLAGS_4(sve_rbit_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | 61 | imx_put_data(opaque, *buf); |
29 | DEF_HELPER_FLAGS_4(sve_rbit_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | 62 | } |
30 | diff --git a/target/arm/sve.decode b/target/arm/sve.decode | ||
31 | index XXXXXXX..XXXXXXX 100644 | ||
32 | --- a/target/arm/sve.decode | ||
33 | +++ b/target/arm/sve.decode | ||
34 | @@ -XXX,XX +XXX,XX @@ REVB 00000101 .. 1001 00 100 ... ..... ..... @rd_pg_rn | ||
35 | REVH 00000101 .. 1001 01 100 ... ..... ..... @rd_pg_rn | ||
36 | REVW 00000101 .. 1001 10 100 ... ..... ..... @rd_pg_rn | ||
37 | RBIT 00000101 .. 1001 11 100 ... ..... ..... @rd_pg_rn | ||
38 | +REVD 00000101 00 1011 10 100 ... ..... ..... @rd_pg_rn_e0 | ||
39 | |||
40 | # SVE vector splice (predicated, destructive) | ||
41 | SPLICE 00000101 .. 101 100 100 ... ..... ..... @rdn_pg_rm | ||
42 | diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c | ||
43 | index XXXXXXX..XXXXXXX 100644 | ||
44 | --- a/target/arm/sve_helper.c | ||
45 | +++ b/target/arm/sve_helper.c | ||
46 | @@ -XXX,XX +XXX,XX @@ DO_ZPZ_D(sve_revh_d, uint64_t, hswap64) | ||
47 | |||
48 | DO_ZPZ_D(sve_revw_d, uint64_t, wswap64) | ||
49 | |||
50 | +void HELPER(sme_revd_q)(void *vd, void *vn, void *vg, uint32_t desc) | ||
51 | +{ | ||
52 | + intptr_t i, opr_sz = simd_oprsz(desc) / 8; | ||
53 | + uint64_t *d = vd, *n = vn; | ||
54 | + uint8_t *pg = vg; | ||
55 | + | ||
56 | + for (i = 0; i < opr_sz; i += 2) { | ||
57 | + if (pg[H1(i)] & 1) { | ||
58 | + uint64_t n0 = n[i + 0]; | ||
59 | + uint64_t n1 = n[i + 1]; | ||
60 | + d[i + 0] = n1; | ||
61 | + d[i + 1] = n0; | ||
62 | + } | ||
63 | + } | ||
64 | +} | ||
65 | + | ||
66 | DO_ZPZ(sve_rbit_b, uint8_t, H1, revbit8) | ||
67 | DO_ZPZ(sve_rbit_h, uint16_t, H1_2, revbit16) | ||
68 | DO_ZPZ(sve_rbit_s, uint32_t, H1_4, revbit32) | ||
69 | diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c | ||
70 | index XXXXXXX..XXXXXXX 100644 | ||
71 | --- a/target/arm/translate-sve.c | ||
72 | +++ b/target/arm/translate-sve.c | ||
73 | @@ -XXX,XX +XXX,XX @@ TRANS_FEAT(REVH, aa64_sve, gen_gvec_ool_arg_zpz, revh_fns[a->esz], a, 0) | ||
74 | TRANS_FEAT(REVW, aa64_sve, gen_gvec_ool_arg_zpz, | ||
75 | a->esz == 3 ? gen_helper_sve_revw_d : NULL, a, 0) | ||
76 | |||
77 | +TRANS_FEAT(REVD, aa64_sme, gen_gvec_ool_arg_zpz, gen_helper_sme_revd_q, a, 0) | ||
78 | + | ||
79 | TRANS_FEAT(SPLICE, aa64_sve, gen_gvec_ool_arg_zpzz, | ||
80 | gen_helper_sve_splice, a, a->esz) | ||
81 | 63 | ||
82 | -- | 64 | -- |
83 | 2.25.1 | 65 | 2.34.1 |
66 | |||
67 | diff view generated by jsdifflib |
1 | From: Richard Henderson <richard.henderson@linaro.org> | 1 | From: Marcin Juszkiewicz <marcin.juszkiewicz@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | These prctl set the Streaming SVE vector length, which may | 3 | We plan to add more hardware information into DeviceTree to limit the amount |
4 | be completely different from the Normal SVE vector length. | 4 | of hardcoded values in firmware. |
5 | 5 | ||
6 | Signed-off-by: Marcin Juszkiewicz <marcin.juszkiewicz@linaro.org> | ||
7 | Message-id: 20230531171834.236569-1-marcin.juszkiewicz@linaro.org | ||
8 | [PMM: fix format nits, add text about platform version fields from | ||
9 | a comment in the C source file] | ||
6 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | 10 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> |
7 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
8 | Message-id: 20220708151540.18136-43-richard.henderson@linaro.org | ||
9 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 11 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
10 | --- | 12 | --- |
11 | linux-user/aarch64/target_prctl.h | 54 +++++++++++++++++++++++++++++++ | 13 | docs/system/arm/sbsa.rst | 38 +++++++++++++++++++++++++++++++------- |
12 | linux-user/syscall.c | 16 +++++++++ | 14 | 1 file changed, 31 insertions(+), 7 deletions(-) |
13 | 2 files changed, 70 insertions(+) | ||
14 | 15 | ||
15 | diff --git a/linux-user/aarch64/target_prctl.h b/linux-user/aarch64/target_prctl.h | 16 | diff --git a/docs/system/arm/sbsa.rst b/docs/system/arm/sbsa.rst |
16 | index XXXXXXX..XXXXXXX 100644 | 17 | index XXXXXXX..XXXXXXX 100644 |
17 | --- a/linux-user/aarch64/target_prctl.h | 18 | --- a/docs/system/arm/sbsa.rst |
18 | +++ b/linux-user/aarch64/target_prctl.h | 19 | +++ b/docs/system/arm/sbsa.rst |
19 | @@ -XXX,XX +XXX,XX @@ static abi_long do_prctl_sve_get_vl(CPUArchState *env) | 20 | @@ -XXX,XX +XXX,XX @@ any real hardware the ``sbsa-ref`` board intends to look like real |
20 | { | 21 | hardware. The `Server Base System Architecture |
21 | ARMCPU *cpu = env_archcpu(env); | 22 | <https://developer.arm.com/documentation/den0029/latest>`_ defines a |
22 | if (cpu_isar_feature(aa64_sve, cpu)) { | 23 | minimum base line of hardware support and importantly how the firmware |
23 | + /* PSTATE.SM is always unset on syscall entry. */ | 24 | -reports that to any operating system. It is a static system that |
24 | return sve_vq(env) * 16; | 25 | -reports a very minimal DT to the firmware for non-discoverable |
25 | } | 26 | -information about components affected by the qemu command line (i.e. |
26 | return -TARGET_EINVAL; | 27 | -cpus and memory). As a result it must have a firmware specifically |
27 | @@ -XXX,XX +XXX,XX @@ static abi_long do_prctl_sve_set_vl(CPUArchState *env, abi_long arg2) | 28 | -built to expect a certain hardware layout (as you would in a real |
28 | && arg2 >= 0 && arg2 <= 512 * 16 && !(arg2 & 15)) { | 29 | -machine). |
29 | uint32_t vq, old_vq; | 30 | +reports that to any operating system. |
30 | 31 | ||
31 | + /* PSTATE.SM is always unset on syscall entry. */ | 32 | It is intended to be a machine for developing firmware and testing |
32 | old_vq = sve_vq(env); | 33 | standards compliance with operating systems. |
33 | 34 | @@ -XXX,XX +XXX,XX @@ standards compliance with operating systems. | |
34 | /* | 35 | Supported devices |
35 | @@ -XXX,XX +XXX,XX @@ static abi_long do_prctl_sve_set_vl(CPUArchState *env, abi_long arg2) | 36 | """"""""""""""""" |
36 | } | 37 | |
37 | #define do_prctl_sve_set_vl do_prctl_sve_set_vl | 38 | -The sbsa-ref board supports: |
38 | 39 | +The ``sbsa-ref`` board supports: | |
39 | +static abi_long do_prctl_sme_get_vl(CPUArchState *env) | 40 | |
40 | +{ | 41 | - A configurable number of AArch64 CPUs |
41 | + ARMCPU *cpu = env_archcpu(env); | 42 | - GIC version 3 |
42 | + if (cpu_isar_feature(aa64_sme, cpu)) { | 43 | @@ -XXX,XX +XXX,XX @@ The sbsa-ref board supports: |
43 | + return sme_vq(env) * 16; | 44 | - Bochs display adapter on PCIe bus |
44 | + } | 45 | - A generic SBSA watchdog device |
45 | + return -TARGET_EINVAL; | 46 | |
46 | +} | ||
47 | +#define do_prctl_sme_get_vl do_prctl_sme_get_vl | ||
48 | + | 47 | + |
49 | +static abi_long do_prctl_sme_set_vl(CPUArchState *env, abi_long arg2) | 48 | +Board to firmware interface |
50 | +{ | 49 | +""""""""""""""""""""""""""" |
51 | + /* | ||
52 | + * We cannot support either PR_SME_SET_VL_ONEXEC or PR_SME_VL_INHERIT. | ||
53 | + * Note the kernel definition of sve_vl_valid allows for VQ=512, | ||
54 | + * i.e. VL=8192, even though the architectural maximum is VQ=16. | ||
55 | + */ | ||
56 | + if (cpu_isar_feature(aa64_sme, env_archcpu(env)) | ||
57 | + && arg2 >= 0 && arg2 <= 512 * 16 && !(arg2 & 15)) { | ||
58 | + int vq, old_vq; | ||
59 | + | 50 | + |
60 | + old_vq = sme_vq(env); | 51 | +``sbsa-ref`` is a static system that reports a very minimal devicetree to the |
52 | +firmware for non-discoverable information about system components. This | ||
53 | +includes both internal hardware and parts affected by the qemu command line | ||
54 | +(i.e. CPUs and memory). As a result it must have a firmware specifically built | ||
55 | +to expect a certain hardware layout (as you would in a real machine). | ||
61 | + | 56 | + |
62 | + /* | 57 | +DeviceTree information |
63 | + * Bound the value of vq, so that we know that it fits into | 58 | +'''''''''''''''''''''' |
64 | + * the 4-bit field in SMCR_EL1. Because PSTATE.SM is cleared | ||
65 | + * on syscall entry, we are not modifying the current SVE | ||
66 | + * vector length. | ||
67 | + */ | ||
68 | + vq = MAX(arg2 / 16, 1); | ||
69 | + vq = MIN(vq, 16); | ||
70 | + env->vfp.smcr_el[1] = | ||
71 | + FIELD_DP64(env->vfp.smcr_el[1], SMCR, LEN, vq - 1); | ||
72 | + | 59 | + |
73 | + /* Delay rebuilding hflags until we know if ZA must change. */ | 60 | +The devicetree provided by the board model to the firmware is not intended |
74 | + vq = sve_vqm1_for_el_sm(env, 0, true) + 1; | 61 | +to be a complete compliant DT. It currently reports: |
75 | + | 62 | + |
76 | + if (vq != old_vq) { | 63 | + - CPUs |
77 | + /* | 64 | + - memory |
78 | + * PSTATE.ZA state is cleared on any change to SVL. | 65 | + - platform version |
79 | + * We need not call arm_rebuild_hflags because PSTATE.SM was | 66 | + - GIC addresses |
80 | + * cleared on syscall entry, so this hasn't changed VL. | ||
81 | + */ | ||
82 | + env->svcr = FIELD_DP64(env->svcr, SVCR, ZA, 0); | ||
83 | + arm_rebuild_hflags(env); | ||
84 | + } | ||
85 | + return vq * 16; | ||
86 | + } | ||
87 | + return -TARGET_EINVAL; | ||
88 | +} | ||
89 | +#define do_prctl_sme_set_vl do_prctl_sme_set_vl | ||
90 | + | 67 | + |
91 | static abi_long do_prctl_reset_keys(CPUArchState *env, abi_long arg2) | 68 | +The platform version is only for informing platform firmware about |
92 | { | 69 | +what kind of ``sbsa-ref`` board it is running on. It is neither |
93 | ARMCPU *cpu = env_archcpu(env); | 70 | +a QEMU versioned machine type nor a reflection of the level of the |
94 | diff --git a/linux-user/syscall.c b/linux-user/syscall.c | 71 | +SBSA/SystemReady SR support provided. |
95 | index XXXXXXX..XXXXXXX 100644 | 72 | + |
96 | --- a/linux-user/syscall.c | 73 | +The ``machine-version-major`` value is updated when changes breaking |
97 | +++ b/linux-user/syscall.c | 74 | +fw compatibility are introduced. The ``machine-version-minor`` value |
98 | @@ -XXX,XX +XXX,XX @@ abi_long do_arch_prctl(CPUX86State *env, int code, abi_ulong addr) | 75 | +is updated when features are added that don't break fw compatibility. |
99 | #ifndef PR_SET_SYSCALL_USER_DISPATCH | ||
100 | # define PR_SET_SYSCALL_USER_DISPATCH 59 | ||
101 | #endif | ||
102 | +#ifndef PR_SME_SET_VL | ||
103 | +# define PR_SME_SET_VL 63 | ||
104 | +# define PR_SME_GET_VL 64 | ||
105 | +# define PR_SME_VL_LEN_MASK 0xffff | ||
106 | +# define PR_SME_VL_INHERIT (1 << 17) | ||
107 | +#endif | ||
108 | |||
109 | #include "target_prctl.h" | ||
110 | |||
111 | @@ -XXX,XX +XXX,XX @@ static abi_long do_prctl_inval1(CPUArchState *env, abi_long arg2) | ||
112 | #ifndef do_prctl_set_unalign | ||
113 | #define do_prctl_set_unalign do_prctl_inval1 | ||
114 | #endif | ||
115 | +#ifndef do_prctl_sme_get_vl | ||
116 | +#define do_prctl_sme_get_vl do_prctl_inval0 | ||
117 | +#endif | ||
118 | +#ifndef do_prctl_sme_set_vl | ||
119 | +#define do_prctl_sme_set_vl do_prctl_inval1 | ||
120 | +#endif | ||
121 | |||
122 | static abi_long do_prctl(CPUArchState *env, abi_long option, abi_long arg2, | ||
123 | abi_long arg3, abi_long arg4, abi_long arg5) | ||
124 | @@ -XXX,XX +XXX,XX @@ static abi_long do_prctl(CPUArchState *env, abi_long option, abi_long arg2, | ||
125 | return do_prctl_sve_get_vl(env); | ||
126 | case PR_SVE_SET_VL: | ||
127 | return do_prctl_sve_set_vl(env, arg2); | ||
128 | + case PR_SME_GET_VL: | ||
129 | + return do_prctl_sme_get_vl(env); | ||
130 | + case PR_SME_SET_VL: | ||
131 | + return do_prctl_sme_set_vl(env, arg2); | ||
132 | case PR_PAC_RESET_KEYS: | ||
133 | if (arg3 || arg4 || arg5) { | ||
134 | return -TARGET_EINVAL; | ||
135 | -- | 76 | -- |
136 | 2.25.1 | 77 | 2.34.1 | diff view generated by jsdifflib |
1 | From: Richard Henderson <richard.henderson@linaro.org> | 1 | From: Sergey Kambalin <sergey.kambalin@auriga.com> |
---|---|---|---|
2 | 2 | ||
3 | This includes the build rules for the decoder, and the | 3 | Signed-off-by: Sergey Kambalin <sergey.kambalin@auriga.com> |
4 | new file for translation, but excludes any instructions. | 4 | Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org> |
5 | 5 | Acked-by: Richard Henderson <richard.henderson@linaro.org> | |
6 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | 6 | Message-id: 20230612223456.33824-2-philmd@linaro.org |
7 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 7 | Message-Id: <20230531155258.8361-1-sergey.kambalin@auriga.com> |
8 | Message-id: 20220708151540.18136-3-richard.henderson@linaro.org | 8 | [PMD: Split from bigger patch: 1/4] |
9 | Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
9 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 10 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
10 | --- | 11 | --- |
11 | target/arm/translate-a64.h | 1 + | 12 | include/hw/misc/raspberrypi-fw-defs.h | 163 ++++++++++++++++++++++++++ |
12 | target/arm/sme.decode | 20 ++++++++++++++++++++ | 13 | 1 file changed, 163 insertions(+) |
13 | target/arm/translate-a64.c | 7 ++++++- | 14 | create mode 100644 include/hw/misc/raspberrypi-fw-defs.h |
14 | target/arm/translate-sme.c | 35 +++++++++++++++++++++++++++++++++++ | ||
15 | target/arm/meson.build | 2 ++ | ||
16 | 5 files changed, 64 insertions(+), 1 deletion(-) | ||
17 | create mode 100644 target/arm/sme.decode | ||
18 | create mode 100644 target/arm/translate-sme.c | ||
19 | 15 | ||
20 | diff --git a/target/arm/translate-a64.h b/target/arm/translate-a64.h | 16 | diff --git a/include/hw/misc/raspberrypi-fw-defs.h b/include/hw/misc/raspberrypi-fw-defs.h |
21 | index XXXXXXX..XXXXXXX 100644 | ||
22 | --- a/target/arm/translate-a64.h | ||
23 | +++ b/target/arm/translate-a64.h | ||
24 | @@ -XXX,XX +XXX,XX @@ static inline int pred_gvec_reg_size(DisasContext *s) | ||
25 | } | ||
26 | |||
27 | bool disas_sve(DisasContext *, uint32_t); | ||
28 | +bool disas_sme(DisasContext *, uint32_t); | ||
29 | |||
30 | void gen_gvec_rax1(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, | ||
31 | uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); | ||
32 | diff --git a/target/arm/sme.decode b/target/arm/sme.decode | ||
33 | new file mode 100644 | 17 | new file mode 100644 |
34 | index XXXXXXX..XXXXXXX | 18 | index XXXXXXX..XXXXXXX |
35 | --- /dev/null | 19 | --- /dev/null |
36 | +++ b/target/arm/sme.decode | 20 | +++ b/include/hw/misc/raspberrypi-fw-defs.h |
37 | @@ -XXX,XX +XXX,XX @@ | ||
38 | +# AArch64 SME instruction descriptions | ||
39 | +# | ||
40 | +# Copyright (c) 2022 Linaro, Ltd | ||
41 | +# | ||
42 | +# This library is free software; you can redistribute it and/or | ||
43 | +# modify it under the terms of the GNU Lesser General Public | ||
44 | +# License as published by the Free Software Foundation; either | ||
45 | +# version 2.1 of the License, or (at your option) any later version. | ||
46 | +# | ||
47 | +# This library is distributed in the hope that it will be useful, | ||
48 | +# but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
49 | +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
50 | +# Lesser General Public License for more details. | ||
51 | +# | ||
52 | +# You should have received a copy of the GNU Lesser General Public | ||
53 | +# License along with this library; if not, see <http://www.gnu.org/licenses/>. | ||
54 | + | ||
55 | +# | ||
56 | +# This file is processed by scripts/decodetree.py | ||
57 | +# | ||
58 | diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c | ||
59 | index XXXXXXX..XXXXXXX 100644 | ||
60 | --- a/target/arm/translate-a64.c | ||
61 | +++ b/target/arm/translate-a64.c | ||
62 | @@ -XXX,XX +XXX,XX @@ static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu) | ||
63 | } | ||
64 | |||
65 | switch (extract32(insn, 25, 4)) { | ||
66 | - case 0x0: case 0x1: case 0x3: /* UNALLOCATED */ | ||
67 | + case 0x0: | ||
68 | + if (!extract32(insn, 31, 1) || !disas_sme(s, insn)) { | ||
69 | + unallocated_encoding(s); | ||
70 | + } | ||
71 | + break; | ||
72 | + case 0x1: case 0x3: /* UNALLOCATED */ | ||
73 | unallocated_encoding(s); | ||
74 | break; | ||
75 | case 0x2: | ||
76 | diff --git a/target/arm/translate-sme.c b/target/arm/translate-sme.c | ||
77 | new file mode 100644 | ||
78 | index XXXXXXX..XXXXXXX | ||
79 | --- /dev/null | ||
80 | +++ b/target/arm/translate-sme.c | ||
81 | @@ -XXX,XX +XXX,XX @@ | 21 | @@ -XXX,XX +XXX,XX @@ |
82 | +/* | 22 | +/* |
83 | + * AArch64 SME translation | 23 | + * Raspberry Pi firmware definitions |
84 | + * | 24 | + * |
85 | + * Copyright (c) 2022 Linaro, Ltd | 25 | + * Copyright (C) 2022 Auriga LLC, based on Linux kernel |
26 | + * `include/soc/bcm2835/raspberrypi-firmware.h` (Copyright © 2015 Broadcom) | ||
86 | + * | 27 | + * |
87 | + * This library is free software; you can redistribute it and/or | 28 | + * SPDX-License-Identifier: GPL-2.0-or-later |
88 | + * modify it under the terms of the GNU Lesser General Public | ||
89 | + * License as published by the Free Software Foundation; either | ||
90 | + * version 2.1 of the License, or (at your option) any later version. | ||
91 | + * | ||
92 | + * This library is distributed in the hope that it will be useful, | ||
93 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
94 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
95 | + * Lesser General Public License for more details. | ||
96 | + * | ||
97 | + * You should have received a copy of the GNU Lesser General Public | ||
98 | + * License along with this library; if not, see <http://www.gnu.org/licenses/>. | ||
99 | + */ | 29 | + */ |
100 | + | 30 | + |
31 | +#ifndef INCLUDE_HW_MISC_RASPBERRYPI_FW_DEFS_H_ | ||
32 | +#define INCLUDE_HW_MISC_RASPBERRYPI_FW_DEFS_H_ | ||
33 | + | ||
101 | +#include "qemu/osdep.h" | 34 | +#include "qemu/osdep.h" |
102 | +#include "cpu.h" | ||
103 | +#include "tcg/tcg-op.h" | ||
104 | +#include "tcg/tcg-op-gvec.h" | ||
105 | +#include "tcg/tcg-gvec-desc.h" | ||
106 | +#include "translate.h" | ||
107 | +#include "exec/helper-gen.h" | ||
108 | +#include "translate-a64.h" | ||
109 | +#include "fpu/softfloat.h" | ||
110 | + | 35 | + |
36 | +enum rpi_firmware_property_tag { | ||
37 | + RPI_FWREQ_PROPERTY_END = 0, | ||
38 | + RPI_FWREQ_GET_FIRMWARE_REVISION = 0x00000001, | ||
39 | + RPI_FWREQ_GET_FIRMWARE_VARIANT = 0x00000002, | ||
40 | + RPI_FWREQ_GET_FIRMWARE_HASH = 0x00000003, | ||
111 | + | 41 | + |
112 | +/* | 42 | + RPI_FWREQ_SET_CURSOR_INFO = 0x00008010, |
113 | + * Include the generated decoder. | 43 | + RPI_FWREQ_SET_CURSOR_STATE = 0x00008011, |
114 | + */ | ||
115 | + | 44 | + |
116 | +#include "decode-sme.c.inc" | 45 | + RPI_FWREQ_GET_BOARD_MODEL = 0x00010001, |
117 | diff --git a/target/arm/meson.build b/target/arm/meson.build | 46 | + RPI_FWREQ_GET_BOARD_REVISION = 0x00010002, |
118 | index XXXXXXX..XXXXXXX 100644 | 47 | + RPI_FWREQ_GET_BOARD_MAC_ADDRESS = 0x00010003, |
119 | --- a/target/arm/meson.build | 48 | + RPI_FWREQ_GET_BOARD_SERIAL = 0x00010004, |
120 | +++ b/target/arm/meson.build | 49 | + RPI_FWREQ_GET_ARM_MEMORY = 0x00010005, |
121 | @@ -XXX,XX +XXX,XX @@ | 50 | + RPI_FWREQ_GET_VC_MEMORY = 0x00010006, |
122 | gen = [ | 51 | + RPI_FWREQ_GET_CLOCKS = 0x00010007, |
123 | decodetree.process('sve.decode', extra_args: '--decode=disas_sve'), | 52 | + RPI_FWREQ_GET_POWER_STATE = 0x00020001, |
124 | + decodetree.process('sme.decode', extra_args: '--decode=disas_sme'), | 53 | + RPI_FWREQ_GET_TIMING = 0x00020002, |
125 | decodetree.process('neon-shared.decode', extra_args: '--decode=disas_neon_shared'), | 54 | + RPI_FWREQ_SET_POWER_STATE = 0x00028001, |
126 | decodetree.process('neon-dp.decode', extra_args: '--decode=disas_neon_dp'), | 55 | + RPI_FWREQ_GET_CLOCK_STATE = 0x00030001, |
127 | decodetree.process('neon-ls.decode', extra_args: '--decode=disas_neon_ls'), | 56 | + RPI_FWREQ_GET_CLOCK_RATE = 0x00030002, |
128 | @@ -XXX,XX +XXX,XX @@ arm_ss.add(when: 'TARGET_AARCH64', if_true: files( | 57 | + RPI_FWREQ_GET_VOLTAGE = 0x00030003, |
129 | 'sme_helper.c', | 58 | + RPI_FWREQ_GET_MAX_CLOCK_RATE = 0x00030004, |
130 | 'translate-a64.c', | 59 | + RPI_FWREQ_GET_MAX_VOLTAGE = 0x00030005, |
131 | 'translate-sve.c', | 60 | + RPI_FWREQ_GET_TEMPERATURE = 0x00030006, |
132 | + 'translate-sme.c', | 61 | + RPI_FWREQ_GET_MIN_CLOCK_RATE = 0x00030007, |
133 | )) | 62 | + RPI_FWREQ_GET_MIN_VOLTAGE = 0x00030008, |
134 | 63 | + RPI_FWREQ_GET_TURBO = 0x00030009, | |
135 | arm_softmmu_ss = ss.source_set() | 64 | + RPI_FWREQ_GET_MAX_TEMPERATURE = 0x0003000a, |
65 | + RPI_FWREQ_GET_STC = 0x0003000b, | ||
66 | + RPI_FWREQ_ALLOCATE_MEMORY = 0x0003000c, | ||
67 | + RPI_FWREQ_LOCK_MEMORY = 0x0003000d, | ||
68 | + RPI_FWREQ_UNLOCK_MEMORY = 0x0003000e, | ||
69 | + RPI_FWREQ_RELEASE_MEMORY = 0x0003000f, | ||
70 | + RPI_FWREQ_EXECUTE_CODE = 0x00030010, | ||
71 | + RPI_FWREQ_EXECUTE_QPU = 0x00030011, | ||
72 | + RPI_FWREQ_SET_ENABLE_QPU = 0x00030012, | ||
73 | + RPI_FWREQ_GET_DISPMANX_RESOURCE_MEM_HANDLE = 0x00030014, | ||
74 | + RPI_FWREQ_GET_EDID_BLOCK = 0x00030020, | ||
75 | + RPI_FWREQ_GET_CUSTOMER_OTP = 0x00030021, | ||
76 | + RPI_FWREQ_GET_EDID_BLOCK_DISPLAY = 0x00030023, | ||
77 | + RPI_FWREQ_GET_DOMAIN_STATE = 0x00030030, | ||
78 | + RPI_FWREQ_GET_THROTTLED = 0x00030046, | ||
79 | + RPI_FWREQ_GET_CLOCK_MEASURED = 0x00030047, | ||
80 | + RPI_FWREQ_NOTIFY_REBOOT = 0x00030048, | ||
81 | + RPI_FWREQ_SET_CLOCK_STATE = 0x00038001, | ||
82 | + RPI_FWREQ_SET_CLOCK_RATE = 0x00038002, | ||
83 | + RPI_FWREQ_SET_VOLTAGE = 0x00038003, | ||
84 | + RPI_FWREQ_SET_MAX_CLOCK_RATE = 0x00038004, | ||
85 | + RPI_FWREQ_SET_MIN_CLOCK_RATE = 0x00038007, | ||
86 | + RPI_FWREQ_SET_TURBO = 0x00038009, | ||
87 | + RPI_FWREQ_SET_CUSTOMER_OTP = 0x00038021, | ||
88 | + RPI_FWREQ_SET_DOMAIN_STATE = 0x00038030, | ||
89 | + RPI_FWREQ_GET_GPIO_STATE = 0x00030041, | ||
90 | + RPI_FWREQ_SET_GPIO_STATE = 0x00038041, | ||
91 | + RPI_FWREQ_SET_SDHOST_CLOCK = 0x00038042, | ||
92 | + RPI_FWREQ_GET_GPIO_CONFIG = 0x00030043, | ||
93 | + RPI_FWREQ_SET_GPIO_CONFIG = 0x00038043, | ||
94 | + RPI_FWREQ_GET_PERIPH_REG = 0x00030045, | ||
95 | + RPI_FWREQ_SET_PERIPH_REG = 0x00038045, | ||
96 | + RPI_FWREQ_GET_POE_HAT_VAL = 0x00030049, | ||
97 | + RPI_FWREQ_SET_POE_HAT_VAL = 0x00038049, | ||
98 | + RPI_FWREQ_SET_POE_HAT_VAL_OLD = 0x00030050, | ||
99 | + RPI_FWREQ_NOTIFY_XHCI_RESET = 0x00030058, | ||
100 | + RPI_FWREQ_GET_REBOOT_FLAGS = 0x00030064, | ||
101 | + RPI_FWREQ_SET_REBOOT_FLAGS = 0x00038064, | ||
102 | + RPI_FWREQ_NOTIFY_DISPLAY_DONE = 0x00030066, | ||
103 | + | ||
104 | + /* Dispmanx TAGS */ | ||
105 | + RPI_FWREQ_FRAMEBUFFER_ALLOCATE = 0x00040001, | ||
106 | + RPI_FWREQ_FRAMEBUFFER_BLANK = 0x00040002, | ||
107 | + RPI_FWREQ_FRAMEBUFFER_GET_PHYSICAL_WIDTH_HEIGHT = 0x00040003, | ||
108 | + RPI_FWREQ_FRAMEBUFFER_GET_VIRTUAL_WIDTH_HEIGHT = 0x00040004, | ||
109 | + RPI_FWREQ_FRAMEBUFFER_GET_DEPTH = 0x00040005, | ||
110 | + RPI_FWREQ_FRAMEBUFFER_GET_PIXEL_ORDER = 0x00040006, | ||
111 | + RPI_FWREQ_FRAMEBUFFER_GET_ALPHA_MODE = 0x00040007, | ||
112 | + RPI_FWREQ_FRAMEBUFFER_GET_PITCH = 0x00040008, | ||
113 | + RPI_FWREQ_FRAMEBUFFER_GET_VIRTUAL_OFFSET = 0x00040009, | ||
114 | + RPI_FWREQ_FRAMEBUFFER_GET_OVERSCAN = 0x0004000a, | ||
115 | + RPI_FWREQ_FRAMEBUFFER_GET_PALETTE = 0x0004000b, | ||
116 | + RPI_FWREQ_FRAMEBUFFER_GET_LAYER = 0x0004000c, | ||
117 | + RPI_FWREQ_FRAMEBUFFER_GET_TRANSFORM = 0x0004000d, | ||
118 | + RPI_FWREQ_FRAMEBUFFER_GET_VSYNC = 0x0004000e, | ||
119 | + RPI_FWREQ_FRAMEBUFFER_GET_TOUCHBUF = 0x0004000f, | ||
120 | + RPI_FWREQ_FRAMEBUFFER_GET_GPIOVIRTBUF = 0x00040010, | ||
121 | + RPI_FWREQ_FRAMEBUFFER_RELEASE = 0x00048001, | ||
122 | + RPI_FWREQ_FRAMEBUFFER_GET_DISPLAY_ID = 0x00040016, | ||
123 | + RPI_FWREQ_FRAMEBUFFER_SET_DISPLAY_NUM = 0x00048013, | ||
124 | + RPI_FWREQ_FRAMEBUFFER_GET_NUM_DISPLAYS = 0x00040013, | ||
125 | + RPI_FWREQ_FRAMEBUFFER_GET_DISPLAY_SETTINGS = 0x00040014, | ||
126 | + RPI_FWREQ_FRAMEBUFFER_TEST_PHYSICAL_WIDTH_HEIGHT = 0x00044003, | ||
127 | + RPI_FWREQ_FRAMEBUFFER_TEST_VIRTUAL_WIDTH_HEIGHT = 0x00044004, | ||
128 | + RPI_FWREQ_FRAMEBUFFER_TEST_DEPTH = 0x00044005, | ||
129 | + RPI_FWREQ_FRAMEBUFFER_TEST_PIXEL_ORDER = 0x00044006, | ||
130 | + RPI_FWREQ_FRAMEBUFFER_TEST_ALPHA_MODE = 0x00044007, | ||
131 | + RPI_FWREQ_FRAMEBUFFER_TEST_VIRTUAL_OFFSET = 0x00044009, | ||
132 | + RPI_FWREQ_FRAMEBUFFER_TEST_OVERSCAN = 0x0004400a, | ||
133 | + RPI_FWREQ_FRAMEBUFFER_TEST_PALETTE = 0x0004400b, | ||
134 | + RPI_FWREQ_FRAMEBUFFER_TEST_LAYER = 0x0004400c, | ||
135 | + RPI_FWREQ_FRAMEBUFFER_TEST_TRANSFORM = 0x0004400d, | ||
136 | + RPI_FWREQ_FRAMEBUFFER_TEST_VSYNC = 0x0004400e, | ||
137 | + RPI_FWREQ_FRAMEBUFFER_SET_PHYSICAL_WIDTH_HEIGHT = 0x00048003, | ||
138 | + RPI_FWREQ_FRAMEBUFFER_SET_VIRTUAL_WIDTH_HEIGHT = 0x00048004, | ||
139 | + RPI_FWREQ_FRAMEBUFFER_SET_DEPTH = 0x00048005, | ||
140 | + RPI_FWREQ_FRAMEBUFFER_SET_PIXEL_ORDER = 0x00048006, | ||
141 | + RPI_FWREQ_FRAMEBUFFER_SET_ALPHA_MODE = 0x00048007, | ||
142 | + RPI_FWREQ_FRAMEBUFFER_SET_PITCH = 0x00048008, | ||
143 | + RPI_FWREQ_FRAMEBUFFER_SET_VIRTUAL_OFFSET = 0x00048009, | ||
144 | + RPI_FWREQ_FRAMEBUFFER_SET_OVERSCAN = 0x0004800a, | ||
145 | + RPI_FWREQ_FRAMEBUFFER_SET_PALETTE = 0x0004800b, | ||
146 | + | ||
147 | + RPI_FWREQ_FRAMEBUFFER_SET_TOUCHBUF = 0x0004801f, | ||
148 | + RPI_FWREQ_FRAMEBUFFER_SET_GPIOVIRTBUF = 0x00048020, | ||
149 | + RPI_FWREQ_FRAMEBUFFER_SET_VSYNC = 0x0004800e, | ||
150 | + RPI_FWREQ_FRAMEBUFFER_SET_LAYER = 0x0004800c, | ||
151 | + RPI_FWREQ_FRAMEBUFFER_SET_TRANSFORM = 0x0004800d, | ||
152 | + RPI_FWREQ_FRAMEBUFFER_SET_BACKLIGHT = 0x0004800f, | ||
153 | + | ||
154 | + RPI_FWREQ_VCHIQ_INIT = 0x00048010, | ||
155 | + | ||
156 | + RPI_FWREQ_SET_PLANE = 0x00048015, | ||
157 | + RPI_FWREQ_GET_DISPLAY_TIMING = 0x00040017, | ||
158 | + RPI_FWREQ_SET_TIMING = 0x00048017, | ||
159 | + RPI_FWREQ_GET_DISPLAY_CFG = 0x00040018, | ||
160 | + RPI_FWREQ_SET_DISPLAY_POWER = 0x00048019, | ||
161 | + RPI_FWREQ_GET_COMMAND_LINE = 0x00050001, | ||
162 | + RPI_FWREQ_GET_DMA_CHANNELS = 0x00060001, | ||
163 | +}; | ||
164 | + | ||
165 | +enum rpi_firmware_clk_id { | ||
166 | + RPI_FIRMWARE_EMMC_CLK_ID = 1, | ||
167 | + RPI_FIRMWARE_UART_CLK_ID, | ||
168 | + RPI_FIRMWARE_ARM_CLK_ID, | ||
169 | + RPI_FIRMWARE_CORE_CLK_ID, | ||
170 | + RPI_FIRMWARE_V3D_CLK_ID, | ||
171 | + RPI_FIRMWARE_H264_CLK_ID, | ||
172 | + RPI_FIRMWARE_ISP_CLK_ID, | ||
173 | + RPI_FIRMWARE_SDRAM_CLK_ID, | ||
174 | + RPI_FIRMWARE_PIXEL_CLK_ID, | ||
175 | + RPI_FIRMWARE_PWM_CLK_ID, | ||
176 | + RPI_FIRMWARE_HEVC_CLK_ID, | ||
177 | + RPI_FIRMWARE_EMMC2_CLK_ID, | ||
178 | + RPI_FIRMWARE_M2MC_CLK_ID, | ||
179 | + RPI_FIRMWARE_PIXEL_BVB_CLK_ID, | ||
180 | + RPI_FIRMWARE_VEC_CLK_ID, | ||
181 | + RPI_FIRMWARE_NUM_CLK_ID, | ||
182 | +}; | ||
183 | + | ||
184 | +#endif /* INCLUDE_HW_MISC_RASPBERRYPI_FW_DEFS_H_ */ | ||
136 | -- | 185 | -- |
137 | 2.25.1 | 186 | 2.34.1 |
187 | |||
188 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Richard Henderson <richard.henderson@linaro.org> | ||
2 | 1 | ||
3 | Mark these as non-streaming instructions, which should trap | ||
4 | if full a64 support is not enabled in streaming mode. | ||
5 | |||
6 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | ||
7 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
8 | Message-id: 20220708151540.18136-7-richard.henderson@linaro.org | ||
9 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
10 | --- | ||
11 | target/arm/sme-fa64.decode | 3 --- | ||
12 | target/arm/translate-sve.c | 22 ++++++++++++---------- | ||
13 | 2 files changed, 12 insertions(+), 13 deletions(-) | ||
14 | |||
15 | diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/target/arm/sme-fa64.decode | ||
18 | +++ b/target/arm/sme-fa64.decode | ||
19 | @@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS | ||
20 | # --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset) | ||
21 | # --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm) | ||
22 | |||
23 | -FAIL 0000 0100 --1- ---- 1011 -0-- ---- ---- # FTSSEL, FEXPA | ||
24 | -FAIL 0000 0101 --10 0001 100- ---- ---- ---- # COMPACT | ||
25 | -FAIL 0100 0101 --0- ---- 1011 ---- ---- ---- # BDEP, BEXT, BGRP | ||
26 | FAIL 0100 0101 000- ---- 0110 1--- ---- ---- # PMULLB, PMULLT (128b result) | ||
27 | FAIL 0110 0100 --1- ---- 1110 01-- ---- ---- # FMMLA, BFMMLA | ||
28 | FAIL 0110 0101 --0- ---- 0000 11-- ---- ---- # FTSMUL | ||
29 | diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c | ||
30 | index XXXXXXX..XXXXXXX 100644 | ||
31 | --- a/target/arm/translate-sve.c | ||
32 | +++ b/target/arm/translate-sve.c | ||
33 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_2 * const fexpa_fns[4] = { | ||
34 | NULL, gen_helper_sve_fexpa_h, | ||
35 | gen_helper_sve_fexpa_s, gen_helper_sve_fexpa_d, | ||
36 | }; | ||
37 | -TRANS_FEAT(FEXPA, aa64_sve, gen_gvec_ool_zz, | ||
38 | - fexpa_fns[a->esz], a->rd, a->rn, 0) | ||
39 | +TRANS_FEAT_NONSTREAMING(FEXPA, aa64_sve, gen_gvec_ool_zz, | ||
40 | + fexpa_fns[a->esz], a->rd, a->rn, 0) | ||
41 | |||
42 | static gen_helper_gvec_3 * const ftssel_fns[4] = { | ||
43 | NULL, gen_helper_sve_ftssel_h, | ||
44 | gen_helper_sve_ftssel_s, gen_helper_sve_ftssel_d, | ||
45 | }; | ||
46 | -TRANS_FEAT(FTSSEL, aa64_sve, gen_gvec_ool_arg_zzz, ftssel_fns[a->esz], a, 0) | ||
47 | +TRANS_FEAT_NONSTREAMING(FTSSEL, aa64_sve, gen_gvec_ool_arg_zzz, | ||
48 | + ftssel_fns[a->esz], a, 0) | ||
49 | |||
50 | /* | ||
51 | *** SVE Predicate Logical Operations Group | ||
52 | @@ -XXX,XX +XXX,XX @@ TRANS_FEAT(TRN2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz, | ||
53 | static gen_helper_gvec_3 * const compact_fns[4] = { | ||
54 | NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d | ||
55 | }; | ||
56 | -TRANS_FEAT(COMPACT, aa64_sve, gen_gvec_ool_arg_zpz, compact_fns[a->esz], a, 0) | ||
57 | +TRANS_FEAT_NONSTREAMING(COMPACT, aa64_sve, gen_gvec_ool_arg_zpz, | ||
58 | + compact_fns[a->esz], a, 0) | ||
59 | |||
60 | /* Call the helper that computes the ARM LastActiveElement pseudocode | ||
61 | * function, scaled by the element size. This includes the not found | ||
62 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3 * const bext_fns[4] = { | ||
63 | gen_helper_sve2_bext_b, gen_helper_sve2_bext_h, | ||
64 | gen_helper_sve2_bext_s, gen_helper_sve2_bext_d, | ||
65 | }; | ||
66 | -TRANS_FEAT(BEXT, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz, | ||
67 | - bext_fns[a->esz], a, 0) | ||
68 | +TRANS_FEAT_NONSTREAMING(BEXT, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz, | ||
69 | + bext_fns[a->esz], a, 0) | ||
70 | |||
71 | static gen_helper_gvec_3 * const bdep_fns[4] = { | ||
72 | gen_helper_sve2_bdep_b, gen_helper_sve2_bdep_h, | ||
73 | gen_helper_sve2_bdep_s, gen_helper_sve2_bdep_d, | ||
74 | }; | ||
75 | -TRANS_FEAT(BDEP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz, | ||
76 | - bdep_fns[a->esz], a, 0) | ||
77 | +TRANS_FEAT_NONSTREAMING(BDEP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz, | ||
78 | + bdep_fns[a->esz], a, 0) | ||
79 | |||
80 | static gen_helper_gvec_3 * const bgrp_fns[4] = { | ||
81 | gen_helper_sve2_bgrp_b, gen_helper_sve2_bgrp_h, | ||
82 | gen_helper_sve2_bgrp_s, gen_helper_sve2_bgrp_d, | ||
83 | }; | ||
84 | -TRANS_FEAT(BGRP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz, | ||
85 | - bgrp_fns[a->esz], a, 0) | ||
86 | +TRANS_FEAT_NONSTREAMING(BGRP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz, | ||
87 | + bgrp_fns[a->esz], a, 0) | ||
88 | |||
89 | static gen_helper_gvec_3 * const cadd_fns[4] = { | ||
90 | gen_helper_sve2_cadd_b, gen_helper_sve2_cadd_h, | ||
91 | -- | ||
92 | 2.25.1 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Richard Henderson <richard.henderson@linaro.org> | ||
2 | 1 | ||
3 | Mark these as non-streaming instructions, which should trap | ||
4 | if full a64 support is not enabled in streaming mode. | ||
5 | |||
6 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | ||
7 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
8 | Message-id: 20220708151540.18136-8-richard.henderson@linaro.org | ||
9 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
10 | --- | ||
11 | target/arm/sme-fa64.decode | 2 -- | ||
12 | target/arm/translate-sve.c | 24 +++++++++++++++--------- | ||
13 | 2 files changed, 15 insertions(+), 11 deletions(-) | ||
14 | |||
15 | diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/target/arm/sme-fa64.decode | ||
18 | +++ b/target/arm/sme-fa64.decode | ||
19 | @@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS | ||
20 | # --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset) | ||
21 | # --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm) | ||
22 | |||
23 | -FAIL 0100 0101 000- ---- 0110 1--- ---- ---- # PMULLB, PMULLT (128b result) | ||
24 | -FAIL 0110 0100 --1- ---- 1110 01-- ---- ---- # FMMLA, BFMMLA | ||
25 | FAIL 0110 0101 --0- ---- 0000 11-- ---- ---- # FTSMUL | ||
26 | FAIL 0110 0101 --01 0--- 100- ---- ---- ---- # FTMAD | ||
27 | FAIL 0110 0101 --01 1--- 001- ---- ---- ---- # FADDA | ||
28 | diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c | ||
29 | index XXXXXXX..XXXXXXX 100644 | ||
30 | --- a/target/arm/translate-sve.c | ||
31 | +++ b/target/arm/translate-sve.c | ||
32 | @@ -XXX,XX +XXX,XX @@ static bool do_trans_pmull(DisasContext *s, arg_rrr_esz *a, bool sel) | ||
33 | gen_helper_gvec_pmull_q, gen_helper_sve2_pmull_h, | ||
34 | NULL, gen_helper_sve2_pmull_d, | ||
35 | }; | ||
36 | - if (a->esz == 0 | ||
37 | - ? !dc_isar_feature(aa64_sve2_pmull128, s) | ||
38 | - : !dc_isar_feature(aa64_sve, s)) { | ||
39 | + | ||
40 | + if (a->esz == 0) { | ||
41 | + if (!dc_isar_feature(aa64_sve2_pmull128, s)) { | ||
42 | + return false; | ||
43 | + } | ||
44 | + s->is_nonstreaming = true; | ||
45 | + } else if (!dc_isar_feature(aa64_sve, s)) { | ||
46 | return false; | ||
47 | } | ||
48 | return gen_gvec_ool_arg_zzz(s, fns[a->esz], a, sel); | ||
49 | @@ -XXX,XX +XXX,XX @@ DO_ZPZZ_FP(FMINP, aa64_sve2, sve2_fminp_zpzz) | ||
50 | * SVE Integer Multiply-Add (unpredicated) | ||
51 | */ | ||
52 | |||
53 | -TRANS_FEAT(FMMLA_s, aa64_sve_f32mm, gen_gvec_fpst_zzzz, gen_helper_fmmla_s, | ||
54 | - a->rd, a->rn, a->rm, a->ra, 0, FPST_FPCR) | ||
55 | -TRANS_FEAT(FMMLA_d, aa64_sve_f64mm, gen_gvec_fpst_zzzz, gen_helper_fmmla_d, | ||
56 | - a->rd, a->rn, a->rm, a->ra, 0, FPST_FPCR) | ||
57 | +TRANS_FEAT_NONSTREAMING(FMMLA_s, aa64_sve_f32mm, gen_gvec_fpst_zzzz, | ||
58 | + gen_helper_fmmla_s, a->rd, a->rn, a->rm, a->ra, | ||
59 | + 0, FPST_FPCR) | ||
60 | +TRANS_FEAT_NONSTREAMING(FMMLA_d, aa64_sve_f64mm, gen_gvec_fpst_zzzz, | ||
61 | + gen_helper_fmmla_d, a->rd, a->rn, a->rm, a->ra, | ||
62 | + 0, FPST_FPCR) | ||
63 | |||
64 | static gen_helper_gvec_4 * const sqdmlal_zzzw_fns[] = { | ||
65 | NULL, gen_helper_sve2_sqdmlal_zzzw_h, | ||
66 | @@ -XXX,XX +XXX,XX @@ TRANS_FEAT(BFDOT_zzzz, aa64_sve_bf16, gen_gvec_ool_arg_zzzz, | ||
67 | TRANS_FEAT(BFDOT_zzxz, aa64_sve_bf16, gen_gvec_ool_arg_zzxz, | ||
68 | gen_helper_gvec_bfdot_idx, a) | ||
69 | |||
70 | -TRANS_FEAT(BFMMLA, aa64_sve_bf16, gen_gvec_ool_arg_zzzz, | ||
71 | - gen_helper_gvec_bfmmla, a, 0) | ||
72 | +TRANS_FEAT_NONSTREAMING(BFMMLA, aa64_sve_bf16, gen_gvec_ool_arg_zzzz, | ||
73 | + gen_helper_gvec_bfmmla, a, 0) | ||
74 | |||
75 | static bool do_BFMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel) | ||
76 | { | ||
77 | -- | ||
78 | 2.25.1 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Richard Henderson <richard.henderson@linaro.org> | ||
2 | 1 | ||
3 | Mark these as non-streaming instructions, which should trap | ||
4 | if full a64 support is not enabled in streaming mode. | ||
5 | |||
6 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | ||
7 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
8 | Message-id: 20220708151540.18136-9-richard.henderson@linaro.org | ||
9 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
10 | --- | ||
11 | target/arm/sme-fa64.decode | 3 --- | ||
12 | target/arm/translate-sve.c | 15 +++++++++++---- | ||
13 | 2 files changed, 11 insertions(+), 7 deletions(-) | ||
14 | |||
15 | diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/target/arm/sme-fa64.decode | ||
18 | +++ b/target/arm/sme-fa64.decode | ||
19 | @@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS | ||
20 | # --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset) | ||
21 | # --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm) | ||
22 | |||
23 | -FAIL 0110 0101 --0- ---- 0000 11-- ---- ---- # FTSMUL | ||
24 | -FAIL 0110 0101 --01 0--- 100- ---- ---- ---- # FTMAD | ||
25 | -FAIL 0110 0101 --01 1--- 001- ---- ---- ---- # FADDA | ||
26 | FAIL 0100 0101 --0- ---- 1001 10-- ---- ---- # SMMLA, UMMLA, USMMLA | ||
27 | FAIL 0100 0101 --1- ---- 1--- ---- ---- ---- # SVE2 string/histo/crypto instructions | ||
28 | FAIL 1000 010- -00- ---- 10-- ---- ---- ---- # SVE2 32-bit gather NT load (vector+scalar) | ||
29 | diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c | ||
30 | index XXXXXXX..XXXXXXX 100644 | ||
31 | --- a/target/arm/translate-sve.c | ||
32 | +++ b/target/arm/translate-sve.c | ||
33 | @@ -XXX,XX +XXX,XX @@ static gen_helper_gvec_3_ptr * const ftmad_fns[4] = { | ||
34 | NULL, gen_helper_sve_ftmad_h, | ||
35 | gen_helper_sve_ftmad_s, gen_helper_sve_ftmad_d, | ||
36 | }; | ||
37 | -TRANS_FEAT(FTMAD, aa64_sve, gen_gvec_fpst_zzz, | ||
38 | - ftmad_fns[a->esz], a->rd, a->rn, a->rm, a->imm, | ||
39 | - a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR) | ||
40 | +TRANS_FEAT_NONSTREAMING(FTMAD, aa64_sve, gen_gvec_fpst_zzz, | ||
41 | + ftmad_fns[a->esz], a->rd, a->rn, a->rm, a->imm, | ||
42 | + a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR) | ||
43 | |||
44 | /* | ||
45 | *** SVE Floating Point Accumulating Reduction Group | ||
46 | @@ -XXX,XX +XXX,XX @@ static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a) | ||
47 | if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) { | ||
48 | return false; | ||
49 | } | ||
50 | + s->is_nonstreaming = true; | ||
51 | if (!sve_access_check(s)) { | ||
52 | return true; | ||
53 | } | ||
54 | @@ -XXX,XX +XXX,XX @@ static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a) | ||
55 | DO_FP3(FADD_zzz, fadd) | ||
56 | DO_FP3(FSUB_zzz, fsub) | ||
57 | DO_FP3(FMUL_zzz, fmul) | ||
58 | -DO_FP3(FTSMUL, ftsmul) | ||
59 | DO_FP3(FRECPS, recps) | ||
60 | DO_FP3(FRSQRTS, rsqrts) | ||
61 | |||
62 | #undef DO_FP3 | ||
63 | |||
64 | +static gen_helper_gvec_3_ptr * const ftsmul_fns[4] = { | ||
65 | + NULL, gen_helper_gvec_ftsmul_h, | ||
66 | + gen_helper_gvec_ftsmul_s, gen_helper_gvec_ftsmul_d | ||
67 | +}; | ||
68 | +TRANS_FEAT_NONSTREAMING(FTSMUL, aa64_sve, gen_gvec_fpst_arg_zzz, | ||
69 | + ftsmul_fns[a->esz], a, 0) | ||
70 | + | ||
71 | /* | ||
72 | *** SVE Floating Point Arithmetic - Predicated Group | ||
73 | */ | ||
74 | -- | ||
75 | 2.25.1 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Richard Henderson <richard.henderson@linaro.org> | ||
2 | 1 | ||
3 | Mark these as non-streaming instructions, which should trap | ||
4 | if full a64 support is not enabled in streaming mode. | ||
5 | |||
6 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | ||
7 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
8 | Message-id: 20220708151540.18136-10-richard.henderson@linaro.org | ||
9 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
10 | --- | ||
11 | target/arm/sme-fa64.decode | 1 - | ||
12 | target/arm/translate-sve.c | 12 ++++++------ | ||
13 | 2 files changed, 6 insertions(+), 7 deletions(-) | ||
14 | |||
15 | diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/target/arm/sme-fa64.decode | ||
18 | +++ b/target/arm/sme-fa64.decode | ||
19 | @@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS | ||
20 | # --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset) | ||
21 | # --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm) | ||
22 | |||
23 | -FAIL 0100 0101 --0- ---- 1001 10-- ---- ---- # SMMLA, UMMLA, USMMLA | ||
24 | FAIL 0100 0101 --1- ---- 1--- ---- ---- ---- # SVE2 string/histo/crypto instructions | ||
25 | FAIL 1000 010- -00- ---- 10-- ---- ---- ---- # SVE2 32-bit gather NT load (vector+scalar) | ||
26 | FAIL 1000 010- -00- ---- 111- ---- ---- ---- # SVE 32-bit gather prefetch (vector+imm) | ||
27 | diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c | ||
28 | index XXXXXXX..XXXXXXX 100644 | ||
29 | --- a/target/arm/translate-sve.c | ||
30 | +++ b/target/arm/translate-sve.c | ||
31 | @@ -XXX,XX +XXX,XX @@ TRANS_FEAT(FMLALT_zzxw, aa64_sve2, do_FMLAL_zzxw, a, false, true) | ||
32 | TRANS_FEAT(FMLSLB_zzxw, aa64_sve2, do_FMLAL_zzxw, a, true, false) | ||
33 | TRANS_FEAT(FMLSLT_zzxw, aa64_sve2, do_FMLAL_zzxw, a, true, true) | ||
34 | |||
35 | -TRANS_FEAT(SMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz, | ||
36 | - gen_helper_gvec_smmla_b, a, 0) | ||
37 | -TRANS_FEAT(USMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz, | ||
38 | - gen_helper_gvec_usmmla_b, a, 0) | ||
39 | -TRANS_FEAT(UMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz, | ||
40 | - gen_helper_gvec_ummla_b, a, 0) | ||
41 | +TRANS_FEAT_NONSTREAMING(SMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz, | ||
42 | + gen_helper_gvec_smmla_b, a, 0) | ||
43 | +TRANS_FEAT_NONSTREAMING(USMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz, | ||
44 | + gen_helper_gvec_usmmla_b, a, 0) | ||
45 | +TRANS_FEAT_NONSTREAMING(UMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz, | ||
46 | + gen_helper_gvec_ummla_b, a, 0) | ||
47 | |||
48 | TRANS_FEAT(BFDOT_zzzz, aa64_sve_bf16, gen_gvec_ool_arg_zzzz, | ||
49 | gen_helper_gvec_bfdot, a, 0) | ||
50 | -- | ||
51 | 2.25.1 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Richard Henderson <richard.henderson@linaro.org> | ||
2 | 1 | ||
3 | Mark these as non-streaming instructions, which should trap | ||
4 | if full a64 support is not enabled in streaming mode. | ||
5 | |||
6 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | ||
7 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
8 | Message-id: 20220708151540.18136-11-richard.henderson@linaro.org | ||
9 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
10 | --- | ||
11 | target/arm/sme-fa64.decode | 1 - | ||
12 | target/arm/translate-sve.c | 35 ++++++++++++++++++----------------- | ||
13 | 2 files changed, 18 insertions(+), 18 deletions(-) | ||
14 | |||
15 | diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/target/arm/sme-fa64.decode | ||
18 | +++ b/target/arm/sme-fa64.decode | ||
19 | @@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS | ||
20 | # --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset) | ||
21 | # --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm) | ||
22 | |||
23 | -FAIL 0100 0101 --1- ---- 1--- ---- ---- ---- # SVE2 string/histo/crypto instructions | ||
24 | FAIL 1000 010- -00- ---- 10-- ---- ---- ---- # SVE2 32-bit gather NT load (vector+scalar) | ||
25 | FAIL 1000 010- -00- ---- 111- ---- ---- ---- # SVE 32-bit gather prefetch (vector+imm) | ||
26 | FAIL 1000 0100 0-1- ---- 0--- ---- ---- ---- # SVE 32-bit gather prefetch (scalar+vector) | ||
27 | diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c | ||
28 | index XXXXXXX..XXXXXXX 100644 | ||
29 | --- a/target/arm/translate-sve.c | ||
30 | +++ b/target/arm/translate-sve.c | ||
31 | @@ -XXX,XX +XXX,XX @@ DO_SVE2_ZZZ_NARROW(RSUBHNT, rsubhnt) | ||
32 | static gen_helper_gvec_flags_4 * const match_fns[4] = { | ||
33 | gen_helper_sve2_match_ppzz_b, gen_helper_sve2_match_ppzz_h, NULL, NULL | ||
34 | }; | ||
35 | -TRANS_FEAT(MATCH, aa64_sve2, do_ppzz_flags, a, match_fns[a->esz]) | ||
36 | +TRANS_FEAT_NONSTREAMING(MATCH, aa64_sve2, do_ppzz_flags, a, match_fns[a->esz]) | ||
37 | |||
38 | static gen_helper_gvec_flags_4 * const nmatch_fns[4] = { | ||
39 | gen_helper_sve2_nmatch_ppzz_b, gen_helper_sve2_nmatch_ppzz_h, NULL, NULL | ||
40 | }; | ||
41 | -TRANS_FEAT(NMATCH, aa64_sve2, do_ppzz_flags, a, nmatch_fns[a->esz]) | ||
42 | +TRANS_FEAT_NONSTREAMING(NMATCH, aa64_sve2, do_ppzz_flags, a, nmatch_fns[a->esz]) | ||
43 | |||
44 | static gen_helper_gvec_4 * const histcnt_fns[4] = { | ||
45 | NULL, NULL, gen_helper_sve2_histcnt_s, gen_helper_sve2_histcnt_d | ||
46 | }; | ||
47 | -TRANS_FEAT(HISTCNT, aa64_sve2, gen_gvec_ool_arg_zpzz, | ||
48 | - histcnt_fns[a->esz], a, 0) | ||
49 | +TRANS_FEAT_NONSTREAMING(HISTCNT, aa64_sve2, gen_gvec_ool_arg_zpzz, | ||
50 | + histcnt_fns[a->esz], a, 0) | ||
51 | |||
52 | -TRANS_FEAT(HISTSEG, aa64_sve2, gen_gvec_ool_arg_zzz, | ||
53 | - a->esz == 0 ? gen_helper_sve2_histseg : NULL, a, 0) | ||
54 | +TRANS_FEAT_NONSTREAMING(HISTSEG, aa64_sve2, gen_gvec_ool_arg_zzz, | ||
55 | + a->esz == 0 ? gen_helper_sve2_histseg : NULL, a, 0) | ||
56 | |||
57 | DO_ZPZZ_FP(FADDP, aa64_sve2, sve2_faddp_zpzz) | ||
58 | DO_ZPZZ_FP(FMAXNMP, aa64_sve2, sve2_fmaxnmp_zpzz) | ||
59 | @@ -XXX,XX +XXX,XX @@ TRANS_FEAT(SQRDCMLAH_zzzz, aa64_sve2, gen_gvec_ool_zzzz, | ||
60 | TRANS_FEAT(USDOT_zzzz, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz, | ||
61 | a->esz == 2 ? gen_helper_gvec_usdot_b : NULL, a, 0) | ||
62 | |||
63 | -TRANS_FEAT(AESMC, aa64_sve2_aes, gen_gvec_ool_zz, | ||
64 | - gen_helper_crypto_aesmc, a->rd, a->rd, a->decrypt) | ||
65 | +TRANS_FEAT_NONSTREAMING(AESMC, aa64_sve2_aes, gen_gvec_ool_zz, | ||
66 | + gen_helper_crypto_aesmc, a->rd, a->rd, a->decrypt) | ||
67 | |||
68 | -TRANS_FEAT(AESE, aa64_sve2_aes, gen_gvec_ool_arg_zzz, | ||
69 | - gen_helper_crypto_aese, a, false) | ||
70 | -TRANS_FEAT(AESD, aa64_sve2_aes, gen_gvec_ool_arg_zzz, | ||
71 | - gen_helper_crypto_aese, a, true) | ||
72 | +TRANS_FEAT_NONSTREAMING(AESE, aa64_sve2_aes, gen_gvec_ool_arg_zzz, | ||
73 | + gen_helper_crypto_aese, a, false) | ||
74 | +TRANS_FEAT_NONSTREAMING(AESD, aa64_sve2_aes, gen_gvec_ool_arg_zzz, | ||
75 | + gen_helper_crypto_aese, a, true) | ||
76 | |||
77 | -TRANS_FEAT(SM4E, aa64_sve2_sm4, gen_gvec_ool_arg_zzz, | ||
78 | - gen_helper_crypto_sm4e, a, 0) | ||
79 | -TRANS_FEAT(SM4EKEY, aa64_sve2_sm4, gen_gvec_ool_arg_zzz, | ||
80 | - gen_helper_crypto_sm4ekey, a, 0) | ||
81 | +TRANS_FEAT_NONSTREAMING(SM4E, aa64_sve2_sm4, gen_gvec_ool_arg_zzz, | ||
82 | + gen_helper_crypto_sm4e, a, 0) | ||
83 | +TRANS_FEAT_NONSTREAMING(SM4EKEY, aa64_sve2_sm4, gen_gvec_ool_arg_zzz, | ||
84 | + gen_helper_crypto_sm4ekey, a, 0) | ||
85 | |||
86 | -TRANS_FEAT(RAX1, aa64_sve2_sha3, gen_gvec_fn_arg_zzz, gen_gvec_rax1, a) | ||
87 | +TRANS_FEAT_NONSTREAMING(RAX1, aa64_sve2_sha3, gen_gvec_fn_arg_zzz, | ||
88 | + gen_gvec_rax1, a) | ||
89 | |||
90 | TRANS_FEAT(FCVTNT_sh, aa64_sve2, gen_gvec_fpst_arg_zpz, | ||
91 | gen_helper_sve2_fcvtnt_sh, a, 0, FPST_FPCR) | ||
92 | -- | ||
93 | 2.25.1 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Richard Henderson <richard.henderson@linaro.org> | ||
2 | 1 | ||
3 | Mark these as non-streaming instructions, which should trap | ||
4 | if full a64 support is not enabled in streaming mode. | ||
5 | |||
6 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | ||
7 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
8 | Message-id: 20220708151540.18136-12-richard.henderson@linaro.org | ||
9 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
10 | --- | ||
11 | target/arm/sme-fa64.decode | 9 --------- | ||
12 | target/arm/translate-sve.c | 6 ++++++ | ||
13 | 2 files changed, 6 insertions(+), 9 deletions(-) | ||
14 | |||
15 | diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/target/arm/sme-fa64.decode | ||
18 | +++ b/target/arm/sme-fa64.decode | ||
19 | @@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS | ||
20 | # --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset) | ||
21 | # --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm) | ||
22 | |||
23 | -FAIL 1000 010- -00- ---- 10-- ---- ---- ---- # SVE2 32-bit gather NT load (vector+scalar) | ||
24 | FAIL 1000 010- -00- ---- 111- ---- ---- ---- # SVE 32-bit gather prefetch (vector+imm) | ||
25 | FAIL 1000 0100 0-1- ---- 0--- ---- ---- ---- # SVE 32-bit gather prefetch (scalar+vector) | ||
26 | -FAIL 1000 010- -01- ---- 1--- ---- ---- ---- # SVE 32-bit gather load (vector+imm) | ||
27 | -FAIL 1000 0100 0-0- ---- 0--- ---- ---- ---- # SVE 32-bit gather load byte (scalar+vector) | ||
28 | -FAIL 1000 0100 1--- ---- 0--- ---- ---- ---- # SVE 32-bit gather load half (scalar+vector) | ||
29 | -FAIL 1000 0101 0--- ---- 0--- ---- ---- ---- # SVE 32-bit gather load word (scalar+vector) | ||
30 | FAIL 1010 010- ---- ---- 011- ---- ---- ---- # SVE contiguous FF load (scalar+scalar) | ||
31 | FAIL 1010 010- ---1 ---- 101- ---- ---- ---- # SVE contiguous NF load (scalar+imm) | ||
32 | FAIL 1010 010- -01- ---- 000- ---- ---- ---- # SVE load & replicate 32 bytes (scalar+scalar) | ||
33 | FAIL 1010 010- -010 ---- 001- ---- ---- ---- # SVE load & replicate 32 bytes (scalar+imm) | ||
34 | FAIL 1100 010- ---- ---- ---- ---- ---- ---- # SVE 64-bit gather load/prefetch | ||
35 | -FAIL 1110 010- -00- ---- 001- ---- ---- ---- # SVE2 64-bit scatter NT store (vector+scalar) | ||
36 | -FAIL 1110 010- -10- ---- 001- ---- ---- ---- # SVE2 32-bit scatter NT store (vector+scalar) | ||
37 | -FAIL 1110 010- ---- ---- 1-0- ---- ---- ---- # SVE scatter store (scalar+32-bit vector) | ||
38 | -FAIL 1110 010- ---- ---- 101- ---- ---- ---- # SVE scatter store (misc) | ||
39 | diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c | ||
40 | index XXXXXXX..XXXXXXX 100644 | ||
41 | --- a/target/arm/translate-sve.c | ||
42 | +++ b/target/arm/translate-sve.c | ||
43 | @@ -XXX,XX +XXX,XX @@ static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a) | ||
44 | if (!dc_isar_feature(aa64_sve, s)) { | ||
45 | return false; | ||
46 | } | ||
47 | + s->is_nonstreaming = true; | ||
48 | if (!sve_access_check(s)) { | ||
49 | return true; | ||
50 | } | ||
51 | @@ -XXX,XX +XXX,XX @@ static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a) | ||
52 | if (!dc_isar_feature(aa64_sve, s)) { | ||
53 | return false; | ||
54 | } | ||
55 | + s->is_nonstreaming = true; | ||
56 | if (!sve_access_check(s)) { | ||
57 | return true; | ||
58 | } | ||
59 | @@ -XXX,XX +XXX,XX @@ static bool trans_LDNT1_zprz(DisasContext *s, arg_LD1_zprz *a) | ||
60 | if (!dc_isar_feature(aa64_sve2, s)) { | ||
61 | return false; | ||
62 | } | ||
63 | + s->is_nonstreaming = true; | ||
64 | if (!sve_access_check(s)) { | ||
65 | return true; | ||
66 | } | ||
67 | @@ -XXX,XX +XXX,XX @@ static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a) | ||
68 | if (!dc_isar_feature(aa64_sve, s)) { | ||
69 | return false; | ||
70 | } | ||
71 | + s->is_nonstreaming = true; | ||
72 | if (!sve_access_check(s)) { | ||
73 | return true; | ||
74 | } | ||
75 | @@ -XXX,XX +XXX,XX @@ static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a) | ||
76 | if (!dc_isar_feature(aa64_sve, s)) { | ||
77 | return false; | ||
78 | } | ||
79 | + s->is_nonstreaming = true; | ||
80 | if (!sve_access_check(s)) { | ||
81 | return true; | ||
82 | } | ||
83 | @@ -XXX,XX +XXX,XX @@ static bool trans_STNT1_zprz(DisasContext *s, arg_ST1_zprz *a) | ||
84 | if (!dc_isar_feature(aa64_sve2, s)) { | ||
85 | return false; | ||
86 | } | ||
87 | + s->is_nonstreaming = true; | ||
88 | if (!sve_access_check(s)) { | ||
89 | return true; | ||
90 | } | ||
91 | -- | ||
92 | 2.25.1 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Richard Henderson <richard.henderson@linaro.org> | ||
2 | 1 | ||
3 | Mark these as non-streaming instructions, which should trap | ||
4 | if full a64 support is not enabled in streaming mode. | ||
5 | |||
6 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | ||
7 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
8 | Message-id: 20220708151540.18136-14-richard.henderson@linaro.org | ||
9 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
10 | --- | ||
11 | target/arm/sme-fa64.decode | 2 -- | ||
12 | target/arm/translate-sve.c | 2 ++ | ||
13 | 2 files changed, 2 insertions(+), 2 deletions(-) | ||
14 | |||
15 | diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/target/arm/sme-fa64.decode | ||
18 | +++ b/target/arm/sme-fa64.decode | ||
19 | @@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS | ||
20 | # --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset) | ||
21 | # --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm) | ||
22 | |||
23 | -FAIL 1010 010- ---- ---- 011- ---- ---- ---- # SVE contiguous FF load (scalar+scalar) | ||
24 | -FAIL 1010 010- ---1 ---- 101- ---- ---- ---- # SVE contiguous NF load (scalar+imm) | ||
25 | FAIL 1010 010- -01- ---- 000- ---- ---- ---- # SVE load & replicate 32 bytes (scalar+scalar) | ||
26 | FAIL 1010 010- -010 ---- 001- ---- ---- ---- # SVE load & replicate 32 bytes (scalar+imm) | ||
27 | diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c | ||
28 | index XXXXXXX..XXXXXXX 100644 | ||
29 | --- a/target/arm/translate-sve.c | ||
30 | +++ b/target/arm/translate-sve.c | ||
31 | @@ -XXX,XX +XXX,XX @@ static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a) | ||
32 | if (!dc_isar_feature(aa64_sve, s)) { | ||
33 | return false; | ||
34 | } | ||
35 | + s->is_nonstreaming = true; | ||
36 | if (sve_access_check(s)) { | ||
37 | TCGv_i64 addr = new_tmp_a64(s); | ||
38 | tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype)); | ||
39 | @@ -XXX,XX +XXX,XX @@ static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a) | ||
40 | if (!dc_isar_feature(aa64_sve, s)) { | ||
41 | return false; | ||
42 | } | ||
43 | + s->is_nonstreaming = true; | ||
44 | if (sve_access_check(s)) { | ||
45 | int vsz = vec_full_reg_size(s); | ||
46 | int elements = vsz >> dtype_esz[a->dtype]; | ||
47 | -- | ||
48 | 2.25.1 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Richard Henderson <richard.henderson@linaro.org> | ||
2 | 1 | ||
3 | Mark these as non-streaming instructions, which should trap | ||
4 | if full a64 support is not enabled in streaming mode. | ||
5 | |||
6 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | ||
7 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
8 | Message-id: 20220708151540.18136-15-richard.henderson@linaro.org | ||
9 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
10 | --- | ||
11 | target/arm/sme-fa64.decode | 3 --- | ||
12 | target/arm/translate-sve.c | 2 ++ | ||
13 | 2 files changed, 2 insertions(+), 3 deletions(-) | ||
14 | |||
15 | diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/target/arm/sme-fa64.decode | ||
18 | +++ b/target/arm/sme-fa64.decode | ||
19 | @@ -XXX,XX +XXX,XX @@ FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS | ||
20 | # --11 1100 --0- ---- ---- ---- ---- ---- # Load/store FP register (unscaled imm) | ||
21 | # --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset) | ||
22 | # --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm) | ||
23 | - | ||
24 | -FAIL 1010 010- -01- ---- 000- ---- ---- ---- # SVE load & replicate 32 bytes (scalar+scalar) | ||
25 | -FAIL 1010 010- -010 ---- 001- ---- ---- ---- # SVE load & replicate 32 bytes (scalar+imm) | ||
26 | diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c | ||
27 | index XXXXXXX..XXXXXXX 100644 | ||
28 | --- a/target/arm/translate-sve.c | ||
29 | +++ b/target/arm/translate-sve.c | ||
30 | @@ -XXX,XX +XXX,XX @@ static bool trans_LD1RO_zprr(DisasContext *s, arg_rprr_load *a) | ||
31 | if (a->rm == 31) { | ||
32 | return false; | ||
33 | } | ||
34 | + s->is_nonstreaming = true; | ||
35 | if (sve_access_check(s)) { | ||
36 | TCGv_i64 addr = new_tmp_a64(s); | ||
37 | tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype)); | ||
38 | @@ -XXX,XX +XXX,XX @@ static bool trans_LD1RO_zpri(DisasContext *s, arg_rpri_load *a) | ||
39 | if (!dc_isar_feature(aa64_sve_f64mm, s)) { | ||
40 | return false; | ||
41 | } | ||
42 | + s->is_nonstreaming = true; | ||
43 | if (sve_access_check(s)) { | ||
44 | TCGv_i64 addr = new_tmp_a64(s); | ||
45 | tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 32); | ||
46 | -- | ||
47 | 2.25.1 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Richard Henderson <richard.henderson@linaro.org> | ||
2 | 1 | ||
3 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | ||
4 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
5 | Message-id: 20220708151540.18136-19-richard.henderson@linaro.org | ||
6 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
7 | --- | ||
8 | target/arm/helper-sme.h | 2 ++ | ||
9 | target/arm/sme.decode | 4 ++++ | ||
10 | target/arm/sme_helper.c | 25 +++++++++++++++++++++++++ | ||
11 | target/arm/translate-sme.c | 13 +++++++++++++ | ||
12 | 4 files changed, 44 insertions(+) | ||
13 | |||
14 | diff --git a/target/arm/helper-sme.h b/target/arm/helper-sme.h | ||
15 | index XXXXXXX..XXXXXXX 100644 | ||
16 | --- a/target/arm/helper-sme.h | ||
17 | +++ b/target/arm/helper-sme.h | ||
18 | @@ -XXX,XX +XXX,XX @@ | ||
19 | |||
20 | DEF_HELPER_FLAGS_2(set_pstate_sm, TCG_CALL_NO_RWG, void, env, i32) | ||
21 | DEF_HELPER_FLAGS_2(set_pstate_za, TCG_CALL_NO_RWG, void, env, i32) | ||
22 | + | ||
23 | +DEF_HELPER_FLAGS_3(sme_zero, TCG_CALL_NO_RWG, void, env, i32, i32) | ||
24 | diff --git a/target/arm/sme.decode b/target/arm/sme.decode | ||
25 | index XXXXXXX..XXXXXXX 100644 | ||
26 | --- a/target/arm/sme.decode | ||
27 | +++ b/target/arm/sme.decode | ||
28 | @@ -XXX,XX +XXX,XX @@ | ||
29 | # | ||
30 | # This file is processed by scripts/decodetree.py | ||
31 | # | ||
32 | + | ||
33 | +### SME Misc | ||
34 | + | ||
35 | +ZERO 11000000 00 001 00000000000 imm:8 | ||
36 | diff --git a/target/arm/sme_helper.c b/target/arm/sme_helper.c | ||
37 | index XXXXXXX..XXXXXXX 100644 | ||
38 | --- a/target/arm/sme_helper.c | ||
39 | +++ b/target/arm/sme_helper.c | ||
40 | @@ -XXX,XX +XXX,XX @@ void helper_set_pstate_za(CPUARMState *env, uint32_t i) | ||
41 | memset(env->zarray, 0, sizeof(env->zarray)); | ||
42 | } | ||
43 | } | ||
44 | + | ||
45 | +void helper_sme_zero(CPUARMState *env, uint32_t imm, uint32_t svl) | ||
46 | +{ | ||
47 | + uint32_t i; | ||
48 | + | ||
49 | + /* | ||
50 | + * Special case clearing the entire ZA space. | ||
51 | + * This falls into the CONSTRAINED UNPREDICTABLE zeroing of any | ||
52 | + * parts of the ZA storage outside of SVL. | ||
53 | + */ | ||
54 | + if (imm == 0xff) { | ||
55 | + memset(env->zarray, 0, sizeof(env->zarray)); | ||
56 | + return; | ||
57 | + } | ||
58 | + | ||
59 | + /* | ||
60 | + * Recall that ZAnH.D[m] is spread across ZA[n+8*m], | ||
61 | + * so each row is discontiguous within ZA[]. | ||
62 | + */ | ||
63 | + for (i = 0; i < svl; i++) { | ||
64 | + if (imm & (1 << (i % 8))) { | ||
65 | + memset(&env->zarray[i], 0, svl); | ||
66 | + } | ||
67 | + } | ||
68 | +} | ||
69 | diff --git a/target/arm/translate-sme.c b/target/arm/translate-sme.c | ||
70 | index XXXXXXX..XXXXXXX 100644 | ||
71 | --- a/target/arm/translate-sme.c | ||
72 | +++ b/target/arm/translate-sme.c | ||
73 | @@ -XXX,XX +XXX,XX @@ | ||
74 | */ | ||
75 | |||
76 | #include "decode-sme.c.inc" | ||
77 | + | ||
78 | + | ||
79 | +static bool trans_ZERO(DisasContext *s, arg_ZERO *a) | ||
80 | +{ | ||
81 | + if (!dc_isar_feature(aa64_sme, s)) { | ||
82 | + return false; | ||
83 | + } | ||
84 | + if (sme_za_enabled_check(s)) { | ||
85 | + gen_helper_sme_zero(cpu_env, tcg_constant_i32(a->imm), | ||
86 | + tcg_constant_i32(streaming_vec_reg_size(s))); | ||
87 | + } | ||
88 | + return true; | ||
89 | +} | ||
90 | -- | ||
91 | 2.25.1 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Richard Henderson <richard.henderson@linaro.org> | ||
2 | 1 | ||
3 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | ||
4 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
5 | Message-id: 20220708151540.18136-35-richard.henderson@linaro.org | ||
6 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
7 | --- | ||
8 | linux-user/aarch64/cpu_loop.c | 9 +++++++++ | ||
9 | 1 file changed, 9 insertions(+) | ||
10 | |||
11 | diff --git a/linux-user/aarch64/cpu_loop.c b/linux-user/aarch64/cpu_loop.c | ||
12 | index XXXXXXX..XXXXXXX 100644 | ||
13 | --- a/linux-user/aarch64/cpu_loop.c | ||
14 | +++ b/linux-user/aarch64/cpu_loop.c | ||
15 | @@ -XXX,XX +XXX,XX @@ void cpu_loop(CPUARMState *env) | ||
16 | |||
17 | switch (trapnr) { | ||
18 | case EXCP_SWI: | ||
19 | + /* | ||
20 | + * On syscall, PSTATE.ZA is preserved, along with the ZA matrix. | ||
21 | + * PSTATE.SM is cleared, per SMSTOP, which does ResetSVEState. | ||
22 | + */ | ||
23 | + if (FIELD_EX64(env->svcr, SVCR, SM)) { | ||
24 | + env->svcr = FIELD_DP64(env->svcr, SVCR, SM, 0); | ||
25 | + arm_rebuild_hflags(env); | ||
26 | + arm_reset_sve_state(env); | ||
27 | + } | ||
28 | ret = do_syscall(env, | ||
29 | env->xregs[8], | ||
30 | env->xregs[0], | ||
31 | -- | ||
32 | 2.25.1 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Richard Henderson <richard.henderson@linaro.org> | ||
2 | 1 | ||
3 | Make sure to zero the currently reserved fields. | ||
4 | |||
5 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | ||
6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | Message-id: 20220708151540.18136-36-richard.henderson@linaro.org | ||
8 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
9 | --- | ||
10 | linux-user/aarch64/signal.c | 9 ++++++++- | ||
11 | 1 file changed, 8 insertions(+), 1 deletion(-) | ||
12 | |||
13 | diff --git a/linux-user/aarch64/signal.c b/linux-user/aarch64/signal.c | ||
14 | index XXXXXXX..XXXXXXX 100644 | ||
15 | --- a/linux-user/aarch64/signal.c | ||
16 | +++ b/linux-user/aarch64/signal.c | ||
17 | @@ -XXX,XX +XXX,XX @@ struct target_extra_context { | ||
18 | struct target_sve_context { | ||
19 | struct target_aarch64_ctx head; | ||
20 | uint16_t vl; | ||
21 | - uint16_t reserved[3]; | ||
22 | + uint16_t flags; | ||
23 | + uint16_t reserved[2]; | ||
24 | /* The actual SVE data immediately follows. It is laid out | ||
25 | * according to TARGET_SVE_SIG_{Z,P}REG_OFFSET, based off of | ||
26 | * the original struct pointer. | ||
27 | @@ -XXX,XX +XXX,XX @@ struct target_sve_context { | ||
28 | #define TARGET_SVE_SIG_CONTEXT_SIZE(VQ) \ | ||
29 | (TARGET_SVE_SIG_PREG_OFFSET(VQ, 17)) | ||
30 | |||
31 | +#define TARGET_SVE_SIG_FLAG_SM 1 | ||
32 | + | ||
33 | struct target_rt_sigframe { | ||
34 | struct target_siginfo info; | ||
35 | struct target_ucontext uc; | ||
36 | @@ -XXX,XX +XXX,XX @@ static void target_setup_sve_record(struct target_sve_context *sve, | ||
37 | { | ||
38 | int i, j; | ||
39 | |||
40 | + memset(sve, 0, sizeof(*sve)); | ||
41 | __put_user(TARGET_SVE_MAGIC, &sve->head.magic); | ||
42 | __put_user(size, &sve->head.size); | ||
43 | __put_user(vq * TARGET_SVE_VQ_BYTES, &sve->vl); | ||
44 | + if (FIELD_EX64(env->svcr, SVCR, SM)) { | ||
45 | + __put_user(TARGET_SVE_SIG_FLAG_SM, &sve->flags); | ||
46 | + } | ||
47 | |||
48 | /* Note that SVE regs are stored as a byte stream, with each byte element | ||
49 | * at a subsequent address. This corresponds to a little-endian store | ||
50 | -- | ||
51 | 2.25.1 | diff view generated by jsdifflib |
1 | From: Richard Henderson <richard.henderson@linaro.org> | 1 | From: Sergey Kambalin <sergey.kambalin@auriga.com> |
---|---|---|---|
2 | 2 | ||
3 | Add "sve" to the sve prctl functions, to distinguish | 3 | Replace magic property values by a proper definition, |
4 | them from the coming "sme" prctls with similar names. | 4 | removing redundant comments. |
5 | 5 | ||
6 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | 6 | Signed-off-by: Sergey Kambalin <sergey.kambalin@auriga.com> |
7 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 7 | Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org> |
8 | Message-id: 20220708151540.18136-42-richard.henderson@linaro.org | 8 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
9 | Message-id: 20230612223456.33824-3-philmd@linaro.org | ||
10 | Message-Id: <20230531155258.8361-1-sergey.kambalin@auriga.com> | ||
11 | [PMD: Split from bigger patch: 2/4] | ||
12 | Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
9 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 13 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
10 | --- | 14 | --- |
11 | linux-user/aarch64/target_prctl.h | 8 ++++---- | 15 | hw/misc/bcm2835_property.c | 101 +++++++++++++++++++------------------ |
12 | linux-user/syscall.c | 12 ++++++------ | 16 | 1 file changed, 51 insertions(+), 50 deletions(-) |
13 | 2 files changed, 10 insertions(+), 10 deletions(-) | 17 | |
14 | 18 | diff --git a/hw/misc/bcm2835_property.c b/hw/misc/bcm2835_property.c | |
15 | diff --git a/linux-user/aarch64/target_prctl.h b/linux-user/aarch64/target_prctl.h | ||
16 | index XXXXXXX..XXXXXXX 100644 | 19 | index XXXXXXX..XXXXXXX 100644 |
17 | --- a/linux-user/aarch64/target_prctl.h | 20 | --- a/hw/misc/bcm2835_property.c |
18 | +++ b/linux-user/aarch64/target_prctl.h | 21 | +++ b/hw/misc/bcm2835_property.c |
19 | @@ -XXX,XX +XXX,XX @@ | 22 | @@ -XXX,XX +XXX,XX @@ |
20 | #ifndef AARCH64_TARGET_PRCTL_H | 23 | #include "migration/vmstate.h" |
21 | #define AARCH64_TARGET_PRCTL_H | 24 | #include "hw/irq.h" |
22 | 25 | #include "hw/misc/bcm2835_mbox_defs.h" | |
23 | -static abi_long do_prctl_get_vl(CPUArchState *env) | 26 | +#include "hw/misc/raspberrypi-fw-defs.h" |
24 | +static abi_long do_prctl_sve_get_vl(CPUArchState *env) | 27 | #include "sysemu/dma.h" |
25 | { | 28 | #include "qemu/log.h" |
26 | ARMCPU *cpu = env_archcpu(env); | 29 | #include "qemu/module.h" |
27 | if (cpu_isar_feature(aa64_sve, cpu)) { | 30 | @@ -XXX,XX +XXX,XX @@ static void bcm2835_property_mbox_push(BCM2835PropertyState *s, uint32_t value) |
28 | @@ -XXX,XX +XXX,XX @@ static abi_long do_prctl_get_vl(CPUArchState *env) | 31 | /* @(value + 8) : Request/response indicator */ |
29 | } | 32 | resplen = 0; |
30 | return -TARGET_EINVAL; | 33 | switch (tag) { |
31 | } | 34 | - case 0x00000000: /* End tag */ |
32 | -#define do_prctl_get_vl do_prctl_get_vl | 35 | + case RPI_FWREQ_PROPERTY_END: |
33 | +#define do_prctl_sve_get_vl do_prctl_sve_get_vl | 36 | break; |
34 | 37 | - case 0x00000001: /* Get firmware revision */ | |
35 | -static abi_long do_prctl_set_vl(CPUArchState *env, abi_long arg2) | 38 | + case RPI_FWREQ_GET_FIRMWARE_REVISION: |
36 | +static abi_long do_prctl_sve_set_vl(CPUArchState *env, abi_long arg2) | 39 | stl_le_phys(&s->dma_as, value + 12, 346337); |
37 | { | 40 | resplen = 4; |
38 | /* | 41 | break; |
39 | * We cannot support either PR_SVE_SET_VL_ONEXEC or PR_SVE_VL_INHERIT. | 42 | - case 0x00010001: /* Get board model */ |
40 | @@ -XXX,XX +XXX,XX @@ static abi_long do_prctl_set_vl(CPUArchState *env, abi_long arg2) | 43 | + case RPI_FWREQ_GET_BOARD_MODEL: |
41 | } | 44 | qemu_log_mask(LOG_UNIMP, |
42 | return -TARGET_EINVAL; | 45 | "bcm2835_property: 0x%08x get board model NYI\n", |
43 | } | 46 | tag); |
44 | -#define do_prctl_set_vl do_prctl_set_vl | 47 | resplen = 4; |
45 | +#define do_prctl_sve_set_vl do_prctl_sve_set_vl | 48 | break; |
46 | 49 | - case 0x00010002: /* Get board revision */ | |
47 | static abi_long do_prctl_reset_keys(CPUArchState *env, abi_long arg2) | 50 | + case RPI_FWREQ_GET_BOARD_REVISION: |
48 | { | 51 | stl_le_phys(&s->dma_as, value + 12, s->board_rev); |
49 | diff --git a/linux-user/syscall.c b/linux-user/syscall.c | 52 | resplen = 4; |
50 | index XXXXXXX..XXXXXXX 100644 | 53 | break; |
51 | --- a/linux-user/syscall.c | 54 | - case 0x00010003: /* Get board MAC address */ |
52 | +++ b/linux-user/syscall.c | 55 | + case RPI_FWREQ_GET_BOARD_MAC_ADDRESS: |
53 | @@ -XXX,XX +XXX,XX @@ static abi_long do_prctl_inval1(CPUArchState *env, abi_long arg2) | 56 | resplen = sizeof(s->macaddr.a); |
54 | #ifndef do_prctl_set_fp_mode | 57 | dma_memory_write(&s->dma_as, value + 12, s->macaddr.a, resplen, |
55 | #define do_prctl_set_fp_mode do_prctl_inval1 | 58 | MEMTXATTRS_UNSPECIFIED); |
56 | #endif | 59 | break; |
57 | -#ifndef do_prctl_get_vl | 60 | - case 0x00010004: /* Get board serial */ |
58 | -#define do_prctl_get_vl do_prctl_inval0 | 61 | + case RPI_FWREQ_GET_BOARD_SERIAL: |
59 | +#ifndef do_prctl_sve_get_vl | 62 | qemu_log_mask(LOG_UNIMP, |
60 | +#define do_prctl_sve_get_vl do_prctl_inval0 | 63 | "bcm2835_property: 0x%08x get board serial NYI\n", |
61 | #endif | 64 | tag); |
62 | -#ifndef do_prctl_set_vl | 65 | resplen = 8; |
63 | -#define do_prctl_set_vl do_prctl_inval1 | 66 | break; |
64 | +#ifndef do_prctl_sve_set_vl | 67 | - case 0x00010005: /* Get ARM memory */ |
65 | +#define do_prctl_sve_set_vl do_prctl_inval1 | 68 | + case RPI_FWREQ_GET_ARM_MEMORY: |
66 | #endif | 69 | /* base */ |
67 | #ifndef do_prctl_reset_keys | 70 | stl_le_phys(&s->dma_as, value + 12, 0); |
68 | #define do_prctl_reset_keys do_prctl_inval1 | 71 | /* size */ |
69 | @@ -XXX,XX +XXX,XX @@ static abi_long do_prctl(CPUArchState *env, abi_long option, abi_long arg2, | 72 | stl_le_phys(&s->dma_as, value + 16, s->fbdev->vcram_base); |
70 | case PR_SET_FP_MODE: | 73 | resplen = 8; |
71 | return do_prctl_set_fp_mode(env, arg2); | 74 | break; |
72 | case PR_SVE_GET_VL: | 75 | - case 0x00010006: /* Get VC memory */ |
73 | - return do_prctl_get_vl(env); | 76 | + case RPI_FWREQ_GET_VC_MEMORY: |
74 | + return do_prctl_sve_get_vl(env); | 77 | /* base */ |
75 | case PR_SVE_SET_VL: | 78 | stl_le_phys(&s->dma_as, value + 12, s->fbdev->vcram_base); |
76 | - return do_prctl_set_vl(env, arg2); | 79 | /* size */ |
77 | + return do_prctl_sve_set_vl(env, arg2); | 80 | stl_le_phys(&s->dma_as, value + 16, s->fbdev->vcram_size); |
78 | case PR_PAC_RESET_KEYS: | 81 | resplen = 8; |
79 | if (arg3 || arg4 || arg5) { | 82 | break; |
80 | return -TARGET_EINVAL; | 83 | - case 0x00028001: /* Set power state */ |
84 | + case RPI_FWREQ_SET_POWER_STATE: | ||
85 | /* Assume that whatever device they asked for exists, | ||
86 | * and we'll just claim we set it to the desired state | ||
87 | */ | ||
88 | @@ -XXX,XX +XXX,XX @@ static void bcm2835_property_mbox_push(BCM2835PropertyState *s, uint32_t value) | ||
89 | |||
90 | /* Clocks */ | ||
91 | |||
92 | - case 0x00030001: /* Get clock state */ | ||
93 | + case RPI_FWREQ_GET_CLOCK_STATE: | ||
94 | stl_le_phys(&s->dma_as, value + 16, 0x1); | ||
95 | resplen = 8; | ||
96 | break; | ||
97 | |||
98 | - case 0x00038001: /* Set clock state */ | ||
99 | + case RPI_FWREQ_SET_CLOCK_STATE: | ||
100 | qemu_log_mask(LOG_UNIMP, | ||
101 | "bcm2835_property: 0x%08x set clock state NYI\n", | ||
102 | tag); | ||
103 | resplen = 8; | ||
104 | break; | ||
105 | |||
106 | - case 0x00030002: /* Get clock rate */ | ||
107 | - case 0x00030004: /* Get max clock rate */ | ||
108 | - case 0x00030007: /* Get min clock rate */ | ||
109 | + case RPI_FWREQ_GET_CLOCK_RATE: | ||
110 | + case RPI_FWREQ_GET_MAX_CLOCK_RATE: | ||
111 | + case RPI_FWREQ_GET_MIN_CLOCK_RATE: | ||
112 | switch (ldl_le_phys(&s->dma_as, value + 12)) { | ||
113 | - case 1: /* EMMC */ | ||
114 | + case RPI_FIRMWARE_EMMC_CLK_ID: | ||
115 | stl_le_phys(&s->dma_as, value + 16, 50000000); | ||
116 | break; | ||
117 | - case 2: /* UART */ | ||
118 | + case RPI_FIRMWARE_UART_CLK_ID: | ||
119 | stl_le_phys(&s->dma_as, value + 16, 3000000); | ||
120 | break; | ||
121 | default: | ||
122 | @@ -XXX,XX +XXX,XX @@ static void bcm2835_property_mbox_push(BCM2835PropertyState *s, uint32_t value) | ||
123 | resplen = 8; | ||
124 | break; | ||
125 | |||
126 | - case 0x00038002: /* Set clock rate */ | ||
127 | - case 0x00038004: /* Set max clock rate */ | ||
128 | - case 0x00038007: /* Set min clock rate */ | ||
129 | + case RPI_FWREQ_SET_CLOCK_RATE: | ||
130 | + case RPI_FWREQ_SET_MAX_CLOCK_RATE: | ||
131 | + case RPI_FWREQ_SET_MIN_CLOCK_RATE: | ||
132 | qemu_log_mask(LOG_UNIMP, | ||
133 | "bcm2835_property: 0x%08x set clock rate NYI\n", | ||
134 | tag); | ||
135 | @@ -XXX,XX +XXX,XX @@ static void bcm2835_property_mbox_push(BCM2835PropertyState *s, uint32_t value) | ||
136 | |||
137 | /* Temperature */ | ||
138 | |||
139 | - case 0x00030006: /* Get temperature */ | ||
140 | + case RPI_FWREQ_GET_TEMPERATURE: | ||
141 | stl_le_phys(&s->dma_as, value + 16, 25000); | ||
142 | resplen = 8; | ||
143 | break; | ||
144 | |||
145 | - case 0x0003000A: /* Get max temperature */ | ||
146 | + case RPI_FWREQ_GET_MAX_TEMPERATURE: | ||
147 | stl_le_phys(&s->dma_as, value + 16, 99000); | ||
148 | resplen = 8; | ||
149 | break; | ||
150 | |||
151 | /* Frame buffer */ | ||
152 | |||
153 | - case 0x00040001: /* Allocate buffer */ | ||
154 | + case RPI_FWREQ_FRAMEBUFFER_ALLOCATE: | ||
155 | stl_le_phys(&s->dma_as, value + 12, fbconfig.base); | ||
156 | stl_le_phys(&s->dma_as, value + 16, | ||
157 | bcm2835_fb_get_size(&fbconfig)); | ||
158 | resplen = 8; | ||
159 | break; | ||
160 | - case 0x00048001: /* Release buffer */ | ||
161 | + case RPI_FWREQ_FRAMEBUFFER_RELEASE: | ||
162 | resplen = 0; | ||
163 | break; | ||
164 | - case 0x00040002: /* Blank screen */ | ||
165 | + case RPI_FWREQ_FRAMEBUFFER_BLANK: | ||
166 | resplen = 4; | ||
167 | break; | ||
168 | - case 0x00044003: /* Test physical display width/height */ | ||
169 | - case 0x00044004: /* Test virtual display width/height */ | ||
170 | + case RPI_FWREQ_FRAMEBUFFER_TEST_PHYSICAL_WIDTH_HEIGHT: | ||
171 | + case RPI_FWREQ_FRAMEBUFFER_TEST_VIRTUAL_WIDTH_HEIGHT: | ||
172 | resplen = 8; | ||
173 | break; | ||
174 | - case 0x00048003: /* Set physical display width/height */ | ||
175 | + case RPI_FWREQ_FRAMEBUFFER_SET_PHYSICAL_WIDTH_HEIGHT: | ||
176 | fbconfig.xres = ldl_le_phys(&s->dma_as, value + 12); | ||
177 | fbconfig.yres = ldl_le_phys(&s->dma_as, value + 16); | ||
178 | bcm2835_fb_validate_config(&fbconfig); | ||
179 | fbconfig_updated = true; | ||
180 | /* fall through */ | ||
181 | - case 0x00040003: /* Get physical display width/height */ | ||
182 | + case RPI_FWREQ_FRAMEBUFFER_GET_PHYSICAL_WIDTH_HEIGHT: | ||
183 | stl_le_phys(&s->dma_as, value + 12, fbconfig.xres); | ||
184 | stl_le_phys(&s->dma_as, value + 16, fbconfig.yres); | ||
185 | resplen = 8; | ||
186 | break; | ||
187 | - case 0x00048004: /* Set virtual display width/height */ | ||
188 | + case RPI_FWREQ_FRAMEBUFFER_SET_VIRTUAL_WIDTH_HEIGHT: | ||
189 | fbconfig.xres_virtual = ldl_le_phys(&s->dma_as, value + 12); | ||
190 | fbconfig.yres_virtual = ldl_le_phys(&s->dma_as, value + 16); | ||
191 | bcm2835_fb_validate_config(&fbconfig); | ||
192 | fbconfig_updated = true; | ||
193 | /* fall through */ | ||
194 | - case 0x00040004: /* Get virtual display width/height */ | ||
195 | + case RPI_FWREQ_FRAMEBUFFER_GET_VIRTUAL_WIDTH_HEIGHT: | ||
196 | stl_le_phys(&s->dma_as, value + 12, fbconfig.xres_virtual); | ||
197 | stl_le_phys(&s->dma_as, value + 16, fbconfig.yres_virtual); | ||
198 | resplen = 8; | ||
199 | break; | ||
200 | - case 0x00044005: /* Test depth */ | ||
201 | + case RPI_FWREQ_FRAMEBUFFER_TEST_DEPTH: | ||
202 | resplen = 4; | ||
203 | break; | ||
204 | - case 0x00048005: /* Set depth */ | ||
205 | + case RPI_FWREQ_FRAMEBUFFER_SET_DEPTH: | ||
206 | fbconfig.bpp = ldl_le_phys(&s->dma_as, value + 12); | ||
207 | bcm2835_fb_validate_config(&fbconfig); | ||
208 | fbconfig_updated = true; | ||
209 | /* fall through */ | ||
210 | - case 0x00040005: /* Get depth */ | ||
211 | + case RPI_FWREQ_FRAMEBUFFER_GET_DEPTH: | ||
212 | stl_le_phys(&s->dma_as, value + 12, fbconfig.bpp); | ||
213 | resplen = 4; | ||
214 | break; | ||
215 | - case 0x00044006: /* Test pixel order */ | ||
216 | + case RPI_FWREQ_FRAMEBUFFER_TEST_PIXEL_ORDER: | ||
217 | resplen = 4; | ||
218 | break; | ||
219 | - case 0x00048006: /* Set pixel order */ | ||
220 | + case RPI_FWREQ_FRAMEBUFFER_SET_PIXEL_ORDER: | ||
221 | fbconfig.pixo = ldl_le_phys(&s->dma_as, value + 12); | ||
222 | bcm2835_fb_validate_config(&fbconfig); | ||
223 | fbconfig_updated = true; | ||
224 | /* fall through */ | ||
225 | - case 0x00040006: /* Get pixel order */ | ||
226 | + case RPI_FWREQ_FRAMEBUFFER_GET_PIXEL_ORDER: | ||
227 | stl_le_phys(&s->dma_as, value + 12, fbconfig.pixo); | ||
228 | resplen = 4; | ||
229 | break; | ||
230 | - case 0x00044007: /* Test pixel alpha */ | ||
231 | + case RPI_FWREQ_FRAMEBUFFER_TEST_ALPHA_MODE: | ||
232 | resplen = 4; | ||
233 | break; | ||
234 | - case 0x00048007: /* Set alpha */ | ||
235 | + case RPI_FWREQ_FRAMEBUFFER_SET_ALPHA_MODE: | ||
236 | fbconfig.alpha = ldl_le_phys(&s->dma_as, value + 12); | ||
237 | bcm2835_fb_validate_config(&fbconfig); | ||
238 | fbconfig_updated = true; | ||
239 | /* fall through */ | ||
240 | - case 0x00040007: /* Get alpha */ | ||
241 | + case RPI_FWREQ_FRAMEBUFFER_GET_ALPHA_MODE: | ||
242 | stl_le_phys(&s->dma_as, value + 12, fbconfig.alpha); | ||
243 | resplen = 4; | ||
244 | break; | ||
245 | - case 0x00040008: /* Get pitch */ | ||
246 | + case RPI_FWREQ_FRAMEBUFFER_GET_PITCH: | ||
247 | stl_le_phys(&s->dma_as, value + 12, | ||
248 | bcm2835_fb_get_pitch(&fbconfig)); | ||
249 | resplen = 4; | ||
250 | break; | ||
251 | - case 0x00044009: /* Test virtual offset */ | ||
252 | + case RPI_FWREQ_FRAMEBUFFER_TEST_VIRTUAL_OFFSET: | ||
253 | resplen = 8; | ||
254 | break; | ||
255 | - case 0x00048009: /* Set virtual offset */ | ||
256 | + case RPI_FWREQ_FRAMEBUFFER_SET_VIRTUAL_OFFSET: | ||
257 | fbconfig.xoffset = ldl_le_phys(&s->dma_as, value + 12); | ||
258 | fbconfig.yoffset = ldl_le_phys(&s->dma_as, value + 16); | ||
259 | bcm2835_fb_validate_config(&fbconfig); | ||
260 | fbconfig_updated = true; | ||
261 | /* fall through */ | ||
262 | - case 0x00040009: /* Get virtual offset */ | ||
263 | + case RPI_FWREQ_FRAMEBUFFER_GET_VIRTUAL_OFFSET: | ||
264 | stl_le_phys(&s->dma_as, value + 12, fbconfig.xoffset); | ||
265 | stl_le_phys(&s->dma_as, value + 16, fbconfig.yoffset); | ||
266 | resplen = 8; | ||
267 | break; | ||
268 | - case 0x0004000a: /* Get/Test/Set overscan */ | ||
269 | - case 0x0004400a: | ||
270 | - case 0x0004800a: | ||
271 | + case RPI_FWREQ_FRAMEBUFFER_GET_OVERSCAN: | ||
272 | + case RPI_FWREQ_FRAMEBUFFER_TEST_OVERSCAN: | ||
273 | + case RPI_FWREQ_FRAMEBUFFER_SET_OVERSCAN: | ||
274 | stl_le_phys(&s->dma_as, value + 12, 0); | ||
275 | stl_le_phys(&s->dma_as, value + 16, 0); | ||
276 | stl_le_phys(&s->dma_as, value + 20, 0); | ||
277 | stl_le_phys(&s->dma_as, value + 24, 0); | ||
278 | resplen = 16; | ||
279 | break; | ||
280 | - case 0x0004800b: /* Set palette */ | ||
281 | + case RPI_FWREQ_FRAMEBUFFER_SET_PALETTE: | ||
282 | offset = ldl_le_phys(&s->dma_as, value + 12); | ||
283 | length = ldl_le_phys(&s->dma_as, value + 16); | ||
284 | n = 0; | ||
285 | @@ -XXX,XX +XXX,XX @@ static void bcm2835_property_mbox_push(BCM2835PropertyState *s, uint32_t value) | ||
286 | stl_le_phys(&s->dma_as, value + 12, 0); | ||
287 | resplen = 4; | ||
288 | break; | ||
289 | - case 0x00040013: /* Get number of displays */ | ||
290 | + case RPI_FWREQ_FRAMEBUFFER_GET_NUM_DISPLAYS: | ||
291 | stl_le_phys(&s->dma_as, value + 12, 1); | ||
292 | resplen = 4; | ||
293 | break; | ||
294 | |||
295 | - case 0x00060001: /* Get DMA channels */ | ||
296 | + case RPI_FWREQ_GET_DMA_CHANNELS: | ||
297 | /* channels 2-5 */ | ||
298 | stl_le_phys(&s->dma_as, value + 12, 0x003C); | ||
299 | resplen = 4; | ||
300 | break; | ||
301 | |||
302 | - case 0x00050001: /* Get command line */ | ||
303 | + case RPI_FWREQ_GET_COMMAND_LINE: | ||
304 | /* | ||
305 | * We follow the firmware behaviour: no NUL terminator is | ||
306 | * written to the buffer, and if the buffer is too short | ||
81 | -- | 307 | -- |
82 | 2.25.1 | 308 | 2.34.1 |
309 | |||
310 | diff view generated by jsdifflib |
1 | From: Richard Henderson <richard.henderson@linaro.org> | 1 | From: Sergey Kambalin <sergey.kambalin@auriga.com> |
---|---|---|---|
2 | 2 | ||
3 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | 3 | Signed-off-by: Sergey Kambalin <sergey.kambalin@auriga.com> |
4 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 4 | Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org> |
5 | Message-id: 20220708151540.18136-39-richard.henderson@linaro.org | 5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
6 | Message-id: 20230612223456.33824-4-philmd@linaro.org | ||
7 | Message-Id: <20230531155258.8361-1-sergey.kambalin@auriga.com> | ||
8 | [PMD: Split from bigger patch: 4/4] | ||
9 | Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
6 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 10 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
7 | --- | 11 | --- |
8 | linux-user/aarch64/signal.c | 3 +++ | 12 | include/hw/arm/raspi_platform.h | 5 +++++ |
9 | 1 file changed, 3 insertions(+) | 13 | hw/misc/bcm2835_property.c | 8 +++++--- |
14 | 2 files changed, 10 insertions(+), 3 deletions(-) | ||
10 | 15 | ||
11 | diff --git a/linux-user/aarch64/signal.c b/linux-user/aarch64/signal.c | 16 | diff --git a/include/hw/arm/raspi_platform.h b/include/hw/arm/raspi_platform.h |
12 | index XXXXXXX..XXXXXXX 100644 | 17 | index XXXXXXX..XXXXXXX 100644 |
13 | --- a/linux-user/aarch64/signal.c | 18 | --- a/include/hw/arm/raspi_platform.h |
14 | +++ b/linux-user/aarch64/signal.c | 19 | +++ b/include/hw/arm/raspi_platform.h |
15 | @@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env, | 20 | @@ -XXX,XX +XXX,XX @@ |
16 | __get_user(extra_size, | 21 | #define INTERRUPT_ILLEGAL_TYPE0 6 |
17 | &((struct target_extra_context *)ctx)->size); | 22 | #define INTERRUPT_ILLEGAL_TYPE1 7 |
18 | extra = lock_user(VERIFY_READ, extra_datap, extra_size, 0); | 23 | |
19 | + if (!extra) { | 24 | +/* Clock rates */ |
20 | + return 1; | 25 | +#define RPI_FIRMWARE_EMMC_CLK_RATE 50000000 |
21 | + } | 26 | +#define RPI_FIRMWARE_UART_CLK_RATE 3000000 |
22 | break; | 27 | +#define RPI_FIRMWARE_DEFAULT_CLK_RATE 700000000 |
23 | 28 | + | |
24 | default: | 29 | #endif |
30 | diff --git a/hw/misc/bcm2835_property.c b/hw/misc/bcm2835_property.c | ||
31 | index XXXXXXX..XXXXXXX 100644 | ||
32 | --- a/hw/misc/bcm2835_property.c | ||
33 | +++ b/hw/misc/bcm2835_property.c | ||
34 | @@ -XXX,XX +XXX,XX @@ | ||
35 | #include "qemu/log.h" | ||
36 | #include "qemu/module.h" | ||
37 | #include "trace.h" | ||
38 | +#include "hw/arm/raspi_platform.h" | ||
39 | |||
40 | /* https://github.com/raspberrypi/firmware/wiki/Mailbox-property-interface */ | ||
41 | |||
42 | @@ -XXX,XX +XXX,XX @@ static void bcm2835_property_mbox_push(BCM2835PropertyState *s, uint32_t value) | ||
43 | case RPI_FWREQ_GET_MIN_CLOCK_RATE: | ||
44 | switch (ldl_le_phys(&s->dma_as, value + 12)) { | ||
45 | case RPI_FIRMWARE_EMMC_CLK_ID: | ||
46 | - stl_le_phys(&s->dma_as, value + 16, 50000000); | ||
47 | + stl_le_phys(&s->dma_as, value + 16, RPI_FIRMWARE_EMMC_CLK_RATE); | ||
48 | break; | ||
49 | case RPI_FIRMWARE_UART_CLK_ID: | ||
50 | - stl_le_phys(&s->dma_as, value + 16, 3000000); | ||
51 | + stl_le_phys(&s->dma_as, value + 16, RPI_FIRMWARE_UART_CLK_RATE); | ||
52 | break; | ||
53 | default: | ||
54 | - stl_le_phys(&s->dma_as, value + 16, 700000000); | ||
55 | + stl_le_phys(&s->dma_as, value + 16, | ||
56 | + RPI_FIRMWARE_DEFAULT_CLK_RATE); | ||
57 | break; | ||
58 | } | ||
59 | resplen = 8; | ||
25 | -- | 60 | -- |
26 | 2.25.1 | 61 | 2.34.1 |
62 | |||
63 | diff view generated by jsdifflib |
1 | From: Richard Henderson <richard.henderson@linaro.org> | 1 | From: Sergey Kambalin <sergey.kambalin@auriga.com> |
---|---|---|---|
2 | 2 | ||
3 | In parse_user_sigframe, the kernel rejects duplicate sve records, | 3 | Signed-off-by: Sergey Kambalin <sergey.kambalin@auriga.com> |
4 | or records that are smaller than the header. We were silently | 4 | Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org> |
5 | allowing these cases to pass, dropping the record. | 5 | Message-id: 20230612223456.33824-5-philmd@linaro.org |
6 | 6 | Message-Id: <20230531155258.8361-1-sergey.kambalin@auriga.com> | |
7 | [PMD: Split from bigger patch: 3/4] | ||
8 | Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
9 | [PMM: added a comment about RPI_FIRMWARE_CORE_CLK_RATE | ||
10 | really being SoC-specific] | ||
7 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | 11 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> |
8 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
9 | Message-id: 20220708151540.18136-38-richard.henderson@linaro.org | ||
10 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 12 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
11 | --- | 13 | --- |
12 | linux-user/aarch64/signal.c | 5 ++++- | 14 | include/hw/arm/raspi_platform.h | 5 +++++ |
13 | 1 file changed, 4 insertions(+), 1 deletion(-) | 15 | hw/misc/bcm2835_property.c | 3 +++ |
16 | 2 files changed, 8 insertions(+) | ||
14 | 17 | ||
15 | diff --git a/linux-user/aarch64/signal.c b/linux-user/aarch64/signal.c | 18 | diff --git a/include/hw/arm/raspi_platform.h b/include/hw/arm/raspi_platform.h |
16 | index XXXXXXX..XXXXXXX 100644 | 19 | index XXXXXXX..XXXXXXX 100644 |
17 | --- a/linux-user/aarch64/signal.c | 20 | --- a/include/hw/arm/raspi_platform.h |
18 | +++ b/linux-user/aarch64/signal.c | 21 | +++ b/include/hw/arm/raspi_platform.h |
19 | @@ -XXX,XX +XXX,XX @@ static int target_restore_sigframe(CPUARMState *env, | 22 | @@ -XXX,XX +XXX,XX @@ |
20 | break; | 23 | /* Clock rates */ |
21 | 24 | #define RPI_FIRMWARE_EMMC_CLK_RATE 50000000 | |
22 | case TARGET_SVE_MAGIC: | 25 | #define RPI_FIRMWARE_UART_CLK_RATE 3000000 |
23 | + if (sve || size < sizeof(struct target_sve_context)) { | 26 | +/* |
24 | + goto err; | 27 | + * TODO: this is really SoC-specific; we might want to |
25 | + } | 28 | + * set it per-SoC if it turns out any guests care. |
26 | if (cpu_isar_feature(aa64_sve, env_archcpu(env))) { | 29 | + */ |
27 | vq = sve_vq(env); | 30 | +#define RPI_FIRMWARE_CORE_CLK_RATE 350000000 |
28 | sve_size = QEMU_ALIGN_UP(TARGET_SVE_SIG_CONTEXT_SIZE(vq), 16); | 31 | #define RPI_FIRMWARE_DEFAULT_CLK_RATE 700000000 |
29 | - if (!sve && size == sve_size) { | 32 | |
30 | + if (size == sve_size) { | 33 | #endif |
31 | sve = (struct target_sve_context *)ctx; | 34 | diff --git a/hw/misc/bcm2835_property.c b/hw/misc/bcm2835_property.c |
32 | break; | 35 | index XXXXXXX..XXXXXXX 100644 |
33 | } | 36 | --- a/hw/misc/bcm2835_property.c |
37 | +++ b/hw/misc/bcm2835_property.c | ||
38 | @@ -XXX,XX +XXX,XX @@ static void bcm2835_property_mbox_push(BCM2835PropertyState *s, uint32_t value) | ||
39 | case RPI_FIRMWARE_UART_CLK_ID: | ||
40 | stl_le_phys(&s->dma_as, value + 16, RPI_FIRMWARE_UART_CLK_RATE); | ||
41 | break; | ||
42 | + case RPI_FIRMWARE_CORE_CLK_ID: | ||
43 | + stl_le_phys(&s->dma_as, value + 16, RPI_FIRMWARE_CORE_CLK_RATE); | ||
44 | + break; | ||
45 | default: | ||
46 | stl_le_phys(&s->dma_as, value + 16, | ||
47 | RPI_FIRMWARE_DEFAULT_CLK_RATE); | ||
34 | -- | 48 | -- |
35 | 2.25.1 | 49 | 2.34.1 |
50 | |||
51 | diff view generated by jsdifflib |