1 | Hi; hopefully this is the last arm pullreq before softfreeze. | 1 | The following changes since commit 3214bec13d8d4c40f707d21d8350d04e4123ae97: |
2 | There's a handful of miscellaneous bug fixes here, but the | ||
3 | bulk of the pullreq is Mostafa's implementation of 2-stage | ||
4 | translation in the SMMUv3. | ||
5 | 2 | ||
6 | thanks | 3 | Merge tag 'migration-20250110-pull-request' of https://gitlab.com/farosas/qemu into staging (2025-01-10 13:39:19 -0500) |
7 | -- PMM | ||
8 | |||
9 | The following changes since commit d74ec4d7dda6322bcc51d1b13ccbd993d3574795: | ||
10 | |||
11 | Merge tag 'pull-trivial-patches' of https://gitlab.com/mjt0k/qemu into staging (2024-07-18 10:07:23 +1000) | ||
12 | 4 | ||
13 | are available in the Git repository at: | 5 | are available in the Git repository at: |
14 | 6 | ||
15 | https://git.linaro.org/people/pmaydell/qemu-arm.git tags/pull-target-arm-20240718 | 7 | https://git.linaro.org/people/pmaydell/qemu-arm.git tags/pull-target-arm-20250113 |
16 | 8 | ||
17 | for you to fetch changes up to 30a1690f2402e6c1582d5b3ebcf7940bfe2fad4b: | 9 | for you to fetch changes up to 435d260e7ec5ff9c79e3e62f1d66ec82d2d691ae: |
18 | 10 | ||
19 | hvf: arm: Do not advance PC when raising an exception (2024-07-18 13:49:30 +0100) | 11 | docs/system/arm/virt: mention specific migration information (2025-01-13 12:35:35 +0000) |
20 | 12 | ||
21 | ---------------------------------------------------------------- | 13 | ---------------------------------------------------------------- |
22 | target-arm queue: | 14 | target-arm queue: |
23 | * Fix handling of LDAPR/STLR with negative offset | 15 | * hw/arm_sysctl: fix extracting 31th bit of val |
24 | * LDAPR should honour SCTLR_ELx.nAA | 16 | * hw/misc: cast rpm to uint64_t |
25 | * Use float_status copy in sme_fmopa_s | 17 | * tests/qtest/boot-serial-test: Improve ASM |
26 | * hw/display/bcm2835_fb: fix fb_use_offsets condition | 18 | * target/arm: Move minor arithmetic helpers out of helper.c |
27 | * hw/arm/smmuv3: Support and advertise nesting | 19 | * target/arm: change default pauth algorithm to impdef |
28 | * Use FPST_F16 for SME FMOPA (widening) | ||
29 | * tests/arm-cpu-features: Do not assume PMU availability | ||
30 | * hvf: arm: Do not advance PC when raising an exception | ||
31 | 20 | ||
32 | ---------------------------------------------------------------- | 21 | ---------------------------------------------------------------- |
33 | Akihiko Odaki (2): | 22 | Anastasia Belova (1): |
34 | tests/arm-cpu-features: Do not assume PMU availability | 23 | hw/arm_sysctl: fix extracting 31th bit of val |
35 | hvf: arm: Do not advance PC when raising an exception | ||
36 | |||
37 | Daniyal Khan (2): | ||
38 | target/arm: Use float_status copy in sme_fmopa_s | ||
39 | tests/tcg/aarch64: Add test cases for SME FMOPA (widening) | ||
40 | |||
41 | Mostafa Saleh (18): | ||
42 | hw/arm/smmu-common: Add missing size check for stage-1 | ||
43 | hw/arm/smmu: Fix IPA for stage-2 events | ||
44 | hw/arm/smmuv3: Fix encoding of CLASS in events | ||
45 | hw/arm/smmu: Use enum for SMMU stage | ||
46 | hw/arm/smmu: Split smmuv3_translate() | ||
47 | hw/arm/smmu: Consolidate ASID and VMID types | ||
48 | hw/arm/smmu: Introduce CACHED_ENTRY_TO_ADDR | ||
49 | hw/arm/smmuv3: Translate CD and TT using stage-2 table | ||
50 | hw/arm/smmu-common: Rework TLB lookup for nesting | ||
51 | hw/arm/smmu-common: Add support for nested TLB | ||
52 | hw/arm/smmu-common: Support nested translation | ||
53 | hw/arm/smmu: Support nesting in smmuv3_range_inval() | ||
54 | hw/arm/smmu: Introduce smmu_iotlb_inv_asid_vmid | ||
55 | hw/arm/smmu: Support nesting in the rest of commands | ||
56 | hw/arm/smmuv3: Support nested SMMUs in smmuv3_notify_iova() | ||
57 | hw/arm/smmuv3: Handle translation faults according to SMMUPTWEventInfo | ||
58 | hw/arm/smmuv3: Support and advertise nesting | ||
59 | hw/arm/smmu: Refactor SMMU OAS | ||
60 | 24 | ||
61 | Peter Maydell (2): | 25 | Peter Maydell (2): |
62 | target/arm: Fix handling of LDAPR/STLR with negative offset | 26 | target/arm: Move minor arithmetic helpers out of helper.c |
63 | target/arm: LDAPR should honour SCTLR_ELx.nAA | 27 | tests/tcg/aarch64: force qarma5 for pauth-3 test |
64 | 28 | ||
65 | Richard Henderson (1): | 29 | Philippe Mathieu-Daudé (4): |
66 | target/arm: Use FPST_F16 for SME FMOPA (widening) | 30 | tests/qtest/boot-serial-test: Improve ASM comments of PL011 tests |
31 | tests/qtest/boot-serial-test: Reduce for() loop in PL011 tests | ||
32 | tests/qtest/boot-serial-test: Reorder pair of instructions in PL011 test | ||
33 | tests/qtest/boot-serial-test: Initialize PL011 Control register | ||
67 | 34 | ||
68 | SamJakob (1): | 35 | Pierrick Bouvier (3): |
69 | hw/display/bcm2835_fb: fix fb_use_offsets condition | 36 | target/arm: add new property to select pauth-qarma5 |
37 | target/arm: change default pauth algorithm to impdef | ||
38 | docs/system/arm/virt: mention specific migration information | ||
70 | 39 | ||
71 | hw/arm/smmuv3-internal.h | 19 +- | 40 | Tigran Sogomonian (1): |
72 | include/hw/arm/smmu-common.h | 46 +++- | 41 | hw/misc: cast rpm to uint64_t |
73 | target/arm/tcg/a64.decode | 2 +- | 42 | |
74 | hw/arm/smmu-common.c | 312 ++++++++++++++++++++++--- | 43 | docs/system/arm/cpu-features.rst | 7 +- |
75 | hw/arm/smmuv3.c | 467 +++++++++++++++++++++++++------------- | 44 | docs/system/arm/virt.rst | 4 + |
76 | hw/display/bcm2835_fb.c | 2 +- | 45 | docs/system/introduction.rst | 2 +- |
77 | target/arm/hvf/hvf.c | 1 + | 46 | target/arm/cpu.h | 4 + |
78 | target/arm/tcg/sme_helper.c | 2 +- | 47 | hw/core/machine.c | 4 +- |
79 | target/arm/tcg/translate-a64.c | 2 +- | 48 | hw/misc/arm_sysctl.c | 2 +- |
80 | target/arm/tcg/translate-sme.c | 12 +- | 49 | hw/misc/npcm7xx_mft.c | 5 +- |
81 | tests/qtest/arm-cpu-features.c | 13 +- | 50 | target/arm/arm-qmp-cmds.c | 2 +- |
82 | tests/tcg/aarch64/sme-fmopa-1.c | 63 +++++ | 51 | target/arm/cpu.c | 2 + |
83 | tests/tcg/aarch64/sme-fmopa-2.c | 56 +++++ | 52 | target/arm/cpu64.c | 38 ++- |
84 | tests/tcg/aarch64/sme-fmopa-3.c | 63 +++++ | 53 | target/arm/helper.c | 285 ----------------------- |
85 | hw/arm/trace-events | 26 ++- | 54 | target/arm/tcg/arith_helper.c | 296 ++++++++++++++++++++++++ |
86 | tests/tcg/aarch64/Makefile.target | 5 +- | 55 | tests/qtest/arm-cpu-features.c | 15 +- |
87 | 16 files changed, 846 insertions(+), 245 deletions(-) | 56 | tests/qtest/boot-serial-test.c | 23 +- |
88 | create mode 100644 tests/tcg/aarch64/sme-fmopa-1.c | 57 | target/arm/{op_addsub.h => tcg/op_addsub.c.inc} | 0 |
89 | create mode 100644 tests/tcg/aarch64/sme-fmopa-2.c | 58 | target/arm/tcg/meson.build | 1 + |
90 | create mode 100644 tests/tcg/aarch64/sme-fmopa-3.c | 59 | tests/tcg/aarch64/Makefile.softmmu-target | 3 + |
60 | 17 files changed, 377 insertions(+), 316 deletions(-) | ||
61 | create mode 100644 target/arm/tcg/arith_helper.c | ||
62 | rename target/arm/{op_addsub.h => tcg/op_addsub.c.inc} (100%) | ||
Deleted patch | |||
1 | When we converted the LDAPR/STLR instructions to decodetree we | ||
2 | accidentally introduced a regression where the offset is negative. | ||
3 | The 9-bit immediate field is signed, and the old hand decoder | ||
4 | correctly used sextract32() to get it out of the insn word, | ||
5 | but the ldapr_stlr_i pattern in the decode file used "imm:9" | ||
6 | instead of "imm:s9", so it treated the field as unsigned. | ||
7 | 1 | ||
8 | Fix the pattern to treat the field as a signed immediate. | ||
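As an illustration of the difference (a hypothetical standalone sketch, not part of the patch: extract_u() and extract_s() here are illustrative stand-ins that merely mirror the semantics of extract32()/sextract32()):

    #include <stdint.h>
    #include <stdio.h>

    /* Read a bitfield as unsigned, which is what "imm:9" effectively did. */
    static uint32_t extract_u(uint32_t value, int start, int length)
    {
        return (value >> start) & ((1u << length) - 1);
    }

    /* Read the same bitfield and sign-extend it, which is what "imm:s9" does. */
    static int32_t extract_s(uint32_t value, int start, int length)
    {
        uint32_t field = extract_u(value, start, length);
        uint32_t sign = 1u << (length - 1);
        return (int32_t)((field ^ sign) - sign);
    }

    int main(void)
    {
        uint32_t imm9 = 0x1ff;    /* all ones: -1 as a signed 9-bit offset */
        printf("unsigned: %u, signed: %d\n",
               extract_u(imm9, 0, 9), extract_s(imm9, 0, 9));
        return 0;
    }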
9 | |||
10 | Cc: qemu-stable@nongnu.org | ||
11 | Fixes: 2521b6073b7 ("target/arm: Convert LDAPR/STLR (imm) to decodetree") | ||
12 | Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2419 | ||
13 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
14 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
15 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
16 | Message-id: 20240709134504.3500007-2-peter.maydell@linaro.org | ||
17 | --- | ||
18 | target/arm/tcg/a64.decode | 2 +- | ||
19 | 1 file changed, 1 insertion(+), 1 deletion(-) | ||
20 | |||
21 | diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode | ||
22 | index XXXXXXX..XXXXXXX 100644 | ||
23 | --- a/target/arm/tcg/a64.decode | ||
24 | +++ b/target/arm/tcg/a64.decode | ||
25 | @@ -XXX,XX +XXX,XX @@ LDAPR sz:2 111 0 00 1 0 1 11111 1100 00 rn:5 rt:5 | ||
26 | LDRA 11 111 0 00 m:1 . 1 ......... w:1 1 rn:5 rt:5 imm=%ldra_imm | ||
27 | |||
28 | &ldapr_stlr_i rn rt imm sz sign ext | ||
29 | -@ldapr_stlr_i .. ...... .. . imm:9 .. rn:5 rt:5 &ldapr_stlr_i | ||
30 | +@ldapr_stlr_i .. ...... .. . imm:s9 .. rn:5 rt:5 &ldapr_stlr_i | ||
31 | STLR_i sz:2 011001 00 0 ......... 00 ..... ..... @ldapr_stlr_i sign=0 ext=0 | ||
32 | LDAPR_i sz:2 011001 01 0 ......... 00 ..... ..... @ldapr_stlr_i sign=0 ext=0 | ||
33 | LDAPR_i 00 011001 10 0 ......... 00 ..... ..... @ldapr_stlr_i sign=1 ext=0 sz=0 | ||
34 | -- | ||
35 | 2.34.1 | ||
36 | |||
Deleted patch | |||
1 | In commit c1a1f80518d360b when we added the FEAT_LSE2 relaxations to | ||
2 | the alignment requirements for atomic and ordered loads and stores, | ||
3 | we didn't quite get it right for LDAPR/LDAPRH/LDAPRB with no | ||
4 | immediate offset. These instructions were handled in the old decoder | ||
5 | as part of disas_ldst_atomic(), but unlike all the other insns that | ||
6 | function decoded (LDADD, LDCLR, etc) these insns are "ordered", not | ||
7 | "atomic", so they should be using check_ordered_align() rather than | ||
8 | check_atomic_align(). Commit c1a1f80518d360b used | ||
9 | check_atomic_align() regardless for everything in | ||
10 | disas_ldst_atomic(). We then carried that incorrect check over in | ||
11 | the decodetree conversion, where LDAPR/LDAPRH/LDAPRB are now handled | ||
12 | by trans_LDAPR(). | ||
13 | 1 | ||
14 | The effect is that when FEAT_LSE2 is implemented, these instructions | ||
15 | don't honour the SCTLR_ELx.nAA bit and will generate alignment | ||
16 | faults when they should not. | ||
17 | |||
18 | (The LDAPR insns with an immediate offset were in disas_ldst_ldapr_stlr() | ||
19 | and then in trans_LDAPR_i() and trans_STLR_i(), and have always used | ||
20 | the correct check_ordered_align().) | ||
21 | |||
22 | Use check_ordered_align() in trans_LDAPR(). | ||
23 | |||
24 | Cc: qemu-stable@nongnu.org | ||
25 | Fixes: c1a1f80518d360b ("target/arm: Relax ordered/atomic alignment checks for LSE2") | ||
26 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
27 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
28 | Message-id: 20240709134504.3500007-3-peter.maydell@linaro.org | ||
29 | --- | ||
30 | target/arm/tcg/translate-a64.c | 2 +- | ||
31 | 1 file changed, 1 insertion(+), 1 deletion(-) | ||
32 | |||
33 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
34 | index XXXXXXX..XXXXXXX 100644 | ||
35 | --- a/target/arm/tcg/translate-a64.c | ||
36 | +++ b/target/arm/tcg/translate-a64.c | ||
37 | @@ -XXX,XX +XXX,XX @@ static bool trans_LDAPR(DisasContext *s, arg_LDAPR *a) | ||
38 | if (a->rn == 31) { | ||
39 | gen_check_sp_alignment(s); | ||
40 | } | ||
41 | - mop = check_atomic_align(s, a->rn, a->sz); | ||
42 | + mop = check_ordered_align(s, a->rn, 0, false, a->sz); | ||
43 | clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), false, | ||
44 | a->rn != 31, mop); | ||
45 | /* | ||
46 | -- | ||
47 | 2.34.1 |
1 | From: Daniyal Khan <danikhan632@gmail.com> | 1 | From: Anastasia Belova <abelova@astralinux.ru> |
2 | 2 | ||
3 | We made a copy above because the fp exception flags | 3 | 1 << 31 is cast to uint64_t when bitwise-ANDed with val. |
4 | are not propagated back to the FPST register, but | 4 | So this value may become 0xffffffff80000000, but only |
5 | then failed to use the copy. | 5 | the 31st "start" bit is required. |
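A hypothetical standalone illustration of that pitfall (not part of the patch; the test value is made up and, as noted below, cannot actually occur for this device):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t val = 1ULL << 32;      /* bit 31 clear, a higher bit set */

        /* On typical compilers (1 << 31) is INT_MIN; converting it to
         * uint64_t sign-extends the mask to 0xffffffff80000000, so bits
         * above 31 leak into the test. */
        bool int_mask = val & (1 << 31);

        /* Testing only bit 31, which is what extract64(val, 31, 1) does. */
        bool bit31 = (val >> 31) & 1;

        printf("int mask: %d, bit 31 only: %d\n", int_mask, bit31);
        return 0;
    }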
6 | 6 | ||
7 | Cc: qemu-stable@nongnu.org | 7 | This is not possible in practice because the MemoryRegionOps |
8 | Fixes: 558e956c719 ("target/arm: Implement FMOPA, FMOPS (non-widening)") | 8 | uses the default max access size of 4 bytes and so none |
9 | Signed-off-by: Daniyal Khan <danikhan632@gmail.com> | 9 | of the upper bytes of val will be set, but the bitfield |
10 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 10 | extract API is clearer anyway. |
11 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | 11 | |
12 | Reviewed-by: Alex Bennée <alex.bennee@linaro.org> | 12 | Use the bitfield extract() API instead. |
13 | Message-id: 20240717060149.204788-2-richard.henderson@linaro.org | 13 | |
14 | [rth: Split from a larger patch] | 14 | Found by Linux Verification Center (linuxtesting.org) with SVACE. |
15 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 15 | |
16 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | 16 | Signed-off-by: Anastasia Belova <abelova@astralinux.ru> |
17 | Reviewed-by: Alex Bennée <alex.bennee@linaro.org> | 17 | Message-id: 20241220125429.7552-1-abelova@astralinux.ru |
18 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | ||
19 | [PMM: add clarification to commit message] | ||
18 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 20 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
19 | --- | 21 | --- |
20 | target/arm/tcg/sme_helper.c | 2 +- | 22 | hw/misc/arm_sysctl.c | 2 +- |
21 | 1 file changed, 1 insertion(+), 1 deletion(-) | 23 | 1 file changed, 1 insertion(+), 1 deletion(-) |
22 | 24 | ||
23 | diff --git a/target/arm/tcg/sme_helper.c b/target/arm/tcg/sme_helper.c | 25 | diff --git a/hw/misc/arm_sysctl.c b/hw/misc/arm_sysctl.c |
24 | index XXXXXXX..XXXXXXX 100644 | 26 | index XXXXXXX..XXXXXXX 100644 |
25 | --- a/target/arm/tcg/sme_helper.c | 27 | --- a/hw/misc/arm_sysctl.c |
26 | +++ b/target/arm/tcg/sme_helper.c | 28 | +++ b/hw/misc/arm_sysctl.c |
27 | @@ -XXX,XX +XXX,XX @@ void HELPER(sme_fmopa_s)(void *vza, void *vzn, void *vzm, void *vpn, | 29 | @@ -XXX,XX +XXX,XX @@ static void arm_sysctl_write(void *opaque, hwaddr offset, |
28 | if (pb & 1) { | 30 | * as zero. |
29 | uint32_t *a = vza_row + H1_4(col); | 31 | */ |
30 | uint32_t *m = vzm + H1_4(col); | 32 | s->sys_cfgctrl = val & ~((3 << 18) | (1 << 31)); |
31 | - *a = float32_muladd(n, *m, *a, 0, vst); | 33 | - if (val & (1 << 31)) { |
32 | + *a = float32_muladd(n, *m, *a, 0, &fpst); | 34 | + if (extract64(val, 31, 1)) { |
33 | } | 35 | /* Start bit set -- actually do something */ |
34 | col += 4; | 36 | unsigned int dcc = extract32(s->sys_cfgctrl, 26, 4); |
35 | pb >>= 4; | 37 | unsigned int function = extract32(s->sys_cfgctrl, 20, 6); |
36 | -- | 38 | -- |
37 | 2.34.1 | 39 | 2.34.1 |
38 | |||
1 | From: Mostafa Saleh <smostafa@google.com> | 1 | From: Tigran Sogomonian <tsogomonian@astralinux.ru> |
2 | 2 | ||
3 | For the following events (ARM IHI 0070 F.b - 7.3 Event records): | 3 | The value of an arithmetic expression |
4 | - F_TRANSLATION | 4 | 'rpm * NPCM7XX_MFT_PULSE_PER_REVOLUTION' is subject |
5 | - F_ACCESS | 5 | to overflow because its operands are not cast to |
6 | - F_PERMISSION | 6 | a larger data type before performing arithmetic. Thus, |
7 | - F_ADDR_SIZE | 7 | rpm needs to be cast to uint64_t. |
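A hypothetical standalone illustration of why the cast matters (the rpm value is deliberately exaggerated, and PULSES_PER_REV is only a stand-in for NPCM7XX_MFT_PULSE_PER_REVOLUTION):

    #include <stdint.h>
    #include <stdio.h>

    #define PULSES_PER_REV 2u    /* illustrative stand-in for the real constant */

    int main(void)
    {
        uint32_t rpm = 3000000000u;   /* deliberately huge to force a wrap */

        /* Both operands are 32-bit, so the product wraps before it is widened. */
        uint64_t wrapped = rpm * PULSES_PER_REV;

        /* Casting one operand first keeps the multiplication in 64 bits. */
        uint64_t widened = (uint64_t)rpm * PULSES_PER_REV;

        printf("wrapped: %llu, widened: %llu\n",
               (unsigned long long)wrapped, (unsigned long long)widened);
        return 0;
    }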
8 | 8 | ||
9 | If fault occurs at stage 2, S2 == 1 and: | 9 | Found by Linux Verification Center (linuxtesting.org) with SVACE. |
10 | - If translating an IPA for a transaction (whether by input to | ||
11 | stage 2-only configuration, or after successful stage 1 translation), | ||
12 | CLASS == IN, and IPA is provided. | ||
13 | 10 | ||
14 | At the moment only CLASS == IN is used which indicates input | 11 | Signed-off-by: Tigran Sogomonian <tsogomonian@astralinux.ru> |
15 | translation. | 12 | Reviewed-by: Patrick Leis <venture@google.com> |
16 | 13 | Reviewed-by: Hao Wu <wuhaotsh@google.com> | |
17 | However, this was not implemented correctly, as for stage 2, the code | 14 | Message-id: 20241226130311.1349-1-tsogomonian@astralinux.ru |
18 | only sets the S2 bit but not the IPA. | ||
19 | |||
20 | This field has the same bits as FetchAddr in F_WALK_EABT which is | ||
21 | populated correctly, so we don’t change that. | ||
22 | The setting of this field should be done from the walker as the IPA address | ||
23 | wouldn't be known in case of nesting. | ||
24 | |||
25 | For stage 1, the spec says: | ||
26 | If fault occurs at stage 1, S2 == 0 and: | ||
27 | CLASS == IN, IPA is UNKNOWN. | ||
28 | |||
29 | So, no need to set it to for stage 1, as ptw_info is initialised by zero in | ||
30 | smmuv3_translate(). | ||
31 | |||
32 | Fixes: e703f7076a “hw/arm/smmuv3: Add page table walk for stage-2” | ||
33 | Reviewed-by: Jean-Philippe Brucker <jean-philippe@linaro.org> | ||
34 | Reviewed-by: Eric Auger <eric.auger@redhat.com> | ||
35 | Signed-off-by: Mostafa Saleh <smostafa@google.com> | ||
36 | Message-id: 20240715084519.1189624-3-smostafa@google.com | ||
37 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 15 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
38 | --- | 16 | --- |
39 | hw/arm/smmu-common.c | 10 ++++++---- | 17 | hw/misc/npcm7xx_mft.c | 5 +++-- |
40 | hw/arm/smmuv3.c | 4 ++++ | 18 | 1 file changed, 3 insertions(+), 2 deletions(-) |
41 | 2 files changed, 10 insertions(+), 4 deletions(-) | ||
42 | 19 | ||
43 | diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c | 20 | diff --git a/hw/misc/npcm7xx_mft.c b/hw/misc/npcm7xx_mft.c |
44 | index XXXXXXX..XXXXXXX 100644 | 21 | index XXXXXXX..XXXXXXX 100644 |
45 | --- a/hw/arm/smmu-common.c | 22 | --- a/hw/misc/npcm7xx_mft.c |
46 | +++ b/hw/arm/smmu-common.c | 23 | +++ b/hw/misc/npcm7xx_mft.c |
47 | @@ -XXX,XX +XXX,XX @@ static int smmu_ptw_64_s2(SMMUTransCfg *cfg, | 24 | @@ -XXX,XX +XXX,XX @@ static NPCM7xxMFTCaptureState npcm7xx_mft_compute_cnt( |
48 | */ | 25 | * RPM = revolution/min. The time for one revlution (in ns) is |
49 | if (ipa >= (1ULL << inputsize)) { | 26 | * MINUTE_TO_NANOSECOND / RPM. |
50 | info->type = SMMU_PTW_ERR_TRANSLATION; | 27 | */ |
51 | - goto error; | 28 | - count = clock_ns_to_ticks(clock, (60 * NANOSECONDS_PER_SECOND) / |
52 | + goto error_ipa; | 29 | - (rpm * NPCM7XX_MFT_PULSE_PER_REVOLUTION)); |
30 | + count = clock_ns_to_ticks(clock, | ||
31 | + (uint64_t)(60 * NANOSECONDS_PER_SECOND) / | ||
32 | + ((uint64_t)rpm * NPCM7XX_MFT_PULSE_PER_REVOLUTION)); | ||
53 | } | 33 | } |
54 | 34 | ||
55 | while (level < VMSA_LEVELS) { | 35 | if (count > NPCM7XX_MFT_MAX_CNT) { |
56 | @@ -XXX,XX +XXX,XX @@ static int smmu_ptw_64_s2(SMMUTransCfg *cfg, | ||
57 | */ | ||
58 | if (!PTE_AF(pte) && !cfg->s2cfg.affd) { | ||
59 | info->type = SMMU_PTW_ERR_ACCESS; | ||
60 | - goto error; | ||
61 | + goto error_ipa; | ||
62 | } | ||
63 | |||
64 | s2ap = PTE_AP(pte); | ||
65 | if (is_permission_fault_s2(s2ap, perm)) { | ||
66 | info->type = SMMU_PTW_ERR_PERMISSION; | ||
67 | - goto error; | ||
68 | + goto error_ipa; | ||
69 | } | ||
70 | |||
71 | /* | ||
72 | @@ -XXX,XX +XXX,XX @@ static int smmu_ptw_64_s2(SMMUTransCfg *cfg, | ||
73 | */ | ||
74 | if (gpa >= (1ULL << cfg->s2cfg.eff_ps)) { | ||
75 | info->type = SMMU_PTW_ERR_ADDR_SIZE; | ||
76 | - goto error; | ||
77 | + goto error_ipa; | ||
78 | } | ||
79 | |||
80 | tlbe->entry.translated_addr = gpa; | ||
81 | @@ -XXX,XX +XXX,XX @@ static int smmu_ptw_64_s2(SMMUTransCfg *cfg, | ||
82 | } | ||
83 | info->type = SMMU_PTW_ERR_TRANSLATION; | ||
84 | |||
85 | +error_ipa: | ||
86 | + info->addr = ipa; | ||
87 | error: | ||
88 | info->stage = 2; | ||
89 | tlbe->entry.perm = IOMMU_NONE; | ||
90 | diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c | ||
91 | index XXXXXXX..XXXXXXX 100644 | ||
92 | --- a/hw/arm/smmuv3.c | ||
93 | +++ b/hw/arm/smmuv3.c | ||
94 | @@ -XXX,XX +XXX,XX @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion *mr, hwaddr addr, | ||
95 | if (PTW_RECORD_FAULT(cfg)) { | ||
96 | event.type = SMMU_EVT_F_TRANSLATION; | ||
97 | event.u.f_translation.addr = addr; | ||
98 | + event.u.f_translation.addr2 = ptw_info.addr; | ||
99 | event.u.f_translation.rnw = flag & 0x1; | ||
100 | } | ||
101 | break; | ||
102 | @@ -XXX,XX +XXX,XX @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion *mr, hwaddr addr, | ||
103 | if (PTW_RECORD_FAULT(cfg)) { | ||
104 | event.type = SMMU_EVT_F_ADDR_SIZE; | ||
105 | event.u.f_addr_size.addr = addr; | ||
106 | + event.u.f_addr_size.addr2 = ptw_info.addr; | ||
107 | event.u.f_addr_size.rnw = flag & 0x1; | ||
108 | } | ||
109 | break; | ||
110 | @@ -XXX,XX +XXX,XX @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion *mr, hwaddr addr, | ||
111 | if (PTW_RECORD_FAULT(cfg)) { | ||
112 | event.type = SMMU_EVT_F_ACCESS; | ||
113 | event.u.f_access.addr = addr; | ||
114 | + event.u.f_access.addr2 = ptw_info.addr; | ||
115 | event.u.f_access.rnw = flag & 0x1; | ||
116 | } | ||
117 | break; | ||
118 | @@ -XXX,XX +XXX,XX @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion *mr, hwaddr addr, | ||
119 | if (PTW_RECORD_FAULT(cfg)) { | ||
120 | event.type = SMMU_EVT_F_PERMISSION; | ||
121 | event.u.f_permission.addr = addr; | ||
122 | + event.u.f_permission.addr2 = ptw_info.addr; | ||
123 | event.u.f_permission.rnw = flag & 0x1; | ||
124 | } | ||
125 | break; | ||
126 | -- | 36 | -- |
127 | 2.34.1 | 37 | 2.34.1 |
128 | |||
1 | From: Richard Henderson <richard.henderson@linaro.org> | 1 | From: Philippe Mathieu-Daudé <philmd@linaro.org> |
2 | 2 | ||
3 | This operation has float16 inputs and thus must use | 3 | Re-indent ASM comments adding the 'loop:' label. |
4 | the FZ16 control not the FZ control. | ||
5 | 4 | ||
6 | Cc: qemu-stable@nongnu.org | 5 | Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org> |
7 | Fixes: 3916841ac75 ("target/arm: Implement FMOPA, FMOPS (widening)") | 6 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
8 | Reported-by: Daniyal Khan <danikhan632@gmail.com> | 7 | Reviewed-by: Fabiano Rosas <farosas@suse.de> |
9 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
10 | Reviewed-by: Alex Bennée <alex.bennee@linaro.org> | ||
11 | Message-id: 20240717060149.204788-3-richard.henderson@linaro.org | ||
12 | Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2374 | ||
13 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
14 | Reviewed-by: Alex Bennée <alex.bennee@linaro.org> | ||
15 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 8 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
16 | --- | 9 | --- |
17 | target/arm/tcg/translate-sme.c | 12 ++++++++---- | 10 | tests/qtest/boot-serial-test.c | 18 +++++++++--------- |
18 | 1 file changed, 8 insertions(+), 4 deletions(-) | 11 | 1 file changed, 9 insertions(+), 9 deletions(-) |
19 | 12 | ||
20 | diff --git a/target/arm/tcg/translate-sme.c b/target/arm/tcg/translate-sme.c | 13 | diff --git a/tests/qtest/boot-serial-test.c b/tests/qtest/boot-serial-test.c |
21 | index XXXXXXX..XXXXXXX 100644 | 14 | index XXXXXXX..XXXXXXX 100644 |
22 | --- a/target/arm/tcg/translate-sme.c | 15 | --- a/tests/qtest/boot-serial-test.c |
23 | +++ b/target/arm/tcg/translate-sme.c | 16 | +++ b/tests/qtest/boot-serial-test.c |
24 | @@ -XXX,XX +XXX,XX @@ static bool do_outprod(DisasContext *s, arg_op *a, MemOp esz, | 17 | @@ -XXX,XX +XXX,XX @@ static const uint8_t kernel_plml605[] = { |
25 | } | 18 | }; |
26 | 19 | ||
27 | static bool do_outprod_fpst(DisasContext *s, arg_op *a, MemOp esz, | 20 | static const uint8_t bios_raspi2[] = { |
28 | + ARMFPStatusFlavour e_fpst, | 21 | - 0x08, 0x30, 0x9f, 0xe5, /* ldr r3,[pc,#8] Get base */ |
29 | gen_helper_gvec_5_ptr *fn) | 22 | - 0x54, 0x20, 0xa0, 0xe3, /* mov r2,#'T' */ |
30 | { | 23 | - 0x00, 0x20, 0xc3, 0xe5, /* strb r2,[r3] */ |
31 | int svl = streaming_vec_reg_size(s); | 24 | - 0xfb, 0xff, 0xff, 0xea, /* b loop */ |
32 | @@ -XXX,XX +XXX,XX @@ static bool do_outprod_fpst(DisasContext *s, arg_op *a, MemOp esz, | 25 | - 0x00, 0x10, 0x20, 0x3f, /* 0x3f201000 = UART0 base addr */ |
33 | zm = vec_full_reg_ptr(s, a->zm); | 26 | + 0x08, 0x30, 0x9f, 0xe5, /* loop: ldr r3, [pc, #8] Get &UART0 */ |
34 | pn = pred_full_reg_ptr(s, a->pn); | 27 | + 0x54, 0x20, 0xa0, 0xe3, /* mov r2, #'T' */ |
35 | pm = pred_full_reg_ptr(s, a->pm); | 28 | + 0x00, 0x20, 0xc3, 0xe5, /* strb r2, [r3] *TXDAT = 'T' */ |
36 | - fpst = fpstatus_ptr(FPST_FPCR); | 29 | + 0xfb, 0xff, 0xff, 0xea, /* b -12 (loop) */ |
37 | + fpst = fpstatus_ptr(e_fpst); | 30 | + 0x00, 0x10, 0x20, 0x3f, /* UART0: 0x3f201000 */ |
38 | 31 | }; | |
39 | fn(za, zn, zm, pn, pm, fpst, tcg_constant_i32(desc)); | 32 | |
40 | return true; | 33 | static const uint8_t kernel_aarch64[] = { |
41 | } | 34 | - 0x81, 0x0a, 0x80, 0x52, /* mov w1, #0x54 */ |
42 | 35 | - 0x02, 0x20, 0xa1, 0xd2, /* mov x2, #0x9000000 */ | |
43 | -TRANS_FEAT(FMOPA_h, aa64_sme, do_outprod_fpst, a, MO_32, gen_helper_sme_fmopa_h) | 36 | - 0x41, 0x00, 0x00, 0x39, /* strb w1, [x2] */ |
44 | -TRANS_FEAT(FMOPA_s, aa64_sme, do_outprod_fpst, a, MO_32, gen_helper_sme_fmopa_s) | 37 | - 0xfd, 0xff, 0xff, 0x17, /* b -12 (loop) */ |
45 | -TRANS_FEAT(FMOPA_d, aa64_sme_f64f64, do_outprod_fpst, a, MO_64, gen_helper_sme_fmopa_d) | 38 | + 0x81, 0x0a, 0x80, 0x52, /* loop: mov w1, #'T' */ |
46 | +TRANS_FEAT(FMOPA_h, aa64_sme, do_outprod_fpst, a, | 39 | + 0x02, 0x20, 0xa1, 0xd2, /* mov x2, #0x9000000 Load UART0 */ |
47 | + MO_32, FPST_FPCR_F16, gen_helper_sme_fmopa_h) | 40 | + 0x41, 0x00, 0x00, 0x39, /* strb w1, [x2] *TXDAT = 'T' */ |
48 | +TRANS_FEAT(FMOPA_s, aa64_sme, do_outprod_fpst, a, | 41 | + 0xfd, 0xff, 0xff, 0x17, /* b -12 (loop) */ |
49 | + MO_32, FPST_FPCR, gen_helper_sme_fmopa_s) | 42 | }; |
50 | +TRANS_FEAT(FMOPA_d, aa64_sme_f64f64, do_outprod_fpst, a, | 43 | |
51 | + MO_64, FPST_FPCR, gen_helper_sme_fmopa_d) | 44 | static const uint8_t kernel_nrf51[] = { |
52 | |||
53 | /* TODO: FEAT_EBF16 */ | ||
54 | TRANS_FEAT(BFMOPA, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_bfmopa) | ||
55 | -- | 45 | -- |
56 | 2.34.1 | 46 | 2.34.1 |
57 | 47 | ||
1 | From: Mostafa Saleh <smostafa@google.com> | 1 | From: Philippe Mathieu-Daudé <philmd@linaro.org> |
2 | 2 | ||
3 | Everything is in place, consolidate parsing of STE cfg and setting | 3 | Since registers are not modified, we don't need |
4 | translation stage. | 4 | to refill their values. Directly jump to the previous |
5 | store instruction to keep filling the TXDAT register. | ||
5 | 6 | ||
6 | Advertise nesting if stage requested is "nested". | 7 | The equivalent C code remains: |
7 | 8 | ||
8 | Reviewed-by: Jean-Philippe Brucker <jean-philippe@linaro.org> | 9 | while (true) { |
9 | Reviewed-by: Eric Auger <eric.auger@redhat.com> | 10 | *UART_DATA = 'T'; |
10 | Signed-off-by: Mostafa Saleh <smostafa@google.com> | 11 | } |
11 | Reviewed-by: Alex Bennée <alex.bennee@linaro.org> | 12 | |
12 | Message-id: 20240715084519.1189624-18-smostafa@google.com | 13 | Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org> |
14 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
15 | Reviewed-by: Fabiano Rosas <farosas@suse.de> | ||
13 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 16 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
14 | --- | 17 | --- |
15 | hw/arm/smmuv3.c | 35 ++++++++++++++++++++++++++--------- | 18 | tests/qtest/boot-serial-test.c | 12 ++++++------ |
16 | 1 file changed, 26 insertions(+), 9 deletions(-) | 19 | 1 file changed, 6 insertions(+), 6 deletions(-) |
17 | 20 | ||
18 | diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c | 21 | diff --git a/tests/qtest/boot-serial-test.c b/tests/qtest/boot-serial-test.c |
19 | index XXXXXXX..XXXXXXX 100644 | 22 | index XXXXXXX..XXXXXXX 100644 |
20 | --- a/hw/arm/smmuv3.c | 23 | --- a/tests/qtest/boot-serial-test.c |
21 | +++ b/hw/arm/smmuv3.c | 24 | +++ b/tests/qtest/boot-serial-test.c |
22 | @@ -XXX,XX +XXX,XX @@ static void smmuv3_init_regs(SMMUv3State *s) | 25 | @@ -XXX,XX +XXX,XX @@ static const uint8_t kernel_plml605[] = { |
23 | /* Based on sys property, the stages supported in smmu will be advertised.*/ | 26 | }; |
24 | if (s->stage && !strcmp("2", s->stage)) { | 27 | |
25 | s->idr[0] = FIELD_DP32(s->idr[0], IDR0, S2P, 1); | 28 | static const uint8_t bios_raspi2[] = { |
26 | + } else if (s->stage && !strcmp("nested", s->stage)) { | 29 | - 0x08, 0x30, 0x9f, 0xe5, /* loop: ldr r3, [pc, #8] Get &UART0 */ |
27 | + s->idr[0] = FIELD_DP32(s->idr[0], IDR0, S1P, 1); | 30 | + 0x08, 0x30, 0x9f, 0xe5, /* ldr r3, [pc, #8] Get &UART0 */ |
28 | + s->idr[0] = FIELD_DP32(s->idr[0], IDR0, S2P, 1); | 31 | 0x54, 0x20, 0xa0, 0xe3, /* mov r2, #'T' */ |
29 | } else { | 32 | - 0x00, 0x20, 0xc3, 0xe5, /* strb r2, [r3] *TXDAT = 'T' */ |
30 | s->idr[0] = FIELD_DP32(s->idr[0], IDR0, S1P, 1); | 33 | - 0xfb, 0xff, 0xff, 0xea, /* b -12 (loop) */ |
31 | } | 34 | + 0x00, 0x20, 0xc3, 0xe5, /* loop: strb r2, [r3] *TXDAT = 'T' */ |
32 | @@ -XXX,XX +XXX,XX @@ static bool s2_pgtable_config_valid(uint8_t sl0, uint8_t t0sz, uint8_t gran) | 35 | + 0xff, 0xff, 0xff, 0xea, /* b -4 (loop) */ |
33 | 36 | 0x00, 0x10, 0x20, 0x3f, /* UART0: 0x3f201000 */ | |
34 | static int decode_ste_s2_cfg(SMMUTransCfg *cfg, STE *ste) | 37 | }; |
35 | { | 38 | |
36 | - cfg->stage = SMMU_STAGE_2; | 39 | static const uint8_t kernel_aarch64[] = { |
37 | - | 40 | - 0x81, 0x0a, 0x80, 0x52, /* loop: mov w1, #'T' */ |
38 | if (STE_S2AA64(ste) == 0x0) { | 41 | + 0x81, 0x0a, 0x80, 0x52, /* mov w1, #'T' */ |
39 | qemu_log_mask(LOG_UNIMP, | 42 | 0x02, 0x20, 0xa1, 0xd2, /* mov x2, #0x9000000 Load UART0 */ |
40 | "SMMUv3 AArch32 tables not supported\n"); | 43 | - 0x41, 0x00, 0x00, 0x39, /* strb w1, [x2] *TXDAT = 'T' */ |
41 | @@ -XXX,XX +XXX,XX @@ bad_ste: | 44 | - 0xfd, 0xff, 0xff, 0x17, /* b -12 (loop) */ |
42 | return -EINVAL; | 45 | + 0x41, 0x00, 0x00, 0x39, /* loop: strb w1, [x2] *TXDAT = 'T' */ |
43 | } | 46 | + 0xff, 0xff, 0xff, 0x17, /* b -4 (loop) */ |
44 | 47 | }; | |
45 | +static void decode_ste_config(SMMUTransCfg *cfg, uint32_t config) | 48 | |
46 | +{ | 49 | static const uint8_t kernel_nrf51[] = { |
47 | + | ||
48 | + if (STE_CFG_ABORT(config)) { | ||
49 | + cfg->aborted = true; | ||
50 | + return; | ||
51 | + } | ||
52 | + if (STE_CFG_BYPASS(config)) { | ||
53 | + cfg->bypassed = true; | ||
54 | + return; | ||
55 | + } | ||
56 | + | ||
57 | + if (STE_CFG_S1_ENABLED(config)) { | ||
58 | + cfg->stage = SMMU_STAGE_1; | ||
59 | + } | ||
60 | + | ||
61 | + if (STE_CFG_S2_ENABLED(config)) { | ||
62 | + cfg->stage |= SMMU_STAGE_2; | ||
63 | + } | ||
64 | +} | ||
65 | + | ||
66 | /* Returns < 0 in case of invalid STE, 0 otherwise */ | ||
67 | static int decode_ste(SMMUv3State *s, SMMUTransCfg *cfg, | ||
68 | STE *ste, SMMUEventInfo *event) | ||
69 | @@ -XXX,XX +XXX,XX @@ static int decode_ste(SMMUv3State *s, SMMUTransCfg *cfg, | ||
70 | |||
71 | config = STE_CONFIG(ste); | ||
72 | |||
73 | - if (STE_CFG_ABORT(config)) { | ||
74 | - cfg->aborted = true; | ||
75 | - return 0; | ||
76 | - } | ||
77 | + decode_ste_config(cfg, config); | ||
78 | |||
79 | - if (STE_CFG_BYPASS(config)) { | ||
80 | - cfg->bypassed = true; | ||
81 | + if (cfg->aborted || cfg->bypassed) { | ||
82 | return 0; | ||
83 | } | ||
84 | |||
85 | @@ -XXX,XX +XXX,XX @@ static int decode_cd(SMMUv3State *s, SMMUTransCfg *cfg, | ||
86 | |||
87 | /* we support only those at the moment */ | ||
88 | cfg->aa64 = true; | ||
89 | - cfg->stage = SMMU_STAGE_1; | ||
90 | |||
91 | cfg->oas = oas2bits(CD_IPS(cd)); | ||
92 | cfg->oas = MIN(oas2bits(SMMU_IDR5_OAS), cfg->oas); | ||
93 | -- | 50 | -- |
94 | 2.34.1 | 51 | 2.34.1 |
95 | 52 | ||
1 | From: SamJakob <me@samjakob.com> | 1 | From: Philippe Mathieu-Daudé <philmd@linaro.org> |
2 | 2 | ||
3 | It is common practice when implementing double-buffering on VideoCore | 3 | In the next commit we are going to use a different value |
4 | to do so by multiplying the height of the virtual buffer by the | 4 | for the $w1 register, maintaining the same $x2 value. In |
5 | number of virtual screens desired (i.e., two - in the case of | 5 | order to keep the next commit trivial to review, set $x2 |
6 | double-buffering). | 6 | before $w1. |
7 | 7 | ||
8 | At present, this won't work in QEMU because the logic in | 8 | Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org> |
9 | fb_use_offsets requires that both the virtual width and height exceed | 9 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
10 | their physical counterparts. | 10 | Reviewed-by: Fabiano Rosas <farosas@suse.de> |
11 | |||
12 | This appears to be unintentional/a typo and indeed the comment | ||
13 | states; "Experimentally, the hardware seems to do this only if the | ||
14 | viewport size is larger than the physical screen". The | ||
15 | viewport/virtual size would be larger than the physical size if | ||
16 | either virtual dimension were larger than their physical counterparts | ||
17 | and not necessarily both. | ||
18 | |||
19 | Signed-off-by: SamJakob <me@samjakob.com> | ||
20 | Message-id: 20240713160353.62410-1-me@samjakob.com | ||
21 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
22 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 11 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
23 | --- | 12 | --- |
24 | hw/display/bcm2835_fb.c | 2 +- | 13 | tests/qtest/boot-serial-test.c | 2 +- |
25 | 1 file changed, 1 insertion(+), 1 deletion(-) | 14 | 1 file changed, 1 insertion(+), 1 deletion(-) |
26 | 15 | ||
27 | diff --git a/hw/display/bcm2835_fb.c b/hw/display/bcm2835_fb.c | 16 | diff --git a/tests/qtest/boot-serial-test.c b/tests/qtest/boot-serial-test.c |
28 | index XXXXXXX..XXXXXXX 100644 | 17 | index XXXXXXX..XXXXXXX 100644 |
29 | --- a/hw/display/bcm2835_fb.c | 18 | --- a/tests/qtest/boot-serial-test.c |
30 | +++ b/hw/display/bcm2835_fb.c | 19 | +++ b/tests/qtest/boot-serial-test.c |
31 | @@ -XXX,XX +XXX,XX @@ static bool fb_use_offsets(BCM2835FBConfig *config) | 20 | @@ -XXX,XX +XXX,XX @@ static const uint8_t bios_raspi2[] = { |
32 | * viewport size is larger than the physical screen. (It doesn't | 21 | }; |
33 | * prevent the guest setting this silly viewport setting, though...) | 22 | |
34 | */ | 23 | static const uint8_t kernel_aarch64[] = { |
35 | - return config->xres_virtual > config->xres && | 24 | - 0x81, 0x0a, 0x80, 0x52, /* mov w1, #'T' */ |
36 | + return config->xres_virtual > config->xres || | 25 | 0x02, 0x20, 0xa1, 0xd2, /* mov x2, #0x9000000 Load UART0 */ |
37 | config->yres_virtual > config->yres; | 26 | + 0x81, 0x0a, 0x80, 0x52, /* mov w1, #'T' */ |
38 | } | 27 | 0x41, 0x00, 0x00, 0x39, /* loop: strb w1, [x2] *TXDAT = 'T' */ |
39 | 28 | 0xff, 0xff, 0xff, 0x17, /* b -4 (loop) */ | |
29 | }; | ||
40 | -- | 30 | -- |
41 | 2.34.1 | 31 | 2.34.1 |
42 | 32 | ||
1 | From: Mostafa Saleh <smostafa@google.com> | 1 | From: Philippe Mathieu-Daudé <philmd@linaro.org> |
2 | 2 | ||
3 | Previously, to check if faults are enabled, it was sufficient to check | 3 | The tests using the PL011 UART of the virt and raspi machines |
4 | the current stage of translation and check the corresponding | 4 | weren't properly enabling the UART and its transmitter previous |
5 | record_faults flag. | 5 | to sending characters. Follow the PL011 manual initialization |
6 | recommendation by setting the proper bits of the control register. | ||
6 | 7 | ||
7 | However, with nesting, it is possible for stage-1 (nested) translation | 8 | Update the ASM code prefixing: |
8 | to trigger a stage-2 fault, so we check SMMUPTWEventInfo as it would | ||
9 | have the correct stage set from the page table walk. | ||
10 | 9 | ||
11 | Signed-off-by: Mostafa Saleh <smostafa@google.com> | 10 | *UART_CTRL = UART_ENABLE | TX_ENABLE; |
12 | Reviewed-by: Jean-Philippe Brucker <jean-philippe@linaro.org> | 11 | |
13 | Reviewed-by: Eric Auger <eric.auger@redhat.com> | 12 | to: |
14 | Reviewed-by: Alex Bennée <alex.bennee@linaro.org> | 13 | |
15 | Message-id: 20240715084519.1189624-17-smostafa@google.com | 14 | while (true) { |
15 | *UART_DATA = 'T'; | ||
16 | } | ||
17 | |||
18 | Note, since commit 51b61dd4d56 ("hw/char/pl011: Warn when using | ||
19 | disabled transmitter") incomplete PL011 initialization can be | ||
20 | logged using the '-d guest_errors' command line option. | ||
21 | |||
22 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
23 | Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
16 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 24 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
17 | --- | 25 | --- |
18 | hw/arm/smmuv3.c | 15 ++++++++------- | 26 | tests/qtest/boot-serial-test.c | 7 ++++++- |
19 | 1 file changed, 8 insertions(+), 7 deletions(-) | 27 | 1 file changed, 6 insertions(+), 1 deletion(-) |
20 | 28 | ||
21 | diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c | 29 | diff --git a/tests/qtest/boot-serial-test.c b/tests/qtest/boot-serial-test.c |
22 | index XXXXXXX..XXXXXXX 100644 | 30 | index XXXXXXX..XXXXXXX 100644 |
23 | --- a/hw/arm/smmuv3.c | 31 | --- a/tests/qtest/boot-serial-test.c |
24 | +++ b/hw/arm/smmuv3.c | 32 | +++ b/tests/qtest/boot-serial-test.c |
25 | @@ -XXX,XX +XXX,XX @@ | 33 | @@ -XXX,XX +XXX,XX @@ static const uint8_t kernel_plml605[] = { |
26 | #include "smmuv3-internal.h" | 34 | }; |
27 | #include "smmu-internal.h" | 35 | |
28 | 36 | static const uint8_t bios_raspi2[] = { | |
29 | -#define PTW_RECORD_FAULT(cfg) (((cfg)->stage == SMMU_STAGE_1) ? \ | 37 | - 0x08, 0x30, 0x9f, 0xe5, /* ldr r3, [pc, #8] Get &UART0 */ |
30 | - (cfg)->record_faults : \ | 38 | + 0x10, 0x30, 0x9f, 0xe5, /* ldr r3, [pc, #16] Get &UART0 */ |
31 | - (cfg)->s2cfg.record_faults) | 39 | + 0x10, 0x20, 0x9f, 0xe5, /* ldr r2, [pc, #16] Get &CR */ |
32 | +#define PTW_RECORD_FAULT(ptw_info, cfg) (((ptw_info).stage == SMMU_STAGE_1 && \ | 40 | + 0xb0, 0x23, 0xc3, 0xe1, /* strh r2, [r3, #48] Set CR */ |
33 | + (cfg)->record_faults) || \ | 41 | 0x54, 0x20, 0xa0, 0xe3, /* mov r2, #'T' */ |
34 | + ((ptw_info).stage == SMMU_STAGE_2 && \ | 42 | 0x00, 0x20, 0xc3, 0xe5, /* loop: strb r2, [r3] *TXDAT = 'T' */ |
35 | + (cfg)->s2cfg.record_faults)) | 43 | 0xff, 0xff, 0xff, 0xea, /* b -4 (loop) */ |
36 | 44 | 0x00, 0x10, 0x20, 0x3f, /* UART0: 0x3f201000 */ | |
37 | /** | 45 | + 0x01, 0x01, 0x00, 0x00, /* CR: 0x101 = UARTEN|TXE */ |
38 | * smmuv3_trigger_irq - pulse @irq if enabled and update | 46 | }; |
39 | @@ -XXX,XX +XXX,XX @@ static SMMUTranslationStatus smmuv3_do_translate(SMMUv3State *s, hwaddr addr, | 47 | |
40 | event->u.f_walk_eabt.addr2 = ptw_info.addr; | 48 | static const uint8_t kernel_aarch64[] = { |
41 | break; | 49 | 0x02, 0x20, 0xa1, 0xd2, /* mov x2, #0x9000000 Load UART0 */ |
42 | case SMMU_PTW_ERR_TRANSLATION: | 50 | + 0x21, 0x20, 0x80, 0x52, /* mov w1, 0x101 CR = UARTEN|TXE */ |
43 | - if (PTW_RECORD_FAULT(cfg)) { | 51 | + 0x41, 0x60, 0x00, 0x79, /* strh w1, [x2, #48] Set CR */ |
44 | + if (PTW_RECORD_FAULT(ptw_info, cfg)) { | 52 | 0x81, 0x0a, 0x80, 0x52, /* mov w1, #'T' */ |
45 | event->type = SMMU_EVT_F_TRANSLATION; | 53 | 0x41, 0x00, 0x00, 0x39, /* loop: strb w1, [x2] *TXDAT = 'T' */ |
46 | event->u.f_translation.addr2 = ptw_info.addr; | 54 | 0xff, 0xff, 0xff, 0x17, /* b -4 (loop) */ |
47 | event->u.f_translation.class = class; | ||
48 | @@ -XXX,XX +XXX,XX @@ static SMMUTranslationStatus smmuv3_do_translate(SMMUv3State *s, hwaddr addr, | ||
49 | } | ||
50 | break; | ||
51 | case SMMU_PTW_ERR_ADDR_SIZE: | ||
52 | - if (PTW_RECORD_FAULT(cfg)) { | ||
53 | + if (PTW_RECORD_FAULT(ptw_info, cfg)) { | ||
54 | event->type = SMMU_EVT_F_ADDR_SIZE; | ||
55 | event->u.f_addr_size.addr2 = ptw_info.addr; | ||
56 | event->u.f_addr_size.class = class; | ||
57 | @@ -XXX,XX +XXX,XX @@ static SMMUTranslationStatus smmuv3_do_translate(SMMUv3State *s, hwaddr addr, | ||
58 | } | ||
59 | break; | ||
60 | case SMMU_PTW_ERR_ACCESS: | ||
61 | - if (PTW_RECORD_FAULT(cfg)) { | ||
62 | + if (PTW_RECORD_FAULT(ptw_info, cfg)) { | ||
63 | event->type = SMMU_EVT_F_ACCESS; | ||
64 | event->u.f_access.addr2 = ptw_info.addr; | ||
65 | event->u.f_access.class = class; | ||
66 | @@ -XXX,XX +XXX,XX @@ static SMMUTranslationStatus smmuv3_do_translate(SMMUv3State *s, hwaddr addr, | ||
67 | } | ||
68 | break; | ||
69 | case SMMU_PTW_ERR_PERMISSION: | ||
70 | - if (PTW_RECORD_FAULT(cfg)) { | ||
71 | + if (PTW_RECORD_FAULT(ptw_info, cfg)) { | ||
72 | event->type = SMMU_EVT_F_PERMISSION; | ||
73 | event->u.f_permission.addr2 = ptw_info.addr; | ||
74 | event->u.f_permission.class = class; | ||
75 | -- | 55 | -- |
76 | 2.34.1 | 56 | 2.34.1 |
77 | 57 | ||
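A hypothetical bare-metal C sketch of what the updated PL011 test above does (register offsets follow the ASM in the patch; the macro names are illustrative, not QEMU or PL011 identifiers):

    #include <stdint.h>

    #define UART0_BASE 0x09000000UL    /* virt machine PL011, as in the ASM above */
    #define UART_DR    (*(volatile uint8_t  *)(UART0_BASE + 0x00))
    #define UART_CR    (*(volatile uint16_t *)(UART0_BASE + 0x30))

    void serial_test_loop(void)
    {
        UART_CR = 0x101;      /* UARTEN | TXE: enable the UART and its transmitter */
        for (;;) {
            UART_DR = 'T';    /* keep writing 'T' to the data register */
        }
    }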
1 | From: Daniyal Khan <danikhan632@gmail.com> | 1 | helper.c includes some small TCG helper functions used for mostly |
2 | arithmetic instructions. These are TCG only and there's no need for | ||
3 | them to be in the large and unwieldy helper.c. Move them out to | ||
4 | their own source file in the tcg/ subdirectory, together with the | ||
5 | op_addsub.h multiply-included template header that they use. | ||
2 | 6 | ||
3 | Signed-off-by: Daniyal Khan <danikhan632@gmail.com> | 7 | Since we are moving op_addsub.h, we take the opportunity to |
4 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 8 | give it a name which matches our convention for files which |
9 | are not true header files but which are #included from other | ||
10 | C files: op_addsub.c.inc. | ||
11 | |||
12 | (Ironically, this means that helper.c no longer contains | ||
13 | any TCG helper function definitions at all.) | ||
14 | |||
15 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
16 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
17 | Message-id: 20250110131211.2546314-1-peter.maydell@linaro.org | ||
5 | Reviewed-by: Alex Bennée <alex.bennee@linaro.org> | 18 | Reviewed-by: Alex Bennée <alex.bennee@linaro.org> |
6 | Message-id: 20240717060149.204788-4-richard.henderson@linaro.org | ||
7 | Message-Id: 172090222034.13953.16888708708822922098-1@git.sr.ht | ||
8 | [rth: Split test from a larger patch, tidy assembly] | ||
9 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
10 | Reviewed-by: Alex Bennée <alex.bennee@linaro.org> | ||
11 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
12 | --- | 19 | --- |
13 | tests/tcg/aarch64/sme-fmopa-1.c | 63 +++++++++++++++++++++++++++++++ | 20 | target/arm/helper.c | 285 ----------------- |
14 | tests/tcg/aarch64/sme-fmopa-2.c | 56 +++++++++++++++++++++++++++ | 21 | target/arm/tcg/arith_helper.c | 296 ++++++++++++++++++ |
15 | tests/tcg/aarch64/sme-fmopa-3.c | 63 +++++++++++++++++++++++++++++++ | 22 | .../arm/{op_addsub.h => tcg/op_addsub.c.inc} | 0 |
16 | tests/tcg/aarch64/Makefile.target | 5 ++- | 23 | target/arm/tcg/meson.build | 1 + |
17 | 4 files changed, 185 insertions(+), 2 deletions(-) | 24 | 4 files changed, 297 insertions(+), 285 deletions(-) |
18 | create mode 100644 tests/tcg/aarch64/sme-fmopa-1.c | 25 | create mode 100644 target/arm/tcg/arith_helper.c |
19 | create mode 100644 tests/tcg/aarch64/sme-fmopa-2.c | 26 | rename target/arm/{op_addsub.h => tcg/op_addsub.c.inc} (100%) |
20 | create mode 100644 tests/tcg/aarch64/sme-fmopa-3.c | ||
21 | 27 | ||
22 | diff --git a/tests/tcg/aarch64/sme-fmopa-1.c b/tests/tcg/aarch64/sme-fmopa-1.c | 28 | diff --git a/target/arm/helper.c b/target/arm/helper.c |
29 | index XXXXXXX..XXXXXXX 100644 | ||
30 | --- a/target/arm/helper.c | ||
31 | +++ b/target/arm/helper.c | ||
32 | @@ -XXX,XX +XXX,XX @@ | ||
33 | #include "qemu/main-loop.h" | ||
34 | #include "qemu/timer.h" | ||
35 | #include "qemu/bitops.h" | ||
36 | -#include "qemu/crc32c.h" | ||
37 | #include "qemu/qemu-print.h" | ||
38 | #include "exec/exec-all.h" | ||
39 | #include "exec/translation-block.h" | ||
40 | -#include <zlib.h> /* for crc32 */ | ||
41 | #include "hw/irq.h" | ||
42 | #include "system/cpu-timers.h" | ||
43 | #include "system/kvm.h" | ||
44 | @@ -XXX,XX +XXX,XX @@ ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va, | ||
45 | }; | ||
46 | } | ||
47 | |||
48 | -/* | ||
49 | - * Note that signed overflow is undefined in C. The following routines are | ||
50 | - * careful to use unsigned types where modulo arithmetic is required. | ||
51 | - * Failure to do so _will_ break on newer gcc. | ||
52 | - */ | ||
53 | - | ||
54 | -/* Signed saturating arithmetic. */ | ||
55 | - | ||
56 | -/* Perform 16-bit signed saturating addition. */ | ||
57 | -static inline uint16_t add16_sat(uint16_t a, uint16_t b) | ||
58 | -{ | ||
59 | - uint16_t res; | ||
60 | - | ||
61 | - res = a + b; | ||
62 | - if (((res ^ a) & 0x8000) && !((a ^ b) & 0x8000)) { | ||
63 | - if (a & 0x8000) { | ||
64 | - res = 0x8000; | ||
65 | - } else { | ||
66 | - res = 0x7fff; | ||
67 | - } | ||
68 | - } | ||
69 | - return res; | ||
70 | -} | ||
71 | - | ||
72 | -/* Perform 8-bit signed saturating addition. */ | ||
73 | -static inline uint8_t add8_sat(uint8_t a, uint8_t b) | ||
74 | -{ | ||
75 | - uint8_t res; | ||
76 | - | ||
77 | - res = a + b; | ||
78 | - if (((res ^ a) & 0x80) && !((a ^ b) & 0x80)) { | ||
79 | - if (a & 0x80) { | ||
80 | - res = 0x80; | ||
81 | - } else { | ||
82 | - res = 0x7f; | ||
83 | - } | ||
84 | - } | ||
85 | - return res; | ||
86 | -} | ||
87 | - | ||
88 | -/* Perform 16-bit signed saturating subtraction. */ | ||
89 | -static inline uint16_t sub16_sat(uint16_t a, uint16_t b) | ||
90 | -{ | ||
91 | - uint16_t res; | ||
92 | - | ||
93 | - res = a - b; | ||
94 | - if (((res ^ a) & 0x8000) && ((a ^ b) & 0x8000)) { | ||
95 | - if (a & 0x8000) { | ||
96 | - res = 0x8000; | ||
97 | - } else { | ||
98 | - res = 0x7fff; | ||
99 | - } | ||
100 | - } | ||
101 | - return res; | ||
102 | -} | ||
103 | - | ||
104 | -/* Perform 8-bit signed saturating subtraction. */ | ||
105 | -static inline uint8_t sub8_sat(uint8_t a, uint8_t b) | ||
106 | -{ | ||
107 | - uint8_t res; | ||
108 | - | ||
109 | - res = a - b; | ||
110 | - if (((res ^ a) & 0x80) && ((a ^ b) & 0x80)) { | ||
111 | - if (a & 0x80) { | ||
112 | - res = 0x80; | ||
113 | - } else { | ||
114 | - res = 0x7f; | ||
115 | - } | ||
116 | - } | ||
117 | - return res; | ||
118 | -} | ||
119 | - | ||
120 | -#define ADD16(a, b, n) RESULT(add16_sat(a, b), n, 16); | ||
121 | -#define SUB16(a, b, n) RESULT(sub16_sat(a, b), n, 16); | ||
122 | -#define ADD8(a, b, n) RESULT(add8_sat(a, b), n, 8); | ||
123 | -#define SUB8(a, b, n) RESULT(sub8_sat(a, b), n, 8); | ||
124 | -#define PFX q | ||
125 | - | ||
126 | -#include "op_addsub.h" | ||
127 | - | ||
128 | -/* Unsigned saturating arithmetic. */ | ||
129 | -static inline uint16_t add16_usat(uint16_t a, uint16_t b) | ||
130 | -{ | ||
131 | - uint16_t res; | ||
132 | - res = a + b; | ||
133 | - if (res < a) { | ||
134 | - res = 0xffff; | ||
135 | - } | ||
136 | - return res; | ||
137 | -} | ||
138 | - | ||
139 | -static inline uint16_t sub16_usat(uint16_t a, uint16_t b) | ||
140 | -{ | ||
141 | - if (a > b) { | ||
142 | - return a - b; | ||
143 | - } else { | ||
144 | - return 0; | ||
145 | - } | ||
146 | -} | ||
147 | - | ||
148 | -static inline uint8_t add8_usat(uint8_t a, uint8_t b) | ||
149 | -{ | ||
150 | - uint8_t res; | ||
151 | - res = a + b; | ||
152 | - if (res < a) { | ||
153 | - res = 0xff; | ||
154 | - } | ||
155 | - return res; | ||
156 | -} | ||
157 | - | ||
158 | -static inline uint8_t sub8_usat(uint8_t a, uint8_t b) | ||
159 | -{ | ||
160 | - if (a > b) { | ||
161 | - return a - b; | ||
162 | - } else { | ||
163 | - return 0; | ||
164 | - } | ||
165 | -} | ||
166 | - | ||
167 | -#define ADD16(a, b, n) RESULT(add16_usat(a, b), n, 16); | ||
168 | -#define SUB16(a, b, n) RESULT(sub16_usat(a, b), n, 16); | ||
169 | -#define ADD8(a, b, n) RESULT(add8_usat(a, b), n, 8); | ||
170 | -#define SUB8(a, b, n) RESULT(sub8_usat(a, b), n, 8); | ||
171 | -#define PFX uq | ||
172 | - | ||
173 | -#include "op_addsub.h" | ||
174 | - | ||
175 | -/* Signed modulo arithmetic. */ | ||
176 | -#define SARITH16(a, b, n, op) do { \ | ||
177 | - int32_t sum; \ | ||
178 | - sum = (int32_t)(int16_t)(a) op (int32_t)(int16_t)(b); \ | ||
179 | - RESULT(sum, n, 16); \ | ||
180 | - if (sum >= 0) \ | ||
181 | - ge |= 3 << (n * 2); \ | ||
182 | - } while (0) | ||
183 | - | ||
184 | -#define SARITH8(a, b, n, op) do { \ | ||
185 | - int32_t sum; \ | ||
186 | - sum = (int32_t)(int8_t)(a) op (int32_t)(int8_t)(b); \ | ||
187 | - RESULT(sum, n, 8); \ | ||
188 | - if (sum >= 0) \ | ||
189 | - ge |= 1 << n; \ | ||
190 | - } while (0) | ||
191 | - | ||
192 | - | ||
193 | -#define ADD16(a, b, n) SARITH16(a, b, n, +) | ||
194 | -#define SUB16(a, b, n) SARITH16(a, b, n, -) | ||
195 | -#define ADD8(a, b, n) SARITH8(a, b, n, +) | ||
196 | -#define SUB8(a, b, n) SARITH8(a, b, n, -) | ||
197 | -#define PFX s | ||
198 | -#define ARITH_GE | ||
199 | - | ||
200 | -#include "op_addsub.h" | ||
201 | - | ||
202 | -/* Unsigned modulo arithmetic. */ | ||
203 | -#define ADD16(a, b, n) do { \ | ||
204 | - uint32_t sum; \ | ||
205 | - sum = (uint32_t)(uint16_t)(a) + (uint32_t)(uint16_t)(b); \ | ||
206 | - RESULT(sum, n, 16); \ | ||
207 | - if ((sum >> 16) == 1) \ | ||
208 | - ge |= 3 << (n * 2); \ | ||
209 | - } while (0) | ||
210 | - | ||
211 | -#define ADD8(a, b, n) do { \ | ||
212 | - uint32_t sum; \ | ||
213 | - sum = (uint32_t)(uint8_t)(a) + (uint32_t)(uint8_t)(b); \ | ||
214 | - RESULT(sum, n, 8); \ | ||
215 | - if ((sum >> 8) == 1) \ | ||
216 | - ge |= 1 << n; \ | ||
217 | - } while (0) | ||
218 | - | ||
219 | -#define SUB16(a, b, n) do { \ | ||
220 | - uint32_t sum; \ | ||
221 | - sum = (uint32_t)(uint16_t)(a) - (uint32_t)(uint16_t)(b); \ | ||
222 | - RESULT(sum, n, 16); \ | ||
223 | - if ((sum >> 16) == 0) \ | ||
224 | - ge |= 3 << (n * 2); \ | ||
225 | - } while (0) | ||
226 | - | ||
227 | -#define SUB8(a, b, n) do { \ | ||
228 | - uint32_t sum; \ | ||
229 | - sum = (uint32_t)(uint8_t)(a) - (uint32_t)(uint8_t)(b); \ | ||
230 | - RESULT(sum, n, 8); \ | ||
231 | - if ((sum >> 8) == 0) \ | ||
232 | - ge |= 1 << n; \ | ||
233 | - } while (0) | ||
234 | - | ||
235 | -#define PFX u | ||
236 | -#define ARITH_GE | ||
237 | - | ||
238 | -#include "op_addsub.h" | ||
239 | - | ||
240 | -/* Halved signed arithmetic. */ | ||
241 | -#define ADD16(a, b, n) \ | ||
242 | - RESULT(((int32_t)(int16_t)(a) + (int32_t)(int16_t)(b)) >> 1, n, 16) | ||
243 | -#define SUB16(a, b, n) \ | ||
244 | - RESULT(((int32_t)(int16_t)(a) - (int32_t)(int16_t)(b)) >> 1, n, 16) | ||
245 | -#define ADD8(a, b, n) \ | ||
246 | - RESULT(((int32_t)(int8_t)(a) + (int32_t)(int8_t)(b)) >> 1, n, 8) | ||
247 | -#define SUB8(a, b, n) \ | ||
248 | - RESULT(((int32_t)(int8_t)(a) - (int32_t)(int8_t)(b)) >> 1, n, 8) | ||
249 | -#define PFX sh | ||
250 | - | ||
251 | -#include "op_addsub.h" | ||
252 | - | ||
253 | -/* Halved unsigned arithmetic. */ | ||
254 | -#define ADD16(a, b, n) \ | ||
255 | - RESULT(((uint32_t)(uint16_t)(a) + (uint32_t)(uint16_t)(b)) >> 1, n, 16) | ||
256 | -#define SUB16(a, b, n) \ | ||
257 | - RESULT(((uint32_t)(uint16_t)(a) - (uint32_t)(uint16_t)(b)) >> 1, n, 16) | ||
258 | -#define ADD8(a, b, n) \ | ||
259 | - RESULT(((uint32_t)(uint8_t)(a) + (uint32_t)(uint8_t)(b)) >> 1, n, 8) | ||
260 | -#define SUB8(a, b, n) \ | ||
261 | - RESULT(((uint32_t)(uint8_t)(a) - (uint32_t)(uint8_t)(b)) >> 1, n, 8) | ||
262 | -#define PFX uh | ||
263 | - | ||
264 | -#include "op_addsub.h" | ||
265 | - | ||
266 | -static inline uint8_t do_usad(uint8_t a, uint8_t b) | ||
267 | -{ | ||
268 | - if (a > b) { | ||
269 | - return a - b; | ||
270 | - } else { | ||
271 | - return b - a; | ||
272 | - } | ||
273 | -} | ||
274 | - | ||
275 | -/* Unsigned sum of absolute byte differences. */ | ||
276 | -uint32_t HELPER(usad8)(uint32_t a, uint32_t b) | ||
277 | -{ | ||
278 | - uint32_t sum; | ||
279 | - sum = do_usad(a, b); | ||
280 | - sum += do_usad(a >> 8, b >> 8); | ||
281 | - sum += do_usad(a >> 16, b >> 16); | ||
282 | - sum += do_usad(a >> 24, b >> 24); | ||
283 | - return sum; | ||
284 | -} | ||
285 | - | ||
286 | -/* For ARMv6 SEL instruction. */ | ||
287 | -uint32_t HELPER(sel_flags)(uint32_t flags, uint32_t a, uint32_t b) | ||
288 | -{ | ||
289 | - uint32_t mask; | ||
290 | - | ||
291 | - mask = 0; | ||
292 | - if (flags & 1) { | ||
293 | - mask |= 0xff; | ||
294 | - } | ||
295 | - if (flags & 2) { | ||
296 | - mask |= 0xff00; | ||
297 | - } | ||
298 | - if (flags & 4) { | ||
299 | - mask |= 0xff0000; | ||
300 | - } | ||
301 | - if (flags & 8) { | ||
302 | - mask |= 0xff000000; | ||
303 | - } | ||
304 | - return (a & mask) | (b & ~mask); | ||
305 | -} | ||
306 | - | ||
307 | -/* | ||
308 | - * CRC helpers. | ||
309 | - * The upper bytes of val (above the number specified by 'bytes') must have | ||
310 | - * been zeroed out by the caller. | ||
311 | - */ | ||
312 | -uint32_t HELPER(crc32)(uint32_t acc, uint32_t val, uint32_t bytes) | ||
313 | -{ | ||
314 | - uint8_t buf[4]; | ||
315 | - | ||
316 | - stl_le_p(buf, val); | ||
317 | - | ||
318 | - /* zlib crc32 converts the accumulator and output to one's complement. */ | ||
319 | - return crc32(acc ^ 0xffffffff, buf, bytes) ^ 0xffffffff; | ||
320 | -} | ||
321 | - | ||
322 | -uint32_t HELPER(crc32c)(uint32_t acc, uint32_t val, uint32_t bytes) | ||
323 | -{ | ||
324 | - uint8_t buf[4]; | ||
325 | - | ||
326 | - stl_le_p(buf, val); | ||
327 | - | ||
328 | - /* Linux crc32c converts the output to one's complement. */ | ||
329 | - return crc32c(acc, buf, bytes) ^ 0xffffffff; | ||
330 | -} | ||
331 | |||
332 | /* | ||
333 | * Return the exception level to which FP-disabled exceptions should | ||
334 | diff --git a/target/arm/tcg/arith_helper.c b/target/arm/tcg/arith_helper.c | ||
23 | new file mode 100644 | 335 | new file mode 100644 |
24 | index XXXXXXX..XXXXXXX | 336 | index XXXXXXX..XXXXXXX |
25 | --- /dev/null | 337 | --- /dev/null |
26 | +++ b/tests/tcg/aarch64/sme-fmopa-1.c | 338 | +++ b/target/arm/tcg/arith_helper.c |
27 | @@ -XXX,XX +XXX,XX @@ | 339 | @@ -XXX,XX +XXX,XX @@ |
28 | +/* | 340 | +/* |
29 | + * SME outer product, 1 x 1. | 341 | + * ARM generic helpers for various arithmetical operations. |
342 | + * | ||
343 | + * This code is licensed under the GNU GPL v2 or later. | ||
344 | + * | ||
30 | + * SPDX-License-Identifier: GPL-2.0-or-later | 345 | + * SPDX-License-Identifier: GPL-2.0-or-later |
31 | + */ | 346 | + */ |
32 | + | 347 | +#include "qemu/osdep.h" |
33 | +#include <stdio.h> | 348 | +#include "cpu.h" |
34 | + | 349 | +#include "exec/helper-proto.h" |
35 | +static void foo(float *dst) | 350 | +#include "qemu/crc32c.h" |
36 | +{ | 351 | +#include <zlib.h> /* for crc32 */ |
37 | + asm(".arch_extension sme\n\t" | 352 | + |
38 | + "smstart\n\t" | 353 | +/* |
39 | + "ptrue p0.s, vl4\n\t" | 354 | + * Note that signed overflow is undefined in C. The following routines are |
40 | + "fmov z0.s, #1.0\n\t" | 355 | + * careful to use unsigned types where modulo arithmetic is required. |
41 | + /* | 356 | + * Failure to do so _will_ break on newer gcc. |
42 | + * An outer product of a vector of 1.0 by itself should be a matrix of 1.0. | 357 | + */ |
43 | + * Note that we are using tile 1 here (za1.s) rather than tile 0. | 358 | + |
44 | + */ | 359 | +/* Signed saturating arithmetic. */ |
45 | + "zero {za}\n\t" | 360 | + |
46 | + "fmopa za1.s, p0/m, p0/m, z0.s, z0.s\n\t" | 361 | +/* Perform 16-bit signed saturating addition. */ |
47 | + /* | 362 | +static inline uint16_t add16_sat(uint16_t a, uint16_t b) |
48 | + * Read the first 4x4 sub-matrix of elements from tile 1: | 363 | +{ |
49 | + * Note that za1h should be interchangeable here. | 364 | + uint16_t res; |
50 | + */ | 365 | + |
51 | + "mov w12, #0\n\t" | 366 | + res = a + b; |
52 | + "mova z0.s, p0/m, za1v.s[w12, #0]\n\t" | 367 | + if (((res ^ a) & 0x8000) && !((a ^ b) & 0x8000)) { |
53 | + "mova z1.s, p0/m, za1v.s[w12, #1]\n\t" | 368 | + if (a & 0x8000) { |
54 | + "mova z2.s, p0/m, za1v.s[w12, #2]\n\t" | 369 | + res = 0x8000; |
55 | + "mova z3.s, p0/m, za1v.s[w12, #3]\n\t" | 370 | + } else { |
56 | + /* | 371 | + res = 0x7fff; |
57 | + * And store them to the input pointer (dst in the C code): | ||
58 | + */ | ||
59 | + "st1w {z0.s}, p0, [%0]\n\t" | ||
60 | + "add x0, x0, #16\n\t" | ||
61 | + "st1w {z1.s}, p0, [x0]\n\t" | ||
62 | + "add x0, x0, #16\n\t" | ||
63 | + "st1w {z2.s}, p0, [x0]\n\t" | ||
64 | + "add x0, x0, #16\n\t" | ||
65 | + "st1w {z3.s}, p0, [x0]\n\t" | ||
66 | + "smstop" | ||
67 | + : : "r"(dst) | ||
68 | + : "x12", "d0", "d1", "d2", "d3", "memory"); | ||
69 | +} | ||
70 | + | ||
71 | +int main() | ||
72 | +{ | ||
73 | + float dst[16] = { }; | ||
74 | + | ||
75 | + foo(dst); | ||
76 | + | ||
77 | + for (int i = 0; i < 16; i++) { | ||
78 | + if (dst[i] != 1.0f) { | ||
79 | + goto failure; | ||
80 | + } | 372 | + } |
81 | + } | 373 | + } |
82 | + /* success */ | 374 | + return res; |
83 | + return 0; | 375 | +} |
84 | + | 376 | + |
85 | + failure: | 377 | +/* Perform 8-bit signed saturating addition. */ |
86 | + for (int i = 0; i < 16; i++) { | 378 | +static inline uint8_t add8_sat(uint8_t a, uint8_t b) |
87 | + printf("%f%c", dst[i], i % 4 == 3 ? '\n' : ' '); | 379 | +{ |
88 | + } | 380 | + uint8_t res; |
89 | + return 1; | 381 | + |
90 | +} | 382 | + res = a + b; |
91 | diff --git a/tests/tcg/aarch64/sme-fmopa-2.c b/tests/tcg/aarch64/sme-fmopa-2.c | 383 | + if (((res ^ a) & 0x80) && !((a ^ b) & 0x80)) { |
92 | new file mode 100644 | 384 | + if (a & 0x80) { |
93 | index XXXXXXX..XXXXXXX | 385 | + res = 0x80; |
94 | --- /dev/null | 386 | + } else { |
95 | +++ b/tests/tcg/aarch64/sme-fmopa-2.c | 387 | + res = 0x7f; |
96 | @@ -XXX,XX +XXX,XX @@ | 388 | + } |
389 | + } | ||
390 | + return res; | ||
391 | +} | ||
392 | + | ||
393 | +/* Perform 16-bit signed saturating subtraction. */ | ||
394 | +static inline uint16_t sub16_sat(uint16_t a, uint16_t b) | ||
395 | +{ | ||
396 | + uint16_t res; | ||
397 | + | ||
398 | + res = a - b; | ||
399 | + if (((res ^ a) & 0x8000) && ((a ^ b) & 0x8000)) { | ||
400 | + if (a & 0x8000) { | ||
401 | + res = 0x8000; | ||
402 | + } else { | ||
403 | + res = 0x7fff; | ||
404 | + } | ||
405 | + } | ||
406 | + return res; | ||
407 | +} | ||
408 | + | ||
409 | +/* Perform 8-bit signed saturating subtraction. */ | ||
410 | +static inline uint8_t sub8_sat(uint8_t a, uint8_t b) | ||
411 | +{ | ||
412 | + uint8_t res; | ||
413 | + | ||
414 | + res = a - b; | ||
415 | + if (((res ^ a) & 0x80) && ((a ^ b) & 0x80)) { | ||
416 | + if (a & 0x80) { | ||
417 | + res = 0x80; | ||
418 | + } else { | ||
419 | + res = 0x7f; | ||
420 | + } | ||
421 | + } | ||
422 | + return res; | ||
423 | +} | ||
424 | + | ||
425 | +#define ADD16(a, b, n) RESULT(add16_sat(a, b), n, 16); | ||
426 | +#define SUB16(a, b, n) RESULT(sub16_sat(a, b), n, 16); | ||
427 | +#define ADD8(a, b, n) RESULT(add8_sat(a, b), n, 8); | ||
428 | +#define SUB8(a, b, n) RESULT(sub8_sat(a, b), n, 8); | ||
429 | +#define PFX q | ||
430 | + | ||
431 | +#include "op_addsub.c.inc" | ||
432 | + | ||
433 | +/* Unsigned saturating arithmetic. */ | ||
434 | +static inline uint16_t add16_usat(uint16_t a, uint16_t b) | ||
435 | +{ | ||
436 | + uint16_t res; | ||
437 | + res = a + b; | ||
438 | + if (res < a) { | ||
439 | + res = 0xffff; | ||
440 | + } | ||
441 | + return res; | ||
442 | +} | ||
443 | + | ||
444 | +static inline uint16_t sub16_usat(uint16_t a, uint16_t b) | ||
445 | +{ | ||
446 | + if (a > b) { | ||
447 | + return a - b; | ||
448 | + } else { | ||
449 | + return 0; | ||
450 | + } | ||
451 | +} | ||
452 | + | ||
453 | +static inline uint8_t add8_usat(uint8_t a, uint8_t b) | ||
454 | +{ | ||
455 | + uint8_t res; | ||
456 | + res = a + b; | ||
457 | + if (res < a) { | ||
458 | + res = 0xff; | ||
459 | + } | ||
460 | + return res; | ||
461 | +} | ||
462 | + | ||
463 | +static inline uint8_t sub8_usat(uint8_t a, uint8_t b) | ||
464 | +{ | ||
465 | + if (a > b) { | ||
466 | + return a - b; | ||
467 | + } else { | ||
468 | + return 0; | ||
469 | + } | ||
470 | +} | ||
471 | + | ||
472 | +#define ADD16(a, b, n) RESULT(add16_usat(a, b), n, 16); | ||
473 | +#define SUB16(a, b, n) RESULT(sub16_usat(a, b), n, 16); | ||
474 | +#define ADD8(a, b, n) RESULT(add8_usat(a, b), n, 8); | ||
475 | +#define SUB8(a, b, n) RESULT(sub8_usat(a, b), n, 8); | ||
476 | +#define PFX uq | ||
477 | + | ||
478 | +#include "op_addsub.c.inc" | ||
479 | + | ||
480 | +/* Signed modulo arithmetic. */ | ||
481 | +#define SARITH16(a, b, n, op) do { \ | ||
482 | + int32_t sum; \ | ||
483 | + sum = (int32_t)(int16_t)(a) op (int32_t)(int16_t)(b); \ | ||
484 | + RESULT(sum, n, 16); \ | ||
485 | + if (sum >= 0) \ | ||
486 | + ge |= 3 << (n * 2); \ | ||
487 | + } while (0) | ||
488 | + | ||
489 | +#define SARITH8(a, b, n, op) do { \ | ||
490 | + int32_t sum; \ | ||
491 | + sum = (int32_t)(int8_t)(a) op (int32_t)(int8_t)(b); \ | ||
492 | + RESULT(sum, n, 8); \ | ||
493 | + if (sum >= 0) \ | ||
494 | + ge |= 1 << n; \ | ||
495 | + } while (0) | ||
496 | + | ||
497 | + | ||
498 | +#define ADD16(a, b, n) SARITH16(a, b, n, +) | ||
499 | +#define SUB16(a, b, n) SARITH16(a, b, n, -) | ||
500 | +#define ADD8(a, b, n) SARITH8(a, b, n, +) | ||
501 | +#define SUB8(a, b, n) SARITH8(a, b, n, -) | ||
502 | +#define PFX s | ||
503 | +#define ARITH_GE | ||
504 | + | ||
505 | +#include "op_addsub.c.inc" | ||
506 | + | ||
507 | +/* Unsigned modulo arithmetic. */ | ||
508 | +#define ADD16(a, b, n) do { \ | ||
509 | + uint32_t sum; \ | ||
510 | + sum = (uint32_t)(uint16_t)(a) + (uint32_t)(uint16_t)(b); \ | ||
511 | + RESULT(sum, n, 16); \ | ||
512 | + if ((sum >> 16) == 1) \ | ||
513 | + ge |= 3 << (n * 2); \ | ||
514 | + } while (0) | ||
515 | + | ||
516 | +#define ADD8(a, b, n) do { \ | ||
517 | + uint32_t sum; \ | ||
518 | + sum = (uint32_t)(uint8_t)(a) + (uint32_t)(uint8_t)(b); \ | ||
519 | + RESULT(sum, n, 8); \ | ||
520 | + if ((sum >> 8) == 1) \ | ||
521 | + ge |= 1 << n; \ | ||
522 | + } while (0) | ||
523 | + | ||
524 | +#define SUB16(a, b, n) do { \ | ||
525 | + uint32_t sum; \ | ||
526 | + sum = (uint32_t)(uint16_t)(a) - (uint32_t)(uint16_t)(b); \ | ||
527 | + RESULT(sum, n, 16); \ | ||
528 | + if ((sum >> 16) == 0) \ | ||
529 | + ge |= 3 << (n * 2); \ | ||
530 | + } while (0) | ||
531 | + | ||
532 | +#define SUB8(a, b, n) do { \ | ||
533 | + uint32_t sum; \ | ||
534 | + sum = (uint32_t)(uint8_t)(a) - (uint32_t)(uint8_t)(b); \ | ||
535 | + RESULT(sum, n, 8); \ | ||
536 | + if ((sum >> 8) == 0) \ | ||
537 | + ge |= 1 << n; \ | ||
538 | + } while (0) | ||
539 | + | ||
540 | +#define PFX u | ||
541 | +#define ARITH_GE | ||
542 | + | ||
543 | +#include "op_addsub.c.inc" | ||
544 | + | ||
545 | +/* Halved signed arithmetic. */ | ||
546 | +#define ADD16(a, b, n) \ | ||
547 | + RESULT(((int32_t)(int16_t)(a) + (int32_t)(int16_t)(b)) >> 1, n, 16) | ||
548 | +#define SUB16(a, b, n) \ | ||
549 | + RESULT(((int32_t)(int16_t)(a) - (int32_t)(int16_t)(b)) >> 1, n, 16) | ||
550 | +#define ADD8(a, b, n) \ | ||
551 | + RESULT(((int32_t)(int8_t)(a) + (int32_t)(int8_t)(b)) >> 1, n, 8) | ||
552 | +#define SUB8(a, b, n) \ | ||
553 | + RESULT(((int32_t)(int8_t)(a) - (int32_t)(int8_t)(b)) >> 1, n, 8) | ||
554 | +#define PFX sh | ||
555 | + | ||
556 | +#include "op_addsub.c.inc" | ||
557 | + | ||
558 | +/* Halved unsigned arithmetic. */ | ||
559 | +#define ADD16(a, b, n) \ | ||
560 | + RESULT(((uint32_t)(uint16_t)(a) + (uint32_t)(uint16_t)(b)) >> 1, n, 16) | ||
561 | +#define SUB16(a, b, n) \ | ||
562 | + RESULT(((uint32_t)(uint16_t)(a) - (uint32_t)(uint16_t)(b)) >> 1, n, 16) | ||
563 | +#define ADD8(a, b, n) \ | ||
564 | + RESULT(((uint32_t)(uint8_t)(a) + (uint32_t)(uint8_t)(b)) >> 1, n, 8) | ||
565 | +#define SUB8(a, b, n) \ | ||
566 | + RESULT(((uint32_t)(uint8_t)(a) - (uint32_t)(uint8_t)(b)) >> 1, n, 8) | ||
567 | +#define PFX uh | ||
568 | + | ||
569 | +#include "op_addsub.c.inc" | ||
570 | + | ||
571 | +static inline uint8_t do_usad(uint8_t a, uint8_t b) | ||
572 | +{ | ||
573 | + if (a > b) { | ||
574 | + return a - b; | ||
575 | + } else { | ||
576 | + return b - a; | ||
577 | + } | ||
578 | +} | ||
579 | + | ||
580 | +/* Unsigned sum of absolute byte differences. */ | ||
581 | +uint32_t HELPER(usad8)(uint32_t a, uint32_t b) | ||
582 | +{ | ||
583 | + uint32_t sum; | ||
584 | + sum = do_usad(a, b); | ||
585 | + sum += do_usad(a >> 8, b >> 8); | ||
586 | + sum += do_usad(a >> 16, b >> 16); | ||
587 | + sum += do_usad(a >> 24, b >> 24); | ||
588 | + return sum; | ||
589 | +} | ||
590 | + | ||
591 | +/* For ARMv6 SEL instruction. */ | ||
592 | +uint32_t HELPER(sel_flags)(uint32_t flags, uint32_t a, uint32_t b) | ||
593 | +{ | ||
594 | + uint32_t mask; | ||
595 | + | ||
596 | + mask = 0; | ||
597 | + if (flags & 1) { | ||
598 | + mask |= 0xff; | ||
599 | + } | ||
600 | + if (flags & 2) { | ||
601 | + mask |= 0xff00; | ||
602 | + } | ||
603 | + if (flags & 4) { | ||
604 | + mask |= 0xff0000; | ||
605 | + } | ||
606 | + if (flags & 8) { | ||
607 | + mask |= 0xff000000; | ||
608 | + } | ||
609 | + return (a & mask) | (b & ~mask); | ||
610 | +} | ||
611 | + | ||
97 | +/* | 612 | +/* |
98 | + * SME outer product, FZ vs FZ16 | 613 | + * CRC helpers. |
99 | + * SPDX-License-Identifier: GPL-2.0-or-later | 614 | + * The upper bytes of val (above the number specified by 'bytes') must have |
615 | + * been zeroed out by the caller. | ||
100 | + */ | 616 | + */ |
101 | + | 617 | +uint32_t HELPER(crc32)(uint32_t acc, uint32_t val, uint32_t bytes) |
102 | +#include <stdint.h> | 618 | +{ |
103 | +#include <stdio.h> | 619 | + uint8_t buf[4]; |
104 | + | 620 | + |
105 | +static void test_fmopa(uint32_t *result) | 621 | + stl_le_p(buf, val); |
106 | +{ | 622 | + |
107 | + asm(".arch_extension sme\n\t" | 623 | + /* zlib crc32 converts the accumulator and output to one's complement. */ |
108 | + "smstart\n\t" /* Z*, P* and ZArray cleared */ | 624 | + return crc32(acc ^ 0xffffffff, buf, bytes) ^ 0xffffffff; |
109 | + "ptrue p2.b, vl16\n\t" /* Limit vector length to 16 */ | 625 | +} |
110 | + "ptrue p5.b, vl16\n\t" | 626 | + |
111 | + "movi d0, #0x00ff\n\t" /* fp16 denormal */ | 627 | +uint32_t HELPER(crc32c)(uint32_t acc, uint32_t val, uint32_t bytes) |
112 | + "movi d16, #0x00ff\n\t" | 628 | +{ |
113 | + "mov w15, #0x0001000000\n\t" /* FZ=1, FZ16=0 */ | 629 | + uint8_t buf[4]; |
114 | + "msr fpcr, x15\n\t" | 630 | + |
115 | + "fmopa za3.s, p2/m, p5/m, z16.h, z0.h\n\t" | 631 | + stl_le_p(buf, val); |
116 | + "mov w15, #0\n\t" | 632 | + |
117 | + "st1w {za3h.s[w15, 0]}, p2, [%0]\n\t" | 633 | + /* Linux crc32c converts the output to one's complement. */ |
118 | + "add %0, %0, #16\n\t" | 634 | + return crc32c(acc, buf, bytes) ^ 0xffffffff; |
119 | + "st1w {za3h.s[w15, 1]}, p2, [%0]\n\t" | 635 | +} |
120 | + "mov w15, #2\n\t" | 636 | diff --git a/target/arm/op_addsub.h b/target/arm/tcg/op_addsub.c.inc |
121 | + "add %0, %0, #16\n\t" | 637 | similarity index 100% |
122 | + "st1w {za3h.s[w15, 0]}, p2, [%0]\n\t" | 638 | rename from target/arm/op_addsub.h |
123 | + "add %0, %0, #16\n\t" | 639 | rename to target/arm/tcg/op_addsub.c.inc |
124 | + "st1w {za3h.s[w15, 1]}, p2, [%0]\n\t" | 640 | diff --git a/target/arm/tcg/meson.build b/target/arm/tcg/meson.build |
125 | + "smstop" | ||
126 | + : "+r"(result) : | ||
127 | + : "x15", "x16", "p2", "p5", "d0", "d16", "memory"); | ||
128 | +} | ||
129 | + | ||
130 | +int main(void) | ||
131 | +{ | ||
132 | + uint32_t result[4 * 4] = { }; | ||
133 | + | ||
134 | + test_fmopa(result); | ||
135 | + | ||
136 | + if (result[0] != 0x2f7e0100) { | ||
137 | + printf("Test failed: Incorrect output in first 4 bytes\n" | ||
138 | + "Expected: %08x\n" | ||
139 | + "Got: %08x\n", | ||
140 | + 0x2f7e0100, result[0]); | ||
141 | + return 1; | ||
142 | + } | ||
143 | + | ||
144 | + for (int i = 1; i < 16; ++i) { | ||
145 | + if (result[i] != 0) { | ||
146 | + printf("Test failed: Non-zero word at position %d\n", i); | ||
147 | + return 1; | ||
148 | + } | ||
149 | + } | ||
150 | + | ||
151 | + return 0; | ||
152 | +} | ||
153 | diff --git a/tests/tcg/aarch64/sme-fmopa-3.c b/tests/tcg/aarch64/sme-fmopa-3.c | ||
154 | new file mode 100644 | ||
155 | index XXXXXXX..XXXXXXX | ||
156 | --- /dev/null | ||
157 | +++ b/tests/tcg/aarch64/sme-fmopa-3.c | ||
158 | @@ -XXX,XX +XXX,XX @@ | ||
159 | +/* | ||
160 | + * SME outer product, [ 1 2 3 4 ] squared | ||
161 | + * SPDX-License-Identifier: GPL-2.0-or-later | ||
162 | + */ | ||
163 | + | ||
164 | +#include <stdio.h> | ||
165 | +#include <stdint.h> | ||
166 | +#include <string.h> | ||
167 | +#include <math.h> | ||
168 | + | ||
169 | +static const float i_1234[4] = { | ||
170 | + 1.0f, 2.0f, 3.0f, 4.0f | ||
171 | +}; | ||
172 | + | ||
173 | +static const float expected[4] = { | ||
174 | + 4.515625f, 5.750000f, 6.984375f, 8.218750f | ||
175 | +}; | ||
176 | + | ||
177 | +static void test_fmopa(float *result) | ||
178 | +{ | ||
179 | + asm(".arch_extension sme\n\t" | ||
180 | + "smstart\n\t" /* ZArray cleared */ | ||
181 | + "ptrue p2.b, vl16\n\t" /* Limit vector length to 16 */ | ||
182 | + "ld1w {z0.s}, p2/z, [%1]\n\t" | ||
183 | + "mov w15, #0\n\t" | ||
184 | + "mov za3h.s[w15, 0], p2/m, z0.s\n\t" | ||
185 | + "mov za3h.s[w15, 1], p2/m, z0.s\n\t" | ||
186 | + "mov w15, #2\n\t" | ||
187 | + "mov za3h.s[w15, 0], p2/m, z0.s\n\t" | ||
188 | + "mov za3h.s[w15, 1], p2/m, z0.s\n\t" | ||
189 | + "msr fpcr, xzr\n\t" | ||
190 | + "fmopa za3.s, p2/m, p2/m, z0.h, z0.h\n\t" | ||
191 | + "mov w15, #0\n\t" | ||
192 | + "st1w {za3h.s[w15, 0]}, p2, [%0]\n" | ||
193 | + "add %0, %0, #16\n\t" | ||
194 | + "st1w {za3h.s[w15, 1]}, p2, [%0]\n\t" | ||
195 | + "mov w15, #2\n\t" | ||
196 | + "add %0, %0, #16\n\t" | ||
197 | + "st1w {za3h.s[w15, 0]}, p2, [%0]\n\t" | ||
198 | + "add %0, %0, #16\n\t" | ||
199 | + "st1w {za3h.s[w15, 1]}, p2, [%0]\n\t" | ||
200 | + "smstop" | ||
201 | + : "+r"(result) : "r"(i_1234) | ||
202 | + : "x15", "x16", "p2", "d0", "memory"); | ||
203 | +} | ||
204 | + | ||
205 | +int main(void) | ||
206 | +{ | ||
207 | + float result[4 * 4] = { }; | ||
208 | + int ret = 0; | ||
209 | + | ||
210 | + test_fmopa(result); | ||
211 | + | ||
212 | + for (int i = 0; i < 4; i++) { | ||
213 | + float actual = result[i]; | ||
214 | + if (fabsf(actual - expected[i]) > 0.001f) { | ||
215 | + printf("Test failed at element %d: Expected %f, got %f\n", | ||
216 | + i, expected[i], actual); | ||
217 | + ret = 1; | ||
218 | + } | ||
219 | + } | ||
220 | + return ret; | ||
221 | +} | ||
222 | diff --git a/tests/tcg/aarch64/Makefile.target b/tests/tcg/aarch64/Makefile.target | ||
223 | index XXXXXXX..XXXXXXX 100644 | 641 | index XXXXXXX..XXXXXXX 100644 |
224 | --- a/tests/tcg/aarch64/Makefile.target | 642 | --- a/target/arm/tcg/meson.build |
225 | +++ b/tests/tcg/aarch64/Makefile.target | 643 | +++ b/target/arm/tcg/meson.build |
226 | @@ -XXX,XX +XXX,XX @@ endif | 644 | @@ -XXX,XX +XXX,XX @@ arm_ss.add(files( |
227 | 645 | 'tlb_helper.c', | |
228 | # SME Tests | 646 | 'vec_helper.c', |
229 | ifneq ($(CROSS_AS_HAS_ARMV9_SME),) | 647 | 'tlb-insns.c', |
230 | -AARCH64_TESTS += sme-outprod1 sme-smopa-1 sme-smopa-2 | 648 | + 'arith_helper.c', |
231 | -sme-outprod1 sme-smopa-1 sme-smopa-2: CFLAGS += $(CROSS_AS_HAS_ARMV9_SME) | 649 | )) |
232 | +SME_TESTS = sme-outprod1 sme-smopa-1 sme-smopa-2 sme-fmopa-1 sme-fmopa-2 sme-fmopa-3 | 650 | |
233 | +AARCH64_TESTS += $(SME_TESTS) | 651 | arm_ss.add(when: 'TARGET_AARCH64', if_true: files( |
234 | +$(SME_TESTS): CFLAGS += $(CROSS_AS_HAS_ARMV9_SME) | ||
235 | endif | ||
236 | |||
237 | # System Registers Tests | ||
238 | -- | 652 | -- |
239 | 2.34.1 | 653 | 2.34.1 |
240 | 654 | ||
241 | 655 | diff view generated by jsdifflib |
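Aside on the arith_helper.c hunk above: its comment notes that signed overflow is undefined behaviour in C, so the saturating helpers do all arithmetic on unsigned types and derive overflow from the sign bits. A minimal standalone sketch of that pattern (plain ISO C, not QEMU code; the _demo name is made up):

#include <stdint.h>
#include <stdio.h>

/*
 * Same pattern as add16_sat() in the hunk above: add on an unsigned type
 * (wraparound is well defined), then detect signed overflow from the sign
 * bits and clamp to the most negative/positive 16-bit pattern.
 */
static uint16_t add16_sat_demo(uint16_t a, uint16_t b)
{
    uint16_t res = a + b;

    if (((res ^ a) & 0x8000) && !((a ^ b) & 0x8000)) {
        res = (a & 0x8000) ? 0x8000 : 0x7fff;
    }
    return res;
}

int main(void)
{
    printf("%d\n", (int16_t)add16_sat_demo(0x7000, 0x2000)); /* 32767  */
    printf("%d\n", (int16_t)add16_sat_demo(0x9000, 0x9000)); /* -32768 */
    return 0;
}
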
1 | From: Akihiko Odaki <akihiko.odaki@daynix.com> | 1 | From: Pierrick Bouvier <pierrick.bouvier@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | Asahi Linux supports KVM but lacks PMU support. | 3 | Before changing the default pauth algorithm, we need to make sure the
4 | current default (QARMA5) can still be selected.
4 | 5 | ||
5 | Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com> | 6 | $ qemu-system-aarch64 -cpu max,pauth-qarma5=on ... |
6 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | 7 | |
7 | Message-id: 20240716-pmu-v3-1-8c7c1858a227@daynix.com | 8 | Signed-off-by: Pierrick Bouvier <pierrick.bouvier@linaro.org> |
9 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
10 | Message-id: 20241219183211.3493974-2-pierrick.bouvier@linaro.org | ||
8 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 11 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
9 | --- | 12 | --- |
10 | tests/qtest/arm-cpu-features.c | 13 ++++++++----- | 13 | docs/system/arm/cpu-features.rst | 5 ++++- |
11 | 1 file changed, 8 insertions(+), 5 deletions(-) | 14 | target/arm/cpu.h | 1 + |
15 | target/arm/arm-qmp-cmds.c | 2 +- | ||
16 | target/arm/cpu64.c | 20 ++++++++++++++------ | ||
17 | tests/qtest/arm-cpu-features.c | 15 +++++++++++---- | ||
18 | 5 files changed, 31 insertions(+), 12 deletions(-) | ||
12 | 19 | ||
20 | diff --git a/docs/system/arm/cpu-features.rst b/docs/system/arm/cpu-features.rst | ||
21 | index XXXXXXX..XXXXXXX 100644 | ||
22 | --- a/docs/system/arm/cpu-features.rst | ||
23 | +++ b/docs/system/arm/cpu-features.rst | ||
24 | @@ -XXX,XX +XXX,XX @@ Below is the list of TCG VCPU features and their descriptions. | ||
25 | ``pauth-qarma3`` | ||
26 | When ``pauth`` is enabled, select the architected QARMA3 algorithm. | ||
27 | |||
28 | -Without either ``pauth-impdef`` or ``pauth-qarma3`` enabled, | ||
29 | +``pauth-qarma5`` | ||
30 | + When ``pauth`` is enabled, select the architected QARMA5 algorithm. | ||
31 | + | ||
32 | +Without ``pauth-impdef``, ``pauth-qarma3`` or ``pauth-qarma5`` enabled, | ||
33 | the architected QARMA5 algorithm is used. The architected QARMA5 | ||
34 | and QARMA3 algorithms have good cryptographic properties, but can | ||
35 | be quite slow to emulate. The impdef algorithm used by QEMU is | ||
36 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h | ||
37 | index XXXXXXX..XXXXXXX 100644 | ||
38 | --- a/target/arm/cpu.h | ||
39 | +++ b/target/arm/cpu.h | ||
40 | @@ -XXX,XX +XXX,XX @@ struct ArchCPU { | ||
41 | bool prop_pauth; | ||
42 | bool prop_pauth_impdef; | ||
43 | bool prop_pauth_qarma3; | ||
44 | + bool prop_pauth_qarma5; | ||
45 | bool prop_lpa2; | ||
46 | |||
47 | /* DCZ blocksize, in log_2(words), ie low 4 bits of DCZID_EL0 */ | ||
48 | diff --git a/target/arm/arm-qmp-cmds.c b/target/arm/arm-qmp-cmds.c | ||
49 | index XXXXXXX..XXXXXXX 100644 | ||
50 | --- a/target/arm/arm-qmp-cmds.c | ||
51 | +++ b/target/arm/arm-qmp-cmds.c | ||
52 | @@ -XXX,XX +XXX,XX @@ static const char *cpu_model_advertised_features[] = { | ||
53 | "sve640", "sve768", "sve896", "sve1024", "sve1152", "sve1280", | ||
54 | "sve1408", "sve1536", "sve1664", "sve1792", "sve1920", "sve2048", | ||
55 | "kvm-no-adjvtime", "kvm-steal-time", | ||
56 | - "pauth", "pauth-impdef", "pauth-qarma3", | ||
57 | + "pauth", "pauth-impdef", "pauth-qarma3", "pauth-qarma5", | ||
58 | NULL | ||
59 | }; | ||
60 | |||
61 | diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c | ||
62 | index XXXXXXX..XXXXXXX 100644 | ||
63 | --- a/target/arm/cpu64.c | ||
64 | +++ b/target/arm/cpu64.c | ||
65 | @@ -XXX,XX +XXX,XX @@ void arm_cpu_pauth_finalize(ARMCPU *cpu, Error **errp) | ||
66 | } | ||
67 | |||
68 | if (cpu->prop_pauth) { | ||
69 | - if (cpu->prop_pauth_impdef && cpu->prop_pauth_qarma3) { | ||
70 | + if ((cpu->prop_pauth_impdef && cpu->prop_pauth_qarma3) || | ||
71 | + (cpu->prop_pauth_impdef && cpu->prop_pauth_qarma5) || | ||
72 | + (cpu->prop_pauth_qarma3 && cpu->prop_pauth_qarma5)) { | ||
73 | error_setg(errp, | ||
74 | - "cannot enable both pauth-impdef and pauth-qarma3"); | ||
75 | + "cannot enable pauth-impdef, pauth-qarma3 and " | ||
76 | + "pauth-qarma5 at the same time"); | ||
77 | return; | ||
78 | } | ||
79 | |||
80 | @@ -XXX,XX +XXX,XX @@ void arm_cpu_pauth_finalize(ARMCPU *cpu, Error **errp) | ||
81 | } else if (cpu->prop_pauth_qarma3) { | ||
82 | isar2 = FIELD_DP64(isar2, ID_AA64ISAR2, APA3, features); | ||
83 | isar2 = FIELD_DP64(isar2, ID_AA64ISAR2, GPA3, 1); | ||
84 | - } else { | ||
85 | + } else { /* default is pauth-qarma5 */ | ||
86 | isar1 = FIELD_DP64(isar1, ID_AA64ISAR1, APA, features); | ||
87 | isar1 = FIELD_DP64(isar1, ID_AA64ISAR1, GPA, 1); | ||
88 | } | ||
89 | - } else if (cpu->prop_pauth_impdef || cpu->prop_pauth_qarma3) { | ||
90 | - error_setg(errp, "cannot enable pauth-impdef or " | ||
91 | - "pauth-qarma3 without pauth"); | ||
92 | + } else if (cpu->prop_pauth_impdef || | ||
93 | + cpu->prop_pauth_qarma3 || | ||
94 | + cpu->prop_pauth_qarma5) { | ||
95 | + error_setg(errp, "cannot enable pauth-impdef, pauth-qarma3 or " | ||
96 | + "pauth-qarma5 without pauth"); | ||
97 | error_append_hint(errp, "Add pauth=on to the CPU property list.\n"); | ||
98 | } | ||
99 | } | ||
100 | @@ -XXX,XX +XXX,XX @@ static const Property arm_cpu_pauth_impdef_property = | ||
101 | DEFINE_PROP_BOOL("pauth-impdef", ARMCPU, prop_pauth_impdef, false); | ||
102 | static const Property arm_cpu_pauth_qarma3_property = | ||
103 | DEFINE_PROP_BOOL("pauth-qarma3", ARMCPU, prop_pauth_qarma3, false); | ||
104 | +static Property arm_cpu_pauth_qarma5_property = | ||
105 | + DEFINE_PROP_BOOL("pauth-qarma5", ARMCPU, prop_pauth_qarma5, false); | ||
106 | |||
107 | void aarch64_add_pauth_properties(Object *obj) | ||
108 | { | ||
109 | @@ -XXX,XX +XXX,XX @@ void aarch64_add_pauth_properties(Object *obj) | ||
110 | } else { | ||
111 | qdev_property_add_static(DEVICE(obj), &arm_cpu_pauth_impdef_property); | ||
112 | qdev_property_add_static(DEVICE(obj), &arm_cpu_pauth_qarma3_property); | ||
113 | + qdev_property_add_static(DEVICE(obj), &arm_cpu_pauth_qarma5_property); | ||
114 | } | ||
115 | } | ||
116 | |||
13 | diff --git a/tests/qtest/arm-cpu-features.c b/tests/qtest/arm-cpu-features.c | 117 | diff --git a/tests/qtest/arm-cpu-features.c b/tests/qtest/arm-cpu-features.c |
14 | index XXXXXXX..XXXXXXX 100644 | 118 | index XXXXXXX..XXXXXXX 100644 |
15 | --- a/tests/qtest/arm-cpu-features.c | 119 | --- a/tests/qtest/arm-cpu-features.c |
16 | +++ b/tests/qtest/arm-cpu-features.c | 120 | +++ b/tests/qtest/arm-cpu-features.c |
17 | @@ -XXX,XX +XXX,XX @@ static void test_query_cpu_model_expansion_kvm(const void *data) | 121 | @@ -XXX,XX +XXX,XX @@ static void pauth_tests_default(QTestState *qts, const char *cpu_type) |
18 | assert_set_feature(qts, "host", "kvm-no-adjvtime", false); | 122 | assert_has_feature_enabled(qts, cpu_type, "pauth"); |
19 | 123 | assert_has_feature_disabled(qts, cpu_type, "pauth-impdef"); | |
20 | if (g_str_equal(qtest_get_arch(), "aarch64")) { | 124 | assert_has_feature_disabled(qts, cpu_type, "pauth-qarma3"); |
21 | + bool kvm_supports_pmu; | 125 | + assert_has_feature_disabled(qts, cpu_type, "pauth-qarma5"); |
22 | bool kvm_supports_steal_time; | 126 | assert_set_feature(qts, cpu_type, "pauth", false); |
23 | bool kvm_supports_sve; | 127 | assert_set_feature(qts, cpu_type, "pauth", true); |
24 | char max_name[8], name[8]; | 128 | assert_set_feature(qts, cpu_type, "pauth-impdef", true); |
25 | @@ -XXX,XX +XXX,XX @@ static void test_query_cpu_model_expansion_kvm(const void *data) | 129 | assert_set_feature(qts, cpu_type, "pauth-impdef", false); |
26 | 130 | assert_set_feature(qts, cpu_type, "pauth-qarma3", true); | |
27 | assert_has_feature_enabled(qts, "host", "aarch64"); | 131 | assert_set_feature(qts, cpu_type, "pauth-qarma3", false); |
28 | 132 | + assert_set_feature(qts, cpu_type, "pauth-qarma5", true); | |
29 | - /* Enabling and disabling pmu should always work. */ | 133 | + assert_set_feature(qts, cpu_type, "pauth-qarma5", false); |
30 | - assert_has_feature_enabled(qts, "host", "pmu"); | 134 | assert_error(qts, cpu_type, |
31 | - assert_set_feature(qts, "host", "pmu", false); | 135 | - "cannot enable pauth-impdef or pauth-qarma3 without pauth", |
32 | - assert_set_feature(qts, "host", "pmu", true); | 136 | + "cannot enable pauth-impdef, pauth-qarma3 or pauth-qarma5 without pauth", |
33 | - | 137 | "{ 'pauth': false, 'pauth-impdef': true }"); |
34 | /* | 138 | assert_error(qts, cpu_type, |
35 | * Some features would be enabled by default, but they're disabled | 139 | - "cannot enable pauth-impdef or pauth-qarma3 without pauth", |
36 | * because this instance of KVM doesn't support them. Test that the | 140 | + "cannot enable pauth-impdef, pauth-qarma3 or pauth-qarma5 without pauth", |
37 | @@ -XXX,XX +XXX,XX @@ static void test_query_cpu_model_expansion_kvm(const void *data) | 141 | "{ 'pauth': false, 'pauth-qarma3': true }"); |
38 | assert_has_feature(qts, "host", "sve"); | 142 | assert_error(qts, cpu_type, |
39 | 143 | - "cannot enable both pauth-impdef and pauth-qarma3", | |
40 | resp = do_query_no_props(qts, "host"); | 144 | - "{ 'pauth': true, 'pauth-impdef': true, 'pauth-qarma3': true }"); |
41 | + kvm_supports_pmu = resp_get_feature(resp, "pmu"); | 145 | + "cannot enable pauth-impdef, pauth-qarma3 or pauth-qarma5 without pauth", |
42 | kvm_supports_steal_time = resp_get_feature(resp, "kvm-steal-time"); | 146 | + "{ 'pauth': false, 'pauth-qarma5': true }"); |
43 | kvm_supports_sve = resp_get_feature(resp, "sve"); | 147 | + assert_error(qts, cpu_type, |
44 | vls = resp_get_sve_vls(resp); | 148 | + "cannot enable pauth-impdef, pauth-qarma3 and pauth-qarma5 at the same time", |
45 | qobject_unref(resp); | 149 | + "{ 'pauth': true, 'pauth-impdef': true, 'pauth-qarma3': true," |
46 | 150 | + " 'pauth-qarma5': true }"); | |
47 | + if (kvm_supports_pmu) { | 151 | } |
48 | + /* If we have pmu then we should be able to toggle it. */ | 152 | |
49 | + assert_set_feature(qts, "host", "pmu", false); | 153 | static void test_query_cpu_model_expansion(const void *data) |
50 | + assert_set_feature(qts, "host", "pmu", true); | ||
51 | + } | ||
52 | + | ||
53 | if (kvm_supports_steal_time) { | ||
54 | /* If we have steal-time then we should be able to toggle it. */ | ||
55 | assert_set_feature(qts, "host", "kvm-steal-time", false); | ||
56 | -- | 154 | -- |
57 | 2.34.1 | 155 | 2.34.1 |
58 | |||
59 | diff view generated by jsdifflib |
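The pauth patch above makes pauth-impdef, pauth-qarma3 and pauth-qarma5 mutually exclusive, and rejects any of them when pauth itself is disabled; with no selector set, the architected QARMA5 algorithm remains the default. A standalone sketch of that validation shape, outside QEMU's property machinery (check_pauth_props is an illustrative name, not a QEMU API):

#include <stdbool.h>
#include <stdio.h>

/* At most one algorithm selector, and none without pauth itself. */
static const char *check_pauth_props(bool pauth, bool impdef,
                                     bool qarma3, bool qarma5)
{
    int selectors = impdef + qarma3 + qarma5;

    if (!pauth && selectors > 0) {
        return "cannot enable pauth-impdef, pauth-qarma3 or "
               "pauth-qarma5 without pauth";
    }
    if (selectors > 1) {
        return "cannot enable pauth-impdef, pauth-qarma3 and "
               "pauth-qarma5 at the same time";
    }
    return NULL; /* no selector set means the architected QARMA5 default */
}

int main(void)
{
    const char *err;

    err = check_pauth_props(true, false, false, true);  /* pauth-qarma5=on */
    printf("%s\n", err ? err : "ok");

    err = check_pauth_props(false, true, false, false); /* impdef without pauth */
    printf("%s\n", err ? err : "ok");
    return 0;
}
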
1 | From: Mostafa Saleh <smostafa@google.com> | 1 | The pauth-3 test explicitly tests that a computation of the |
---|---|---|---|
2 | pointer-authentication produces the expected result. This means that | ||
3 | it must be run with the QARMA5 algorithm. | ||
2 | 4 | ||
3 | According to the SMMU architecture specification (ARM IHI 0070 F.b), | 5 | Explicitly set the pauth algorithm when running this test, so that it |
4 | in “3.4 Address sizes” | 6 | doesn't break when we change the default algorithm the 'max' CPU |
5 | The address output from the translation causes a stage 1 Address Size | 7 | uses. |
6 | fault if it exceeds the range of the effective IPA size for the given CD. | ||
7 | 8 | ||
8 | However, this check was missing. | ||
9 | |||
10 | There is already a similar check for stage-2 against effective PA. | ||
11 | |||
12 | Reviewed-by: Jean-Philippe Brucker <jean-philippe@linaro.org> | ||
13 | Reviewed-by: Eric Auger <eric.auger@redhat.com> | ||
14 | Signed-off-by: Mostafa Saleh <smostafa@google.com> | ||
15 | Message-id: 20240715084519.1189624-2-smostafa@google.com | ||
16 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 9 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
17 | --- | 10 | --- |
18 | hw/arm/smmu-common.c | 10 ++++++++++ | 11 | tests/tcg/aarch64/Makefile.softmmu-target | 3 +++ |
19 | 1 file changed, 10 insertions(+) | 12 | 1 file changed, 3 insertions(+) |
20 | 13 | ||
21 | diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c | 14 | diff --git a/tests/tcg/aarch64/Makefile.softmmu-target b/tests/tcg/aarch64/Makefile.softmmu-target |
22 | index XXXXXXX..XXXXXXX 100644 | 15 | index XXXXXXX..XXXXXXX 100644 |
23 | --- a/hw/arm/smmu-common.c | 16 | --- a/tests/tcg/aarch64/Makefile.softmmu-target |
24 | +++ b/hw/arm/smmu-common.c | 17 | +++ b/tests/tcg/aarch64/Makefile.softmmu-target |
25 | @@ -XXX,XX +XXX,XX @@ static int smmu_ptw_64_s1(SMMUTransCfg *cfg, | 18 | @@ -XXX,XX +XXX,XX @@ EXTRA_RUNS+=run-memory-replay |
26 | goto error; | 19 | |
27 | } | 20 | ifneq ($(CROSS_CC_HAS_ARMV8_3),) |
28 | 21 | pauth-3: CFLAGS += $(CROSS_CC_HAS_ARMV8_3) | |
29 | + /* | 22 | +# This test explicitly checks the output of the pauth operation so we |
30 | + * The address output from the translation causes a stage 1 Address | 23 | +# must force the use of the QARMA5 algorithm for it. |
31 | + * Size fault if it exceeds the range of the effective IPA size for | 24 | +run-pauth-3: QEMU_BASE_MACHINE=-M virt -cpu max,pauth-qarma5=on -display none |
32 | + * the given CD. | 25 | else |
33 | + */ | 26 | pauth-3: |
34 | + if (gpa >= (1ULL << cfg->oas)) { | 27 | $(call skip-test, "BUILD of $@", "missing compiler support") |
35 | + info->type = SMMU_PTW_ERR_ADDR_SIZE; | ||
36 | + goto error; | ||
37 | + } | ||
38 | + | ||
39 | tlbe->entry.translated_addr = gpa; | ||
40 | tlbe->entry.iova = iova & ~mask; | ||
41 | tlbe->entry.addr_mask = mask; | ||
42 | -- | 28 | -- |
43 | 2.34.1 | 29 | 2.34.1 |
44 | |||
45 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Mostafa Saleh <smostafa@google.com> | ||
2 | 1 | ||
3 | The SMMUv3 spec (ARM IHI 0070 F.b - 7.3 Event records) defines the | ||
4 | class of events faults as: | ||
5 | |||
6 | CLASS: The class of the operation that caused the fault: | ||
7 | - 0b00: CD, CD fetch. | ||
8 | - 0b01: TTD, Stage 1 translation table fetch. | ||
9 | - 0b10: IN, Input address | ||
10 | |||
11 | However, this value was not set and left as 0 which means CD and not | ||
12 | IN (0b10). | ||
13 | |||
14 | Another problem was that stage-2 class is considered IN not TT for | ||
15 | EABT, according to the spec: | ||
16 | Translation of an IPA after successful stage 1 translation (or, | ||
17 | in stage 2-only configuration, an input IPA) | ||
18 | - S2 == 1 (stage 2), CLASS == IN (Input to stage) | ||
19 | |||
20 | This would change soon when nested translations are supported. | ||
21 | |||
22 | While at it, add an enum for class as it would be used for nesting. | ||
23 | However, at the moment stage-1 and stage-2 use the same class values, | ||
24 | except for EABT. | ||
25 | |||
26 | Fixes: 9bde7f0674 “hw/arm/smmuv3: Implement translate callback” | ||
27 | Signed-off-by: Mostafa Saleh <smostafa@google.com> | ||
28 | Reviewed-by: Jean-Philippe Brucker <jean-philippe@linaro.org> | ||
29 | Reviewed-by: Eric Auger <eric.auger@redhat.com> | ||
30 | Message-id: 20240715084519.1189624-4-smostafa@google.com | ||
31 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
32 | --- | ||
33 | hw/arm/smmuv3-internal.h | 6 ++++++ | ||
34 | hw/arm/smmuv3.c | 8 +++++++- | ||
35 | 2 files changed, 13 insertions(+), 1 deletion(-) | ||
36 | |||
37 | diff --git a/hw/arm/smmuv3-internal.h b/hw/arm/smmuv3-internal.h | ||
38 | index XXXXXXX..XXXXXXX 100644 | ||
39 | --- a/hw/arm/smmuv3-internal.h | ||
40 | +++ b/hw/arm/smmuv3-internal.h | ||
41 | @@ -XXX,XX +XXX,XX @@ typedef enum SMMUTranslationStatus { | ||
42 | SMMU_TRANS_SUCCESS, | ||
43 | } SMMUTranslationStatus; | ||
44 | |||
45 | +typedef enum SMMUTranslationClass { | ||
46 | + SMMU_CLASS_CD, | ||
47 | + SMMU_CLASS_TT, | ||
48 | + SMMU_CLASS_IN, | ||
49 | +} SMMUTranslationClass; | ||
50 | + | ||
51 | /* MMIO Registers */ | ||
52 | |||
53 | REG32(IDR0, 0x0) | ||
54 | diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c | ||
55 | index XXXXXXX..XXXXXXX 100644 | ||
56 | --- a/hw/arm/smmuv3.c | ||
57 | +++ b/hw/arm/smmuv3.c | ||
58 | @@ -XXX,XX +XXX,XX @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion *mr, hwaddr addr, | ||
59 | event.type = SMMU_EVT_F_WALK_EABT; | ||
60 | event.u.f_walk_eabt.addr = addr; | ||
61 | event.u.f_walk_eabt.rnw = flag & 0x1; | ||
62 | - event.u.f_walk_eabt.class = 0x1; | ||
63 | + /* Stage-2 (only) is class IN while stage-1 is class TT */ | ||
64 | + event.u.f_walk_eabt.class = (ptw_info.stage == 2) ? | ||
65 | + SMMU_CLASS_IN : SMMU_CLASS_TT; | ||
66 | event.u.f_walk_eabt.addr2 = ptw_info.addr; | ||
67 | break; | ||
68 | case SMMU_PTW_ERR_TRANSLATION: | ||
69 | @@ -XXX,XX +XXX,XX @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion *mr, hwaddr addr, | ||
70 | event.type = SMMU_EVT_F_TRANSLATION; | ||
71 | event.u.f_translation.addr = addr; | ||
72 | event.u.f_translation.addr2 = ptw_info.addr; | ||
73 | + event.u.f_translation.class = SMMU_CLASS_IN; | ||
74 | event.u.f_translation.rnw = flag & 0x1; | ||
75 | } | ||
76 | break; | ||
77 | @@ -XXX,XX +XXX,XX @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion *mr, hwaddr addr, | ||
78 | event.type = SMMU_EVT_F_ADDR_SIZE; | ||
79 | event.u.f_addr_size.addr = addr; | ||
80 | event.u.f_addr_size.addr2 = ptw_info.addr; | ||
81 | + event.u.f_translation.class = SMMU_CLASS_IN; | ||
82 | event.u.f_addr_size.rnw = flag & 0x1; | ||
83 | } | ||
84 | break; | ||
85 | @@ -XXX,XX +XXX,XX @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion *mr, hwaddr addr, | ||
86 | event.type = SMMU_EVT_F_ACCESS; | ||
87 | event.u.f_access.addr = addr; | ||
88 | event.u.f_access.addr2 = ptw_info.addr; | ||
89 | + event.u.f_translation.class = SMMU_CLASS_IN; | ||
90 | event.u.f_access.rnw = flag & 0x1; | ||
91 | } | ||
92 | break; | ||
93 | @@ -XXX,XX +XXX,XX @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion *mr, hwaddr addr, | ||
94 | event.type = SMMU_EVT_F_PERMISSION; | ||
95 | event.u.f_permission.addr = addr; | ||
96 | event.u.f_permission.addr2 = ptw_info.addr; | ||
97 | + event.u.f_translation.class = SMMU_CLASS_IN; | ||
98 | event.u.f_permission.rnw = flag & 0x1; | ||
99 | } | ||
100 | break; | ||
101 | -- | ||
102 | 2.34.1 | ||
103 | |||
104 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Mostafa Saleh <smostafa@google.com> | ||
2 | 1 | ||
3 | Currently, translation stage is represented as an int, where 1 is stage-1 and | ||
4 | 2 is stage-2, when nested is added, 3 would be confusing to represent nesting, | ||
5 | so we use an enum instead. | ||
6 | |||
7 | While keeping the same values, this is useful for: | ||
8 | - Doing tricks with bit masks, where BIT(0) is stage-1 and BIT(1) is | ||
9 | stage-2 and both is nested. | ||
10 | - Tracing, as stage is printed as int. | ||
11 | |||
12 | Reviewed-by: Eric Auger <eric.auger@redhat.com> | ||
13 | Reviewed-by: Alex Bennée <alex.bennee@linaro.org> | ||
14 | Signed-off-by: Mostafa Saleh <smostafa@google.com> | ||
15 | Reviewed-by: Jean-Philippe Brucker <jean-philippe@linaro.org> | ||
16 | Message-id: 20240715084519.1189624-5-smostafa@google.com | ||
17 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
18 | --- | ||
19 | include/hw/arm/smmu-common.h | 11 +++++++++-- | ||
20 | hw/arm/smmu-common.c | 14 +++++++------- | ||
21 | hw/arm/smmuv3.c | 17 +++++++++-------- | ||
22 | 3 files changed, 25 insertions(+), 17 deletions(-) | ||
23 | |||
24 | diff --git a/include/hw/arm/smmu-common.h b/include/hw/arm/smmu-common.h | ||
25 | index XXXXXXX..XXXXXXX 100644 | ||
26 | --- a/include/hw/arm/smmu-common.h | ||
27 | +++ b/include/hw/arm/smmu-common.h | ||
28 | @@ -XXX,XX +XXX,XX @@ typedef enum { | ||
29 | SMMU_PTW_ERR_PERMISSION, /* Permission fault */ | ||
30 | } SMMUPTWEventType; | ||
31 | |||
32 | +/* SMMU Stage */ | ||
33 | +typedef enum { | ||
34 | + SMMU_STAGE_1 = 1, | ||
35 | + SMMU_STAGE_2, | ||
36 | + SMMU_NESTED, | ||
37 | +} SMMUStage; | ||
38 | + | ||
39 | typedef struct SMMUPTWEventInfo { | ||
40 | - int stage; | ||
41 | + SMMUStage stage; | ||
42 | SMMUPTWEventType type; | ||
43 | dma_addr_t addr; /* fetched address that induced an abort, if any */ | ||
44 | } SMMUPTWEventInfo; | ||
45 | @@ -XXX,XX +XXX,XX @@ typedef struct SMMUS2Cfg { | ||
46 | */ | ||
47 | typedef struct SMMUTransCfg { | ||
48 | /* Shared fields between stage-1 and stage-2. */ | ||
49 | - int stage; /* translation stage */ | ||
50 | + SMMUStage stage; /* translation stage */ | ||
51 | bool disabled; /* smmu is disabled */ | ||
52 | bool bypassed; /* translation is bypassed */ | ||
53 | bool aborted; /* translation is aborted */ | ||
54 | diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c | ||
55 | index XXXXXXX..XXXXXXX 100644 | ||
56 | --- a/hw/arm/smmu-common.c | ||
57 | +++ b/hw/arm/smmu-common.c | ||
58 | @@ -XXX,XX +XXX,XX @@ static int smmu_ptw_64_s1(SMMUTransCfg *cfg, | ||
59 | SMMUTLBEntry *tlbe, SMMUPTWEventInfo *info) | ||
60 | { | ||
61 | dma_addr_t baseaddr, indexmask; | ||
62 | - int stage = cfg->stage; | ||
63 | + SMMUStage stage = cfg->stage; | ||
64 | SMMUTransTableInfo *tt = select_tt(cfg, iova); | ||
65 | uint8_t level, granule_sz, inputsize, stride; | ||
66 | |||
67 | @@ -XXX,XX +XXX,XX @@ static int smmu_ptw_64_s1(SMMUTransCfg *cfg, | ||
68 | info->type = SMMU_PTW_ERR_TRANSLATION; | ||
69 | |||
70 | error: | ||
71 | - info->stage = 1; | ||
72 | + info->stage = SMMU_STAGE_1; | ||
73 | tlbe->entry.perm = IOMMU_NONE; | ||
74 | return -EINVAL; | ||
75 | } | ||
76 | @@ -XXX,XX +XXX,XX @@ static int smmu_ptw_64_s2(SMMUTransCfg *cfg, | ||
77 | dma_addr_t ipa, IOMMUAccessFlags perm, | ||
78 | SMMUTLBEntry *tlbe, SMMUPTWEventInfo *info) | ||
79 | { | ||
80 | - const int stage = 2; | ||
81 | + const SMMUStage stage = SMMU_STAGE_2; | ||
82 | int granule_sz = cfg->s2cfg.granule_sz; | ||
83 | /* ARM DDI0487I.a: Table D8-7. */ | ||
84 | int inputsize = 64 - cfg->s2cfg.tsz; | ||
85 | @@ -XXX,XX +XXX,XX @@ static int smmu_ptw_64_s2(SMMUTransCfg *cfg, | ||
86 | error_ipa: | ||
87 | info->addr = ipa; | ||
88 | error: | ||
89 | - info->stage = 2; | ||
90 | + info->stage = SMMU_STAGE_2; | ||
91 | tlbe->entry.perm = IOMMU_NONE; | ||
92 | return -EINVAL; | ||
93 | } | ||
94 | @@ -XXX,XX +XXX,XX @@ error: | ||
95 | int smmu_ptw(SMMUTransCfg *cfg, dma_addr_t iova, IOMMUAccessFlags perm, | ||
96 | SMMUTLBEntry *tlbe, SMMUPTWEventInfo *info) | ||
97 | { | ||
98 | - if (cfg->stage == 1) { | ||
99 | + if (cfg->stage == SMMU_STAGE_1) { | ||
100 | return smmu_ptw_64_s1(cfg, iova, perm, tlbe, info); | ||
101 | - } else if (cfg->stage == 2) { | ||
102 | + } else if (cfg->stage == SMMU_STAGE_2) { | ||
103 | /* | ||
104 | * If bypassing stage 1(or unimplemented), the input address is passed | ||
105 | * directly to stage 2 as IPA. If the input address of a transaction | ||
106 | @@ -XXX,XX +XXX,XX @@ int smmu_ptw(SMMUTransCfg *cfg, dma_addr_t iova, IOMMUAccessFlags perm, | ||
107 | */ | ||
108 | if (iova >= (1ULL << cfg->oas)) { | ||
109 | info->type = SMMU_PTW_ERR_ADDR_SIZE; | ||
110 | - info->stage = 1; | ||
111 | + info->stage = SMMU_STAGE_1; | ||
112 | tlbe->entry.perm = IOMMU_NONE; | ||
113 | return -EINVAL; | ||
114 | } | ||
115 | diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c | ||
116 | index XXXXXXX..XXXXXXX 100644 | ||
117 | --- a/hw/arm/smmuv3.c | ||
118 | +++ b/hw/arm/smmuv3.c | ||
119 | @@ -XXX,XX +XXX,XX @@ | ||
120 | #include "smmuv3-internal.h" | ||
121 | #include "smmu-internal.h" | ||
122 | |||
123 | -#define PTW_RECORD_FAULT(cfg) (((cfg)->stage == 1) ? (cfg)->record_faults : \ | ||
124 | +#define PTW_RECORD_FAULT(cfg) (((cfg)->stage == SMMU_STAGE_1) ? \ | ||
125 | + (cfg)->record_faults : \ | ||
126 | (cfg)->s2cfg.record_faults) | ||
127 | |||
128 | /** | ||
129 | @@ -XXX,XX +XXX,XX @@ static bool s2_pgtable_config_valid(uint8_t sl0, uint8_t t0sz, uint8_t gran) | ||
130 | |||
131 | static int decode_ste_s2_cfg(SMMUTransCfg *cfg, STE *ste) | ||
132 | { | ||
133 | - cfg->stage = 2; | ||
134 | + cfg->stage = SMMU_STAGE_2; | ||
135 | |||
136 | if (STE_S2AA64(ste) == 0x0) { | ||
137 | qemu_log_mask(LOG_UNIMP, | ||
138 | @@ -XXX,XX +XXX,XX @@ static int decode_cd(SMMUTransCfg *cfg, CD *cd, SMMUEventInfo *event) | ||
139 | |||
140 | /* we support only those at the moment */ | ||
141 | cfg->aa64 = true; | ||
142 | - cfg->stage = 1; | ||
143 | + cfg->stage = SMMU_STAGE_1; | ||
144 | |||
145 | cfg->oas = oas2bits(CD_IPS(cd)); | ||
146 | cfg->oas = MIN(oas2bits(SMMU_IDR5_OAS), cfg->oas); | ||
147 | @@ -XXX,XX +XXX,XX @@ static int smmuv3_decode_config(IOMMUMemoryRegion *mr, SMMUTransCfg *cfg, | ||
148 | return ret; | ||
149 | } | ||
150 | |||
151 | - if (cfg->aborted || cfg->bypassed || (cfg->stage == 2)) { | ||
152 | + if (cfg->aborted || cfg->bypassed || (cfg->stage == SMMU_STAGE_2)) { | ||
153 | return 0; | ||
154 | } | ||
155 | |||
156 | @@ -XXX,XX +XXX,XX @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion *mr, hwaddr addr, | ||
157 | goto epilogue; | ||
158 | } | ||
159 | |||
160 | - if (cfg->stage == 1) { | ||
161 | + if (cfg->stage == SMMU_STAGE_1) { | ||
162 | /* Select stage1 translation table. */ | ||
163 | tt = select_tt(cfg, addr); | ||
164 | if (!tt) { | ||
165 | @@ -XXX,XX +XXX,XX @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion *mr, hwaddr addr, | ||
166 | * nesting is not supported. So it is sufficient to check the | ||
167 | * translation stage to know the TLB stage for now. | ||
168 | */ | ||
169 | - event.u.f_walk_eabt.s2 = (cfg->stage == 2); | ||
170 | + event.u.f_walk_eabt.s2 = (cfg->stage == SMMU_STAGE_2); | ||
171 | if (PTW_RECORD_FAULT(cfg)) { | ||
172 | event.type = SMMU_EVT_F_PERMISSION; | ||
173 | event.u.f_permission.addr = addr; | ||
174 | @@ -XXX,XX +XXX,XX @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion *mr, hwaddr addr, | ||
175 | |||
176 | if (smmu_ptw(cfg, aligned_addr, flag, cached_entry, &ptw_info)) { | ||
177 | /* All faults from PTW has S2 field. */ | ||
178 | - event.u.f_walk_eabt.s2 = (ptw_info.stage == 2); | ||
179 | + event.u.f_walk_eabt.s2 = (ptw_info.stage == SMMU_STAGE_2); | ||
180 | g_free(cached_entry); | ||
181 | switch (ptw_info.type) { | ||
182 | case SMMU_PTW_ERR_WALK_EABT: | ||
183 | @@ -XXX,XX +XXX,XX @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion *mr, hwaddr addr, | ||
184 | event.u.f_walk_eabt.addr = addr; | ||
185 | event.u.f_walk_eabt.rnw = flag & 0x1; | ||
186 | /* Stage-2 (only) is class IN while stage-1 is class TT */ | ||
187 | - event.u.f_walk_eabt.class = (ptw_info.stage == 2) ? | ||
188 | + event.u.f_walk_eabt.class = (ptw_info.stage == SMMU_STAGE_2) ? | ||
189 | SMMU_CLASS_IN : SMMU_CLASS_TT; | ||
190 | event.u.f_walk_eabt.addr2 = ptw_info.addr; | ||
191 | break; | ||
192 | -- | ||
193 | 2.34.1 | ||
194 | |||
195 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Mostafa Saleh <smostafa@google.com> | ||
2 | 1 | ||
3 | smmuv3_translate() does everything from STE/CD parsing to TLB lookup | ||
4 | and PTW. | ||
5 | |||
6 | Soon, when nesting is supported, stage-1 data (tt, CD) needs to be | ||
7 | translated using stage-2. | ||
8 | |||
9 | Split smmuv3_translate() to 3 functions: | ||
10 | |||
11 | - smmu_translate(): in smmu-common.c, which does the TLB lookup, PTW, | ||
12 | TLB insertion, all the functions are already there, this just puts | ||
13 | them together. | ||
14 | This also simplifies the code as it consolidates event generation | ||
15 | in case of TLB lookup permission failure or in TT selection. | ||
16 | |||
17 | - smmuv3_do_translate(): in smmuv3.c, Calls smmu_translate() and does | ||
18 | the event population in case of errors. | ||
19 | |||
20 | - smmuv3_translate(), now calls smmuv3_do_translate() for | ||
21 | translation while the rest is the same. | ||
22 | |||
23 | Also, add stage in trace_smmuv3_translate_success() | ||
24 | |||
25 | Reviewed-by: Eric Auger <eric.auger@redhat.com> | ||
26 | Signed-off-by: Mostafa Saleh <smostafa@google.com> | ||
27 | Reviewed-by: Jean-Philippe Brucker <jean-philippe@linaro.org> | ||
28 | Reviewed-by: Alex Bennée <alex.bennee@linaro.org> | ||
29 | Message-id: 20240715084519.1189624-6-smostafa@google.com | ||
30 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
31 | --- | ||
32 | include/hw/arm/smmu-common.h | 8 ++ | ||
33 | hw/arm/smmu-common.c | 59 +++++++++++ | ||
34 | hw/arm/smmuv3.c | 194 +++++++++++++---------------------- | ||
35 | hw/arm/trace-events | 2 +- | ||
36 | 4 files changed, 142 insertions(+), 121 deletions(-) | ||
37 | |||
38 | diff --git a/include/hw/arm/smmu-common.h b/include/hw/arm/smmu-common.h | ||
39 | index XXXXXXX..XXXXXXX 100644 | ||
40 | --- a/include/hw/arm/smmu-common.h | ||
41 | +++ b/include/hw/arm/smmu-common.h | ||
42 | @@ -XXX,XX +XXX,XX @@ static inline uint16_t smmu_get_sid(SMMUDevice *sdev) | ||
43 | int smmu_ptw(SMMUTransCfg *cfg, dma_addr_t iova, IOMMUAccessFlags perm, | ||
44 | SMMUTLBEntry *tlbe, SMMUPTWEventInfo *info); | ||
45 | |||
46 | + | ||
47 | +/* | ||
48 | + * smmu_translate - Look for a translation in TLB, if not, do a PTW. | ||
49 | + * Returns NULL on PTW error or incase of TLB permission errors. | ||
50 | + */ | ||
51 | +SMMUTLBEntry *smmu_translate(SMMUState *bs, SMMUTransCfg *cfg, dma_addr_t addr, | ||
52 | + IOMMUAccessFlags flag, SMMUPTWEventInfo *info); | ||
53 | + | ||
54 | /** | ||
55 | * select_tt - compute which translation table shall be used according to | ||
56 | * the input iova and translation config and return the TT specific info | ||
57 | diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c | ||
58 | index XXXXXXX..XXXXXXX 100644 | ||
59 | --- a/hw/arm/smmu-common.c | ||
60 | +++ b/hw/arm/smmu-common.c | ||
61 | @@ -XXX,XX +XXX,XX @@ int smmu_ptw(SMMUTransCfg *cfg, dma_addr_t iova, IOMMUAccessFlags perm, | ||
62 | g_assert_not_reached(); | ||
63 | } | ||
64 | |||
65 | +SMMUTLBEntry *smmu_translate(SMMUState *bs, SMMUTransCfg *cfg, dma_addr_t addr, | ||
66 | + IOMMUAccessFlags flag, SMMUPTWEventInfo *info) | ||
67 | +{ | ||
68 | + uint64_t page_mask, aligned_addr; | ||
69 | + SMMUTLBEntry *cached_entry = NULL; | ||
70 | + SMMUTransTableInfo *tt; | ||
71 | + int status; | ||
72 | + | ||
73 | + /* | ||
74 | + * Combined attributes used for TLB lookup, as only one stage is supported, | ||
75 | + * it will hold attributes based on the enabled stage. | ||
76 | + */ | ||
77 | + SMMUTransTableInfo tt_combined; | ||
78 | + | ||
79 | + if (cfg->stage == SMMU_STAGE_1) { | ||
80 | + /* Select stage1 translation table. */ | ||
81 | + tt = select_tt(cfg, addr); | ||
82 | + if (!tt) { | ||
83 | + info->type = SMMU_PTW_ERR_TRANSLATION; | ||
84 | + info->stage = SMMU_STAGE_1; | ||
85 | + return NULL; | ||
86 | + } | ||
87 | + tt_combined.granule_sz = tt->granule_sz; | ||
88 | + tt_combined.tsz = tt->tsz; | ||
89 | + | ||
90 | + } else { | ||
91 | + /* Stage2. */ | ||
92 | + tt_combined.granule_sz = cfg->s2cfg.granule_sz; | ||
93 | + tt_combined.tsz = cfg->s2cfg.tsz; | ||
94 | + } | ||
95 | + | ||
96 | + /* | ||
97 | + * TLB lookup looks for granule and input size for a translation stage, | ||
98 | + * as only one stage is supported right now, choose the right values | ||
99 | + * from the configuration. | ||
100 | + */ | ||
101 | + page_mask = (1ULL << tt_combined.granule_sz) - 1; | ||
102 | + aligned_addr = addr & ~page_mask; | ||
103 | + | ||
104 | + cached_entry = smmu_iotlb_lookup(bs, cfg, &tt_combined, aligned_addr); | ||
105 | + if (cached_entry) { | ||
106 | + if ((flag & IOMMU_WO) && !(cached_entry->entry.perm & IOMMU_WO)) { | ||
107 | + info->type = SMMU_PTW_ERR_PERMISSION; | ||
108 | + info->stage = cfg->stage; | ||
109 | + return NULL; | ||
110 | + } | ||
111 | + return cached_entry; | ||
112 | + } | ||
113 | + | ||
114 | + cached_entry = g_new0(SMMUTLBEntry, 1); | ||
115 | + status = smmu_ptw(cfg, aligned_addr, flag, cached_entry, info); | ||
116 | + if (status) { | ||
117 | + g_free(cached_entry); | ||
118 | + return NULL; | ||
119 | + } | ||
120 | + smmu_iotlb_insert(bs, cfg, cached_entry); | ||
121 | + return cached_entry; | ||
122 | +} | ||
123 | + | ||
124 | /** | ||
125 | * The bus number is used for lookup when SID based invalidation occurs. | ||
126 | * In that case we lazily populate the SMMUPciBus array from the bus hash | ||
127 | diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c | ||
128 | index XXXXXXX..XXXXXXX 100644 | ||
129 | --- a/hw/arm/smmuv3.c | ||
130 | +++ b/hw/arm/smmuv3.c | ||
131 | @@ -XXX,XX +XXX,XX @@ static void smmuv3_flush_config(SMMUDevice *sdev) | ||
132 | g_hash_table_remove(bc->configs, sdev); | ||
133 | } | ||
134 | |||
135 | +/* Do translation with TLB lookup. */ | ||
136 | +static SMMUTranslationStatus smmuv3_do_translate(SMMUv3State *s, hwaddr addr, | ||
137 | + SMMUTransCfg *cfg, | ||
138 | + SMMUEventInfo *event, | ||
139 | + IOMMUAccessFlags flag, | ||
140 | + SMMUTLBEntry **out_entry) | ||
141 | +{ | ||
142 | + SMMUPTWEventInfo ptw_info = {}; | ||
143 | + SMMUState *bs = ARM_SMMU(s); | ||
144 | + SMMUTLBEntry *cached_entry = NULL; | ||
145 | + | ||
146 | + cached_entry = smmu_translate(bs, cfg, addr, flag, &ptw_info); | ||
147 | + if (!cached_entry) { | ||
148 | + /* All faults from PTW has S2 field. */ | ||
149 | + event->u.f_walk_eabt.s2 = (ptw_info.stage == SMMU_STAGE_2); | ||
150 | + switch (ptw_info.type) { | ||
151 | + case SMMU_PTW_ERR_WALK_EABT: | ||
152 | + event->type = SMMU_EVT_F_WALK_EABT; | ||
153 | + event->u.f_walk_eabt.addr = addr; | ||
154 | + event->u.f_walk_eabt.rnw = flag & 0x1; | ||
155 | + event->u.f_walk_eabt.class = (ptw_info.stage == SMMU_STAGE_2) ? | ||
156 | + SMMU_CLASS_IN : SMMU_CLASS_TT; | ||
157 | + event->u.f_walk_eabt.addr2 = ptw_info.addr; | ||
158 | + break; | ||
159 | + case SMMU_PTW_ERR_TRANSLATION: | ||
160 | + if (PTW_RECORD_FAULT(cfg)) { | ||
161 | + event->type = SMMU_EVT_F_TRANSLATION; | ||
162 | + event->u.f_translation.addr = addr; | ||
163 | + event->u.f_translation.addr2 = ptw_info.addr; | ||
164 | + event->u.f_translation.class = SMMU_CLASS_IN; | ||
165 | + event->u.f_translation.rnw = flag & 0x1; | ||
166 | + } | ||
167 | + break; | ||
168 | + case SMMU_PTW_ERR_ADDR_SIZE: | ||
169 | + if (PTW_RECORD_FAULT(cfg)) { | ||
170 | + event->type = SMMU_EVT_F_ADDR_SIZE; | ||
171 | + event->u.f_addr_size.addr = addr; | ||
172 | + event->u.f_addr_size.addr2 = ptw_info.addr; | ||
173 | + event->u.f_addr_size.class = SMMU_CLASS_IN; | ||
174 | + event->u.f_addr_size.rnw = flag & 0x1; | ||
175 | + } | ||
176 | + break; | ||
177 | + case SMMU_PTW_ERR_ACCESS: | ||
178 | + if (PTW_RECORD_FAULT(cfg)) { | ||
179 | + event->type = SMMU_EVT_F_ACCESS; | ||
180 | + event->u.f_access.addr = addr; | ||
181 | + event->u.f_access.addr2 = ptw_info.addr; | ||
182 | + event->u.f_access.class = SMMU_CLASS_IN; | ||
183 | + event->u.f_access.rnw = flag & 0x1; | ||
184 | + } | ||
185 | + break; | ||
186 | + case SMMU_PTW_ERR_PERMISSION: | ||
187 | + if (PTW_RECORD_FAULT(cfg)) { | ||
188 | + event->type = SMMU_EVT_F_PERMISSION; | ||
189 | + event->u.f_permission.addr = addr; | ||
190 | + event->u.f_permission.addr2 = ptw_info.addr; | ||
191 | + event->u.f_permission.class = SMMU_CLASS_IN; | ||
192 | + event->u.f_permission.rnw = flag & 0x1; | ||
193 | + } | ||
194 | + break; | ||
195 | + default: | ||
196 | + g_assert_not_reached(); | ||
197 | + } | ||
198 | + return SMMU_TRANS_ERROR; | ||
199 | + } | ||
200 | + *out_entry = cached_entry; | ||
201 | + return SMMU_TRANS_SUCCESS; | ||
202 | +} | ||
203 | + | ||
204 | +/* Entry point to SMMU, does everything. */ | ||
205 | static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion *mr, hwaddr addr, | ||
206 | IOMMUAccessFlags flag, int iommu_idx) | ||
207 | { | ||
208 | @@ -XXX,XX +XXX,XX @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion *mr, hwaddr addr, | ||
209 | SMMUEventInfo event = {.type = SMMU_EVT_NONE, | ||
210 | .sid = sid, | ||
211 | .inval_ste_allowed = false}; | ||
212 | - SMMUPTWEventInfo ptw_info = {}; | ||
213 | SMMUTranslationStatus status; | ||
214 | - SMMUState *bs = ARM_SMMU(s); | ||
215 | - uint64_t page_mask, aligned_addr; | ||
216 | - SMMUTLBEntry *cached_entry = NULL; | ||
217 | - SMMUTransTableInfo *tt; | ||
218 | SMMUTransCfg *cfg = NULL; | ||
219 | IOMMUTLBEntry entry = { | ||
220 | .target_as = &address_space_memory, | ||
221 | @@ -XXX,XX +XXX,XX @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion *mr, hwaddr addr, | ||
222 | .addr_mask = ~(hwaddr)0, | ||
223 | .perm = IOMMU_NONE, | ||
224 | }; | ||
225 | - /* | ||
226 | - * Combined attributes used for TLB lookup, as only one stage is supported, | ||
227 | - * it will hold attributes based on the enabled stage. | ||
228 | - */ | ||
229 | - SMMUTransTableInfo tt_combined; | ||
230 | + SMMUTLBEntry *cached_entry = NULL; | ||
231 | |||
232 | qemu_mutex_lock(&s->mutex); | ||
233 | |||
234 | @@ -XXX,XX +XXX,XX @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion *mr, hwaddr addr, | ||
235 | goto epilogue; | ||
236 | } | ||
237 | |||
238 | - if (cfg->stage == SMMU_STAGE_1) { | ||
239 | - /* Select stage1 translation table. */ | ||
240 | - tt = select_tt(cfg, addr); | ||
241 | - if (!tt) { | ||
242 | - if (cfg->record_faults) { | ||
243 | - event.type = SMMU_EVT_F_TRANSLATION; | ||
244 | - event.u.f_translation.addr = addr; | ||
245 | - event.u.f_translation.rnw = flag & 0x1; | ||
246 | - } | ||
247 | - status = SMMU_TRANS_ERROR; | ||
248 | - goto epilogue; | ||
249 | - } | ||
250 | - tt_combined.granule_sz = tt->granule_sz; | ||
251 | - tt_combined.tsz = tt->tsz; | ||
252 | - | ||
253 | - } else { | ||
254 | - /* Stage2. */ | ||
255 | - tt_combined.granule_sz = cfg->s2cfg.granule_sz; | ||
256 | - tt_combined.tsz = cfg->s2cfg.tsz; | ||
257 | - } | ||
258 | - /* | ||
259 | - * TLB lookup looks for granule and input size for a translation stage, | ||
260 | - * as only one stage is supported right now, choose the right values | ||
261 | - * from the configuration. | ||
262 | - */ | ||
263 | - page_mask = (1ULL << tt_combined.granule_sz) - 1; | ||
264 | - aligned_addr = addr & ~page_mask; | ||
265 | - | ||
266 | - cached_entry = smmu_iotlb_lookup(bs, cfg, &tt_combined, aligned_addr); | ||
267 | - if (cached_entry) { | ||
268 | - if ((flag & IOMMU_WO) && !(cached_entry->entry.perm & IOMMU_WO)) { | ||
269 | - status = SMMU_TRANS_ERROR; | ||
270 | - /* | ||
271 | - * We know that the TLB only contains either stage-1 or stage-2 as | ||
272 | - * nesting is not supported. So it is sufficient to check the | ||
273 | - * translation stage to know the TLB stage for now. | ||
274 | - */ | ||
275 | - event.u.f_walk_eabt.s2 = (cfg->stage == SMMU_STAGE_2); | ||
276 | - if (PTW_RECORD_FAULT(cfg)) { | ||
277 | - event.type = SMMU_EVT_F_PERMISSION; | ||
278 | - event.u.f_permission.addr = addr; | ||
279 | - event.u.f_permission.rnw = flag & 0x1; | ||
280 | - } | ||
281 | - } else { | ||
282 | - status = SMMU_TRANS_SUCCESS; | ||
283 | - } | ||
284 | - goto epilogue; | ||
285 | - } | ||
286 | - | ||
287 | - cached_entry = g_new0(SMMUTLBEntry, 1); | ||
288 | - | ||
289 | - if (smmu_ptw(cfg, aligned_addr, flag, cached_entry, &ptw_info)) { | ||
290 | - /* All faults from PTW has S2 field. */ | ||
291 | - event.u.f_walk_eabt.s2 = (ptw_info.stage == SMMU_STAGE_2); | ||
292 | - g_free(cached_entry); | ||
293 | - switch (ptw_info.type) { | ||
294 | - case SMMU_PTW_ERR_WALK_EABT: | ||
295 | - event.type = SMMU_EVT_F_WALK_EABT; | ||
296 | - event.u.f_walk_eabt.addr = addr; | ||
297 | - event.u.f_walk_eabt.rnw = flag & 0x1; | ||
298 | - /* Stage-2 (only) is class IN while stage-1 is class TT */ | ||
299 | - event.u.f_walk_eabt.class = (ptw_info.stage == SMMU_STAGE_2) ? | ||
300 | - SMMU_CLASS_IN : SMMU_CLASS_TT; | ||
301 | - event.u.f_walk_eabt.addr2 = ptw_info.addr; | ||
302 | - break; | ||
303 | - case SMMU_PTW_ERR_TRANSLATION: | ||
304 | - if (PTW_RECORD_FAULT(cfg)) { | ||
305 | - event.type = SMMU_EVT_F_TRANSLATION; | ||
306 | - event.u.f_translation.addr = addr; | ||
307 | - event.u.f_translation.addr2 = ptw_info.addr; | ||
308 | - event.u.f_translation.class = SMMU_CLASS_IN; | ||
309 | - event.u.f_translation.rnw = flag & 0x1; | ||
310 | - } | ||
311 | - break; | ||
312 | - case SMMU_PTW_ERR_ADDR_SIZE: | ||
313 | - if (PTW_RECORD_FAULT(cfg)) { | ||
314 | - event.type = SMMU_EVT_F_ADDR_SIZE; | ||
315 | - event.u.f_addr_size.addr = addr; | ||
316 | - event.u.f_addr_size.addr2 = ptw_info.addr; | ||
317 | - event.u.f_translation.class = SMMU_CLASS_IN; | ||
318 | - event.u.f_addr_size.rnw = flag & 0x1; | ||
319 | - } | ||
320 | - break; | ||
321 | - case SMMU_PTW_ERR_ACCESS: | ||
322 | - if (PTW_RECORD_FAULT(cfg)) { | ||
323 | - event.type = SMMU_EVT_F_ACCESS; | ||
324 | - event.u.f_access.addr = addr; | ||
325 | - event.u.f_access.addr2 = ptw_info.addr; | ||
326 | - event.u.f_translation.class = SMMU_CLASS_IN; | ||
327 | - event.u.f_access.rnw = flag & 0x1; | ||
328 | - } | ||
329 | - break; | ||
330 | - case SMMU_PTW_ERR_PERMISSION: | ||
331 | - if (PTW_RECORD_FAULT(cfg)) { | ||
332 | - event.type = SMMU_EVT_F_PERMISSION; | ||
333 | - event.u.f_permission.addr = addr; | ||
334 | - event.u.f_permission.addr2 = ptw_info.addr; | ||
335 | - event.u.f_translation.class = SMMU_CLASS_IN; | ||
336 | - event.u.f_permission.rnw = flag & 0x1; | ||
337 | - } | ||
338 | - break; | ||
339 | - default: | ||
340 | - g_assert_not_reached(); | ||
341 | - } | ||
342 | - status = SMMU_TRANS_ERROR; | ||
343 | - } else { | ||
344 | - smmu_iotlb_insert(bs, cfg, cached_entry); | ||
345 | - status = SMMU_TRANS_SUCCESS; | ||
346 | - } | ||
347 | + status = smmuv3_do_translate(s, addr, cfg, &event, flag, &cached_entry); | ||
348 | |||
349 | epilogue: | ||
350 | qemu_mutex_unlock(&s->mutex); | ||
351 | @@ -XXX,XX +XXX,XX @@ epilogue: | ||
352 | (addr & cached_entry->entry.addr_mask); | ||
353 | entry.addr_mask = cached_entry->entry.addr_mask; | ||
354 | trace_smmuv3_translate_success(mr->parent_obj.name, sid, addr, | ||
355 | - entry.translated_addr, entry.perm); | ||
356 | + entry.translated_addr, entry.perm, | ||
357 | + cfg->stage); | ||
358 | break; | ||
359 | case SMMU_TRANS_DISABLE: | ||
360 | entry.perm = flag; | ||
361 | diff --git a/hw/arm/trace-events b/hw/arm/trace-events | ||
362 | index XXXXXXX..XXXXXXX 100644 | ||
363 | --- a/hw/arm/trace-events | ||
364 | +++ b/hw/arm/trace-events | ||
365 | @@ -XXX,XX +XXX,XX @@ smmuv3_get_ste(uint64_t addr) "STE addr: 0x%"PRIx64 | ||
366 | smmuv3_translate_disable(const char *n, uint16_t sid, uint64_t addr, bool is_write) "%s sid=0x%x bypass (smmu disabled) iova:0x%"PRIx64" is_write=%d" | ||
367 | smmuv3_translate_bypass(const char *n, uint16_t sid, uint64_t addr, bool is_write) "%s sid=0x%x STE bypass iova:0x%"PRIx64" is_write=%d" | ||
368 | smmuv3_translate_abort(const char *n, uint16_t sid, uint64_t addr, bool is_write) "%s sid=0x%x abort on iova:0x%"PRIx64" is_write=%d" | ||
369 | -smmuv3_translate_success(const char *n, uint16_t sid, uint64_t iova, uint64_t translated, int perm) "%s sid=0x%x iova=0x%"PRIx64" translated=0x%"PRIx64" perm=0x%x" | ||
370 | +smmuv3_translate_success(const char *n, uint16_t sid, uint64_t iova, uint64_t translated, int perm, int stage) "%s sid=0x%x iova=0x%"PRIx64" translated=0x%"PRIx64" perm=0x%x stage=%d" | ||
371 | smmuv3_get_cd(uint64_t addr) "CD addr: 0x%"PRIx64 | ||
372 | smmuv3_decode_cd(uint32_t oas) "oas=%d" | ||
373 | smmuv3_decode_cd_tt(int i, uint32_t tsz, uint64_t ttb, uint32_t granule_sz, bool had) "TT[%d]:tsz:%d ttb:0x%"PRIx64" granule_sz:%d had:%d" | ||
374 | -- | ||
375 | 2.34.1 | ||
376 | |||
377 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Mostafa Saleh <smostafa@google.com> | ||
2 | 1 | ||
3 | ASID and VMID used to be uint16_t in the translation config; however, | ||
4 | in other contexts they can be int, where -1 on TLB invalidation | ||
5 | represents all (don’t care). | ||
6 | When stage-2 was added, asid was set to -1 in stage-2 configs and vmid | ||
7 | to -1 in stage-1 configs. However, that meant they were actually | ||
8 | stored as 65535; this was not an issue as nesting was not supported | ||
9 | and no command/lookup uses both. | ||
10 | |||
11 | With nesting, it’s critical to get this right as translation must be | ||
12 | tagged correctly with ASID/VMID, and with ASID=-1 meaning stage-2. | ||
13 | Represent ASID/VMID everywhere as int. | ||
14 | |||
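
As a quick standalone illustration (not QEMU code) of why a uint16_t field cannot carry the -1 wildcard: the assignment wraps to 65535, so a later comparison against -1 never matches.

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint16_t asid_u16 = -1;   /* wraps to 65535; the "don't care" meaning is lost */
        int      asid_int = -1;   /* keeps the "match all ASIDs" meaning */

        /* after integer promotion 65535 != -1, so the wildcard check fails */
        printf("uint16_t asid: %d, matches -1? %s\n",
               asid_u16, asid_u16 == -1 ? "yes" : "no");
        printf("int asid     : %d, matches -1? %s\n",
               asid_int, asid_int == -1 ? "yes" : "no");
        return 0;
    }
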
15 | Reviewed-by: Eric Auger <eric.auger@redhat.com> | ||
16 | Signed-off-by: Mostafa Saleh <smostafa@google.com> | ||
17 | Reviewed-by: Jean-Philippe Brucker <jean-philippe@linaro.org> | ||
18 | Reviewed-by: Alex Bennée <alex.bennee@linaro.org> | ||
19 | Message-id: 20240715084519.1189624-7-smostafa@google.com | ||
20 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
21 | --- | ||
22 | include/hw/arm/smmu-common.h | 14 +++++++------- | ||
23 | hw/arm/smmu-common.c | 10 +++++----- | ||
24 | hw/arm/smmuv3.c | 4 ++-- | ||
25 | hw/arm/trace-events | 18 +++++++++--------- | ||
26 | 4 files changed, 23 insertions(+), 23 deletions(-) | ||
27 | |||
28 | diff --git a/include/hw/arm/smmu-common.h b/include/hw/arm/smmu-common.h | ||
29 | index XXXXXXX..XXXXXXX 100644 | ||
30 | --- a/include/hw/arm/smmu-common.h | ||
31 | +++ b/include/hw/arm/smmu-common.h | ||
32 | @@ -XXX,XX +XXX,XX @@ typedef struct SMMUS2Cfg { | ||
33 | bool record_faults; /* Record fault events (S2R) */ | ||
34 | uint8_t granule_sz; /* Granule page shift (based on S2TG) */ | ||
35 | uint8_t eff_ps; /* Effective PA output range (based on S2PS) */ | ||
36 | - uint16_t vmid; /* Virtual Machine ID (S2VMID) */ | ||
37 | + int vmid; /* Virtual Machine ID (S2VMID) */ | ||
38 | uint64_t vttb; /* Address of translation table base (S2TTB) */ | ||
39 | } SMMUS2Cfg; | ||
40 | |||
41 | @@ -XXX,XX +XXX,XX @@ typedef struct SMMUTransCfg { | ||
42 | uint64_t ttb; /* TT base address */ | ||
43 | uint8_t oas; /* output address width */ | ||
44 | uint8_t tbi; /* Top Byte Ignore */ | ||
45 | - uint16_t asid; | ||
46 | + int asid; | ||
47 | SMMUTransTableInfo tt[2]; | ||
48 | /* Used by stage-2 only. */ | ||
49 | struct SMMUS2Cfg s2cfg; | ||
50 | @@ -XXX,XX +XXX,XX @@ typedef struct SMMUPciBus { | ||
51 | |||
52 | typedef struct SMMUIOTLBKey { | ||
53 | uint64_t iova; | ||
54 | - uint16_t asid; | ||
55 | - uint16_t vmid; | ||
56 | + int asid; | ||
57 | + int vmid; | ||
58 | uint8_t tg; | ||
59 | uint8_t level; | ||
60 | } SMMUIOTLBKey; | ||
61 | @@ -XXX,XX +XXX,XX @@ SMMUDevice *smmu_find_sdev(SMMUState *s, uint32_t sid); | ||
62 | SMMUTLBEntry *smmu_iotlb_lookup(SMMUState *bs, SMMUTransCfg *cfg, | ||
63 | SMMUTransTableInfo *tt, hwaddr iova); | ||
64 | void smmu_iotlb_insert(SMMUState *bs, SMMUTransCfg *cfg, SMMUTLBEntry *entry); | ||
65 | -SMMUIOTLBKey smmu_get_iotlb_key(uint16_t asid, uint16_t vmid, uint64_t iova, | ||
66 | +SMMUIOTLBKey smmu_get_iotlb_key(int asid, int vmid, uint64_t iova, | ||
67 | uint8_t tg, uint8_t level); | ||
68 | void smmu_iotlb_inv_all(SMMUState *s); | ||
69 | -void smmu_iotlb_inv_asid(SMMUState *s, uint16_t asid); | ||
70 | -void smmu_iotlb_inv_vmid(SMMUState *s, uint16_t vmid); | ||
71 | +void smmu_iotlb_inv_asid(SMMUState *s, int asid); | ||
72 | +void smmu_iotlb_inv_vmid(SMMUState *s, int vmid); | ||
73 | void smmu_iotlb_inv_iova(SMMUState *s, int asid, int vmid, dma_addr_t iova, | ||
74 | uint8_t tg, uint64_t num_pages, uint8_t ttl); | ||
75 | |||
76 | diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c | ||
77 | index XXXXXXX..XXXXXXX 100644 | ||
78 | --- a/hw/arm/smmu-common.c | ||
79 | +++ b/hw/arm/smmu-common.c | ||
80 | @@ -XXX,XX +XXX,XX @@ static gboolean smmu_iotlb_key_equal(gconstpointer v1, gconstpointer v2) | ||
81 | (k1->vmid == k2->vmid); | ||
82 | } | ||
83 | |||
84 | -SMMUIOTLBKey smmu_get_iotlb_key(uint16_t asid, uint16_t vmid, uint64_t iova, | ||
85 | +SMMUIOTLBKey smmu_get_iotlb_key(int asid, int vmid, uint64_t iova, | ||
86 | uint8_t tg, uint8_t level) | ||
87 | { | ||
88 | SMMUIOTLBKey key = {.asid = asid, .vmid = vmid, .iova = iova, | ||
89 | @@ -XXX,XX +XXX,XX @@ void smmu_iotlb_inv_all(SMMUState *s) | ||
90 | static gboolean smmu_hash_remove_by_asid(gpointer key, gpointer value, | ||
91 | gpointer user_data) | ||
92 | { | ||
93 | - uint16_t asid = *(uint16_t *)user_data; | ||
94 | + int asid = *(int *)user_data; | ||
95 | SMMUIOTLBKey *iotlb_key = (SMMUIOTLBKey *)key; | ||
96 | |||
97 | return SMMU_IOTLB_ASID(*iotlb_key) == asid; | ||
98 | @@ -XXX,XX +XXX,XX @@ static gboolean smmu_hash_remove_by_asid(gpointer key, gpointer value, | ||
99 | static gboolean smmu_hash_remove_by_vmid(gpointer key, gpointer value, | ||
100 | gpointer user_data) | ||
101 | { | ||
102 | - uint16_t vmid = *(uint16_t *)user_data; | ||
103 | + int vmid = *(int *)user_data; | ||
104 | SMMUIOTLBKey *iotlb_key = (SMMUIOTLBKey *)key; | ||
105 | |||
106 | return SMMU_IOTLB_VMID(*iotlb_key) == vmid; | ||
107 | @@ -XXX,XX +XXX,XX @@ void smmu_iotlb_inv_iova(SMMUState *s, int asid, int vmid, dma_addr_t iova, | ||
108 | &info); | ||
109 | } | ||
110 | |||
111 | -void smmu_iotlb_inv_asid(SMMUState *s, uint16_t asid) | ||
112 | +void smmu_iotlb_inv_asid(SMMUState *s, int asid) | ||
113 | { | ||
114 | trace_smmu_iotlb_inv_asid(asid); | ||
115 | g_hash_table_foreach_remove(s->iotlb, smmu_hash_remove_by_asid, &asid); | ||
116 | } | ||
117 | |||
118 | -void smmu_iotlb_inv_vmid(SMMUState *s, uint16_t vmid) | ||
119 | +void smmu_iotlb_inv_vmid(SMMUState *s, int vmid) | ||
120 | { | ||
121 | trace_smmu_iotlb_inv_vmid(vmid); | ||
122 | g_hash_table_foreach_remove(s->iotlb, smmu_hash_remove_by_vmid, &vmid); | ||
123 | diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c | ||
124 | index XXXXXXX..XXXXXXX 100644 | ||
125 | --- a/hw/arm/smmuv3.c | ||
126 | +++ b/hw/arm/smmuv3.c | ||
127 | @@ -XXX,XX +XXX,XX @@ static int smmuv3_cmdq_consume(SMMUv3State *s) | ||
128 | } | ||
129 | case SMMU_CMD_TLBI_NH_ASID: | ||
130 | { | ||
131 | - uint16_t asid = CMD_ASID(&cmd); | ||
132 | + int asid = CMD_ASID(&cmd); | ||
133 | |||
134 | if (!STAGE1_SUPPORTED(s)) { | ||
135 | cmd_error = SMMU_CERROR_ILL; | ||
136 | @@ -XXX,XX +XXX,XX @@ static int smmuv3_cmdq_consume(SMMUv3State *s) | ||
137 | break; | ||
138 | case SMMU_CMD_TLBI_S12_VMALL: | ||
139 | { | ||
140 | - uint16_t vmid = CMD_VMID(&cmd); | ||
141 | + int vmid = CMD_VMID(&cmd); | ||
142 | |||
143 | if (!STAGE2_SUPPORTED(s)) { | ||
144 | cmd_error = SMMU_CERROR_ILL; | ||
145 | diff --git a/hw/arm/trace-events b/hw/arm/trace-events | ||
146 | index XXXXXXX..XXXXXXX 100644 | ||
147 | --- a/hw/arm/trace-events | ||
148 | +++ b/hw/arm/trace-events | ||
149 | @@ -XXX,XX +XXX,XX @@ smmu_ptw_page_pte(int stage, int level, uint64_t iova, uint64_t baseaddr, uint6 | ||
150 | smmu_ptw_block_pte(int stage, int level, uint64_t baseaddr, uint64_t pteaddr, uint64_t pte, uint64_t iova, uint64_t gpa, int bsize_mb) "stage=%d level=%d base@=0x%"PRIx64" pte@=0x%"PRIx64" pte=0x%"PRIx64" iova=0x%"PRIx64" block address = 0x%"PRIx64" block size = %d MiB" | ||
151 | smmu_get_pte(uint64_t baseaddr, int index, uint64_t pteaddr, uint64_t pte) "baseaddr=0x%"PRIx64" index=0x%x, pteaddr=0x%"PRIx64", pte=0x%"PRIx64 | ||
152 | smmu_iotlb_inv_all(void) "IOTLB invalidate all" | ||
153 | -smmu_iotlb_inv_asid(uint16_t asid) "IOTLB invalidate asid=%d" | ||
154 | -smmu_iotlb_inv_vmid(uint16_t vmid) "IOTLB invalidate vmid=%d" | ||
155 | -smmu_iotlb_inv_iova(uint16_t asid, uint64_t addr) "IOTLB invalidate asid=%d addr=0x%"PRIx64 | ||
156 | +smmu_iotlb_inv_asid(int asid) "IOTLB invalidate asid=%d" | ||
157 | +smmu_iotlb_inv_vmid(int vmid) "IOTLB invalidate vmid=%d" | ||
158 | +smmu_iotlb_inv_iova(int asid, uint64_t addr) "IOTLB invalidate asid=%d addr=0x%"PRIx64 | ||
159 | smmu_inv_notifiers_mr(const char *name) "iommu mr=%s" | ||
160 | -smmu_iotlb_lookup_hit(uint16_t asid, uint16_t vmid, uint64_t addr, uint32_t hit, uint32_t miss, uint32_t p) "IOTLB cache HIT asid=%d vmid=%d addr=0x%"PRIx64" hit=%d miss=%d hit rate=%d" | ||
161 | -smmu_iotlb_lookup_miss(uint16_t asid, uint16_t vmid, uint64_t addr, uint32_t hit, uint32_t miss, uint32_t p) "IOTLB cache MISS asid=%d vmid=%d addr=0x%"PRIx64" hit=%d miss=%d hit rate=%d" | ||
162 | -smmu_iotlb_insert(uint16_t asid, uint16_t vmid, uint64_t addr, uint8_t tg, uint8_t level) "IOTLB ++ asid=%d vmid=%d addr=0x%"PRIx64" tg=%d level=%d" | ||
163 | +smmu_iotlb_lookup_hit(int asid, int vmid, uint64_t addr, uint32_t hit, uint32_t miss, uint32_t p) "IOTLB cache HIT asid=%d vmid=%d addr=0x%"PRIx64" hit=%d miss=%d hit rate=%d" | ||
164 | +smmu_iotlb_lookup_miss(int asid, int vmid, uint64_t addr, uint32_t hit, uint32_t miss, uint32_t p) "IOTLB cache MISS asid=%d vmid=%d addr=0x%"PRIx64" hit=%d miss=%d hit rate=%d" | ||
165 | +smmu_iotlb_insert(int asid, int vmid, uint64_t addr, uint8_t tg, uint8_t level) "IOTLB ++ asid=%d vmid=%d addr=0x%"PRIx64" tg=%d level=%d" | ||
166 | |||
167 | # smmuv3.c | ||
168 | smmuv3_read_mmio(uint64_t addr, uint64_t val, unsigned size, uint32_t r) "addr: 0x%"PRIx64" val:0x%"PRIx64" size: 0x%x(%d)" | ||
169 | @@ -XXX,XX +XXX,XX @@ smmuv3_config_cache_hit(uint32_t sid, uint32_t hits, uint32_t misses, uint32_t p | ||
170 | smmuv3_config_cache_miss(uint32_t sid, uint32_t hits, uint32_t misses, uint32_t perc) "Config cache MISS for sid=0x%x (hits=%d, misses=%d, hit rate=%d)" | ||
171 | smmuv3_range_inval(int vmid, int asid, uint64_t addr, uint8_t tg, uint64_t num_pages, uint8_t ttl, bool leaf) "vmid=%d asid=%d addr=0x%"PRIx64" tg=%d num_pages=0x%"PRIx64" ttl=%d leaf=%d" | ||
172 | smmuv3_cmdq_tlbi_nh(void) "" | ||
173 | -smmuv3_cmdq_tlbi_nh_asid(uint16_t asid) "asid=%d" | ||
174 | -smmuv3_cmdq_tlbi_s12_vmid(uint16_t vmid) "vmid=%d" | ||
175 | +smmuv3_cmdq_tlbi_nh_asid(int asid) "asid=%d" | ||
176 | +smmuv3_cmdq_tlbi_s12_vmid(int vmid) "vmid=%d" | ||
177 | smmuv3_config_cache_inv(uint32_t sid) "Config cache INV for sid=0x%x" | ||
178 | smmuv3_notify_flag_add(const char *iommu) "ADD SMMUNotifier node for iommu mr=%s" | ||
179 | smmuv3_notify_flag_del(const char *iommu) "DEL SMMUNotifier node for iommu mr=%s" | ||
180 | -smmuv3_inv_notifiers_iova(const char *name, uint16_t asid, uint16_t vmid, uint64_t iova, uint8_t tg, uint64_t num_pages) "iommu mr=%s asid=%d vmid=%d iova=0x%"PRIx64" tg=%d num_pages=0x%"PRIx64 | ||
181 | +smmuv3_inv_notifiers_iova(const char *name, int asid, int vmid, uint64_t iova, uint8_t tg, uint64_t num_pages) "iommu mr=%s asid=%d vmid=%d iova=0x%"PRIx64" tg=%d num_pages=0x%"PRIx64 | ||
182 | |||
183 | # strongarm.c | ||
184 | strongarm_uart_update_parameters(const char *label, int speed, char parity, int data_bits, int stop_bits) "%s speed=%d parity=%c data=%d stop=%d" | ||
185 | -- | ||
186 | 2.34.1 | ||
187 | |||
188 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Mostafa Saleh <smostafa@google.com> | ||
2 | 1 | ||
3 | Soon, smmuv3_do_translate() will be used to translate the CD and the | ||
4 | TTBx; instead of rewriting the same logic to convert the returned | ||
5 | cached entry to an address, add a new macro CACHED_ENTRY_TO_ADDR. | ||
6 | |||
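
For illustration, a minimal standalone sketch of what the macro computes: the translated page/block base plus the offset bits selected by addr_mask. The struct below is a simplified stand-in for the QEMU SMMUTLBEntry, modelling only the two fields the macro uses.

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    /* simplified stand-in for the QEMU SMMUTLBEntry */
    typedef struct {
        struct {
            uint64_t translated_addr;   /* output page/block base address */
            uint64_t addr_mask;         /* offset bits kept from the input */
        } entry;
    } SMMUTLBEntry;

    #define CACHED_ENTRY_TO_ADDR(ent, addr) ((ent)->entry.translated_addr + \
                                             ((addr) & (ent)->entry.addr_mask))

    int main(void)
    {
        SMMUTLBEntry e = { .entry = { .translated_addr = 0x80000000,
                                      .addr_mask = 0xfff /* 4K entry */ } };
        uint64_t iova = 0x1234567;

        /* 0x1234567 -> 0x80000567 */
        printf("0x%" PRIx64 " -> 0x%" PRIx64 "\n",
               iova, CACHED_ENTRY_TO_ADDR(&e, iova));
        return 0;
    }
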
7 | Reviewed-by: Eric Auger <eric.auger@redhat.com> | ||
8 | Signed-off-by: Mostafa Saleh <smostafa@google.com> | ||
9 | Reviewed-by: Jean-Philippe Brucker <jean-philippe@linaro.org> | ||
10 | Reviewed-by: Alex Bennée <alex.bennee@linaro.org> | ||
11 | Message-id: 20240715084519.1189624-8-smostafa@google.com | ||
12 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
13 | --- | ||
14 | include/hw/arm/smmu-common.h | 3 +++ | ||
15 | hw/arm/smmuv3.c | 3 +-- | ||
16 | 2 files changed, 4 insertions(+), 2 deletions(-) | ||
17 | |||
18 | diff --git a/include/hw/arm/smmu-common.h b/include/hw/arm/smmu-common.h | ||
19 | index XXXXXXX..XXXXXXX 100644 | ||
20 | --- a/include/hw/arm/smmu-common.h | ||
21 | +++ b/include/hw/arm/smmu-common.h | ||
22 | @@ -XXX,XX +XXX,XX @@ | ||
23 | #define VMSA_IDXMSK(isz, strd, lvl) ((1ULL << \ | ||
24 | VMSA_BIT_LVL(isz, strd, lvl)) - 1) | ||
25 | |||
26 | +#define CACHED_ENTRY_TO_ADDR(ent, addr) ((ent)->entry.translated_addr + \ | ||
27 | + ((addr) & (ent)->entry.addr_mask)) | ||
28 | + | ||
29 | /* | ||
30 | * Page table walk error types | ||
31 | */ | ||
32 | diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c | ||
33 | index XXXXXXX..XXXXXXX 100644 | ||
34 | --- a/hw/arm/smmuv3.c | ||
35 | +++ b/hw/arm/smmuv3.c | ||
36 | @@ -XXX,XX +XXX,XX @@ epilogue: | ||
37 | switch (status) { | ||
38 | case SMMU_TRANS_SUCCESS: | ||
39 | entry.perm = cached_entry->entry.perm; | ||
40 | - entry.translated_addr = cached_entry->entry.translated_addr + | ||
41 | - (addr & cached_entry->entry.addr_mask); | ||
42 | + entry.translated_addr = CACHED_ENTRY_TO_ADDR(cached_entry, addr); | ||
43 | entry.addr_mask = cached_entry->entry.addr_mask; | ||
44 | trace_smmuv3_translate_success(mr->parent_obj.name, sid, addr, | ||
45 | entry.translated_addr, entry.perm, | ||
46 | -- | ||
47 | 2.34.1 | ||
48 | |||
49 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Mostafa Saleh <smostafa@google.com> | ||
2 | 1 | ||
3 | According to ARM SMMU architecture specification (ARM IHI 0070 F.b), | ||
4 | In "5.2 Stream Table Entry": | ||
5 | [51:6] S1ContextPtr | ||
6 | If Config[1] == 1 (stage 2 enabled), this pointer is an IPA translated by | ||
7 | stage 2 and the programmed value must be within the range of the IAS. | ||
8 | |||
9 | In "5.4.1 CD notes": | ||
10 | The translation table walks performed from TTB0 or TTB1 are always performed | ||
11 | in IPA space if stage 2 translations are enabled. | ||
12 | |||
13 | This patch implements translation of the S1 context descriptor pointer and | ||
14 | TTBx base addresses through the S2 stage (IPA -> PA) | ||
15 | |||
16 | smmuv3_do_translate() is updated to take one extra argument, the | ||
17 | translation class; this is useful to: | ||
18 | - Decide whether a translation is stage-2 only or uses the STE config. | ||
19 | - Populate the class in case of faults; WALK_EABT is left unchanged | ||
20 | for stage-1 as it is always IN, while stage-2 would match the | ||
21 | used class (TT, IN, CD). This will change slightly when the ptw | ||
22 | supports nested translation, as it can also issue a TT event with | ||
23 | class IN. | ||
24 | |||
25 | In the case of a stage-2-only translation, used in the context of | ||
26 | nested translation, the stage and asid are saved and restored before | ||
27 | and after calling smmu_translate(). | ||
28 | |||
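
A minimal sketch of the save/restore pattern described above; the types and the stage-2 lookup helper are simplified stand-ins, not the real QEMU definitions.

    #include <stdint.h>

    typedef enum { SMMU_STAGE_1, SMMU_STAGE_2, SMMU_NESTED } SMMUStage;
    typedef struct { SMMUStage stage; int asid; } SMMUTransCfg;

    /* stand-in for smmu_translate(): identity mapping keeps the sketch compilable */
    static uint64_t stage2_lookup(SMMUTransCfg *cfg, uint64_t ipa)
    {
        (void)cfg;
        return ipa;
    }

    /* translate a CD or TTBx address (an IPA) through stage-2 only */
    static uint64_t translate_descriptor_ipa(SMMUTransCfg *cfg, uint64_t ipa)
    {
        int saved_asid = cfg->asid;
        SMMUStage saved_stage = cfg->stage;
        uint64_t pa;

        cfg->asid = -1;               /* stage-2 entries are tagged with ASID -1  */
        cfg->stage = SMMU_STAGE_2;    /* force a stage-2 only walk for this fetch */
        pa = stage2_lookup(cfg, ipa);
        cfg->asid = saved_asid;       /* restore the caller's nested config       */
        cfg->stage = saved_stage;
        return pa;
    }

    int main(void)
    {
        SMMUTransCfg cfg = { .stage = SMMU_NESTED, .asid = 7 };

        return translate_descriptor_ipa(&cfg, 0x1000) == 0x1000 ? 0 : 1;
    }
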
29 | Translating CD or TTBx can fail for the following reasons: | ||
30 | 1) Large address size: This is described in | ||
31 | (3.4.3 Address sizes of SMMU-originated accesses) | ||
32 | - For CD ptr larger than IAS, for SMMUv3.1, it can trigger either | ||
33 | C_BAD_STE or a Translation fault; we implement the latter as it | ||
34 | requires no extra code. | ||
35 | - For TTBx, if larger than the effective stage 1 output address size, it | ||
36 | triggers C_BAD_CD. | ||
37 | |||
38 | 2) Faults from PTWs (7.3 Event records) | ||
39 | - F_ADDR_SIZE: large address size after first level causes stage 2 Address | ||
40 | Size fault (Also in 3.4.3 Address sizes of SMMU-originated accesses) | ||
41 | - F_PERMISSION: Same as an address translation. However, when | ||
42 | CLASS == CD, the access is implicitly Data and a read. | ||
43 | - F_ACCESS: Same as an address translation. | ||
44 | - F_TRANSLATION: Same as an address translation. | ||
45 | - F_WALK_EABT: Same as an address translation. | ||
46 | These are already implemented in the PTW logic, so no extra handling | ||
47 | required. | ||
48 | |||
49 | As the iova is not known in the CD and TTBx translation contexts, | ||
50 | setting the InputAddr was removed from "smmuv3_do_translate"; it is | ||
51 | now set afterwards in "smmuv3_translate" by the new function "smmuv3_fixup_event" | ||
52 | |||
53 | Signed-off-by: Mostafa Saleh <smostafa@google.com> | ||
54 | Reviewed-by: Jean-Philippe Brucker <jean-philippe@linaro.org> | ||
55 | Reviewed-by: Eric Auger <eric.auger@redhat.com> | ||
56 | Reviewed-by: Alex Bennée <alex.bennee@linaro.org> | ||
57 | Message-id: 20240715084519.1189624-9-smostafa@google.com | ||
58 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
59 | --- | ||
60 | hw/arm/smmuv3.c | 120 +++++++++++++++++++++++++++++++++++++++++------- | ||
61 | 1 file changed, 103 insertions(+), 17 deletions(-) | ||
62 | |||
63 | diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c | ||
64 | index XXXXXXX..XXXXXXX 100644 | ||
65 | --- a/hw/arm/smmuv3.c | ||
66 | +++ b/hw/arm/smmuv3.c | ||
67 | @@ -XXX,XX +XXX,XX @@ static int smmu_get_ste(SMMUv3State *s, dma_addr_t addr, STE *buf, | ||
68 | |||
69 | } | ||
70 | |||
71 | +static SMMUTranslationStatus smmuv3_do_translate(SMMUv3State *s, hwaddr addr, | ||
72 | + SMMUTransCfg *cfg, | ||
73 | + SMMUEventInfo *event, | ||
74 | + IOMMUAccessFlags flag, | ||
75 | + SMMUTLBEntry **out_entry, | ||
76 | + SMMUTranslationClass class); | ||
77 | /* @ssid > 0 not supported yet */ | ||
78 | -static int smmu_get_cd(SMMUv3State *s, STE *ste, uint32_t ssid, | ||
79 | - CD *buf, SMMUEventInfo *event) | ||
80 | +static int smmu_get_cd(SMMUv3State *s, STE *ste, SMMUTransCfg *cfg, | ||
81 | + uint32_t ssid, CD *buf, SMMUEventInfo *event) | ||
82 | { | ||
83 | dma_addr_t addr = STE_CTXPTR(ste); | ||
84 | int ret, i; | ||
85 | + SMMUTranslationStatus status; | ||
86 | + SMMUTLBEntry *entry; | ||
87 | |||
88 | trace_smmuv3_get_cd(addr); | ||
89 | + | ||
90 | + if (cfg->stage == SMMU_NESTED) { | ||
91 | + status = smmuv3_do_translate(s, addr, cfg, event, | ||
92 | + IOMMU_RO, &entry, SMMU_CLASS_CD); | ||
93 | + | ||
94 | + /* Same PTW faults are reported but with CLASS = CD. */ | ||
95 | + if (status != SMMU_TRANS_SUCCESS) { | ||
96 | + return -EINVAL; | ||
97 | + } | ||
98 | + | ||
99 | + addr = CACHED_ENTRY_TO_ADDR(entry, addr); | ||
100 | + } | ||
101 | + | ||
102 | /* TODO: guarantee 64-bit single-copy atomicity */ | ||
103 | ret = dma_memory_read(&address_space_memory, addr, buf, sizeof(*buf), | ||
104 | MEMTXATTRS_UNSPECIFIED); | ||
105 | @@ -XXX,XX +XXX,XX @@ static int smmu_find_ste(SMMUv3State *s, uint32_t sid, STE *ste, | ||
106 | return 0; | ||
107 | } | ||
108 | |||
109 | -static int decode_cd(SMMUTransCfg *cfg, CD *cd, SMMUEventInfo *event) | ||
110 | +static int decode_cd(SMMUv3State *s, SMMUTransCfg *cfg, | ||
111 | + CD *cd, SMMUEventInfo *event) | ||
112 | { | ||
113 | int ret = -EINVAL; | ||
114 | int i; | ||
115 | + SMMUTranslationStatus status; | ||
116 | + SMMUTLBEntry *entry; | ||
117 | |||
118 | if (!CD_VALID(cd) || !CD_AARCH64(cd)) { | ||
119 | goto bad_cd; | ||
120 | @@ -XXX,XX +XXX,XX @@ static int decode_cd(SMMUTransCfg *cfg, CD *cd, SMMUEventInfo *event) | ||
121 | |||
122 | tt->tsz = tsz; | ||
123 | tt->ttb = CD_TTB(cd, i); | ||
124 | + | ||
125 | if (tt->ttb & ~(MAKE_64BIT_MASK(0, cfg->oas))) { | ||
126 | goto bad_cd; | ||
127 | } | ||
128 | + | ||
129 | + /* Translate the TTBx, from IPA to PA if nesting is enabled. */ | ||
130 | + if (cfg->stage == SMMU_NESTED) { | ||
131 | + status = smmuv3_do_translate(s, tt->ttb, cfg, event, IOMMU_RO, | ||
132 | + &entry, SMMU_CLASS_TT); | ||
133 | + /* | ||
134 | + * Same PTW faults are reported but with CLASS = TT. | ||
135 | + * If TTBx is larger than the effective stage 1 output address | ||
136 | + * size, it reports C_BAD_CD, which is handled by the above case. | ||
137 | + */ | ||
138 | + if (status != SMMU_TRANS_SUCCESS) { | ||
139 | + return -EINVAL; | ||
140 | + } | ||
141 | + tt->ttb = CACHED_ENTRY_TO_ADDR(entry, tt->ttb); | ||
142 | + } | ||
143 | + | ||
144 | tt->had = CD_HAD(cd, i); | ||
145 | trace_smmuv3_decode_cd_tt(i, tt->tsz, tt->ttb, tt->granule_sz, tt->had); | ||
146 | } | ||
147 | @@ -XXX,XX +XXX,XX @@ static int smmuv3_decode_config(IOMMUMemoryRegion *mr, SMMUTransCfg *cfg, | ||
148 | return 0; | ||
149 | } | ||
150 | |||
151 | - ret = smmu_get_cd(s, &ste, 0 /* ssid */, &cd, event); | ||
152 | + ret = smmu_get_cd(s, &ste, cfg, 0 /* ssid */, &cd, event); | ||
153 | if (ret) { | ||
154 | return ret; | ||
155 | } | ||
156 | |||
157 | - return decode_cd(cfg, &cd, event); | ||
158 | + return decode_cd(s, cfg, &cd, event); | ||
159 | } | ||
160 | |||
161 | /** | ||
162 | @@ -XXX,XX +XXX,XX @@ static SMMUTranslationStatus smmuv3_do_translate(SMMUv3State *s, hwaddr addr, | ||
163 | SMMUTransCfg *cfg, | ||
164 | SMMUEventInfo *event, | ||
165 | IOMMUAccessFlags flag, | ||
166 | - SMMUTLBEntry **out_entry) | ||
167 | + SMMUTLBEntry **out_entry, | ||
168 | + SMMUTranslationClass class) | ||
169 | { | ||
170 | SMMUPTWEventInfo ptw_info = {}; | ||
171 | SMMUState *bs = ARM_SMMU(s); | ||
172 | SMMUTLBEntry *cached_entry = NULL; | ||
173 | + int asid, stage; | ||
174 | + bool desc_s2_translation = class != SMMU_CLASS_IN; | ||
175 | + | ||
176 | + /* | ||
177 | + * The function uses the argument class to identify which stage is used: | ||
178 | + * - CLASS = IN: Means an input translation, determine the stage from STE. | ||
179 | + * - CLASS = CD: Means the addr is an IPA of the CD, and it would be | ||
180 | + * translated using the stage-2. | ||
181 | + * - CLASS = TT: Means the addr is an IPA of the stage-1 translation table | ||
182 | + * and it would be translated using the stage-2. | ||
183 | + * For the last 2 cases instead of having intrusive changes in the common | ||
184 | + * logic, we modify the cfg to be a stage-2 translation only in case of | ||
185 | + * nested, and then restore it after. | ||
186 | + */ | ||
187 | + if (desc_s2_translation) { | ||
188 | + asid = cfg->asid; | ||
189 | + stage = cfg->stage; | ||
190 | + cfg->asid = -1; | ||
191 | + cfg->stage = SMMU_STAGE_2; | ||
192 | + } | ||
193 | |||
194 | cached_entry = smmu_translate(bs, cfg, addr, flag, &ptw_info); | ||
195 | + | ||
196 | + if (desc_s2_translation) { | ||
197 | + cfg->asid = asid; | ||
198 | + cfg->stage = stage; | ||
199 | + } | ||
200 | + | ||
201 | if (!cached_entry) { | ||
202 | /* All faults from PTW has S2 field. */ | ||
203 | event->u.f_walk_eabt.s2 = (ptw_info.stage == SMMU_STAGE_2); | ||
204 | switch (ptw_info.type) { | ||
205 | case SMMU_PTW_ERR_WALK_EABT: | ||
206 | event->type = SMMU_EVT_F_WALK_EABT; | ||
207 | - event->u.f_walk_eabt.addr = addr; | ||
208 | event->u.f_walk_eabt.rnw = flag & 0x1; | ||
209 | event->u.f_walk_eabt.class = (ptw_info.stage == SMMU_STAGE_2) ? | ||
210 | - SMMU_CLASS_IN : SMMU_CLASS_TT; | ||
211 | + class : SMMU_CLASS_TT; | ||
212 | event->u.f_walk_eabt.addr2 = ptw_info.addr; | ||
213 | break; | ||
214 | case SMMU_PTW_ERR_TRANSLATION: | ||
215 | if (PTW_RECORD_FAULT(cfg)) { | ||
216 | event->type = SMMU_EVT_F_TRANSLATION; | ||
217 | - event->u.f_translation.addr = addr; | ||
218 | event->u.f_translation.addr2 = ptw_info.addr; | ||
219 | - event->u.f_translation.class = SMMU_CLASS_IN; | ||
220 | + event->u.f_translation.class = class; | ||
221 | event->u.f_translation.rnw = flag & 0x1; | ||
222 | } | ||
223 | break; | ||
224 | case SMMU_PTW_ERR_ADDR_SIZE: | ||
225 | if (PTW_RECORD_FAULT(cfg)) { | ||
226 | event->type = SMMU_EVT_F_ADDR_SIZE; | ||
227 | - event->u.f_addr_size.addr = addr; | ||
228 | event->u.f_addr_size.addr2 = ptw_info.addr; | ||
229 | - event->u.f_addr_size.class = SMMU_CLASS_IN; | ||
230 | + event->u.f_addr_size.class = class; | ||
231 | event->u.f_addr_size.rnw = flag & 0x1; | ||
232 | } | ||
233 | break; | ||
234 | case SMMU_PTW_ERR_ACCESS: | ||
235 | if (PTW_RECORD_FAULT(cfg)) { | ||
236 | event->type = SMMU_EVT_F_ACCESS; | ||
237 | - event->u.f_access.addr = addr; | ||
238 | event->u.f_access.addr2 = ptw_info.addr; | ||
239 | - event->u.f_access.class = SMMU_CLASS_IN; | ||
240 | + event->u.f_access.class = class; | ||
241 | event->u.f_access.rnw = flag & 0x1; | ||
242 | } | ||
243 | break; | ||
244 | case SMMU_PTW_ERR_PERMISSION: | ||
245 | if (PTW_RECORD_FAULT(cfg)) { | ||
246 | event->type = SMMU_EVT_F_PERMISSION; | ||
247 | - event->u.f_permission.addr = addr; | ||
248 | event->u.f_permission.addr2 = ptw_info.addr; | ||
249 | - event->u.f_permission.class = SMMU_CLASS_IN; | ||
250 | + event->u.f_permission.class = class; | ||
251 | event->u.f_permission.rnw = flag & 0x1; | ||
252 | } | ||
253 | break; | ||
254 | @@ -XXX,XX +XXX,XX @@ static SMMUTranslationStatus smmuv3_do_translate(SMMUv3State *s, hwaddr addr, | ||
255 | return SMMU_TRANS_SUCCESS; | ||
256 | } | ||
257 | |||
258 | +/* | ||
259 | + * Sets the InputAddr for an SMMU_TRANS_ERROR, as it can't be | ||
260 | + * set from all contexts, as smmuv3_get_config() can return | ||
261 | + * translation faults in case of nested translation (for CD | ||
262 | + * and TTBx). But in that case the iova is not known. | ||
263 | + */ | ||
264 | +static void smmuv3_fixup_event(SMMUEventInfo *event, hwaddr iova) | ||
265 | +{ | ||
266 | + switch (event->type) { | ||
267 | + case SMMU_EVT_F_WALK_EABT: | ||
268 | + case SMMU_EVT_F_TRANSLATION: | ||
269 | + case SMMU_EVT_F_ADDR_SIZE: | ||
270 | + case SMMU_EVT_F_ACCESS: | ||
271 | + case SMMU_EVT_F_PERMISSION: | ||
272 | + event->u.f_walk_eabt.addr = iova; | ||
273 | + break; | ||
274 | + default: | ||
275 | + break; | ||
276 | + } | ||
277 | +} | ||
278 | + | ||
279 | /* Entry point to SMMU, does everything. */ | ||
280 | static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion *mr, hwaddr addr, | ||
281 | IOMMUAccessFlags flag, int iommu_idx) | ||
282 | @@ -XXX,XX +XXX,XX @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion *mr, hwaddr addr, | ||
283 | goto epilogue; | ||
284 | } | ||
285 | |||
286 | - status = smmuv3_do_translate(s, addr, cfg, &event, flag, &cached_entry); | ||
287 | + status = smmuv3_do_translate(s, addr, cfg, &event, flag, | ||
288 | + &cached_entry, SMMU_CLASS_IN); | ||
289 | |||
290 | epilogue: | ||
291 | qemu_mutex_unlock(&s->mutex); | ||
292 | @@ -XXX,XX +XXX,XX @@ epilogue: | ||
293 | entry.perm); | ||
294 | break; | ||
295 | case SMMU_TRANS_ERROR: | ||
296 | + smmuv3_fixup_event(&event, addr); | ||
297 | qemu_log_mask(LOG_GUEST_ERROR, | ||
298 | "%s translation failed for iova=0x%"PRIx64" (%s)\n", | ||
299 | mr->parent_obj.name, addr, smmu_event_string(event.type)); | ||
300 | -- | ||
301 | 2.34.1 | ||
302 | |||
303 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Mostafa Saleh <smostafa@google.com> | ||
2 | 1 | ||
3 | In the next patch, combine_tlb() will be added, which combines two TLB | ||
4 | entries into one for nested translations, choosing the granule | ||
5 | and level from the smallest entry. | ||
6 | |||
7 | This means that with nested translation, an entry can be cached with | ||
8 | the granule of stage-2 and not stage-1. | ||
9 | |||
10 | However, currently, the lookup for an IOVA is done with the input | ||
11 | stage granule, which is stage-1 for a nested configuration, and that | ||
12 | will not work with the above logic. | ||
13 | This patch reworks the lookup in that case so it falls back to the | ||
14 | stage-2 granule if no entry is found using the stage-1 granule. | ||
15 | |||
16 | Also, drop aligning the iova to avoid over-aligning in case the iova | ||
17 | is cached with a smaller granule; the TLB lookup will align the iova | ||
18 | anyway for each granule and level, and the page table walker doesn't | ||
19 | consider the page offset bits. | ||
20 | |||
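
A simplified sketch of the fallback described above (not the QEMU implementation; the lookup helper is a stand-in that pretends only 64K entries are cached): try the stage-1 granule first, then retry with the stage-2 granule for a nested config whose granules differ.

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    typedef struct { uint8_t granule_sz; } TTInfo;             /* log2 of page size */
    typedef struct { bool nested; uint8_t s2_granule_sz; } Cfg;

    /* stand-in for the per-level hash lookup */
    static bool lookup_all_levels(const TTInfo *tt, uint64_t iova)
    {
        (void)iova;
        return tt->granule_sz == 16;   /* pretend only 64K entries exist */
    }

    static bool tlb_lookup(const Cfg *cfg, TTInfo *tt, uint64_t iova)
    {
        bool hit = lookup_all_levels(tt, iova);

        /* nested: the combined entry may have been cached with the s2 granule */
        if (!hit && cfg->nested && cfg->s2_granule_sz != tt->granule_sz) {
            tt->granule_sz = cfg->s2_granule_sz;
            hit = lookup_all_levels(tt, iova);
        }
        return hit;
    }

    int main(void)
    {
        Cfg cfg = { .nested = true, .s2_granule_sz = 16 };  /* 64K stage-2 */
        TTInfo tt = { .granule_sz = 12 };                   /* 4K stage-1  */

        printf("hit after fallback: %s\n",
               tlb_lookup(&cfg, &tt, 0x1000) ? "yes" : "no");
        return 0;
    }
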
21 | Signed-off-by: Mostafa Saleh <smostafa@google.com> | ||
22 | Reviewed-by: Jean-Philippe Brucker <jean-philippe@linaro.org> | ||
23 | Reviewed-by: Eric Auger <eric.auger@redhat.com> | ||
24 | Reviewed-by: Alex Bennée <alex.bennee@linaro.org> | ||
25 | Message-id: 20240715084519.1189624-10-smostafa@google.com | ||
26 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
27 | --- | ||
28 | hw/arm/smmu-common.c | 64 +++++++++++++++++++++++++++++--------------- | ||
29 | 1 file changed, 43 insertions(+), 21 deletions(-) | ||
30 | |||
31 | diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c | ||
32 | index XXXXXXX..XXXXXXX 100644 | ||
33 | --- a/hw/arm/smmu-common.c | ||
34 | +++ b/hw/arm/smmu-common.c | ||
35 | @@ -XXX,XX +XXX,XX @@ SMMUIOTLBKey smmu_get_iotlb_key(int asid, int vmid, uint64_t iova, | ||
36 | return key; | ||
37 | } | ||
38 | |||
39 | -SMMUTLBEntry *smmu_iotlb_lookup(SMMUState *bs, SMMUTransCfg *cfg, | ||
40 | - SMMUTransTableInfo *tt, hwaddr iova) | ||
41 | +static SMMUTLBEntry *smmu_iotlb_lookup_all_levels(SMMUState *bs, | ||
42 | + SMMUTransCfg *cfg, | ||
43 | + SMMUTransTableInfo *tt, | ||
44 | + hwaddr iova) | ||
45 | { | ||
46 | uint8_t tg = (tt->granule_sz - 10) / 2; | ||
47 | uint8_t inputsize = 64 - tt->tsz; | ||
48 | @@ -XXX,XX +XXX,XX @@ SMMUTLBEntry *smmu_iotlb_lookup(SMMUState *bs, SMMUTransCfg *cfg, | ||
49 | } | ||
50 | level++; | ||
51 | } | ||
52 | + return entry; | ||
53 | +} | ||
54 | + | ||
55 | +/** | ||
56 | + * smmu_iotlb_lookup - Look up for a TLB entry. | ||
57 | + * @bs: SMMU state which includes the TLB instance | ||
58 | + * @cfg: Configuration of the translation | ||
59 | + * @tt: Translation table info (granule and tsz) | ||
60 | + * @iova: IOVA address to lookup | ||
61 | + * | ||
62 | + * returns a valid entry on success, otherwise NULL. | ||
63 | + * In case of nested translation, tt can be updated to include | ||
64 | + * the granule of the found entry as it might different from | ||
65 | + * the IOVA granule. | ||
66 | + */ | ||
67 | +SMMUTLBEntry *smmu_iotlb_lookup(SMMUState *bs, SMMUTransCfg *cfg, | ||
68 | + SMMUTransTableInfo *tt, hwaddr iova) | ||
69 | +{ | ||
70 | + SMMUTLBEntry *entry = NULL; | ||
71 | + | ||
72 | + entry = smmu_iotlb_lookup_all_levels(bs, cfg, tt, iova); | ||
73 | + /* | ||
74 | + * For nested translation also try the s2 granule, as the TLB will insert | ||
75 | + * it if the size of s2 tlb entry was smaller. | ||
76 | + */ | ||
77 | + if (!entry && (cfg->stage == SMMU_NESTED) && | ||
78 | + (cfg->s2cfg.granule_sz != tt->granule_sz)) { | ||
79 | + tt->granule_sz = cfg->s2cfg.granule_sz; | ||
80 | + entry = smmu_iotlb_lookup_all_levels(bs, cfg, tt, iova); | ||
81 | + } | ||
82 | |||
83 | if (entry) { | ||
84 | cfg->iotlb_hits++; | ||
85 | @@ -XXX,XX +XXX,XX @@ int smmu_ptw(SMMUTransCfg *cfg, dma_addr_t iova, IOMMUAccessFlags perm, | ||
86 | SMMUTLBEntry *smmu_translate(SMMUState *bs, SMMUTransCfg *cfg, dma_addr_t addr, | ||
87 | IOMMUAccessFlags flag, SMMUPTWEventInfo *info) | ||
88 | { | ||
89 | - uint64_t page_mask, aligned_addr; | ||
90 | SMMUTLBEntry *cached_entry = NULL; | ||
91 | SMMUTransTableInfo *tt; | ||
92 | int status; | ||
93 | |||
94 | /* | ||
95 | - * Combined attributes used for TLB lookup, as only one stage is supported, | ||
96 | - * it will hold attributes based on the enabled stage. | ||
97 | + * Combined attributes used for TLB lookup, holds the attributes for | ||
98 | + * the input stage. | ||
99 | */ | ||
100 | SMMUTransTableInfo tt_combined; | ||
101 | |||
102 | - if (cfg->stage == SMMU_STAGE_1) { | ||
103 | + if (cfg->stage == SMMU_STAGE_2) { | ||
104 | + /* Stage2. */ | ||
105 | + tt_combined.granule_sz = cfg->s2cfg.granule_sz; | ||
106 | + tt_combined.tsz = cfg->s2cfg.tsz; | ||
107 | + } else { | ||
108 | /* Select stage1 translation table. */ | ||
109 | tt = select_tt(cfg, addr); | ||
110 | if (!tt) { | ||
111 | @@ -XXX,XX +XXX,XX @@ SMMUTLBEntry *smmu_translate(SMMUState *bs, SMMUTransCfg *cfg, dma_addr_t addr, | ||
112 | } | ||
113 | tt_combined.granule_sz = tt->granule_sz; | ||
114 | tt_combined.tsz = tt->tsz; | ||
115 | - | ||
116 | - } else { | ||
117 | - /* Stage2. */ | ||
118 | - tt_combined.granule_sz = cfg->s2cfg.granule_sz; | ||
119 | - tt_combined.tsz = cfg->s2cfg.tsz; | ||
120 | } | ||
121 | |||
122 | - /* | ||
123 | - * TLB lookup looks for granule and input size for a translation stage, | ||
124 | - * as only one stage is supported right now, choose the right values | ||
125 | - * from the configuration. | ||
126 | - */ | ||
127 | - page_mask = (1ULL << tt_combined.granule_sz) - 1; | ||
128 | - aligned_addr = addr & ~page_mask; | ||
129 | - | ||
130 | - cached_entry = smmu_iotlb_lookup(bs, cfg, &tt_combined, aligned_addr); | ||
131 | + cached_entry = smmu_iotlb_lookup(bs, cfg, &tt_combined, addr); | ||
132 | if (cached_entry) { | ||
133 | if ((flag & IOMMU_WO) && !(cached_entry->entry.perm & IOMMU_WO)) { | ||
134 | info->type = SMMU_PTW_ERR_PERMISSION; | ||
135 | @@ -XXX,XX +XXX,XX @@ SMMUTLBEntry *smmu_translate(SMMUState *bs, SMMUTransCfg *cfg, dma_addr_t addr, | ||
136 | } | ||
137 | |||
138 | cached_entry = g_new0(SMMUTLBEntry, 1); | ||
139 | - status = smmu_ptw(cfg, aligned_addr, flag, cached_entry, info); | ||
140 | + status = smmu_ptw(cfg, addr, flag, cached_entry, info); | ||
141 | if (status) { | ||
142 | g_free(cached_entry); | ||
143 | return NULL; | ||
144 | -- | ||
145 | 2.34.1 | ||
146 | |||
147 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Mostafa Saleh <smostafa@google.com> | ||
2 | 1 | ||
3 | This patch adds support for nested (combined) TLB entries. | ||
4 | The main function combine_tlb() is not used here but in the next | ||
5 | patches; it is introduced first to keep those patches simpler. | ||
6 | |||
7 | Main changes: | ||
8 | 1) New field added in the SMMUTLBEntry struct: parent_perm. For a | ||
9 | nested TLB entry it holds the stage-2 permission; this can be used | ||
10 | to know the origin of a permission fault from a cached entry, as | ||
11 | caching the “and” of the permissions loses it (sketched below). | ||
12 | |||
13 | SMMUPTWEventInfo is used to hold information about PTW faults so | ||
14 | the event can be populated. The value of stage used to be set | ||
15 | based on the current stage for TLB permission faults; with | ||
16 | parent_perm, it is now set based on which permission set is | ||
17 | missing the required permission. | ||
18 | |||
19 | When nesting is not enabled it has the same value as perm, which | ||
20 | doesn't change the logic. | ||
21 | |||
22 | 2) As a combined TLB implementation is used, the combination logic | ||
23 | chooses: | ||
24 | - tg and level from the entry which has the smallest addr_mask. | ||
25 | - Based on that the iova that would be cached is recalculated. | ||
26 | - Translated_addr is chosen from stage-2. | ||
27 | |||
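
A standalone sketch of point (1) above: with only the combined permission cached, a write fault could not be attributed to a stage, while keeping the stage-2 permission separately makes the origin recoverable. The types and names below are simplified stand-ins, not the QEMU ones.

    #include <stdio.h>

    enum { PERM_READ = 1, PERM_WRITE = 2 };

    typedef struct {
        int perm;        /* stage-1 permissions (equals the combined perm when not nested) */
        int parent_perm; /* stage-2 permissions of a nested entry */
    } CachedEntry;

    /* 0 = no fault, 1 = stage-1 denied the write, 2 = stage-2 denied it */
    static int write_fault_stage(const CachedEntry *e)
    {
        if (e->perm & e->parent_perm & PERM_WRITE) {
            return 0;
        }
        return (e->perm & PERM_WRITE) ? 2 : 1;
    }

    int main(void)
    {
        CachedEntry e = { .perm = PERM_READ | PERM_WRITE,  /* stage-1 allows writes */
                          .parent_perm = PERM_READ };      /* stage-2 is read-only  */

        printf("write fault reported against stage %d\n", write_fault_stage(&e));
        return 0;
    }
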
28 | Reviewed-by: Eric Auger <eric.auger@redhat.com> | ||
29 | Reviewed-by: Jean-Philippe Brucker <jean-philippe@linaro.org> | ||
30 | Signed-off-by: Mostafa Saleh <smostafa@google.com> | ||
31 | Reviewed-by: Alex Bennée <alex.bennee@linaro.org> | ||
32 | Message-id: 20240715084519.1189624-11-smostafa@google.com | ||
33 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
34 | --- | ||
35 | include/hw/arm/smmu-common.h | 1 + | ||
36 | hw/arm/smmu-common.c | 37 ++++++++++++++++++++++++++++++++---- | ||
37 | 2 files changed, 34 insertions(+), 4 deletions(-) | ||
38 | |||
39 | diff --git a/include/hw/arm/smmu-common.h b/include/hw/arm/smmu-common.h | ||
40 | index XXXXXXX..XXXXXXX 100644 | ||
41 | --- a/include/hw/arm/smmu-common.h | ||
42 | +++ b/include/hw/arm/smmu-common.h | ||
43 | @@ -XXX,XX +XXX,XX @@ typedef struct SMMUTLBEntry { | ||
44 | IOMMUTLBEntry entry; | ||
45 | uint8_t level; | ||
46 | uint8_t granule; | ||
47 | + IOMMUAccessFlags parent_perm; | ||
48 | } SMMUTLBEntry; | ||
49 | |||
50 | /* Stage-2 configuration. */ | ||
51 | diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c | ||
52 | index XXXXXXX..XXXXXXX 100644 | ||
53 | --- a/hw/arm/smmu-common.c | ||
54 | +++ b/hw/arm/smmu-common.c | ||
55 | @@ -XXX,XX +XXX,XX @@ static int smmu_ptw_64_s1(SMMUTransCfg *cfg, | ||
56 | tlbe->entry.translated_addr = gpa; | ||
57 | tlbe->entry.iova = iova & ~mask; | ||
58 | tlbe->entry.addr_mask = mask; | ||
59 | - tlbe->entry.perm = PTE_AP_TO_PERM(ap); | ||
60 | + tlbe->parent_perm = PTE_AP_TO_PERM(ap); | ||
61 | + tlbe->entry.perm = tlbe->parent_perm; | ||
62 | tlbe->level = level; | ||
63 | tlbe->granule = granule_sz; | ||
64 | return 0; | ||
65 | @@ -XXX,XX +XXX,XX @@ static int smmu_ptw_64_s2(SMMUTransCfg *cfg, | ||
66 | tlbe->entry.translated_addr = gpa; | ||
67 | tlbe->entry.iova = ipa & ~mask; | ||
68 | tlbe->entry.addr_mask = mask; | ||
69 | - tlbe->entry.perm = s2ap; | ||
70 | + tlbe->parent_perm = s2ap; | ||
71 | + tlbe->entry.perm = tlbe->parent_perm; | ||
72 | tlbe->level = level; | ||
73 | tlbe->granule = granule_sz; | ||
74 | return 0; | ||
75 | @@ -XXX,XX +XXX,XX @@ error: | ||
76 | return -EINVAL; | ||
77 | } | ||
78 | |||
79 | +/* | ||
80 | + * combine S1 and S2 TLB entries into a single entry. | ||
81 | + * As a result the S1 entry is overridden with combined data. | ||
82 | + */ | ||
83 | +static void __attribute__((unused)) combine_tlb(SMMUTLBEntry *tlbe, | ||
84 | + SMMUTLBEntry *tlbe_s2, | ||
85 | + dma_addr_t iova, | ||
86 | + SMMUTransCfg *cfg) | ||
87 | +{ | ||
88 | + if (tlbe_s2->entry.addr_mask < tlbe->entry.addr_mask) { | ||
89 | + tlbe->entry.addr_mask = tlbe_s2->entry.addr_mask; | ||
90 | + tlbe->granule = tlbe_s2->granule; | ||
91 | + tlbe->level = tlbe_s2->level; | ||
92 | + } | ||
93 | + | ||
94 | + tlbe->entry.translated_addr = CACHED_ENTRY_TO_ADDR(tlbe_s2, | ||
95 | + tlbe->entry.translated_addr); | ||
96 | + | ||
97 | + tlbe->entry.iova = iova & ~tlbe->entry.addr_mask; | ||
98 | + /* parent_perm has s2 perm while perm keeps s1 perm. */ | ||
99 | + tlbe->parent_perm = tlbe_s2->entry.perm; | ||
100 | + return; | ||
101 | +} | ||
102 | + | ||
103 | /** | ||
104 | * smmu_ptw - Walk the page tables for an IOVA, according to @cfg | ||
105 | * | ||
106 | @@ -XXX,XX +XXX,XX @@ SMMUTLBEntry *smmu_translate(SMMUState *bs, SMMUTransCfg *cfg, dma_addr_t addr, | ||
107 | |||
108 | cached_entry = smmu_iotlb_lookup(bs, cfg, &tt_combined, addr); | ||
109 | if (cached_entry) { | ||
110 | - if ((flag & IOMMU_WO) && !(cached_entry->entry.perm & IOMMU_WO)) { | ||
111 | + if ((flag & IOMMU_WO) && !(cached_entry->entry.perm & | ||
112 | + cached_entry->parent_perm & IOMMU_WO)) { | ||
113 | info->type = SMMU_PTW_ERR_PERMISSION; | ||
114 | - info->stage = cfg->stage; | ||
115 | + info->stage = !(cached_entry->entry.perm & IOMMU_WO) ? | ||
116 | + SMMU_STAGE_1 : | ||
117 | + SMMU_STAGE_2; | ||
118 | return NULL; | ||
119 | } | ||
120 | return cached_entry; | ||
121 | -- | ||
122 | 2.34.1 | ||
123 | |||
124 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Mostafa Saleh <smostafa@google.com> | ||
2 | 1 | ||
3 | When nested translation is requested, do the following: | ||
4 | - Translate stage-1 table address IPA into PA through stage-2. | ||
5 | - Translate stage-1 table walk output (IPA) through stage-2. | ||
6 | - Create a single TLB entry from stage-1 and stage-2 translations | ||
7 | using logic introduced before. | ||
8 | |||
9 | smmu_ptw() has a new argument, SMMUState, which includes the TLB, as | ||
10 | the stage-1 table addresses can be cached in there. | ||
11 | |||
12 | Also in smmu_ptw(), a separate path is used for nesting to simplify | ||
13 | the code, although some logic could be combined. | ||
14 | |||
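
A condensed sketch of the nested path described above (not the actual smmu_ptw(); the walkers are identity-mapping 4K stand-ins): a stage-1 walk, a stage-2 walk of the resulting IPA, and a combine step producing a single entry.

    #include <stdint.h>

    typedef struct {
        uint64_t translated_addr;
        uint64_t addr_mask;
    } Entry;

    /* stand-ins for the real page table walkers */
    static int walk_stage1(uint64_t iova, Entry *e)
    {
        e->translated_addr = iova & ~0xfffULL;
        e->addr_mask = 0xfff;
        return 0;
    }

    static int walk_stage2(uint64_t ipa, Entry *e)
    {
        e->translated_addr = ipa & ~0xfffULL;
        e->addr_mask = 0xfff;
        return 0;
    }

    /* keep the smaller mapping; stage-2 provides the final output address */
    static void combine(Entry *s1, const Entry *s2)
    {
        if (s2->addr_mask < s1->addr_mask) {
            s1->addr_mask = s2->addr_mask;
        }
        s1->translated_addr = s2->translated_addr +
                              (s1->translated_addr & s2->addr_mask);
    }

    static int ptw_nested(uint64_t iova, Entry *out)
    {
        Entry s2;
        uint64_t ipa;

        if (walk_stage1(iova, out)) {            /* stage-1: IOVA -> IPA   */
            return -1;
        }
        ipa = out->translated_addr + (iova & out->addr_mask);
        if (walk_stage2(ipa, &s2)) {             /* stage-2: IPA -> PA     */
            return -1;
        }
        combine(out, &s2);                       /* one combined TLB entry */
        return 0;
    }

    int main(void)
    {
        Entry e;

        return ptw_nested(0x123456, &e);
    }
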
15 | With nested translation the class of a translation fault can differ | ||
16 | from the class of the translation itself, as faults from translating | ||
17 | stage-1 tables are considered as CLASS_TT and not CLASS_IN. A new | ||
18 | member "is_ipa_descriptor" is added to "SMMUPTWEventInfo" to | ||
19 | distinguish faults from walking the stage-1 translation table from | ||
20 | faults from translating an IPA for a transaction. | ||
21 | |||
22 | Signed-off-by: Mostafa Saleh <smostafa@google.com> | ||
23 | Reviewed-by: Jean-Philippe Brucker <jean-philippe@linaro.org> | ||
24 | Reviewed-by: Eric Auger <eric.auger@redhat.com> | ||
25 | Reviewed-by: Alex Bennée <alex.bennee@linaro.org> | ||
26 | Message-id: 20240715084519.1189624-12-smostafa@google.com | ||
27 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
28 | --- | ||
29 | include/hw/arm/smmu-common.h | 7 ++-- | ||
30 | hw/arm/smmu-common.c | 74 +++++++++++++++++++++++++++++++----- | ||
31 | hw/arm/smmuv3.c | 14 +++++++ | ||
32 | 3 files changed, 82 insertions(+), 13 deletions(-) | ||
33 | |||
34 | diff --git a/include/hw/arm/smmu-common.h b/include/hw/arm/smmu-common.h | ||
35 | index XXXXXXX..XXXXXXX 100644 | ||
36 | --- a/include/hw/arm/smmu-common.h | ||
37 | +++ b/include/hw/arm/smmu-common.h | ||
38 | @@ -XXX,XX +XXX,XX @@ typedef struct SMMUPTWEventInfo { | ||
39 | SMMUStage stage; | ||
40 | SMMUPTWEventType type; | ||
41 | dma_addr_t addr; /* fetched address that induced an abort, if any */ | ||
42 | + bool is_ipa_descriptor; /* src for fault in nested translation. */ | ||
43 | } SMMUPTWEventInfo; | ||
44 | |||
45 | typedef struct SMMUTransTableInfo { | ||
46 | @@ -XXX,XX +XXX,XX @@ static inline uint16_t smmu_get_sid(SMMUDevice *sdev) | ||
47 | * smmu_ptw - Perform the page table walk for a given iova / access flags | ||
48 | * pair, according to @cfg translation config | ||
49 | */ | ||
50 | -int smmu_ptw(SMMUTransCfg *cfg, dma_addr_t iova, IOMMUAccessFlags perm, | ||
51 | - SMMUTLBEntry *tlbe, SMMUPTWEventInfo *info); | ||
52 | - | ||
53 | +int smmu_ptw(SMMUState *bs, SMMUTransCfg *cfg, dma_addr_t iova, | ||
54 | + IOMMUAccessFlags perm, SMMUTLBEntry *tlbe, | ||
55 | + SMMUPTWEventInfo *info); | ||
56 | |||
57 | /* | ||
58 | * smmu_translate - Look for a translation in TLB, if not, do a PTW. | ||
59 | diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c | ||
60 | index XXXXXXX..XXXXXXX 100644 | ||
61 | --- a/hw/arm/smmu-common.c | ||
62 | +++ b/hw/arm/smmu-common.c | ||
63 | @@ -XXX,XX +XXX,XX @@ SMMUTransTableInfo *select_tt(SMMUTransCfg *cfg, dma_addr_t iova) | ||
64 | return NULL; | ||
65 | } | ||
66 | |||
67 | +/* Translate stage-1 table address using stage-2 page table. */ | ||
68 | +static inline int translate_table_addr_ipa(SMMUState *bs, | ||
69 | + dma_addr_t *table_addr, | ||
70 | + SMMUTransCfg *cfg, | ||
71 | + SMMUPTWEventInfo *info) | ||
72 | +{ | ||
73 | + dma_addr_t addr = *table_addr; | ||
74 | + SMMUTLBEntry *cached_entry; | ||
75 | + int asid; | ||
76 | + | ||
77 | + /* | ||
78 | + * The translation table walks performed from TTB0 or TTB1 are always | ||
79 | + * performed in IPA space if stage 2 translations are enabled. | ||
80 | + */ | ||
81 | + asid = cfg->asid; | ||
82 | + cfg->stage = SMMU_STAGE_2; | ||
83 | + cfg->asid = -1; | ||
84 | + cached_entry = smmu_translate(bs, cfg, addr, IOMMU_RO, info); | ||
85 | + cfg->asid = asid; | ||
86 | + cfg->stage = SMMU_NESTED; | ||
87 | + | ||
88 | + if (cached_entry) { | ||
89 | + *table_addr = CACHED_ENTRY_TO_ADDR(cached_entry, addr); | ||
90 | + return 0; | ||
91 | + } | ||
92 | + | ||
93 | + info->stage = SMMU_STAGE_2; | ||
94 | + info->addr = addr; | ||
95 | + info->is_ipa_descriptor = true; | ||
96 | + return -EINVAL; | ||
97 | +} | ||
98 | + | ||
99 | /** | ||
100 | * smmu_ptw_64_s1 - VMSAv8-64 Walk of the page tables for a given IOVA | ||
101 | + * @bs: smmu state which includes TLB instance | ||
102 | * @cfg: translation config | ||
103 | * @iova: iova to translate | ||
104 | * @perm: access type | ||
105 | @@ -XXX,XX +XXX,XX @@ SMMUTransTableInfo *select_tt(SMMUTransCfg *cfg, dma_addr_t iova) | ||
106 | * Upon success, @tlbe is filled with translated_addr and entry | ||
107 | * permission rights. | ||
108 | */ | ||
109 | -static int smmu_ptw_64_s1(SMMUTransCfg *cfg, | ||
110 | +static int smmu_ptw_64_s1(SMMUState *bs, SMMUTransCfg *cfg, | ||
111 | dma_addr_t iova, IOMMUAccessFlags perm, | ||
112 | SMMUTLBEntry *tlbe, SMMUPTWEventInfo *info) | ||
113 | { | ||
114 | @@ -XXX,XX +XXX,XX @@ static int smmu_ptw_64_s1(SMMUTransCfg *cfg, | ||
115 | goto error; | ||
116 | } | ||
117 | baseaddr = get_table_pte_address(pte, granule_sz); | ||
118 | + if (cfg->stage == SMMU_NESTED) { | ||
119 | + if (translate_table_addr_ipa(bs, &baseaddr, cfg, info)) { | ||
120 | + goto error; | ||
121 | + } | ||
122 | + } | ||
123 | level++; | ||
124 | continue; | ||
125 | } else if (is_page_pte(pte, level)) { | ||
126 | @@ -XXX,XX +XXX,XX @@ error: | ||
127 | * combine S1 and S2 TLB entries into a single entry. | ||
128 | * As a result the S1 entry is overridden with combined data. | ||
129 | */ | ||
130 | -static void __attribute__((unused)) combine_tlb(SMMUTLBEntry *tlbe, | ||
131 | - SMMUTLBEntry *tlbe_s2, | ||
132 | - dma_addr_t iova, | ||
133 | - SMMUTransCfg *cfg) | ||
134 | +static void combine_tlb(SMMUTLBEntry *tlbe, SMMUTLBEntry *tlbe_s2, | ||
135 | + dma_addr_t iova, SMMUTransCfg *cfg) | ||
136 | { | ||
137 | if (tlbe_s2->entry.addr_mask < tlbe->entry.addr_mask) { | ||
138 | tlbe->entry.addr_mask = tlbe_s2->entry.addr_mask; | ||
139 | @@ -XXX,XX +XXX,XX @@ static void __attribute__((unused)) combine_tlb(SMMUTLBEntry *tlbe, | ||
140 | /** | ||
141 | * smmu_ptw - Walk the page tables for an IOVA, according to @cfg | ||
142 | * | ||
143 | + * @bs: smmu state which includes TLB instance | ||
144 | * @cfg: translation configuration | ||
145 | * @iova: iova to translate | ||
146 | * @perm: tentative access type | ||
147 | @@ -XXX,XX +XXX,XX @@ static void __attribute__((unused)) combine_tlb(SMMUTLBEntry *tlbe, | ||
148 | * | ||
149 | * return 0 on success | ||
150 | */ | ||
151 | -int smmu_ptw(SMMUTransCfg *cfg, dma_addr_t iova, IOMMUAccessFlags perm, | ||
152 | - SMMUTLBEntry *tlbe, SMMUPTWEventInfo *info) | ||
153 | +int smmu_ptw(SMMUState *bs, SMMUTransCfg *cfg, dma_addr_t iova, | ||
154 | + IOMMUAccessFlags perm, SMMUTLBEntry *tlbe, SMMUPTWEventInfo *info) | ||
155 | { | ||
156 | + int ret; | ||
157 | + SMMUTLBEntry tlbe_s2; | ||
158 | + dma_addr_t ipa; | ||
159 | + | ||
160 | if (cfg->stage == SMMU_STAGE_1) { | ||
161 | - return smmu_ptw_64_s1(cfg, iova, perm, tlbe, info); | ||
162 | + return smmu_ptw_64_s1(bs, cfg, iova, perm, tlbe, info); | ||
163 | } else if (cfg->stage == SMMU_STAGE_2) { | ||
164 | /* | ||
165 | * If bypassing stage 1(or unimplemented), the input address is passed | ||
166 | @@ -XXX,XX +XXX,XX @@ int smmu_ptw(SMMUTransCfg *cfg, dma_addr_t iova, IOMMUAccessFlags perm, | ||
167 | return smmu_ptw_64_s2(cfg, iova, perm, tlbe, info); | ||
168 | } | ||
169 | |||
170 | - g_assert_not_reached(); | ||
171 | + /* SMMU_NESTED. */ | ||
172 | + ret = smmu_ptw_64_s1(bs, cfg, iova, perm, tlbe, info); | ||
173 | + if (ret) { | ||
174 | + return ret; | ||
175 | + } | ||
176 | + | ||
177 | + ipa = CACHED_ENTRY_TO_ADDR(tlbe, iova); | ||
178 | + ret = smmu_ptw_64_s2(cfg, ipa, perm, &tlbe_s2, info); | ||
179 | + if (ret) { | ||
180 | + return ret; | ||
181 | + } | ||
182 | + | ||
183 | + combine_tlb(tlbe, &tlbe_s2, iova, cfg); | ||
184 | + return 0; | ||
185 | } | ||
186 | |||
187 | SMMUTLBEntry *smmu_translate(SMMUState *bs, SMMUTransCfg *cfg, dma_addr_t addr, | ||
188 | @@ -XXX,XX +XXX,XX @@ SMMUTLBEntry *smmu_translate(SMMUState *bs, SMMUTransCfg *cfg, dma_addr_t addr, | ||
189 | } | ||
190 | |||
191 | cached_entry = g_new0(SMMUTLBEntry, 1); | ||
192 | - status = smmu_ptw(cfg, addr, flag, cached_entry, info); | ||
193 | + status = smmu_ptw(bs, cfg, addr, flag, cached_entry, info); | ||
194 | if (status) { | ||
195 | g_free(cached_entry); | ||
196 | return NULL; | ||
197 | diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c | ||
198 | index XXXXXXX..XXXXXXX 100644 | ||
199 | --- a/hw/arm/smmuv3.c | ||
200 | +++ b/hw/arm/smmuv3.c | ||
201 | @@ -XXX,XX +XXX,XX @@ static SMMUTranslationStatus smmuv3_do_translate(SMMUv3State *s, hwaddr addr, | ||
202 | if (!cached_entry) { | ||
203 | /* All faults from PTW has S2 field. */ | ||
204 | event->u.f_walk_eabt.s2 = (ptw_info.stage == SMMU_STAGE_2); | ||
205 | + /* | ||
206 | + * Fault class is set as follows based on "class" input to | ||
207 | + * the function and to "ptw_info" from "smmu_translate()" | ||
208 | + * For stage-1: | ||
209 | + * - EABT => CLASS_TT (hardcoded) | ||
210 | + * - other events => CLASS_IN (input to function) | ||
211 | + * For stage-2 => CLASS_IN (input to function) | ||
212 | + * For nested, for all events: | ||
213 | + * - CD fetch => CLASS_CD (input to function) | ||
214 | + * - walking stage 1 translation table => CLASS_TT (from | ||
215 | + * is_ipa_descriptor or input in case of TTBx) | ||
216 | + * - s2 translation => CLASS_IN (input to function) | ||
217 | + */ | ||
218 | + class = ptw_info.is_ipa_descriptor ? SMMU_CLASS_TT : class; | ||
219 | switch (ptw_info.type) { | ||
220 | case SMMU_PTW_ERR_WALK_EABT: | ||
221 | event->type = SMMU_EVT_F_WALK_EABT; | ||
222 | -- | ||
223 | 2.34.1 | ||
224 | |||
225 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Mostafa Saleh <smostafa@google.com> | ||
2 | 1 | ||
3 | With nesting, we would need to invalidate IPAs without | ||
4 | over-invalidating stage-1 IOVAs. This can be done by | ||
5 | distinguishing IPAs in the TLBs by having ASID=-1. | ||
6 | To achieve that, rework the invalidation for IPAs to have a | ||
7 | separate function, while for IOVA invalidation ASID=-1 means | ||
8 | invalidate for all ASIDs. | ||
9 | |||
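
A simplified sketch of the filtering rule described above (the key type is a stand-in, not the QEMU one): stage-2 entries carry ASID -1, so an IPA invalidation skips anything with a valid ASID and therefore never touches stage-1 IOVAs.

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    typedef struct {
        int asid;             /* -1 for entries cached by a stage-2 walk  */
        int vmid;
        uint64_t iova;        /* page-aligned input address (IOVA or IPA) */
        uint64_t addr_mask;
    } Key;

    static bool remove_by_vmid_ipa(const Key *k, int vmid, uint64_t ipa,
                                   uint64_t range_mask)
    {
        if (k->asid >= 0) {
            return false;     /* stage-1 IOVA entry: leave it alone */
        }
        if (k->vmid != vmid) {
            return false;
        }
        /* match a single page or any page inside the invalidated range */
        return ((ipa & ~k->addr_mask) == k->iova) ||
               ((k->iova & ~range_mask) == ipa);
    }

    int main(void)
    {
        Key s2 = { .asid = -1, .vmid = 3, .iova = 0x4000, .addr_mask = 0xfff };
        Key s1 = { .asid = 5,  .vmid = 3, .iova = 0x4000, .addr_mask = 0xfff };

        printf("stage-2 entry removed: %d\n", remove_by_vmid_ipa(&s2, 3, 0x4000, 0xfff));
        printf("stage-1 entry removed: %d\n", remove_by_vmid_ipa(&s1, 3, 0x4000, 0xfff));
        return 0;
    }
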
10 | Reviewed-by: Eric Auger <eric.auger@redhat.com> | ||
11 | Signed-off-by: Mostafa Saleh <smostafa@google.com> | ||
12 | Reviewed-by: Jean-Philippe Brucker <jean-philippe@linaro.org> | ||
13 | Reviewed-by: Alex Bennée <alex.bennee@linaro.org> | ||
14 | Message-id: 20240715084519.1189624-13-smostafa@google.com | ||
15 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
16 | --- | ||
17 | include/hw/arm/smmu-common.h | 3 ++- | ||
18 | hw/arm/smmu-common.c | 47 ++++++++++++++++++++++++++++++++++++ | ||
19 | hw/arm/smmuv3.c | 23 ++++++++++++------ | ||
20 | hw/arm/trace-events | 2 +- | ||
21 | 4 files changed, 66 insertions(+), 9 deletions(-) | ||
22 | |||
23 | diff --git a/include/hw/arm/smmu-common.h b/include/hw/arm/smmu-common.h | ||
24 | index XXXXXXX..XXXXXXX 100644 | ||
25 | --- a/include/hw/arm/smmu-common.h | ||
26 | +++ b/include/hw/arm/smmu-common.h | ||
27 | @@ -XXX,XX +XXX,XX @@ void smmu_iotlb_inv_asid(SMMUState *s, int asid); | ||
28 | void smmu_iotlb_inv_vmid(SMMUState *s, int vmid); | ||
29 | void smmu_iotlb_inv_iova(SMMUState *s, int asid, int vmid, dma_addr_t iova, | ||
30 | uint8_t tg, uint64_t num_pages, uint8_t ttl); | ||
31 | - | ||
32 | +void smmu_iotlb_inv_ipa(SMMUState *s, int vmid, dma_addr_t ipa, uint8_t tg, | ||
33 | + uint64_t num_pages, uint8_t ttl); | ||
34 | /* Unmap the range of all the notifiers registered to any IOMMU mr */ | ||
35 | void smmu_inv_notifiers_all(SMMUState *s); | ||
36 | |||
37 | diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c | ||
38 | index XXXXXXX..XXXXXXX 100644 | ||
39 | --- a/hw/arm/smmu-common.c | ||
40 | +++ b/hw/arm/smmu-common.c | ||
41 | @@ -XXX,XX +XXX,XX @@ static gboolean smmu_hash_remove_by_asid_vmid_iova(gpointer key, gpointer value, | ||
42 | ((entry->iova & ~info->mask) == info->iova); | ||
43 | } | ||
44 | |||
45 | +static gboolean smmu_hash_remove_by_vmid_ipa(gpointer key, gpointer value, | ||
46 | + gpointer user_data) | ||
47 | +{ | ||
48 | + SMMUTLBEntry *iter = (SMMUTLBEntry *)value; | ||
49 | + IOMMUTLBEntry *entry = &iter->entry; | ||
50 | + SMMUIOTLBPageInvInfo *info = (SMMUIOTLBPageInvInfo *)user_data; | ||
51 | + SMMUIOTLBKey iotlb_key = *(SMMUIOTLBKey *)key; | ||
52 | + | ||
53 | + if (SMMU_IOTLB_ASID(iotlb_key) >= 0) { | ||
54 | + /* This is a stage-1 address. */ | ||
55 | + return false; | ||
56 | + } | ||
57 | + if (info->vmid != SMMU_IOTLB_VMID(iotlb_key)) { | ||
58 | + return false; | ||
59 | + } | ||
60 | + return ((info->iova & ~entry->addr_mask) == entry->iova) || | ||
61 | + ((entry->iova & ~info->mask) == info->iova); | ||
62 | +} | ||
63 | + | ||
64 | void smmu_iotlb_inv_iova(SMMUState *s, int asid, int vmid, dma_addr_t iova, | ||
65 | uint8_t tg, uint64_t num_pages, uint8_t ttl) | ||
66 | { | ||
67 | @@ -XXX,XX +XXX,XX @@ void smmu_iotlb_inv_iova(SMMUState *s, int asid, int vmid, dma_addr_t iova, | ||
68 | &info); | ||
69 | } | ||
70 | |||
71 | +/* | ||
72 | + * Similar to smmu_iotlb_inv_iova(), but for Stage-2, ASID is always -1, | ||
73 | + * in Stage-1 invalidation ASID = -1, means don't care. | ||
74 | + */ | ||
75 | +void smmu_iotlb_inv_ipa(SMMUState *s, int vmid, dma_addr_t ipa, uint8_t tg, | ||
76 | + uint64_t num_pages, uint8_t ttl) | ||
77 | +{ | ||
78 | + uint8_t granule = tg ? tg * 2 + 10 : 12; | ||
79 | + int asid = -1; | ||
80 | + | ||
81 | + if (ttl && (num_pages == 1)) { | ||
82 | + SMMUIOTLBKey key = smmu_get_iotlb_key(asid, vmid, ipa, tg, ttl); | ||
83 | + | ||
84 | + if (g_hash_table_remove(s->iotlb, &key)) { | ||
85 | + return; | ||
86 | + } | ||
87 | + } | ||
88 | + | ||
89 | + SMMUIOTLBPageInvInfo info = { | ||
90 | + .iova = ipa, | ||
91 | + .vmid = vmid, | ||
92 | + .mask = (num_pages << granule) - 1}; | ||
93 | + | ||
94 | + g_hash_table_foreach_remove(s->iotlb, | ||
95 | + smmu_hash_remove_by_vmid_ipa, | ||
96 | + &info); | ||
97 | +} | ||
98 | + | ||
99 | void smmu_iotlb_inv_asid(SMMUState *s, int asid) | ||
100 | { | ||
101 | trace_smmu_iotlb_inv_asid(asid); | ||
102 | diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c | ||
103 | index XXXXXXX..XXXXXXX 100644 | ||
104 | --- a/hw/arm/smmuv3.c | ||
105 | +++ b/hw/arm/smmuv3.c | ||
106 | @@ -XXX,XX +XXX,XX @@ static void smmuv3_inv_notifiers_iova(SMMUState *s, int asid, int vmid, | ||
107 | } | ||
108 | } | ||
109 | |||
110 | -static void smmuv3_range_inval(SMMUState *s, Cmd *cmd) | ||
111 | +static void smmuv3_range_inval(SMMUState *s, Cmd *cmd, SMMUStage stage) | ||
112 | { | ||
113 | dma_addr_t end, addr = CMD_ADDR(cmd); | ||
114 | uint8_t type = CMD_TYPE(cmd); | ||
115 | @@ -XXX,XX +XXX,XX @@ static void smmuv3_range_inval(SMMUState *s, Cmd *cmd) | ||
116 | } | ||
117 | |||
118 | if (!tg) { | ||
119 | - trace_smmuv3_range_inval(vmid, asid, addr, tg, 1, ttl, leaf); | ||
120 | + trace_smmuv3_range_inval(vmid, asid, addr, tg, 1, ttl, leaf, stage); | ||
121 | smmuv3_inv_notifiers_iova(s, asid, vmid, addr, tg, 1); | ||
122 | - smmu_iotlb_inv_iova(s, asid, vmid, addr, tg, 1, ttl); | ||
123 | + if (stage == SMMU_STAGE_1) { | ||
124 | + smmu_iotlb_inv_iova(s, asid, vmid, addr, tg, 1, ttl); | ||
125 | + } else { | ||
126 | + smmu_iotlb_inv_ipa(s, vmid, addr, tg, 1, ttl); | ||
127 | + } | ||
128 | return; | ||
129 | } | ||
130 | |||
131 | @@ -XXX,XX +XXX,XX @@ static void smmuv3_range_inval(SMMUState *s, Cmd *cmd) | ||
132 | uint64_t mask = dma_aligned_pow2_mask(addr, end, 64); | ||
133 | |||
134 | num_pages = (mask + 1) >> granule; | ||
135 | - trace_smmuv3_range_inval(vmid, asid, addr, tg, num_pages, ttl, leaf); | ||
136 | + trace_smmuv3_range_inval(vmid, asid, addr, tg, num_pages, | ||
137 | + ttl, leaf, stage); | ||
138 | smmuv3_inv_notifiers_iova(s, asid, vmid, addr, tg, num_pages); | ||
139 | - smmu_iotlb_inv_iova(s, asid, vmid, addr, tg, num_pages, ttl); | ||
140 | + if (stage == SMMU_STAGE_1) { | ||
141 | + smmu_iotlb_inv_iova(s, asid, vmid, addr, tg, num_pages, ttl); | ||
142 | + } else { | ||
143 | + smmu_iotlb_inv_ipa(s, vmid, addr, tg, num_pages, ttl); | ||
144 | + } | ||
145 | addr += mask + 1; | ||
146 | } | ||
147 | } | ||
148 | @@ -XXX,XX +XXX,XX @@ static int smmuv3_cmdq_consume(SMMUv3State *s) | ||
149 | cmd_error = SMMU_CERROR_ILL; | ||
150 | break; | ||
151 | } | ||
152 | - smmuv3_range_inval(bs, &cmd); | ||
153 | + smmuv3_range_inval(bs, &cmd, SMMU_STAGE_1); | ||
154 | break; | ||
155 | case SMMU_CMD_TLBI_S12_VMALL: | ||
156 | { | ||
157 | @@ -XXX,XX +XXX,XX @@ static int smmuv3_cmdq_consume(SMMUv3State *s) | ||
158 | * As currently only either s1 or s2 are supported | ||
159 | * we can reuse same function for s2. | ||
160 | */ | ||
161 | - smmuv3_range_inval(bs, &cmd); | ||
162 | + smmuv3_range_inval(bs, &cmd, SMMU_STAGE_2); | ||
163 | break; | ||
164 | case SMMU_CMD_TLBI_EL3_ALL: | ||
165 | case SMMU_CMD_TLBI_EL3_VA: | ||
166 | diff --git a/hw/arm/trace-events b/hw/arm/trace-events | ||
167 | index XXXXXXX..XXXXXXX 100644 | ||
168 | --- a/hw/arm/trace-events | ||
169 | +++ b/hw/arm/trace-events | ||
170 | @@ -XXX,XX +XXX,XX @@ smmuv3_cmdq_cfgi_ste_range(int start, int end) "start=0x%x - end=0x%x" | ||
171 | smmuv3_cmdq_cfgi_cd(uint32_t sid) "sid=0x%x" | ||
172 | smmuv3_config_cache_hit(uint32_t sid, uint32_t hits, uint32_t misses, uint32_t perc) "Config cache HIT for sid=0x%x (hits=%d, misses=%d, hit rate=%d)" | ||
173 | smmuv3_config_cache_miss(uint32_t sid, uint32_t hits, uint32_t misses, uint32_t perc) "Config cache MISS for sid=0x%x (hits=%d, misses=%d, hit rate=%d)" | ||
174 | -smmuv3_range_inval(int vmid, int asid, uint64_t addr, uint8_t tg, uint64_t num_pages, uint8_t ttl, bool leaf) "vmid=%d asid=%d addr=0x%"PRIx64" tg=%d num_pages=0x%"PRIx64" ttl=%d leaf=%d" | ||
175 | +smmuv3_range_inval(int vmid, int asid, uint64_t addr, uint8_t tg, uint64_t num_pages, uint8_t ttl, bool leaf, int stage) "vmid=%d asid=%d addr=0x%"PRIx64" tg=%d num_pages=0x%"PRIx64" ttl=%d leaf=%d stage=%d" | ||
176 | smmuv3_cmdq_tlbi_nh(void) "" | ||
177 | smmuv3_cmdq_tlbi_nh_asid(int asid) "asid=%d" | ||
178 | smmuv3_cmdq_tlbi_s12_vmid(int vmid) "vmid=%d" | ||
179 | -- | ||
180 | 2.34.1 | ||
181 | |||
182 | diff view generated by jsdifflib |
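
The invalidation loop in smmuv3_range_inval() above walks the requested range in naturally aligned power-of-two blocks: each iteration picks the largest block that both starts at the current address and fits in what is left, converts it to a page count for the given granule, and advances. A minimal standalone sketch of that splitting, using a simplified helper in place of QEMU's dma_aligned_pow2_mask() (names and values here are illustrative only):

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/*
 * Largest naturally aligned power-of-two block starting at addr that does
 * not extend past end (inclusive), returned as size - 1 to mirror the
 * "mask" convention used in smmuv3_range_inval(). Simplified sketch.
 */
static uint64_t aligned_pow2_mask(uint64_t addr, uint64_t end)
{
    uint64_t span = end - addr + 1;
    uint64_t size = 1;

    /* largest power of two not exceeding the remaining span... */
    while (size <= span / 2) {
        size <<= 1;
    }
    /* ...capped by the natural alignment of addr (addr == 0 is fully aligned) */
    if (addr && (addr & -addr) < size) {
        size = addr & -addr;
    }
    return size - 1;
}

int main(void)
{
    uint64_t addr = 0x12000, end = 0x4afff;   /* hypothetical TLBI range */
    unsigned granule = 12;                    /* 4KB translation granule */

    while (addr <= end) {
        uint64_t mask = aligned_pow2_mask(addr, end);
        uint64_t num_pages = (mask + 1) >> granule;

        printf("invalidate addr=0x%" PRIx64 " num_pages=0x%" PRIx64 "\n",
               addr, num_pages);
        addr += mask + 1;
    }
    return 0;
}

Each (addr, num_pages) pair printed corresponds to one IOTLB invalidation and one notifier call in the patch; splitting the range this way keeps every block compatible in size and alignment with the TLB entries that may have been cached for it.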
Deleted patch | |||
---|---|---|---|
1 | From: Mostafa Saleh <smostafa@google.com> | ||
2 | 1 | ||
3 | Soon, instead of doing TLB invalidation by ASID only, the VMID will | ||
4 | also be required. | ||
5 | Add smmu_iotlb_inv_asid_vmid() which invalidates by both ASID and VMID. | ||
6 | |||
7 | However, at the moment this function is only used in SMMU_CMD_TLBI_NH_ASID | ||
8 | which is a stage-1 command, so passing VMID = -1 keeps the original | ||
9 | behaviour. | ||
10 | |||
11 | Reviewed-by: Jean-Philippe Brucker <jean-philippe@linaro.org> | ||
12 | Reviewed-by: Eric Auger <eric.auger@redhat.com> | ||
13 | Signed-off-by: Mostafa Saleh <smostafa@google.com> | ||
14 | Reviewed-by: Alex Bennée <alex.bennee@linaro.org> | ||
15 | Message-id: 20240715084519.1189624-14-smostafa@google.com | ||
16 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
17 | --- | ||
18 | include/hw/arm/smmu-common.h | 2 +- | ||
19 | hw/arm/smmu-common.c | 20 +++++++++++++------- | ||
20 | hw/arm/smmuv3.c | 2 +- | ||
21 | hw/arm/trace-events | 2 +- | ||
22 | 4 files changed, 16 insertions(+), 10 deletions(-) | ||
23 | |||
24 | diff --git a/include/hw/arm/smmu-common.h b/include/hw/arm/smmu-common.h | ||
25 | index XXXXXXX..XXXXXXX 100644 | ||
26 | --- a/include/hw/arm/smmu-common.h | ||
27 | +++ b/include/hw/arm/smmu-common.h | ||
28 | @@ -XXX,XX +XXX,XX @@ void smmu_iotlb_insert(SMMUState *bs, SMMUTransCfg *cfg, SMMUTLBEntry *entry); | ||
29 | SMMUIOTLBKey smmu_get_iotlb_key(int asid, int vmid, uint64_t iova, | ||
30 | uint8_t tg, uint8_t level); | ||
31 | void smmu_iotlb_inv_all(SMMUState *s); | ||
32 | -void smmu_iotlb_inv_asid(SMMUState *s, int asid); | ||
33 | +void smmu_iotlb_inv_asid_vmid(SMMUState *s, int asid, int vmid); | ||
34 | void smmu_iotlb_inv_vmid(SMMUState *s, int vmid); | ||
35 | void smmu_iotlb_inv_iova(SMMUState *s, int asid, int vmid, dma_addr_t iova, | ||
36 | uint8_t tg, uint64_t num_pages, uint8_t ttl); | ||
37 | diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c | ||
38 | index XXXXXXX..XXXXXXX 100644 | ||
39 | --- a/hw/arm/smmu-common.c | ||
40 | +++ b/hw/arm/smmu-common.c | ||
41 | @@ -XXX,XX +XXX,XX @@ void smmu_iotlb_inv_all(SMMUState *s) | ||
42 | g_hash_table_remove_all(s->iotlb); | ||
43 | } | ||
44 | |||
45 | -static gboolean smmu_hash_remove_by_asid(gpointer key, gpointer value, | ||
46 | - gpointer user_data) | ||
47 | +static gboolean smmu_hash_remove_by_asid_vmid(gpointer key, gpointer value, | ||
48 | + gpointer user_data) | ||
49 | { | ||
50 | - int asid = *(int *)user_data; | ||
51 | + SMMUIOTLBPageInvInfo *info = (SMMUIOTLBPageInvInfo *)user_data; | ||
52 | SMMUIOTLBKey *iotlb_key = (SMMUIOTLBKey *)key; | ||
53 | |||
54 | - return SMMU_IOTLB_ASID(*iotlb_key) == asid; | ||
55 | + return (SMMU_IOTLB_ASID(*iotlb_key) == info->asid) && | ||
56 | + (SMMU_IOTLB_VMID(*iotlb_key) == info->vmid); | ||
57 | } | ||
58 | |||
59 | static gboolean smmu_hash_remove_by_vmid(gpointer key, gpointer value, | ||
60 | @@ -XXX,XX +XXX,XX @@ void smmu_iotlb_inv_ipa(SMMUState *s, int vmid, dma_addr_t ipa, uint8_t tg, | ||
61 | &info); | ||
62 | } | ||
63 | |||
64 | -void smmu_iotlb_inv_asid(SMMUState *s, int asid) | ||
65 | +void smmu_iotlb_inv_asid_vmid(SMMUState *s, int asid, int vmid) | ||
66 | { | ||
67 | - trace_smmu_iotlb_inv_asid(asid); | ||
68 | - g_hash_table_foreach_remove(s->iotlb, smmu_hash_remove_by_asid, &asid); | ||
69 | + SMMUIOTLBPageInvInfo info = { | ||
70 | + .asid = asid, | ||
71 | + .vmid = vmid, | ||
72 | + }; | ||
73 | + | ||
74 | + trace_smmu_iotlb_inv_asid_vmid(asid, vmid); | ||
75 | + g_hash_table_foreach_remove(s->iotlb, smmu_hash_remove_by_asid_vmid, &info); | ||
76 | } | ||
77 | |||
78 | void smmu_iotlb_inv_vmid(SMMUState *s, int vmid) | ||
79 | diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c | ||
80 | index XXXXXXX..XXXXXXX 100644 | ||
81 | --- a/hw/arm/smmuv3.c | ||
82 | +++ b/hw/arm/smmuv3.c | ||
83 | @@ -XXX,XX +XXX,XX @@ static int smmuv3_cmdq_consume(SMMUv3State *s) | ||
84 | |||
85 | trace_smmuv3_cmdq_tlbi_nh_asid(asid); | ||
86 | smmu_inv_notifiers_all(&s->smmu_state); | ||
87 | - smmu_iotlb_inv_asid(bs, asid); | ||
88 | + smmu_iotlb_inv_asid_vmid(bs, asid, -1); | ||
89 | break; | ||
90 | } | ||
91 | case SMMU_CMD_TLBI_NH_ALL: | ||
92 | diff --git a/hw/arm/trace-events b/hw/arm/trace-events | ||
93 | index XXXXXXX..XXXXXXX 100644 | ||
94 | --- a/hw/arm/trace-events | ||
95 | +++ b/hw/arm/trace-events | ||
96 | @@ -XXX,XX +XXX,XX @@ smmu_ptw_page_pte(int stage, int level, uint64_t iova, uint64_t baseaddr, uint6 | ||
97 | smmu_ptw_block_pte(int stage, int level, uint64_t baseaddr, uint64_t pteaddr, uint64_t pte, uint64_t iova, uint64_t gpa, int bsize_mb) "stage=%d level=%d base@=0x%"PRIx64" pte@=0x%"PRIx64" pte=0x%"PRIx64" iova=0x%"PRIx64" block address = 0x%"PRIx64" block size = %d MiB" | ||
98 | smmu_get_pte(uint64_t baseaddr, int index, uint64_t pteaddr, uint64_t pte) "baseaddr=0x%"PRIx64" index=0x%x, pteaddr=0x%"PRIx64", pte=0x%"PRIx64 | ||
99 | smmu_iotlb_inv_all(void) "IOTLB invalidate all" | ||
100 | -smmu_iotlb_inv_asid(int asid) "IOTLB invalidate asid=%d" | ||
101 | +smmu_iotlb_inv_asid_vmid(int asid, int vmid) "IOTLB invalidate asid=%d vmid=%d" | ||
102 | smmu_iotlb_inv_vmid(int vmid) "IOTLB invalidate vmid=%d" | ||
103 | smmu_iotlb_inv_iova(int asid, uint64_t addr) "IOTLB invalidate asid=%d addr=0x%"PRIx64 | ||
104 | smmu_inv_notifiers_mr(const char *name) "iommu mr=%s" | ||
105 | -- | ||
106 | 2.34.1 | ||
107 | |||
108 | diff view generated by jsdifflib |
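
The core of the change above is GLib's remove-by-predicate pattern: g_hash_table_foreach_remove() walks the IOTLB and drops every entry whose key matches both the ASID and the VMID carried in the user_data struct. A self-contained sketch of that pattern with a deliberately simplified key (the type and function names below are made up for illustration, not the QEMU ones):

#include <glib.h>
#include <stdio.h>

typedef struct {
    int asid;
    int vmid;
} IOTLBKey;                    /* simplified stand-in for SMMUIOTLBKey */

typedef struct {
    int asid;
    int vmid;
} InvInfo;                     /* simplified stand-in for SMMUIOTLBPageInvInfo */

static guint key_hash(gconstpointer k)
{
    const IOTLBKey *key = k;

    return g_int_hash(&key->asid) ^ g_int_hash(&key->vmid);
}

static gboolean key_equal(gconstpointer a, gconstpointer b)
{
    const IOTLBKey *ka = a, *kb = b;

    return ka->asid == kb->asid && ka->vmid == kb->vmid;
}

/* GHRFunc: return TRUE to drop the entry, as smmu_hash_remove_by_asid_vmid() does */
static gboolean remove_by_asid_vmid(gpointer key, gpointer value, gpointer user_data)
{
    const IOTLBKey *k = key;
    const InvInfo *info = user_data;

    return k->asid == info->asid && k->vmid == info->vmid;
}

int main(void)
{
    GHashTable *iotlb = g_hash_table_new_full(key_hash, key_equal, g_free, NULL);
    int pairs[][2] = { { 1, 5 }, { 1, 6 }, { 2, 5 }, { -1, 5 } };

    for (guint i = 0; i < G_N_ELEMENTS(pairs); i++) {
        IOTLBKey *k = g_new0(IOTLBKey, 1);

        k->asid = pairs[i][0];
        k->vmid = pairs[i][1];
        g_hash_table_insert(iotlb, k, GINT_TO_POINTER(1));
    }

    InvInfo info = { .asid = 1, .vmid = 5 };
    guint removed = g_hash_table_foreach_remove(iotlb, remove_by_asid_vmid, &info);

    printf("removed %u entries, %u left\n", removed, g_hash_table_size(iotlb));
    g_hash_table_destroy(iotlb);
    return 0;
}

Built against GLib (for example: gcc sketch.c $(pkg-config --cflags --libs glib-2.0)), this removes only the {asid=1, vmid=5} entry and leaves the other three, which is the per-entry matching rule the patch's smmu_hash_remove_by_asid_vmid() applies.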
Deleted patch | |||
---|---|---|---|
1 | From: Mostafa Saleh <smostafa@google.com> | ||
2 | 1 | ||
3 | Some commands need rework for nesting, as they used to assume S1 | ||
4 | and S2 are mutually exclusive: | ||
5 | |||
6 | - CMD_TLBI_NH_ASID: Consider VMID if stage-2 is supported | ||
7 | - CMD_TLBI_NH_ALL: Consider VMID if stage-2 is supported, otherwise | ||
8 | invalidate everything; this requires a new VMID invalidation | ||
9 | function for stage-1-only entries (ASID >= 0). | ||
10 | |||
11 | Also, rework trace events to reflect the new implementation. | ||
12 | |||
13 | Reviewed-by: Jean-Philippe Brucker <jean-philippe@linaro.org> | ||
14 | Reviewed-by: Eric Auger <eric.auger@redhat.com> | ||
15 | Signed-off-by: Mostafa Saleh <smostafa@google.com> | ||
16 | Reviewed-by: Alex Bennée <alex.bennee@linaro.org> | ||
17 | Message-id: 20240715084519.1189624-15-smostafa@google.com | ||
18 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
19 | --- | ||
20 | include/hw/arm/smmu-common.h | 1 + | ||
21 | hw/arm/smmu-common.c | 16 ++++++++++++++++ | ||
22 | hw/arm/smmuv3.c | 28 ++++++++++++++++++++++++++-- | ||
23 | hw/arm/trace-events | 4 +++- | ||
24 | 4 files changed, 46 insertions(+), 3 deletions(-) | ||
25 | |||
26 | diff --git a/include/hw/arm/smmu-common.h b/include/hw/arm/smmu-common.h | ||
27 | index XXXXXXX..XXXXXXX 100644 | ||
28 | --- a/include/hw/arm/smmu-common.h | ||
29 | +++ b/include/hw/arm/smmu-common.h | ||
30 | @@ -XXX,XX +XXX,XX @@ SMMUIOTLBKey smmu_get_iotlb_key(int asid, int vmid, uint64_t iova, | ||
31 | void smmu_iotlb_inv_all(SMMUState *s); | ||
32 | void smmu_iotlb_inv_asid_vmid(SMMUState *s, int asid, int vmid); | ||
33 | void smmu_iotlb_inv_vmid(SMMUState *s, int vmid); | ||
34 | +void smmu_iotlb_inv_vmid_s1(SMMUState *s, int vmid); | ||
35 | void smmu_iotlb_inv_iova(SMMUState *s, int asid, int vmid, dma_addr_t iova, | ||
36 | uint8_t tg, uint64_t num_pages, uint8_t ttl); | ||
37 | void smmu_iotlb_inv_ipa(SMMUState *s, int vmid, dma_addr_t ipa, uint8_t tg, | ||
38 | diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c | ||
39 | index XXXXXXX..XXXXXXX 100644 | ||
40 | --- a/hw/arm/smmu-common.c | ||
41 | +++ b/hw/arm/smmu-common.c | ||
42 | @@ -XXX,XX +XXX,XX @@ static gboolean smmu_hash_remove_by_vmid(gpointer key, gpointer value, | ||
43 | return SMMU_IOTLB_VMID(*iotlb_key) == vmid; | ||
44 | } | ||
45 | |||
46 | +static gboolean smmu_hash_remove_by_vmid_s1(gpointer key, gpointer value, | ||
47 | + gpointer user_data) | ||
48 | +{ | ||
49 | + int vmid = *(int *)user_data; | ||
50 | + SMMUIOTLBKey *iotlb_key = (SMMUIOTLBKey *)key; | ||
51 | + | ||
52 | + return (SMMU_IOTLB_VMID(*iotlb_key) == vmid) && | ||
53 | + (SMMU_IOTLB_ASID(*iotlb_key) >= 0); | ||
54 | +} | ||
55 | + | ||
56 | static gboolean smmu_hash_remove_by_asid_vmid_iova(gpointer key, gpointer value, | ||
57 | gpointer user_data) | ||
58 | { | ||
59 | @@ -XXX,XX +XXX,XX @@ void smmu_iotlb_inv_vmid(SMMUState *s, int vmid) | ||
60 | g_hash_table_foreach_remove(s->iotlb, smmu_hash_remove_by_vmid, &vmid); | ||
61 | } | ||
62 | |||
63 | +inline void smmu_iotlb_inv_vmid_s1(SMMUState *s, int vmid) | ||
64 | +{ | ||
65 | + trace_smmu_iotlb_inv_vmid_s1(vmid); | ||
66 | + g_hash_table_foreach_remove(s->iotlb, smmu_hash_remove_by_vmid_s1, &vmid); | ||
67 | +} | ||
68 | + | ||
69 | /* VMSAv8-64 Translation */ | ||
70 | |||
71 | /** | ||
72 | diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c | ||
73 | index XXXXXXX..XXXXXXX 100644 | ||
74 | --- a/hw/arm/smmuv3.c | ||
75 | +++ b/hw/arm/smmuv3.c | ||
76 | @@ -XXX,XX +XXX,XX @@ static int smmuv3_cmdq_consume(SMMUv3State *s) | ||
77 | case SMMU_CMD_TLBI_NH_ASID: | ||
78 | { | ||
79 | int asid = CMD_ASID(&cmd); | ||
80 | + int vmid = -1; | ||
81 | |||
82 | if (!STAGE1_SUPPORTED(s)) { | ||
83 | cmd_error = SMMU_CERROR_ILL; | ||
84 | break; | ||
85 | } | ||
86 | |||
87 | + /* | ||
88 | + * VMID is only matched when stage 2 is supported, otherwise set it | ||
89 | + * to -1 as the value used for stage-1 only VMIDs. | ||
90 | + */ | ||
91 | + if (STAGE2_SUPPORTED(s)) { | ||
92 | + vmid = CMD_VMID(&cmd); | ||
93 | + } | ||
94 | + | ||
95 | trace_smmuv3_cmdq_tlbi_nh_asid(asid); | ||
96 | smmu_inv_notifiers_all(&s->smmu_state); | ||
97 | - smmu_iotlb_inv_asid_vmid(bs, asid, -1); | ||
98 | + smmu_iotlb_inv_asid_vmid(bs, asid, vmid); | ||
99 | break; | ||
100 | } | ||
101 | case SMMU_CMD_TLBI_NH_ALL: | ||
102 | + { | ||
103 | + int vmid = -1; | ||
104 | + | ||
105 | if (!STAGE1_SUPPORTED(s)) { | ||
106 | cmd_error = SMMU_CERROR_ILL; | ||
107 | break; | ||
108 | } | ||
109 | + | ||
110 | + /* | ||
111 | + * If stage-2 is supported, invalidate for this VMID only, otherwise | ||
112 | + * invalidate the whole thing. | ||
113 | + */ | ||
114 | + if (STAGE2_SUPPORTED(s)) { | ||
115 | + vmid = CMD_VMID(&cmd); | ||
116 | + trace_smmuv3_cmdq_tlbi_nh(vmid); | ||
117 | + smmu_iotlb_inv_vmid_s1(bs, vmid); | ||
118 | + break; | ||
119 | + } | ||
120 | QEMU_FALLTHROUGH; | ||
121 | + } | ||
122 | case SMMU_CMD_TLBI_NSNH_ALL: | ||
123 | - trace_smmuv3_cmdq_tlbi_nh(); | ||
124 | + trace_smmuv3_cmdq_tlbi_nsnh(); | ||
125 | smmu_inv_notifiers_all(&s->smmu_state); | ||
126 | smmu_iotlb_inv_all(bs); | ||
127 | break; | ||
128 | diff --git a/hw/arm/trace-events b/hw/arm/trace-events | ||
129 | index XXXXXXX..XXXXXXX 100644 | ||
130 | --- a/hw/arm/trace-events | ||
131 | +++ b/hw/arm/trace-events | ||
132 | @@ -XXX,XX +XXX,XX @@ smmu_get_pte(uint64_t baseaddr, int index, uint64_t pteaddr, uint64_t pte) "base | ||
133 | smmu_iotlb_inv_all(void) "IOTLB invalidate all" | ||
134 | smmu_iotlb_inv_asid_vmid(int asid, int vmid) "IOTLB invalidate asid=%d vmid=%d" | ||
135 | smmu_iotlb_inv_vmid(int vmid) "IOTLB invalidate vmid=%d" | ||
136 | +smmu_iotlb_inv_vmid_s1(int vmid) "IOTLB invalidate vmid=%d" | ||
137 | smmu_iotlb_inv_iova(int asid, uint64_t addr) "IOTLB invalidate asid=%d addr=0x%"PRIx64 | ||
138 | smmu_inv_notifiers_mr(const char *name) "iommu mr=%s" | ||
139 | smmu_iotlb_lookup_hit(int asid, int vmid, uint64_t addr, uint32_t hit, uint32_t miss, uint32_t p) "IOTLB cache HIT asid=%d vmid=%d addr=0x%"PRIx64" hit=%d miss=%d hit rate=%d" | ||
140 | @@ -XXX,XX +XXX,XX @@ smmuv3_cmdq_cfgi_cd(uint32_t sid) "sid=0x%x" | ||
141 | smmuv3_config_cache_hit(uint32_t sid, uint32_t hits, uint32_t misses, uint32_t perc) "Config cache HIT for sid=0x%x (hits=%d, misses=%d, hit rate=%d)" | ||
142 | smmuv3_config_cache_miss(uint32_t sid, uint32_t hits, uint32_t misses, uint32_t perc) "Config cache MISS for sid=0x%x (hits=%d, misses=%d, hit rate=%d)" | ||
143 | smmuv3_range_inval(int vmid, int asid, uint64_t addr, uint8_t tg, uint64_t num_pages, uint8_t ttl, bool leaf, int stage) "vmid=%d asid=%d addr=0x%"PRIx64" tg=%d num_pages=0x%"PRIx64" ttl=%d leaf=%d stage=%d" | ||
144 | -smmuv3_cmdq_tlbi_nh(void) "" | ||
145 | +smmuv3_cmdq_tlbi_nh(int vmid) "vmid=%d" | ||
146 | +smmuv3_cmdq_tlbi_nsnh(void) "" | ||
147 | smmuv3_cmdq_tlbi_nh_asid(int asid) "asid=%d" | ||
148 | smmuv3_cmdq_tlbi_s12_vmid(int vmid) "vmid=%d" | ||
149 | smmuv3_config_cache_inv(uint32_t sid) "Config cache INV for sid=0x%x" | ||
150 | -- | ||
151 | 2.34.1 | ||
152 | |||
153 | diff view generated by jsdifflib |
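
To summarise the dispatch rules described in the commit message above, here is a hypothetical, heavily reduced dispatcher; the names are illustrative only and do not correspond to the QEMU functions in smmuv3_cmdq_consume():

#include <stdbool.h>
#include <stdio.h>

typedef enum { TLBI_NH_ASID, TLBI_NH_ALL } TlbiCmd;

/* Print the action each command resolves to, depending on stage-2 support. */
static void dispatch_tlbi(TlbiCmd cmd, bool stage2_supported, int cmd_asid, int cmd_vmid)
{
    /* -1 is the "stage-1 only" VMID used when stage 2 is not implemented */
    int vmid = stage2_supported ? cmd_vmid : -1;

    switch (cmd) {
    case TLBI_NH_ASID:
        printf("invalidate entries matching asid=%d vmid=%d\n", cmd_asid, vmid);
        break;
    case TLBI_NH_ALL:
        if (stage2_supported) {
            printf("invalidate stage-1 entries (asid >= 0) for vmid=%d\n", vmid);
        } else {
            printf("invalidate everything\n");
        }
        break;
    }
}

int main(void)
{
    dispatch_tlbi(TLBI_NH_ASID, true, 3, 7);   /* nesting-capable SMMU */
    dispatch_tlbi(TLBI_NH_ASID, false, 3, 7);  /* stage-1 only SMMU */
    dispatch_tlbi(TLBI_NH_ALL, true, -1, 7);
    dispatch_tlbi(TLBI_NH_ALL, false, -1, 7);
    return 0;
}

The last case is the one that falls through to the NSNH_ALL path in the real code, which is why the patch renames the old empty trace event to smmuv3_cmdq_tlbi_nsnh() and adds a per-VMID smmuv3_cmdq_tlbi_nh().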
Deleted patch | |||
---|---|---|---|
1 | From: Mostafa Saleh <smostafa@google.com> | ||
2 | 1 | ||
3 | IOMMUTLBEvent only understands IOVA; for stage-1 or stage-2 | ||
4 | SMMU instances we consider the input address as the IOVA. When | ||
5 | nesting is used, we can't mix stage-1 and stage-2 addresses, so for | ||
6 | nesting only stage-1 addresses are treated as the IOVA and notified. | ||
7 | |||
8 | Signed-off-by: Mostafa Saleh <smostafa@google.com> | ||
9 | Reviewed-by: Jean-Philippe Brucker <jean-philippe@linaro.org> | ||
10 | Reviewed-by: Eric Auger <eric.auger@redhat.com> | ||
11 | Reviewed-by: Alex Bennée <alex.bennee@linaro.org> | ||
12 | Message-id: 20240715084519.1189624-16-smostafa@google.com | ||
13 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
14 | --- | ||
15 | hw/arm/smmuv3.c | 39 +++++++++++++++++++++++++-------------- | ||
16 | hw/arm/trace-events | 2 +- | ||
17 | 2 files changed, 26 insertions(+), 15 deletions(-) | ||
18 | |||
19 | diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c | ||
20 | index XXXXXXX..XXXXXXX 100644 | ||
21 | --- a/hw/arm/smmuv3.c | ||
22 | +++ b/hw/arm/smmuv3.c | ||
23 | @@ -XXX,XX +XXX,XX @@ epilogue: | ||
24 | * @iova: iova | ||
25 | * @tg: translation granule (if communicated through range invalidation) | ||
26 | * @num_pages: number of @granule sized pages (if tg != 0), otherwise 1 | ||
27 | + * @stage: Which stage (1 or 2) is used | ||
28 | */ | ||
29 | static void smmuv3_notify_iova(IOMMUMemoryRegion *mr, | ||
30 | IOMMUNotifier *n, | ||
31 | int asid, int vmid, | ||
32 | dma_addr_t iova, uint8_t tg, | ||
33 | - uint64_t num_pages) | ||
34 | + uint64_t num_pages, int stage) | ||
35 | { | ||
36 | SMMUDevice *sdev = container_of(mr, SMMUDevice, iommu); | ||
37 | + SMMUEventInfo eventinfo = {.inval_ste_allowed = true}; | ||
38 | + SMMUTransCfg *cfg = smmuv3_get_config(sdev, &eventinfo); | ||
39 | IOMMUTLBEvent event; | ||
40 | uint8_t granule; | ||
41 | - SMMUv3State *s = sdev->smmu; | ||
42 | + | ||
43 | + if (!cfg) { | ||
44 | + return; | ||
45 | + } | ||
46 | + | ||
47 | + /* | ||
48 | + * stage is passed from TLB invalidation commands which can be either | ||
49 | + * stage-1 or stage-2. | ||
50 | + * However, IOMMUTLBEvent only understands IOVA, for stage-1 or stage-2 | ||
51 | + * SMMU instances we consider the input address as the IOVA, but when | ||
52 | + * nesting is used, we can't mix stage-1 and stage-2 addresses, so for | ||
53 | + * nesting only stage-1 is considered the IOVA and would be notified. | ||
54 | + */ | ||
55 | + if ((stage == SMMU_STAGE_2) && (cfg->stage == SMMU_NESTED)) | ||
56 | + return; | ||
57 | |||
58 | if (!tg) { | ||
59 | - SMMUEventInfo eventinfo = {.inval_ste_allowed = true}; | ||
60 | - SMMUTransCfg *cfg = smmuv3_get_config(sdev, &eventinfo); | ||
61 | SMMUTransTableInfo *tt; | ||
62 | |||
63 | - if (!cfg) { | ||
64 | - return; | ||
65 | - } | ||
66 | - | ||
67 | if (asid >= 0 && cfg->asid != asid) { | ||
68 | return; | ||
69 | } | ||
70 | @@ -XXX,XX +XXX,XX @@ static void smmuv3_notify_iova(IOMMUMemoryRegion *mr, | ||
71 | return; | ||
72 | } | ||
73 | |||
74 | - if (STAGE1_SUPPORTED(s)) { | ||
75 | + if (stage == SMMU_STAGE_1) { | ||
76 | tt = select_tt(cfg, iova); | ||
77 | if (!tt) { | ||
78 | return; | ||
79 | @@ -XXX,XX +XXX,XX @@ static void smmuv3_notify_iova(IOMMUMemoryRegion *mr, | ||
80 | /* invalidate an asid/vmid/iova range tuple in all mr's */ | ||
81 | static void smmuv3_inv_notifiers_iova(SMMUState *s, int asid, int vmid, | ||
82 | dma_addr_t iova, uint8_t tg, | ||
83 | - uint64_t num_pages) | ||
84 | + uint64_t num_pages, int stage) | ||
85 | { | ||
86 | SMMUDevice *sdev; | ||
87 | |||
88 | @@ -XXX,XX +XXX,XX @@ static void smmuv3_inv_notifiers_iova(SMMUState *s, int asid, int vmid, | ||
89 | IOMMUNotifier *n; | ||
90 | |||
91 | trace_smmuv3_inv_notifiers_iova(mr->parent_obj.name, asid, vmid, | ||
92 | - iova, tg, num_pages); | ||
93 | + iova, tg, num_pages, stage); | ||
94 | |||
95 | IOMMU_NOTIFIER_FOREACH(n, mr) { | ||
96 | - smmuv3_notify_iova(mr, n, asid, vmid, iova, tg, num_pages); | ||
97 | + smmuv3_notify_iova(mr, n, asid, vmid, iova, tg, num_pages, stage); | ||
98 | } | ||
99 | } | ||
100 | } | ||
101 | @@ -XXX,XX +XXX,XX @@ static void smmuv3_range_inval(SMMUState *s, Cmd *cmd, SMMUStage stage) | ||
102 | |||
103 | if (!tg) { | ||
104 | trace_smmuv3_range_inval(vmid, asid, addr, tg, 1, ttl, leaf, stage); | ||
105 | - smmuv3_inv_notifiers_iova(s, asid, vmid, addr, tg, 1); | ||
106 | + smmuv3_inv_notifiers_iova(s, asid, vmid, addr, tg, 1, stage); | ||
107 | if (stage == SMMU_STAGE_1) { | ||
108 | smmu_iotlb_inv_iova(s, asid, vmid, addr, tg, 1, ttl); | ||
109 | } else { | ||
110 | @@ -XXX,XX +XXX,XX @@ static void smmuv3_range_inval(SMMUState *s, Cmd *cmd, SMMUStage stage) | ||
111 | num_pages = (mask + 1) >> granule; | ||
112 | trace_smmuv3_range_inval(vmid, asid, addr, tg, num_pages, | ||
113 | ttl, leaf, stage); | ||
114 | - smmuv3_inv_notifiers_iova(s, asid, vmid, addr, tg, num_pages); | ||
115 | + smmuv3_inv_notifiers_iova(s, asid, vmid, addr, tg, num_pages, stage); | ||
116 | if (stage == SMMU_STAGE_1) { | ||
117 | smmu_iotlb_inv_iova(s, asid, vmid, addr, tg, num_pages, ttl); | ||
118 | } else { | ||
119 | diff --git a/hw/arm/trace-events b/hw/arm/trace-events | ||
120 | index XXXXXXX..XXXXXXX 100644 | ||
121 | --- a/hw/arm/trace-events | ||
122 | +++ b/hw/arm/trace-events | ||
123 | @@ -XXX,XX +XXX,XX @@ smmuv3_cmdq_tlbi_s12_vmid(int vmid) "vmid=%d" | ||
124 | smmuv3_config_cache_inv(uint32_t sid) "Config cache INV for sid=0x%x" | ||
125 | smmuv3_notify_flag_add(const char *iommu) "ADD SMMUNotifier node for iommu mr=%s" | ||
126 | smmuv3_notify_flag_del(const char *iommu) "DEL SMMUNotifier node for iommu mr=%s" | ||
127 | -smmuv3_inv_notifiers_iova(const char *name, int asid, int vmid, uint64_t iova, uint8_t tg, uint64_t num_pages) "iommu mr=%s asid=%d vmid=%d iova=0x%"PRIx64" tg=%d num_pages=0x%"PRIx64 | ||
128 | +smmuv3_inv_notifiers_iova(const char *name, int asid, int vmid, uint64_t iova, uint8_t tg, uint64_t num_pages, int stage) "iommu mr=%s asid=%d vmid=%d iova=0x%"PRIx64" tg=%d num_pages=0x%"PRIx64" stage=%d" | ||
129 | |||
130 | # strongarm.c | ||
131 | strongarm_uart_update_parameters(const char *label, int speed, char parity, int data_bits, int stop_bits) "%s speed=%d parity=%c data=%d stop=%d" | ||
132 | -- | ||
133 | 2.34.1 | ||
134 | |||
135 | diff view generated by jsdifflib |
1 | From: Akihiko Odaki <akihiko.odaki@daynix.com> | 1 | From: Pierrick Bouvier <pierrick.bouvier@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | hvf did not advance PC when raising an exception for most unhandled | 3 | Pointer authentication on aarch64 is pretty expensive (up to 50% of |
4 | system registers, but it mistakenly advanced PC when raising an | 4 | execution time) when running a virtual machine with tcg and -cpu max |
5 | exception for GICv3 registers. | 5 | (which enables pauth=on). |
6 | 6 | ||
7 | Cc: qemu-stable@nongnu.org | 7 | The advice is always: use pauth-impdef=on. |
8 | Fixes: a2260983c655 ("hvf: arm: Add support for GICv3") | 8 | Our documentation even mentions it "by default" in |
9 | Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com> | 9 | docs/system/introduction.rst. |
10 | Message-id: 20240716-pmu-v3-4-8c7c1858a227@daynix.com | 10 | |
11 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | 11 | Thus, we change the default algorithm to impdef. This does not |
12 | affect kvm or hvf acceleration, since the pauth algorithm used is the | ||
13 | one from the host cpu. | ||
14 | |||
15 | This change is backward compatible, in terms of CLI, with previous | ||
16 | versions, as the semantics of using -cpu max,pauth-impdef=on and -cpu | ||
17 | max,pauth-qarma3=on are preserved. | ||
18 | The new option introduced in the previous patch, which matches the old | ||
19 | default, is -cpu max,pauth-qarma5=on. | ||
20 | It is backward compatible with migration as well, by defining a backcompat | ||
21 | property that uses qarma5 by default for virt machines <= 9.2. | ||
22 | Tested by saving and restoring a VM from qemu 9.2.0 into qemu-master | ||
23 | (10.0) for the neoverse-n2 and max cpus. | ||
24 | |||
25 | Signed-off-by: Pierrick Bouvier <pierrick.bouvier@linaro.org> | ||
26 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
27 | Message-id: 20241219183211.3493974-3-pierrick.bouvier@linaro.org | ||
12 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 28 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
13 | --- | 29 | --- |
14 | target/arm/hvf/hvf.c | 1 + | 30 | docs/system/arm/cpu-features.rst | 2 +- |
15 | 1 file changed, 1 insertion(+) | 31 | docs/system/introduction.rst | 2 +- |
32 | target/arm/cpu.h | 3 +++ | ||
33 | hw/core/machine.c | 4 +++- | ||
34 | target/arm/cpu.c | 2 ++ | ||
35 | target/arm/cpu64.c | 22 ++++++++++++++++------ | ||
36 | 6 files changed, 26 insertions(+), 9 deletions(-) | ||
16 | 37 | ||
17 | diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c | 38 | diff --git a/docs/system/arm/cpu-features.rst b/docs/system/arm/cpu-features.rst |
18 | index XXXXXXX..XXXXXXX 100644 | 39 | index XXXXXXX..XXXXXXX 100644 |
19 | --- a/target/arm/hvf/hvf.c | 40 | --- a/docs/system/arm/cpu-features.rst |
20 | +++ b/target/arm/hvf/hvf.c | 41 | +++ b/docs/system/arm/cpu-features.rst |
21 | @@ -XXX,XX +XXX,XX @@ static int hvf_sysreg_read(CPUState *cpu, uint32_t reg, uint32_t rt) | 42 | @@ -XXX,XX +XXX,XX @@ Below is the list of TCG VCPU features and their descriptions. |
22 | /* Call the TCG sysreg handler. This is only safe for GICv3 regs. */ | 43 | When ``pauth`` is enabled, select the architected QARMA5 algorithm. |
23 | if (!hvf_sysreg_read_cp(cpu, reg, &val)) { | 44 | |
24 | hvf_raise_exception(cpu, EXCP_UDEF, syn_uncategorized()); | 45 | Without ``pauth-impdef``, ``pauth-qarma3`` or ``pauth-qarma5`` enabled, |
25 | + return 1; | 46 | -the architected QARMA5 algorithm is used. The architected QARMA5 |
26 | } | 47 | +the QEMU impdef algorithm is used. The architected QARMA5 |
27 | break; | 48 | and QARMA3 algorithms have good cryptographic properties, but can |
28 | case SYSREG_DBGBVR0_EL1: | 49 | be quite slow to emulate. The impdef algorithm used by QEMU is |
50 | non-cryptographic but significantly faster. | ||
51 | diff --git a/docs/system/introduction.rst b/docs/system/introduction.rst | ||
52 | index XXXXXXX..XXXXXXX 100644 | ||
53 | --- a/docs/system/introduction.rst | ||
54 | +++ b/docs/system/introduction.rst | ||
55 | @@ -XXX,XX +XXX,XX @@ would default to it anyway. | ||
56 | |||
57 | .. code:: | ||
58 | |||
59 | - -cpu max,pauth-impdef=on \ | ||
60 | + -cpu max \ | ||
61 | -smp 4 \ | ||
62 | -accel tcg \ | ||
63 | |||
64 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h | ||
65 | index XXXXXXX..XXXXXXX 100644 | ||
66 | --- a/target/arm/cpu.h | ||
67 | +++ b/target/arm/cpu.h | ||
68 | @@ -XXX,XX +XXX,XX @@ struct ArchCPU { | ||
69 | /* QOM property to indicate we should use the back-compat CNTFRQ default */ | ||
70 | bool backcompat_cntfrq; | ||
71 | |||
72 | + /* QOM property to indicate we should use the back-compat QARMA5 default */ | ||
73 | + bool backcompat_pauth_default_use_qarma5; | ||
74 | + | ||
75 | /* Specify the number of cores in this CPU cluster. Used for the L2CTLR | ||
76 | * register. | ||
77 | */ | ||
78 | diff --git a/hw/core/machine.c b/hw/core/machine.c | ||
79 | index XXXXXXX..XXXXXXX 100644 | ||
80 | --- a/hw/core/machine.c | ||
81 | +++ b/hw/core/machine.c | ||
82 | @@ -XXX,XX +XXX,XX @@ | ||
83 | #include "hw/virtio/virtio-iommu.h" | ||
84 | #include "audio/audio.h" | ||
85 | |||
86 | -GlobalProperty hw_compat_9_2[] = {}; | ||
87 | +GlobalProperty hw_compat_9_2[] = { | ||
88 | + {"arm-cpu", "backcompat-pauth-default-use-qarma5", "true"}, | ||
89 | +}; | ||
90 | const size_t hw_compat_9_2_len = G_N_ELEMENTS(hw_compat_9_2); | ||
91 | |||
92 | GlobalProperty hw_compat_9_1[] = { | ||
93 | diff --git a/target/arm/cpu.c b/target/arm/cpu.c | ||
94 | index XXXXXXX..XXXXXXX 100644 | ||
95 | --- a/target/arm/cpu.c | ||
96 | +++ b/target/arm/cpu.c | ||
97 | @@ -XXX,XX +XXX,XX @@ static const Property arm_cpu_properties[] = { | ||
98 | DEFINE_PROP_INT32("core-count", ARMCPU, core_count, -1), | ||
99 | /* True to default to the backward-compat old CNTFRQ rather than 1Ghz */ | ||
100 | DEFINE_PROP_BOOL("backcompat-cntfrq", ARMCPU, backcompat_cntfrq, false), | ||
101 | + DEFINE_PROP_BOOL("backcompat-pauth-default-use-qarma5", ARMCPU, | ||
102 | + backcompat_pauth_default_use_qarma5, false), | ||
103 | }; | ||
104 | |||
105 | static const gchar *arm_gdb_arch_name(CPUState *cs) | ||
106 | diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c | ||
107 | index XXXXXXX..XXXXXXX 100644 | ||
108 | --- a/target/arm/cpu64.c | ||
109 | +++ b/target/arm/cpu64.c | ||
110 | @@ -XXX,XX +XXX,XX @@ void arm_cpu_pauth_finalize(ARMCPU *cpu, Error **errp) | ||
111 | return; | ||
112 | } | ||
113 | |||
114 | - if (cpu->prop_pauth_impdef) { | ||
115 | - isar1 = FIELD_DP64(isar1, ID_AA64ISAR1, API, features); | ||
116 | - isar1 = FIELD_DP64(isar1, ID_AA64ISAR1, GPI, 1); | ||
117 | + bool use_default = !cpu->prop_pauth_qarma5 && | ||
118 | + !cpu->prop_pauth_qarma3 && | ||
119 | + !cpu->prop_pauth_impdef; | ||
120 | + | ||
121 | + if (cpu->prop_pauth_qarma5 || | ||
122 | + (use_default && | ||
123 | + cpu->backcompat_pauth_default_use_qarma5)) { | ||
124 | + isar1 = FIELD_DP64(isar1, ID_AA64ISAR1, APA, features); | ||
125 | + isar1 = FIELD_DP64(isar1, ID_AA64ISAR1, GPA, 1); | ||
126 | } else if (cpu->prop_pauth_qarma3) { | ||
127 | isar2 = FIELD_DP64(isar2, ID_AA64ISAR2, APA3, features); | ||
128 | isar2 = FIELD_DP64(isar2, ID_AA64ISAR2, GPA3, 1); | ||
129 | - } else { /* default is pauth-qarma5 */ | ||
130 | - isar1 = FIELD_DP64(isar1, ID_AA64ISAR1, APA, features); | ||
131 | - isar1 = FIELD_DP64(isar1, ID_AA64ISAR1, GPA, 1); | ||
132 | + } else if (cpu->prop_pauth_impdef || | ||
133 | + (use_default && | ||
134 | + !cpu->backcompat_pauth_default_use_qarma5)) { | ||
135 | + isar1 = FIELD_DP64(isar1, ID_AA64ISAR1, API, features); | ||
136 | + isar1 = FIELD_DP64(isar1, ID_AA64ISAR1, GPI, 1); | ||
137 | + } else { | ||
138 | + g_assert_not_reached(); | ||
139 | } | ||
140 | } else if (cpu->prop_pauth_impdef || | ||
141 | cpu->prop_pauth_qarma3 || | ||
29 | -- | 142 | -- |
30 | 2.34.1 | 143 | 2.34.1 | diff view generated by jsdifflib |
1 | From: Mostafa Saleh <smostafa@google.com> | 1 | From: Pierrick Bouvier <pierrick.bouvier@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | The SMMUv3 OAS is currently hardcoded to 44 bits; for nested | 3 | Signed-off-by: Pierrick Bouvier <pierrick.bouvier@linaro.org> |
4 | configurations that can be a problem, as stage-2 might be shared with | 4 | Message-id: 20241219183211.3493974-4-pierrick.bouvier@linaro.org |
5 | the CPU, which might have a different PARange, and according to the SMMU manual | 5 | [PMM: Removed a paragraph about using non-versioned models.] |
6 | ARM IHI 0070F.b: | ||
7 | 6.3.6 SMMU_IDR5, OAS must match the system physical address size. | ||
8 | |||
9 | This patch doesn't change the SMMU OAS, but refactors the code to | ||
10 | make it easier to do that: | ||
11 | - Rely everywhere on IDR5 for reading OAS instead of using the | ||
12 | SMMU_IDR5_OAS macro, so it is easier just to change IDR5 and | 12 | diff --git a/docs/system/arm/virt.rst b/docs/system/arm/virt.rst |
13 | it propagates correctly. | 13 | index XXXXXXX..XXXXXXX 100644 |
14 | - Add additional checks when OAS is greater than 48 bits. | 14 | --- a/docs/system/arm/virt.rst |
15 | - Remove unused functions/macros: pa_range/MAX_PA. | ||
16 | |||
17 | Reviewed-by: Eric Auger <eric.auger@redhat.com> | ||
18 | Signed-off-by: Mostafa Saleh <smostafa@google.com> | ||
19 | Reviewed-by: Jean-Philippe Brucker <jean-philippe@linaro.org> | ||
20 | Reviewed-by: Alex Bennée <alex.bennee@linaro.org> | ||
21 | Message-id: 20240715084519.1189624-19-smostafa@google.com | ||
22 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 6 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
23 | --- | 7 | --- |
24 | hw/arm/smmuv3-internal.h | 13 ------------- | 8 | docs/system/arm/virt.rst | 4 ++++ |
25 | hw/arm/smmu-common.c | 7 ++++--- | 9 | 1 file changed, 4 insertions(+) |
26 | hw/arm/smmuv3.c | 35 ++++++++++++++++++++++++++++------- | ||
27 | 3 files changed, 32 insertions(+), 23 deletions(-) | ||
28 | 10 | ||
29 | diff --git a/hw/arm/smmuv3-internal.h b/hw/arm/smmuv3-internal.h | 11 | diff --git a/docs/system/arm/virt.rst b/docs/system/arm/virt.rst |
30 | index XXXXXXX..XXXXXXX 100644 | 12 | index XXXXXXX..XXXXXXX 100644 |
31 | --- a/hw/arm/smmuv3-internal.h | 13 | --- a/docs/system/arm/virt.rst |
32 | +++ b/hw/arm/smmuv3-internal.h | 14 | +++ b/docs/system/arm/virt.rst |
33 | @@ -XXX,XX +XXX,XX @@ static inline int oas2bits(int oas_field) | 15 | @@ -XXX,XX +XXX,XX @@ of the 5.0 release and ``virt-5.0`` of the 5.1 release. Migration |
34 | return -1; | 16 | is not guaranteed to work between different QEMU releases for |
35 | } | 17 | the non-versioned ``virt`` machine type. |
36 | 18 | ||
37 | -static inline int pa_range(STE *ste) | 19 | +VM migration is not guaranteed when using ``-cpu max``, as features |
38 | -{ | 20 | +supported may change between QEMU versions. To ensure your VM can be |
39 | - int oas_field = MIN(STE_S2PS(ste), SMMU_IDR5_OAS); | 21 | +migrated, it is recommended to use another cpu model instead. |
40 | - | ||
41 | - if (!STE_S2AA64(ste)) { | ||
42 | - return 40; | ||
43 | - } | ||
44 | - | ||
45 | - return oas2bits(oas_field); | ||
46 | -} | ||
47 | - | ||
48 | -#define MAX_PA(ste) ((1 << pa_range(ste)) - 1) | ||
49 | - | ||
50 | /* CD fields */ | ||
51 | |||
52 | #define CD_VALID(x) extract32((x)->word[0], 31, 1) | ||
53 | diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c | ||
54 | index XXXXXXX..XXXXXXX 100644 | ||
55 | --- a/hw/arm/smmu-common.c | ||
56 | +++ b/hw/arm/smmu-common.c | ||
57 | @@ -XXX,XX +XXX,XX @@ static int smmu_ptw_64_s1(SMMUState *bs, SMMUTransCfg *cfg, | ||
58 | inputsize = 64 - tt->tsz; | ||
59 | level = 4 - (inputsize - 4) / stride; | ||
60 | indexmask = VMSA_IDXMSK(inputsize, stride, level); | ||
61 | - baseaddr = extract64(tt->ttb, 0, 48); | ||
62 | + | 22 | + |
63 | + baseaddr = extract64(tt->ttb, 0, cfg->oas); | 23 | Supported devices |
64 | baseaddr &= ~indexmask; | 24 | """"""""""""""""" |
65 | |||
66 | while (level < VMSA_LEVELS) { | ||
67 | @@ -XXX,XX +XXX,XX @@ static int smmu_ptw_64_s2(SMMUTransCfg *cfg, | ||
68 | * Get the ttb from concatenated structure. | ||
69 | * The offset is the idx * size of each ttb(number of ptes * (sizeof(pte)) | ||
70 | */ | ||
71 | - uint64_t baseaddr = extract64(cfg->s2cfg.vttb, 0, 48) + (1 << stride) * | ||
72 | - idx * sizeof(uint64_t); | ||
73 | + uint64_t baseaddr = extract64(cfg->s2cfg.vttb, 0, cfg->s2cfg.eff_ps) + | ||
74 | + (1 << stride) * idx * sizeof(uint64_t); | ||
75 | dma_addr_t indexmask = VMSA_IDXMSK(inputsize, stride, level); | ||
76 | |||
77 | baseaddr &= ~indexmask; | ||
78 | diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c | ||
79 | index XXXXXXX..XXXXXXX 100644 | ||
80 | --- a/hw/arm/smmuv3.c | ||
81 | +++ b/hw/arm/smmuv3.c | ||
82 | @@ -XXX,XX +XXX,XX @@ static bool s2t0sz_valid(SMMUTransCfg *cfg) | ||
83 | } | ||
84 | |||
85 | if (cfg->s2cfg.granule_sz == 16) { | ||
86 | - return (cfg->s2cfg.tsz >= 64 - oas2bits(SMMU_IDR5_OAS)); | ||
87 | + return (cfg->s2cfg.tsz >= 64 - cfg->s2cfg.eff_ps); | ||
88 | } | ||
89 | |||
90 | - return (cfg->s2cfg.tsz >= MAX(64 - oas2bits(SMMU_IDR5_OAS), 16)); | ||
91 | + return (cfg->s2cfg.tsz >= MAX(64 - cfg->s2cfg.eff_ps, 16)); | ||
92 | } | ||
93 | |||
94 | /* | ||
95 | @@ -XXX,XX +XXX,XX @@ static bool s2_pgtable_config_valid(uint8_t sl0, uint8_t t0sz, uint8_t gran) | ||
96 | return nr_concat <= VMSA_MAX_S2_CONCAT; | ||
97 | } | ||
98 | |||
99 | -static int decode_ste_s2_cfg(SMMUTransCfg *cfg, STE *ste) | ||
100 | +static int decode_ste_s2_cfg(SMMUv3State *s, SMMUTransCfg *cfg, | ||
101 | + STE *ste) | ||
102 | { | ||
103 | + uint8_t oas = FIELD_EX32(s->idr[5], IDR5, OAS); | ||
104 | + | ||
105 | if (STE_S2AA64(ste) == 0x0) { | ||
106 | qemu_log_mask(LOG_UNIMP, | ||
107 | "SMMUv3 AArch32 tables not supported\n"); | ||
108 | @@ -XXX,XX +XXX,XX @@ static int decode_ste_s2_cfg(SMMUTransCfg *cfg, STE *ste) | ||
109 | } | ||
110 | |||
111 | /* For AA64, The effective S2PS size is capped to the OAS. */ | ||
112 | - cfg->s2cfg.eff_ps = oas2bits(MIN(STE_S2PS(ste), SMMU_IDR5_OAS)); | ||
113 | + cfg->s2cfg.eff_ps = oas2bits(MIN(STE_S2PS(ste), oas)); | ||
114 | + /* | ||
115 | + * For SMMUv3.1 and later, when OAS == IAS == 52, the stage 2 input | ||
116 | + * range is further limited to 48 bits unless STE.S2TG indicates a | ||
117 | + * 64KB granule. | ||
118 | + */ | ||
119 | + if (cfg->s2cfg.granule_sz != 16) { | ||
120 | + cfg->s2cfg.eff_ps = MIN(cfg->s2cfg.eff_ps, 48); | ||
121 | + } | ||
122 | /* | ||
123 | * It is ILLEGAL for the address in S2TTB to be outside the range | ||
124 | * described by the effective S2PS value. | ||
125 | @@ -XXX,XX +XXX,XX @@ static int decode_ste(SMMUv3State *s, SMMUTransCfg *cfg, | ||
126 | STE *ste, SMMUEventInfo *event) | ||
127 | { | ||
128 | uint32_t config; | ||
129 | + uint8_t oas = FIELD_EX32(s->idr[5], IDR5, OAS); | ||
130 | int ret; | ||
131 | |||
132 | if (!STE_VALID(ste)) { | ||
133 | @@ -XXX,XX +XXX,XX @@ static int decode_ste(SMMUv3State *s, SMMUTransCfg *cfg, | ||
134 | * Stage-1 OAS defaults to OAS even if not enabled as it would be used | ||
135 | * in input address check for stage-2. | ||
136 | */ | ||
137 | - cfg->oas = oas2bits(SMMU_IDR5_OAS); | ||
138 | - ret = decode_ste_s2_cfg(cfg, ste); | ||
139 | + cfg->oas = oas2bits(oas); | ||
140 | + ret = decode_ste_s2_cfg(s, cfg, ste); | ||
141 | if (ret) { | ||
142 | goto bad_ste; | ||
143 | } | ||
144 | @@ -XXX,XX +XXX,XX @@ static int decode_cd(SMMUv3State *s, SMMUTransCfg *cfg, | ||
145 | int i; | ||
146 | SMMUTranslationStatus status; | ||
147 | SMMUTLBEntry *entry; | ||
148 | + uint8_t oas = FIELD_EX32(s->idr[5], IDR5, OAS); | ||
149 | |||
150 | if (!CD_VALID(cd) || !CD_AARCH64(cd)) { | ||
151 | goto bad_cd; | ||
152 | @@ -XXX,XX +XXX,XX @@ static int decode_cd(SMMUv3State *s, SMMUTransCfg *cfg, | ||
153 | cfg->aa64 = true; | ||
154 | |||
155 | cfg->oas = oas2bits(CD_IPS(cd)); | ||
156 | - cfg->oas = MIN(oas2bits(SMMU_IDR5_OAS), cfg->oas); | ||
157 | + cfg->oas = MIN(oas2bits(oas), cfg->oas); | ||
158 | cfg->tbi = CD_TBI(cd); | ||
159 | cfg->asid = CD_ASID(cd); | ||
160 | cfg->affd = CD_AFFD(cd); | ||
161 | @@ -XXX,XX +XXX,XX @@ static int decode_cd(SMMUv3State *s, SMMUTransCfg *cfg, | ||
162 | goto bad_cd; | ||
163 | } | ||
164 | |||
165 | + /* | ||
166 | + * An address greater than 48 bits in size can only be output from a | ||
167 | + * TTD when, in SMMUv3.1 and later, the effective IPS is 52 and a 64KB | ||
168 | + * granule is in use for that translation table | ||
169 | + */ | ||
170 | + if (tt->granule_sz != 16) { | ||
171 | + cfg->oas = MIN(cfg->oas, 48); | ||
172 | + } | ||
173 | tt->tsz = tsz; | ||
174 | tt->ttb = CD_TTB(cd, i); | ||
175 | 25 | ||
176 | -- | 26 | -- |
177 | 2.34.1 | 27 | 2.34.1 |
178 | |||
179 | diff view generated by jsdifflib |
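
For reference on the left-hand patch above: the OAS and S2PS fields use the Arm PARange-style encoding, and the effective stage-2 PS is the minimum of the STE's S2PS and the SMMU's OAS, further capped at 48 bits unless a 64KB granule is in use. A small standalone sketch of that calculation (the values are hypothetical and this is not the QEMU helper):

#include <stdio.h>

/* OAS/PARange field encoding (0..6) -> number of physical address bits */
static int oas2bits(int oas_field)
{
    static const int bits[] = { 32, 36, 40, 42, 44, 48, 52 };

    return (oas_field >= 0 && oas_field <= 6) ? bits[oas_field] : -1;
}

int main(void)
{
    int idr5_oas = 6;     /* hypothetical SMMU advertising a 52-bit OAS */
    int ste_s2ps = 6;     /* STE asking for a 52-bit stage-2 PS */
    int granule_sz = 12;  /* log2 of the stage-2 granule: 4KB */

    /* The effective S2PS is capped to the SMMU OAS... */
    int eff_ps = oas2bits(ste_s2ps < idr5_oas ? ste_s2ps : idr5_oas);

    /* ...and further limited to 48 bits unless a 64KB granule is in use. */
    if (granule_sz != 16 && eff_ps > 48) {
        eff_ps = 48;
    }

    printf("effective stage-2 PS = %d bits\n", eff_ps);
    return 0;
}

With a 64KB granule (granule_sz == 16) the cap would not apply and the full 52 bits would be kept, matching the comment added in decode_ste_s2_cfg().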