1
Hi; hopefully this is the last arm pullreq before softfreeze.
1
The following changes since commit 3214bec13d8d4c40f707d21d8350d04e4123ae97:
2
There's a handful of miscellaneous bug fixes here, but the
3
bulk of the pullreq is Mostafa's implementation of 2-stage
4
translation in the SMMUv3.
5
2
6
thanks
3
Merge tag 'migration-20250110-pull-request' of https://gitlab.com/farosas/qemu into staging (2025-01-10 13:39:19 -0500)
7
-- PMM
8
9
The following changes since commit d74ec4d7dda6322bcc51d1b13ccbd993d3574795:
10
11
Merge tag 'pull-trivial-patches' of https://gitlab.com/mjt0k/qemu into staging (2024-07-18 10:07:23 +1000)
12
4
13
are available in the Git repository at:
5
are available in the Git repository at:
14
6
15
https://git.linaro.org/people/pmaydell/qemu-arm.git tags/pull-target-arm-20240718
7
https://git.linaro.org/people/pmaydell/qemu-arm.git tags/pull-target-arm-20250113
16
8
17
for you to fetch changes up to 30a1690f2402e6c1582d5b3ebcf7940bfe2fad4b:
9
for you to fetch changes up to 435d260e7ec5ff9c79e3e62f1d66ec82d2d691ae:
18
10
19
hvf: arm: Do not advance PC when raising an exception (2024-07-18 13:49:30 +0100)
11
docs/system/arm/virt: mention specific migration information (2025-01-13 12:35:35 +0000)
20
12
21
----------------------------------------------------------------
13
----------------------------------------------------------------
22
target-arm queue:
14
target-arm queue:
23
* Fix handling of LDAPR/STLR with negative offset
15
* hw/arm_sysctl: fix extracting 31th bit of val
24
* LDAPR should honour SCTLR_ELx.nAA
16
* hw/misc: cast rpm to uint64_t
25
* Use float_status copy in sme_fmopa_s
17
* tests/qtest/boot-serial-test: Improve ASM
26
* hw/display/bcm2835_fb: fix fb_use_offsets condition
18
* target/arm: Move minor arithmetic helpers out of helper.c
27
* hw/arm/smmuv3: Support and advertise nesting
19
* target/arm: change default pauth algorithm to impdef
28
* Use FPST_F16 for SME FMOPA (widening)
29
* tests/arm-cpu-features: Do not assume PMU availability
30
* hvf: arm: Do not advance PC when raising an exception
31
20
32
----------------------------------------------------------------
21
----------------------------------------------------------------
33
Akihiko Odaki (2):
22
Anastasia Belova (1):
34
tests/arm-cpu-features: Do not assume PMU availability
23
hw/arm_sysctl: fix extracting 31th bit of val
35
hvf: arm: Do not advance PC when raising an exception
36
37
Daniyal Khan (2):
38
target/arm: Use float_status copy in sme_fmopa_s
39
tests/tcg/aarch64: Add test cases for SME FMOPA (widening)
40
41
Mostafa Saleh (18):
42
hw/arm/smmu-common: Add missing size check for stage-1
43
hw/arm/smmu: Fix IPA for stage-2 events
44
hw/arm/smmuv3: Fix encoding of CLASS in events
45
hw/arm/smmu: Use enum for SMMU stage
46
hw/arm/smmu: Split smmuv3_translate()
47
hw/arm/smmu: Consolidate ASID and VMID types
48
hw/arm/smmu: Introduce CACHED_ENTRY_TO_ADDR
49
hw/arm/smmuv3: Translate CD and TT using stage-2 table
50
hw/arm/smmu-common: Rework TLB lookup for nesting
51
hw/arm/smmu-common: Add support for nested TLB
52
hw/arm/smmu-common: Support nested translation
53
hw/arm/smmu: Support nesting in smmuv3_range_inval()
54
hw/arm/smmu: Introduce smmu_iotlb_inv_asid_vmid
55
hw/arm/smmu: Support nesting in the rest of commands
56
hw/arm/smmuv3: Support nested SMMUs in smmuv3_notify_iova()
57
hw/arm/smmuv3: Handle translation faults according to SMMUPTWEventInfo
58
hw/arm/smmuv3: Support and advertise nesting
59
hw/arm/smmu: Refactor SMMU OAS
60
24
61
Peter Maydell (2):
25
Peter Maydell (2):
62
target/arm: Fix handling of LDAPR/STLR with negative offset
26
target/arm: Move minor arithmetic helpers out of helper.c
63
target/arm: LDAPR should honour SCTLR_ELx.nAA
27
tests/tcg/aarch64: force qarma5 for pauth-3 test
64
28
65
Richard Henderson (1):
29
Philippe Mathieu-Daudé (4):
66
target/arm: Use FPST_F16 for SME FMOPA (widening)
30
tests/qtest/boot-serial-test: Improve ASM comments of PL011 tests
31
tests/qtest/boot-serial-test: Reduce for() loop in PL011 tests
32
tests/qtest/boot-serial-test: Reorder pair of instructions in PL011 test
33
tests/qtest/boot-serial-test: Initialize PL011 Control register
67
34
68
SamJakob (1):
35
Pierrick Bouvier (3):
69
hw/display/bcm2835_fb: fix fb_use_offsets condition
36
target/arm: add new property to select pauth-qarma5
37
target/arm: change default pauth algorithm to impdef
38
docs/system/arm/virt: mention specific migration information
70
39
71
hw/arm/smmuv3-internal.h | 19 +-
40
Tigran Sogomonian (1):
72
include/hw/arm/smmu-common.h | 46 +++-
41
hw/misc: cast rpm to uint64_t
73
target/arm/tcg/a64.decode | 2 +-
42
74
hw/arm/smmu-common.c | 312 ++++++++++++++++++++++---
43
docs/system/arm/cpu-features.rst | 7 +-
75
hw/arm/smmuv3.c | 467 +++++++++++++++++++++++++-------------
44
docs/system/arm/virt.rst | 4 +
76
hw/display/bcm2835_fb.c | 2 +-
45
docs/system/introduction.rst | 2 +-
77
target/arm/hvf/hvf.c | 1 +
46
target/arm/cpu.h | 4 +
78
target/arm/tcg/sme_helper.c | 2 +-
47
hw/core/machine.c | 4 +-
79
target/arm/tcg/translate-a64.c | 2 +-
48
hw/misc/arm_sysctl.c | 2 +-
80
target/arm/tcg/translate-sme.c | 12 +-
49
hw/misc/npcm7xx_mft.c | 5 +-
81
tests/qtest/arm-cpu-features.c | 13 +-
50
target/arm/arm-qmp-cmds.c | 2 +-
82
tests/tcg/aarch64/sme-fmopa-1.c | 63 +++++
51
target/arm/cpu.c | 2 +
83
tests/tcg/aarch64/sme-fmopa-2.c | 56 +++++
52
target/arm/cpu64.c | 38 ++-
84
tests/tcg/aarch64/sme-fmopa-3.c | 63 +++++
53
target/arm/helper.c | 285 -----------------------
85
hw/arm/trace-events | 26 ++-
54
target/arm/tcg/arith_helper.c | 296 ++++++++++++++++++++++++
86
tests/tcg/aarch64/Makefile.target | 5 +-
55
tests/qtest/arm-cpu-features.c | 15 +-
87
16 files changed, 846 insertions(+), 245 deletions(-)
56
tests/qtest/boot-serial-test.c | 23 +-
88
create mode 100644 tests/tcg/aarch64/sme-fmopa-1.c
57
target/arm/{op_addsub.h => tcg/op_addsub.c.inc} | 0
89
create mode 100644 tests/tcg/aarch64/sme-fmopa-2.c
58
target/arm/tcg/meson.build | 1 +
90
create mode 100644 tests/tcg/aarch64/sme-fmopa-3.c
59
tests/tcg/aarch64/Makefile.softmmu-target | 3 +
60
17 files changed, 377 insertions(+), 316 deletions(-)
61
create mode 100644 target/arm/tcg/arith_helper.c
62
rename target/arm/{op_addsub.h => tcg/op_addsub.c.inc} (100%)
63
diff view generated by jsdifflib
Deleted patch
1
When we converted the LDAPR/STLR instructions to decodetree we
2
accidentally introduced a regression where the offset is negative.
3
The 9-bit immediate field is signed, and the old hand decoder
4
correctly used sextract32() to get it out of the insn word,
5
but the ldapr_stlr_i pattern in the decode file used "imm:9"
6
instead of "imm:s9", so it treated the field as unsigned.
7
1
8
Fix the pattern to treat the field as a signed immediate.
9
10
Cc: qemu-stable@nongnu.org
11
Fixes: 2521b6073b7 ("target/arm: Convert LDAPR/STLR (imm) to decodetree")
12
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2419
13
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
14
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
15
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
16
Message-id: 20240709134504.3500007-2-peter.maydell@linaro.org
17
---
18
target/arm/tcg/a64.decode | 2 +-
19
1 file changed, 1 insertion(+), 1 deletion(-)
20
21
diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode
22
index XXXXXXX..XXXXXXX 100644
23
--- a/target/arm/tcg/a64.decode
24
+++ b/target/arm/tcg/a64.decode
25
@@ -XXX,XX +XXX,XX @@ LDAPR sz:2 111 0 00 1 0 1 11111 1100 00 rn:5 rt:5
26
LDRA 11 111 0 00 m:1 . 1 ......... w:1 1 rn:5 rt:5 imm=%ldra_imm
27
28
&ldapr_stlr_i rn rt imm sz sign ext
29
-@ldapr_stlr_i .. ...... .. . imm:9 .. rn:5 rt:5 &ldapr_stlr_i
30
+@ldapr_stlr_i .. ...... .. . imm:s9 .. rn:5 rt:5 &ldapr_stlr_i
31
STLR_i sz:2 011001 00 0 ......... 00 ..... ..... @ldapr_stlr_i sign=0 ext=0
32
LDAPR_i sz:2 011001 01 0 ......... 00 ..... ..... @ldapr_stlr_i sign=0 ext=0
33
LDAPR_i 00 011001 10 0 ......... 00 ..... ..... @ldapr_stlr_i sign=1 ext=0 sz=0
34
--
35
2.34.1
36
37
diff view generated by jsdifflib
Deleted patch
1
In commit c1a1f80518d360b when we added the FEAT_LSE2 relaxations to
2
the alignment requirements for atomic and ordered loads and stores,
3
we didn't quite get it right for LDAPR/LDAPRH/LDAPRB with no
4
immediate offset. These instructions were handled in the old decoder
5
as part of disas_ldst_atomic(), but unlike all the other insns that
6
function decoded (LDADD, LDCLR, etc) these insns are "ordered", not
7
"atomic", so they should be using check_ordered_align() rather than
8
check_atomic_align(). Commit c1a1f80518d360b used
9
check_atomic_align() regardless for everything in
10
disas_ldst_atomic(). We then carried that incorrect check over in
11
the decodetree conversion, where LDAPR/LDAPRH/LDAPRB are now handled
12
by trans_LDAPR().
13
1
14
The effect is that when FEAT_LSE2 is implemented, these instructions
15
don't honour the SCTLR_ELx.nAA bit and will generate alignment
16
faults when they should not.
17
18
(The LDAPR insns with an immediate offset were in disas_ldst_ldapr_stlr()
19
and then in trans_LDAPR_i() and trans_STLR_i(), and have always used
20
the correct check_ordered_align().)
21
22
Use check_ordered_align() in trans_LDAPR().
23
24
Cc: qemu-stable@nongnu.org
25
Fixes: c1a1f80518d360b ("target/arm: Relax ordered/atomic alignment checks for LSE2")
26
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
27
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
28
Message-id: 20240709134504.3500007-3-peter.maydell@linaro.org
29
---
30
target/arm/tcg/translate-a64.c | 2 +-
31
1 file changed, 1 insertion(+), 1 deletion(-)
32
33
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
34
index XXXXXXX..XXXXXXX 100644
35
--- a/target/arm/tcg/translate-a64.c
36
+++ b/target/arm/tcg/translate-a64.c
37
@@ -XXX,XX +XXX,XX @@ static bool trans_LDAPR(DisasContext *s, arg_LDAPR *a)
38
if (a->rn == 31) {
39
gen_check_sp_alignment(s);
40
}
41
- mop = check_atomic_align(s, a->rn, a->sz);
42
+ mop = check_ordered_align(s, a->rn, 0, false, a->sz);
43
clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), false,
44
a->rn != 31, mop);
45
/*
46
--
47
2.34.1
diff view generated by jsdifflib
1
From: Daniyal Khan <danikhan632@gmail.com>
1
From: Anastasia Belova <abelova@astralinux.ru>
2
2
3
We made a copy above because the fp exception flags
3
1 << 31 is casted to uint64_t while bitwise and with val.
4
are not propagated back to the FPST register, but
4
So this value may become 0xffffffff80000000 but only
5
then failed to use the copy.
5
31th "start" bit is required.
6
6
7
Cc: qemu-stable@nongnu.org
7
This is not possible in practice because the MemoryRegionOps
8
Fixes: 558e956c719 ("target/arm: Implement FMOPA, FMOPS (non-widening)")
8
uses the default max access size of 4 bytes and so none
9
Signed-off-by: Daniyal Khan <danikhan632@gmail.com>
9
of the upper bytes of val will be set, but the bitfield
10
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
extract API is clearer anyway.
11
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
11
12
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
12
Use the bitfield extract() API instead.
13
Message-id: 20240717060149.204788-2-richard.henderson@linaro.org
13
14
[rth: Split from a larger patch]
14
Found by Linux Verification Center (linuxtesting.org) with SVACE.
15
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
15
16
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
16
Signed-off-by: Anastasia Belova <abelova@astralinux.ru>
17
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
17
Message-id: 20241220125429.7552-1-abelova@astralinux.ru
18
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
19
[PMM: add clarification to commit message]
18
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
20
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
19
---
21
---
20
target/arm/tcg/sme_helper.c | 2 +-
22
hw/misc/arm_sysctl.c | 2 +-
21
1 file changed, 1 insertion(+), 1 deletion(-)
23
1 file changed, 1 insertion(+), 1 deletion(-)
22
24
23
diff --git a/target/arm/tcg/sme_helper.c b/target/arm/tcg/sme_helper.c
25
diff --git a/hw/misc/arm_sysctl.c b/hw/misc/arm_sysctl.c
24
index XXXXXXX..XXXXXXX 100644
26
index XXXXXXX..XXXXXXX 100644
25
--- a/target/arm/tcg/sme_helper.c
27
--- a/hw/misc/arm_sysctl.c
26
+++ b/target/arm/tcg/sme_helper.c
28
+++ b/hw/misc/arm_sysctl.c
27
@@ -XXX,XX +XXX,XX @@ void HELPER(sme_fmopa_s)(void *vza, void *vzn, void *vzm, void *vpn,
29
@@ -XXX,XX +XXX,XX @@ static void arm_sysctl_write(void *opaque, hwaddr offset,
28
if (pb & 1) {
30
* as zero.
29
uint32_t *a = vza_row + H1_4(col);
31
*/
30
uint32_t *m = vzm + H1_4(col);
32
s->sys_cfgctrl = val & ~((3 << 18) | (1 << 31));
31
- *a = float32_muladd(n, *m, *a, 0, vst);
33
- if (val & (1 << 31)) {
32
+ *a = float32_muladd(n, *m, *a, 0, &fpst);
34
+ if (extract64(val, 31, 1)) {
33
}
35
/* Start bit set -- actually do something */
34
col += 4;
36
unsigned int dcc = extract32(s->sys_cfgctrl, 26, 4);
35
pb >>= 4;
37
unsigned int function = extract32(s->sys_cfgctrl, 20, 6);
36
--
38
--
37
2.34.1
39
2.34.1
38
39
diff view generated by jsdifflib
1
From: Mostafa Saleh <smostafa@google.com>
1
From: Tigran Sogomonian <tsogomonian@astralinux.ru>
2
2
3
For the following events (ARM IHI 0070 F.b - 7.3 Event records):
3
The value of an arithmetic expression
4
- F_TRANSLATION
4
'rpm * NPCM7XX_MFT_PULSE_PER_REVOLUTION' is a subject
5
- F_ACCESS
5
to overflow because its operands are not cast to
6
- F_PERMISSION
6
a larger data type before performing arithmetic. Thus, need
7
- F_ADDR_SIZE
7
to cast rpm to uint64_t.
8
8
9
If fault occurs at stage 2, S2 == 1 and:
9
Found by Linux Verification Center (linuxtesting.org) with SVACE.
10
- If translating an IPA for a transaction (whether by input to
11
stage 2-only configuration, or after successful stage 1 translation),
12
CLASS == IN, and IPA is provided.
13
10
14
At the moment only CLASS == IN is used which indicates input
11
Signed-off-by: Tigran Sogomonian <tsogomonian@astralinux.ru>
15
translation.
12
Reviewed-by: Patrick Leis <venture@google.com>
16
13
Reviewed-by: Hao Wu <wuhaotsh@google.com>
17
However, this was not implemented correctly, as for stage 2, the code
14
Message-id: 20241226130311.1349-1-tsogomonian@astralinux.ru
18
only sets the S2 bit but not the IPA.
19
20
This field has the same bits as FetchAddr in F_WALK_EABT which is
21
populated correctly, so we don’t change that.
22
The setting of this field should be done from the walker as the IPA address
23
wouldn't be known in case of nesting.
24
25
For stage 1, the spec says:
26
If fault occurs at stage 1, S2 == 0 and:
27
CLASS == IN, IPA is UNKNOWN.
28
29
So, no need to set it to for stage 1, as ptw_info is initialised by zero in
30
smmuv3_translate().
31
32
Fixes: e703f7076a “hw/arm/smmuv3: Add page table walk for stage-2”
33
Reviewed-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
34
Reviewed-by: Eric Auger <eric.auger@redhat.com>
35
Signed-off-by: Mostafa Saleh <smostafa@google.com>
36
Message-id: 20240715084519.1189624-3-smostafa@google.com
37
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
15
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
38
---
16
---
39
hw/arm/smmu-common.c | 10 ++++++----
17
hw/misc/npcm7xx_mft.c | 5 +++--
40
hw/arm/smmuv3.c | 4 ++++
18
1 file changed, 3 insertions(+), 2 deletions(-)
41
2 files changed, 10 insertions(+), 4 deletions(-)
42
19
43
diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
20
diff --git a/hw/misc/npcm7xx_mft.c b/hw/misc/npcm7xx_mft.c
44
index XXXXXXX..XXXXXXX 100644
21
index XXXXXXX..XXXXXXX 100644
45
--- a/hw/arm/smmu-common.c
22
--- a/hw/misc/npcm7xx_mft.c
46
+++ b/hw/arm/smmu-common.c
23
+++ b/hw/misc/npcm7xx_mft.c
47
@@ -XXX,XX +XXX,XX @@ static int smmu_ptw_64_s2(SMMUTransCfg *cfg,
24
@@ -XXX,XX +XXX,XX @@ static NPCM7xxMFTCaptureState npcm7xx_mft_compute_cnt(
48
*/
25
* RPM = revolution/min. The time for one revlution (in ns) is
49
if (ipa >= (1ULL << inputsize)) {
26
* MINUTE_TO_NANOSECOND / RPM.
50
info->type = SMMU_PTW_ERR_TRANSLATION;
27
*/
51
- goto error;
28
- count = clock_ns_to_ticks(clock, (60 * NANOSECONDS_PER_SECOND) /
52
+ goto error_ipa;
29
- (rpm * NPCM7XX_MFT_PULSE_PER_REVOLUTION));
30
+ count = clock_ns_to_ticks(clock,
31
+ (uint64_t)(60 * NANOSECONDS_PER_SECOND) /
32
+ ((uint64_t)rpm * NPCM7XX_MFT_PULSE_PER_REVOLUTION));
53
}
33
}
54
34
55
while (level < VMSA_LEVELS) {
35
if (count > NPCM7XX_MFT_MAX_CNT) {
56
@@ -XXX,XX +XXX,XX @@ static int smmu_ptw_64_s2(SMMUTransCfg *cfg,
57
*/
58
if (!PTE_AF(pte) && !cfg->s2cfg.affd) {
59
info->type = SMMU_PTW_ERR_ACCESS;
60
- goto error;
61
+ goto error_ipa;
62
}
63
64
s2ap = PTE_AP(pte);
65
if (is_permission_fault_s2(s2ap, perm)) {
66
info->type = SMMU_PTW_ERR_PERMISSION;
67
- goto error;
68
+ goto error_ipa;
69
}
70
71
/*
72
@@ -XXX,XX +XXX,XX @@ static int smmu_ptw_64_s2(SMMUTransCfg *cfg,
73
*/
74
if (gpa >= (1ULL << cfg->s2cfg.eff_ps)) {
75
info->type = SMMU_PTW_ERR_ADDR_SIZE;
76
- goto error;
77
+ goto error_ipa;
78
}
79
80
tlbe->entry.translated_addr = gpa;
81
@@ -XXX,XX +XXX,XX @@ static int smmu_ptw_64_s2(SMMUTransCfg *cfg,
82
}
83
info->type = SMMU_PTW_ERR_TRANSLATION;
84
85
+error_ipa:
86
+ info->addr = ipa;
87
error:
88
info->stage = 2;
89
tlbe->entry.perm = IOMMU_NONE;
90
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
91
index XXXXXXX..XXXXXXX 100644
92
--- a/hw/arm/smmuv3.c
93
+++ b/hw/arm/smmuv3.c
94
@@ -XXX,XX +XXX,XX @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion *mr, hwaddr addr,
95
if (PTW_RECORD_FAULT(cfg)) {
96
event.type = SMMU_EVT_F_TRANSLATION;
97
event.u.f_translation.addr = addr;
98
+ event.u.f_translation.addr2 = ptw_info.addr;
99
event.u.f_translation.rnw = flag & 0x1;
100
}
101
break;
102
@@ -XXX,XX +XXX,XX @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion *mr, hwaddr addr,
103
if (PTW_RECORD_FAULT(cfg)) {
104
event.type = SMMU_EVT_F_ADDR_SIZE;
105
event.u.f_addr_size.addr = addr;
106
+ event.u.f_addr_size.addr2 = ptw_info.addr;
107
event.u.f_addr_size.rnw = flag & 0x1;
108
}
109
break;
110
@@ -XXX,XX +XXX,XX @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion *mr, hwaddr addr,
111
if (PTW_RECORD_FAULT(cfg)) {
112
event.type = SMMU_EVT_F_ACCESS;
113
event.u.f_access.addr = addr;
114
+ event.u.f_access.addr2 = ptw_info.addr;
115
event.u.f_access.rnw = flag & 0x1;
116
}
117
break;
118
@@ -XXX,XX +XXX,XX @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion *mr, hwaddr addr,
119
if (PTW_RECORD_FAULT(cfg)) {
120
event.type = SMMU_EVT_F_PERMISSION;
121
event.u.f_permission.addr = addr;
122
+ event.u.f_permission.addr2 = ptw_info.addr;
123
event.u.f_permission.rnw = flag & 0x1;
124
}
125
break;
126
--
36
--
127
2.34.1
37
2.34.1
128
129
diff view generated by jsdifflib
1
From: Richard Henderson <richard.henderson@linaro.org>
1
From: Philippe Mathieu-Daudé <philmd@linaro.org>
2
2
3
This operation has float16 inputs and thus must use
3
Re-indent ASM comments adding the 'loop:' label.
4
the FZ16 control not the FZ control.
5
4
6
Cc: qemu-stable@nongnu.org
5
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
7
Fixes: 3916841ac75 ("target/arm: Implement FMOPA, FMOPS (widening)")
6
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
8
Reported-by: Daniyal Khan <danikhan632@gmail.com>
7
Reviewed-by: Fabiano Rosas <farosas@suse.de>
9
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
11
Message-id: 20240717060149.204788-3-richard.henderson@linaro.org
12
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2374
13
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
14
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
15
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
16
---
9
---
17
target/arm/tcg/translate-sme.c | 12 ++++++++----
10
tests/qtest/boot-serial-test.c | 18 +++++++++---------
18
1 file changed, 8 insertions(+), 4 deletions(-)
11
1 file changed, 9 insertions(+), 9 deletions(-)
19
12
20
diff --git a/target/arm/tcg/translate-sme.c b/target/arm/tcg/translate-sme.c
13
diff --git a/tests/qtest/boot-serial-test.c b/tests/qtest/boot-serial-test.c
21
index XXXXXXX..XXXXXXX 100644
14
index XXXXXXX..XXXXXXX 100644
22
--- a/target/arm/tcg/translate-sme.c
15
--- a/tests/qtest/boot-serial-test.c
23
+++ b/target/arm/tcg/translate-sme.c
16
+++ b/tests/qtest/boot-serial-test.c
24
@@ -XXX,XX +XXX,XX @@ static bool do_outprod(DisasContext *s, arg_op *a, MemOp esz,
17
@@ -XXX,XX +XXX,XX @@ static const uint8_t kernel_plml605[] = {
25
}
18
};
26
19
27
static bool do_outprod_fpst(DisasContext *s, arg_op *a, MemOp esz,
20
static const uint8_t bios_raspi2[] = {
28
+ ARMFPStatusFlavour e_fpst,
21
- 0x08, 0x30, 0x9f, 0xe5, /* ldr r3,[pc,#8] Get base */
29
gen_helper_gvec_5_ptr *fn)
22
- 0x54, 0x20, 0xa0, 0xe3, /* mov r2,#'T' */
30
{
23
- 0x00, 0x20, 0xc3, 0xe5, /* strb r2,[r3] */
31
int svl = streaming_vec_reg_size(s);
24
- 0xfb, 0xff, 0xff, 0xea, /* b loop */
32
@@ -XXX,XX +XXX,XX @@ static bool do_outprod_fpst(DisasContext *s, arg_op *a, MemOp esz,
25
- 0x00, 0x10, 0x20, 0x3f, /* 0x3f201000 = UART0 base addr */
33
zm = vec_full_reg_ptr(s, a->zm);
26
+ 0x08, 0x30, 0x9f, 0xe5, /* loop: ldr r3, [pc, #8] Get &UART0 */
34
pn = pred_full_reg_ptr(s, a->pn);
27
+ 0x54, 0x20, 0xa0, 0xe3, /* mov r2, #'T' */
35
pm = pred_full_reg_ptr(s, a->pm);
28
+ 0x00, 0x20, 0xc3, 0xe5, /* strb r2, [r3] *TXDAT = 'T' */
36
- fpst = fpstatus_ptr(FPST_FPCR);
29
+ 0xfb, 0xff, 0xff, 0xea, /* b -12 (loop) */
37
+ fpst = fpstatus_ptr(e_fpst);
30
+ 0x00, 0x10, 0x20, 0x3f, /* UART0: 0x3f201000 */
38
31
};
39
fn(za, zn, zm, pn, pm, fpst, tcg_constant_i32(desc));
32
40
return true;
33
static const uint8_t kernel_aarch64[] = {
41
}
34
- 0x81, 0x0a, 0x80, 0x52, /* mov w1, #0x54 */
42
35
- 0x02, 0x20, 0xa1, 0xd2, /* mov x2, #0x9000000 */
43
-TRANS_FEAT(FMOPA_h, aa64_sme, do_outprod_fpst, a, MO_32, gen_helper_sme_fmopa_h)
36
- 0x41, 0x00, 0x00, 0x39, /* strb w1, [x2] */
44
-TRANS_FEAT(FMOPA_s, aa64_sme, do_outprod_fpst, a, MO_32, gen_helper_sme_fmopa_s)
37
- 0xfd, 0xff, 0xff, 0x17, /* b -12 (loop) */
45
-TRANS_FEAT(FMOPA_d, aa64_sme_f64f64, do_outprod_fpst, a, MO_64, gen_helper_sme_fmopa_d)
38
+ 0x81, 0x0a, 0x80, 0x52, /* loop: mov w1, #'T' */
46
+TRANS_FEAT(FMOPA_h, aa64_sme, do_outprod_fpst, a,
39
+ 0x02, 0x20, 0xa1, 0xd2, /* mov x2, #0x9000000 Load UART0 */
47
+ MO_32, FPST_FPCR_F16, gen_helper_sme_fmopa_h)
40
+ 0x41, 0x00, 0x00, 0x39, /* strb w1, [x2] *TXDAT = 'T' */
48
+TRANS_FEAT(FMOPA_s, aa64_sme, do_outprod_fpst, a,
41
+ 0xfd, 0xff, 0xff, 0x17, /* b -12 (loop) */
49
+ MO_32, FPST_FPCR, gen_helper_sme_fmopa_s)
42
};
50
+TRANS_FEAT(FMOPA_d, aa64_sme_f64f64, do_outprod_fpst, a,
43
51
+ MO_64, FPST_FPCR, gen_helper_sme_fmopa_d)
44
static const uint8_t kernel_nrf51[] = {
52
53
/* TODO: FEAT_EBF16 */
54
TRANS_FEAT(BFMOPA, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_bfmopa)
55
--
45
--
56
2.34.1
46
2.34.1
57
47
58
48
diff view generated by jsdifflib
1
From: Mostafa Saleh <smostafa@google.com>
1
From: Philippe Mathieu-Daudé <philmd@linaro.org>
2
2
3
Everything is in place, consolidate parsing of STE cfg and setting
3
Since registers are not modified, we don't need
4
translation stage.
4
to refill their values. Directly jump to the previous
5
store instruction to keep filling the TXDAT register.
5
6
6
Advertise nesting if stage requested is "nested".
7
The equivalent C code remains:
7
8
8
Reviewed-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
9
while (true) {
9
Reviewed-by: Eric Auger <eric.auger@redhat.com>
10
*UART_DATA = 'T';
10
Signed-off-by: Mostafa Saleh <smostafa@google.com>
11
}
11
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
12
12
Message-id: 20240715084519.1189624-18-smostafa@google.com
13
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
14
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
15
Reviewed-by: Fabiano Rosas <farosas@suse.de>
13
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
16
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
14
---
17
---
15
hw/arm/smmuv3.c | 35 ++++++++++++++++++++++++++---------
18
tests/qtest/boot-serial-test.c | 12 ++++++------
16
1 file changed, 26 insertions(+), 9 deletions(-)
19
1 file changed, 6 insertions(+), 6 deletions(-)
17
20
18
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
21
diff --git a/tests/qtest/boot-serial-test.c b/tests/qtest/boot-serial-test.c
19
index XXXXXXX..XXXXXXX 100644
22
index XXXXXXX..XXXXXXX 100644
20
--- a/hw/arm/smmuv3.c
23
--- a/tests/qtest/boot-serial-test.c
21
+++ b/hw/arm/smmuv3.c
24
+++ b/tests/qtest/boot-serial-test.c
22
@@ -XXX,XX +XXX,XX @@ static void smmuv3_init_regs(SMMUv3State *s)
25
@@ -XXX,XX +XXX,XX @@ static const uint8_t kernel_plml605[] = {
23
/* Based on sys property, the stages supported in smmu will be advertised.*/
26
};
24
if (s->stage && !strcmp("2", s->stage)) {
27
25
s->idr[0] = FIELD_DP32(s->idr[0], IDR0, S2P, 1);
28
static const uint8_t bios_raspi2[] = {
26
+ } else if (s->stage && !strcmp("nested", s->stage)) {
29
- 0x08, 0x30, 0x9f, 0xe5, /* loop: ldr r3, [pc, #8] Get &UART0 */
27
+ s->idr[0] = FIELD_DP32(s->idr[0], IDR0, S1P, 1);
30
+ 0x08, 0x30, 0x9f, 0xe5, /* ldr r3, [pc, #8] Get &UART0 */
28
+ s->idr[0] = FIELD_DP32(s->idr[0], IDR0, S2P, 1);
31
0x54, 0x20, 0xa0, 0xe3, /* mov r2, #'T' */
29
} else {
32
- 0x00, 0x20, 0xc3, 0xe5, /* strb r2, [r3] *TXDAT = 'T' */
30
s->idr[0] = FIELD_DP32(s->idr[0], IDR0, S1P, 1);
33
- 0xfb, 0xff, 0xff, 0xea, /* b -12 (loop) */
31
}
34
+ 0x00, 0x20, 0xc3, 0xe5, /* loop: strb r2, [r3] *TXDAT = 'T' */
32
@@ -XXX,XX +XXX,XX @@ static bool s2_pgtable_config_valid(uint8_t sl0, uint8_t t0sz, uint8_t gran)
35
+ 0xff, 0xff, 0xff, 0xea, /* b -4 (loop) */
33
36
0x00, 0x10, 0x20, 0x3f, /* UART0: 0x3f201000 */
34
static int decode_ste_s2_cfg(SMMUTransCfg *cfg, STE *ste)
37
};
35
{
38
36
- cfg->stage = SMMU_STAGE_2;
39
static const uint8_t kernel_aarch64[] = {
37
-
40
- 0x81, 0x0a, 0x80, 0x52, /* loop: mov w1, #'T' */
38
if (STE_S2AA64(ste) == 0x0) {
41
+ 0x81, 0x0a, 0x80, 0x52, /* mov w1, #'T' */
39
qemu_log_mask(LOG_UNIMP,
42
0x02, 0x20, 0xa1, 0xd2, /* mov x2, #0x9000000 Load UART0 */
40
"SMMUv3 AArch32 tables not supported\n");
43
- 0x41, 0x00, 0x00, 0x39, /* strb w1, [x2] *TXDAT = 'T' */
41
@@ -XXX,XX +XXX,XX @@ bad_ste:
44
- 0xfd, 0xff, 0xff, 0x17, /* b -12 (loop) */
42
return -EINVAL;
45
+ 0x41, 0x00, 0x00, 0x39, /* loop: strb w1, [x2] *TXDAT = 'T' */
43
}
46
+ 0xff, 0xff, 0xff, 0x17, /* b -4 (loop) */
44
47
};
45
+static void decode_ste_config(SMMUTransCfg *cfg, uint32_t config)
48
46
+{
49
static const uint8_t kernel_nrf51[] = {
47
+
48
+ if (STE_CFG_ABORT(config)) {
49
+ cfg->aborted = true;
50
+ return;
51
+ }
52
+ if (STE_CFG_BYPASS(config)) {
53
+ cfg->bypassed = true;
54
+ return;
55
+ }
56
+
57
+ if (STE_CFG_S1_ENABLED(config)) {
58
+ cfg->stage = SMMU_STAGE_1;
59
+ }
60
+
61
+ if (STE_CFG_S2_ENABLED(config)) {
62
+ cfg->stage |= SMMU_STAGE_2;
63
+ }
64
+}
65
+
66
/* Returns < 0 in case of invalid STE, 0 otherwise */
67
static int decode_ste(SMMUv3State *s, SMMUTransCfg *cfg,
68
STE *ste, SMMUEventInfo *event)
69
@@ -XXX,XX +XXX,XX @@ static int decode_ste(SMMUv3State *s, SMMUTransCfg *cfg,
70
71
config = STE_CONFIG(ste);
72
73
- if (STE_CFG_ABORT(config)) {
74
- cfg->aborted = true;
75
- return 0;
76
- }
77
+ decode_ste_config(cfg, config);
78
79
- if (STE_CFG_BYPASS(config)) {
80
- cfg->bypassed = true;
81
+ if (cfg->aborted || cfg->bypassed) {
82
return 0;
83
}
84
85
@@ -XXX,XX +XXX,XX @@ static int decode_cd(SMMUv3State *s, SMMUTransCfg *cfg,
86
87
/* we support only those at the moment */
88
cfg->aa64 = true;
89
- cfg->stage = SMMU_STAGE_1;
90
91
cfg->oas = oas2bits(CD_IPS(cd));
92
cfg->oas = MIN(oas2bits(SMMU_IDR5_OAS), cfg->oas);
93
--
50
--
94
2.34.1
51
2.34.1
95
52
96
53
diff view generated by jsdifflib
1
From: SamJakob <me@samjakob.com>
1
From: Philippe Mathieu-Daudé <philmd@linaro.org>
2
2
3
It is common practice when implementing double-buffering on VideoCore
3
In the next commit we are going to use a different value
4
to do so by multiplying the height of the virtual buffer by the
4
for the $w1 register, maintaining the same $x2 value. In
5
number of virtual screens desired (i.e., two - in the case of
5
order to keep the next commit trivial to review, set $x2
6
double-bufferring).
6
before $w1.
7
7
8
At present, this won't work in QEMU because the logic in
8
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
9
fb_use_offsets require that both the virtual width and height exceed
9
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
10
their physical counterparts.
10
Reviewed-by: Fabiano Rosas <farosas@suse.de>
11
12
This appears to be unintentional/a typo and indeed the comment
13
states; "Experimentally, the hardware seems to do this only if the
14
viewport size is larger than the physical screen". The
15
viewport/virtual size would be larger than the physical size if
16
either virtual dimension were larger than their physical counterparts
17
and not necessarily both.
18
19
Signed-off-by: SamJakob <me@samjakob.com>
20
Message-id: 20240713160353.62410-1-me@samjakob.com
21
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
22
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
11
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
23
---
12
---
24
hw/display/bcm2835_fb.c | 2 +-
13
tests/qtest/boot-serial-test.c | 2 +-
25
1 file changed, 1 insertion(+), 1 deletion(-)
14
1 file changed, 1 insertion(+), 1 deletion(-)
26
15
27
diff --git a/hw/display/bcm2835_fb.c b/hw/display/bcm2835_fb.c
16
diff --git a/tests/qtest/boot-serial-test.c b/tests/qtest/boot-serial-test.c
28
index XXXXXXX..XXXXXXX 100644
17
index XXXXXXX..XXXXXXX 100644
29
--- a/hw/display/bcm2835_fb.c
18
--- a/tests/qtest/boot-serial-test.c
30
+++ b/hw/display/bcm2835_fb.c
19
+++ b/tests/qtest/boot-serial-test.c
31
@@ -XXX,XX +XXX,XX @@ static bool fb_use_offsets(BCM2835FBConfig *config)
20
@@ -XXX,XX +XXX,XX @@ static const uint8_t bios_raspi2[] = {
32
* viewport size is larger than the physical screen. (It doesn't
21
};
33
* prevent the guest setting this silly viewport setting, though...)
22
34
*/
23
static const uint8_t kernel_aarch64[] = {
35
- return config->xres_virtual > config->xres &&
24
- 0x81, 0x0a, 0x80, 0x52, /* mov w1, #'T' */
36
+ return config->xres_virtual > config->xres ||
25
0x02, 0x20, 0xa1, 0xd2, /* mov x2, #0x9000000 Load UART0 */
37
config->yres_virtual > config->yres;
26
+ 0x81, 0x0a, 0x80, 0x52, /* mov w1, #'T' */
38
}
27
0x41, 0x00, 0x00, 0x39, /* loop: strb w1, [x2] *TXDAT = 'T' */
39
28
0xff, 0xff, 0xff, 0x17, /* b -4 (loop) */
29
};
40
--
30
--
41
2.34.1
31
2.34.1
42
32
43
33
diff view generated by jsdifflib
1
From: Mostafa Saleh <smostafa@google.com>
1
From: Philippe Mathieu-Daudé <philmd@linaro.org>
2
2
3
Previously, to check if faults are enabled, it was sufficient to check
3
The tests using the PL011 UART of the virt and raspi machines
4
the current stage of translation and check the corresponding
4
weren't properly enabling the UART and its transmitter previous
5
record_faults flag.
5
to sending characters. Follow the PL011 manual initialization
6
recommendation by setting the proper bits of the control register.
6
7
7
However, with nesting, it is possible for stage-1 (nested) translation
8
Update the ASM code prefixing:
8
to trigger a stage-2 fault, so we check SMMUPTWEventInfo as it would
9
have the correct stage set from the page table walk.
10
9
11
Signed-off-by: Mostafa Saleh <smostafa@google.com>
10
*UART_CTRL = UART_ENABLE | TX_ENABLE;
12
Reviewed-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
11
13
Reviewed-by: Eric Auger <eric.auger@redhat.com>
12
to:
14
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
13
15
Message-id: 20240715084519.1189624-17-smostafa@google.com
14
while (true) {
15
*UART_DATA = 'T';
16
}
17
18
Note, since commit 51b61dd4d56 ("hw/char/pl011: Warn when using
19
disabled transmitter") incomplete PL011 initialization can be
20
logged using the '-d guest_errors' command line option.
21
22
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
23
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
16
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
24
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
17
---
25
---
18
hw/arm/smmuv3.c | 15 ++++++++-------
26
tests/qtest/boot-serial-test.c | 7 ++++++-
19
1 file changed, 8 insertions(+), 7 deletions(-)
27
1 file changed, 6 insertions(+), 1 deletion(-)
20
28
21
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
29
diff --git a/tests/qtest/boot-serial-test.c b/tests/qtest/boot-serial-test.c
22
index XXXXXXX..XXXXXXX 100644
30
index XXXXXXX..XXXXXXX 100644
23
--- a/hw/arm/smmuv3.c
31
--- a/tests/qtest/boot-serial-test.c
24
+++ b/hw/arm/smmuv3.c
32
+++ b/tests/qtest/boot-serial-test.c
25
@@ -XXX,XX +XXX,XX @@
33
@@ -XXX,XX +XXX,XX @@ static const uint8_t kernel_plml605[] = {
26
#include "smmuv3-internal.h"
34
};
27
#include "smmu-internal.h"
35
28
36
static const uint8_t bios_raspi2[] = {
29
-#define PTW_RECORD_FAULT(cfg) (((cfg)->stage == SMMU_STAGE_1) ? \
37
- 0x08, 0x30, 0x9f, 0xe5, /* ldr r3, [pc, #8] Get &UART0 */
30
- (cfg)->record_faults : \
38
+ 0x10, 0x30, 0x9f, 0xe5, /* ldr r3, [pc, #16] Get &UART0 */
31
- (cfg)->s2cfg.record_faults)
39
+ 0x10, 0x20, 0x9f, 0xe5, /* ldr r2, [pc, #16] Get &CR */
32
+#define PTW_RECORD_FAULT(ptw_info, cfg) (((ptw_info).stage == SMMU_STAGE_1 && \
40
+ 0xb0, 0x23, 0xc3, 0xe1, /* strh r2, [r3, #48] Set CR */
33
+ (cfg)->record_faults) || \
41
0x54, 0x20, 0xa0, 0xe3, /* mov r2, #'T' */
34
+ ((ptw_info).stage == SMMU_STAGE_2 && \
42
0x00, 0x20, 0xc3, 0xe5, /* loop: strb r2, [r3] *TXDAT = 'T' */
35
+ (cfg)->s2cfg.record_faults))
43
0xff, 0xff, 0xff, 0xea, /* b -4 (loop) */
36
44
0x00, 0x10, 0x20, 0x3f, /* UART0: 0x3f201000 */
37
/**
45
+ 0x01, 0x01, 0x00, 0x00, /* CR: 0x101 = UARTEN|TXE */
38
* smmuv3_trigger_irq - pulse @irq if enabled and update
46
};
39
@@ -XXX,XX +XXX,XX @@ static SMMUTranslationStatus smmuv3_do_translate(SMMUv3State *s, hwaddr addr,
47
40
event->u.f_walk_eabt.addr2 = ptw_info.addr;
48
static const uint8_t kernel_aarch64[] = {
41
break;
49
0x02, 0x20, 0xa1, 0xd2, /* mov x2, #0x9000000 Load UART0 */
42
case SMMU_PTW_ERR_TRANSLATION:
50
+ 0x21, 0x20, 0x80, 0x52, /* mov w1, 0x101 CR = UARTEN|TXE */
43
- if (PTW_RECORD_FAULT(cfg)) {
51
+ 0x41, 0x60, 0x00, 0x79, /* strh w1, [x2, #48] Set CR */
44
+ if (PTW_RECORD_FAULT(ptw_info, cfg)) {
52
0x81, 0x0a, 0x80, 0x52, /* mov w1, #'T' */
45
event->type = SMMU_EVT_F_TRANSLATION;
53
0x41, 0x00, 0x00, 0x39, /* loop: strb w1, [x2] *TXDAT = 'T' */
46
event->u.f_translation.addr2 = ptw_info.addr;
54
0xff, 0xff, 0xff, 0x17, /* b -4 (loop) */
47
event->u.f_translation.class = class;
48
@@ -XXX,XX +XXX,XX @@ static SMMUTranslationStatus smmuv3_do_translate(SMMUv3State *s, hwaddr addr,
49
}
50
break;
51
case SMMU_PTW_ERR_ADDR_SIZE:
52
- if (PTW_RECORD_FAULT(cfg)) {
53
+ if (PTW_RECORD_FAULT(ptw_info, cfg)) {
54
event->type = SMMU_EVT_F_ADDR_SIZE;
55
event->u.f_addr_size.addr2 = ptw_info.addr;
56
event->u.f_addr_size.class = class;
57
@@ -XXX,XX +XXX,XX @@ static SMMUTranslationStatus smmuv3_do_translate(SMMUv3State *s, hwaddr addr,
58
}
59
break;
60
case SMMU_PTW_ERR_ACCESS:
61
- if (PTW_RECORD_FAULT(cfg)) {
62
+ if (PTW_RECORD_FAULT(ptw_info, cfg)) {
63
event->type = SMMU_EVT_F_ACCESS;
64
event->u.f_access.addr2 = ptw_info.addr;
65
event->u.f_access.class = class;
66
@@ -XXX,XX +XXX,XX @@ static SMMUTranslationStatus smmuv3_do_translate(SMMUv3State *s, hwaddr addr,
67
}
68
break;
69
case SMMU_PTW_ERR_PERMISSION:
70
- if (PTW_RECORD_FAULT(cfg)) {
71
+ if (PTW_RECORD_FAULT(ptw_info, cfg)) {
72
event->type = SMMU_EVT_F_PERMISSION;
73
event->u.f_permission.addr2 = ptw_info.addr;
74
event->u.f_permission.class = class;
75
--
55
--
76
2.34.1
56
2.34.1
77
57
78
58
diff view generated by jsdifflib
1
From: Daniyal Khan <danikhan632@gmail.com>
1
helper.c includes some small TCG helper functions used for mostly
2
arithmetic instructions. These are TCG only and there's no need for
3
them to be in the large and unwieldy helper.c. Move them out to
4
their own source file in the tcg/ subdirectory, together with the
5
op_addsub.h multiply-included template header that they use.
2
6
3
Signed-off-by: Daniyal Khan <danikhan632@gmail.com>
7
Since we are moving op_addsub.h, we take the opportunity to
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
give it a name which matches our convention for files which
9
are not true header files but which are #included from other
10
C files: op_addsub.c.inc.
11
12
(Ironically, this means that helper.c no longer contains
13
any TCG helper function definitions at all.)
14
15
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
16
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
17
Message-id: 20250110131211.2546314-1-peter.maydell@linaro.org
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
18
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
6
Message-id: 20240717060149.204788-4-richard.henderson@linaro.org
7
Message-Id: 172090222034.13953.16888708708822922098-1@git.sr.ht
8
[rth: Split test from a larger patch, tidy assembly]
9
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
11
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
12
---
19
---
13
tests/tcg/aarch64/sme-fmopa-1.c | 63 +++++++++++++++++++++++++++++++
20
target/arm/helper.c | 285 -----------------
14
tests/tcg/aarch64/sme-fmopa-2.c | 56 +++++++++++++++++++++++++++
21
target/arm/tcg/arith_helper.c | 296 ++++++++++++++++++
15
tests/tcg/aarch64/sme-fmopa-3.c | 63 +++++++++++++++++++++++++++++++
22
.../arm/{op_addsub.h => tcg/op_addsub.c.inc} | 0
16
tests/tcg/aarch64/Makefile.target | 5 ++-
23
target/arm/tcg/meson.build | 1 +
17
4 files changed, 185 insertions(+), 2 deletions(-)
24
4 files changed, 297 insertions(+), 285 deletions(-)
18
create mode 100644 tests/tcg/aarch64/sme-fmopa-1.c
25
create mode 100644 target/arm/tcg/arith_helper.c
19
create mode 100644 tests/tcg/aarch64/sme-fmopa-2.c
26
rename target/arm/{op_addsub.h => tcg/op_addsub.c.inc} (100%)
20
create mode 100644 tests/tcg/aarch64/sme-fmopa-3.c
21
27
22
diff --git a/tests/tcg/aarch64/sme-fmopa-1.c b/tests/tcg/aarch64/sme-fmopa-1.c
28
diff --git a/target/arm/helper.c b/target/arm/helper.c
29
index XXXXXXX..XXXXXXX 100644
30
--- a/target/arm/helper.c
31
+++ b/target/arm/helper.c
32
@@ -XXX,XX +XXX,XX @@
33
#include "qemu/main-loop.h"
34
#include "qemu/timer.h"
35
#include "qemu/bitops.h"
36
-#include "qemu/crc32c.h"
37
#include "qemu/qemu-print.h"
38
#include "exec/exec-all.h"
39
#include "exec/translation-block.h"
40
-#include <zlib.h> /* for crc32 */
41
#include "hw/irq.h"
42
#include "system/cpu-timers.h"
43
#include "system/kvm.h"
44
@@ -XXX,XX +XXX,XX @@ ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va,
45
};
46
}
47
48
-/*
49
- * Note that signed overflow is undefined in C. The following routines are
50
- * careful to use unsigned types where modulo arithmetic is required.
51
- * Failure to do so _will_ break on newer gcc.
52
- */
53
-
54
-/* Signed saturating arithmetic. */
55
-
56
-/* Perform 16-bit signed saturating addition. */
57
-static inline uint16_t add16_sat(uint16_t a, uint16_t b)
58
-{
59
- uint16_t res;
60
-
61
- res = a + b;
62
- if (((res ^ a) & 0x8000) && !((a ^ b) & 0x8000)) {
63
- if (a & 0x8000) {
64
- res = 0x8000;
65
- } else {
66
- res = 0x7fff;
67
- }
68
- }
69
- return res;
70
-}
71
-
72
-/* Perform 8-bit signed saturating addition. */
73
-static inline uint8_t add8_sat(uint8_t a, uint8_t b)
74
-{
75
- uint8_t res;
76
-
77
- res = a + b;
78
- if (((res ^ a) & 0x80) && !((a ^ b) & 0x80)) {
79
- if (a & 0x80) {
80
- res = 0x80;
81
- } else {
82
- res = 0x7f;
83
- }
84
- }
85
- return res;
86
-}
87
-
88
-/* Perform 16-bit signed saturating subtraction. */
89
-static inline uint16_t sub16_sat(uint16_t a, uint16_t b)
90
-{
91
- uint16_t res;
92
-
93
- res = a - b;
94
- if (((res ^ a) & 0x8000) && ((a ^ b) & 0x8000)) {
95
- if (a & 0x8000) {
96
- res = 0x8000;
97
- } else {
98
- res = 0x7fff;
99
- }
100
- }
101
- return res;
102
-}
103
-
104
-/* Perform 8-bit signed saturating subtraction. */
105
-static inline uint8_t sub8_sat(uint8_t a, uint8_t b)
106
-{
107
- uint8_t res;
108
-
109
- res = a - b;
110
- if (((res ^ a) & 0x80) && ((a ^ b) & 0x80)) {
111
- if (a & 0x80) {
112
- res = 0x80;
113
- } else {
114
- res = 0x7f;
115
- }
116
- }
117
- return res;
118
-}
119
-
120
-#define ADD16(a, b, n) RESULT(add16_sat(a, b), n, 16);
121
-#define SUB16(a, b, n) RESULT(sub16_sat(a, b), n, 16);
122
-#define ADD8(a, b, n) RESULT(add8_sat(a, b), n, 8);
123
-#define SUB8(a, b, n) RESULT(sub8_sat(a, b), n, 8);
124
-#define PFX q
125
-
126
-#include "op_addsub.h"
127
-
128
-/* Unsigned saturating arithmetic. */
129
-static inline uint16_t add16_usat(uint16_t a, uint16_t b)
130
-{
131
- uint16_t res;
132
- res = a + b;
133
- if (res < a) {
134
- res = 0xffff;
135
- }
136
- return res;
137
-}
138
-
139
-static inline uint16_t sub16_usat(uint16_t a, uint16_t b)
140
-{
141
- if (a > b) {
142
- return a - b;
143
- } else {
144
- return 0;
145
- }
146
-}
147
-
148
-static inline uint8_t add8_usat(uint8_t a, uint8_t b)
149
-{
150
- uint8_t res;
151
- res = a + b;
152
- if (res < a) {
153
- res = 0xff;
154
- }
155
- return res;
156
-}
157
-
158
-static inline uint8_t sub8_usat(uint8_t a, uint8_t b)
159
-{
160
- if (a > b) {
161
- return a - b;
162
- } else {
163
- return 0;
164
- }
165
-}
166
-
167
-#define ADD16(a, b, n) RESULT(add16_usat(a, b), n, 16);
168
-#define SUB16(a, b, n) RESULT(sub16_usat(a, b), n, 16);
169
-#define ADD8(a, b, n) RESULT(add8_usat(a, b), n, 8);
170
-#define SUB8(a, b, n) RESULT(sub8_usat(a, b), n, 8);
171
-#define PFX uq
172
-
173
-#include "op_addsub.h"
174
-
175
-/* Signed modulo arithmetic. */
176
-#define SARITH16(a, b, n, op) do { \
177
- int32_t sum; \
178
- sum = (int32_t)(int16_t)(a) op (int32_t)(int16_t)(b); \
179
- RESULT(sum, n, 16); \
180
- if (sum >= 0) \
181
- ge |= 3 << (n * 2); \
182
- } while (0)
183
-
184
-#define SARITH8(a, b, n, op) do { \
185
- int32_t sum; \
186
- sum = (int32_t)(int8_t)(a) op (int32_t)(int8_t)(b); \
187
- RESULT(sum, n, 8); \
188
- if (sum >= 0) \
189
- ge |= 1 << n; \
190
- } while (0)
191
-
192
-
193
-#define ADD16(a, b, n) SARITH16(a, b, n, +)
194
-#define SUB16(a, b, n) SARITH16(a, b, n, -)
195
-#define ADD8(a, b, n) SARITH8(a, b, n, +)
196
-#define SUB8(a, b, n) SARITH8(a, b, n, -)
197
-#define PFX s
198
-#define ARITH_GE
199
-
200
-#include "op_addsub.h"
201
-
202
-/* Unsigned modulo arithmetic. */
203
-#define ADD16(a, b, n) do { \
204
- uint32_t sum; \
205
- sum = (uint32_t)(uint16_t)(a) + (uint32_t)(uint16_t)(b); \
206
- RESULT(sum, n, 16); \
207
- if ((sum >> 16) == 1) \
208
- ge |= 3 << (n * 2); \
209
- } while (0)
210
-
211
-#define ADD8(a, b, n) do { \
212
- uint32_t sum; \
213
- sum = (uint32_t)(uint8_t)(a) + (uint32_t)(uint8_t)(b); \
214
- RESULT(sum, n, 8); \
215
- if ((sum >> 8) == 1) \
216
- ge |= 1 << n; \
217
- } while (0)
218
-
219
-#define SUB16(a, b, n) do { \
220
- uint32_t sum; \
221
- sum = (uint32_t)(uint16_t)(a) - (uint32_t)(uint16_t)(b); \
222
- RESULT(sum, n, 16); \
223
- if ((sum >> 16) == 0) \
224
- ge |= 3 << (n * 2); \
225
- } while (0)
226
-
227
-#define SUB8(a, b, n) do { \
228
- uint32_t sum; \
229
- sum = (uint32_t)(uint8_t)(a) - (uint32_t)(uint8_t)(b); \
230
- RESULT(sum, n, 8); \
231
- if ((sum >> 8) == 0) \
232
- ge |= 1 << n; \
233
- } while (0)
234
-
235
-#define PFX u
236
-#define ARITH_GE
237
-
238
-#include "op_addsub.h"
239
-
240
-/* Halved signed arithmetic. */
241
-#define ADD16(a, b, n) \
242
- RESULT(((int32_t)(int16_t)(a) + (int32_t)(int16_t)(b)) >> 1, n, 16)
243
-#define SUB16(a, b, n) \
244
- RESULT(((int32_t)(int16_t)(a) - (int32_t)(int16_t)(b)) >> 1, n, 16)
245
-#define ADD8(a, b, n) \
246
- RESULT(((int32_t)(int8_t)(a) + (int32_t)(int8_t)(b)) >> 1, n, 8)
247
-#define SUB8(a, b, n) \
248
- RESULT(((int32_t)(int8_t)(a) - (int32_t)(int8_t)(b)) >> 1, n, 8)
249
-#define PFX sh
250
-
251
-#include "op_addsub.h"
252
-
253
-/* Halved unsigned arithmetic. */
254
-#define ADD16(a, b, n) \
255
- RESULT(((uint32_t)(uint16_t)(a) + (uint32_t)(uint16_t)(b)) >> 1, n, 16)
256
-#define SUB16(a, b, n) \
257
- RESULT(((uint32_t)(uint16_t)(a) - (uint32_t)(uint16_t)(b)) >> 1, n, 16)
258
-#define ADD8(a, b, n) \
259
- RESULT(((uint32_t)(uint8_t)(a) + (uint32_t)(uint8_t)(b)) >> 1, n, 8)
260
-#define SUB8(a, b, n) \
261
- RESULT(((uint32_t)(uint8_t)(a) - (uint32_t)(uint8_t)(b)) >> 1, n, 8)
262
-#define PFX uh
263
-
264
-#include "op_addsub.h"
265
-
266
-static inline uint8_t do_usad(uint8_t a, uint8_t b)
267
-{
268
- if (a > b) {
269
- return a - b;
270
- } else {
271
- return b - a;
272
- }
273
-}
274
-
275
-/* Unsigned sum of absolute byte differences. */
276
-uint32_t HELPER(usad8)(uint32_t a, uint32_t b)
277
-{
278
- uint32_t sum;
279
- sum = do_usad(a, b);
280
- sum += do_usad(a >> 8, b >> 8);
281
- sum += do_usad(a >> 16, b >> 16);
282
- sum += do_usad(a >> 24, b >> 24);
283
- return sum;
284
-}
285
-
286
-/* For ARMv6 SEL instruction. */
287
-uint32_t HELPER(sel_flags)(uint32_t flags, uint32_t a, uint32_t b)
288
-{
289
- uint32_t mask;
290
-
291
- mask = 0;
292
- if (flags & 1) {
293
- mask |= 0xff;
294
- }
295
- if (flags & 2) {
296
- mask |= 0xff00;
297
- }
298
- if (flags & 4) {
299
- mask |= 0xff0000;
300
- }
301
- if (flags & 8) {
302
- mask |= 0xff000000;
303
- }
304
- return (a & mask) | (b & ~mask);
305
-}
306
-
307
-/*
308
- * CRC helpers.
309
- * The upper bytes of val (above the number specified by 'bytes') must have
310
- * been zeroed out by the caller.
311
- */
312
-uint32_t HELPER(crc32)(uint32_t acc, uint32_t val, uint32_t bytes)
313
-{
314
- uint8_t buf[4];
315
-
316
- stl_le_p(buf, val);
317
-
318
- /* zlib crc32 converts the accumulator and output to one's complement. */
319
- return crc32(acc ^ 0xffffffff, buf, bytes) ^ 0xffffffff;
320
-}
321
-
322
-uint32_t HELPER(crc32c)(uint32_t acc, uint32_t val, uint32_t bytes)
323
-{
324
- uint8_t buf[4];
325
-
326
- stl_le_p(buf, val);
327
-
328
- /* Linux crc32c converts the output to one's complement. */
329
- return crc32c(acc, buf, bytes) ^ 0xffffffff;
330
-}
331
332
/*
333
* Return the exception level to which FP-disabled exceptions should
334
diff --git a/target/arm/tcg/arith_helper.c b/target/arm/tcg/arith_helper.c
23
new file mode 100644
335
new file mode 100644
24
index XXXXXXX..XXXXXXX
336
index XXXXXXX..XXXXXXX
25
--- /dev/null
337
--- /dev/null
26
+++ b/tests/tcg/aarch64/sme-fmopa-1.c
338
+++ b/target/arm/tcg/arith_helper.c
27
@@ -XXX,XX +XXX,XX @@
339
@@ -XXX,XX +XXX,XX @@
28
+/*
340
+/*
29
+ * SME outer product, 1 x 1.
341
+ * ARM generic helpers for various arithmetical operations.
342
+ *
343
+ * This code is licensed under the GNU GPL v2 or later.
344
+ *
30
+ * SPDX-License-Identifier: GPL-2.0-or-later
345
+ * SPDX-License-Identifier: GPL-2.0-or-later
31
+ */
346
+ */
32
+
347
+#include "qemu/osdep.h"
33
+#include <stdio.h>
348
+#include "cpu.h"
34
+
349
+#include "exec/helper-proto.h"
35
+static void foo(float *dst)
350
+#include "qemu/crc32c.h"
36
+{
351
+#include <zlib.h> /* for crc32 */
37
+ asm(".arch_extension sme\n\t"
352
+
38
+ "smstart\n\t"
353
+/*
39
+ "ptrue p0.s, vl4\n\t"
354
+ * Note that signed overflow is undefined in C. The following routines are
40
+ "fmov z0.s, #1.0\n\t"
355
+ * careful to use unsigned types where modulo arithmetic is required.
41
+ /*
356
+ * Failure to do so _will_ break on newer gcc.
42
+ * An outer product of a vector of 1.0 by itself should be a matrix of 1.0.
357
+ */
43
+ * Note that we are using tile 1 here (za1.s) rather than tile 0.
358
+
44
+ */
359
+/* Signed saturating arithmetic. */
45
+ "zero {za}\n\t"
360
+
46
+ "fmopa za1.s, p0/m, p0/m, z0.s, z0.s\n\t"
361
+/* Perform 16-bit signed saturating addition. */
47
+ /*
362
+static inline uint16_t add16_sat(uint16_t a, uint16_t b)
48
+ * Read the first 4x4 sub-matrix of elements from tile 1:
363
+{
49
+ * Note that za1h should be interchangeable here.
364
+ uint16_t res;
50
+ */
365
+
51
+ "mov w12, #0\n\t"
366
+ res = a + b;
52
+ "mova z0.s, p0/m, za1v.s[w12, #0]\n\t"
367
+ if (((res ^ a) & 0x8000) && !((a ^ b) & 0x8000)) {
53
+ "mova z1.s, p0/m, za1v.s[w12, #1]\n\t"
368
+ if (a & 0x8000) {
54
+ "mova z2.s, p0/m, za1v.s[w12, #2]\n\t"
369
+ res = 0x8000;
55
+ "mova z3.s, p0/m, za1v.s[w12, #3]\n\t"
370
+ } else {
56
+ /*
371
+ res = 0x7fff;
57
+ * And store them to the input pointer (dst in the C code):
58
+ */
59
+ "st1w {z0.s}, p0, [%0]\n\t"
60
+ "add x0, x0, #16\n\t"
61
+ "st1w {z1.s}, p0, [x0]\n\t"
62
+ "add x0, x0, #16\n\t"
63
+ "st1w {z2.s}, p0, [x0]\n\t"
64
+ "add x0, x0, #16\n\t"
65
+ "st1w {z3.s}, p0, [x0]\n\t"
66
+ "smstop"
67
+ : : "r"(dst)
68
+ : "x12", "d0", "d1", "d2", "d3", "memory");
69
+}
70
+
71
+int main()
72
+{
73
+ float dst[16] = { };
74
+
75
+ foo(dst);
76
+
77
+ for (int i = 0; i < 16; i++) {
78
+ if (dst[i] != 1.0f) {
79
+ goto failure;
80
+ }
372
+ }
81
+ }
373
+ }
82
+ /* success */
374
+ return res;
83
+ return 0;
375
+}
84
+
376
+
85
+ failure:
377
+/* Perform 8-bit signed saturating addition. */
86
+ for (int i = 0; i < 16; i++) {
378
+static inline uint8_t add8_sat(uint8_t a, uint8_t b)
87
+ printf("%f%c", dst[i], i % 4 == 3 ? '\n' : ' ');
379
+{
88
+ }
380
+ uint8_t res;
89
+ return 1;
381
+
90
+}
382
+ res = a + b;
91
diff --git a/tests/tcg/aarch64/sme-fmopa-2.c b/tests/tcg/aarch64/sme-fmopa-2.c
383
+ if (((res ^ a) & 0x80) && !((a ^ b) & 0x80)) {
92
new file mode 100644
384
+ if (a & 0x80) {
93
index XXXXXXX..XXXXXXX
385
+ res = 0x80;
94
--- /dev/null
386
+ } else {
95
+++ b/tests/tcg/aarch64/sme-fmopa-2.c
387
+ res = 0x7f;
96
@@ -XXX,XX +XXX,XX @@
388
+ }
389
+ }
390
+ return res;
391
+}
392
+
393
+/* Perform 16-bit signed saturating subtraction. */
394
+static inline uint16_t sub16_sat(uint16_t a, uint16_t b)
395
+{
396
+ uint16_t res;
397
+
398
+ res = a - b;
399
+ if (((res ^ a) & 0x8000) && ((a ^ b) & 0x8000)) {
400
+ if (a & 0x8000) {
401
+ res = 0x8000;
402
+ } else {
403
+ res = 0x7fff;
404
+ }
405
+ }
406
+ return res;
407
+}
408
+
409
+/* Perform 8-bit signed saturating subtraction. */
410
+static inline uint8_t sub8_sat(uint8_t a, uint8_t b)
411
+{
412
+ uint8_t res;
413
+
414
+ res = a - b;
415
+ if (((res ^ a) & 0x80) && ((a ^ b) & 0x80)) {
416
+ if (a & 0x80) {
417
+ res = 0x80;
418
+ } else {
419
+ res = 0x7f;
420
+ }
421
+ }
422
+ return res;
423
+}
424
+
425
+#define ADD16(a, b, n) RESULT(add16_sat(a, b), n, 16);
426
+#define SUB16(a, b, n) RESULT(sub16_sat(a, b), n, 16);
427
+#define ADD8(a, b, n) RESULT(add8_sat(a, b), n, 8);
428
+#define SUB8(a, b, n) RESULT(sub8_sat(a, b), n, 8);
429
+#define PFX q
430
+
431
+#include "op_addsub.c.inc"
432
+
433
+/* Unsigned saturating arithmetic. */
434
+static inline uint16_t add16_usat(uint16_t a, uint16_t b)
435
+{
436
+ uint16_t res;
437
+ res = a + b;
438
+ if (res < a) {
439
+ res = 0xffff;
440
+ }
441
+ return res;
442
+}
443
+
444
+static inline uint16_t sub16_usat(uint16_t a, uint16_t b)
445
+{
446
+ if (a > b) {
447
+ return a - b;
448
+ } else {
449
+ return 0;
450
+ }
451
+}
452
+
453
+static inline uint8_t add8_usat(uint8_t a, uint8_t b)
454
+{
455
+ uint8_t res;
456
+ res = a + b;
457
+ if (res < a) {
458
+ res = 0xff;
459
+ }
460
+ return res;
461
+}
462
+
463
+static inline uint8_t sub8_usat(uint8_t a, uint8_t b)
464
+{
465
+ if (a > b) {
466
+ return a - b;
467
+ } else {
468
+ return 0;
469
+ }
470
+}
471
+
472
+#define ADD16(a, b, n) RESULT(add16_usat(a, b), n, 16);
473
+#define SUB16(a, b, n) RESULT(sub16_usat(a, b), n, 16);
474
+#define ADD8(a, b, n) RESULT(add8_usat(a, b), n, 8);
475
+#define SUB8(a, b, n) RESULT(sub8_usat(a, b), n, 8);
476
+#define PFX uq
477
+
478
+#include "op_addsub.c.inc"
479
+
480
+/* Signed modulo arithmetic. */
481
+#define SARITH16(a, b, n, op) do { \
482
+ int32_t sum; \
483
+ sum = (int32_t)(int16_t)(a) op (int32_t)(int16_t)(b); \
484
+ RESULT(sum, n, 16); \
485
+ if (sum >= 0) \
486
+ ge |= 3 << (n * 2); \
487
+ } while (0)
488
+
489
+#define SARITH8(a, b, n, op) do { \
490
+ int32_t sum; \
491
+ sum = (int32_t)(int8_t)(a) op (int32_t)(int8_t)(b); \
492
+ RESULT(sum, n, 8); \
493
+ if (sum >= 0) \
494
+ ge |= 1 << n; \
495
+ } while (0)
496
+
497
+
498
+#define ADD16(a, b, n) SARITH16(a, b, n, +)
499
+#define SUB16(a, b, n) SARITH16(a, b, n, -)
500
+#define ADD8(a, b, n) SARITH8(a, b, n, +)
501
+#define SUB8(a, b, n) SARITH8(a, b, n, -)
502
+#define PFX s
503
+#define ARITH_GE
504
+
505
+#include "op_addsub.c.inc"
506
+
507
+/* Unsigned modulo arithmetic. */
508
+#define ADD16(a, b, n) do { \
509
+ uint32_t sum; \
510
+ sum = (uint32_t)(uint16_t)(a) + (uint32_t)(uint16_t)(b); \
511
+ RESULT(sum, n, 16); \
512
+ if ((sum >> 16) == 1) \
513
+ ge |= 3 << (n * 2); \
514
+ } while (0)
515
+
516
+#define ADD8(a, b, n) do { \
517
+ uint32_t sum; \
518
+ sum = (uint32_t)(uint8_t)(a) + (uint32_t)(uint8_t)(b); \
519
+ RESULT(sum, n, 8); \
520
+ if ((sum >> 8) == 1) \
521
+ ge |= 1 << n; \
522
+ } while (0)
523
+
524
+#define SUB16(a, b, n) do { \
525
+ uint32_t sum; \
526
+ sum = (uint32_t)(uint16_t)(a) - (uint32_t)(uint16_t)(b); \
527
+ RESULT(sum, n, 16); \
528
+ if ((sum >> 16) == 0) \
529
+ ge |= 3 << (n * 2); \
530
+ } while (0)
531
+
532
+#define SUB8(a, b, n) do { \
533
+ uint32_t sum; \
534
+ sum = (uint32_t)(uint8_t)(a) - (uint32_t)(uint8_t)(b); \
535
+ RESULT(sum, n, 8); \
536
+ if ((sum >> 8) == 0) \
537
+ ge |= 1 << n; \
538
+ } while (0)
539
+
540
+#define PFX u
541
+#define ARITH_GE
542
+
543
+#include "op_addsub.c.inc"
544
+
545
+/* Halved signed arithmetic. */
546
+#define ADD16(a, b, n) \
547
+ RESULT(((int32_t)(int16_t)(a) + (int32_t)(int16_t)(b)) >> 1, n, 16)
548
+#define SUB16(a, b, n) \
549
+ RESULT(((int32_t)(int16_t)(a) - (int32_t)(int16_t)(b)) >> 1, n, 16)
550
+#define ADD8(a, b, n) \
551
+ RESULT(((int32_t)(int8_t)(a) + (int32_t)(int8_t)(b)) >> 1, n, 8)
552
+#define SUB8(a, b, n) \
553
+ RESULT(((int32_t)(int8_t)(a) - (int32_t)(int8_t)(b)) >> 1, n, 8)
554
+#define PFX sh
555
+
556
+#include "op_addsub.c.inc"
557
+
558
+/* Halved unsigned arithmetic. */
559
+#define ADD16(a, b, n) \
560
+ RESULT(((uint32_t)(uint16_t)(a) + (uint32_t)(uint16_t)(b)) >> 1, n, 16)
561
+#define SUB16(a, b, n) \
562
+ RESULT(((uint32_t)(uint16_t)(a) - (uint32_t)(uint16_t)(b)) >> 1, n, 16)
563
+#define ADD8(a, b, n) \
564
+ RESULT(((uint32_t)(uint8_t)(a) + (uint32_t)(uint8_t)(b)) >> 1, n, 8)
565
+#define SUB8(a, b, n) \
566
+ RESULT(((uint32_t)(uint8_t)(a) - (uint32_t)(uint8_t)(b)) >> 1, n, 8)
567
+#define PFX uh
568
+
569
+#include "op_addsub.c.inc"
570
+
571
+static inline uint8_t do_usad(uint8_t a, uint8_t b)
572
+{
573
+ if (a > b) {
574
+ return a - b;
575
+ } else {
576
+ return b - a;
577
+ }
578
+}
579
+
580
+/* Unsigned sum of absolute byte differences. */
581
+uint32_t HELPER(usad8)(uint32_t a, uint32_t b)
582
+{
583
+ uint32_t sum;
584
+ sum = do_usad(a, b);
585
+ sum += do_usad(a >> 8, b >> 8);
586
+ sum += do_usad(a >> 16, b >> 16);
587
+ sum += do_usad(a >> 24, b >> 24);
588
+ return sum;
589
+}
590
+
591
+/* For ARMv6 SEL instruction. */
592
+uint32_t HELPER(sel_flags)(uint32_t flags, uint32_t a, uint32_t b)
593
+{
594
+ uint32_t mask;
595
+
596
+ mask = 0;
597
+ if (flags & 1) {
598
+ mask |= 0xff;
599
+ }
600
+ if (flags & 2) {
601
+ mask |= 0xff00;
602
+ }
603
+ if (flags & 4) {
604
+ mask |= 0xff0000;
605
+ }
606
+ if (flags & 8) {
607
+ mask |= 0xff000000;
608
+ }
609
+ return (a & mask) | (b & ~mask);
610
+}
611
+
97
+/*
612
+/*
98
+ * SME outer product, FZ vs FZ16
613
+ * CRC helpers.
99
+ * SPDX-License-Identifier: GPL-2.0-or-later
614
+ * The upper bytes of val (above the number specified by 'bytes') must have
615
+ * been zeroed out by the caller.
100
+ */
616
+ */
101
+
617
+uint32_t HELPER(crc32)(uint32_t acc, uint32_t val, uint32_t bytes)
102
+#include <stdint.h>
618
+{
103
+#include <stdio.h>
619
+ uint8_t buf[4];
104
+
620
+
105
+static void test_fmopa(uint32_t *result)
621
+ stl_le_p(buf, val);
106
+{
622
+
107
+ asm(".arch_extension sme\n\t"
623
+ /* zlib crc32 converts the accumulator and output to one's complement. */
108
+ "smstart\n\t" /* Z*, P* and ZArray cleared */
624
+ return crc32(acc ^ 0xffffffff, buf, bytes) ^ 0xffffffff;
109
+ "ptrue p2.b, vl16\n\t" /* Limit vector length to 16 */
625
+}
110
+ "ptrue p5.b, vl16\n\t"
626
+
111
+ "movi d0, #0x00ff\n\t" /* fp16 denormal */
627
+uint32_t HELPER(crc32c)(uint32_t acc, uint32_t val, uint32_t bytes)
112
+ "movi d16, #0x00ff\n\t"
628
+{
113
+ "mov w15, #0x0001000000\n\t" /* FZ=1, FZ16=0 */
629
+ uint8_t buf[4];
114
+ "msr fpcr, x15\n\t"
630
+
115
+ "fmopa za3.s, p2/m, p5/m, z16.h, z0.h\n\t"
631
+ stl_le_p(buf, val);
116
+ "mov w15, #0\n\t"
632
+
117
+ "st1w {za3h.s[w15, 0]}, p2, [%0]\n\t"
633
+ /* Linux crc32c converts the output to one's complement. */
118
+ "add %0, %0, #16\n\t"
634
+ return crc32c(acc, buf, bytes) ^ 0xffffffff;
119
+ "st1w {za3h.s[w15, 1]}, p2, [%0]\n\t"
635
+}
120
+ "mov w15, #2\n\t"
636
diff --git a/target/arm/op_addsub.h b/target/arm/tcg/op_addsub.c.inc
121
+ "add %0, %0, #16\n\t"
637
similarity index 100%
122
+ "st1w {za3h.s[w15, 0]}, p2, [%0]\n\t"
638
rename from target/arm/op_addsub.h
123
+ "add %0, %0, #16\n\t"
639
rename to target/arm/tcg/op_addsub.c.inc
124
+ "st1w {za3h.s[w15, 1]}, p2, [%0]\n\t"
640
diff --git a/target/arm/tcg/meson.build b/target/arm/tcg/meson.build
125
+ "smstop"
126
+ : "+r"(result) :
127
+ : "x15", "x16", "p2", "p5", "d0", "d16", "memory");
128
+}
129
+
130
+int main(void)
131
+{
132
+ uint32_t result[4 * 4] = { };
133
+
134
+ test_fmopa(result);
135
+
136
+ if (result[0] != 0x2f7e0100) {
137
+ printf("Test failed: Incorrect output in first 4 bytes\n"
138
+ "Expected: %08x\n"
139
+ "Got: %08x\n",
140
+ 0x2f7e0100, result[0]);
141
+ return 1;
142
+ }
143
+
144
+ for (int i = 1; i < 16; ++i) {
145
+ if (result[i] != 0) {
146
+ printf("Test failed: Non-zero word at position %d\n", i);
147
+ return 1;
148
+ }
149
+ }
150
+
151
+ return 0;
152
+}
153
diff --git a/tests/tcg/aarch64/sme-fmopa-3.c b/tests/tcg/aarch64/sme-fmopa-3.c
154
new file mode 100644
155
index XXXXXXX..XXXXXXX
156
--- /dev/null
157
+++ b/tests/tcg/aarch64/sme-fmopa-3.c
158
@@ -XXX,XX +XXX,XX @@
159
+/*
160
+ * SME outer product, [ 1 2 3 4 ] squared
161
+ * SPDX-License-Identifier: GPL-2.0-or-later
162
+ */
163
+
164
+#include <stdio.h>
165
+#include <stdint.h>
166
+#include <string.h>
167
+#include <math.h>
168
+
169
+static const float i_1234[4] = {
170
+ 1.0f, 2.0f, 3.0f, 4.0f
171
+};
172
+
173
+static const float expected[4] = {
174
+ 4.515625f, 5.750000f, 6.984375f, 8.218750f
175
+};
176
+
177
+static void test_fmopa(float *result)
178
+{
179
+ asm(".arch_extension sme\n\t"
180
+ "smstart\n\t" /* ZArray cleared */
181
+ "ptrue p2.b, vl16\n\t" /* Limit vector length to 16 */
182
+ "ld1w {z0.s}, p2/z, [%1]\n\t"
183
+ "mov w15, #0\n\t"
184
+ "mov za3h.s[w15, 0], p2/m, z0.s\n\t"
185
+ "mov za3h.s[w15, 1], p2/m, z0.s\n\t"
186
+ "mov w15, #2\n\t"
187
+ "mov za3h.s[w15, 0], p2/m, z0.s\n\t"
188
+ "mov za3h.s[w15, 1], p2/m, z0.s\n\t"
189
+ "msr fpcr, xzr\n\t"
190
+ "fmopa za3.s, p2/m, p2/m, z0.h, z0.h\n\t"
191
+ "mov w15, #0\n\t"
192
+ "st1w {za3h.s[w15, 0]}, p2, [%0]\n"
193
+ "add %0, %0, #16\n\t"
194
+ "st1w {za3h.s[w15, 1]}, p2, [%0]\n\t"
195
+ "mov w15, #2\n\t"
196
+ "add %0, %0, #16\n\t"
197
+ "st1w {za3h.s[w15, 0]}, p2, [%0]\n\t"
198
+ "add %0, %0, #16\n\t"
199
+ "st1w {za3h.s[w15, 1]}, p2, [%0]\n\t"
200
+ "smstop"
201
+ : "+r"(result) : "r"(i_1234)
202
+ : "x15", "x16", "p2", "d0", "memory");
203
+}
204
+
205
+int main(void)
206
+{
207
+ float result[4 * 4] = { };
208
+ int ret = 0;
209
+
210
+ test_fmopa(result);
211
+
212
+ for (int i = 0; i < 4; i++) {
213
+ float actual = result[i];
214
+ if (fabsf(actual - expected[i]) > 0.001f) {
215
+ printf("Test failed at element %d: Expected %f, got %f\n",
216
+ i, expected[i], actual);
217
+ ret = 1;
218
+ }
219
+ }
220
+ return ret;
221
+}
222
diff --git a/tests/tcg/aarch64/Makefile.target b/tests/tcg/aarch64/Makefile.target
223
index XXXXXXX..XXXXXXX 100644
641
index XXXXXXX..XXXXXXX 100644
224
--- a/tests/tcg/aarch64/Makefile.target
642
--- a/target/arm/tcg/meson.build
225
+++ b/tests/tcg/aarch64/Makefile.target
643
+++ b/target/arm/tcg/meson.build
226
@@ -XXX,XX +XXX,XX @@ endif
644
@@ -XXX,XX +XXX,XX @@ arm_ss.add(files(
227
645
'tlb_helper.c',
228
# SME Tests
646
'vec_helper.c',
229
ifneq ($(CROSS_AS_HAS_ARMV9_SME),)
647
'tlb-insns.c',
230
-AARCH64_TESTS += sme-outprod1 sme-smopa-1 sme-smopa-2
648
+ 'arith_helper.c',
231
-sme-outprod1 sme-smopa-1 sme-smopa-2: CFLAGS += $(CROSS_AS_HAS_ARMV9_SME)
649
))
232
+SME_TESTS = sme-outprod1 sme-smopa-1 sme-smopa-2 sme-fmopa-1 sme-fmopa-2 sme-fmopa-3
650
233
+AARCH64_TESTS += $(SME_TESTS)
651
arm_ss.add(when: 'TARGET_AARCH64', if_true: files(
234
+$(SME_TESTS): CFLAGS += $(CROSS_AS_HAS_ARMV9_SME)
235
endif
236
237
# System Registers Tests
238
--
652
--
239
2.34.1
653
2.34.1
240
654
241
655
diff view generated by jsdifflib
1
From: Akihiko Odaki <akihiko.odaki@daynix.com>
1
From: Pierrick Bouvier <pierrick.bouvier@linaro.org>
2
2
3
Asahi Linux supports KVM but lacks PMU support.
3
Before changing default pauth algorithm, we need to make sure current
4
default one (QARMA5) can still be selected.
4
5
5
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
6
$ qemu-system-aarch64 -cpu max,pauth-qarma5=on ...
6
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
7
7
Message-id: 20240716-pmu-v3-1-8c7c1858a227@daynix.com
8
Signed-off-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
9
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
10
Message-id: 20241219183211.3493974-2-pierrick.bouvier@linaro.org
8
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
11
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
---
12
---
10
tests/qtest/arm-cpu-features.c | 13 ++++++++-----
13
docs/system/arm/cpu-features.rst | 5 ++++-
11
1 file changed, 8 insertions(+), 5 deletions(-)
14
target/arm/cpu.h | 1 +
15
target/arm/arm-qmp-cmds.c | 2 +-
16
target/arm/cpu64.c | 20 ++++++++++++++------
17
tests/qtest/arm-cpu-features.c | 15 +++++++++++----
18
5 files changed, 31 insertions(+), 12 deletions(-)
12
19
20
diff --git a/docs/system/arm/cpu-features.rst b/docs/system/arm/cpu-features.rst
21
index XXXXXXX..XXXXXXX 100644
22
--- a/docs/system/arm/cpu-features.rst
23
+++ b/docs/system/arm/cpu-features.rst
24
@@ -XXX,XX +XXX,XX @@ Below is the list of TCG VCPU features and their descriptions.
25
``pauth-qarma3``
26
When ``pauth`` is enabled, select the architected QARMA3 algorithm.
27
28
-Without either ``pauth-impdef`` or ``pauth-qarma3`` enabled,
29
+``pauth-qarma5``
30
+ When ``pauth`` is enabled, select the architected QARMA5 algorithm.
31
+
32
+Without ``pauth-impdef``, ``pauth-qarma3`` or ``pauth-qarma5`` enabled,
33
the architected QARMA5 algorithm is used. The architected QARMA5
34
and QARMA3 algorithms have good cryptographic properties, but can
35
be quite slow to emulate. The impdef algorithm used by QEMU is
36
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
37
index XXXXXXX..XXXXXXX 100644
38
--- a/target/arm/cpu.h
39
+++ b/target/arm/cpu.h
40
@@ -XXX,XX +XXX,XX @@ struct ArchCPU {
41
bool prop_pauth;
42
bool prop_pauth_impdef;
43
bool prop_pauth_qarma3;
44
+ bool prop_pauth_qarma5;
45
bool prop_lpa2;
46
47
/* DCZ blocksize, in log_2(words), ie low 4 bits of DCZID_EL0 */
48
diff --git a/target/arm/arm-qmp-cmds.c b/target/arm/arm-qmp-cmds.c
49
index XXXXXXX..XXXXXXX 100644
50
--- a/target/arm/arm-qmp-cmds.c
51
+++ b/target/arm/arm-qmp-cmds.c
52
@@ -XXX,XX +XXX,XX @@ static const char *cpu_model_advertised_features[] = {
53
"sve640", "sve768", "sve896", "sve1024", "sve1152", "sve1280",
54
"sve1408", "sve1536", "sve1664", "sve1792", "sve1920", "sve2048",
55
"kvm-no-adjvtime", "kvm-steal-time",
56
- "pauth", "pauth-impdef", "pauth-qarma3",
57
+ "pauth", "pauth-impdef", "pauth-qarma3", "pauth-qarma5",
58
NULL
59
};
60
61
diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
62
index XXXXXXX..XXXXXXX 100644
63
--- a/target/arm/cpu64.c
64
+++ b/target/arm/cpu64.c
65
@@ -XXX,XX +XXX,XX @@ void arm_cpu_pauth_finalize(ARMCPU *cpu, Error **errp)
66
}
67
68
if (cpu->prop_pauth) {
69
- if (cpu->prop_pauth_impdef && cpu->prop_pauth_qarma3) {
70
+ if ((cpu->prop_pauth_impdef && cpu->prop_pauth_qarma3) ||
71
+ (cpu->prop_pauth_impdef && cpu->prop_pauth_qarma5) ||
72
+ (cpu->prop_pauth_qarma3 && cpu->prop_pauth_qarma5)) {
73
error_setg(errp,
74
- "cannot enable both pauth-impdef and pauth-qarma3");
75
+ "cannot enable pauth-impdef, pauth-qarma3 and "
76
+ "pauth-qarma5 at the same time");
77
return;
78
}
79
80
@@ -XXX,XX +XXX,XX @@ void arm_cpu_pauth_finalize(ARMCPU *cpu, Error **errp)
81
} else if (cpu->prop_pauth_qarma3) {
82
isar2 = FIELD_DP64(isar2, ID_AA64ISAR2, APA3, features);
83
isar2 = FIELD_DP64(isar2, ID_AA64ISAR2, GPA3, 1);
84
- } else {
85
+ } else { /* default is pauth-qarma5 */
86
isar1 = FIELD_DP64(isar1, ID_AA64ISAR1, APA, features);
87
isar1 = FIELD_DP64(isar1, ID_AA64ISAR1, GPA, 1);
88
}
89
- } else if (cpu->prop_pauth_impdef || cpu->prop_pauth_qarma3) {
90
- error_setg(errp, "cannot enable pauth-impdef or "
91
- "pauth-qarma3 without pauth");
92
+ } else if (cpu->prop_pauth_impdef ||
93
+ cpu->prop_pauth_qarma3 ||
94
+ cpu->prop_pauth_qarma5) {
95
+ error_setg(errp, "cannot enable pauth-impdef, pauth-qarma3 or "
96
+ "pauth-qarma5 without pauth");
97
error_append_hint(errp, "Add pauth=on to the CPU property list.\n");
98
}
99
}
100
@@ -XXX,XX +XXX,XX @@ static const Property arm_cpu_pauth_impdef_property =
101
DEFINE_PROP_BOOL("pauth-impdef", ARMCPU, prop_pauth_impdef, false);
102
static const Property arm_cpu_pauth_qarma3_property =
103
DEFINE_PROP_BOOL("pauth-qarma3", ARMCPU, prop_pauth_qarma3, false);
104
+static Property arm_cpu_pauth_qarma5_property =
105
+ DEFINE_PROP_BOOL("pauth-qarma5", ARMCPU, prop_pauth_qarma5, false);
106
107
void aarch64_add_pauth_properties(Object *obj)
108
{
109
@@ -XXX,XX +XXX,XX @@ void aarch64_add_pauth_properties(Object *obj)
110
} else {
111
qdev_property_add_static(DEVICE(obj), &arm_cpu_pauth_impdef_property);
112
qdev_property_add_static(DEVICE(obj), &arm_cpu_pauth_qarma3_property);
113
+ qdev_property_add_static(DEVICE(obj), &arm_cpu_pauth_qarma5_property);
114
}
115
}
116
13
diff --git a/tests/qtest/arm-cpu-features.c b/tests/qtest/arm-cpu-features.c
117
diff --git a/tests/qtest/arm-cpu-features.c b/tests/qtest/arm-cpu-features.c
14
index XXXXXXX..XXXXXXX 100644
118
index XXXXXXX..XXXXXXX 100644
15
--- a/tests/qtest/arm-cpu-features.c
119
--- a/tests/qtest/arm-cpu-features.c
16
+++ b/tests/qtest/arm-cpu-features.c
120
+++ b/tests/qtest/arm-cpu-features.c
17
@@ -XXX,XX +XXX,XX @@ static void test_query_cpu_model_expansion_kvm(const void *data)
121
@@ -XXX,XX +XXX,XX @@ static void pauth_tests_default(QTestState *qts, const char *cpu_type)
18
assert_set_feature(qts, "host", "kvm-no-adjvtime", false);
122
assert_has_feature_enabled(qts, cpu_type, "pauth");
19
123
assert_has_feature_disabled(qts, cpu_type, "pauth-impdef");
20
if (g_str_equal(qtest_get_arch(), "aarch64")) {
124
assert_has_feature_disabled(qts, cpu_type, "pauth-qarma3");
21
+ bool kvm_supports_pmu;
125
+ assert_has_feature_disabled(qts, cpu_type, "pauth-qarma5");
22
bool kvm_supports_steal_time;
126
assert_set_feature(qts, cpu_type, "pauth", false);
23
bool kvm_supports_sve;
127
assert_set_feature(qts, cpu_type, "pauth", true);
24
char max_name[8], name[8];
128
assert_set_feature(qts, cpu_type, "pauth-impdef", true);
25
@@ -XXX,XX +XXX,XX @@ static void test_query_cpu_model_expansion_kvm(const void *data)
129
assert_set_feature(qts, cpu_type, "pauth-impdef", false);
26
130
assert_set_feature(qts, cpu_type, "pauth-qarma3", true);
27
assert_has_feature_enabled(qts, "host", "aarch64");
131
assert_set_feature(qts, cpu_type, "pauth-qarma3", false);
28
132
+ assert_set_feature(qts, cpu_type, "pauth-qarma5", true);
29
- /* Enabling and disabling pmu should always work. */
133
+ assert_set_feature(qts, cpu_type, "pauth-qarma5", false);
30
- assert_has_feature_enabled(qts, "host", "pmu");
134
assert_error(qts, cpu_type,
31
- assert_set_feature(qts, "host", "pmu", false);
135
- "cannot enable pauth-impdef or pauth-qarma3 without pauth",
32
- assert_set_feature(qts, "host", "pmu", true);
136
+ "cannot enable pauth-impdef, pauth-qarma3 or pauth-qarma5 without pauth",
33
-
137
"{ 'pauth': false, 'pauth-impdef': true }");
34
/*
138
assert_error(qts, cpu_type,
35
* Some features would be enabled by default, but they're disabled
139
- "cannot enable pauth-impdef or pauth-qarma3 without pauth",
36
* because this instance of KVM doesn't support them. Test that the
140
+ "cannot enable pauth-impdef, pauth-qarma3 or pauth-qarma5 without pauth",
37
@@ -XXX,XX +XXX,XX @@ static void test_query_cpu_model_expansion_kvm(const void *data)
141
"{ 'pauth': false, 'pauth-qarma3': true }");
38
assert_has_feature(qts, "host", "sve");
142
assert_error(qts, cpu_type,
39
143
- "cannot enable both pauth-impdef and pauth-qarma3",
40
resp = do_query_no_props(qts, "host");
144
- "{ 'pauth': true, 'pauth-impdef': true, 'pauth-qarma3': true }");
41
+ kvm_supports_pmu = resp_get_feature(resp, "pmu");
145
+ "cannot enable pauth-impdef, pauth-qarma3 or pauth-qarma5 without pauth",
42
kvm_supports_steal_time = resp_get_feature(resp, "kvm-steal-time");
146
+ "{ 'pauth': false, 'pauth-qarma5': true }");
43
kvm_supports_sve = resp_get_feature(resp, "sve");
147
+ assert_error(qts, cpu_type,
44
vls = resp_get_sve_vls(resp);
148
+ "cannot enable pauth-impdef, pauth-qarma3 and pauth-qarma5 at the same time",
45
qobject_unref(resp);
149
+ "{ 'pauth': true, 'pauth-impdef': true, 'pauth-qarma3': true,"
46
150
+ " 'pauth-qarma5': true }");
47
+ if (kvm_supports_pmu) {
151
}
48
+ /* If we have pmu then we should be able to toggle it. */
152
49
+ assert_set_feature(qts, "host", "pmu", false);
153
static void test_query_cpu_model_expansion(const void *data)
50
+ assert_set_feature(qts, "host", "pmu", true);
51
+ }
52
+
53
if (kvm_supports_steal_time) {
54
/* If we have steal-time then we should be able to toggle it. */
55
assert_set_feature(qts, "host", "kvm-steal-time", false);
56
--
154
--
57
2.34.1
155
2.34.1
58
59
diff view generated by jsdifflib
1
From: Mostafa Saleh <smostafa@google.com>
1
The pauth-3 test explicitly tests that a computation of the
2
pointer-authentication produces the expected result. This means that
3
it must be run with the QARMA5 algorithm.
2
4
3
According to the SMMU architecture specification (ARM IHI 0070 F.b),
5
Explicitly set the pauth algorithm when running this test, so that it
4
in “3.4 Address sizes”
6
doesn't break when we change the default algorithm the 'max' CPU
5
The address output from the translation causes a stage 1 Address Size
7
uses.
6
fault if it exceeds the range of the effective IPA size for the given CD.
7
8
8
However, this check was missing.
9
10
There is already a similar check for stage-2 against effective PA.
11
12
Reviewed-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
13
Reviewed-by: Eric Auger <eric.auger@redhat.com>
14
Signed-off-by: Mostafa Saleh <smostafa@google.com>
15
Message-id: 20240715084519.1189624-2-smostafa@google.com
16
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
17
---
10
---
18
hw/arm/smmu-common.c | 10 ++++++++++
11
tests/tcg/aarch64/Makefile.softmmu-target | 3 +++
19
1 file changed, 10 insertions(+)
12
1 file changed, 3 insertions(+)
20
13
21
diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
14
diff --git a/tests/tcg/aarch64/Makefile.softmmu-target b/tests/tcg/aarch64/Makefile.softmmu-target
22
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
23
--- a/hw/arm/smmu-common.c
16
--- a/tests/tcg/aarch64/Makefile.softmmu-target
24
+++ b/hw/arm/smmu-common.c
17
+++ b/tests/tcg/aarch64/Makefile.softmmu-target
25
@@ -XXX,XX +XXX,XX @@ static int smmu_ptw_64_s1(SMMUTransCfg *cfg,
18
@@ -XXX,XX +XXX,XX @@ EXTRA_RUNS+=run-memory-replay
26
goto error;
19
27
}
20
ifneq ($(CROSS_CC_HAS_ARMV8_3),)
28
21
pauth-3: CFLAGS += $(CROSS_CC_HAS_ARMV8_3)
29
+ /*
22
+# This test explicitly checks the output of the pauth operation so we
30
+ * The address output from the translation causes a stage 1 Address
23
+# must force the use of the QARMA5 algorithm for it.
31
+ * Size fault if it exceeds the range of the effective IPA size for
24
+run-pauth-3: QEMU_BASE_MACHINE=-M virt -cpu max,pauth-qarma5=on -display none
32
+ * the given CD.
25
else
33
+ */
26
pauth-3:
34
+ if (gpa >= (1ULL << cfg->oas)) {
27
    $(call skip-test, "BUILD of $@", "missing compiler support")
35
+ info->type = SMMU_PTW_ERR_ADDR_SIZE;
36
+ goto error;
37
+ }
38
+
39
tlbe->entry.translated_addr = gpa;
40
tlbe->entry.iova = iova & ~mask;
41
tlbe->entry.addr_mask = mask;
42
--
28
--
43
2.34.1
29
2.34.1
44
45
diff view generated by jsdifflib
Deleted patch
1
From: Mostafa Saleh <smostafa@google.com>
2
1
3
The SMMUv3 spec (ARM IHI 0070 F.b - 7.3 Event records) defines the
4
class of events faults as:
5
6
CLASS: The class of the operation that caused the fault:
7
- 0b00: CD, CD fetch.
8
- 0b01: TTD, Stage 1 translation table fetch.
9
- 0b10: IN, Input address
10
11
However, this value was not set and left as 0 which means CD and not
12
IN (0b10).
13
14
Another problem was that stage-2 class is considered IN not TT for
15
EABT, according to the spec:
16
Translation of an IPA after successful stage 1 translation (or,
17
in stage 2-only configuration, an input IPA)
18
- S2 == 1 (stage 2), CLASS == IN (Input to stage)
19
20
This would change soon when nested translations are supported.
21
22
While at it, add an enum for class as it would be used for nesting.
23
However, at the moment stage-1 and stage-2 use the same class values,
24
except for EABT.
25
26
Fixes: 9bde7f0674 “hw/arm/smmuv3: Implement translate callback”
27
Signed-off-by: Mostafa Saleh <smostafa@google.com>
28
Reviewed-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
29
Reviewed-by: Eric Auger <eric.auger@redhat.com>
30
Message-id: 20240715084519.1189624-4-smostafa@google.com
31
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
32
---
33
hw/arm/smmuv3-internal.h | 6 ++++++
34
hw/arm/smmuv3.c | 8 +++++++-
35
2 files changed, 13 insertions(+), 1 deletion(-)
36
37
diff --git a/hw/arm/smmuv3-internal.h b/hw/arm/smmuv3-internal.h
38
index XXXXXXX..XXXXXXX 100644
39
--- a/hw/arm/smmuv3-internal.h
40
+++ b/hw/arm/smmuv3-internal.h
41
@@ -XXX,XX +XXX,XX @@ typedef enum SMMUTranslationStatus {
42
SMMU_TRANS_SUCCESS,
43
} SMMUTranslationStatus;
44
45
+typedef enum SMMUTranslationClass {
46
+ SMMU_CLASS_CD,
47
+ SMMU_CLASS_TT,
48
+ SMMU_CLASS_IN,
49
+} SMMUTranslationClass;
50
+
51
/* MMIO Registers */
52
53
REG32(IDR0, 0x0)
54
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
55
index XXXXXXX..XXXXXXX 100644
56
--- a/hw/arm/smmuv3.c
57
+++ b/hw/arm/smmuv3.c
58
@@ -XXX,XX +XXX,XX @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion *mr, hwaddr addr,
59
event.type = SMMU_EVT_F_WALK_EABT;
60
event.u.f_walk_eabt.addr = addr;
61
event.u.f_walk_eabt.rnw = flag & 0x1;
62
- event.u.f_walk_eabt.class = 0x1;
63
+ /* Stage-2 (only) is class IN while stage-1 is class TT */
64
+ event.u.f_walk_eabt.class = (ptw_info.stage == 2) ?
65
+ SMMU_CLASS_IN : SMMU_CLASS_TT;
66
event.u.f_walk_eabt.addr2 = ptw_info.addr;
67
break;
68
case SMMU_PTW_ERR_TRANSLATION:
69
@@ -XXX,XX +XXX,XX @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion *mr, hwaddr addr,
70
event.type = SMMU_EVT_F_TRANSLATION;
71
event.u.f_translation.addr = addr;
72
event.u.f_translation.addr2 = ptw_info.addr;
73
+ event.u.f_translation.class = SMMU_CLASS_IN;
74
event.u.f_translation.rnw = flag & 0x1;
75
}
76
break;
77
@@ -XXX,XX +XXX,XX @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion *mr, hwaddr addr,
78
event.type = SMMU_EVT_F_ADDR_SIZE;
79
event.u.f_addr_size.addr = addr;
80
event.u.f_addr_size.addr2 = ptw_info.addr;
81
+ event.u.f_translation.class = SMMU_CLASS_IN;
82
event.u.f_addr_size.rnw = flag & 0x1;
83
}
84
break;
85
@@ -XXX,XX +XXX,XX @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion *mr, hwaddr addr,
86
event.type = SMMU_EVT_F_ACCESS;
87
event.u.f_access.addr = addr;
88
event.u.f_access.addr2 = ptw_info.addr;
89
+ event.u.f_translation.class = SMMU_CLASS_IN;
90
event.u.f_access.rnw = flag & 0x1;
91
}
92
break;
93
@@ -XXX,XX +XXX,XX @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion *mr, hwaddr addr,
94
event.type = SMMU_EVT_F_PERMISSION;
95
event.u.f_permission.addr = addr;
96
event.u.f_permission.addr2 = ptw_info.addr;
97
+ event.u.f_translation.class = SMMU_CLASS_IN;
98
event.u.f_permission.rnw = flag & 0x1;
99
}
100
break;
101
--
102
2.34.1
103
104
diff view generated by jsdifflib
Deleted patch
1
From: Mostafa Saleh <smostafa@google.com>
2
1
3
Currently, translation stage is represented as an int, where 1 is stage-1 and
4
2 is stage-2, when nested is added, 3 would be confusing to represent nesting,
5
so we use an enum instead.
6
7
While keeping the same values, this is useful for:
8
- Doing tricks with bit masks, where BIT(0) is stage-1 and BIT(1) is
9
stage-2 and both is nested.
10
- Tracing, as stage is printed as int.
11
12
Reviewed-by: Eric Auger <eric.auger@redhat.com>
13
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
14
Signed-off-by: Mostafa Saleh <smostafa@google.com>
15
Reviewed-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
16
Message-id: 20240715084519.1189624-5-smostafa@google.com
17
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
18
---
19
include/hw/arm/smmu-common.h | 11 +++++++++--
20
hw/arm/smmu-common.c | 14 +++++++-------
21
hw/arm/smmuv3.c | 17 +++++++++--------
22
3 files changed, 25 insertions(+), 17 deletions(-)
23
24
diff --git a/include/hw/arm/smmu-common.h b/include/hw/arm/smmu-common.h
25
index XXXXXXX..XXXXXXX 100644
26
--- a/include/hw/arm/smmu-common.h
27
+++ b/include/hw/arm/smmu-common.h
28
@@ -XXX,XX +XXX,XX @@ typedef enum {
29
SMMU_PTW_ERR_PERMISSION, /* Permission fault */
30
} SMMUPTWEventType;
31
32
+/* SMMU Stage */
33
+typedef enum {
34
+ SMMU_STAGE_1 = 1,
35
+ SMMU_STAGE_2,
36
+ SMMU_NESTED,
37
+} SMMUStage;
38
+
39
typedef struct SMMUPTWEventInfo {
40
- int stage;
41
+ SMMUStage stage;
42
SMMUPTWEventType type;
43
dma_addr_t addr; /* fetched address that induced an abort, if any */
44
} SMMUPTWEventInfo;
45
@@ -XXX,XX +XXX,XX @@ typedef struct SMMUS2Cfg {
46
*/
47
typedef struct SMMUTransCfg {
48
/* Shared fields between stage-1 and stage-2. */
49
- int stage; /* translation stage */
50
+ SMMUStage stage; /* translation stage */
51
bool disabled; /* smmu is disabled */
52
bool bypassed; /* translation is bypassed */
53
bool aborted; /* translation is aborted */
54
diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
55
index XXXXXXX..XXXXXXX 100644
56
--- a/hw/arm/smmu-common.c
57
+++ b/hw/arm/smmu-common.c
58
@@ -XXX,XX +XXX,XX @@ static int smmu_ptw_64_s1(SMMUTransCfg *cfg,
59
SMMUTLBEntry *tlbe, SMMUPTWEventInfo *info)
60
{
61
dma_addr_t baseaddr, indexmask;
62
- int stage = cfg->stage;
63
+ SMMUStage stage = cfg->stage;
64
SMMUTransTableInfo *tt = select_tt(cfg, iova);
65
uint8_t level, granule_sz, inputsize, stride;
66
67
@@ -XXX,XX +XXX,XX @@ static int smmu_ptw_64_s1(SMMUTransCfg *cfg,
68
info->type = SMMU_PTW_ERR_TRANSLATION;
69
70
error:
71
- info->stage = 1;
72
+ info->stage = SMMU_STAGE_1;
73
tlbe->entry.perm = IOMMU_NONE;
74
return -EINVAL;
75
}
76
@@ -XXX,XX +XXX,XX @@ static int smmu_ptw_64_s2(SMMUTransCfg *cfg,
77
dma_addr_t ipa, IOMMUAccessFlags perm,
78
SMMUTLBEntry *tlbe, SMMUPTWEventInfo *info)
79
{
80
- const int stage = 2;
81
+ const SMMUStage stage = SMMU_STAGE_2;
82
int granule_sz = cfg->s2cfg.granule_sz;
83
/* ARM DDI0487I.a: Table D8-7. */
84
int inputsize = 64 - cfg->s2cfg.tsz;
85
@@ -XXX,XX +XXX,XX @@ static int smmu_ptw_64_s2(SMMUTransCfg *cfg,
86
error_ipa:
87
info->addr = ipa;
88
error:
89
- info->stage = 2;
90
+ info->stage = SMMU_STAGE_2;
91
tlbe->entry.perm = IOMMU_NONE;
92
return -EINVAL;
93
}
94
@@ -XXX,XX +XXX,XX @@ error:
95
int smmu_ptw(SMMUTransCfg *cfg, dma_addr_t iova, IOMMUAccessFlags perm,
96
SMMUTLBEntry *tlbe, SMMUPTWEventInfo *info)
97
{
98
- if (cfg->stage == 1) {
99
+ if (cfg->stage == SMMU_STAGE_1) {
100
return smmu_ptw_64_s1(cfg, iova, perm, tlbe, info);
101
- } else if (cfg->stage == 2) {
102
+ } else if (cfg->stage == SMMU_STAGE_2) {
103
/*
104
* If bypassing stage 1(or unimplemented), the input address is passed
105
* directly to stage 2 as IPA. If the input address of a transaction
106
@@ -XXX,XX +XXX,XX @@ int smmu_ptw(SMMUTransCfg *cfg, dma_addr_t iova, IOMMUAccessFlags perm,
107
*/
108
if (iova >= (1ULL << cfg->oas)) {
109
info->type = SMMU_PTW_ERR_ADDR_SIZE;
110
- info->stage = 1;
111
+ info->stage = SMMU_STAGE_1;
112
tlbe->entry.perm = IOMMU_NONE;
113
return -EINVAL;
114
}
115
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
116
index XXXXXXX..XXXXXXX 100644
117
--- a/hw/arm/smmuv3.c
118
+++ b/hw/arm/smmuv3.c
119
@@ -XXX,XX +XXX,XX @@
120
#include "smmuv3-internal.h"
121
#include "smmu-internal.h"
122
123
-#define PTW_RECORD_FAULT(cfg) (((cfg)->stage == 1) ? (cfg)->record_faults : \
124
+#define PTW_RECORD_FAULT(cfg) (((cfg)->stage == SMMU_STAGE_1) ? \
125
+ (cfg)->record_faults : \
126
(cfg)->s2cfg.record_faults)
127
128
/**
129
@@ -XXX,XX +XXX,XX @@ static bool s2_pgtable_config_valid(uint8_t sl0, uint8_t t0sz, uint8_t gran)
130
131
static int decode_ste_s2_cfg(SMMUTransCfg *cfg, STE *ste)
132
{
133
- cfg->stage = 2;
134
+ cfg->stage = SMMU_STAGE_2;
135
136
if (STE_S2AA64(ste) == 0x0) {
137
qemu_log_mask(LOG_UNIMP,
138
@@ -XXX,XX +XXX,XX @@ static int decode_cd(SMMUTransCfg *cfg, CD *cd, SMMUEventInfo *event)
139
140
/* we support only those at the moment */
141
cfg->aa64 = true;
142
- cfg->stage = 1;
143
+ cfg->stage = SMMU_STAGE_1;
144
145
cfg->oas = oas2bits(CD_IPS(cd));
146
cfg->oas = MIN(oas2bits(SMMU_IDR5_OAS), cfg->oas);
147
@@ -XXX,XX +XXX,XX @@ static int smmuv3_decode_config(IOMMUMemoryRegion *mr, SMMUTransCfg *cfg,
148
return ret;
149
}
150
151
- if (cfg->aborted || cfg->bypassed || (cfg->stage == 2)) {
152
+ if (cfg->aborted || cfg->bypassed || (cfg->stage == SMMU_STAGE_2)) {
153
return 0;
154
}
155
156
@@ -XXX,XX +XXX,XX @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion *mr, hwaddr addr,
157
goto epilogue;
158
}
159
160
- if (cfg->stage == 1) {
161
+ if (cfg->stage == SMMU_STAGE_1) {
162
/* Select stage1 translation table. */
163
tt = select_tt(cfg, addr);
164
if (!tt) {
165
@@ -XXX,XX +XXX,XX @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion *mr, hwaddr addr,
166
* nesting is not supported. So it is sufficient to check the
167
* translation stage to know the TLB stage for now.
168
*/
169
- event.u.f_walk_eabt.s2 = (cfg->stage == 2);
170
+ event.u.f_walk_eabt.s2 = (cfg->stage == SMMU_STAGE_2);
171
if (PTW_RECORD_FAULT(cfg)) {
172
event.type = SMMU_EVT_F_PERMISSION;
173
event.u.f_permission.addr = addr;
174
@@ -XXX,XX +XXX,XX @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion *mr, hwaddr addr,
175
176
if (smmu_ptw(cfg, aligned_addr, flag, cached_entry, &ptw_info)) {
177
/* All faults from PTW has S2 field. */
178
- event.u.f_walk_eabt.s2 = (ptw_info.stage == 2);
179
+ event.u.f_walk_eabt.s2 = (ptw_info.stage == SMMU_STAGE_2);
180
g_free(cached_entry);
181
switch (ptw_info.type) {
182
case SMMU_PTW_ERR_WALK_EABT:
183
@@ -XXX,XX +XXX,XX @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion *mr, hwaddr addr,
184
event.u.f_walk_eabt.addr = addr;
185
event.u.f_walk_eabt.rnw = flag & 0x1;
186
/* Stage-2 (only) is class IN while stage-1 is class TT */
187
- event.u.f_walk_eabt.class = (ptw_info.stage == 2) ?
188
+ event.u.f_walk_eabt.class = (ptw_info.stage == SMMU_STAGE_2) ?
189
SMMU_CLASS_IN : SMMU_CLASS_TT;
190
event.u.f_walk_eabt.addr2 = ptw_info.addr;
191
break;
192
--
193
2.34.1
194
195
diff view generated by jsdifflib
Deleted patch
1
From: Mostafa Saleh <smostafa@google.com>
2
1
3
smmuv3_translate() does everything from STE/CD parsing to TLB lookup
4
and PTW.
5
6
Soon, when nesting is supported, stage-1 data (tt, CD) needs to be
7
translated using stage-2.
8
9
Split smmuv3_translate() to 3 functions:
10
11
- smmu_translate(): in smmu-common.c, which does the TLB lookup, PTW,
12
TLB insertion, all the functions are already there, this just puts
13
them together.
14
This also simplifies the code as it consolidates event generation
15
in case of TLB lookup permission failure or in TT selection.
16
17
- smmuv3_do_translate(): in smmuv3.c, Calls smmu_translate() and does
18
the event population in case of errors.
19
20
- smmuv3_translate(), now calls smmuv3_do_translate() for
21
translation while the rest is the same.
22
23
Also, add stage in trace_smmuv3_translate_success()
24
25
Reviewed-by: Eric Auger <eric.auger@redhat.com>
26
Signed-off-by: Mostafa Saleh <smostafa@google.com>
27
Reviewed-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
28
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
29
Message-id: 20240715084519.1189624-6-smostafa@google.com
30
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
31
---
32
include/hw/arm/smmu-common.h | 8 ++
33
hw/arm/smmu-common.c | 59 +++++++++++
34
hw/arm/smmuv3.c | 194 +++++++++++++----------------------
35
hw/arm/trace-events | 2 +-
36
4 files changed, 142 insertions(+), 121 deletions(-)
37
38
diff --git a/include/hw/arm/smmu-common.h b/include/hw/arm/smmu-common.h
39
index XXXXXXX..XXXXXXX 100644
40
--- a/include/hw/arm/smmu-common.h
41
+++ b/include/hw/arm/smmu-common.h
42
@@ -XXX,XX +XXX,XX @@ static inline uint16_t smmu_get_sid(SMMUDevice *sdev)
43
int smmu_ptw(SMMUTransCfg *cfg, dma_addr_t iova, IOMMUAccessFlags perm,
44
SMMUTLBEntry *tlbe, SMMUPTWEventInfo *info);
45
46
+
47
+/*
48
+ * smmu_translate - Look for a translation in TLB, if not, do a PTW.
49
+ * Returns NULL on PTW error or incase of TLB permission errors.
50
+ */
51
+SMMUTLBEntry *smmu_translate(SMMUState *bs, SMMUTransCfg *cfg, dma_addr_t addr,
52
+ IOMMUAccessFlags flag, SMMUPTWEventInfo *info);
53
+
54
/**
55
* select_tt - compute which translation table shall be used according to
56
* the input iova and translation config and return the TT specific info
57
diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
58
index XXXXXXX..XXXXXXX 100644
59
--- a/hw/arm/smmu-common.c
60
+++ b/hw/arm/smmu-common.c
61
@@ -XXX,XX +XXX,XX @@ int smmu_ptw(SMMUTransCfg *cfg, dma_addr_t iova, IOMMUAccessFlags perm,
62
g_assert_not_reached();
63
}
64
65
+SMMUTLBEntry *smmu_translate(SMMUState *bs, SMMUTransCfg *cfg, dma_addr_t addr,
66
+ IOMMUAccessFlags flag, SMMUPTWEventInfo *info)
67
+{
68
+ uint64_t page_mask, aligned_addr;
69
+ SMMUTLBEntry *cached_entry = NULL;
70
+ SMMUTransTableInfo *tt;
71
+ int status;
72
+
73
+ /*
74
+ * Combined attributes used for TLB lookup, as only one stage is supported,
75
+ * it will hold attributes based on the enabled stage.
76
+ */
77
+ SMMUTransTableInfo tt_combined;
78
+
79
+ if (cfg->stage == SMMU_STAGE_1) {
80
+ /* Select stage1 translation table. */
81
+ tt = select_tt(cfg, addr);
82
+ if (!tt) {
83
+ info->type = SMMU_PTW_ERR_TRANSLATION;
84
+ info->stage = SMMU_STAGE_1;
85
+ return NULL;
86
+ }
87
+ tt_combined.granule_sz = tt->granule_sz;
88
+ tt_combined.tsz = tt->tsz;
89
+
90
+ } else {
91
+ /* Stage2. */
92
+ tt_combined.granule_sz = cfg->s2cfg.granule_sz;
93
+ tt_combined.tsz = cfg->s2cfg.tsz;
94
+ }
95
+
96
+ /*
97
+ * TLB lookup looks for granule and input size for a translation stage,
98
+ * as only one stage is supported right now, choose the right values
99
+ * from the configuration.
100
+ */
101
+ page_mask = (1ULL << tt_combined.granule_sz) - 1;
102
+ aligned_addr = addr & ~page_mask;
103
+
104
+ cached_entry = smmu_iotlb_lookup(bs, cfg, &tt_combined, aligned_addr);
105
+ if (cached_entry) {
106
+ if ((flag & IOMMU_WO) && !(cached_entry->entry.perm & IOMMU_WO)) {
107
+ info->type = SMMU_PTW_ERR_PERMISSION;
108
+ info->stage = cfg->stage;
109
+ return NULL;
110
+ }
111
+ return cached_entry;
112
+ }
113
+
114
+ cached_entry = g_new0(SMMUTLBEntry, 1);
115
+ status = smmu_ptw(cfg, aligned_addr, flag, cached_entry, info);
116
+ if (status) {
117
+ g_free(cached_entry);
118
+ return NULL;
119
+ }
120
+ smmu_iotlb_insert(bs, cfg, cached_entry);
121
+ return cached_entry;
122
+}
123
+
124
/**
125
* The bus number is used for lookup when SID based invalidation occurs.
126
* In that case we lazily populate the SMMUPciBus array from the bus hash
127
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
128
index XXXXXXX..XXXXXXX 100644
129
--- a/hw/arm/smmuv3.c
130
+++ b/hw/arm/smmuv3.c
131
@@ -XXX,XX +XXX,XX @@ static void smmuv3_flush_config(SMMUDevice *sdev)
132
g_hash_table_remove(bc->configs, sdev);
133
}
134
135
+/* Do translation with TLB lookup. */
136
+static SMMUTranslationStatus smmuv3_do_translate(SMMUv3State *s, hwaddr addr,
137
+ SMMUTransCfg *cfg,
138
+ SMMUEventInfo *event,
139
+ IOMMUAccessFlags flag,
140
+ SMMUTLBEntry **out_entry)
141
+{
142
+ SMMUPTWEventInfo ptw_info = {};
143
+ SMMUState *bs = ARM_SMMU(s);
144
+ SMMUTLBEntry *cached_entry = NULL;
145
+
146
+ cached_entry = smmu_translate(bs, cfg, addr, flag, &ptw_info);
147
+ if (!cached_entry) {
148
+ /* All faults from PTW has S2 field. */
149
+ event->u.f_walk_eabt.s2 = (ptw_info.stage == SMMU_STAGE_2);
150
+ switch (ptw_info.type) {
151
+ case SMMU_PTW_ERR_WALK_EABT:
152
+ event->type = SMMU_EVT_F_WALK_EABT;
153
+ event->u.f_walk_eabt.addr = addr;
154
+ event->u.f_walk_eabt.rnw = flag & 0x1;
155
+ event->u.f_walk_eabt.class = (ptw_info.stage == SMMU_STAGE_2) ?
156
+ SMMU_CLASS_IN : SMMU_CLASS_TT;
157
+ event->u.f_walk_eabt.addr2 = ptw_info.addr;
158
+ break;
159
+ case SMMU_PTW_ERR_TRANSLATION:
160
+ if (PTW_RECORD_FAULT(cfg)) {
161
+ event->type = SMMU_EVT_F_TRANSLATION;
162
+ event->u.f_translation.addr = addr;
163
+ event->u.f_translation.addr2 = ptw_info.addr;
164
+ event->u.f_translation.class = SMMU_CLASS_IN;
165
+ event->u.f_translation.rnw = flag & 0x1;
166
+ }
167
+ break;
168
+ case SMMU_PTW_ERR_ADDR_SIZE:
169
+ if (PTW_RECORD_FAULT(cfg)) {
170
+ event->type = SMMU_EVT_F_ADDR_SIZE;
171
+ event->u.f_addr_size.addr = addr;
172
+ event->u.f_addr_size.addr2 = ptw_info.addr;
173
+ event->u.f_addr_size.class = SMMU_CLASS_IN;
174
+ event->u.f_addr_size.rnw = flag & 0x1;
175
+ }
176
+ break;
177
+ case SMMU_PTW_ERR_ACCESS:
178
+ if (PTW_RECORD_FAULT(cfg)) {
179
+ event->type = SMMU_EVT_F_ACCESS;
180
+ event->u.f_access.addr = addr;
181
+ event->u.f_access.addr2 = ptw_info.addr;
182
+ event->u.f_access.class = SMMU_CLASS_IN;
183
+ event->u.f_access.rnw = flag & 0x1;
184
+ }
185
+ break;
186
+ case SMMU_PTW_ERR_PERMISSION:
187
+ if (PTW_RECORD_FAULT(cfg)) {
188
+ event->type = SMMU_EVT_F_PERMISSION;
189
+ event->u.f_permission.addr = addr;
190
+ event->u.f_permission.addr2 = ptw_info.addr;
191
+ event->u.f_permission.class = SMMU_CLASS_IN;
192
+ event->u.f_permission.rnw = flag & 0x1;
193
+ }
194
+ break;
195
+ default:
196
+ g_assert_not_reached();
197
+ }
198
+ return SMMU_TRANS_ERROR;
199
+ }
200
+ *out_entry = cached_entry;
201
+ return SMMU_TRANS_SUCCESS;
202
+}
203
+
204
+/* Entry point to SMMU, does everything. */
205
static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion *mr, hwaddr addr,
206
IOMMUAccessFlags flag, int iommu_idx)
207
{
208
@@ -XXX,XX +XXX,XX @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion *mr, hwaddr addr,
209
SMMUEventInfo event = {.type = SMMU_EVT_NONE,
210
.sid = sid,
211
.inval_ste_allowed = false};
212
- SMMUPTWEventInfo ptw_info = {};
213
SMMUTranslationStatus status;
214
- SMMUState *bs = ARM_SMMU(s);
215
- uint64_t page_mask, aligned_addr;
216
- SMMUTLBEntry *cached_entry = NULL;
217
- SMMUTransTableInfo *tt;
218
SMMUTransCfg *cfg = NULL;
219
IOMMUTLBEntry entry = {
220
.target_as = &address_space_memory,
221
@@ -XXX,XX +XXX,XX @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion *mr, hwaddr addr,
222
.addr_mask = ~(hwaddr)0,
223
.perm = IOMMU_NONE,
224
};
225
- /*
226
- * Combined attributes used for TLB lookup, as only one stage is supported,
227
- * it will hold attributes based on the enabled stage.
228
- */
229
- SMMUTransTableInfo tt_combined;
230
+ SMMUTLBEntry *cached_entry = NULL;
231
232
qemu_mutex_lock(&s->mutex);
233
234
@@ -XXX,XX +XXX,XX @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion *mr, hwaddr addr,
235
goto epilogue;
236
}
237
238
- if (cfg->stage == SMMU_STAGE_1) {
239
- /* Select stage1 translation table. */
240
- tt = select_tt(cfg, addr);
241
- if (!tt) {
242
- if (cfg->record_faults) {
243
- event.type = SMMU_EVT_F_TRANSLATION;
244
- event.u.f_translation.addr = addr;
245
- event.u.f_translation.rnw = flag & 0x1;
246
- }
247
- status = SMMU_TRANS_ERROR;
248
- goto epilogue;
249
- }
250
- tt_combined.granule_sz = tt->granule_sz;
251
- tt_combined.tsz = tt->tsz;
252
-
253
- } else {
254
- /* Stage2. */
255
- tt_combined.granule_sz = cfg->s2cfg.granule_sz;
256
- tt_combined.tsz = cfg->s2cfg.tsz;
257
- }
258
- /*
259
- * TLB lookup looks for granule and input size for a translation stage,
260
- * as only one stage is supported right now, choose the right values
261
- * from the configuration.
262
- */
263
- page_mask = (1ULL << tt_combined.granule_sz) - 1;
264
- aligned_addr = addr & ~page_mask;
265
-
266
- cached_entry = smmu_iotlb_lookup(bs, cfg, &tt_combined, aligned_addr);
267
- if (cached_entry) {
268
- if ((flag & IOMMU_WO) && !(cached_entry->entry.perm & IOMMU_WO)) {
269
- status = SMMU_TRANS_ERROR;
270
- /*
271
- * We know that the TLB only contains either stage-1 or stage-2 as
272
- * nesting is not supported. So it is sufficient to check the
273
- * translation stage to know the TLB stage for now.
274
- */
275
- event.u.f_walk_eabt.s2 = (cfg->stage == SMMU_STAGE_2);
276
- if (PTW_RECORD_FAULT(cfg)) {
277
- event.type = SMMU_EVT_F_PERMISSION;
278
- event.u.f_permission.addr = addr;
279
- event.u.f_permission.rnw = flag & 0x1;
280
- }
281
- } else {
282
- status = SMMU_TRANS_SUCCESS;
283
- }
284
- goto epilogue;
285
- }
286
-
287
- cached_entry = g_new0(SMMUTLBEntry, 1);
288
-
289
- if (smmu_ptw(cfg, aligned_addr, flag, cached_entry, &ptw_info)) {
290
- /* All faults from PTW has S2 field. */
291
- event.u.f_walk_eabt.s2 = (ptw_info.stage == SMMU_STAGE_2);
292
- g_free(cached_entry);
293
- switch (ptw_info.type) {
294
- case SMMU_PTW_ERR_WALK_EABT:
295
- event.type = SMMU_EVT_F_WALK_EABT;
296
- event.u.f_walk_eabt.addr = addr;
297
- event.u.f_walk_eabt.rnw = flag & 0x1;
298
- /* Stage-2 (only) is class IN while stage-1 is class TT */
299
- event.u.f_walk_eabt.class = (ptw_info.stage == SMMU_STAGE_2) ?
300
- SMMU_CLASS_IN : SMMU_CLASS_TT;
301
- event.u.f_walk_eabt.addr2 = ptw_info.addr;
302
- break;
303
- case SMMU_PTW_ERR_TRANSLATION:
304
- if (PTW_RECORD_FAULT(cfg)) {
305
- event.type = SMMU_EVT_F_TRANSLATION;
306
- event.u.f_translation.addr = addr;
307
- event.u.f_translation.addr2 = ptw_info.addr;
308
- event.u.f_translation.class = SMMU_CLASS_IN;
309
- event.u.f_translation.rnw = flag & 0x1;
310
- }
311
- break;
312
- case SMMU_PTW_ERR_ADDR_SIZE:
313
- if (PTW_RECORD_FAULT(cfg)) {
314
- event.type = SMMU_EVT_F_ADDR_SIZE;
315
- event.u.f_addr_size.addr = addr;
316
- event.u.f_addr_size.addr2 = ptw_info.addr;
317
- event.u.f_translation.class = SMMU_CLASS_IN;
318
- event.u.f_addr_size.rnw = flag & 0x1;
319
- }
320
- break;
321
- case SMMU_PTW_ERR_ACCESS:
322
- if (PTW_RECORD_FAULT(cfg)) {
323
- event.type = SMMU_EVT_F_ACCESS;
324
- event.u.f_access.addr = addr;
325
- event.u.f_access.addr2 = ptw_info.addr;
326
- event.u.f_translation.class = SMMU_CLASS_IN;
327
- event.u.f_access.rnw = flag & 0x1;
328
- }
329
- break;
330
- case SMMU_PTW_ERR_PERMISSION:
331
- if (PTW_RECORD_FAULT(cfg)) {
332
- event.type = SMMU_EVT_F_PERMISSION;
333
- event.u.f_permission.addr = addr;
334
- event.u.f_permission.addr2 = ptw_info.addr;
335
- event.u.f_translation.class = SMMU_CLASS_IN;
336
- event.u.f_permission.rnw = flag & 0x1;
337
- }
338
- break;
339
- default:
340
- g_assert_not_reached();
341
- }
342
- status = SMMU_TRANS_ERROR;
343
- } else {
344
- smmu_iotlb_insert(bs, cfg, cached_entry);
345
- status = SMMU_TRANS_SUCCESS;
346
- }
347
+ status = smmuv3_do_translate(s, addr, cfg, &event, flag, &cached_entry);
348
349
epilogue:
350
qemu_mutex_unlock(&s->mutex);
351
@@ -XXX,XX +XXX,XX @@ epilogue:
352
(addr & cached_entry->entry.addr_mask);
353
entry.addr_mask = cached_entry->entry.addr_mask;
354
trace_smmuv3_translate_success(mr->parent_obj.name, sid, addr,
355
- entry.translated_addr, entry.perm);
356
+ entry.translated_addr, entry.perm,
357
+ cfg->stage);
358
break;
359
case SMMU_TRANS_DISABLE:
360
entry.perm = flag;
361
diff --git a/hw/arm/trace-events b/hw/arm/trace-events
362
index XXXXXXX..XXXXXXX 100644
363
--- a/hw/arm/trace-events
364
+++ b/hw/arm/trace-events
365
@@ -XXX,XX +XXX,XX @@ smmuv3_get_ste(uint64_t addr) "STE addr: 0x%"PRIx64
366
smmuv3_translate_disable(const char *n, uint16_t sid, uint64_t addr, bool is_write) "%s sid=0x%x bypass (smmu disabled) iova:0x%"PRIx64" is_write=%d"
367
smmuv3_translate_bypass(const char *n, uint16_t sid, uint64_t addr, bool is_write) "%s sid=0x%x STE bypass iova:0x%"PRIx64" is_write=%d"
368
smmuv3_translate_abort(const char *n, uint16_t sid, uint64_t addr, bool is_write) "%s sid=0x%x abort on iova:0x%"PRIx64" is_write=%d"
369
-smmuv3_translate_success(const char *n, uint16_t sid, uint64_t iova, uint64_t translated, int perm) "%s sid=0x%x iova=0x%"PRIx64" translated=0x%"PRIx64" perm=0x%x"
370
+smmuv3_translate_success(const char *n, uint16_t sid, uint64_t iova, uint64_t translated, int perm, int stage) "%s sid=0x%x iova=0x%"PRIx64" translated=0x%"PRIx64" perm=0x%x stage=%d"
371
smmuv3_get_cd(uint64_t addr) "CD addr: 0x%"PRIx64
372
smmuv3_decode_cd(uint32_t oas) "oas=%d"
373
smmuv3_decode_cd_tt(int i, uint32_t tsz, uint64_t ttb, uint32_t granule_sz, bool had) "TT[%d]:tsz:%d ttb:0x%"PRIx64" granule_sz:%d had:%d"
374
--
375
2.34.1
376
377
diff view generated by jsdifflib
Deleted patch
1
From: Mostafa Saleh <smostafa@google.com>
2
1
3
ASID and VMID used to be uint16_t in the translation config, however,
4
in other contexts they can be int as -1 in case of TLB invalidation,
5
to represent all (don’t care).
6
When stage-2 was added asid was set to -1 in stage-2 and vmid to -1
7
in stage-1 configs. However, that meant they were set as (65535),
8
this was not an issue as nesting was not supported and no
9
commands/lookup uses both.
10
11
With nesting, it’s critical to get this right as translation must be
12
tagged correctly with ASID/VMID, and with ASID=-1 meaning stage-2.
13
Represent ASID/VMID everywhere as int.
14
15
Reviewed-by: Eric Auger <eric.auger@redhat.com>
16
Signed-off-by: Mostafa Saleh <smostafa@google.com>
17
Reviewed-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
18
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
19
Message-id: 20240715084519.1189624-7-smostafa@google.com
20
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
21
---
22
include/hw/arm/smmu-common.h | 14 +++++++-------
23
hw/arm/smmu-common.c | 10 +++++-----
24
hw/arm/smmuv3.c | 4 ++--
25
hw/arm/trace-events | 18 +++++++++---------
26
4 files changed, 23 insertions(+), 23 deletions(-)
27
28
diff --git a/include/hw/arm/smmu-common.h b/include/hw/arm/smmu-common.h
29
index XXXXXXX..XXXXXXX 100644
30
--- a/include/hw/arm/smmu-common.h
31
+++ b/include/hw/arm/smmu-common.h
32
@@ -XXX,XX +XXX,XX @@ typedef struct SMMUS2Cfg {
33
bool record_faults; /* Record fault events (S2R) */
34
uint8_t granule_sz; /* Granule page shift (based on S2TG) */
35
uint8_t eff_ps; /* Effective PA output range (based on S2PS) */
36
- uint16_t vmid; /* Virtual Machine ID (S2VMID) */
37
+ int vmid; /* Virtual Machine ID (S2VMID) */
38
uint64_t vttb; /* Address of translation table base (S2TTB) */
39
} SMMUS2Cfg;
40
41
@@ -XXX,XX +XXX,XX @@ typedef struct SMMUTransCfg {
42
uint64_t ttb; /* TT base address */
43
uint8_t oas; /* output address width */
44
uint8_t tbi; /* Top Byte Ignore */
45
- uint16_t asid;
46
+ int asid;
47
SMMUTransTableInfo tt[2];
48
/* Used by stage-2 only. */
49
struct SMMUS2Cfg s2cfg;
50
@@ -XXX,XX +XXX,XX @@ typedef struct SMMUPciBus {
51
52
typedef struct SMMUIOTLBKey {
53
uint64_t iova;
54
- uint16_t asid;
55
- uint16_t vmid;
56
+ int asid;
57
+ int vmid;
58
uint8_t tg;
59
uint8_t level;
60
} SMMUIOTLBKey;
61
@@ -XXX,XX +XXX,XX @@ SMMUDevice *smmu_find_sdev(SMMUState *s, uint32_t sid);
62
SMMUTLBEntry *smmu_iotlb_lookup(SMMUState *bs, SMMUTransCfg *cfg,
63
SMMUTransTableInfo *tt, hwaddr iova);
64
void smmu_iotlb_insert(SMMUState *bs, SMMUTransCfg *cfg, SMMUTLBEntry *entry);
65
-SMMUIOTLBKey smmu_get_iotlb_key(uint16_t asid, uint16_t vmid, uint64_t iova,
66
+SMMUIOTLBKey smmu_get_iotlb_key(int asid, int vmid, uint64_t iova,
67
uint8_t tg, uint8_t level);
68
void smmu_iotlb_inv_all(SMMUState *s);
69
-void smmu_iotlb_inv_asid(SMMUState *s, uint16_t asid);
70
-void smmu_iotlb_inv_vmid(SMMUState *s, uint16_t vmid);
71
+void smmu_iotlb_inv_asid(SMMUState *s, int asid);
72
+void smmu_iotlb_inv_vmid(SMMUState *s, int vmid);
73
void smmu_iotlb_inv_iova(SMMUState *s, int asid, int vmid, dma_addr_t iova,
74
uint8_t tg, uint64_t num_pages, uint8_t ttl);
75
76
diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
77
index XXXXXXX..XXXXXXX 100644
78
--- a/hw/arm/smmu-common.c
79
+++ b/hw/arm/smmu-common.c
80
@@ -XXX,XX +XXX,XX @@ static gboolean smmu_iotlb_key_equal(gconstpointer v1, gconstpointer v2)
81
(k1->vmid == k2->vmid);
82
}
83
84
-SMMUIOTLBKey smmu_get_iotlb_key(uint16_t asid, uint16_t vmid, uint64_t iova,
85
+SMMUIOTLBKey smmu_get_iotlb_key(int asid, int vmid, uint64_t iova,
86
uint8_t tg, uint8_t level)
87
{
88
SMMUIOTLBKey key = {.asid = asid, .vmid = vmid, .iova = iova,
89
@@ -XXX,XX +XXX,XX @@ void smmu_iotlb_inv_all(SMMUState *s)
90
static gboolean smmu_hash_remove_by_asid(gpointer key, gpointer value,
91
gpointer user_data)
92
{
93
- uint16_t asid = *(uint16_t *)user_data;
94
+ int asid = *(int *)user_data;
95
SMMUIOTLBKey *iotlb_key = (SMMUIOTLBKey *)key;
96
97
return SMMU_IOTLB_ASID(*iotlb_key) == asid;
98
@@ -XXX,XX +XXX,XX @@ static gboolean smmu_hash_remove_by_asid(gpointer key, gpointer value,
99
static gboolean smmu_hash_remove_by_vmid(gpointer key, gpointer value,
100
gpointer user_data)
101
{
102
- uint16_t vmid = *(uint16_t *)user_data;
103
+ int vmid = *(int *)user_data;
104
SMMUIOTLBKey *iotlb_key = (SMMUIOTLBKey *)key;
105
106
return SMMU_IOTLB_VMID(*iotlb_key) == vmid;
107
@@ -XXX,XX +XXX,XX @@ void smmu_iotlb_inv_iova(SMMUState *s, int asid, int vmid, dma_addr_t iova,
108
&info);
109
}
110
111
-void smmu_iotlb_inv_asid(SMMUState *s, uint16_t asid)
112
+void smmu_iotlb_inv_asid(SMMUState *s, int asid)
113
{
114
trace_smmu_iotlb_inv_asid(asid);
115
g_hash_table_foreach_remove(s->iotlb, smmu_hash_remove_by_asid, &asid);
116
}
117
118
-void smmu_iotlb_inv_vmid(SMMUState *s, uint16_t vmid)
119
+void smmu_iotlb_inv_vmid(SMMUState *s, int vmid)
120
{
121
trace_smmu_iotlb_inv_vmid(vmid);
122
g_hash_table_foreach_remove(s->iotlb, smmu_hash_remove_by_vmid, &vmid);
123
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
124
index XXXXXXX..XXXXXXX 100644
125
--- a/hw/arm/smmuv3.c
126
+++ b/hw/arm/smmuv3.c
127
@@ -XXX,XX +XXX,XX @@ static int smmuv3_cmdq_consume(SMMUv3State *s)
128
}
129
case SMMU_CMD_TLBI_NH_ASID:
130
{
131
- uint16_t asid = CMD_ASID(&cmd);
132
+ int asid = CMD_ASID(&cmd);
133
134
if (!STAGE1_SUPPORTED(s)) {
135
cmd_error = SMMU_CERROR_ILL;
136
@@ -XXX,XX +XXX,XX @@ static int smmuv3_cmdq_consume(SMMUv3State *s)
137
break;
138
case SMMU_CMD_TLBI_S12_VMALL:
139
{
140
- uint16_t vmid = CMD_VMID(&cmd);
141
+ int vmid = CMD_VMID(&cmd);
142
143
if (!STAGE2_SUPPORTED(s)) {
144
cmd_error = SMMU_CERROR_ILL;
145
diff --git a/hw/arm/trace-events b/hw/arm/trace-events
146
index XXXXXXX..XXXXXXX 100644
147
--- a/hw/arm/trace-events
148
+++ b/hw/arm/trace-events
149
@@ -XXX,XX +XXX,XX @@ smmu_ptw_page_pte(int stage, int level, uint64_t iova, uint64_t baseaddr, uint6
150
smmu_ptw_block_pte(int stage, int level, uint64_t baseaddr, uint64_t pteaddr, uint64_t pte, uint64_t iova, uint64_t gpa, int bsize_mb) "stage=%d level=%d base@=0x%"PRIx64" pte@=0x%"PRIx64" pte=0x%"PRIx64" iova=0x%"PRIx64" block address = 0x%"PRIx64" block size = %d MiB"
151
smmu_get_pte(uint64_t baseaddr, int index, uint64_t pteaddr, uint64_t pte) "baseaddr=0x%"PRIx64" index=0x%x, pteaddr=0x%"PRIx64", pte=0x%"PRIx64
152
smmu_iotlb_inv_all(void) "IOTLB invalidate all"
153
-smmu_iotlb_inv_asid(uint16_t asid) "IOTLB invalidate asid=%d"
154
-smmu_iotlb_inv_vmid(uint16_t vmid) "IOTLB invalidate vmid=%d"
155
-smmu_iotlb_inv_iova(uint16_t asid, uint64_t addr) "IOTLB invalidate asid=%d addr=0x%"PRIx64
156
+smmu_iotlb_inv_asid(int asid) "IOTLB invalidate asid=%d"
157
+smmu_iotlb_inv_vmid(int vmid) "IOTLB invalidate vmid=%d"
158
+smmu_iotlb_inv_iova(int asid, uint64_t addr) "IOTLB invalidate asid=%d addr=0x%"PRIx64
159
smmu_inv_notifiers_mr(const char *name) "iommu mr=%s"
160
-smmu_iotlb_lookup_hit(uint16_t asid, uint16_t vmid, uint64_t addr, uint32_t hit, uint32_t miss, uint32_t p) "IOTLB cache HIT asid=%d vmid=%d addr=0x%"PRIx64" hit=%d miss=%d hit rate=%d"
161
-smmu_iotlb_lookup_miss(uint16_t asid, uint16_t vmid, uint64_t addr, uint32_t hit, uint32_t miss, uint32_t p) "IOTLB cache MISS asid=%d vmid=%d addr=0x%"PRIx64" hit=%d miss=%d hit rate=%d"
162
-smmu_iotlb_insert(uint16_t asid, uint16_t vmid, uint64_t addr, uint8_t tg, uint8_t level) "IOTLB ++ asid=%d vmid=%d addr=0x%"PRIx64" tg=%d level=%d"
163
+smmu_iotlb_lookup_hit(int asid, int vmid, uint64_t addr, uint32_t hit, uint32_t miss, uint32_t p) "IOTLB cache HIT asid=%d vmid=%d addr=0x%"PRIx64" hit=%d miss=%d hit rate=%d"
164
+smmu_iotlb_lookup_miss(int asid, int vmid, uint64_t addr, uint32_t hit, uint32_t miss, uint32_t p) "IOTLB cache MISS asid=%d vmid=%d addr=0x%"PRIx64" hit=%d miss=%d hit rate=%d"
165
+smmu_iotlb_insert(int asid, int vmid, uint64_t addr, uint8_t tg, uint8_t level) "IOTLB ++ asid=%d vmid=%d addr=0x%"PRIx64" tg=%d level=%d"
166
167
# smmuv3.c
168
smmuv3_read_mmio(uint64_t addr, uint64_t val, unsigned size, uint32_t r) "addr: 0x%"PRIx64" val:0x%"PRIx64" size: 0x%x(%d)"
169
@@ -XXX,XX +XXX,XX @@ smmuv3_config_cache_hit(uint32_t sid, uint32_t hits, uint32_t misses, uint32_t p
170
smmuv3_config_cache_miss(uint32_t sid, uint32_t hits, uint32_t misses, uint32_t perc) "Config cache MISS for sid=0x%x (hits=%d, misses=%d, hit rate=%d)"
171
smmuv3_range_inval(int vmid, int asid, uint64_t addr, uint8_t tg, uint64_t num_pages, uint8_t ttl, bool leaf) "vmid=%d asid=%d addr=0x%"PRIx64" tg=%d num_pages=0x%"PRIx64" ttl=%d leaf=%d"
172
smmuv3_cmdq_tlbi_nh(void) ""
173
-smmuv3_cmdq_tlbi_nh_asid(uint16_t asid) "asid=%d"
174
-smmuv3_cmdq_tlbi_s12_vmid(uint16_t vmid) "vmid=%d"
175
+smmuv3_cmdq_tlbi_nh_asid(int asid) "asid=%d"
176
+smmuv3_cmdq_tlbi_s12_vmid(int vmid) "vmid=%d"
177
smmuv3_config_cache_inv(uint32_t sid) "Config cache INV for sid=0x%x"
178
smmuv3_notify_flag_add(const char *iommu) "ADD SMMUNotifier node for iommu mr=%s"
179
smmuv3_notify_flag_del(const char *iommu) "DEL SMMUNotifier node for iommu mr=%s"
180
-smmuv3_inv_notifiers_iova(const char *name, uint16_t asid, uint16_t vmid, uint64_t iova, uint8_t tg, uint64_t num_pages) "iommu mr=%s asid=%d vmid=%d iova=0x%"PRIx64" tg=%d num_pages=0x%"PRIx64
181
+smmuv3_inv_notifiers_iova(const char *name, int asid, int vmid, uint64_t iova, uint8_t tg, uint64_t num_pages) "iommu mr=%s asid=%d vmid=%d iova=0x%"PRIx64" tg=%d num_pages=0x%"PRIx64
182
183
# strongarm.c
184
strongarm_uart_update_parameters(const char *label, int speed, char parity, int data_bits, int stop_bits) "%s speed=%d parity=%c data=%d stop=%d"
185
--
186
2.34.1
187
188
diff view generated by jsdifflib
Deleted patch
1
From: Mostafa Saleh <smostafa@google.com>
2
1
3
Soon, smmuv3_do_translate() will be used to translate the CD and the
4
TTBx, instead of rewriting the same logic to convert the returned
5
cached entry to an address, add a new macro CACHED_ENTRY_TO_ADDR.
6
7
Reviewed-by: Eric Auger <eric.auger@redhat.com>
8
Signed-off-by: Mostafa Saleh <smostafa@google.com>
9
Reviewed-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
10
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
11
Message-id: 20240715084519.1189624-8-smostafa@google.com
12
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
13
---
14
include/hw/arm/smmu-common.h | 3 +++
15
hw/arm/smmuv3.c | 3 +--
16
2 files changed, 4 insertions(+), 2 deletions(-)
17
18
diff --git a/include/hw/arm/smmu-common.h b/include/hw/arm/smmu-common.h
19
index XXXXXXX..XXXXXXX 100644
20
--- a/include/hw/arm/smmu-common.h
21
+++ b/include/hw/arm/smmu-common.h
22
@@ -XXX,XX +XXX,XX @@
23
#define VMSA_IDXMSK(isz, strd, lvl) ((1ULL << \
24
VMSA_BIT_LVL(isz, strd, lvl)) - 1)
25
26
+#define CACHED_ENTRY_TO_ADDR(ent, addr) ((ent)->entry.translated_addr + \
27
+ ((addr) & (ent)->entry.addr_mask))
28
+
29
/*
30
* Page table walk error types
31
*/
32
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
33
index XXXXXXX..XXXXXXX 100644
34
--- a/hw/arm/smmuv3.c
35
+++ b/hw/arm/smmuv3.c
36
@@ -XXX,XX +XXX,XX @@ epilogue:
37
switch (status) {
38
case SMMU_TRANS_SUCCESS:
39
entry.perm = cached_entry->entry.perm;
40
- entry.translated_addr = cached_entry->entry.translated_addr +
41
- (addr & cached_entry->entry.addr_mask);
42
+ entry.translated_addr = CACHED_ENTRY_TO_ADDR(cached_entry, addr);
43
entry.addr_mask = cached_entry->entry.addr_mask;
44
trace_smmuv3_translate_success(mr->parent_obj.name, sid, addr,
45
entry.translated_addr, entry.perm,
46
--
47
2.34.1
48
49
diff view generated by jsdifflib
Deleted patch
1
From: Mostafa Saleh <smostafa@google.com>
2
1
3
According to ARM SMMU architecture specification (ARM IHI 0070 F.b),
4
In "5.2 Stream Table Entry":
5
[51:6] S1ContextPtr
6
If Config[1] == 1 (stage 2 enabled), this pointer is an IPA translated by
7
stage 2 and the programmed value must be within the range of the IAS.
8
9
In "5.4.1 CD notes":
10
The translation table walks performed from TTB0 or TTB1 are always performed
11
in IPA space if stage 2 translations are enabled.
12
13
This patch implements translation of the S1 context descriptor pointer and
14
TTBx base addresses through the S2 stage (IPA -> PA)
15
16
smmuv3_do_translate() is updated to have one arg which is translation
17
class, this is useful to:
18
- Decide whether a translation is stage-2 only or use the STE config.
19
- Populate the class in case of faults, WALK_EABT is left unchanged
20
for stage-1 as it is always IN, while stage-2 would match the
21
used class (TT, IN, CD), this will change slightly when the ptw
22
supports nested translation as it can also issue TT event with
23
class IN.
24
25
In case for stage-2 only translation, used in the context of nested
26
translation, the stage and asid are saved and restored before and
27
after calling smmu_translate().
28
29
Translating CD or TTBx can fail for the following reasons:
30
1) Large address size: This is described in
31
(3.4.3 Address sizes of SMMU-originated accesses)
32
- For CD ptr larger than IAS, for SMMUv3.1, it can trigger either
33
C_BAD_STE or Translation fault, we implement the latter as it
34
requires no extra code.
35
- For TTBx, if larger than the effective stage 1 output address size, it
36
triggers C_BAD_CD.
37
38
2) Faults from PTWs (7.3 Event records)
39
- F_ADDR_SIZE: large address size after first level causes stage 2 Address
40
Size fault (Also in 3.4.3 Address sizes of SMMU-originated accesses)
41
- F_PERMISSION: Same as an address translation. However, when
42
CLASS == CD, the access is implicitly Data and a read.
43
- F_ACCESS: Same as an address translation.
44
- F_TRANSLATION: Same as an address translation.
45
- F_WALK_EABT: Same as an address translation.
46
These are already implemented in the PTW logic, so no extra handling
47
required.
48
49
As in CD and TTBx translation context, the iova is not known, setting
50
the InputAddr was removed from "smmuv3_do_translate" and is set afterwards
51
from "smmuv3_translate" with the new function "smmuv3_fixup_event"
52
53
Signed-off-by: Mostafa Saleh <smostafa@google.com>
54
Reviewed-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
55
Reviewed-by: Eric Auger <eric.auger@redhat.com>
56
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
57
Message-id: 20240715084519.1189624-9-smostafa@google.com
58
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
59
---
60
hw/arm/smmuv3.c | 120 +++++++++++++++++++++++++++++++++++++++++-------
61
1 file changed, 103 insertions(+), 17 deletions(-)
62
63
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
64
index XXXXXXX..XXXXXXX 100644
65
--- a/hw/arm/smmuv3.c
66
+++ b/hw/arm/smmuv3.c
67
@@ -XXX,XX +XXX,XX @@ static int smmu_get_ste(SMMUv3State *s, dma_addr_t addr, STE *buf,
68
69
}
70
71
+static SMMUTranslationStatus smmuv3_do_translate(SMMUv3State *s, hwaddr addr,
72
+ SMMUTransCfg *cfg,
73
+ SMMUEventInfo *event,
74
+ IOMMUAccessFlags flag,
75
+ SMMUTLBEntry **out_entry,
76
+ SMMUTranslationClass class);
77
/* @ssid > 0 not supported yet */
78
-static int smmu_get_cd(SMMUv3State *s, STE *ste, uint32_t ssid,
79
- CD *buf, SMMUEventInfo *event)
80
+static int smmu_get_cd(SMMUv3State *s, STE *ste, SMMUTransCfg *cfg,
81
+ uint32_t ssid, CD *buf, SMMUEventInfo *event)
82
{
83
dma_addr_t addr = STE_CTXPTR(ste);
84
int ret, i;
85
+ SMMUTranslationStatus status;
86
+ SMMUTLBEntry *entry;
87
88
trace_smmuv3_get_cd(addr);
89
+
90
+ if (cfg->stage == SMMU_NESTED) {
91
+ status = smmuv3_do_translate(s, addr, cfg, event,
92
+ IOMMU_RO, &entry, SMMU_CLASS_CD);
93
+
94
+ /* Same PTW faults are reported but with CLASS = CD. */
95
+ if (status != SMMU_TRANS_SUCCESS) {
96
+ return -EINVAL;
97
+ }
98
+
99
+ addr = CACHED_ENTRY_TO_ADDR(entry, addr);
100
+ }
101
+
102
/* TODO: guarantee 64-bit single-copy atomicity */
103
ret = dma_memory_read(&address_space_memory, addr, buf, sizeof(*buf),
104
MEMTXATTRS_UNSPECIFIED);
105
@@ -XXX,XX +XXX,XX @@ static int smmu_find_ste(SMMUv3State *s, uint32_t sid, STE *ste,
106
return 0;
107
}
108
109
-static int decode_cd(SMMUTransCfg *cfg, CD *cd, SMMUEventInfo *event)
110
+static int decode_cd(SMMUv3State *s, SMMUTransCfg *cfg,
111
+ CD *cd, SMMUEventInfo *event)
112
{
113
int ret = -EINVAL;
114
int i;
115
+ SMMUTranslationStatus status;
116
+ SMMUTLBEntry *entry;
117
118
if (!CD_VALID(cd) || !CD_AARCH64(cd)) {
119
goto bad_cd;
120
@@ -XXX,XX +XXX,XX @@ static int decode_cd(SMMUTransCfg *cfg, CD *cd, SMMUEventInfo *event)
121
122
tt->tsz = tsz;
123
tt->ttb = CD_TTB(cd, i);
124
+
125
if (tt->ttb & ~(MAKE_64BIT_MASK(0, cfg->oas))) {
126
goto bad_cd;
127
}
128
+
129
+ /* Translate the TTBx, from IPA to PA if nesting is enabled. */
130
+ if (cfg->stage == SMMU_NESTED) {
131
+ status = smmuv3_do_translate(s, tt->ttb, cfg, event, IOMMU_RO,
132
+ &entry, SMMU_CLASS_TT);
133
+ /*
134
+ * Same PTW faults are reported but with CLASS = TT.
135
+ * If TTBx is larger than the effective stage 1 output addres
136
+ * size, it reports C_BAD_CD, which is handled by the above case.
137
+ */
138
+ if (status != SMMU_TRANS_SUCCESS) {
139
+ return -EINVAL;
140
+ }
141
+ tt->ttb = CACHED_ENTRY_TO_ADDR(entry, tt->ttb);
142
+ }
143
+
144
tt->had = CD_HAD(cd, i);
145
trace_smmuv3_decode_cd_tt(i, tt->tsz, tt->ttb, tt->granule_sz, tt->had);
146
}
147
@@ -XXX,XX +XXX,XX @@ static int smmuv3_decode_config(IOMMUMemoryRegion *mr, SMMUTransCfg *cfg,
148
return 0;
149
}
150
151
- ret = smmu_get_cd(s, &ste, 0 /* ssid */, &cd, event);
152
+ ret = smmu_get_cd(s, &ste, cfg, 0 /* ssid */, &cd, event);
153
if (ret) {
154
return ret;
155
}
156
157
- return decode_cd(cfg, &cd, event);
158
+ return decode_cd(s, cfg, &cd, event);
159
}
160
161
/**
162
@@ -XXX,XX +XXX,XX @@ static SMMUTranslationStatus smmuv3_do_translate(SMMUv3State *s, hwaddr addr,
163
SMMUTransCfg *cfg,
164
SMMUEventInfo *event,
165
IOMMUAccessFlags flag,
166
- SMMUTLBEntry **out_entry)
167
+ SMMUTLBEntry **out_entry,
168
+ SMMUTranslationClass class)
169
{
170
SMMUPTWEventInfo ptw_info = {};
171
SMMUState *bs = ARM_SMMU(s);
172
SMMUTLBEntry *cached_entry = NULL;
173
+ int asid, stage;
174
+ bool desc_s2_translation = class != SMMU_CLASS_IN;
175
+
176
+ /*
177
+ * The function uses the argument class to identify which stage is used:
178
+ * - CLASS = IN: Means an input translation, determine the stage from STE.
179
+ * - CLASS = CD: Means the addr is an IPA of the CD, and it would be
180
+ * translated using the stage-2.
181
+ * - CLASS = TT: Means the addr is an IPA of the stage-1 translation table
182
+ * and it would be translated using the stage-2.
183
+ * For the last 2 cases instead of having intrusive changes in the common
184
+ * logic, we modify the cfg to be a stage-2 translation only in case of
185
+ * nested, and then restore it after.
186
+ */
187
+ if (desc_s2_translation) {
188
+ asid = cfg->asid;
189
+ stage = cfg->stage;
190
+ cfg->asid = -1;
191
+ cfg->stage = SMMU_STAGE_2;
192
+ }
193
194
cached_entry = smmu_translate(bs, cfg, addr, flag, &ptw_info);
195
+
196
+ if (desc_s2_translation) {
197
+ cfg->asid = asid;
198
+ cfg->stage = stage;
199
+ }
200
+
201
if (!cached_entry) {
202
/* All faults from PTW has S2 field. */
203
event->u.f_walk_eabt.s2 = (ptw_info.stage == SMMU_STAGE_2);
204
switch (ptw_info.type) {
205
case SMMU_PTW_ERR_WALK_EABT:
206
event->type = SMMU_EVT_F_WALK_EABT;
207
- event->u.f_walk_eabt.addr = addr;
208
event->u.f_walk_eabt.rnw = flag & 0x1;
209
event->u.f_walk_eabt.class = (ptw_info.stage == SMMU_STAGE_2) ?
210
- SMMU_CLASS_IN : SMMU_CLASS_TT;
211
+ class : SMMU_CLASS_TT;
212
event->u.f_walk_eabt.addr2 = ptw_info.addr;
213
break;
214
case SMMU_PTW_ERR_TRANSLATION:
215
if (PTW_RECORD_FAULT(cfg)) {
216
event->type = SMMU_EVT_F_TRANSLATION;
217
- event->u.f_translation.addr = addr;
218
event->u.f_translation.addr2 = ptw_info.addr;
219
- event->u.f_translation.class = SMMU_CLASS_IN;
220
+ event->u.f_translation.class = class;
221
event->u.f_translation.rnw = flag & 0x1;
222
}
223
break;
224
case SMMU_PTW_ERR_ADDR_SIZE:
225
if (PTW_RECORD_FAULT(cfg)) {
226
event->type = SMMU_EVT_F_ADDR_SIZE;
227
- event->u.f_addr_size.addr = addr;
228
event->u.f_addr_size.addr2 = ptw_info.addr;
229
- event->u.f_addr_size.class = SMMU_CLASS_IN;
230
+ event->u.f_addr_size.class = class;
231
event->u.f_addr_size.rnw = flag & 0x1;
232
}
233
break;
234
case SMMU_PTW_ERR_ACCESS:
235
if (PTW_RECORD_FAULT(cfg)) {
236
event->type = SMMU_EVT_F_ACCESS;
237
- event->u.f_access.addr = addr;
238
event->u.f_access.addr2 = ptw_info.addr;
239
- event->u.f_access.class = SMMU_CLASS_IN;
240
+ event->u.f_access.class = class;
241
event->u.f_access.rnw = flag & 0x1;
242
}
243
break;
244
case SMMU_PTW_ERR_PERMISSION:
245
if (PTW_RECORD_FAULT(cfg)) {
246
event->type = SMMU_EVT_F_PERMISSION;
247
- event->u.f_permission.addr = addr;
248
event->u.f_permission.addr2 = ptw_info.addr;
249
- event->u.f_permission.class = SMMU_CLASS_IN;
250
+ event->u.f_permission.class = class;
251
event->u.f_permission.rnw = flag & 0x1;
252
}
253
break;
254
@@ -XXX,XX +XXX,XX @@ static SMMUTranslationStatus smmuv3_do_translate(SMMUv3State *s, hwaddr addr,
255
return SMMU_TRANS_SUCCESS;
256
}
257
258
+/*
259
+ * Sets the InputAddr for an SMMU_TRANS_ERROR, as it can't be
260
+ * set from all contexts, as smmuv3_get_config() can return
261
+ * translation faults in case of nested translation (for CD
262
+ * and TTBx). But in that case the iova is not known.
263
+ */
264
+static void smmuv3_fixup_event(SMMUEventInfo *event, hwaddr iova)
265
+{
266
+ switch (event->type) {
267
+ case SMMU_EVT_F_WALK_EABT:
268
+ case SMMU_EVT_F_TRANSLATION:
269
+ case SMMU_EVT_F_ADDR_SIZE:
270
+ case SMMU_EVT_F_ACCESS:
271
+ case SMMU_EVT_F_PERMISSION:
272
+ event->u.f_walk_eabt.addr = iova;
273
+ break;
274
+ default:
275
+ break;
276
+ }
277
+}
278
+
279
/* Entry point to SMMU, does everything. */
280
static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion *mr, hwaddr addr,
281
IOMMUAccessFlags flag, int iommu_idx)
282
@@ -XXX,XX +XXX,XX @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion *mr, hwaddr addr,
283
goto epilogue;
284
}
285
286
- status = smmuv3_do_translate(s, addr, cfg, &event, flag, &cached_entry);
287
+ status = smmuv3_do_translate(s, addr, cfg, &event, flag,
288
+ &cached_entry, SMMU_CLASS_IN);
289
290
epilogue:
291
qemu_mutex_unlock(&s->mutex);
292
@@ -XXX,XX +XXX,XX @@ epilogue:
293
entry.perm);
294
break;
295
case SMMU_TRANS_ERROR:
296
+ smmuv3_fixup_event(&event, addr);
297
qemu_log_mask(LOG_GUEST_ERROR,
298
"%s translation failed for iova=0x%"PRIx64" (%s)\n",
299
mr->parent_obj.name, addr, smmu_event_string(event.type));
300
--
301
2.34.1
302
303
diff view generated by jsdifflib
Deleted patch
1
From: Mostafa Saleh <smostafa@google.com>
2
1
3
In the next patch, combine_tlb() will be added which combines 2 TLB
4
entries into one for nested translations, which chooses the granule
5
and level from the smallest entry.
6
7
This means that with nested translation, an entry can be cached with
8
the granule of stage-2 and not stage-1.
9
10
However, currently, the lookup for an IOVA is done with input stage
11
granule, which is stage-1 for nested configuration, which will not
12
work with the above logic.
13
This patch reworks lookup in that case, so it falls back to stage-2
14
granule if no entry is found using stage-1 granule.
15
16
Also, drop aligning the iova to avoid over-aligning in case the iova
17
is cached with a smaller granule, the TLB lookup will align the iova
18
anyway for each granule and level, and the page table walker doesn't
19
consider the page offset bits.
20
21
Signed-off-by: Mostafa Saleh <smostafa@google.com>
22
Reviewed-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
23
Reviewed-by: Eric Auger <eric.auger@redhat.com>
24
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
25
Message-id: 20240715084519.1189624-10-smostafa@google.com
26
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
27
---
28
hw/arm/smmu-common.c | 64 +++++++++++++++++++++++++++++---------------
29
1 file changed, 43 insertions(+), 21 deletions(-)
30
31
diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
32
index XXXXXXX..XXXXXXX 100644
33
--- a/hw/arm/smmu-common.c
34
+++ b/hw/arm/smmu-common.c
35
@@ -XXX,XX +XXX,XX @@ SMMUIOTLBKey smmu_get_iotlb_key(int asid, int vmid, uint64_t iova,
36
return key;
37
}
38
39
-SMMUTLBEntry *smmu_iotlb_lookup(SMMUState *bs, SMMUTransCfg *cfg,
40
- SMMUTransTableInfo *tt, hwaddr iova)
41
+static SMMUTLBEntry *smmu_iotlb_lookup_all_levels(SMMUState *bs,
42
+ SMMUTransCfg *cfg,
43
+ SMMUTransTableInfo *tt,
44
+ hwaddr iova)
45
{
46
uint8_t tg = (tt->granule_sz - 10) / 2;
47
uint8_t inputsize = 64 - tt->tsz;
48
@@ -XXX,XX +XXX,XX @@ SMMUTLBEntry *smmu_iotlb_lookup(SMMUState *bs, SMMUTransCfg *cfg,
49
}
50
level++;
51
}
52
+ return entry;
53
+}
54
+
55
+/**
56
+ * smmu_iotlb_lookup - Look up for a TLB entry.
57
+ * @bs: SMMU state which includes the TLB instance
58
+ * @cfg: Configuration of the translation
59
+ * @tt: Translation table info (granule and tsz)
60
+ * @iova: IOVA address to lookup
61
+ *
62
+ * returns a valid entry on success, otherwise NULL.
63
+ * In case of nested translation, tt can be updated to include
64
+ * the granule of the found entry as it might different from
65
+ * the IOVA granule.
66
+ */
67
+SMMUTLBEntry *smmu_iotlb_lookup(SMMUState *bs, SMMUTransCfg *cfg,
68
+ SMMUTransTableInfo *tt, hwaddr iova)
69
+{
70
+ SMMUTLBEntry *entry = NULL;
71
+
72
+ entry = smmu_iotlb_lookup_all_levels(bs, cfg, tt, iova);
73
+ /*
74
+ * For nested translation also try the s2 granule, as the TLB will insert
75
+ * it if the size of s2 tlb entry was smaller.
76
+ */
77
+ if (!entry && (cfg->stage == SMMU_NESTED) &&
78
+ (cfg->s2cfg.granule_sz != tt->granule_sz)) {
79
+ tt->granule_sz = cfg->s2cfg.granule_sz;
80
+ entry = smmu_iotlb_lookup_all_levels(bs, cfg, tt, iova);
81
+ }
82
83
if (entry) {
84
cfg->iotlb_hits++;
85
@@ -XXX,XX +XXX,XX @@ int smmu_ptw(SMMUTransCfg *cfg, dma_addr_t iova, IOMMUAccessFlags perm,
86
SMMUTLBEntry *smmu_translate(SMMUState *bs, SMMUTransCfg *cfg, dma_addr_t addr,
87
IOMMUAccessFlags flag, SMMUPTWEventInfo *info)
88
{
89
- uint64_t page_mask, aligned_addr;
90
SMMUTLBEntry *cached_entry = NULL;
91
SMMUTransTableInfo *tt;
92
int status;
93
94
/*
95
- * Combined attributes used for TLB lookup, as only one stage is supported,
96
- * it will hold attributes based on the enabled stage.
97
+ * Combined attributes used for TLB lookup, holds the attributes for
98
+ * the input stage.
99
*/
100
SMMUTransTableInfo tt_combined;
101
102
- if (cfg->stage == SMMU_STAGE_1) {
103
+ if (cfg->stage == SMMU_STAGE_2) {
104
+ /* Stage2. */
105
+ tt_combined.granule_sz = cfg->s2cfg.granule_sz;
106
+ tt_combined.tsz = cfg->s2cfg.tsz;
107
+ } else {
108
/* Select stage1 translation table. */
109
tt = select_tt(cfg, addr);
110
if (!tt) {
111
@@ -XXX,XX +XXX,XX @@ SMMUTLBEntry *smmu_translate(SMMUState *bs, SMMUTransCfg *cfg, dma_addr_t addr,
112
}
113
tt_combined.granule_sz = tt->granule_sz;
114
tt_combined.tsz = tt->tsz;
115
-
116
- } else {
117
- /* Stage2. */
118
- tt_combined.granule_sz = cfg->s2cfg.granule_sz;
119
- tt_combined.tsz = cfg->s2cfg.tsz;
120
}
121
122
- /*
123
- * TLB lookup looks for granule and input size for a translation stage,
124
- * as only one stage is supported right now, choose the right values
125
- * from the configuration.
126
- */
127
- page_mask = (1ULL << tt_combined.granule_sz) - 1;
128
- aligned_addr = addr & ~page_mask;
129
-
130
- cached_entry = smmu_iotlb_lookup(bs, cfg, &tt_combined, aligned_addr);
131
+ cached_entry = smmu_iotlb_lookup(bs, cfg, &tt_combined, addr);
132
if (cached_entry) {
133
if ((flag & IOMMU_WO) && !(cached_entry->entry.perm & IOMMU_WO)) {
134
info->type = SMMU_PTW_ERR_PERMISSION;
135
@@ -XXX,XX +XXX,XX @@ SMMUTLBEntry *smmu_translate(SMMUState *bs, SMMUTransCfg *cfg, dma_addr_t addr,
136
}
137
138
cached_entry = g_new0(SMMUTLBEntry, 1);
139
- status = smmu_ptw(cfg, aligned_addr, flag, cached_entry, info);
140
+ status = smmu_ptw(cfg, addr, flag, cached_entry, info);
141
if (status) {
142
g_free(cached_entry);
143
return NULL;
144
--
145
2.34.1
146
147
diff view generated by jsdifflib
Deleted patch
1
From: Mostafa Saleh <smostafa@google.com>
2
1
3
This patch adds support for nested (combined) TLB entries.
4
The main function combine_tlb() is not used here but in the next
5
patches, but to simplify the patches it is introduced first.
6
7
Main changes:
8
1) New field added in the SMMUTLBEntry struct: parent_perm, for
9
nested TLB, holds the stage-2 permission, this can be used to know
10
the origin of a permission fault from a cached entry as caching
11
the “and” of the permissions loses this information.
12
13
SMMUPTWEventInfo is used to hold information about PTW faults so
14
the event can be populated, the value of stage used to be set
15
based on the current stage for TLB permission faults, however
16
with the parent_perm, it is now set based on which perm has
17
the missing permission
18
19
When nesting is not enabled it has the same value as perm which
20
doesn't change the logic.
21
22
2) As combined TLB implementation is used, the combination logic
23
chooses:
24
- tg and level from the entry which has the smallest addr_mask.
25
- Based on that the iova that would be cached is recalculated.
26
- Translated_addr is chosen from stage-2.
27
28
Reviewed-by: Eric Auger <eric.auger@redhat.com>
29
Reviewed-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
30
Signed-off-by: Mostafa Saleh <smostafa@google.com>
31
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
32
Message-id: 20240715084519.1189624-11-smostafa@google.com
33
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
34
---
35
include/hw/arm/smmu-common.h | 1 +
36
hw/arm/smmu-common.c | 37 ++++++++++++++++++++++++++++++++----
37
2 files changed, 34 insertions(+), 4 deletions(-)
38
39
diff --git a/include/hw/arm/smmu-common.h b/include/hw/arm/smmu-common.h
40
index XXXXXXX..XXXXXXX 100644
41
--- a/include/hw/arm/smmu-common.h
42
+++ b/include/hw/arm/smmu-common.h
43
@@ -XXX,XX +XXX,XX @@ typedef struct SMMUTLBEntry {
44
IOMMUTLBEntry entry;
45
uint8_t level;
46
uint8_t granule;
47
+ IOMMUAccessFlags parent_perm;
48
} SMMUTLBEntry;
49
50
/* Stage-2 configuration. */
51
diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
52
index XXXXXXX..XXXXXXX 100644
53
--- a/hw/arm/smmu-common.c
54
+++ b/hw/arm/smmu-common.c
55
@@ -XXX,XX +XXX,XX @@ static int smmu_ptw_64_s1(SMMUTransCfg *cfg,
56
tlbe->entry.translated_addr = gpa;
57
tlbe->entry.iova = iova & ~mask;
58
tlbe->entry.addr_mask = mask;
59
- tlbe->entry.perm = PTE_AP_TO_PERM(ap);
60
+ tlbe->parent_perm = PTE_AP_TO_PERM(ap);
61
+ tlbe->entry.perm = tlbe->parent_perm;
62
tlbe->level = level;
63
tlbe->granule = granule_sz;
64
return 0;
65
@@ -XXX,XX +XXX,XX @@ static int smmu_ptw_64_s2(SMMUTransCfg *cfg,
66
tlbe->entry.translated_addr = gpa;
67
tlbe->entry.iova = ipa & ~mask;
68
tlbe->entry.addr_mask = mask;
69
- tlbe->entry.perm = s2ap;
70
+ tlbe->parent_perm = s2ap;
71
+ tlbe->entry.perm = tlbe->parent_perm;
72
tlbe->level = level;
73
tlbe->granule = granule_sz;
74
return 0;
75
@@ -XXX,XX +XXX,XX @@ error:
76
return -EINVAL;
77
}
78
79
+/*
80
+ * combine S1 and S2 TLB entries into a single entry.
81
+ * As a result the S1 entry is overriden with combined data.
82
+ */
83
+static void __attribute__((unused)) combine_tlb(SMMUTLBEntry *tlbe,
84
+ SMMUTLBEntry *tlbe_s2,
85
+ dma_addr_t iova,
86
+ SMMUTransCfg *cfg)
87
+{
88
+ if (tlbe_s2->entry.addr_mask < tlbe->entry.addr_mask) {
89
+ tlbe->entry.addr_mask = tlbe_s2->entry.addr_mask;
90
+ tlbe->granule = tlbe_s2->granule;
91
+ tlbe->level = tlbe_s2->level;
92
+ }
93
+
94
+ tlbe->entry.translated_addr = CACHED_ENTRY_TO_ADDR(tlbe_s2,
95
+ tlbe->entry.translated_addr);
96
+
97
+ tlbe->entry.iova = iova & ~tlbe->entry.addr_mask;
98
+ /* parent_perm has s2 perm while perm keeps s1 perm. */
99
+ tlbe->parent_perm = tlbe_s2->entry.perm;
100
+ return;
101
+}
102
+
103
/**
104
* smmu_ptw - Walk the page tables for an IOVA, according to @cfg
105
*
106
@@ -XXX,XX +XXX,XX @@ SMMUTLBEntry *smmu_translate(SMMUState *bs, SMMUTransCfg *cfg, dma_addr_t addr,
107
108
cached_entry = smmu_iotlb_lookup(bs, cfg, &tt_combined, addr);
109
if (cached_entry) {
110
- if ((flag & IOMMU_WO) && !(cached_entry->entry.perm & IOMMU_WO)) {
111
+ if ((flag & IOMMU_WO) && !(cached_entry->entry.perm &
112
+ cached_entry->parent_perm & IOMMU_WO)) {
113
info->type = SMMU_PTW_ERR_PERMISSION;
114
- info->stage = cfg->stage;
115
+ info->stage = !(cached_entry->entry.perm & IOMMU_WO) ?
116
+ SMMU_STAGE_1 :
117
+ SMMU_STAGE_2;
118
return NULL;
119
}
120
return cached_entry;
121
--
122
2.34.1
123
124
diff view generated by jsdifflib
Deleted patch
1
From: Mostafa Saleh <smostafa@google.com>
2
1
3
When nested translation is requested, do the following:
4
- Translate stage-1 table address IPA into PA through stage-2.
5
- Translate stage-1 table walk output (IPA) through stage-2.
6
- Create a single TLB entry from stage-1 and stage-2 translations
7
using logic introduced before.
8
9
smmu_ptw() has a new argument SMMUState which include the TLB as
10
stage-1 table address can be cached in there.
11
12
Also in smmu_ptw(), a separate path used for nesting to simplify the
13
code, although some logic can be combined.
14
15
With nested translation class of translation fault can be different,
16
from the class of the translation, as faults from translating stage-1
17
tables are considered as CLASS_TT and not CLASS_IN, a new member
18
"is_ipa_descriptor" was added to "SMMUPTWEventInfo" to distinguish faults
19
from walking stage 1 translation table and faults from translating
20
an IPA for a transaction.
21
22
Signed-off-by: Mostafa Saleh <smostafa@google.com>
23
Reviewed-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
24
Reviewed-by: Eric Auger <eric.auger@redhat.com>
25
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
26
Message-id: 20240715084519.1189624-12-smostafa@google.com
27
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
28
---
29
include/hw/arm/smmu-common.h | 7 ++--
30
hw/arm/smmu-common.c | 74 +++++++++++++++++++++++++++++++-----
31
hw/arm/smmuv3.c | 14 +++++++
32
3 files changed, 82 insertions(+), 13 deletions(-)
33
34
diff --git a/include/hw/arm/smmu-common.h b/include/hw/arm/smmu-common.h
35
index XXXXXXX..XXXXXXX 100644
36
--- a/include/hw/arm/smmu-common.h
37
+++ b/include/hw/arm/smmu-common.h
38
@@ -XXX,XX +XXX,XX @@ typedef struct SMMUPTWEventInfo {
39
SMMUStage stage;
40
SMMUPTWEventType type;
41
dma_addr_t addr; /* fetched address that induced an abort, if any */
42
+ bool is_ipa_descriptor; /* src for fault in nested translation. */
43
} SMMUPTWEventInfo;
44
45
typedef struct SMMUTransTableInfo {
46
@@ -XXX,XX +XXX,XX @@ static inline uint16_t smmu_get_sid(SMMUDevice *sdev)
47
* smmu_ptw - Perform the page table walk for a given iova / access flags
48
* pair, according to @cfg translation config
49
*/
50
-int smmu_ptw(SMMUTransCfg *cfg, dma_addr_t iova, IOMMUAccessFlags perm,
51
- SMMUTLBEntry *tlbe, SMMUPTWEventInfo *info);
52
-
53
+int smmu_ptw(SMMUState *bs, SMMUTransCfg *cfg, dma_addr_t iova,
54
+ IOMMUAccessFlags perm, SMMUTLBEntry *tlbe,
55
+ SMMUPTWEventInfo *info);
56
57
/*
58
* smmu_translate - Look for a translation in TLB, if not, do a PTW.
59
diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
60
index XXXXXXX..XXXXXXX 100644
61
--- a/hw/arm/smmu-common.c
62
+++ b/hw/arm/smmu-common.c
63
@@ -XXX,XX +XXX,XX @@ SMMUTransTableInfo *select_tt(SMMUTransCfg *cfg, dma_addr_t iova)
64
return NULL;
65
}
66
67
+/* Translate stage-1 table address using stage-2 page table. */
68
+static inline int translate_table_addr_ipa(SMMUState *bs,
69
+ dma_addr_t *table_addr,
70
+ SMMUTransCfg *cfg,
71
+ SMMUPTWEventInfo *info)
72
+{
73
+ dma_addr_t addr = *table_addr;
74
+ SMMUTLBEntry *cached_entry;
75
+ int asid;
76
+
77
+ /*
78
+ * The translation table walks performed from TTB0 or TTB1 are always
79
+ * performed in IPA space if stage 2 translations are enabled.
80
+ */
81
+ asid = cfg->asid;
82
+ cfg->stage = SMMU_STAGE_2;
83
+ cfg->asid = -1;
84
+ cached_entry = smmu_translate(bs, cfg, addr, IOMMU_RO, info);
85
+ cfg->asid = asid;
86
+ cfg->stage = SMMU_NESTED;
87
+
88
+ if (cached_entry) {
89
+ *table_addr = CACHED_ENTRY_TO_ADDR(cached_entry, addr);
90
+ return 0;
91
+ }
92
+
93
+ info->stage = SMMU_STAGE_2;
94
+ info->addr = addr;
95
+ info->is_ipa_descriptor = true;
96
+ return -EINVAL;
97
+}
98
+
99
/**
100
* smmu_ptw_64_s1 - VMSAv8-64 Walk of the page tables for a given IOVA
101
+ * @bs: smmu state which includes TLB instance
102
* @cfg: translation config
103
* @iova: iova to translate
104
* @perm: access type
105
@@ -XXX,XX +XXX,XX @@ SMMUTransTableInfo *select_tt(SMMUTransCfg *cfg, dma_addr_t iova)
106
* Upon success, @tlbe is filled with translated_addr and entry
107
* permission rights.
108
*/
109
-static int smmu_ptw_64_s1(SMMUTransCfg *cfg,
110
+static int smmu_ptw_64_s1(SMMUState *bs, SMMUTransCfg *cfg,
111
dma_addr_t iova, IOMMUAccessFlags perm,
112
SMMUTLBEntry *tlbe, SMMUPTWEventInfo *info)
113
{
114
@@ -XXX,XX +XXX,XX @@ static int smmu_ptw_64_s1(SMMUTransCfg *cfg,
115
goto error;
116
}
117
baseaddr = get_table_pte_address(pte, granule_sz);
118
+ if (cfg->stage == SMMU_NESTED) {
119
+ if (translate_table_addr_ipa(bs, &baseaddr, cfg, info)) {
120
+ goto error;
121
+ }
122
+ }
123
level++;
124
continue;
125
} else if (is_page_pte(pte, level)) {
126
@@ -XXX,XX +XXX,XX @@ error:
127
* combine S1 and S2 TLB entries into a single entry.
128
* As a result the S1 entry is overriden with combined data.
129
*/
130
-static void __attribute__((unused)) combine_tlb(SMMUTLBEntry *tlbe,
131
- SMMUTLBEntry *tlbe_s2,
132
- dma_addr_t iova,
133
- SMMUTransCfg *cfg)
134
+static void combine_tlb(SMMUTLBEntry *tlbe, SMMUTLBEntry *tlbe_s2,
135
+ dma_addr_t iova, SMMUTransCfg *cfg)
136
{
137
if (tlbe_s2->entry.addr_mask < tlbe->entry.addr_mask) {
138
tlbe->entry.addr_mask = tlbe_s2->entry.addr_mask;
139
@@ -XXX,XX +XXX,XX @@ static void __attribute__((unused)) combine_tlb(SMMUTLBEntry *tlbe,
140
/**
141
* smmu_ptw - Walk the page tables for an IOVA, according to @cfg
142
*
143
+ * @bs: smmu state which includes TLB instance
144
* @cfg: translation configuration
145
* @iova: iova to translate
146
* @perm: tentative access type
147
@@ -XXX,XX +XXX,XX @@ static void __attribute__((unused)) combine_tlb(SMMUTLBEntry *tlbe,
148
*
149
* return 0 on success
150
*/
151
-int smmu_ptw(SMMUTransCfg *cfg, dma_addr_t iova, IOMMUAccessFlags perm,
152
- SMMUTLBEntry *tlbe, SMMUPTWEventInfo *info)
153
+int smmu_ptw(SMMUState *bs, SMMUTransCfg *cfg, dma_addr_t iova,
154
+ IOMMUAccessFlags perm, SMMUTLBEntry *tlbe, SMMUPTWEventInfo *info)
155
{
156
+ int ret;
157
+ SMMUTLBEntry tlbe_s2;
158
+ dma_addr_t ipa;
159
+
160
if (cfg->stage == SMMU_STAGE_1) {
161
- return smmu_ptw_64_s1(cfg, iova, perm, tlbe, info);
162
+ return smmu_ptw_64_s1(bs, cfg, iova, perm, tlbe, info);
163
} else if (cfg->stage == SMMU_STAGE_2) {
164
/*
165
* If bypassing stage 1(or unimplemented), the input address is passed
166
@@ -XXX,XX +XXX,XX @@ int smmu_ptw(SMMUTransCfg *cfg, dma_addr_t iova, IOMMUAccessFlags perm,
167
return smmu_ptw_64_s2(cfg, iova, perm, tlbe, info);
168
}
169
170
- g_assert_not_reached();
171
+ /* SMMU_NESTED. */
172
+ ret = smmu_ptw_64_s1(bs, cfg, iova, perm, tlbe, info);
173
+ if (ret) {
174
+ return ret;
175
+ }
176
+
177
+ ipa = CACHED_ENTRY_TO_ADDR(tlbe, iova);
178
+ ret = smmu_ptw_64_s2(cfg, ipa, perm, &tlbe_s2, info);
179
+ if (ret) {
180
+ return ret;
181
+ }
182
+
183
+ combine_tlb(tlbe, &tlbe_s2, iova, cfg);
184
+ return 0;
185
}
186
187
SMMUTLBEntry *smmu_translate(SMMUState *bs, SMMUTransCfg *cfg, dma_addr_t addr,
188
@@ -XXX,XX +XXX,XX @@ SMMUTLBEntry *smmu_translate(SMMUState *bs, SMMUTransCfg *cfg, dma_addr_t addr,
189
}
190
191
cached_entry = g_new0(SMMUTLBEntry, 1);
192
- status = smmu_ptw(cfg, addr, flag, cached_entry, info);
193
+ status = smmu_ptw(bs, cfg, addr, flag, cached_entry, info);
194
if (status) {
195
g_free(cached_entry);
196
return NULL;
197
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
198
index XXXXXXX..XXXXXXX 100644
199
--- a/hw/arm/smmuv3.c
200
+++ b/hw/arm/smmuv3.c
201
@@ -XXX,XX +XXX,XX @@ static SMMUTranslationStatus smmuv3_do_translate(SMMUv3State *s, hwaddr addr,
202
if (!cached_entry) {
203
/* All faults from PTW has S2 field. */
204
event->u.f_walk_eabt.s2 = (ptw_info.stage == SMMU_STAGE_2);
205
+ /*
206
+ * Fault class is set as follows based on "class" input to
207
+ * the function and to "ptw_info" from "smmu_translate()"
208
+ * For stage-1:
209
+ * - EABT => CLASS_TT (hardcoded)
210
+ * - other events => CLASS_IN (input to function)
211
+ * For stage-2 => CLASS_IN (input to function)
212
+ * For nested, for all events:
213
+ * - CD fetch => CLASS_CD (input to function)
214
+ * - walking stage 1 translation table => CLASS_TT (from
215
+ * is_ipa_descriptor or input in case of TTBx)
216
+ * - s2 translation => CLASS_IN (input to function)
217
+ */
218
+ class = ptw_info.is_ipa_descriptor ? SMMU_CLASS_TT : class;
219
switch (ptw_info.type) {
220
case SMMU_PTW_ERR_WALK_EABT:
221
event->type = SMMU_EVT_F_WALK_EABT;
222
--
223
2.34.1
224
225
diff view generated by jsdifflib
Deleted patch
1
From: Mostafa Saleh <smostafa@google.com>
2
1
3
With nesting, we would need to invalidate IPAs without
4
over-invalidating stage-1 IOVAs. This can be done by
5
distinguishing IPAs in the TLBs by having ASID=-1.
6
To achieve that, rework the invalidation for IPAs to have a
7
separate function, while for IOVA invalidation ASID=-1 means
8
invalidate for all ASIDs.
9
10
Reviewed-by: Eric Auger <eric.auger@redhat.com>
11
Signed-off-by: Mostafa Saleh <smostafa@google.com>
12
Reviewed-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
13
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
14
Message-id: 20240715084519.1189624-13-smostafa@google.com
15
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
16
---
17
include/hw/arm/smmu-common.h | 3 ++-
18
hw/arm/smmu-common.c | 47 ++++++++++++++++++++++++++++++++++++
19
hw/arm/smmuv3.c | 23 ++++++++++++------
20
hw/arm/trace-events | 2 +-
21
4 files changed, 66 insertions(+), 9 deletions(-)
22
23
diff --git a/include/hw/arm/smmu-common.h b/include/hw/arm/smmu-common.h
24
index XXXXXXX..XXXXXXX 100644
25
--- a/include/hw/arm/smmu-common.h
26
+++ b/include/hw/arm/smmu-common.h
27
@@ -XXX,XX +XXX,XX @@ void smmu_iotlb_inv_asid(SMMUState *s, int asid);
28
void smmu_iotlb_inv_vmid(SMMUState *s, int vmid);
29
void smmu_iotlb_inv_iova(SMMUState *s, int asid, int vmid, dma_addr_t iova,
30
uint8_t tg, uint64_t num_pages, uint8_t ttl);
31
-
32
+void smmu_iotlb_inv_ipa(SMMUState *s, int vmid, dma_addr_t ipa, uint8_t tg,
33
+ uint64_t num_pages, uint8_t ttl);
34
/* Unmap the range of all the notifiers registered to any IOMMU mr */
35
void smmu_inv_notifiers_all(SMMUState *s);
36
37
diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
38
index XXXXXXX..XXXXXXX 100644
39
--- a/hw/arm/smmu-common.c
40
+++ b/hw/arm/smmu-common.c
41
@@ -XXX,XX +XXX,XX @@ static gboolean smmu_hash_remove_by_asid_vmid_iova(gpointer key, gpointer value,
42
((entry->iova & ~info->mask) == info->iova);
43
}
44
45
+static gboolean smmu_hash_remove_by_vmid_ipa(gpointer key, gpointer value,
46
+ gpointer user_data)
47
+{
48
+ SMMUTLBEntry *iter = (SMMUTLBEntry *)value;
49
+ IOMMUTLBEntry *entry = &iter->entry;
50
+ SMMUIOTLBPageInvInfo *info = (SMMUIOTLBPageInvInfo *)user_data;
51
+ SMMUIOTLBKey iotlb_key = *(SMMUIOTLBKey *)key;
52
+
53
+ if (SMMU_IOTLB_ASID(iotlb_key) >= 0) {
54
+ /* This is a stage-1 address. */
55
+ return false;
56
+ }
57
+ if (info->vmid != SMMU_IOTLB_VMID(iotlb_key)) {
58
+ return false;
59
+ }
60
+ return ((info->iova & ~entry->addr_mask) == entry->iova) ||
61
+ ((entry->iova & ~info->mask) == info->iova);
62
+}
63
+
64
void smmu_iotlb_inv_iova(SMMUState *s, int asid, int vmid, dma_addr_t iova,
65
uint8_t tg, uint64_t num_pages, uint8_t ttl)
66
{
67
@@ -XXX,XX +XXX,XX @@ void smmu_iotlb_inv_iova(SMMUState *s, int asid, int vmid, dma_addr_t iova,
68
&info);
69
}
70
71
+/*
72
+ * Similar to smmu_iotlb_inv_iova(), but for Stage-2, ASID is always -1,
73
+ * in Stage-1 invalidation ASID = -1, means don't care.
74
+ */
75
+void smmu_iotlb_inv_ipa(SMMUState *s, int vmid, dma_addr_t ipa, uint8_t tg,
76
+ uint64_t num_pages, uint8_t ttl)
77
+{
78
+ uint8_t granule = tg ? tg * 2 + 10 : 12;
79
+ int asid = -1;
80
+
81
+ if (ttl && (num_pages == 1)) {
82
+ SMMUIOTLBKey key = smmu_get_iotlb_key(asid, vmid, ipa, tg, ttl);
83
+
84
+ if (g_hash_table_remove(s->iotlb, &key)) {
85
+ return;
86
+ }
87
+ }
88
+
89
+ SMMUIOTLBPageInvInfo info = {
90
+ .iova = ipa,
91
+ .vmid = vmid,
92
+ .mask = (num_pages << granule) - 1};
93
+
94
+ g_hash_table_foreach_remove(s->iotlb,
95
+ smmu_hash_remove_by_vmid_ipa,
96
+ &info);
97
+}
98
+
99
void smmu_iotlb_inv_asid(SMMUState *s, int asid)
100
{
101
trace_smmu_iotlb_inv_asid(asid);
102
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
103
index XXXXXXX..XXXXXXX 100644
104
--- a/hw/arm/smmuv3.c
105
+++ b/hw/arm/smmuv3.c
106
@@ -XXX,XX +XXX,XX @@ static void smmuv3_inv_notifiers_iova(SMMUState *s, int asid, int vmid,
107
}
108
}
109
110
-static void smmuv3_range_inval(SMMUState *s, Cmd *cmd)
111
+static void smmuv3_range_inval(SMMUState *s, Cmd *cmd, SMMUStage stage)
112
{
113
dma_addr_t end, addr = CMD_ADDR(cmd);
114
uint8_t type = CMD_TYPE(cmd);
115
@@ -XXX,XX +XXX,XX @@ static void smmuv3_range_inval(SMMUState *s, Cmd *cmd)
116
}
117
118
if (!tg) {
119
- trace_smmuv3_range_inval(vmid, asid, addr, tg, 1, ttl, leaf);
120
+ trace_smmuv3_range_inval(vmid, asid, addr, tg, 1, ttl, leaf, stage);
121
smmuv3_inv_notifiers_iova(s, asid, vmid, addr, tg, 1);
122
- smmu_iotlb_inv_iova(s, asid, vmid, addr, tg, 1, ttl);
123
+ if (stage == SMMU_STAGE_1) {
124
+ smmu_iotlb_inv_iova(s, asid, vmid, addr, tg, 1, ttl);
125
+ } else {
126
+ smmu_iotlb_inv_ipa(s, vmid, addr, tg, 1, ttl);
127
+ }
128
return;
129
}
130
131
@@ -XXX,XX +XXX,XX @@ static void smmuv3_range_inval(SMMUState *s, Cmd *cmd)
132
uint64_t mask = dma_aligned_pow2_mask(addr, end, 64);
133
134
num_pages = (mask + 1) >> granule;
135
- trace_smmuv3_range_inval(vmid, asid, addr, tg, num_pages, ttl, leaf);
136
+ trace_smmuv3_range_inval(vmid, asid, addr, tg, num_pages,
137
+ ttl, leaf, stage);
138
smmuv3_inv_notifiers_iova(s, asid, vmid, addr, tg, num_pages);
139
- smmu_iotlb_inv_iova(s, asid, vmid, addr, tg, num_pages, ttl);
140
+ if (stage == SMMU_STAGE_1) {
141
+ smmu_iotlb_inv_iova(s, asid, vmid, addr, tg, num_pages, ttl);
142
+ } else {
143
+ smmu_iotlb_inv_ipa(s, vmid, addr, tg, num_pages, ttl);
144
+ }
145
addr += mask + 1;
146
}
147
}
148
@@ -XXX,XX +XXX,XX @@ static int smmuv3_cmdq_consume(SMMUv3State *s)
149
cmd_error = SMMU_CERROR_ILL;
150
break;
151
}
152
- smmuv3_range_inval(bs, &cmd);
153
+ smmuv3_range_inval(bs, &cmd, SMMU_STAGE_1);
154
break;
155
case SMMU_CMD_TLBI_S12_VMALL:
156
{
157
@@ -XXX,XX +XXX,XX @@ static int smmuv3_cmdq_consume(SMMUv3State *s)
158
* As currently only either s1 or s2 are supported
159
* we can reuse same function for s2.
160
*/
161
- smmuv3_range_inval(bs, &cmd);
162
+ smmuv3_range_inval(bs, &cmd, SMMU_STAGE_2);
163
break;
164
case SMMU_CMD_TLBI_EL3_ALL:
165
case SMMU_CMD_TLBI_EL3_VA:
166
diff --git a/hw/arm/trace-events b/hw/arm/trace-events
167
index XXXXXXX..XXXXXXX 100644
168
--- a/hw/arm/trace-events
169
+++ b/hw/arm/trace-events
170
@@ -XXX,XX +XXX,XX @@ smmuv3_cmdq_cfgi_ste_range(int start, int end) "start=0x%x - end=0x%x"
171
smmuv3_cmdq_cfgi_cd(uint32_t sid) "sid=0x%x"
172
smmuv3_config_cache_hit(uint32_t sid, uint32_t hits, uint32_t misses, uint32_t perc) "Config cache HIT for sid=0x%x (hits=%d, misses=%d, hit rate=%d)"
173
smmuv3_config_cache_miss(uint32_t sid, uint32_t hits, uint32_t misses, uint32_t perc) "Config cache MISS for sid=0x%x (hits=%d, misses=%d, hit rate=%d)"
174
-smmuv3_range_inval(int vmid, int asid, uint64_t addr, uint8_t tg, uint64_t num_pages, uint8_t ttl, bool leaf) "vmid=%d asid=%d addr=0x%"PRIx64" tg=%d num_pages=0x%"PRIx64" ttl=%d leaf=%d"
175
+smmuv3_range_inval(int vmid, int asid, uint64_t addr, uint8_t tg, uint64_t num_pages, uint8_t ttl, bool leaf, int stage) "vmid=%d asid=%d addr=0x%"PRIx64" tg=%d num_pages=0x%"PRIx64" ttl=%d leaf=%d stage=%d"
176
smmuv3_cmdq_tlbi_nh(void) ""
177
smmuv3_cmdq_tlbi_nh_asid(int asid) "asid=%d"
178
smmuv3_cmdq_tlbi_s12_vmid(int vmid) "vmid=%d"
179
--
180
2.34.1
181
182
diff view generated by jsdifflib
Deleted patch
1
From: Mostafa Saleh <smostafa@google.com>
2
1
3
Soon, instead of doing TLB invalidation by ASID only, VMID will be
4
also required.
5
Add smmu_iotlb_inv_asid_vmid() which invalidates by both ASID and VMID.
6
7
However, at the moment this function is only used in SMMU_CMD_TLBI_NH_ASID
8
which is a stage-1 command, so passing VMID = -1 keeps the original
9
behaviour.
10
11
Reviewed-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
12
Reviewed-by: Eric Auger <eric.auger@redhat.com>
13
Signed-off-by: Mostafa Saleh <smostafa@google.com>
14
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
15
Message-id: 20240715084519.1189624-14-smostafa@google.com
16
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
17
---
18
include/hw/arm/smmu-common.h | 2 +-
19
hw/arm/smmu-common.c | 20 +++++++++++++-------
20
hw/arm/smmuv3.c | 2 +-
21
hw/arm/trace-events | 2 +-
22
4 files changed, 16 insertions(+), 10 deletions(-)
23
24
diff --git a/include/hw/arm/smmu-common.h b/include/hw/arm/smmu-common.h
25
index XXXXXXX..XXXXXXX 100644
26
--- a/include/hw/arm/smmu-common.h
27
+++ b/include/hw/arm/smmu-common.h
28
@@ -XXX,XX +XXX,XX @@ void smmu_iotlb_insert(SMMUState *bs, SMMUTransCfg *cfg, SMMUTLBEntry *entry);
29
SMMUIOTLBKey smmu_get_iotlb_key(int asid, int vmid, uint64_t iova,
30
uint8_t tg, uint8_t level);
31
void smmu_iotlb_inv_all(SMMUState *s);
32
-void smmu_iotlb_inv_asid(SMMUState *s, int asid);
33
+void smmu_iotlb_inv_asid_vmid(SMMUState *s, int asid, int vmid);
34
void smmu_iotlb_inv_vmid(SMMUState *s, int vmid);
35
void smmu_iotlb_inv_iova(SMMUState *s, int asid, int vmid, dma_addr_t iova,
36
uint8_t tg, uint64_t num_pages, uint8_t ttl);
37
diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
38
index XXXXXXX..XXXXXXX 100644
39
--- a/hw/arm/smmu-common.c
40
+++ b/hw/arm/smmu-common.c
41
@@ -XXX,XX +XXX,XX @@ void smmu_iotlb_inv_all(SMMUState *s)
42
g_hash_table_remove_all(s->iotlb);
43
}
44
45
-static gboolean smmu_hash_remove_by_asid(gpointer key, gpointer value,
46
- gpointer user_data)
47
+static gboolean smmu_hash_remove_by_asid_vmid(gpointer key, gpointer value,
48
+ gpointer user_data)
49
{
50
- int asid = *(int *)user_data;
51
+ SMMUIOTLBPageInvInfo *info = (SMMUIOTLBPageInvInfo *)user_data;
52
SMMUIOTLBKey *iotlb_key = (SMMUIOTLBKey *)key;
53
54
- return SMMU_IOTLB_ASID(*iotlb_key) == asid;
55
+ return (SMMU_IOTLB_ASID(*iotlb_key) == info->asid) &&
56
+ (SMMU_IOTLB_VMID(*iotlb_key) == info->vmid);
57
}
58
59
static gboolean smmu_hash_remove_by_vmid(gpointer key, gpointer value,
60
@@ -XXX,XX +XXX,XX @@ void smmu_iotlb_inv_ipa(SMMUState *s, int vmid, dma_addr_t ipa, uint8_t tg,
61
&info);
62
}
63
64
-void smmu_iotlb_inv_asid(SMMUState *s, int asid)
65
+void smmu_iotlb_inv_asid_vmid(SMMUState *s, int asid, int vmid)
66
{
67
- trace_smmu_iotlb_inv_asid(asid);
68
- g_hash_table_foreach_remove(s->iotlb, smmu_hash_remove_by_asid, &asid);
69
+ SMMUIOTLBPageInvInfo info = {
70
+ .asid = asid,
71
+ .vmid = vmid,
72
+ };
73
+
74
+ trace_smmu_iotlb_inv_asid_vmid(asid, vmid);
75
+ g_hash_table_foreach_remove(s->iotlb, smmu_hash_remove_by_asid_vmid, &info);
76
}
77
78
void smmu_iotlb_inv_vmid(SMMUState *s, int vmid)
79
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
80
index XXXXXXX..XXXXXXX 100644
81
--- a/hw/arm/smmuv3.c
82
+++ b/hw/arm/smmuv3.c
83
@@ -XXX,XX +XXX,XX @@ static int smmuv3_cmdq_consume(SMMUv3State *s)
84
85
trace_smmuv3_cmdq_tlbi_nh_asid(asid);
86
smmu_inv_notifiers_all(&s->smmu_state);
87
- smmu_iotlb_inv_asid(bs, asid);
88
+ smmu_iotlb_inv_asid_vmid(bs, asid, -1);
89
break;
90
}
91
case SMMU_CMD_TLBI_NH_ALL:
92
diff --git a/hw/arm/trace-events b/hw/arm/trace-events
93
index XXXXXXX..XXXXXXX 100644
94
--- a/hw/arm/trace-events
95
+++ b/hw/arm/trace-events
96
@@ -XXX,XX +XXX,XX @@ smmu_ptw_page_pte(int stage, int level, uint64_t iova, uint64_t baseaddr, uint6
97
smmu_ptw_block_pte(int stage, int level, uint64_t baseaddr, uint64_t pteaddr, uint64_t pte, uint64_t iova, uint64_t gpa, int bsize_mb) "stage=%d level=%d base@=0x%"PRIx64" pte@=0x%"PRIx64" pte=0x%"PRIx64" iova=0x%"PRIx64" block address = 0x%"PRIx64" block size = %d MiB"
98
smmu_get_pte(uint64_t baseaddr, int index, uint64_t pteaddr, uint64_t pte) "baseaddr=0x%"PRIx64" index=0x%x, pteaddr=0x%"PRIx64", pte=0x%"PRIx64
99
smmu_iotlb_inv_all(void) "IOTLB invalidate all"
100
-smmu_iotlb_inv_asid(int asid) "IOTLB invalidate asid=%d"
101
+smmu_iotlb_inv_asid_vmid(int asid, int vmid) "IOTLB invalidate asid=%d vmid=%d"
102
smmu_iotlb_inv_vmid(int vmid) "IOTLB invalidate vmid=%d"
103
smmu_iotlb_inv_iova(int asid, uint64_t addr) "IOTLB invalidate asid=%d addr=0x%"PRIx64
104
smmu_inv_notifiers_mr(const char *name) "iommu mr=%s"
105
--
106
2.34.1
107
108
diff view generated by jsdifflib
Deleted patch
1
From: Mostafa Saleh <smostafa@google.com>
2
1
3
Some commands need rework for nesting, as they used to assume S1
4
and S2 are mutually exclusive:
5
6
- CMD_TLBI_NH_ASID: Consider VMID if stage-2 is supported
7
- CMD_TLBI_NH_ALL: Consider VMID if stage-2 is supported, otherwise
8
invalidate everything, this required a new vmid invalidation
9
function for stage-1 only (ASID >= 0)
10
11
Also, rework trace events to reflect the new implementation.
12
13
Reviewed-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
14
Reviewed-by: Eric Auger <eric.auger@redhat.com>
15
Signed-off-by: Mostafa Saleh <smostafa@google.com>
16
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
17
Message-id: 20240715084519.1189624-15-smostafa@google.com
18
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
19
---
20
include/hw/arm/smmu-common.h | 1 +
21
hw/arm/smmu-common.c | 16 ++++++++++++++++
22
hw/arm/smmuv3.c | 28 ++++++++++++++++++++++++++--
23
hw/arm/trace-events | 4 +++-
24
4 files changed, 46 insertions(+), 3 deletions(-)
25
26
diff --git a/include/hw/arm/smmu-common.h b/include/hw/arm/smmu-common.h
27
index XXXXXXX..XXXXXXX 100644
28
--- a/include/hw/arm/smmu-common.h
29
+++ b/include/hw/arm/smmu-common.h
30
@@ -XXX,XX +XXX,XX @@ SMMUIOTLBKey smmu_get_iotlb_key(int asid, int vmid, uint64_t iova,
31
void smmu_iotlb_inv_all(SMMUState *s);
32
void smmu_iotlb_inv_asid_vmid(SMMUState *s, int asid, int vmid);
33
void smmu_iotlb_inv_vmid(SMMUState *s, int vmid);
34
+void smmu_iotlb_inv_vmid_s1(SMMUState *s, int vmid);
35
void smmu_iotlb_inv_iova(SMMUState *s, int asid, int vmid, dma_addr_t iova,
36
uint8_t tg, uint64_t num_pages, uint8_t ttl);
37
void smmu_iotlb_inv_ipa(SMMUState *s, int vmid, dma_addr_t ipa, uint8_t tg,
38
diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
39
index XXXXXXX..XXXXXXX 100644
40
--- a/hw/arm/smmu-common.c
41
+++ b/hw/arm/smmu-common.c
42
@@ -XXX,XX +XXX,XX @@ static gboolean smmu_hash_remove_by_vmid(gpointer key, gpointer value,
43
return SMMU_IOTLB_VMID(*iotlb_key) == vmid;
44
}
45
46
+static gboolean smmu_hash_remove_by_vmid_s1(gpointer key, gpointer value,
47
+ gpointer user_data)
48
+{
49
+ int vmid = *(int *)user_data;
50
+ SMMUIOTLBKey *iotlb_key = (SMMUIOTLBKey *)key;
51
+
52
+ return (SMMU_IOTLB_VMID(*iotlb_key) == vmid) &&
53
+ (SMMU_IOTLB_ASID(*iotlb_key) >= 0);
54
+}
55
+
56
static gboolean smmu_hash_remove_by_asid_vmid_iova(gpointer key, gpointer value,
57
gpointer user_data)
58
{
59
@@ -XXX,XX +XXX,XX @@ void smmu_iotlb_inv_vmid(SMMUState *s, int vmid)
60
g_hash_table_foreach_remove(s->iotlb, smmu_hash_remove_by_vmid, &vmid);
61
}
62
63
+inline void smmu_iotlb_inv_vmid_s1(SMMUState *s, int vmid)
64
+{
65
+ trace_smmu_iotlb_inv_vmid_s1(vmid);
66
+ g_hash_table_foreach_remove(s->iotlb, smmu_hash_remove_by_vmid_s1, &vmid);
67
+}
68
+
69
/* VMSAv8-64 Translation */
70
71
/**
72
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
73
index XXXXXXX..XXXXXXX 100644
74
--- a/hw/arm/smmuv3.c
75
+++ b/hw/arm/smmuv3.c
76
@@ -XXX,XX +XXX,XX @@ static int smmuv3_cmdq_consume(SMMUv3State *s)
77
case SMMU_CMD_TLBI_NH_ASID:
78
{
79
int asid = CMD_ASID(&cmd);
80
+ int vmid = -1;
81
82
if (!STAGE1_SUPPORTED(s)) {
83
cmd_error = SMMU_CERROR_ILL;
84
break;
85
}
86
87
+ /*
88
+ * VMID is only matched when stage 2 is supported, otherwise set it
89
+ * to -1 as the value used for stage-1 only VMIDs.
90
+ */
91
+ if (STAGE2_SUPPORTED(s)) {
92
+ vmid = CMD_VMID(&cmd);
93
+ }
94
+
95
trace_smmuv3_cmdq_tlbi_nh_asid(asid);
96
smmu_inv_notifiers_all(&s->smmu_state);
97
- smmu_iotlb_inv_asid_vmid(bs, asid, -1);
98
+ smmu_iotlb_inv_asid_vmid(bs, asid, vmid);
99
break;
100
}
101
case SMMU_CMD_TLBI_NH_ALL:
102
+ {
103
+ int vmid = -1;
104
+
105
if (!STAGE1_SUPPORTED(s)) {
106
cmd_error = SMMU_CERROR_ILL;
107
break;
108
}
109
+
110
+ /*
111
+ * If stage-2 is supported, invalidate for this VMID only, otherwise
112
+ * invalidate the whole thing.
113
+ */
114
+ if (STAGE2_SUPPORTED(s)) {
115
+ vmid = CMD_VMID(&cmd);
116
+ trace_smmuv3_cmdq_tlbi_nh(vmid);
117
+ smmu_iotlb_inv_vmid_s1(bs, vmid);
118
+ break;
119
+ }
120
QEMU_FALLTHROUGH;
121
+ }
122
case SMMU_CMD_TLBI_NSNH_ALL:
123
- trace_smmuv3_cmdq_tlbi_nh();
124
+ trace_smmuv3_cmdq_tlbi_nsnh();
125
smmu_inv_notifiers_all(&s->smmu_state);
126
smmu_iotlb_inv_all(bs);
127
break;
128
diff --git a/hw/arm/trace-events b/hw/arm/trace-events
129
index XXXXXXX..XXXXXXX 100644
130
--- a/hw/arm/trace-events
131
+++ b/hw/arm/trace-events
132
@@ -XXX,XX +XXX,XX @@ smmu_get_pte(uint64_t baseaddr, int index, uint64_t pteaddr, uint64_t pte) "base
133
smmu_iotlb_inv_all(void) "IOTLB invalidate all"
134
smmu_iotlb_inv_asid_vmid(int asid, int vmid) "IOTLB invalidate asid=%d vmid=%d"
135
smmu_iotlb_inv_vmid(int vmid) "IOTLB invalidate vmid=%d"
136
+smmu_iotlb_inv_vmid_s1(int vmid) "IOTLB invalidate vmid=%d"
137
smmu_iotlb_inv_iova(int asid, uint64_t addr) "IOTLB invalidate asid=%d addr=0x%"PRIx64
138
smmu_inv_notifiers_mr(const char *name) "iommu mr=%s"
139
smmu_iotlb_lookup_hit(int asid, int vmid, uint64_t addr, uint32_t hit, uint32_t miss, uint32_t p) "IOTLB cache HIT asid=%d vmid=%d addr=0x%"PRIx64" hit=%d miss=%d hit rate=%d"
140
@@ -XXX,XX +XXX,XX @@ smmuv3_cmdq_cfgi_cd(uint32_t sid) "sid=0x%x"
141
smmuv3_config_cache_hit(uint32_t sid, uint32_t hits, uint32_t misses, uint32_t perc) "Config cache HIT for sid=0x%x (hits=%d, misses=%d, hit rate=%d)"
142
smmuv3_config_cache_miss(uint32_t sid, uint32_t hits, uint32_t misses, uint32_t perc) "Config cache MISS for sid=0x%x (hits=%d, misses=%d, hit rate=%d)"
143
smmuv3_range_inval(int vmid, int asid, uint64_t addr, uint8_t tg, uint64_t num_pages, uint8_t ttl, bool leaf, int stage) "vmid=%d asid=%d addr=0x%"PRIx64" tg=%d num_pages=0x%"PRIx64" ttl=%d leaf=%d stage=%d"
144
-smmuv3_cmdq_tlbi_nh(void) ""
145
+smmuv3_cmdq_tlbi_nh(int vmid) "vmid=%d"
146
+smmuv3_cmdq_tlbi_nsnh(void) ""
147
smmuv3_cmdq_tlbi_nh_asid(int asid) "asid=%d"
148
smmuv3_cmdq_tlbi_s12_vmid(int vmid) "vmid=%d"
149
smmuv3_config_cache_inv(uint32_t sid) "Config cache INV for sid=0x%x"
150
--
151
2.34.1
152
153
diff view generated by jsdifflib
Deleted patch
1
From: Mostafa Saleh <smostafa@google.com>
2
1
3
IOMMUTLBEvent only understands IOVA; for stage-1 or stage-2
4
SMMU instances we consider the input address as the IOVA, but when
5
nesting is used, we can't mix stage-1 and stage-2 addresses, so for
6
nesting only stage-1 is considered the IOVA and would be notified.
7
8
Signed-off-by: Mostafa Saleh <smostafa@google.com>
9
Reviewed-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
10
Reviewed-by: Eric Auger <eric.auger@redhat.com>
11
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
12
Message-id: 20240715084519.1189624-16-smostafa@google.com
13
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
14
---
15
hw/arm/smmuv3.c | 39 +++++++++++++++++++++++++--------------
16
hw/arm/trace-events | 2 +-
17
2 files changed, 26 insertions(+), 15 deletions(-)
18
19
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
20
index XXXXXXX..XXXXXXX 100644
21
--- a/hw/arm/smmuv3.c
22
+++ b/hw/arm/smmuv3.c
23
@@ -XXX,XX +XXX,XX @@ epilogue:
24
* @iova: iova
25
* @tg: translation granule (if communicated through range invalidation)
26
* @num_pages: number of @granule sized pages (if tg != 0), otherwise 1
27
+ * @stage: Which stage(1 or 2) is used
28
*/
29
static void smmuv3_notify_iova(IOMMUMemoryRegion *mr,
30
IOMMUNotifier *n,
31
int asid, int vmid,
32
dma_addr_t iova, uint8_t tg,
33
- uint64_t num_pages)
34
+ uint64_t num_pages, int stage)
35
{
36
SMMUDevice *sdev = container_of(mr, SMMUDevice, iommu);
37
+ SMMUEventInfo eventinfo = {.inval_ste_allowed = true};
38
+ SMMUTransCfg *cfg = smmuv3_get_config(sdev, &eventinfo);
39
IOMMUTLBEvent event;
40
uint8_t granule;
41
- SMMUv3State *s = sdev->smmu;
42
+
43
+ if (!cfg) {
44
+ return;
45
+ }
46
+
47
+ /*
48
+ * stage is passed from TLB invalidation commands which can be either
49
+ * stage-1 or stage-2.
50
+ * However, IOMMUTLBEvent only understands IOVA, for stage-1 or stage-2
51
+ * SMMU instances we consider the input address as the IOVA, but when
52
+ * nesting is used, we can't mix stage-1 and stage-2 addresses, so for
53
+ * nesting only stage-1 is considered the IOVA and would be notified.
54
+ */
55
+ if ((stage == SMMU_STAGE_2) && (cfg->stage == SMMU_NESTED))
56
+ return;
57
58
if (!tg) {
59
- SMMUEventInfo eventinfo = {.inval_ste_allowed = true};
60
- SMMUTransCfg *cfg = smmuv3_get_config(sdev, &eventinfo);
61
SMMUTransTableInfo *tt;
62
63
- if (!cfg) {
64
- return;
65
- }
66
-
67
if (asid >= 0 && cfg->asid != asid) {
68
return;
69
}
70
@@ -XXX,XX +XXX,XX @@ static void smmuv3_notify_iova(IOMMUMemoryRegion *mr,
71
return;
72
}
73
74
- if (STAGE1_SUPPORTED(s)) {
75
+ if (stage == SMMU_STAGE_1) {
76
tt = select_tt(cfg, iova);
77
if (!tt) {
78
return;
79
@@ -XXX,XX +XXX,XX @@ static void smmuv3_notify_iova(IOMMUMemoryRegion *mr,
80
/* invalidate an asid/vmid/iova range tuple in all mr's */
81
static void smmuv3_inv_notifiers_iova(SMMUState *s, int asid, int vmid,
82
dma_addr_t iova, uint8_t tg,
83
- uint64_t num_pages)
84
+ uint64_t num_pages, int stage)
85
{
86
SMMUDevice *sdev;
87
88
@@ -XXX,XX +XXX,XX @@ static void smmuv3_inv_notifiers_iova(SMMUState *s, int asid, int vmid,
89
IOMMUNotifier *n;
90
91
trace_smmuv3_inv_notifiers_iova(mr->parent_obj.name, asid, vmid,
92
- iova, tg, num_pages);
93
+ iova, tg, num_pages, stage);
94
95
IOMMU_NOTIFIER_FOREACH(n, mr) {
96
- smmuv3_notify_iova(mr, n, asid, vmid, iova, tg, num_pages);
97
+ smmuv3_notify_iova(mr, n, asid, vmid, iova, tg, num_pages, stage);
98
}
99
}
100
}
101
@@ -XXX,XX +XXX,XX @@ static void smmuv3_range_inval(SMMUState *s, Cmd *cmd, SMMUStage stage)
102
103
if (!tg) {
104
trace_smmuv3_range_inval(vmid, asid, addr, tg, 1, ttl, leaf, stage);
105
- smmuv3_inv_notifiers_iova(s, asid, vmid, addr, tg, 1);
106
+ smmuv3_inv_notifiers_iova(s, asid, vmid, addr, tg, 1, stage);
107
if (stage == SMMU_STAGE_1) {
108
smmu_iotlb_inv_iova(s, asid, vmid, addr, tg, 1, ttl);
109
} else {
110
@@ -XXX,XX +XXX,XX @@ static void smmuv3_range_inval(SMMUState *s, Cmd *cmd, SMMUStage stage)
111
num_pages = (mask + 1) >> granule;
112
trace_smmuv3_range_inval(vmid, asid, addr, tg, num_pages,
113
ttl, leaf, stage);
114
- smmuv3_inv_notifiers_iova(s, asid, vmid, addr, tg, num_pages);
115
+ smmuv3_inv_notifiers_iova(s, asid, vmid, addr, tg, num_pages, stage);
116
if (stage == SMMU_STAGE_1) {
117
smmu_iotlb_inv_iova(s, asid, vmid, addr, tg, num_pages, ttl);
118
} else {
119
diff --git a/hw/arm/trace-events b/hw/arm/trace-events
120
index XXXXXXX..XXXXXXX 100644
121
--- a/hw/arm/trace-events
122
+++ b/hw/arm/trace-events
123
@@ -XXX,XX +XXX,XX @@ smmuv3_cmdq_tlbi_s12_vmid(int vmid) "vmid=%d"
124
smmuv3_config_cache_inv(uint32_t sid) "Config cache INV for sid=0x%x"
125
smmuv3_notify_flag_add(const char *iommu) "ADD SMMUNotifier node for iommu mr=%s"
126
smmuv3_notify_flag_del(const char *iommu) "DEL SMMUNotifier node for iommu mr=%s"
127
-smmuv3_inv_notifiers_iova(const char *name, int asid, int vmid, uint64_t iova, uint8_t tg, uint64_t num_pages) "iommu mr=%s asid=%d vmid=%d iova=0x%"PRIx64" tg=%d num_pages=0x%"PRIx64
128
+smmuv3_inv_notifiers_iova(const char *name, int asid, int vmid, uint64_t iova, uint8_t tg, uint64_t num_pages, int stage) "iommu mr=%s asid=%d vmid=%d iova=0x%"PRIx64" tg=%d num_pages=0x%"PRIx64" stage=%d"
129
130
# strongarm.c
131
strongarm_uart_update_parameters(const char *label, int speed, char parity, int data_bits, int stop_bits) "%s speed=%d parity=%c data=%d stop=%d"
132
--
133
2.34.1
134
135
diff view generated by jsdifflib
1
From: Akihiko Odaki <akihiko.odaki@daynix.com>
1
From: Pierrick Bouvier <pierrick.bouvier@linaro.org>
2
2
3
hvf did not advance PC when raising an exception for most unhandled
3
Pointer authentication on aarch64 is pretty expensive (up to 50% of
4
system registers, but it mistakenly advanced PC when raising an
4
execution time) when running a virtual machine with tcg and -cpu max
5
exception for GICv3 registers.
5
(which enables pauth=on).
6
6
7
Cc: qemu-stable@nongnu.org
7
The advice is always: use pauth-impdef=on.
8
Fixes: a2260983c655 ("hvf: arm: Add support for GICv3")
8
Our documentation even mentions it "by default" in
9
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
9
docs/system/introduction.rst.
10
Message-id: 20240716-pmu-v3-4-8c7c1858a227@daynix.com
10
11
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
11
Thus, we change the default to use impdef by default. This does not
12
affect kvm or hvf acceleration, since pauth algorithm used is the one
13
from host cpu.
14
15
This change is retro compatible, in terms of cli, with previous
16
versions, as the semantic of using -cpu max,pauth-impdef=on, and -cpu
17
max,pauth-qarma3=on is preserved.
18
The new option introduced in previous patch and matching old default is
19
-cpu max,pauth-qarma5=on.
20
It is retro compatible with migration as well, by defining a backcompat
21
property, that will use qarma5 by default for virt machine <= 9.2.
22
Tested by saving and restoring a vm from qemu 9.2.0 into qemu-master
23
(10.0) for cpus neoverse-n2 and max.
24
25
Signed-off-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
26
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
27
Message-id: 20241219183211.3493974-3-pierrick.bouvier@linaro.org
12
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
28
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
13
---
29
---
14
target/arm/hvf/hvf.c | 1 +
30
docs/system/arm/cpu-features.rst | 2 +-
15
1 file changed, 1 insertion(+)
31
docs/system/introduction.rst | 2 +-
32
target/arm/cpu.h | 3 +++
33
hw/core/machine.c | 4 +++-
34
target/arm/cpu.c | 2 ++
35
target/arm/cpu64.c | 22 ++++++++++++++++------
36
6 files changed, 26 insertions(+), 9 deletions(-)
16
37
17
diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c
38
diff --git a/docs/system/arm/cpu-features.rst b/docs/system/arm/cpu-features.rst
18
index XXXXXXX..XXXXXXX 100644
39
index XXXXXXX..XXXXXXX 100644
19
--- a/target/arm/hvf/hvf.c
40
--- a/docs/system/arm/cpu-features.rst
20
+++ b/target/arm/hvf/hvf.c
41
+++ b/docs/system/arm/cpu-features.rst
21
@@ -XXX,XX +XXX,XX @@ static int hvf_sysreg_read(CPUState *cpu, uint32_t reg, uint32_t rt)
42
@@ -XXX,XX +XXX,XX @@ Below is the list of TCG VCPU features and their descriptions.
22
/* Call the TCG sysreg handler. This is only safe for GICv3 regs. */
43
When ``pauth`` is enabled, select the architected QARMA5 algorithm.
23
if (!hvf_sysreg_read_cp(cpu, reg, &val)) {
44
24
hvf_raise_exception(cpu, EXCP_UDEF, syn_uncategorized());
45
Without ``pauth-impdef``, ``pauth-qarma3`` or ``pauth-qarma5`` enabled,
25
+ return 1;
46
-the architected QARMA5 algorithm is used. The architected QARMA5
26
}
47
+the QEMU impdef algorithm is used. The architected QARMA5
27
break;
48
and QARMA3 algorithms have good cryptographic properties, but can
28
case SYSREG_DBGBVR0_EL1:
49
be quite slow to emulate. The impdef algorithm used by QEMU is
50
non-cryptographic but significantly faster.
51
diff --git a/docs/system/introduction.rst b/docs/system/introduction.rst
52
index XXXXXXX..XXXXXXX 100644
53
--- a/docs/system/introduction.rst
54
+++ b/docs/system/introduction.rst
55
@@ -XXX,XX +XXX,XX @@ would default to it anyway.
56
57
.. code::
58
59
- -cpu max,pauth-impdef=on \
60
+ -cpu max \
61
-smp 4 \
62
-accel tcg \
63
64
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
65
index XXXXXXX..XXXXXXX 100644
66
--- a/target/arm/cpu.h
67
+++ b/target/arm/cpu.h
68
@@ -XXX,XX +XXX,XX @@ struct ArchCPU {
69
/* QOM property to indicate we should use the back-compat CNTFRQ default */
70
bool backcompat_cntfrq;
71
72
+ /* QOM property to indicate we should use the back-compat QARMA5 default */
73
+ bool backcompat_pauth_default_use_qarma5;
74
+
75
/* Specify the number of cores in this CPU cluster. Used for the L2CTLR
76
* register.
77
*/
78
diff --git a/hw/core/machine.c b/hw/core/machine.c
79
index XXXXXXX..XXXXXXX 100644
80
--- a/hw/core/machine.c
81
+++ b/hw/core/machine.c
82
@@ -XXX,XX +XXX,XX @@
83
#include "hw/virtio/virtio-iommu.h"
84
#include "audio/audio.h"
85
86
-GlobalProperty hw_compat_9_2[] = {};
87
+GlobalProperty hw_compat_9_2[] = {
88
+ {"arm-cpu", "backcompat-pauth-default-use-qarma5", "true"},
89
+};
90
const size_t hw_compat_9_2_len = G_N_ELEMENTS(hw_compat_9_2);
91
92
GlobalProperty hw_compat_9_1[] = {
93
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
94
index XXXXXXX..XXXXXXX 100644
95
--- a/target/arm/cpu.c
96
+++ b/target/arm/cpu.c
97
@@ -XXX,XX +XXX,XX @@ static const Property arm_cpu_properties[] = {
98
DEFINE_PROP_INT32("core-count", ARMCPU, core_count, -1),
99
/* True to default to the backward-compat old CNTFRQ rather than 1Ghz */
100
DEFINE_PROP_BOOL("backcompat-cntfrq", ARMCPU, backcompat_cntfrq, false),
101
+ DEFINE_PROP_BOOL("backcompat-pauth-default-use-qarma5", ARMCPU,
102
+ backcompat_pauth_default_use_qarma5, false),
103
};
104
105
static const gchar *arm_gdb_arch_name(CPUState *cs)
106
diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
107
index XXXXXXX..XXXXXXX 100644
108
--- a/target/arm/cpu64.c
109
+++ b/target/arm/cpu64.c
110
@@ -XXX,XX +XXX,XX @@ void arm_cpu_pauth_finalize(ARMCPU *cpu, Error **errp)
111
return;
112
}
113
114
- if (cpu->prop_pauth_impdef) {
115
- isar1 = FIELD_DP64(isar1, ID_AA64ISAR1, API, features);
116
- isar1 = FIELD_DP64(isar1, ID_AA64ISAR1, GPI, 1);
117
+ bool use_default = !cpu->prop_pauth_qarma5 &&
118
+ !cpu->prop_pauth_qarma3 &&
119
+ !cpu->prop_pauth_impdef;
120
+
121
+ if (cpu->prop_pauth_qarma5 ||
122
+ (use_default &&
123
+ cpu->backcompat_pauth_default_use_qarma5)) {
124
+ isar1 = FIELD_DP64(isar1, ID_AA64ISAR1, APA, features);
125
+ isar1 = FIELD_DP64(isar1, ID_AA64ISAR1, GPA, 1);
126
} else if (cpu->prop_pauth_qarma3) {
127
isar2 = FIELD_DP64(isar2, ID_AA64ISAR2, APA3, features);
128
isar2 = FIELD_DP64(isar2, ID_AA64ISAR2, GPA3, 1);
129
- } else { /* default is pauth-qarma5 */
130
- isar1 = FIELD_DP64(isar1, ID_AA64ISAR1, APA, features);
131
- isar1 = FIELD_DP64(isar1, ID_AA64ISAR1, GPA, 1);
132
+ } else if (cpu->prop_pauth_impdef ||
133
+ (use_default &&
134
+ !cpu->backcompat_pauth_default_use_qarma5)) {
135
+ isar1 = FIELD_DP64(isar1, ID_AA64ISAR1, API, features);
136
+ isar1 = FIELD_DP64(isar1, ID_AA64ISAR1, GPI, 1);
137
+ } else {
138
+ g_assert_not_reached();
139
}
140
} else if (cpu->prop_pauth_impdef ||
141
cpu->prop_pauth_qarma3 ||
29
--
142
--
30
2.34.1
143
2.34.1
diff view generated by jsdifflib
1
From: Mostafa Saleh <smostafa@google.com>
1
From: Pierrick Bouvier <pierrick.bouvier@linaro.org>
2
2
3
SMMUv3 OAS is currently hardcoded in the code to 44 bits, for nested
3
Signed-off-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
4
configurations that can be a problem, as stage-2 might be shared with
4
Message-id: 20241219183211.3493974-4-pierrick.bouvier@linaro.org
5
the CPU which might have different PARANGE, and according to SMMU manual
5
[PMM: Removed a paragraph about using non-versioned models.]
6
ARM IHI 0070F.b:
7
6.3.6 SMMU_IDR5, OAS must match the system physical address size.
8
9
This patch doesn't change the SMMU OAS, but refactors the code to
10
make it easier to do that:
11
- Rely everywhere on IDR5 for reading OAS instead of using the
12
SMMU_IDR5_OAS macro, so, it is easier just to change IDR5 and
13
it propagages correctly.
14
- Add additional checks when OAS is greater than 48bits.
15
- Remove unused functions/macros: pa_range/MAX_PA.
16
17
Reviewed-by: Eric Auger <eric.auger@redhat.com>
18
Signed-off-by: Mostafa Saleh <smostafa@google.com>
19
Reviewed-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
20
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
21
Message-id: 20240715084519.1189624-19-smostafa@google.com
22
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
23
---
7
---
24
hw/arm/smmuv3-internal.h | 13 -------------
8
docs/system/arm/virt.rst | 4 ++++
25
hw/arm/smmu-common.c | 7 ++++---
9
1 file changed, 4 insertions(+)
26
hw/arm/smmuv3.c | 35 ++++++++++++++++++++++++++++-------
27
3 files changed, 32 insertions(+), 23 deletions(-)
28
10
29
diff --git a/hw/arm/smmuv3-internal.h b/hw/arm/smmuv3-internal.h
11
diff --git a/docs/system/arm/virt.rst b/docs/system/arm/virt.rst
30
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
31
--- a/hw/arm/smmuv3-internal.h
13
--- a/docs/system/arm/virt.rst
32
+++ b/hw/arm/smmuv3-internal.h
14
+++ b/docs/system/arm/virt.rst
33
@@ -XXX,XX +XXX,XX @@ static inline int oas2bits(int oas_field)
15
@@ -XXX,XX +XXX,XX @@ of the 5.0 release and ``virt-5.0`` of the 5.1 release. Migration
34
return -1;
16
is not guaranteed to work between different QEMU releases for
35
}
17
the non-versioned ``virt`` machine type.
36
18
37
-static inline int pa_range(STE *ste)
19
+VM migration is not guaranteed when using ``-cpu max``, as features
38
-{
20
+supported may change between QEMU versions. To ensure your VM can be
39
- int oas_field = MIN(STE_S2PS(ste), SMMU_IDR5_OAS);
21
+migrated, it is recommended to use another cpu model instead.
40
-
41
- if (!STE_S2AA64(ste)) {
42
- return 40;
43
- }
44
-
45
- return oas2bits(oas_field);
46
-}
47
-
48
-#define MAX_PA(ste) ((1 << pa_range(ste)) - 1)
49
-
50
/* CD fields */
51
52
#define CD_VALID(x) extract32((x)->word[0], 31, 1)
53
diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
54
index XXXXXXX..XXXXXXX 100644
55
--- a/hw/arm/smmu-common.c
56
+++ b/hw/arm/smmu-common.c
57
@@ -XXX,XX +XXX,XX @@ static int smmu_ptw_64_s1(SMMUState *bs, SMMUTransCfg *cfg,
58
inputsize = 64 - tt->tsz;
59
level = 4 - (inputsize - 4) / stride;
60
indexmask = VMSA_IDXMSK(inputsize, stride, level);
61
- baseaddr = extract64(tt->ttb, 0, 48);
62
+
22
+
63
+ baseaddr = extract64(tt->ttb, 0, cfg->oas);
23
Supported devices
64
baseaddr &= ~indexmask;
24
"""""""""""""""""
65
66
while (level < VMSA_LEVELS) {
67
@@ -XXX,XX +XXX,XX @@ static int smmu_ptw_64_s2(SMMUTransCfg *cfg,
68
* Get the ttb from concatenated structure.
69
* The offset is the idx * size of each ttb(number of ptes * (sizeof(pte))
70
*/
71
- uint64_t baseaddr = extract64(cfg->s2cfg.vttb, 0, 48) + (1 << stride) *
72
- idx * sizeof(uint64_t);
73
+ uint64_t baseaddr = extract64(cfg->s2cfg.vttb, 0, cfg->s2cfg.eff_ps) +
74
+ (1 << stride) * idx * sizeof(uint64_t);
75
dma_addr_t indexmask = VMSA_IDXMSK(inputsize, stride, level);
76
77
baseaddr &= ~indexmask;
78
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
79
index XXXXXXX..XXXXXXX 100644
80
--- a/hw/arm/smmuv3.c
81
+++ b/hw/arm/smmuv3.c
82
@@ -XXX,XX +XXX,XX @@ static bool s2t0sz_valid(SMMUTransCfg *cfg)
83
}
84
85
if (cfg->s2cfg.granule_sz == 16) {
86
- return (cfg->s2cfg.tsz >= 64 - oas2bits(SMMU_IDR5_OAS));
87
+ return (cfg->s2cfg.tsz >= 64 - cfg->s2cfg.eff_ps);
88
}
89
90
- return (cfg->s2cfg.tsz >= MAX(64 - oas2bits(SMMU_IDR5_OAS), 16));
91
+ return (cfg->s2cfg.tsz >= MAX(64 - cfg->s2cfg.eff_ps, 16));
92
}
93
94
/*
95
@@ -XXX,XX +XXX,XX @@ static bool s2_pgtable_config_valid(uint8_t sl0, uint8_t t0sz, uint8_t gran)
96
return nr_concat <= VMSA_MAX_S2_CONCAT;
97
}
98
99
-static int decode_ste_s2_cfg(SMMUTransCfg *cfg, STE *ste)
100
+static int decode_ste_s2_cfg(SMMUv3State *s, SMMUTransCfg *cfg,
101
+ STE *ste)
102
{
103
+ uint8_t oas = FIELD_EX32(s->idr[5], IDR5, OAS);
104
+
105
if (STE_S2AA64(ste) == 0x0) {
106
qemu_log_mask(LOG_UNIMP,
107
"SMMUv3 AArch32 tables not supported\n");
108
@@ -XXX,XX +XXX,XX @@ static int decode_ste_s2_cfg(SMMUTransCfg *cfg, STE *ste)
109
}
110
111
/* For AA64, The effective S2PS size is capped to the OAS. */
112
- cfg->s2cfg.eff_ps = oas2bits(MIN(STE_S2PS(ste), SMMU_IDR5_OAS));
113
+ cfg->s2cfg.eff_ps = oas2bits(MIN(STE_S2PS(ste), oas));
114
+ /*
115
+ * For SMMUv3.1 and later, when OAS == IAS == 52, the stage 2 input
116
+ * range is further limited to 48 bits unless STE.S2TG indicates a
117
+ * 64KB granule.
118
+ */
119
+ if (cfg->s2cfg.granule_sz != 16) {
120
+ cfg->s2cfg.eff_ps = MIN(cfg->s2cfg.eff_ps, 48);
121
+ }
122
/*
123
* It is ILLEGAL for the address in S2TTB to be outside the range
124
* described by the effective S2PS value.
125
@@ -XXX,XX +XXX,XX @@ static int decode_ste(SMMUv3State *s, SMMUTransCfg *cfg,
126
STE *ste, SMMUEventInfo *event)
127
{
128
uint32_t config;
129
+ uint8_t oas = FIELD_EX32(s->idr[5], IDR5, OAS);
130
int ret;
131
132
if (!STE_VALID(ste)) {
133
@@ -XXX,XX +XXX,XX @@ static int decode_ste(SMMUv3State *s, SMMUTransCfg *cfg,
134
* Stage-1 OAS defaults to OAS even if not enabled as it would be used
135
* in input address check for stage-2.
136
*/
137
- cfg->oas = oas2bits(SMMU_IDR5_OAS);
138
- ret = decode_ste_s2_cfg(cfg, ste);
139
+ cfg->oas = oas2bits(oas);
140
+ ret = decode_ste_s2_cfg(s, cfg, ste);
141
if (ret) {
142
goto bad_ste;
143
}
144
@@ -XXX,XX +XXX,XX @@ static int decode_cd(SMMUv3State *s, SMMUTransCfg *cfg,
145
int i;
146
SMMUTranslationStatus status;
147
SMMUTLBEntry *entry;
148
+ uint8_t oas = FIELD_EX32(s->idr[5], IDR5, OAS);
149
150
if (!CD_VALID(cd) || !CD_AARCH64(cd)) {
151
goto bad_cd;
152
@@ -XXX,XX +XXX,XX @@ static int decode_cd(SMMUv3State *s, SMMUTransCfg *cfg,
153
cfg->aa64 = true;
154
155
cfg->oas = oas2bits(CD_IPS(cd));
156
- cfg->oas = MIN(oas2bits(SMMU_IDR5_OAS), cfg->oas);
157
+ cfg->oas = MIN(oas2bits(oas), cfg->oas);
158
cfg->tbi = CD_TBI(cd);
159
cfg->asid = CD_ASID(cd);
160
cfg->affd = CD_AFFD(cd);
161
@@ -XXX,XX +XXX,XX @@ static int decode_cd(SMMUv3State *s, SMMUTransCfg *cfg,
162
goto bad_cd;
163
}
164
165
+ /*
166
+ * An address greater than 48 bits in size can only be output from a
167
+ * TTD when, in SMMUv3.1 and later, the effective IPS is 52 and a 64KB
168
+ * granule is in use for that translation table
169
+ */
170
+ if (tt->granule_sz != 16) {
171
+ cfg->oas = MIN(cfg->oas, 48);
172
+ }
173
tt->tsz = tsz;
174
tt->ttb = CD_TTB(cd, i);
175
25
176
--
26
--
177
2.34.1
27
2.34.1
178
179
diff view generated by jsdifflib