1 | The following changes since commit 11b8920ed2093848f79f93d106afe8a69a61a523: | 1 | The following changes since commit 3214bec13d8d4c40f707d21d8350d04e4123ae97: |
---|---|---|---|
2 | 2 | ||
3 | Merge tag 'pull-request-2024-11-04' of https://gitlab.com/thuth/qemu into staging (2024-11-04 17:37:59 +0000) | 3 | Merge tag 'migration-20250110-pull-request' of https://gitlab.com/farosas/qemu into staging (2025-01-10 13:39:19 -0500) |
4 | 4 | ||
5 | are available in the Git repository at: | 5 | are available in the Git repository at: |
6 | 6 | ||
7 | https://git.linaro.org/people/pmaydell/qemu-arm.git tags/pull-target-arm-20241105 | 7 | https://git.linaro.org/people/pmaydell/qemu-arm.git tags/pull-target-arm-20250113 |
8 | 8 | ||
9 | for you to fetch changes up to 374cdc8efe4a039510cca47e8399d54a1aeb4f2d: | 9 | for you to fetch changes up to 435d260e7ec5ff9c79e3e62f1d66ec82d2d691ae: |
10 | 10 | ||
11 | target/arm: Enable FEAT_CMOW for -cpu max (2024-11-05 10:10:00 +0000) | 11 | docs/system/arm/virt: mention specific migration information (2025-01-13 12:35:35 +0000) |
12 | 12 | ||
13 | ---------------------------------------------------------------- | 13 | ---------------------------------------------------------------- |
14 | target-arm queue: | 14 | target-arm queue: |
15 | * Fix MMU indexes for AArch32 Secure PL1&0 in a less complex and buggy way | 15 | * hw/arm_sysctl: fix extracting 31th bit of val |
16 | * Fix SVE SDOT/UDOT/USDOT (4-way, indexed) | 16 | * hw/misc: cast rpm to uint64_t |
17 | * softfloat: set 2-operand NaN propagation rule at runtime | 17 | * tests/qtest/boot-serial-test: Improve ASM |
18 | * disas: Fix build against Capstone v6 (again) | 18 | * target/arm: Move minor arithmetic helpers out of helper.c |
19 | * hw/rtc/ds1338: Trace send and receive operations | 19 | * target/arm: change default pauth algorithm to impdef |
20 | * hw/timer/imx_gpt: Convert DPRINTF to trace events | ||
21 | * hw/watchdog/wdt_imx2: Remove redundant assignment | ||
22 | * hw/sensor/tmp105: Convert printf() to trace event, add tracing for read/write access | ||
23 | * hw/net/npcm_gmac: Change error log to trace event | ||
24 | * target/arm: Enable FEAT_CMOW for -cpu max | ||
25 | 20 | ||
26 | ---------------------------------------------------------------- | 21 | ---------------------------------------------------------------- |
27 | Bernhard Beschow (4): | 22 | Anastasia Belova (1): |
28 | hw/rtc/ds1338: Trace send and receive operations | 23 | hw/arm_sysctl: fix extracting 31th bit of val |
29 | hw/timer/imx_gpt: Convert DPRINTF to trace events | ||
30 | hw/watchdog/wdt_imx2: Remove redundant assignment | ||
31 | hw/sensor/tmp105: Convert printf() to trace event, add tracing for read/write access | ||
32 | 24 | ||
33 | Gustavo Romero (1): | 25 | Peter Maydell (2): |
34 | target/arm: Enable FEAT_CMOW for -cpu max | 26 | target/arm: Move minor arithmetic helpers out of helper.c |
27 | tests/tcg/aarch64: force qarma5 for pauth-3 test | ||
35 | 28 | ||
36 | Nabih Estefan (1): | 29 | Philippe Mathieu-Daudé (4): |
37 | hw/net/npcm_gmac: Change error log to trace event | 30 | tests/qtest/boot-serial-test: Improve ASM comments of PL011 tests |
31 | tests/qtest/boot-serial-test: Reduce for() loop in PL011 tests | ||
32 | tests/qtest/boot-serial-test: Reorder pair of instructions in PL011 test | ||
33 | tests/qtest/boot-serial-test: Initialize PL011 Control register | ||
38 | 34 | ||
39 | Peter Maydell (24): | 35 | Pierrick Bouvier (3): |
40 | softfloat: Allow 2-operand NaN propagation rule to be set at runtime | 36 | target/arm: add new property to select pauth-qarma5 |
41 | tests/fp: Explicitly set 2-NaN propagation rule | 37 | target/arm: change default pauth algorithm to impdef |
42 | target/arm: Explicitly set 2-NaN propagation rule | 38 | docs/system/arm/virt: mention specific migration information |
43 | target/mips: Explicitly set 2-NaN propagation rule | ||
44 | target/loongarch: Explicitly set 2-NaN propagation rule | ||
45 | target/hppa: Explicitly set 2-NaN propagation rule | ||
46 | target/s390x: Explicitly set 2-NaN propagation rule | ||
47 | target/ppc: Explicitly set 2-NaN propagation rule | ||
48 | target/m68k: Explicitly set 2-NaN propagation rule | ||
49 | target/m68k: Initialize float_status fields in gdb set/get functions | ||
50 | target/sparc: Move cpu_put_fsr(env, 0) call to reset | ||
51 | target/sparc: Explicitly set 2-NaN propagation rule | ||
52 | target/xtensa: Factor out calls to set_use_first_nan() | ||
53 | target/xtensa: Explicitly set 2-NaN propagation rule | ||
54 | target/i386: Set 2-NaN propagation rule explicitly | ||
55 | target/alpha: Explicitly set 2-NaN propagation rule | ||
56 | target/microblaze: Move setting of float rounding mode to reset | ||
57 | target/microblaze: Explicitly set 2-NaN propagation rule | ||
58 | target/openrisc: Explicitly set 2-NaN propagation rule | ||
59 | target/rx: Explicitly set 2-NaN propagation rule | ||
60 | softfloat: Remove fallback rule from pickNaN() | ||
61 | Revert "target/arm: Fix usage of MMU indexes when EL3 is AArch32" | ||
62 | target/arm: Add new MMU indexes for AArch32 Secure PL1&0 | ||
63 | target/arm: Fix SVE SDOT/UDOT/USDOT (4-way, indexed) | ||
64 | 39 | ||
65 | Richard Henderson (1): | 40 | Tigran Sogomonian (1): |
66 | disas: Fix build against Capstone v6 (again) | 41 | hw/misc: cast rpm to uint64_t |
67 | 42 | ||
68 | docs/system/arm/emulation.rst | 1 + | 43 | docs/system/arm/cpu-features.rst | 7 +- |
69 | meson.build | 1 + | 44 | docs/system/arm/virt.rst | 4 + |
70 | hw/sensor/trace.h | 1 + | 45 | docs/system/introduction.rst | 2 +- |
71 | include/disas/capstone.h | 1 + | 46 | target/arm/cpu.h | 4 + |
72 | include/fpu/softfloat-helpers.h | 11 +++ | 47 | hw/core/machine.c | 4 +- |
73 | include/fpu/softfloat-types.h | 38 ++++++++++ | 48 | hw/misc/arm_sysctl.c | 2 +- |
74 | target/arm/cpu-features.h | 5 ++ | 49 | hw/misc/npcm7xx_mft.c | 5 +- |
75 | target/arm/cpu.h | 49 ++++++------ | 50 | target/arm/arm-qmp-cmds.c | 2 +- |
76 | target/arm/internals.h | 41 +++++----- | 51 | target/arm/cpu.c | 2 + |
77 | target/arm/tcg/translate.h | 2 - | 52 | target/arm/cpu64.c | 38 ++- |
78 | target/i386/cpu.h | 3 + | 53 | target/arm/helper.c | 285 ----------------------- |
79 | target/mips/fpu_helper.h | 22 ++++++ | 54 | target/arm/tcg/arith_helper.c | 296 ++++++++++++++++++++++++ |
80 | target/xtensa/cpu.h | 6 ++ | 55 | tests/qtest/arm-cpu-features.c | 15 +- |
81 | hw/net/npcm_gmac.c | 5 +- | 56 | tests/qtest/boot-serial-test.c | 23 +- |
82 | hw/rtc/ds1338.c | 6 ++ | 57 | target/arm/{op_addsub.h => tcg/op_addsub.c.inc} | 0 |
83 | hw/sensor/tmp105.c | 7 +- | 58 | target/arm/tcg/meson.build | 1 + |
84 | hw/timer/imx_gpt.c | 18 ++--- | 59 | tests/tcg/aarch64/Makefile.softmmu-target | 3 + |
85 | hw/watchdog/wdt_imx2.c | 1 - | 60 | 17 files changed, 377 insertions(+), 316 deletions(-) |
86 | linux-user/arm/nwfpe/fpa11.c | 18 +++++ | 61 | create mode 100644 target/arm/tcg/arith_helper.c |
87 | target/alpha/cpu.c | 11 +++ | 62 | rename target/arm/{op_addsub.h => tcg/op_addsub.c.inc} (100%) |
88 | target/arm/cpu.c | 25 ++++-- | 63 | |
89 | target/arm/helper.c | 73 ++++++++++++------ | ||
90 | target/arm/ptw.c | 10 +-- | ||
91 | target/arm/tcg/cpu64.c | 1 + | ||
92 | target/arm/tcg/hflags.c | 4 - | ||
93 | target/arm/tcg/op_helper.c | 14 +++- | ||
94 | target/arm/tcg/translate-a64.c | 2 +- | ||
95 | target/arm/tcg/translate.c | 12 +-- | ||
96 | target/arm/tcg/vec_helper.c | 9 ++- | ||
97 | target/hppa/fpu_helper.c | 6 ++ | ||
98 | target/i386/cpu.c | 4 + | ||
99 | target/i386/tcg/fpu_helper.c | 40 ++++++++++ | ||
100 | target/loongarch/tcg/fpu_helper.c | 1 + | ||
101 | target/m68k/cpu.c | 16 ++++ | ||
102 | target/m68k/fpu_helper.c | 1 + | ||
103 | target/m68k/helper.c | 4 +- | ||
104 | target/microblaze/cpu.c | 10 ++- | ||
105 | target/mips/cpu.c | 2 +- | ||
106 | target/mips/msa.c | 17 +++++ | ||
107 | target/openrisc/cpu.c | 6 ++ | ||
108 | target/ppc/cpu_init.c | 8 ++ | ||
109 | target/rx/cpu.c | 7 ++ | ||
110 | target/s390x/cpu.c | 1 + | ||
111 | target/sparc/cpu.c | 10 ++- | ||
112 | target/sparc/fop_helper.c | 10 ++- | ||
113 | target/xtensa/cpu.c | 2 +- | ||
114 | target/xtensa/fpu_helper.c | 35 +++++---- | ||
115 | tests/fp/fp-bench.c | 2 + | ||
116 | tests/fp/fp-test-log2.c | 1 + | ||
117 | tests/fp/fp-test.c | 2 + | ||
118 | fpu/softfloat-specialize.c.inc | 156 ++++++++++++++------------------------ | ||
119 | hw/net/trace-events | 1 + | ||
120 | hw/rtc/trace-events | 4 + | ||
121 | hw/sensor/trace-events | 6 ++ | ||
122 | hw/timer/trace-events | 6 ++ | ||
123 | 55 files changed, 516 insertions(+), 239 deletions(-) | ||
124 | create mode 100644 hw/sensor/trace.h | ||
125 | create mode 100644 hw/sensor/trace-events | diff view generated by jsdifflib |
1 | From: Bernhard Beschow <shentey@gmail.com> | 1 | From: Anastasia Belova <abelova@astralinux.ru> |
---|---|---|---|
2 | 2 | ||
3 | The same statement is executed unconditionally right before the if statement. | 3 | 1 << 31 is cast to uint64_t when it is bitwise ANDed with val. |
4 | So this value may become 0xffffffff80000000, but only | ||
5 | bit 31 (the "start" bit) is required. | ||
4 | 6 | ||
5 | Cc: Guenter Roeck <linux@roeck-us.net> | 7 | This is not possible in practice because the MemoryRegionOps |
6 | Reviewed-by: Guenter Roeck <linux@roeck-us.net> | 8 | uses the default max access size of 4 bytes and so none |
7 | Signed-off-by: Bernhard Beschow <shentey@gmail.com> | 9 | of the upper bytes of val will be set, but the bitfield |
8 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 10 | extract API is clearer anyway. |
9 | Message-id: 20241103143330.123596-4-shentey@gmail.com | 11 | |
12 | Use the bitfield extract() API instead. | ||
13 | |||
14 | Found by Linux Verification Center (linuxtesting.org) with SVACE. | ||
15 | |||
16 | Signed-off-by: Anastasia Belova <abelova@astralinux.ru> | ||
17 | Message-id: 20241220125429.7552-1-abelova@astralinux.ru | ||
18 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | ||
19 | [PMM: add clarification to commit message] | ||
10 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 20 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
11 | --- | 21 | --- |
12 | hw/watchdog/wdt_imx2.c | 1 - | 22 | hw/misc/arm_sysctl.c | 2 +- |
13 | 1 file changed, 1 deletion(-) | 23 | 1 file changed, 1 insertion(+), 1 deletion(-) |
14 | 24 | ||
15 | diff --git a/hw/watchdog/wdt_imx2.c b/hw/watchdog/wdt_imx2.c | 25 | diff --git a/hw/misc/arm_sysctl.c b/hw/misc/arm_sysctl.c |
16 | index XXXXXXX..XXXXXXX 100644 | 26 | index XXXXXXX..XXXXXXX 100644 |
17 | --- a/hw/watchdog/wdt_imx2.c | 27 | --- a/hw/misc/arm_sysctl.c |
18 | +++ b/hw/watchdog/wdt_imx2.c | 28 | +++ b/hw/misc/arm_sysctl.c |
19 | @@ -XXX,XX +XXX,XX @@ static void imx2_wdt_expired(void *opaque) | 29 | @@ -XXX,XX +XXX,XX @@ static void arm_sysctl_write(void *opaque, hwaddr offset, |
20 | 30 | * as zero. | |
21 | /* Perform watchdog action if watchdog is enabled */ | 31 | */ |
22 | if (s->wcr & IMX2_WDT_WCR_WDE) { | 32 | s->sys_cfgctrl = val & ~((3 << 18) | (1 << 31)); |
23 | - s->wrsr = IMX2_WDT_WRSR_TOUT; | 33 | - if (val & (1 << 31)) { |
24 | watchdog_perform_action(); | 34 | + if (extract64(val, 31, 1)) { |
25 | } | 35 | /* Start bit set -- actually do something */ |
26 | } | 36 | unsigned int dcc = extract32(s->sys_cfgctrl, 26, 4); |
37 | unsigned int function = extract32(s->sys_cfgctrl, 20, 6); | ||
27 | -- | 38 | -- |
28 | 2.34.1 | 39 | 2.34.1 | diff view generated by jsdifflib |
1 | From: Bernhard Beschow <shentey@gmail.com> | 1 | From: Tigran Sogomonian <tsogomonian@astralinux.ru> |
---|---|---|---|
2 | 2 | ||
3 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | 3 | The value of an arithmetic expression |
4 | Signed-off-by: Bernhard Beschow <shentey@gmail.com> | 4 | 'rpm * NPCM7XX_MFT_PULSE_PER_REVOLUTION' is subject |
5 | Message-id: 20241103143330.123596-3-shentey@gmail.com | 5 | to overflow because its operands are not cast to |
6 | a larger data type before performing arithmetic. Thus, we need | ||
7 | to cast rpm to uint64_t. | ||
8 | |||
9 | Found by Linux Verification Center (linuxtesting.org) with SVACE. | ||
10 | |||
11 | Signed-off-by: Tigran Sogomonian <tsogomonian@astralinux.ru> | ||
12 | Reviewed-by: Patrick Leis <venture@google.com> | ||
13 | Reviewed-by: Hao Wu <wuhaotsh@google.com> | ||
14 | Message-id: 20241226130311.1349-1-tsogomonian@astralinux.ru | ||
6 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 15 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
7 | --- | 16 | --- |
8 | hw/timer/imx_gpt.c | 18 +++++------------- | 17 | hw/misc/npcm7xx_mft.c | 5 +++-- |
9 | hw/timer/trace-events | 6 ++++++ | 18 | 1 file changed, 3 insertions(+), 2 deletions(-) |
10 | 2 files changed, 11 insertions(+), 13 deletions(-) | ||
11 | 19 | ||
12 | diff --git a/hw/timer/imx_gpt.c b/hw/timer/imx_gpt.c | 20 | diff --git a/hw/misc/npcm7xx_mft.c b/hw/misc/npcm7xx_mft.c |
13 | index XXXXXXX..XXXXXXX 100644 | 21 | index XXXXXXX..XXXXXXX 100644 |
14 | --- a/hw/timer/imx_gpt.c | 22 | --- a/hw/misc/npcm7xx_mft.c |
15 | +++ b/hw/timer/imx_gpt.c | 23 | +++ b/hw/misc/npcm7xx_mft.c |
16 | @@ -XXX,XX +XXX,XX @@ | 24 | @@ -XXX,XX +XXX,XX @@ static NPCM7xxMFTCaptureState npcm7xx_mft_compute_cnt( |
17 | #include "migration/vmstate.h" | 25 | * RPM = revolution/min. The time for one revolution (in ns) is |
18 | #include "qemu/module.h" | 26 | * MINUTE_TO_NANOSECOND / RPM. |
19 | #include "qemu/log.h" | 27 | */ |
20 | +#include "trace.h" | 28 | - count = clock_ns_to_ticks(clock, (60 * NANOSECONDS_PER_SECOND) / |
21 | 29 | - (rpm * NPCM7XX_MFT_PULSE_PER_REVOLUTION)); | |
22 | #ifndef DEBUG_IMX_GPT | 30 | + count = clock_ns_to_ticks(clock, |
23 | #define DEBUG_IMX_GPT 0 | 31 | + (uint64_t)(60 * NANOSECONDS_PER_SECOND) / |
24 | #endif | 32 | + ((uint64_t)rpm * NPCM7XX_MFT_PULSE_PER_REVOLUTION)); |
25 | |||
26 | -#define DPRINTF(fmt, args...) \ | ||
27 | - do { \ | ||
28 | - if (DEBUG_IMX_GPT) { \ | ||
29 | - fprintf(stderr, "[%s]%s: " fmt , TYPE_IMX_GPT, \ | ||
30 | - __func__, ##args); \ | ||
31 | - } \ | ||
32 | - } while (0) | ||
33 | - | ||
34 | static const char *imx_gpt_reg_name(uint32_t reg) | ||
35 | { | ||
36 | switch (reg) { | ||
37 | @@ -XXX,XX +XXX,XX @@ static void imx_gpt_set_freq(IMXGPTState *s) | ||
38 | s->freq = imx_ccm_get_clock_frequency(s->ccm, | ||
39 | s->clocks[clksrc]) / (1 + s->pr); | ||
40 | |||
41 | - DPRINTF("Setting clksrc %d to frequency %d\n", clksrc, s->freq); | ||
42 | + trace_imx_gpt_set_freq(clksrc, s->freq); | ||
43 | |||
44 | if (s->freq) { | ||
45 | ptimer_set_freq(s->timer, s->freq); | ||
46 | @@ -XXX,XX +XXX,XX @@ static uint64_t imx_gpt_read(void *opaque, hwaddr offset, unsigned size) | ||
47 | break; | ||
48 | } | 33 | } |
49 | 34 | ||
50 | - DPRINTF("(%s) = 0x%08x\n", imx_gpt_reg_name(offset >> 2), reg_value); | 35 | if (count > NPCM7XX_MFT_MAX_CNT) { |
51 | + trace_imx_gpt_read(imx_gpt_reg_name(offset >> 2), reg_value); | ||
52 | |||
53 | return reg_value; | ||
54 | } | ||
55 | @@ -XXX,XX +XXX,XX @@ static void imx_gpt_write(void *opaque, hwaddr offset, uint64_t value, | ||
56 | IMXGPTState *s = IMX_GPT(opaque); | ||
57 | uint32_t oldreg; | ||
58 | |||
59 | - DPRINTF("(%s, value = 0x%08x)\n", imx_gpt_reg_name(offset >> 2), | ||
60 | - (uint32_t)value); | ||
61 | + trace_imx_gpt_write(imx_gpt_reg_name(offset >> 2), (uint32_t)value); | ||
62 | |||
63 | switch (offset >> 2) { | ||
64 | case 0: | ||
65 | @@ -XXX,XX +XXX,XX @@ static void imx_gpt_timeout(void *opaque) | ||
66 | { | ||
67 | IMXGPTState *s = IMX_GPT(opaque); | ||
68 | |||
69 | - DPRINTF("\n"); | ||
70 | + trace_imx_gpt_timeout(); | ||
71 | |||
72 | s->sr |= s->next_int; | ||
73 | s->next_int = 0; | ||
74 | diff --git a/hw/timer/trace-events b/hw/timer/trace-events | ||
75 | index XXXXXXX..XXXXXXX 100644 | ||
76 | --- a/hw/timer/trace-events | ||
77 | +++ b/hw/timer/trace-events | ||
78 | @@ -XXX,XX +XXX,XX @@ cmsdk_apb_dualtimer_read(uint64_t offset, uint64_t data, unsigned size) "CMSDK A | ||
79 | cmsdk_apb_dualtimer_write(uint64_t offset, uint64_t data, unsigned size) "CMSDK APB dualtimer write: offset 0x%" PRIx64 " data 0x%" PRIx64 " size %u" | ||
80 | cmsdk_apb_dualtimer_reset(void) "CMSDK APB dualtimer: reset" | ||
81 | |||
82 | +# imx_gpt.c | ||
83 | +imx_gpt_set_freq(uint32_t clksrc, uint32_t freq) "Setting clksrc %u to %u Hz" | ||
84 | +imx_gpt_read(const char *name, uint64_t value) "%s -> 0x%08" PRIx64 | ||
85 | +imx_gpt_write(const char *name, uint64_t value) "%s <- 0x%08" PRIx64 | ||
86 | +imx_gpt_timeout(void) "" | ||
87 | + | ||
88 | # npcm7xx_timer.c | ||
89 | npcm7xx_timer_read(const char *id, uint64_t offset, uint64_t value) " %s offset: 0x%04" PRIx64 " value 0x%08" PRIx64 | ||
90 | npcm7xx_timer_write(const char *id, uint64_t offset, uint64_t value) "%s offset: 0x%04" PRIx64 " value 0x%08" PRIx64 | ||
91 | -- | 36 | -- |
92 | 2.34.1 | 37 | 2.34.1 |
93 | |||
94 | diff view generated by jsdifflib |
1 | From: Nabih Estefan <nabihestefan@google.com> | 1 | From: Philippe Mathieu-Daudé <philmd@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | Convert the LOG_GUEST_ERROR for the "tx descriptor is owned | 3 | Re-indent ASM comments adding the 'loop:' label. |
4 | by software" to a trace message. This condition is normal | ||
5 | when there is nothing to transmit, and we would | ||
6 | otherwise spam the logs with it in that situation. | ||
7 | 4 | ||
8 | Signed-off-by: Nabih Estefan <nabihestefan@google.com> | 5 | Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org> |
9 | Signed-off-by: Roque Arcudia Hernandez <roqueh@google.com> | 6 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
10 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | 7 | Reviewed-by: Fabiano Rosas <farosas@suse.de> |
11 | Message-id: 20241014184847.1594056-1-roqueh@google.com | ||
12 | [PMM: tweaked commit message] | ||
13 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 8 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
14 | --- | 9 | --- |
15 | hw/net/npcm_gmac.c | 5 ++--- | 10 | tests/qtest/boot-serial-test.c | 18 +++++++++--------- |
16 | hw/net/trace-events | 1 + | 11 | 1 file changed, 9 insertions(+), 9 deletions(-) |
17 | 2 files changed, 3 insertions(+), 3 deletions(-) | ||
18 | 12 | ||
19 | diff --git a/hw/net/npcm_gmac.c b/hw/net/npcm_gmac.c | 13 | diff --git a/tests/qtest/boot-serial-test.c b/tests/qtest/boot-serial-test.c |
20 | index XXXXXXX..XXXXXXX 100644 | 14 | index XXXXXXX..XXXXXXX 100644 |
21 | --- a/hw/net/npcm_gmac.c | 15 | --- a/tests/qtest/boot-serial-test.c |
22 | +++ b/hw/net/npcm_gmac.c | 16 | +++ b/tests/qtest/boot-serial-test.c |
23 | @@ -XXX,XX +XXX,XX @@ static void gmac_try_send_next_packet(NPCMGMACState *gmac) | 17 | @@ -XXX,XX +XXX,XX @@ static const uint8_t kernel_plml605[] = { |
24 | 18 | }; | |
25 | /* 1 = DMA Owned, 0 = Software Owned */ | 19 | |
26 | if (!(tx_desc.tdes0 & TX_DESC_TDES0_OWN)) { | 20 | static const uint8_t bios_raspi2[] = { |
27 | - qemu_log_mask(LOG_GUEST_ERROR, | 21 | - 0x08, 0x30, 0x9f, 0xe5, /* ldr r3,[pc,#8] Get base */ |
28 | - "TX Descriptor @ 0x%x is owned by software\n", | 22 | - 0x54, 0x20, 0xa0, 0xe3, /* mov r2,#'T' */ |
29 | - desc_addr); | 23 | - 0x00, 0x20, 0xc3, 0xe5, /* strb r2,[r3] */ |
30 | + trace_npcm_gmac_tx_desc_owner(DEVICE(gmac)->canonical_path, | 24 | - 0xfb, 0xff, 0xff, 0xea, /* b loop */ |
31 | + desc_addr); | 25 | - 0x00, 0x10, 0x20, 0x3f, /* 0x3f201000 = UART0 base addr */ |
32 | gmac->regs[R_NPCM_DMA_STATUS] |= NPCM_DMA_STATUS_TU; | 26 | + 0x08, 0x30, 0x9f, 0xe5, /* loop: ldr r3, [pc, #8] Get &UART0 */ |
33 | gmac_dma_set_state(gmac, NPCM_DMA_STATUS_TX_PROCESS_STATE_SHIFT, | 27 | + 0x54, 0x20, 0xa0, 0xe3, /* mov r2, #'T' */ |
34 | NPCM_DMA_STATUS_TX_SUSPENDED_STATE); | 28 | + 0x00, 0x20, 0xc3, 0xe5, /* strb r2, [r3] *TXDAT = 'T' */ |
35 | diff --git a/hw/net/trace-events b/hw/net/trace-events | 29 | + 0xfb, 0xff, 0xff, 0xea, /* b -12 (loop) */ |
36 | index XXXXXXX..XXXXXXX 100644 | 30 | + 0x00, 0x10, 0x20, 0x3f, /* UART0: 0x3f201000 */ |
37 | --- a/hw/net/trace-events | 31 | }; |
38 | +++ b/hw/net/trace-events | 32 | |
39 | @@ -XXX,XX +XXX,XX @@ npcm_gmac_packet_received(const char* name, uint32_t len) "%s: Reception finishe | 33 | static const uint8_t kernel_aarch64[] = { |
40 | npcm_gmac_packet_sent(const char* name, uint16_t len) "%s: TX packet sent!, length: 0x%04" PRIX16 | 34 | - 0x81, 0x0a, 0x80, 0x52, /* mov w1, #0x54 */ |
41 | npcm_gmac_debug_desc_data(const char* name, void* addr, uint32_t des0, uint32_t des1, uint32_t des2, uint32_t des3)"%s: Address: %p Descriptor 0: 0x%04" PRIX32 " Descriptor 1: 0x%04" PRIX32 "Descriptor 2: 0x%04" PRIX32 " Descriptor 3: 0x%04" PRIX32 | 35 | - 0x02, 0x20, 0xa1, 0xd2, /* mov x2, #0x9000000 */ |
42 | npcm_gmac_packet_tx_desc_data(const char* name, uint32_t tdes0, uint32_t tdes1) "%s: Tdes0: 0x%04" PRIX32 " Tdes1: 0x%04" PRIX32 | 36 | - 0x41, 0x00, 0x00, 0x39, /* strb w1, [x2] */ |
43 | +npcm_gmac_tx_desc_owner(const char* name, uint32_t desc_addr) "%s: TX Descriptor @0x%04" PRIX32 " is owned by software" | 37 | - 0xfd, 0xff, 0xff, 0x17, /* b -12 (loop) */ |
44 | 38 | + 0x81, 0x0a, 0x80, 0x52, /* loop: mov w1, #'T' */ | |
45 | # npcm_pcs.c | 39 | + 0x02, 0x20, 0xa1, 0xd2, /* mov x2, #0x9000000 Load UART0 */ |
46 | npcm_pcs_reg_read(const char *name, uint16_t indirect_access_baes, uint64_t offset, uint16_t value) "%s: IND: 0x%02" PRIx16 " offset: 0x%04" PRIx64 " value: 0x%04" PRIx16 | 40 | + 0x41, 0x00, 0x00, 0x39, /* strb w1, [x2] *TXDAT = 'T' */ |
41 | + 0xfd, 0xff, 0xff, 0x17, /* b -12 (loop) */ | ||
42 | }; | ||
43 | |||
44 | static const uint8_t kernel_nrf51[] = { | ||
47 | -- | 45 | -- |
48 | 2.34.1 | 46 | 2.34.1 |
49 | 47 | ||
50 | 48 | diff view generated by jsdifflib |
1 | From: Bernhard Beschow <shentey@gmail.com> | 1 | From: Philippe Mathieu-Daudé <philmd@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | printf() unconditionally prints to the console which disturbs `-serial stdio`. | 3 | Since registers are not modified, we don't need |
4 | Fix that by converting into a trace event. While at it, add some tracing for | 4 | to refill their values. Directly jump to the previous |
5 | read and write access. | 5 | store instruction to keep filling the TXDAT register. |
6 | 6 | ||
7 | Fixes: 7e7c5e4c1ba5 "Nokia N800 machine support (ARM)." | 7 | The equivalent C code remains: |
8 | Signed-off-by: Bernhard Beschow <shentey@gmail.com> | 8 | |
9 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | 9 | while (true) { |
10 | Message-id: 20241103143330.123596-5-shentey@gmail.com | 10 | *UART_DATA = 'T'; |
11 | } | ||
12 | |||
13 | Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
14 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
15 | Reviewed-by: Fabiano Rosas <farosas@suse.de> | ||
11 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 16 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
12 | --- | 17 | --- |
13 | meson.build | 1 + | 18 | tests/qtest/boot-serial-test.c | 12 ++++++------ |
14 | hw/sensor/trace.h | 1 + | 19 | 1 file changed, 6 insertions(+), 6 deletions(-) |
15 | hw/sensor/tmp105.c | 7 ++++++- | ||
16 | hw/sensor/trace-events | 6 ++++++ | ||
17 | 4 files changed, 14 insertions(+), 1 deletion(-) | ||
18 | create mode 100644 hw/sensor/trace.h | ||
19 | create mode 100644 hw/sensor/trace-events | ||
20 | 20 | ||
21 | diff --git a/meson.build b/meson.build | 21 | diff --git a/tests/qtest/boot-serial-test.c b/tests/qtest/boot-serial-test.c |
22 | index XXXXXXX..XXXXXXX 100644 | 22 | index XXXXXXX..XXXXXXX 100644 |
23 | --- a/meson.build | 23 | --- a/tests/qtest/boot-serial-test.c |
24 | +++ b/meson.build | 24 | +++ b/tests/qtest/boot-serial-test.c |
25 | @@ -XXX,XX +XXX,XX @@ if have_system | 25 | @@ -XXX,XX +XXX,XX @@ static const uint8_t kernel_plml605[] = { |
26 | 'hw/s390x', | 26 | }; |
27 | 'hw/scsi', | 27 | |
28 | 'hw/sd', | 28 | static const uint8_t bios_raspi2[] = { |
29 | + 'hw/sensor', | 29 | - 0x08, 0x30, 0x9f, 0xe5, /* loop: ldr r3, [pc, #8] Get &UART0 */ |
30 | 'hw/sh4', | 30 | + 0x08, 0x30, 0x9f, 0xe5, /* ldr r3, [pc, #8] Get &UART0 */ |
31 | 'hw/sparc', | 31 | 0x54, 0x20, 0xa0, 0xe3, /* mov r2, #'T' */ |
32 | 'hw/sparc64', | 32 | - 0x00, 0x20, 0xc3, 0xe5, /* strb r2, [r3] *TXDAT = 'T' */ |
33 | diff --git a/hw/sensor/trace.h b/hw/sensor/trace.h | 33 | - 0xfb, 0xff, 0xff, 0xea, /* b -12 (loop) */ |
34 | new file mode 100644 | 34 | + 0x00, 0x20, 0xc3, 0xe5, /* loop: strb r2, [r3] *TXDAT = 'T' */ |
35 | index XXXXXXX..XXXXXXX | 35 | + 0xff, 0xff, 0xff, 0xea, /* b -4 (loop) */ |
36 | --- /dev/null | 36 | 0x00, 0x10, 0x20, 0x3f, /* UART0: 0x3f201000 */ |
37 | +++ b/hw/sensor/trace.h | 37 | }; |
38 | @@ -0,0 +1 @@ | 38 | |
39 | +#include "trace/trace-hw_sensor.h" | 39 | static const uint8_t kernel_aarch64[] = { |
40 | diff --git a/hw/sensor/tmp105.c b/hw/sensor/tmp105.c | 40 | - 0x81, 0x0a, 0x80, 0x52, /* loop: mov w1, #'T' */ |
41 | index XXXXXXX..XXXXXXX 100644 | 41 | + 0x81, 0x0a, 0x80, 0x52, /* mov w1, #'T' */ |
42 | --- a/hw/sensor/tmp105.c | 42 | 0x02, 0x20, 0xa1, 0xd2, /* mov x2, #0x9000000 Load UART0 */ |
43 | +++ b/hw/sensor/tmp105.c | 43 | - 0x41, 0x00, 0x00, 0x39, /* strb w1, [x2] *TXDAT = 'T' */ |
44 | @@ -XXX,XX +XXX,XX @@ | 44 | - 0xfd, 0xff, 0xff, 0x17, /* b -12 (loop) */ |
45 | #include "qapi/visitor.h" | 45 | + 0x41, 0x00, 0x00, 0x39, /* loop: strb w1, [x2] *TXDAT = 'T' */ |
46 | #include "qemu/module.h" | 46 | + 0xff, 0xff, 0xff, 0x17, /* b -4 (loop) */ |
47 | #include "hw/registerfields.h" | 47 | }; |
48 | +#include "trace.h" | 48 | |
49 | 49 | static const uint8_t kernel_nrf51[] = { | |
50 | FIELD(CONFIG, SHUTDOWN_MODE, 0, 1) | ||
51 | FIELD(CONFIG, THERMOSTAT_MODE, 1, 1) | ||
52 | @@ -XXX,XX +XXX,XX @@ static void tmp105_read(TMP105State *s) | ||
53 | s->buf[s->len++] = ((uint16_t) s->limit[1]) >> 0; | ||
54 | break; | ||
55 | } | ||
56 | + | ||
57 | + trace_tmp105_read(s->i2c.address, s->pointer); | ||
58 | } | ||
59 | |||
60 | static void tmp105_write(TMP105State *s) | ||
61 | { | ||
62 | + trace_tmp105_write(s->i2c.address, s->pointer); | ||
63 | + | ||
64 | switch (s->pointer & 3) { | ||
65 | case TMP105_REG_TEMPERATURE: | ||
66 | break; | ||
67 | |||
68 | case TMP105_REG_CONFIG: | ||
69 | if (FIELD_EX8(s->buf[0] & ~s->config, CONFIG, SHUTDOWN_MODE)) { | ||
70 | - printf("%s: TMP105 shutdown\n", __func__); | ||
71 | + trace_tmp105_write_shutdown(s->i2c.address); | ||
72 | } | ||
73 | s->config = FIELD_DP8(s->buf[0], CONFIG, ONE_SHOT, 0); | ||
74 | s->faults = tmp105_faultq[FIELD_EX8(s->config, CONFIG, FAULT_QUEUE)]; | ||
75 | diff --git a/hw/sensor/trace-events b/hw/sensor/trace-events | ||
76 | new file mode 100644 | ||
77 | index XXXXXXX..XXXXXXX | ||
78 | --- /dev/null | ||
79 | +++ b/hw/sensor/trace-events | ||
80 | @@ -XXX,XX +XXX,XX @@ | ||
81 | +# See docs/devel/tracing.rst for syntax documentation. | ||
82 | + | ||
83 | +# tmp105.c | ||
84 | +tmp105_read(uint8_t dev, uint8_t addr) "device: 0x%02x, addr: 0x%02x" | ||
85 | +tmp105_write(uint8_t dev, uint8_t addr) "device: 0x%02x, addr 0x%02x" | ||
86 | +tmp105_write_shutdown(uint8_t dev) "device: 0x%02x" | ||
87 | -- | 50 | -- |
88 | 2.34.1 | 51 | 2.34.1 |
89 | 52 | ||
90 | 53 | diff view generated by jsdifflib |
1 | Currently we call cpu_put_fsr(0) in sparc_cpu_realizefn(), which | 1 | From: Philippe Mathieu-Daudé <philmd@linaro.org> |
---|---|---|---|
2 | initializes various fields in the CPU struct: | ||
3 | * fsr_cexc_ftt | ||
4 | * fcc[] | ||
5 | * fsr_qne | ||
6 | * fsr | ||
7 | It also sets the rounding mode in env->fp_status. | ||
8 | 2 | ||
9 | This is largely pointless, because when we later reset the CPU | 3 | In the next commit we are going to use a different value |
10 | this will zero out all the fields up until the "end_reset_fields" | 4 | for the $w1 register, maintaining the same $x2 value. In |
11 | label, which includes all of these (but not fp_status!) | 5 | order to keep the next commit trivial to review, set $x2 |
6 | before $w1. | ||
12 | 7 | ||
13 | Move the cpu_put_fsr(env, 0) call to reset, because that expresses | 8 | Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org> |
14 | the logical requirement: we want to reset FSR to 0 on every reset. | 9 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
15 | This isn't a behaviour change because the fields are all zero anyway. | 10 | Reviewed-by: Fabiano Rosas <farosas@suse.de> |
16 | |||
17 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 11 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
18 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
19 | Reviewed-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk> | ||
20 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
21 | Message-id: 20241025141254.2141506-12-peter.maydell@linaro.org | ||
22 | --- | 12 | --- |
23 | target/sparc/cpu.c | 2 +- | 13 | tests/qtest/boot-serial-test.c | 2 +- |
24 | 1 file changed, 1 insertion(+), 1 deletion(-) | 14 | 1 file changed, 1 insertion(+), 1 deletion(-) |
25 | 15 | ||
26 | diff --git a/target/sparc/cpu.c b/target/sparc/cpu.c | 16 | diff --git a/tests/qtest/boot-serial-test.c b/tests/qtest/boot-serial-test.c |
27 | index XXXXXXX..XXXXXXX 100644 | 17 | index XXXXXXX..XXXXXXX 100644 |
28 | --- a/target/sparc/cpu.c | 18 | --- a/tests/qtest/boot-serial-test.c |
29 | +++ b/target/sparc/cpu.c | 19 | +++ b/tests/qtest/boot-serial-test.c |
30 | @@ -XXX,XX +XXX,XX @@ static void sparc_cpu_reset_hold(Object *obj, ResetType type) | 20 | @@ -XXX,XX +XXX,XX @@ static const uint8_t bios_raspi2[] = { |
31 | env->npc = env->pc + 4; | 21 | }; |
32 | #endif | 22 | |
33 | env->cache_control = 0; | 23 | static const uint8_t kernel_aarch64[] = { |
34 | + cpu_put_fsr(env, 0); | 24 | - 0x81, 0x0a, 0x80, 0x52, /* mov w1, #'T' */ |
35 | } | 25 | 0x02, 0x20, 0xa1, 0xd2, /* mov x2, #0x9000000 Load UART0 */ |
36 | 26 | + 0x81, 0x0a, 0x80, 0x52, /* mov w1, #'T' */ | |
37 | #ifndef CONFIG_USER_ONLY | 27 | 0x41, 0x00, 0x00, 0x39, /* loop: strb w1, [x2] *TXDAT = 'T' */ |
38 | @@ -XXX,XX +XXX,XX @@ static void sparc_cpu_realizefn(DeviceState *dev, Error **errp) | 28 | 0xff, 0xff, 0xff, 0x17, /* b -4 (loop) */ |
39 | env->version |= env->def.maxtl << 8; | 29 | }; |
40 | env->version |= env->def.nwindows - 1; | ||
41 | #endif | ||
42 | - cpu_put_fsr(env, 0); | ||
43 | |||
44 | cpu_exec_realizefn(cs, &local_err); | ||
45 | if (local_err != NULL) { | ||
46 | -- | 30 | -- |
47 | 2.34.1 | 31 | 2.34.1 |
48 | 32 | ||
49 | 33 | diff view generated by jsdifflib |
1 | From: Bernhard Beschow <shentey@gmail.com> | 1 | From: Philippe Mathieu-Daudé <philmd@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | 3 | The tests using the PL011 UART of the virt and raspi machines |
4 | Signed-off-by: Bernhard Beschow <shentey@gmail.com> | 4 | weren't properly enabling the UART and its transmitter prior |
5 | Message-id: 20241103143330.123596-2-shentey@gmail.com | 5 | to sending characters. Follow the PL011 manual initialization |
6 | recommendation by setting the proper bits of the control register. | ||
7 | |||
8 | Update the ASM code by prefixing: | ||
9 | |||
10 | *UART_CTRL = UART_ENABLE | TX_ENABLE; | ||
11 | |||
12 | to: | ||
13 | |||
14 | while (true) { | ||
15 | *UART_DATA = 'T'; | ||
16 | } | ||
17 | |||
18 | Note, since commit 51b61dd4d56 ("hw/char/pl011: Warn when using | ||
19 | disabled transmitter") incomplete PL011 initialization can be | ||
20 | logged using the '-d guest_errors' command line option. | ||
21 | |||
22 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
23 | Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
6 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 24 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
7 | --- | 25 | --- |
8 | hw/rtc/ds1338.c | 6 ++++++ | 26 | tests/qtest/boot-serial-test.c | 7 ++++++- |
9 | hw/rtc/trace-events | 4 ++++ | 27 | 1 file changed, 6 insertions(+), 1 deletion(-) |
10 | 2 files changed, 10 insertions(+) | ||
11 | 28 | ||
12 | diff --git a/hw/rtc/ds1338.c b/hw/rtc/ds1338.c | 29 | diff --git a/tests/qtest/boot-serial-test.c b/tests/qtest/boot-serial-test.c |
13 | index XXXXXXX..XXXXXXX 100644 | 30 | index XXXXXXX..XXXXXXX 100644 |
14 | --- a/hw/rtc/ds1338.c | 31 | --- a/tests/qtest/boot-serial-test.c |
15 | +++ b/hw/rtc/ds1338.c | 32 | +++ b/tests/qtest/boot-serial-test.c |
16 | @@ -XXX,XX +XXX,XX @@ | 33 | @@ -XXX,XX +XXX,XX @@ static const uint8_t kernel_plml605[] = { |
17 | #include "qemu/module.h" | 34 | }; |
18 | #include "qom/object.h" | 35 | |
19 | #include "sysemu/rtc.h" | 36 | static const uint8_t bios_raspi2[] = { |
20 | +#include "trace.h" | 37 | - 0x08, 0x30, 0x9f, 0xe5, /* ldr r3, [pc, #8] Get &UART0 */ |
21 | 38 | + 0x10, 0x30, 0x9f, 0xe5, /* ldr r3, [pc, #16] Get &UART0 */ | |
22 | /* Size of NVRAM including both the user-accessible area and the | 39 | + 0x10, 0x20, 0x9f, 0xe5, /* ldr r2, [pc, #16] Get &CR */ |
23 | * secondary register area. | 40 | + 0xb0, 0x23, 0xc3, 0xe1, /* strh r2, [r3, #48] Set CR */ |
24 | @@ -XXX,XX +XXX,XX @@ static uint8_t ds1338_recv(I2CSlave *i2c) | 41 | 0x54, 0x20, 0xa0, 0xe3, /* mov r2, #'T' */ |
25 | uint8_t res; | 42 | 0x00, 0x20, 0xc3, 0xe5, /* loop: strb r2, [r3] *TXDAT = 'T' */ |
26 | 43 | 0xff, 0xff, 0xff, 0xea, /* b -4 (loop) */ | |
27 | res = s->nvram[s->ptr]; | 44 | 0x00, 0x10, 0x20, 0x3f, /* UART0: 0x3f201000 */ |
28 | + | 45 | + 0x01, 0x01, 0x00, 0x00, /* CR: 0x101 = UARTEN|TXE */ |
29 | + trace_ds1338_recv(s->ptr, res); | 46 | }; |
30 | + | 47 | |
31 | inc_regptr(s); | 48 | static const uint8_t kernel_aarch64[] = { |
32 | return res; | 49 | 0x02, 0x20, 0xa1, 0xd2, /* mov x2, #0x9000000 Load UART0 */ |
33 | } | 50 | + 0x21, 0x20, 0x80, 0x52, /* mov w1, 0x101 CR = UARTEN|TXE */ |
34 | @@ -XXX,XX +XXX,XX @@ static int ds1338_send(I2CSlave *i2c, uint8_t data) | 51 | + 0x41, 0x60, 0x00, 0x79, /* strh w1, [x2, #48] Set CR */ |
35 | { | 52 | 0x81, 0x0a, 0x80, 0x52, /* mov w1, #'T' */ |
36 | DS1338State *s = DS1338(i2c); | 53 | 0x41, 0x00, 0x00, 0x39, /* loop: strb w1, [x2] *TXDAT = 'T' */ |
37 | 54 | 0xff, 0xff, 0xff, 0x17, /* b -4 (loop) */ | |
38 | + trace_ds1338_send(s->ptr, data); | ||
39 | + | ||
40 | if (s->addr_byte) { | ||
41 | s->ptr = data & (NVRAM_SIZE - 1); | ||
42 | s->addr_byte = false; | ||
43 | diff --git a/hw/rtc/trace-events b/hw/rtc/trace-events | ||
44 | index XXXXXXX..XXXXXXX 100644 | ||
45 | --- a/hw/rtc/trace-events | ||
46 | +++ b/hw/rtc/trace-events | ||
47 | @@ -XXX,XX +XXX,XX @@ pl031_set_alarm(uint32_t ticks) "alarm set for %u ticks" | ||
48 | aspeed_rtc_read(uint64_t addr, uint64_t value) "addr 0x%02" PRIx64 " value 0x%08" PRIx64 | ||
49 | aspeed_rtc_write(uint64_t addr, uint64_t value) "addr 0x%02" PRIx64 " value 0x%08" PRIx64 | ||
50 | |||
51 | +# ds1338.c | ||
52 | +ds1338_recv(uint32_t addr, uint8_t value) "[0x%" PRIx32 "] -> 0x%02" PRIx8 | ||
53 | +ds1338_send(uint32_t addr, uint8_t value) "[0x%" PRIx32 "] <- 0x%02" PRIx8 | ||
54 | + | ||
55 | # m48t59.c | ||
56 | m48txx_nvram_io_read(uint64_t addr, uint64_t value) "io read addr:0x%04" PRIx64 " value:0x%02" PRIx64 | ||
57 | m48txx_nvram_io_write(uint64_t addr, uint64_t value) "io write addr:0x%04" PRIx64 " value:0x%02" PRIx64 | ||
58 | -- | 55 | -- |
59 | 2.34.1 | 56 | 2.34.1 |
60 | 57 | ||
61 | 58 | diff view generated by jsdifflib |
1 | IEEE 754 does not define a fixed rule for which NaN to pick as the | 1 | helper.c includes some small TCG helper functions used for mostly |
---|---|---|---|
2 | result if both operands of a 2-operand operation are NaNs. As a | 2 | arithmetic instructions. These are TCG only and there's no need for |
3 | result different architectures have ended up with different rules for | 3 | them to be in the large and unwieldy helper.c. Move them out to |
4 | propagating NaNs. | 4 | their own source file in the tcg/ subdirectory, together with the |
5 | op_addsub.h multiply-included template header that they use. | ||
5 | 6 | ||
6 | QEMU currently hardcodes the NaN propagation logic into the binary | 7 | Since we are moving op_addsub.h, we take the opportunity to |
7 | because pickNaN() has an ifdef ladder for different targets. We want | 8 | give it a name which matches our convention for files which |
8 | to make the propagation rule instead be selectable at runtime, | 9 | are not true header files but which are #included from other |
9 | because: | 10 | C files: op_addsub.c.inc. |
10 | * this will let us have multiple targets in one QEMU binary | ||
11 | * the Arm FEAT_AFP architectural feature includes letting | ||
12 | the guest select a NaN propagation rule at runtime | ||
13 | * x86 specifies different propagation rules for x87 FPU ops | ||
14 | and for SSE ops, and specifying the rule in the float_status | ||
15 | would let us emulate this, instead of wrongly using the | ||
16 | x87 rules everywhere | ||
17 | 11 | ||
18 | In this commit we add an enum for the propagation rule, the field in | 12 | (Ironically, this means that helper.c no longer contains |
19 | float_status, and the corresponding getters and setters. We change | 13 | any TCG helper function definitions at all.) |
20 | pickNaN to honour this, but because all targets still leave this | ||
21 | field at its default 0 value, the fallback logic will pick the rule | ||
22 | type with the old ifdef ladder. | ||
23 | |||
24 | It's valid not to set a propagation rule if default_nan_mode is | ||
25 | enabled, because in that case there's no need to pick a NaN; all the | ||
26 | callers of pickNaN() catch this case and skip calling it. So we can | ||
27 | already assert that we don't get into the "no rule defined" codepath | ||
28 | for our four targets which always set default_nan_mode: Hexagon, | ||
29 | RiscV, SH4 and Tricore, and for the one target which does not have FP | ||
30 | at all: avr. These targets will not need to be updated to call | ||
31 | set_float_2nan_prop_rule(). | ||
32 | 14 | ||
33 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 15 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
34 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 16 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
35 | Message-id: 20241025141254.2141506-2-peter.maydell@linaro.org | 17 | Message-id: 20250110131211.2546314-1-peter.maydell@linaro.org |
18 | Reviewed-by: Alex Bennée <alex.bennee@linaro.org> | ||
36 | --- | 19 | --- |
37 | include/fpu/softfloat-helpers.h | 11 ++ | 20 | target/arm/helper.c | 285 ----------------- |
38 | include/fpu/softfloat-types.h | 42 ++++++ | 21 | target/arm/tcg/arith_helper.c | 296 ++++++++++++++++++ |
39 | fpu/softfloat-specialize.c.inc | 229 ++++++++++++++++++-------------- | 22 | .../arm/{op_addsub.h => tcg/op_addsub.c.inc} | 0 |
40 | 3 files changed, 185 insertions(+), 97 deletions(-) | 23 | target/arm/tcg/meson.build | 1 + |
24 | 4 files changed, 297 insertions(+), 285 deletions(-) | ||
25 | create mode 100644 target/arm/tcg/arith_helper.c | ||
26 | rename target/arm/{op_addsub.h => tcg/op_addsub.c.inc} (100%) | ||
41 | 27 | ||
42 | diff --git a/include/fpu/softfloat-helpers.h b/include/fpu/softfloat-helpers.h | 28 | diff --git a/target/arm/helper.c b/target/arm/helper.c |
43 | index XXXXXXX..XXXXXXX 100644 | 29 | index XXXXXXX..XXXXXXX 100644 |
44 | --- a/include/fpu/softfloat-helpers.h | 30 | --- a/target/arm/helper.c |
45 | +++ b/include/fpu/softfloat-helpers.h | 31 | +++ b/target/arm/helper.c |
46 | @@ -XXX,XX +XXX,XX @@ static inline void set_floatx80_rounding_precision(FloatX80RoundPrec val, | 32 | @@ -XXX,XX +XXX,XX @@ |
47 | status->floatx80_rounding_precision = val; | 33 | #include "qemu/main-loop.h" |
34 | #include "qemu/timer.h" | ||
35 | #include "qemu/bitops.h" | ||
36 | -#include "qemu/crc32c.h" | ||
37 | #include "qemu/qemu-print.h" | ||
38 | #include "exec/exec-all.h" | ||
39 | #include "exec/translation-block.h" | ||
40 | -#include <zlib.h> /* for crc32 */ | ||
41 | #include "hw/irq.h" | ||
42 | #include "system/cpu-timers.h" | ||
43 | #include "system/kvm.h" | ||
44 | @@ -XXX,XX +XXX,XX @@ ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va, | ||
45 | }; | ||
48 | } | 46 | } |
49 | 47 | ||
50 | +static inline void set_float_2nan_prop_rule(Float2NaNPropRule rule, | 48 | -/* |
51 | + float_status *status) | 49 | - * Note that signed overflow is undefined in C. The following routines are |
52 | +{ | 50 | - * careful to use unsigned types where modulo arithmetic is required. |
53 | + status->float_2nan_prop_rule = rule; | 51 | - * Failure to do so _will_ break on newer gcc. |
54 | +} | 52 | - */ |
55 | + | 53 | - |
56 | static inline void set_flush_to_zero(bool val, float_status *status) | 54 | -/* Signed saturating arithmetic. */ |
57 | { | 55 | - |
58 | status->flush_to_zero = val; | 56 | -/* Perform 16-bit signed saturating addition. */ |
59 | @@ -XXX,XX +XXX,XX @@ get_floatx80_rounding_precision(float_status *status) | 57 | -static inline uint16_t add16_sat(uint16_t a, uint16_t b) |
60 | return status->floatx80_rounding_precision; | 58 | -{ |
61 | } | 59 | - uint16_t res; |
62 | 60 | - | |
63 | +static inline Float2NaNPropRule get_float_2nan_prop_rule(float_status *status) | 61 | - res = a + b; |
64 | +{ | 62 | - if (((res ^ a) & 0x8000) && !((a ^ b) & 0x8000)) { |
65 | + return status->float_2nan_prop_rule; | 63 | - if (a & 0x8000) { |
66 | +} | 64 | - res = 0x8000; |
67 | + | 65 | - } else { |
68 | static inline bool get_flush_to_zero(float_status *status) | 66 | - res = 0x7fff; |
69 | { | 67 | - } |
70 | return status->flush_to_zero; | 68 | - } |
71 | diff --git a/include/fpu/softfloat-types.h b/include/fpu/softfloat-types.h | 69 | - return res; |
72 | index XXXXXXX..XXXXXXX 100644 | 70 | -} |
73 | --- a/include/fpu/softfloat-types.h | 71 | - |
74 | +++ b/include/fpu/softfloat-types.h | 72 | -/* Perform 8-bit signed saturating addition. */ |
75 | @@ -XXX,XX +XXX,XX @@ typedef enum __attribute__((__packed__)) { | 73 | -static inline uint8_t add8_sat(uint8_t a, uint8_t b) |
76 | floatx80_precision_s, | 74 | -{ |
77 | } FloatX80RoundPrec; | 75 | - uint8_t res; |
78 | 76 | - | |
79 | +/* | 77 | - res = a + b; |
80 | + * 2-input NaN propagation rule. Individual architectures have | 78 | - if (((res ^ a) & 0x80) && !((a ^ b) & 0x80)) { |
81 | + * different rules for which input NaN is propagated to the output | 79 | - if (a & 0x80) { |
82 | + * when there is more than one NaN on the input. | 80 | - res = 0x80; |
83 | + * | 81 | - } else { |
84 | + * If default_nan_mode is enabled then it is valid not to set a | 82 | - res = 0x7f; |
85 | + * NaN propagation rule, because the softfloat code guarantees | 83 | - } |
86 | + * not to try to pick a NaN to propagate in default NaN mode. | 84 | - } |
87 | + * | 85 | - return res; |
88 | + * For transition, currently the 'none' rule will cause us to | 86 | -} |
89 | + * fall back to picking the propagation rule based on the existing | 87 | - |
90 | + * ifdef ladder. When all targets are converted it will be an error | 88 | -/* Perform 16-bit signed saturating subtraction. */ |
91 | + * not to set the rule in float_status unless in default_nan_mode, | 89 | -static inline uint16_t sub16_sat(uint16_t a, uint16_t b) |
92 | + * and we will assert if we need to handle an input NaN and no | 90 | -{ |
93 | + * rule was selected. | 91 | - uint16_t res; |
94 | + */ | 92 | - |
95 | +typedef enum __attribute__((__packed__)) { | 93 | - res = a - b; |
96 | + /* No propagation rule specified */ | 94 | - if (((res ^ a) & 0x8000) && ((a ^ b) & 0x8000)) { |
97 | + float_2nan_prop_none = 0, | 95 | - if (a & 0x8000) { |
98 | + /* Prefer SNaN over QNaN, then operand A over B */ | 96 | - res = 0x8000; |
99 | + float_2nan_prop_s_ab, | 97 | - } else { |
100 | + /* Prefer SNaN over QNaN, then operand B over A */ | 98 | - res = 0x7fff; |
101 | + float_2nan_prop_s_ba, | 99 | - } |
102 | + /* Prefer A over B regardless of SNaN vs QNaN */ | 100 | - } |
103 | + float_2nan_prop_ab, | 101 | - return res; |
104 | + /* Prefer B over A regardless of SNaN vs QNaN */ | 102 | -} |
105 | + float_2nan_prop_ba, | 103 | - |
106 | + /* | 104 | -/* Perform 8-bit signed saturating subtraction. */ |
107 | + * This implements x87 NaN propagation rules: | 105 | -static inline uint8_t sub8_sat(uint8_t a, uint8_t b) |
108 | + * SNaN + QNaN => return the QNaN | 106 | -{ |
109 | + * two SNaNs => return the one with the larger significand, silenced | 107 | - uint8_t res; |
110 | + * two QNaNs => return the one with the larger significand | 108 | - |
111 | + * SNaN and a non-NaN => return the SNaN, silenced | 109 | - res = a - b; |
112 | + * QNaN and a non-NaN => return the QNaN | 110 | - if (((res ^ a) & 0x80) && ((a ^ b) & 0x80)) { |
113 | + * | 111 | - if (a & 0x80) { |
114 | + * If we get down to comparing significands and they are the same, | 112 | - res = 0x80; |
115 | + * return the NaN with the positive sign bit (if any). | 113 | - } else { |
116 | + */ | 114 | - res = 0x7f; |
117 | + float_2nan_prop_x87, | 115 | - } |
118 | +} Float2NaNPropRule; | 116 | - } |
119 | + | 117 | - return res; |
120 | /* | 118 | -} |
121 | * Floating Point Status. Individual architectures may maintain | 119 | - |
122 | * several versions of float_status for different functions. The | 120 | -#define ADD16(a, b, n) RESULT(add16_sat(a, b), n, 16); |
123 | @@ -XXX,XX +XXX,XX @@ typedef struct float_status { | 121 | -#define SUB16(a, b, n) RESULT(sub16_sat(a, b), n, 16); |
124 | uint16_t float_exception_flags; | 122 | -#define ADD8(a, b, n) RESULT(add8_sat(a, b), n, 8); |
125 | FloatRoundMode float_rounding_mode; | 123 | -#define SUB8(a, b, n) RESULT(sub8_sat(a, b), n, 8); |
126 | FloatX80RoundPrec floatx80_rounding_precision; | 124 | -#define PFX q |
127 | + Float2NaNPropRule float_2nan_prop_rule; | 125 | - |
128 | bool tininess_before_rounding; | 126 | -#include "op_addsub.h" |
129 | /* should denormalised results go to zero and set the inexact flag? */ | 127 | - |
130 | bool flush_to_zero; | 128 | -/* Unsigned saturating arithmetic. */ |
131 | diff --git a/fpu/softfloat-specialize.c.inc b/fpu/softfloat-specialize.c.inc | 129 | -static inline uint16_t add16_usat(uint16_t a, uint16_t b) |
132 | index XXXXXXX..XXXXXXX 100644 | 130 | -{ |
133 | --- a/fpu/softfloat-specialize.c.inc | 131 | - uint16_t res; |
134 | +++ b/fpu/softfloat-specialize.c.inc | 132 | - res = a + b; |
135 | @@ -XXX,XX +XXX,XX @@ bool float32_is_signaling_nan(float32 a_, float_status *status) | 133 | - if (res < a) { |
136 | static int pickNaN(FloatClass a_cls, FloatClass b_cls, | 134 | - res = 0xffff; |
137 | bool aIsLargerSignificand, float_status *status) | 135 | - } |
138 | { | 136 | - return res; |
139 | -#if defined(TARGET_ARM) || defined(TARGET_MIPS) || defined(TARGET_HPPA) || \ | 137 | -} |
140 | - defined(TARGET_LOONGARCH64) || defined(TARGET_S390X) | 138 | - |
141 | - /* ARM mandated NaN propagation rules (see FPProcessNaNs()), take | 139 | -static inline uint16_t sub16_usat(uint16_t a, uint16_t b) |
142 | - * the first of: | 140 | -{ |
143 | - * 1. A if it is signaling | 141 | - if (a > b) { |
144 | - * 2. B if it is signaling | 142 | - return a - b; |
145 | - * 3. A (quiet) | ||
146 | - * 4. B (quiet) | ||
147 | - * A signaling NaN is always quietened before returning it. | ||
148 | - */ | ||
149 | - /* According to MIPS specifications, if one of the two operands is | ||
150 | - * a sNaN, a new qNaN has to be generated. This is done in | ||
151 | - * floatXX_silence_nan(). For qNaN inputs the specifications | ||
152 | - * says: "When possible, this QNaN result is one of the operand QNaN | ||
153 | - * values." In practice it seems that most implementations choose | ||
154 | - * the first operand if both operands are qNaN. In short this gives | ||
155 | - * the following rules: | ||
156 | - * 1. A if it is signaling | ||
157 | - * 2. B if it is signaling | ||
158 | - * 3. A (quiet) | ||
159 | - * 4. B (quiet) | ||
160 | - * A signaling NaN is always silenced before returning it. | ||
161 | - */ | ||
162 | - if (is_snan(a_cls)) { | ||
163 | - return 0; | ||
164 | - } else if (is_snan(b_cls)) { | ||
165 | - return 1; | ||
166 | - } else if (is_qnan(a_cls)) { | ||
167 | - return 0; | ||
168 | - } else { | ||
169 | - return 1; | ||
170 | - } | ||
171 | -#elif defined(TARGET_PPC) || defined(TARGET_M68K) | ||
172 | - /* PowerPC propagation rules: | ||
173 | - * 1. A if it sNaN or qNaN | ||
174 | - * 2. B if it sNaN or qNaN | ||
175 | - * A signaling NaN is always silenced before returning it. | ||
176 | - */ | ||
177 | - /* M68000 FAMILY PROGRAMMER'S REFERENCE MANUAL | ||
178 | - * 3.4 FLOATING-POINT INSTRUCTION DETAILS | ||
179 | - * If either operand, but not both operands, of an operation is a | ||
180 | - * nonsignaling NaN, then that NaN is returned as the result. If both | ||
181 | - * operands are nonsignaling NaNs, then the destination operand | ||
182 | - * nonsignaling NaN is returned as the result. | ||
183 | - * If either operand to an operation is a signaling NaN (SNaN), then the | ||
184 | - * SNaN bit is set in the FPSR EXC byte. If the SNaN exception enable bit | ||
185 | - * is set in the FPCR ENABLE byte, then the exception is taken and the | ||
186 | - * destination is not modified. If the SNaN exception enable bit is not | ||
187 | - * set, setting the SNaN bit in the operand to a one converts the SNaN to | ||
188 | - * a nonsignaling NaN. The operation then continues as described in the | ||
189 | - * preceding paragraph for nonsignaling NaNs. | ||
190 | - */ | ||
191 | - if (is_nan(a_cls)) { | ||
192 | - return 0; | ||
193 | - } else { | ||
194 | - return 1; | ||
195 | - } | ||
196 | -#elif defined(TARGET_SPARC) | ||
197 | - /* Prefer SNaN over QNaN, order B then A. */ | ||
198 | - if (is_snan(b_cls)) { | ||
199 | - return 1; | ||
200 | - } else if (is_snan(a_cls)) { | ||
201 | - return 0; | ||
202 | - } else if (is_qnan(b_cls)) { | ||
203 | - return 1; | ||
204 | - } else { | 143 | - } else { |
205 | - return 0; | 144 | - return 0; |
206 | - } | 145 | - } |
207 | -#elif defined(TARGET_XTENSA) | 146 | -} |
208 | + Float2NaNPropRule rule = status->float_2nan_prop_rule; | 147 | - |
209 | + | 148 | -static inline uint8_t add8_usat(uint8_t a, uint8_t b) |
210 | /* | 149 | -{ |
211 | - * Xtensa has two NaN propagation modes. | 150 | - uint8_t res; |
212 | - * Which one is active is controlled by float_status::use_first_nan. | 151 | - res = a + b; |
213 | + * We guarantee not to require the target to tell us how to | 152 | - if (res < a) { |
214 | + * pick a NaN if we're always returning the default NaN. | 153 | - res = 0xff; |
215 | */ | 154 | - } |
216 | - if (status->use_first_nan) { | 155 | - return res; |
217 | + assert(!status->default_nan_mode); | 156 | -} |
218 | + | 157 | - |
219 | + if (rule == float_2nan_prop_none) { | 158 | -static inline uint8_t sub8_usat(uint8_t a, uint8_t b) |
220 | + /* target didn't set the rule: fall back to old ifdef choices */ | 159 | -{ |
221 | +#if defined(TARGET_AVR) || defined(TARGET_HEXAGON) \ | 160 | - if (a > b) { |
222 | + || defined(TARGET_RISCV) || defined(TARGET_SH4) \ | 161 | - return a - b; |
223 | + || defined(TARGET_TRICORE) | 162 | - } else { |
224 | + g_assert_not_reached(); | 163 | - return 0; |
225 | +#elif defined(TARGET_ARM) || defined(TARGET_MIPS) || defined(TARGET_HPPA) || \ | 164 | - } |
226 | + defined(TARGET_LOONGARCH64) || defined(TARGET_S390X) | 165 | -} |
227 | + /* | 166 | - |
228 | + * ARM mandated NaN propagation rules (see FPProcessNaNs()), take | 167 | -#define ADD16(a, b, n) RESULT(add16_usat(a, b), n, 16); |
229 | + * the first of: | 168 | -#define SUB16(a, b, n) RESULT(sub16_usat(a, b), n, 16); |
230 | + * 1. A if it is signaling | 169 | -#define ADD8(a, b, n) RESULT(add8_usat(a, b), n, 8); |
231 | + * 2. B if it is signaling | 170 | -#define SUB8(a, b, n) RESULT(sub8_usat(a, b), n, 8); |
232 | + * 3. A (quiet) | 171 | -#define PFX uq |
233 | + * 4. B (quiet) | 172 | - |
234 | + * A signaling NaN is always quietened before returning it. | 173 | -#include "op_addsub.h" |
235 | + */ | 174 | - |
236 | + /* | 175 | -/* Signed modulo arithmetic. */ |
237 | + * According to MIPS specifications, if one of the two operands is | 176 | -#define SARITH16(a, b, n, op) do { \ |
238 | + * a sNaN, a new qNaN has to be generated. This is done in | 177 | - int32_t sum; \ |
239 | + * floatXX_silence_nan(). For qNaN inputs the specifications | 178 | - sum = (int32_t)(int16_t)(a) op (int32_t)(int16_t)(b); \ |
240 | + * says: "When possible, this QNaN result is one of the operand QNaN | 179 | - RESULT(sum, n, 16); \ |
241 | + * values." In practice it seems that most implementations choose | 180 | - if (sum >= 0) \ |
242 | + * the first operand if both operands are qNaN. In short this gives | 181 | - ge |= 3 << (n * 2); \ |
243 | + * the following rules: | 182 | - } while (0) |
244 | + * 1. A if it is signaling | 183 | - |
245 | + * 2. B if it is signaling | 184 | -#define SARITH8(a, b, n, op) do { \ |
246 | + * 3. A (quiet) | 185 | - int32_t sum; \ |
247 | + * 4. B (quiet) | 186 | - sum = (int32_t)(int8_t)(a) op (int32_t)(int8_t)(b); \ |
248 | + * A signaling NaN is always silenced before returning it. | 187 | - RESULT(sum, n, 8); \ |
249 | + */ | 188 | - if (sum >= 0) \ |
250 | + rule = float_2nan_prop_s_ab; | 189 | - ge |= 1 << n; \ |
251 | +#elif defined(TARGET_PPC) || defined(TARGET_M68K) | 190 | - } while (0) |
252 | + /* | 191 | - |
253 | + * PowerPC propagation rules: | 192 | - |
254 | + * 1. A if it sNaN or qNaN | 193 | -#define ADD16(a, b, n) SARITH16(a, b, n, +) |
255 | + * 2. B if it sNaN or qNaN | 194 | -#define SUB16(a, b, n) SARITH16(a, b, n, -) |
256 | + * A signaling NaN is always silenced before returning it. | 195 | -#define ADD8(a, b, n) SARITH8(a, b, n, +) |
257 | + */ | 196 | -#define SUB8(a, b, n) SARITH8(a, b, n, -) |
258 | + /* | 197 | -#define PFX s |
259 | + * M68000 FAMILY PROGRAMMER'S REFERENCE MANUAL | 198 | -#define ARITH_GE |
260 | + * 3.4 FLOATING-POINT INSTRUCTION DETAILS | 199 | - |
261 | + * If either operand, but not both operands, of an operation is a | 200 | -#include "op_addsub.h" |
262 | + * nonsignaling NaN, then that NaN is returned as the result. If both | 201 | - |
263 | + * operands are nonsignaling NaNs, then the destination operand | 202 | -/* Unsigned modulo arithmetic. */ |
264 | + * nonsignaling NaN is returned as the result. | 203 | -#define ADD16(a, b, n) do { \ |
265 | + * If either operand to an operation is a signaling NaN (SNaN), then the | 204 | - uint32_t sum; \ |
266 | + * SNaN bit is set in the FPSR EXC byte. If the SNaN exception enable bit | 205 | - sum = (uint32_t)(uint16_t)(a) + (uint32_t)(uint16_t)(b); \ |
267 | + * is set in the FPCR ENABLE byte, then the exception is taken and the | 206 | - RESULT(sum, n, 16); \ |
268 | + * destination is not modified. If the SNaN exception enable bit is not | 207 | - if ((sum >> 16) == 1) \ |
269 | + * set, setting the SNaN bit in the operand to a one converts the SNaN to | 208 | - ge |= 3 << (n * 2); \ |
270 | + * a nonsignaling NaN. The operation then continues as described in the | 209 | - } while (0) |
271 | + * preceding paragraph for nonsignaling NaNs. | 210 | - |
272 | + */ | 211 | -#define ADD8(a, b, n) do { \ |
273 | + rule = float_2nan_prop_ab; | 212 | - uint32_t sum; \ |
274 | +#elif defined(TARGET_SPARC) | 213 | - sum = (uint32_t)(uint8_t)(a) + (uint32_t)(uint8_t)(b); \ |
275 | + /* Prefer SNaN over QNaN, order B then A. */ | 214 | - RESULT(sum, n, 8); \ |
276 | + rule = float_2nan_prop_s_ba; | 215 | - if ((sum >> 8) == 1) \ |
277 | +#elif defined(TARGET_XTENSA) | 216 | - ge |= 1 << n; \ |
278 | + /* | 217 | - } while (0) |
279 | + * Xtensa has two NaN propagation modes. | 218 | - |
280 | + * Which one is active is controlled by float_status::use_first_nan. | 219 | -#define SUB16(a, b, n) do { \ |
281 | + */ | 220 | - uint32_t sum; \ |
282 | + if (status->use_first_nan) { | 221 | - sum = (uint32_t)(uint16_t)(a) - (uint32_t)(uint16_t)(b); \ |
283 | + rule = float_2nan_prop_ab; | 222 | - RESULT(sum, n, 16); \ |
223 | - if ((sum >> 16) == 0) \ | ||
224 | - ge |= 3 << (n * 2); \ | ||
225 | - } while (0) | ||
226 | - | ||
227 | -#define SUB8(a, b, n) do { \ | ||
228 | - uint32_t sum; \ | ||
229 | - sum = (uint32_t)(uint8_t)(a) - (uint32_t)(uint8_t)(b); \ | ||
230 | - RESULT(sum, n, 8); \ | ||
231 | - if ((sum >> 8) == 0) \ | ||
232 | - ge |= 1 << n; \ | ||
233 | - } while (0) | ||
234 | - | ||
235 | -#define PFX u | ||
236 | -#define ARITH_GE | ||
237 | - | ||
238 | -#include "op_addsub.h" | ||
239 | - | ||
240 | -/* Halved signed arithmetic. */ | ||
241 | -#define ADD16(a, b, n) \ | ||
242 | - RESULT(((int32_t)(int16_t)(a) + (int32_t)(int16_t)(b)) >> 1, n, 16) | ||
243 | -#define SUB16(a, b, n) \ | ||
244 | - RESULT(((int32_t)(int16_t)(a) - (int32_t)(int16_t)(b)) >> 1, n, 16) | ||
245 | -#define ADD8(a, b, n) \ | ||
246 | - RESULT(((int32_t)(int8_t)(a) + (int32_t)(int8_t)(b)) >> 1, n, 8) | ||
247 | -#define SUB8(a, b, n) \ | ||
248 | - RESULT(((int32_t)(int8_t)(a) - (int32_t)(int8_t)(b)) >> 1, n, 8) | ||
249 | -#define PFX sh | ||
250 | - | ||
251 | -#include "op_addsub.h" | ||
252 | - | ||
253 | -/* Halved unsigned arithmetic. */ | ||
254 | -#define ADD16(a, b, n) \ | ||
255 | - RESULT(((uint32_t)(uint16_t)(a) + (uint32_t)(uint16_t)(b)) >> 1, n, 16) | ||
256 | -#define SUB16(a, b, n) \ | ||
257 | - RESULT(((uint32_t)(uint16_t)(a) - (uint32_t)(uint16_t)(b)) >> 1, n, 16) | ||
258 | -#define ADD8(a, b, n) \ | ||
259 | - RESULT(((uint32_t)(uint8_t)(a) + (uint32_t)(uint8_t)(b)) >> 1, n, 8) | ||
260 | -#define SUB8(a, b, n) \ | ||
261 | - RESULT(((uint32_t)(uint8_t)(a) - (uint32_t)(uint8_t)(b)) >> 1, n, 8) | ||
262 | -#define PFX uh | ||
263 | - | ||
264 | -#include "op_addsub.h" | ||
265 | - | ||
266 | -static inline uint8_t do_usad(uint8_t a, uint8_t b) | ||
267 | -{ | ||
268 | - if (a > b) { | ||
269 | - return a - b; | ||
270 | - } else { | ||
271 | - return b - a; | ||
272 | - } | ||
273 | -} | ||
274 | - | ||
275 | -/* Unsigned sum of absolute byte differences. */ | ||
276 | -uint32_t HELPER(usad8)(uint32_t a, uint32_t b) | ||
277 | -{ | ||
278 | - uint32_t sum; | ||
279 | - sum = do_usad(a, b); | ||
280 | - sum += do_usad(a >> 8, b >> 8); | ||
281 | - sum += do_usad(a >> 16, b >> 16); | ||
282 | - sum += do_usad(a >> 24, b >> 24); | ||
283 | - return sum; | ||
284 | -} | ||
285 | - | ||
286 | -/* For ARMv6 SEL instruction. */ | ||
287 | -uint32_t HELPER(sel_flags)(uint32_t flags, uint32_t a, uint32_t b) | ||
288 | -{ | ||
289 | - uint32_t mask; | ||
290 | - | ||
291 | - mask = 0; | ||
292 | - if (flags & 1) { | ||
293 | - mask |= 0xff; | ||
294 | - } | ||
295 | - if (flags & 2) { | ||
296 | - mask |= 0xff00; | ||
297 | - } | ||
298 | - if (flags & 4) { | ||
299 | - mask |= 0xff0000; | ||
300 | - } | ||
301 | - if (flags & 8) { | ||
302 | - mask |= 0xff000000; | ||
303 | - } | ||
304 | - return (a & mask) | (b & ~mask); | ||
305 | -} | ||
306 | - | ||
307 | -/* | ||
308 | - * CRC helpers. | ||
309 | - * The upper bytes of val (above the number specified by 'bytes') must have | ||
310 | - * been zeroed out by the caller. | ||
311 | - */ | ||
312 | -uint32_t HELPER(crc32)(uint32_t acc, uint32_t val, uint32_t bytes) | ||
313 | -{ | ||
314 | - uint8_t buf[4]; | ||
315 | - | ||
316 | - stl_le_p(buf, val); | ||
317 | - | ||
318 | - /* zlib crc32 converts the accumulator and output to one's complement. */ | ||
319 | - return crc32(acc ^ 0xffffffff, buf, bytes) ^ 0xffffffff; | ||
320 | -} | ||
321 | - | ||
322 | -uint32_t HELPER(crc32c)(uint32_t acc, uint32_t val, uint32_t bytes) | ||
323 | -{ | ||
324 | - uint8_t buf[4]; | ||
325 | - | ||
326 | - stl_le_p(buf, val); | ||
327 | - | ||
328 | - /* Linux crc32c converts the output to one's complement. */ | ||
329 | - return crc32c(acc, buf, bytes) ^ 0xffffffff; | ||
330 | -} | ||
331 | |||
332 | /* | ||
333 | * Return the exception level to which FP-disabled exceptions should | ||
334 | diff --git a/target/arm/tcg/arith_helper.c b/target/arm/tcg/arith_helper.c | ||
335 | new file mode 100644 | ||
336 | index XXXXXXX..XXXXXXX | ||
337 | --- /dev/null | ||
338 | +++ b/target/arm/tcg/arith_helper.c | ||
339 | @@ -XXX,XX +XXX,XX @@ | ||
340 | +/* | ||
341 | + * ARM generic helpers for various arithmetical operations. | ||
342 | + * | ||
343 | + * This code is licensed under the GNU GPL v2 or later. | ||
344 | + * | ||
345 | + * SPDX-License-Identifier: GPL-2.0-or-later | ||
346 | + */ | ||
347 | +#include "qemu/osdep.h" | ||
348 | +#include "cpu.h" | ||
349 | +#include "exec/helper-proto.h" | ||
350 | +#include "qemu/crc32c.h" | ||
351 | +#include <zlib.h> /* for crc32 */ | ||
352 | + | ||
353 | +/* | ||
354 | + * Note that signed overflow is undefined in C. The following routines are | ||
355 | + * careful to use unsigned types where modulo arithmetic is required. | ||
356 | + * Failure to do so _will_ break on newer gcc. | ||
357 | + */ | ||
358 | + | ||
359 | +/* Signed saturating arithmetic. */ | ||
360 | + | ||
361 | +/* Perform 16-bit signed saturating addition. */ | ||
362 | +static inline uint16_t add16_sat(uint16_t a, uint16_t b) | ||
363 | +{ | ||
364 | + uint16_t res; | ||
365 | + | ||
366 | + res = a + b; | ||
367 | + if (((res ^ a) & 0x8000) && !((a ^ b) & 0x8000)) { | ||
368 | + if (a & 0x8000) { | ||
369 | + res = 0x8000; | ||
284 | + } else { | 370 | + } else { |
285 | + rule = float_2nan_prop_ba; | 371 | + res = 0x7fff; |
286 | + } | 372 | + } |
287 | +#else | 373 | + } |
288 | + rule = float_2nan_prop_x87; | 374 | + return res; |
289 | +#endif | 375 | +} |
290 | + } | 376 | + |
291 | + | 377 | +/* Perform 8-bit signed saturating addition. */ |
292 | + switch (rule) { | 378 | +static inline uint8_t add8_sat(uint8_t a, uint8_t b) |
293 | + case float_2nan_prop_s_ab: | 379 | +{ |
294 | + if (is_snan(a_cls)) { | 380 | + uint8_t res; |
295 | + return 0; | 381 | + |
296 | + } else if (is_snan(b_cls)) { | 382 | + res = a + b; |
297 | + return 1; | 383 | + if (((res ^ a) & 0x80) && !((a ^ b) & 0x80)) { |
298 | + } else if (is_qnan(a_cls)) { | 384 | + if (a & 0x80) { |
299 | + return 0; | 385 | + res = 0x80; |
300 | + } else { | 386 | + } else { |
301 | + return 1; | 387 | + res = 0x7f; |
302 | + } | 388 | + } |
303 | + break; | 389 | + } |
304 | + case float_2nan_prop_s_ba: | 390 | + return res; |
305 | + if (is_snan(b_cls)) { | 391 | +} |
306 | + return 1; | 392 | + |
307 | + } else if (is_snan(a_cls)) { | 393 | +/* Perform 16-bit signed saturating subtraction. */ |
308 | + return 0; | 394 | +static inline uint16_t sub16_sat(uint16_t a, uint16_t b) |
309 | + } else if (is_qnan(b_cls)) { | 395 | +{ |
310 | + return 1; | 396 | + uint16_t res; |
397 | + | ||
398 | + res = a - b; | ||
399 | + if (((res ^ a) & 0x8000) && ((a ^ b) & 0x8000)) { | ||
400 | + if (a & 0x8000) { | ||
401 | + res = 0x8000; | ||
311 | + } else { | 402 | + } else { |
312 | + return 0; | 403 | + res = 0x7fff; |
313 | + } | 404 | + } |
314 | + break; | 405 | + } |
315 | + case float_2nan_prop_ab: | 406 | + return res; |
316 | if (is_nan(a_cls)) { | 407 | +} |
317 | return 0; | 408 | + |
318 | } else { | 409 | +/* Perform 8-bit signed saturating subtraction. */ |
319 | return 1; | 410 | +static inline uint8_t sub8_sat(uint8_t a, uint8_t b) |
320 | } | 411 | +{ |
321 | - } else { | 412 | + uint8_t res; |
322 | + break; | 413 | + |
323 | + case float_2nan_prop_ba: | 414 | + res = a - b; |
324 | if (is_nan(b_cls)) { | 415 | + if (((res ^ a) & 0x80) && ((a ^ b) & 0x80)) { |
325 | return 1; | 416 | + if (a & 0x80) { |
326 | } else { | 417 | + res = 0x80; |
327 | return 0; | 418 | + } else { |
328 | } | 419 | + res = 0x7f; |
329 | - } | 420 | + } |
330 | -#else | 421 | + } |
331 | - /* This implements x87 NaN propagation rules: | 422 | + return res; |
332 | - * SNaN + QNaN => return the QNaN | 423 | +} |
333 | - * two SNaNs => return the one with the larger significand, silenced | 424 | + |
334 | - * two QNaNs => return the one with the larger significand | 425 | +#define ADD16(a, b, n) RESULT(add16_sat(a, b), n, 16); |
335 | - * SNaN and a non-NaN => return the SNaN, silenced | 426 | +#define SUB16(a, b, n) RESULT(sub16_sat(a, b), n, 16); |
336 | - * QNaN and a non-NaN => return the QNaN | 427 | +#define ADD8(a, b, n) RESULT(add8_sat(a, b), n, 8); |
337 | - * | 428 | +#define SUB8(a, b, n) RESULT(sub8_sat(a, b), n, 8); |
338 | - * If we get down to comparing significands and they are the same, | 429 | +#define PFX q |
339 | - * return the NaN with the positive sign bit (if any). | 430 | + |
340 | - */ | 431 | +#include "op_addsub.c.inc" |
341 | - if (is_snan(a_cls)) { | 432 | + |
342 | - if (is_snan(b_cls)) { | 433 | +/* Unsigned saturating arithmetic. */ |
343 | - return aIsLargerSignificand ? 0 : 1; | 434 | +static inline uint16_t add16_usat(uint16_t a, uint16_t b) |
344 | - } | 435 | +{ |
345 | - return is_qnan(b_cls) ? 1 : 0; | 436 | + uint16_t res; |
346 | - } else if (is_qnan(a_cls)) { | 437 | + res = a + b; |
347 | - if (is_snan(b_cls) || !is_qnan(b_cls)) { | 438 | + if (res < a) { |
348 | - return 0; | 439 | + res = 0xffff; |
349 | + break; | 440 | + } |
350 | + case float_2nan_prop_x87: | 441 | + return res; |
351 | + /* | 442 | +} |
352 | + * This implements x87 NaN propagation rules: | 443 | + |
353 | + * SNaN + QNaN => return the QNaN | 444 | +static inline uint16_t sub16_usat(uint16_t a, uint16_t b) |
354 | + * two SNaNs => return the one with the larger significand, silenced | 445 | +{ |
355 | + * two QNaNs => return the one with the larger significand | 446 | + if (a > b) { |
356 | + * SNaN and a non-NaN => return the SNaN, silenced | 447 | + return a - b; |
357 | + * QNaN and a non-NaN => return the QNaN | 448 | + } else { |
358 | + * | 449 | + return 0; |
359 | + * If we get down to comparing significands and they are the same, | 450 | + } |
360 | + * return the NaN with the positive sign bit (if any). | 451 | +} |
361 | + */ | 452 | + |
362 | + if (is_snan(a_cls)) { | 453 | +static inline uint8_t add8_usat(uint8_t a, uint8_t b) |
363 | + if (is_snan(b_cls)) { | 454 | +{ |
364 | + return aIsLargerSignificand ? 0 : 1; | 455 | + uint8_t res; |
365 | + } | 456 | + res = a + b; |
366 | + return is_qnan(b_cls) ? 1 : 0; | 457 | + if (res < a) { |
367 | + } else if (is_qnan(a_cls)) { | 458 | + res = 0xff; |
368 | + if (is_snan(b_cls) || !is_qnan(b_cls)) { | 459 | + } |
369 | + return 0; | 460 | + return res; |
370 | + } else { | 461 | +} |
371 | + return aIsLargerSignificand ? 0 : 1; | 462 | + |
372 | + } | 463 | +static inline uint8_t sub8_usat(uint8_t a, uint8_t b) |
373 | } else { | 464 | +{ |
374 | - return aIsLargerSignificand ? 0 : 1; | 465 | + if (a > b) { |
375 | + return 1; | 466 | + return a - b; |
376 | } | 467 | + } else { |
377 | - } else { | 468 | + return 0; |
378 | - return 1; | 469 | + } |
379 | + default: | 470 | +} |
380 | + g_assert_not_reached(); | 471 | + |
381 | } | 472 | +#define ADD16(a, b, n) RESULT(add16_usat(a, b), n, 16); |
382 | -#endif | 473 | +#define SUB16(a, b, n) RESULT(sub16_usat(a, b), n, 16); |
383 | } | 474 | +#define ADD8(a, b, n) RESULT(add8_usat(a, b), n, 8); |
384 | 475 | +#define SUB8(a, b, n) RESULT(sub8_usat(a, b), n, 8); | |
385 | /*---------------------------------------------------------------------------- | 476 | +#define PFX uq |
477 | + | ||
478 | +#include "op_addsub.c.inc" | ||
479 | + | ||
480 | +/* Signed modulo arithmetic. */ | ||
481 | +#define SARITH16(a, b, n, op) do { \ | ||
482 | + int32_t sum; \ | ||
483 | + sum = (int32_t)(int16_t)(a) op (int32_t)(int16_t)(b); \ | ||
484 | + RESULT(sum, n, 16); \ | ||
485 | + if (sum >= 0) \ | ||
486 | + ge |= 3 << (n * 2); \ | ||
487 | + } while (0) | ||
488 | + | ||
489 | +#define SARITH8(a, b, n, op) do { \ | ||
490 | + int32_t sum; \ | ||
491 | + sum = (int32_t)(int8_t)(a) op (int32_t)(int8_t)(b); \ | ||
492 | + RESULT(sum, n, 8); \ | ||
493 | + if (sum >= 0) \ | ||
494 | + ge |= 1 << n; \ | ||
495 | + } while (0) | ||
496 | + | ||
497 | + | ||
498 | +#define ADD16(a, b, n) SARITH16(a, b, n, +) | ||
499 | +#define SUB16(a, b, n) SARITH16(a, b, n, -) | ||
500 | +#define ADD8(a, b, n) SARITH8(a, b, n, +) | ||
501 | +#define SUB8(a, b, n) SARITH8(a, b, n, -) | ||
502 | +#define PFX s | ||
503 | +#define ARITH_GE | ||
504 | + | ||
505 | +#include "op_addsub.c.inc" | ||
506 | + | ||
507 | +/* Unsigned modulo arithmetic. */ | ||
508 | +#define ADD16(a, b, n) do { \ | ||
509 | + uint32_t sum; \ | ||
510 | + sum = (uint32_t)(uint16_t)(a) + (uint32_t)(uint16_t)(b); \ | ||
511 | + RESULT(sum, n, 16); \ | ||
512 | + if ((sum >> 16) == 1) \ | ||
513 | + ge |= 3 << (n * 2); \ | ||
514 | + } while (0) | ||
515 | + | ||
516 | +#define ADD8(a, b, n) do { \ | ||
517 | + uint32_t sum; \ | ||
518 | + sum = (uint32_t)(uint8_t)(a) + (uint32_t)(uint8_t)(b); \ | ||
519 | + RESULT(sum, n, 8); \ | ||
520 | + if ((sum >> 8) == 1) \ | ||
521 | + ge |= 1 << n; \ | ||
522 | + } while (0) | ||
523 | + | ||
524 | +#define SUB16(a, b, n) do { \ | ||
525 | + uint32_t sum; \ | ||
526 | + sum = (uint32_t)(uint16_t)(a) - (uint32_t)(uint16_t)(b); \ | ||
527 | + RESULT(sum, n, 16); \ | ||
528 | + if ((sum >> 16) == 0) \ | ||
529 | + ge |= 3 << (n * 2); \ | ||
530 | + } while (0) | ||
531 | + | ||
532 | +#define SUB8(a, b, n) do { \ | ||
533 | + uint32_t sum; \ | ||
534 | + sum = (uint32_t)(uint8_t)(a) - (uint32_t)(uint8_t)(b); \ | ||
535 | + RESULT(sum, n, 8); \ | ||
536 | + if ((sum >> 8) == 0) \ | ||
537 | + ge |= 1 << n; \ | ||
538 | + } while (0) | ||
539 | + | ||
540 | +#define PFX u | ||
541 | +#define ARITH_GE | ||
542 | + | ||
543 | +#include "op_addsub.c.inc" | ||
544 | + | ||
545 | +/* Halved signed arithmetic. */ | ||
546 | +#define ADD16(a, b, n) \ | ||
547 | + RESULT(((int32_t)(int16_t)(a) + (int32_t)(int16_t)(b)) >> 1, n, 16) | ||
548 | +#define SUB16(a, b, n) \ | ||
549 | + RESULT(((int32_t)(int16_t)(a) - (int32_t)(int16_t)(b)) >> 1, n, 16) | ||
550 | +#define ADD8(a, b, n) \ | ||
551 | + RESULT(((int32_t)(int8_t)(a) + (int32_t)(int8_t)(b)) >> 1, n, 8) | ||
552 | +#define SUB8(a, b, n) \ | ||
553 | + RESULT(((int32_t)(int8_t)(a) - (int32_t)(int8_t)(b)) >> 1, n, 8) | ||
554 | +#define PFX sh | ||
555 | + | ||
556 | +#include "op_addsub.c.inc" | ||
557 | + | ||
558 | +/* Halved unsigned arithmetic. */ | ||
559 | +#define ADD16(a, b, n) \ | ||
560 | + RESULT(((uint32_t)(uint16_t)(a) + (uint32_t)(uint16_t)(b)) >> 1, n, 16) | ||
561 | +#define SUB16(a, b, n) \ | ||
562 | + RESULT(((uint32_t)(uint16_t)(a) - (uint32_t)(uint16_t)(b)) >> 1, n, 16) | ||
563 | +#define ADD8(a, b, n) \ | ||
564 | + RESULT(((uint32_t)(uint8_t)(a) + (uint32_t)(uint8_t)(b)) >> 1, n, 8) | ||
565 | +#define SUB8(a, b, n) \ | ||
566 | + RESULT(((uint32_t)(uint8_t)(a) - (uint32_t)(uint8_t)(b)) >> 1, n, 8) | ||
567 | +#define PFX uh | ||
568 | + | ||
569 | +#include "op_addsub.c.inc" | ||
570 | + | ||
571 | +static inline uint8_t do_usad(uint8_t a, uint8_t b) | ||
572 | +{ | ||
573 | + if (a > b) { | ||
574 | + return a - b; | ||
575 | + } else { | ||
576 | + return b - a; | ||
577 | + } | ||
578 | +} | ||
579 | + | ||
580 | +/* Unsigned sum of absolute byte differences. */ | ||
581 | +uint32_t HELPER(usad8)(uint32_t a, uint32_t b) | ||
582 | +{ | ||
583 | + uint32_t sum; | ||
584 | + sum = do_usad(a, b); | ||
585 | + sum += do_usad(a >> 8, b >> 8); | ||
586 | + sum += do_usad(a >> 16, b >> 16); | ||
587 | + sum += do_usad(a >> 24, b >> 24); | ||
588 | + return sum; | ||
589 | +} | ||
590 | + | ||
591 | +/* For ARMv6 SEL instruction. */ | ||
592 | +uint32_t HELPER(sel_flags)(uint32_t flags, uint32_t a, uint32_t b) | ||
593 | +{ | ||
594 | + uint32_t mask; | ||
595 | + | ||
596 | + mask = 0; | ||
597 | + if (flags & 1) { | ||
598 | + mask |= 0xff; | ||
599 | + } | ||
600 | + if (flags & 2) { | ||
601 | + mask |= 0xff00; | ||
602 | + } | ||
603 | + if (flags & 4) { | ||
604 | + mask |= 0xff0000; | ||
605 | + } | ||
606 | + if (flags & 8) { | ||
607 | + mask |= 0xff000000; | ||
608 | + } | ||
609 | + return (a & mask) | (b & ~mask); | ||
610 | +} | ||
611 | + | ||
612 | +/* | ||
613 | + * CRC helpers. | ||
614 | + * The upper bytes of val (above the number specified by 'bytes') must have | ||
615 | + * been zeroed out by the caller. | ||
616 | + */ | ||
617 | +uint32_t HELPER(crc32)(uint32_t acc, uint32_t val, uint32_t bytes) | ||
618 | +{ | ||
619 | + uint8_t buf[4]; | ||
620 | + | ||
621 | + stl_le_p(buf, val); | ||
622 | + | ||
623 | + /* zlib crc32 converts the accumulator and output to one's complement. */ | ||
624 | + return crc32(acc ^ 0xffffffff, buf, bytes) ^ 0xffffffff; | ||
625 | +} | ||
626 | + | ||
627 | +uint32_t HELPER(crc32c)(uint32_t acc, uint32_t val, uint32_t bytes) | ||
628 | +{ | ||
629 | + uint8_t buf[4]; | ||
630 | + | ||
631 | + stl_le_p(buf, val); | ||
632 | + | ||
633 | + /* Linux crc32c converts the output to one's complement. */ | ||
634 | + return crc32c(acc, buf, bytes) ^ 0xffffffff; | ||
635 | +} | ||
636 | diff --git a/target/arm/op_addsub.h b/target/arm/tcg/op_addsub.c.inc | ||
637 | similarity index 100% | ||
638 | rename from target/arm/op_addsub.h | ||
639 | rename to target/arm/tcg/op_addsub.c.inc | ||
640 | diff --git a/target/arm/tcg/meson.build b/target/arm/tcg/meson.build | ||
641 | index XXXXXXX..XXXXXXX 100644 | ||
642 | --- a/target/arm/tcg/meson.build | ||
643 | +++ b/target/arm/tcg/meson.build | ||
644 | @@ -XXX,XX +XXX,XX @@ arm_ss.add(files( | ||
645 | 'tlb_helper.c', | ||
646 | 'vec_helper.c', | ||
647 | 'tlb-insns.c', | ||
648 | + 'arith_helper.c', | ||
649 | )) | ||
650 | |||
651 | arm_ss.add(when: 'TARGET_AARCH64', if_true: files( | ||
386 | -- | 652 | -- |
387 | 2.34.1 | 653 | 2.34.1 |
654 | |||
655 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | Explicitly set a 2-NaN propagation rule in the softfloat tests. In | ||
2 | meson.build we put -DTARGET_ARM in fpcflags, and so we should select | ||
3 | here the Arm propagation rule of float_2nan_prop_s_ab. | ||
4 | 1 | ||
5 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
6 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
7 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
8 | Message-id: 20241025141254.2141506-3-peter.maydell@linaro.org | ||
9 | --- | ||
10 | tests/fp/fp-bench.c | 2 ++ | ||
11 | tests/fp/fp-test-log2.c | 1 + | ||
12 | tests/fp/fp-test.c | 2 ++ | ||
13 | 3 files changed, 5 insertions(+) | ||
14 | |||
15 | diff --git a/tests/fp/fp-bench.c b/tests/fp/fp-bench.c | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/tests/fp/fp-bench.c | ||
18 | +++ b/tests/fp/fp-bench.c | ||
19 | @@ -XXX,XX +XXX,XX @@ static void run_bench(void) | ||
20 | { | ||
21 | bench_func_t f; | ||
22 | |||
23 | + set_float_2nan_prop_rule(float_2nan_prop_s_ab, &soft_status); | ||
24 | + | ||
25 | f = bench_funcs[operation][precision]; | ||
26 | g_assert(f); | ||
27 | f(); | ||
28 | diff --git a/tests/fp/fp-test-log2.c b/tests/fp/fp-test-log2.c | ||
29 | index XXXXXXX..XXXXXXX 100644 | ||
30 | --- a/tests/fp/fp-test-log2.c | ||
31 | +++ b/tests/fp/fp-test-log2.c | ||
32 | @@ -XXX,XX +XXX,XX @@ int main(int ac, char **av) | ||
33 | float_status qsf = {0}; | ||
34 | int i; | ||
35 | |||
36 | + set_float_2nan_prop_rule(float_2nan_prop_s_ab, &qsf); | ||
37 | set_float_rounding_mode(float_round_nearest_even, &qsf); | ||
38 | |||
39 | test.d = 0.0; | ||
40 | diff --git a/tests/fp/fp-test.c b/tests/fp/fp-test.c | ||
41 | index XXXXXXX..XXXXXXX 100644 | ||
42 | --- a/tests/fp/fp-test.c | ||
43 | +++ b/tests/fp/fp-test.c | ||
44 | @@ -XXX,XX +XXX,XX @@ void run_test(void) | ||
45 | { | ||
46 | unsigned int i; | ||
47 | |||
48 | + set_float_2nan_prop_rule(float_2nan_prop_s_ab, &qsf); | ||
49 | + | ||
50 | genCases_setLevel(test_level); | ||
51 | verCases_maxErrorCount = n_max_errors; | ||
52 | |||
53 | -- | ||
54 | 2.34.1 | ||
55 | |||
56 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | Set the 2-NaN propagation rule explicitly in the float_status words | ||
2 | we use. We wrap this plus the pre-existing setting of the | ||
3 | tininess-before-rounding flag in a new function | ||
4 | arm_set_default_fp_behaviours() to avoid repetition, since we have a | ||
5 | lot of float_status words at this point. | ||
6 | 1 | ||
7 | The situation with FPA11 emulation in linux-user is a little odd, and | ||
8 | arguably "correct" behaviour there would be to exactly match a real | ||
9 | Linux kernel's FPA11 emulation. However FPA11 emulation is | ||
10 | essentially dead at this point and so it seems better to continue | ||
11 | with QEMU's current behaviour and leave a comment describing the | ||
12 | situation. | ||
13 | |||
14 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
15 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
16 | Message-id: 20241025141254.2141506-4-peter.maydell@linaro.org | ||
17 | --- | ||
18 | linux-user/arm/nwfpe/fpa11.c | 18 ++++++++++++++++++ | ||
19 | target/arm/cpu.c | 25 +++++++++++++++++-------- | ||
20 | fpu/softfloat-specialize.c.inc | 13 ++----------- | ||
21 | 3 files changed, 37 insertions(+), 19 deletions(-) | ||
22 | |||
23 | diff --git a/linux-user/arm/nwfpe/fpa11.c b/linux-user/arm/nwfpe/fpa11.c | ||
24 | index XXXXXXX..XXXXXXX 100644 | ||
25 | --- a/linux-user/arm/nwfpe/fpa11.c | ||
26 | +++ b/linux-user/arm/nwfpe/fpa11.c | ||
27 | @@ -XXX,XX +XXX,XX @@ void resetFPA11(void) | ||
28 | #ifdef MAINTAIN_FPCR | ||
29 | fpa11->fpcr = MASK_RESET; | ||
30 | #endif | ||
31 | + | ||
32 | + /* | ||
33 | + * Real FPA11 hardware does not handle NaNs, but always takes an | ||
34 | + * exception for them to be software-emulated (ARM7500FE datasheet | ||
35 | + * section 10.4). There is no documented architectural requirement | ||
36 | + * for NaN propagation rules and it will depend on how the OS | ||
37 | + * level software emulation opted to do it. We here use prop_s_ab | ||
38 | + * which matches the later VFP hardware choice and how QEMU's | ||
39 | + * fpa11 emulation has worked in the past. The real Linux kernel | ||
40 | + * does something slightly different: arch/arm/nwfpe/softfloat-specialize | ||
41 | + * propagateFloat64NaN() has the curious behaviour that it prefers | ||
42 | + * the QNaN over the SNaN, but if both are QNaN it picks A and | ||
43 | + * if both are SNaN it picks B. In theory we could add this as | ||
44 | + * a NaN propagation rule, but in practice FPA11 emulation is so | ||
45 | + * close to totally dead that it's not worth trying to match it at | ||
46 | + * this late date. | ||
47 | + */ | ||
48 | + set_float_2nan_prop_rule(float_2nan_prop_s_ab, &fpa11->fp_status); | ||
49 | } | ||
50 | |||
51 | void SetRoundingMode(const unsigned int opcode) | ||
52 | diff --git a/target/arm/cpu.c b/target/arm/cpu.c | ||
53 | index XXXXXXX..XXXXXXX 100644 | ||
54 | --- a/target/arm/cpu.c | ||
55 | +++ b/target/arm/cpu.c | ||
56 | @@ -XXX,XX +XXX,XX @@ void arm_register_el_change_hook(ARMCPU *cpu, ARMELChangeHookFn *hook, | ||
57 | QLIST_INSERT_HEAD(&cpu->el_change_hooks, entry, node); | ||
58 | } | ||
59 | |||
60 | +/* | ||
61 | + * Set the float_status behaviour to match the Arm defaults: | ||
62 | + * * tininess-before-rounding | ||
63 | + * * 2-input NaN propagation prefers SNaN over QNaN, and then | ||
64 | + * operand A over operand B (see FPProcessNaNs() pseudocode) | ||
65 | + */ | ||
66 | +static void arm_set_default_fp_behaviours(float_status *s) | ||
67 | +{ | ||
68 | + set_float_detect_tininess(float_tininess_before_rounding, s); | ||
69 | + set_float_2nan_prop_rule(float_2nan_prop_s_ab, s); | ||
70 | +} | ||
71 | + | ||
72 | static void cp_reg_reset(gpointer key, gpointer value, gpointer opaque) | ||
73 | { | ||
74 | /* Reset a single ARMCPRegInfo register */ | ||
75 | @@ -XXX,XX +XXX,XX @@ static void arm_cpu_reset_hold(Object *obj, ResetType type) | ||
76 | set_flush_inputs_to_zero(1, &env->vfp.standard_fp_status); | ||
77 | set_default_nan_mode(1, &env->vfp.standard_fp_status); | ||
78 | set_default_nan_mode(1, &env->vfp.standard_fp_status_f16); | ||
79 | - set_float_detect_tininess(float_tininess_before_rounding, | ||
80 | - &env->vfp.fp_status); | ||
81 | - set_float_detect_tininess(float_tininess_before_rounding, | ||
82 | - &env->vfp.standard_fp_status); | ||
83 | - set_float_detect_tininess(float_tininess_before_rounding, | ||
84 | - &env->vfp.fp_status_f16); | ||
85 | - set_float_detect_tininess(float_tininess_before_rounding, | ||
86 | - &env->vfp.standard_fp_status_f16); | ||
87 | + arm_set_default_fp_behaviours(&env->vfp.fp_status); | ||
88 | + arm_set_default_fp_behaviours(&env->vfp.standard_fp_status); | ||
89 | + arm_set_default_fp_behaviours(&env->vfp.fp_status_f16); | ||
90 | + arm_set_default_fp_behaviours(&env->vfp.standard_fp_status_f16); | ||
91 | + | ||
92 | #ifndef CONFIG_USER_ONLY | ||
93 | if (kvm_enabled()) { | ||
94 | kvm_arm_reset_vcpu(cpu); | ||
95 | diff --git a/fpu/softfloat-specialize.c.inc b/fpu/softfloat-specialize.c.inc | ||
96 | index XXXXXXX..XXXXXXX 100644 | ||
97 | --- a/fpu/softfloat-specialize.c.inc | ||
98 | +++ b/fpu/softfloat-specialize.c.inc | ||
99 | @@ -XXX,XX +XXX,XX @@ static int pickNaN(FloatClass a_cls, FloatClass b_cls, | ||
100 | /* target didn't set the rule: fall back to old ifdef choices */ | ||
101 | #if defined(TARGET_AVR) || defined(TARGET_HEXAGON) \ | ||
102 | || defined(TARGET_RISCV) || defined(TARGET_SH4) \ | ||
103 | - || defined(TARGET_TRICORE) | ||
104 | + || defined(TARGET_TRICORE) || defined(TARGET_ARM) | ||
105 | g_assert_not_reached(); | ||
106 | -#elif defined(TARGET_ARM) || defined(TARGET_MIPS) || defined(TARGET_HPPA) || \ | ||
107 | +#elif defined(TARGET_MIPS) || defined(TARGET_HPPA) || \ | ||
108 | defined(TARGET_LOONGARCH64) || defined(TARGET_S390X) | ||
109 | - /* | ||
110 | - * ARM mandated NaN propagation rules (see FPProcessNaNs()), take | ||
111 | - * the first of: | ||
112 | - * 1. A if it is signaling | ||
113 | - * 2. B if it is signaling | ||
114 | - * 3. A (quiet) | ||
115 | - * 4. B (quiet) | ||
116 | - * A signaling NaN is always quietened before returning it. | ||
117 | - */ | ||
118 | /* | ||
119 | * According to MIPS specifications, if one of the two operands is | ||
120 | * a sNaN, a new qNaN has to be generated. This is done in | ||
121 | -- | ||
122 | 2.34.1 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | Set the 2-NaN propagation rule explicitly in the float_status words | ||
2 | we use. | ||
3 | 1 | ||
4 | For active_fpu.fp_status, we do this in a new fp_reset() function | ||
5 | which mirrors the existing msa_reset() function in doing "first call | ||
6 | restore to set the fp status parts that depend on CPU state, then set | ||
7 | the fp status parts that are constant". | ||
8 | |||
9 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
10 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
11 | Message-id: 20241025141254.2141506-5-peter.maydell@linaro.org | ||
12 | --- | ||
13 | target/mips/fpu_helper.h | 22 ++++++++++++++++++++++ | ||
14 | target/mips/cpu.c | 2 +- | ||
15 | target/mips/msa.c | 17 +++++++++++++++++ | ||
16 | fpu/softfloat-specialize.c.inc | 18 ++---------------- | ||
17 | 4 files changed, 42 insertions(+), 17 deletions(-) | ||
18 | |||
19 | diff --git a/target/mips/fpu_helper.h b/target/mips/fpu_helper.h | ||
20 | index XXXXXXX..XXXXXXX 100644 | ||
21 | --- a/target/mips/fpu_helper.h | ||
22 | +++ b/target/mips/fpu_helper.h | ||
23 | @@ -XXX,XX +XXX,XX @@ static inline void restore_fp_status(CPUMIPSState *env) | ||
24 | restore_snan_bit_mode(env); | ||
25 | } | ||
26 | |||
27 | +static inline void fp_reset(CPUMIPSState *env) | ||
28 | +{ | ||
29 | + restore_fp_status(env); | ||
30 | + | ||
31 | + /* | ||
32 | + * According to MIPS specifications, if one of the two operands is | ||
33 | + * a sNaN, a new qNaN has to be generated. This is done in | ||
34 | + * floatXX_silence_nan(). For qNaN inputs the specification | |
35 | + * says: "When possible, this QNaN result is one of the operand QNaN | ||
36 | + * values." In practice it seems that most implementations choose | ||
37 | + * the first operand if both operands are qNaN. In short this gives | ||
38 | + * the following rules: | ||
39 | + * 1. A if it is signaling | ||
40 | + * 2. B if it is signaling | ||
41 | + * 3. A (quiet) | ||
42 | + * 4. B (quiet) | ||
43 | + * A signaling NaN is always silenced before returning it. | ||
44 | + */ | ||
45 | + set_float_2nan_prop_rule(float_2nan_prop_s_ab, | ||
46 | + &env->active_fpu.fp_status); | ||
47 | +} | ||
48 | + | ||
49 | /* MSA */ | ||
50 | |||
51 | enum CPUMIPSMSADataFormat { | ||
52 | diff --git a/target/mips/cpu.c b/target/mips/cpu.c | ||
53 | index XXXXXXX..XXXXXXX 100644 | ||
54 | --- a/target/mips/cpu.c | ||
55 | +++ b/target/mips/cpu.c | ||
56 | @@ -XXX,XX +XXX,XX @@ static void mips_cpu_reset_hold(Object *obj, ResetType type) | ||
57 | } | ||
58 | |||
59 | msa_reset(env); | ||
60 | + fp_reset(env); | ||
61 | |||
62 | compute_hflags(env); | ||
63 | - restore_fp_status(env); | ||
64 | restore_pamask(env); | ||
65 | cs->exception_index = EXCP_NONE; | ||
66 | |||
67 | diff --git a/target/mips/msa.c b/target/mips/msa.c | ||
68 | index XXXXXXX..XXXXXXX 100644 | ||
69 | --- a/target/mips/msa.c | ||
70 | +++ b/target/mips/msa.c | ||
71 | @@ -XXX,XX +XXX,XX @@ void msa_reset(CPUMIPSState *env) | ||
72 | set_float_detect_tininess(float_tininess_after_rounding, | ||
73 | &env->active_tc.msa_fp_status); | ||
74 | |||
75 | + /* | ||
76 | + * According to MIPS specifications, if one of the two operands is | ||
77 | + * a sNaN, a new qNaN has to be generated. This is done in | ||
78 | + * floatXX_silence_nan(). For qNaN inputs the specification | |
79 | + * says: "When possible, this QNaN result is one of the operand QNaN | ||
80 | + * values." In practice it seems that most implementations choose | ||
81 | + * the first operand if both operands are qNaN. In short this gives | ||
82 | + * the following rules: | ||
83 | + * 1. A if it is signaling | ||
84 | + * 2. B if it is signaling | ||
85 | + * 3. A (quiet) | ||
86 | + * 4. B (quiet) | ||
87 | + * A signaling NaN is always silenced before returning it. | ||
88 | + */ | ||
89 | + set_float_2nan_prop_rule(float_2nan_prop_s_ab, | ||
90 | + &env->active_tc.msa_fp_status); | ||
91 | + | ||
92 | /* clear float_status exception flags */ | ||
93 | set_float_exception_flags(0, &env->active_tc.msa_fp_status); | ||
94 | |||
95 | diff --git a/fpu/softfloat-specialize.c.inc b/fpu/softfloat-specialize.c.inc | ||
96 | index XXXXXXX..XXXXXXX 100644 | ||
97 | --- a/fpu/softfloat-specialize.c.inc | ||
98 | +++ b/fpu/softfloat-specialize.c.inc | ||
99 | @@ -XXX,XX +XXX,XX @@ static int pickNaN(FloatClass a_cls, FloatClass b_cls, | ||
100 | /* target didn't set the rule: fall back to old ifdef choices */ | ||
101 | #if defined(TARGET_AVR) || defined(TARGET_HEXAGON) \ | ||
102 | || defined(TARGET_RISCV) || defined(TARGET_SH4) \ | ||
103 | - || defined(TARGET_TRICORE) || defined(TARGET_ARM) | ||
104 | + || defined(TARGET_TRICORE) || defined(TARGET_ARM) || defined(TARGET_MIPS) | ||
105 | g_assert_not_reached(); | ||
106 | -#elif defined(TARGET_MIPS) || defined(TARGET_HPPA) || \ | ||
107 | +#elif defined(TARGET_HPPA) || \ | ||
108 | defined(TARGET_LOONGARCH64) || defined(TARGET_S390X) | ||
109 | - /* | ||
110 | - * According to MIPS specifications, if one of the two operands is | ||
111 | - * a sNaN, a new qNaN has to be generated. This is done in | ||
112 | - * floatXX_silence_nan(). For qNaN inputs the specifications | ||
113 | - * says: "When possible, this QNaN result is one of the operand QNaN | ||
114 | - * values." In practice it seems that most implementations choose | ||
115 | - * the first operand if both operands are qNaN. In short this gives | ||
116 | - * the following rules: | ||
117 | - * 1. A if it is signaling | ||
118 | - * 2. B if it is signaling | ||
119 | - * 3. A (quiet) | ||
120 | - * 4. B (quiet) | ||
121 | - * A signaling NaN is always silenced before returning it. | ||
122 | - */ | ||
123 | rule = float_2nan_prop_s_ab; | ||
124 | #elif defined(TARGET_PPC) || defined(TARGET_M68K) | ||
125 | /* | ||
126 | -- | ||
127 | 2.34.1 | ||
128 | |||
129 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | Set the 2-NaN propagation rule explicitly in the float_status word we | ||
2 | use. | ||
3 | 1 | ||
4 | (There are a couple of places in fpu_helper.c where we create a | ||
5 | dummy float_status word with "float_status *s = { };", but these | ||
6 | are only used for calling float*_is_quiet_nan() so it doesn't | ||
7 | matter that we don't set a 2-NaN propagation rule there.) | ||
8 | |||
9 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
10 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
11 | Message-id: 20241025141254.2141506-6-peter.maydell@linaro.org | ||
12 | --- | ||
13 | target/loongarch/tcg/fpu_helper.c | 1 + | ||
14 | fpu/softfloat-specialize.c.inc | 6 +++--- | ||
15 | 2 files changed, 4 insertions(+), 3 deletions(-) | ||
16 | |||
17 | diff --git a/target/loongarch/tcg/fpu_helper.c b/target/loongarch/tcg/fpu_helper.c | ||
18 | index XXXXXXX..XXXXXXX 100644 | ||
19 | --- a/target/loongarch/tcg/fpu_helper.c | ||
20 | +++ b/target/loongarch/tcg/fpu_helper.c | ||
21 | @@ -XXX,XX +XXX,XX @@ void restore_fp_status(CPULoongArchState *env) | ||
22 | set_float_rounding_mode(ieee_rm[(env->fcsr0 >> FCSR0_RM) & 0x3], | ||
23 | &env->fp_status); | ||
24 | set_flush_to_zero(0, &env->fp_status); | ||
25 | + set_float_2nan_prop_rule(float_2nan_prop_s_ab, &env->fp_status); | ||
26 | } | ||
27 | |||
28 | int ieee_ex_to_loongarch(int xcpt) | ||
29 | diff --git a/fpu/softfloat-specialize.c.inc b/fpu/softfloat-specialize.c.inc | ||
30 | index XXXXXXX..XXXXXXX 100644 | ||
31 | --- a/fpu/softfloat-specialize.c.inc | ||
32 | +++ b/fpu/softfloat-specialize.c.inc | ||
33 | @@ -XXX,XX +XXX,XX @@ static int pickNaN(FloatClass a_cls, FloatClass b_cls, | ||
34 | /* target didn't set the rule: fall back to old ifdef choices */ | ||
35 | #if defined(TARGET_AVR) || defined(TARGET_HEXAGON) \ | ||
36 | || defined(TARGET_RISCV) || defined(TARGET_SH4) \ | ||
37 | - || defined(TARGET_TRICORE) || defined(TARGET_ARM) || defined(TARGET_MIPS) | ||
38 | + || defined(TARGET_TRICORE) || defined(TARGET_ARM) || defined(TARGET_MIPS) \ | ||
39 | + || defined(TARGET_LOONGARCH64) | ||
40 | g_assert_not_reached(); | ||
41 | -#elif defined(TARGET_HPPA) || \ | ||
42 | - defined(TARGET_LOONGARCH64) || defined(TARGET_S390X) | ||
43 | +#elif defined(TARGET_HPPA) || defined(TARGET_S390X) | ||
44 | rule = float_2nan_prop_s_ab; | ||
45 | #elif defined(TARGET_PPC) || defined(TARGET_M68K) | ||
46 | /* | ||
47 | -- | ||
48 | 2.34.1 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | Set the 2-NaN propagation rule explicitly in env->fp_status. | ||
2 | 1 | ||
3 | Really we only need to do this at CPU reset (after reset has zeroed | ||
4 | out most of the CPU state struct, which typically includes fp_status | ||
5 | fields). However target/hppa does not currently implement CPU reset | ||
6 | at all, so leave a TODO comment to note that this could be moved if | ||
7 | we ever do implement reset. | ||
8 | |||
9 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
10 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
11 | Message-id: 20241025141254.2141506-7-peter.maydell@linaro.org | ||
12 | --- | ||
13 | target/hppa/fpu_helper.c | 6 ++++++ | ||
14 | fpu/softfloat-specialize.c.inc | 4 ++-- | ||
15 | 2 files changed, 8 insertions(+), 2 deletions(-) | ||
16 | |||
17 | diff --git a/target/hppa/fpu_helper.c b/target/hppa/fpu_helper.c | ||
18 | index XXXXXXX..XXXXXXX 100644 | ||
19 | --- a/target/hppa/fpu_helper.c | ||
20 | +++ b/target/hppa/fpu_helper.c | ||
21 | @@ -XXX,XX +XXX,XX @@ void HELPER(loaded_fr0)(CPUHPPAState *env) | ||
22 | d = FIELD_EX32(shadow, FPSR, D); | ||
23 | set_flush_to_zero(d, &env->fp_status); | ||
24 | set_flush_inputs_to_zero(d, &env->fp_status); | ||
25 | + | ||
26 | + /* | ||
27 | + * TODO: we only need to do this at CPU reset, but currently | ||
28 | + * HPPA does not implement a CPU reset method at all... | |
29 | + */ | ||
30 | + set_float_2nan_prop_rule(float_2nan_prop_s_ab, &env->fp_status); | ||
31 | } | ||
32 | |||
33 | void cpu_hppa_loaded_fr0(CPUHPPAState *env) | ||
34 | diff --git a/fpu/softfloat-specialize.c.inc b/fpu/softfloat-specialize.c.inc | ||
35 | index XXXXXXX..XXXXXXX 100644 | ||
36 | --- a/fpu/softfloat-specialize.c.inc | ||
37 | +++ b/fpu/softfloat-specialize.c.inc | ||
38 | @@ -XXX,XX +XXX,XX @@ static int pickNaN(FloatClass a_cls, FloatClass b_cls, | ||
39 | #if defined(TARGET_AVR) || defined(TARGET_HEXAGON) \ | ||
40 | || defined(TARGET_RISCV) || defined(TARGET_SH4) \ | ||
41 | || defined(TARGET_TRICORE) || defined(TARGET_ARM) || defined(TARGET_MIPS) \ | ||
42 | - || defined(TARGET_LOONGARCH64) | ||
43 | + || defined(TARGET_LOONGARCH64) || defined(TARGET_HPPA) | ||
44 | g_assert_not_reached(); | ||
45 | -#elif defined(TARGET_HPPA) || defined(TARGET_S390X) | ||
46 | +#elif defined(TARGET_S390X) | ||
47 | rule = float_2nan_prop_s_ab; | ||
48 | #elif defined(TARGET_PPC) || defined(TARGET_M68K) | ||
49 | /* | ||
50 | -- | ||
51 | 2.34.1 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | Set the 2-NaN propagation rule explicitly in env->fpu_status. | ||
2 | 1 | ||
3 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
4 | Reviewed-by: Ilya Leoshkevich <iii@linux.ibm.com> | ||
5 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
6 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | Message-id: 20241025141254.2141506-8-peter.maydell@linaro.org | ||
8 | --- | ||
9 | target/s390x/cpu.c | 1 + | ||
10 | fpu/softfloat-specialize.c.inc | 5 ++--- | ||
11 | 2 files changed, 3 insertions(+), 3 deletions(-) | ||
12 | |||
13 | diff --git a/target/s390x/cpu.c b/target/s390x/cpu.c | ||
14 | index XXXXXXX..XXXXXXX 100644 | ||
15 | --- a/target/s390x/cpu.c | ||
16 | +++ b/target/s390x/cpu.c | ||
17 | @@ -XXX,XX +XXX,XX @@ static void s390_cpu_reset_hold(Object *obj, ResetType type) | ||
18 | /* tininess for underflow is detected before rounding */ | ||
19 | set_float_detect_tininess(float_tininess_before_rounding, | ||
20 | &env->fpu_status); | ||
21 | + set_float_2nan_prop_rule(float_2nan_prop_s_ab, &env->fpu_status); | ||
22 | /* fall through */ | ||
23 | case RESET_TYPE_S390_CPU_NORMAL: | ||
24 | env->psw.mask &= ~PSW_MASK_RI; | ||
25 | diff --git a/fpu/softfloat-specialize.c.inc b/fpu/softfloat-specialize.c.inc | ||
26 | index XXXXXXX..XXXXXXX 100644 | ||
27 | --- a/fpu/softfloat-specialize.c.inc | ||
28 | +++ b/fpu/softfloat-specialize.c.inc | ||
29 | @@ -XXX,XX +XXX,XX @@ static int pickNaN(FloatClass a_cls, FloatClass b_cls, | ||
30 | #if defined(TARGET_AVR) || defined(TARGET_HEXAGON) \ | ||
31 | || defined(TARGET_RISCV) || defined(TARGET_SH4) \ | ||
32 | || defined(TARGET_TRICORE) || defined(TARGET_ARM) || defined(TARGET_MIPS) \ | ||
33 | - || defined(TARGET_LOONGARCH64) || defined(TARGET_HPPA) | ||
34 | + || defined(TARGET_LOONGARCH64) || defined(TARGET_HPPA) \ | ||
35 | + || defined(TARGET_S390X) | ||
36 | g_assert_not_reached(); | ||
37 | -#elif defined(TARGET_S390X) | ||
38 | - rule = float_2nan_prop_s_ab; | ||
39 | #elif defined(TARGET_PPC) || defined(TARGET_M68K) | ||
40 | /* | ||
41 | * PowerPC propagation rules: | ||
42 | -- | ||
43 | 2.34.1 | ||
44 | |||
45 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | Set the 2-NaN propagation rule explicitly in env->fp_status | ||
2 | and env->vec_status. | ||
3 | 1 | ||
4 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
5 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | Message-id: 20241025141254.2141506-9-peter.maydell@linaro.org | ||
7 | --- | ||
8 | target/ppc/cpu_init.c | 8 ++++++++ | ||
9 | fpu/softfloat-specialize.c.inc | 10 ++-------- | ||
10 | 2 files changed, 10 insertions(+), 8 deletions(-) | ||
11 | |||
12 | diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c | ||
13 | index XXXXXXX..XXXXXXX 100644 | ||
14 | --- a/target/ppc/cpu_init.c | ||
15 | +++ b/target/ppc/cpu_init.c | ||
16 | @@ -XXX,XX +XXX,XX @@ static void ppc_cpu_reset_hold(Object *obj, ResetType type) | ||
17 | /* tininess for underflow is detected before rounding */ | ||
18 | set_float_detect_tininess(float_tininess_before_rounding, | ||
19 | &env->fp_status); | ||
20 | + /* | ||
21 | + * PowerPC propagation rules: | ||
22 | + * 1. A if it sNaN or qNaN | ||
23 | + * 2. B if it sNaN or qNaN | ||
24 | + * A signaling NaN is always silenced before returning it. | ||
25 | + */ | ||
26 | + set_float_2nan_prop_rule(float_2nan_prop_ab, &env->fp_status); | ||
27 | + set_float_2nan_prop_rule(float_2nan_prop_ab, &env->vec_status); | ||
28 | |||
29 | for (i = 0; i < ARRAY_SIZE(env->spr_cb); i++) { | ||
30 | ppc_spr_t *spr = &env->spr_cb[i]; | ||
31 | diff --git a/fpu/softfloat-specialize.c.inc b/fpu/softfloat-specialize.c.inc | ||
32 | index XXXXXXX..XXXXXXX 100644 | ||
33 | --- a/fpu/softfloat-specialize.c.inc | ||
34 | +++ b/fpu/softfloat-specialize.c.inc | ||
35 | @@ -XXX,XX +XXX,XX @@ static int pickNaN(FloatClass a_cls, FloatClass b_cls, | ||
36 | || defined(TARGET_RISCV) || defined(TARGET_SH4) \ | ||
37 | || defined(TARGET_TRICORE) || defined(TARGET_ARM) || defined(TARGET_MIPS) \ | ||
38 | || defined(TARGET_LOONGARCH64) || defined(TARGET_HPPA) \ | ||
39 | - || defined(TARGET_S390X) | ||
40 | + || defined(TARGET_S390X) || defined(TARGET_PPC) | ||
41 | g_assert_not_reached(); | ||
42 | -#elif defined(TARGET_PPC) || defined(TARGET_M68K) | ||
43 | - /* | ||
44 | - * PowerPC propagation rules: | ||
45 | - * 1. A if it sNaN or qNaN | ||
46 | - * 2. B if it sNaN or qNaN | ||
47 | - * A signaling NaN is always silenced before returning it. | ||
48 | - */ | ||
49 | +#elif defined(TARGET_M68K) | ||
50 | /* | ||
51 | * M68000 FAMILY PROGRAMMER'S REFERENCE MANUAL | ||
52 | * 3.4 FLOATING-POINT INSTRUCTION DETAILS | ||
53 | -- | ||
54 | 2.34.1 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | Explicitly set the 2-NaN propagation rule on env->fp_status | ||
2 | and on the temporary fp_status that we use in frem (since | ||
3 | we pass that to a division operation function). | ||
4 | 1 | ||
5 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
6 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
8 | --- | ||
9 | target/m68k/cpu.c | 16 ++++++++++++++++ | ||
10 | target/m68k/fpu_helper.c | 1 + | ||
11 | fpu/softfloat-specialize.c.inc | 19 +------------------ | ||
12 | 3 files changed, 18 insertions(+), 18 deletions(-) | ||
13 | |||
14 | diff --git a/target/m68k/cpu.c b/target/m68k/cpu.c | ||
15 | index XXXXXXX..XXXXXXX 100644 | ||
16 | --- a/target/m68k/cpu.c | ||
17 | +++ b/target/m68k/cpu.c | ||
18 | @@ -XXX,XX +XXX,XX @@ static void m68k_cpu_reset_hold(Object *obj, ResetType type) | ||
19 | env->fregs[i].d = nan; | ||
20 | } | ||
21 | cpu_m68k_set_fpcr(env, 0); | ||
22 | + /* | ||
23 | + * M68000 FAMILY PROGRAMMER'S REFERENCE MANUAL | ||
24 | + * 3.4 FLOATING-POINT INSTRUCTION DETAILS | ||
25 | + * If either operand, but not both operands, of an operation is a | ||
26 | + * nonsignaling NaN, then that NaN is returned as the result. If both | ||
27 | + * operands are nonsignaling NaNs, then the destination operand | ||
28 | + * nonsignaling NaN is returned as the result. | ||
29 | + * If either operand to an operation is a signaling NaN (SNaN), then the | ||
30 | + * SNaN bit is set in the FPSR EXC byte. If the SNaN exception enable bit | ||
31 | + * is set in the FPCR ENABLE byte, then the exception is taken and the | ||
32 | + * destination is not modified. If the SNaN exception enable bit is not | ||
33 | + * set, setting the SNaN bit in the operand to a one converts the SNaN to | ||
34 | + * a nonsignaling NaN. The operation then continues as described in the | ||
35 | + * preceding paragraph for nonsignaling NaNs. | ||
36 | + */ | ||
37 | + set_float_2nan_prop_rule(float_2nan_prop_ab, &env->fp_status); | ||
38 | env->fpsr = 0; | ||
39 | |||
40 | /* TODO: We should set PC from the interrupt vector. */ | ||
41 | diff --git a/target/m68k/fpu_helper.c b/target/m68k/fpu_helper.c | ||
42 | index XXXXXXX..XXXXXXX 100644 | ||
43 | --- a/target/m68k/fpu_helper.c | ||
44 | +++ b/target/m68k/fpu_helper.c | ||
45 | @@ -XXX,XX +XXX,XX @@ void HELPER(frem)(CPUM68KState *env, FPReg *res, FPReg *val0, FPReg *val1) | ||
46 | int sign; | ||
47 | |||
48 | /* Calculate quotient directly using round to nearest mode */ | ||
49 | + set_float_2nan_prop_rule(float_2nan_prop_ab, &fp_status); | ||
50 | set_float_rounding_mode(float_round_nearest_even, &fp_status); | ||
51 | set_floatx80_rounding_precision( | ||
52 | get_floatx80_rounding_precision(&env->fp_status), &fp_status); | ||
53 | diff --git a/fpu/softfloat-specialize.c.inc b/fpu/softfloat-specialize.c.inc | ||
54 | index XXXXXXX..XXXXXXX 100644 | ||
55 | --- a/fpu/softfloat-specialize.c.inc | ||
56 | +++ b/fpu/softfloat-specialize.c.inc | ||
57 | @@ -XXX,XX +XXX,XX @@ static int pickNaN(FloatClass a_cls, FloatClass b_cls, | ||
58 | || defined(TARGET_RISCV) || defined(TARGET_SH4) \ | ||
59 | || defined(TARGET_TRICORE) || defined(TARGET_ARM) || defined(TARGET_MIPS) \ | ||
60 | || defined(TARGET_LOONGARCH64) || defined(TARGET_HPPA) \ | ||
61 | - || defined(TARGET_S390X) || defined(TARGET_PPC) | ||
62 | + || defined(TARGET_S390X) || defined(TARGET_PPC) || defined(TARGET_M68K) | ||
63 | g_assert_not_reached(); | ||
64 | -#elif defined(TARGET_M68K) | ||
65 | - /* | ||
66 | - * M68000 FAMILY PROGRAMMER'S REFERENCE MANUAL | ||
67 | - * 3.4 FLOATING-POINT INSTRUCTION DETAILS | ||
68 | - * If either operand, but not both operands, of an operation is a | ||
69 | - * nonsignaling NaN, then that NaN is returned as the result. If both | ||
70 | - * operands are nonsignaling NaNs, then the destination operand | ||
71 | - * nonsignaling NaN is returned as the result. | ||
72 | - * If either operand to an operation is a signaling NaN (SNaN), then the | ||
73 | - * SNaN bit is set in the FPSR EXC byte. If the SNaN exception enable bit | ||
74 | - * is set in the FPCR ENABLE byte, then the exception is taken and the | ||
75 | - * destination is not modified. If the SNaN exception enable bit is not | ||
76 | - * set, setting the SNaN bit in the operand to a one converts the SNaN to | ||
77 | - * a nonsignaling NaN. The operation then continues as described in the | ||
78 | - * preceding paragraph for nonsignaling NaNs. | ||
79 | - */ | ||
80 | - rule = float_2nan_prop_ab; | ||
81 | #elif defined(TARGET_SPARC) | ||
82 | /* Prefer SNaN over QNaN, order B then A. */ | ||
83 | rule = float_2nan_prop_s_ba; | ||
84 | -- | ||
85 | 2.34.1 | ||
86 | |||
87 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | In cf_fpu_gdb_get_reg() and cf_fpu_gdb_set_reg() we use a temporary | ||
2 | float_status variable to pass to floatx80_to_float64() and | ||
3 | float64_to_floatx80(), but we don't initialize it, meaning that those | ||
4 | functions could access uninitialized data. Zero-init the structs. | ||
5 | 1 | ||
6 | (We don't need to set a NaN-propagation rule here because we | ||
7 | don't use these with a 2-argument fpu operation.) | ||
8 | |||
9 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
10 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
11 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
12 | Message-id: 20241025141254.2141506-11-peter.maydell@linaro.org | ||
13 | --- | ||
14 | target/m68k/helper.c | 4 ++-- | ||
15 | 1 file changed, 2 insertions(+), 2 deletions(-) | ||
16 | |||
17 | diff --git a/target/m68k/helper.c b/target/m68k/helper.c | ||
18 | index XXXXXXX..XXXXXXX 100644 | ||
19 | --- a/target/m68k/helper.c | ||
20 | +++ b/target/m68k/helper.c | ||
21 | @@ -XXX,XX +XXX,XX @@ static int cf_fpu_gdb_get_reg(CPUState *cs, GByteArray *mem_buf, int n) | ||
22 | CPUM68KState *env = &cpu->env; | ||
23 | |||
24 | if (n < 8) { | ||
25 | - float_status s; | ||
26 | + float_status s = {}; | ||
27 | return gdb_get_reg64(mem_buf, floatx80_to_float64(env->fregs[n].d, &s)); | ||
28 | } | ||
29 | switch (n) { | ||
30 | @@ -XXX,XX +XXX,XX @@ static int cf_fpu_gdb_set_reg(CPUState *cs, uint8_t *mem_buf, int n) | ||
31 | CPUM68KState *env = &cpu->env; | ||
32 | |||
33 | if (n < 8) { | ||
34 | - float_status s; | ||
35 | + float_status s = {}; | ||
36 | env->fregs[n].d = float64_to_floatx80(ldq_be_p(mem_buf), &s); | ||
37 | return 8; | ||
38 | } | ||
39 | -- | ||
40 | 2.34.1 | ||
41 | |||
42 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | Set the NaN propagation rule explicitly in the float_status | ||
2 | words we use. | ||
3 | 1 | ||
4 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
5 | Acked-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk> | ||
6 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | Message-id: 20241025141254.2141506-13-peter.maydell@linaro.org | ||
8 | --- | ||
9 | target/sparc/cpu.c | 8 ++++++++ | ||
10 | target/sparc/fop_helper.c | 10 ++++++++-- | ||
11 | fpu/softfloat-specialize.c.inc | 6 ++---- | ||
12 | 3 files changed, 18 insertions(+), 6 deletions(-) | ||
13 | |||
14 | diff --git a/target/sparc/cpu.c b/target/sparc/cpu.c | ||
15 | index XXXXXXX..XXXXXXX 100644 | ||
16 | --- a/target/sparc/cpu.c | ||
17 | +++ b/target/sparc/cpu.c | ||
18 | @@ -XXX,XX +XXX,XX @@ | ||
19 | #include "hw/qdev-properties.h" | ||
20 | #include "qapi/visitor.h" | ||
21 | #include "tcg/tcg.h" | ||
22 | +#include "fpu/softfloat.h" | ||
23 | |||
24 | //#define DEBUG_FEATURES | ||
25 | |||
26 | @@ -XXX,XX +XXX,XX @@ static void sparc_cpu_realizefn(DeviceState *dev, Error **errp) | ||
27 | env->version |= env->def.nwindows - 1; | ||
28 | #endif | ||
29 | |||
30 | + /* | ||
31 | + * Prefer SNaN over QNaN, order B then A. It's OK to do this in realize | ||
32 | + * rather than reset, because fp_status is after 'end_reset_fields' in | ||
33 | + * the CPU state struct so it won't get zeroed on reset. | ||
34 | + */ | ||
35 | + set_float_2nan_prop_rule(float_2nan_prop_s_ba, &env->fp_status); | ||
36 | + | ||
37 | cpu_exec_realizefn(cs, &local_err); | ||
38 | if (local_err != NULL) { | ||
39 | error_propagate(errp, local_err); | ||
40 | diff --git a/target/sparc/fop_helper.c b/target/sparc/fop_helper.c | ||
41 | index XXXXXXX..XXXXXXX 100644 | ||
42 | --- a/target/sparc/fop_helper.c | ||
43 | +++ b/target/sparc/fop_helper.c | ||
44 | @@ -XXX,XX +XXX,XX @@ uint32_t helper_flcmps(float32 src1, float32 src2) | ||
45 | * Perform the comparison with a dummy fp environment. | ||
46 | */ | ||
47 | float_status discard = { }; | ||
48 | - FloatRelation r = float32_compare_quiet(src1, src2, &discard); | ||
49 | + FloatRelation r; | ||
50 | + | ||
51 | + set_float_2nan_prop_rule(float_2nan_prop_s_ba, &discard); | ||
52 | + r = float32_compare_quiet(src1, src2, &discard); | ||
53 | |||
54 | switch (r) { | ||
55 | case float_relation_equal: | ||
56 | @@ -XXX,XX +XXX,XX @@ uint32_t helper_flcmps(float32 src1, float32 src2) | ||
57 | uint32_t helper_flcmpd(float64 src1, float64 src2) | ||
58 | { | ||
59 | float_status discard = { }; | ||
60 | - FloatRelation r = float64_compare_quiet(src1, src2, &discard); | ||
61 | + FloatRelation r; | ||
62 | + | ||
63 | + set_float_2nan_prop_rule(float_2nan_prop_s_ba, &discard); | ||
64 | + r = float64_compare_quiet(src1, src2, &discard); | ||
65 | |||
66 | switch (r) { | ||
67 | case float_relation_equal: | ||
68 | diff --git a/fpu/softfloat-specialize.c.inc b/fpu/softfloat-specialize.c.inc | ||
69 | index XXXXXXX..XXXXXXX 100644 | ||
70 | --- a/fpu/softfloat-specialize.c.inc | ||
71 | +++ b/fpu/softfloat-specialize.c.inc | ||
72 | @@ -XXX,XX +XXX,XX @@ static int pickNaN(FloatClass a_cls, FloatClass b_cls, | ||
73 | || defined(TARGET_RISCV) || defined(TARGET_SH4) \ | ||
74 | || defined(TARGET_TRICORE) || defined(TARGET_ARM) || defined(TARGET_MIPS) \ | ||
75 | || defined(TARGET_LOONGARCH64) || defined(TARGET_HPPA) \ | ||
76 | - || defined(TARGET_S390X) || defined(TARGET_PPC) || defined(TARGET_M68K) | ||
77 | + || defined(TARGET_S390X) || defined(TARGET_PPC) || defined(TARGET_M68K) \ | ||
78 | + || defined(TARGET_SPARC) | ||
79 | g_assert_not_reached(); | ||
80 | -#elif defined(TARGET_SPARC) | ||
81 | - /* Prefer SNaN over QNaN, order B then A. */ | ||
82 | - rule = float_2nan_prop_s_ba; | ||
83 | #elif defined(TARGET_XTENSA) | ||
84 | /* | ||
85 | * Xtensa has two NaN propagation modes. | ||
86 | -- | ||
87 | 2.34.1 | diff view generated by jsdifflib |
1 | Our current usage of MMU indexes when EL3 is AArch32 is confused. | 1 | From: Pierrick Bouvier <pierrick.bouvier@linaro.org> |
---|---|---|---|
2 | Architecturally, when EL3 is AArch32, all Secure code runs under the | ||
3 | Secure PL1&0 translation regime: | ||
4 | * code at EL3, which might be Mon, or SVC, or any of the | ||
5 | other privileged modes (PL1) | ||
6 | * code at EL0 (Secure PL0) | ||
7 | 2 | ||
8 | This is different from when EL3 is AArch64, in which case EL3 is its | 3 | Before changing the default pauth algorithm, we need to make sure the current |
9 | own translation regime, and EL1 and EL0 (whether AArch32 or AArch64) | 4 | default one (QARMA5) can still be selected. |
10 | have their own regime. | ||
11 | 5 | ||
12 | We claimed to be mapping Secure PL1 to our ARMMMUIdx_E3, but didn't | 6 | $ qemu-system-aarch64 -cpu max,pauth-qarma5=on ... |
13 | do anything special about Secure PL0, which meant it used the same | ||
14 | ARMMMUIdx_E10_0 that NonSecure PL0 does. This resulted in a bug | ||
15 | where arm_sctlr() incorrectly picked the NonSecure SCTLR as the | ||
16 | controlling register when in Secure PL0, which meant we were | ||
17 | spuriously generating alignment faults because we were looking at the | ||
18 | wrong SCTLR control bits. | ||
19 | 7 | ||
20 | The use of ARMMMUIdx_E3 for Secure PL1 also resulted in the bug that | 8 | Signed-off-by: Pierrick Bouvier <pierrick.bouvier@linaro.org> |
21 | we wouldn't honour the PAN bit for Secure PL1, because there's no | 9 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
22 | equivalent _PAN mmu index for it. | 10 | Message-id: 20241219183211.3493974-2-pierrick.bouvier@linaro.org |
11 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
12 | --- | ||
13 | docs/system/arm/cpu-features.rst | 5 ++++- | ||
14 | target/arm/cpu.h | 1 + | ||
15 | target/arm/arm-qmp-cmds.c | 2 +- | ||
16 | target/arm/cpu64.c | 20 ++++++++++++++------ | ||
17 | tests/qtest/arm-cpu-features.c | 15 +++++++++++---- | ||
18 | 5 files changed, 31 insertions(+), 12 deletions(-) | ||
23 | 19 | ||
24 | Fix this by adding two new MMU indexes: | 20 | diff --git a/docs/system/arm/cpu-features.rst b/docs/system/arm/cpu-features.rst |
25 | * ARMMMUIdx_E30_0 is for Secure PL0 | 21 | index XXXXXXX..XXXXXXX 100644 |
26 | * ARMMMUIdx_E30_3_PAN is for Secure PL1 when PAN is enabled | 22 | --- a/docs/system/arm/cpu-features.rst |
27 | The existing ARMMMUIdx_E3 is used to mean "Secure PL1 without PAN" | 23 | +++ b/docs/system/arm/cpu-features.rst |
28 | (and would be named ARMMMUIdx_E30_3 in an AArch32-centric scheme). | 24 | @@ -XXX,XX +XXX,XX @@ Below is the list of TCG VCPU features and their descriptions. |
29 | 25 | ``pauth-qarma3`` | |
30 | These extra two indexes bring us up to the maximum of 16 that the | 26 | When ``pauth`` is enabled, select the architected QARMA3 algorithm. |
31 | core code can currently support. | 27 | |
32 | 28 | -Without either ``pauth-impdef`` or ``pauth-qarma3`` enabled, | |
33 | This commit: | 29 | +``pauth-qarma5`` |
34 | * adds the new MMU index handling to the various places | 30 | + When ``pauth`` is enabled, select the architected QARMA5 algorithm. |
35 | where we deal in MMU index values | 31 | + |
36 | * adds assertions that we aren't AArch32 EL3 in a couple of | 32 | +Without ``pauth-impdef``, ``pauth-qarma3`` or ``pauth-qarma5`` enabled, |
37 | places that currently use the E10 indexes, to document why | 33 | the architected QARMA5 algorithm is used. The architected QARMA5 |
38 | they don't also need to handle the E30 indexes | 34 | and QARMA3 algorithms have good cryptographic properties, but can |
39 | * documents in a comment why regime_has_2_ranges() doesn't need | 35 | be quite slow to emulate. The impdef algorithm used by QEMU is |
40 | updating | ||
41 | |||
42 | Notes for backporting: this commit depends on the preceding revert of | ||
43 | 4c2c04746932; that revert and this commit should probably be | ||
44 | backported to everywhere that we originally backported 4c2c04746932. | ||
45 | |||
46 | Cc: qemu-stable@nongnu.org | ||
47 | Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2326 | ||
48 | Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2588 | ||
49 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
50 | Tested-by: Thomas Huth <thuth@redhat.com> | ||
51 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
52 | Message-id: 20241101142845.1712482-3-peter.maydell@linaro.org | ||
53 | --- | ||
54 | target/arm/cpu.h | 31 ++++++++++++++++++------------- | ||
55 | target/arm/internals.h | 16 ++++++++++++++-- | ||
56 | target/arm/helper.c | 38 ++++++++++++++++++++++++++++++++++---- | ||
57 | target/arm/ptw.c | 4 ++++ | ||
58 | target/arm/tcg/op_helper.c | 14 +++++++++++++- | ||
59 | target/arm/tcg/translate.c | 3 +++ | ||
60 | 6 files changed, 86 insertions(+), 20 deletions(-) | ||
61 | |||
62 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h | 36 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h |
63 | index XXXXXXX..XXXXXXX 100644 | 37 | index XXXXXXX..XXXXXXX 100644 |
64 | --- a/target/arm/cpu.h | 38 | --- a/target/arm/cpu.h |
65 | +++ b/target/arm/cpu.h | 39 | +++ b/target/arm/cpu.h |
66 | @@ -XXX,XX +XXX,XX @@ bool write_cpustate_to_list(ARMCPU *cpu, bool kvm_sync); | 40 | @@ -XXX,XX +XXX,XX @@ struct ArchCPU { |
67 | * + NonSecure PL1 & 0 stage 1 | 41 | bool prop_pauth; |
68 | * + NonSecure PL1 & 0 stage 2 | 42 | bool prop_pauth_impdef; |
69 | * + NonSecure PL2 | 43 | bool prop_pauth_qarma3; |
70 | - * + Secure PL0 | 44 | + bool prop_pauth_qarma5; |
71 | - * + Secure PL1 | 45 | bool prop_lpa2; |
72 | + * + Secure PL1 & 0 | 46 | |
73 | * (reminder: for 32 bit EL3, Secure PL1 is *EL3*, not EL1.) | 47 | /* DCZ blocksize, in log_2(words), ie low 4 bits of DCZID_EL0 */ |
74 | * | 48 | diff --git a/target/arm/arm-qmp-cmds.c b/target/arm/arm-qmp-cmds.c |
75 | * For QEMU, an mmu_idx is not quite the same as a translation regime because: | ||
76 | @@ -XXX,XX +XXX,XX @@ bool write_cpustate_to_list(ARMCPU *cpu, bool kvm_sync); | ||
77 | * | ||
78 | * This gives us the following list of cases: | ||
79 | * | ||
80 | - * EL0 EL1&0 stage 1+2 (aka NS PL0) | ||
81 | - * EL1 EL1&0 stage 1+2 (aka NS PL1) | ||
82 | - * EL1 EL1&0 stage 1+2 +PAN | ||
83 | + * EL0 EL1&0 stage 1+2 (aka NS PL0 PL1&0 stage 1+2) | ||
84 | + * EL1 EL1&0 stage 1+2 (aka NS PL1 PL1&0 stage 1+2) | ||
85 | + * EL1 EL1&0 stage 1+2 +PAN (aka NS PL1 PL1&0 stage 1+2 +PAN) | ||
86 | * EL0 EL2&0 | ||
87 | * EL2 EL2&0 | ||
88 | * EL2 EL2&0 +PAN | ||
89 | * EL2 (aka NS PL2) | ||
90 | - * EL3 (aka S PL1) | ||
91 | + * EL3 (aka AArch32 S PL1 PL1&0) | ||
92 | + * AArch32 S PL0 PL1&0 (we call this E30_0) | ||
93 | + * AArch32 S PL1 PL1&0 +PAN (we call this E30_3_PAN) | ||
94 | * Stage2 Secure | ||
95 | * Stage2 NonSecure | ||
96 | * plus one TLB per Physical address space: S, NS, Realm, Root | ||
97 | * | ||
98 | - * for a total of 14 different mmu_idx. | ||
99 | + * for a total of 16 different mmu_idx. | ||
100 | * | ||
101 | * R profile CPUs have an MPU, but can use the same set of MMU indexes | ||
102 | * as A profile. They only need to distinguish EL0 and EL1 (and | ||
103 | @@ -XXX,XX +XXX,XX @@ typedef enum ARMMMUIdx { | ||
104 | ARMMMUIdx_E20_2_PAN = 5 | ARM_MMU_IDX_A, | ||
105 | ARMMMUIdx_E2 = 6 | ARM_MMU_IDX_A, | ||
106 | ARMMMUIdx_E3 = 7 | ARM_MMU_IDX_A, | ||
107 | + ARMMMUIdx_E30_0 = 8 | ARM_MMU_IDX_A, | ||
108 | + ARMMMUIdx_E30_3_PAN = 9 | ARM_MMU_IDX_A, | ||
109 | |||
110 | /* | ||
111 | * Used for second stage of an S12 page table walk, or for descriptor | ||
112 | @@ -XXX,XX +XXX,XX @@ typedef enum ARMMMUIdx { | ||
113 | * are in use simultaneously for SecureEL2: the security state for | ||
114 | * the S2 ptw is selected by the NS bit from the S1 ptw. | ||
115 | */ | ||
116 | - ARMMMUIdx_Stage2_S = 8 | ARM_MMU_IDX_A, | ||
117 | - ARMMMUIdx_Stage2 = 9 | ARM_MMU_IDX_A, | ||
118 | + ARMMMUIdx_Stage2_S = 10 | ARM_MMU_IDX_A, | ||
119 | + ARMMMUIdx_Stage2 = 11 | ARM_MMU_IDX_A, | ||
120 | |||
121 | /* TLBs with 1-1 mapping to the physical address spaces. */ | ||
122 | - ARMMMUIdx_Phys_S = 10 | ARM_MMU_IDX_A, | ||
123 | - ARMMMUIdx_Phys_NS = 11 | ARM_MMU_IDX_A, | ||
124 | - ARMMMUIdx_Phys_Root = 12 | ARM_MMU_IDX_A, | ||
125 | - ARMMMUIdx_Phys_Realm = 13 | ARM_MMU_IDX_A, | ||
126 | + ARMMMUIdx_Phys_S = 12 | ARM_MMU_IDX_A, | ||
127 | + ARMMMUIdx_Phys_NS = 13 | ARM_MMU_IDX_A, | ||
128 | + ARMMMUIdx_Phys_Root = 14 | ARM_MMU_IDX_A, | ||
129 | + ARMMMUIdx_Phys_Realm = 15 | ARM_MMU_IDX_A, | ||
130 | |||
131 | /* | ||
132 | * These are not allocated TLBs and are used only for AT system | ||
133 | @@ -XXX,XX +XXX,XX @@ typedef enum ARMMMUIdxBit { | ||
134 | TO_CORE_BIT(E20_2), | ||
135 | TO_CORE_BIT(E20_2_PAN), | ||
136 | TO_CORE_BIT(E3), | ||
137 | + TO_CORE_BIT(E30_0), | ||
138 | + TO_CORE_BIT(E30_3_PAN), | ||
139 | TO_CORE_BIT(Stage2), | ||
140 | TO_CORE_BIT(Stage2_S), | ||
141 | |||
142 | diff --git a/target/arm/internals.h b/target/arm/internals.h | ||
143 | index XXXXXXX..XXXXXXX 100644 | 49 | index XXXXXXX..XXXXXXX 100644 |
144 | --- a/target/arm/internals.h | 50 | --- a/target/arm/arm-qmp-cmds.c |
145 | +++ b/target/arm/internals.h | 51 | +++ b/target/arm/arm-qmp-cmds.c |
146 | @@ -XXX,XX +XXX,XX @@ static inline void arm_call_el_change_hook(ARMCPU *cpu) | 52 | @@ -XXX,XX +XXX,XX @@ static const char *cpu_model_advertised_features[] = { |
53 | "sve640", "sve768", "sve896", "sve1024", "sve1152", "sve1280", | ||
54 | "sve1408", "sve1536", "sve1664", "sve1792", "sve1920", "sve2048", | ||
55 | "kvm-no-adjvtime", "kvm-steal-time", | ||
56 | - "pauth", "pauth-impdef", "pauth-qarma3", | ||
57 | + "pauth", "pauth-impdef", "pauth-qarma3", "pauth-qarma5", | ||
58 | NULL | ||
59 | }; | ||
60 | |||
61 | diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c | ||
62 | index XXXXXXX..XXXXXXX 100644 | ||
63 | --- a/target/arm/cpu64.c | ||
64 | +++ b/target/arm/cpu64.c | ||
65 | @@ -XXX,XX +XXX,XX @@ void arm_cpu_pauth_finalize(ARMCPU *cpu, Error **errp) | ||
66 | } | ||
67 | |||
68 | if (cpu->prop_pauth) { | ||
69 | - if (cpu->prop_pauth_impdef && cpu->prop_pauth_qarma3) { | ||
70 | + if ((cpu->prop_pauth_impdef && cpu->prop_pauth_qarma3) || | ||
71 | + (cpu->prop_pauth_impdef && cpu->prop_pauth_qarma5) || | ||
72 | + (cpu->prop_pauth_qarma3 && cpu->prop_pauth_qarma5)) { | ||
73 | error_setg(errp, | ||
74 | - "cannot enable both pauth-impdef and pauth-qarma3"); | ||
75 | + "cannot enable pauth-impdef, pauth-qarma3 and " | ||
76 | + "pauth-qarma5 at the same time"); | ||
77 | return; | ||
78 | } | ||
79 | |||
80 | @@ -XXX,XX +XXX,XX @@ void arm_cpu_pauth_finalize(ARMCPU *cpu, Error **errp) | ||
81 | } else if (cpu->prop_pauth_qarma3) { | ||
82 | isar2 = FIELD_DP64(isar2, ID_AA64ISAR2, APA3, features); | ||
83 | isar2 = FIELD_DP64(isar2, ID_AA64ISAR2, GPA3, 1); | ||
84 | - } else { | ||
85 | + } else { /* default is pauth-qarma5 */ | ||
86 | isar1 = FIELD_DP64(isar1, ID_AA64ISAR1, APA, features); | ||
87 | isar1 = FIELD_DP64(isar1, ID_AA64ISAR1, GPA, 1); | ||
88 | } | ||
89 | - } else if (cpu->prop_pauth_impdef || cpu->prop_pauth_qarma3) { | ||
90 | - error_setg(errp, "cannot enable pauth-impdef or " | ||
91 | - "pauth-qarma3 without pauth"); | ||
92 | + } else if (cpu->prop_pauth_impdef || | ||
93 | + cpu->prop_pauth_qarma3 || | ||
94 | + cpu->prop_pauth_qarma5) { | ||
95 | + error_setg(errp, "cannot enable pauth-impdef, pauth-qarma3 or " | ||
96 | + "pauth-qarma5 without pauth"); | ||
97 | error_append_hint(errp, "Add pauth=on to the CPU property list.\n"); | ||
98 | } | ||
99 | } | ||
100 | @@ -XXX,XX +XXX,XX @@ static const Property arm_cpu_pauth_impdef_property = | ||
101 | DEFINE_PROP_BOOL("pauth-impdef", ARMCPU, prop_pauth_impdef, false); | ||
102 | static const Property arm_cpu_pauth_qarma3_property = | ||
103 | DEFINE_PROP_BOOL("pauth-qarma3", ARMCPU, prop_pauth_qarma3, false); | ||
104 | +static Property arm_cpu_pauth_qarma5_property = | ||
105 | + DEFINE_PROP_BOOL("pauth-qarma5", ARMCPU, prop_pauth_qarma5, false); | ||
106 | |||
107 | void aarch64_add_pauth_properties(Object *obj) | ||
108 | { | ||
109 | @@ -XXX,XX +XXX,XX @@ void aarch64_add_pauth_properties(Object *obj) | ||
110 | } else { | ||
111 | qdev_property_add_static(DEVICE(obj), &arm_cpu_pauth_impdef_property); | ||
112 | qdev_property_add_static(DEVICE(obj), &arm_cpu_pauth_qarma3_property); | ||
113 | + qdev_property_add_static(DEVICE(obj), &arm_cpu_pauth_qarma5_property); | ||
147 | } | 114 | } |
148 | } | 115 | } |
149 | 116 | ||
150 | -/* Return true if this address translation regime has two ranges. */ | 117 | diff --git a/tests/qtest/arm-cpu-features.c b/tests/qtest/arm-cpu-features.c |
151 | +/* | ||
152 | + * Return true if this address translation regime has two ranges. | ||
153 | + * Note that this will not return the correct answer for AArch32 | ||
154 | + * Secure PL1&0 (i.e. mmu indexes E3, E30_0, E30_3_PAN), but it is | ||
155 | + * never called from a context where EL3 can be AArch32. (The | ||
156 | + * correct return value for ARMMMUIdx_E3 would be different for | ||
157 | + * that case, so we can't just make the function return the | ||
158 | + * correct value anyway; we would need an extra "bool e3_is_aarch32" | ||
159 | + * argument which all the current callsites would pass as 'false'.) | ||
160 | + */ | ||
161 | static inline bool regime_has_2_ranges(ARMMMUIdx mmu_idx) | ||
162 | { | ||
163 | switch (mmu_idx) { | ||
164 | @@ -XXX,XX +XXX,XX @@ static inline bool regime_is_pan(CPUARMState *env, ARMMMUIdx mmu_idx) | ||
165 | case ARMMMUIdx_Stage1_E1_PAN: | ||
166 | case ARMMMUIdx_E10_1_PAN: | ||
167 | case ARMMMUIdx_E20_2_PAN: | ||
168 | + case ARMMMUIdx_E30_3_PAN: | ||
169 | return true; | ||
170 | default: | ||
171 | return false; | ||
172 | @@ -XXX,XX +XXX,XX @@ static inline uint32_t regime_el(CPUARMState *env, ARMMMUIdx mmu_idx) | ||
173 | case ARMMMUIdx_E2: | ||
174 | return 2; | ||
175 | case ARMMMUIdx_E3: | ||
176 | + case ARMMMUIdx_E30_0: | ||
177 | + case ARMMMUIdx_E30_3_PAN: | ||
178 | return 3; | ||
179 | case ARMMMUIdx_E10_0: | ||
180 | case ARMMMUIdx_Stage1_E0: | ||
181 | - return arm_el_is_aa64(env, 3) || !arm_is_secure_below_el3(env) ? 1 : 3; | ||
182 | case ARMMMUIdx_Stage1_E1: | ||
183 | case ARMMMUIdx_Stage1_E1_PAN: | ||
184 | case ARMMMUIdx_E10_1: | ||
185 | @@ -XXX,XX +XXX,XX @@ static inline bool regime_is_user(CPUARMState *env, ARMMMUIdx mmu_idx) | ||
186 | switch (mmu_idx) { | ||
187 | case ARMMMUIdx_E10_0: | ||
188 | case ARMMMUIdx_E20_0: | ||
189 | + case ARMMMUIdx_E30_0: | ||
190 | case ARMMMUIdx_Stage1_E0: | ||
191 | case ARMMMUIdx_MUser: | ||
192 | case ARMMMUIdx_MSUser: | ||
193 | diff --git a/target/arm/helper.c b/target/arm/helper.c | ||
194 | index XXXXXXX..XXXXXXX 100644 | 118 | index XXXXXXX..XXXXXXX 100644 |
195 | --- a/target/arm/helper.c | 119 | --- a/tests/qtest/arm-cpu-features.c |
196 | +++ b/target/arm/helper.c | 120 | +++ b/tests/qtest/arm-cpu-features.c |
197 | @@ -XXX,XX +XXX,XX @@ static int alle1_tlbmask(CPUARMState *env) | 121 | @@ -XXX,XX +XXX,XX @@ static void pauth_tests_default(QTestState *qts, const char *cpu_type) |
198 | * Note that the 'ALL' scope must invalidate both stage 1 and | 122 | assert_has_feature_enabled(qts, cpu_type, "pauth"); |
199 | * stage 2 translations, whereas most other scopes only invalidate | 123 | assert_has_feature_disabled(qts, cpu_type, "pauth-impdef"); |
200 | * stage 1 translations. | 124 | assert_has_feature_disabled(qts, cpu_type, "pauth-qarma3"); |
201 | + * | 125 | + assert_has_feature_disabled(qts, cpu_type, "pauth-qarma5"); |
202 | + * For AArch32 this is only used for TLBIALLNSNH and VTTBR | 126 | assert_set_feature(qts, cpu_type, "pauth", false); |
203 | + * writes, so only needs to apply to NS PL1&0, not S PL1&0. | 127 | assert_set_feature(qts, cpu_type, "pauth", true); |
204 | */ | 128 | assert_set_feature(qts, cpu_type, "pauth-impdef", true); |
205 | return (ARMMMUIdxBit_E10_1 | | 129 | assert_set_feature(qts, cpu_type, "pauth-impdef", false); |
206 | ARMMMUIdxBit_E10_1_PAN | | 130 | assert_set_feature(qts, cpu_type, "pauth-qarma3", true); |
207 | @@ -XXX,XX +XXX,XX @@ static void ats_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) | 131 | assert_set_feature(qts, cpu_type, "pauth-qarma3", false); |
208 | /* stage 1 current state PL1: ATS1CPR, ATS1CPW, ATS1CPRP, ATS1CPWP */ | 132 | + assert_set_feature(qts, cpu_type, "pauth-qarma5", true); |
209 | switch (el) { | 133 | + assert_set_feature(qts, cpu_type, "pauth-qarma5", false); |
210 | case 3: | 134 | assert_error(qts, cpu_type, |
211 | - mmu_idx = ARMMMUIdx_E3; | 135 | - "cannot enable pauth-impdef or pauth-qarma3 without pauth", |
212 | + if (ri->crm == 9 && arm_pan_enabled(env)) { | 136 | + "cannot enable pauth-impdef, pauth-qarma3 or pauth-qarma5 without pauth", |
213 | + mmu_idx = ARMMMUIdx_E30_3_PAN; | 137 | "{ 'pauth': false, 'pauth-impdef': true }"); |
214 | + } else { | 138 | assert_error(qts, cpu_type, |
215 | + mmu_idx = ARMMMUIdx_E3; | 139 | - "cannot enable pauth-impdef or pauth-qarma3 without pauth", |
216 | + } | 140 | + "cannot enable pauth-impdef, pauth-qarma3 or pauth-qarma5 without pauth", |
217 | break; | 141 | "{ 'pauth': false, 'pauth-qarma3': true }"); |
218 | case 2: | 142 | assert_error(qts, cpu_type, |
219 | g_assert(ss != ARMSS_Secure); /* ARMv8.4-SecEL2 is 64-bit only */ | 143 | - "cannot enable both pauth-impdef and pauth-qarma3", |
220 | @@ -XXX,XX +XXX,XX @@ static void ats_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) | 144 | - "{ 'pauth': true, 'pauth-impdef': true, 'pauth-qarma3': true }"); |
221 | /* stage 1 current state PL0: ATS1CUR, ATS1CUW */ | 145 | + "cannot enable pauth-impdef, pauth-qarma3 or pauth-qarma5 without pauth", |
222 | switch (el) { | 146 | + "{ 'pauth': false, 'pauth-qarma5': true }"); |
223 | case 3: | 147 | + assert_error(qts, cpu_type, |
224 | - mmu_idx = ARMMMUIdx_E10_0; | 148 | + "cannot enable pauth-impdef, pauth-qarma3 and pauth-qarma5 at the same time", |
225 | + mmu_idx = ARMMMUIdx_E30_0; | 149 | + "{ 'pauth': true, 'pauth-impdef': true, 'pauth-qarma3': true," |
226 | break; | 150 | + " 'pauth-qarma5': true }"); |
227 | case 2: | ||
228 | g_assert(ss != ARMSS_Secure); /* ARMv8.4-SecEL2 is 64-bit only */ | ||
229 | @@ -XXX,XX +XXX,XX @@ static int vae1_tlbmask(CPUARMState *env) | ||
230 | uint64_t hcr = arm_hcr_el2_eff(env); | ||
231 | uint16_t mask; | ||
232 | |||
233 | + assert(arm_feature(env, ARM_FEATURE_AARCH64)); | ||
234 | + | ||
235 | if ((hcr & (HCR_E2H | HCR_TGE)) == (HCR_E2H | HCR_TGE)) { | ||
236 | mask = ARMMMUIdxBit_E20_2 | | ||
237 | ARMMMUIdxBit_E20_2_PAN | | ||
238 | ARMMMUIdxBit_E20_0; | ||
239 | } else { | ||
240 | + /* This is AArch64 only, so we don't need to touch the E30_x TLBs */ | ||
241 | mask = ARMMMUIdxBit_E10_1 | | ||
242 | ARMMMUIdxBit_E10_1_PAN | | ||
243 | ARMMMUIdxBit_E10_0; | ||
244 | @@ -XXX,XX +XXX,XX @@ static int vae1_tlbbits(CPUARMState *env, uint64_t addr) | ||
245 | uint64_t hcr = arm_hcr_el2_eff(env); | ||
246 | ARMMMUIdx mmu_idx; | ||
247 | |||
248 | + assert(arm_feature(env, ARM_FEATURE_AARCH64)); | ||
249 | + | ||
250 | /* Only the regime of the mmu_idx below is significant. */ | ||
251 | if ((hcr & (HCR_E2H | HCR_TGE)) == (HCR_E2H | HCR_TGE)) { | ||
252 | mmu_idx = ARMMMUIdx_E20_0; | ||
253 | @@ -XXX,XX +XXX,XX @@ void arm_cpu_do_interrupt(CPUState *cs) | ||
254 | |||
255 | uint64_t arm_sctlr(CPUARMState *env, int el) | ||
256 | { | ||
257 | - /* Only EL0 needs to be adjusted for EL1&0 or EL2&0. */ | ||
258 | + /* Only EL0 needs to be adjusted for EL1&0 or EL2&0 or EL3&0 */ | ||
259 | if (el == 0) { | ||
260 | ARMMMUIdx mmu_idx = arm_mmu_idx_el(env, 0); | ||
261 | - el = mmu_idx == ARMMMUIdx_E20_0 ? 2 : 1; | ||
262 | + switch (mmu_idx) { | ||
263 | + case ARMMMUIdx_E20_0: | ||
264 | + el = 2; | ||
265 | + break; | ||
266 | + case ARMMMUIdx_E30_0: | ||
267 | + el = 3; | ||
268 | + break; | ||
269 | + default: | ||
270 | + el = 1; | ||
271 | + break; | ||
272 | + } | ||
273 | } | ||
274 | return env->cp15.sctlr_el[el]; | ||
275 | } | 151 | } |
276 | @@ -XXX,XX +XXX,XX @@ int arm_mmu_idx_to_el(ARMMMUIdx mmu_idx) | 152 | |
277 | switch (mmu_idx) { | 153 | static void test_query_cpu_model_expansion(const void *data) |
278 | case ARMMMUIdx_E10_0: | ||
279 | case ARMMMUIdx_E20_0: | ||
280 | + case ARMMMUIdx_E30_0: | ||
281 | return 0; | ||
282 | case ARMMMUIdx_E10_1: | ||
283 | case ARMMMUIdx_E10_1_PAN: | ||
284 | @@ -XXX,XX +XXX,XX @@ int arm_mmu_idx_to_el(ARMMMUIdx mmu_idx) | ||
285 | case ARMMMUIdx_E20_2_PAN: | ||
286 | return 2; | ||
287 | case ARMMMUIdx_E3: | ||
288 | + case ARMMMUIdx_E30_3_PAN: | ||
289 | return 3; | ||
290 | default: | ||
291 | g_assert_not_reached(); | ||
292 | @@ -XXX,XX +XXX,XX @@ ARMMMUIdx arm_mmu_idx_el(CPUARMState *env, int el) | ||
293 | hcr = arm_hcr_el2_eff(env); | ||
294 | if ((hcr & (HCR_E2H | HCR_TGE)) == (HCR_E2H | HCR_TGE)) { | ||
295 | idx = ARMMMUIdx_E20_0; | ||
296 | + } else if (arm_is_secure_below_el3(env) && | ||
297 | + !arm_el_is_aa64(env, 3)) { | ||
298 | + idx = ARMMMUIdx_E30_0; | ||
299 | } else { | ||
300 | idx = ARMMMUIdx_E10_0; | ||
301 | } | ||
302 | @@ -XXX,XX +XXX,XX @@ ARMMMUIdx arm_mmu_idx_el(CPUARMState *env, int el) | ||
303 | } | ||
304 | break; | ||
305 | case 3: | ||
306 | + if (!arm_el_is_aa64(env, 3) && arm_pan_enabled(env)) { | ||
307 | + return ARMMMUIdx_E30_3_PAN; | ||
308 | + } | ||
309 | return ARMMMUIdx_E3; | ||
310 | default: | ||
311 | g_assert_not_reached(); | ||
312 | diff --git a/target/arm/ptw.c b/target/arm/ptw.c | ||
313 | index XXXXXXX..XXXXXXX 100644 | ||
314 | --- a/target/arm/ptw.c | ||
315 | +++ b/target/arm/ptw.c | ||
316 | @@ -XXX,XX +XXX,XX @@ static bool regime_translation_disabled(CPUARMState *env, ARMMMUIdx mmu_idx, | ||
317 | case ARMMMUIdx_E20_2_PAN: | ||
318 | case ARMMMUIdx_E2: | ||
319 | case ARMMMUIdx_E3: | ||
320 | + case ARMMMUIdx_E30_0: | ||
321 | + case ARMMMUIdx_E30_3_PAN: | ||
322 | break; | ||
323 | |||
324 | case ARMMMUIdx_Phys_S: | ||
325 | @@ -XXX,XX +XXX,XX @@ bool get_phys_addr(CPUARMState *env, vaddr address, | ||
326 | ss = ARMSS_Secure; | ||
327 | break; | ||
328 | case ARMMMUIdx_E3: | ||
329 | + case ARMMMUIdx_E30_0: | ||
330 | + case ARMMMUIdx_E30_3_PAN: | ||
331 | if (arm_feature(env, ARM_FEATURE_AARCH64) && | ||
332 | cpu_isar_feature(aa64_rme, env_archcpu(env))) { | ||
333 | ss = ARMSS_Root; | ||
334 | diff --git a/target/arm/tcg/op_helper.c b/target/arm/tcg/op_helper.c | ||
335 | index XXXXXXX..XXXXXXX 100644 | ||
336 | --- a/target/arm/tcg/op_helper.c | ||
337 | +++ b/target/arm/tcg/op_helper.c | ||
338 | @@ -XXX,XX +XXX,XX @@ void HELPER(tidcp_el0)(CPUARMState *env, uint32_t syndrome) | ||
339 | { | ||
340 | /* See arm_sctlr(), but we also need the sctlr el. */ | ||
341 | ARMMMUIdx mmu_idx = arm_mmu_idx_el(env, 0); | ||
342 | - int target_el = mmu_idx == ARMMMUIdx_E20_0 ? 2 : 1; | ||
343 | + int target_el; | ||
344 | + | ||
345 | + switch (mmu_idx) { | ||
346 | + case ARMMMUIdx_E20_0: | ||
347 | + target_el = 2; | ||
348 | + break; | ||
349 | + case ARMMMUIdx_E30_0: | ||
350 | + target_el = 3; | ||
351 | + break; | ||
352 | + default: | ||
353 | + target_el = 1; | ||
354 | + break; | ||
355 | + } | ||
356 | |||
357 | /* | ||
358 | * The bit is not valid unless the target el is aa64, but since the | ||
359 | diff --git a/target/arm/tcg/translate.c b/target/arm/tcg/translate.c | ||
360 | index XXXXXXX..XXXXXXX 100644 | ||
361 | --- a/target/arm/tcg/translate.c | ||
362 | +++ b/target/arm/tcg/translate.c | ||
363 | @@ -XXX,XX +XXX,XX @@ static inline int get_a32_user_mem_index(DisasContext *s) | ||
364 | */ | ||
365 | switch (s->mmu_idx) { | ||
366 | case ARMMMUIdx_E3: | ||
367 | + case ARMMMUIdx_E30_0: | ||
368 | + case ARMMMUIdx_E30_3_PAN: | ||
369 | + return arm_to_core_mmu_idx(ARMMMUIdx_E30_0); | ||
370 | case ARMMMUIdx_E2: /* this one is UNPREDICTABLE */ | ||
371 | case ARMMMUIdx_E10_0: | ||
372 | case ARMMMUIdx_E10_1: | ||
373 | -- | 154 | -- |
374 | 2.34.1 | 155 | 2.34.1 | diff view generated by jsdifflib |
1 | From: Richard Henderson <richard.henderson@linaro.org> | 1 | The pauth-3 test explicitly tests that a computation of the |
---|---|---|---|
2 | pointer-authentication produces the expected result. This means that | ||
3 | it must be run with the QARMA5 algorithm. | ||
2 | 4 | ||
3 | Like 9971cbac2f3, which set CAPSTONE_AARCH64_COMPAT_HEADER, | 5 | Explicitly set the pauth algorithm when running this test, so that it |
4 | also set CAPSTONE_SYSTEMZ_COMPAT_HEADER. Fixes the build | 6 | doesn't break when we change the default algorithm the 'max' CPU |
5 | against capstone v6-alpha. | 7 | uses. |
6 | 8 | ||
7 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
8 | Reviewed-by: Gustavo Romero <gustavo.romero@linaro.org> | ||
9 | Message-id: 20241022013047.830273-1-richard.henderson@linaro.org | ||
10 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | ||
11 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 9 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
12 | --- | 10 | --- |
13 | include/disas/capstone.h | 1 + | 11 | tests/tcg/aarch64/Makefile.softmmu-target | 3 +++ |
14 | 1 file changed, 1 insertion(+) | 12 | 1 file changed, 3 insertions(+) |
15 | 13 | ||
16 | diff --git a/include/disas/capstone.h b/include/disas/capstone.h | 14 | diff --git a/tests/tcg/aarch64/Makefile.softmmu-target b/tests/tcg/aarch64/Makefile.softmmu-target |
17 | index XXXXXXX..XXXXXXX 100644 | 15 | index XXXXXXX..XXXXXXX 100644 |
18 | --- a/include/disas/capstone.h | 16 | --- a/tests/tcg/aarch64/Makefile.softmmu-target |
19 | +++ b/include/disas/capstone.h | 17 | +++ b/tests/tcg/aarch64/Makefile.softmmu-target |
20 | @@ -XXX,XX +XXX,XX @@ | 18 | @@ -XXX,XX +XXX,XX @@ EXTRA_RUNS+=run-memory-replay |
21 | #ifdef CONFIG_CAPSTONE | 19 | |
22 | 20 | ifneq ($(CROSS_CC_HAS_ARMV8_3),) | |
23 | #define CAPSTONE_AARCH64_COMPAT_HEADER | 21 | pauth-3: CFLAGS += $(CROSS_CC_HAS_ARMV8_3) |
24 | +#define CAPSTONE_SYSTEMZ_COMPAT_HEADER | 22 | +# This test explicitly checks the output of the pauth operation so we |
25 | #include <capstone.h> | 23 | +# must force the use of the QARMA5 algorithm for it. |
26 | 24 | +run-pauth-3: QEMU_BASE_MACHINE=-M virt -cpu max,pauth-qarma5=on -display none | |
27 | #else | 25 | else |
26 | pauth-3: | ||
27 | $(call skip-test, "BUILD of $@", "missing compiler support") | ||
28 | -- | 28 | -- |
29 | 2.34.1 | 29 | 2.34.1 | diff view generated by jsdifflib |
1 | From: Gustavo Romero <gustavo.romero@linaro.org> | 1 | From: Pierrick Bouvier <pierrick.bouvier@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | FEAT_CMOW introduces support for controlling cache maintenance | 3 | Pointer authentication on aarch64 is pretty expensive (up to 50% of |
4 | instructions executed in EL0/1 and is mandatory from Armv8.8. | 4 | execution time) when running a virtual machine with tcg and -cpu max |
5 | (which enables pauth=on). | ||
5 | 6 | ||
6 | On real hardware, the main use for this feature is to prevent processes | 7 | The advice is always: use pauth-impdef=on. |
7 | from invalidating or flushing cache lines for addresses they only have | 8 | Our documentation even mentions it "by default" in |
8 | read permission, which can impact the performance of other processes. | 9 | docs/system/introduction.rst. |
9 | 10 | ||
10 | QEMU implements all cache instructions as NOPs, and, according to rule | 11 | Thus, we change the default to impdef. This does not |
11 | [1], which states that generating any Permission fault when a cache | 12 | affect kvm or hvf acceleration, since the pauth algorithm used is the one |
12 | instruction is implemented as a NOP is implementation-defined, no | 13 | from the host cpu. |
13 | Permission fault is generated for any cache instruction when it lacks | ||
14 | read and write permissions. | ||
15 | 14 | ||
16 | QEMU does not model any cache topology, so the PoU and PoC are before | 15 | This change is backward compatible, in terms of cli, with previous |
17 | any cache, and rules [2] apply. These rules state that generating any | 16 | versions, as the semantics of using -cpu max,pauth-impdef=on, and -cpu |
18 | MMU fault for cache instructions in this topology is also | 17 | max,pauth-qarma3=on are preserved. |
19 | implementation-defined. Therefore, for FEAT_CMOW, we do not generate any | 18 | The new option introduced in the previous patch, matching the old default, is |
20 | MMU faults either, instead, we only advertise it in the feature | 19 | -cpu max,pauth-qarma5=on. |
21 | register. | 20 | It is backward compatible with migration as well, by defining a backcompat |
21 | property that will use qarma5 by default for virt machines <= 9.2. | ||
22 | Tested by saving and restoring a vm from qemu 9.2.0 into qemu-master | ||
23 | (10.0) for cpus neoverse-n2 and max. | ||
22 | 24 | ||
23 | [1] Rule R_HGLYG of section D8.14.3, Arm ARM K.a. | 25 | Signed-off-by: Pierrick Bouvier <pierrick.bouvier@linaro.org> |
24 | [2] Rules R_MZTNR and R_DNZYL of section D8.14.3, Arm ARM K.a. | ||
25 | |||
26 | Signed-off-by: Gustavo Romero <gustavo.romero@linaro.org> | ||
27 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 26 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
28 | Message-id: 20241104142606.941638-1-gustavo.romero@linaro.org | 27 | Message-id: 20241219183211.3493974-3-pierrick.bouvier@linaro.org |
29 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | ||
30 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 28 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> |
31 | --- | 29 | --- |
32 | docs/system/arm/emulation.rst | 1 + | 30 | docs/system/arm/cpu-features.rst | 2 +- |
33 | target/arm/cpu-features.h | 5 +++++ | 31 | docs/system/introduction.rst | 2 +- |
34 | target/arm/cpu.h | 1 + | 32 | target/arm/cpu.h | 3 +++ |
35 | target/arm/helper.c | 5 +++++ | 33 | hw/core/machine.c | 4 +++- |
36 | target/arm/tcg/cpu64.c | 1 + | 34 | target/arm/cpu.c | 2 ++ |
37 | 5 files changed, 13 insertions(+) | 35 | target/arm/cpu64.c | 22 ++++++++++++++++------ |
36 | 6 files changed, 26 insertions(+), 9 deletions(-) | ||
38 | 37 | ||
39 | diff --git a/docs/system/arm/emulation.rst b/docs/system/arm/emulation.rst | 38 | diff --git a/docs/system/arm/cpu-features.rst b/docs/system/arm/cpu-features.rst |
40 | index XXXXXXX..XXXXXXX 100644 | 39 | index XXXXXXX..XXXXXXX 100644 |
41 | --- a/docs/system/arm/emulation.rst | 40 | --- a/docs/system/arm/cpu-features.rst |
42 | +++ b/docs/system/arm/emulation.rst | 41 | +++ b/docs/system/arm/cpu-features.rst |
43 | @@ -XXX,XX +XXX,XX @@ the following architecture extensions: | 42 | @@ -XXX,XX +XXX,XX @@ Below is the list of TCG VCPU features and their descriptions. |
44 | - FEAT_BF16 (AArch64 BFloat16 instructions) | 43 | When ``pauth`` is enabled, select the architected QARMA5 algorithm. |
45 | - FEAT_BTI (Branch Target Identification) | 44 | |
46 | - FEAT_CCIDX (Extended cache index) | 45 | Without ``pauth-impdef``, ``pauth-qarma3`` or ``pauth-qarma5`` enabled, |
47 | +- FEAT_CMOW (Control for cache maintenance permission) | 46 | -the architected QARMA5 algorithm is used. The architected QARMA5 |
48 | - FEAT_CRC32 (CRC32 instructions) | 47 | +the QEMU impdef algorithm is used. The architected QARMA5 |
49 | - FEAT_Crypto (Cryptographic Extension) | 48 | and QARMA3 algorithms have good cryptographic properties, but can |
50 | - FEAT_CSV2 (Cache speculation variant 2) | 49 | be quite slow to emulate. The impdef algorithm used by QEMU is |
51 | diff --git a/target/arm/cpu-features.h b/target/arm/cpu-features.h | 50 | non-cryptographic but significantly faster. |
51 | diff --git a/docs/system/introduction.rst b/docs/system/introduction.rst | ||
52 | index XXXXXXX..XXXXXXX 100644 | 52 | index XXXXXXX..XXXXXXX 100644 |
53 | --- a/target/arm/cpu-features.h | 53 | --- a/docs/system/introduction.rst |
54 | +++ b/target/arm/cpu-features.h | 54 | +++ b/docs/system/introduction.rst |
55 | @@ -XXX,XX +XXX,XX @@ static inline bool isar_feature_aa64_tidcp1(const ARMISARegisters *id) | 55 | @@ -XXX,XX +XXX,XX @@ would default to it anyway. |
56 | return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, TIDCP1) != 0; | 56 | |
57 | } | 57 | .. code:: |
58 | 58 | ||
59 | +static inline bool isar_feature_aa64_cmow(const ARMISARegisters *id) | 59 | - -cpu max,pauth-impdef=on \ |
60 | +{ | 60 | + -cpu max \ |
61 | + return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, CMOW) != 0; | 61 | -smp 4 \ |
62 | +} | 62 | -accel tcg \ |
63 | + | 63 | |
64 | static inline bool isar_feature_aa64_hafs(const ARMISARegisters *id) | ||
65 | { | ||
66 | return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, HAFDBS) != 0; | ||
67 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h | 64 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h |
68 | index XXXXXXX..XXXXXXX 100644 | 65 | index XXXXXXX..XXXXXXX 100644 |
69 | --- a/target/arm/cpu.h | 66 | --- a/target/arm/cpu.h |
70 | +++ b/target/arm/cpu.h | 67 | +++ b/target/arm/cpu.h |
71 | @@ -XXX,XX +XXX,XX @@ void pmu_init(ARMCPU *cpu); | 68 | @@ -XXX,XX +XXX,XX @@ struct ArchCPU { |
72 | #define SCTLR_EnIB (1U << 30) /* v8.3, AArch64 only */ | 69 | /* QOM property to indicate we should use the back-compat CNTFRQ default */ |
73 | #define SCTLR_EnIA (1U << 31) /* v8.3, AArch64 only */ | 70 | bool backcompat_cntfrq; |
74 | #define SCTLR_DSSBS_32 (1U << 31) /* v8.5, AArch32 only */ | 71 | |
75 | +#define SCTLR_CMOW (1ULL << 32) /* FEAT_CMOW */ | 72 | + /* QOM property to indicate we should use the back-compat QARMA5 default */ |
76 | #define SCTLR_MSCEN (1ULL << 33) /* FEAT_MOPS */ | 73 | + bool backcompat_pauth_default_use_qarma5; |
77 | #define SCTLR_BT0 (1ULL << 35) /* v8.5-BTI */ | 74 | + |
78 | #define SCTLR_BT1 (1ULL << 36) /* v8.5-BTI */ | 75 | /* Specify the number of cores in this CPU cluster. Used for the L2CTLR |
79 | diff --git a/target/arm/helper.c b/target/arm/helper.c | 76 | * register. |
77 | */ | ||
78 | diff --git a/hw/core/machine.c b/hw/core/machine.c | ||
80 | index XXXXXXX..XXXXXXX 100644 | 79 | index XXXXXXX..XXXXXXX 100644 |
81 | --- a/target/arm/helper.c | 80 | --- a/hw/core/machine.c |
82 | +++ b/target/arm/helper.c | 81 | +++ b/hw/core/machine.c |
83 | @@ -XXX,XX +XXX,XX @@ static void hcrx_write(CPUARMState *env, const ARMCPRegInfo *ri, | 82 | @@ -XXX,XX +XXX,XX @@ |
84 | if (cpu_isar_feature(aa64_nmi, cpu)) { | 83 | #include "hw/virtio/virtio-iommu.h" |
85 | valid_mask |= HCRX_TALLINT | HCRX_VINMI | HCRX_VFNMI; | 84 | #include "audio/audio.h" |
86 | } | 85 | |
87 | + /* FEAT_CMOW adds CMOW */ | 86 | -GlobalProperty hw_compat_9_2[] = {}; |
87 | +GlobalProperty hw_compat_9_2[] = { | ||
88 | + {"arm-cpu", "backcompat-pauth-default-use-qarma5", "true"}, | ||
89 | +}; | ||
90 | const size_t hw_compat_9_2_len = G_N_ELEMENTS(hw_compat_9_2); | ||
91 | |||
92 | GlobalProperty hw_compat_9_1[] = { | ||
93 | diff --git a/target/arm/cpu.c b/target/arm/cpu.c | ||
94 | index XXXXXXX..XXXXXXX 100644 | ||
95 | --- a/target/arm/cpu.c | ||
96 | +++ b/target/arm/cpu.c | ||
97 | @@ -XXX,XX +XXX,XX @@ static const Property arm_cpu_properties[] = { | ||
98 | DEFINE_PROP_INT32("core-count", ARMCPU, core_count, -1), | ||
99 | /* True to default to the backward-compat old CNTFRQ rather than 1Ghz */ | ||
100 | DEFINE_PROP_BOOL("backcompat-cntfrq", ARMCPU, backcompat_cntfrq, false), | ||
101 | + DEFINE_PROP_BOOL("backcompat-pauth-default-use-qarma5", ARMCPU, | ||
102 | + backcompat_pauth_default_use_qarma5, false), | ||
103 | }; | ||
104 | |||
105 | static const gchar *arm_gdb_arch_name(CPUState *cs) | ||
106 | diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c | ||
107 | index XXXXXXX..XXXXXXX 100644 | ||
108 | --- a/target/arm/cpu64.c | ||
109 | +++ b/target/arm/cpu64.c | ||
110 | @@ -XXX,XX +XXX,XX @@ void arm_cpu_pauth_finalize(ARMCPU *cpu, Error **errp) | ||
111 | return; | ||
112 | } | ||
113 | |||
114 | - if (cpu->prop_pauth_impdef) { | ||
115 | - isar1 = FIELD_DP64(isar1, ID_AA64ISAR1, API, features); | ||
116 | - isar1 = FIELD_DP64(isar1, ID_AA64ISAR1, GPI, 1); | ||
117 | + bool use_default = !cpu->prop_pauth_qarma5 && | ||
118 | + !cpu->prop_pauth_qarma3 && | ||
119 | + !cpu->prop_pauth_impdef; | ||
88 | + | 120 | + |
89 | + if (cpu_isar_feature(aa64_cmow, cpu)) { | 121 | + if (cpu->prop_pauth_qarma5 || |
90 | + valid_mask |= HCRX_CMOW; | 122 | + (use_default && |
91 | + } | 123 | + cpu->backcompat_pauth_default_use_qarma5)) { |
92 | 124 | + isar1 = FIELD_DP64(isar1, ID_AA64ISAR1, APA, features); | |
93 | /* Clear RES0 bits. */ | 125 | + isar1 = FIELD_DP64(isar1, ID_AA64ISAR1, GPA, 1); |
94 | env->cp15.hcrx_el2 = value & valid_mask; | 126 | } else if (cpu->prop_pauth_qarma3) { |
95 | diff --git a/target/arm/tcg/cpu64.c b/target/arm/tcg/cpu64.c | 127 | isar2 = FIELD_DP64(isar2, ID_AA64ISAR2, APA3, features); |
96 | index XXXXXXX..XXXXXXX 100644 | 128 | isar2 = FIELD_DP64(isar2, ID_AA64ISAR2, GPA3, 1); |
97 | --- a/target/arm/tcg/cpu64.c | 129 | - } else { /* default is pauth-qarma5 */ |
98 | +++ b/target/arm/tcg/cpu64.c | 130 | - isar1 = FIELD_DP64(isar1, ID_AA64ISAR1, APA, features); |
99 | @@ -XXX,XX +XXX,XX @@ void aarch64_max_tcg_initfn(Object *obj) | 131 | - isar1 = FIELD_DP64(isar1, ID_AA64ISAR1, GPA, 1); |
100 | t = FIELD_DP64(t, ID_AA64MMFR1, ETS, 2); /* FEAT_ETS2 */ | 132 | + } else if (cpu->prop_pauth_impdef || |
101 | t = FIELD_DP64(t, ID_AA64MMFR1, HCX, 1); /* FEAT_HCX */ | 133 | + (use_default && |
102 | t = FIELD_DP64(t, ID_AA64MMFR1, TIDCP1, 1); /* FEAT_TIDCP1 */ | 134 | + !cpu->backcompat_pauth_default_use_qarma5)) { |
103 | + t = FIELD_DP64(t, ID_AA64MMFR1, CMOW, 1); /* FEAT_CMOW */ | 135 | + isar1 = FIELD_DP64(isar1, ID_AA64ISAR1, API, features); |
104 | cpu->isar.id_aa64mmfr1 = t; | 136 | + isar1 = FIELD_DP64(isar1, ID_AA64ISAR1, GPI, 1); |
105 | 137 | + } else { | |
106 | t = cpu->isar.id_aa64mmfr2; | 138 | + g_assert_not_reached(); |
139 | } | ||
140 | } else if (cpu->prop_pauth_impdef || | ||
141 | cpu->prop_pauth_qarma3 || | ||
107 | -- | 142 | -- |
108 | 2.34.1 | 143 | 2.34.1 | diff view generated by jsdifflib |
1 | In xtensa we currently call set_use_first_nan() in a lot of | 1 | From: Pierrick Bouvier <pierrick.bouvier@linaro.org> |
---|---|---|---|
2 | places where we want to switch the NaN-propagation handling. | ||
3 | We're about to change the softfloat API we use to do that, | ||
4 | so start by factoring all the calls out into a single | ||
5 | xtensa_use_first_nan() function. | ||
6 | 2 | ||
7 | The bulk of this change was done with | 3 | Signed-off-by: Pierrick Bouvier <pierrick.bouvier@linaro.org> |
8 | sed -i -e 's/set_use_first_nan(\([^,]*\),[^)]*)/xtensa_use_first_nan(env, \1)/' target/xtensa/fpu_helper.c | 4 | Message-id: 20241219183211.3493974-4-pierrick.bouvier@linaro.org |
5 | [PMM: Removed a paragraph about using non-versioned models.] | ||
6 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
7 | --- | ||
8 | docs/system/arm/virt.rst | 4 ++++ | ||
9 | 1 file changed, 4 insertions(+) | ||
9 | 10 | ||
10 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | 11 | diff --git a/docs/system/arm/virt.rst b/docs/system/arm/virt.rst |
11 | Reviewed-by: Max Filippov <jcmvbkbc@gmail.com> | ||
12 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
13 | Message-id: 20241025141254.2141506-14-peter.maydell@linaro.org | ||
14 | --- | ||
15 | target/xtensa/cpu.h | 6 ++++++ | ||
16 | target/xtensa/cpu.c | 2 +- | ||
17 | target/xtensa/fpu_helper.c | 33 +++++++++++++++++++-------------- | ||
18 | 3 files changed, 26 insertions(+), 15 deletions(-) | ||
19 | |||
20 | diff --git a/target/xtensa/cpu.h b/target/xtensa/cpu.h | ||
21 | index XXXXXXX..XXXXXXX 100644 | 12 | index XXXXXXX..XXXXXXX 100644 |
22 | --- a/target/xtensa/cpu.h | 13 | --- a/docs/system/arm/virt.rst |
23 | +++ b/target/xtensa/cpu.h | 14 | +++ b/docs/system/arm/virt.rst |
24 | @@ -XXX,XX +XXX,XX @@ static inline void cpu_get_tb_cpu_state(CPUXtensaState *env, vaddr *pc, | 15 | @@ -XXX,XX +XXX,XX @@ of the 5.0 release and ``virt-5.0`` of the 5.1 release. Migration |
25 | XtensaCPU *xtensa_cpu_create_with_clock(const char *cpu_type, | 16 | is not guaranteed to work between different QEMU releases for |
26 | Clock *cpu_refclk); | 17 | the non-versioned ``virt`` machine type. |
27 | 18 | ||
28 | +/* | 19 | +VM migration is not guaranteed when using ``-cpu max``, as features |
29 | + * Set the NaN propagation rule for future FPU operations: | 20 | +supported may change between QEMU versions. To ensure your VM can be |
30 | + * use_first is true to pick the first NaN as the result if both | 21 | +migrated, it is recommended to use another cpu model instead. |
31 | + * inputs are NaNs, false to pick the second. | ||
32 | + */ | ||
33 | +void xtensa_use_first_nan(CPUXtensaState *env, bool use_first); | ||
34 | #endif | ||
35 | diff --git a/target/xtensa/cpu.c b/target/xtensa/cpu.c | ||
36 | index XXXXXXX..XXXXXXX 100644 | ||
37 | --- a/target/xtensa/cpu.c | ||
38 | +++ b/target/xtensa/cpu.c | ||
39 | @@ -XXX,XX +XXX,XX @@ static void xtensa_cpu_reset_hold(Object *obj, ResetType type) | ||
40 | cs->halted = env->runstall; | ||
41 | #endif | ||
42 | set_no_signaling_nans(!dfpu, &env->fp_status); | ||
43 | - set_use_first_nan(!dfpu, &env->fp_status); | ||
44 | + xtensa_use_first_nan(env, !dfpu); | ||
45 | } | ||
46 | |||
47 | static ObjectClass *xtensa_cpu_class_by_name(const char *cpu_model) | ||
48 | diff --git a/target/xtensa/fpu_helper.c b/target/xtensa/fpu_helper.c | ||
49 | index XXXXXXX..XXXXXXX 100644 | ||
50 | --- a/target/xtensa/fpu_helper.c | ||
51 | +++ b/target/xtensa/fpu_helper.c | ||
52 | @@ -XXX,XX +XXX,XX @@ static const struct { | ||
53 | { XTENSA_FP_V, float_flag_invalid, }, | ||
54 | }; | ||
55 | |||
56 | +void xtensa_use_first_nan(CPUXtensaState *env, bool use_first) | ||
57 | +{ | ||
58 | + set_use_first_nan(use_first, &env->fp_status); | ||
59 | +} | ||
60 | + | 22 | + |
61 | void HELPER(wur_fpu2k_fcr)(CPUXtensaState *env, uint32_t v) | 23 | Supported devices |
62 | { | 24 | """"""""""""""""" |
63 | static const int rounding_mode[] = { | ||
64 | @@ -XXX,XX +XXX,XX @@ float32 HELPER(fpu2k_msub_s)(CPUXtensaState *env, | ||
65 | |||
66 | float64 HELPER(add_d)(CPUXtensaState *env, float64 a, float64 b) | ||
67 | { | ||
68 | - set_use_first_nan(true, &env->fp_status); | ||
69 | + xtensa_use_first_nan(env, true); | ||
70 | return float64_add(a, b, &env->fp_status); | ||
71 | } | ||
72 | |||
73 | float32 HELPER(add_s)(CPUXtensaState *env, float32 a, float32 b) | ||
74 | { | ||
75 | - set_use_first_nan(env->config->use_first_nan, &env->fp_status); | ||
76 | + xtensa_use_first_nan(env, env->config->use_first_nan); | ||
77 | return float32_add(a, b, &env->fp_status); | ||
78 | } | ||
79 | |||
80 | float64 HELPER(sub_d)(CPUXtensaState *env, float64 a, float64 b) | ||
81 | { | ||
82 | - set_use_first_nan(true, &env->fp_status); | ||
83 | + xtensa_use_first_nan(env, true); | ||
84 | return float64_sub(a, b, &env->fp_status); | ||
85 | } | ||
86 | |||
87 | float32 HELPER(sub_s)(CPUXtensaState *env, float32 a, float32 b) | ||
88 | { | ||
89 | - set_use_first_nan(env->config->use_first_nan, &env->fp_status); | ||
90 | + xtensa_use_first_nan(env, env->config->use_first_nan); | ||
91 | return float32_sub(a, b, &env->fp_status); | ||
92 | } | ||
93 | |||
94 | float64 HELPER(mul_d)(CPUXtensaState *env, float64 a, float64 b) | ||
95 | { | ||
96 | - set_use_first_nan(true, &env->fp_status); | ||
97 | + xtensa_use_first_nan(env, true); | ||
98 | return float64_mul(a, b, &env->fp_status); | ||
99 | } | ||
100 | |||
101 | float32 HELPER(mul_s)(CPUXtensaState *env, float32 a, float32 b) | ||
102 | { | ||
103 | - set_use_first_nan(env->config->use_first_nan, &env->fp_status); | ||
104 | + xtensa_use_first_nan(env, env->config->use_first_nan); | ||
105 | return float32_mul(a, b, &env->fp_status); | ||
106 | } | ||
107 | |||
108 | float64 HELPER(madd_d)(CPUXtensaState *env, float64 a, float64 b, float64 c) | ||
109 | { | ||
110 | - set_use_first_nan(env->config->use_first_nan, &env->fp_status); | ||
111 | + xtensa_use_first_nan(env, env->config->use_first_nan); | ||
112 | return float64_muladd(b, c, a, 0, &env->fp_status); | ||
113 | } | ||
114 | |||
115 | float32 HELPER(madd_s)(CPUXtensaState *env, float32 a, float32 b, float32 c) | ||
116 | { | ||
117 | - set_use_first_nan(env->config->use_first_nan, &env->fp_status); | ||
118 | + xtensa_use_first_nan(env, env->config->use_first_nan); | ||
119 | return float32_muladd(b, c, a, 0, &env->fp_status); | ||
120 | } | ||
121 | |||
122 | float64 HELPER(msub_d)(CPUXtensaState *env, float64 a, float64 b, float64 c) | ||
123 | { | ||
124 | - set_use_first_nan(env->config->use_first_nan, &env->fp_status); | ||
125 | + xtensa_use_first_nan(env, env->config->use_first_nan); | ||
126 | return float64_muladd(b, c, a, float_muladd_negate_product, | ||
127 | &env->fp_status); | ||
128 | } | ||
129 | |||
130 | float32 HELPER(msub_s)(CPUXtensaState *env, float32 a, float32 b, float32 c) | ||
131 | { | ||
132 | - set_use_first_nan(env->config->use_first_nan, &env->fp_status); | ||
133 | + xtensa_use_first_nan(env, env->config->use_first_nan); | ||
134 | return float32_muladd(b, c, a, float_muladd_negate_product, | ||
135 | &env->fp_status); | ||
136 | } | ||
137 | |||
138 | float64 HELPER(mkdadj_d)(CPUXtensaState *env, float64 a, float64 b) | ||
139 | { | ||
140 | - set_use_first_nan(true, &env->fp_status); | ||
141 | + xtensa_use_first_nan(env, true); | ||
142 | return float64_div(b, a, &env->fp_status); | ||
143 | } | ||
144 | |||
145 | float32 HELPER(mkdadj_s)(CPUXtensaState *env, float32 a, float32 b) | ||
146 | { | ||
147 | - set_use_first_nan(env->config->use_first_nan, &env->fp_status); | ||
148 | + xtensa_use_first_nan(env, env->config->use_first_nan); | ||
149 | return float32_div(b, a, &env->fp_status); | ||
150 | } | ||
151 | |||
152 | float64 HELPER(mksadj_d)(CPUXtensaState *env, float64 v) | ||
153 | { | ||
154 | - set_use_first_nan(true, &env->fp_status); | ||
155 | + xtensa_use_first_nan(env, true); | ||
156 | return float64_sqrt(v, &env->fp_status); | ||
157 | } | ||
158 | |||
159 | float32 HELPER(mksadj_s)(CPUXtensaState *env, float32 v) | ||
160 | { | ||
161 | - set_use_first_nan(env->config->use_first_nan, &env->fp_status); | ||
162 | + xtensa_use_first_nan(env, env->config->use_first_nan); | ||
163 | return float32_sqrt(v, &env->fp_status); | ||
164 | } | ||
165 | 25 | ||
166 | -- | 26 | -- |
167 | 2.34.1 | 27 | 2.34.1 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | Set the NaN propagation rule explicitly in xtensa_use_first_nan(). | ||
2 | 1 | ||
3 | (When we convert the softfloat pickNaNMulAdd routine to also | ||
4 | select a NaN propagation rule at runtime, we will be able to | ||
5 | remove the use_first_nan flag because the propagation rules | ||
6 | will handle everything.) | ||
7 | |||
8 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
9 | Reviewed-by: Max Filippov <jcmvbkbc@gmail.com> | ||
10 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
11 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
12 | Message-id: 20241025141254.2141506-15-peter.maydell@linaro.org | ||
13 | --- | ||
14 | target/xtensa/fpu_helper.c | 2 ++ | ||
15 | fpu/softfloat-specialize.c.inc | 12 +----------- | ||
16 | 2 files changed, 3 insertions(+), 11 deletions(-) | ||
17 | |||
18 | diff --git a/target/xtensa/fpu_helper.c b/target/xtensa/fpu_helper.c | ||
19 | index XXXXXXX..XXXXXXX 100644 | ||
20 | --- a/target/xtensa/fpu_helper.c | ||
21 | +++ b/target/xtensa/fpu_helper.c | ||
22 | @@ -XXX,XX +XXX,XX @@ static const struct { | ||
23 | void xtensa_use_first_nan(CPUXtensaState *env, bool use_first) | ||
24 | { | ||
25 | set_use_first_nan(use_first, &env->fp_status); | ||
26 | + set_float_2nan_prop_rule(use_first ? float_2nan_prop_ab : float_2nan_prop_ba, | ||
27 | + &env->fp_status); | ||
28 | } | ||
29 | |||
30 | void HELPER(wur_fpu2k_fcr)(CPUXtensaState *env, uint32_t v) | ||
31 | diff --git a/fpu/softfloat-specialize.c.inc b/fpu/softfloat-specialize.c.inc | ||
32 | index XXXXXXX..XXXXXXX 100644 | ||
33 | --- a/fpu/softfloat-specialize.c.inc | ||
34 | +++ b/fpu/softfloat-specialize.c.inc | ||
35 | @@ -XXX,XX +XXX,XX @@ static int pickNaN(FloatClass a_cls, FloatClass b_cls, | ||
36 | || defined(TARGET_TRICORE) || defined(TARGET_ARM) || defined(TARGET_MIPS) \ | ||
37 | || defined(TARGET_LOONGARCH64) || defined(TARGET_HPPA) \ | ||
38 | || defined(TARGET_S390X) || defined(TARGET_PPC) || defined(TARGET_M68K) \ | ||
39 | - || defined(TARGET_SPARC) | ||
40 | + || defined(TARGET_SPARC) || defined(TARGET_XTENSA) | ||
41 | g_assert_not_reached(); | ||
42 | -#elif defined(TARGET_XTENSA) | ||
43 | - /* | ||
44 | - * Xtensa has two NaN propagation modes. | ||
45 | - * Which one is active is controlled by float_status::use_first_nan. | ||
46 | - */ | ||
47 | - if (status->use_first_nan) { | ||
48 | - rule = float_2nan_prop_ab; | ||
49 | - } else { | ||
50 | - rule = float_2nan_prop_ba; | ||
51 | - } | ||
52 | #else | ||
53 | rule = float_2nan_prop_x87; | ||
54 | #endif | ||
55 | -- | ||
56 | 2.34.1 | ||
57 | |||
58 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | Set the NaN propagation rule explicitly for the float_status words | ||
2 | used in the x86 target. | ||
3 | 1 | ||
4 | This is a no-behaviour-change commit, so we retain the existing | ||
5 | behaviour of using the x87-style "prefer QNaN over SNaN, then prefer | ||
6 | the NaN with the larger significand" for MMX and SSE. This is | ||
7 | however not the documented hardware behaviour, so we leave a TODO | ||
8 | note about what we should be doing instead. | ||
9 | |||
10 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
11 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
12 | Message-id: 20241025141254.2141506-16-peter.maydell@linaro.org | ||
13 | --- | ||
14 | target/i386/cpu.h | 3 +++ | ||
15 | target/i386/cpu.c | 4 ++++ | ||
16 | target/i386/tcg/fpu_helper.c | 40 ++++++++++++++++++++++++++++++++++ | ||
17 | fpu/softfloat-specialize.c.inc | 3 ++- | ||
18 | 4 files changed, 49 insertions(+), 1 deletion(-) | ||
19 | |||
20 | diff --git a/target/i386/cpu.h b/target/i386/cpu.h | ||
21 | index XXXXXXX..XXXXXXX 100644 | ||
22 | --- a/target/i386/cpu.h | ||
23 | +++ b/target/i386/cpu.h | ||
24 | @@ -XXX,XX +XXX,XX @@ static inline bool cpu_vmx_maybe_enabled(CPUX86State *env) | ||
25 | int get_pg_mode(CPUX86State *env); | ||
26 | |||
27 | /* fpu_helper.c */ | ||
28 | + | ||
29 | +/* Set all non-runtime-variable float_status fields to x86 handling */ | ||
30 | +void cpu_init_fp_statuses(CPUX86State *env); | ||
31 | void update_fp_status(CPUX86State *env); | ||
32 | void update_mxcsr_status(CPUX86State *env); | ||
33 | void update_mxcsr_from_sse_status(CPUX86State *env); | ||
34 | diff --git a/target/i386/cpu.c b/target/i386/cpu.c | ||
35 | index XXXXXXX..XXXXXXX 100644 | ||
36 | --- a/target/i386/cpu.c | ||
37 | +++ b/target/i386/cpu.c | ||
38 | @@ -XXX,XX +XXX,XX @@ static void x86_cpu_reset_hold(Object *obj, ResetType type) | ||
39 | |||
40 | memset(env, 0, offsetof(CPUX86State, end_reset_fields)); | ||
41 | |||
42 | + if (tcg_enabled()) { | ||
43 | + cpu_init_fp_statuses(env); | ||
44 | + } | ||
45 | + | ||
46 | env->old_exception = -1; | ||
47 | |||
48 | /* init to reset state */ | ||
49 | diff --git a/target/i386/tcg/fpu_helper.c b/target/i386/tcg/fpu_helper.c | ||
50 | index XXXXXXX..XXXXXXX 100644 | ||
51 | --- a/target/i386/tcg/fpu_helper.c | ||
52 | +++ b/target/i386/tcg/fpu_helper.c | ||
53 | @@ -XXX,XX +XXX,XX @@ static void fpu_set_exception(CPUX86State *env, int mask) | ||
54 | } | ||
55 | } | ||
56 | |||
57 | +void cpu_init_fp_statuses(CPUX86State *env) | ||
58 | +{ | ||
59 | + /* | ||
60 | + * Initialise the non-runtime-varying fields of the various | ||
61 | + * float_status words to x86 behaviour. This must be called at | ||
62 | + * CPU reset because the float_status words are in the | ||
63 | + * "zeroed on reset" portion of the CPU state struct. | ||
64 | + * Fields in float_status that vary under guest control are set | ||
65 | + * via the codepath for setting that register, eg cpu_set_fpuc(). | ||
66 | + */ | ||
67 | + /* | ||
68 | + * Use x87 NaN propagation rules: | ||
69 | + * SNaN + QNaN => return the QNaN | ||
70 | + * two SNaNs => return the one with the larger significand, silenced | ||
71 | + * two QNaNs => return the one with the larger significand | ||
72 | + * SNaN and a non-NaN => return the SNaN, silenced | ||
73 | + * QNaN and a non-NaN => return the QNaN | ||
74 | + * | ||
75 | + * If we get down to comparing significands and they are the same, | ||
76 | + * return the NaN with the positive sign bit (if any). | ||
77 | + */ | ||
78 | + set_float_2nan_prop_rule(float_2nan_prop_x87, &env->fp_status); | ||
79 | + /* | ||
80 | + * TODO: These are incorrect: the x86 Software Developer's Manual vol 1 | ||
81 | + * section 4.8.3.5 "Operating on SNaNs and QNaNs" says that the | ||
82 | + * "larger significand" behaviour is only used for x87 FPU operations. | ||
83 | + * For SSE the required behaviour is to always return the first NaN, | ||
84 | + * which is float_2nan_prop_ab. | ||
85 | + * | ||
86 | + * mmx_status is used only for the AMD 3DNow! instructions, which | ||
87 | + * are documented in the "3DNow! Technology Manual" as not supporting | ||
88 | + * NaNs or infinities as inputs. The result of passing two NaNs is | ||
89 | + * documented as "undefined", so we can do what we choose. | ||
90 | + * (Strictly there is some behaviour we don't implement correctly | ||
91 | + * for these "unsupported" NaN and Inf values, like "NaN * 0 == 0".) | ||
92 | + */ | ||
93 | + set_float_2nan_prop_rule(float_2nan_prop_x87, &env->mmx_status); | ||
94 | + set_float_2nan_prop_rule(float_2nan_prop_x87, &env->sse_status); | ||
95 | +} | ||
96 | + | ||
97 | static inline uint8_t save_exception_flags(CPUX86State *env) | ||
98 | { | ||
99 | uint8_t old_flags = get_float_exception_flags(&env->fp_status); | ||
100 | diff --git a/fpu/softfloat-specialize.c.inc b/fpu/softfloat-specialize.c.inc | ||
101 | index XXXXXXX..XXXXXXX 100644 | ||
102 | --- a/fpu/softfloat-specialize.c.inc | ||
103 | +++ b/fpu/softfloat-specialize.c.inc | ||
104 | @@ -XXX,XX +XXX,XX @@ static int pickNaN(FloatClass a_cls, FloatClass b_cls, | ||
105 | || defined(TARGET_TRICORE) || defined(TARGET_ARM) || defined(TARGET_MIPS) \ | ||
106 | || defined(TARGET_LOONGARCH64) || defined(TARGET_HPPA) \ | ||
107 | || defined(TARGET_S390X) || defined(TARGET_PPC) || defined(TARGET_M68K) \ | ||
108 | - || defined(TARGET_SPARC) || defined(TARGET_XTENSA) | ||
109 | + || defined(TARGET_SPARC) || defined(TARGET_XTENSA) \ | ||
110 | + || defined(TARGET_I386) | ||
111 | g_assert_not_reached(); | ||
112 | #else | ||
113 | rule = float_2nan_prop_x87; | ||
114 | -- | ||
115 | 2.34.1 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | Set the NaN propagation rule explicitly for the float_status word | ||
2 | used in this target. | ||
3 | 1 | ||
4 | This is a no-behaviour-change commit, so we retain the existing | ||
5 | behaviour of x87-style pick-largest-significand NaN propagation. | ||
6 | This is however not the architecturally correct handling, so we leave | ||
7 | a TODO note to that effect. | ||
8 | |||
9 | We also leave a TODO note pointing out that all this code in the cpu | ||
10 | initfn (including the existing setting up of env->flags and the FPCR) | ||
11 | should be in a currently non-existent CPU reset function. | ||
12 | |||
13 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
14 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
15 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
16 | Message-id: 20241025141254.2141506-17-peter.maydell@linaro.org | ||
17 | --- | ||
18 | target/alpha/cpu.c | 11 +++++++++++ | ||
19 | fpu/softfloat-specialize.c.inc | 2 +- | ||
20 | 2 files changed, 12 insertions(+), 1 deletion(-) | ||
21 | |||
22 | diff --git a/target/alpha/cpu.c b/target/alpha/cpu.c | ||
23 | index XXXXXXX..XXXXXXX 100644 | ||
24 | --- a/target/alpha/cpu.c | ||
25 | +++ b/target/alpha/cpu.c | ||
26 | @@ -XXX,XX +XXX,XX @@ | ||
27 | #include "qemu/qemu-print.h" | ||
28 | #include "cpu.h" | ||
29 | #include "exec/exec-all.h" | ||
30 | +#include "fpu/softfloat.h" | ||
31 | |||
32 | |||
33 | static void alpha_cpu_set_pc(CPUState *cs, vaddr value) | ||
34 | @@ -XXX,XX +XXX,XX @@ static void alpha_cpu_initfn(Object *obj) | ||
35 | { | ||
36 | CPUAlphaState *env = cpu_env(CPU(obj)); | ||
37 | |||
38 | + /* TODO all this should be done in reset, not init */ | ||
39 | + | ||
40 | env->lock_addr = -1; | ||
41 | + | ||
42 | + /* | ||
43 | + * TODO: this is incorrect. The Alpha Architecture Handbook version 4 | ||
44 | + * describes NaN propagation in section 4.7.10.4. We should prefer | ||
45 | + * the operand in Fb (whether it is a QNaN or an SNaN), then the | ||
46 | + * operand in Fa. That is float_2nan_prop_ba. | ||
47 | + */ | ||
48 | + set_float_2nan_prop_rule(float_2nan_prop_x87, &env->fp_status); | ||
49 | #if defined(CONFIG_USER_ONLY) | ||
50 | env->flags = ENV_FLAG_PS_USER | ENV_FLAG_FEN; | ||
51 | cpu_alpha_store_fpcr(env, (uint64_t)(FPCR_INVD | FPCR_DZED | FPCR_OVFD | ||
52 | diff --git a/fpu/softfloat-specialize.c.inc b/fpu/softfloat-specialize.c.inc | ||
53 | index XXXXXXX..XXXXXXX 100644 | ||
54 | --- a/fpu/softfloat-specialize.c.inc | ||
55 | +++ b/fpu/softfloat-specialize.c.inc | ||
56 | @@ -XXX,XX +XXX,XX @@ static int pickNaN(FloatClass a_cls, FloatClass b_cls, | ||
57 | || defined(TARGET_LOONGARCH64) || defined(TARGET_HPPA) \ | ||
58 | || defined(TARGET_S390X) || defined(TARGET_PPC) || defined(TARGET_M68K) \ | ||
59 | || defined(TARGET_SPARC) || defined(TARGET_XTENSA) \ | ||
60 | - || defined(TARGET_I386) | ||
61 | + || defined(TARGET_I386) || defined(TARGET_ALPHA) | ||
62 | g_assert_not_reached(); | ||
63 | #else | ||
64 | rule = float_2nan_prop_x87; | ||
65 | -- | ||
66 | 2.34.1 | ||
67 | |||
68 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | Although the floating point rounding mode for Microblaze is always | ||
2 | nearest-even, we cannot set it just once in the CPU initfn. This is | ||
3 | because env->fp_status is in the part of the CPU state struct that is | ||
4 | zeroed on reset. | ||
5 | 1 | ||
6 | Move the call to set_float_rounding_mode() into the reset fn. | ||
7 | |||
8 | (This had no guest-visible effects because it happens that the | ||
9 | float_round_nearest_even enum value is 0, so when the struct was | ||
10 | zeroed it didn't corrupt the setting.) | ||
11 | |||
12 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
13 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
14 | Message-id: 20241025141254.2141506-18-peter.maydell@linaro.org | ||
15 | --- | ||
16 | target/microblaze/cpu.c | 5 ++--- | ||
17 | 1 file changed, 2 insertions(+), 3 deletions(-) | ||
18 | |||
19 | diff --git a/target/microblaze/cpu.c b/target/microblaze/cpu.c | ||
20 | index XXXXXXX..XXXXXXX 100644 | ||
21 | --- a/target/microblaze/cpu.c | ||
22 | +++ b/target/microblaze/cpu.c | ||
23 | @@ -XXX,XX +XXX,XX @@ static void mb_cpu_reset_hold(Object *obj, ResetType type) | ||
24 | |||
25 | env->pc = cpu->cfg.base_vectors; | ||
26 | |||
27 | + set_float_rounding_mode(float_round_nearest_even, &env->fp_status); | ||
28 | + | ||
29 | #if defined(CONFIG_USER_ONLY) | ||
30 | /* start in user mode with interrupts enabled. */ | ||
31 | mb_cpu_write_msr(env, MSR_EE | MSR_IE | MSR_VM | MSR_UM); | ||
32 | @@ -XXX,XX +XXX,XX @@ static void mb_cpu_realizefn(DeviceState *dev, Error **errp) | ||
33 | static void mb_cpu_initfn(Object *obj) | ||
34 | { | ||
35 | MicroBlazeCPU *cpu = MICROBLAZE_CPU(obj); | ||
36 | - CPUMBState *env = &cpu->env; | ||
37 | |||
38 | gdb_register_coprocessor(CPU(cpu), mb_cpu_gdb_read_stack_protect, | ||
39 | mb_cpu_gdb_write_stack_protect, | ||
40 | gdb_find_static_feature("microblaze-stack-protect.xml"), | ||
41 | 0); | ||
42 | |||
43 | - set_float_rounding_mode(float_round_nearest_even, &env->fp_status); | ||
44 | - | ||
45 | #ifndef CONFIG_USER_ONLY | ||
46 | /* Inbound IRQ and FIR lines */ | ||
47 | qdev_init_gpio_in(DEVICE(cpu), microblaze_cpu_set_irq, 2); | ||
48 | -- | ||
49 | 2.34.1 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | Set the NaN propagation rule explicitly for the float_status word | ||
2 | used in the microblaze target. | ||
3 | 1 | ||
4 | This is probably not the architecturally correct behaviour, | ||
5 | but since this is a no-behaviour-change patch, we leave a | ||
6 | TODO note to that effect. | ||
7 | |||
8 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
9 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
10 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
11 | Message-id: 20241025141254.2141506-19-peter.maydell@linaro.org | ||
12 | --- | ||
13 | target/microblaze/cpu.c | 5 +++++ | ||
14 | fpu/softfloat-specialize.c.inc | 3 ++- | ||
15 | 2 files changed, 7 insertions(+), 1 deletion(-) | ||
16 | |||
17 | diff --git a/target/microblaze/cpu.c b/target/microblaze/cpu.c | ||
18 | index XXXXXXX..XXXXXXX 100644 | ||
19 | --- a/target/microblaze/cpu.c | ||
20 | +++ b/target/microblaze/cpu.c | ||
21 | @@ -XXX,XX +XXX,XX @@ static void mb_cpu_reset_hold(Object *obj, ResetType type) | ||
22 | env->pc = cpu->cfg.base_vectors; | ||
23 | |||
24 | set_float_rounding_mode(float_round_nearest_even, &env->fp_status); | ||
25 | + /* | ||
26 | + * TODO: this is probably not the correct NaN propagation rule for | ||
27 | + * this architecture. | ||
28 | + */ | ||
29 | + set_float_2nan_prop_rule(float_2nan_prop_x87, &env->fp_status); | ||
30 | |||
31 | #if defined(CONFIG_USER_ONLY) | ||
32 | /* start in user mode with interrupts enabled. */ | ||
33 | diff --git a/fpu/softfloat-specialize.c.inc b/fpu/softfloat-specialize.c.inc | ||
34 | index XXXXXXX..XXXXXXX 100644 | ||
35 | --- a/fpu/softfloat-specialize.c.inc | ||
36 | +++ b/fpu/softfloat-specialize.c.inc | ||
37 | @@ -XXX,XX +XXX,XX @@ static int pickNaN(FloatClass a_cls, FloatClass b_cls, | ||
38 | || defined(TARGET_LOONGARCH64) || defined(TARGET_HPPA) \ | ||
39 | || defined(TARGET_S390X) || defined(TARGET_PPC) || defined(TARGET_M68K) \ | ||
40 | || defined(TARGET_SPARC) || defined(TARGET_XTENSA) \ | ||
41 | - || defined(TARGET_I386) || defined(TARGET_ALPHA) | ||
42 | + || defined(TARGET_I386) || defined(TARGET_ALPHA) \ | ||
43 | + || defined(TARGET_MICROBLAZE) | ||
44 | g_assert_not_reached(); | ||
45 | #else | ||
46 | rule = float_2nan_prop_x87; | ||
47 | -- | ||
48 | 2.34.1 | ||
49 | |||
50 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | Set the NaN propagation rule explicitly for the float_status word | ||
2 | used in the openrisc target. | ||
3 | 1 | ||
4 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
5 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
6 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | Message-id: 20241025141254.2141506-20-peter.maydell@linaro.org | ||
8 | --- | ||
9 | target/openrisc/cpu.c | 6 ++++++ | ||
10 | fpu/softfloat-specialize.c.inc | 2 +- | ||
11 | 2 files changed, 7 insertions(+), 1 deletion(-) | ||
12 | |||
13 | diff --git a/target/openrisc/cpu.c b/target/openrisc/cpu.c | ||
14 | index XXXXXXX..XXXXXXX 100644 | ||
15 | --- a/target/openrisc/cpu.c | ||
16 | +++ b/target/openrisc/cpu.c | ||
17 | @@ -XXX,XX +XXX,XX @@ static void openrisc_cpu_reset_hold(Object *obj, ResetType type) | ||
18 | |||
19 | set_float_detect_tininess(float_tininess_before_rounding, | ||
20 | &cpu->env.fp_status); | ||
21 | + /* | ||
22 | + * TODO: this is probably not the correct NaN propagation rule for | ||
23 | + * this architecture. | ||
24 | + */ | ||
25 | + set_float_2nan_prop_rule(float_2nan_prop_x87, &cpu->env.fp_status); | ||
26 | + | ||
27 | |||
28 | #ifndef CONFIG_USER_ONLY | ||
29 | cpu->env.picmr = 0x00000000; | ||
30 | diff --git a/fpu/softfloat-specialize.c.inc b/fpu/softfloat-specialize.c.inc | ||
31 | index XXXXXXX..XXXXXXX 100644 | ||
32 | --- a/fpu/softfloat-specialize.c.inc | ||
33 | +++ b/fpu/softfloat-specialize.c.inc | ||
34 | @@ -XXX,XX +XXX,XX @@ static int pickNaN(FloatClass a_cls, FloatClass b_cls, | ||
35 | || defined(TARGET_S390X) || defined(TARGET_PPC) || defined(TARGET_M68K) \ | ||
36 | || defined(TARGET_SPARC) || defined(TARGET_XTENSA) \ | ||
37 | || defined(TARGET_I386) || defined(TARGET_ALPHA) \ | ||
38 | - || defined(TARGET_MICROBLAZE) | ||
39 | + || defined(TARGET_MICROBLAZE) || defined(TARGET_OPENRISC) | ||
40 | g_assert_not_reached(); | ||
41 | #else | ||
42 | rule = float_2nan_prop_x87; | ||
43 | -- | ||
44 | 2.34.1 | ||
45 | |||
46 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | Set the NaN propagation rule explicitly for the float_status word | ||
2 | used in the rx target. | ||
3 | 1 | ||
4 | This is not the architecturally correct behaviour, but since this is a | ||
5 | no-behaviour-change patch, we leave a TODO note to that effect. | ||
6 | |||
7 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
8 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
9 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
10 | Message-id: 20241025141254.2141506-21-peter.maydell@linaro.org | ||
11 | --- | ||
12 | target/rx/cpu.c | 7 +++++++ | ||
13 | fpu/softfloat-specialize.c.inc | 3 ++- | ||
14 | 2 files changed, 9 insertions(+), 1 deletion(-) | ||
15 | |||
16 | diff --git a/target/rx/cpu.c b/target/rx/cpu.c | ||
17 | index XXXXXXX..XXXXXXX 100644 | ||
18 | --- a/target/rx/cpu.c | ||
19 | +++ b/target/rx/cpu.c | ||
20 | @@ -XXX,XX +XXX,XX @@ static void rx_cpu_reset_hold(Object *obj, ResetType type) | ||
21 | env->fpsw = 0; | ||
22 | set_flush_to_zero(1, &env->fp_status); | ||
23 | set_flush_inputs_to_zero(1, &env->fp_status); | ||
24 | + /* | ||
25 | + * TODO: this is not the correct NaN propagation rule for this | ||
26 | + * architecture. The "RX Family User's Manual: Software" table 1.6 | ||
27 | + * defines the propagation rules as "prefer SNaN over QNaN; | ||
28 | + * then prefer dest over source", which is float_2nan_prop_s_ab. | ||
29 | + */ | ||
30 | + set_float_2nan_prop_rule(float_2nan_prop_x87, &env->fp_status); | ||
31 | } | ||
32 | |||
33 | static ObjectClass *rx_cpu_class_by_name(const char *cpu_model) | ||
34 | diff --git a/fpu/softfloat-specialize.c.inc b/fpu/softfloat-specialize.c.inc | ||
35 | index XXXXXXX..XXXXXXX 100644 | ||
36 | --- a/fpu/softfloat-specialize.c.inc | ||
37 | +++ b/fpu/softfloat-specialize.c.inc | ||
38 | @@ -XXX,XX +XXX,XX @@ static int pickNaN(FloatClass a_cls, FloatClass b_cls, | ||
39 | || defined(TARGET_S390X) || defined(TARGET_PPC) || defined(TARGET_M68K) \ | ||
40 | || defined(TARGET_SPARC) || defined(TARGET_XTENSA) \ | ||
41 | || defined(TARGET_I386) || defined(TARGET_ALPHA) \ | ||
42 | - || defined(TARGET_MICROBLAZE) || defined(TARGET_OPENRISC) | ||
43 | + || defined(TARGET_MICROBLAZE) || defined(TARGET_OPENRISC) \ | ||
44 | + || defined(TARGET_RX) | ||
45 | g_assert_not_reached(); | ||
46 | #else | ||
47 | rule = float_2nan_prop_x87; | ||
48 | -- | ||
49 | 2.34.1 | ||
50 | |||
51 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | Now that all targets have been converted to explicitly set a NaN | ||
2 | propagation rule, we can remove the set of target ifdefs (which now | ||
3 | list every target) and clean up the references to fallback behaviour | ||
4 | for float_2nan_prop_none. | ||
5 | 1 | ||
6 | The "default" case in the switch will catch any remaining places | ||
7 | where status->float_2nan_prop_rule was not set by the target. | ||
8 | |||
9 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
10 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
11 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
12 | Message-id: 20241025141254.2141506-22-peter.maydell@linaro.org | ||
13 | --- | ||
14 | include/fpu/softfloat-types.h | 10 +++------- | ||
15 | fpu/softfloat-specialize.c.inc | 23 +++-------------------- | ||
16 | 2 files changed, 6 insertions(+), 27 deletions(-) | ||
17 | |||
18 | diff --git a/include/fpu/softfloat-types.h b/include/fpu/softfloat-types.h | ||
19 | index XXXXXXX..XXXXXXX 100644 | ||
20 | --- a/include/fpu/softfloat-types.h | ||
21 | +++ b/include/fpu/softfloat-types.h | ||
22 | @@ -XXX,XX +XXX,XX @@ typedef enum __attribute__((__packed__)) { | ||
23 | * If default_nan_mode is enabled then it is valid not to set a | ||
24 | * NaN propagation rule, because the softfloat code guarantees | ||
25 | * not to try to pick a NaN to propagate in default NaN mode. | ||
26 | - * | ||
27 | - * For transition, currently the 'none' rule will cause us to | ||
28 | - * fall back to picking the propagation rule based on the existing | ||
29 | - * ifdef ladder. When all targets are converted it will be an error | ||
30 | - * not to set the rule in float_status unless in default_nan_mode, | ||
31 | - * and we will assert if we need to handle an input NaN and no | ||
32 | - * rule was selected. | ||
33 | + * When not in default-NaN mode, it is an error for the target | ||
34 | + * not to set the rule in float_status, and we will assert if | ||
35 | + * we need to handle an input NaN and no rule was selected. | ||
36 | */ | ||
37 | typedef enum __attribute__((__packed__)) { | ||
38 | /* No propagation rule specified */ | ||
39 | diff --git a/fpu/softfloat-specialize.c.inc b/fpu/softfloat-specialize.c.inc | ||
40 | index XXXXXXX..XXXXXXX 100644 | ||
41 | --- a/fpu/softfloat-specialize.c.inc | ||
42 | +++ b/fpu/softfloat-specialize.c.inc | ||
43 | @@ -XXX,XX +XXX,XX @@ bool float32_is_signaling_nan(float32 a_, float_status *status) | ||
44 | static int pickNaN(FloatClass a_cls, FloatClass b_cls, | ||
45 | bool aIsLargerSignificand, float_status *status) | ||
46 | { | ||
47 | - Float2NaNPropRule rule = status->float_2nan_prop_rule; | ||
48 | - | ||
49 | /* | ||
50 | * We guarantee not to require the target to tell us how to | ||
51 | * pick a NaN if we're always returning the default NaN. | ||
52 | + * But if we're not in default-NaN mode then the target must | ||
53 | + * specify via set_float_2nan_prop_rule(). | ||
54 | */ | ||
55 | assert(!status->default_nan_mode); | ||
56 | |||
57 | - if (rule == float_2nan_prop_none) { | ||
58 | - /* target didn't set the rule: fall back to old ifdef choices */ | ||
59 | -#if defined(TARGET_AVR) || defined(TARGET_HEXAGON) \ | ||
60 | - || defined(TARGET_RISCV) || defined(TARGET_SH4) \ | ||
61 | - || defined(TARGET_TRICORE) || defined(TARGET_ARM) || defined(TARGET_MIPS) \ | ||
62 | - || defined(TARGET_LOONGARCH64) || defined(TARGET_HPPA) \ | ||
63 | - || defined(TARGET_S390X) || defined(TARGET_PPC) || defined(TARGET_M68K) \ | ||
64 | - || defined(TARGET_SPARC) || defined(TARGET_XTENSA) \ | ||
65 | - || defined(TARGET_I386) || defined(TARGET_ALPHA) \ | ||
66 | - || defined(TARGET_MICROBLAZE) || defined(TARGET_OPENRISC) \ | ||
67 | - || defined(TARGET_RX) | ||
68 | - g_assert_not_reached(); | ||
69 | -#else | ||
70 | - rule = float_2nan_prop_x87; | ||
71 | -#endif | ||
72 | - } | ||
73 | - | ||
74 | - switch (rule) { | ||
75 | + switch (status->float_2nan_prop_rule) { | ||
76 | case float_2nan_prop_s_ab: | ||
77 | if (is_snan(a_cls)) { | ||
78 | return 0; | ||
79 | -- | ||
80 | 2.34.1 | ||
81 | |||
82 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | This reverts commit 4c2c0474693229c1f533239bb983495c5427784d. | ||
2 | 1 | ||
3 | This commit tried to fix a problem with our usage of MMU indexes when | ||
4 | EL3 is AArch32, using what it described as a "more complicated | ||
5 | approach" where we share the same MMU index values for Secure PL1&0 | ||
6 | and NonSecure PL1&0. In theory this should work, but the change | ||
7 | didn't account for (at least) two things: | ||
8 | |||
9 | (1) The design change means we need to flush the TLBs at any point | ||
10 | where the CPU state flips from one to the other. We already flush | ||
11 | the TLB when SCR.NS is changed, but we don't flush the TLB when we | ||
12 | take an exception from NS PL1&0 into Mon or when we return from Mon | ||
13 | to NS PL1&0, and the commit didn't add any code to do that. | ||
14 | |||
15 | (2) The ATS12NS* address translate instructions allow Mon code (which | ||
16 | is Secure) to do a stage 1+2 page table walk for NS. I thought this | ||
17 | was OK because do_ats_write() does a page table walk which doesn't | ||
18 | use the TLBs, so because it can pass both the MMU index and also an | ||
19 | ARMSecuritySpace argument we can tell the table walk that we want NS | ||
20 | stage1+2, not S. But that means that all the code within the ptw | ||
21 | that needs to find e.g. the regime EL cannot do so with only an | ||
22 | mmu_idx -- all these functions like regime_sctlr(), regime_el(), etc | ||
23 | would need to pass both an mmu_idx and the security_space, so they | ||
24 | can tell whether this is a translation regime controlled by EL1 or | ||
25 | EL3 (and so whether to look at SCTLR.S or SCTLR.NS, etc). | ||
26 | |||
27 | In particular, because regime_el() wasn't updated to look at the | ||
28 | ARMSecuritySpace it would return 1 even when the CPU was in Monitor | ||
29 | mode (and the controlling EL is 3). This meant that page table walks | ||
30 | in Monitor mode would look at the wrong SCTLR, TCR, etc and would | ||
31 | generally fault when they should not. | ||
32 | |||
33 | Rather than trying to make the complicated changes needed to rescue | ||
34 | the design of 4c2c04746932, we revert it in order to instead take the | ||
35 | route that that commit describes as "the most straightforward" fix, | ||
36 | where we add new MMU indexes EL30_0, EL30_3, EL30_3_PAN to correspond | ||
37 | to "Secure PL1&0 at PL0", "Secure PL1&0 at PL1", and "Secure PL1&0 at | ||
38 | PL1 with PAN". | ||
39 | |||
40 | This revert will re-expose the "spurious alignment faults in | ||
41 | Secure PL0" issue #2326; we'll fix it again in the next commit. | ||
42 | |||
43 | Cc: qemu-stable@nongnu.org | ||
44 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
45 | Tested-by: Thomas Huth <thuth@redhat.com> | ||
46 | Message-id: 20241101142845.1712482-2-peter.maydell@linaro.org | ||
47 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
48 | --- | ||
49 | target/arm/cpu.h | 31 +++++++++++++------------------ | ||
50 | target/arm/internals.h | 27 ++++----------------------- | ||
51 | target/arm/tcg/translate.h | 2 -- | ||
52 | target/arm/helper.c | 34 +++++++++++----------------------- | ||
53 | target/arm/ptw.c | 6 +----- | ||
54 | target/arm/tcg/hflags.c | 4 ---- | ||
55 | target/arm/tcg/translate-a64.c | 2 +- | ||
56 | target/arm/tcg/translate.c | 9 ++++----- | ||
57 | 8 files changed, 34 insertions(+), 81 deletions(-) | ||
58 | |||
59 | diff --git a/target/arm/cpu.h b/target/arm/cpu.h | ||
60 | index XXXXXXX..XXXXXXX 100644 | ||
61 | --- a/target/arm/cpu.h | ||
62 | +++ b/target/arm/cpu.h | ||
63 | @@ -XXX,XX +XXX,XX @@ bool write_cpustate_to_list(ARMCPU *cpu, bool kvm_sync); | ||
64 | * + NonSecure PL1 & 0 stage 1 | ||
65 | * + NonSecure PL1 & 0 stage 2 | ||
66 | * + NonSecure PL2 | ||
67 | - * + Secure PL1 & 0 | ||
68 | + * + Secure PL0 | ||
69 | + * + Secure PL1 | ||
70 | * (reminder: for 32 bit EL3, Secure PL1 is *EL3*, not EL1.) | ||
71 | * | ||
72 | * For QEMU, an mmu_idx is not quite the same as a translation regime because: | ||
73 | @@ -XXX,XX +XXX,XX @@ bool write_cpustate_to_list(ARMCPU *cpu, bool kvm_sync); | ||
74 | * The only use of stage 2 translations is either as part of an s1+2 | ||
75 | * lookup or when loading the descriptors during a stage 1 page table walk, | ||
76 | * and in both those cases we don't use the TLB. | ||
77 | - * 4. we want to be able to use the TLB for accesses done as part of a | ||
78 | + * 4. we can also safely fold together the "32 bit EL3" and "64 bit EL3" | ||
79 | + * translation regimes, because they map reasonably well to each other | ||
80 | + * and they can't both be active at the same time. | ||
81 | + * 5. we want to be able to use the TLB for accesses done as part of a | ||
82 | * stage1 page table walk, rather than having to walk the stage2 page | ||
83 | * table over and over. | ||
84 | - * 5. we need separate EL1/EL2 mmu_idx for handling the Privileged Access | ||
85 | + * 6. we need separate EL1/EL2 mmu_idx for handling the Privileged Access | ||
86 | * Never (PAN) bit within PSTATE. | ||
87 | - * 6. we fold together most secure and non-secure regimes for A-profile, | ||
88 | + * 7. we fold together most secure and non-secure regimes for A-profile, | ||
89 | * because there are no banked system registers for aarch64, so the | ||
90 | * process of switching between secure and non-secure is | ||
91 | * already heavyweight. | ||
92 | - * 7. we cannot fold together Stage 2 Secure and Stage 2 NonSecure, | ||
93 | + * 8. we cannot fold together Stage 2 Secure and Stage 2 NonSecure, | ||
94 | * because both are in use simultaneously for Secure EL2. | ||
95 | * | ||
96 | * This gives us the following list of cases: | ||
97 | * | ||
98 | - * EL0 EL1&0 stage 1+2 (or AArch32 PL0 PL1&0 stage 1+2) | ||
99 | - * EL1 EL1&0 stage 1+2 (or AArch32 PL1 PL1&0 stage 1+2) | ||
100 | - * EL1 EL1&0 stage 1+2 +PAN (or AArch32 PL1 PL1&0 stage 1+2 +PAN) | ||
101 | + * EL0 EL1&0 stage 1+2 (aka NS PL0) | ||
102 | + * EL1 EL1&0 stage 1+2 (aka NS PL1) | ||
103 | + * EL1 EL1&0 stage 1+2 +PAN | ||
104 | * EL0 EL2&0 | ||
105 | * EL2 EL2&0 | ||
106 | * EL2 EL2&0 +PAN | ||
107 | * EL2 (aka NS PL2) | ||
108 | - * EL3 (not used when EL3 is AArch32) | ||
109 | + * EL3 (aka S PL1) | ||
110 | * Stage2 Secure | ||
111 | * Stage2 NonSecure | ||
112 | * plus one TLB per Physical address space: S, NS, Realm, Root | ||
113 | * | ||
114 | * for a total of 14 different mmu_idx. | ||
115 | * | ||
116 | - * Note that when EL3 is AArch32, the usage is potentially confusing | ||
117 | - * because the MMU indexes are named for their AArch64 use, so code | ||
118 | - * using the ARMMMUIdx_E10_1 might be at EL3, not EL1. This is because | ||
119 | - * Secure PL1 is always at EL3. | ||
120 | - * | ||
121 | * R profile CPUs have an MPU, but can use the same set of MMU indexes | ||
122 | * as A profile. They only need to distinguish EL0 and EL1 (and | ||
123 | * EL2 for cores like the Cortex-R52). | ||
124 | @@ -XXX,XX +XXX,XX @@ FIELD(TBFLAG_A32, NS, 10, 1) | ||
125 | * This requires an SME trap from AArch32 mode when using NEON. | ||
126 | */ | ||
127 | FIELD(TBFLAG_A32, SME_TRAP_NONSTREAMING, 11, 1) | ||
128 | -/* | ||
129 | - * Indicates whether we are in the Secure PL1&0 translation regime | ||
130 | - */ | ||
131 | -FIELD(TBFLAG_A32, S_PL1_0, 12, 1) | ||
132 | |||
133 | /* | ||
134 | * Bit usage when in AArch32 state, for M-profile only. | ||
135 | diff --git a/target/arm/internals.h b/target/arm/internals.h | ||
136 | index XXXXXXX..XXXXXXX 100644 | ||
137 | --- a/target/arm/internals.h | ||
138 | +++ b/target/arm/internals.h | ||
139 | @@ -XXX,XX +XXX,XX @@ FIELD(CNTHCTL, CNTPMASK, 19, 1) | ||
140 | #define M_FAKE_FSR_NSC_EXEC 0xf /* NS executing in S&NSC memory */ | ||
141 | #define M_FAKE_FSR_SFAULT 0xe /* SecureFault INVTRAN, INVEP or AUVIOL */ | ||
142 | |||
143 | -/** | ||
144 | - * arm_aa32_secure_pl1_0(): Return true if in Secure PL1&0 regime | ||
145 | - * | ||
146 | - * Return true if the CPU is in the Secure PL1&0 translation regime. | ||
147 | - * This requires that EL3 exists and is AArch32 and we are currently | ||
148 | - * Secure. If this is the case then the ARMMMUIdx_E10* apply and | ||
149 | - * mean we are in EL3, not EL1. | ||
150 | - */ | ||
151 | -static inline bool arm_aa32_secure_pl1_0(CPUARMState *env) | ||
152 | -{ | ||
153 | - return arm_feature(env, ARM_FEATURE_EL3) && | ||
154 | - !arm_el_is_aa64(env, 3) && arm_is_secure(env); | ||
155 | -} | ||
156 | - | ||
157 | /** | ||
158 | * raise_exception: Raise the specified exception. | ||
159 | * Raise a guest exception with the specified value, syndrome register | ||
160 | @@ -XXX,XX +XXX,XX @@ static inline ARMMMUIdx core_to_aa64_mmu_idx(int mmu_idx) | ||
161 | return mmu_idx | ARM_MMU_IDX_A; | ||
162 | } | ||
163 | |||
164 | -/** | ||
165 | - * Return the exception level we're running at if our current MMU index | ||
166 | - * is @mmu_idx. @s_pl1_0 should be true if this is the AArch32 | ||
167 | - * Secure PL1&0 translation regime. | ||
168 | - */ | ||
169 | -int arm_mmu_idx_to_el(ARMMMUIdx mmu_idx, bool s_pl1_0); | ||
170 | +int arm_mmu_idx_to_el(ARMMMUIdx mmu_idx); | ||
171 | |||
172 | /* Return the MMU index for a v7M CPU in the specified security state */ | ||
173 | ARMMMUIdx arm_v7m_mmu_idx_for_secstate(CPUARMState *env, bool secstate); | ||
174 | @@ -XXX,XX +XXX,XX @@ static inline uint32_t regime_el(CPUARMState *env, ARMMMUIdx mmu_idx) | ||
175 | return 3; | ||
176 | case ARMMMUIdx_E10_0: | ||
177 | case ARMMMUIdx_Stage1_E0: | ||
178 | - case ARMMMUIdx_E10_1: | ||
179 | - case ARMMMUIdx_E10_1_PAN: | ||
180 | + return arm_el_is_aa64(env, 3) || !arm_is_secure_below_el3(env) ? 1 : 3; | ||
181 | case ARMMMUIdx_Stage1_E1: | ||
182 | case ARMMMUIdx_Stage1_E1_PAN: | ||
183 | - return arm_el_is_aa64(env, 3) || !arm_is_secure_below_el3(env) ? 1 : 3; | ||
184 | + case ARMMMUIdx_E10_1: | ||
185 | + case ARMMMUIdx_E10_1_PAN: | ||
186 | case ARMMMUIdx_MPrivNegPri: | ||
187 | case ARMMMUIdx_MUserNegPri: | ||
188 | case ARMMMUIdx_MPriv: | ||
189 | diff --git a/target/arm/tcg/translate.h b/target/arm/tcg/translate.h | ||
190 | index XXXXXXX..XXXXXXX 100644 | ||
191 | --- a/target/arm/tcg/translate.h | ||
192 | +++ b/target/arm/tcg/translate.h | ||
193 | @@ -XXX,XX +XXX,XX @@ typedef struct DisasContext { | ||
194 | uint8_t gm_blocksize; | ||
195 | /* True if the current insn_start has been updated. */ | ||
196 | bool insn_start_updated; | ||
197 | - /* True if this is the AArch32 Secure PL1&0 translation regime */ | ||
198 | - bool s_pl1_0; | ||
199 | /* Bottom two bits of XScale c15_cpar coprocessor access control reg */ | ||
200 | int c15_cpar; | ||
201 | /* Offset from VNCR_EL2 when FEAT_NV2 redirects this reg to memory */ | ||
202 | diff --git a/target/arm/helper.c b/target/arm/helper.c | ||
203 | index XXXXXXX..XXXXXXX 100644 | ||
204 | --- a/target/arm/helper.c | ||
205 | +++ b/target/arm/helper.c | ||
206 | @@ -XXX,XX +XXX,XX @@ static uint64_t do_ats_write(CPUARMState *env, uint64_t value, | ||
207 | */ | ||
208 | format64 = arm_s1_regime_using_lpae_format(env, mmu_idx); | ||
209 | |||
210 | - if (arm_feature(env, ARM_FEATURE_EL2) && !arm_aa32_secure_pl1_0(env)) { | ||
211 | + if (arm_feature(env, ARM_FEATURE_EL2)) { | ||
212 | if (mmu_idx == ARMMMUIdx_E10_0 || | ||
213 | mmu_idx == ARMMMUIdx_E10_1 || | ||
214 | mmu_idx == ARMMMUIdx_E10_1_PAN) { | ||
215 | @@ -XXX,XX +XXX,XX @@ static void ats_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) | ||
216 | case 0: | ||
217 | /* stage 1 current state PL1: ATS1CPR, ATS1CPW, ATS1CPRP, ATS1CPWP */ | ||
218 | switch (el) { | ||
219 | + case 3: | ||
220 | + mmu_idx = ARMMMUIdx_E3; | ||
221 | + break; | ||
222 | case 2: | ||
223 | g_assert(ss != ARMSS_Secure); /* ARMv8.4-SecEL2 is 64-bit only */ | ||
224 | /* fall through */ | ||
225 | case 1: | ||
226 | - case 3: | ||
227 | if (ri->crm == 9 && arm_pan_enabled(env)) { | ||
228 | mmu_idx = ARMMMUIdx_Stage1_E1_PAN; | ||
229 | } else { | ||
230 | @@ -XXX,XX +XXX,XX @@ void arm_cpu_do_interrupt(CPUState *cs) | ||
231 | |||
232 | uint64_t arm_sctlr(CPUARMState *env, int el) | ||
233 | { | ||
234 | - if (arm_aa32_secure_pl1_0(env)) { | ||
235 | - /* In Secure PL1&0 SCTLR_S is always controlling */ | ||
236 | - el = 3; | ||
237 | - } else if (el == 0) { | ||
238 | - /* Only EL0 needs to be adjusted for EL1&0 or EL2&0. */ | ||
239 | + /* Only EL0 needs to be adjusted for EL1&0 or EL2&0. */ | ||
240 | + if (el == 0) { | ||
241 | ARMMMUIdx mmu_idx = arm_mmu_idx_el(env, 0); | ||
242 | el = mmu_idx == ARMMMUIdx_E20_0 ? 2 : 1; | ||
243 | } | ||
244 | @@ -XXX,XX +XXX,XX @@ int fp_exception_el(CPUARMState *env, int cur_el) | ||
245 | return 0; | ||
246 | } | ||
247 | |||
248 | -/* | ||
249 | - * Return the exception level we're running at if this is our mmu_idx. | ||
250 | - * s_pl1_0 should be true if this is the AArch32 Secure PL1&0 translation | ||
251 | - * regime. | ||
252 | - */ | ||
253 | -int arm_mmu_idx_to_el(ARMMMUIdx mmu_idx, bool s_pl1_0) | ||
254 | +/* Return the exception level we're running at if this is our mmu_idx */ | ||
255 | +int arm_mmu_idx_to_el(ARMMMUIdx mmu_idx) | ||
256 | { | ||
257 | if (mmu_idx & ARM_MMU_IDX_M) { | ||
258 | return mmu_idx & ARM_MMU_IDX_M_PRIV; | ||
259 | @@ -XXX,XX +XXX,XX @@ int arm_mmu_idx_to_el(ARMMMUIdx mmu_idx, bool s_pl1_0) | ||
260 | return 0; | ||
261 | case ARMMMUIdx_E10_1: | ||
262 | case ARMMMUIdx_E10_1_PAN: | ||
263 | - return s_pl1_0 ? 3 : 1; | ||
264 | + return 1; | ||
265 | case ARMMMUIdx_E2: | ||
266 | case ARMMMUIdx_E20_2: | ||
267 | case ARMMMUIdx_E20_2_PAN: | ||
268 | @@ -XXX,XX +XXX,XX @@ ARMMMUIdx arm_mmu_idx_el(CPUARMState *env, int el) | ||
269 | idx = ARMMMUIdx_E10_0; | ||
270 | } | ||
271 | break; | ||
272 | - case 3: | ||
273 | - /* | ||
274 | - * AArch64 EL3 has its own translation regime; AArch32 EL3 | ||
275 | - * uses the Secure PL1&0 translation regime. | ||
276 | - */ | ||
277 | - if (arm_el_is_aa64(env, 3)) { | ||
278 | - return ARMMMUIdx_E3; | ||
279 | - } | ||
280 | - /* fall through */ | ||
281 | case 1: | ||
282 | if (arm_pan_enabled(env)) { | ||
283 | idx = ARMMMUIdx_E10_1_PAN; | ||
284 | @@ -XXX,XX +XXX,XX @@ ARMMMUIdx arm_mmu_idx_el(CPUARMState *env, int el) | ||
285 | idx = ARMMMUIdx_E2; | ||
286 | } | ||
287 | break; | ||
288 | + case 3: | ||
289 | + return ARMMMUIdx_E3; | ||
290 | default: | ||
291 | g_assert_not_reached(); | ||
292 | } | ||
293 | diff --git a/target/arm/ptw.c b/target/arm/ptw.c | ||
294 | index XXXXXXX..XXXXXXX 100644 | ||
295 | --- a/target/arm/ptw.c | ||
296 | +++ b/target/arm/ptw.c | ||
297 | @@ -XXX,XX +XXX,XX @@ bool get_phys_addr(CPUARMState *env, vaddr address, | ||
298 | case ARMMMUIdx_Stage1_E1: | ||
299 | case ARMMMUIdx_Stage1_E1_PAN: | ||
300 | case ARMMMUIdx_E2: | ||
301 | - if (arm_aa32_secure_pl1_0(env)) { | ||
302 | - ss = ARMSS_Secure; | ||
303 | - } else { | ||
304 | - ss = arm_security_space_below_el3(env); | ||
305 | - } | ||
306 | + ss = arm_security_space_below_el3(env); | ||
307 | break; | ||
308 | case ARMMMUIdx_Stage2: | ||
309 | /* | ||
310 | diff --git a/target/arm/tcg/hflags.c b/target/arm/tcg/hflags.c | ||
311 | index XXXXXXX..XXXXXXX 100644 | ||
312 | --- a/target/arm/tcg/hflags.c | ||
313 | +++ b/target/arm/tcg/hflags.c | ||
314 | @@ -XXX,XX +XXX,XX @@ static CPUARMTBFlags rebuild_hflags_a32(CPUARMState *env, int fp_el, | ||
315 | DP_TBFLAG_A32(flags, SME_TRAP_NONSTREAMING, 1); | ||
316 | } | ||
317 | |||
318 | - if (arm_aa32_secure_pl1_0(env)) { | ||
319 | - DP_TBFLAG_A32(flags, S_PL1_0, 1); | ||
320 | - } | ||
321 | - | ||
322 | return rebuild_hflags_common_32(env, fp_el, mmu_idx, flags); | ||
323 | } | ||
324 | |||
325 | diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c | ||
326 | index XXXXXXX..XXXXXXX 100644 | ||
327 | --- a/target/arm/tcg/translate-a64.c | ||
328 | +++ b/target/arm/tcg/translate-a64.c | ||
329 | @@ -XXX,XX +XXX,XX @@ static void aarch64_tr_init_disas_context(DisasContextBase *dcbase, | ||
330 | dc->tbii = EX_TBFLAG_A64(tb_flags, TBII); | ||
331 | dc->tbid = EX_TBFLAG_A64(tb_flags, TBID); | ||
332 | dc->tcma = EX_TBFLAG_A64(tb_flags, TCMA); | ||
333 | - dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx, false); | ||
334 | + dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx); | ||
335 | #if !defined(CONFIG_USER_ONLY) | ||
336 | dc->user = (dc->current_el == 0); | ||
337 | #endif | ||
338 | diff --git a/target/arm/tcg/translate.c b/target/arm/tcg/translate.c | ||
339 | index XXXXXXX..XXXXXXX 100644 | ||
340 | --- a/target/arm/tcg/translate.c | ||
341 | +++ b/target/arm/tcg/translate.c | ||
342 | @@ -XXX,XX +XXX,XX @@ static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) | ||
343 | |||
344 | core_mmu_idx = EX_TBFLAG_ANY(tb_flags, MMUIDX); | ||
345 | dc->mmu_idx = core_to_arm_mmu_idx(env, core_mmu_idx); | ||
346 | + dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx); | ||
347 | +#if !defined(CONFIG_USER_ONLY) | ||
348 | + dc->user = (dc->current_el == 0); | ||
349 | +#endif | ||
350 | dc->fp_excp_el = EX_TBFLAG_ANY(tb_flags, FPEXC_EL); | ||
351 | dc->align_mem = EX_TBFLAG_ANY(tb_flags, ALIGN_MEM); | ||
352 | dc->pstate_il = EX_TBFLAG_ANY(tb_flags, PSTATE__IL); | ||
353 | @@ -XXX,XX +XXX,XX @@ static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) | ||
354 | } | ||
355 | dc->sme_trap_nonstreaming = | ||
356 | EX_TBFLAG_A32(tb_flags, SME_TRAP_NONSTREAMING); | ||
357 | - dc->s_pl1_0 = EX_TBFLAG_A32(tb_flags, S_PL1_0); | ||
358 | } | ||
359 | - dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx, dc->s_pl1_0); | ||
360 | -#if !defined(CONFIG_USER_ONLY) | ||
361 | - dc->user = (dc->current_el == 0); | ||
362 | -#endif | ||
363 | dc->lse2 = false; /* applies only to aarch64 */ | ||
364 | dc->cp_regs = cpu->cp_regs; | ||
365 | dc->features = env->features; | ||
366 | -- | ||
367 | 2.34.1 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | Our implementation of the indexed version of SVE SDOT/UDOT/USDOT got | ||
2 | the calculation of the inner loop terminator wrong. Although we | ||
3 | correctly account for the element size when we calculate the | ||
4 | terminator for the first iteration: | ||
5 | intptr_t segend = MIN(16 / sizeof(TYPED), opr_sz_n); | ||
6 | we don't do that when we move it forward after the first inner loop | ||
7 | completes. The intention is that we process the vector in 128-bit | ||
8 | segments, which for a 64-bit element size should mean (1, 2), (3, 4), | ||
9 | (5, 6), etc. This bug meant that we would iterate (1, 2), (3, 4, 5, | ||
10 | 6), (7, 8, 9, 10), etc., and apply the wrong indexed element to some of | ||
11 | the operations, and also index off the end of the vector. | ||
12 | 1 | ||
13 | You don't see this bug if the vector length is small enough that we | ||
14 | don't need to iterate the outer loop, i.e. if it is only 128 bits, | ||
15 | or if it is the 64-bit special case from AA32/AA64 AdvSIMD. If the | ||
16 | vector length is 256 bits then we calculate the right results for the | ||
17 | elements in the vector but do index off the end of the vector. Vector | ||
18 | lengths greater than 256 bits see wrong answers. The instructions | ||
19 | that produce 32-bit results behave correctly. | ||
20 | |||
21 | Fix the recalculation of 'segend' for subsequent iterations, and | ||
22 | restore a version of the comment that was lost in the refactor of | ||
23 | commit 7020ffd656a5 that explains why we only need to clamp segend to | ||
24 | opr_sz_n for the first iteration, not the later ones. | ||
25 | |||
26 | Cc: qemu-stable@nongnu.org | ||
27 | Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2595 | ||
28 | Fixes: 7020ffd656a5 ("target/arm: Macroize helper_gvec_{s,u}dot_idx_{b,h}") | ||
29 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
30 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
31 | Message-id: 20241101185544.2130972-1-peter.maydell@linaro.org | ||
32 | --- | ||
33 | target/arm/tcg/vec_helper.c | 9 ++++++++- | ||
34 | 1 file changed, 8 insertions(+), 1 deletion(-) | ||
35 | |||
36 | diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c | ||
37 | index XXXXXXX..XXXXXXX 100644 | ||
38 | --- a/target/arm/tcg/vec_helper.c | ||
39 | +++ b/target/arm/tcg/vec_helper.c | ||
40 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, uint32_t desc) \ | ||
41 | { \ | ||
42 | intptr_t i = 0, opr_sz = simd_oprsz(desc); \ | ||
43 | intptr_t opr_sz_n = opr_sz / sizeof(TYPED); \ | ||
44 | + /* \ | ||
45 | + * Special case: opr_sz == 8 from AA64/AA32 advsimd means the \ | ||
46 | + * first iteration might not be a full 16 byte segment. But \ | ||
47 | + * for vector lengths beyond that this must be SVE and we know \ | ||
48 | + * opr_sz is a multiple of 16, so we need not clamp segend \ | ||
49 | + * to opr_sz_n when we advance it at the end of the loop. \ | ||
50 | + */ \ | ||
51 | intptr_t segend = MIN(16 / sizeof(TYPED), opr_sz_n); \ | ||
52 | intptr_t index = simd_data(desc); \ | ||
53 | TYPED *d = vd, *a = va; \ | ||
54 | @@ -XXX,XX +XXX,XX @@ void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, uint32_t desc) \ | ||
55 | n[i * 4 + 2] * m2 + \ | ||
56 | n[i * 4 + 3] * m3); \ | ||
57 | } while (++i < segend); \ | ||
58 | - segend = i + 4; \ | ||
59 | + segend = i + (16 / sizeof(TYPED)); \ | ||
60 | } while (i < opr_sz_n); \ | ||
61 | clear_tail(d, opr_sz, simd_maxsz(desc)); \ | ||
62 | } | ||
63 | -- | ||
64 | 2.34.1 | diff view generated by jsdifflib |