From: Brian Cain <bcain@quicinc.com>
The per-vCPU System Status Register controls many modal behaviors of the
system architecture. When the SSR is updated, we trigger the necessary
effects for interrupts, privilege/MMU, and HVX context mapping.
Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com>
---
target/hexagon/cpu_helper.c | 100 +++++++++++++++++++++++++++++++++++-
1 file changed, 99 insertions(+), 1 deletion(-)
diff --git a/target/hexagon/cpu_helper.c b/target/hexagon/cpu_helper.c
index e151c6335a..3e2364a7b0 100644
--- a/target/hexagon/cpu_helper.c
+++ b/target/hexagon/cpu_helper.c
@@ -14,6 +14,8 @@
#else
#include "hw/boards.h"
#include "hw/hexagon/hexagon.h"
+#include "hex_interrupts.h"
+#include "hex_mmu.h"
#endif
#include "exec/exec-all.h"
#include "exec/cpu_ldst.h"
@@ -69,9 +71,105 @@ void hexagon_set_sys_pcycle_count(CPUHexagonState *env, uint64_t cycles)
g_assert_not_reached();
}
+static MMVector VRegs[VECTOR_UNIT_MAX][NUM_VREGS];
+static MMQReg QRegs[VECTOR_UNIT_MAX][NUM_QREGS];
+
+/*
+ * EXT_CONTEXTS
+ * SSR.XA 2 4 6 8
+ * 000 HVX Context 0 HVX Context 0 HVX Context 0 HVX Context 0
+ * 001 HVX Context 1 HVX Context 1 HVX Context 1 HVX Context 1
+ * 010 HVX Context 0 HVX Context 2 HVX Context 2 HVX Context 2
+ * 011 HVX Context 1 HVX Context 3 HVX Context 3 HVX Context 3
+ * 100 HVX Context 0 HVX Context 0 HVX Context 4 HVX Context 4
+ * 101 HVX Context 1 HVX Context 1 HVX Context 5 HVX Context 5
+ * 110 HVX Context 0 HVX Context 2 HVX Context 2 HVX Context 6
+ * 111 HVX Context 1 HVX Context 3 HVX Context 3 HVX Context 7
+ */
+static int parse_context_idx(CPUHexagonState *env, uint8_t XA)
+{
+ int ret;
+ HexagonCPU *cpu = env_archcpu(env);
+ if (cpu->hvx_contexts == 6 && XA >= 6) {
+ ret = XA - 6 + 2;
+ } else {
+ ret = XA % cpu->hvx_contexts;
+ }
+ g_assert(ret >= 0 && ret < VECTOR_UNIT_MAX);
+ return ret;
+}
+
+static void check_overcommitted_hvx(CPUHexagonState *env, uint32_t ssr)
+{
+ if (!GET_FIELD(SSR_XE, ssr)) {
+ return;
+ }
+
+ uint8_t XA = GET_SSR_FIELD(SSR_XA, ssr);
+
+ CPUState *cs;
+ CPU_FOREACH(cs) {
+ CPUHexagonState *env_ = cpu_env(cs);
+ if (env_ == env) {
+ continue;
+ }
+ /* Check if another thread has the XE bit set and same XA */
+ uint32_t ssr_ = arch_get_system_reg(env_, HEX_SREG_SSR);
+ if (GET_SSR_FIELD(SSR_XE2, ssr_) && GET_FIELD(SSR_XA, ssr_) == XA) {
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "setting SSR.XA '%d' on thread %d but thread"
+ " %d has same extension active\n", XA, env->threadId,
+ env_->threadId);
+ }
+ }
+}
+
void hexagon_modify_ssr(CPUHexagonState *env, uint32_t new, uint32_t old)
{
- g_assert_not_reached();
+ g_assert(bql_locked());
+
+ bool old_EX = GET_SSR_FIELD(SSR_EX, old);
+ bool old_UM = GET_SSR_FIELD(SSR_UM, old);
+ bool old_GM = GET_SSR_FIELD(SSR_GM, old);
+ bool old_IE = GET_SSR_FIELD(SSR_IE, old);
+ uint8_t old_XA = GET_SSR_FIELD(SSR_XA, old);
+ bool new_EX = GET_SSR_FIELD(SSR_EX, new);
+ bool new_UM = GET_SSR_FIELD(SSR_UM, new);
+ bool new_GM = GET_SSR_FIELD(SSR_GM, new);
+ bool new_IE = GET_SSR_FIELD(SSR_IE, new);
+ uint8_t new_XA = GET_SSR_FIELD(SSR_XA, new);
+
+ if ((old_EX != new_EX) ||
+ (old_UM != new_UM) ||
+ (old_GM != new_GM)) {
+ hex_mmu_mode_change(env);
+ }
+
+ uint8_t old_asid = GET_SSR_FIELD(SSR_ASID, old);
+ uint8_t new_asid = GET_SSR_FIELD(SSR_ASID, new);
+ if (new_asid != old_asid) {
+ CPUState *cs = env_cpu(env);
+ tlb_flush(cs);
+ }
+
+ if (old_XA != new_XA) {
+ int old_unit = parse_context_idx(env, old_XA);
+ int new_unit = parse_context_idx(env, new_XA);
+
+ /* Ownership exchange */
+ memcpy(VRegs[old_unit], env->VRegs, sizeof(env->VRegs));
+ memcpy(QRegs[old_unit], env->QRegs, sizeof(env->QRegs));
+ memcpy(env->VRegs, VRegs[new_unit], sizeof(env->VRegs));
+ memcpy(env->QRegs, QRegs[new_unit], sizeof(env->QRegs));
+
+ check_overcommitted_hvx(env, new);
+ }
+
+ /* See if the interrupts have been enabled or we have exited EX mode */
+ if ((new_IE && !old_IE) ||
+ (!new_EX && old_EX)) {
+ hex_interrupt_update(env);
+ }
}
void clear_wait_mode(CPUHexagonState *env)
--
2.34.1
> -----Original Message----- > From: Brian Cain <brian.cain@oss.qualcomm.com> > Sent: Friday, February 28, 2025 11:28 PM > To: qemu-devel@nongnu.org > Cc: brian.cain@oss.qualcomm.com; richard.henderson@linaro.org; > philmd@linaro.org; quic_mathbern@quicinc.com; ale@rev.ng; anjo@rev.ng; > quic_mliebel@quicinc.com; ltaylorsimpson@gmail.com; > alex.bennee@linaro.org; quic_mburton@quicinc.com; > sidneym@quicinc.com; Brian Cain <bcain@quicinc.com> > Subject: [PATCH 05/39] target/hexagon: Implement modify SSR > > From: Brian Cain <bcain@quicinc.com> > > The per-vCPU System Status Register controls many modal behaviors of the > system architecture. When the SSR is updated, we trigger the necessary > effects for interrupts, privilege/MMU, and HVX context mapping. > > Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com> > --- > target/hexagon/cpu_helper.c | 100 > +++++++++++++++++++++++++++++++++++- > 1 file changed, 99 insertions(+), 1 deletion(-) > > diff --git a/target/hexagon/cpu_helper.c b/target/hexagon/cpu_helper.c > index e151c6335a..3e2364a7b0 100644 > --- a/target/hexagon/cpu_helper.c > +++ b/target/hexagon/cpu_helper.c > @@ -14,6 +14,8 @@ > #else > #include "hw/boards.h" > #include "hw/hexagon/hexagon.h" > +#include "hex_interrupts.h" > +#include "hex_mmu.h" > #endif > #include "exec/exec-all.h" > #include "exec/cpu_ldst.h" > @@ -69,9 +71,105 @@ void > hexagon_set_sys_pcycle_count(CPUHexagonState *env, uint64_t cycles) > g_assert_not_reached(); > } > > +static MMVector VRegs[VECTOR_UNIT_MAX][NUM_VREGS]; > +static MMQReg QRegs[VECTOR_UNIT_MAX][NUM_QREGS]; This won't scale for a system with multiple Hexagon instances. See discussion on how to handle shared resources. > + > +/* > + * EXT_CONTEXTS > + * SSR.XA 2 4 6 8 > + * 000 HVX Context 0 HVX Context 0 HVX Context 0 HVX Context 0 > + * 001 HVX Context 1 HVX Context 1 HVX Context 1 HVX Context 1 > + * 010 HVX Context 0 HVX Context 2 HVX Context 2 HVX Context 2 > + * 011 HVX Context 1 HVX Context 3 HVX Context 3 HVX Context 3 > + * 100 HVX Context 0 HVX Context 0 HVX Context 4 HVX Context 4 > + * 101 HVX Context 1 HVX Context 1 HVX Context 5 HVX Context 5 > + * 110 HVX Context 0 HVX Context 2 HVX Context 2 HVX Context 6 > + * 111 HVX Context 1 HVX Context 3 HVX Context 3 HVX Context 7 > + */ This is different from what the HVX PRM says. It only specifies what XA values 4, 5, 6, 7 mean. Here is what it says: The HVX scalar core can contain any number of hardware threads greater or equal to the number of vector contexts. The scalar hardware thread is assignable to a vector context through per- thread SSR:XA programming, as follows: - SSR:XA = 4: HVX instructions use vector context 0. - SSR:XA = 5: HVX instructions use vector context 1, if it is available. - SSR:XA = 6: HVX instructions use vector context 2, if it is available. - SSR:XA = 7: HVX instructions use vector context 3, if it is available. > +static int parse_context_idx(CPUHexagonState *env, uint8_t XA) { > + int ret; > + HexagonCPU *cpu = env_archcpu(env); You should assert that cpu->hvx_contexts is in { 2, 4, 6, 8 }. This will future proof against changes to the hardware as well as protect against bad command-line settings. > + if (cpu->hvx_contexts == 6 && XA >= 6) { > + ret = XA - 6 + 2; > + } else { > + ret = XA % cpu->hvx_contexts; > + } > + g_assert(ret >= 0 && ret < VECTOR_UNIT_MAX); > + return ret; > +} > + > +static void check_overcommitted_hvx(CPUHexagonState *env, uint32_t > ssr) > +{ > + if (!GET_FIELD(SSR_XE, ssr)) { > + return; > + } What does SSR_XE indicate? > + > + uint8_t XA = GET_SSR_FIELD(SSR_XA, ssr); > + > + CPUState *cs; > + CPU_FOREACH(cs) { > + CPUHexagonState *env_ = cpu_env(cs); This underscore is confusing. Use a full name such as thread_env. > + if (env_ == env) { > + continue; > + } > + /* Check if another thread has the XE bit set and same XA */ > + uint32_t ssr_ = arch_get_system_reg(env_, HEX_SREG_SSR); Ditto > + if (GET_SSR_FIELD(SSR_XE2, ssr_) && GET_FIELD(SSR_XA, ssr_) == XA) { The comment says check the XE bit but the code checks XE2. Also, note the XE check on the current thread above. > + qemu_log_mask(LOG_GUEST_ERROR, > + "setting SSR.XA '%d' on thread %d but thread" > + " %d has same extension active\n", XA, env->threadId, > + env_->threadId); > + } > + } > +} > + > void hexagon_modify_ssr(CPUHexagonState *env, uint32_t new, uint32_t > old) { > - g_assert_not_reached(); > + g_assert(bql_locked()); > + > + bool old_EX = GET_SSR_FIELD(SSR_EX, old); > + bool old_UM = GET_SSR_FIELD(SSR_UM, old); > + bool old_GM = GET_SSR_FIELD(SSR_GM, old); > + bool old_IE = GET_SSR_FIELD(SSR_IE, old); > + uint8_t old_XA = GET_SSR_FIELD(SSR_XA, old); > + bool new_EX = GET_SSR_FIELD(SSR_EX, new); > + bool new_UM = GET_SSR_FIELD(SSR_UM, new); > + bool new_GM = GET_SSR_FIELD(SSR_GM, new); > + bool new_IE = GET_SSR_FIELD(SSR_IE, new); > + uint8_t new_XA = GET_SSR_FIELD(SSR_XA, new); > + > + if ((old_EX != new_EX) || > + (old_UM != new_UM) || > + (old_GM != new_GM)) { > + hex_mmu_mode_change(env); > + } > + > + uint8_t old_asid = GET_SSR_FIELD(SSR_ASID, old); > + uint8_t new_asid = GET_SSR_FIELD(SSR_ASID, new); > + if (new_asid != old_asid) { > + CPUState *cs = env_cpu(env); > + tlb_flush(cs); > + } > + > + if (old_XA != new_XA) { > + int old_unit = parse_context_idx(env, old_XA); > + int new_unit = parse_context_idx(env, new_XA); Check that old_unit != new_unit. Per the table above, different XA values can point to the same unit. For example, if cpu->hvx_contexts is 2, the XA=0 and XA=2 both point to context 0. > + > + /* Ownership exchange */ > + memcpy(VRegs[old_unit], env->VRegs, sizeof(env->VRegs)); > + memcpy(QRegs[old_unit], env->QRegs, sizeof(env->QRegs)); > + memcpy(env->VRegs, VRegs[new_unit], sizeof(env->VRegs)); > + memcpy(env->QRegs, QRegs[new_unit], sizeof(env->QRegs)); What does the hardware do? Does it clear the context, or is that the OS'es job? If the hardware leaves the context alone, the above should be 1) Copy env->{VQ}Regs to old_unit 2) Copy new_unit to env->{VQ}Regs Should you check SSR_EX before doing these copies? Do you need to do anything when SSR_EX changes? > + > + check_overcommitted_hvx(env, new); > + } > + > + /* See if the interrupts have been enabled or we have exited EX mode */ > + if ((new_IE && !old_IE) || > + (!new_EX && old_EX)) { > + hex_interrupt_update(env); > + } > } > > void clear_wait_mode(CPUHexagonState *env) > -- > 2.34.1
> -----Original Message----- > From: ltaylorsimpson@gmail.com <ltaylorsimpson@gmail.com> > Sent: Monday, March 17, 2025 12:37 PM > To: 'Brian Cain' <brian.cain@oss.qualcomm.com>; qemu-devel@nongnu.org > Cc: richard.henderson@linaro.org; philmd@linaro.org; Matheus Bernardino > (QUIC) <quic_mathbern@quicinc.com>; ale@rev.ng; anjo@rev.ng; Marco > Liebel (QUIC) <quic_mliebel@quicinc.com>; alex.bennee@linaro.org; Mark > Burton (QUIC) <quic_mburton@quicinc.com>; Sid Manning > <sidneym@quicinc.com>; Brian Cain <bcain@quicinc.com> > Subject: RE: [PATCH 05/39] target/hexagon: Implement modify SSR > > WARNING: This email originated from outside of Qualcomm. Please be wary > of any links or attachments, and do not enable macros. > > > -----Original Message----- > > From: Brian Cain <brian.cain@oss.qualcomm.com> > > Sent: Friday, February 28, 2025 11:28 PM > > To: qemu-devel@nongnu.org > > Cc: brian.cain@oss.qualcomm.com; richard.henderson@linaro.org; > > philmd@linaro.org; quic_mathbern@quicinc.com; ale@rev.ng; > anjo@rev.ng; > > quic_mliebel@quicinc.com; ltaylorsimpson@gmail.com; > > alex.bennee@linaro.org; quic_mburton@quicinc.com; > sidneym@quicinc.com; > > Brian Cain <bcain@quicinc.com> > > Subject: [PATCH 05/39] target/hexagon: Implement modify SSR > > > > From: Brian Cain <bcain@quicinc.com> > > > > The per-vCPU System Status Register controls many modal behaviors of > > the system architecture. When the SSR is updated, we trigger the > > necessary effects for interrupts, privilege/MMU, and HVX context > mapping. > > > > Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com> > > --- > > target/hexagon/cpu_helper.c | 100 > > +++++++++++++++++++++++++++++++++++- > > 1 file changed, 99 insertions(+), 1 deletion(-) > > > > diff --git a/target/hexagon/cpu_helper.c b/target/hexagon/cpu_helper.c > > index e151c6335a..3e2364a7b0 100644 > > --- a/target/hexagon/cpu_helper.c > > +++ b/target/hexagon/cpu_helper.c > > @@ -14,6 +14,8 @@ > > #else > > #include "hw/boards.h" > > #include "hw/hexagon/hexagon.h" > > +#include "hex_interrupts.h" > > +#include "hex_mmu.h" > > #endif > > #include "exec/exec-all.h" > > #include "exec/cpu_ldst.h" > > @@ -69,9 +71,105 @@ void > > hexagon_set_sys_pcycle_count(CPUHexagonState *env, uint64_t cycles) > > g_assert_not_reached(); > > } > > > > +static MMVector VRegs[VECTOR_UNIT_MAX][NUM_VREGS]; > > +static MMQReg QRegs[VECTOR_UNIT_MAX][NUM_QREGS]; > > This won't scale for a system with multiple Hexagon instances. See > discussion on how to handle shared resources. I commented more below but yes I think we need to take a look at these and refactor because of what you point out and to avoid the copies we are doing when ownership changes. > > > + > > +/* > > + * EXT_CONTEXTS > > + * SSR.XA 2 4 6 8 > > + * 000 HVX Context 0 HVX Context 0 HVX Context 0 HVX Context 0 > > + * 001 HVX Context 1 HVX Context 1 HVX Context 1 HVX Context 1 > > + * 010 HVX Context 0 HVX Context 2 HVX Context 2 HVX Context 2 > > + * 011 HVX Context 1 HVX Context 3 HVX Context 3 HVX Context 3 > > + * 100 HVX Context 0 HVX Context 0 HVX Context 4 HVX Context 4 > > + * 101 HVX Context 1 HVX Context 1 HVX Context 5 HVX Context 5 > > + * 110 HVX Context 0 HVX Context 2 HVX Context 2 HVX Context 6 > > + * 111 HVX Context 1 HVX Context 3 HVX Context 3 HVX Context 7 > > + */ > > This is different from what the HVX PRM says. It only specifies what XA > values 4, 5, 6, 7 mean. [Sid Manning] The comment is the generic case when ssr:xe bit 2 isn't always 1. > > Here is what it says: > The HVX scalar core can contain any number of hardware threads greater or > equal to the number of vector contexts. The scalar hardware thread is > assignable to a vector context through per- thread SSR:XA programming, as > follows: > - SSR:XA = 4: HVX instructions use vector context 0. > - SSR:XA = 5: HVX instructions use vector context 1, if it is available. > - SSR:XA = 6: HVX instructions use vector context 2, if it is available. > - SSR:XA = 7: HVX instructions use vector context 3, if it is available. > > > +static int parse_context_idx(CPUHexagonState *env, uint8_t XA) { > > + int ret; > > + HexagonCPU *cpu = env_archcpu(env); > > You should assert that cpu->hvx_contexts is in { 2, 4, 6, 8 }. This will future > proof against changes to the hardware as well as protect against bad > command-line settings. [Sid Manning] Looking at virt_init or hexagon_common_init the value of hvx-contexts will come from the config table regardless of what the user specifies via the command line. I think we have to trust the config space entries or we will have to add specific asserts for every entry. > > > + if (cpu->hvx_contexts == 6 && XA >= 6) { > > + ret = XA - 6 + 2; > > + } else { > > + ret = XA % cpu->hvx_contexts; > > + } > > + g_assert(ret >= 0 && ret < VECTOR_UNIT_MAX); > > + return ret; > > +} > > + > > +static void check_overcommitted_hvx(CPUHexagonState *env, uint32_t > > ssr) > > +{ > > + if (!GET_FIELD(SSR_XE, ssr)) { > > + return; > > + } > > What does SSR_XE indicate? [Sid Manning] If SSR:XE isn't set this thread doesn't have the coproc enabled so discard additional checking. Any coproc insn issued when ssr:xe is 0 will trigger a, "Illegal execution of coprocessor instruction." error. > > > + > > + uint8_t XA = GET_SSR_FIELD(SSR_XA, ssr); > > + > > + CPUState *cs; > > + CPU_FOREACH(cs) { > > + CPUHexagonState *env_ = cpu_env(cs); > > This underscore is confusing. Use a full name such as thread_env. I see your point will update. Here and below. > > > + if (env_ == env) { > > + continue; > > + } > > + /* Check if another thread has the XE bit set and same XA */ > > + uint32_t ssr_ = arch_get_system_reg(env_, HEX_SREG_SSR); > > Ditto > > > + if (GET_SSR_FIELD(SSR_XE2, ssr_) && GET_FIELD(SSR_XA, ssr_) > > + == XA) { > > The comment says check the XE bit but the code checks XE2. Also, note the > XE check on the current thread above. This might be a type-o I think it should SSE_XE not XE2. > > > + qemu_log_mask(LOG_GUEST_ERROR, > > + "setting SSR.XA '%d' on thread %d but thread" > > + " %d has same extension active\n", XA, env->threadId, > > + env_->threadId); > > + } > > + } > > +} > > + > > void hexagon_modify_ssr(CPUHexagonState *env, uint32_t new, uint32_t > > old) { > > - g_assert_not_reached(); > > + g_assert(bql_locked()); > > + > > + bool old_EX = GET_SSR_FIELD(SSR_EX, old); > > + bool old_UM = GET_SSR_FIELD(SSR_UM, old); > > + bool old_GM = GET_SSR_FIELD(SSR_GM, old); > > + bool old_IE = GET_SSR_FIELD(SSR_IE, old); > > + uint8_t old_XA = GET_SSR_FIELD(SSR_XA, old); > > + bool new_EX = GET_SSR_FIELD(SSR_EX, new); > > + bool new_UM = GET_SSR_FIELD(SSR_UM, new); > > + bool new_GM = GET_SSR_FIELD(SSR_GM, new); > > + bool new_IE = GET_SSR_FIELD(SSR_IE, new); > > + uint8_t new_XA = GET_SSR_FIELD(SSR_XA, new); > > + > > + if ((old_EX != new_EX) || > > + (old_UM != new_UM) || > > + (old_GM != new_GM)) { > > + hex_mmu_mode_change(env); > > + } > > + > > + uint8_t old_asid = GET_SSR_FIELD(SSR_ASID, old); > > + uint8_t new_asid = GET_SSR_FIELD(SSR_ASID, new); > > + if (new_asid != old_asid) { > > + CPUState *cs = env_cpu(env); > > + tlb_flush(cs); > > + } > > + > > + if (old_XA != new_XA) { > > + int old_unit = parse_context_idx(env, old_XA); > > + int new_unit = parse_context_idx(env, new_XA); > > Check that old_unit != new_unit. Per the table above, different XA values > can point to the same unit. For example, if cpu->hvx_contexts is 2, the XA=0 > and XA=2 both point to context 0. > > > + > > + /* Ownership exchange */ > > + memcpy(VRegs[old_unit], env->VRegs, sizeof(env->VRegs)); > > + memcpy(QRegs[old_unit], env->QRegs, sizeof(env->QRegs)); > > + memcpy(env->VRegs, VRegs[new_unit], sizeof(env->VRegs)); > > + memcpy(env->QRegs, QRegs[new_unit], sizeof(env->QRegs)); > > What does the hardware do? Does it clear the context, or is that the OS'es > job? Nothing would keep a single htid from taking hvx unit 0, doing some hvx-ops , swapping to hvx unit 1 doing some more hvx-ops and so on. We are doing this because each thread has a private copy of the hvx register state. Since HVX units and threads are independent if one thread changes its ownership from HVX context 0 to HVX context 1 we have to do this copy. Instead of memcpy should create a new object that represents the HVX units available and change env->VRegs/QRegs to point to the one currently owned. Refactoring this will be an improvement. > > If the hardware leaves the context alone, the above should be > 1) Copy env->{VQ}Regs to old_unit > 2) Copy new_unit to env->{VQ}Regs > > Should you check SSR_EX before doing these copies? > > Do you need to do anything when SSR_EX changes? I think you mean SSR:XE before doing the copies. I think we have to do the copy here regardless of ssr:xe since a thread could swap ownership, update ssr:xa, without explicitly having ssr:xe set. Maybe the OS disables SSR:XE while changing hvx unit ownership? > > > + > > + check_overcommitted_hvx(env, new); > > + } > > + > > + /* See if the interrupts have been enabled or we have exited EX mode > */ > > + if ((new_IE && !old_IE) || > > + (!new_EX && old_EX)) { > > + hex_interrupt_update(env); > > + } > > } > > > > void clear_wait_mode(CPUHexagonState *env) > > -- > > 2.34.1 >
> -----Original Message----- > From: Sid Manning <sidneym@quicinc.com> > Sent: Tuesday, March 18, 2025 1:34 PM > To: ltaylorsimpson@gmail.com; 'Brian Cain' > <brian.cain@oss.qualcomm.com>; qemu-devel@nongnu.org > Cc: richard.henderson@linaro.org; philmd@linaro.org; Matheus Bernardino > (QUIC) <quic_mathbern@quicinc.com>; ale@rev.ng; anjo@rev.ng; Marco > Liebel (QUIC) <quic_mliebel@quicinc.com>; alex.bennee@linaro.org; Mark > Burton (QUIC) <quic_mburton@quicinc.com>; Brian Cain > <bcain@quicinc.com> > Subject: RE: [PATCH 05/39] target/hexagon: Implement modify SSR > > > > > -----Original Message----- > > From: ltaylorsimpson@gmail.com <ltaylorsimpson@gmail.com> > > Sent: Monday, March 17, 2025 12:37 PM > > To: 'Brian Cain' <brian.cain@oss.qualcomm.com>; qemu- > devel@nongnu.org > > Cc: richard.henderson@linaro.org; philmd@linaro.org; Matheus > > Bernardino > > (QUIC) <quic_mathbern@quicinc.com>; ale@rev.ng; anjo@rev.ng; Marco > > Liebel (QUIC) <quic_mliebel@quicinc.com>; alex.bennee@linaro.org; Mark > > Burton (QUIC) <quic_mburton@quicinc.com>; Sid Manning > > <sidneym@quicinc.com>; Brian Cain <bcain@quicinc.com> > > Subject: RE: [PATCH 05/39] target/hexagon: Implement modify SSR > > > > > -----Original Message----- > > > From: Brian Cain <brian.cain@oss.qualcomm.com> > > > Sent: Friday, February 28, 2025 11:28 PM > > > To: qemu-devel@nongnu.org > > > Cc: brian.cain@oss.qualcomm.com; richard.henderson@linaro.org; > > > philmd@linaro.org; quic_mathbern@quicinc.com; ale@rev.ng; > > anjo@rev.ng; > > > quic_mliebel@quicinc.com; ltaylorsimpson@gmail.com; > > > alex.bennee@linaro.org; quic_mburton@quicinc.com; > > sidneym@quicinc.com; > > > Brian Cain <bcain@quicinc.com> > > > Subject: [PATCH 05/39] target/hexagon: Implement modify SSR > > > > > > From: Brian Cain <bcain@quicinc.com> > > > > > > The per-vCPU System Status Register controls many modal behaviors of > > > the system architecture. When the SSR is updated, we trigger the > > > necessary effects for interrupts, privilege/MMU, and HVX context > > mapping. > > > > > > Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com> > > > --- > > > > What does SSR_XE indicate? > [Sid Manning] > If SSR:XE isn't set this thread doesn't have the coproc enabled so discard > additional checking. Any coproc insn issued when ssr:xe is 0 will trigger a, > "Illegal execution of coprocessor instruction." error. > > > + if (old_XA != new_XA) { > > > + int old_unit = parse_context_idx(env, old_XA); > > > + int new_unit = parse_context_idx(env, new_XA); > > > > Check that old_unit != new_unit. Per the table above, different XA values > > can point to the same unit. For example, if cpu->hvx_contexts is 2, the > XA=0 > > and XA=2 both point to context 0. > > > > > + > > > + /* Ownership exchange */ > > > + memcpy(VRegs[old_unit], env->VRegs, sizeof(env->VRegs)); > > > + memcpy(QRegs[old_unit], env->QRegs, sizeof(env->QRegs)); > > > + memcpy(env->VRegs, VRegs[new_unit], sizeof(env->VRegs)); > > > + memcpy(env->QRegs, QRegs[new_unit], sizeof(env->QRegs)); > > > > What does the hardware do? Does it clear the context, or is that the OS'es > > job? > Nothing would keep a single htid from taking hvx unit 0, doing some hvx-ops > , swapping to hvx unit 1 doing some more hvx-ops and so on. We are doing > this because each thread has a private copy of the hvx register state. Since > HVX units and threads are independent if one thread changes its ownership > from HVX context 0 to HVX context 1 we have to do this copy. Instead of > memcpy should create a new object that represents the HVX units available > and change env->VRegs/QRegs to point to the one currently owned. > > Refactoring this will be an improvement. > > > > > > If the hardware leaves the context alone, the above should be > > 1) Copy env->{VQ}Regs to old_unit > > 2) Copy new_unit to env->{VQ}Regs > > > > Should you check SSR_EX before doing these copies? > > > > Do you need to do anything when SSR_EX changes? > > I think you mean SSR:XE before doing the copies. I think we have to do the > copy here regardless of ssr:xe since a thread could swap ownership, update > ssr:xa, without explicitly having ssr:xe set. Maybe the OS disables SSR:XE > while changing hvx unit ownership? Correct. I meant SSR:XE. Some refactoring is in order but need to understand the HW behavior more specifically. - What will the HW do if more than one thread claims ownership of the same HVX context? - What happens if a thread claims ownership without setting SSR:XE and then sets SSR:XE later? - What about this example? 1) Thread 0 claims context 1 and sets SSR:XE 2) Thread 0 does some HVX computation 3) Thread 0 is done with HVX for now so clears SSR:XE 4) Thread 1 claims context 1 and sets SSR:XE, does some work, then clears SSR:XE 5) Thread 0 wants to do more HVX, so it sets SSR:XE (still pointing to HVX context 1) We should keep the copies for the contexts and local copies inside CPUHexagonState. This makes TCG generation easier as well as having common code between system mode and linux-user mode. Also, since check_overcommitted_hvx only prints a log message, add an early exit if LOG_GUEST_ERROR is off. Thanks, Taylor
On 3/18/2025 2:14 PM, ltaylorsimpson@gmail.com wrote: > >> -----Original Message----- >> From: Sid Manning <sidneym@quicinc.com> >> Sent: Tuesday, March 18, 2025 1:34 PM >> To: ltaylorsimpson@gmail.com; 'Brian Cain' >> <brian.cain@oss.qualcomm.com>; qemu-devel@nongnu.org >> Cc: richard.henderson@linaro.org; philmd@linaro.org; Matheus Bernardino >> (QUIC) <quic_mathbern@quicinc.com>; ale@rev.ng; anjo@rev.ng; Marco >> Liebel (QUIC) <quic_mliebel@quicinc.com>; alex.bennee@linaro.org; Mark >> Burton (QUIC) <quic_mburton@quicinc.com>; Brian Cain >> <bcain@quicinc.com> >> Subject: RE: [PATCH 05/39] target/hexagon: Implement modify SSR >> >> >> >>> -----Original Message----- >>> From: ltaylorsimpson@gmail.com <ltaylorsimpson@gmail.com> >>> Sent: Monday, March 17, 2025 12:37 PM >>> To: 'Brian Cain' <brian.cain@oss.qualcomm.com>; qemu- >> devel@nongnu.org >>> Cc: richard.henderson@linaro.org; philmd@linaro.org; Matheus >>> Bernardino >>> (QUIC) <quic_mathbern@quicinc.com>; ale@rev.ng; anjo@rev.ng; Marco >>> Liebel (QUIC) <quic_mliebel@quicinc.com>; alex.bennee@linaro.org; Mark >>> Burton (QUIC) <quic_mburton@quicinc.com>; Sid Manning >>> <sidneym@quicinc.com>; Brian Cain <bcain@quicinc.com> >>> Subject: RE: [PATCH 05/39] target/hexagon: Implement modify SSR >>> >>>> -----Original Message----- >>>> From: Brian Cain <brian.cain@oss.qualcomm.com> >>>> Sent: Friday, February 28, 2025 11:28 PM >>>> To: qemu-devel@nongnu.org >>>> Cc: brian.cain@oss.qualcomm.com; richard.henderson@linaro.org; >>>> philmd@linaro.org; quic_mathbern@quicinc.com; ale@rev.ng; >>> anjo@rev.ng; >>>> quic_mliebel@quicinc.com; ltaylorsimpson@gmail.com; >>>> alex.bennee@linaro.org; quic_mburton@quicinc.com; >>> sidneym@quicinc.com; >>>> Brian Cain <bcain@quicinc.com> >>>> Subject: [PATCH 05/39] target/hexagon: Implement modify SSR >>>> >>>> From: Brian Cain <bcain@quicinc.com> >>>> >>>> The per-vCPU System Status Register controls many modal behaviors of >>>> the system architecture. When the SSR is updated, we trigger the >>>> necessary effects for interrupts, privilege/MMU, and HVX context >>> mapping. >>>> Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com> >>>> --- >>> What does SSR_XE indicate? >> [Sid Manning] >> If SSR:XE isn't set this thread doesn't have the coproc enabled so discard >> additional checking. Any coproc insn issued when ssr:xe is 0 will trigger a, >> "Illegal execution of coprocessor instruction." error. > > >>>> + if (old_XA != new_XA) { >>>> + int old_unit = parse_context_idx(env, old_XA); >>>> + int new_unit = parse_context_idx(env, new_XA); >>> Check that old_unit != new_unit. Per the table above, different XA values >>> can point to the same unit. For example, if cpu->hvx_contexts is 2, the >> XA=0 >>> and XA=2 both point to context 0. >>> >>>> + >>>> + /* Ownership exchange */ >>>> + memcpy(VRegs[old_unit], env->VRegs, sizeof(env->VRegs)); >>>> + memcpy(QRegs[old_unit], env->QRegs, sizeof(env->QRegs)); >>>> + memcpy(env->VRegs, VRegs[new_unit], sizeof(env->VRegs)); >>>> + memcpy(env->QRegs, QRegs[new_unit], sizeof(env->QRegs)); >>> What does the hardware do? Does it clear the context, or is that the OS'es >>> job? >> Nothing would keep a single htid from taking hvx unit 0, doing some hvx-ops >> , swapping to hvx unit 1 doing some more hvx-ops and so on. We are doing >> this because each thread has a private copy of the hvx register state. Since >> HVX units and threads are independent if one thread changes its ownership >> from HVX context 0 to HVX context 1 we have to do this copy. Instead of >> memcpy should create a new object that represents the HVX units available >> and change env->VRegs/QRegs to point to the one currently owned. >> >> Refactoring this will be an improvement. >> >> >>> If the hardware leaves the context alone, the above should be >>> 1) Copy env->{VQ}Regs to old_unit >>> 2) Copy new_unit to env->{VQ}Regs >>> >>> Should you check SSR_EX before doing these copies? >>> >>> Do you need to do anything when SSR_EX changes? >> I think you mean SSR:XE before doing the copies. I think we have to do the >> copy here regardless of ssr:xe since a thread could swap ownership, update >> ssr:xa, without explicitly having ssr:xe set. Maybe the OS disables SSR:XE >> while changing hvx unit ownership? > Correct. I meant SSR:XE. > > Some refactoring is in order but need to understand the HW behavior more specifically. > - What will the HW do if more than one thread claims ownership of the same HVX context? > - What happens if a thread claims ownership without setting SSR:XE and then sets SSR:XE later? > - What about this example? > 1) Thread 0 claims context 1 and sets SSR:XE > 2) Thread 0 does some HVX computation > 3) Thread 0 is done with HVX for now so clears SSR:XE > 4) Thread 1 claims context 1 and sets SSR:XE, does some work, then clears SSR:XE > 5) Thread 0 wants to do more HVX, so it sets SSR:XE (still pointing to HVX context 1) > > We should keep the copies for the contexts and local copies inside CPUHexagonState. This makes TCG generation easier as well as having common code between system mode and linux-user mode. Good point that linux-user will need their own exclusive HVX context. But it doesn't sound right to me to have CPU state store both the system contexts *and* a local context for system emulation. Our current design under review is better than that IMO. Once we have some experience modeling the system registers as an independent QEMU Object, I think we'll be better prepared to model the HVX contexts similarly. Ideally we can remap these via something along the lines of "object_property_set_link()" when the contexts change, without requiring any copies. And ideally the existing TCG should work as-is, despite the remappable register files. "What happens when ... " - multiple threads picking the same context is almost certainly explicitly or implicitly undefined architectural behavior, so whatever QEMU does should be appropriate. However, we'll talk to the architecture team and get a definitive answer. > > Also, since check_overcommitted_hvx only prints a log message, add an early exit if LOG_GUEST_ERROR is off. > > Thanks, > Taylor > >
> -----Original Message----- > From: Brian Cain <brian.cain@oss.qualcomm.com> > Sent: Tuesday, March 18, 2025 6:47 PM > To: ltaylorsimpson@gmail.com; 'Sid Manning' <sidneym@quicinc.com>; > qemu-devel@nongnu.org > Cc: richard.henderson@linaro.org; philmd@linaro.org; 'Matheus Bernardino > (QUIC)' <quic_mathbern@quicinc.com>; ale@rev.ng; anjo@rev.ng; 'Marco > Liebel (QUIC)' <quic_mliebel@quicinc.com>; alex.bennee@linaro.org; 'Mark > Burton (QUIC)' <quic_mburton@quicinc.com>; 'Brian Cain' > <bcain@quicinc.com> > Subject: Re: [PATCH 05/39] target/hexagon: Implement modify SSR > > > On 3/18/2025 2:14 PM, ltaylorsimpson@gmail.com wrote: > > > >> -----Original Message----- > >> From: Sid Manning <sidneym@quicinc.com> > >> Sent: Tuesday, March 18, 2025 1:34 PM > >> To: ltaylorsimpson@gmail.com; 'Brian Cain' > >> <brian.cain@oss.qualcomm.com>; qemu-devel@nongnu.org > >> Cc: richard.henderson@linaro.org; philmd@linaro.org; Matheus > >> Bernardino > >> (QUIC) <quic_mathbern@quicinc.com>; ale@rev.ng; anjo@rev.ng; Marco > >> Liebel (QUIC) <quic_mliebel@quicinc.com>; alex.bennee@linaro.org; > >> Mark Burton (QUIC) <quic_mburton@quicinc.com>; Brian Cain > >> <bcain@quicinc.com> > >> Subject: RE: [PATCH 05/39] target/hexagon: Implement modify SSR > >> > >> > >> > >>> -----Original Message----- > >>> From: ltaylorsimpson@gmail.com <ltaylorsimpson@gmail.com> > >>> Sent: Monday, March 17, 2025 12:37 PM > >>> To: 'Brian Cain' <brian.cain@oss.qualcomm.com>; qemu- > >> devel@nongnu.org > >>> Cc: richard.henderson@linaro.org; philmd@linaro.org; Matheus > >>> Bernardino > >>> (QUIC) <quic_mathbern@quicinc.com>; ale@rev.ng; anjo@rev.ng; > Marco > >>> Liebel (QUIC) <quic_mliebel@quicinc.com>; alex.bennee@linaro.org; > >>> Mark Burton (QUIC) <quic_mburton@quicinc.com>; Sid Manning > >>> <sidneym@quicinc.com>; Brian Cain <bcain@quicinc.com> > >>> Subject: RE: [PATCH 05/39] target/hexagon: Implement modify SSR > >>> > >>>> -----Original Message----- > >>>> From: Brian Cain <brian.cain@oss.qualcomm.com> > >>>> Sent: Friday, February 28, 2025 11:28 PM > >>>> To: qemu-devel@nongnu.org > >>>> Cc: brian.cain@oss.qualcomm.com; richard.henderson@linaro.org; > >>>> philmd@linaro.org; quic_mathbern@quicinc.com; ale@rev.ng; > >>> anjo@rev.ng; > >>>> quic_mliebel@quicinc.com; ltaylorsimpson@gmail.com; > >>>> alex.bennee@linaro.org; quic_mburton@quicinc.com; > >>> sidneym@quicinc.com; > >>>> Brian Cain <bcain@quicinc.com> > >>>> Subject: [PATCH 05/39] target/hexagon: Implement modify SSR > >>>> > >>>> From: Brian Cain <bcain@quicinc.com> > >>>> > >>>> The per-vCPU System Status Register controls many modal behaviors > >>>> of the system architecture. When the SSR is updated, we trigger > >>>> the necessary effects for interrupts, privilege/MMU, and HVX > >>>> context > >>> mapping. > >>>> Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com> > >>>> --- > >>> What does SSR_XE indicate? > >> [Sid Manning] > >> If SSR:XE isn't set this thread doesn't have the coproc enabled so > >> discard additional checking. Any coproc insn issued when ssr:xe is 0 > >> will trigger a, "Illegal execution of coprocessor instruction." error. > > > > > >>>> + if (old_XA != new_XA) { > >>>> + int old_unit = parse_context_idx(env, old_XA); > >>>> + int new_unit = parse_context_idx(env, new_XA); > >>> Check that old_unit != new_unit. Per the table above, different XA > >>> values can point to the same unit. For example, if > >>> cpu->hvx_contexts is 2, the > >> XA=0 > >>> and XA=2 both point to context 0. > >>> > >>>> + > >>>> + /* Ownership exchange */ > >>>> + memcpy(VRegs[old_unit], env->VRegs, sizeof(env->VRegs)); > >>>> + memcpy(QRegs[old_unit], env->QRegs, sizeof(env->QRegs)); > >>>> + memcpy(env->VRegs, VRegs[new_unit], sizeof(env->VRegs)); > >>>> + memcpy(env->QRegs, QRegs[new_unit], sizeof(env->QRegs)); > >>> What does the hardware do? Does it clear the context, or is that > >>> the OS'es job? > >> Nothing would keep a single htid from taking hvx unit 0, doing some hvx- > ops > >> , swapping to hvx unit 1 doing some more hvx-ops and so on. We are > doing > >> this because each thread has a private copy of the hvx register > >> state. Since HVX units and threads are independent if one thread > >> changes its ownership from HVX context 0 to HVX context 1 we have to > >> do this copy. Instead of memcpy should create a new object that > >> represents the HVX units available and change env->VRegs/QRegs to > point to the one currently owned. > >> > >> Refactoring this will be an improvement. > >> > >> > >>> If the hardware leaves the context alone, the above should be > >>> 1) Copy env->{VQ}Regs to old_unit > >>> 2) Copy new_unit to env->{VQ}Regs > >>> > >>> Should you check SSR_EX before doing these copies? > >>> > >>> Do you need to do anything when SSR_EX changes? > >> I think you mean SSR:XE before doing the copies. I think we have to > >> do the copy here regardless of ssr:xe since a thread could swap > >> ownership, update ssr:xa, without explicitly having ssr:xe set. > >> Maybe the OS disables SSR:XE while changing hvx unit ownership? > > Correct. I meant SSR:XE. > > > > Some refactoring is in order but need to understand the HW behavior more > specifically. > > - What will the HW do if more than one thread claims ownership of the > same HVX context? > > - What happens if a thread claims ownership without setting SSR:XE and > then sets SSR:XE later? > > - What about this example? > > 1) Thread 0 claims context 1 and sets SSR:XE > > 2) Thread 0 does some HVX computation > > 3) Thread 0 is done with HVX for now so clears SSR:XE > > 4) Thread 1 claims context 1 and sets SSR:XE, does some work, then > clears SSR:XE > > 5) Thread 0 wants to do more HVX, so it sets SSR:XE (still > > pointing to HVX context 1) > > > > We should keep the copies for the contexts and local copies inside > CPUHexagonState. This makes TCG generation easier as well as having > common code between system mode and linux-user mode. > > Good point that linux-user will need their own exclusive HVX context. But it > doesn't sound right to me to have CPU state store both the system contexts > *and* a local context for system emulation. Our current design under > review is better than that IMO. Once we have some experience modeling > the system registers as an independent QEMU Object, I think we'll be better > prepared to model the HVX contexts similarly. Ideally we can remap these > via something along the lines of "object_property_set_link()" when the > contexts change, without requiring any copies. And ideally the existing TCG > should work as-is, despite the remappable register files. > > "What happens when ... " - multiple threads picking the same context is > almost certainly explicitly or implicitly undefined architectural behavior, so > whatever QEMU does should be appropriate. However, we'll talk to the > architecture team and get a definitive answer. I caution against putting a level of indirection into CPUHexagonState for the HVX registers. The HVX TCG implementation relies on an offset from the start of CPUHexagonState to identify the operands. This would be a very significant rewrite if it's even possible. I don't know if TCG's gvec code generation can handle random pointers or a level of indirection. If the behavior is undefined, we can avoid the copies. Then we just need some bookkeeping to check if multiple threads try to claim the same context (if that behavior is defined). If copies are needed, we could keep the hardware contexts as shared a shared resource. Another alternative would be to track the current owner of a context and copy from the previous owner's {VQ}Regs to the new owners {VQ}Regs. Thanks, Taylor
On 3/19/25 09:39, ltaylorsimpson@gmail.com wrote: > I caution against putting a level of indirection into CPUHexagonState for the HVX registers. The HVX TCG implementation relies on an offset from the start of CPUHexagonState to identify the operands. This would be a very significant rewrite if it's even possible. I don't know if TCG's gvec code generation can handle random pointers or a level of indirection. Not yet, it can't, no. I've been extending it for random pointers because of Arm FEAT_SME2, wherein we have indirect addressing of matrix slices. So we wind up with a pointer like &env->zarray + (env->xregs[reg] + offset) % size > If the behavior is undefined, we can avoid the copies. Then we just need some bookkeeping to check if multiple threads try to claim the same context (if that behavior is defined). If copies are needed, we could keep the hardware contexts as shared a shared resource. Another alternative would be to track the current owner of a context and copy from the previous owner's {VQ}Regs to the new owners {VQ}Regs. Depending on how you answer these questions, I could split out the TCG work. But in the short term, copying context data might well be easier. r~
© 2016 - 2025 Red Hat, Inc.