The store width is needed for packet commit, so it is stored in
ctx->store_width. Currently, it is set when a store has a TCG
override instead of a QEMU helper. In the QEMU helper case, the
ctx->store_width is not set, we invoke a helper during packet commit
that uses the runtime store width.
This patch ensures ctx->store_width is set for all store instructions,
so performance is improved because packet commit can generate the proper
TCG store rather than the generic helper.
We do this by
- Use the attributes from the instructions during translation to
set ctx->store_width
- Remove setting of ctx->store_width from genptr.c
Signed-off-by: Taylor Simpson <tsimpson@quicinc.com>
---
target/hexagon/macros.h | 8 ++++----
target/hexagon/genptr.c | 36 ++++++++++++------------------------
target/hexagon/translate.c | 26 ++++++++++++++++++++++++++
3 files changed, 42 insertions(+), 28 deletions(-)
diff --git a/target/hexagon/macros.h b/target/hexagon/macros.h
index 92eb8bbf05..c8805bdaeb 100644
--- a/target/hexagon/macros.h
+++ b/target/hexagon/macros.h
@@ -156,7 +156,7 @@
__builtin_choose_expr(TYPE_TCGV(X), \
gen_store1, (void)0))
#define MEM_STORE1(VA, DATA, SLOT) \
- MEM_STORE1_FUNC(DATA)(cpu_env, VA, DATA, ctx, SLOT)
+ MEM_STORE1_FUNC(DATA)(cpu_env, VA, DATA, SLOT)
#define MEM_STORE2_FUNC(X) \
__builtin_choose_expr(TYPE_INT(X), \
@@ -164,7 +164,7 @@
__builtin_choose_expr(TYPE_TCGV(X), \
gen_store2, (void)0))
#define MEM_STORE2(VA, DATA, SLOT) \
- MEM_STORE2_FUNC(DATA)(cpu_env, VA, DATA, ctx, SLOT)
+ MEM_STORE2_FUNC(DATA)(cpu_env, VA, DATA, SLOT)
#define MEM_STORE4_FUNC(X) \
__builtin_choose_expr(TYPE_INT(X), \
@@ -172,7 +172,7 @@
__builtin_choose_expr(TYPE_TCGV(X), \
gen_store4, (void)0))
#define MEM_STORE4(VA, DATA, SLOT) \
- MEM_STORE4_FUNC(DATA)(cpu_env, VA, DATA, ctx, SLOT)
+ MEM_STORE4_FUNC(DATA)(cpu_env, VA, DATA, SLOT)
#define MEM_STORE8_FUNC(X) \
__builtin_choose_expr(TYPE_INT(X), \
@@ -180,7 +180,7 @@
__builtin_choose_expr(TYPE_TCGV_I64(X), \
gen_store8, (void)0))
#define MEM_STORE8(VA, DATA, SLOT) \
- MEM_STORE8_FUNC(DATA)(cpu_env, VA, DATA, ctx, SLOT)
+ MEM_STORE8_FUNC(DATA)(cpu_env, VA, DATA, SLOT)
#else
#define MEM_LOAD1s(VA) ((int8_t)mem_load1(env, slot, VA))
#define MEM_LOAD1u(VA) ((uint8_t)mem_load1(env, slot, VA))
diff --git a/target/hexagon/genptr.c b/target/hexagon/genptr.c
index 8a334ba07b..806d0974ff 100644
--- a/target/hexagon/genptr.c
+++ b/target/hexagon/genptr.c
@@ -401,62 +401,50 @@ static inline void gen_store32(TCGv vaddr, TCGv src, int width, int slot)
tcg_gen_mov_tl(hex_store_val32[slot], src);
}
-static inline void gen_store1(TCGv_env cpu_env, TCGv vaddr, TCGv src,
- DisasContext *ctx, int slot)
+static inline void gen_store1(TCGv_env cpu_env, TCGv vaddr, TCGv src, int slot)
{
gen_store32(vaddr, src, 1, slot);
- ctx->store_width[slot] = 1;
}
-static inline void gen_store1i(TCGv_env cpu_env, TCGv vaddr, int32_t src,
- DisasContext *ctx, int slot)
+static inline void gen_store1i(TCGv_env cpu_env, TCGv vaddr, int32_t src, int slot)
{
TCGv tmp = tcg_constant_tl(src);
- gen_store1(cpu_env, vaddr, tmp, ctx, slot);
+ gen_store1(cpu_env, vaddr, tmp, slot);
}
-static inline void gen_store2(TCGv_env cpu_env, TCGv vaddr, TCGv src,
- DisasContext *ctx, int slot)
+static inline void gen_store2(TCGv_env cpu_env, TCGv vaddr, TCGv src, int slot)
{
gen_store32(vaddr, src, 2, slot);
- ctx->store_width[slot] = 2;
}
-static inline void gen_store2i(TCGv_env cpu_env, TCGv vaddr, int32_t src,
- DisasContext *ctx, int slot)
+static inline void gen_store2i(TCGv_env cpu_env, TCGv vaddr, int32_t src, int slot)
{
TCGv tmp = tcg_constant_tl(src);
- gen_store2(cpu_env, vaddr, tmp, ctx, slot);
+ gen_store2(cpu_env, vaddr, tmp, slot);
}
-static inline void gen_store4(TCGv_env cpu_env, TCGv vaddr, TCGv src,
- DisasContext *ctx, int slot)
+static inline void gen_store4(TCGv_env cpu_env, TCGv vaddr, TCGv src, int slot)
{
gen_store32(vaddr, src, 4, slot);
- ctx->store_width[slot] = 4;
}
-static inline void gen_store4i(TCGv_env cpu_env, TCGv vaddr, int32_t src,
- DisasContext *ctx, int slot)
+static inline void gen_store4i(TCGv_env cpu_env, TCGv vaddr, int32_t src, int slot)
{
TCGv tmp = tcg_constant_tl(src);
- gen_store4(cpu_env, vaddr, tmp, ctx, slot);
+ gen_store4(cpu_env, vaddr, tmp, slot);
}
-static inline void gen_store8(TCGv_env cpu_env, TCGv vaddr, TCGv_i64 src,
- DisasContext *ctx, int slot)
+static inline void gen_store8(TCGv_env cpu_env, TCGv vaddr, TCGv_i64 src, int slot)
{
tcg_gen_mov_tl(hex_store_addr[slot], vaddr);
tcg_gen_movi_tl(hex_store_width[slot], 8);
tcg_gen_mov_i64(hex_store_val64[slot], src);
- ctx->store_width[slot] = 8;
}
-static inline void gen_store8i(TCGv_env cpu_env, TCGv vaddr, int64_t src,
- DisasContext *ctx, int slot)
+static inline void gen_store8i(TCGv_env cpu_env, TCGv vaddr, int64_t src, int slot)
{
TCGv_i64 tmp = tcg_constant_i64(src);
- gen_store8(cpu_env, vaddr, tmp, ctx, slot);
+ gen_store8(cpu_env, vaddr, tmp, slot);
}
static TCGv gen_8bitsof(TCGv result, TCGv value)
diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c
index 0e8a0772f7..bc02870b9f 100644
--- a/target/hexagon/translate.c
+++ b/target/hexagon/translate.c
@@ -327,6 +327,31 @@ static void mark_implicit_pred_writes(DisasContext *ctx, Insn *insn)
mark_implicit_pred_write(ctx, insn, A_IMPLICIT_WRITES_P3, 3);
}
+static void mark_store_width(DisasContext *ctx, Insn *insn)
+{
+ uint16_t opcode = insn->opcode;
+ uint32_t slot = insn->slot;
+
+ if (GET_ATTRIB(opcode, A_STORE)) {
+ if (GET_ATTRIB(opcode, A_MEMSIZE_1B)) {
+ ctx->store_width[slot] = 1;
+ return;
+ }
+ if (GET_ATTRIB(opcode, A_MEMSIZE_2B)) {
+ ctx->store_width[slot] = 2;
+ return;
+ }
+ if (GET_ATTRIB(opcode, A_MEMSIZE_4B)) {
+ ctx->store_width[slot] = 4;
+ return;
+ }
+ if (GET_ATTRIB(opcode, A_MEMSIZE_8B)) {
+ ctx->store_width[slot] = 8;
+ return;
+ }
+ }
+}
+
static void gen_insn(CPUHexagonState *env, DisasContext *ctx,
Insn *insn, Packet *pkt)
{
@@ -334,6 +359,7 @@ static void gen_insn(CPUHexagonState *env, DisasContext *ctx,
mark_implicit_reg_writes(ctx, insn);
insn->generate(env, ctx, insn, pkt);
mark_implicit_pred_writes(ctx, insn);
+ mark_store_width(ctx, insn);
} else {
gen_exception_end_tb(ctx, HEX_EXCP_INVALID_OPCODE);
}
--
2.17.1
On 9/20/22 01:07, Taylor Simpson wrote:
> The store width is needed for packet commit, so it is stored in
> ctx->store_width. Currently, it is set when a store has a TCG
> override instead of a QEMU helper. In the QEMU helper case, the
> ctx->store_width is not set, we invoke a helper during packet commit
> that uses the runtime store width.
>
> This patch ensures ctx->store_width is set for all store instructions,
> so performance is improved because packet commit can generate the proper
> TCG store rather than the generic helper.
>
> We do this by
> - Use the attributes from the instructions during translation to
> set ctx->store_width
> - Remove setting of ctx->store_width from genptr.c
>
> Signed-off-by: Taylor Simpson <tsimpson@quicinc.com>
> ---
> target/hexagon/macros.h | 8 ++++----
> target/hexagon/genptr.c | 36 ++++++++++++------------------------
> target/hexagon/translate.c | 26 ++++++++++++++++++++++++++
> 3 files changed, 42 insertions(+), 28 deletions(-)
>
> diff --git a/target/hexagon/macros.h b/target/hexagon/macros.h
> index 92eb8bbf05..c8805bdaeb 100644
> --- a/target/hexagon/macros.h
> +++ b/target/hexagon/macros.h
> @@ -156,7 +156,7 @@
> __builtin_choose_expr(TYPE_TCGV(X), \
> gen_store1, (void)0))
> #define MEM_STORE1(VA, DATA, SLOT) \
> - MEM_STORE1_FUNC(DATA)(cpu_env, VA, DATA, ctx, SLOT)
> + MEM_STORE1_FUNC(DATA)(cpu_env, VA, DATA, SLOT)
>
> #define MEM_STORE2_FUNC(X) \
> __builtin_choose_expr(TYPE_INT(X), \
> @@ -164,7 +164,7 @@
> __builtin_choose_expr(TYPE_TCGV(X), \
> gen_store2, (void)0))
> #define MEM_STORE2(VA, DATA, SLOT) \
> - MEM_STORE2_FUNC(DATA)(cpu_env, VA, DATA, ctx, SLOT)
> + MEM_STORE2_FUNC(DATA)(cpu_env, VA, DATA, SLOT)
>
> #define MEM_STORE4_FUNC(X) \
> __builtin_choose_expr(TYPE_INT(X), \
> @@ -172,7 +172,7 @@
> __builtin_choose_expr(TYPE_TCGV(X), \
> gen_store4, (void)0))
> #define MEM_STORE4(VA, DATA, SLOT) \
> - MEM_STORE4_FUNC(DATA)(cpu_env, VA, DATA, ctx, SLOT)
> + MEM_STORE4_FUNC(DATA)(cpu_env, VA, DATA, SLOT)
>
> #define MEM_STORE8_FUNC(X) \
> __builtin_choose_expr(TYPE_INT(X), \
> @@ -180,7 +180,7 @@
> __builtin_choose_expr(TYPE_TCGV_I64(X), \
> gen_store8, (void)0))
> #define MEM_STORE8(VA, DATA, SLOT) \
> - MEM_STORE8_FUNC(DATA)(cpu_env, VA, DATA, ctx, SLOT)
> + MEM_STORE8_FUNC(DATA)(cpu_env, VA, DATA, SLOT)
> #else
> #define MEM_LOAD1s(VA) ((int8_t)mem_load1(env, slot, VA))
> #define MEM_LOAD1u(VA) ((uint8_t)mem_load1(env, slot, VA))
> diff --git a/target/hexagon/genptr.c b/target/hexagon/genptr.c
> index 8a334ba07b..806d0974ff 100644
> --- a/target/hexagon/genptr.c
> +++ b/target/hexagon/genptr.c
> @@ -401,62 +401,50 @@ static inline void gen_store32(TCGv vaddr, TCGv src, int width, int slot)
> tcg_gen_mov_tl(hex_store_val32[slot], src);
> }
>
> -static inline void gen_store1(TCGv_env cpu_env, TCGv vaddr, TCGv src,
> - DisasContext *ctx, int slot)
> +static inline void gen_store1(TCGv_env cpu_env, TCGv vaddr, TCGv src, int slot)
> {
> gen_store32(vaddr, src, 1, slot);
> - ctx->store_width[slot] = 1;
> }
>
> -static inline void gen_store1i(TCGv_env cpu_env, TCGv vaddr, int32_t src,
> - DisasContext *ctx, int slot)
> +static inline void gen_store1i(TCGv_env cpu_env, TCGv vaddr, int32_t src, int slot)
> {
> TCGv tmp = tcg_constant_tl(src);
> - gen_store1(cpu_env, vaddr, tmp, ctx, slot);
> + gen_store1(cpu_env, vaddr, tmp, slot);
> }
>
> -static inline void gen_store2(TCGv_env cpu_env, TCGv vaddr, TCGv src,
> - DisasContext *ctx, int slot)
> +static inline void gen_store2(TCGv_env cpu_env, TCGv vaddr, TCGv src, int slot)
> {
> gen_store32(vaddr, src, 2, slot);
> - ctx->store_width[slot] = 2;
> }
>
> -static inline void gen_store2i(TCGv_env cpu_env, TCGv vaddr, int32_t src,
> - DisasContext *ctx, int slot)
> +static inline void gen_store2i(TCGv_env cpu_env, TCGv vaddr, int32_t src, int slot)
> {
> TCGv tmp = tcg_constant_tl(src);
> - gen_store2(cpu_env, vaddr, tmp, ctx, slot);
> + gen_store2(cpu_env, vaddr, tmp, slot);
> }
>
> -static inline void gen_store4(TCGv_env cpu_env, TCGv vaddr, TCGv src,
> - DisasContext *ctx, int slot)
> +static inline void gen_store4(TCGv_env cpu_env, TCGv vaddr, TCGv src, int slot)
> {
> gen_store32(vaddr, src, 4, slot);
> - ctx->store_width[slot] = 4;
> }
>
> -static inline void gen_store4i(TCGv_env cpu_env, TCGv vaddr, int32_t src,
> - DisasContext *ctx, int slot)
> +static inline void gen_store4i(TCGv_env cpu_env, TCGv vaddr, int32_t src, int slot)
> {
> TCGv tmp = tcg_constant_tl(src);
> - gen_store4(cpu_env, vaddr, tmp, ctx, slot);
> + gen_store4(cpu_env, vaddr, tmp, slot);
> }
>
> -static inline void gen_store8(TCGv_env cpu_env, TCGv vaddr, TCGv_i64 src,
> - DisasContext *ctx, int slot)
> +static inline void gen_store8(TCGv_env cpu_env, TCGv vaddr, TCGv_i64 src, int slot)
> {
> tcg_gen_mov_tl(hex_store_addr[slot], vaddr);
> tcg_gen_movi_tl(hex_store_width[slot], 8);
> tcg_gen_mov_i64(hex_store_val64[slot], src);
> - ctx->store_width[slot] = 8;
> }
>
> -static inline void gen_store8i(TCGv_env cpu_env, TCGv vaddr, int64_t src,
> - DisasContext *ctx, int slot)
> +static inline void gen_store8i(TCGv_env cpu_env, TCGv vaddr, int64_t src, int slot)
> {
> TCGv_i64 tmp = tcg_constant_i64(src);
> - gen_store8(cpu_env, vaddr, tmp, ctx, slot);
> + gen_store8(cpu_env, vaddr, tmp, slot);
> }
>
> static TCGv gen_8bitsof(TCGv result, TCGv value)
> diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c
> index 0e8a0772f7..bc02870b9f 100644
> --- a/target/hexagon/translate.c
> +++ b/target/hexagon/translate.c
> @@ -327,6 +327,31 @@ static void mark_implicit_pred_writes(DisasContext *ctx, Insn *insn)
> mark_implicit_pred_write(ctx, insn, A_IMPLICIT_WRITES_P3, 3);
> }
>
> +static void mark_store_width(DisasContext *ctx, Insn *insn)
> +{
> + uint16_t opcode = insn->opcode;
> + uint32_t slot = insn->slot;
> +
> + if (GET_ATTRIB(opcode, A_STORE)) {
> + if (GET_ATTRIB(opcode, A_MEMSIZE_1B)) {
> + ctx->store_width[slot] = 1;
> + return;
> + }
> + if (GET_ATTRIB(opcode, A_MEMSIZE_2B)) {
> + ctx->store_width[slot] = 2;
> + return;
> + }
> + if (GET_ATTRIB(opcode, A_MEMSIZE_4B)) {
> + ctx->store_width[slot] = 4;
> + return;
> + }
> + if (GET_ATTRIB(opcode, A_MEMSIZE_8B)) {
> + ctx->store_width[slot] = 8;
> + return;
> + }
Hmm. Perhaps
int size = 0;
if (GET_ATTRIB(opcode, A_MEMSIZE_1B)) {
size |= 1;
}
...
tcg_debug_assert(is_power_of_2(size));
ctx->store_width[slot] = size;
just to make sure you get exactly one of the above cases.
Otherwise, LGTM,
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
r~
© 2016 - 2026 Red Hat, Inc.