On 11/20/24 19:49, Anton Johansson wrote:
> This commit adds a gvec function for copying data from a constant array
> given in C to a gvec intptr_t. For each element, a host store of
> each constant is performed; this is not ideal and will inflate TBs for
> large vectors.
>
> Moreover, data will be copied during each run of the generated code,
> impacting performance. A more suitable solution might store constant
> vectors separately; this can be handled either on the QEMU or
> helper-to-tcg side.
>
> Signed-off-by: Anton Johansson <anjo@rev.ng>
This is invalid because generic code does not know how to index elements within the target
vector, which is exactly what this function does with its per-element copy.
The code in target/arch/ knows the element ordering (though I suspect you have not taught
LLVM about it), and could arrange for the data to be put in the correct byte order, which could
then be copied into place using plain host vector operations. I won't attempt to riff on
what such an interface would look like exactly, but I imagine that something sensible
could be constructed with only a little effort.
r~
> ---
> include/tcg/tcg-op-gvec-common.h | 2 ++
> tcg/tcg-op-gvec.c | 30 ++++++++++++++++++++++++++++++
> 2 files changed, 32 insertions(+)
>
> diff --git a/include/tcg/tcg-op-gvec-common.h b/include/tcg/tcg-op-gvec-common.h
> index 39b0c2f64e..409a56c633 100644
> --- a/include/tcg/tcg-op-gvec-common.h
> +++ b/include/tcg/tcg-op-gvec-common.h
> @@ -331,6 +331,8 @@ void tcg_gen_gvec_dup_mem(unsigned vece, uint32_t dofs, uint32_t aofs,
> uint32_t s, uint32_t m);
> void tcg_gen_gvec_dup_imm(unsigned vece, uint32_t dofs, uint32_t s,
> uint32_t m, uint64_t imm);
> +void tcg_gen_gvec_constant(unsigned vece, TCGv_env env, uint32_t dofs,
> + void *arr, uint32_t maxsz);
> void tcg_gen_gvec_dup_i32(unsigned vece, uint32_t dofs, uint32_t s,
> uint32_t m, TCGv_i32);
> void tcg_gen_gvec_dup_i64(unsigned vece, uint32_t dofs, uint32_t s,
> diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c
> index 80649dc0d2..71b6875129 100644
> --- a/tcg/tcg-op-gvec.c
> +++ b/tcg/tcg-op-gvec.c
> @@ -1835,6 +1835,36 @@ void tcg_gen_gvec_dup_imm(unsigned vece, uint32_t dofs, uint32_t oprsz,
> do_dup(vece, dofs, oprsz, maxsz, NULL, NULL, x);
> }
>
> +
> +void tcg_gen_gvec_constant(unsigned vece, TCGv_env env, uint32_t dofs,
> + void *arr, uint32_t maxsz)
> +{
> + uint32_t elsz = memop_size(vece);
> + for (uint32_t i = 0; i < maxsz/elsz; ++i)
> + {
> + uint32_t off = i*elsz;
> + uint8_t *elptr = (uint8_t *)arr + off;
> + switch (vece) {
> + case MO_8:
> + tcg_gen_st8_i32(tcg_constant_i32(*elptr),
> + env, dofs + off);
> + break;
> + case MO_16:
> + tcg_gen_st16_i32(tcg_constant_i32(*(uint16_t *) elptr),
> + env, dofs + off);
> + break;
> + case MO_32:
> + tcg_gen_st_i32(tcg_constant_i32(*(uint32_t *) elptr),
> + env, dofs + off);
> + break;
> + case MO_64:
> + tcg_gen_st_i64(tcg_constant_i64(*(uint64_t *) elptr),
> + env, dofs + off);
> + break;
> + }
> + }
> +}
> +
> void tcg_gen_gvec_not(unsigned vece, uint32_t dofs, uint32_t aofs,
> uint32_t oprsz, uint32_t maxsz)
> {