We add vext_set_elems_1s to set agnostic elements to 1s in both big
and little endian situation.
In the function vext_set_elems_1s. We using esz argument to get the first
element to set. 'cnt' is just idx * esz.
Signed-off-by: Huang Tao <eric.huang@linux.alibaba.com>
---
target/riscv/vector_internals.c | 53 +++++++++++++++++++++++++++++++++
target/riscv/vector_internals.h | 2 ++
2 files changed, 55 insertions(+)
diff --git a/target/riscv/vector_internals.c b/target/riscv/vector_internals.c
index 349b24f4ae..455be96996 100644
--- a/target/riscv/vector_internals.c
+++ b/target/riscv/vector_internals.c
@@ -20,6 +20,59 @@
#include "vector_internals.h"
/* set agnostic elements to 1s */
+#if HOST_BIG_ENDIAN
+void vext_set_elems_1s(void *vd, uint32_t is_agnostic, uint32_t esz,
+ uint32_t idx, uint32_t tot)
+{
+ if (is_agnostic == 0) {
+ /* policy undisturbed */
+ return;
+ }
+ void *base = NULL;
+ switch (esz) {
+ case 1:
+ base = ((int8_t *)vd + H1(idx));
+ break;
+ case 2:
+ base = ((int16_t *)vd + H2(idx));
+ break;
+ case 4:
+ base = ((int32_t *)vd + H4(idx));
+ break;
+ case 8:
+ base = ((int64_t *)vd + H8(idx));
+ break;
+ default:
+ g_assert_not_reached();
+ break;
+ }
+ /*
+ * spilt the elements into 2 parts
+ * part_begin: the memory need to be set in the first uint64_t unit
+ * part_allign: the memory need to be set begins from next uint64_t
+ * unit and alligned to 8
+ */
+ uint32_t cnt = idx * esz;
+ int part_begin, part_allign;
+ part_begin = MIN(tot - cnt, 8 - (cnt % 8));
+ part_allign = ((tot - cnt - part_begin) / 8) * 8;
+
+ memset(base - part_begin + 1, -1, part_begin);
+ memset(QEMU_ALIGN_PTR_UP(base, 8), -1, part_allign);
+}
+#else
+void vext_set_elems_1s(void *vd, uint32_t is_agnostic, uint32_t esz,
+ uint32_t idx, uint32_t tot)
+{
+ if (is_agnostic == 0) {
+ /* policy undisturbed */
+ return;
+ }
+ uint32_t cnt = idx * esz;
+ memset(vd + cnt, -1, tot - cnt);
+}
+#endif
+
void vext_set_elems_1s_le(void *base, uint32_t is_agnostic, uint32_t cnt,
uint32_t tot)
{
diff --git a/target/riscv/vector_internals.h b/target/riscv/vector_internals.h
index fa599f60ca..c96e52f926 100644
--- a/target/riscv/vector_internals.h
+++ b/target/riscv/vector_internals.h
@@ -114,6 +114,8 @@ static inline uint32_t vext_get_total_elems(CPURISCVState *env, uint32_t desc,
}
/* set agnostic elements to 1s */
+void vext_set_elems_1s(void *vd, uint32_t is_agnostic, uint32_t esz,
+ uint32_t idx, uint32_t tot);
void vext_set_elems_1s_le(void *base, uint32_t is_agnostic, uint32_t cnt,
uint32_t tot);
--
2.41.0
(--- CCing Richard ---)
On 3/6/24 06:20, Huang Tao wrote:
> We add vext_set_elems_1s to set agnostic elements to 1s in both big
> and little endian situation.
> In the function vext_set_elems_1s. We using esz argument to get the first
> element to set. 'cnt' is just idx * esz.
>
> Signed-off-by: Huang Tao <eric.huang@linux.alibaba.com>
> ---
> target/riscv/vector_internals.c | 53 +++++++++++++++++++++++++++++++++
> target/riscv/vector_internals.h | 2 ++
> 2 files changed, 55 insertions(+)
>
> diff --git a/target/riscv/vector_internals.c b/target/riscv/vector_internals.c
> index 349b24f4ae..455be96996 100644
> --- a/target/riscv/vector_internals.c
> +++ b/target/riscv/vector_internals.c
> @@ -20,6 +20,59 @@
> #include "vector_internals.h"
>
> /* set agnostic elements to 1s */
> +#if HOST_BIG_ENDIAN
> +void vext_set_elems_1s(void *vd, uint32_t is_agnostic, uint32_t esz,
> + uint32_t idx, uint32_t tot)
> +{
> + if (is_agnostic == 0) {
> + /* policy undisturbed */
> + return;
> + }
> + void *base = NULL;
> + switch (esz) {
> + case 1:
> + base = ((int8_t *)vd + H1(idx));
> + break;
> + case 2:
> + base = ((int16_t *)vd + H2(idx));
> + break;
> + case 4:
> + base = ((int32_t *)vd + H4(idx));
> + break;
> + case 8:
> + base = ((int64_t *)vd + H8(idx));
> + break;
> + default:
> + g_assert_not_reached();
> + break;
> + }
> + /*
> + * spilt the elements into 2 parts
> + * part_begin: the memory need to be set in the first uint64_t unit
> + * part_allign: the memory need to be set begins from next uint64_t
> + * unit and alligned to 8
> + */
> + uint32_t cnt = idx * esz;
> + int part_begin, part_allign;
> + part_begin = MIN(tot - cnt, 8 - (cnt % 8));
> + part_allign = ((tot - cnt - part_begin) / 8) * 8;
> +
> + memset(base - part_begin + 1, -1, part_begin);
> + memset(QEMU_ALIGN_PTR_UP(base, 8), -1, part_allign);
This seems correct but a bit over complicated at first glance. I wonder if we have
something simpler already done somewhere.
Richard, does ARM (or any other arch) do anything of the sort? Aside from more trivial
byte swaps using bswap64() I didn't find anything similar.
We recently posted a big endian related fix here:
[PATCH for 9.0 v15 03/10] target/riscv/vector_helper.c: fix 'vmvr_v' memcpy endianess
But not sure how to apply it here.
Thanks,
Daniel
> +}
> +#else
> +void vext_set_elems_1s(void *vd, uint32_t is_agnostic, uint32_t esz,
> + uint32_t idx, uint32_t tot)
> +{
> + if (is_agnostic == 0) {
> + /* policy undisturbed */
> + return;
> + }
> + uint32_t cnt = idx * esz;
> + memset(vd + cnt, -1, tot - cnt);
> +}
> +#endif
> +
> void vext_set_elems_1s_le(void *base, uint32_t is_agnostic, uint32_t cnt,
> uint32_t tot)
> {
> diff --git a/target/riscv/vector_internals.h b/target/riscv/vector_internals.h
> index fa599f60ca..c96e52f926 100644
> --- a/target/riscv/vector_internals.h
> +++ b/target/riscv/vector_internals.h
> @@ -114,6 +114,8 @@ static inline uint32_t vext_get_total_elems(CPURISCVState *env, uint32_t desc,
> }
>
> /* set agnostic elements to 1s */
> +void vext_set_elems_1s(void *vd, uint32_t is_agnostic, uint32_t esz,
> + uint32_t idx, uint32_t tot);
> void vext_set_elems_1s_le(void *base, uint32_t is_agnostic, uint32_t cnt,
> uint32_t tot);
>
On 3/19/24 11:57, Daniel Henrique Barboza wrote:
> This seems correct but a bit over complicated at first glance. I wonder if we have
> something simpler already done somewhere.
>
> Richard, does ARM (or any other arch) do anything of the sort? Aside from more trivial
> byte swaps using bswap64() I didn't find anything similar.
No, nothing quite like.
> We recently posted a big endian related fix here:
>
> [PATCH for 9.0 v15 03/10] target/riscv/vector_helper.c: fix 'vmvr_v' memcpy endianess
>
> But not sure how to apply it here.
It's almost exactly the same, only with memset instead of memcpy.
if (HOST_BIG_ENDIAN && idx % 8 != 0) {
uint32_t j = ROUND_UP(idx, 8);
memset(vd + H(j - 1), -1, j - idx);
idx = j;
}
memset(vd + idx, -1, tot - idx);
I'll note that you don't need to change the api of vext_set_elems_1s -- so most of these
patches are not required.
r~
I will rewrite the patch, and send a new version soon.
Thanks,
Huang Tao
On 2024/3/20 07:32, Richard Henderson wrote:
> On 3/19/24 11:57, Daniel Henrique Barboza wrote:
>> This seems correct but a bit over complicated at first glance. I
>> wonder if we have
>> something simpler already done somewhere.
>>
>> Richard, does ARM (or any other arch) do anything of the sort? Aside
>> from more trivial
>> byte swaps using bswap64() I didn't find anything similar.
>
> No, nothing quite like.
>
>> We recently posted a big endian related fix here:
>>
>> [PATCH for 9.0 v15 03/10] target/riscv/vector_helper.c: fix 'vmvr_v'
>> memcpy endianess
>>
>> But not sure how to apply it here.
>
> It's almost exactly the same, only with memset instead of memcpy.
>
> if (HOST_BIG_ENDIAN && idx % 8 != 0) {
> uint32_t j = ROUND_UP(idx, 8);
> memset(vd + H(j - 1), -1, j - idx);
> idx = j;
> }
> memset(vd + idx, -1, tot - idx);
>
>
> I'll note that you don't need to change the api of vext_set_elems_1s
> -- so most of these patches are not required.
>
>
> r~
© 2016 - 2026 Red Hat, Inc.