We add vext_set_elems_1s to set agnostic elements to 1s in both big
and little endian situation.
In the function vext_set_elems_1s. We using esz argument to get the first
element to set. 'cnt' is just idx * esz.
Signed-off-by: Huang Tao <eric.huang@linux.alibaba.com>
---
target/riscv/vector_internals.c | 53 +++++++++++++++++++++++++++++++++
target/riscv/vector_internals.h | 2 ++
2 files changed, 55 insertions(+)
diff --git a/target/riscv/vector_internals.c b/target/riscv/vector_internals.c
index 349b24f4ae..455be96996 100644
--- a/target/riscv/vector_internals.c
+++ b/target/riscv/vector_internals.c
@@ -20,6 +20,59 @@
#include "vector_internals.h"
/* set agnostic elements to 1s */
+#if HOST_BIG_ENDIAN
+void vext_set_elems_1s(void *vd, uint32_t is_agnostic, uint32_t esz,
+ uint32_t idx, uint32_t tot)
+{
+ if (is_agnostic == 0) {
+ /* policy undisturbed */
+ return;
+ }
+ void *base = NULL;
+ switch (esz) {
+ case 1:
+ base = ((int8_t *)vd + H1(idx));
+ break;
+ case 2:
+ base = ((int16_t *)vd + H2(idx));
+ break;
+ case 4:
+ base = ((int32_t *)vd + H4(idx));
+ break;
+ case 8:
+ base = ((int64_t *)vd + H8(idx));
+ break;
+ default:
+ g_assert_not_reached();
+ break;
+ }
+ /*
+ * spilt the elements into 2 parts
+ * part_begin: the memory need to be set in the first uint64_t unit
+ * part_allign: the memory need to be set begins from next uint64_t
+ * unit and alligned to 8
+ */
+ uint32_t cnt = idx * esz;
+ int part_begin, part_allign;
+ part_begin = MIN(tot - cnt, 8 - (cnt % 8));
+ part_allign = ((tot - cnt - part_begin) / 8) * 8;
+
+ memset(base - part_begin + 1, -1, part_begin);
+ memset(QEMU_ALIGN_PTR_UP(base, 8), -1, part_allign);
+}
+#else
+void vext_set_elems_1s(void *vd, uint32_t is_agnostic, uint32_t esz,
+ uint32_t idx, uint32_t tot)
+{
+ if (is_agnostic == 0) {
+ /* policy undisturbed */
+ return;
+ }
+ uint32_t cnt = idx * esz;
+ memset(vd + cnt, -1, tot - cnt);
+}
+#endif
+
void vext_set_elems_1s_le(void *base, uint32_t is_agnostic, uint32_t cnt,
uint32_t tot)
{
diff --git a/target/riscv/vector_internals.h b/target/riscv/vector_internals.h
index fa599f60ca..c96e52f926 100644
--- a/target/riscv/vector_internals.h
+++ b/target/riscv/vector_internals.h
@@ -114,6 +114,8 @@ static inline uint32_t vext_get_total_elems(CPURISCVState *env, uint32_t desc,
}
/* set agnostic elements to 1s */
+void vext_set_elems_1s(void *vd, uint32_t is_agnostic, uint32_t esz,
+ uint32_t idx, uint32_t tot);
void vext_set_elems_1s_le(void *base, uint32_t is_agnostic, uint32_t cnt,
uint32_t tot);
--
2.41.0
(--- CCing Richard ---) On 3/6/24 06:20, Huang Tao wrote: > We add vext_set_elems_1s to set agnostic elements to 1s in both big > and little endian situation. > In the function vext_set_elems_1s. We using esz argument to get the first > element to set. 'cnt' is just idx * esz. > > Signed-off-by: Huang Tao <eric.huang@linux.alibaba.com> > --- > target/riscv/vector_internals.c | 53 +++++++++++++++++++++++++++++++++ > target/riscv/vector_internals.h | 2 ++ > 2 files changed, 55 insertions(+) > > diff --git a/target/riscv/vector_internals.c b/target/riscv/vector_internals.c > index 349b24f4ae..455be96996 100644 > --- a/target/riscv/vector_internals.c > +++ b/target/riscv/vector_internals.c > @@ -20,6 +20,59 @@ > #include "vector_internals.h" > > /* set agnostic elements to 1s */ > +#if HOST_BIG_ENDIAN > +void vext_set_elems_1s(void *vd, uint32_t is_agnostic, uint32_t esz, > + uint32_t idx, uint32_t tot) > +{ > + if (is_agnostic == 0) { > + /* policy undisturbed */ > + return; > + } > + void *base = NULL; > + switch (esz) { > + case 1: > + base = ((int8_t *)vd + H1(idx)); > + break; > + case 2: > + base = ((int16_t *)vd + H2(idx)); > + break; > + case 4: > + base = ((int32_t *)vd + H4(idx)); > + break; > + case 8: > + base = ((int64_t *)vd + H8(idx)); > + break; > + default: > + g_assert_not_reached(); > + break; > + } > + /* > + * spilt the elements into 2 parts > + * part_begin: the memory need to be set in the first uint64_t unit > + * part_allign: the memory need to be set begins from next uint64_t > + * unit and alligned to 8 > + */ > + uint32_t cnt = idx * esz; > + int part_begin, part_allign; > + part_begin = MIN(tot - cnt, 8 - (cnt % 8)); > + part_allign = ((tot - cnt - part_begin) / 8) * 8; > + > + memset(base - part_begin + 1, -1, part_begin); > + memset(QEMU_ALIGN_PTR_UP(base, 8), -1, part_allign); This seems correct but a bit over complicated at first glance. I wonder if we have something simpler already done somewhere. Richard, does ARM (or any other arch) do anything of the sort? Aside from more trivial byte swaps using bswap64() I didn't find anything similar. We recently posted a big endian related fix here: [PATCH for 9.0 v15 03/10] target/riscv/vector_helper.c: fix 'vmvr_v' memcpy endianess But not sure how to apply it here. Thanks, Daniel > +} > +#else > +void vext_set_elems_1s(void *vd, uint32_t is_agnostic, uint32_t esz, > + uint32_t idx, uint32_t tot) > +{ > + if (is_agnostic == 0) { > + /* policy undisturbed */ > + return; > + } > + uint32_t cnt = idx * esz; > + memset(vd + cnt, -1, tot - cnt); > +} > +#endif > + > void vext_set_elems_1s_le(void *base, uint32_t is_agnostic, uint32_t cnt, > uint32_t tot) > { > diff --git a/target/riscv/vector_internals.h b/target/riscv/vector_internals.h > index fa599f60ca..c96e52f926 100644 > --- a/target/riscv/vector_internals.h > +++ b/target/riscv/vector_internals.h > @@ -114,6 +114,8 @@ static inline uint32_t vext_get_total_elems(CPURISCVState *env, uint32_t desc, > } > > /* set agnostic elements to 1s */ > +void vext_set_elems_1s(void *vd, uint32_t is_agnostic, uint32_t esz, > + uint32_t idx, uint32_t tot); > void vext_set_elems_1s_le(void *base, uint32_t is_agnostic, uint32_t cnt, > uint32_t tot); >
On 3/19/24 11:57, Daniel Henrique Barboza wrote: > This seems correct but a bit over complicated at first glance. I wonder if we have > something simpler already done somewhere. > > Richard, does ARM (or any other arch) do anything of the sort? Aside from more trivial > byte swaps using bswap64() I didn't find anything similar. No, nothing quite like. > We recently posted a big endian related fix here: > > [PATCH for 9.0 v15 03/10] target/riscv/vector_helper.c: fix 'vmvr_v' memcpy endianess > > But not sure how to apply it here. It's almost exactly the same, only with memset instead of memcpy. if (HOST_BIG_ENDIAN && idx % 8 != 0) { uint32_t j = ROUND_UP(idx, 8); memset(vd + H(j - 1), -1, j - idx); idx = j; } memset(vd + idx, -1, tot - idx); I'll note that you don't need to change the api of vext_set_elems_1s -- so most of these patches are not required. r~
I will rewrite the patch, and send a new version soon. Thanks, Huang Tao On 2024/3/20 07:32, Richard Henderson wrote: > On 3/19/24 11:57, Daniel Henrique Barboza wrote: >> This seems correct but a bit over complicated at first glance. I >> wonder if we have >> something simpler already done somewhere. >> >> Richard, does ARM (or any other arch) do anything of the sort? Aside >> from more trivial >> byte swaps using bswap64() I didn't find anything similar. > > No, nothing quite like. > >> We recently posted a big endian related fix here: >> >> [PATCH for 9.0 v15 03/10] target/riscv/vector_helper.c: fix 'vmvr_v' >> memcpy endianess >> >> But not sure how to apply it here. > > It's almost exactly the same, only with memset instead of memcpy. > > if (HOST_BIG_ENDIAN && idx % 8 != 0) { > uint32_t j = ROUND_UP(idx, 8); > memset(vd + H(j - 1), -1, j - idx); > idx = j; > } > memset(vd + idx, -1, tot - idx); > > > I'll note that you don't need to change the api of vext_set_elems_1s > -- so most of these patches are not required. > > > r~
© 2016 - 2024 Red Hat, Inc.