target/s390x/translate_vx.inc.c | 38 ++++++--------------------------- 1 file changed, 6 insertions(+), 32 deletions(-)
This replaces the target-specific implementations for VSEL.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/s390x/translate_vx.inc.c | 38 ++++++---------------------------
1 file changed, 6 insertions(+), 32 deletions(-)
diff --git a/target/s390x/translate_vx.inc.c b/target/s390x/translate_vx.inc.c
index 7e0bfcb190..a8603cbfd6 100644
--- a/target/s390x/translate_vx.inc.c
+++ b/target/s390x/translate_vx.inc.c
@@ -233,6 +233,9 @@ static void get_vec_element_ptr_i64(TCGv_ptr ptr, uint8_t reg, TCGv_i64 enr,
#define gen_gvec_fn_3(fn, es, v1, v2, v3) \
tcg_gen_gvec_##fn(es, vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
vec_full_reg_offset(v3), 16, 16)
+#define gen_gvec_fn_4(fn, es, v1, v2, v3, v4) \
+ tcg_gen_gvec_##fn(es, vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
+ vec_full_reg_offset(v3), vec_full_reg_offset(v4), 16, 16)
/*
* Helper to carry out a 128 bit vector computation using 2 i64 values per
@@ -903,40 +906,11 @@ static DisasJumpType op_vsce(DisasContext *s, DisasOps *o)
return DISAS_NEXT;
}
-static void gen_sel_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b, TCGv_i64 c)
-{
- TCGv_i64 t = tcg_temp_new_i64();
-
- /* bit in c not set -> copy bit from b */
- tcg_gen_andc_i64(t, b, c);
- /* bit in c set -> copy bit from a */
- tcg_gen_and_i64(d, a, c);
- /* merge the results */
- tcg_gen_or_i64(d, d, t);
- tcg_temp_free_i64(t);
-}
-
-static void gen_sel_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b,
- TCGv_vec c)
-{
- TCGv_vec t = tcg_temp_new_vec_matching(d);
-
- tcg_gen_andc_vec(vece, t, b, c);
- tcg_gen_and_vec(vece, d, a, c);
- tcg_gen_or_vec(vece, d, d, t);
- tcg_temp_free_vec(t);
-}
-
static DisasJumpType op_vsel(DisasContext *s, DisasOps *o)
{
- static const GVecGen4 gvec_op = {
- .fni8 = gen_sel_i64,
- .fniv = gen_sel_vec,
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
- };
-
- gen_gvec_4(get_field(s->fields, v1), get_field(s->fields, v2),
- get_field(s->fields, v3), get_field(s->fields, v4), &gvec_op);
+ gen_gvec_fn_4(bitsel, ES_8, get_field(s->fields, v1),
+ get_field(s->fields, v4), get_field(s->fields, v2),
+ get_field(s->fields, v3));
return DISAS_NEXT;
}
--
2.17.1
On 03.06.19 18:57, Richard Henderson wrote: > This replaces the target-specific implementations for VSEL. > > Signed-off-by: Richard Henderson <richard.henderson@linaro.org> > --- > target/s390x/translate_vx.inc.c | 38 ++++++--------------------------- > 1 file changed, 6 insertions(+), 32 deletions(-) > > diff --git a/target/s390x/translate_vx.inc.c b/target/s390x/translate_vx.inc.c > index 7e0bfcb190..a8603cbfd6 100644 > --- a/target/s390x/translate_vx.inc.c > +++ b/target/s390x/translate_vx.inc.c > @@ -233,6 +233,9 @@ static void get_vec_element_ptr_i64(TCGv_ptr ptr, uint8_t reg, TCGv_i64 enr, > #define gen_gvec_fn_3(fn, es, v1, v2, v3) \ > tcg_gen_gvec_##fn(es, vec_full_reg_offset(v1), vec_full_reg_offset(v2), \ > vec_full_reg_offset(v3), 16, 16) > +#define gen_gvec_fn_4(fn, es, v1, v2, v3, v4) \ > + tcg_gen_gvec_##fn(es, vec_full_reg_offset(v1), vec_full_reg_offset(v2), \ > + vec_full_reg_offset(v3), vec_full_reg_offset(v4), 16, 16) > > /* > * Helper to carry out a 128 bit vector computation using 2 i64 values per > @@ -903,40 +906,11 @@ static DisasJumpType op_vsce(DisasContext *s, DisasOps *o) > return DISAS_NEXT; > } > > -static void gen_sel_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b, TCGv_i64 c) > -{ > - TCGv_i64 t = tcg_temp_new_i64(); > - > - /* bit in c not set -> copy bit from b */ > - tcg_gen_andc_i64(t, b, c); > - /* bit in c set -> copy bit from a */ > - tcg_gen_and_i64(d, a, c); > - /* merge the results */ > - tcg_gen_or_i64(d, d, t); > - tcg_temp_free_i64(t); > -} > - > -static void gen_sel_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b, > - TCGv_vec c) > -{ > - TCGv_vec t = tcg_temp_new_vec_matching(d); > - > - tcg_gen_andc_vec(vece, t, b, c); > - tcg_gen_and_vec(vece, d, a, c); > - tcg_gen_or_vec(vece, d, d, t); > - tcg_temp_free_vec(t); > -} > - Comparing against tcg_gen_bitsel_i64() 1. a and c are switched 2. b is _not_ switched (and() and andc() are switched) Shouldn't this be gen_gvec_fn_4(bitsel, ES_8, get_field(s->fields, v1), get_field(s->fields, v4), get_field(s->fields, v3), get_field(s->fields, v2)); ? Maybe I am missing something. It was a long day :) Should I send this patch with the next s390x/tcg pull request? -- Thanks, David / dhildenb
On 6/3/19 2:41 PM, David Hildenbrand wrote: >> -static void gen_sel_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b, >> - TCGv_vec c) >> -{ >> - TCGv_vec t = tcg_temp_new_vec_matching(d); >> - >> - tcg_gen_andc_vec(vece, t, b, c); >> - tcg_gen_and_vec(vece, d, a, c); >> - tcg_gen_or_vec(vece, d, d, t); >> - tcg_temp_free_vec(t); >> -} >> - > > Comparing against tcg_gen_bitsel_i64() > > 1. a and c are switched > 2. b is _not_ switched (and() and andc() are switched) Not quite. {a,b,c} from your s390 implementation becomes {c,a,b} for tcg. Running tests would show for sure; I guess you have those later in your vx patch set? > Should I send this patch with the next s390x/tcg pull request? Yes please. r~
On 03.06.19 23:59, Richard Henderson wrote: > On 6/3/19 2:41 PM, David Hildenbrand wrote: >>> -static void gen_sel_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b, >>> - TCGv_vec c) >>> -{ >>> - TCGv_vec t = tcg_temp_new_vec_matching(d); >>> - >>> - tcg_gen_andc_vec(vece, t, b, c); >>> - tcg_gen_and_vec(vece, d, a, c); >>> - tcg_gen_or_vec(vece, d, d, t); >>> - tcg_temp_free_vec(t); >>> -} >>> - >> >> Comparing against tcg_gen_bitsel_i64() >> >> 1. a and c are switched >> 2. b is _not_ switched (and() and andc() are switched) > > Not quite. {a,b,c} from your s390 implementation becomes {c,a,b} for tcg. > > Running tests would show for sure; I guess you have those later in your vx > patch set? I only have a small selection of tests for now 84fa6254e4 tests/tcg: target/s390x: Test VECTOR ADD * 9e9a9e5246 tests/tcg: target/s390x: Test VECTOR UNPACK * 9d2b7184c6 tests/tcg: target/s390x: Test VECTOR PACK * 4872c1b6bd tests/tcg: target/s390x: Test VECTOR MERGE (HIGH|LOW) 914502d1d1 tests/tcg: target/s390x: Test VECTOR LOAD AND REPLICATE 7d01bbca17 tests/tcg: target/s390x: Test VECTOR GENERATE MASK dc107ebdf7 tests/tcg: target/s390x: Test VECTOR GENERATE BYTE MASK 17212a732d tests/tcg: target/s390x: Test VECTOR LOAD GR FROM VR ELEMENT f120e93c15 tests/tcg: target/s390x: Test VECTOR GATHER ELEMENT But I just added a simple 6c43fe6a8c tests/tcg: target/s390x: Test VECTOR SELECT That test confirmed that your implementation works correctly. :) > >> Should I send this patch with the next s390x/tcg pull request? > > Yes please. I'll change the subject to "s390x/tcg: Use tcg_gen_gvec_bitsel for VECTOR SELECT" if you don't object. Thanks! > > > r~ > -- Thanks, David / dhildenb
© 2016 - 2024 Red Hat, Inc.