target/hexagon/mmvec/decode_ext_mmvec.c | 8 +++----
tests/tcg/hexagon/hvx_misc.c | 31 +++++++++++++++++++++++++
2 files changed, 35 insertions(+), 4 deletions(-)
The order in which instructions are generated by gen_insn() influences
assignment to tmp registers. During generation, tmp instructions (e.g.
generate_V6_vassign_tmp) use vreg_src_off() to determine what kind of
register to use as source. If some instruction (e.g.
generate_V6_vmpyowh_64_acc) uses a tmp register but is generated prior
to the corresponding tmp instruction, the vregs_updated_tmp bit map
isn't updated in time.
Example:
{ v14.tmp = v16; v25 = v14 } This works properly because
generate_V6_vassign_tmp is generated before generate_V6_vassign
and the bit map is updated.
{ v15:14.tmp = vcombine(v21, v16); v25:24 += vmpyo(v18.w,v14.h) }
This does not work properly because vmpyo is generated before
vcombine and therefore the bit map does not yet know that there's
a tmp register.
The parentheses in the decoding function were in the wrong place.
Moving them to the correct location makes shuffling of .tmp vector
registers work as expected.
Signed-off-by: Marco Liebel <quic_mliebel@quicinc.com>
---
target/hexagon/mmvec/decode_ext_mmvec.c | 8 +++----
tests/tcg/hexagon/hvx_misc.c | 31 +++++++++++++++++++++++++
2 files changed, 35 insertions(+), 4 deletions(-)
diff --git a/target/hexagon/mmvec/decode_ext_mmvec.c b/target/hexagon/mmvec/decode_ext_mmvec.c
index 061a65ab88..174eb3b78b 100644
--- a/target/hexagon/mmvec/decode_ext_mmvec.c
+++ b/target/hexagon/mmvec/decode_ext_mmvec.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -148,9 +148,9 @@ decode_shuffle_for_execution_vops(Packet *pkt)
int i;
for (i = 0; i < pkt->num_insns; i++) {
uint16_t opcode = pkt->insn[i].opcode;
- if (GET_ATTRIB(opcode, A_LOAD) &&
- (GET_ATTRIB(opcode, A_CVI_NEW) ||
- GET_ATTRIB(opcode, A_CVI_TMP))) {
+ if ((GET_ATTRIB(opcode, A_LOAD) &&
+ GET_ATTRIB(opcode, A_CVI_NEW)) ||
+ GET_ATTRIB(opcode, A_CVI_TMP)) {
/*
* Find prior consuming vector instructions
* Move to end of packet
diff --git a/tests/tcg/hexagon/hvx_misc.c b/tests/tcg/hexagon/hvx_misc.c
index 09dec8d7a1..b45170acd1 100644
--- a/tests/tcg/hexagon/hvx_misc.c
+++ b/tests/tcg/hexagon/hvx_misc.c
@@ -60,6 +60,36 @@ static void test_load_tmp(void)
check_output_w(__LINE__, BUFSIZE);
}
+static void test_load_tmp2(void)
+{
+ void *pout0 = &output[0];
+ void *pout1 = &output[1];
+
+ asm volatile(
+ "r0 = #0x03030303\n\t"
+ "v16 = vsplat(r0)\n\t"
+ "r0 = #0x04040404\n\t"
+ "v18 = vsplat(r0)\n\t"
+ "r0 = #0x05050505\n\t"
+ "v21 = vsplat(r0)\n\t"
+ "{\n\t"
+ " v25:24 += vmpyo(v18.w, v14.h)\n\t"
+ " v15:14.tmp = vcombine(v21, v16)\n\t"
+ "}\n\t"
+ "vmem(%0 + #0) = v24\n\t"
+ "vmem(%1 + #0) = v25\n\t"
+ : : "r"(pout0), "r"(pout1)
+ : "r0", "v16", "v18", "v21", "v24", "v25", "memory"
+ );
+
+ for (int i = 0; i < MAX_VEC_SIZE_BYTES / 4; ++i) {
+ expect[0].w[i] = 0x180c0000;
+ expect[1].w[i] = 0x000c1818;
+ }
+
+ check_output_w(__LINE__, 2);
+}
+
static void test_load_cur(void)
{
void *p0 = buffer0;
@@ -435,6 +465,7 @@ int main()
init_buffers();
test_load_tmp();
+ test_load_tmp2();
test_load_cur();
test_load_aligned();
test_load_unaligned();
--
2.25.1
> -----Original Message-----
> From: Marco Liebel (QUIC) <quic_mliebel@quicinc.com>
> Sent: Monday, May 22, 2023 12:47 PM
> To: qemu-devel@nongnu.org
> Cc: Taylor Simpson <tsimpson@quicinc.com>; Brian Cain <bcain@quicinc.com>;
> Marco Liebel (QUIC) <quic_mliebel@quicinc.com>
> Subject: [PATCH v2] Hexagon (target/hexagon) Fix assignment to tmp registers
>
> The order in which instructions are generated by gen_insn() influences
> assignment to tmp registers. During generation, tmp instructions (e.g.
> generate_V6_vassign_tmp) use vreg_src_off() to determine what kind of
> register to use as source. If some instruction (e.g.
> generate_V6_vmpyowh_64_acc) uses a tmp register but is generated prior
> to the corresponding tmp instruction, the vregs_updated_tmp bit map
> isn't updated in time.
>
> Example:
> { v14.tmp = v16; v25 = v14 } This works properly because
> generate_V6_vassign_tmp is generated before generate_V6_vassign
> and the bit map is updated.
>
> { v15:14.tmp = vcombine(v21, v16); v25:24 += vmpyo(v18.w,v14.h) }
> This does not work properly because vmpyo is generated before
> vcombine and therefore the bit map does not yet know that there's
> a tmp register.
>
> The parentheses in the decoding function were in the wrong place.
> Moving them to the correct location makes shuffling of .tmp vector
> registers work as expected.
>
> Signed-off-by: Marco Liebel <quic_mliebel@quicinc.com>
> ---
> target/hexagon/mmvec/decode_ext_mmvec.c | 8 +++----
> tests/tcg/hexagon/hvx_misc.c | 31 +++++++++++++++++++++++++
> 2 files changed, 35 insertions(+), 4 deletions(-)
>
> diff --git a/target/hexagon/mmvec/decode_ext_mmvec.c
> b/target/hexagon/mmvec/decode_ext_mmvec.c
> index 061a65ab88..174eb3b78b 100644
> --- a/target/hexagon/mmvec/decode_ext_mmvec.c
> +++ b/target/hexagon/mmvec/decode_ext_mmvec.c
> @@ -1,5 +1,5 @@
> /*
> - * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights
> Reserved.
> + * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights
> Reserved.
> *
> * This program is free software; you can redistribute it and/or modify
> * it under the terms of the GNU General Public License as published by
> @@ -148,9 +148,9 @@ decode_shuffle_for_execution_vops(Packet *pkt)
> int i;
> for (i = 0; i < pkt->num_insns; i++) {
> uint16_t opcode = pkt->insn[i].opcode;
> - if (GET_ATTRIB(opcode, A_LOAD) &&
> - (GET_ATTRIB(opcode, A_CVI_NEW) ||
> - GET_ATTRIB(opcode, A_CVI_TMP))) {
> + if ((GET_ATTRIB(opcode, A_LOAD) &&
> + GET_ATTRIB(opcode, A_CVI_NEW)) ||
> + GET_ATTRIB(opcode, A_CVI_TMP)) {
> /*
> * Find prior consuming vector instructions
> * Move to end of packet
> diff --git a/tests/tcg/hexagon/hvx_misc.c b/tests/tcg/hexagon/hvx_misc.c
> index 09dec8d7a1..b45170acd1 100644
> --- a/tests/tcg/hexagon/hvx_misc.c
> +++ b/tests/tcg/hexagon/hvx_misc.c
> @@ -60,6 +60,36 @@ static void test_load_tmp(void)
> check_output_w(__LINE__, BUFSIZE);
> }
>
> +static void test_load_tmp2(void)
> +{
> + void *pout0 = &output[0];
> + void *pout1 = &output[1];
> +
> + asm volatile(
> + "r0 = #0x03030303\n\t"
> + "v16 = vsplat(r0)\n\t"
> + "r0 = #0x04040404\n\t"
> + "v18 = vsplat(r0)\n\t"
> + "r0 = #0x05050505\n\t"
> + "v21 = vsplat(r0)\n\t"
> + "{\n\t"
> + " v25:24 += vmpyo(v18.w, v14.h)\n\t"
> + " v15:14.tmp = vcombine(v21, v16)\n\t"
> + "}\n\t"
> + "vmem(%0 + #0) = v24\n\t"
> + "vmem(%1 + #0) = v25\n\t"
> + : : "r"(pout0), "r"(pout1)
> + : "r0", "v16", "v18", "v21", "v24", "v25", "memory"
> + );
> +
> + for (int i = 0; i < MAX_VEC_SIZE_BYTES / 4; ++i) {
> + expect[0].w[i] = 0x180c0000;
> + expect[1].w[i] = 0x000c1818;
> + }
> +
> + check_output_w(__LINE__, 2);
> +}
> +
> static void test_load_cur(void)
> {
> void *p0 = buffer0;
> @@ -435,6 +465,7 @@ int main()
> init_buffers();
>
> test_load_tmp();
> + test_load_tmp2();
> test_load_cur();
> test_load_aligned();
> test_load_unaligned();
> --
> 2.25.1

Reviewed-by: Brian Cain <bcain@quicinc.com>
© 2016 - 2024 Red Hat, Inc.