The VBROADCASTSD instruction only allows %ymm registers as destination.
Rather than forcing VEX.L and writing to the entire 256-bit register,
revert to using MOVDDUP with an %xmm register. This is sufficient for
an avx1 host since we do not support TCG_TYPE_V256 for that case.
Also fix the 32-bit AVX2 case, which should have used VPBROADCASTW.
Fixes: 1e262b49b533
Reported-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/i386/tcg-target.inc.c | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/tcg/i386/tcg-target.inc.c b/tcg/i386/tcg-target.inc.c
index aafd01cb49..b3601446cd 100644
--- a/tcg/i386/tcg-target.inc.c
+++ b/tcg/i386/tcg-target.inc.c
@@ -358,6 +358,7 @@ static inline int tcg_target_const_match(tcg_target_long val, TCGType type,
#define OPC_MOVBE_MyGy (0xf1 | P_EXT38)
#define OPC_MOVD_VyEy (0x6e | P_EXT | P_DATA16)
#define OPC_MOVD_EyVy (0x7e | P_EXT | P_DATA16)
+#define OPC_MOVDDUP (0x12 | P_EXT | P_SIMDF2)
#define OPC_MOVDQA_VxWx (0x6f | P_EXT | P_DATA16)
#define OPC_MOVDQA_WxVx (0x7f | P_EXT | P_DATA16)
#define OPC_MOVDQU_VxWx (0x6f | P_EXT | P_SIMDF3)
@@ -921,7 +922,7 @@ static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
} else {
switch (vece) {
case MO_64:
- tcg_out_vex_modrm_offset(s, OPC_VBROADCASTSD, r, 0, base, offset);
+ tcg_out_vex_modrm_offset(s, OPC_MOVDDUP, r, 0, base, offset);
break;
case MO_32:
tcg_out_vex_modrm_offset(s, OPC_VBROADCASTSS, r, 0, base, offset);
@@ -963,12 +964,12 @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType type,
} else if (have_avx2) {
tcg_out_vex_modrm_pool(s, OPC_VPBROADCASTQ + vex_l, ret);
} else {
- tcg_out_vex_modrm_pool(s, OPC_VBROADCASTSD, ret);
+ tcg_out_vex_modrm_pool(s, OPC_MOVDDUP, ret);
}
new_pool_label(s, arg, R_386_PC32, s->code_ptr - 4, -4);
} else {
if (have_avx2) {
- tcg_out_vex_modrm_pool(s, OPC_VBROADCASTSD + vex_l, ret);
+ tcg_out_vex_modrm_pool(s, OPC_VPBROADCASTW + vex_l, ret);
} else {
tcg_out_vex_modrm_pool(s, OPC_VBROADCASTSS, ret);
}
--
2.17.1
On 16/05/2019 23:50, Richard Henderson wrote:
> The VBROADCASTSD instruction only allows %ymm registers as destination.
> Rather than forcing VEX.L and writing to the entire 256-bit register,
> revert to using MOVDDUP with an %xmm register. This is sufficient for
> an avx1 host since we do not support TCG_TYPE_V256 for that case.
>
> Also fix the 32-bit avx2, which should have used VPBROADCASTW.
>
> Fixes: 1e262b49b533
> Reported-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> tcg/i386/tcg-target.inc.c | 7 ++++---
> 1 file changed, 4 insertions(+), 3 deletions(-)
>
> diff --git a/tcg/i386/tcg-target.inc.c b/tcg/i386/tcg-target.inc.c
> index aafd01cb49..b3601446cd 100644
> --- a/tcg/i386/tcg-target.inc.c
> +++ b/tcg/i386/tcg-target.inc.c
> @@ -358,6 +358,7 @@ static inline int tcg_target_const_match(tcg_target_long val, TCGType type,
> #define OPC_MOVBE_MyGy (0xf1 | P_EXT38)
> #define OPC_MOVD_VyEy (0x6e | P_EXT | P_DATA16)
> #define OPC_MOVD_EyVy (0x7e | P_EXT | P_DATA16)
> +#define OPC_MOVDDUP (0x12 | P_EXT | P_SIMDF2)
> #define OPC_MOVDQA_VxWx (0x6f | P_EXT | P_DATA16)
> #define OPC_MOVDQA_WxVx (0x7f | P_EXT | P_DATA16)
> #define OPC_MOVDQU_VxWx (0x6f | P_EXT | P_SIMDF3)
> @@ -921,7 +922,7 @@ static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
> } else {
> switch (vece) {
> case MO_64:
> - tcg_out_vex_modrm_offset(s, OPC_VBROADCASTSD, r, 0, base, offset);
> + tcg_out_vex_modrm_offset(s, OPC_MOVDDUP, r, 0, base, offset);
> break;
> case MO_32:
> tcg_out_vex_modrm_offset(s, OPC_VBROADCASTSS, r, 0, base, offset);
> @@ -963,12 +964,12 @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType type,
> } else if (have_avx2) {
> tcg_out_vex_modrm_pool(s, OPC_VPBROADCASTQ + vex_l, ret);
> } else {
> - tcg_out_vex_modrm_pool(s, OPC_VBROADCASTSD, ret);
> + tcg_out_vex_modrm_pool(s, OPC_MOVDDUP, ret);
> }
> new_pool_label(s, arg, R_386_PC32, s->code_ptr - 4, -4);
> } else {
> if (have_avx2) {
> - tcg_out_vex_modrm_pool(s, OPC_VBROADCASTSD + vex_l, ret);
> + tcg_out_vex_modrm_pool(s, OPC_VPBROADCASTW + vex_l, ret);
> } else {
> tcg_out_vex_modrm_pool(s, OPC_VBROADCASTSS, ret);
> }

Indeed, this fixes the issue for me here - thank you!

Tested-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>

ATB,

Mark.
© 2016 - 2024 Red Hat, Inc.