tcg/i386/tcg-target.c.inc | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-)
We need to be able to represent VEX.W on a 32-bit host, where REX.W
will always be zero. Fixes the encoding for VPSLLVQ and VPSRLVQ.
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/385
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/i386/tcg-target.c.inc | 13 +++++++------
1 file changed, 7 insertions(+), 6 deletions(-)
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
index 98d924b91a..997510109d 100644
--- a/tcg/i386/tcg-target.c.inc
+++ b/tcg/i386/tcg-target.c.inc
@@ -241,8 +241,9 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
#define P_EXT 0x100 /* 0x0f opcode prefix */
#define P_EXT38 0x200 /* 0x0f 0x38 opcode prefix */
#define P_DATA16 0x400 /* 0x66 opcode prefix */
+#define P_VEXW 0x1000 /* Set VEX.W = 1 */
#if TCG_TARGET_REG_BITS == 64
-# define P_REXW 0x1000 /* Set REX.W = 1 */
+# define P_REXW P_VEXW /* Set REX.W = 1; match VEXW */
# define P_REXB_R 0x2000 /* REG field as byte register */
# define P_REXB_RM 0x4000 /* R/M field as byte register */
# define P_GS 0x8000 /* gs segment override */
@@ -410,13 +411,13 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
#define OPC_VPBROADCASTW (0x79 | P_EXT38 | P_DATA16)
#define OPC_VPBROADCASTD (0x58 | P_EXT38 | P_DATA16)
#define OPC_VPBROADCASTQ (0x59 | P_EXT38 | P_DATA16)
-#define OPC_VPERMQ (0x00 | P_EXT3A | P_DATA16 | P_REXW)
+#define OPC_VPERMQ (0x00 | P_EXT3A | P_DATA16 | P_VEXW)
#define OPC_VPERM2I128 (0x46 | P_EXT3A | P_DATA16 | P_VEXL)
#define OPC_VPSLLVD (0x47 | P_EXT38 | P_DATA16)
-#define OPC_VPSLLVQ (0x47 | P_EXT38 | P_DATA16 | P_REXW)
+#define OPC_VPSLLVQ (0x47 | P_EXT38 | P_DATA16 | P_VEXW)
#define OPC_VPSRAVD (0x46 | P_EXT38 | P_DATA16)
#define OPC_VPSRLVD (0x45 | P_EXT38 | P_DATA16)
-#define OPC_VPSRLVQ (0x45 | P_EXT38 | P_DATA16 | P_REXW)
+#define OPC_VPSRLVQ (0x45 | P_EXT38 | P_DATA16 | P_VEXW)
#define OPC_VZEROUPPER (0x77 | P_EXT)
#define OPC_XCHG_ax_r32 (0x90)
@@ -576,7 +577,7 @@ static void tcg_out_vex_opc(TCGContext *s, int opc, int r, int v,
/* Use the two byte form if possible, which cannot encode
VEX.W, VEX.B, VEX.X, or an m-mmmm field other than P_EXT. */
- if ((opc & (P_EXT | P_EXT38 | P_EXT3A | P_REXW)) == P_EXT
+ if ((opc & (P_EXT | P_EXT38 | P_EXT3A | P_VEXW)) == P_EXT
&& ((rm | index) & 8) == 0) {
/* Two byte VEX prefix. */
tcg_out8(s, 0xc5);
@@ -601,7 +602,7 @@ static void tcg_out_vex_opc(TCGContext *s, int opc, int r, int v,
tmp |= (rm & 8 ? 0 : 0x20); /* VEX.B */
tcg_out8(s, tmp);
- tmp = (opc & P_REXW ? 0x80 : 0); /* VEX.W */
+ tmp = (opc & P_VEXW ? 0x80 : 0); /* VEX.W */
}
tmp |= (opc & P_VEXL ? 0x04 : 0); /* VEX.L */
--
2.25.1
On Wed, 11 Aug 2021 at 00:26, Richard Henderson <richard.henderson@linaro.org> wrote: > > We need to be able to represent VEX.W on a 32-bit host, where REX.W > will always be zero. Fixes the encoding for VPSLLVQ and VPSRLVQ. > > Resolves: https://gitlab.com/qemu-project/qemu/-/issues/385 > Signed-off-by: Richard Henderson <richard.henderson@linaro.org> This patch fixes the "wget https://github.com/ -O /dev/null" part of the test case in issue 385, but not the "apt update" part: I see this with an i686 qemu-arm binary: root@e104462:/# apt-get update Get:1 http://archive.raspberrypi.org/debian buster InRelease [32.6 kB] Get:2 http://raspbian.raspberrypi.org/raspbian buster InRelease [15.0 kB] Err:1 http://archive.raspberrypi.org/debian buster InRelease At least one invalid signature was encountered. Err:2 http://raspbian.raspberrypi.org/raspbian buster InRelease At least one invalid signature was encountered. Fetched 47.6 kB in 1s (91.3 kB/s) Reading package lists... Done W: An error occurred during the signature verification. The repository is not updated and the previous index files will be used. GPG error: http://archive.raspberrypi.org/debian buster InRelease: At least one invalid signature was encountered. W: An error occurred during the signature verification. The repository is not updated and the previous index files will be used. GPG error: http://raspbian.raspberrypi.org/raspbian buster InRelease: At least one invalid signature was encountered. W: Failed to fetch http://raspbian.raspberrypi.org/raspbian/dists/buster/InRelease At least one invalid signature was encountered. W: Failed to fetch http://archive.raspberrypi.org/debian/dists/buster/InRelease At least one invalid signature was encountered. W: Some index files failed to download. They have been ignored, or old ones used instead. 
whereas an x86-64 binary downloads everything without errors: root@e104462:/# apt update Get:1 http://archive.raspberrypi.org/debian buster InRelease [32.6 kB] Get:2 http://raspbian.raspberrypi.org/raspbian buster InRelease [15.0 kB] Get:3 http://archive.raspberrypi.org/debian buster/main armhf Packages [378 kB] Get:4 http://raspbian.raspberrypi.org/raspbian buster/main armhf Packages [13.0 MB] Fetched 13.4 MB in 49s (272 kB/s) Reading package lists... Done Building dependency tree Reading state information... Done 44 packages can be upgraded. Run 'apt list --upgradable' to see them. So there must still be another bug here... -- PMM
On Wed, 11 Aug 2021 at 00:26, Richard Henderson <richard.henderson@linaro.org> wrote: > > We need to be able to represent VEX.W on a 32-bit host, where REX.W > will always be zero. Fixes the encoding for VPSLLVQ and VPSRLVQ. > > Resolves: https://gitlab.com/qemu-project/qemu/-/issues/385 > Signed-off-by: Richard Henderson <richard.henderson@linaro.org> > --- > tcg/i386/tcg-target.c.inc | 13 +++++++------ > 1 file changed, 7 insertions(+), 6 deletions(-) > > diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc > index 98d924b91a..997510109d 100644 > --- a/tcg/i386/tcg-target.c.inc > +++ b/tcg/i386/tcg-target.c.inc > @@ -241,8 +241,9 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct) > #define P_EXT 0x100 /* 0x0f opcode prefix */ > #define P_EXT38 0x200 /* 0x0f 0x38 opcode prefix */ > #define P_DATA16 0x400 /* 0x66 opcode prefix */ > +#define P_VEXW 0x1000 /* Set VEX.W = 1 */ > #if TCG_TARGET_REG_BITS == 64 > -# define P_REXW 0x1000 /* Set REX.W = 1 */ > +# define P_REXW P_VEXW /* Set REX.W = 1; match VEXW */ > # define P_REXB_R 0x2000 /* REG field as byte register */ > # define P_REXB_RM 0x4000 /* R/M field as byte register */ > # define P_GS 0x8000 /* gs segment override */ > @@ -410,13 +411,13 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct) > #define OPC_VPBROADCASTW (0x79 | P_EXT38 | P_DATA16) > #define OPC_VPBROADCASTD (0x58 | P_EXT38 | P_DATA16) > #define OPC_VPBROADCASTQ (0x59 | P_EXT38 | P_DATA16) > -#define OPC_VPERMQ (0x00 | P_EXT3A | P_DATA16 | P_REXW) > +#define OPC_VPERMQ (0x00 | P_EXT3A | P_DATA16 | P_VEXW) > #define OPC_VPERM2I128 (0x46 | P_EXT3A | P_DATA16 | P_VEXL) > #define OPC_VPSLLVD (0x47 | P_EXT38 | P_DATA16) > -#define OPC_VPSLLVQ (0x47 | P_EXT38 | P_DATA16 | P_REXW) > +#define OPC_VPSLLVQ (0x47 | P_EXT38 | P_DATA16 | P_VEXW) > #define OPC_VPSRAVD (0x46 | P_EXT38 | P_DATA16) > #define OPC_VPSRLVD (0x45 | P_EXT38 | P_DATA16) > -#define OPC_VPSRLVQ (0x45 | P_EXT38 | P_DATA16 | P_REXW) > 
+#define OPC_VPSRLVQ (0x45 | P_EXT38 | P_DATA16 | P_VEXW) > #define OPC_VZEROUPPER (0x77 | P_EXT) > #define OPC_XCHG_ax_r32 (0x90) > > @@ -576,7 +577,7 @@ static void tcg_out_vex_opc(TCGContext *s, int opc, int r, int v, > > /* Use the two byte form if possible, which cannot encode > VEX.W, VEX.B, VEX.X, or an m-mmmm field other than P_EXT. */ > - if ((opc & (P_EXT | P_EXT38 | P_EXT3A | P_REXW)) == P_EXT > + if ((opc & (P_EXT | P_EXT38 | P_EXT3A | P_VEXW)) == P_EXT > && ((rm | index) & 8) == 0) { > /* Two byte VEX prefix. */ > tcg_out8(s, 0xc5); > @@ -601,7 +602,7 @@ static void tcg_out_vex_opc(TCGContext *s, int opc, int r, int v, > tmp |= (rm & 8 ? 0 : 0x20); /* VEX.B */ > tcg_out8(s, tmp); > > - tmp = (opc & P_REXW ? 0x80 : 0); /* VEX.W */ > + tmp = (opc & P_VEXW ? 0x80 : 0); /* VEX.W */ > } > > tmp |= (opc & P_VEXL ? 0x04 : 0); /* VEX.L */ These changes look OK as far as they go, but it's not clear to me why the other places that set P_REXW are all OK to use P_REXW and not P_VEXW. For instance tcg_out_mov() sets rexw = P_REXW and some of the codepaths there will then pass that into tcg_out_vex_modrm() which ends up in tcg_out_vex_opc(). More generally, is there somewhere we can assert that we didn't try to use a REXW prefix for i386 codegen rather than just silently ignoring it ? thanks -- PMM
On 8/13/21 12:37 AM, Peter Maydell wrote: > These changes look OK as far as they go, but it's not clear to > me why the other places that set P_REXW are all OK to use P_REXW > and not P_VEXW. For instance tcg_out_mov() sets rexw = P_REXW > and some of the codepaths there will then pass that into > tcg_out_vex_modrm() which ends up in tcg_out_vex_opc(). This distinguishes between 32-bit and 64-bit transfer between vector and general register. Which of course doesn't make sense for i386. > More generally, is there somewhere we can assert that we > didn't try to use a REXW prefix for i386 codegen rather > than just silently ignoring it ? I guess tcg_out_opc might be a place. But mostly we try to avoid generating those places in the first place. E.g. #if TCG_TARGET_REG_BITS == 64 # define OP_32_64(x) \ case glue(glue(INDEX_op_, x), _i64): \ rexw = P_REXW; /* FALLTHRU */ \ case glue(glue(INDEX_op_, x), _i32) #else # define OP_32_64(x) \ case glue(glue(INDEX_op_, x), _i32) #endif r~
On 8/13/21 6:59 AM, Richard Henderson wrote: > On 8/13/21 12:37 AM, Peter Maydell wrote: >> These changes look OK as far as they go, but it's not clear to >> me why the other places that set P_REXW are all OK to use P_REXW >> and not P_VEXW. For instance tcg_out_mov() sets rexw = P_REXW >> and some of the codepaths there will then pass that into >> tcg_out_vex_modrm() which ends up in tcg_out_vex_opc(). > > This distinguishes between 32-bit and 64-bit transfer between vector and general register. > Which of course doesn't make sense for i386. I read this again and realized it doesn't really clear things up. The older opcodes which originated with SSE used completely separate opcodes to talk about 64-bit quantities within the vector registers. E.g. #define OPC_PSLLW (0xf1 | P_EXT | P_DATA16) #define OPC_PSLLD (0xf2 | P_EXT | P_DATA16) #define OPC_PSLLQ (0xf3 | P_EXT | P_DATA16) or even #define OPC_MOVQ_VqWq (0x7e | P_EXT | P_SIMDF3) #define OPC_MOVQ_WqVq (0xd6 | P_EXT | P_DATA16) It's only with the newest AVX2 instructions that they decided to use VEX.W to talk about the size of the vector element as opposed to the size of the general register on the other end. Which includes the two vector shift with the shift amount coming from a vector argument (as opposed to immediate): #define OPC_VPSLLVD (0x47 | P_EXT38 | P_DATA16) #define OPC_VPSLLVQ (0x47 | P_EXT38 | P_DATA16 | P_VEXW) #define OPC_VPSRLVD (0x45 | P_EXT38 | P_DATA16) #define OPC_VPSRLVQ (0x45 | P_EXT38 | P_DATA16 | P_VEXW) I guess I can add Fixes: a2ce146a068 ("tcg/i386: Support vector variable shift opcodes") because I failed to consider that P_REXW was always 0 for 32-bit. r~
On Fri, 13 Aug 2021 at 17:59, Richard Henderson <richard.henderson@linaro.org> wrote: > > On 8/13/21 12:37 AM, Peter Maydell wrote: > > These changes look OK as far as they go, but it's not clear to > > me why the other places that set P_REXW are all OK to use P_REXW > > and not P_VEXW. For instance tcg_out_mov() sets rexw = P_REXW > > and some of the codepaths there will then pass that into > > tcg_out_vex_modrm() which ends up in tcg_out_vex_opc(). > > This distinguishes between 32-bit and 64-bit transfer between vector and general register. > Which of course doesn't make sense for i386. > > > More generally, is there somewhere we can assert that we > > didn't try to use a REXW prefix for i386 codegen rather > > than just silently ignoring it ? > > I guess tcg_out_opc might be a place. But mostly we try to avoid generating those places > in the first place. E.g. > > #if TCG_TARGET_REG_BITS == 64 > # define OP_32_64(x) \ > case glue(glue(INDEX_op_, x), _i64): \ > rexw = P_REXW; /* FALLTHRU */ \ > case glue(glue(INDEX_op_, x), _i32) > #else > # define OP_32_64(x) \ > case glue(glue(INDEX_op_, x), _i32) > #endif Right, if we do that everywhere we could make P_REXW the same value on 32-bit and 64-bit hosts and assert that P_REXW doesn't ever actually get passed to the functions where we look at it to generate code. It's only if there are codepaths which rely on P_REXW being 0 on i386 in order to not generate invalid code that an assert would get awkward... -- PMM
Ping for review, or this slips to 6.2. On 8/10/21 1:25 PM, Richard Henderson wrote: > We need to be able to represent VEX.W on a 32-bit host, where REX.W > will always be zero. Fixes the encoding for VPSLLVQ and VPSRLVQ. > > Resolves: https://gitlab.com/qemu-project/qemu/-/issues/385 > Signed-off-by: Richard Henderson <richard.henderson@linaro.org> > --- > tcg/i386/tcg-target.c.inc | 13 +++++++------ > 1 file changed, 7 insertions(+), 6 deletions(-) > > diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc > index 98d924b91a..997510109d 100644 > --- a/tcg/i386/tcg-target.c.inc > +++ b/tcg/i386/tcg-target.c.inc > @@ -241,8 +241,9 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct) > #define P_EXT 0x100 /* 0x0f opcode prefix */ > #define P_EXT38 0x200 /* 0x0f 0x38 opcode prefix */ > #define P_DATA16 0x400 /* 0x66 opcode prefix */ > +#define P_VEXW 0x1000 /* Set VEX.W = 1 */ > #if TCG_TARGET_REG_BITS == 64 > -# define P_REXW 0x1000 /* Set REX.W = 1 */ > +# define P_REXW P_VEXW /* Set REX.W = 1; match VEXW */ > # define P_REXB_R 0x2000 /* REG field as byte register */ > # define P_REXB_RM 0x4000 /* R/M field as byte register */ > # define P_GS 0x8000 /* gs segment override */ > @@ -410,13 +411,13 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct) > #define OPC_VPBROADCASTW (0x79 | P_EXT38 | P_DATA16) > #define OPC_VPBROADCASTD (0x58 | P_EXT38 | P_DATA16) > #define OPC_VPBROADCASTQ (0x59 | P_EXT38 | P_DATA16) > -#define OPC_VPERMQ (0x00 | P_EXT3A | P_DATA16 | P_REXW) > +#define OPC_VPERMQ (0x00 | P_EXT3A | P_DATA16 | P_VEXW) > #define OPC_VPERM2I128 (0x46 | P_EXT3A | P_DATA16 | P_VEXL) > #define OPC_VPSLLVD (0x47 | P_EXT38 | P_DATA16) > -#define OPC_VPSLLVQ (0x47 | P_EXT38 | P_DATA16 | P_REXW) > +#define OPC_VPSLLVQ (0x47 | P_EXT38 | P_DATA16 | P_VEXW) > #define OPC_VPSRAVD (0x46 | P_EXT38 | P_DATA16) > #define OPC_VPSRLVD (0x45 | P_EXT38 | P_DATA16) > -#define OPC_VPSRLVQ (0x45 | P_EXT38 | P_DATA16 | P_REXW) > 
+#define OPC_VPSRLVQ (0x45 | P_EXT38 | P_DATA16 | P_VEXW) > #define OPC_VZEROUPPER (0x77 | P_EXT) > #define OPC_XCHG_ax_r32 (0x90) > > @@ -576,7 +577,7 @@ static void tcg_out_vex_opc(TCGContext *s, int opc, int r, int v, > > /* Use the two byte form if possible, which cannot encode > VEX.W, VEX.B, VEX.X, or an m-mmmm field other than P_EXT. */ > - if ((opc & (P_EXT | P_EXT38 | P_EXT3A | P_REXW)) == P_EXT > + if ((opc & (P_EXT | P_EXT38 | P_EXT3A | P_VEXW)) == P_EXT > && ((rm | index) & 8) == 0) { > /* Two byte VEX prefix. */ > tcg_out8(s, 0xc5); > @@ -601,7 +602,7 @@ static void tcg_out_vex_opc(TCGContext *s, int opc, int r, int v, > tmp |= (rm & 8 ? 0 : 0x20); /* VEX.B */ > tcg_out8(s, tmp); > > - tmp = (opc & P_REXW ? 0x80 : 0); /* VEX.W */ > + tmp = (opc & P_VEXW ? 0x80 : 0); /* VEX.W */ > } > > tmp |= (opc & P_VEXL ? 0x04 : 0); /* VEX.L */ >
On Thu, 12 Aug 2021 at 19:29, Richard Henderson <richard.henderson@linaro.org> wrote: > > Ping for review, or this slips to 6.2. > > On 8/10/21 1:25 PM, Richard Henderson wrote: > > We need to be able to represent VEX.W on a 32-bit host, where REX.W > > will always be zero. Fixes the encoding for VPSLLVQ and VPSRLVQ. > > > > Resolves: https://gitlab.com/qemu-project/qemu/-/issues/385 > > Signed-off-by: Richard Henderson <richard.henderson@linaro.org> Bug report says this isn't a regression since 6.0, and reporter says there's still issues with this fix, so I think this is probably going to go into 6.2 anyway. At any rate, I don't think it's sufficiently release-critical to make us spin an rc4 if we weren't going to anyway. -- PMM
© 2016 - 2024 Red Hat, Inc.