target/i386: fix operand order for PDEP and PEXT

[PATCH] target/i386: fix operand order for PDEP and PEXT

Posted by Paolo Bonzini 5 years, 2 months ago

For PDEP and PEXT, the mask is provided in the memory (mod+r/m)
operand, and therefore is loaded in s->T0 by gen_ldst_modrm.
The source is provided in the second source operand (VEX.vvvv)
and therefore is loaded in s->T1.  Fix the order in which
they are passed to the helpers.

Reported-by: Lenard Szolnoki <blog@lenardszolnoki.com>
Analyzed-by: Lenard Szolnoki <blog@lenardszolnoki.com>
Fixes: https://bugs.launchpad.net/qemu/+bug/1605123
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 target/i386/translate.c         |  8 +++----
 tests/tcg/i386/Makefile.target  |  4 ++++
 tests/tcg/i386/test-i386-bmi2.c | 40 +++++++++++++++++++++++++++++++++
 3 files changed, 48 insertions(+), 4 deletions(-)
 create mode 100644 tests/tcg/i386/test-i386-bmi2.c

diff --git a/target/i386/translate.c b/target/i386/translate.c
index 4c57307e42..e8f5f5803a 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -3936,14 +3936,14 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 }
                 ot = mo_64_32(s->dflag);
                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
-                /* Note that by zero-extending the mask operand, we
+                /* Note that by zero-extending the source operand, we
                    automatically handle zero-extending the result.  */
                 if (ot == MO_64) {
                     tcg_gen_mov_tl(s->T1, cpu_regs[s->vex_v]);
                 } else {
                     tcg_gen_ext32u_tl(s->T1, cpu_regs[s->vex_v]);
                 }
-                gen_helper_pdep(cpu_regs[reg], s->T0, s->T1);
+                gen_helper_pdep(cpu_regs[reg], s->T1, s->T0);
                 break;
 
             case 0x2f5: /* pext Gy, By, Ey */
@@ -3954,14 +3954,14 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 }
                 ot = mo_64_32(s->dflag);
                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
-                /* Note that by zero-extending the mask operand, we
+                /* Note that by zero-extending the source operand, we
                    automatically handle zero-extending the result.  */
                 if (ot == MO_64) {
                     tcg_gen_mov_tl(s->T1, cpu_regs[s->vex_v]);
                 } else {
                     tcg_gen_ext32u_tl(s->T1, cpu_regs[s->vex_v]);
                 }
-                gen_helper_pext(cpu_regs[reg], s->T0, s->T1);
+                gen_helper_pext(cpu_regs[reg], s->T1, s->T0);
                 break;
 
             case 0x1f6: /* adcx Gy, Ey */
diff --git a/tests/tcg/i386/Makefile.target b/tests/tcg/i386/Makefile.target
index a66232a67d..9f6f620944 100644
--- a/tests/tcg/i386/Makefile.target
+++ b/tests/tcg/i386/Makefile.target
@@ -18,6 +18,10 @@ test-i386-pcmpistri: CFLAGS += -msse4.2
 run-test-i386-pcmpistri: QEMU_OPTS += -cpu max
 run-plugin-test-i386-pcmpistri-%: QEMU_OPTS += -cpu max
 
+test-i386-bmi2: CFLAGS += -mbmi2
+run-test-i386-bmi2: QEMU_OPTS += -cpu max
+run-plugin-test-i386-bmi2-%: QEMU_OPTS += -cpu max
+
 #
 # hello-i386 is a barebones app
 #
diff --git a/tests/tcg/i386/test-i386-bmi2.c b/tests/tcg/i386/test-i386-bmi2.c
new file mode 100644
index 0000000000..e7a93b88b4
--- /dev/null
+++ b/tests/tcg/i386/test-i386-bmi2.c
@@ -0,0 +1,40 @@
+/* See if various BMI2 instructions give expected results */
+#include <assert.h>
+#include <stdint.h>
+
+int main(int argc, char *argv[]) {
+    char hello[16];
+    uint64_t ehlo = 0x202020204f4c4845ull;
+    uint64_t mask = 0xa080800302020001ull;
+    uint64_t result64;
+    uint32_t result32;
+
+    /* 64 bits */
+    asm volatile ("pextq   %2, %1, %0" : "=r"(result64) : "r"(ehlo), "m"(mask));
+    assert(result64 == 133);
+
+    asm volatile ("pdepq   %2, %1, %0" : "=r"(result64) : "r"(result64), "m"(mask));
+    assert(result64 == (ehlo & mask));
+
+    asm volatile ("pextq   %2, %1, %0" : "=r"(result64) : "r"(-1ull), "m"(mask));
+    assert(result64 == 511); /* mask has 9 bits set */
+
+    asm volatile ("pdepq   %2, %1, %0" : "=r"(result64) : "r"(-1ull), "m"(mask));
+    assert(result64 == mask);
+
+    /* 32 bits */
+    asm volatile ("pextl   %2, %k1, %k0" : "=r"(result32) : "r"(ehlo), "m"(mask));
+    assert(result32 == 5);
+
+    asm volatile ("pdepl   %2, %k1, %k0" : "=r"(result32) : "r"(result32), "m"(mask));
+    assert(result32 == (uint32_t)(ehlo & mask));
+
+    asm volatile ("pextl   %2, %k1, %k0" : "=r"(result32) : "r"(-1ull), "m"(mask));
+    assert(result32 == 7); /* mask has 3 bits set */
+
+    asm volatile ("pdepl   %2, %k1, %k0" : "=r"(result32) : "r"(-1ull), "m"(mask));
+    assert(result32 == (uint32_t)mask);
+
+    return 0;
+}
+
-- 
2.26.2

Re: [PATCH] target/i386: fix operand order for PDEP and PEXT

Posted by no-reply@patchew.org 5 years, 2 months ago

Patchew URL: https://patchew.org/QEMU/20201123131426.2725276-1-pbonzini@redhat.com/



Hi,

This series seems to have some coding style problems. See output below for
more information:

Type: series
Message-id: 20201123131426.2725276-1-pbonzini@redhat.com
Subject: [PATCH] target/i386: fix operand order for PDEP and PEXT

=== TEST SCRIPT BEGIN ===
#!/bin/bash
git rev-parse base > /dev/null || exit 0
git config --local diff.renamelimit 0
git config --local diff.renames True
git config --local diff.algorithm histogram
./scripts/checkpatch.pl --mailback base..
=== TEST SCRIPT END ===

Updating 3c8cf5a9c21ff8782164d1def7f44bd888713384
From https://github.com/patchew-project/qemu
   8cc30eb..6ab64cc  master     -> master
 * [new tag]         patchew/20201123131426.2725276-1-pbonzini@redhat.com -> patchew/20201123131426.2725276-1-pbonzini@redhat.com
Switched to a new branch 'test'
ee94689 target/i386: fix operand order for PDEP and PEXT

=== OUTPUT BEGIN ===
WARNING: Block comments use a leading /* on a separate line
#28: FILE: target/i386/translate.c:3939:
+                /* Note that by zero-extending the source operand, we

WARNING: Block comments use a leading /* on a separate line
#45: FILE: target/i386/translate.c:3957:
+                /* Note that by zero-extending the source operand, we

WARNING: added, moved or deleted file(s), does MAINTAINERS need updating?
#73: 
new file mode 100644

ERROR: open brace '{' following function declarations go on the next line
#82: FILE: tests/tcg/i386/test-i386-bmi2.c:5:
+int main(int argc, char *argv[]) {

WARNING: line over 80 characters
#93: FILE: tests/tcg/i386/test-i386-bmi2.c:16:
+    asm volatile ("pdepq   %2, %1, %0" : "=r"(result64) : "r"(result64), "m"(mask));

WARNING: line over 80 characters
#96: FILE: tests/tcg/i386/test-i386-bmi2.c:19:
+    asm volatile ("pextq   %2, %1, %0" : "=r"(result64) : "r"(-1ull), "m"(mask));

WARNING: line over 80 characters
#99: FILE: tests/tcg/i386/test-i386-bmi2.c:22:
+    asm volatile ("pdepq   %2, %1, %0" : "=r"(result64) : "r"(-1ull), "m"(mask));

WARNING: line over 80 characters
#103: FILE: tests/tcg/i386/test-i386-bmi2.c:26:
+    asm volatile ("pextl   %2, %k1, %k0" : "=r"(result32) : "r"(ehlo), "m"(mask));

WARNING: line over 80 characters
#106: FILE: tests/tcg/i386/test-i386-bmi2.c:29:
+    asm volatile ("pdepl   %2, %k1, %k0" : "=r"(result32) : "r"(result32), "m"(mask));

WARNING: line over 80 characters
#109: FILE: tests/tcg/i386/test-i386-bmi2.c:32:
+    asm volatile ("pextl   %2, %k1, %k0" : "=r"(result32) : "r"(-1ull), "m"(mask));

WARNING: line over 80 characters
#112: FILE: tests/tcg/i386/test-i386-bmi2.c:35:
+    asm volatile ("pdepl   %2, %k1, %k0" : "=r"(result32) : "r"(-1ull), "m"(mask));

total: 1 errors, 10 warnings, 82 lines checked

Commit ee94689b216b (target/i386: fix operand order for PDEP and PEXT) has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.
=== OUTPUT END ===

Test command exited with code: 1


The full log is available at
http://patchew.org/logs/20201123131426.2725276-1-pbonzini@redhat.com/testing.checkpatch/?type=message.
---
Email generated automatically by Patchew [https://patchew.org/].
Please send your feedback to patchew-devel@redhat.com

Re: [PATCH] target/i386: fix operand order for PDEP and PEXT

Posted by Richard Henderson 5 years, 2 months ago

On 11/23/20 5:14 AM, Paolo Bonzini wrote:
> For PDEP and PEXT, the mask is provided in the memory (mod+r/m)
> operand, and therefore is loaded in s->T0 by gen_ldst_modrm.
> The source is provided in the second source operand (VEX.vvvv)
> and therefore is loaded in s->T1.  Fix the order in which
> they are passed to the helpers.
> 
> Reported-by: Lenard Szolnoki <blog@lenardszolnoki.com>
> Analyzed-by: Lenard Szolnoki <blog@lenardszolnoki.com>
> Fixes: https://bugs.launchpad.net/qemu/+bug/1605123
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>

The patch itself looks fine.

> +test-i386-bmi2: CFLAGS += -mbmi2
> +run-test-i386-bmi2: QEMU_OPTS += -cpu max
> +run-plugin-test-i386-bmi2-%: QEMU_OPTS += -cpu max

I suspect that we still support host operating systems whose compilers do not
support -mbmi2.  This might require a bit in tests/tcg/configure.sh akin to
CROSS_CC_HAS_ARMV8_3.

> +int main(int argc, char *argv[]) {
> +    char hello[16];
> +    uint64_t ehlo = 0x202020204f4c4845ull;
> +    uint64_t mask = 0xa080800302020001ull;
> +    uint64_t result64;
> +    uint32_t result32;
> +
> +    /* 64 bits */
> +    asm volatile ("pextq   %2, %1, %0" : "=r"(result64) : "r"(ehlo), "m"(mask));
> +    assert(result64 == 133);

The test is written for x86_64, not i386.  How does the makefile prevent the
test case from being built and run for 32-bit targets?

> +    /* 32 bits */
> +    asm volatile ("pextl   %2, %k1, %k0" : "=r"(result32) : "r"(ehlo), "m"(mask));
> +    assert(result32 == 5);

Surely we should test the full 64-bit register result, and not truncate to
uint32_t in the output variable?


r~

Re: [PATCH] target/i386: fix operand order for PDEP and PEXT

Posted by Paolo Bonzini 5 years, 2 months ago

On 24/11/20 18:54, Richard Henderson wrote:
>> +test-i386-bmi2: CFLAGS += -mbmi2
>> +run-test-i386-bmi2: QEMU_OPTS += -cpu max
>> +run-plugin-test-i386-bmi2-%: QEMU_OPTS += -cpu max
> I suspect that we still support host operating systems whose compilers do not
> support -mbmi2.  This might require a bit in tests/tcg/configure.sh akin to
> CROSS_CC_HAS_ARMV8_3.
> 

Actually, -mbmi2 should not be needed since (unlike SSE or AVX) the
instructions use normal registers.  Only the assembler has to recognize
the mnemonics, and at least RHEL7 (binutils 2.27) supports them.  So I'll
just remove the flag; that should be enough.

Paolo