[Qemu-devel] [PATCH v2] Add AVX, AVX-512, MPX support to x86_cpu_dump_state

Doug Gale posted 1 patch 6 years, 3 months ago
Patches applied successfully (tree, apply log)
git fetch https://github.com/patchew-project/qemu tags/patchew/20171203063548.10297-1-doug16k@gmail.com
Test checkpatch passed
Test docker passed
Test ppc passed
Test s390x passed
target/i386/helper.c | 95 +++++++++++++++++++++++++++++++++++++++++++++-------
1 file changed, 83 insertions(+), 12 deletions(-)
[Qemu-devel] [PATCH v2] Add AVX, AVX-512, MPX support to x86_cpu_dump_state
Posted by Doug Gale 6 years, 3 months ago
Signed-off-by: Doug Gale <doug16k@gmail.com>
---
Fix MSB LSB showing when SSE is disabled
 target/i386/helper.c | 95 +++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 83 insertions(+), 12 deletions(-)

diff --git a/target/i386/helper.c b/target/i386/helper.c
index f63eb3d3f4..03812b6e87 100644
--- a/target/i386/helper.c
+++ b/target/i386/helper.c
@@ -543,6 +543,7 @@ void x86_cpu_dump_state(CPUState *cs, FILE *f, fprintf_function cpu_fprintf,
         }
     }
     cpu_fprintf(f, "EFER=%016" PRIx64 "\n", env->efer);
+    cpu_fprintf(f, "XCR0=%016" PRIx64 "\n", env->xcr0);
     if (flags & CPU_DUMP_FPU) {
         int fptag;
         fptag = 0;
@@ -565,21 +566,91 @@ void x86_cpu_dump_state(CPUState *cs, FILE *f, fprintf_function cpu_fprintf,
             else
                 cpu_fprintf(f, " ");
         }
-        if (env->hflags & HF_CS64_MASK)
-            nb = 16;
-        else
+
+        if (env->hflags & HF_CS64_MASK) {
+            if (env->xcr0 & XSTATE_Hi16_ZMM_MASK) {
+                /* AVX-512 32 registers enabled */
+                nb = 32;
+            } else {
+                /* 64-bit mode, 16 registers */
+                nb = 16;
+            }
+        } else {
+            /* 32 bit mode, 8 registers */
             nb = 8;
-        for(i=0;i<nb;i++) {
-            cpu_fprintf(f, "XMM%02d=%08x%08x%08x%08x",
-                        i,
-                        env->xmm_regs[i].ZMM_L(3),
-                        env->xmm_regs[i].ZMM_L(2),
-                        env->xmm_regs[i].ZMM_L(1),
-                        env->xmm_regs[i].ZMM_L(0));
-            if ((i & 1) == 1)
+        }
+
+        /* sse register width in units of 64 bits */
+        int zmm_width;
+        char zmm_name;
+        if (env->xcr0 & XSTATE_ZMM_Hi256_MASK) {
+            /* 512-bit "ZMM" - AVX-512 registers enabled */
+            zmm_width = 8;
+            zmm_name = 'Z';
+        } else if (env->xcr0 & XSTATE_YMM_MASK) {
+            /* 256-bit "YMM" - AVX enabled */
+            zmm_width = 4;
+            zmm_name = 'Y';
+        } else if (env->cr[4] & CR4_OSFXSR_MASK) {
+            /* 128-bit "XMM" - SSE enabled */
+            zmm_width = 2;
+            zmm_name = 'X';
+        } else {
+            /* SSE not enabled */
+            zmm_width = 0;
+            zmm_name = 0;
+        }
+
+        if (zmm_width > 0) {
+            cpu_fprintf(f, "      MSB%*sLSB\n",
+                        -(zmm_width * 16 + zmm_width - 7), "");
+        }
+
+        for (i = 0; i < nb; i++) {
+            if (zmm_width == 0) {
+                cpu_fprintf(f, "SSE not enabled\n");
+                break;
+            }
+
+            cpu_fprintf(f, "%cMM%02d=", zmm_name, i);
+            int qw;
+            for (qw = zmm_width; qw > 0; qw -= 2) {
+                /* ':' separator every 64 bits */
+                cpu_fprintf(f, "%016" PRIx64 ":%016" PRIx64 "%s",
+                            env->xmm_regs[i].ZMM_Q(qw - 1),
+                            env->xmm_regs[i].ZMM_Q(qw - 2),
+                            qw > 2 ? ":" : "");
+            }
+
+            /* two registers per line for 128-bit registers */
+            if (zmm_width > 2 || (i & 1)) {
                 cpu_fprintf(f, "\n");
-            else
+            } else {
                 cpu_fprintf(f, " ");
+            }
+        }
+
+        if (env->xcr0 & XSTATE_OPMASK_MASK) {
+            /* AVX-512 opmask registers */
+            for (i = 0; i < 8; ++i) {
+                cpu_fprintf(f, "K%d=%08" PRIx64 "%s", i, env->opmask_regs[i],
+                           (i & 3) == 3 ? "\n" : " ");
+            }
+        }
+
+        if (env->xcr0 & XSTATE_BNDREGS_MASK) {
+            /* MPX bound registers */
+            for (i = 0; i < 4; ++i) {
+                cpu_fprintf(f, "BND%d=%016" PRIx64 ":%016" PRIx64 "%s",
+                            i, env->bnd_regs[i].ub, env->bnd_regs[i].lb,
+                            (i & 1) ? "\n" : " ");
+            }
+        }
+
+        if (env->xcr0 & XSTATE_BNDCSR_MASK) {
+            /* MPX bound status/config registers */
+            cpu_fprintf(f, "BNDCFGU=%016" PRIx64 " BNDSTATUS=%016" PRIx64 "\n",
+                        env->bndcs_regs.cfgu, env->bndcs_regs.sts);
         }
     }
     if (flags & CPU_DUMP_CODE) {
-- 
2.14.1


Re: [Qemu-devel] [PATCH v2] Add AVX, AVX-512, MPX support to x86_cpu_dump_state
Posted by Richard Henderson 6 years, 3 months ago
On 12/02/2017 10:35 PM, Doug Gale wrote:
> Signed-off-by: Doug Gale <doug16k@gmail.com>
> ---
> Fix MSB LSB showing when SSE is disabled
>  target/i386/helper.c | 95 +++++++++++++++++++++++++++++++++++++++++++++-------
>  1 file changed, 83 insertions(+), 12 deletions(-)
> 
> diff --git a/target/i386/helper.c b/target/i386/helper.c
> index f63eb3d3f4..03812b6e87 100644
> --- a/target/i386/helper.c
> +++ b/target/i386/helper.c
> @@ -543,6 +543,7 @@ void x86_cpu_dump_state(CPUState *cs, FILE *f, fprintf_function cpu_fprintf,
>          }
>      }
>      cpu_fprintf(f, "EFER=%016" PRIx64 "\n", env->efer);
> +    cpu_fprintf(f, "XCR0=%016" PRIx64 "\n", env->xcr0);
>      if (flags & CPU_DUMP_FPU) {
>          int fptag;
>          fptag = 0;
> @@ -565,21 +566,91 @@ void x86_cpu_dump_state(CPUState *cs, FILE *f, fprintf_function cpu_fprintf,
>              else
>                  cpu_fprintf(f, " ");
>          }
> -        if (env->hflags & HF_CS64_MASK)
> -            nb = 16;
> -        else
> +
> +        if (env->hflags & HF_CS64_MASK) {
> +            if (env->xcr0 & XSTATE_Hi16_ZMM_MASK) {
> +                /* AVX-512 32 registers enabled */
> +                nb = 32;
> +            } else {
> +                /* 64-bit mode, 16 registers */
> +                nb = 16;
> +            }
> +        } else {
> +            /* 32 bit mode, 8 registers */
>              nb = 8;
> -        for(i=0;i<nb;i++) {
> -            cpu_fprintf(f, "XMM%02d=%08x%08x%08x%08x",
> -                        i,
> -                        env->xmm_regs[i].ZMM_L(3),
> -                        env->xmm_regs[i].ZMM_L(2),
> -                        env->xmm_regs[i].ZMM_L(1),
> -                        env->xmm_regs[i].ZMM_L(0));
> -            if ((i & 1) == 1)
> +        }
> +
> +        /* sse register width in units of 64 bits */
> +        int zmm_width;
> +        char zmm_name;
> +        if (env->xcr0 & XSTATE_ZMM_Hi256_MASK) {
> +            /* 512-bit "ZMM" - AVX-512 registers enabled */
> +            zmm_width = 8;
> +            zmm_name = 'Z';
> +        } else if (env->xcr0 & XSTATE_YMM_MASK) {
> +            /* 256-bit "YMM" - AVX enabled */
> +            zmm_width = 4;
> +            zmm_name = 'Y';
> +        } else if (env->cr[4] & CR4_OSFXSR_MASK) {
> +            /* 128-bit "XMM" - SSE enabled */
> +            zmm_width = 2;
> +            zmm_name = 'X';
> +        } else {
> +            /* SSE not enabled */
> +            zmm_width = 0;
> +            zmm_name = 0;

You should exit the function here ...

> +        }
> +
> +        if (zmm_width > 0) {
> +            cpu_fprintf(f, "      MSB%*sLSB\n",
> +                        -(zmm_width * 16 + zmm_width - 7), "");
> +        }
> +
> +        for (i = 0; i < nb; i++) {
> +            if (zmm_width == 0) {
> +                cpu_fprintf(f, "SSE not enabled\n");
> +                break;
> +            }

... rather than performing this test in a loop.
Or indeed, printing anything at all.

I assume this is in order to examine registers in the monitor under kvm?


r~

Re: [Qemu-devel] [PATCH v2] Add AVX, AVX-512, MPX support to x86_cpu_dump_state
Posted by Doug Gale 6 years, 3 months ago
On Thu, Dec 7, 2017 at 5:37 PM, Richard Henderson <
richard.henderson@linaro.org> wrote:

> On 12/02/2017 10:35 PM, Doug Gale wrote:
> > Signed-off-by: Doug Gale <doug16k@gmail.com>
> > ---
> > Fix MSB LSB showing when SSE is disabled
> >  target/i386/helper.c | 95 ++++++++++++++++++++++++++++++
> +++++++++++++++-------
> >  1 file changed, 83 insertions(+), 12 deletions(-)
> >
> > diff --git a/target/i386/helper.c b/target/i386/helper.c
> > index f63eb3d3f4..03812b6e87 100644
> > --- a/target/i386/helper.c
> > +++ b/target/i386/helper.c
> > @@ -543,6 +543,7 @@ void x86_cpu_dump_state(CPUState *cs, FILE *f,
> fprintf_function cpu_fprintf,
> >          }
> >      }
> >      cpu_fprintf(f, "EFER=%016" PRIx64 "\n", env->efer);
> > +    cpu_fprintf(f, "XCR0=%016" PRIx64 "\n", env->xcr0);
> >      if (flags & CPU_DUMP_FPU) {
> >          int fptag;
> >          fptag = 0;
> > @@ -565,21 +566,91 @@ void x86_cpu_dump_state(CPUState *cs, FILE *f,
> fprintf_function cpu_fprintf,
> >              else
> >                  cpu_fprintf(f, " ");
> >          }
> > -        if (env->hflags & HF_CS64_MASK)
> > -            nb = 16;
> > -        else
> > +
> > +        if (env->hflags & HF_CS64_MASK) {
> > +            if (env->xcr0 & XSTATE_Hi16_ZMM_MASK) {
> > +                /* AVX-512 32 registers enabled */
> > +                nb = 32;
> > +            } else {
> > +                /* 64-bit mode, 16 registers */
> > +                nb = 16;
> > +            }
> > +        } else {
> > +            /* 32 bit mode, 8 registers */
> >              nb = 8;
> > -        for(i=0;i<nb;i++) {
> > -            cpu_fprintf(f, "XMM%02d=%08x%08x%08x%08x",
> > -                        i,
> > -                        env->xmm_regs[i].ZMM_L(3),
> > -                        env->xmm_regs[i].ZMM_L(2),
> > -                        env->xmm_regs[i].ZMM_L(1),
> > -                        env->xmm_regs[i].ZMM_L(0));
> > -            if ((i & 1) == 1)
> > +        }
> > +
> > +        /* sse register width in units of 64 bits */
> > +        int zmm_width;
> > +        char zmm_name;
> > +        if (env->xcr0 & XSTATE_ZMM_Hi256_MASK) {
> > +            /* 512-bit "ZMM" - AVX-512 registers enabled */
> > +            zmm_width = 8;
> > +            zmm_name = 'Z';
> > +        } else if (env->xcr0 & XSTATE_YMM_MASK) {
> > +            /* 256-bit "YMM" - AVX enabled */
> > +            zmm_width = 4;
> > +            zmm_name = 'Y';
> > +        } else if (env->cr[4] & CR4_OSFXSR_MASK) {
> > +            /* 128-bit "XMM" - SSE enabled */
> > +            zmm_width = 2;
> > +            zmm_name = 'X';
> > +        } else {
> > +            /* SSE not enabled */
> > +            zmm_width = 0;
> > +            zmm_name = 0;
>
> You should exit the function here ...
>
> > +        }
> > +
> > +        if (zmm_width > 0) {
> > +            cpu_fprintf(f, "      MSB%*sLSB\n",
> > +                        -(zmm_width * 16 + zmm_width - 7), "");
> > +        }
> > +
> > +        for (i = 0; i < nb; i++) {
> > +            if (zmm_width == 0) {
> > +                cpu_fprintf(f, "SSE not enabled\n");
> > +                break;
> > +            }
>
> ... rather than performing this test in a loop.
> Or indeed, printing anything at all.
>
> I assume this is in order to examine registers in the monitor under kvm?
>
>
> r~
>

​
Yes, to examine the registers in the monitor and improve the output of
things like -d int. I ran into an issue with
​ guest​
code and I wanted to check XCR0 to see if AVX was enabled, and I didn't see
any AVX registers
​, which was very misleading​
.
​ I couldn't check XCR0 to see if it was enabled either.​
I investigated and found that AVX was enabled, and the monitor output was
false. I added XCR0 and the entire AVX
​/AVX-512​
and MPX state to the
​qemu ​
output.

Why would it exit the function there? Are you sure the other states I
further down won't be enabled? It prints the MPX state further down. I put
the test in the loop so I wouldn't have to indent that whole `for` loop
another level, to improve readability. Is the performance of that loop
really relevant? The fprintf's will take at least thousands of times more
CPU time than that predictable branch.​
Re: [Qemu-devel] [PATCH v2] Add AVX, AVX-512, MPX support to x86_cpu_dump_state
Posted by Dr. David Alan Gilbert 6 years, 2 months ago
* Doug Gale (doug16k@gmail.com) wrote:
> Signed-off-by: Doug Gale <doug16k@gmail.com>

Hi Doug,
  Had you ever seen the top half of YMM* be none-zero?
All my tests with this are showing it as zero even though in the guest
I'm seeing some cases of gdb being non-zero.
(I'm starting to suspect it's not the printing code)

Dave

> ---
> Fix MSB LSB showing when SSE is disabled
>  target/i386/helper.c | 95 +++++++++++++++++++++++++++++++++++++++++++++-------
>  1 file changed, 83 insertions(+), 12 deletions(-)
> 
> diff --git a/target/i386/helper.c b/target/i386/helper.c
> index f63eb3d3f4..03812b6e87 100644
> --- a/target/i386/helper.c
> +++ b/target/i386/helper.c
> @@ -543,6 +543,7 @@ void x86_cpu_dump_state(CPUState *cs, FILE *f, fprintf_function cpu_fprintf,
>          }
>      }
>      cpu_fprintf(f, "EFER=%016" PRIx64 "\n", env->efer);
> +    cpu_fprintf(f, "XCR0=%016" PRIx64 "\n", env->xcr0);
>      if (flags & CPU_DUMP_FPU) {
>          int fptag;
>          fptag = 0;
> @@ -565,21 +566,91 @@ void x86_cpu_dump_state(CPUState *cs, FILE *f, fprintf_function cpu_fprintf,
>              else
>                  cpu_fprintf(f, " ");
>          }
> -        if (env->hflags & HF_CS64_MASK)
> -            nb = 16;
> -        else
> +
> +        if (env->hflags & HF_CS64_MASK) {
> +            if (env->xcr0 & XSTATE_Hi16_ZMM_MASK) {
> +                /* AVX-512 32 registers enabled */
> +                nb = 32;
> +            } else {
> +                /* 64-bit mode, 16 registers */
> +                nb = 16;
> +            }
> +        } else {
> +            /* 32 bit mode, 8 registers */
>              nb = 8;
> -        for(i=0;i<nb;i++) {
> -            cpu_fprintf(f, "XMM%02d=%08x%08x%08x%08x",
> -                        i,
> -                        env->xmm_regs[i].ZMM_L(3),
> -                        env->xmm_regs[i].ZMM_L(2),
> -                        env->xmm_regs[i].ZMM_L(1),
> -                        env->xmm_regs[i].ZMM_L(0));
> -            if ((i & 1) == 1)
> +        }
> +
> +        /* sse register width in units of 64 bits */
> +        int zmm_width;
> +        char zmm_name;
> +        if (env->xcr0 & XSTATE_ZMM_Hi256_MASK) {
> +            /* 512-bit "ZMM" - AVX-512 registers enabled */
> +            zmm_width = 8;
> +            zmm_name = 'Z';
> +        } else if (env->xcr0 & XSTATE_YMM_MASK) {
> +            /* 256-bit "YMM" - AVX enabled */
> +            zmm_width = 4;
> +            zmm_name = 'Y';
> +        } else if (env->cr[4] & CR4_OSFXSR_MASK) {
> +            /* 128-bit "XMM" - SSE enabled */
> +            zmm_width = 2;
> +            zmm_name = 'X';
> +        } else {
> +            /* SSE not enabled */
> +            zmm_width = 0;
> +            zmm_name = 0;
> +        }
> +
> +        if (zmm_width > 0) {
> +            cpu_fprintf(f, "      MSB%*sLSB\n",
> +                        -(zmm_width * 16 + zmm_width - 7), "");
> +        }
> +
> +        for (i = 0; i < nb; i++) {
> +            if (zmm_width == 0) {
> +                cpu_fprintf(f, "SSE not enabled\n");
> +                break;
> +            }
> +
> +            cpu_fprintf(f, "%cMM%02d=", zmm_name, i);
> +            int qw;
> +            for (qw = zmm_width; qw > 0; qw -= 2) {
> +                /* ':' separator every 64 bits */
> +                cpu_fprintf(f, "%016" PRIx64 ":%016" PRIx64 "%s",
> +                            env->xmm_regs[i].ZMM_Q(qw - 1),
> +                            env->xmm_regs[i].ZMM_Q(qw - 2),
> +                            qw > 2 ? ":" : "");
> +            }
> +
> +            /* two registers per line for 128-bit registers */
> +            if (zmm_width > 2 || (i & 1)) {
>                  cpu_fprintf(f, "\n");
> -            else
> +            } else {
>                  cpu_fprintf(f, " ");
> +            }
> +        }
> +
> +        if (env->xcr0 & XSTATE_OPMASK_MASK) {
> +            /* AVX-512 opmask registers */
> +            for (i = 0; i < 8; ++i) {
> +                cpu_fprintf(f, "K%d=%08" PRIx64 "%s", i, env->opmask_regs[i],
> +                           (i & 3) == 3 ? "\n" : " ");
> +            }
> +        }
> +
> +        if (env->xcr0 & XSTATE_BNDREGS_MASK) {
> +            /* MPX bound registers */
> +            for (i = 0; i < 4; ++i) {
> +                cpu_fprintf(f, "BND%d=%016" PRIx64 ":%016" PRIx64 "%s",
> +                            i, env->bnd_regs[i].ub, env->bnd_regs[i].lb,
> +                            (i & 1) ? "\n" : " ");
> +            }
> +        }
> +
> +        if (env->xcr0 & XSTATE_BNDCSR_MASK) {
> +            /* MPX bound status/config registers */
> +            cpu_fprintf(f, "BNDCFGU=%016" PRIx64 " BNDSTATUS=%016" PRIx64 "\n",
> +                        env->bndcs_regs.cfgu, env->bndcs_regs.sts);
>          }
>      }
>      if (flags & CPU_DUMP_CODE) {
> -- 
> 2.14.1
> 
> 
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK