[PATCH v2] target/arm/arch_dump: Add SVE notes

Andrew Jones posted 1 patch 4 years, 6 months ago
Test docker-mingw@fedora passed
Test checkpatch failed
Test docker-clang@ubuntu failed
Test asan passed
Test docker-quick@centos7 passed
Test FreeBSD passed
Patches applied successfully (tree, apply log)
git fetch https://github.com/patchew-project/qemu tags/patchew/20191004120313.5347-1-drjones@redhat.com
Maintainers: Peter Maydell <peter.maydell@linaro.org>
There is a newer version of this series
include/elf.h          |   2 +
target/arm/arch_dump.c | 135 ++++++++++++++++++++++++++++++++++++++++-
2 files changed, 135 insertions(+), 2 deletions(-)
[PATCH v2] target/arm/arch_dump: Add SVE notes
Posted by Andrew Jones 4 years, 6 months ago
When dumping a guest with dump-guest-memory also dump the SVE
registers if they are in use.

Signed-off-by: Andrew Jones <drjones@redhat.com>
---
 include/elf.h          |   2 +
 target/arm/arch_dump.c | 135 ++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 135 insertions(+), 2 deletions(-)

diff --git a/include/elf.h b/include/elf.h
index 3501e0c8d03a..a7c357af74ca 100644
--- a/include/elf.h
+++ b/include/elf.h
@@ -1650,6 +1650,8 @@ typedef struct elf64_shdr {
 #define NT_ARM_HW_BREAK 0x402           /* ARM hardware breakpoint registers */
 #define NT_ARM_HW_WATCH 0x403           /* ARM hardware watchpoint registers */
 #define NT_ARM_SYSTEM_CALL      0x404   /* ARM system call number */
+#define NT_ARM_SVE	0x405		/* ARM Scalable Vector Extension
+					   registers */
 
 /*
  * Physical entry point into the kernel.
diff --git a/target/arm/arch_dump.c b/target/arm/arch_dump.c
index 26a2c098687c..b02e398430b9 100644
--- a/target/arm/arch_dump.c
+++ b/target/arm/arch_dump.c
@@ -62,12 +62,23 @@ struct aarch64_user_vfp_state {
 
 QEMU_BUILD_BUG_ON(sizeof(struct aarch64_user_vfp_state) != 528);
 
+/* struct user_sve_header from arch/arm64/include/uapi/asm/ptrace.h */
+struct aarch64_user_sve_header {
+    uint32_t size;
+    uint32_t max_size;
+    uint16_t vl;
+    uint16_t max_vl;
+    uint16_t flags;
+    uint16_t reserved;
+} QEMU_PACKED;
+
 struct aarch64_note {
     Elf64_Nhdr hdr;
     char name[8]; /* align_up(sizeof("CORE"), 4) */
     union {
         struct aarch64_elf_prstatus prstatus;
         struct aarch64_user_vfp_state vfp;
+        struct aarch64_user_sve_header sve;
     };
 } QEMU_PACKED;
 
@@ -76,6 +87,8 @@ struct aarch64_note {
             (AARCH64_NOTE_HEADER_SIZE + sizeof(struct aarch64_elf_prstatus))
 #define AARCH64_PRFPREG_NOTE_SIZE \
             (AARCH64_NOTE_HEADER_SIZE + sizeof(struct aarch64_user_vfp_state))
+#define AARCH64_SVE_NOTE_SIZE(env) \
+            (AARCH64_NOTE_HEADER_SIZE + sve_size(env))
 
 static void aarch64_note_init(struct aarch64_note *note, DumpState *s,
                               const char *name, Elf64_Word namesz,
@@ -128,11 +141,113 @@ static int aarch64_write_elf64_prfpreg(WriteCoreDumpFunction f,
     return 0;
 }
 
+#ifdef TARGET_AARCH64
+static off_t sve_zreg_offset(uint32_t vq, int n)
+{
+    off_t off = sizeof(struct aarch64_user_sve_header);
+    return ROUND_UP(off, 16) + vq * 16 * n;
+}
+static off_t sve_preg_offset(uint32_t vq, int n)
+{
+    return sve_zreg_offset(vq, 32) + vq * 16 / 8 * n;
+}
+static off_t sve_fpsr_offset(uint32_t vq)
+{
+    off_t off = sve_preg_offset(vq, 17) + offsetof(struct aarch64_note, sve);
+    return ROUND_UP(off, 16) - offsetof(struct aarch64_note, sve);
+}
+static off_t sve_fpcr_offset(uint32_t vq)
+{
+    return sve_fpsr_offset(vq) + sizeof(uint32_t);
+}
+static uint32_t sve_current_vq(CPUARMState *env)
+{
+    return sve_zcr_len_for_el(env, arm_current_el(env)) + 1;
+}
+static size_t sve_size_vq(uint32_t vq)
+{
+    off_t off = sve_fpcr_offset(vq) + sizeof(uint32_t) +
+                offsetof(struct aarch64_note, sve);
+    return ROUND_UP(off, 16) - offsetof(struct aarch64_note, sve);
+}
+static size_t sve_size(CPUARMState *env)
+{
+    return sve_size_vq(sve_current_vq(env));
+}
+
+static int aarch64_write_elf64_sve(WriteCoreDumpFunction f,
+                                   CPUARMState *env, int cpuid,
+                                   DumpState *s)
+{
+    struct aarch64_note *note;
+    ARMCPU *cpu = env_archcpu(env);
+    uint32_t vq = sve_current_vq(env);
+    uint32_t fpr;
+    uint8_t *buf;
+    int ret, i;
+
+    note = g_malloc0(AARCH64_SVE_NOTE_SIZE(env));
+    buf = (uint8_t *)&note->sve;
+
+    aarch64_note_init(note, s, "LINUX", 6, NT_ARM_SVE, sve_size_vq(vq));
+
+    note->sve.size = cpu_to_dump32(s, sve_size_vq(vq));
+    note->sve.max_size = cpu_to_dump32(s, sve_size_vq(cpu->sve_max_vq));
+    note->sve.vl = cpu_to_dump16(s, vq * 16);
+    note->sve.max_vl = cpu_to_dump16(s, cpu->sve_max_vq * 16);
+    note->sve.flags = cpu_to_dump16(s, 1);
+
+    for (i = 0; i < 32; ++i) {
+#ifdef HOST_WORDS_BIGENDIAN
+        uint64_t d[vq * 2];
+        int j;
+
+        for (j = 0; j < vq * 2; ++j) {
+            d[j] = bswap64(env->vfp.zregs[i].d[j]);
+        }
+#else
+        uint64_t *d = &env->vfp.zregs[i].d[0];
+#endif
+        memcpy(&buf[sve_zreg_offset(vq, i)], &d[0], vq * 16);
+    }
+
+    for (i = 0; i < 17; ++i) {
+#ifdef HOST_WORDS_BIGENDIAN
+        uint64_t d[DIV_ROUND_UP(vq * 2, 8)];
+        int j;
+
+        for (j = 0; j < DIV_ROUND_UP(vq * 2, 8); ++j) {
+            d[j] = bswap64(env->vfp.pregs[i].p[j]);
+        }
+#else
+        uint64_t *d = &env->vfp.pregs[i].p[0];
+#endif
+        memcpy(&buf[sve_preg_offset(vq, i)], &d[0], vq * 16 / 8);
+    }
+
+    fpr = cpu_to_dump32(s, vfp_get_fpsr(env));
+    memcpy(&buf[sve_fpsr_offset(vq)], &fpr, sizeof(uint32_t));
+
+    fpr = cpu_to_dump32(s, vfp_get_fpcr(env));
+    memcpy(&buf[sve_fpcr_offset(vq)], &fpr, sizeof(uint32_t));
+
+    ret = f(note, AARCH64_SVE_NOTE_SIZE(env), s);
+    g_free(note);
+
+    if (ret < 0) {
+        return -1;
+    }
+
+    return 0;
+}
+#endif
+
 int arm_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs,
                              int cpuid, void *opaque)
 {
     struct aarch64_note note;
-    CPUARMState *env = &ARM_CPU(cs)->env;
+    ARMCPU *cpu = ARM_CPU(cs);
+    CPUARMState *env = &cpu->env;
     DumpState *s = opaque;
     uint64_t pstate, sp;
     int ret, i;
@@ -163,7 +278,18 @@ int arm_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs,
         return -1;
     }
 
-    return aarch64_write_elf64_prfpreg(f, env, cpuid, s);
+    ret = aarch64_write_elf64_prfpreg(f, env, cpuid, s);
+    if (ret) {
+        return ret;
+    }
+
+#ifdef TARGET_AARCH64
+    if (cpu_isar_feature(aa64_sve, cpu)) {
+        ret = aarch64_write_elf64_sve(f, env, cpuid, s);
+    }
+#endif
+
+    return ret;
 }
 
 /* struct pt_regs from arch/arm/include/asm/ptrace.h */
@@ -335,6 +461,11 @@ ssize_t cpu_get_note_size(int class, int machine, int nr_cpus)
     if (class == ELFCLASS64) {
         note_size = AARCH64_PRSTATUS_NOTE_SIZE;
         note_size += AARCH64_PRFPREG_NOTE_SIZE;
+#ifdef TARGET_AARCH64
+        if (cpu_isar_feature(aa64_sve, cpu)) {
+            note_size += AARCH64_SVE_NOTE_SIZE(env);
+        }
+#endif
     } else {
         note_size = ARM_PRSTATUS_NOTE_SIZE;
         if (arm_feature(env, ARM_FEATURE_VFP)) {
-- 
2.20.1


Re: [PATCH v2] target/arm/arch_dump: Add SVE notes
Posted by Richard Henderson 4 years, 6 months ago
On 10/4/19 8:03 AM, Andrew Jones wrote:
> +#ifdef TARGET_AARCH64
> +static off_t sve_zreg_offset(uint32_t vq, int n)
> +{
> +    off_t off = sizeof(struct aarch64_user_sve_header);
> +    return ROUND_UP(off, 16) + vq * 16 * n;
> +}
> +static off_t sve_preg_offset(uint32_t vq, int n)
> +{
> +    return sve_zreg_offset(vq, 32) + vq * 16 / 8 * n;
> +}
> +static off_t sve_fpsr_offset(uint32_t vq)
> +{
> +    off_t off = sve_preg_offset(vq, 17) + offsetof(struct aarch64_note, sve);
> +    return ROUND_UP(off, 16) - offsetof(struct aarch64_note, sve);
> +}
> +static off_t sve_fpcr_offset(uint32_t vq)
> +{
> +    return sve_fpsr_offset(vq) + sizeof(uint32_t);
> +}
> +static uint32_t sve_current_vq(CPUARMState *env)
> +{
> +    return sve_zcr_len_for_el(env, arm_current_el(env)) + 1;
> +}
> +static size_t sve_size_vq(uint32_t vq)
> +{
> +    off_t off = sve_fpcr_offset(vq) + sizeof(uint32_t) +
> +                offsetof(struct aarch64_note, sve);
> +    return ROUND_UP(off, 16) - offsetof(struct aarch64_note, sve);
> +}
> +static size_t sve_size(CPUARMState *env)
> +{
> +    return sve_size_vq(sve_current_vq(env));
> +}

Watch the missing spaces between functions.

> +    for (i = 0; i < 32; ++i) {
> +#ifdef HOST_WORDS_BIGENDIAN
> +        uint64_t d[vq * 2];
> +        int j;
> +
> +        for (j = 0; j < vq * 2; ++j) {
> +            d[j] = bswap64(env->vfp.zregs[i].d[j]);
> +        }
> +#else
> +        uint64_t *d = &env->vfp.zregs[i].d[0];
> +#endif
> +        memcpy(&buf[sve_zreg_offset(vq, i)], &d[0], vq * 16);
> +    }

We should avoid the variable sized array here.

It might be best to avoid the ifdef altogether:

    for (i = 0; i < 32; ++i) {
        uint64_t *d = (uint64_t *)&buf[sve_zreg_offset(vq, i)];
        for (j = 0; j < vq * 2; ++j) {
            d[j] = cpu_to_le64(env->vfp.zregs[i].d[j]);
        }
    }

The compiler may well transform the inner loop to memcpy for little-endian
host, but even if it doesn't core dumping is hardly performance sensitive.



r~

Re: [PATCH v2] target/arm/arch_dump: Add SVE notes
Posted by Andrew Jones 4 years, 6 months ago
On Wed, Oct 09, 2019 at 08:39:21PM -0400, Richard Henderson wrote:
> On 10/4/19 8:03 AM, Andrew Jones wrote:
> > +#ifdef TARGET_AARCH64
> > +static off_t sve_zreg_offset(uint32_t vq, int n)
> > +{
> > +    off_t off = sizeof(struct aarch64_user_sve_header);
> > +    return ROUND_UP(off, 16) + vq * 16 * n;
> > +}
> > +static off_t sve_preg_offset(uint32_t vq, int n)
> > +{
> > +    return sve_zreg_offset(vq, 32) + vq * 16 / 8 * n;
> > +}
> > +static off_t sve_fpsr_offset(uint32_t vq)
> > +{
> > +    off_t off = sve_preg_offset(vq, 17) + offsetof(struct aarch64_note, sve);
> > +    return ROUND_UP(off, 16) - offsetof(struct aarch64_note, sve);
> > +}
> > +static off_t sve_fpcr_offset(uint32_t vq)
> > +{
> > +    return sve_fpsr_offset(vq) + sizeof(uint32_t);
> > +}
> > +static uint32_t sve_current_vq(CPUARMState *env)
> > +{
> > +    return sve_zcr_len_for_el(env, arm_current_el(env)) + 1;
> > +}
> > +static size_t sve_size_vq(uint32_t vq)
> > +{
> > +    off_t off = sve_fpcr_offset(vq) + sizeof(uint32_t) +
> > +                offsetof(struct aarch64_note, sve);
> > +    return ROUND_UP(off, 16) - offsetof(struct aarch64_note, sve);
> > +}
> > +static size_t sve_size(CPUARMState *env)
> > +{
> > +    return sve_size_vq(sve_current_vq(env));
> > +}
> 
> Watch the missing spaces between functions.

I'll put in the blank lines

> 
> > +    for (i = 0; i < 32; ++i) {
> > +#ifdef HOST_WORDS_BIGENDIAN
> > +        uint64_t d[vq * 2];
> > +        int j;
> > +
> > +        for (j = 0; j < vq * 2; ++j) {
> > +            d[j] = bswap64(env->vfp.zregs[i].d[j]);
> > +        }
> > +#else
> > +        uint64_t *d = &env->vfp.zregs[i].d[0];
> > +#endif
> > +        memcpy(&buf[sve_zreg_offset(vq, i)], &d[0], vq * 16);
> > +    }
> 
> We should avoid the variable sized array here.
> 
> It might be best to avoid the ifdef altogether:
> 
>     for (i = 0; i < 32; ++i) {
>         uint64_t *d = (uint64_t *)&buf[sve_zreg_offset(vq, i)];
>         for (j = 0; j < vq * 2; ++j) {
>             d[j] = cpu_to_le64(env->vfp.zregs[i].d[j]);
>         }
>     }
> 
> The compiler may well transform the inner loop to memcpy for little-endian
> host, but even if it doesn't core dumping is hardly performance sensitive.

True. I even had something like the above at first, but then
overcomplicated it with the #ifdef-ing.

Thanks,
drew

Re: [PATCH v2] target/arm/arch_dump: Add SVE notes
Posted by Richard Henderson 4 years, 6 months ago
On 10/10/19 2:16 AM, Andrew Jones wrote:
>> It might be best to avoid the ifdef altogether:
>>
>>     for (i = 0; i < 32; ++i) {
>>         uint64_t *d = (uint64_t *)&buf[sve_zreg_offset(vq, i)];
>>         for (j = 0; j < vq * 2; ++j) {
>>             d[j] = cpu_to_le64(env->vfp.zregs[i].d[j]);
>>         }
>>     }
>>
>> The compiler may well transform the inner loop to memcpy for little-endian
>> host, but even if it doesn't core dumping is hardly performance sensitive.
> 
> True. I even had something like the above at first, but then
> overcomplicated it with the #ifdef-ing.

Ah, I wonder if you changed things around with the ifdefs due to the pregs.
There's no trivial solution for those.  It'd be nice to share the bswapping
subroutine that you add in the SVE KVM patch set, and size the temporary array
using ARM_MAX_VQ.


r~

Re: [PATCH v2] target/arm/arch_dump: Add SVE notes
Posted by Andrew Jones 4 years, 6 months ago
On Thu, Oct 10, 2019 at 01:33:02PM -0400, Richard Henderson wrote:
> On 10/10/19 2:16 AM, Andrew Jones wrote:
> >> It might be best to avoid the ifdef altogether:
> >>
> >>     for (i = 0; i < 32; ++i) {
> >>         uint64_t *d = (uint64_t *)&buf[sve_zreg_offset(vq, i)];
> >>         for (j = 0; j < vq * 2; ++j) {
> >>             d[j] = cpu_to_le64(env->vfp.zregs[i].d[j]);
> >>         }
> >>     }
> >>
> >> The compiler may well transform the inner loop to memcpy for little-endian
> >> host, but even if it doesn't core dumping is hardly performance sensitive.
> > 
> > True. I even had something like the above at first, but then
> > overcomplicated it with the #ifdef-ing.
> 
> Ah, I wonder if you changed things around with the ifdefs due to the pregs.
> There's no trivial solution for those.  It'd be nice to share the bswapping
> subroutine that you add in the SVE KVM patch set, and size the temporary array
> using ARM_MAX_VQ.

How about something like this squashed in?

Thanks,
drew

diff --git a/target/arm/arch_dump.c b/target/arm/arch_dump.c
index b02e398430b9..7c7fd23f4d94 100644
--- a/target/arm/arch_dump.c
+++ b/target/arm/arch_dump.c
@@ -182,6 +182,7 @@ static int aarch64_write_elf64_sve(WriteCoreDumpFunction f,
     struct aarch64_note *note;
     ARMCPU *cpu = env_archcpu(env);
     uint32_t vq = sve_current_vq(env);
+    uint64_t tmp[ARM_MAX_VQ * 2], *r;
     uint32_t fpr;
     uint8_t *buf;
     int ret, i;
@@ -198,31 +199,14 @@ static int aarch64_write_elf64_sve(WriteCoreDumpFunction f,
     note->sve.flags = cpu_to_dump16(s, 1);
 
     for (i = 0; i < 32; ++i) {
-#ifdef HOST_WORDS_BIGENDIAN
-        uint64_t d[vq * 2];
-        int j;
-
-        for (j = 0; j < vq * 2; ++j) {
-            d[j] = bswap64(env->vfp.zregs[i].d[j]);
-        }
-#else
-        uint64_t *d = &env->vfp.zregs[i].d[0];
-#endif
-        memcpy(&buf[sve_zreg_offset(vq, i)], &d[0], vq * 16);
+        r = sve_bswap64(tmp, &env->vfp.zregs[i].d[0], vq * 2);
+        memcpy(&buf[sve_zreg_offset(vq, i)], r, vq * 16);
     }
 
     for (i = 0; i < 17; ++i) {
-#ifdef HOST_WORDS_BIGENDIAN
-        uint64_t d[DIV_ROUND_UP(vq * 2, 8)];
-        int j;
-
-        for (j = 0; j < DIV_ROUND_UP(vq * 2, 8); ++j) {
-            d[j] = bswap64(env->vfp.pregs[i].p[j]);
-        }
-#else
-        uint64_t *d = &env->vfp.pregs[i].p[0];
-#endif
-        memcpy(&buf[sve_preg_offset(vq, i)], &d[0], vq * 16 / 8);
+        r = sve_bswap64(tmp, r = &env->vfp.pregs[i].p[0],
+                        DIV_ROUND_UP(vq * 2, 8));
+        memcpy(&buf[sve_preg_offset(vq, i)], r, vq * 16 / 8);
     }
 
     fpr = cpu_to_dump32(s, vfp_get_fpsr(env));
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 5b9c3e4cd73d..b3092e5213e6 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -975,6 +975,31 @@ void aarch64_sve_narrow_vq(CPUARMState *env, unsigned vq);
 void aarch64_sve_change_el(CPUARMState *env, int old_el,
                            int new_el, bool el0_a64);
 void aarch64_add_sve_properties(Object *obj);
+
+/*
+ * SVE registers are encoded in KVM's memory in an endianness-invariant format.
+ * The byte at offset i from the start of the in-memory representation contains
+ * the bits [(7 + 8 * i) : (8 * i)] of the register value. As this means the
+ * lowest offsets are stored in the lowest memory addresses, then that nearly
+ * matches QEMU's representation, which is to use an array of host-endian
+ * uint64_t's, where the lower offsets are at the lower indices. To complete
+ * the translation we just need to byte swap the uint64_t's on big-endian hosts.
+ */
+static inline uint64_t *sve_bswap64(uint64_t *dst, uint64_t *src, int nr)
+{
+#ifdef HOST_WORDS_BIGENDIAN
+    int i;
+
+    for (i = 0; i < nr; ++i) {
+        dst[i] = bswap64(src[i]);
+    }
+
+    return dst;
+#else
+    return src;
+#endif
+}
+
 #else
 static inline void aarch64_sve_narrow_vq(CPUARMState *env, unsigned vq) { }
 static inline void aarch64_sve_change_el(CPUARMState *env, int o,
diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c
index 876184b8fe4d..e2da756e65ed 100644
--- a/target/arm/kvm64.c
+++ b/target/arm/kvm64.c
@@ -876,30 +876,6 @@ static int kvm_arch_put_fpsimd(CPUState *cs)
     return 0;
 }
 
-/*
- * SVE registers are encoded in KVM's memory in an endianness-invariant format.
- * The byte at offset i from the start of the in-memory representation contains
- * the bits [(7 + 8 * i) : (8 * i)] of the register value. As this means the
- * lowest offsets are stored in the lowest memory addresses, then that nearly
- * matches QEMU's representation, which is to use an array of host-endian
- * uint64_t's, where the lower offsets are at the lower indices. To complete
- * the translation we just need to byte swap the uint64_t's on big-endian hosts.
- */
-static uint64_t *sve_bswap64(uint64_t *dst, uint64_t *src, int nr)
-{
-#ifdef HOST_WORDS_BIGENDIAN
-    int i;
-
-    for (i = 0; i < nr; ++i) {
-        dst[i] = bswap64(src[i]);
-    }
-
-    return dst;
-#else
-    return src;
-#endif
-}
-
 /*
  * KVM SVE registers come in slices where ZREGs have a slice size of 2048 bits
  * and PREGS and the FFR have a slice size of 256 bits. However we simply hard

Re: [PATCH v2] target/arm/arch_dump: Add SVE notes
Posted by Richard Henderson 4 years, 6 months ago
On 10/16/19 2:13 AM, Andrew Jones wrote:
> On Thu, Oct 10, 2019 at 01:33:02PM -0400, Richard Henderson wrote:
>> Ah, I wonder if you changed things around with the ifdefs due to the pregs.
>> There's no trivial solution for those.  It'd be nice to share the bswapping
>> subroutine that you add in the SVE KVM patch set, and size the temporary array
>> using ARM_MAX_VQ.
> 
> How about something like this squashed in?

Looks good.


r~


> 
> Thanks,
> drew
> 
> diff --git a/target/arm/arch_dump.c b/target/arm/arch_dump.c
> index b02e398430b9..7c7fd23f4d94 100644
> --- a/target/arm/arch_dump.c
> +++ b/target/arm/arch_dump.c
> @@ -182,6 +182,7 @@ static int aarch64_write_elf64_sve(WriteCoreDumpFunction f,
>      struct aarch64_note *note;
>      ARMCPU *cpu = env_archcpu(env);
>      uint32_t vq = sve_current_vq(env);
> +    uint64_t tmp[ARM_MAX_VQ * 2], *r;
>      uint32_t fpr;
>      uint8_t *buf;
>      int ret, i;
> @@ -198,31 +199,14 @@ static int aarch64_write_elf64_sve(WriteCoreDumpFunction f,
>      note->sve.flags = cpu_to_dump16(s, 1);
>  
>      for (i = 0; i < 32; ++i) {
> -#ifdef HOST_WORDS_BIGENDIAN
> -        uint64_t d[vq * 2];
> -        int j;
> -
> -        for (j = 0; j < vq * 2; ++j) {
> -            d[j] = bswap64(env->vfp.zregs[i].d[j]);
> -        }
> -#else
> -        uint64_t *d = &env->vfp.zregs[i].d[0];
> -#endif
> -        memcpy(&buf[sve_zreg_offset(vq, i)], &d[0], vq * 16);
> +        r = sve_bswap64(tmp, &env->vfp.zregs[i].d[0], vq * 2);
> +        memcpy(&buf[sve_zreg_offset(vq, i)], r, vq * 16);
>      }
>  
>      for (i = 0; i < 17; ++i) {
> -#ifdef HOST_WORDS_BIGENDIAN
> -        uint64_t d[DIV_ROUND_UP(vq * 2, 8)];
> -        int j;
> -
> -        for (j = 0; j < DIV_ROUND_UP(vq * 2, 8); ++j) {
> -            d[j] = bswap64(env->vfp.pregs[i].p[j]);
> -        }
> -#else
> -        uint64_t *d = &env->vfp.pregs[i].p[0];
> -#endif
> -        memcpy(&buf[sve_preg_offset(vq, i)], &d[0], vq * 16 / 8);
> +        r = sve_bswap64(tmp, r = &env->vfp.pregs[i].p[0],
> +                        DIV_ROUND_UP(vq * 2, 8));
> +        memcpy(&buf[sve_preg_offset(vq, i)], r, vq * 16 / 8);
>      }
>  
>      fpr = cpu_to_dump32(s, vfp_get_fpsr(env));
> diff --git a/target/arm/cpu.h b/target/arm/cpu.h
> index 5b9c3e4cd73d..b3092e5213e6 100644
> --- a/target/arm/cpu.h
> +++ b/target/arm/cpu.h
> @@ -975,6 +975,31 @@ void aarch64_sve_narrow_vq(CPUARMState *env, unsigned vq);
>  void aarch64_sve_change_el(CPUARMState *env, int old_el,
>                             int new_el, bool el0_a64);
>  void aarch64_add_sve_properties(Object *obj);
> +
> +/*
> + * SVE registers are encoded in KVM's memory in an endianness-invariant format.
> + * The byte at offset i from the start of the in-memory representation contains
> + * the bits [(7 + 8 * i) : (8 * i)] of the register value. As this means the
> + * lowest offsets are stored in the lowest memory addresses, then that nearly
> + * matches QEMU's representation, which is to use an array of host-endian
> + * uint64_t's, where the lower offsets are at the lower indices. To complete
> + * the translation we just need to byte swap the uint64_t's on big-endian hosts.
> + */
> +static inline uint64_t *sve_bswap64(uint64_t *dst, uint64_t *src, int nr)
> +{
> +#ifdef HOST_WORDS_BIGENDIAN
> +    int i;
> +
> +    for (i = 0; i < nr; ++i) {
> +        dst[i] = bswap64(src[i]);
> +    }
> +
> +    return dst;
> +#else
> +    return src;
> +#endif
> +}
> +
>  #else
>  static inline void aarch64_sve_narrow_vq(CPUARMState *env, unsigned vq) { }
>  static inline void aarch64_sve_change_el(CPUARMState *env, int o,
> diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c
> index 876184b8fe4d..e2da756e65ed 100644
> --- a/target/arm/kvm64.c
> +++ b/target/arm/kvm64.c
> @@ -876,30 +876,6 @@ static int kvm_arch_put_fpsimd(CPUState *cs)
>      return 0;
>  }
>  
> -/*
> - * SVE registers are encoded in KVM's memory in an endianness-invariant format.
> - * The byte at offset i from the start of the in-memory representation contains
> - * the bits [(7 + 8 * i) : (8 * i)] of the register value. As this means the
> - * lowest offsets are stored in the lowest memory addresses, then that nearly
> - * matches QEMU's representation, which is to use an array of host-endian
> - * uint64_t's, where the lower offsets are at the lower indices. To complete
> - * the translation we just need to byte swap the uint64_t's on big-endian hosts.
> - */
> -static uint64_t *sve_bswap64(uint64_t *dst, uint64_t *src, int nr)
> -{
> -#ifdef HOST_WORDS_BIGENDIAN
> -    int i;
> -
> -    for (i = 0; i < nr; ++i) {
> -        dst[i] = bswap64(src[i]);
> -    }
> -
> -    return dst;
> -#else
> -    return src;
> -#endif
> -}
> -
>  /*
>   * KVM SVE registers come in slices where ZREGs have a slice size of 2048 bits
>   * and PREGS and the FFR have a slice size of 256 bits. However we simply hard
> 


Re: [PATCH v2] target/arm/arch_dump: Add SVE notes
Posted by no-reply@patchew.org 4 years, 6 months ago
Patchew URL: https://patchew.org/QEMU/20191004120313.5347-1-drjones@redhat.com/



Hi,

This series seems to have some coding style problems. See output below for
more information:

Type: series
Message-id: 20191004120313.5347-1-drjones@redhat.com
Subject: [PATCH v2] target/arm/arch_dump: Add SVE notes

=== TEST SCRIPT BEGIN ===
#!/bin/bash
git rev-parse base > /dev/null || exit 0
git config --local diff.renamelimit 0
git config --local diff.renames True
git config --local diff.algorithm histogram
./scripts/checkpatch.pl --mailback base..
=== TEST SCRIPT END ===

Switched to a new branch 'test'
9d21a9a target/arm/arch_dump: Add SVE notes

=== OUTPUT BEGIN ===
ERROR: code indent should never use tabs
#21: FILE: include/elf.h:1653:
+#define NT_ARM_SVE^I0x405^I^I/* ARM Scalable Vector Extension$

WARNING: Block comments use a leading /* on a separate line
#21: FILE: include/elf.h:1653:
+#define NT_ARM_SVE     0x405           /* ARM Scalable Vector Extension

ERROR: code indent should never use tabs
#22: FILE: include/elf.h:1654:
+^I^I^I^I^I   registers */$

WARNING: Block comments use * on subsequent lines
#22: FILE: include/elf.h:1654:
+#define NT_ARM_SVE     0x405           /* ARM Scalable Vector Extension
+                                          registers */

WARNING: Block comments use a trailing */ on a separate line
#22: FILE: include/elf.h:1654:
+                                          registers */

total: 2 errors, 3 warnings, 183 lines checked

Commit 9d21a9a5cba2 (target/arm/arch_dump: Add SVE notes) has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.
=== OUTPUT END ===

Test command exited with code: 1


The full log is available at
http://patchew.org/logs/20191004120313.5347-1-drjones@redhat.com/testing.checkpatch/?type=message.
---
Email generated automatically by Patchew [https://patchew.org/].
Please send your feedback to patchew-devel@redhat.com