[PATCH V3] arm/kvm: add support for MTE

Ganapatrao Kulkarni posted 1 patch 1 month ago
There is a newer version of this series
hw/arm/virt.c        | 72 ++++++++++++++++++++++++++------------------
target/arm/cpu.c     | 11 +++++--
target/arm/cpu.h     |  2 ++
target/arm/kvm.c     | 57 +++++++++++++++++++++++++++++++++++
target/arm/kvm_arm.h | 19 ++++++++++++
5 files changed, 129 insertions(+), 32 deletions(-)
[PATCH V3] arm/kvm: add support for MTE
Posted by Ganapatrao Kulkarni 1 month ago
Extend the 'mte' property for the virt machine to cover KVM as
well. For KVM, we don't allocate tag memory, but instead enable
the capability.

If MTE has been enabled, we need to disable migration, as we do not
yet have a way to migrate the tags as well. Therefore, MTE will stay
off with KVM unless requested explicitly.

This patch is rework of commit b320e21c48ce64853904bea6631c0158cc2ef227
which broke TCG since it made the TCG -cpu max
report the presence of MTE to the guest even if the board hadn't
enabled MTE by wiring up the tag RAM. This meant that if the guest
then tried to use MTE QEMU would segfault accessing the
non-existent tag RAM.

Signed-off-by: Cornelia Huck <cohuck@redhat.com>
Signed-off-by: Ganapatrao Kulkarni <gankulkarni@os.amperecomputing.com>
---

Changes since V2:
	Updated with review comments.

Changes since V1:
	Added code to enable MTE before reading register
id_aa64pfr1 (unmasked MTE bits).

This patch is boot tested on ARM64 with KVM and on X86 with TCG for mte=on
and default case(i.e, no mte).

 hw/arm/virt.c        | 72 ++++++++++++++++++++++++++------------------
 target/arm/cpu.c     | 11 +++++--
 target/arm/cpu.h     |  2 ++
 target/arm/kvm.c     | 57 +++++++++++++++++++++++++++++++++++
 target/arm/kvm_arm.h | 19 ++++++++++++
 5 files changed, 129 insertions(+), 32 deletions(-)

diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 7934b23651..a33af7d996 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -2211,7 +2211,7 @@ static void machvirt_init(MachineState *machine)
         exit(1);
     }
 
-    if (vms->mte && (kvm_enabled() || hvf_enabled())) {
+    if (vms->mte && hvf_enabled()) {
         error_report("mach-virt: %s does not support providing "
                      "MTE to the guest CPU",
                      current_accel_name());
@@ -2281,39 +2281,51 @@ static void machvirt_init(MachineState *machine)
         }
 
         if (vms->mte) {
-            /* Create the memory region only once, but link to all cpus. */
-            if (!tag_sysmem) {
-                /*
-                 * The property exists only if MemTag is supported.
-                 * If it is, we must allocate the ram to back that up.
-                 */
-                if (!object_property_find(cpuobj, "tag-memory")) {
-                    error_report("MTE requested, but not supported "
-                                 "by the guest CPU");
-                    exit(1);
+            if (tcg_enabled()) {
+                /* Create the memory region only once, but link to all cpus. */
+                if (!tag_sysmem) {
+                    /*
+                     * The property exists only if MemTag is supported.
+                     * If it is, we must allocate the ram to back that up.
+                     */
+                    if (!object_property_find(cpuobj, "tag-memory")) {
+                        error_report("MTE requested, but not supported "
+                                     "by the guest CPU");
+                        exit(1);
+                    }
+
+                    tag_sysmem = g_new(MemoryRegion, 1);
+                    memory_region_init(tag_sysmem, OBJECT(machine),
+                                       "tag-memory", UINT64_MAX / 32);
+
+                    if (vms->secure) {
+                        secure_tag_sysmem = g_new(MemoryRegion, 1);
+                        memory_region_init(secure_tag_sysmem, OBJECT(machine),
+                                           "secure-tag-memory",
+                                           UINT64_MAX / 32);
+
+                        /* As with ram, secure-tag takes precedence over tag. */
+                        memory_region_add_subregion_overlap(secure_tag_sysmem,
+                                                            0, tag_sysmem, -1);
+                    }
                 }
 
-                tag_sysmem = g_new(MemoryRegion, 1);
-                memory_region_init(tag_sysmem, OBJECT(machine),
-                                   "tag-memory", UINT64_MAX / 32);
-
+                object_property_set_link(cpuobj, "tag-memory",
+                                         OBJECT(tag_sysmem), &error_abort);
                 if (vms->secure) {
-                    secure_tag_sysmem = g_new(MemoryRegion, 1);
-                    memory_region_init(secure_tag_sysmem, OBJECT(machine),
-                                       "secure-tag-memory", UINT64_MAX / 32);
-
-                    /* As with ram, secure-tag takes precedence over tag.  */
-                    memory_region_add_subregion_overlap(secure_tag_sysmem, 0,
-                                                        tag_sysmem, -1);
+                    object_property_set_link(cpuobj, "secure-tag-memory",
+                                             OBJECT(secure_tag_sysmem),
+                                             &error_abort);
                 }
-            }
-
-            object_property_set_link(cpuobj, "tag-memory", OBJECT(tag_sysmem),
-                                     &error_abort);
-            if (vms->secure) {
-                object_property_set_link(cpuobj, "secure-tag-memory",
-                                         OBJECT(secure_tag_sysmem),
-                                         &error_abort);
+            } else if (kvm_enabled()) {
+                if (!kvm_arm_mte_supported()) {
+                    error_report("MTE requested, but not supported by KVM");
+                    exit(1);
+                }
+                kvm_arm_enable_mte(cpuobj, &error_abort);
+            } else {
+                    error_report("MTE requested, but not supported ");
+                    exit(1);
             }
         }
 
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index 19191c2391..8a2fc471ce 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -2390,14 +2390,21 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
 
 #ifndef CONFIG_USER_ONLY
         /*
-         * If we do not have tag-memory provided by the machine,
+         * If we do not have tag-memory provided by the TCG,
          * reduce MTE support to instructions enabled at EL0.
          * This matches Cortex-A710 BROADCASTMTE input being LOW.
          */
-        if (cpu->tag_memory == NULL) {
+        if (tcg_enabled() && cpu->tag_memory == NULL) {
             cpu->isar.id_aa64pfr1 =
                 FIELD_DP64(cpu->isar.id_aa64pfr1, ID_AA64PFR1, MTE, 1);
         }
+
+        /*
+         * Clear MTE bits, if not enabled in KVM mode.
+         */
+        if (kvm_enabled() && !cpu->kvm_mte) {
+                FIELD_DP64(cpu->isar.id_aa64pfr1, ID_AA64PFR1, MTE, 0);
+        }
 #endif
     }
 
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index f065756c5c..8fc8b6398f 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -922,6 +922,8 @@ struct ArchCPU {
 
     /* CPU has memory protection unit */
     bool has_mpu;
+    /* CPU has MTE enabled in KVM mode */
+    bool kvm_mte;
     /* PMSAv7 MPU number of supported regions */
     uint32_t pmsav7_dregion;
     /* PMSAv8 MPU number of supported hyp regions */
diff --git a/target/arm/kvm.c b/target/arm/kvm.c
index 849e2e21b3..af7a98517d 100644
--- a/target/arm/kvm.c
+++ b/target/arm/kvm.c
@@ -39,6 +39,7 @@
 #include "hw/acpi/acpi.h"
 #include "hw/acpi/ghes.h"
 #include "target/arm/gtimer.h"
+#include "migration/blocker.h"
 
 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
     KVM_CAP_LAST_INFO
@@ -119,6 +120,20 @@ bool kvm_arm_create_scratch_host_vcpu(const uint32_t *cpus_to_try,
     if (vmfd < 0) {
         goto err;
     }
+
+    /*
+     * MTE capability must be enabled by the VMM before creating
+     * any VCPUs. The MTE bits of the ID_AA64PFR1 register are masked
+     * if MTE is not enabled, allowing them to be probed correctly.
+     */
+    if (kvm_arm_mte_supported()) {
+        KVMState kvm_state;
+
+        kvm_state.fd = kvmfd;
+        kvm_state.vmfd = vmfd;
+        kvm_vm_enable_cap(&kvm_state, KVM_CAP_ARM_MTE, 0);
+    }
+
     cpufd = ioctl(vmfd, KVM_CREATE_VCPU, 0);
     if (cpufd < 0) {
         goto err;
@@ -1793,6 +1808,11 @@ bool kvm_arm_sve_supported(void)
     return kvm_check_extension(kvm_state, KVM_CAP_ARM_SVE);
 }
 
+bool kvm_arm_mte_supported(void)
+{
+    return kvm_check_extension(kvm_state, KVM_CAP_ARM_MTE);
+}
+
 QEMU_BUILD_BUG_ON(KVM_ARM64_SVE_VQ_MIN != 1);
 
 uint32_t kvm_arm_sve_get_vls(ARMCPU *cpu)
@@ -2417,3 +2437,40 @@ int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
     }
     return 0;
 }
+
+void kvm_arm_enable_mte(Object *cpuobj, Error **errp)
+{
+    static bool tried_to_enable;
+    static bool succeeded_to_enable;
+    Error *mte_migration_blocker = NULL;
+    ARMCPU *cpu = ARM_CPU(cpuobj);
+    int ret;
+
+    if (!tried_to_enable) {
+        /*
+         * MTE on KVM is enabled on a per-VM basis (and retrying doesn't make
+         * sense), and we only want a single migration blocker as well.
+         */
+        tried_to_enable = true;
+
+        ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_ARM_MTE, 0);
+        if (ret) {
+            error_setg_errno(errp, -ret, "Failed to enable KVM_CAP_ARM_MTE");
+            return;
+        }
+
+        /* TODO: Add migration support with MTE enabled */
+        error_setg(&mte_migration_blocker,
+                   "Live migration disabled due to MTE enabled");
+        if (migrate_add_blocker(&mte_migration_blocker, errp)) {
+            error_free(mte_migration_blocker);
+            return;
+        }
+
+        succeeded_to_enable = true;
+    }
+
+    if (succeeded_to_enable) {
+        cpu->kvm_mte = true;
+    }
+}
diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h
index cfaa0d9bc7..4d293618a7 100644
--- a/target/arm/kvm_arm.h
+++ b/target/arm/kvm_arm.h
@@ -188,6 +188,13 @@ bool kvm_arm_pmu_supported(void);
  */
 bool kvm_arm_sve_supported(void);
 
+/**
+ * kvm_arm_mte_supported:
+ *
+ * Returns: true if KVM can enable MTE, and false otherwise.
+ */
+bool kvm_arm_mte_supported(void);
+
 /**
  * kvm_arm_get_max_vm_ipa_size:
  * @ms: Machine state handle
@@ -214,6 +221,8 @@ void kvm_arm_pvtime_init(ARMCPU *cpu, uint64_t ipa);
 
 int kvm_arm_set_irq(int cpu, int irqtype, int irq, int level);
 
+void kvm_arm_enable_mte(Object *cpuobj, Error **errp);
+
 #else
 
 /*
@@ -235,6 +244,11 @@ static inline bool kvm_arm_sve_supported(void)
     return false;
 }
 
+static inline bool kvm_arm_mte_supported(void)
+{
+    return false;
+}
+
 /*
  * These functions should never actually be called without KVM support.
  */
@@ -283,6 +297,11 @@ static inline uint32_t kvm_arm_sve_get_vls(ARMCPU *cpu)
     g_assert_not_reached();
 }
 
+static inline void kvm_arm_enable_mte(Object *cpuobj, Error **errp)
+{
+    g_assert_not_reached();
+}
+
 #endif
 
 #endif
-- 
2.44.0
Re: [PATCH V3] arm/kvm: add support for MTE
Posted by Gustavo Romero 3 weeks, 6 days ago
Hi Ganapatrao,

Sorry for the delay on replying it. I was attending KVM Forum and commuting.

On 9/20/24 09:37, Ganapatrao Kulkarni wrote:
> Extend the 'mte' property for the virt machine to cover KVM as
> well. For KVM, we don't allocate tag memory, but instead enable
> the capability.
>
> If MTE has been enabled, we need to disable migration, as we do not
> yet have a way to migrate the tags as well. Therefore, MTE will stay
> off with KVM unless requested explicitly.
>
> This patch is rework of commit b320e21c48ce64853904bea6631c0158cc2ef227
> which broke TCG since it made the TCG -cpu max
> report the presence of MTE to the guest even if the board hadn't
> enabled MTE by wiring up the tag RAM. This meant that if the guest
> then tried to use MTE QEMU would segfault accessing the
> non-existent tag RAM.
>
> Signed-off-by: Cornelia Huck <cohuck@redhat.com>
> Signed-off-by: Ganapatrao Kulkarni <gankulkarni@os.amperecomputing.com>
> ---
>
> Changes since V2:
> 	Updated with review comments.
>
> Changes since V1:
> 	Added code to enable MTE before reading register
> id_aa64pfr1 (unmasked MTE bits).
>
> This patch is boot tested on ARM64 with KVM and on X86 with TCG for mte=on
> and default case(i.e, no mte).
>
>   hw/arm/virt.c        | 72 ++++++++++++++++++++++++++------------------
>   target/arm/cpu.c     | 11 +++++--
>   target/arm/cpu.h     |  2 ++
>   target/arm/kvm.c     | 57 +++++++++++++++++++++++++++++++++++
>   target/arm/kvm_arm.h | 19 ++++++++++++
>   5 files changed, 129 insertions(+), 32 deletions(-)
>
> diff --git a/hw/arm/virt.c b/hw/arm/virt.c
> index 7934b23651..a33af7d996 100644
> --- a/hw/arm/virt.c
> +++ b/hw/arm/virt.c
> @@ -2211,7 +2211,7 @@ static void machvirt_init(MachineState *machine)
>           exit(1);
>       }
>   
> -    if (vms->mte && (kvm_enabled() || hvf_enabled())) {
> +    if (vms->mte && hvf_enabled()) {
>           error_report("mach-virt: %s does not support providing "
>                        "MTE to the guest CPU",
>                        current_accel_name());
> @@ -2281,39 +2281,51 @@ static void machvirt_init(MachineState *machine)
>           }
>   
>           if (vms->mte) {
> -            /* Create the memory region only once, but link to all cpus. */
> -            if (!tag_sysmem) {
> -                /*
> -                 * The property exists only if MemTag is supported.
> -                 * If it is, we must allocate the ram to back that up.
> -                 */
> -                if (!object_property_find(cpuobj, "tag-memory")) {
> -                    error_report("MTE requested, but not supported "
> -                                 "by the guest CPU");
> -                    exit(1);
> +            if (tcg_enabled()) {
> +                /* Create the memory region only once, but link to all cpus. */
> +                if (!tag_sysmem) {
> +                    /*
> +                     * The property exists only if MemTag is supported.
> +                     * If it is, we must allocate the ram to back that up.
> +                     */
> +                    if (!object_property_find(cpuobj, "tag-memory")) {
> +                        error_report("MTE requested, but not supported "
> +                                     "by the guest CPU");
> +                        exit(1);
> +                    }
> +
> +                    tag_sysmem = g_new(MemoryRegion, 1);
> +                    memory_region_init(tag_sysmem, OBJECT(machine),
> +                                       "tag-memory", UINT64_MAX / 32);
> +
> +                    if (vms->secure) {
> +                        secure_tag_sysmem = g_new(MemoryRegion, 1);
> +                        memory_region_init(secure_tag_sysmem, OBJECT(machine),
> +                                           "secure-tag-memory",
> +                                           UINT64_MAX / 32);
> +
> +                        /* As with ram, secure-tag takes precedence over tag. */
> +                        memory_region_add_subregion_overlap(secure_tag_sysmem,
> +                                                            0, tag_sysmem, -1);
> +                    }
>                   }
>   
> -                tag_sysmem = g_new(MemoryRegion, 1);
> -                memory_region_init(tag_sysmem, OBJECT(machine),
> -                                   "tag-memory", UINT64_MAX / 32);
> -
> +                object_property_set_link(cpuobj, "tag-memory",
> +                                         OBJECT(tag_sysmem), &error_abort);
>                   if (vms->secure) {
> -                    secure_tag_sysmem = g_new(MemoryRegion, 1);
> -                    memory_region_init(secure_tag_sysmem, OBJECT(machine),
> -                                       "secure-tag-memory", UINT64_MAX / 32);
> -
> -                    /* As with ram, secure-tag takes precedence over tag.  */
> -                    memory_region_add_subregion_overlap(secure_tag_sysmem, 0,
> -                                                        tag_sysmem, -1);
> +                    object_property_set_link(cpuobj, "secure-tag-memory",
> +                                             OBJECT(secure_tag_sysmem),
> +                                             &error_abort);
>                   }
> -            }
> -
> -            object_property_set_link(cpuobj, "tag-memory", OBJECT(tag_sysmem),
> -                                     &error_abort);
> -            if (vms->secure) {
> -                object_property_set_link(cpuobj, "secure-tag-memory",
> -                                         OBJECT(secure_tag_sysmem),
> -                                         &error_abort);
> +            } else if (kvm_enabled()) {
> +                if (!kvm_arm_mte_supported()) {
> +                    error_report("MTE requested, but not supported by KVM");
> +                    exit(1);
> +                }
> +                kvm_arm_enable_mte(cpuobj, &error_abort);
> +            } else {
> +                    error_report("MTE requested, but not supported ");
> +                    exit(1);
>               }
>           }
>   
> diff --git a/target/arm/cpu.c b/target/arm/cpu.c
> index 19191c2391..8a2fc471ce 100644
> --- a/target/arm/cpu.c
> +++ b/target/arm/cpu.c
> @@ -2390,14 +2390,21 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
>   
>   #ifndef CONFIG_USER_ONLY
>           /*
> -         * If we do not have tag-memory provided by the machine,
> +         * If we do not have tag-memory provided by the TCG,
>            * reduce MTE support to instructions enabled at EL0.
>            * This matches Cortex-A710 BROADCASTMTE input being LOW.
>            */
> -        if (cpu->tag_memory == NULL) {
> +        if (tcg_enabled() && cpu->tag_memory == NULL) {
>               cpu->isar.id_aa64pfr1 =
>                   FIELD_DP64(cpu->isar.id_aa64pfr1, ID_AA64PFR1, MTE, 1);
>           }
> +
> +        /*
> +         * Clear MTE bits, if not enabled in KVM mode.
> +         */
> +        if (kvm_enabled() && !cpu->kvm_mte) {
> +                FIELD_DP64(cpu->isar.id_aa64pfr1, ID_AA64PFR1, MTE, 0);
> +        }

I 've discussed a bit with Richard about the need of setting the MTE 
field here

to 0. This is already a reduction since it's inside the condition block:

if (cpu_isar_feature(aa64_mte, cpu)) { ... }, which is only taken if we 
already

have MTE field >= 1. At this point the MTE bits in cpu->isar.id_aa64pfr1 
should

already be set correctly accordingly to the host bits since 
kvm_arm_get_host_cpu_features()

was called.


The check for TCG (cpu->tag_memory == NULL) exists because even if MTE 
instructions

are supported by the CPU it's possible that the machine's memory does 
not support tags,

but we don't check for that in KVM afaics.


For KVM, isn't the cpu->isar.id_aa64pfr1 MTE bits already correct here 
so we don't need to touch them?

cpu->kvm_mte is false here only if kvm_check_extension(kvm_state, 
KVM_CAP_ARM_MTE) is true but

kvm_vm_enable_cap(kvm_state, KVM_CAP_ARM_MTE, 0) is false -- can that 
happen ever happen? _or_ when

we can't block migration? So basically we are telling to the guest that 
MTE is not supported if we have

MTE supported in the host _but_ we can't block migration?



Cheers,

Gustavo

>   #endif
>       }
>   
> diff --git a/target/arm/cpu.h b/target/arm/cpu.h
> index f065756c5c..8fc8b6398f 100644
> --- a/target/arm/cpu.h
> +++ b/target/arm/cpu.h
> @@ -922,6 +922,8 @@ struct ArchCPU {
>   
>       /* CPU has memory protection unit */
>       bool has_mpu;
> +    /* CPU has MTE enabled in KVM mode */
> +    bool kvm_mte;
>       /* PMSAv7 MPU number of supported regions */
>       uint32_t pmsav7_dregion;
>       /* PMSAv8 MPU number of supported hyp regions */
> diff --git a/target/arm/kvm.c b/target/arm/kvm.c
> index 849e2e21b3..af7a98517d 100644
> --- a/target/arm/kvm.c
> +++ b/target/arm/kvm.c
> @@ -39,6 +39,7 @@
>   #include "hw/acpi/acpi.h"
>   #include "hw/acpi/ghes.h"
>   #include "target/arm/gtimer.h"
> +#include "migration/blocker.h"
>   
>   const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
>       KVM_CAP_LAST_INFO
> @@ -119,6 +120,20 @@ bool kvm_arm_create_scratch_host_vcpu(const uint32_t *cpus_to_try,
>       if (vmfd < 0) {
>           goto err;
>       }
> +
> +    /*
> +     * MTE capability must be enabled by the VMM before creating
> +     * any VCPUs. The MTE bits of the ID_AA64PFR1 register are masked
> +     * if MTE is not enabled, allowing them to be probed correctly.
> +     */
> +    if (kvm_arm_mte_supported()) {
> +        KVMState kvm_state;
> +
> +        kvm_state.fd = kvmfd;
> +        kvm_state.vmfd = vmfd;
> +        kvm_vm_enable_cap(&kvm_state, KVM_CAP_ARM_MTE, 0);
> +    }
> +
>       cpufd = ioctl(vmfd, KVM_CREATE_VCPU, 0);
>       if (cpufd < 0) {
>           goto err;
> @@ -1793,6 +1808,11 @@ bool kvm_arm_sve_supported(void)
>       return kvm_check_extension(kvm_state, KVM_CAP_ARM_SVE);
>   }
>   
> +bool kvm_arm_mte_supported(void)
> +{
> +    return kvm_check_extension(kvm_state, KVM_CAP_ARM_MTE);
> +}
> +
>   QEMU_BUILD_BUG_ON(KVM_ARM64_SVE_VQ_MIN != 1);
>   
>   uint32_t kvm_arm_sve_get_vls(ARMCPU *cpu)
> @@ -2417,3 +2437,40 @@ int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
>       }
>       return 0;
>   }
> +
> +void kvm_arm_enable_mte(Object *cpuobj, Error **errp)
> +{
> +    static bool tried_to_enable;
> +    static bool succeeded_to_enable;
> +    Error *mte_migration_blocker = NULL;
> +    ARMCPU *cpu = ARM_CPU(cpuobj);
> +    int ret;
> +
> +    if (!tried_to_enable) {
> +        /*
> +         * MTE on KVM is enabled on a per-VM basis (and retrying doesn't make
> +         * sense), and we only want a single migration blocker as well.
> +         */
> +        tried_to_enable = true;
> +
> +        ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_ARM_MTE, 0);
> +        if (ret) {
> +            error_setg_errno(errp, -ret, "Failed to enable KVM_CAP_ARM_MTE");
> +            return;
> +        }
> +
> +        /* TODO: Add migration support with MTE enabled */
> +        error_setg(&mte_migration_blocker,
> +                   "Live migration disabled due to MTE enabled");
> +        if (migrate_add_blocker(&mte_migration_blocker, errp)) {
> +            error_free(mte_migration_blocker);
> +            return;
> +        }
> +
> +        succeeded_to_enable = true;
> +    }
> +
> +    if (succeeded_to_enable) {
> +        cpu->kvm_mte = true;
> +    }
> +}
> diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h
> index cfaa0d9bc7..4d293618a7 100644
> --- a/target/arm/kvm_arm.h
> +++ b/target/arm/kvm_arm.h
> @@ -188,6 +188,13 @@ bool kvm_arm_pmu_supported(void);
>    */
>   bool kvm_arm_sve_supported(void);
>   
> +/**
> + * kvm_arm_mte_supported:
> + *
> + * Returns: true if KVM can enable MTE, and false otherwise.
> + */
> +bool kvm_arm_mte_supported(void);
> +
>   /**
>    * kvm_arm_get_max_vm_ipa_size:
>    * @ms: Machine state handle
> @@ -214,6 +221,8 @@ void kvm_arm_pvtime_init(ARMCPU *cpu, uint64_t ipa);
>   
>   int kvm_arm_set_irq(int cpu, int irqtype, int irq, int level);
>   
> +void kvm_arm_enable_mte(Object *cpuobj, Error **errp);
> +
>   #else
>   
>   /*
> @@ -235,6 +244,11 @@ static inline bool kvm_arm_sve_supported(void)
>       return false;
>   }
>   
> +static inline bool kvm_arm_mte_supported(void)
> +{
> +    return false;
> +}
> +
>   /*
>    * These functions should never actually be called without KVM support.
>    */
> @@ -283,6 +297,11 @@ static inline uint32_t kvm_arm_sve_get_vls(ARMCPU *cpu)
>       g_assert_not_reached();
>   }
>   
> +static inline void kvm_arm_enable_mte(Object *cpuobj, Error **errp)
> +{
> +    g_assert_not_reached();
> +}
> +
>   #endif
>   
>   #endif

Re: [PATCH V3] arm/kvm: add support for MTE
Posted by Ganapatrao Kulkarni 3 weeks, 6 days ago
Hi Gustavo,

On 25-09-2024 09:17 pm, Gustavo Romero wrote:
> Hi Ganapatrao,
> 
> Sorry for the delay on replying it. I was attending KVM Forum and 
> commuting.
> 
> On 9/20/24 09:37, Ganapatrao Kulkarni wrote:
>> Extend the 'mte' property for the virt machine to cover KVM as
>> well. For KVM, we don't allocate tag memory, but instead enable
>> the capability.
>>
>> If MTE has been enabled, we need to disable migration, as we do not
>> yet have a way to migrate the tags as well. Therefore, MTE will stay
>> off with KVM unless requested explicitly.
>>
>> This patch is rework of commit b320e21c48ce64853904bea6631c0158cc2ef227
>> which broke TCG since it made the TCG -cpu max
>> report the presence of MTE to the guest even if the board hadn't
>> enabled MTE by wiring up the tag RAM. This meant that if the guest
>> then tried to use MTE QEMU would segfault accessing the
>> non-existent tag RAM.
>>
>> Signed-off-by: Cornelia Huck <cohuck@redhat.com>
>> Signed-off-by: Ganapatrao Kulkarni <gankulkarni@os.amperecomputing.com>
>> ---
>>
>> Changes since V2:
>>     Updated with review comments.
>>
>> Changes since V1:
>>     Added code to enable MTE before reading register
>> id_aa64pfr1 (unmasked MTE bits).
>>
>> This patch is boot tested on ARM64 with KVM and on X86 with TCG for 
>> mte=on
>> and default case(i.e, no mte).
>>
>>   hw/arm/virt.c        | 72 ++++++++++++++++++++++++++------------------
>>   target/arm/cpu.c     | 11 +++++--
>>   target/arm/cpu.h     |  2 ++
>>   target/arm/kvm.c     | 57 +++++++++++++++++++++++++++++++++++
>>   target/arm/kvm_arm.h | 19 ++++++++++++
>>   5 files changed, 129 insertions(+), 32 deletions(-)
>>
>> diff --git a/hw/arm/virt.c b/hw/arm/virt.c
>> index 7934b23651..a33af7d996 100644
>> --- a/hw/arm/virt.c
>> +++ b/hw/arm/virt.c
>> @@ -2211,7 +2211,7 @@ static void machvirt_init(MachineState *machine)
>>           exit(1);
>>       }
>> -    if (vms->mte && (kvm_enabled() || hvf_enabled())) {
>> +    if (vms->mte && hvf_enabled()) {
>>           error_report("mach-virt: %s does not support providing "
>>                        "MTE to the guest CPU",
>>                        current_accel_name());
>> @@ -2281,39 +2281,51 @@ static void machvirt_init(MachineState *machine)
>>           }
>>           if (vms->mte) {
>> -            /* Create the memory region only once, but link to all 
>> cpus. */
>> -            if (!tag_sysmem) {
>> -                /*
>> -                 * The property exists only if MemTag is supported.
>> -                 * If it is, we must allocate the ram to back that up.
>> -                 */
>> -                if (!object_property_find(cpuobj, "tag-memory")) {
>> -                    error_report("MTE requested, but not supported "
>> -                                 "by the guest CPU");
>> -                    exit(1);
>> +            if (tcg_enabled()) {
>> +                /* Create the memory region only once, but link to 
>> all cpus. */
>> +                if (!tag_sysmem) {
>> +                    /*
>> +                     * The property exists only if MemTag is supported.
>> +                     * If it is, we must allocate the ram to back 
>> that up.
>> +                     */
>> +                    if (!object_property_find(cpuobj, "tag-memory")) {
>> +                        error_report("MTE requested, but not supported "
>> +                                     "by the guest CPU");
>> +                        exit(1);
>> +                    }
>> +
>> +                    tag_sysmem = g_new(MemoryRegion, 1);
>> +                    memory_region_init(tag_sysmem, OBJECT(machine),
>> +                                       "tag-memory", UINT64_MAX / 32);
>> +
>> +                    if (vms->secure) {
>> +                        secure_tag_sysmem = g_new(MemoryRegion, 1);
>> +                        memory_region_init(secure_tag_sysmem, 
>> OBJECT(machine),
>> +                                           "secure-tag-memory",
>> +                                           UINT64_MAX / 32);
>> +
>> +                        /* As with ram, secure-tag takes precedence 
>> over tag. */
>> +                        
>> memory_region_add_subregion_overlap(secure_tag_sysmem,
>> +                                                            0, 
>> tag_sysmem, -1);
>> +                    }
>>                   }
>> -                tag_sysmem = g_new(MemoryRegion, 1);
>> -                memory_region_init(tag_sysmem, OBJECT(machine),
>> -                                   "tag-memory", UINT64_MAX / 32);
>> -
>> +                object_property_set_link(cpuobj, "tag-memory",
>> +                                         OBJECT(tag_sysmem), 
>> &error_abort);
>>                   if (vms->secure) {
>> -                    secure_tag_sysmem = g_new(MemoryRegion, 1);
>> -                    memory_region_init(secure_tag_sysmem, 
>> OBJECT(machine),
>> -                                       "secure-tag-memory", 
>> UINT64_MAX / 32);
>> -
>> -                    /* As with ram, secure-tag takes precedence over 
>> tag.  */
>> -                    
>> memory_region_add_subregion_overlap(secure_tag_sysmem, 0,
>> -                                                        tag_sysmem, -1);
>> +                    object_property_set_link(cpuobj, 
>> "secure-tag-memory",
>> +                                             OBJECT(secure_tag_sysmem),
>> +                                             &error_abort);
>>                   }
>> -            }
>> -
>> -            object_property_set_link(cpuobj, "tag-memory", 
>> OBJECT(tag_sysmem),
>> -                                     &error_abort);
>> -            if (vms->secure) {
>> -                object_property_set_link(cpuobj, "secure-tag-memory",
>> -                                         OBJECT(secure_tag_sysmem),
>> -                                         &error_abort);
>> +            } else if (kvm_enabled()) {
>> +                if (!kvm_arm_mte_supported()) {
>> +                    error_report("MTE requested, but not supported by 
>> KVM");
>> +                    exit(1);
>> +                }
>> +                kvm_arm_enable_mte(cpuobj, &error_abort);
>> +            } else {
>> +                    error_report("MTE requested, but not supported ");
>> +                    exit(1);
>>               }
>>           }
>> diff --git a/target/arm/cpu.c b/target/arm/cpu.c
>> index 19191c2391..8a2fc471ce 100644
>> --- a/target/arm/cpu.c
>> +++ b/target/arm/cpu.c
>> @@ -2390,14 +2390,21 @@ static void arm_cpu_realizefn(DeviceState 
>> *dev, Error **errp)
>>   #ifndef CONFIG_USER_ONLY
>>           /*
>> -         * If we do not have tag-memory provided by the machine,
>> +         * If we do not have tag-memory provided by the TCG,
>>            * reduce MTE support to instructions enabled at EL0.
>>            * This matches Cortex-A710 BROADCASTMTE input being LOW.
>>            */
>> -        if (cpu->tag_memory == NULL) {
>> +        if (tcg_enabled() && cpu->tag_memory == NULL) {
>>               cpu->isar.id_aa64pfr1 =
>>                   FIELD_DP64(cpu->isar.id_aa64pfr1, ID_AA64PFR1, MTE, 1);
>>           }
>> +
>> +        /*
>> +         * Clear MTE bits, if not enabled in KVM mode.
>> +         */
>> +        if (kvm_enabled() && !cpu->kvm_mte) {
>> +                FIELD_DP64(cpu->isar.id_aa64pfr1, ID_AA64PFR1, MTE, 0);
>> +        }
> 
> I 've discussed a bit with Richard about the need of setting the MTE 
> field here
> 
> to 0. This is already a reduction since it's inside the condition block:
> 
> if (cpu_isar_feature(aa64_mte, cpu)) { ... }, which is only taken if we 
> already
> 
> have MTE field >= 1. At this point the MTE bits in cpu->isar.id_aa64pfr1 
> should
> 
> already be set correctly accordingly to the host bits since 
> kvm_arm_get_host_cpu_features()
> 
> was called.
> 
> 
> The check for TCG (cpu->tag_memory == NULL) exists because even if MTE 
> instructions
> 
> are supported by the CPU it's possible that the machine's memory does 
> not support tags,
> 
> but we don't check for that in KVM afaics.
> 
> 
> For KVM, isn't the cpu->isar.id_aa64pfr1 MTE bits already correct here 
> so we don't need to touch them?

id_aa64pfr1.MTE bits are masked/zero when we read without enabling the 
MTE(KVM_CAP_ARM_MTE). Hence to read the real value we are enabling MTE 
while reading the register.

Later, If user is not passed argument "mte=on", we want to revert the 
value that is read with MTE enabled since we are enabling MTE on actual 
VM  only if "mte=on".

Are you saying, let us have the MTE enabled for KVM by default and 
disable only if user passes mte=off?.

> 
> cpu->kvm_mte is false here only if kvm_check_extension(kvm_state, 
> KVM_CAP_ARM_MTE) is true but
> 
> kvm_vm_enable_cap(kvm_state, KVM_CAP_ARM_MTE, 0) is false -- can that 
> happen ever happen? _or_ when

This can be, if hardware is not supporting it.
Since user is trying to enable with mte=on but hardware is not capable, 
in that case qemu aborts with error message.

> 
> we can't block migration? So basically we are telling to the guest that 
> MTE is not supported if we have
> 
> MTE supported in the host _but_ we can't block migration?
> 
> 
> 
> Cheers,
> 
> Gustavo
> 

-- 
Thanks,
Ganapat/GK

Re: [PATCH V3] arm/kvm: add support for MTE
Posted by Gustavo Romero 3 weeks, 5 days ago
Hi Ganapatrao,

On 9/25/24 19:40, Ganapatrao Kulkarni wrote:
>
> Hi Gustavo,
>
> On 25-09-2024 09:17 pm, Gustavo Romero wrote:
>> Hi Ganapatrao,
>>
>> Sorry for the delay on replying it. I was attending KVM Forum and 
>> commuting.
>>
>> On 9/20/24 09:37, Ganapatrao Kulkarni wrote:
>>> Extend the 'mte' property for the virt machine to cover KVM as
>>> well. For KVM, we don't allocate tag memory, but instead enable
>>> the capability.
>>>
>>> If MTE has been enabled, we need to disable migration, as we do not
>>> yet have a way to migrate the tags as well. Therefore, MTE will stay
>>> off with KVM unless requested explicitly.
>>>
>>> This patch is rework of commit b320e21c48ce64853904bea6631c0158cc2ef227
>>> which broke TCG since it made the TCG -cpu max
>>> report the presence of MTE to the guest even if the board hadn't
>>> enabled MTE by wiring up the tag RAM. This meant that if the guest
>>> then tried to use MTE QEMU would segfault accessing the
>>> non-existent tag RAM.
>>>
>>> Signed-off-by: Cornelia Huck <cohuck@redhat.com>
>>> Signed-off-by: Ganapatrao Kulkarni <gankulkarni@os.amperecomputing.com>
>>> ---
>>>
>>> Changes since V2:
>>>     Updated with review comments.
>>>
>>> Changes since V1:
>>>     Added code to enable MTE before reading register
>>> id_aa64pfr1 (unmasked MTE bits).
>>>
>>> This patch is boot tested on ARM64 with KVM and on X86 with TCG for 
>>> mte=on
>>> and default case(i.e, no mte).
>>>
>>>   hw/arm/virt.c        | 72 
>>> ++++++++++++++++++++++++++------------------
>>>   target/arm/cpu.c     | 11 +++++--
>>>   target/arm/cpu.h     |  2 ++
>>>   target/arm/kvm.c     | 57 +++++++++++++++++++++++++++++++++++
>>>   target/arm/kvm_arm.h | 19 ++++++++++++
>>>   5 files changed, 129 insertions(+), 32 deletions(-)
>>>
>>> diff --git a/hw/arm/virt.c b/hw/arm/virt.c
>>> index 7934b23651..a33af7d996 100644
>>> --- a/hw/arm/virt.c
>>> +++ b/hw/arm/virt.c
>>> @@ -2211,7 +2211,7 @@ static void machvirt_init(MachineState *machine)
>>>           exit(1);
>>>       }
>>> -    if (vms->mte && (kvm_enabled() || hvf_enabled())) {
>>> +    if (vms->mte && hvf_enabled()) {
>>>           error_report("mach-virt: %s does not support providing "
>>>                        "MTE to the guest CPU",
>>>                        current_accel_name());
>>> @@ -2281,39 +2281,51 @@ static void machvirt_init(MachineState 
>>> *machine)
>>>           }
>>>           if (vms->mte) {
>>> -            /* Create the memory region only once, but link to all 
>>> cpus. */
>>> -            if (!tag_sysmem) {
>>> -                /*
>>> -                 * The property exists only if MemTag is supported.
>>> -                 * If it is, we must allocate the ram to back that up.
>>> -                 */
>>> -                if (!object_property_find(cpuobj, "tag-memory")) {
>>> -                    error_report("MTE requested, but not supported "
>>> -                                 "by the guest CPU");
>>> -                    exit(1);
>>> +            if (tcg_enabled()) {
>>> +                /* Create the memory region only once, but link to 
>>> all cpus. */
>>> +                if (!tag_sysmem) {
>>> +                    /*
>>> +                     * The property exists only if MemTag is 
>>> supported.
>>> +                     * If it is, we must allocate the ram to back 
>>> that up.
>>> +                     */
>>> +                    if (!object_property_find(cpuobj, "tag-memory")) {
>>> +                        error_report("MTE requested, but not 
>>> supported "
>>> +                                     "by the guest CPU");
>>> +                        exit(1);
>>> +                    }
>>> +
>>> +                    tag_sysmem = g_new(MemoryRegion, 1);
>>> +                    memory_region_init(tag_sysmem, OBJECT(machine),
>>> +                                       "tag-memory", UINT64_MAX / 32);
>>> +
>>> +                    if (vms->secure) {
>>> +                        secure_tag_sysmem = g_new(MemoryRegion, 1);
>>> +                        memory_region_init(secure_tag_sysmem, 
>>> OBJECT(machine),
>>> + "secure-tag-memory",
>>> +                                           UINT64_MAX / 32);
>>> +
>>> +                        /* As with ram, secure-tag takes precedence 
>>> over tag. */
>>> + memory_region_add_subregion_overlap(secure_tag_sysmem,
>>> + 0, tag_sysmem, -1);
>>> +                    }
>>>                   }
>>> -                tag_sysmem = g_new(MemoryRegion, 1);
>>> -                memory_region_init(tag_sysmem, OBJECT(machine),
>>> -                                   "tag-memory", UINT64_MAX / 32);
>>> -
>>> +                object_property_set_link(cpuobj, "tag-memory",
>>> +                                         OBJECT(tag_sysmem), 
>>> &error_abort);
>>>                   if (vms->secure) {
>>> -                    secure_tag_sysmem = g_new(MemoryRegion, 1);
>>> -                    memory_region_init(secure_tag_sysmem, 
>>> OBJECT(machine),
>>> -                                       "secure-tag-memory", 
>>> UINT64_MAX / 32);
>>> -
>>> -                    /* As with ram, secure-tag takes precedence 
>>> over tag.  */
>>> - memory_region_add_subregion_overlap(secure_tag_sysmem, 0,
>>> - tag_sysmem, -1);
>>> +                    object_property_set_link(cpuobj, 
>>> "secure-tag-memory",
>>> + OBJECT(secure_tag_sysmem),
>>> + &error_abort);
>>>                   }
>>> -            }
>>> -
>>> -            object_property_set_link(cpuobj, "tag-memory", 
>>> OBJECT(tag_sysmem),
>>> -                                     &error_abort);
>>> -            if (vms->secure) {
>>> -                object_property_set_link(cpuobj, "secure-tag-memory",
>>> - OBJECT(secure_tag_sysmem),
>>> -                                         &error_abort);
>>> +            } else if (kvm_enabled()) {
>>> +                if (!kvm_arm_mte_supported()) {
>>> +                    error_report("MTE requested, but not supported 
>>> by KVM");
>>> +                    exit(1);
>>> +                }
>>> +                kvm_arm_enable_mte(cpuobj, &error_abort);
>>> +            } else {
>>> +                    error_report("MTE requested, but not supported ");
>>> +                    exit(1);
>>>               }
>>>           }
>>> diff --git a/target/arm/cpu.c b/target/arm/cpu.c
>>> index 19191c2391..8a2fc471ce 100644
>>> --- a/target/arm/cpu.c
>>> +++ b/target/arm/cpu.c
>>> @@ -2390,14 +2390,21 @@ static void arm_cpu_realizefn(DeviceState 
>>> *dev, Error **errp)
>>>   #ifndef CONFIG_USER_ONLY
>>>           /*
>>> -         * If we do not have tag-memory provided by the machine,
>>> +         * If we do not have tag-memory provided by the TCG,
>>>            * reduce MTE support to instructions enabled at EL0.
>>>            * This matches Cortex-A710 BROADCASTMTE input being LOW.
>>>            */
>>> -        if (cpu->tag_memory == NULL) {
>>> +        if (tcg_enabled() && cpu->tag_memory == NULL) {
>>>               cpu->isar.id_aa64pfr1 =
>>>                   FIELD_DP64(cpu->isar.id_aa64pfr1, ID_AA64PFR1, 
>>> MTE, 1);
>>>           }
>>> +
>>> +        /*
>>> +         * Clear MTE bits, if not enabled in KVM mode.
>>> +         */
>>> +        if (kvm_enabled() && !cpu->kvm_mte) {
>>> +                FIELD_DP64(cpu->isar.id_aa64pfr1, ID_AA64PFR1, MTE, 
>>> 0);
>>> +        }
>>
>> I 've discussed a bit with Richard about the need of setting the MTE 
>> field here
>>
>> to 0. This is already a reduction since it's inside the condition block:
>>
>> if (cpu_isar_feature(aa64_mte, cpu)) { ... }, which is only taken if 
>> we already
>>
>> have MTE field >= 1. At this point the MTE bits 
>> in cpu->isar.id_aa64pfr1 should
>>
>> already be set correctly accordingly to the host bits since 
>> kvm_arm_get_host_cpu_features()
>>
>> was called.
>>
>>
>> The check for TCG (cpu->tag_memory == NULL) exists because even if 
>> MTE instructions
>>
>> are supported by the CPU it's possible that the machine's memory does 
>> not support tags,
>>
>> but we don't check for that in KVM afaics.
>>
>>
>> For KVM, isn't the cpu->isar.id_aa64pfr1 MTE bits already correct 
>> here so we don't need to touch them?
>
> id_aa64pfr1.MTE bits are masked/zero when we read without enabling the 
> MTE(KVM_CAP_ARM_MTE). Hence to read the real value we are enabling MTE 
> while reading the register.
>
> Later, If user is not passed argument "mte=on", we want to revert the 
> value that is read with MTE enabled since we are enabling MTE on 
> actual VM  only if "mte=on".
>
> Are you saying, let us have the MTE enabled for KVM by default and 
> disable only if user passes mte=off?.

No, it's ok the way it is. I missed one obvious case: host supports MTE 
but user _didn't_ ask to enable it.


Thanks for the clarifications. It makes sense to me.


I think it would be good to explain this in the comment, i.e. explain 
the rationale for the reduction

(I'm calling  reduction the fact that we check for aa64pfr1.MTE bits and 
then we mask them (set to zero, in this case).

I'll comment it inline in the other thread were Cornelia suggests a 
change on it too.


Cheers,

Gustavo


>>
>> cpu->kvm_mte is false here only if kvm_check_extension(kvm_state, 
>> KVM_CAP_ARM_MTE) is true but
>>
>> kvm_vm_enable_cap(kvm_state, KVM_CAP_ARM_MTE, 0) is false -- can that 
>> happen ever happen? _or_ when
>
> This can be, if hardware is not supporting it.
> Since user is trying to enable with mte=on but hardware is not 
> capable, in that case qemu aborts with error message.
>
>>
>> we can't block migration? So basically we are telling to the guest 
>> that MTE is not supported if we have
>>
>> MTE supported in the host _but_ we can't block migration?
>>
>>
>>
>> Cheers,
>>
>> Gustavo
>>
>

Re: [PATCH V3] arm/kvm: add support for MTE
Posted by Cornelia Huck 3 weeks, 6 days ago
On Fri, Sep 20 2024, Ganapatrao Kulkarni <gankulkarni@os.amperecomputing.com> wrote:

Mostly nit-picking below, otherwise LGTM.

> Extend the 'mte' property for the virt machine to cover KVM as
> well. For KVM, we don't allocate tag memory, but instead enable
> the capability.
>
> If MTE has been enabled, we need to disable migration, as we do not
> yet have a way to migrate the tags as well. Therefore, MTE will stay
> off with KVM unless requested explicitly.
>
> This patch is rework of commit b320e21c48ce64853904bea6631c0158cc2ef227
> which broke TCG since it made the TCG -cpu max
> report the presence of MTE to the guest even if the board hadn't
> enabled MTE by wiring up the tag RAM. This meant that if the guest
> then tried to use MTE QEMU would segfault accessing the
> non-existent tag RAM.

I think the more canonical way to express this would be

[$AUTHOR: reworked original patch by doing X to avoid problem Y]

>
> Signed-off-by: Cornelia Huck <cohuck@redhat.com>
> Signed-off-by: Ganapatrao Kulkarni <gankulkarni@os.amperecomputing.com>

Also, the S-o-b chain is a bit confusing that way, because you are
listed as author of the patch, but I'm in the chain in front of you -- I
think I should still be listed as the author?

> ---
>
> Changes since V2:
> 	Updated with review comments.
>
> Changes since V1:
> 	Added code to enable MTE before reading register
> id_aa64pfr1 (unmasked MTE bits).
>
> This patch is boot tested on ARM64 with KVM and on X86 with TCG for mte=on
> and default case(i.e, no mte).
>
>  hw/arm/virt.c        | 72 ++++++++++++++++++++++++++------------------
>  target/arm/cpu.c     | 11 +++++--
>  target/arm/cpu.h     |  2 ++
>  target/arm/kvm.c     | 57 +++++++++++++++++++++++++++++++++++
>  target/arm/kvm_arm.h | 19 ++++++++++++
>  5 files changed, 129 insertions(+), 32 deletions(-)
>

(...)

> diff --git a/target/arm/cpu.c b/target/arm/cpu.c
> index 19191c2391..8a2fc471ce 100644
> --- a/target/arm/cpu.c
> +++ b/target/arm/cpu.c
> @@ -2390,14 +2390,21 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
>  
>  #ifndef CONFIG_USER_ONLY
>          /*
> -         * If we do not have tag-memory provided by the machine,
> +         * If we do not have tag-memory provided by the TCG,

Maybe

"If we run with TCG and do not have tag-memory provided by the machine"

?

>           * reduce MTE support to instructions enabled at EL0.
>           * This matches Cortex-A710 BROADCASTMTE input being LOW.
>           */
> -        if (cpu->tag_memory == NULL) {
> +        if (tcg_enabled() && cpu->tag_memory == NULL) {
>              cpu->isar.id_aa64pfr1 =
>                  FIELD_DP64(cpu->isar.id_aa64pfr1, ID_AA64PFR1, MTE, 1);
>          }
> +
> +        /*
> +         * Clear MTE bits, if not enabled in KVM mode.

Maybe add "This matches the MTE bits being masked by KVM in that case."?

> +         */
> +        if (kvm_enabled() && !cpu->kvm_mte) {
> +                FIELD_DP64(cpu->isar.id_aa64pfr1, ID_AA64PFR1, MTE, 0);
> +        }
>  #endif
>      }
>  

(...)

> diff --git a/target/arm/kvm.c b/target/arm/kvm.c
> index 849e2e21b3..af7a98517d 100644
> --- a/target/arm/kvm.c
> +++ b/target/arm/kvm.c
> @@ -39,6 +39,7 @@
>  #include "hw/acpi/acpi.h"
>  #include "hw/acpi/ghes.h"
>  #include "target/arm/gtimer.h"
> +#include "migration/blocker.h"
>  
>  const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
>      KVM_CAP_LAST_INFO
> @@ -119,6 +120,20 @@ bool kvm_arm_create_scratch_host_vcpu(const uint32_t *cpus_to_try,
>      if (vmfd < 0) {
>          goto err;
>      }
> +
> +    /*
> +     * MTE capability must be enabled by the VMM before creating
> +     * any VCPUs. The MTE bits of the ID_AA64PFR1 register are masked
> +     * if MTE is not enabled, allowing them to be probed correctly.

This reads a bit confusing. Maybe

"The MTE capability must be enabled by the VMM before creating any VCPUs
in order to allow the MTE bits of the ID_AA64PFR1 register to be probed
correctly, as they are masked if MTE is not enabled."


> +     */
> +    if (kvm_arm_mte_supported()) {
> +        KVMState kvm_state;
> +
> +        kvm_state.fd = kvmfd;
> +        kvm_state.vmfd = vmfd;
> +        kvm_vm_enable_cap(&kvm_state, KVM_CAP_ARM_MTE, 0);
> +    }
> +
>      cpufd = ioctl(vmfd, KVM_CREATE_VCPU, 0);
>      if (cpufd < 0) {
>          goto err;

(...)
Re: [PATCH V3] arm/kvm: add support for MTE
Posted by Gustavo Romero 3 weeks, 5 days ago
Hi Cornelia and Ganapatrao,

On 9/25/24 14:54, Cornelia Huck wrote:
> On Fri, Sep 20 2024, Ganapatrao Kulkarni <gankulkarni@os.amperecomputing.com> wrote:
>
> Mostly nit-picking below, otherwise LGTM.
>
>> Extend the 'mte' property for the virt machine to cover KVM as
>> well. For KVM, we don't allocate tag memory, but instead enable
>> the capability.
>>
>> If MTE has been enabled, we need to disable migration, as we do not
>> yet have a way to migrate the tags as well. Therefore, MTE will stay
>> off with KVM unless requested explicitly.
>>
>> This patch is rework of commit b320e21c48ce64853904bea6631c0158cc2ef227
>> which broke TCG since it made the TCG -cpu max
>> report the presence of MTE to the guest even if the board hadn't
>> enabled MTE by wiring up the tag RAM. This meant that if the guest
>> then tried to use MTE QEMU would segfault accessing the
>> non-existent tag RAM.
> I think the more canonical way to express this would be
>
> [$AUTHOR: reworked original patch by doing X to avoid problem Y]
>
>> Signed-off-by: Cornelia Huck <cohuck@redhat.com>
>> Signed-off-by: Ganapatrao Kulkarni <gankulkarni@os.amperecomputing.com>
> Also, the S-o-b chain is a bit confusing that way, because you are
> listed as author of the patch, but I'm in the chain in front of you -- I
> think I should still be listed as the author?
>
>> ---
>>
>> Changes since V2:
>> 	Updated with review comments.
>>
>> Changes since V1:
>> 	Added code to enable MTE before reading register
>> id_aa64pfr1 (unmasked MTE bits).
>>
>> This patch is boot tested on ARM64 with KVM and on X86 with TCG for mte=on
>> and default case(i.e, no mte).
>>
>>   hw/arm/virt.c        | 72 ++++++++++++++++++++++++++------------------
>>   target/arm/cpu.c     | 11 +++++--
>>   target/arm/cpu.h     |  2 ++
>>   target/arm/kvm.c     | 57 +++++++++++++++++++++++++++++++++++
>>   target/arm/kvm_arm.h | 19 ++++++++++++
>>   5 files changed, 129 insertions(+), 32 deletions(-)
>>
> (...)
>
>> diff --git a/target/arm/cpu.c b/target/arm/cpu.c
>> index 19191c2391..8a2fc471ce 100644
>> --- a/target/arm/cpu.c
>> +++ b/target/arm/cpu.c
>> @@ -2390,14 +2390,21 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
>>   
>>   #ifndef CONFIG_USER_ONLY
>>           /*
>> -         * If we do not have tag-memory provided by the machine,
>> +         * If we do not have tag-memory provided by the TCG,
> Maybe
>
> "If we run with TCG and do not have tag-memory provided by the machine"
>
> ?

Yep, I agree, this is better.


>>            * reduce MTE support to instructions enabled at EL0.
>>            * This matches Cortex-A710 BROADCASTMTE input being LOW.
>>            */
>> -        if (cpu->tag_memory == NULL) {
>> +        if (tcg_enabled() && cpu->tag_memory == NULL) {
>>               cpu->isar.id_aa64pfr1 =
>>                   FIELD_DP64(cpu->isar.id_aa64pfr1, ID_AA64PFR1, MTE, 1);
>>           }
>> +
>> +        /*
>> +         * Clear MTE bits, if not enabled in KVM mode.
> Maybe add "This matches the MTE bits being masked by KVM in that case."?

Clearing the MTE bits is also necessary when MTE is supported by the
host (and so KVM can enable the MTE capability - so won't mask the MTE
bits, but the user didn't want MTE enabled in the guest (mte=on no given
or explicitly set to =off), so this comment is not always true?

How about something like:

"If MTE is supported by the host but could not be enabled on KVM mode or
MTE should not be enabled on the guest (e.i. mte=off), clear guest's MTE 
bits."

I do assume MTE is supported by the host (i.e. MTE bits >= 2 in the host)
because otherwise condition "if (cpu_isar_feature(aa64_mte, cpu)) { ... 
}" is not
taken; and at this point cpu->isar->id_aa64pfr1 is set from the host's 
bits via
kvm_arm_set_cpu_features_from_host() and kvm_arm_get_host_cpu_features ().


>> +         */
>> +        if (kvm_enabled() && !cpu->kvm_mte) {
>> +                FIELD_DP64(cpu->isar.id_aa64pfr1, ID_AA64PFR1, MTE, 0);
>> +        }
>>   #endif
>>       }
>>   
> (...)
>
>> diff --git a/target/arm/kvm.c b/target/arm/kvm.c
>> index 849e2e21b3..af7a98517d 100644
>> --- a/target/arm/kvm.c
>> +++ b/target/arm/kvm.c
>> @@ -39,6 +39,7 @@
>>   #include "hw/acpi/acpi.h"
>>   #include "hw/acpi/ghes.h"
>>   #include "target/arm/gtimer.h"
>> +#include "migration/blocker.h"
>>   
>>   const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
>>       KVM_CAP_LAST_INFO
>> @@ -119,6 +120,20 @@ bool kvm_arm_create_scratch_host_vcpu(const uint32_t *cpus_to_try,
>>       if (vmfd < 0) {
>>           goto err;
>>       }
>> +
>> +    /*
>> +     * MTE capability must be enabled by the VMM before creating
>> +     * any VCPUs. The MTE bits of the ID_AA64PFR1 register are masked
>> +     * if MTE is not enabled, allowing them to be probed correctly.
> This reads a bit confusing. Maybe
>
> "The MTE capability must be enabled by the VMM before creating any VCPUs
> in order to allow the MTE bits of the ID_AA64PFR1 register to be probed
> correctly, as they are masked if MTE is not enabled."

I agree.


>
>> +     */
>> +    if (kvm_arm_mte_supported()) {
>> +        KVMState kvm_state;
>> +
>> +        kvm_state.fd = kvmfd;
>> +        kvm_state.vmfd = vmfd;
>> +        kvm_vm_enable_cap(&kvm_state, KVM_CAP_ARM_MTE, 0);
>> +    }
>> +
>>       cpufd = ioctl(vmfd, KVM_CREATE_VCPU, 0);
>>       if (cpufd < 0) {
>>           goto err;
> (...)
>

Comments aside:

Reviewed-by: Gustavo Romero <gustavo.romero@linaro.org>
Re: [PATCH V3] arm/kvm: add support for MTE
Posted by Cornelia Huck 2 weeks, 6 days ago
On Thu, Sep 26 2024, Gustavo Romero <gustavo.romero@linaro.org> wrote:

> Hi Cornelia and Ganapatrao,
>
> On 9/25/24 14:54, Cornelia Huck wrote:
>> On Fri, Sep 20 2024, Ganapatrao Kulkarni <gankulkarni@os.amperecomputing.com> wrote:
>>> +
>>> +        /*
>>> +         * Clear MTE bits, if not enabled in KVM mode.
>> Maybe add "This matches the MTE bits being masked by KVM in that case."?
>
> Clearing the MTE bits is also necessary when MTE is supported by the
> host (and so KVM can enable the MTE capability - so won't mask the MTE
> bits, but the user didn't want MTE enabled in the guest (mte=on no given
> or explicitly set to =off), so this comment is not always true?
>
> How about something like:
>
> "If MTE is supported by the host but could not be enabled on KVM mode or
> MTE should not be enabled on the guest (e.i. mte=off), clear guest's MTE 
> bits."

s/e.i./i.e./ :)

Otherwise fine with me.

>
> I do assume MTE is supported by the host (i.e. MTE bits >= 2 in the host)
> because otherwise condition "if (cpu_isar_feature(aa64_mte, cpu)) { ... 
> }" is not
> taken; and at this point cpu->isar->id_aa64pfr1 is set from the host's 
> bits via
> kvm_arm_set_cpu_features_from_host() and kvm_arm_get_host_cpu_features ().
>
>
>>> +         */
>>> +        if (kvm_enabled() && !cpu->kvm_mte) {
>>> +                FIELD_DP64(cpu->isar.id_aa64pfr1, ID_AA64PFR1, MTE, 0);
>>> +        }
>>>   #endif
>>>       }
Re: [PATCH V3] arm/kvm: add support for MTE
Posted by Ganapatrao Kulkarni 3 weeks, 4 days ago

On 26-09-2024 09:41 pm, Gustavo Romero wrote:
> Hi Cornelia and Ganapatrao,
> 
> On 9/25/24 14:54, Cornelia Huck wrote:
>> On Fri, Sep 20 2024, Ganapatrao Kulkarni 
>> <gankulkarni@os.amperecomputing.com> wrote:
>>
>> Mostly nit-picking below, otherwise LGTM.
>>
>>> Extend the 'mte' property for the virt machine to cover KVM as
>>> well. For KVM, we don't allocate tag memory, but instead enable
>>> the capability.
>>>
>>> If MTE has been enabled, we need to disable migration, as we do not
>>> yet have a way to migrate the tags as well. Therefore, MTE will stay
>>> off with KVM unless requested explicitly.
>>>
>>> This patch is rework of commit b320e21c48ce64853904bea6631c0158cc2ef227
>>> which broke TCG since it made the TCG -cpu max
>>> report the presence of MTE to the guest even if the board hadn't
>>> enabled MTE by wiring up the tag RAM. This meant that if the guest
>>> then tried to use MTE QEMU would segfault accessing the
>>> non-existent tag RAM.
>> I think the more canonical way to express this would be
>>
>> [$AUTHOR: reworked original patch by doing X to avoid problem Y]
>>
>>> Signed-off-by: Cornelia Huck <cohuck@redhat.com>
>>> Signed-off-by: Ganapatrao Kulkarni <gankulkarni@os.amperecomputing.com>
>> Also, the S-o-b chain is a bit confusing that way, because you are
>> listed as author of the patch, but I'm in the chain in front of you -- I
>> think I should still be listed as the author?
>>
>>> ---
>>>
>>> Changes since V2:
>>>     Updated with review comments.
>>>
>>> Changes since V1:
>>>     Added code to enable MTE before reading register
>>> id_aa64pfr1 (unmasked MTE bits).
>>>
>>> This patch is boot tested on ARM64 with KVM and on X86 with TCG for 
>>> mte=on
>>> and default case(i.e, no mte).
>>>
>>>   hw/arm/virt.c        | 72 ++++++++++++++++++++++++++------------------
>>>   target/arm/cpu.c     | 11 +++++--
>>>   target/arm/cpu.h     |  2 ++
>>>   target/arm/kvm.c     | 57 +++++++++++++++++++++++++++++++++++
>>>   target/arm/kvm_arm.h | 19 ++++++++++++
>>>   5 files changed, 129 insertions(+), 32 deletions(-)
>>>
>> (...)
>>
>>> diff --git a/target/arm/cpu.c b/target/arm/cpu.c
>>> index 19191c2391..8a2fc471ce 100644
>>> --- a/target/arm/cpu.c
>>> +++ b/target/arm/cpu.c
>>> @@ -2390,14 +2390,21 @@ static void arm_cpu_realizefn(DeviceState 
>>> *dev, Error **errp)
>>>   #ifndef CONFIG_USER_ONLY
>>>           /*
>>> -         * If we do not have tag-memory provided by the machine,
>>> +         * If we do not have tag-memory provided by the TCG,
>> Maybe
>>
>> "If we run with TCG and do not have tag-memory provided by the machine"
>>
>> ?
> 
> Yep, I agree, this is better.

Sure, will update the comment.
> 
> 
>>>            * reduce MTE support to instructions enabled at EL0.
>>>            * This matches Cortex-A710 BROADCASTMTE input being LOW.
>>>            */
>>> -        if (cpu->tag_memory == NULL) {
>>> +        if (tcg_enabled() && cpu->tag_memory == NULL) {
>>>               cpu->isar.id_aa64pfr1 =
>>>                   FIELD_DP64(cpu->isar.id_aa64pfr1, ID_AA64PFR1, MTE, 
>>> 1);
>>>           }
>>> +
>>> +        /*
>>> +         * Clear MTE bits, if not enabled in KVM mode.
>> Maybe add "This matches the MTE bits being masked by KVM in that case."?
> 
> Clearing the MTE bits is also necessary when MTE is supported by the
> host (and so KVM can enable the MTE capability - so won't mask the MTE
> bits, but the user didn't want MTE enabled in the guest (mte=on no given
> or explicitly set to =off), so this comment is not always true?
> 
> How about something like:
> 
> "If MTE is supported by the host but could not be enabled on KVM mode or
> MTE should not be enabled on the guest (e.i. mte=off), clear guest's MTE 
> bits."
> 

Ok thanks.

> I do assume MTE is supported by the host (i.e. MTE bits >= 2 in the host)
> because otherwise condition "if (cpu_isar_feature(aa64_mte, cpu)) { ... 
> }" is not
> taken; and at this point cpu->isar->id_aa64pfr1 is set from the host's 
> bits via
> kvm_arm_set_cpu_features_from_host() and kvm_arm_get_host_cpu_features ().
> 
> 
>>> +         */
>>> +        if (kvm_enabled() && !cpu->kvm_mte) {
>>> +                FIELD_DP64(cpu->isar.id_aa64pfr1, ID_AA64PFR1, MTE, 0);
>>> +        }
>>>   #endif
>>>       }
>> (...)
>>
>>> diff --git a/target/arm/kvm.c b/target/arm/kvm.c
>>> index 849e2e21b3..af7a98517d 100644
>>> --- a/target/arm/kvm.c
>>> +++ b/target/arm/kvm.c
>>> @@ -39,6 +39,7 @@
>>>   #include "hw/acpi/acpi.h"
>>>   #include "hw/acpi/ghes.h"
>>>   #include "target/arm/gtimer.h"
>>> +#include "migration/blocker.h"
>>>   const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
>>>       KVM_CAP_LAST_INFO
>>> @@ -119,6 +120,20 @@ bool kvm_arm_create_scratch_host_vcpu(const 
>>> uint32_t *cpus_to_try,
>>>       if (vmfd < 0) {
>>>           goto err;
>>>       }
>>> +
>>> +    /*
>>> +     * MTE capability must be enabled by the VMM before creating
>>> +     * any VCPUs. The MTE bits of the ID_AA64PFR1 register are masked
>>> +     * if MTE is not enabled, allowing them to be probed correctly.
>> This reads a bit confusing. Maybe
>>
>> "The MTE capability must be enabled by the VMM before creating any VCPUs
>> in order to allow the MTE bits of the ID_AA64PFR1 register to be probed
>> correctly, as they are masked if MTE is not enabled."
> 
> I agree.

Ok, thanks.
> 
> 
>>
>>> +     */
>>> +    if (kvm_arm_mte_supported()) {
>>> +        KVMState kvm_state;
>>> +
>>> +        kvm_state.fd = kvmfd;
>>> +        kvm_state.vmfd = vmfd;
>>> +        kvm_vm_enable_cap(&kvm_state, KVM_CAP_ARM_MTE, 0);
>>> +    }
>>> +
>>>       cpufd = ioctl(vmfd, KVM_CREATE_VCPU, 0);
>>>       if (cpufd < 0) {
>>>           goto err;
>> (...)
>>
> 
> Comments aside:
> 
> Reviewed-by: Gustavo Romero <gustavo.romero@linaro.org>

Thanks Gustavo.

-- 
Thanks,
Ganapat/GK