[RFC PATCH v0 1/2] spapr: Add H_REG_SNS hcall

Bharata B Rao posted 2 patches 4 years, 6 months ago
Maintainers: Greg Kurz <groug@kaod.org>, "Michael S. Tsirkin" <mst@redhat.com>, Cornelia Huck <cohuck@redhat.com>, Paolo Bonzini <pbonzini@redhat.com>, David Gibson <david@gibson.dropbear.id.au>
[RFC PATCH v0 1/2] spapr: Add H_REG_SNS hcall
Posted by Bharata B Rao 4 years, 6 months ago
Add support for H_REG_SNS hcall so that asynchronous page
fault mechanism can be supported on PowerKVM guests.

This hcall essentially issues KVM_PPC_SET_SNS to let the
host map and pin the memory containing the Subvention
Notification Structure. It also claims SPAPR_IRQ_SNS to
be used as subvention notification interrupt.

Note: Updates to linux-headers/linux/kvm.h are temporary
pending headers update.

Signed-off-by: Bharata B Rao <bharata@linux.ibm.com>
---
 hw/ppc/spapr.c                  |  3 ++
 hw/ppc/spapr_hcall.c            | 56 +++++++++++++++++++++++++++++++++
 include/hw/ppc/spapr.h          |  3 ++
 include/hw/ppc/spapr_irq.h      |  1 +
 linux-headers/asm-powerpc/kvm.h |  6 ++++
 linux-headers/linux/kvm.h       |  1 +
 target/ppc/kvm.c                | 14 +++++++++
 target/ppc/kvm_ppc.h            | 10 ++++++
 8 files changed, 94 insertions(+)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 81699d4f8b..5f1f75826d 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -2830,6 +2830,9 @@ static void spapr_machine_init(MachineState *machine)
 
         /* Enable H_PAGE_INIT */
         kvmppc_enable_h_page_init();
+
+        /* Enable H_REG_SNS */
+        kvmppc_enable_h_reg_sns();
     }
 
     /* map RAM */
diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
index 0e9a5b2e40..957edecb13 100644
--- a/hw/ppc/spapr_hcall.c
+++ b/hw/ppc/spapr_hcall.c
@@ -1405,6 +1405,59 @@ static target_ulong h_update_dt(PowerPCCPU *cpu, SpaprMachineState *spapr,
     return H_SUCCESS;
 }
 
+static target_ulong deregister_sns(PowerPCCPU *cpu, SpaprMachineState *spapr)
+{
+    spapr->sns_addr = -1;
+    spapr->sns_len = 0;
+    spapr_irq_free(spapr, SPAPR_IRQ_SNS, 1);
+
+    return H_SUCCESS;
+}
+
+static target_ulong h_reg_sns(PowerPCCPU *cpu, SpaprMachineState *spapr,
+                              target_ulong opcode, target_ulong *args)
+{
+    target_ulong addr = args[0];
+    target_ulong len = args[1];
+
+    if (addr == -1) {
+        return deregister_sns(cpu, spapr);
+    }
+
+    /*
+     * If SNS area is already registered, can't register again before
+     * deregistering it first.
+     */
+    if (spapr->sns_addr == -1) {
+        return H_PARAMETER;
+    }
+
+    if (!QEMU_IS_ALIGNED(addr, 4096)) {
+        return H_PARAMETER;
+    }
+
+    if (len < 256) {
+        return H_P2;
+    }
+
+    /* TODO: SNS area is not allowed to cross a page boundary */
+
+    /* KVM_PPC_SET_SNS ioctl */
+    if (kvmppc_set_sns_reg(addr, len)) {
+        return H_PARAMETER;
+    }
+
+    /* Record SNS addr and len */
+    spapr->sns_addr = addr;
+    spapr->sns_len = len;
+
+    /* Register irq source for sending ESN notification */
+    spapr_irq_claim(spapr, SPAPR_IRQ_SNS, false, &error_fatal);
+    args[1] = SPAPR_IRQ_SNS; /* irq no in R5 */
+
+    return H_SUCCESS;
+}
+
 static spapr_hcall_fn papr_hypercall_table[(MAX_HCALL_OPCODE / 4) + 1];
 static spapr_hcall_fn kvmppc_hypercall_table[KVMPPC_HCALL_MAX - KVMPPC_HCALL_BASE + 1];
 static spapr_hcall_fn svm_hypercall_table[(SVM_HCALL_MAX - SVM_HCALL_BASE) / 4 + 1];
@@ -1545,6 +1598,9 @@ static void hypercall_register_types(void)
     spapr_register_hypercall(KVMPPC_H_CAS, h_client_architecture_support);
 
     spapr_register_hypercall(KVMPPC_H_UPDATE_DT, h_update_dt);
+
+    /* SNS memory area registration */
+    spapr_register_hypercall(H_REG_SNS, h_reg_sns);
 }
 
 type_init(hypercall_register_types)
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 637652ad16..934f9e066e 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -252,6 +252,8 @@ struct SpaprMachineState {
     uint32_t numa_assoc_array[MAX_NODES + NVGPU_MAX_NUM][NUMA_ASSOC_SIZE];
 
     Error *fwnmi_migration_blocker;
+    uint64_t sns_addr;
+    uint64_t sns_len;
 };
 
 #define H_SUCCESS         0
@@ -549,6 +551,7 @@ struct SpaprMachineState {
 #define H_SCM_UNBIND_MEM        0x3F0
 #define H_SCM_UNBIND_ALL        0x3FC
 #define H_SCM_HEALTH            0x400
+#define H_REG_SNS               0x41C
 #define H_RPT_INVALIDATE        0x448
 
 #define MAX_HCALL_OPCODE        H_RPT_INVALIDATE
diff --git a/include/hw/ppc/spapr_irq.h b/include/hw/ppc/spapr_irq.h
index c22a72c9e2..26c680f065 100644
--- a/include/hw/ppc/spapr_irq.h
+++ b/include/hw/ppc/spapr_irq.h
@@ -21,6 +21,7 @@
 #define SPAPR_XIRQ_BASE      XICS_IRQ_BASE /* 0x1000 */
 #define SPAPR_IRQ_EPOW       (SPAPR_XIRQ_BASE + 0x0000)
 #define SPAPR_IRQ_HOTPLUG    (SPAPR_XIRQ_BASE + 0x0001)
+#define SPAPR_IRQ_SNS        (SPAPR_XIRQ_BASE + 0x0002)
 #define SPAPR_IRQ_VIO        (SPAPR_XIRQ_BASE + 0x0100)  /* 256 VIO devices */
 #define SPAPR_IRQ_PCI_LSI    (SPAPR_XIRQ_BASE + 0x0200)  /* 32+ PHBs devices */
 
diff --git a/linux-headers/asm-powerpc/kvm.h b/linux-headers/asm-powerpc/kvm.h
index 9f18fa090f..d72739126a 100644
--- a/linux-headers/asm-powerpc/kvm.h
+++ b/linux-headers/asm-powerpc/kvm.h
@@ -470,6 +470,12 @@ struct kvm_ppc_cpu_char {
 #define KVM_PPC_CPU_BEHAV_BNDS_CHK_SPEC_BAR	(1ULL << 61)
 #define KVM_PPC_CPU_BEHAV_FLUSH_COUNT_CACHE	(1ull << 58)
 
+/* For KVM_PPC_SET_SNS */
+struct kvm_ppc_sns_reg {
+	__u64 addr;
+	__u64 len;
+};
+
 /* Per-vcpu XICS interrupt controller state */
 #define KVM_REG_PPC_ICP_STATE	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8c)
 
diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h
index bcaf66cc4d..a76945fcbc 100644
--- a/linux-headers/linux/kvm.h
+++ b/linux-headers/linux/kvm.h
@@ -1458,6 +1458,7 @@ struct kvm_s390_ucas_mapping {
 #define KVM_SET_PMU_EVENT_FILTER  _IOW(KVMIO,  0xb2, struct kvm_pmu_event_filter)
 #define KVM_PPC_SVM_OFF		  _IO(KVMIO,  0xb3)
 #define KVM_ARM_MTE_COPY_TAGS	  _IOR(KVMIO,  0xb4, struct kvm_arm_copy_mte_tags)
+#define KVM_PPC_SET_SNS		  _IOR(KVMIO,  0xb5, struct kvm_ppc_sns_reg)
 
 /* ioctl for vm fd */
 #define KVM_CREATE_DEVICE	  _IOWR(KVMIO,  0xe0, struct kvm_create_device)
diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
index dc93b99189..330985c8a0 100644
--- a/target/ppc/kvm.c
+++ b/target/ppc/kvm.c
@@ -2047,6 +2047,11 @@ void kvmppc_enable_h_rpt_invalidate(void)
     kvmppc_enable_hcall(kvm_state, H_RPT_INVALIDATE);
 }
 
+void kvmppc_enable_h_reg_sns(void)
+{
+    kvmppc_enable_hcall(kvm_state, H_REG_SNS);
+}
+
 void kvmppc_set_papr(PowerPCCPU *cpu)
 {
     CPUState *cs = CPU(cpu);
@@ -2959,3 +2964,12 @@ bool kvm_arch_cpu_check_are_resettable(void)
 {
     return true;
 }
+
+int kvmppc_set_sns_reg(target_ulong addr, target_ulong len)
+{
+    struct kvm_ppc_sns_reg sns_reg;
+
+    sns_reg.addr = addr;
+    sns_reg.len = len;
+    return kvm_vm_ioctl(kvm_state, KVM_PPC_SET_SNS, &sns_reg);
+}
diff --git a/target/ppc/kvm_ppc.h b/target/ppc/kvm_ppc.h
index ee9325bf9a..c22bc3253e 100644
--- a/target/ppc/kvm_ppc.h
+++ b/target/ppc/kvm_ppc.h
@@ -25,6 +25,7 @@ void kvmppc_enable_set_mode_hcall(void);
 void kvmppc_enable_clear_ref_mod_hcalls(void);
 void kvmppc_enable_h_page_init(void);
 void kvmppc_enable_h_rpt_invalidate(void);
+void kvmppc_enable_h_reg_sns(void);
 void kvmppc_set_papr(PowerPCCPU *cpu);
 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr);
 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy);
@@ -87,6 +88,7 @@ void kvmppc_set_reg_ppc_online(PowerPCCPU *cpu, unsigned int online);
 void kvmppc_set_reg_tb_offset(PowerPCCPU *cpu, int64_t tb_offset);
 
 int kvm_handle_nmi(PowerPCCPU *cpu, struct kvm_run *run);
+int kvmppc_set_sns_reg(target_ulong addr, target_ulong len);
 
 #else
 
@@ -157,6 +159,10 @@ static inline void kvmppc_enable_h_rpt_invalidate(void)
     g_assert_not_reached();
 }
 
+static inline void kvmppc_enable_h_reg_sns(void)
+{
+}
+
 static inline void kvmppc_set_papr(PowerPCCPU *cpu)
 {
 }
@@ -430,6 +436,10 @@ static inline bool kvmppc_pvr_workaround_required(PowerPCCPU *cpu)
     return false;
 }
 
+int kvmppc_set_sns_reg(target_ulong addr, target_ulong len)
+{
+    return -ENOSYS;
+}
 #endif
 
 #ifndef CONFIG_KVM
-- 
2.31.1


Re: [RFC PATCH v0 1/2] spapr: Add H_REG_SNS hcall
Posted by David Gibson 4 years, 6 months ago
On Thu, Aug 05, 2021 at 01:02:27PM +0530, Bharata B Rao wrote:
> Add support for H_REG_SNS hcall so that asynchronous page
> fault mechanism can be supported on PowerKVM guests.
> 
> This hcall essentially issues KVM_PPC_SET_SNS to let the
> host map and pin the memory containing the Subvention
> Notification Structure. It also claims SPAPR_IRQ_SNS to
> be used as subvention notification interrupt.
> 
> Note: Updates to linux-headers/linux/kvm.h are temporary
> pending headers update.
> 
> Signed-off-by: Bharata B Rao <bharata@linux.ibm.com>
> ---
>  hw/ppc/spapr.c                  |  3 ++
>  hw/ppc/spapr_hcall.c            | 56 +++++++++++++++++++++++++++++++++
>  include/hw/ppc/spapr.h          |  3 ++
>  include/hw/ppc/spapr_irq.h      |  1 +
>  linux-headers/asm-powerpc/kvm.h |  6 ++++
>  linux-headers/linux/kvm.h       |  1 +
>  target/ppc/kvm.c                | 14 +++++++++
>  target/ppc/kvm_ppc.h            | 10 ++++++
>  8 files changed, 94 insertions(+)
> 
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index 81699d4f8b..5f1f75826d 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -2830,6 +2830,9 @@ static void spapr_machine_init(MachineState *machine)
>  
>          /* Enable H_PAGE_INIT */
>          kvmppc_enable_h_page_init();
> +
> +        /* Enable H_REG_SNS */
> +        kvmppc_enable_h_reg_sns();
>      }
>  
>      /* map RAM */
> diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
> index 0e9a5b2e40..957edecb13 100644
> --- a/hw/ppc/spapr_hcall.c
> +++ b/hw/ppc/spapr_hcall.c
> @@ -1405,6 +1405,59 @@ static target_ulong h_update_dt(PowerPCCPU *cpu, SpaprMachineState *spapr,
>      return H_SUCCESS;
>  }
>  
> +static target_ulong deregister_sns(PowerPCCPU *cpu, SpaprMachineState *spapr)
> +{
> +    spapr->sns_addr = -1;
> +    spapr->sns_len = 0;
> +    spapr_irq_free(spapr, SPAPR_IRQ_SNS, 1);
> +
> +    return H_SUCCESS;
> +}
> +
> +static target_ulong h_reg_sns(PowerPCCPU *cpu, SpaprMachineState *spapr,
> +                              target_ulong opcode, target_ulong *args)
> +{
> +    target_ulong addr = args[0];
> +    target_ulong len = args[1];
> +
> +    if (addr == -1) {
> +        return deregister_sns(cpu, spapr);
> +    }
> +
> +    /*
> +     * If SNS area is already registered, can't register again before
> +     * deregistering it first.
> +     */
> +    if (spapr->sns_addr == -1) {

As Fabiano says, it looks like the logic is reversed here.

> +        return H_PARAMETER;

Also, H_PARAMETER doesn't seem like the right error for this case.
H_BUSY, maybe?

> +    }
> +
> +    if (!QEMU_IS_ALIGNED(addr, 4096)) {

What's the significance of 4096 here?  Should this be one of the page
size defines instead?

> +        return H_PARAMETER;
> +    }
> +
> +    if (len < 256) {

A defined constant (SPAPR_MIN_SNS_SIZE?) would be worthwhile here, I think.

> +        return H_P2;
> +    }
> +
> +    /* TODO: SNS area is not allowed to cross a page boundary */
> +
> +    /* KVM_PPC_SET_SNS ioctl */
> +    if (kvmppc_set_sns_reg(addr, len)) {

What will happen if you attempt this on a TCG system?

> +        return H_PARAMETER;
> +    }
> +
> +    /* Record SNS addr and len */
> +    spapr->sns_addr = addr;
> +    spapr->sns_len = len;
> +
> +    /* Register irq source for sending ESN notification */
> +    spapr_irq_claim(spapr, SPAPR_IRQ_SNS, false, &error_fatal);

I don't think &error_fatal can be right here.  AFAICT this must be one
of two cases:
   1) This should never fail, no matter what the guest does.  If it
      does fail, that indicates a qemu bug.  In that case &error_abort is
      more appropriate
   2) This could fail for certain sequences of guest actions.  If
      that's the case, we shouldn't kill qemu, but should return
      H_HARDWARE (or something) instead.

> +    args[1] = SPAPR_IRQ_SNS; /* irq no in R5 */
> +
> +    return H_SUCCESS;
> +}
> +
>  static spapr_hcall_fn papr_hypercall_table[(MAX_HCALL_OPCODE / 4) + 1];
>  static spapr_hcall_fn kvmppc_hypercall_table[KVMPPC_HCALL_MAX - KVMPPC_HCALL_BASE + 1];
>  static spapr_hcall_fn svm_hypercall_table[(SVM_HCALL_MAX - SVM_HCALL_BASE) / 4 + 1];
> @@ -1545,6 +1598,9 @@ static void hypercall_register_types(void)
>      spapr_register_hypercall(KVMPPC_H_CAS, h_client_architecture_support);
>  
>      spapr_register_hypercall(KVMPPC_H_UPDATE_DT, h_update_dt);
> +
> +    /* SNS memory area registration */
> +    spapr_register_hypercall(H_REG_SNS, h_reg_sns);

You definitely need a machine parameter to enable this, and you should
only enable it by default for newer machine types.  Otherwise you will
cause migration breakages.

>  }
>  
>  type_init(hypercall_register_types)
> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> index 637652ad16..934f9e066e 100644
> --- a/include/hw/ppc/spapr.h
> +++ b/include/hw/ppc/spapr.h
> @@ -252,6 +252,8 @@ struct SpaprMachineState {
>      uint32_t numa_assoc_array[MAX_NODES + NVGPU_MAX_NUM][NUMA_ASSOC_SIZE];
>  
>      Error *fwnmi_migration_blocker;
> +    uint64_t sns_addr;
> +    uint64_t sns_len;
>  };
>  
>  #define H_SUCCESS         0
> @@ -549,6 +551,7 @@ struct SpaprMachineState {
>  #define H_SCM_UNBIND_MEM        0x3F0
>  #define H_SCM_UNBIND_ALL        0x3FC
>  #define H_SCM_HEALTH            0x400
> +#define H_REG_SNS               0x41C
>  #define H_RPT_INVALIDATE        0x448
>  
>  #define MAX_HCALL_OPCODE        H_RPT_INVALIDATE
> diff --git a/include/hw/ppc/spapr_irq.h b/include/hw/ppc/spapr_irq.h
> index c22a72c9e2..26c680f065 100644
> --- a/include/hw/ppc/spapr_irq.h
> +++ b/include/hw/ppc/spapr_irq.h
> @@ -21,6 +21,7 @@
>  #define SPAPR_XIRQ_BASE      XICS_IRQ_BASE /* 0x1000 */
>  #define SPAPR_IRQ_EPOW       (SPAPR_XIRQ_BASE + 0x0000)
>  #define SPAPR_IRQ_HOTPLUG    (SPAPR_XIRQ_BASE + 0x0001)
> +#define SPAPR_IRQ_SNS        (SPAPR_XIRQ_BASE + 0x0002)
>  #define SPAPR_IRQ_VIO        (SPAPR_XIRQ_BASE + 0x0100)  /* 256 VIO devices */
>  #define SPAPR_IRQ_PCI_LSI    (SPAPR_XIRQ_BASE + 0x0200)  /* 32+ PHBs devices */
>  
> diff --git a/linux-headers/asm-powerpc/kvm.h b/linux-headers/asm-powerpc/kvm.h
> index 9f18fa090f..d72739126a 100644
> --- a/linux-headers/asm-powerpc/kvm.h
> +++ b/linux-headers/asm-powerpc/kvm.h
> @@ -470,6 +470,12 @@ struct kvm_ppc_cpu_char {
>  #define KVM_PPC_CPU_BEHAV_BNDS_CHK_SPEC_BAR	(1ULL << 61)
>  #define KVM_PPC_CPU_BEHAV_FLUSH_COUNT_CACHE	(1ull << 58)
>  
> +/* For KVM_PPC_SET_SNS */
> +struct kvm_ppc_sns_reg {
> +	__u64 addr;
> +	__u64 len;
> +};
> +

Updates to linux-headers/ should be done as a separate preliminary
patch, listing the specific kernel commit that you're updating too.

>  /* Per-vcpu XICS interrupt controller state */
>  #define KVM_REG_PPC_ICP_STATE	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8c)
>  
> diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h
> index bcaf66cc4d..a76945fcbc 100644
> --- a/linux-headers/linux/kvm.h
> +++ b/linux-headers/linux/kvm.h
> @@ -1458,6 +1458,7 @@ struct kvm_s390_ucas_mapping {
>  #define KVM_SET_PMU_EVENT_FILTER  _IOW(KVMIO,  0xb2, struct kvm_pmu_event_filter)
>  #define KVM_PPC_SVM_OFF		  _IO(KVMIO,  0xb3)
>  #define KVM_ARM_MTE_COPY_TAGS	  _IOR(KVMIO,  0xb4, struct kvm_arm_copy_mte_tags)
> +#define KVM_PPC_SET_SNS		  _IOR(KVMIO,  0xb5, struct kvm_ppc_sns_reg)
>  
>  /* ioctl for vm fd */
>  #define KVM_CREATE_DEVICE	  _IOWR(KVMIO,  0xe0, struct kvm_create_device)
> diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
> index dc93b99189..330985c8a0 100644
> --- a/target/ppc/kvm.c
> +++ b/target/ppc/kvm.c
> @@ -2047,6 +2047,11 @@ void kvmppc_enable_h_rpt_invalidate(void)
>      kvmppc_enable_hcall(kvm_state, H_RPT_INVALIDATE);
>  }
>  
> +void kvmppc_enable_h_reg_sns(void)
> +{
> +    kvmppc_enable_hcall(kvm_state, H_REG_SNS);
> +}
> +
>  void kvmppc_set_papr(PowerPCCPU *cpu)
>  {
>      CPUState *cs = CPU(cpu);
> @@ -2959,3 +2964,12 @@ bool kvm_arch_cpu_check_are_resettable(void)
>  {
>      return true;
>  }
> +
> +int kvmppc_set_sns_reg(target_ulong addr, target_ulong len)
> +{
> +    struct kvm_ppc_sns_reg sns_reg;
> +
> +    sns_reg.addr = addr;
> +    sns_reg.len = len;
> +    return kvm_vm_ioctl(kvm_state, KVM_PPC_SET_SNS, &sns_reg);
> +}
> diff --git a/target/ppc/kvm_ppc.h b/target/ppc/kvm_ppc.h
> index ee9325bf9a..c22bc3253e 100644
> --- a/target/ppc/kvm_ppc.h
> +++ b/target/ppc/kvm_ppc.h
> @@ -25,6 +25,7 @@ void kvmppc_enable_set_mode_hcall(void);
>  void kvmppc_enable_clear_ref_mod_hcalls(void);
>  void kvmppc_enable_h_page_init(void);
>  void kvmppc_enable_h_rpt_invalidate(void);
> +void kvmppc_enable_h_reg_sns(void);
>  void kvmppc_set_papr(PowerPCCPU *cpu);
>  int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr);
>  void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy);
> @@ -87,6 +88,7 @@ void kvmppc_set_reg_ppc_online(PowerPCCPU *cpu, unsigned int online);
>  void kvmppc_set_reg_tb_offset(PowerPCCPU *cpu, int64_t tb_offset);
>  
>  int kvm_handle_nmi(PowerPCCPU *cpu, struct kvm_run *run);
> +int kvmppc_set_sns_reg(target_ulong addr, target_ulong len);
>  
>  #else
>  
> @@ -157,6 +159,10 @@ static inline void kvmppc_enable_h_rpt_invalidate(void)
>      g_assert_not_reached();
>  }
>  
> +static inline void kvmppc_enable_h_reg_sns(void)
> +{
> +}
> +
>  static inline void kvmppc_set_papr(PowerPCCPU *cpu)
>  {
>  }
> @@ -430,6 +436,10 @@ static inline bool kvmppc_pvr_workaround_required(PowerPCCPU *cpu)
>      return false;
>  }
>  
> +int kvmppc_set_sns_reg(target_ulong addr, target_ulong len)
> +{
> +    return -ENOSYS;
> +}
>  #endif
>  
>  #ifndef CONFIG_KVM

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson
Re: [RFC PATCH v0 1/2] spapr: Add H_REG_SNS hcall
Posted by Bharata B Rao 4 years, 5 months ago
On Mon, Aug 09, 2021 at 01:49:54PM +1000, David Gibson wrote:
> On Thu, Aug 05, 2021 at 01:02:27PM +0530, Bharata B Rao wrote:
> > Add support for H_REG_SNS hcall so that asynchronous page
> > fault mechanism can be supported on PowerKVM guests.
> > 
> > This hcall essentially issues KVM_PPC_SET_SNS to let the
> > host map and pin the memory containing the Subvention
> > Notification Structure. It also claims SPAPR_IRQ_SNS to
> > be used as subvention notification interrupt.
> > 
> > Note: Updates to linux-headers/linux/kvm.h are temporary
> > pending headers update.
> > 
> > Signed-off-by: Bharata B Rao <bharata@linux.ibm.com>
> > ---
> >  hw/ppc/spapr.c                  |  3 ++
> >  hw/ppc/spapr_hcall.c            | 56 +++++++++++++++++++++++++++++++++
> >  include/hw/ppc/spapr.h          |  3 ++
> >  include/hw/ppc/spapr_irq.h      |  1 +
> >  linux-headers/asm-powerpc/kvm.h |  6 ++++
> >  linux-headers/linux/kvm.h       |  1 +
> >  target/ppc/kvm.c                | 14 +++++++++
> >  target/ppc/kvm_ppc.h            | 10 ++++++
> >  8 files changed, 94 insertions(+)
> > 
> > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> > index 81699d4f8b..5f1f75826d 100644
> > --- a/hw/ppc/spapr.c
> > +++ b/hw/ppc/spapr.c
> > @@ -2830,6 +2830,9 @@ static void spapr_machine_init(MachineState *machine)
> >  
> >          /* Enable H_PAGE_INIT */
> >          kvmppc_enable_h_page_init();
> > +
> > +        /* Enable H_REG_SNS */
> > +        kvmppc_enable_h_reg_sns();
> >      }
> >  
> >      /* map RAM */
> > diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
> > index 0e9a5b2e40..957edecb13 100644
> > --- a/hw/ppc/spapr_hcall.c
> > +++ b/hw/ppc/spapr_hcall.c
> > @@ -1405,6 +1405,59 @@ static target_ulong h_update_dt(PowerPCCPU *cpu, SpaprMachineState *spapr,
> >      return H_SUCCESS;
> >  }
> >  
> > +static target_ulong deregister_sns(PowerPCCPU *cpu, SpaprMachineState *spapr)
> > +{
> > +    spapr->sns_addr = -1;
> > +    spapr->sns_len = 0;
> > +    spapr_irq_free(spapr, SPAPR_IRQ_SNS, 1);
> > +
> > +    return H_SUCCESS;
> > +}
> > +
> > +static target_ulong h_reg_sns(PowerPCCPU *cpu, SpaprMachineState *spapr,
> > +                              target_ulong opcode, target_ulong *args)
> > +{
> > +    target_ulong addr = args[0];
> > +    target_ulong len = args[1];
> > +
> > +    if (addr == -1) {
> > +        return deregister_sns(cpu, spapr);
> > +    }
> > +
> > +    /*
> > +     * If SNS area is already registered, can't register again before
> > +     * deregistering it first.
> > +     */
> > +    if (spapr->sns_addr == -1) {
> 
> As Fabiano says, it looks like the logic is reversed here.

Correct.

> 
> > +        return H_PARAMETER;
> 
> Also, H_PARAMETER doesn't seem like the right error for this case.
> H_BUSY, maybe?

Yes we may return H_BUSY.

> 
> > +    }
> > +
> > +    if (!QEMU_IS_ALIGNED(addr, 4096)) {
> 
> What's the significance of 4096 here?  Should this be one of the page
> size defines instead?

PAPR specifies this alignment.
"If the Address parameter is not 4K aligned in the valid logical address
space of the caller, then return H_Parameter."

> 
> > +        return H_PARAMETER;
> > +    }
> > +
> > +    if (len < 256) {
> 
> A defined constant (SPAPR_MIN_SNS_SIZE?) would be worthwhile here, I think.

Yes.

> 
> > +        return H_P2;
> > +    }
> > +
> > +    /* TODO: SNS area is not allowed to cross a page boundary */
> > +
> > +    /* KVM_PPC_SET_SNS ioctl */
> > +    if (kvmppc_set_sns_reg(addr, len)) {
> 
> What will happen if you attempt this on a TCG system?

We should have a variant of kvmppc_set_sns_reg() for TCG that
returns error, so that this hcall can fail.

> 
> > +        return H_PARAMETER;
> > +    }
> > +
> > +    /* Record SNS addr and len */
> > +    spapr->sns_addr = addr;
> > +    spapr->sns_len = len;
> > +
> > +    /* Register irq source for sending ESN notification */
> > +    spapr_irq_claim(spapr, SPAPR_IRQ_SNS, false, &error_fatal);
> 
> I don't think &error_fatal can be right here.  AFAICT this must be one
> of two cases:
>    1) This should never fail, no matter what the guest does.  If it
>       does fail, that indicates a qemu bug.  In that case &error_abort is
>       more appropriate
>    2) This could fail for certain sequences of guest actions.  If
>       that's the case, we shouldn't kill qemu, but should return
>       H_HARDWARE (or something) instead.

I think we could just check for error and return failure so that
the guest wouldn't go ahead and enable async-pf feature.

> 
> > +    args[1] = SPAPR_IRQ_SNS; /* irq no in R5 */
> > +
> > +    return H_SUCCESS;
> > +}
> > +
> >  static spapr_hcall_fn papr_hypercall_table[(MAX_HCALL_OPCODE / 4) + 1];
> >  static spapr_hcall_fn kvmppc_hypercall_table[KVMPPC_HCALL_MAX - KVMPPC_HCALL_BASE + 1];
> >  static spapr_hcall_fn svm_hypercall_table[(SVM_HCALL_MAX - SVM_HCALL_BASE) / 4 + 1];
> > @@ -1545,6 +1598,9 @@ static void hypercall_register_types(void)
> >      spapr_register_hypercall(KVMPPC_H_CAS, h_client_architecture_support);
> >  
> >      spapr_register_hypercall(KVMPPC_H_UPDATE_DT, h_update_dt);
> > +
> > +    /* SNS memory area registration */
> > +    spapr_register_hypercall(H_REG_SNS, h_reg_sns);
> 
> You definitely need a machine parameter to enable this, and you should
> only enable it by default for newer machine types.  Otherwise you will
> cause migration breakages.

Sure.

> 
> >  }
> >  
> >  type_init(hypercall_register_types)
> > diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> > index 637652ad16..934f9e066e 100644
> > --- a/include/hw/ppc/spapr.h
> > +++ b/include/hw/ppc/spapr.h
> > @@ -252,6 +252,8 @@ struct SpaprMachineState {
> >      uint32_t numa_assoc_array[MAX_NODES + NVGPU_MAX_NUM][NUMA_ASSOC_SIZE];
> >  
> >      Error *fwnmi_migration_blocker;
> > +    uint64_t sns_addr;
> > +    uint64_t sns_len;
> >  };
> >  
> >  #define H_SUCCESS         0
> > @@ -549,6 +551,7 @@ struct SpaprMachineState {
> >  #define H_SCM_UNBIND_MEM        0x3F0
> >  #define H_SCM_UNBIND_ALL        0x3FC
> >  #define H_SCM_HEALTH            0x400
> > +#define H_REG_SNS               0x41C
> >  #define H_RPT_INVALIDATE        0x448
> >  
> >  #define MAX_HCALL_OPCODE        H_RPT_INVALIDATE
> > diff --git a/include/hw/ppc/spapr_irq.h b/include/hw/ppc/spapr_irq.h
> > index c22a72c9e2..26c680f065 100644
> > --- a/include/hw/ppc/spapr_irq.h
> > +++ b/include/hw/ppc/spapr_irq.h
> > @@ -21,6 +21,7 @@
> >  #define SPAPR_XIRQ_BASE      XICS_IRQ_BASE /* 0x1000 */
> >  #define SPAPR_IRQ_EPOW       (SPAPR_XIRQ_BASE + 0x0000)
> >  #define SPAPR_IRQ_HOTPLUG    (SPAPR_XIRQ_BASE + 0x0001)
> > +#define SPAPR_IRQ_SNS        (SPAPR_XIRQ_BASE + 0x0002)
> >  #define SPAPR_IRQ_VIO        (SPAPR_XIRQ_BASE + 0x0100)  /* 256 VIO devices */
> >  #define SPAPR_IRQ_PCI_LSI    (SPAPR_XIRQ_BASE + 0x0200)  /* 32+ PHBs devices */
> >  
> > diff --git a/linux-headers/asm-powerpc/kvm.h b/linux-headers/asm-powerpc/kvm.h
> > index 9f18fa090f..d72739126a 100644
> > --- a/linux-headers/asm-powerpc/kvm.h
> > +++ b/linux-headers/asm-powerpc/kvm.h
> > @@ -470,6 +470,12 @@ struct kvm_ppc_cpu_char {
> >  #define KVM_PPC_CPU_BEHAV_BNDS_CHK_SPEC_BAR	(1ULL << 61)
> >  #define KVM_PPC_CPU_BEHAV_FLUSH_COUNT_CACHE	(1ull << 58)
> >  
> > +/* For KVM_PPC_SET_SNS */
> > +struct kvm_ppc_sns_reg {
> > +	__u64 addr;
> > +	__u64 len;
> > +};
> > +
> 
> Updates to linux-headers/ should be done as a separate preliminary
> patch, listing the specific kernel commit that you're updating too.

Yes, I am aware of it. Since the kernel patches are still in RFC
state, I noted this as a TODO in patch description :-)

Thanks for your review.

Regards,
Bharata.

Re: [RFC PATCH v0 1/2] spapr: Add H_REG_SNS hcall
Posted by David Gibson 4 years, 5 months ago
On Wed, Aug 11, 2021 at 02:56:13PM +0530, Bharata B Rao wrote:
> On Mon, Aug 09, 2021 at 01:49:54PM +1000, David Gibson wrote:
> > On Thu, Aug 05, 2021 at 01:02:27PM +0530, Bharata B Rao wrote:
[snip]
> > > diff --git a/linux-headers/asm-powerpc/kvm.h b/linux-headers/asm-powerpc/kvm.h
> > > index 9f18fa090f..d72739126a 100644
> > > --- a/linux-headers/asm-powerpc/kvm.h
> > > +++ b/linux-headers/asm-powerpc/kvm.h
> > > @@ -470,6 +470,12 @@ struct kvm_ppc_cpu_char {
> > >  #define KVM_PPC_CPU_BEHAV_BNDS_CHK_SPEC_BAR	(1ULL << 61)
> > >  #define KVM_PPC_CPU_BEHAV_FLUSH_COUNT_CACHE	(1ull << 58)
> > >  
> > > +/* For KVM_PPC_SET_SNS */
> > > +struct kvm_ppc_sns_reg {
> > > +	__u64 addr;
> > > +	__u64 len;
> > > +};
> > > +
> > 
> > Updates to linux-headers/ should be done as a separate preliminary
> > patch, listing the specific kernel commit that you're updating too.
> 
> Yes, I am aware of it. Since the kernel patches are still in RFC
> state, I noted this as a TODO in patch description :-)

Sorry, I missed that.  In general, even for draft posts, I'd suggest
doing the linux-headers/ updates as a separate patch (but you can
construct that ad-hoc).

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson
Re: [RFC PATCH v0 1/2] spapr: Add H_REG_SNS hcall
Posted by Fabiano Rosas 4 years, 6 months ago
Bharata B Rao <bharata@linux.ibm.com> writes:

> Add support for H_REG_SNS hcall so that asynchronous page
> fault mechanism can be supported on PowerKVM guests.
>
> This hcall essentially issues KVM_PPC_SET_SNS to let the
> host map and pin the memory containing the Subvention
> Notification Structure. It also claims SPAPR_IRQ_SNS to
> be used as subvention notification interrupt.
>
> Note: Updates to linux-headers/linux/kvm.h are temporary
> pending headers update.
>
> Signed-off-by: Bharata B Rao <bharata@linux.ibm.com>

...

> +static target_ulong h_reg_sns(PowerPCCPU *cpu, SpaprMachineState *spapr,
> +                              target_ulong opcode, target_ulong *args)
> +{
> +    target_ulong addr = args[0];
> +    target_ulong len = args[1];
> +
> +    if (addr == -1) {
> +        return deregister_sns(cpu, spapr);
> +    }
> +
> +    /*
> +     * If SNS area is already registered, can't register again before
> +     * deregistering it first.
> +     */
> +    if (spapr->sns_addr == -1) {
> +        return H_PARAMETER;
> +    }

Don't you mean (spapr->sns_addr != -1) ?

> +
> +    if (!QEMU_IS_ALIGNED(addr, 4096)) {
> +        return H_PARAMETER;
> +    }
> +
> +    if (len < 256) {
> +        return H_P2;
> +    }
> +
> +    /* TODO: SNS area is not allowed to cross a page boundary */
> +
> +    /* KVM_PPC_SET_SNS ioctl */
> +    if (kvmppc_set_sns_reg(addr, len)) {
> +        return H_PARAMETER;
> +    }
> +
> +    /* Record SNS addr and len */
> +    spapr->sns_addr = addr;
> +    spapr->sns_len = len;
> +
> +    /* Register irq source for sending ESN notification */
> +    spapr_irq_claim(spapr, SPAPR_IRQ_SNS, false, &error_fatal);
> +    args[1] = SPAPR_IRQ_SNS; /* irq no in R5 */
> +
> +    return H_SUCCESS;
> +}