The new get_event_info function allows the guest to query the presence
of multiple events with a single SBI call. Currently, the perf driver
in a Linux guest invokes it for all the standard SBI PMU events.
Implement the SBI function in KVM as well.
Signed-off-by: Atish Patra <atishp@rivosinc.com>
---
arch/riscv/include/asm/kvm_vcpu_pmu.h | 3 ++
arch/riscv/kvm/vcpu_pmu.c | 67 +++++++++++++++++++++++++++++++++++
arch/riscv/kvm/vcpu_sbi_pmu.c | 3 ++
3 files changed, 73 insertions(+)
diff --git a/arch/riscv/include/asm/kvm_vcpu_pmu.h b/arch/riscv/include/asm/kvm_vcpu_pmu.h
index 1d85b6617508..9a930afc8f57 100644
--- a/arch/riscv/include/asm/kvm_vcpu_pmu.h
+++ b/arch/riscv/include/asm/kvm_vcpu_pmu.h
@@ -98,6 +98,9 @@ void kvm_riscv_vcpu_pmu_init(struct kvm_vcpu *vcpu);
int kvm_riscv_vcpu_pmu_snapshot_set_shmem(struct kvm_vcpu *vcpu, unsigned long saddr_low,
unsigned long saddr_high, unsigned long flags,
struct kvm_vcpu_sbi_return *retdata);
+int kvm_riscv_vcpu_pmu_event_info(struct kvm_vcpu *vcpu, unsigned long saddr_low,
+ unsigned long saddr_high, unsigned long num_events,
+ unsigned long flags, struct kvm_vcpu_sbi_return *retdata);
void kvm_riscv_vcpu_pmu_deinit(struct kvm_vcpu *vcpu);
void kvm_riscv_vcpu_pmu_reset(struct kvm_vcpu *vcpu);
diff --git a/arch/riscv/kvm/vcpu_pmu.c b/arch/riscv/kvm/vcpu_pmu.c
index efd66835c2b8..a30f7ec31479 100644
--- a/arch/riscv/kvm/vcpu_pmu.c
+++ b/arch/riscv/kvm/vcpu_pmu.c
@@ -456,6 +456,73 @@ int kvm_riscv_vcpu_pmu_snapshot_set_shmem(struct kvm_vcpu *vcpu, unsigned long s
return 0;
}
+int kvm_riscv_vcpu_pmu_event_info(struct kvm_vcpu *vcpu, unsigned long saddr_low,
+ unsigned long saddr_high, unsigned long num_events,
+ unsigned long flags, struct kvm_vcpu_sbi_return *retdata)
+{
+ unsigned long hva;
+ struct riscv_pmu_event_info *einfo;
+ int shmem_size = num_events * sizeof(*einfo);
+ bool writable;
+ gpa_t shmem;
+ u32 eidx, etype;
+ u64 econfig;
+ int ret;
+
+ if (flags != 0 || (saddr_low & (SZ_16 - 1))) {
+ ret = SBI_ERR_INVALID_PARAM;
+ goto out;
+ }
+
+ shmem = saddr_low;
+ if (saddr_high != 0) {
+ if (IS_ENABLED(CONFIG_32BIT)) {
+ shmem |= ((gpa_t)saddr_high << 32);
+ } else {
+ ret = SBI_ERR_INVALID_ADDRESS;
+ goto out;
+ }
+ }
+
+ hva = kvm_vcpu_gfn_to_hva_prot(vcpu, shmem >> PAGE_SHIFT, &writable);
+ if (kvm_is_error_hva(hva) || !writable) {
+ ret = SBI_ERR_INVALID_ADDRESS;
+ goto out;
+ }
+
+ einfo = kzalloc(shmem_size, GFP_KERNEL);
+ if (!einfo)
+ return -ENOMEM;
+
+ ret = kvm_vcpu_read_guest(vcpu, shmem, einfo, shmem_size);
+ if (ret) {
+ ret = SBI_ERR_FAILURE;
+ goto free_mem;
+ }
+
+ for (int i = 0; i < num_events; i++) {
+ eidx = einfo[i].event_idx;
+ etype = kvm_pmu_get_perf_event_type(eidx);
+ econfig = kvm_pmu_get_perf_event_config(eidx, einfo[i].event_data);
+ ret = riscv_pmu_get_event_info(etype, econfig, NULL);
+ if (ret > 0)
+ einfo[i].output = 1;
+ }
+
+ ret = kvm_vcpu_write_guest(vcpu, shmem, einfo, shmem_size);
+ if (ret) {
+ ret = SBI_ERR_FAILURE;
+ goto free_mem;
+ }
+
+free_mem:
+ kfree(einfo);
+out:
+ retdata->err_val = ret;
+
+ return 0;
+}
+
int kvm_riscv_vcpu_pmu_num_ctrs(struct kvm_vcpu *vcpu,
struct kvm_vcpu_sbi_return *retdata)
{
diff --git a/arch/riscv/kvm/vcpu_sbi_pmu.c b/arch/riscv/kvm/vcpu_sbi_pmu.c
index e4be34e03e83..a020d979d179 100644
--- a/arch/riscv/kvm/vcpu_sbi_pmu.c
+++ b/arch/riscv/kvm/vcpu_sbi_pmu.c
@@ -73,6 +73,9 @@ static int kvm_sbi_ext_pmu_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
case SBI_EXT_PMU_SNAPSHOT_SET_SHMEM:
ret = kvm_riscv_vcpu_pmu_snapshot_set_shmem(vcpu, cp->a0, cp->a1, cp->a2, retdata);
break;
+ case SBI_EXT_PMU_EVENT_GET_INFO:
+ ret = kvm_riscv_vcpu_pmu_event_info(vcpu, cp->a0, cp->a1, cp->a2, cp->a3, retdata);
+ break;
default:
retdata->err_val = SBI_ERR_NOT_SUPPORTED;
}
--
2.34.1
Hi Atish,
On 2024-11-19 2:29 PM, Atish Patra wrote:
> The new get_event_info function allows the guest to query the presence
> of multiple events with a single SBI call. Currently, the perf driver
> in a Linux guest invokes it for all the standard SBI PMU events.
> Implement the SBI function in KVM as well.
>
> Signed-off-by: Atish Patra <atishp@rivosinc.com>
> ---
> arch/riscv/include/asm/kvm_vcpu_pmu.h | 3 ++
> arch/riscv/kvm/vcpu_pmu.c | 67 +++++++++++++++++++++++++++++++++++
> arch/riscv/kvm/vcpu_sbi_pmu.c | 3 ++
> 3 files changed, 73 insertions(+)
>
> diff --git a/arch/riscv/include/asm/kvm_vcpu_pmu.h b/arch/riscv/include/asm/kvm_vcpu_pmu.h
> index 1d85b6617508..9a930afc8f57 100644
> --- a/arch/riscv/include/asm/kvm_vcpu_pmu.h
> +++ b/arch/riscv/include/asm/kvm_vcpu_pmu.h
> @@ -98,6 +98,9 @@ void kvm_riscv_vcpu_pmu_init(struct kvm_vcpu *vcpu);
> int kvm_riscv_vcpu_pmu_snapshot_set_shmem(struct kvm_vcpu *vcpu, unsigned long saddr_low,
> unsigned long saddr_high, unsigned long flags,
> struct kvm_vcpu_sbi_return *retdata);
> +int kvm_riscv_vcpu_pmu_event_info(struct kvm_vcpu *vcpu, unsigned long saddr_low,
> + unsigned long saddr_high, unsigned long num_events,
> + unsigned long flags, struct kvm_vcpu_sbi_return *retdata);
> void kvm_riscv_vcpu_pmu_deinit(struct kvm_vcpu *vcpu);
> void kvm_riscv_vcpu_pmu_reset(struct kvm_vcpu *vcpu);
>
> diff --git a/arch/riscv/kvm/vcpu_pmu.c b/arch/riscv/kvm/vcpu_pmu.c
> index efd66835c2b8..a30f7ec31479 100644
> --- a/arch/riscv/kvm/vcpu_pmu.c
> +++ b/arch/riscv/kvm/vcpu_pmu.c
> @@ -456,6 +456,73 @@ int kvm_riscv_vcpu_pmu_snapshot_set_shmem(struct kvm_vcpu *vcpu, unsigned long s
> return 0;
> }
>
> +int kvm_riscv_vcpu_pmu_event_info(struct kvm_vcpu *vcpu, unsigned long saddr_low,
> + unsigned long saddr_high, unsigned long num_events,
> + unsigned long flags, struct kvm_vcpu_sbi_return *retdata)
> +{
> + unsigned long hva;
> + struct riscv_pmu_event_info *einfo;
> + int shmem_size = num_events * sizeof(*einfo);
> + bool writable;
> + gpa_t shmem;
> + u32 eidx, etype;
> + u64 econfig;
> + int ret;
> +
> + if (flags != 0 || (saddr_low & (SZ_16 - 1))) {
> + ret = SBI_ERR_INVALID_PARAM;
> + goto out;
> + }
> +
> + shmem = saddr_low;
> + if (saddr_high != 0) {
> + if (IS_ENABLED(CONFIG_32BIT)) {
> + shmem |= ((gpa_t)saddr_high << 32);
> + } else {
> + ret = SBI_ERR_INVALID_ADDRESS;
> + goto out;
> + }
> + }
> +
> + hva = kvm_vcpu_gfn_to_hva_prot(vcpu, shmem >> PAGE_SHIFT, &writable);
> + if (kvm_is_error_hva(hva) || !writable) {
> + ret = SBI_ERR_INVALID_ADDRESS;
This only checks the first page if the address crosses a page boundary. Maybe
that is okay since kvm_vcpu_read_guest()/kvm_vcpu_write_guest() will fail if a
later page is inaccessible?
> + goto out;
> + }
> +
> + einfo = kzalloc(shmem_size, GFP_KERNEL);
> + if (!einfo)
> + return -ENOMEM;
> +
> + ret = kvm_vcpu_read_guest(vcpu, shmem, einfo, shmem_size);
> + if (ret) {
> + ret = SBI_ERR_FAILURE;
> + goto free_mem;
> + }
> +
> + for (int i = 0; i < num_events; i++) {
> + eidx = einfo[i].event_idx;
> + etype = kvm_pmu_get_perf_event_type(eidx);
> + econfig = kvm_pmu_get_perf_event_config(eidx, einfo[i].event_data);
> + ret = riscv_pmu_get_event_info(etype, econfig, NULL);
> + if (ret > 0)
> + einfo[i].output = 1;
This also needs to write `output` in the else case to indicate that the event is
not supported; the spec does not require the caller to initialize bit 0 of
output to zero.
Regards,
Samuel
> + }
> +
> + ret = kvm_vcpu_write_guest(vcpu, shmem, einfo, shmem_size);
> + if (ret) {
> + ret = SBI_ERR_FAILURE;
> + goto free_mem;
> + }
> +
> +free_mem:
> + kfree(einfo);
> +out:
> + retdata->err_val = ret;
> +
> + return 0;
> +}
> +
> int kvm_riscv_vcpu_pmu_num_ctrs(struct kvm_vcpu *vcpu,
> struct kvm_vcpu_sbi_return *retdata)
> {
> diff --git a/arch/riscv/kvm/vcpu_sbi_pmu.c b/arch/riscv/kvm/vcpu_sbi_pmu.c
> index e4be34e03e83..a020d979d179 100644
> --- a/arch/riscv/kvm/vcpu_sbi_pmu.c
> +++ b/arch/riscv/kvm/vcpu_sbi_pmu.c
> @@ -73,6 +73,9 @@ static int kvm_sbi_ext_pmu_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
> case SBI_EXT_PMU_SNAPSHOT_SET_SHMEM:
> ret = kvm_riscv_vcpu_pmu_snapshot_set_shmem(vcpu, cp->a0, cp->a1, cp->a2, retdata);
> break;
> + case SBI_EXT_PMU_EVENT_GET_INFO:
> + ret = kvm_riscv_vcpu_pmu_event_info(vcpu, cp->a0, cp->a1, cp->a2, cp->a3, retdata);
> + break;
> default:
> retdata->err_val = SBI_ERR_NOT_SUPPORTED;
> }
>
On Mon, Dec 2, 2024 at 3:02 PM Samuel Holland <samuel.holland@sifive.com> wrote:
>
> Hi Atish,
>
> On 2024-11-19 2:29 PM, Atish Patra wrote:
> > The new get_event_info function allows the guest to query the presence
> > of multiple events with a single SBI call. Currently, the perf driver
> > in a Linux guest invokes it for all the standard SBI PMU events.
> > Implement the SBI function in KVM as well.
> >
> > Signed-off-by: Atish Patra <atishp@rivosinc.com>
> > ---
> > arch/riscv/include/asm/kvm_vcpu_pmu.h | 3 ++
> > arch/riscv/kvm/vcpu_pmu.c | 67 +++++++++++++++++++++++++++++++++++
> > arch/riscv/kvm/vcpu_sbi_pmu.c | 3 ++
> > 3 files changed, 73 insertions(+)
> >
> > diff --git a/arch/riscv/include/asm/kvm_vcpu_pmu.h b/arch/riscv/include/asm/kvm_vcpu_pmu.h
> > index 1d85b6617508..9a930afc8f57 100644
> > --- a/arch/riscv/include/asm/kvm_vcpu_pmu.h
> > +++ b/arch/riscv/include/asm/kvm_vcpu_pmu.h
> > @@ -98,6 +98,9 @@ void kvm_riscv_vcpu_pmu_init(struct kvm_vcpu *vcpu);
> > int kvm_riscv_vcpu_pmu_snapshot_set_shmem(struct kvm_vcpu *vcpu, unsigned long saddr_low,
> > unsigned long saddr_high, unsigned long flags,
> > struct kvm_vcpu_sbi_return *retdata);
> > +int kvm_riscv_vcpu_pmu_event_info(struct kvm_vcpu *vcpu, unsigned long saddr_low,
> > + unsigned long saddr_high, unsigned long num_events,
> > + unsigned long flags, struct kvm_vcpu_sbi_return *retdata);
> > void kvm_riscv_vcpu_pmu_deinit(struct kvm_vcpu *vcpu);
> > void kvm_riscv_vcpu_pmu_reset(struct kvm_vcpu *vcpu);
> >
> > diff --git a/arch/riscv/kvm/vcpu_pmu.c b/arch/riscv/kvm/vcpu_pmu.c
> > index efd66835c2b8..a30f7ec31479 100644
> > --- a/arch/riscv/kvm/vcpu_pmu.c
> > +++ b/arch/riscv/kvm/vcpu_pmu.c
> > @@ -456,6 +456,73 @@ int kvm_riscv_vcpu_pmu_snapshot_set_shmem(struct kvm_vcpu *vcpu, unsigned long s
> > return 0;
> > }
> >
> > +int kvm_riscv_vcpu_pmu_event_info(struct kvm_vcpu *vcpu, unsigned long saddr_low,
> > + unsigned long saddr_high, unsigned long num_events,
> > + unsigned long flags, struct kvm_vcpu_sbi_return *retdata)
> > +{
> > + unsigned long hva;
> > + struct riscv_pmu_event_info *einfo;
> > + int shmem_size = num_events * sizeof(*einfo);
> > + bool writable;
> > + gpa_t shmem;
> > + u32 eidx, etype;
> > + u64 econfig;
> > + int ret;
> > +
> > + if (flags != 0 || (saddr_low & (SZ_16 - 1))) {
> > + ret = SBI_ERR_INVALID_PARAM;
> > + goto out;
> > + }
> > +
> > + shmem = saddr_low;
> > + if (saddr_high != 0) {
> > + if (IS_ENABLED(CONFIG_32BIT)) {
> > + shmem |= ((gpa_t)saddr_high << 32);
> > + } else {
> > + ret = SBI_ERR_INVALID_ADDRESS;
> > + goto out;
> > + }
> > + }
> > +
> > + hva = kvm_vcpu_gfn_to_hva_prot(vcpu, shmem >> PAGE_SHIFT, &writable);
> > + if (kvm_is_error_hva(hva) || !writable) {
> > + ret = SBI_ERR_INVALID_ADDRESS;
>
> This only checks the first page if the address crosses a page boundary. Maybe
> that is okay since kvm_vcpu_read_guest()/kvm_vcpu_write_guest() will fail if a
> later page is inaccessible?
>
That's my assumption as well.
We could invoke kvm_vcpu_gfn_to_hva_prot in a loop to validate all of
the pages starting at shmem, though. The only difference would be the
error code returned (SBI_ERR_INVALID_ADDRESS vs. SBI_ERR_FAILURE),
which the specification allows either way.
Let me know if you think KVM must validate the entire range; I can add
it in v2, along the lines of the sketch below.
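
Untested sketch (assumes num_events is nonzero, so the range is
non-empty, and reuses hva/writable/ret from this function):

	/* Validate every guest page backing [shmem, shmem + shmem_size) */
	for (gfn_t gfn = shmem >> PAGE_SHIFT;
	     gfn <= (shmem + shmem_size - 1) >> PAGE_SHIFT; gfn++) {
		hva = kvm_vcpu_gfn_to_hva_prot(vcpu, gfn, &writable);
		if (kvm_is_error_hva(hva) || !writable) {
			ret = SBI_ERR_INVALID_ADDRESS;
			goto out;
		}
	}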
> > + goto out;
> > + }
> > +
> > + einfo = kzalloc(shmem_size, GFP_KERNEL);
> > + if (!einfo)
> > + return -ENOMEM;
> > +
> > + ret = kvm_vcpu_read_guest(vcpu, shmem, einfo, shmem_size);
> > + if (ret) {
> > + ret = SBI_ERR_FAILURE;
> > + goto free_mem;
> > + }
> > +
> > + for (int i = 0; i < num_events; i++) {
> > + eidx = einfo[i].event_idx;
> > + etype = kvm_pmu_get_perf_event_type(eidx);
> > + econfig = kvm_pmu_get_perf_event_config(eidx, einfo[i].event_data);
> > + ret = riscv_pmu_get_event_info(etype, econfig, NULL);
> > + if (ret > 0)
> > + einfo[i].output = 1;
>
> This also needs to write `output` in the else case to indicate that the event is
> not supported; the spec does not require the caller to initialize bit 0 of
> output to zero.
>
Sure. Added.
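For v2 the loop will write the field in both cases, roughly:

	ret = riscv_pmu_get_event_info(etype, econfig, NULL);
	/* Bit 0 of output indicates whether the event is supported */
	einfo[i].output = (ret > 0) ? 1 : 0;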
> Regards,
> Samuel
>
> > + }
> > +
> > + ret = kvm_vcpu_write_guest(vcpu, shmem, einfo, shmem_size);
> > + if (ret) {
> > + ret = SBI_ERR_FAILURE;
> > + goto free_mem;
> > + }
> > +
> > +free_mem:
> > + kfree(einfo);
> > +out:
> > + retdata->err_val = ret;
> > +
> > + return 0;
> > +}
> > +
> > int kvm_riscv_vcpu_pmu_num_ctrs(struct kvm_vcpu *vcpu,
> > struct kvm_vcpu_sbi_return *retdata)
> > {
> > diff --git a/arch/riscv/kvm/vcpu_sbi_pmu.c b/arch/riscv/kvm/vcpu_sbi_pmu.c
> > index e4be34e03e83..a020d979d179 100644
> > --- a/arch/riscv/kvm/vcpu_sbi_pmu.c
> > +++ b/arch/riscv/kvm/vcpu_sbi_pmu.c
> > @@ -73,6 +73,9 @@ static int kvm_sbi_ext_pmu_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
> > case SBI_EXT_PMU_SNAPSHOT_SET_SHMEM:
> > ret = kvm_riscv_vcpu_pmu_snapshot_set_shmem(vcpu, cp->a0, cp->a1, cp->a2, retdata);
> > break;
> > + case SBI_EXT_PMU_EVENT_GET_INFO:
> > + ret = kvm_riscv_vcpu_pmu_event_info(vcpu, cp->a0, cp->a1, cp->a2, cp->a3, retdata);
> > + break;
> > default:
> > retdata->err_val = SBI_ERR_NOT_SUPPORTED;
> > }
> >
>