[PATCH] xen/x86: Make XEN_DOMCTL_get_vcpu_msrs more configurable

Tamas K Lengyel posted 1 patch 1 year, 6 months ago
Test gitlab-ci failed
Patches applied successfully (tree, apply log)
git fetch https://gitlab.com/xen-project/patchew/xen tags/patchew/854cdedcdd2bfff08ea45a3c13367c610d710aaf.1666630317.git.tamas.lengyel@intel.com
tools/include/xenctrl.h              |  4 +++
tools/libs/ctrl/xc_domain.c          | 35 ++++++++++++++++++++++++++
tools/libs/guest/xg_sr_save_x86_pv.c |  2 ++
xen/arch/x86/cpu/vpmu.c              | 10 ++++++++
xen/arch/x86/cpu/vpmu_amd.c          |  7 ++++++
xen/arch/x86/cpu/vpmu_intel.c        | 37 ++++++++++++++++++++++++++++
xen/arch/x86/domctl.c                | 35 +++++++++++++++++---------
xen/arch/x86/include/asm/vpmu.h      |  2 ++
8 files changed, 120 insertions(+), 12 deletions(-)
[PATCH] xen/x86: Make XEN_DOMCTL_get_vcpu_msrs more configurable
Posted by Tamas K Lengyel 1 year, 6 months ago
Currently, XEN_DOMCTL_get_vcpu_msrs is only capable of gathering a handful
of predetermined vcpu MSRs. In our use-case, gathering the vPMU MSRs by an
external privileged tool is necessary, thus we extend the domctl to allow
querying for any guest MSRs. To remain compatible with the existing setup,
if no specific MSR is requested via the domctl, the default list is returned.

Signed-off-by: Tamas K Lengyel <tamas.lengyel@intel.com>
---
 tools/include/xenctrl.h              |  4 +++
 tools/libs/ctrl/xc_domain.c          | 35 ++++++++++++++++++++++++++
 tools/libs/guest/xg_sr_save_x86_pv.c |  2 ++
 xen/arch/x86/cpu/vpmu.c              | 10 ++++++++
 xen/arch/x86/cpu/vpmu_amd.c          |  7 ++++++
 xen/arch/x86/cpu/vpmu_intel.c        | 37 ++++++++++++++++++++++++++++
 xen/arch/x86/domctl.c                | 35 +++++++++++++++++---------
 xen/arch/x86/include/asm/vpmu.h      |  2 ++
 8 files changed, 120 insertions(+), 12 deletions(-)

diff --git a/tools/include/xenctrl.h b/tools/include/xenctrl.h
index 0c8b4c3aa7..04244213bf 100644
--- a/tools/include/xenctrl.h
+++ b/tools/include/xenctrl.h
@@ -872,6 +872,10 @@ int xc_vcpu_getinfo(xc_interface *xch,
                     uint32_t vcpu,
                     xc_vcpuinfo_t *info);
 
+typedef struct xen_domctl_vcpu_msr xc_vcpumsr_t;
+int xc_vcpu_get_msrs(xc_interface *xch, uint32_t domid, uint32_t vcpu,
+                     uint32_t count, xc_vcpumsr_t *msrs);
+
 long long xc_domain_get_cpu_usage(xc_interface *xch,
                                   uint32_t domid,
                                   int vcpu);
diff --git a/tools/libs/ctrl/xc_domain.c b/tools/libs/ctrl/xc_domain.c
index 14c0420c35..d3a7e1fea6 100644
--- a/tools/libs/ctrl/xc_domain.c
+++ b/tools/libs/ctrl/xc_domain.c
@@ -2201,6 +2201,41 @@ int xc_domain_soft_reset(xc_interface *xch,
     domctl.domain = domid;
     return do_domctl(xch, &domctl);
 }
+
+int xc_vcpu_get_msrs(xc_interface *xch, uint32_t domid, uint32_t vcpu,
+                     uint32_t count, xc_vcpumsr_t *msrs)
+{
+    int rc;
+    DECLARE_DOMCTL;
+    domctl.cmd = XEN_DOMCTL_get_vcpu_msrs;
+    domctl.domain = domid;
+    domctl.u.vcpu_msrs.vcpu = vcpu;
+    domctl.u.vcpu_msrs.msr_count = count;
+
+    if ( !msrs )
+    {
+        if ( (rc = xc_domctl(xch, &domctl)) < 0 )
+            return rc;
+
+        return domctl.u.vcpu_msrs.msr_count;
+    }
+    else
+    {
+        DECLARE_HYPERCALL_BOUNCE(msrs, count * sizeof(xc_vcpumsr_t), XC_HYPERCALL_BUFFER_BOUNCE_BOTH);
+
+        if ( xc_hypercall_bounce_pre(xch, msrs) )
+            return -1;
+
+        set_xen_guest_handle(domctl.u.vcpu_msrs.msrs, msrs);
+
+        rc = do_domctl(xch, &domctl);
+
+        xc_hypercall_bounce_post(xch, msrs);
+
+        return rc;
+    }
+}
+
 /*
  * Local variables:
  * mode: C
diff --git a/tools/libs/guest/xg_sr_save_x86_pv.c b/tools/libs/guest/xg_sr_save_x86_pv.c
index 4964f1f7b8..7ac313bf3f 100644
--- a/tools/libs/guest/xg_sr_save_x86_pv.c
+++ b/tools/libs/guest/xg_sr_save_x86_pv.c
@@ -719,6 +719,8 @@ static int write_one_vcpu_msrs(struct xc_sr_context *ctx, uint32_t id)
         goto err;
     }
 
+    memset(buffer, 0, buffersz);
+
     set_xen_guest_handle(domctl.u.vcpu_msrs.msrs, buffer);
     if ( xc_domctl(xch, &domctl) < 0 )
     {
diff --git a/xen/arch/x86/cpu/vpmu.c b/xen/arch/x86/cpu/vpmu.c
index 64cdbfc48c..438dfbe196 100644
--- a/xen/arch/x86/cpu/vpmu.c
+++ b/xen/arch/x86/cpu/vpmu.c
@@ -651,6 +651,16 @@ void vpmu_dump(struct vcpu *v)
         alternative_vcall(vpmu_ops.arch_vpmu_dump, v);
 }
 
+int vpmu_get_msr(struct vcpu *v, unsigned int msr, uint64_t *val)
+{
+    ASSERT(v != current);
+
+    if ( !vpmu_is_set(vcpu_vpmu(v), VPMU_CONTEXT_ALLOCATED) )
+        return -EOPNOTSUPP;
+
+    return alternative_call(vpmu_ops.get_msr, v, msr, val);
+}
+
 long do_xenpmu_op(
     unsigned int op, XEN_GUEST_HANDLE_PARAM(xen_pmu_params_t) arg)
 {
diff --git a/xen/arch/x86/cpu/vpmu_amd.c b/xen/arch/x86/cpu/vpmu_amd.c
index 58794a16f0..75bd68e541 100644
--- a/xen/arch/x86/cpu/vpmu_amd.c
+++ b/xen/arch/x86/cpu/vpmu_amd.c
@@ -518,6 +518,12 @@ static int cf_check svm_vpmu_initialise(struct vcpu *v)
     return 0;
 }
 
+static int cf_check amd_get_msr(struct vcpu *v, unsigned int msr, uint64_t *val)
+{
+    /* TODO in case an external tool needs access to these MSRs */
+    return -ENOSYS;
+}
+
 #ifdef CONFIG_MEM_SHARING
 static int cf_check amd_allocate_context(struct vcpu *v)
 {
@@ -535,6 +541,7 @@ static const struct arch_vpmu_ops __initconst_cf_clobber amd_vpmu_ops = {
     .arch_vpmu_save = amd_vpmu_save,
     .arch_vpmu_load = amd_vpmu_load,
     .arch_vpmu_dump = amd_vpmu_dump,
+    .get_msr = amd_get_msr,
 
 #ifdef CONFIG_MEM_SHARING
     .allocate_context = amd_allocate_context,
diff --git a/xen/arch/x86/cpu/vpmu_intel.c b/xen/arch/x86/cpu/vpmu_intel.c
index b91d818be0..b4b6ecfb15 100644
--- a/xen/arch/x86/cpu/vpmu_intel.c
+++ b/xen/arch/x86/cpu/vpmu_intel.c
@@ -898,6 +898,42 @@ static int cf_check vmx_vpmu_initialise(struct vcpu *v)
     return 0;
 }
 
+static int cf_check core2_vpmu_get_msr(struct vcpu *v, unsigned int msr,
+                                       uint64_t *val)
+{
+    int type, index, ret = 0;
+    struct vpmu_struct *vpmu = vcpu_vpmu(v);
+    struct xen_pmu_intel_ctxt *core2_vpmu_cxt = vpmu->context;
+    uint64_t *fixed_counters = vpmu_reg_pointer(core2_vpmu_cxt, fixed_counters);
+    struct xen_pmu_cntr_pair *xen_pmu_cntr_pair =
+        vpmu_reg_pointer(core2_vpmu_cxt, arch_counters);
+
+    if ( !is_core2_vpmu_msr(msr, &type, &index) )
+        return -EINVAL;
+
+    vcpu_pause(v);
+
+    if ( msr == MSR_CORE_PERF_GLOBAL_OVF_CTRL )
+        *val = core2_vpmu_cxt->global_ovf_ctrl;
+    else if ( msr == MSR_CORE_PERF_GLOBAL_STATUS )
+        *val = core2_vpmu_cxt->global_status;
+    else if ( msr == MSR_CORE_PERF_GLOBAL_CTRL )
+        *val = core2_vpmu_cxt->global_ctrl;
+    else if ( msr >= MSR_CORE_PERF_FIXED_CTR0 &&
+              msr < MSR_CORE_PERF_FIXED_CTR0 + fixed_pmc_cnt )
+        *val = fixed_counters[msr - MSR_CORE_PERF_FIXED_CTR0];
+    else if ( msr >= MSR_P6_PERFCTR(0) && msr < MSR_P6_PERFCTR(arch_pmc_cnt) )
+        *val = xen_pmu_cntr_pair[msr - MSR_P6_PERFCTR(0)].counter;
+    else if ( msr >= MSR_P6_EVNTSEL(0) && msr < MSR_P6_EVNTSEL(arch_pmc_cnt) )
+        *val = xen_pmu_cntr_pair[msr - MSR_P6_EVNTSEL(0)].control;
+    else
+        ret = -EINVAL;
+
+    vcpu_unpause(v);
+
+    return ret;
+}
+
 static const struct arch_vpmu_ops __initconst_cf_clobber core2_vpmu_ops = {
     .initialise = vmx_vpmu_initialise,
     .do_wrmsr = core2_vpmu_do_wrmsr,
@@ -907,6 +943,7 @@ static const struct arch_vpmu_ops __initconst_cf_clobber core2_vpmu_ops = {
     .arch_vpmu_save = core2_vpmu_save,
     .arch_vpmu_load = core2_vpmu_load,
     .arch_vpmu_dump = core2_vpmu_dump,
+    .get_msr = core2_vpmu_get_msr,
 
 #ifdef CONFIG_MEM_SHARING
     .allocate_context = core2_vpmu_alloc_resource,
diff --git a/xen/arch/x86/domctl.c b/xen/arch/x86/domctl.c
index e9bfbc57a7..c481aa8575 100644
--- a/xen/arch/x86/domctl.c
+++ b/xen/arch/x86/domctl.c
@@ -1104,8 +1104,7 @@ long arch_do_domctl(
             break;
 
         ret = -EINVAL;
-        if ( (v == curr) || /* no vcpu_pause() */
-             !is_pv_domain(d) )
+        if ( v == curr )
             break;
 
         /* Count maximum number of optional msrs. */
@@ -1127,36 +1126,48 @@ long arch_do_domctl(
 
                 vcpu_pause(v);
 
-                for ( j = 0; j < ARRAY_SIZE(msrs_to_send); ++j )
+                for ( j = 0; j < ARRAY_SIZE(msrs_to_send) && i < vmsrs->msr_count; ++j )
                 {
                     uint64_t val;
-                    int rc = guest_rdmsr(v, msrs_to_send[j], &val);
+                    int rc;
+
+                    if ( copy_from_guest_offset(&msr, vmsrs->msrs, i, 1) )
+                    {
+                        ret = -EFAULT;
+                        break;
+                    }
+
+                    msr.index = msr.index ?: msrs_to_send[j];
+
+                    rc = guest_rdmsr(v, msr.index, &val);
 
                     /*
                      * It is the programmers responsibility to ensure that
-                     * msrs_to_send[] contain generally-read/write MSRs.
+                     * the msr requested contain generally-read/write MSRs.
                      * X86EMUL_EXCEPTION here implies a missing feature, and
                      * that the guest doesn't have access to the MSR.
                      */
                     if ( rc == X86EMUL_EXCEPTION )
                         continue;
+                    if ( rc == X86EMUL_UNHANDLEABLE )
+                        ret = vpmu_get_msr(v, msr.index, &val);
+                    else
+                        ret = (rc == X86EMUL_OKAY) ? 0 : -ENXIO;
 
-                    if ( rc != X86EMUL_OKAY )
+                    if ( ret )
                     {
                         ASSERT_UNREACHABLE();
-                        ret = -ENXIO;
                         break;
                     }
 
                     if ( !val )
                         continue; /* Skip empty MSRs. */
 
-                    if ( i < vmsrs->msr_count && !ret )
+                    msr.value = val;
+                    if ( copy_to_guest_offset(vmsrs->msrs, i, &msr, 1) )
                     {
-                        msr.index = msrs_to_send[j];
-                        msr.value = val;
-                        if ( copy_to_guest_offset(vmsrs->msrs, i, &msr, 1) )
-                            ret = -EFAULT;
+                        ret = -EFAULT;
+                        break;
                     }
                     ++i;
                 }
diff --git a/xen/arch/x86/include/asm/vpmu.h b/xen/arch/x86/include/asm/vpmu.h
index 05e1fbfccf..2fcf570b25 100644
--- a/xen/arch/x86/include/asm/vpmu.h
+++ b/xen/arch/x86/include/asm/vpmu.h
@@ -47,6 +47,7 @@ struct arch_vpmu_ops {
     int (*arch_vpmu_save)(struct vcpu *v, bool_t to_guest);
     int (*arch_vpmu_load)(struct vcpu *v, bool_t from_guest);
     void (*arch_vpmu_dump)(const struct vcpu *);
+    int (*get_msr)(struct vcpu *v, unsigned int msr, uint64_t *val);
 
 #ifdef CONFIG_MEM_SHARING
     int (*allocate_context)(struct vcpu *v);
@@ -117,6 +118,7 @@ void vpmu_save(struct vcpu *v);
 void cf_check vpmu_save_force(void *arg);
 int vpmu_load(struct vcpu *v, bool_t from_guest);
 void vpmu_dump(struct vcpu *v);
+int vpmu_get_msr(struct vcpu *v, unsigned int msr, uint64_t *val);
 
 static inline int vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content)
 {
-- 
2.34.1
Re: [PATCH] xen/x86: Make XEN_DOMCTL_get_vcpu_msrs more configurable
Posted by Andrew Cooper 1 year, 6 months ago
On 24/10/2022 17:58, Tamas K Lengyel wrote:
> Currently the XEN_DOMCTL_get_vcpu_msrs is only capable of gathering a handful
> of predetermined vcpu MSRs. In our use-case gathering the vPMU MSRs by an
> external privileged tool is necessary, thus we extend the domctl to allow for
> querying for any guest MSRs. To remain compatible with the existing setup if
> no specific MSR is requested via the domctl the default list is returned.
>
> Signed-off-by: Tamas K Lengyel <tamas.lengyel@intel.com>

Naming aside, XEN_DOMCTL_{get,set}_vcpu_msrs is supposed to be "get me
all MSRs needed to migrate a vCPU".  (I do intend to retire the
hypercall as part of fixing the Xen side of migration, but that's ages away)

It seems like what you want is something more like
XEN_DOMCTL_{rd,wr}msr_list  (convenient timing, given the recent ISE
update).  I think those would be better as a separate pair of
hypercalls, rather than trying to repurpose an existing hypercall.


As for actually getting the values, please fix up guest_{rd,wr}msr() to
access the perf MSRs safely.  I know the vpmu MSR handling is in a
tragic state, but this new get_msr subop is making the problem even more
tangled.

~Andrew
Re: [PATCH] xen/x86: Make XEN_DOMCTL_get_vcpu_msrs more configurable
Posted by Tamas K Lengyel 1 year, 6 months ago
On Wed, Oct 26, 2022, 7:06 AM Andrew Cooper <Andrew.Cooper3@citrix.com>
wrote:

> On 24/10/2022 17:58, Tamas K Lengyel wrote:
> > Currently the XEN_DOMCTL_get_vcpu_msrs is only capable of gathering a
> handful
> > of predetermined vcpu MSRs. In our use-case gathering the vPMU MSRs by an
> > external privileged tool is necessary, thus we extend the domctl to
> allow for
> > querying for any guest MSRs. To remain compatible with the existing
> setup if
> > no specific MSR is requested via the domctl the default list is returned.
> >
> > Signed-off-by: Tamas K Lengyel <tamas.lengyel@intel.com>
>
> Naming aside, XEN_DOMCTL_{get,set}_vcpu_msrs is supposed to be "get me
> all MSRs needed to migrate a vCPU".  (I do intend to retire the
> hypercall as part of fixing the Xen side of migration, but that's ages
> away)
>
> It seems like what you want is something more like
> XEN_DOMCTL_{rd,wr}msr_list  (convenient timing, given the recent ISE
> update).  I think those would be better as a separate pair of
> hypercalls, rather than trying to repurpose an existing hypercall.
>
>
> As for actually getting the values, please fix up guest_{rd,wr}msr() to
> access the perf MSRs safely.  I know the vpmu MSR handling is in a
> tragic state, but this new get_msr subop is making the problem even more
> tangled.
>

Adding a separate hypercall is fine. Unfortunately wiring it into
guest_rdmsr failed on the first attempt when I tried. This is because the
guest itself will hit that path when it reads its own vpmu msrs. The
guest_rdmsr actually fails in that path and a separate fall-back path is
where the vpmu do_rdmsr is called. Now if I wire in the vpmu msrs into
guest_rdmsr I short circuit the existing setup and it looked like a can of
worms. I would have to figure out who is trying to get the vpmu msrs and do
things differently based on that, and the only info we have is if v ==
current. That just looked fragile to me.

Tamas

>
Re: [PATCH] xen/x86: Make XEN_DOMCTL_get_vcpu_msrs more configurable
Posted by Jan Beulich 1 year, 6 months ago
On 26.10.2022 13:34, Tamas K Lengyel wrote:
> On Wed, Oct 26, 2022, 7:06 AM Andrew Cooper <Andrew.Cooper3@citrix.com>
> wrote:
> 
>> On 24/10/2022 17:58, Tamas K Lengyel wrote:
>>> Currently the XEN_DOMCTL_get_vcpu_msrs is only capable of gathering a
>> handful
>>> of predetermined vcpu MSRs. In our use-case gathering the vPMU MSRs by an
>>> external privileged tool is necessary, thus we extend the domctl to
>> allow for
>>> querying for any guest MSRs. To remain compatible with the existing
>> setup if
>>> no specific MSR is requested via the domctl the default list is returned.
>>>
>>> Signed-off-by: Tamas K Lengyel <tamas.lengyel@intel.com>
>>
>> Naming aside, XEN_DOMCTL_{get,set}_vcpu_msrs is supposed to be "get me
>> all MSRs needed to migrate a vCPU".  (I do intend to retire the
>> hypercall as part of fixing the Xen side of migration, but that's ages
>> away)
>>
>> It seems like what you want is something more like
>> XEN_DOMCTL_{rd,wr}msr_list  (convenient timing, given the recent ISE
>> update).  I think those would be better as a separate pair of
>> hypercalls, rather than trying to repurpose an existing hypercall.
>>
>>
>> As for actually getting the values, please fix up guest_{rd,wr}msr() to
>> access the perf MSRs safely.  I know the vpmu MSR handling is in a
>> tragic state, but this new get_msr subop is making the problem even more
>> tangled.
>>
> 
> Adding a separate hypercall is fine.

Which will then also avoid altering the behavior of the existing hypercall:
You can't just assume e.g. input fields to be zeroed (or otherwise
suitably set) by existing callers, when those were previously output only.

Jan
Re: [PATCH] xen/x86: Make XEN_DOMCTL_get_vcpu_msrs more configurable
Posted by Tamas K Lengyel 1 year, 6 months ago
On Wed, Oct 26, 2022, 7:48 AM Jan Beulich <jbeulich@suse.com> wrote:

> On 26.10.2022 13:34, Tamas K Lengyel wrote:
> > On Wed, Oct 26, 2022, 7:06 AM Andrew Cooper <Andrew.Cooper3@citrix.com>
> > wrote:
> >
> >> On 24/10/2022 17:58, Tamas K Lengyel wrote:
> >>> Currently the XEN_DOMCTL_get_vcpu_msrs is only capable of gathering a
> >> handful
> >>> of predetermined vcpu MSRs. In our use-case gathering the vPMU MSRs by
> an
> >>> external privileged tool is necessary, thus we extend the domctl to
> >> allow for
> >>> querying for any guest MSRs. To remain compatible with the existing
> >> setup if
> >>> no specific MSR is requested via the domctl the default list is
> returned.
> >>>
> >>> Signed-off-by: Tamas K Lengyel <tamas.lengyel@intel.com>
> >>
> >> Naming aside, XEN_DOMCTL_{get,set}_vcpu_msrs is supposed to be "get me
> >> all MSRs needed to migrate a vCPU".  (I do intend to retire the
> >> hypercall as part of fixing the Xen side of migration, but that's ages
> >> away)
> >>
> >> It seems like what you want is something more like
> >> XEN_DOMCTL_{rd,wr}msr_list  (convenient timing, given the recent ISE
> >> update).  I think those would be better as a separate pair of
> >> hypercalls, rather than trying to repurpose an existing hypercall.
> >>
> >>
> >> As for actually getting the values, please fix up guest_{rd,wr}msr() to
> >> access the perf MSRs safely.  I know the vpmu MSR handling is in a
> >> tragic state, but this new get_msr subop is making the problem even more
> >> tangled.
> >>
> >
> > Adding a separate hypercall is fine.
>
> Which will then also avoid altering the behavior of the existing hypercall:
> You can't just assume e.g. input fields to be zeroed (or otherwise
> suitably set) by existing callers, when those were previously output only.
>

I did add a memset to zero it on the single caller I could find.

Tamas

>
Re: [PATCH] xen/x86: Make XEN_DOMCTL_get_vcpu_msrs more configurable
Posted by Jan Beulich 1 year, 6 months ago
On 26.10.2022 13:58, Tamas K Lengyel wrote:
> On Wed, Oct 26, 2022, 7:48 AM Jan Beulich <jbeulich@suse.com> wrote:
> 
>> On 26.10.2022 13:34, Tamas K Lengyel wrote:
>>> On Wed, Oct 26, 2022, 7:06 AM Andrew Cooper <Andrew.Cooper3@citrix.com>
>>> wrote:
>>>
>>>> On 24/10/2022 17:58, Tamas K Lengyel wrote:
>>>>> Currently the XEN_DOMCTL_get_vcpu_msrs is only capable of gathering a
>>>> handful
>>>>> of predetermined vcpu MSRs. In our use-case gathering the vPMU MSRs by
>> an
>>>>> external privileged tool is necessary, thus we extend the domctl to
>>>> allow for
>>>>> querying for any guest MSRs. To remain compatible with the existing
>>>> setup if
>>>>> no specific MSR is requested via the domctl the default list is
>> returned.
>>>>>
>>>>> Signed-off-by: Tamas K Lengyel <tamas.lengyel@intel.com>
>>>>
>>>> Naming aside, XEN_DOMCTL_{get,set}_vcpu_msrs is supposed to be "get me
>>>> all MSRs needed to migrate a vCPU".  (I do intend to retire the
>>>> hypercall as part of fixing the Xen side of migration, but that's ages
>>>> away)
>>>>
>>>> It seems like what you want is something more like
>>>> XEN_DOMCTL_{rd,wr}msr_list  (convenient timing, given the recent ISE
>>>> update).  I think those would be better as a separate pair of
>>>> hypercalls, rather than trying to repurpose an existing hypercall.
>>>>
>>>>
>>>> As for actually getting the values, please fix up guest_{rd,wr}msr() to
>>>> access the perf MSRs safely.  I know the vpmu MSR handling is in a
>>>> tragic state, but this new get_msr subop is making the problem even more
>>>> tangled.
>>>>
>>>
>>> Adding a separate hypercall is fine.
>>
>> Which will then also avoid altering the behavior of the existing hypercall:
>> You can't just assume e.g. input fields to be zeroed (or otherwise
>> suitably set) by existing callers, when those were previously output only.
>>
> 
> I did add a memset to zero it on the single caller I could find.

Some may deem this sufficient on the assumption that all users should
go through the libxenguest function. But then at the minimum you want
to update documentation in the public header. Yet then this wasn't
the only issue I spotted (hence the use of "e.g.") - you also alter
documented behavior as to the returned number of MSRs when the input
value was too small, if I'm not mistaken.

Jan
Re: [PATCH] xen/x86: Make XEN_DOMCTL_get_vcpu_msrs more configurable
Posted by Tamas K Lengyel 1 year, 6 months ago
On Wed, Oct 26, 2022 at 8:22 AM Jan Beulich <jbeulich@suse.com> wrote:
>
> On 26.10.2022 13:58, Tamas K Lengyel wrote:
> > On Wed, Oct 26, 2022, 7:48 AM Jan Beulich <jbeulich@suse.com> wrote:
> >
> >> On 26.10.2022 13:34, Tamas K Lengyel wrote:
> >>> On Wed, Oct 26, 2022, 7:06 AM Andrew Cooper <Andrew.Cooper3@citrix.com
>
> >>> wrote:
> >>>
> >>>> On 24/10/2022 17:58, Tamas K Lengyel wrote:
> >>>>> Currently the XEN_DOMCTL_get_vcpu_msrs is only capable of gathering
a
> >>>> handful
> >>>>> of predetermined vcpu MSRs. In our use-case gathering the vPMU MSRs
by
> >> an
> >>>>> external privileged tool is necessary, thus we extend the domctl to
> >>>> allow for
> >>>>> querying for any guest MSRs. To remain compatible with the existing
> >>>> setup if
> >>>>> no specific MSR is requested via the domctl the default list is
> >> returned.
> >>>>>
> >>>>> Signed-off-by: Tamas K Lengyel <tamas.lengyel@intel.com>
> >>>>
> >>>> Naming aside, XEN_DOMCTL_{get,set}_vcpu_msrs is supposed to be "get
me
> >>>> all MSRs needed to migrate a vCPU".  (I do intend to retire the
> >>>> hypercall as part of fixing the Xen side of migration, but that's
ages
> >>>> away)
> >>>>
> >>>> It seems like what you want is something more like
> >>>> XEN_DOMCTL_{rd,wr}msr_list  (convenient timing, given the recent ISE
> >>>> update).  I think those would be better as a separate pair of
> >>>> hypercalls, rather than trying to repurpose an existing hypercall.
> >>>>
> >>>>
> >>>> As for actually getting the values, please fix up guest_{rd,wr}msr()
to
> >>>> access the perf MSRs safely.  I know the vpmu MSR handling is in a
> >>>> tragic state, but this new get_msr subop is making the problem even
more
> >>>> tangled.
> >>>>
> >>>
> >>> Adding a separate hypercall is fine.
> >>
> >> Which will then also avoid altering the behavior of the existing
hypercall:
> >> You can't just assume e.g. input fields to be zeroed (or otherwise
> >> suitably set) by existing callers, when those were previously output
only.
> >>
> >
> > I did add a memset to zero it on the single caller I could find.
>
> Some may deem this sufficient on the assumption that all users should
> go through the libxenguest function. But then at the minimum you want
> to update documentation in the public header. Yet then this wasn't
> the only issue I spotted (hence the use of "e.g.") - you also alter
> documented behavior as to the returned number of MSRs when the input
> value was too small, if I'm not mistaken.

No, there shouldn't have been any alteration of the previous behavior other
than assuming the buffer sent by the toolstack is zero-initialized when the
default list is requested. Either way, adding the separate hypercall is
fine by me.

Tamas
Re: [PATCH] xen/x86: Make XEN_DOMCTL_get_vcpu_msrs more configurable
Posted by Roger Pau Monné 1 year, 6 months ago
On Mon, Oct 24, 2022 at 12:58:54PM -0400, Tamas K Lengyel wrote:
> Currently the XEN_DOMCTL_get_vcpu_msrs is only capable of gathering a handful
> of predetermined vcpu MSRs. In our use-case gathering the vPMU MSRs by an
> external privileged tool is necessary, thus we extend the domctl to allow for
> querying for any guest MSRs. To remain compatible with the existing setup if
> no specific MSR is requested via the domctl the default list is returned.

I'm afraid I would benefit from some extra description about why you
need to introduce a separate hook instead of using the existing
do_rdmsr hook in arch_vpmu_ops (which is already hooked into
guest_rdmsr()).

Are the MSRs you are trying to fetch not accessible for the guest
itself to read?

It seems fragile to me to add such kind of hook to read MSRs that's
only used by XEN_DOMCTL_get_vcpu_msrs and not guest_rdmsr(), so it
would need some clear justification about why it's needed.

Thanks, Roger.
Re: [PATCH] xen/x86: Make XEN_DOMCTL_get_vcpu_msrs more configurable
Posted by Tamas K Lengyel 1 year, 6 months ago
On Tue, Oct 25, 2022 at 4:13 AM Roger Pau Monné <roger.pau@citrix.com>
wrote:
>
> On Mon, Oct 24, 2022 at 12:58:54PM -0400, Tamas K Lengyel wrote:
> > Currently the XEN_DOMCTL_get_vcpu_msrs is only capable of gathering a
handful
> > of predetermined vcpu MSRs. In our use-case gathering the vPMU MSRs by
an
> > external privileged tool is necessary, thus we extend the domctl to
allow for
> > querying for any guest MSRs. To remain compatible with the existing
setup if
> > no specific MSR is requested via the domctl the default list is
returned.
>
> I'm afraid I would benefit from some extra description about why you
> need to introduce a separate hook instead of using the existing
> do_rdmsr hook in arch_vpmu_ops (which is already hooked into
> guest_rdmsr()).
>
> Are the MSRs you are trying to fetch not accessible for the guest
> itself to read?

No, the reason we need this different hook is that do_rdmsr assumes the
guest is reading the MSRs that are currently loaded. For external tools,
where v != current, the vpmu context needs to be saved by pausing the vcpu
first, and then the MSR content is returned from the saved context.

 Tamas
Re: [PATCH] xen/x86: Make XEN_DOMCTL_get_vcpu_msrs more configurable
Posted by Roger Pau Monné 1 year, 6 months ago
On Tue, Oct 25, 2022 at 01:48:36PM -0400, Tamas K Lengyel wrote:
> On Tue, Oct 25, 2022 at 4:13 AM Roger Pau Monné <roger.pau@citrix.com>
> wrote:
> >
> > On Mon, Oct 24, 2022 at 12:58:54PM -0400, Tamas K Lengyel wrote:
> > > Currently the XEN_DOMCTL_get_vcpu_msrs is only capable of gathering a
> handful
> > > of predetermined vcpu MSRs. In our use-case gathering the vPMU MSRs by
> an
> > > external privileged tool is necessary, thus we extend the domctl to
> allow for
> > > querying for any guest MSRs. To remain compatible with the existing
> setup if
> > > no specific MSR is requested via the domctl the default list is
> returned.
> >
> > I'm afraid I would benefit from some extra description about why you
> > need to introduce a separate hook instead of using the existing
> > do_rdmsr hook in arch_vpmu_ops (which is already hooked into
> > guest_rdmsr()).
> >
> > Are the MSRs you are trying to fetch not accessible for the guest
> > itself to read?
> 
> No, the reason we need this different hook is because do_rdmsr assumes the
> guest is reading the MSRs that are currently loaded. For external tools
> where v != current the vpmu context needs to be saved by pausing the vcpu
> first and then the MSR content returned from the saved context.

Hm, I see.

We need to dump the CPU MSR contents into the structure so they can be
read from a different pCPU differently than the currently running one.

It would be nice if this could all be somehow wired into
guest_rdmsr(), but the function executing a vcpu_pause() as part of
its operations would be quite weird; also, it having a vcpu parameter
is kind of misleading, as under some circumstances it will perform a
rdmsr and that's likely only correct when v == current.

I guess I will ask for others opinion, but having that specific vPMU
function call in XEN_DOMCTL_get_vcpu_msrs on the side of guest_rdmsr()
seems like a layering violation, as it should all be contained in
guest_rdmsr().

Thanks, Roger.