From: Purna Pavan Chandra Aekkaladevi <paekkaladevi@linux.microsoft.com>
Some versions of the hypervisor do not support HV_STATS_AREA_PARENT and
return HV_STATUS_INVALID_PARAMETER for the second stats page mapping
request.
This results in a failure during module init. Instead of failing, gracefully
fall back to populating stats_pages[HV_STATS_AREA_PARENT] with the
already-mapped stats_pages[HV_STATS_AREA_SELF].
Signed-off-by: Purna Pavan Chandra Aekkaladevi <paekkaladevi@linux.microsoft.com>
Signed-off-by: Nuno Das Neves <nunodasneves@linux.microsoft.com>
---
drivers/hv/mshv_root_hv_call.c | 43 ++++++++++++++++++++++++++++++----
drivers/hv/mshv_root_main.c | 3 +++
2 files changed, 42 insertions(+), 4 deletions(-)
diff --git a/drivers/hv/mshv_root_hv_call.c b/drivers/hv/mshv_root_hv_call.c
index c9c274f29c3c..1c38576a673c 100644
--- a/drivers/hv/mshv_root_hv_call.c
+++ b/drivers/hv/mshv_root_hv_call.c
@@ -724,6 +724,24 @@ hv_call_notify_port_ring_empty(u32 sint_index)
return hv_result_to_errno(status);
}
+static int
+hv_stats_get_area_type(enum hv_stats_object_type type,
+ const union hv_stats_object_identity *identity)
+{
+ switch (type) {
+ case HV_STATS_OBJECT_HYPERVISOR:
+ return identity->hv.stats_area_type;
+ case HV_STATS_OBJECT_LOGICAL_PROCESSOR:
+ return identity->lp.stats_area_type;
+ case HV_STATS_OBJECT_PARTITION:
+ return identity->partition.stats_area_type;
+ case HV_STATS_OBJECT_VP:
+ return identity->vp.stats_area_type;
+ }
+
+ return -EINVAL;
+}
+
int hv_call_map_stat_page(enum hv_stats_object_type type,
const union hv_stats_object_identity *identity,
void **addr)
@@ -732,7 +750,7 @@ int hv_call_map_stat_page(enum hv_stats_object_type type,
struct hv_input_map_stats_page *input;
struct hv_output_map_stats_page *output;
u64 status, pfn;
- int ret = 0;
+ int hv_status, ret = 0;
do {
local_irq_save(flags);
@@ -747,11 +765,28 @@ int hv_call_map_stat_page(enum hv_stats_object_type type,
pfn = output->map_location;
local_irq_restore(flags);
- if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) {
- ret = hv_result_to_errno(status);
+
+ hv_status = hv_result(status);
+ if (hv_status != HV_STATUS_INSUFFICIENT_MEMORY) {
if (hv_result_success(status))
break;
- return ret;
+
+ /*
+ * Some versions of the hypervisor do not support the
+ * PARENT stats area. In this case return "success" but
+ * set the page to NULL. The caller checks for this
+ * case and just uses the SELF area instead.
+ */
+ if (hv_stats_get_area_type(type, identity) == HV_STATS_AREA_PARENT &&
+ hv_status == HV_STATUS_INVALID_PARAMETER) {
+ pr_debug_once("%s: PARENT area type is unsupported\n",
+ __func__);
+ *addr = NULL;
+ return 0;
+ }
+
+ hv_status_debug(status, "\n");
+ return hv_result_to_errno(status);
}
ret = hv_call_deposit_pages(NUMA_NO_NODE,
diff --git a/drivers/hv/mshv_root_main.c b/drivers/hv/mshv_root_main.c
index bbdefe8a2e9c..56ababab57ce 100644
--- a/drivers/hv/mshv_root_main.c
+++ b/drivers/hv/mshv_root_main.c
@@ -929,6 +929,9 @@ static int mshv_vp_stats_map(u64 partition_id, u32 vp_index,
if (err)
goto unmap_self;
+ if (!stats_pages[HV_STATS_AREA_PARENT])
+ stats_pages[HV_STATS_AREA_PARENT] = stats_pages[HV_STATS_AREA_SELF];
+
return 0;
unmap_self:
--
2.34.1
On Fri, Aug 29, 2025 at 8:44 AM Nuno Das Neves <nunodasneves@linux.microsoft.com> wrote: > > From: Purna Pavan Chandra Aekkaladevi <paekkaladevi@linux.microsoft.com> > > Some versions of the hypervisor do not support HV_STATUS_AREA_PARENT and > return HV_STATUS_INVALID_PARAMETER for the second stats page mapping > request. > > This results a failure in module init. Instead of failing, gracefully > fall back to populating stats_pages[HV_STATS_AREA_PARENT] with the > already-mapped stats_pages[HV_STATS_AREA_SELF]. > > Signed-off-by: Purna Pavan Chandra Aekkaladevi <paekkaladevi@linux.microsoft.com> > Signed-off-by: Nuno Das Neves <nunodasneves@linux.microsoft.com> > --- > drivers/hv/mshv_root_hv_call.c | 43 ++++++++++++++++++++++++++++++---- > drivers/hv/mshv_root_main.c | 3 +++ > 2 files changed, 42 insertions(+), 4 deletions(-) > Reviewed-by: Tianyu Lan <tiala@microsoft.com> > diff --git a/drivers/hv/mshv_root_hv_call.c b/drivers/hv/mshv_root_hv_call.c > index c9c274f29c3c..1c38576a673c 100644 > --- a/drivers/hv/mshv_root_hv_call.c > +++ b/drivers/hv/mshv_root_hv_call.c > @@ -724,6 +724,24 @@ hv_call_notify_port_ring_empty(u32 sint_index) > return hv_result_to_errno(status); > } > > +static int > +hv_stats_get_area_type(enum hv_stats_object_type type, > + const union hv_stats_object_identity *identity) > +{ > + switch (type) { > + case HV_STATS_OBJECT_HYPERVISOR: > + return identity->hv.stats_area_type; > + case HV_STATS_OBJECT_LOGICAL_PROCESSOR: > + return identity->lp.stats_area_type; > + case HV_STATS_OBJECT_PARTITION: > + return identity->partition.stats_area_type; > + case HV_STATS_OBJECT_VP: > + return identity->vp.stats_area_type; > + } > + > + return -EINVAL; > +} > + > int hv_call_map_stat_page(enum hv_stats_object_type type, > const union hv_stats_object_identity *identity, > void **addr) > @@ -732,7 +750,7 @@ int hv_call_map_stat_page(enum hv_stats_object_type type, > struct hv_input_map_stats_page *input; > struct hv_output_map_stats_page *output; > u64 
status, pfn; > - int ret = 0; > + int hv_status, ret = 0; > > do { > local_irq_save(flags); > @@ -747,11 +765,28 @@ int hv_call_map_stat_page(enum hv_stats_object_type type, > pfn = output->map_location; > > local_irq_restore(flags); > - if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) { > - ret = hv_result_to_errno(status); > + > + hv_status = hv_result(status); > + if (hv_status != HV_STATUS_INSUFFICIENT_MEMORY) { > if (hv_result_success(status)) > break; > - return ret; > + > + /* > + * Some versions of the hypervisor do not support the > + * PARENT stats area. In this case return "success" but > + * set the page to NULL. The caller checks for this > + * case instead just uses the SELF area. > + */ > + if (hv_stats_get_area_type(type, identity) == HV_STATS_AREA_PARENT && > + hv_status == HV_STATUS_INVALID_PARAMETER) { > + pr_debug_once("%s: PARENT area type is unsupported\n", > + __func__); > + *addr = NULL; > + return 0; > + } > + > + hv_status_debug(status, "\n"); > + return hv_result_to_errno(status); > } > > ret = hv_call_deposit_pages(NUMA_NO_NODE, > diff --git a/drivers/hv/mshv_root_main.c b/drivers/hv/mshv_root_main.c > index bbdefe8a2e9c..56ababab57ce 100644 > --- a/drivers/hv/mshv_root_main.c > +++ b/drivers/hv/mshv_root_main.c > @@ -929,6 +929,9 @@ static int mshv_vp_stats_map(u64 partition_id, u32 vp_index, > if (err) > goto unmap_self; > > + if (!stats_pages[HV_STATS_AREA_PARENT]) > + stats_pages[HV_STATS_AREA_PARENT] = stats_pages[HV_STATS_AREA_SELF]; > + > return 0; > > unmap_self: > -- > 2.34.1 > > -- Thanks Tianyu Lan
On 8/28/2025 5:43 PM, Nuno Das Neves wrote: > From: Purna Pavan Chandra Aekkaladevi <paekkaladevi@linux.microsoft.com> > > Some versions of the hypervisor do not support HV_STATUS_AREA_PARENT and > return HV_STATUS_INVALID_PARAMETER for the second stats page mapping > request. > > This results a failure in module init. Instead of failing, gracefully > fall back to populating stats_pages[HV_STATS_AREA_PARENT] with the > already-mapped stats_pages[HV_STATS_AREA_SELF]. What's the impact of this graceful fallback? It occurs to me that if a stats accumulator, in userspace perhaps, expected to get stats from the 2 pages, it'd get incorrect values. > > Signed-off-by: Purna Pavan Chandra Aekkaladevi <paekkaladevi@linux.microsoft.com> > Signed-off-by: Nuno Das Neves <nunodasneves@linux.microsoft.com> > --- > drivers/hv/mshv_root_hv_call.c | 43 ++++++++++++++++++++++++++++++---- > drivers/hv/mshv_root_main.c | 3 +++ > 2 files changed, 42 insertions(+), 4 deletions(-) > > diff --git a/drivers/hv/mshv_root_hv_call.c b/drivers/hv/mshv_root_hv_call.c > index c9c274f29c3c..1c38576a673c 100644 > --- a/drivers/hv/mshv_root_hv_call.c > +++ b/drivers/hv/mshv_root_hv_call.c > @@ -724,6 +724,24 @@ hv_call_notify_port_ring_empty(u32 sint_index) > return hv_result_to_errno(status); > } > > +static int > +hv_stats_get_area_type(enum hv_stats_object_type type, One line please, i.e. static int hv_stats_get_area_type(...) <snip> Thanks, Easwar (he/him)
On 9/5/2025 12:21 PM, Easwar Hariharan wrote: > On 8/28/2025 5:43 PM, Nuno Das Neves wrote: >> From: Purna Pavan Chandra Aekkaladevi <paekkaladevi@linux.microsoft.com> >> >> Some versions of the hypervisor do not support HV_STATUS_AREA_PARENT and >> return HV_STATUS_INVALID_PARAMETER for the second stats page mapping >> request. >> >> This results a failure in module init. Instead of failing, gracefully >> fall back to populating stats_pages[HV_STATS_AREA_PARENT] with the >> already-mapped stats_pages[HV_STATS_AREA_SELF]. > > What's the impact of this graceful fallback? It occurs to me that if a stats > accumulator, in userspace perhaps, expected to get stats from the 2 pages, > it'd get incorrect values. > This is going out of scope of this series a bit but I'll explain briefly. When we do add the code to expose these stats to userspace, the SELF and PARENT pages won't be exposed separately, there is no duplication. For each stat counter in the page, we'll expose either the SELF or PARENT value, depending on whether there is anything in that slot (whether it's zero or not). Some stats are available via the SELF page, and some via the PARENT page, but the counters in the page have the same layout. So some counters in the SELF page will all stay zero while on the PARENT page they are updated, and vice versa. I believe the hypervisor takes this strange approach for the purpose of backward compatibility. Introducing L1VH created the need for this SELF/PARENT distinction. Hope that makes some kind of sense...it will be clearer when we post the mshv debugfs code itself. To put it another way, falling back to the SELF page won't cause any impact to userspace because the distinction between the pages is all handled in the driver, and we only read each stat value from either SELF or PARENT. 
Nuno >> >> Signed-off-by: Purna Pavan Chandra Aekkaladevi <paekkaladevi@linux.microsoft.com> >> Signed-off-by: Nuno Das Neves <nunodasneves@linux.microsoft.com> >> --- >> drivers/hv/mshv_root_hv_call.c | 43 ++++++++++++++++++++++++++++++---- >> drivers/hv/mshv_root_main.c | 3 +++ >> 2 files changed, 42 insertions(+), 4 deletions(-) >> >> diff --git a/drivers/hv/mshv_root_hv_call.c b/drivers/hv/mshv_root_hv_call.c >> index c9c274f29c3c..1c38576a673c 100644 >> --- a/drivers/hv/mshv_root_hv_call.c >> +++ b/drivers/hv/mshv_root_hv_call.c >> @@ -724,6 +724,24 @@ hv_call_notify_port_ring_empty(u32 sint_index) >> return hv_result_to_errno(status); >> } >> >> +static int >> +hv_stats_get_area_type(enum hv_stats_object_type type, > > One line please, i.e. > > static int hv_stats_get_area_type(...) > > <snip> > > Thanks, > Easwar (he/him)
On 9/8/2025 10:04 AM, Nuno Das Neves wrote: > On 9/5/2025 12:21 PM, Easwar Hariharan wrote: >> On 8/28/2025 5:43 PM, Nuno Das Neves wrote: >>> From: Purna Pavan Chandra Aekkaladevi <paekkaladevi@linux.microsoft.com> >>> >>> Some versions of the hypervisor do not support HV_STATUS_AREA_PARENT and >>> return HV_STATUS_INVALID_PARAMETER for the second stats page mapping >>> request. >>> >>> This results a failure in module init. Instead of failing, gracefully >>> fall back to populating stats_pages[HV_STATS_AREA_PARENT] with the >>> already-mapped stats_pages[HV_STATS_AREA_SELF]. >> >> What's the impact of this graceful fallback? It occurs to me that if a stats >> accumulator, in userspace perhaps, expected to get stats from the 2 pages, >> it'd get incorrect values. >> > This is going out of scope of this series a bit but I'll explain briefly. > > When we do add the code to expose these stats to userspace, the SELF and > PARENT pages won't be exposed separately, there is no duplication. > > For each stat counter in the page, we'll expose either the SELF or PARENT > value, depending on whether there is anything in that slot (whether it's zero > or not). > > Some stats are available via the SELF page, and some via the PARENT page, but > the counters in the page have the same layout. So some counters in the SELF > page will all stay zero while on the PARENT page they are updated, and vice > versa. > > I believe the hypervisor takes this strange approach for the purpose of > backward compatibility. Introducing L1VH created the need for this SELF/PARENT > distinction. > > Hope that makes some kind of sense...it will be clearer when we post the mshv > debugfs code itself. > > To put it another way, falling back to the SELF page won't cause any impact > to userspace because the distinction between the pages is all handled in the > driver, and we only read each stat value from either SELF or PARENT. > > Nuno Thank you for that explanation, it sorta makes sense. 
I think it'd be better if this patch is part of the series that exposes the stats to userspace, so that it can be reviewed in context with the rest of the code in the driver that manages the pick-and-choose of a stat value from the SELF/PARENT page. Unless there's an active problem now in the upstream kernel that this patch solves? i.e. are the versions of the hypervisor that don't support the PARENT stats page available in the wild? Thanks, Easwar (he/him)
On 9/8/2025 10:22 AM, Easwar Hariharan wrote: > On 9/8/2025 10:04 AM, Nuno Das Neves wrote: >> On 9/5/2025 12:21 PM, Easwar Hariharan wrote: >>> On 8/28/2025 5:43 PM, Nuno Das Neves wrote: >>>> From: Purna Pavan Chandra Aekkaladevi <paekkaladevi@linux.microsoft.com> >>>> >>>> Some versions of the hypervisor do not support HV_STATUS_AREA_PARENT and >>>> return HV_STATUS_INVALID_PARAMETER for the second stats page mapping >>>> request. >>>> >>>> This results a failure in module init. Instead of failing, gracefully >>>> fall back to populating stats_pages[HV_STATS_AREA_PARENT] with the >>>> already-mapped stats_pages[HV_STATS_AREA_SELF]. >>> >>> What's the impact of this graceful fallback? It occurs to me that if a stats >>> accumulator, in userspace perhaps, expected to get stats from the 2 pages, >>> it'd get incorrect values. >>> >> This is going out of scope of this series a bit but I'll explain briefly. >> >> When we do add the code to expose these stats to userspace, the SELF and >> PARENT pages won't be exposed separately, there is no duplication. >> >> For each stat counter in the page, we'll expose either the SELF or PARENT >> value, depending on whether there is anything in that slot (whether it's zero >> or not). >> >> Some stats are available via the SELF page, and some via the PARENT page, but >> the counters in the page have the same layout. So some counters in the SELF >> page will all stay zero while on the PARENT page they are updated, and vice >> versa. >> >> I believe the hypervisor takes this strange approach for the purpose of >> backward compatibility. Introducing L1VH created the need for this SELF/PARENT >> distinction. >> >> Hope that makes some kind of sense...it will be clearer when we post the mshv >> debugfs code itself. 
>> >> To put it another way, falling back to the SELF page won't cause any impact >> to userspace because the distinction between the pages is all handled in the >> driver, and we only read each stat value from either SELF or PARENT. >> >> Nuno > > Thank you for that explanation, it sorta makes sense. > > I think it'd be better if this patch is part of the series that exposes the stats > to userspace, so that it can be reviewed in context with the rest of the code in > the driver that manages the pick-and-choose of a stat value from the SELF/PARENT > page. > Good idea, I think I'll do that. Thanks! > Unless there's an active problem now in the upstream kernel that this patch solves? > i.e. are the versions of the hypervisor that don't support the PARENT stats > page available in the wild? > I thought there was, but on reflection, no it doesn't solve a problem that exists in the code today. Nuno > Thanks, > Easwar (he/him)
On 9/8/2025 1:06 PM, Nuno Das Neves wrote: > On 9/8/2025 10:22 AM, Easwar Hariharan wrote: >> On 9/8/2025 10:04 AM, Nuno Das Neves wrote: >>> On 9/5/2025 12:21 PM, Easwar Hariharan wrote: >>>> On 8/28/2025 5:43 PM, Nuno Das Neves wrote: >>>>> From: Purna Pavan Chandra Aekkaladevi <paekkaladevi@linux.microsoft.com> >>>>> >>>>> Some versions of the hypervisor do not support HV_STATUS_AREA_PARENT and >>>>> return HV_STATUS_INVALID_PARAMETER for the second stats page mapping >>>>> request. >>>>> >>>>> This results a failure in module init. Instead of failing, gracefully >>>>> fall back to populating stats_pages[HV_STATS_AREA_PARENT] with the >>>>> already-mapped stats_pages[HV_STATS_AREA_SELF]. >>>> >>>> What's the impact of this graceful fallback? It occurs to me that if a stats >>>> accumulator, in userspace perhaps, expected to get stats from the 2 pages, >>>> it'd get incorrect values. >>>> >>> This is going out of scope of this series a bit but I'll explain briefly. >>> >>> When we do add the code to expose these stats to userspace, the SELF and >>> PARENT pages won't be exposed separately, there is no duplication. >>> >>> For each stat counter in the page, we'll expose either the SELF or PARENT >>> value, depending on whether there is anything in that slot (whether it's zero >>> or not). >>> >>> Some stats are available via the SELF page, and some via the PARENT page, but >>> the counters in the page have the same layout. So some counters in the SELF >>> page will all stay zero while on the PARENT page they are updated, and vice >>> versa. >>> >>> I believe the hypervisor takes this strange approach for the purpose of >>> backward compatibility. Introducing L1VH created the need for this SELF/PARENT >>> distinction. >>> >>> Hope that makes some kind of sense...it will be clearer when we post the mshv >>> debugfs code itself. 
>>> >>> To put it another way, falling back to the SELF page won't cause any impact >>> to userspace because the distinction between the pages is all handled in the >>> driver, and we only read each stat value from either SELF or PARENT. >>> >>> Nuno >> >> Thank you for that explanation, it sorta makes sense. >> >> I think it'd be better if this patch is part of the series that exposes the stats >> to userspace, so that it can be reviewed in context with the rest of the code in >> the driver that manages the pick-and-choose of a stat value from the SELF/PARENT >> page. >> > Good idea, I think I'll do that. Thanks! > >> Unless there's an active problem now in the upstream kernel that this patch solves? >> i.e. are the versions of the hypervisor that don't support the PARENT stats >> page available in the wild? >> > I thought there was, but on reflection, no it doesn't solve a problem that exists in > the code today. > > Nuno > The usecases for stats exposed by the hypervisor are: 1) used within the kernel by root scheduler 2) exposed to userspace via debugfs. I thought we are addressing the first use-case here (patch1 in this series). If root scheduler support was upstreamed then this patchset does solve a problem in upstream code. >> Thanks, >> Easwar (he/him) > > -- Regards, Praveen K Paladugu
On 9/9/2025 7:52 AM, Praveen K Paladugu wrote: > > > On 9/8/2025 1:06 PM, Nuno Das Neves wrote: >> On 9/8/2025 10:22 AM, Easwar Hariharan wrote: >>> On 9/8/2025 10:04 AM, Nuno Das Neves wrote: >>>> On 9/5/2025 12:21 PM, Easwar Hariharan wrote: >>>>> On 8/28/2025 5:43 PM, Nuno Das Neves wrote: >>>>>> From: Purna Pavan Chandra Aekkaladevi <paekkaladevi@linux.microsoft.com> >>>>>> >>>>>> Some versions of the hypervisor do not support HV_STATUS_AREA_PARENT and >>>>>> return HV_STATUS_INVALID_PARAMETER for the second stats page mapping >>>>>> request. >>>>>> >>>>>> This results a failure in module init. Instead of failing, gracefully >>>>>> fall back to populating stats_pages[HV_STATS_AREA_PARENT] with the >>>>>> already-mapped stats_pages[HV_STATS_AREA_SELF]. >>>>> >>>>> What's the impact of this graceful fallback? It occurs to me that if a stats >>>>> accumulator, in userspace perhaps, expected to get stats from the 2 pages, >>>>> it'd get incorrect values. >>>>> >>>> This is going out of scope of this series a bit but I'll explain briefly. >>>> >>>> When we do add the code to expose these stats to userspace, the SELF and >>>> PARENT pages won't be exposed separately, there is no duplication. >>>> >>>> For each stat counter in the page, we'll expose either the SELF or PARENT >>>> value, depending on whether there is anything in that slot (whether it's zero >>>> or not). >>>> >>>> Some stats are available via the SELF page, and some via the PARENT page, but >>>> the counters in the page have the same layout. So some counters in the SELF >>>> page will all stay zero while on the PARENT page they are updated, and vice >>>> versa. >>>> >>>> I believe the hypervisor takes this strange approach for the purpose of >>>> backward compatibility. Introducing L1VH created the need for this SELF/PARENT >>>> distinction. >>>> >>>> Hope that makes some kind of sense...it will be clearer when we post the mshv >>>> debugfs code itself. 
>>>> >>>> To put it another way, falling back to the SELF page won't cause any impact >>>> to userspace because the distinction between the pages is all handled in the >>>> driver, and we only read each stat value from either SELF or PARENT. >>>> >>>> Nuno >>> >>> Thank you for that explanation, it sorta makes sense. >>> >>> I think it'd be better if this patch is part of the series that exposes the stats >>> to userspace, so that it can be reviewed in context with the rest of the code in >>> the driver that manages the pick-and-choose of a stat value from the SELF/PARENT >>> page. >>> >> Good idea, I think I'll do that. Thanks! >> >>> Unless there's an active problem now in the upstream kernel that this patch solves? >>> i.e. are the versions of the hypervisor that don't support the PARENT stats >>> page available in the wild? >>> >> I thought there was, but on reflection, no it doesn't solve a problem that exists in >> the code today. >> >> Nuno >> > > The usecases for stats exposed by the hypervisor are: > 1) used within the kernel by root scheduler > 2) exposed to userspace via debugfs. > > I thought we are addressing the first use-case here (patch1 in this series). If root scheduler support was upstreamed then this patchset does solve a problem in upstream code. > Sorry about the long context, I couldn't figure out a good spot to snip. Thanks for calling that out. I think root scheduler support has been upstreamed if I'm reading root_scheduler_init() right, Nuno can confirm. With the series applied, I don't see any of the code that picks and chooses the stats from the PARENT/SELF pages even for the kernel case, rather there's a straight memcpy of the stats pages in the create_vp ioctl. Is the kernel usage with the root scheduler somehow immune from the problem userspace would face with the duplicated pages? If not, I'd say that it's an argument for dropping patch 1 and 2, instead of including patch 2, or to fold them together. 
We don't want a state in the upstream kernel where a commit introduces a known problem just to be solved by the following commit. Thanks, Easwar (he/him)
On 9/9/2025 9:27 AM, Easwar Hariharan wrote: > On 9/9/2025 7:52 AM, Praveen K Paladugu wrote: >> >> >> On 9/8/2025 1:06 PM, Nuno Das Neves wrote: >>> On 9/8/2025 10:22 AM, Easwar Hariharan wrote: >>>> On 9/8/2025 10:04 AM, Nuno Das Neves wrote: >>>>> On 9/5/2025 12:21 PM, Easwar Hariharan wrote: >>>>>> On 8/28/2025 5:43 PM, Nuno Das Neves wrote: >>>>>>> From: Purna Pavan Chandra Aekkaladevi <paekkaladevi@linux.microsoft.com> >>>>>>> >>>>>>> Some versions of the hypervisor do not support HV_STATUS_AREA_PARENT and >>>>>>> return HV_STATUS_INVALID_PARAMETER for the second stats page mapping >>>>>>> request. >>>>>>> >>>>>>> This results a failure in module init. Instead of failing, gracefully >>>>>>> fall back to populating stats_pages[HV_STATS_AREA_PARENT] with the >>>>>>> already-mapped stats_pages[HV_STATS_AREA_SELF]. >>>>>> >>>>>> What's the impact of this graceful fallback? It occurs to me that if a stats >>>>>> accumulator, in userspace perhaps, expected to get stats from the 2 pages, >>>>>> it'd get incorrect values. >>>>>> >>>>> This is going out of scope of this series a bit but I'll explain briefly. >>>>> >>>>> When we do add the code to expose these stats to userspace, the SELF and >>>>> PARENT pages won't be exposed separately, there is no duplication. >>>>> >>>>> For each stat counter in the page, we'll expose either the SELF or PARENT >>>>> value, depending on whether there is anything in that slot (whether it's zero >>>>> or not). >>>>> >>>>> Some stats are available via the SELF page, and some via the PARENT page, but >>>>> the counters in the page have the same layout. So some counters in the SELF >>>>> page will all stay zero while on the PARENT page they are updated, and vice >>>>> versa. >>>>> >>>>> I believe the hypervisor takes this strange approach for the purpose of >>>>> backward compatibility. Introducing L1VH created the need for this SELF/PARENT >>>>> distinction. 
>>>>> >>>>> Hope that makes some kind of sense...it will be clearer when we post the mshv >>>>> debugfs code itself. >>>>> >>>>> To put it another way, falling back to the SELF page won't cause any impact >>>>> to userspace because the distinction between the pages is all handled in the >>>>> driver, and we only read each stat value from either SELF or PARENT. >>>>> >>>>> Nuno >>>> >>>> Thank you for that explanation, it sorta makes sense. >>>> >>>> I think it'd be better if this patch is part of the series that exposes the stats >>>> to userspace, so that it can be reviewed in context with the rest of the code in >>>> the driver that manages the pick-and-choose of a stat value from the SELF/PARENT >>>> page. >>>> >>> Good idea, I think I'll do that. Thanks! >>> >>>> Unless there's an active problem now in the upstream kernel that this patch solves? >>>> i.e. are the versions of the hypervisor that don't support the PARENT stats >>>> page available in the wild? >>>> >>> I thought there was, but on reflection, no it doesn't solve a problem that exists in >>> the code today. >>> >>> Nuno >>> >> >> The usecases for stats exposed by the hypervisor are: >> 1) used within the kernel by root scheduler >> 2) exposed to userspace via debugfs. >> >> I thought we are addressing the first use-case here (patch1 in this series). If root scheduler support was upstreamed then this patchset does solve a problem in upstream code. >> > > Sorry about the long context, I couldn't figure out a good spot to snip. > > Thanks for calling that out. I think root scheduler support has been upstreamed if I'm reading > root_scheduler_init() right, Nuno can confirm. > Correct. But the user of the PARENT == SELF workaround is L1VH, which doesn't support root scheduler today. So technically the fix doesn't do anything if we're just talking about the stats mapping used by the root scheduler code. 
> With the series applied, I don't see any of the code that picks and chooses the stats from > the PARENT/SELF pages even for the kernel case, rather there's a straight memcpy of the > stats pages in the create_vp ioctl. > This memcpy: memcpy(vp->vp_stats_pages, stats_pages, sizeof(stats_pages)); Is not copying the contents of the pages. It's copying array of pointers to the SELF and PARENT pages: struct mshv_vp{ /* ... */ struct hv_stats_page *vp_stats_pages[2]; /* ... */ }; > Is the kernel usage with the root scheduler somehow immune from the problem userspace would > face with the duplicated pages? If not, I'd say that it's an argument for dropping patch 1 > and 2, instead of including patch 2, or to fold them together. We don't want a state in the > upstream kernel where a commit introduces a known problem just to be solved by the following > commit. > Yes, it's immune. Nothing is being accumulated and therefore double-counted. There's a signal value in the stats page which is used to determine if the dispatch thread is blocked. It's just a boolean. See mshv_vp_dispatch_thread_blocked(). Thanks Nuno > Thanks, > Easwar (he/him)
On 8/28/2025 7:43 PM, Nuno Das Neves wrote: > From: Purna Pavan Chandra Aekkaladevi <paekkaladevi@linux.microsoft.com> > > Some versions of the hypervisor do not support HV_STATUS_AREA_PARENT and > return HV_STATUS_INVALID_PARAMETER for the second stats page mapping > request. > Is this behavior limited to VP stats? Or does it extend to other stats (hypervisor, partition, etc) as well? > This results a failure in module init. Instead of failing, gracefully nit: s/This results in a failure during module init/> fall back to populating stats_pages[HV_STATS_AREA_PARENT] with the > already-mapped stats_pages[HV_STATS_AREA_SELF]. > > Signed-off-by: Purna Pavan Chandra Aekkaladevi <paekkaladevi@linux.microsoft.com> > Signed-off-by: Nuno Das Neves <nunodasneves@linux.microsoft.com> > --- > drivers/hv/mshv_root_hv_call.c | 43 ++++++++++++++++++++++++++++++---- > drivers/hv/mshv_root_main.c | 3 +++ > 2 files changed, 42 insertions(+), 4 deletions(-) > > diff --git a/drivers/hv/mshv_root_hv_call.c b/drivers/hv/mshv_root_hv_call.c > index c9c274f29c3c..1c38576a673c 100644 > --- a/drivers/hv/mshv_root_hv_call.c > +++ b/drivers/hv/mshv_root_hv_call.c > @@ -724,6 +724,24 @@ hv_call_notify_port_ring_empty(u32 sint_index) > return hv_result_to_errno(status); > } > > +static int > +hv_stats_get_area_type(enum hv_stats_object_type type, > + const union hv_stats_object_identity *identity) > +{ > + switch (type) { > + case HV_STATS_OBJECT_HYPERVISOR: > + return identity->hv.stats_area_type; > + case HV_STATS_OBJECT_LOGICAL_PROCESSOR: > + return identity->lp.stats_area_type; > + case HV_STATS_OBJECT_PARTITION: > + return identity->partition.stats_area_type; > + case HV_STATS_OBJECT_VP: > + return identity->vp.stats_area_type; > + } > + > + return -EINVAL; > +} > + > int hv_call_map_stat_page(enum hv_stats_object_type type, > const union hv_stats_object_identity *identity, > void **addr) > @@ -732,7 +750,7 @@ int hv_call_map_stat_page(enum hv_stats_object_type type, > struct 
hv_input_map_stats_page *input; > struct hv_output_map_stats_page *output; > u64 status, pfn; > - int ret = 0; > + int hv_status, ret = 0; > > do { > local_irq_save(flags); > @@ -747,11 +765,28 @@ int hv_call_map_stat_page(enum hv_stats_object_type type, > pfn = output->map_location; > > local_irq_restore(flags); > - if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) { > - ret = hv_result_to_errno(status); > + > + hv_status = hv_result(status); > + if (hv_status != HV_STATUS_INSUFFICIENT_MEMORY) { > if (hv_result_success(status)) > break; > - return ret; > + > + /* > + * Some versions of the hypervisor do not support the > + * PARENT stats area. In this case return "success" but > + * set the page to NULL. The caller checks for this > + * case instead just uses the SELF area. > + */ > + if (hv_stats_get_area_type(type, identity) == HV_STATS_AREA_PARENT && > + hv_status == HV_STATUS_INVALID_PARAMETER) { > + pr_debug_once("%s: PARENT area type is unsupported\n", > + __func__); > + *addr = NULL; > + return 0; > + } > + > + hv_status_debug(status, "\n"); > + return hv_result_to_errno(status); > } > > ret = hv_call_deposit_pages(NUMA_NO_NODE, > diff --git a/drivers/hv/mshv_root_main.c b/drivers/hv/mshv_root_main.c > index bbdefe8a2e9c..56ababab57ce 100644 > --- a/drivers/hv/mshv_root_main.c > +++ b/drivers/hv/mshv_root_main.c > @@ -929,6 +929,9 @@ static int mshv_vp_stats_map(u64 partition_id, u32 vp_index, > if (err) > goto unmap_self; > > + if (!stats_pages[HV_STATS_AREA_PARENT]) > + stats_pages[HV_STATS_AREA_PARENT] = stats_pages[HV_STATS_AREA_SELF]; > + > return 0; > > unmap_self: -- Regards, Praveen K Paladugu
On 9/5/2025 8:31 AM, Praveen K Paladugu wrote: > > > On 8/28/2025 7:43 PM, Nuno Das Neves wrote: >> From: Purna Pavan Chandra Aekkaladevi <paekkaladevi@linux.microsoft.com> >> >> Some versions of the hypervisor do not support HV_STATUS_AREA_PARENT and >> return HV_STATUS_INVALID_PARAMETER for the second stats page mapping >> request. >> > Is this behavior limited to VP stats? Or does it extend to other > stats (hypervisor, partition, etc) as well? > In practice we will only need to worry about partition and VP. In the current code in hyperv-next, it's only VP stats. Upcoming patches to add debugfs code will also need it for partition stats. >> This results a failure in module init. Instead of failing, gracefully > nit: s/This results in a failure during module init/ Thanks, I'll change it for v2 Nuno >> fall back to populating stats_pages[HV_STATS_AREA_PARENT] with the >> already-mapped stats_pages[HV_STATS_AREA_SELF]. >> >> Signed-off-by: Purna Pavan Chandra Aekkaladevi <paekkaladevi@linux.microsoft.com> >> Signed-off-by: Nuno Das Neves <nunodasneves@linux.microsoft.com> >> --- >> drivers/hv/mshv_root_hv_call.c | 43 ++++++++++++++++++++++++++++++---- >> drivers/hv/mshv_root_main.c | 3 +++ >> 2 files changed, 42 insertions(+), 4 deletions(-) >> >> diff --git a/drivers/hv/mshv_root_hv_call.c b/drivers/hv/mshv_root_hv_call.c >> index c9c274f29c3c..1c38576a673c 100644 >> --- a/drivers/hv/mshv_root_hv_call.c >> +++ b/drivers/hv/mshv_root_hv_call.c >> @@ -724,6 +724,24 @@ hv_call_notify_port_ring_empty(u32 sint_index) >> return hv_result_to_errno(status); >> } >> +static int >> +hv_stats_get_area_type(enum hv_stats_object_type type, >> + const union hv_stats_object_identity *identity) >> +{ >> + switch (type) { >> + case HV_STATS_OBJECT_HYPERVISOR: >> + return identity->hv.stats_area_type; >> + case HV_STATS_OBJECT_LOGICAL_PROCESSOR: >> + return identity->lp.stats_area_type; >> + case HV_STATS_OBJECT_PARTITION: >> + return identity->partition.stats_area_type; >> + case 
HV_STATS_OBJECT_VP: >> + return identity->vp.stats_area_type; >> + } >> + >> + return -EINVAL; >> +} >> + >> int hv_call_map_stat_page(enum hv_stats_object_type type, >> const union hv_stats_object_identity *identity, >> void **addr) >> @@ -732,7 +750,7 @@ int hv_call_map_stat_page(enum hv_stats_object_type type, >> struct hv_input_map_stats_page *input; >> struct hv_output_map_stats_page *output; >> u64 status, pfn; >> - int ret = 0; >> + int hv_status, ret = 0; >> do { >> local_irq_save(flags); >> @@ -747,11 +765,28 @@ int hv_call_map_stat_page(enum hv_stats_object_type type, >> pfn = output->map_location; >> local_irq_restore(flags); >> - if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) { >> - ret = hv_result_to_errno(status); >> + >> + hv_status = hv_result(status); >> + if (hv_status != HV_STATUS_INSUFFICIENT_MEMORY) { >> if (hv_result_success(status)) >> break; >> - return ret; >> + >> + /* >> + * Some versions of the hypervisor do not support the >> + * PARENT stats area. In this case return "success" but >> + * set the page to NULL. The caller checks for this >> + * case instead just uses the SELF area. >> + */ >> + if (hv_stats_get_area_type(type, identity) == HV_STATS_AREA_PARENT && >> + hv_status == HV_STATUS_INVALID_PARAMETER) { >> + pr_debug_once("%s: PARENT area type is unsupported\n", >> + __func__); >> + *addr = NULL; >> + return 0; >> + } >> + >> + hv_status_debug(status, "\n"); >> + return hv_result_to_errno(status); >> } >> ret = hv_call_deposit_pages(NUMA_NO_NODE, >> diff --git a/drivers/hv/mshv_root_main.c b/drivers/hv/mshv_root_main.c >> index bbdefe8a2e9c..56ababab57ce 100644 >> --- a/drivers/hv/mshv_root_main.c >> +++ b/drivers/hv/mshv_root_main.c >> @@ -929,6 +929,9 @@ static int mshv_vp_stats_map(u64 partition_id, u32 vp_index, >> if (err) >> goto unmap_self; >> + if (!stats_pages[HV_STATS_AREA_PARENT]) >> + stats_pages[HV_STATS_AREA_PARENT] = stats_pages[HV_STATS_AREA_SELF]; >> + >> return 0; >> unmap_self: >
© 2016 - 2025 Red Hat, Inc.