[PATCH 2/6] mshv: Ignore second stats page map result failure

Nuno Das Neves posted 6 patches 1 month ago
There is a newer version of this series
[PATCH 2/6] mshv: Ignore second stats page map result failure
Posted by Nuno Das Neves 1 month ago
From: Purna Pavan Chandra Aekkaladevi <paekkaladevi@linux.microsoft.com>

Some versions of the hypervisor do not support HV_STATS_AREA_PARENT and
return HV_STATUS_INVALID_PARAMETER for the second stats page mapping
request.

This results in a failure in module init. Instead of failing, gracefully
fall back to populating stats_pages[HV_STATS_AREA_PARENT] with the
already-mapped stats_pages[HV_STATS_AREA_SELF].

Signed-off-by: Purna Pavan Chandra Aekkaladevi <paekkaladevi@linux.microsoft.com>
Signed-off-by: Nuno Das Neves <nunodasneves@linux.microsoft.com>
---
 drivers/hv/mshv_root_hv_call.c | 43 ++++++++++++++++++++++++++++++----
 drivers/hv/mshv_root_main.c    |  3 +++
 2 files changed, 42 insertions(+), 4 deletions(-)

diff --git a/drivers/hv/mshv_root_hv_call.c b/drivers/hv/mshv_root_hv_call.c
index c9c274f29c3c..1c38576a673c 100644
--- a/drivers/hv/mshv_root_hv_call.c
+++ b/drivers/hv/mshv_root_hv_call.c
@@ -724,6 +724,24 @@ hv_call_notify_port_ring_empty(u32 sint_index)
 	return hv_result_to_errno(status);
 }
 
+static int
+hv_stats_get_area_type(enum hv_stats_object_type type,
+		       const union hv_stats_object_identity *identity)
+{
+	switch (type) {
+	case HV_STATS_OBJECT_HYPERVISOR:
+		return identity->hv.stats_area_type;
+	case HV_STATS_OBJECT_LOGICAL_PROCESSOR:
+		return identity->lp.stats_area_type;
+	case HV_STATS_OBJECT_PARTITION:
+		return identity->partition.stats_area_type;
+	case HV_STATS_OBJECT_VP:
+		return identity->vp.stats_area_type;
+	}
+
+	return -EINVAL;
+}
+
 int hv_call_map_stat_page(enum hv_stats_object_type type,
 			  const union hv_stats_object_identity *identity,
 			  void **addr)
@@ -732,7 +750,7 @@ int hv_call_map_stat_page(enum hv_stats_object_type type,
 	struct hv_input_map_stats_page *input;
 	struct hv_output_map_stats_page *output;
 	u64 status, pfn;
-	int ret = 0;
+	int hv_status, ret = 0;
 
 	do {
 		local_irq_save(flags);
@@ -747,11 +765,28 @@ int hv_call_map_stat_page(enum hv_stats_object_type type,
 		pfn = output->map_location;
 
 		local_irq_restore(flags);
-		if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) {
-			ret = hv_result_to_errno(status);
+
+		hv_status = hv_result(status);
+		if (hv_status != HV_STATUS_INSUFFICIENT_MEMORY) {
 			if (hv_result_success(status))
 				break;
-			return ret;
+
+			/*
+			 * Some versions of the hypervisor do not support the
+			 * PARENT stats area. In this case return "success" but
+			 * set the page to NULL. The caller checks for this
+			 * case and instead just uses the SELF area.
+			 */
+			if (hv_stats_get_area_type(type, identity) == HV_STATS_AREA_PARENT &&
+			    hv_status == HV_STATUS_INVALID_PARAMETER) {
+				pr_debug_once("%s: PARENT area type is unsupported\n",
+					      __func__);
+				*addr = NULL;
+				return 0;
+			}
+
+			hv_status_debug(status, "\n");
+			return hv_result_to_errno(status);
 		}
 
 		ret = hv_call_deposit_pages(NUMA_NO_NODE,
diff --git a/drivers/hv/mshv_root_main.c b/drivers/hv/mshv_root_main.c
index bbdefe8a2e9c..56ababab57ce 100644
--- a/drivers/hv/mshv_root_main.c
+++ b/drivers/hv/mshv_root_main.c
@@ -929,6 +929,9 @@ static int mshv_vp_stats_map(u64 partition_id, u32 vp_index,
 	if (err)
 		goto unmap_self;
 
+	if (!stats_pages[HV_STATS_AREA_PARENT])
+		stats_pages[HV_STATS_AREA_PARENT] = stats_pages[HV_STATS_AREA_SELF];
+
 	return 0;
 
 unmap_self:
-- 
2.34.1
Re: [PATCH 2/6] mshv: Ignore second stats page map result failure
Posted by Tianyu Lan 3 weeks, 5 days ago
On Fri, Aug 29, 2025 at 8:44 AM Nuno Das Neves
<nunodasneves@linux.microsoft.com> wrote:
>
> From: Purna Pavan Chandra Aekkaladevi <paekkaladevi@linux.microsoft.com>
>
> Some versions of the hypervisor do not support HV_STATUS_AREA_PARENT and
> return HV_STATUS_INVALID_PARAMETER for the second stats page mapping
> request.
>
> This results a failure in module init. Instead of failing, gracefully
> fall back to populating stats_pages[HV_STATS_AREA_PARENT] with the
> already-mapped stats_pages[HV_STATS_AREA_SELF].
>
> Signed-off-by: Purna Pavan Chandra Aekkaladevi <paekkaladevi@linux.microsoft.com>
> Signed-off-by: Nuno Das Neves <nunodasneves@linux.microsoft.com>
> ---
>  drivers/hv/mshv_root_hv_call.c | 43 ++++++++++++++++++++++++++++++----
>  drivers/hv/mshv_root_main.c    |  3 +++
>  2 files changed, 42 insertions(+), 4 deletions(-)
>
Reviewed-by: Tianyu Lan <tiala@microsoft.com>

> diff --git a/drivers/hv/mshv_root_hv_call.c b/drivers/hv/mshv_root_hv_call.c
> index c9c274f29c3c..1c38576a673c 100644
> --- a/drivers/hv/mshv_root_hv_call.c
> +++ b/drivers/hv/mshv_root_hv_call.c
> @@ -724,6 +724,24 @@ hv_call_notify_port_ring_empty(u32 sint_index)
>         return hv_result_to_errno(status);
>  }
>
> +static int
> +hv_stats_get_area_type(enum hv_stats_object_type type,
> +                      const union hv_stats_object_identity *identity)
> +{
> +       switch (type) {
> +       case HV_STATS_OBJECT_HYPERVISOR:
> +               return identity->hv.stats_area_type;
> +       case HV_STATS_OBJECT_LOGICAL_PROCESSOR:
> +               return identity->lp.stats_area_type;
> +       case HV_STATS_OBJECT_PARTITION:
> +               return identity->partition.stats_area_type;
> +       case HV_STATS_OBJECT_VP:
> +               return identity->vp.stats_area_type;
> +       }
> +
> +       return -EINVAL;
> +}
> +
>  int hv_call_map_stat_page(enum hv_stats_object_type type,
>                           const union hv_stats_object_identity *identity,
>                           void **addr)
> @@ -732,7 +750,7 @@ int hv_call_map_stat_page(enum hv_stats_object_type type,
>         struct hv_input_map_stats_page *input;
>         struct hv_output_map_stats_page *output;
>         u64 status, pfn;
> -       int ret = 0;
> +       int hv_status, ret = 0;
>
>         do {
>                 local_irq_save(flags);
> @@ -747,11 +765,28 @@ int hv_call_map_stat_page(enum hv_stats_object_type type,
>                 pfn = output->map_location;
>
>                 local_irq_restore(flags);
> -               if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) {
> -                       ret = hv_result_to_errno(status);
> +
> +               hv_status = hv_result(status);
> +               if (hv_status != HV_STATUS_INSUFFICIENT_MEMORY) {
>                         if (hv_result_success(status))
>                                 break;
> -                       return ret;
> +
> +                       /*
> +                        * Some versions of the hypervisor do not support the
> +                        * PARENT stats area. In this case return "success" but
> +                        * set the page to NULL. The caller checks for this
> +                        * case instead just uses the SELF area.
> +                        */
> +                       if (hv_stats_get_area_type(type, identity) == HV_STATS_AREA_PARENT &&
> +                           hv_status == HV_STATUS_INVALID_PARAMETER) {
> +                               pr_debug_once("%s: PARENT area type is unsupported\n",
> +                                             __func__);
> +                               *addr = NULL;
> +                               return 0;
> +                       }
> +
> +                       hv_status_debug(status, "\n");
> +                       return hv_result_to_errno(status);
>                 }
>
>                 ret = hv_call_deposit_pages(NUMA_NO_NODE,
> diff --git a/drivers/hv/mshv_root_main.c b/drivers/hv/mshv_root_main.c
> index bbdefe8a2e9c..56ababab57ce 100644
> --- a/drivers/hv/mshv_root_main.c
> +++ b/drivers/hv/mshv_root_main.c
> @@ -929,6 +929,9 @@ static int mshv_vp_stats_map(u64 partition_id, u32 vp_index,
>         if (err)
>                 goto unmap_self;
>
> +       if (!stats_pages[HV_STATS_AREA_PARENT])
> +               stats_pages[HV_STATS_AREA_PARENT] = stats_pages[HV_STATS_AREA_SELF];
> +
>         return 0;
>
>  unmap_self:
> --
> 2.34.1
>
>


-- 
Thanks
Tianyu Lan
Re: [PATCH 2/6] mshv: Ignore second stats page map result failure
Posted by Easwar Hariharan 3 weeks, 6 days ago
On 8/28/2025 5:43 PM, Nuno Das Neves wrote:
> From: Purna Pavan Chandra Aekkaladevi <paekkaladevi@linux.microsoft.com>
> 
> Some versions of the hypervisor do not support HV_STATUS_AREA_PARENT and
> return HV_STATUS_INVALID_PARAMETER for the second stats page mapping
> request.
> 
> This results a failure in module init. Instead of failing, gracefully
> fall back to populating stats_pages[HV_STATS_AREA_PARENT] with the
> already-mapped stats_pages[HV_STATS_AREA_SELF].

What's the impact of this graceful fallback? It occurs to me that if a stats
accumulator, in userspace perhaps, expected to get stats from the 2 pages,
it'd get incorrect values.

> 
> Signed-off-by: Purna Pavan Chandra Aekkaladevi <paekkaladevi@linux.microsoft.com>
> Signed-off-by: Nuno Das Neves <nunodasneves@linux.microsoft.com>
> ---
>  drivers/hv/mshv_root_hv_call.c | 43 ++++++++++++++++++++++++++++++----
>  drivers/hv/mshv_root_main.c    |  3 +++
>  2 files changed, 42 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/hv/mshv_root_hv_call.c b/drivers/hv/mshv_root_hv_call.c
> index c9c274f29c3c..1c38576a673c 100644
> --- a/drivers/hv/mshv_root_hv_call.c
> +++ b/drivers/hv/mshv_root_hv_call.c
> @@ -724,6 +724,24 @@ hv_call_notify_port_ring_empty(u32 sint_index)
>  	return hv_result_to_errno(status);
>  }
>  
> +static int
> +hv_stats_get_area_type(enum hv_stats_object_type type,

One line please, i.e.

static int hv_stats_get_area_type(...)

<snip>

Thanks,
Easwar (he/him)
Re: [PATCH 2/6] mshv: Ignore second stats page map result failure
Posted by Nuno Das Neves 3 weeks, 3 days ago
On 9/5/2025 12:21 PM, Easwar Hariharan wrote:
> On 8/28/2025 5:43 PM, Nuno Das Neves wrote:
>> From: Purna Pavan Chandra Aekkaladevi <paekkaladevi@linux.microsoft.com>
>>
>> Some versions of the hypervisor do not support HV_STATUS_AREA_PARENT and
>> return HV_STATUS_INVALID_PARAMETER for the second stats page mapping
>> request.
>>
>> This results a failure in module init. Instead of failing, gracefully
>> fall back to populating stats_pages[HV_STATS_AREA_PARENT] with the
>> already-mapped stats_pages[HV_STATS_AREA_SELF].
> 
> What's the impact of this graceful fallback? It occurs to me that if a stats
> accumulator, in userspace perhaps, expected to get stats from the 2 pages,
> it'd get incorrect values.
> 
This is going out of scope of this series a bit but I'll explain briefly.

When we do add the code to expose these stats to userspace, the SELF and
PARENT pages won't be exposed separately, there is no duplication.

For each stat counter in the page, we'll expose either the SELF or PARENT
value, depending on whether there is anything in that slot (whether it's zero
or not).

Some stats are available via the SELF page, and some via the PARENT page, but
the counters in the page have the same layout. So some counters in the SELF
page will all stay zero while on the PARENT page they are updated, and vice
versa.

I believe the hypervisor takes this strange approach for the purpose of
backward compatibility. Introducing L1VH created the need for this SELF/PARENT
distinction.

Hope that makes some kind of sense...it will be clearer when we post the mshv
debugfs code itself.

To put it another way, falling back to the SELF page won't cause any impact
to userspace because the distinction between the pages is all handled in the
driver, and we only read each stat value from either SELF or PARENT.

Nuno

>>
>> Signed-off-by: Purna Pavan Chandra Aekkaladevi <paekkaladevi@linux.microsoft.com>
>> Signed-off-by: Nuno Das Neves <nunodasneves@linux.microsoft.com>
>> ---
>>  drivers/hv/mshv_root_hv_call.c | 43 ++++++++++++++++++++++++++++++----
>>  drivers/hv/mshv_root_main.c    |  3 +++
>>  2 files changed, 42 insertions(+), 4 deletions(-)
>>
>> diff --git a/drivers/hv/mshv_root_hv_call.c b/drivers/hv/mshv_root_hv_call.c
>> index c9c274f29c3c..1c38576a673c 100644
>> --- a/drivers/hv/mshv_root_hv_call.c
>> +++ b/drivers/hv/mshv_root_hv_call.c
>> @@ -724,6 +724,24 @@ hv_call_notify_port_ring_empty(u32 sint_index)
>>  	return hv_result_to_errno(status);
>>  }
>>  
>> +static int
>> +hv_stats_get_area_type(enum hv_stats_object_type type,
> 
> One line please, i.e.
> 
> static int hv_stats_get_area_type(...)
> 
> <snip>
> 
> Thanks,
> Easwar (he/him)
Re: [PATCH 2/6] mshv: Ignore second stats page map result failure
Posted by Easwar Hariharan 3 weeks, 3 days ago
On 9/8/2025 10:04 AM, Nuno Das Neves wrote:
> On 9/5/2025 12:21 PM, Easwar Hariharan wrote:
>> On 8/28/2025 5:43 PM, Nuno Das Neves wrote:
>>> From: Purna Pavan Chandra Aekkaladevi <paekkaladevi@linux.microsoft.com>
>>>
>>> Some versions of the hypervisor do not support HV_STATUS_AREA_PARENT and
>>> return HV_STATUS_INVALID_PARAMETER for the second stats page mapping
>>> request.
>>>
>>> This results a failure in module init. Instead of failing, gracefully
>>> fall back to populating stats_pages[HV_STATS_AREA_PARENT] with the
>>> already-mapped stats_pages[HV_STATS_AREA_SELF].
>>
>> What's the impact of this graceful fallback? It occurs to me that if a stats
>> accumulator, in userspace perhaps, expected to get stats from the 2 pages,
>> it'd get incorrect values.
>>
> This is going out of scope of this series a bit but I'll explain briefly.
> 
> When we do add the code to expose these stats to userspace, the SELF and
> PARENT pages won't be exposed separately, there is no duplication.
> 
> For each stat counter in the page, we'll expose either the SELF or PARENT
> value, depending on whether there is anything in that slot (whether it's zero
> or not).
> 
> Some stats are available via the SELF page, and some via the PARENT page, but
> the counters in the page have the same layout. So some counters in the SELF
> page will all stay zero while on the PARENT page they are updated, and vice
> versa.
> 
> I believe the hypervisor takes this strange approach for the purpose of
> backward compatibility. Introducing L1VH created the need for this SELF/PARENT
> distinction.
> 
> Hope that makes some kind of sense...it will be clearer when we post the mshv
> debugfs code itself.
> 
> To put it another way, falling back to the SELF page won't cause any impact
> to userspace because the distinction between the pages is all handled in the
> driver, and we only read each stat value from either SELF or PARENT.
> 
> Nuno

Thank you for that explanation, it sorta makes sense.

I think it'd be better if this patch is part of the series that exposes the stats
to userspace, so that it can be reviewed in context with the rest of the code in
the driver that manages the pick-and-choose of a stat value from the SELF/PARENT
page.

Unless there's an active problem now in the upstream kernel that this patch solves?
i.e. are the versions of the hypervisor that don't support the PARENT stats
page available in the wild?

Thanks,
Easwar (he/him)
Re: [PATCH 2/6] mshv: Ignore second stats page map result failure
Posted by Nuno Das Neves 3 weeks, 3 days ago
On 9/8/2025 10:22 AM, Easwar Hariharan wrote:
> On 9/8/2025 10:04 AM, Nuno Das Neves wrote:
>> On 9/5/2025 12:21 PM, Easwar Hariharan wrote:
>>> On 8/28/2025 5:43 PM, Nuno Das Neves wrote:
>>>> From: Purna Pavan Chandra Aekkaladevi <paekkaladevi@linux.microsoft.com>
>>>>
>>>> Some versions of the hypervisor do not support HV_STATUS_AREA_PARENT and
>>>> return HV_STATUS_INVALID_PARAMETER for the second stats page mapping
>>>> request.
>>>>
>>>> This results a failure in module init. Instead of failing, gracefully
>>>> fall back to populating stats_pages[HV_STATS_AREA_PARENT] with the
>>>> already-mapped stats_pages[HV_STATS_AREA_SELF].
>>>
>>> What's the impact of this graceful fallback? It occurs to me that if a stats
>>> accumulator, in userspace perhaps, expected to get stats from the 2 pages,
>>> it'd get incorrect values.
>>>
>> This is going out of scope of this series a bit but I'll explain briefly.
>>
>> When we do add the code to expose these stats to userspace, the SELF and
>> PARENT pages won't be exposed separately, there is no duplication.
>>
>> For each stat counter in the page, we'll expose either the SELF or PARENT
>> value, depending on whether there is anything in that slot (whether it's zero
>> or not).
>>
>> Some stats are available via the SELF page, and some via the PARENT page, but
>> the counters in the page have the same layout. So some counters in the SELF
>> page will all stay zero while on the PARENT page they are updated, and vice
>> versa.
>>
>> I believe the hypervisor takes this strange approach for the purpose of
>> backward compatibility. Introducing L1VH created the need for this SELF/PARENT
>> distinction.
>>
>> Hope that makes some kind of sense...it will be clearer when we post the mshv
>> debugfs code itself.
>>
>> To put it another way, falling back to the SELF page won't cause any impact
>> to userspace because the distinction between the pages is all handled in the
>> driver, and we only read each stat value from either SELF or PARENT.
>>
>> Nuno
> 
> Thank you for that explanation, it sorta makes sense.
> 
> I think it'd be better if this patch is part of the series that exposes the stats
> to userspace, so that it can be reviewed in context with the rest of the code in
> the driver that manages the pick-and-choose of a stat value from the SELF/PARENT
> page.
> 
Good idea, I think I'll do that. Thanks!

> Unless there's an active problem now in the upstream kernel that this patch solves?
> i.e. are the versions of the hypervisor that don't support the PARENT stats
> page available in the wild?
> 
I thought there was, but on reflection, no it doesn't solve a problem that exists in
the code today.

Nuno

> Thanks,
> Easwar (he/him)
Re: [PATCH 2/6] mshv: Ignore second stats page map result failure
Posted by Praveen K Paladugu 3 weeks, 2 days ago

On 9/8/2025 1:06 PM, Nuno Das Neves wrote:
> On 9/8/2025 10:22 AM, Easwar Hariharan wrote:
>> On 9/8/2025 10:04 AM, Nuno Das Neves wrote:
>>> On 9/5/2025 12:21 PM, Easwar Hariharan wrote:
>>>> On 8/28/2025 5:43 PM, Nuno Das Neves wrote:
>>>>> From: Purna Pavan Chandra Aekkaladevi <paekkaladevi@linux.microsoft.com>
>>>>>
>>>>> Some versions of the hypervisor do not support HV_STATUS_AREA_PARENT and
>>>>> return HV_STATUS_INVALID_PARAMETER for the second stats page mapping
>>>>> request.
>>>>>
>>>>> This results a failure in module init. Instead of failing, gracefully
>>>>> fall back to populating stats_pages[HV_STATS_AREA_PARENT] with the
>>>>> already-mapped stats_pages[HV_STATS_AREA_SELF].
>>>>
>>>> What's the impact of this graceful fallback? It occurs to me that if a stats
>>>> accumulator, in userspace perhaps, expected to get stats from the 2 pages,
>>>> it'd get incorrect values.
>>>>
>>> This is going out of scope of this series a bit but I'll explain briefly.
>>>
>>> When we do add the code to expose these stats to userspace, the SELF and
>>> PARENT pages won't be exposed separately, there is no duplication.
>>>
>>> For each stat counter in the page, we'll expose either the SELF or PARENT
>>> value, depending on whether there is anything in that slot (whether it's zero
>>> or not).
>>>
>>> Some stats are available via the SELF page, and some via the PARENT page, but
>>> the counters in the page have the same layout. So some counters in the SELF
>>> page will all stay zero while on the PARENT page they are updated, and vice
>>> versa.
>>>
>>> I believe the hypervisor takes this strange approach for the purpose of
>>> backward compatibility. Introducing L1VH created the need for this SELF/PARENT
>>> distinction.
>>>
>>> Hope that makes some kind of sense...it will be clearer when we post the mshv
>>> debugfs code itself.
>>>
>>> To put it another way, falling back to the SELF page won't cause any impact
>>> to userspace because the distinction between the pages is all handled in the
>>> driver, and we only read each stat value from either SELF or PARENT.
>>>
>>> Nuno
>>
>> Thank you for that explanation, it sorta makes sense.
>>
>> I think it'd be better if this patch is part of the series that exposes the stats
>> to userspace, so that it can be reviewed in context with the rest of the code in
>> the driver that manages the pick-and-choose of a stat value from the SELF/PARENT
>> page.
>>
> Good idea, I think I'll do that. Thanks!
> 
>> Unless there's an active problem now in the upstream kernel that this patch solves?
>> i.e. are the versions of the hypervisor that don't support the PARENT stats
>> page available in the wild?
>>
> I thought there was, but on reflection, no it doesn't solve a problem that exists in
> the code today.
> 
> Nuno
>

The usecases for stats exposed by the hypervisor are:
1) used within the kernel by root scheduler
2) exposed to userspace via debugfs.

I thought we are addressing the first use-case here (patch1 in this 
series). If root scheduler support was upstreamed then this patchset 
does solve a problem in upstream code.


>> Thanks,
>> Easwar (he/him)
> 
> 

-- 
Regards,
Praveen K Paladugu
Re: [PATCH 2/6] mshv: Ignore second stats page map result failure
Posted by Easwar Hariharan 3 weeks, 2 days ago
On 9/9/2025 7:52 AM, Praveen K Paladugu wrote:
> 
> 
> On 9/8/2025 1:06 PM, Nuno Das Neves wrote:
>> On 9/8/2025 10:22 AM, Easwar Hariharan wrote:
>>> On 9/8/2025 10:04 AM, Nuno Das Neves wrote:
>>>> On 9/5/2025 12:21 PM, Easwar Hariharan wrote:
>>>>> On 8/28/2025 5:43 PM, Nuno Das Neves wrote:
>>>>>> From: Purna Pavan Chandra Aekkaladevi <paekkaladevi@linux.microsoft.com>
>>>>>>
>>>>>> Some versions of the hypervisor do not support HV_STATUS_AREA_PARENT and
>>>>>> return HV_STATUS_INVALID_PARAMETER for the second stats page mapping
>>>>>> request.
>>>>>>
>>>>>> This results a failure in module init. Instead of failing, gracefully
>>>>>> fall back to populating stats_pages[HV_STATS_AREA_PARENT] with the
>>>>>> already-mapped stats_pages[HV_STATS_AREA_SELF].
>>>>>
>>>>> What's the impact of this graceful fallback? It occurs to me that if a stats
>>>>> accumulator, in userspace perhaps, expected to get stats from the 2 pages,
>>>>> it'd get incorrect values.
>>>>>
>>>> This is going out of scope of this series a bit but I'll explain briefly.
>>>>
>>>> When we do add the code to expose these stats to userspace, the SELF and
>>>> PARENT pages won't be exposed separately, there is no duplication.
>>>>
>>>> For each stat counter in the page, we'll expose either the SELF or PARENT
>>>> value, depending on whether there is anything in that slot (whether it's zero
>>>> or not).
>>>>
>>>> Some stats are available via the SELF page, and some via the PARENT page, but
>>>> the counters in the page have the same layout. So some counters in the SELF
>>>> page will all stay zero while on the PARENT page they are updated, and vice
>>>> versa.
>>>>
>>>> I believe the hypervisor takes this strange approach for the purpose of
>>>> backward compatibility. Introducing L1VH created the need for this SELF/PARENT
>>>> distinction.
>>>>
>>>> Hope that makes some kind of sense...it will be clearer when we post the mshv
>>>> debugfs code itself.
>>>>
>>>> To put it another way, falling back to the SELF page won't cause any impact
>>>> to userspace because the distinction between the pages is all handled in the
>>>> driver, and we only read each stat value from either SELF or PARENT.
>>>>
>>>> Nuno
>>>
>>> Thank you for that explanation, it sorta makes sense.
>>>
>>> I think it'd be better if this patch is part of the series that exposes the stats
>>> to userspace, so that it can be reviewed in context with the rest of the code in
>>> the driver that manages the pick-and-choose of a stat value from the SELF/PARENT
>>> page.
>>>
>> Good idea, I think I'll do that. Thanks!
>>
>>> Unless there's an active problem now in the upstream kernel that this patch solves?
>>> i.e. are the versions of the hypervisor that don't support the PARENT stats
>>> page available in the wild?
>>>
>> I thought there was, but on reflection, no it doesn't solve a problem that exists in
>> the code today.
>>
>> Nuno
>>
> 
> The usecases for stats exposed by the hypervisor are:
> 1) used within the kernel by root scheduler
> 2) exposed to userspace via debugfs.
> 
> I thought we are addressing the first use-case here (patch1 in this series). If root scheduler support was upstreamed then this patchset does solve a problem in upstream code.
> 

Sorry about the long context, I couldn't figure out a good spot to snip.

Thanks for calling that out. I think root scheduler support has been upstreamed if I'm reading
root_scheduler_init() right, Nuno can confirm.

With the series applied, I don't see any of the code that picks and chooses the stats from
the PARENT/SELF pages even for the kernel case, rather there's a straight memcpy of the
stats pages in the create_vp ioctl.

Is the kernel usage with the root scheduler somehow immune from the problem userspace would
face with the duplicated pages? If not, I'd say that it's an argument for dropping patch 1
and 2, instead of including patch 2, or to fold them together. We don't want a state in the
upstream kernel where a commit introduces a known problem just to be solved by the following
commit.

Thanks,
Easwar (he/him)
Re: [PATCH 2/6] mshv: Ignore second stats page map result failure
Posted by Nuno Das Neves 3 weeks, 2 days ago
On 9/9/2025 9:27 AM, Easwar Hariharan wrote:
> On 9/9/2025 7:52 AM, Praveen K Paladugu wrote:
>>
>>
>> On 9/8/2025 1:06 PM, Nuno Das Neves wrote:
>>> On 9/8/2025 10:22 AM, Easwar Hariharan wrote:
>>>> On 9/8/2025 10:04 AM, Nuno Das Neves wrote:
>>>>> On 9/5/2025 12:21 PM, Easwar Hariharan wrote:
>>>>>> On 8/28/2025 5:43 PM, Nuno Das Neves wrote:
>>>>>>> From: Purna Pavan Chandra Aekkaladevi <paekkaladevi@linux.microsoft.com>
>>>>>>>
>>>>>>> Some versions of the hypervisor do not support HV_STATUS_AREA_PARENT and
>>>>>>> return HV_STATUS_INVALID_PARAMETER for the second stats page mapping
>>>>>>> request.
>>>>>>>
>>>>>>> This results a failure in module init. Instead of failing, gracefully
>>>>>>> fall back to populating stats_pages[HV_STATS_AREA_PARENT] with the
>>>>>>> already-mapped stats_pages[HV_STATS_AREA_SELF].
>>>>>>
>>>>>> What's the impact of this graceful fallback? It occurs to me that if a stats
>>>>>> accumulator, in userspace perhaps, expected to get stats from the 2 pages,
>>>>>> it'd get incorrect values.
>>>>>>
>>>>> This is going out of scope of this series a bit but I'll explain briefly.
>>>>>
>>>>> When we do add the code to expose these stats to userspace, the SELF and
>>>>> PARENT pages won't be exposed separately, there is no duplication.
>>>>>
>>>>> For each stat counter in the page, we'll expose either the SELF or PARENT
>>>>> value, depending on whether there is anything in that slot (whether it's zero
>>>>> or not).
>>>>>
>>>>> Some stats are available via the SELF page, and some via the PARENT page, but
>>>>> the counters in the page have the same layout. So some counters in the SELF
>>>>> page will all stay zero while on the PARENT page they are updated, and vice
>>>>> versa.
>>>>>
>>>>> I believe the hypervisor takes this strange approach for the purpose of
>>>>> backward compatibility. Introducing L1VH created the need for this SELF/PARENT
>>>>> distinction.
>>>>>
>>>>> Hope that makes some kind of sense...it will be clearer when we post the mshv
>>>>> debugfs code itself.
>>>>>
>>>>> To put it another way, falling back to the SELF page won't cause any impact
>>>>> to userspace because the distinction between the pages is all handled in the
>>>>> driver, and we only read each stat value from either SELF or PARENT.
>>>>>
>>>>> Nuno
>>>>
>>>> Thank you for that explanation, it sorta makes sense.
>>>>
>>>> I think it'd be better if this patch is part of the series that exposes the stats
>>>> to userspace, so that it can be reviewed in context with the rest of the code in
>>>> the driver that manages the pick-and-choose of a stat value from the SELF/PARENT
>>>> page.
>>>>
>>> Good idea, I think I'll do that. Thanks!
>>>
>>>> Unless there's an active problem now in the upstream kernel that this patch solves?
>>>> i.e. are the versions of the hypervisor that don't support the PARENT stats
>>>> page available in the wild?
>>>>
>>> I thought there was, but on reflection, no it doesn't solve a problem that exists in
>>> the code today.
>>>
>>> Nuno
>>>
>>
>> The usecases for stats exposed by the hypervisor are:
>> 1) used within the kernel by root scheduler
>> 2) exposed to userspace via debugfs.
>>
>> I thought we are addressing the first use-case here (patch1 in this series). If root scheduler support was upstreamed then this patchset does solve a problem in upstream code.
>>
> 
> Sorry about the long context, I couldn't figure out a good spot to snip.
> 
> Thanks for calling that out. I think root scheduler support has been upstreamed if I'm reading
> root_scheduler_init() right, Nuno can confirm.
> 
Correct. But the user of the PARENT == SELF workaround is L1VH, which doesn't support root
scheduler today. So technically the fix doesn't do anything if we're just talking about the stats
mapping used by the root scheduler code.

> With the series applied, I don't see any of the code that picks and chooses the stats from
> the PARENT/SELF pages even for the kernel case, rather there's a straight memcpy of the
> stats pages in the create_vp ioctl.
> 
This memcpy:
memcpy(vp->vp_stats_pages, stats_pages, sizeof(stats_pages));

Is not copying the contents of the pages. It's copying array of pointers to the SELF and PARENT
pages:

struct mshv_vp{
/* ... */
	struct hv_stats_page *vp_stats_pages[2];
/* ... */
};

> Is the kernel usage with the root scheduler somehow immune from the problem userspace would
> face with the duplicated pages? If not, I'd say that it's an argument for dropping patch 1
> and 2, instead of including patch 2, or to fold them together. We don't want a state in the
> upstream kernel where a commit introduces a known problem just to be solved by the following
> commit.
> 
Yes, it's immune. Nothing is being accumulated and therefore double-counted. There's a signal
value in the stats page which is used to determine if the dispatch thread is blocked. It's
just a boolean. See mshv_vp_dispatch_thread_blocked().

Thanks
Nuno

> Thanks,
> Easwar (he/him)
Re: [PATCH 2/6] mshv: Ignore second stats page map result failure
Posted by Praveen K Paladugu 3 weeks, 6 days ago

On 8/28/2025 7:43 PM, Nuno Das Neves wrote:
> From: Purna Pavan Chandra Aekkaladevi <paekkaladevi@linux.microsoft.com>
> 
> Some versions of the hypervisor do not support HV_STATUS_AREA_PARENT and
> return HV_STATUS_INVALID_PARAMETER for the second stats page mapping
> request.
>
Is this behavior limited to VP stats? Or does it extend to other
stats (hypervisor, partition, etc) as well?

> This results a failure in module init. Instead of failing, gracefully
nit: s/This results in a failure during module init/

> fall back to populating stats_pages[HV_STATS_AREA_PARENT] with the
> already-mapped stats_pages[HV_STATS_AREA_SELF].
> 
> Signed-off-by: Purna Pavan Chandra Aekkaladevi <paekkaladevi@linux.microsoft.com>
> Signed-off-by: Nuno Das Neves <nunodasneves@linux.microsoft.com>
> ---
>   drivers/hv/mshv_root_hv_call.c | 43 ++++++++++++++++++++++++++++++----
>   drivers/hv/mshv_root_main.c    |  3 +++
>   2 files changed, 42 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/hv/mshv_root_hv_call.c b/drivers/hv/mshv_root_hv_call.c
> index c9c274f29c3c..1c38576a673c 100644
> --- a/drivers/hv/mshv_root_hv_call.c
> +++ b/drivers/hv/mshv_root_hv_call.c
> @@ -724,6 +724,24 @@ hv_call_notify_port_ring_empty(u32 sint_index)
>   	return hv_result_to_errno(status);
>   }
>   
> +static int
> +hv_stats_get_area_type(enum hv_stats_object_type type,
> +		       const union hv_stats_object_identity *identity)
> +{
> +	switch (type) {
> +	case HV_STATS_OBJECT_HYPERVISOR:
> +		return identity->hv.stats_area_type;
> +	case HV_STATS_OBJECT_LOGICAL_PROCESSOR:
> +		return identity->lp.stats_area_type;
> +	case HV_STATS_OBJECT_PARTITION:
> +		return identity->partition.stats_area_type;
> +	case HV_STATS_OBJECT_VP:
> +		return identity->vp.stats_area_type;
> +	}
> +
> +	return -EINVAL;
> +}
> +
>   int hv_call_map_stat_page(enum hv_stats_object_type type,
>   			  const union hv_stats_object_identity *identity,
>   			  void **addr)
> @@ -732,7 +750,7 @@ int hv_call_map_stat_page(enum hv_stats_object_type type,
>   	struct hv_input_map_stats_page *input;
>   	struct hv_output_map_stats_page *output;
>   	u64 status, pfn;
> -	int ret = 0;
> +	int hv_status, ret = 0;
>   
>   	do {
>   		local_irq_save(flags);
> @@ -747,11 +765,28 @@ int hv_call_map_stat_page(enum hv_stats_object_type type,
>   		pfn = output->map_location;
>   
>   		local_irq_restore(flags);
> -		if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) {
> -			ret = hv_result_to_errno(status);
> +
> +		hv_status = hv_result(status);
> +		if (hv_status != HV_STATUS_INSUFFICIENT_MEMORY) {
>   			if (hv_result_success(status))
>   				break;
> -			return ret;
> +
> +			/*
> +			 * Some versions of the hypervisor do not support the
> +			 * PARENT stats area. In this case return "success" but
> +			 * set the page to NULL. The caller checks for this
> +			 * case instead just uses the SELF area.
> +			 */
> +			if (hv_stats_get_area_type(type, identity) == HV_STATS_AREA_PARENT &&
> +			    hv_status == HV_STATUS_INVALID_PARAMETER) {
> +				pr_debug_once("%s: PARENT area type is unsupported\n",
> +					      __func__);
> +				*addr = NULL;
> +				return 0;
> +			}
> +
> +			hv_status_debug(status, "\n");
> +			return hv_result_to_errno(status);
>   		}
>   
>   		ret = hv_call_deposit_pages(NUMA_NO_NODE,
> diff --git a/drivers/hv/mshv_root_main.c b/drivers/hv/mshv_root_main.c
> index bbdefe8a2e9c..56ababab57ce 100644
> --- a/drivers/hv/mshv_root_main.c
> +++ b/drivers/hv/mshv_root_main.c
> @@ -929,6 +929,9 @@ static int mshv_vp_stats_map(u64 partition_id, u32 vp_index,
>   	if (err)
>   		goto unmap_self;
>   
> +	if (!stats_pages[HV_STATS_AREA_PARENT])
> +		stats_pages[HV_STATS_AREA_PARENT] = stats_pages[HV_STATS_AREA_SELF];
> +
>   	return 0;
>   
>   unmap_self:

-- 
Regards,
Praveen K Paladugu
Re: [PATCH 2/6] mshv: Ignore second stats page map result failure
Posted by Nuno Das Neves 3 weeks, 6 days ago
On 9/5/2025 8:31 AM, Praveen K Paladugu wrote:
> 
> 
> On 8/28/2025 7:43 PM, Nuno Das Neves wrote:
>> From: Purna Pavan Chandra Aekkaladevi <paekkaladevi@linux.microsoft.com>
>>
>> Some versions of the hypervisor do not support HV_STATUS_AREA_PARENT and
>> return HV_STATUS_INVALID_PARAMETER for the second stats page mapping
>> request.
>>
> Is this behavior limited to VP stats? Or does it extend to other
> stats (hypervisor, partition, etc) as well?
> 
In practice we will only need to worry about partition and VP.

In the current code in hyperv-next, it's only VP stats. Upcoming patches
to add debugfs code will also need it for partition stats.

>> This results a failure in module init. Instead of failing, gracefully
> nit: s/This results in a failure during module init/

Thanks, I'll change it for v2

Nuno

>> fall back to populating stats_pages[HV_STATS_AREA_PARENT] with the
>> already-mapped stats_pages[HV_STATS_AREA_SELF].
>>
>> Signed-off-by: Purna Pavan Chandra Aekkaladevi <paekkaladevi@linux.microsoft.com>
>> Signed-off-by: Nuno Das Neves <nunodasneves@linux.microsoft.com>
>> ---
>>   drivers/hv/mshv_root_hv_call.c | 43 ++++++++++++++++++++++++++++++----
>>   drivers/hv/mshv_root_main.c    |  3 +++
>>   2 files changed, 42 insertions(+), 4 deletions(-)
>>
>> diff --git a/drivers/hv/mshv_root_hv_call.c b/drivers/hv/mshv_root_hv_call.c
>> index c9c274f29c3c..1c38576a673c 100644
>> --- a/drivers/hv/mshv_root_hv_call.c
>> +++ b/drivers/hv/mshv_root_hv_call.c
>> @@ -724,6 +724,24 @@ hv_call_notify_port_ring_empty(u32 sint_index)
>>       return hv_result_to_errno(status);
>>   }
>>   +static int
>> +hv_stats_get_area_type(enum hv_stats_object_type type,
>> +               const union hv_stats_object_identity *identity)
>> +{
>> +    switch (type) {
>> +    case HV_STATS_OBJECT_HYPERVISOR:
>> +        return identity->hv.stats_area_type;
>> +    case HV_STATS_OBJECT_LOGICAL_PROCESSOR:
>> +        return identity->lp.stats_area_type;
>> +    case HV_STATS_OBJECT_PARTITION:
>> +        return identity->partition.stats_area_type;
>> +    case HV_STATS_OBJECT_VP:
>> +        return identity->vp.stats_area_type;
>> +    }
>> +
>> +    return -EINVAL;
>> +}
>> +
>>   int hv_call_map_stat_page(enum hv_stats_object_type type,
>>                 const union hv_stats_object_identity *identity,
>>                 void **addr)
>> @@ -732,7 +750,7 @@ int hv_call_map_stat_page(enum hv_stats_object_type type,
>>       struct hv_input_map_stats_page *input;
>>       struct hv_output_map_stats_page *output;
>>       u64 status, pfn;
>> -    int ret = 0;
>> +    int hv_status, ret = 0;
>>         do {
>>           local_irq_save(flags);
>> @@ -747,11 +765,28 @@ int hv_call_map_stat_page(enum hv_stats_object_type type,
>>           pfn = output->map_location;
>>             local_irq_restore(flags);
>> -        if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) {
>> -            ret = hv_result_to_errno(status);
>> +
>> +        hv_status = hv_result(status);
>> +        if (hv_status != HV_STATUS_INSUFFICIENT_MEMORY) {
>>               if (hv_result_success(status))
>>                   break;
>> -            return ret;
>> +
>> +            /*
>> +             * Some versions of the hypervisor do not support the
>> +             * PARENT stats area. In this case return "success" but
>> +             * set the page to NULL. The caller checks for this
>> +             * case instead just uses the SELF area.
>> +             */
>> +            if (hv_stats_get_area_type(type, identity) == HV_STATS_AREA_PARENT &&
>> +                hv_status == HV_STATUS_INVALID_PARAMETER) {
>> +                pr_debug_once("%s: PARENT area type is unsupported\n",
>> +                          __func__);
>> +                *addr = NULL;
>> +                return 0;
>> +            }
>> +
>> +            hv_status_debug(status, "\n");
>> +            return hv_result_to_errno(status);
>>           }
>>             ret = hv_call_deposit_pages(NUMA_NO_NODE,
>> diff --git a/drivers/hv/mshv_root_main.c b/drivers/hv/mshv_root_main.c
>> index bbdefe8a2e9c..56ababab57ce 100644
>> --- a/drivers/hv/mshv_root_main.c
>> +++ b/drivers/hv/mshv_root_main.c
>> @@ -929,6 +929,9 @@ static int mshv_vp_stats_map(u64 partition_id, u32 vp_index,
>>       if (err)
>>           goto unmap_self;
>>   +    if (!stats_pages[HV_STATS_AREA_PARENT])
>> +        stats_pages[HV_STATS_AREA_PARENT] = stats_pages[HV_STATS_AREA_SELF];
>> +
>>       return 0;
>>     unmap_self:
>