[PATCH v5 01/10] hyperv: Convert Hyper-V status codes to strings

Nuno Das Neves posted 10 patches 11 months, 2 weeks ago
There is a newer version of this series
[PATCH v5 01/10] hyperv: Convert Hyper-V status codes to strings
Posted by Nuno Das Neves 11 months, 2 weeks ago
Introduce hv_result_to_string() for this purpose. This allows
hypercall failures to be debugged more easily with dmesg.

Signed-off-by: Nuno Das Neves <nunodasneves@linux.microsoft.com>
---
 drivers/hv/hv_common.c         | 65 ++++++++++++++++++++++++++++++++++
 drivers/hv/hv_proc.c           | 13 ++++---
 include/asm-generic/mshyperv.h |  1 +
 3 files changed, 74 insertions(+), 5 deletions(-)

diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c
index 9804adb4cc56..ce20818688fe 100644
--- a/drivers/hv/hv_common.c
+++ b/drivers/hv/hv_common.c
@@ -740,3 +740,68 @@ void hv_identify_partition_type(void)
 			pr_crit("Hyper-V: CONFIG_MSHV_ROOT not enabled!\n");
 	}
 }
+
+const char *hv_result_to_string(u64 hv_status)
+{
+	switch (hv_result(hv_status)) {
+	case HV_STATUS_SUCCESS:
+		return "HV_STATUS_SUCCESS";
+	case HV_STATUS_INVALID_HYPERCALL_CODE:
+		return "HV_STATUS_INVALID_HYPERCALL_CODE";
+	case HV_STATUS_INVALID_HYPERCALL_INPUT:
+		return "HV_STATUS_INVALID_HYPERCALL_INPUT";
+	case HV_STATUS_INVALID_ALIGNMENT:
+		return "HV_STATUS_INVALID_ALIGNMENT";
+	case HV_STATUS_INVALID_PARAMETER:
+		return "HV_STATUS_INVALID_PARAMETER";
+	case HV_STATUS_ACCESS_DENIED:
+		return "HV_STATUS_ACCESS_DENIED";
+	case HV_STATUS_INVALID_PARTITION_STATE:
+		return "HV_STATUS_INVALID_PARTITION_STATE";
+	case HV_STATUS_OPERATION_DENIED:
+		return "HV_STATUS_OPERATION_DENIED";
+	case HV_STATUS_UNKNOWN_PROPERTY:
+		return "HV_STATUS_UNKNOWN_PROPERTY";
+	case HV_STATUS_PROPERTY_VALUE_OUT_OF_RANGE:
+		return "HV_STATUS_PROPERTY_VALUE_OUT_OF_RANGE";
+	case HV_STATUS_INSUFFICIENT_MEMORY:
+		return "HV_STATUS_INSUFFICIENT_MEMORY";
+	case HV_STATUS_INVALID_PARTITION_ID:
+		return "HV_STATUS_INVALID_PARTITION_ID";
+	case HV_STATUS_INVALID_VP_INDEX:
+		return "HV_STATUS_INVALID_VP_INDEX";
+	case HV_STATUS_NOT_FOUND:
+		return "HV_STATUS_NOT_FOUND";
+	case HV_STATUS_INVALID_PORT_ID:
+		return "HV_STATUS_INVALID_PORT_ID";
+	case HV_STATUS_INVALID_CONNECTION_ID:
+		return "HV_STATUS_INVALID_CONNECTION_ID";
+	case HV_STATUS_INSUFFICIENT_BUFFERS:
+		return "HV_STATUS_INSUFFICIENT_BUFFERS";
+	case HV_STATUS_NOT_ACKNOWLEDGED:
+		return "HV_STATUS_NOT_ACKNOWLEDGED";
+	case HV_STATUS_INVALID_VP_STATE:
+		return "HV_STATUS_INVALID_VP_STATE";
+	case HV_STATUS_NO_RESOURCES:
+		return "HV_STATUS_NO_RESOURCES";
+	case HV_STATUS_PROCESSOR_FEATURE_NOT_SUPPORTED:
+		return "HV_STATUS_PROCESSOR_FEATURE_NOT_SUPPORTED";
+	case HV_STATUS_INVALID_LP_INDEX:
+		return "HV_STATUS_INVALID_LP_INDEX";
+	case HV_STATUS_INVALID_REGISTER_VALUE:
+		return "HV_STATUS_INVALID_REGISTER_VALUE";
+	case HV_STATUS_OPERATION_FAILED:
+		return "HV_STATUS_OPERATION_FAILED";
+	case HV_STATUS_TIME_OUT:
+		return "HV_STATUS_TIME_OUT";
+	case HV_STATUS_CALL_PENDING:
+		return "HV_STATUS_CALL_PENDING";
+	case HV_STATUS_VTL_ALREADY_ENABLED:
+		return "HV_STATUS_VTL_ALREADY_ENABLED";
+	default:
+		return "Unknown";
+	};
+	return "Unknown";
+}
+EXPORT_SYMBOL_GPL(hv_result_to_string);
+
diff --git a/drivers/hv/hv_proc.c b/drivers/hv/hv_proc.c
index 2fae18e4f7d2..8fc30f509fa7 100644
--- a/drivers/hv/hv_proc.c
+++ b/drivers/hv/hv_proc.c
@@ -87,7 +87,8 @@ int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages)
 				     page_count, 0, input_page, NULL);
 	local_irq_restore(flags);
 	if (!hv_result_success(status)) {
-		pr_err("Failed to deposit pages: %lld\n", status);
+		pr_err("%s: Failed to deposit pages: %s\n", __func__,
+		       hv_result_to_string(status));
 		ret = hv_result_to_errno(status);
 		goto err_free_allocations;
 	}
@@ -137,8 +138,9 @@ int hv_call_add_logical_proc(int node, u32 lp_index, u32 apic_id)
 
 		if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) {
 			if (!hv_result_success(status)) {
-				pr_err("%s: cpu %u apic ID %u, %lld\n", __func__,
-				       lp_index, apic_id, status);
+				pr_err("%s: cpu %u apic ID %u, %s\n",
+				       __func__, lp_index, apic_id,
+				       hv_result_to_string(status));
 				ret = hv_result_to_errno(status);
 			}
 			break;
@@ -179,8 +181,9 @@ int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags)
 
 		if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) {
 			if (!hv_result_success(status)) {
-				pr_err("%s: vcpu %u, lp %u, %lld\n", __func__,
-				       vp_index, flags, status);
+				pr_err("%s: vcpu %u, lp %u, %s\n",
+				       __func__, vp_index, flags,
+				       hv_result_to_string(status));
 				ret = hv_result_to_errno(status);
 			}
 			break;
diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h
index b13b0cda4ac8..dc4729dba9ef 100644
--- a/include/asm-generic/mshyperv.h
+++ b/include/asm-generic/mshyperv.h
@@ -298,6 +298,7 @@ static inline int cpumask_to_vpset_skip(struct hv_vpset *vpset,
 	return __cpumask_to_vpset(vpset, cpus, func);
 }
 
+const char *hv_result_to_string(u64 hv_status);
 int hv_result_to_errno(u64 status);
 void hyperv_report_panic(struct pt_regs *regs, long err, bool in_die);
 bool hv_is_hyperv_initialized(void);
-- 
2.34.1
Re: [PATCH v5 01/10] hyperv: Convert Hyper-V status codes to strings
Posted by Roman Kisel 11 months, 2 weeks ago

On 2/26/2025 3:07 PM, Nuno Das Neves wrote:

[...]

> +
> +const char *hv_result_to_string(u64 hv_status)
> +{
> +	switch (hv_result(hv_status)) {

[...]

> +		return "HV_STATUS_VTL_ALREADY_ENABLED";
> +	default:
> +		return "Unknown";
> +	};
> +	return "Unknown";
> +}
> +EXPORT_SYMBOL_GPL(hv_result_to_string);

Should we remove this and output the hexadecimal error code in ~3 places
this function is used?

The "Unknown" part would make debugging harder actually when something
fails. I presume that the mainstream scenarios all work, and it is the
edge cases that might fail, and these are likelier to produce "Unknown".

Folks who actually debug failed hypercalls rarely have issues with
looking up the error code, and printing "Unknown" to the log is worse
than a hexadecimal. Like even the people who wrote the code got nothing
to say about what is going on.

-- 
Thank you,
Roman
Re: [PATCH v5 01/10] hyperv: Convert Hyper-V status codes to strings
Posted by Nuno Das Neves 11 months, 2 weeks ago
On 2/27/2025 9:02 AM, Roman Kisel wrote:
> 
> 
> On 2/26/2025 3:07 PM, Nuno Das Neves wrote:
> 
> [...]
> 
>> +
>> +const char *hv_result_to_string(u64 hv_status)
>> +{
>> +    switch (hv_result(hv_status)) {
> 
> [...]
> 
>> +        return "HV_STATUS_VTL_ALREADY_ENABLED";
>> +    default:
>> +        return "Unknown";
>> +    };
>> +    return "Unknown";
>> +}
>> +EXPORT_SYMBOL_GPL(hv_result_to_string);
> 
> Should we remove this and output the hexadecimal error code in ~3 places
> this function is used?
> 
I guess you're implying it's not worth adding such a function for only a
few places in the code? That is a good point, and a bit of an oversight
on my part while editing this series. Originally all the hypercall helper
functions in the driver code (10+ places) used this function as well, but
I removed those printk()s as a temporary solution to limit the use of
printk in the driver code (as opposed to dev_printk() which is preferred).

I didn't think to remove *this* patch as a result of that change!
I do want to figure out a good way to add that logging back to the hypercall
helpers, so I do want to try and get some form of this patch in to aid
debugging hypercalls - it has been very very useful over time.

> The "Unknown" part would make debugging harder actually when something
> fails. I presume that the mainstream scenarios all work, and it is the
> edge cases that might fail, and these are likelier to produce "Unknown".
> 
That is a very good point. Ideally, we could log "Unknown" along with
the hex code instead of replacing it.

What do you think about keeping this function, but instead of using it
directly, introduce a "standard" way for logging hypercall errors which
can hopefully be used everywhere in the kernel?
e.g. a simple macro:
#define hv_hvcall_err(control, status)
do {
	u64 ___status = (status);
	pr_err("Hypercall: %#x err: %#x : %s", (control) & 0xFFFF, hv_result(___status), hv_result_to_string(___status));
} while (0)

I feel like this is the best of both worlds, and actually makes it even
easier to do this logging everywhere it is wanted (for me, that includes
all the /dev/mshv-related hypercalls).
We could add strings for the HVCALL_ values too, and/or include __func__
in the macro to aid in finding the context it was used in.

> Folks who actually debug failed hypercalls rarely have issues with
> looking up the error code, and printing "Unknown" to the log is worse
> than a hexadecimal. Like even the people who wrote the code got nothing
> to say about what is going on.
> 
Yep, totally agree having the hex code available can be valuable in
unexpected situations.

Re: [PATCH v5 01/10] hyperv: Convert Hyper-V status codes to strings
Posted by Roman Kisel 11 months, 2 weeks ago

On 2/27/2025 4:15 PM, Nuno Das Neves wrote:
> On 2/27/2025 9:02 AM, Roman Kisel wrote:

[...]

> I guess you're implying it's not worth adding such a function for only a
> few places in the code? That is a good point, and a bit of an oversight
> on my part while editing this series. Originally all the hypercall helper
> functions in the driver code (10+ places) used this function as well, but
> I removed those printk()s as a temporary solution to limit the use of
> printk in the driver code (as opposed to dev_printk() which is preferred).
> 
> I didn't think to remove *this* patch as a result of that change!
> I do want to figure out a good way to add that logging back to the hypercall
> helpers, so I do want to try and get some form of this patch in to aid
> debugging hypercalls - it has been very very useful over time.
> 

Right, I thought that the function looked more as a bring-up aid rather
than a full fledged solution to some problem.

>> The "Unknown" part would make debugging harder actually when something
>> fails. I presume that the mainstream scenarios all work, and it is the
>> edge cases that might fail, and these are likelier to produce "Unknown".
>>
> That is a very good point. Ideally, we could log "Unknown" along with
> the hex code instead of replacing it.
> 
> What do you think about keeping this function, but instead of using it
> directly, introduce a "standard" way for logging hypercall errors which
> can hopefully be used everywhere in the kernel?
> e.g. a simple macro:
> #define hv_hvcall_err(control, status)
> do {
> 	u64 ___status = (status);
> 	pr_err("Hypercall: %#x err: %#x : %s", (control) & 0xFFFF, hv_result(___status), hv_result_to_string(___status));
> } while (0)
> 
> I feel like this is the best of both worlds, and actually makes it even
> easier to do this logging everywhere it is wanted (for me, that includes
> all the /dev/mshv-related hypercalls).
> We could add strings for the HVCALL_ values too, and/or include __func__
> in the macro to aid in finding the context it was used in.
> 

That doesn’t seem to be common in the kernel from what I’ve seen in 
dmesg, although there is certainly a lot of appeal in that approach. 
However, we will have to remember to update the function each time when 
another status code is added not to leave things half-cooked.

Also it is a bit surprising the *kernel* should report that rather than 
the VMM from the user mode. E.g. the kernel does not report all errors 
on file open, file seek, etc. As I understand, the hv status codes are
later mapped to errno in a lossy manner, and errno is what the user mode
receives?

As long as the hex code is logged, I am fine with the change.

>> Folks who actually debug failed hypercalls rarely have issues with
>> looking up the error code, and printing "Unknown" to the log is worse
>> than a hexadecimal. Like even the people who wrote the code got nothing
>> to say about what is going on.
>>
> Yep, totally agree having the hex code available can be valuable in
> unexpected situations.
> 

Appreciate giving my concerns a thorough consideration!

-- 
Thank you,
Roman

Re: [PATCH v5 01/10] hyperv: Convert Hyper-V status codes to strings
Posted by Easwar Hariharan 11 months, 2 weeks ago
On 2/27/2025 9:02 AM, Roman Kisel wrote:
> 
> 
> On 2/26/2025 3:07 PM, Nuno Das Neves wrote:
> 
> [...]
> 
>> +
>> +const char *hv_result_to_string(u64 hv_status)
>> +{
>> +    switch (hv_result(hv_status)) {
> 
> [...]
> 
>> +        return "HV_STATUS_VTL_ALREADY_ENABLED";
>> +    default:
>> +        return "Unknown";
>> +    };
>> +    return "Unknown";
>> +}
>> +EXPORT_SYMBOL_GPL(hv_result_to_string);
> 
> Should we remove this and output the hexadecimal error code in ~3 places
> this function is used?
> 
> The "Unknown" part would make debugging harder actually when something
> fails. I presume that the mainstream scenarios all work, and it is the
> edge cases that might fail, and these are likelier to produce "Unknown".
> 
> Folks who actually debug failed hypercalls rarely have issues with
> looking up the error code, and printing "Unknown" to the log is worse
> than a hexadecimal. Like even the people who wrote the code got nothing
> to say about what is going on.
> 

Sorry, I have to disagree with this, a recent commit of mine[1] closed a WSL
issue that was open for over 2 years for, partly, the utter uselessness of
the hex return code of the hypercall.

[1] https://web.git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=d2138eab8cde61e0e6f62d0713e45202e8457d6d

Thanks,
Easwar (he/him)
Re: [PATCH v5 01/10] hyperv: Convert Hyper-V status codes to strings
Posted by Roman Kisel 11 months, 2 weeks ago

On 2/27/2025 2:54 PM, Easwar Hariharan wrote:
> On 2/27/2025 9:02 AM, Roman Kisel wrote:

[...]

> 
> Sorry, I have to disagree with this, a recent commit of mine[1] closed a WSL
> issue that was open for over 2 years for, partly, the utter uselessness of
> the hex return code of the hypercall.

What hypercall was that? I see

		storvsc_log_ratelimited(device, loglevel,
			"tag#%d cmd 0x%x status: scsi 0x%x srb 0x%x hv 0x%x\n",
			scsi_cmd_to_rq(request->cmd)->tag,
			stor_pkt->vm_srb.cdb[0],
			vstor_packet->vm_srb.scsi_status,
			vstor_packet->vm_srb.srb_status,
			vstor_packet->status);

in your patch where `vstor_packet->status` is claimed to be a hypercall
status? I'd be surprised if the hypervisor concerned itself with
the details of virtualized SCSI storage. The VMM on the host might and
should.

I'll look through the code to gain more confidence in my suspicion that
calling the SCSI virt storage packet status a hv status caused the
frustration with debugging, and if no counter examples found, will send
a patch to fix that log statement above.

> 
> [1] https://web.git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=d2138eab8cde61e0e6f62d0713e45202e8457d6d
> 
> Thanks,
> Easwar (he/him)

-- 
Thank you,
Roman
Re: [PATCH v5 01/10] hyperv: Convert Hyper-V status codes to strings
Posted by Roman Kisel 11 months, 2 weeks ago

On 2/27/2025 2:54 PM, Easwar Hariharan wrote:
[...]

> 
> Sorry, I have to disagree with this, a recent commit of mine[1] closed a WSL
> issue that was open for over 2 years for, partly, the utter uselessness of
> the hex return code of the hypercall.

Thanks for your efforts, and sorry to hear you had a frustrating
debugging experience (sounds like it).

Would be great to learn the details to understand how this function is
going to improve the situation:

1. How come the hex error code was useless, was it not matching
    anything in the Linux headers?
2. How having "Unknown" in the log can possibly be better?
3. Given that the select hv status codes and the proposed strings have
    1:1 correspondence, and there is the 1:N catch-all case for the
    "Unknown", how's that better?

> 
> [1] https://web.git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=d2138eab8cde61e0e6f62d0713e45202e8457d6d
> 
> Thanks,
> Easwar (he/him)

-- 
Thank you,
Roman
Re: [PATCH v5 01/10] hyperv: Convert Hyper-V status codes to strings
Posted by Easwar Hariharan 11 months, 2 weeks ago
On 2/27/2025 3:08 PM, Roman Kisel wrote:
> 
> 
> On 2/27/2025 2:54 PM, Easwar Hariharan wrote:
> [...]
> 
>>
>> Sorry, I have to disagree with this, a recent commit of mine[1] closed a WSL
>> issue that was open for over 2 years for, partly, the utter uselessness of
>> the hex return code of the hypercall.
> 
> Thanks for your efforts, and sorry to hear you had a frustrating
> debugging experience (sounds like it).

TBF, I didn't personally struggle with it for 2 years, IMHO, it was the opaqueness
of what the value meant that contributed to user pain.

> 
> Would be great to learn the details to understand how this function is
> going to improve the situation:
> 
> 1. How come the hex error code was useless, what is not matching
>    anything in the Linux headers?

It doesn't match anything in the Linux headers, but it's an NTSTATUS, not HVSTATUS.

Coming from the PoV of a user, it would be a much more useful message to see:

[  249.512760] hv_storvsc fd1d2cbd-ce7c-535c-966b-eb5f811c95f0: tag#683 cmd 0x28 status: scsi 0x2 srb 0x4 hv STATUS_UNSUCCESSFUL

than 

[  249.512760] hv_storvsc fd1d2cbd-ce7c-535c-966b-eb5f811c95f0: tag#683 cmd 0x28 status: scsi 0x2 srb 0x4 hv 0xc0000001

> 2. How having "Unknown" in the log can possibly be better?

IMHO, seeing "Unknown" in an error report means that there's a new return value
that needs to be mapped to errno in hv_status_to_errno() and updated here as well.

> 3. Given that the select hv status codes and the proposed strings have
>    1:1 correspondence, and there is the 1:N catch-all case for the
>    "Unknown", how's that better?
> 

I didn't really follow this question, but I suppose the answer to Q2 answers this as
well. If not, please expand and I'll try to answer.

Thanks,
Easwar (he/him)
Re: [PATCH v5 01/10] hyperv: Convert Hyper-V status codes to strings
Posted by Roman Kisel 11 months, 2 weeks ago

On 2/27/2025 3:25 PM, Easwar Hariharan wrote:
> On 2/27/2025 3:08 PM, Roman Kisel wrote:

[...]

>> Would be great to learn the details to understand how this function is
>> going to improve the situation:
>>
>> 1. How come the hex error code was useless, what is not matching
>>     anything in the Linux headers?
> 
> It doesn't match anything in the Linux headers, but it's an NTSTATUS, not HVSTATUS.
> 

That is what it looks like from the code, I posted the details in the
parallel thread.

Here is a fix:
https://lore.kernel.org/linux-hyperv/20250227233110.36596-1-romank@linux.microsoft.com/

Also I think the commit description in your patch

https://web.git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=d2138eab8cde61e0e6f62d0713e45202e8457d6d

conflates the hypervisor (ours runs bare-metal, Type 1) and the VMMs
(Virtual Machine Monitors)+VSPs (Virtual Service Providers, e.g StorVSP
that implements SCSI) running in the host/root/dom0 partition.

> Coming from the PoV of a user, it would be a much more useful message to see:
> 
> [  249.512760] hv_storvsc fd1d2cbd-ce7c-535c-966b-eb5f811c95f0: tag#683 cmd 0x28 status: scsi 0x2 srb 0x4 hv STATUS_UNSUCCESSFUL
> 
> than
> 
> [  249.512760] hv_storvsc fd1d2cbd-ce7c-535c-966b-eb5f811c95f0: tag#683 cmd 0x28 status: scsi 0x2 srb 0x4 hv 0xc0000001
> 

It is likely that the PoV of a user that you've mentioned is actually
a PoV of a (kernel) developer. It is hard to imagine that folks running
web sites, DB servers, LoBs, LLMs, etc. in Hyper-V VMs care about the
lowest software level of the virt stack in the form of the symbolic
name or the hex code. They need their VMs to be reliable or suggest
what the user may try if a configuration error is suspected.

To make the error log message useful to the user, the message should
mention ways of remediation or at least hint what might've gotten
wedged. Without that, that's only useful for the people who work with
the kernel code proper or the kernel interface to the user land.

So I'd think that the hex error codes from the hypervisor give the user
exactly as much as the error symbolic names do to get the system to the
desired state: nothing. Even less when the error reported "Unknown" :)

>> 2. How having "Unknown" in the log can possibly be better?
> 
> IMHO, seeing "Unknown" in an error report means that there's a new return value
> that needs to be mapped to errno in hv_status_to_errno() and updated here as well.
> 

It means that to the developer. To the user, it means the developers
messed something up and to make matters even worse they didn't leave any
breadcrumbs (e.g. the hex code) to see what's wrong to help the user and
themselves: there is just that "Unknown" thing in the log.

>> 3. Given that the select hv status codes and the proposed strings have
>>     1:1 correspondence, and there is the 1:N catch-all case for the
>>     "Unknown", how's that better?
>>
> 
> I didn't really follow this question, but I suppose the answer to Q2 answers this as
> well. If not, please expand and I'll try to answer.
>

Sorry about that chunk, hit "Send" without looking the e-mail over
another time. Appreciate the discussion very much!


> Thanks,
> Easwar (he/him)

-- 
Thank you,
Roman

Re: [PATCH v5 01/10] hyperv: Convert Hyper-V status codes to strings
Posted by Easwar Hariharan 11 months, 2 weeks ago
On 2/28/2025 9:20 AM, Roman Kisel wrote:
> 
> 
> On 2/27/2025 3:25 PM, Easwar Hariharan wrote:
>> On 2/27/2025 3:08 PM, Roman Kisel wrote:
> 
> [...]
> 
>>> Would be great to learn the details to understand how this function is
>>> going to improve the situation:
>>>
>>> 1. How come the hex error code was useless, what is not matching
>>>     anything in the Linux headers?
>>
>> It doesn't match anything in the Linux headers, but it's an NTSTATUS, not HVSTATUS.
>>
> 
> That is what it looks like from the code, I posted the details in the
> parallel thread.
> 
> Here is a fix:
> https://lore.kernel.org/linux-hyperv/20250227233110.36596-1-romank@linux.microsoft.com/
> 
> Also I think the commit description in your patch
> 
> https://web.git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=d2138eab8cde61e0e6f62d0713e45202e8457d6d
> 
> conflates the hypervisor (ours runs bare-metal, Type 1) and the VMMs
> (Virtual Machine Monitors)+VSPs (Virtual Service Providers, e.g StorVSP
> that implements SCSI) running in the host/root/dom0 partition.

Agreed, that was what I was led to believe, your patch would help with that
miscommunication, though not in its current form. See my review comment in that
thread.

> 
>> Coming from the PoV of a user, it would be a much more useful message to see:
>>
>> [  249.512760] hv_storvsc fd1d2cbd-ce7c-535c-966b-eb5f811c95f0: tag#683 cmd 0x28 status: scsi 0x2 srb 0x4 hv STATUS_UNSUCCESSFUL
>>
>> than
>>
>> [  249.512760] hv_storvsc fd1d2cbd-ce7c-535c-966b-eb5f811c95f0: tag#683 cmd 0x28 status: scsi 0x2 srb 0x4 hv 0xc0000001
>>
> 
> It is likely that the PoV of a user that you've mentioned is actually
> a PoV of a (kernel) developer.

Actually, no, it's PoV of the WSL users that are having the discussion in
the linked github issue. FWIW, that issue also occurred in Azure with multiple
incidents coming into our queue because of the unusable flood of error messages.

> It is hard to imagine that folks running
> web sites, DB servers, LoBs, LLMs, etc. in Hyper-V VMs care about the
> lowest software level of the virt stack in the form of the symbolic
> name or the hex code. They need their VMs to be reliable or suggest
> what the user may try if a configuration error is suspected.
> 
> To make the error log message useful to the user, the message should
> mention ways of remediation or at least hint what might've gotten
> wedged. Without that, that's only useful for the people who work with
> the kernel code proper or the kernel interface to the user land.

There's a step between seeing the issue and fixing it that you're missing,
i.e. the reporting.

An issue that says "flood of hv_storvsc errors reporting status
unsuccessful" is better than the same without that status information:
https://github.com/microsoft/WSL/issues/9173

> 
> So I'd think that the hex error codes from the hypervisor give the user
> exactly as much as the error symbolic names do to get the system to the
> desired state: nothing. 
I continue to disagree, seeing HV_STATUS_NO_RESOURCES is better than 0x1D,
because the user may think to look at `top` or `free -h` or similar to see
what could be killed to improve the situation.

> Even less when the error reported "Unknown" :)

I agree on the uselessness of "Unknown" to the user, except as already mentioned
below, as a prompt for the code to be updated.

> 
>>> 2. How having "Unknown" in the log can possibly be better?
>>
>> IMHO, seeing "Unknown" in an error report means that there's a new return value
>> that needs to be mapped to errno in hv_status_to_errno() and updated here as well.
>>
> 
> It means that to the developer. To the user, it means the developers
> messed something up and to make matters even worse they didn't leave any
> breadcrumbs (e.g. the hex code) to see what's wrong to help the user and
> themselves: there is just that "Unknown" thing in the log.

I think Nuno's compromise addresses this very well, to also print the hex code.

> 
>>> 3. Given that the select hv status codes and the proposed strings have
>>>     1:1 correspondence, and there is the 1:N catch-all case for the
>>>     "Unknown", how's that better?
>>>
>>
>> I didn't really follow this question, but I suppose the answer to Q2 answers this as
>> well. If not, please expand and I'll try to answer.
>>
> 
> Sorry about that chunk, hit "Send" without looking the e-mail over
> another time. Appreciate the discussion very much!
> 
> 
>> Thanks,
>> Easwar (he/him)
> 

Re: [PATCH v5 01/10] hyperv: Convert Hyper-V status codes to strings
Posted by Roman Kisel 11 months, 2 weeks ago

On 2/28/2025 12:22 PM, Easwar Hariharan wrote:
> On 2/28/2025 9:20 AM, Roman Kisel wrote:
>>

[...]

>>
>> So I'd think that the hex error codes from the hypervisor give the user
>> exactly as much as the error symbolic names do to get the system to the
>> desired state: nothing.
> I continue to disagree, seeing HV_STATUS_NO_RESOURCES is better than 0x1D,
> because the user may think to look at `top` or `free -h` or similar to see
> what could be killed to improve the situation.
> 

I agree that the symbolic name might save the step of looking up the
error code in the headers. Now, the next step depends on how much the
user is into virt technologies (if at all). That is
to illustrate the point that a hint in the logs (or in the
Documentation) about what to do next is crucial.

The symbolic name might mislead; a hex code maybe with an addition of
"please look up what may fix this at <URL> or report the problem here
<URL>" would look better to _my imaginary_ customer :) That would be
as friendly as possible, if the kernel needs to print any of that
at all. Likely the VMM in the user land if it gets that code as-is.

Thank you for the fair critique and the time!

[...]

>>> Thanks,
>>> Easwar (he/him)
>>
> 

-- 
Thank you,
Roman
Re: [PATCH v5 01/10] hyperv: Convert Hyper-V status codes to strings
Posted by Easwar Hariharan 11 months, 2 weeks ago
On 2/26/2025 3:07 PM, Nuno Das Neves wrote:
> Introduce hv_result_to_string() for this purpose. This allows
> hypercall failures to be debugged more easily with dmesg.
> 

Let the commit message stand on its own, i.e. state that hv_result_to_string()
is introduced to convert Hyper-V status codes to strings.

> Signed-off-by: Nuno Das Neves <nunodasneves@linux.microsoft.com>
> ---
>  drivers/hv/hv_common.c         | 65 ++++++++++++++++++++++++++++++++++
>  drivers/hv/hv_proc.c           | 13 ++++---
>  include/asm-generic/mshyperv.h |  1 +
>  3 files changed, 74 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c
> index 9804adb4cc56..ce20818688fe 100644
> --- a/drivers/hv/hv_common.c
> +++ b/drivers/hv/hv_common.c
> @@ -740,3 +740,68 @@ void hv_identify_partition_type(void)
>  			pr_crit("Hyper-V: CONFIG_MSHV_ROOT not enabled!\n");
>  	}
>  }
> +
> +const char *hv_result_to_string(u64 hv_status)
> +{
> +	switch (hv_result(hv_status)) {
> +	case HV_STATUS_SUCCESS:
> +		return "HV_STATUS_SUCCESS";
> +	case HV_STATUS_INVALID_HYPERCALL_CODE:
> +		return "HV_STATUS_INVALID_HYPERCALL_CODE";
> +	case HV_STATUS_INVALID_HYPERCALL_INPUT:
> +		return "HV_STATUS_INVALID_HYPERCALL_INPUT";
> +	case HV_STATUS_INVALID_ALIGNMENT:
> +		return "HV_STATUS_INVALID_ALIGNMENT";
> +	case HV_STATUS_INVALID_PARAMETER:
> +		return "HV_STATUS_INVALID_PARAMETER";
> +	case HV_STATUS_ACCESS_DENIED:
> +		return "HV_STATUS_ACCESS_DENIED";
> +	case HV_STATUS_INVALID_PARTITION_STATE:
> +		return "HV_STATUS_INVALID_PARTITION_STATE";
> +	case HV_STATUS_OPERATION_DENIED:
> +		return "HV_STATUS_OPERATION_DENIED";
> +	case HV_STATUS_UNKNOWN_PROPERTY:
> +		return "HV_STATUS_UNKNOWN_PROPERTY";
> +	case HV_STATUS_PROPERTY_VALUE_OUT_OF_RANGE:
> +		return "HV_STATUS_PROPERTY_VALUE_OUT_OF_RANGE";
> +	case HV_STATUS_INSUFFICIENT_MEMORY:
> +		return "HV_STATUS_INSUFFICIENT_MEMORY";
> +	case HV_STATUS_INVALID_PARTITION_ID:
> +		return "HV_STATUS_INVALID_PARTITION_ID";
> +	case HV_STATUS_INVALID_VP_INDEX:
> +		return "HV_STATUS_INVALID_VP_INDEX";
> +	case HV_STATUS_NOT_FOUND:
> +		return "HV_STATUS_NOT_FOUND";
> +	case HV_STATUS_INVALID_PORT_ID:
> +		return "HV_STATUS_INVALID_PORT_ID";
> +	case HV_STATUS_INVALID_CONNECTION_ID:
> +		return "HV_STATUS_INVALID_CONNECTION_ID";
> +	case HV_STATUS_INSUFFICIENT_BUFFERS:
> +		return "HV_STATUS_INSUFFICIENT_BUFFERS";
> +	case HV_STATUS_NOT_ACKNOWLEDGED:
> +		return "HV_STATUS_NOT_ACKNOWLEDGED";
> +	case HV_STATUS_INVALID_VP_STATE:
> +		return "HV_STATUS_INVALID_VP_STATE";
> +	case HV_STATUS_NO_RESOURCES:
> +		return "HV_STATUS_NO_RESOURCES";
> +	case HV_STATUS_PROCESSOR_FEATURE_NOT_SUPPORTED:
> +		return "HV_STATUS_PROCESSOR_FEATURE_NOT_SUPPORTED";
> +	case HV_STATUS_INVALID_LP_INDEX:
> +		return "HV_STATUS_INVALID_LP_INDEX";
> +	case HV_STATUS_INVALID_REGISTER_VALUE:
> +		return "HV_STATUS_INVALID_REGISTER_VALUE";
> +	case HV_STATUS_OPERATION_FAILED:
> +		return "HV_STATUS_OPERATION_FAILED";
> +	case HV_STATUS_TIME_OUT:
> +		return "HV_STATUS_TIME_OUT";
> +	case HV_STATUS_CALL_PENDING:
> +		return "HV_STATUS_CALL_PENDING";
> +	case HV_STATUS_VTL_ALREADY_ENABLED:
> +		return "HV_STATUS_VTL_ALREADY_ENABLED";
> +	default:
> +		return "Unknown";
> +	};
> +	return "Unknown";

Unnecessary extra return since the default case already returns "Unknown"

> +}
> +EXPORT_SYMBOL_GPL(hv_result_to_string);
> +

Extra line here ^

> diff --git a/drivers/hv/hv_proc.c b/drivers/hv/hv_proc.c
> index 2fae18e4f7d2..8fc30f509fa7 100644
> --- a/drivers/hv/hv_proc.c
> +++ b/drivers/hv/hv_proc.c
> @@ -87,7 +87,8 @@ int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages)
>  				     page_count, 0, input_page, NULL);
>  	local_irq_restore(flags);
>  	if (!hv_result_success(status)) {
> -		pr_err("Failed to deposit pages: %lld\n", status);
> +		pr_err("%s: Failed to deposit pages: %s\n", __func__,
> +		       hv_result_to_string(status));
>  		ret = hv_result_to_errno(status);
>  		goto err_free_allocations;
>  	}
> @@ -137,8 +138,9 @@ int hv_call_add_logical_proc(int node, u32 lp_index, u32 apic_id)
>  
>  		if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) {
>  			if (!hv_result_success(status)) {
> -				pr_err("%s: cpu %u apic ID %u, %lld\n", __func__,
> -				       lp_index, apic_id, status);
> +				pr_err("%s: cpu %u apic ID %u, %s\n",
> +				       __func__, lp_index, apic_id,
> +				       hv_result_to_string(status));
>  				ret = hv_result_to_errno(status);
>  			}
>  			break;
> @@ -179,8 +181,9 @@ int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags)
>  
>  		if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) {
>  			if (!hv_result_success(status)) {
> -				pr_err("%s: vcpu %u, lp %u, %lld\n", __func__,
> -				       vp_index, flags, status);
> +				pr_err("%s: vcpu %u, lp %u, %s\n",
> +				       __func__, vp_index, flags,
> +				       hv_result_to_string(status));
>  				ret = hv_result_to_errno(status);
>  			}
>  			break;

There are more convertible instances in arch/x86/hyperv/irqdomain.c and drivers/iommu/hyperv-iommu.c

> diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h
> index b13b0cda4ac8..dc4729dba9ef 100644
> --- a/include/asm-generic/mshyperv.h
> +++ b/include/asm-generic/mshyperv.h
> @@ -298,6 +298,7 @@ static inline int cpumask_to_vpset_skip(struct hv_vpset *vpset,
>  	return __cpumask_to_vpset(vpset, cpus, func);
>  }
>  
> +const char *hv_result_to_string(u64 hv_status);
>  int hv_result_to_errno(u64 status);
>  void hyperv_report_panic(struct pt_regs *regs, long err, bool in_die);
>  bool hv_is_hyperv_initialized(void);
Re: [PATCH v5 01/10] hyperv: Convert Hyper-V status codes to strings
Posted by Nuno Das Neves 11 months, 2 weeks ago
On 2/26/2025 8:22 PM, Easwar Hariharan wrote:
> On 2/26/2025 3:07 PM, Nuno Das Neves wrote:
>> Introduce hv_result_to_string() for this purpose. This allows
>> hypercall failures to be debugged more easily with dmesg.
>>
> 
> Let the commit message stand on its own, i.e. state that hv_result_to_string()
> is introduced to convert hyper-v status codes to string.
> 
I thought that since the subject line is part of the commit message, this kind
of phrasing was OK. However, I see that in my email client it reads a little
oddly, because the subject line is displayed some distance from the rest of the
message.

I'll change it :)

>> Signed-off-by: Nuno Das Neves <nunodasneves@linux.microsoft.com>
>> ---
>>  drivers/hv/hv_common.c         | 65 ++++++++++++++++++++++++++++++++++
>>  drivers/hv/hv_proc.c           | 13 ++++---
>>  include/asm-generic/mshyperv.h |  1 +
>>  3 files changed, 74 insertions(+), 5 deletions(-)
>>
>> diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c
>> index 9804adb4cc56..ce20818688fe 100644
>> --- a/drivers/hv/hv_common.c
>> +++ b/drivers/hv/hv_common.c
>> @@ -740,3 +740,68 @@ void hv_identify_partition_type(void)
>>  			pr_crit("Hyper-V: CONFIG_MSHV_ROOT not enabled!\n");
>>  	}
>>  }
>> +
>> +const char *hv_result_to_string(u64 hv_status)
>> +{
>> +	switch (hv_result(hv_status)) {
>> +	case HV_STATUS_SUCCESS:
>> +		return "HV_STATUS_SUCCESS";
>> +	case HV_STATUS_INVALID_HYPERCALL_CODE:
>> +		return "HV_STATUS_INVALID_HYPERCALL_CODE";
>> +	case HV_STATUS_INVALID_HYPERCALL_INPUT:
>> +		return "HV_STATUS_INVALID_HYPERCALL_INPUT";
>> +	case HV_STATUS_INVALID_ALIGNMENT:
>> +		return "HV_STATUS_INVALID_ALIGNMENT";
>> +	case HV_STATUS_INVALID_PARAMETER:
>> +		return "HV_STATUS_INVALID_PARAMETER";
>> +	case HV_STATUS_ACCESS_DENIED:
>> +		return "HV_STATUS_ACCESS_DENIED";
>> +	case HV_STATUS_INVALID_PARTITION_STATE:
>> +		return "HV_STATUS_INVALID_PARTITION_STATE";
>> +	case HV_STATUS_OPERATION_DENIED:
>> +		return "HV_STATUS_OPERATION_DENIED";
>> +	case HV_STATUS_UNKNOWN_PROPERTY:
>> +		return "HV_STATUS_UNKNOWN_PROPERTY";
>> +	case HV_STATUS_PROPERTY_VALUE_OUT_OF_RANGE:
>> +		return "HV_STATUS_PROPERTY_VALUE_OUT_OF_RANGE";
>> +	case HV_STATUS_INSUFFICIENT_MEMORY:
>> +		return "HV_STATUS_INSUFFICIENT_MEMORY";
>> +	case HV_STATUS_INVALID_PARTITION_ID:
>> +		return "HV_STATUS_INVALID_PARTITION_ID";
>> +	case HV_STATUS_INVALID_VP_INDEX:
>> +		return "HV_STATUS_INVALID_VP_INDEX";
>> +	case HV_STATUS_NOT_FOUND:
>> +		return "HV_STATUS_NOT_FOUND";
>> +	case HV_STATUS_INVALID_PORT_ID:
>> +		return "HV_STATUS_INVALID_PORT_ID";
>> +	case HV_STATUS_INVALID_CONNECTION_ID:
>> +		return "HV_STATUS_INVALID_CONNECTION_ID";
>> +	case HV_STATUS_INSUFFICIENT_BUFFERS:
>> +		return "HV_STATUS_INSUFFICIENT_BUFFERS";
>> +	case HV_STATUS_NOT_ACKNOWLEDGED:
>> +		return "HV_STATUS_NOT_ACKNOWLEDGED";
>> +	case HV_STATUS_INVALID_VP_STATE:
>> +		return "HV_STATUS_INVALID_VP_STATE";
>> +	case HV_STATUS_NO_RESOURCES:
>> +		return "HV_STATUS_NO_RESOURCES";
>> +	case HV_STATUS_PROCESSOR_FEATURE_NOT_SUPPORTED:
>> +		return "HV_STATUS_PROCESSOR_FEATURE_NOT_SUPPORTED";
>> +	case HV_STATUS_INVALID_LP_INDEX:
>> +		return "HV_STATUS_INVALID_LP_INDEX";
>> +	case HV_STATUS_INVALID_REGISTER_VALUE:
>> +		return "HV_STATUS_INVALID_REGISTER_VALUE";
>> +	case HV_STATUS_OPERATION_FAILED:
>> +		return "HV_STATUS_OPERATION_FAILED";
>> +	case HV_STATUS_TIME_OUT:
>> +		return "HV_STATUS_TIME_OUT";
>> +	case HV_STATUS_CALL_PENDING:
>> +		return "HV_STATUS_CALL_PENDING";
>> +	case HV_STATUS_VTL_ALREADY_ENABLED:
>> +		return "HV_STATUS_VTL_ALREADY_ENABLED";
>> +	default:
>> +		return "Unknown";
>> +	};
>> +	return "Unknown";
> 
> Unnecessary extra return since the default case already returns "Unknown"
> 
Good point. I think I'd prefer to remove the first return (the one inside the
default case) and leave the default case empty, so that control reaches the
final return after the switch.

>> +}
>> +EXPORT_SYMBOL_GPL(hv_result_to_string);
>> +
> 
> Extra line here ^
> 
Thanks

>> diff --git a/drivers/hv/hv_proc.c b/drivers/hv/hv_proc.c
>> index 2fae18e4f7d2..8fc30f509fa7 100644
>> --- a/drivers/hv/hv_proc.c
>> +++ b/drivers/hv/hv_proc.c
>> @@ -87,7 +87,8 @@ int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages)
>>  				     page_count, 0, input_page, NULL);
>>  	local_irq_restore(flags);
>>  	if (!hv_result_success(status)) {
>> -		pr_err("Failed to deposit pages: %lld\n", status);
>> +		pr_err("%s: Failed to deposit pages: %s\n", __func__,
>> +		       hv_result_to_string(status));
>>  		ret = hv_result_to_errno(status);
>>  		goto err_free_allocations;
>>  	}
>> @@ -137,8 +138,9 @@ int hv_call_add_logical_proc(int node, u32 lp_index, u32 apic_id)
>>  
>>  		if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) {
>>  			if (!hv_result_success(status)) {
>> -				pr_err("%s: cpu %u apic ID %u, %lld\n", __func__,
>> -				       lp_index, apic_id, status);
>> +				pr_err("%s: cpu %u apic ID %u, %s\n",
>> +				       __func__, lp_index, apic_id,
>> +				       hv_result_to_string(status));
>>  				ret = hv_result_to_errno(status);
>>  			}
>>  			break;
>> @@ -179,8 +181,9 @@ int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags)
>>  
>>  		if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) {
>>  			if (!hv_result_success(status)) {
>> -				pr_err("%s: vcpu %u, lp %u, %lld\n", __func__,
>> -				       vp_index, flags, status);
>> +				pr_err("%s: vcpu %u, lp %u, %s\n",
>> +				       __func__, vp_index, flags,
>> +				       hv_result_to_string(status));
>>  				ret = hv_result_to_errno(status);
>>  			}
>>  			break;
> 
> There are more convertible instances in arch/x86/hyperv/irqdomain.c and drivers/iommu/hyperv-iommu.c
> 
Ah, thank you, happy to add those!

>> diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h
>> index b13b0cda4ac8..dc4729dba9ef 100644
>> --- a/include/asm-generic/mshyperv.h
>> +++ b/include/asm-generic/mshyperv.h
>> @@ -298,6 +298,7 @@ static inline int cpumask_to_vpset_skip(struct hv_vpset *vpset,
>>  	return __cpumask_to_vpset(vpset, cpus, func);
>>  }
>>  
>> +const char *hv_result_to_string(u64 hv_status);
>>  int hv_result_to_errno(u64 status);
>>  void hyperv_report_panic(struct pt_regs *regs, long err, bool in_die);
>>  bool hv_is_hyperv_initialized(void);
Re: [PATCH v5 01/10] hyperv: Convert Hyper-V status codes to strings
Posted by Stanislav Kinsburskii 11 months, 2 weeks ago
On Wed, Feb 26, 2025 at 03:07:55PM -0800, Nuno Das Neves wrote:
> Introduce hv_result_to_string() for this purpose. This allows
> hypercall failures to be debugged more easily with dmesg.
> 
> Signed-off-by: Nuno Das Neves <nunodasneves@linux.microsoft.com>
> ---
>  drivers/hv/hv_common.c         | 65 ++++++++++++++++++++++++++++++++++
>  drivers/hv/hv_proc.c           | 13 ++++---
>  include/asm-generic/mshyperv.h |  1 +
>  3 files changed, 74 insertions(+), 5 deletions(-)
> 

Reviewed-by: Stanislav Kinsburskii <skinsburskii@linux.microsoft.com>