Introduce hv_result_to_string() for this purpose. This allows
hypercall failures to be debugged more easily with dmesg.
Signed-off-by: Nuno Das Neves <nunodasneves@linux.microsoft.com>
---
drivers/hv/hv_common.c | 65 ++++++++++++++++++++++++++++++++++
drivers/hv/hv_proc.c | 13 ++++---
include/asm-generic/mshyperv.h | 1 +
3 files changed, 74 insertions(+), 5 deletions(-)
diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c
index 9804adb4cc56..ce20818688fe 100644
--- a/drivers/hv/hv_common.c
+++ b/drivers/hv/hv_common.c
@@ -740,3 +740,68 @@ void hv_identify_partition_type(void)
pr_crit("Hyper-V: CONFIG_MSHV_ROOT not enabled!\n");
}
}
+
+const char *hv_result_to_string(u64 hv_status)
+{
+ switch (hv_result(hv_status)) {
+ case HV_STATUS_SUCCESS:
+ return "HV_STATUS_SUCCESS";
+ case HV_STATUS_INVALID_HYPERCALL_CODE:
+ return "HV_STATUS_INVALID_HYPERCALL_CODE";
+ case HV_STATUS_INVALID_HYPERCALL_INPUT:
+ return "HV_STATUS_INVALID_HYPERCALL_INPUT";
+ case HV_STATUS_INVALID_ALIGNMENT:
+ return "HV_STATUS_INVALID_ALIGNMENT";
+ case HV_STATUS_INVALID_PARAMETER:
+ return "HV_STATUS_INVALID_PARAMETER";
+ case HV_STATUS_ACCESS_DENIED:
+ return "HV_STATUS_ACCESS_DENIED";
+ case HV_STATUS_INVALID_PARTITION_STATE:
+ return "HV_STATUS_INVALID_PARTITION_STATE";
+ case HV_STATUS_OPERATION_DENIED:
+ return "HV_STATUS_OPERATION_DENIED";
+ case HV_STATUS_UNKNOWN_PROPERTY:
+ return "HV_STATUS_UNKNOWN_PROPERTY";
+ case HV_STATUS_PROPERTY_VALUE_OUT_OF_RANGE:
+ return "HV_STATUS_PROPERTY_VALUE_OUT_OF_RANGE";
+ case HV_STATUS_INSUFFICIENT_MEMORY:
+ return "HV_STATUS_INSUFFICIENT_MEMORY";
+ case HV_STATUS_INVALID_PARTITION_ID:
+ return "HV_STATUS_INVALID_PARTITION_ID";
+ case HV_STATUS_INVALID_VP_INDEX:
+ return "HV_STATUS_INVALID_VP_INDEX";
+ case HV_STATUS_NOT_FOUND:
+ return "HV_STATUS_NOT_FOUND";
+ case HV_STATUS_INVALID_PORT_ID:
+ return "HV_STATUS_INVALID_PORT_ID";
+ case HV_STATUS_INVALID_CONNECTION_ID:
+ return "HV_STATUS_INVALID_CONNECTION_ID";
+ case HV_STATUS_INSUFFICIENT_BUFFERS:
+ return "HV_STATUS_INSUFFICIENT_BUFFERS";
+ case HV_STATUS_NOT_ACKNOWLEDGED:
+ return "HV_STATUS_NOT_ACKNOWLEDGED";
+ case HV_STATUS_INVALID_VP_STATE:
+ return "HV_STATUS_INVALID_VP_STATE";
+ case HV_STATUS_NO_RESOURCES:
+ return "HV_STATUS_NO_RESOURCES";
+ case HV_STATUS_PROCESSOR_FEATURE_NOT_SUPPORTED:
+ return "HV_STATUS_PROCESSOR_FEATURE_NOT_SUPPORTED";
+ case HV_STATUS_INVALID_LP_INDEX:
+ return "HV_STATUS_INVALID_LP_INDEX";
+ case HV_STATUS_INVALID_REGISTER_VALUE:
+ return "HV_STATUS_INVALID_REGISTER_VALUE";
+ case HV_STATUS_OPERATION_FAILED:
+ return "HV_STATUS_OPERATION_FAILED";
+ case HV_STATUS_TIME_OUT:
+ return "HV_STATUS_TIME_OUT";
+ case HV_STATUS_CALL_PENDING:
+ return "HV_STATUS_CALL_PENDING";
+ case HV_STATUS_VTL_ALREADY_ENABLED:
+ return "HV_STATUS_VTL_ALREADY_ENABLED";
+ default:
+ return "Unknown";
+ };
+ return "Unknown";
+}
+EXPORT_SYMBOL_GPL(hv_result_to_string);
+
diff --git a/drivers/hv/hv_proc.c b/drivers/hv/hv_proc.c
index 2fae18e4f7d2..8fc30f509fa7 100644
--- a/drivers/hv/hv_proc.c
+++ b/drivers/hv/hv_proc.c
@@ -87,7 +87,8 @@ int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages)
page_count, 0, input_page, NULL);
local_irq_restore(flags);
if (!hv_result_success(status)) {
- pr_err("Failed to deposit pages: %lld\n", status);
+ pr_err("%s: Failed to deposit pages: %s\n", __func__,
+ hv_result_to_string(status));
ret = hv_result_to_errno(status);
goto err_free_allocations;
}
@@ -137,8 +138,9 @@ int hv_call_add_logical_proc(int node, u32 lp_index, u32 apic_id)
if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) {
if (!hv_result_success(status)) {
- pr_err("%s: cpu %u apic ID %u, %lld\n", __func__,
- lp_index, apic_id, status);
+ pr_err("%s: cpu %u apic ID %u, %s\n",
+ __func__, lp_index, apic_id,
+ hv_result_to_string(status));
ret = hv_result_to_errno(status);
}
break;
@@ -179,8 +181,9 @@ int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags)
if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) {
if (!hv_result_success(status)) {
- pr_err("%s: vcpu %u, lp %u, %lld\n", __func__,
- vp_index, flags, status);
+ pr_err("%s: vcpu %u, lp %u, %s\n",
+ __func__, vp_index, flags,
+ hv_result_to_string(status));
ret = hv_result_to_errno(status);
}
break;
diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h
index b13b0cda4ac8..dc4729dba9ef 100644
--- a/include/asm-generic/mshyperv.h
+++ b/include/asm-generic/mshyperv.h
@@ -298,6 +298,7 @@ static inline int cpumask_to_vpset_skip(struct hv_vpset *vpset,
return __cpumask_to_vpset(vpset, cpus, func);
}
+const char *hv_result_to_string(u64 hv_status);
int hv_result_to_errno(u64 status);
void hyperv_report_panic(struct pt_regs *regs, long err, bool in_die);
bool hv_is_hyperv_initialized(void);
--
2.34.1
On 2/26/2025 3:07 PM, Nuno Das Neves wrote:
[...]
> +
> +const char *hv_result_to_string(u64 hv_status)
> +{
> + switch (hv_result(hv_status)) {
[...]
> + return "HV_STATUS_VTL_ALREADY_ENABLED";
> + default:
> + return "Unknown";
> + };
> + return "Unknown";
> +}
> +EXPORT_SYMBOL_GPL(hv_result_to_string);
Should we remove this and output the hexadecimal error code in ~3 places
this function is used?
The "Unknown" part would make debugging harder actually when something
fails. I presume that the mainstream scenarios all work, and it is the
edge cases that might fail, and these are likelier to produce "Unknown".
Folks who actually debug failed hypercalls rarely have issues with
looking up the error code, and printing "Unknown" to the log is worse
than a hexadecimal. Like even the people who wrote the code got nothing
to say about what is going on.
--
Thank you,
Roman
On 2/27/2025 9:02 AM, Roman Kisel wrote:
>
>
> On 2/26/2025 3:07 PM, Nuno Das Neves wrote:
>
> [...]
>
>> +
>> +const char *hv_result_to_string(u64 hv_status)
>> +{
>> + switch (hv_result(hv_status)) {
>
> [...]
>
>> + return "HV_STATUS_VTL_ALREADY_ENABLED";
>> + default:
>> + return "Unknown";
>> + };
>> + return "Unknown";
>> +}
>> +EXPORT_SYMBOL_GPL(hv_result_to_string);
>
> Should we remove this and output the hexadecimal error code in ~3 places
> this function is used?
>
I guess you're implying it's not worth adding such a function for only a
few places in the code? That is a good point, and a bit of an oversight
on my part while editing this series. Originally all the hypercall helper
functions in the driver code (10+ places) used this function as well, but
I removed those printks_()s as a temporary solution to limit the use of
printk in the driver code (as opposed to dev_printk() which is preferred).
I didn't think to remove *this* patch as a result of that change!
I do want to figure out a good way to add that logging back to the hypercall
helpers, so I do want to try and get some form of this patch in to aid
debugging hypercalls - it has been very very useful over time.
> The "Unknown" part would make debugging harder actually when something
> fails. I presume that the mainstream scenarios all work, and it is the
> edge cases that might fail, and these are likelier to produce "Unknown".
>
That is a very good point. Ideally, we could log "Unknown" along with
the hex code instead of replacing it.
What do you think about keeping this function, but instead of using it
directly, introduce a "standard" way for logging hypercall errors which
can hopefully be used everywhere in the kernel?
e.g. a simple macro:
#define hv_hvcall_err(control, status)
do {
u64 ___status = (status);
pr_err("Hypercall: %#x err: %#x : %s", (control) & 0xFFFF, hv_result(___status), hv_result_to_string(___status));
} while (0)
I feel like this is the best of both worlds, and actually makes it even
easier to do this logging everywhere it is wanted (for me, that includes
all the /dev/mshv-related hypercalls).
We could add strings for the HVCALL_ values too, and/or include __func__
in the macro to aid in finding the context it was used in.
> Folks who actually debug failed hypercalls rarely have issues with
> looking up the error code, and printing "Unknown" to the log is worse
> than a hexadecimal. Like even the people who wrote the code got nothing
> to say about what is going on.
>
Yep, totally agree having the hex code available can be valuable in
unexpected situations.
On 2/27/2025 4:15 PM, Nuno Das Neves wrote:
> On 2/27/2025 9:02 AM, Roman Kisel wrote:
[...]
> I guess you're implying it's not worth adding such a function for only a
> few places in the code? That is a good point, and a bit of an oversight
> on my part while editing this series. Originally all the hypercall helper
> functions in the driver code (10+ places) used this function as well, but
> I removed those printks_()s as a temporary solution to limit the use of
> printk in the driver code (as opposed to dev_printk() which is preferred).
>
> I didn't think to remove *this* patch as a result of that change!
> I do want to figure out a good way to add that logging back to the hypercall
> helpers, so I do want to try and get some form of this patch in to aid
> debugging hypercalls - it has been very very useful over time.
>
Right, I thought that the function looked more as a bring-up aid rather
than a full fledged solution to some problem.
>> The "Unknown" part would make debugging harder actually when something
>> fails. I presume that the mainstream scenarios all work, and it is the
>> edge cases that might fail, and these are likelier to produce "Unknown".
>>
> That is a very good point. Ideally, we could log "Unknown" along with
> the hex code instead of replacing it.
>
> What do you think about keeping this function, but instead of using it
> directly, introduce a "standard" way for logging hypercall errors which
> can hopefully be used everywhere in the kernel?
> e.g. a simple macro:
> #define hv_hvcall_err(control, status)
> do {
> u64 ___status = (status);
> pr_err("Hypercall: %#x err: %#x : %s", (control) & 0xFFFF, hv_result(___status), hv_result_to_string(___status));
> } while (0)
>
> I feel like this is the best of both worlds, and actually makes it even
> easier to do this logging everywhere it is wanted (for me, that includes
> all the /dev/mshv-related hypercalls).
> We could add strings for the HVCALL_ values too, and/or include __func__
> in the macro to aid in finding the context it was used in.
>
That doesn’t seem to be common in the kernel from what I’ve seen in
dmesg, although there is certainly a lot of appeal in that approach.
However, we will have to remember to update the function each time when
another status code is added not to leave things half-cooked.
Also it is a bit surprising the *kernel* should report that rather than
the VMM from the user mode. E.g. the kernel does not report all errors
on file open, file seek, etc. As I understand, the hv status codes are
later mapped to errno in a lossy manner, and errno is what the user mode
receives?
As long as the hex code is logged, I am fine with the change.
>> Folks who actually debug failed hypercalls rarely have issues with
>> looking up the error code, and printing "Unknown" to the log is worse
>> than a hexadecimal. Like even the people who wrote the code got nothing
>> to say about what is going on.
>>
> Yep, totally agree having the hex code available can be valuable in
> unexpected situations.
>
Appreciate giving my concerns a thorough consideration!
--
Thank you,
Roman
On 2/27/2025 9:02 AM, Roman Kisel wrote:
>
>
> On 2/26/2025 3:07 PM, Nuno Das Neves wrote:
>
> [...]
>
>> +
>> +const char *hv_result_to_string(u64 hv_status)
>> +{
>> + switch (hv_result(hv_status)) {
>
> [...]
>
>> + return "HV_STATUS_VTL_ALREADY_ENABLED";
>> + default:
>> + return "Unknown";
>> + };
>> + return "Unknown";
>> +}
>> +EXPORT_SYMBOL_GPL(hv_result_to_string);
>
> Should we remove this and output the hexadecimal error code in ~3 places
> this function is used?
>
> The "Unknown" part would make debugging harder actually when something
> fails. I presume that the mainstream scenarios all work, and it is the
> edge cases that might fail, and these are likelier to produce "Unknown".
>
> Folks who actually debug failed hypercalls rarely have issues with
> looking up the error code, and printing "Unknown" to the log is worse
> than a hexadecimal. Like even the people who wrote the code got nothing
> to say about what is going on.
>
Sorry, I have to disagree with this, a recent commit of mine[1] closed a WSL
issue that was open for over 2 years for, partly, the utter uselessness of
the hex return code of the hypercall.
[1] https://web.git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=d2138eab8cde61e0e6f62d0713e45202e8457d6d
Thanks,
Easwar (he/him)
On 2/27/2025 2:54 PM, Easwar Hariharan wrote: > On 2/27/2025 9:02 AM, Roman Kisel wrote: [...] > > Sorry, I have to disagree with this, a recent commit of mine[1] closed a WSL > issue that was open for over 2 years for, partly, the utter uselessness of > the hex return code of the hypercall. What hypercall was that? I see storvsc_log_ratelimited(device, loglevel, "tag#%d cmd 0x%x status: scsi 0x%x srb 0x%x hv 0x%x\n", scsi_cmd_to_rq(request->cmd)->tag, stor_pkt->vm_srb.cdb[0], vstor_packet->vm_srb.scsi_status, vstor_packet->vm_srb.srb_status, vstor_packet->status); in your patch where `vstor_packet->status` is claimed to be a hypercall status? I'd be surprised if the hypervisor concerned itself with the details of visualized SCSI storage. The VMM on the host might and should. I'll look through the code to gain more confidence in my suspicion that calling the SCSI virt storage packet status a hv status causd the frustration with debugging, and if no counter examples found, will send a patch to fix that log statement above. > > [1] https://web.git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=d2138eab8cde61e0e6f62d0713e45202e8457d6d > > Thanks, > Easwar (he/him) -- Thank you, Roman
On 2/27/2025 2:54 PM, Easwar Hariharan wrote:
[...]
>
> Sorry, I have to disagree with this, a recent commit of mine[1] closed a WSL
> issue that was open for over 2 years for, partly, the utter uselessness of
> the hex return code of the hypercall.
Thanks for your efforts, and sorry to hear you had a frustrating
debugging experience (sounds like it).
Would be great to learn the details to understand how this function is
going to improve the situation:
1. How come the hex error code was useless, what is not matching
anything in the Linux headers?
2. How having "Unknown" in the log can possibly be better?
3. Given that the select hv status codes and the proposed strings have
1:1 correspondence, and there is the 1:N catch-all case for the
"Unknown", how's that better?
>
> [1] https://web.git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=d2138eab8cde61e0e6f62d0713e45202e8457d6d
>
> Thanks,
> Easwar (he/him)
--
Thank you,
Roman
On 2/27/2025 3:08 PM, Roman Kisel wrote: > > > On 2/27/2025 2:54 PM, Easwar Hariharan wrote: > [...] > >> >> Sorry, I have to disagree with this, a recent commit of mine[1] closed a WSL >> issue that was open for over 2 years for, partly, the utter uselessness of >> the hex return code of the hypercall. > > Thanks for your efforts, and sorry to hear you had a frustrating > debugging experience (sounds like it). TBF, I didn't personally struggle with it for 2 years, IMHO, it was the opaqueness of what the value meant that contributed to user pain. > > Would be great to learn the details to understand how this function is > going to improve the situation: > > 1. How come the hex error code was useless, what is not matching > anything in the Linux headers? It doesn't match anything in the Linux headers, but it's an NTSTATUS, not HVSTATUS. Coming from the PoV of a user, it would be a much more useful message to see: [ 249.512760] hv_storvsc fd1d2cbd-ce7c-535c-966b-eb5f811c95f0: tag#683 cmd 0x28 status: scsi 0x2 srb 0x4 hv STATUS_UNSUCCESSFUL than [ 249.512760] hv_storvsc fd1d2cbd-ce7c-535c-966b-eb5f811c95f0: tag#683 cmd 0x28 status: scsi 0x2 srb 0x4 hv 0xc0000001 > 2. How having "Unknown" in the log can possibly be better? IMHO, seeing "Unknown" in an error report means that there's a new return value that needs to be mapped to errno in hv_status_to_errno() and updated here as well. > 3. Given that the select hv status codes and the proposed strings have > 1:1 correspondence, and there is the 1:N catch-all case for the > "Unknown", how's that better? > I didn't really follow this question, but I suppose the answer to Q2 answers this as well. If not, please expand and I'll try to answer. Thanks, Easwar (he/him)
On 2/27/2025 3:25 PM, Easwar Hariharan wrote: > On 2/27/2025 3:08 PM, Roman Kisel wrote: [...] >> Would be great to learn the details to understand how this function is >> going to improve the situation: >> >> 1. How come the hex error code was useless, what is not matching >> anything in the Linux headers? > > It doesn't match anything in the Linux headers, but it's an NTSTATUS, not HVSTATUS. > That is what it looks like from the code, I posted the details in the parallel thread. Here is a fix: https://lore.kernel.org/linux-hyperv/20250227233110.36596-1-romank@linux.microsoft.com/ Also I think the commit description in your patch https://web.git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=d2138eab8cde61e0e6f62d0713e45202e8457d6d conflates the hypervisor (ours runs bare-metal, Type 1) and the VMMs (Virtual Machine Monitors)+VSPs (Virtual Service Providers, e.g StorVSP that implements SCSI) running in the host/root/dom0 partition. > Coming from the PoV of a user, it would be a much more useful message to see: > > [ 249.512760] hv_storvsc fd1d2cbd-ce7c-535c-966b-eb5f811c95f0: tag#683 cmd 0x28 status: scsi 0x2 srb 0x4 hv STATUS_UNSUCCESSFUL > > than > > [ 249.512760] hv_storvsc fd1d2cbd-ce7c-535c-966b-eb5f811c95f0: tag#683 cmd 0x28 status: scsi 0x2 srb 0x4 hv 0xc0000001 > It is likely that the PoV of a user that you've mentioned is actually a PoV of a (kernel) developer. It is hard to imagine that folks running web sites, DB servers, LoBs, LLMs, etc. in Hyper-V VMs care about the lowest software level of the virt stack in the form of the symbolic name or the hex code. They need their VMs to be reliable or suggest what the user may try if a configuration error is suspected. To make the error log message useful to the user, the message should mention ways of remediation or at least hint what might've gotten wedged. Without that, that's only useful for the people who work with the kernel code proper or the kernel interface to the user land. So I'd think that the hex error codes from the hypervisor give the user exactly as much as the error symbolic names do to get the system to the desired state: nothing. Even less when the error reported "Unknown" :) >> 2. How having "Unknown" in the log can possibly be better? > > IMHO, seeing "Unknown" in an error report means that there's a new return value > that needs to be mapped to errno in hv_status_to_errno() and updated here as well. > It means that to the developer. To the user, it means the developers messed something up and to make matters even worse they didn't leave any breadcrumbs (e.g. the hex code) to see what's wrong to help the user and themselves: there is just that "Unknown" thing in the log. >> 3. Given that the select hv status codes and the proposed strings have >> 1:1 correspondence, and there is the 1:N catch-all case for the >> "Unknown", how's that better? >> > > I didn't really follow this question, but I suppose the answer to Q2 answers this as > well. If not, please expand and I'll try to answer. > Sorry about that chunk, hit "Send" without looking the e-mail over another time. Appreciate the discussion very much! > Thanks, > Easwar (he/him) -- Thank you, Roman
On 2/28/2025 9:20 AM, Roman Kisel wrote: > > > On 2/27/2025 3:25 PM, Easwar Hariharan wrote: >> On 2/27/2025 3:08 PM, Roman Kisel wrote: > > [...] > >>> Would be great to learn the details to understand how this function is >>> going to improve the situation: >>> >>> 1. How come the hex error code was useless, what is not matching >>> anything in the Linux headers? >> >> It doesn't match anything in the Linux headers, but it's an NTSTATUS, not HVSTATUS. >> > > That is what it looks like from the code, I posted the details in the > parallel thread. > > Here is a fix: > https://lore.kernel.org/linux-hyperv/20250227233110.36596-1-romank@linux.microsoft.com/ > > Also I think the commit description in your patch > > https://web.git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=d2138eab8cde61e0e6f62d0713e45202e8457d6d > > conflates the hypervisor (ours runs bare-metal, Type 1) and the VMMs > (Virtual Machine Monitors)+VSPs (Virtual Service Providers, e.g StorVSP > that implements SCSI) running in the host/root/dom0 partition. Agreed, that was what I was led to believe, your patch would help with that miscommunication, though not in its current form. See my review comment in that thread. > >> Coming from the PoV of a user, it would be a much more useful message to see: >> >> [ 249.512760] hv_storvsc fd1d2cbd-ce7c-535c-966b-eb5f811c95f0: tag#683 cmd 0x28 status: scsi 0x2 srb 0x4 hv STATUS_UNSUCCESSFUL >> >> than >> >> [ 249.512760] hv_storvsc fd1d2cbd-ce7c-535c-966b-eb5f811c95f0: tag#683 cmd 0x28 status: scsi 0x2 srb 0x4 hv 0xc0000001 >> > > It is likely that the PoV of a user that you've mentioned is actually > a PoV of a (kernel) developer. Actually, no, it's PoV of the WSL users that are having the discussion in the linked github issue. FWIW, that issue also occurred in Azure with multiple incidents coming into our queue because of the unusable flood of error messages. > It is hard to imagine that folks running > web sites, DB servers, LoBs, LLMs, etc. in Hyper-V VMs care about the > lowest software level of the virt stack in the form of the symbolic > name or the hex code. They need their VMs to be reliable or suggest > what the user may try if a configuration error is suspected. > > To make the error log message useful to the user, the message should > mention ways of remediation or at least hint what might've gotten > wedged. Without that, that's only useful for the people who work with > the kernel code proper or the kernel interface to the user land. There's a step between seeing the issue and fixing it that you're missing, i.e. the reporting. An issue that says "flood of hv_storvsc errors reporting status unsuccessful" is better than the same without that status information: https://github.com/microsoft/WSL/issues/9173 > > So I'd think that the hex error codes from the hypervisor give the user > exactly as much as the error symbolic names do to get the system to the > desired state: nothing. I continue to disagree, seeing HV_STATUS_NO_RESOURCES is better than 0x1D, because the user may think to look at `top` or `free -h` or similar to see what could be killed to improve the situation. > Even less when the error reported "Unknown" :) I agree on the uselessness of "Unknown" to the user, except as already mentioned below, as a prompt for the code to be updated. > >>> 2. How having "Unknown" in the log can possibly be better? >> >> IMHO, seeing "Unknown" in an error report means that there's a new return value >> that needs to be mapped to errno in hv_status_to_errno() and updated here as well. >> > > It means that to the developer. To the user, it means the developers > messed something up and to make matters even worse they didn't leave any > breadcrumbs (e.g. the hex code) to see what's wrong to help the user and > themselves: there is just that "Unknown" thing in the log. I think Nuno's compromise addresses this very well, to also print the hex code. > >>> 3. Given that the select hv status codes and the proposed strings have >>> 1:1 correspondence, and there is the 1:N catch-all case for the >>> "Unknown", how's that better? >>> >> >> I didn't really follow this question, but I suppose the answer to Q2 answers this as >> well. If not, please expand and I'll try to answer. >> > > Sorry about that chunk, hit "Send" without looking the e-mail over > another time. Appreciate the discussion very much! > > >> Thanks, >> Easwar (he/him) >
On 2/28/2025 12:22 PM, Easwar Hariharan wrote: > On 2/28/2025 9:20 AM, Roman Kisel wrote: >> [...] >> >> So I'd think that the hex error codes from the hypervisor give the user >> exactly as much as the error symbolic names do to get the system to the >> desired state: nothing. > I continue to disagree, seeing HV_STATUS_NO_RESOURCES is better than 0x1D, > because the user may think to look at `top` or `free -h` or similar to see > what could be killed to improve the situation. > I agree that the symbolic name might save the step of looking up the error code in the headers. Now, the next step depends on how much the user is into virt technologies (if at all). That is to illustrate the point that a hint in the logs (or in the Documentation) is crucial of what to do next. The symbolic name might mislead; a hex code maybe with an addition of "please look up what may fix this at <URL> or report the problem here <URL>" would look better to _my imaginary_ customer :) That would be as much friendly as possible, if the kernel needs to print any of that at all. Likely the VMM in the user land if it gets that code as-is. Thank you for the fair critique and the time! [...] >>> Thanks, >>> Easwar (he/him) >> > -- Thank you, Roman
On 2/26/2025 3:07 PM, Nuno Das Neves wrote:
> Introduce hv_result_to_string() for this purpose. This allows
> hypercall failures to be debugged more easily with dmesg.
>
Let the commit message stand on its own, i.e. state that hv_result_to_string()
is introduced to convert hyper-v status codes to string.
> Signed-off-by: Nuno Das Neves <nunodasneves@linux.microsoft.com>
> ---
> drivers/hv/hv_common.c | 65 ++++++++++++++++++++++++++++++++++
> drivers/hv/hv_proc.c | 13 ++++---
> include/asm-generic/mshyperv.h | 1 +
> 3 files changed, 74 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c
> index 9804adb4cc56..ce20818688fe 100644
> --- a/drivers/hv/hv_common.c
> +++ b/drivers/hv/hv_common.c
> @@ -740,3 +740,68 @@ void hv_identify_partition_type(void)
> pr_crit("Hyper-V: CONFIG_MSHV_ROOT not enabled!\n");
> }
> }
> +
> +const char *hv_result_to_string(u64 hv_status)
> +{
> + switch (hv_result(hv_status)) {
> + case HV_STATUS_SUCCESS:
> + return "HV_STATUS_SUCCESS";
> + case HV_STATUS_INVALID_HYPERCALL_CODE:
> + return "HV_STATUS_INVALID_HYPERCALL_CODE";
> + case HV_STATUS_INVALID_HYPERCALL_INPUT:
> + return "HV_STATUS_INVALID_HYPERCALL_INPUT";
> + case HV_STATUS_INVALID_ALIGNMENT:
> + return "HV_STATUS_INVALID_ALIGNMENT";
> + case HV_STATUS_INVALID_PARAMETER:
> + return "HV_STATUS_INVALID_PARAMETER";
> + case HV_STATUS_ACCESS_DENIED:
> + return "HV_STATUS_ACCESS_DENIED";
> + case HV_STATUS_INVALID_PARTITION_STATE:
> + return "HV_STATUS_INVALID_PARTITION_STATE";
> + case HV_STATUS_OPERATION_DENIED:
> + return "HV_STATUS_OPERATION_DENIED";
> + case HV_STATUS_UNKNOWN_PROPERTY:
> + return "HV_STATUS_UNKNOWN_PROPERTY";
> + case HV_STATUS_PROPERTY_VALUE_OUT_OF_RANGE:
> + return "HV_STATUS_PROPERTY_VALUE_OUT_OF_RANGE";
> + case HV_STATUS_INSUFFICIENT_MEMORY:
> + return "HV_STATUS_INSUFFICIENT_MEMORY";
> + case HV_STATUS_INVALID_PARTITION_ID:
> + return "HV_STATUS_INVALID_PARTITION_ID";
> + case HV_STATUS_INVALID_VP_INDEX:
> + return "HV_STATUS_INVALID_VP_INDEX";
> + case HV_STATUS_NOT_FOUND:
> + return "HV_STATUS_NOT_FOUND";
> + case HV_STATUS_INVALID_PORT_ID:
> + return "HV_STATUS_INVALID_PORT_ID";
> + case HV_STATUS_INVALID_CONNECTION_ID:
> + return "HV_STATUS_INVALID_CONNECTION_ID";
> + case HV_STATUS_INSUFFICIENT_BUFFERS:
> + return "HV_STATUS_INSUFFICIENT_BUFFERS";
> + case HV_STATUS_NOT_ACKNOWLEDGED:
> + return "HV_STATUS_NOT_ACKNOWLEDGED";
> + case HV_STATUS_INVALID_VP_STATE:
> + return "HV_STATUS_INVALID_VP_STATE";
> + case HV_STATUS_NO_RESOURCES:
> + return "HV_STATUS_NO_RESOURCES";
> + case HV_STATUS_PROCESSOR_FEATURE_NOT_SUPPORTED:
> + return "HV_STATUS_PROCESSOR_FEATURE_NOT_SUPPORTED";
> + case HV_STATUS_INVALID_LP_INDEX:
> + return "HV_STATUS_INVALID_LP_INDEX";
> + case HV_STATUS_INVALID_REGISTER_VALUE:
> + return "HV_STATUS_INVALID_REGISTER_VALUE";
> + case HV_STATUS_OPERATION_FAILED:
> + return "HV_STATUS_OPERATION_FAILED";
> + case HV_STATUS_TIME_OUT:
> + return "HV_STATUS_TIME_OUT";
> + case HV_STATUS_CALL_PENDING:
> + return "HV_STATUS_CALL_PENDING";
> + case HV_STATUS_VTL_ALREADY_ENABLED:
> + return "HV_STATUS_VTL_ALREADY_ENABLED";
> + default:
> + return "Unknown";
> + };
> + return "Unknown";
Unnecessary extra return since the default case already returns "Unknown"
> +}
> +EXPORT_SYMBOL_GPL(hv_result_to_string);
> +
Extra line here ^
> diff --git a/drivers/hv/hv_proc.c b/drivers/hv/hv_proc.c
> index 2fae18e4f7d2..8fc30f509fa7 100644
> --- a/drivers/hv/hv_proc.c
> +++ b/drivers/hv/hv_proc.c
> @@ -87,7 +87,8 @@ int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages)
> page_count, 0, input_page, NULL);
> local_irq_restore(flags);
> if (!hv_result_success(status)) {
> - pr_err("Failed to deposit pages: %lld\n", status);
> + pr_err("%s: Failed to deposit pages: %s\n", __func__,
> + hv_result_to_string(status));
> ret = hv_result_to_errno(status);
> goto err_free_allocations;
> }
> @@ -137,8 +138,9 @@ int hv_call_add_logical_proc(int node, u32 lp_index, u32 apic_id)
>
> if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) {
> if (!hv_result_success(status)) {
> - pr_err("%s: cpu %u apic ID %u, %lld\n", __func__,
> - lp_index, apic_id, status);
> + pr_err("%s: cpu %u apic ID %u, %s\n",
> + __func__, lp_index, apic_id,
> + hv_result_to_string(status));
> ret = hv_result_to_errno(status);
> }
> break;
> @@ -179,8 +181,9 @@ int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags)
>
> if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) {
> if (!hv_result_success(status)) {
> - pr_err("%s: vcpu %u, lp %u, %lld\n", __func__,
> - vp_index, flags, status);
> + pr_err("%s: vcpu %u, lp %u, %s\n",
> + __func__, vp_index, flags,
> + hv_result_to_string(status));
> ret = hv_result_to_errno(status);
> }
> break;
There are more convertible instances in arch/x86/hyperv/irqdomain.c and drivers/iommu/hyperv-iommu.c
> diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h
> index b13b0cda4ac8..dc4729dba9ef 100644
> --- a/include/asm-generic/mshyperv.h
> +++ b/include/asm-generic/mshyperv.h
> @@ -298,6 +298,7 @@ static inline int cpumask_to_vpset_skip(struct hv_vpset *vpset,
> return __cpumask_to_vpset(vpset, cpus, func);
> }
>
> +const char *hv_result_to_string(u64 hv_status);
> int hv_result_to_errno(u64 status);
> void hyperv_report_panic(struct pt_regs *regs, long err, bool in_die);
> bool hv_is_hyperv_initialized(void);
On 2/26/2025 8:22 PM, Easwar Hariharan wrote:
> On 2/26/2025 3:07 PM, Nuno Das Neves wrote:
>> Introduce hv_result_to_string() for this purpose. This allows
>> hypercall failures to be debugged more easily with dmesg.
>>
>
> Let the commit message stand on its own, i.e. state that hv_result_to_string()
> is introduced to convert hyper-v status codes to string.
>
I thought since the subject line is part of the commit message, this kind of
phrasing is ok. However I see that in my email client it is a little odd because
the subject line is a bit far removed from the rest of the message.
I'll change it :)
>> Signed-off-by: Nuno Das Neves <nunodasneves@linux.microsoft.com>
>> ---
>> drivers/hv/hv_common.c | 65 ++++++++++++++++++++++++++++++++++
>> drivers/hv/hv_proc.c | 13 ++++---
>> include/asm-generic/mshyperv.h | 1 +
>> 3 files changed, 74 insertions(+), 5 deletions(-)
>>
>> diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c
>> index 9804adb4cc56..ce20818688fe 100644
>> --- a/drivers/hv/hv_common.c
>> +++ b/drivers/hv/hv_common.c
>> @@ -740,3 +740,68 @@ void hv_identify_partition_type(void)
>> pr_crit("Hyper-V: CONFIG_MSHV_ROOT not enabled!\n");
>> }
>> }
>> +
>> +const char *hv_result_to_string(u64 hv_status)
>> +{
>> + switch (hv_result(hv_status)) {
>> + case HV_STATUS_SUCCESS:
>> + return "HV_STATUS_SUCCESS";
>> + case HV_STATUS_INVALID_HYPERCALL_CODE:
>> + return "HV_STATUS_INVALID_HYPERCALL_CODE";
>> + case HV_STATUS_INVALID_HYPERCALL_INPUT:
>> + return "HV_STATUS_INVALID_HYPERCALL_INPUT";
>> + case HV_STATUS_INVALID_ALIGNMENT:
>> + return "HV_STATUS_INVALID_ALIGNMENT";
>> + case HV_STATUS_INVALID_PARAMETER:
>> + return "HV_STATUS_INVALID_PARAMETER";
>> + case HV_STATUS_ACCESS_DENIED:
>> + return "HV_STATUS_ACCESS_DENIED";
>> + case HV_STATUS_INVALID_PARTITION_STATE:
>> + return "HV_STATUS_INVALID_PARTITION_STATE";
>> + case HV_STATUS_OPERATION_DENIED:
>> + return "HV_STATUS_OPERATION_DENIED";
>> + case HV_STATUS_UNKNOWN_PROPERTY:
>> + return "HV_STATUS_UNKNOWN_PROPERTY";
>> + case HV_STATUS_PROPERTY_VALUE_OUT_OF_RANGE:
>> + return "HV_STATUS_PROPERTY_VALUE_OUT_OF_RANGE";
>> + case HV_STATUS_INSUFFICIENT_MEMORY:
>> + return "HV_STATUS_INSUFFICIENT_MEMORY";
>> + case HV_STATUS_INVALID_PARTITION_ID:
>> + return "HV_STATUS_INVALID_PARTITION_ID";
>> + case HV_STATUS_INVALID_VP_INDEX:
>> + return "HV_STATUS_INVALID_VP_INDEX";
>> + case HV_STATUS_NOT_FOUND:
>> + return "HV_STATUS_NOT_FOUND";
>> + case HV_STATUS_INVALID_PORT_ID:
>> + return "HV_STATUS_INVALID_PORT_ID";
>> + case HV_STATUS_INVALID_CONNECTION_ID:
>> + return "HV_STATUS_INVALID_CONNECTION_ID";
>> + case HV_STATUS_INSUFFICIENT_BUFFERS:
>> + return "HV_STATUS_INSUFFICIENT_BUFFERS";
>> + case HV_STATUS_NOT_ACKNOWLEDGED:
>> + return "HV_STATUS_NOT_ACKNOWLEDGED";
>> + case HV_STATUS_INVALID_VP_STATE:
>> + return "HV_STATUS_INVALID_VP_STATE";
>> + case HV_STATUS_NO_RESOURCES:
>> + return "HV_STATUS_NO_RESOURCES";
>> + case HV_STATUS_PROCESSOR_FEATURE_NOT_SUPPORTED:
>> + return "HV_STATUS_PROCESSOR_FEATURE_NOT_SUPPORTED";
>> + case HV_STATUS_INVALID_LP_INDEX:
>> + return "HV_STATUS_INVALID_LP_INDEX";
>> + case HV_STATUS_INVALID_REGISTER_VALUE:
>> + return "HV_STATUS_INVALID_REGISTER_VALUE";
>> + case HV_STATUS_OPERATION_FAILED:
>> + return "HV_STATUS_OPERATION_FAILED";
>> + case HV_STATUS_TIME_OUT:
>> + return "HV_STATUS_TIME_OUT";
>> + case HV_STATUS_CALL_PENDING:
>> + return "HV_STATUS_CALL_PENDING";
>> + case HV_STATUS_VTL_ALREADY_ENABLED:
>> + return "HV_STATUS_VTL_ALREADY_ENABLED";
>> + default:
>> + return "Unknown";
>> + };
>> + return "Unknown";
>
> Unnecessary extra return since the default case already returns "Unknown"
>
Good point, I think I'd prefer to remove the first return and leave the
default case empty.
>> +}
>> +EXPORT_SYMBOL_GPL(hv_result_to_string);
>> +
>
> Extra line here ^
>
Thanks
>> diff --git a/drivers/hv/hv_proc.c b/drivers/hv/hv_proc.c
>> index 2fae18e4f7d2..8fc30f509fa7 100644
>> --- a/drivers/hv/hv_proc.c
>> +++ b/drivers/hv/hv_proc.c
>> @@ -87,7 +87,8 @@ int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages)
>> page_count, 0, input_page, NULL);
>> local_irq_restore(flags);
>> if (!hv_result_success(status)) {
>> - pr_err("Failed to deposit pages: %lld\n", status);
>> + pr_err("%s: Failed to deposit pages: %s\n", __func__,
>> + hv_result_to_string(status));
>> ret = hv_result_to_errno(status);
>> goto err_free_allocations;
>> }
>> @@ -137,8 +138,9 @@ int hv_call_add_logical_proc(int node, u32 lp_index, u32 apic_id)
>>
>> if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) {
>> if (!hv_result_success(status)) {
>> - pr_err("%s: cpu %u apic ID %u, %lld\n", __func__,
>> - lp_index, apic_id, status);
>> + pr_err("%s: cpu %u apic ID %u, %s\n",
>> + __func__, lp_index, apic_id,
>> + hv_result_to_string(status));
>> ret = hv_result_to_errno(status);
>> }
>> break;
>> @@ -179,8 +181,9 @@ int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags)
>>
>> if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) {
>> if (!hv_result_success(status)) {
>> - pr_err("%s: vcpu %u, lp %u, %lld\n", __func__,
>> - vp_index, flags, status);
>> + pr_err("%s: vcpu %u, lp %u, %s\n",
>> + __func__, vp_index, flags,
>> + hv_result_to_string(status));
>> ret = hv_result_to_errno(status);
>> }
>> break;
>
> There are more convertible instances in arch/x86/hyperv/irqdomain.c and drivers/iommu/hyperv-iommu.c
>
Ah, thank you, happy to add those!
>> diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h
>> index b13b0cda4ac8..dc4729dba9ef 100644
>> --- a/include/asm-generic/mshyperv.h
>> +++ b/include/asm-generic/mshyperv.h
>> @@ -298,6 +298,7 @@ static inline int cpumask_to_vpset_skip(struct hv_vpset *vpset,
>> return __cpumask_to_vpset(vpset, cpus, func);
>> }
>>
>> +const char *hv_result_to_string(u64 hv_status);
>> int hv_result_to_errno(u64 status);
>> void hyperv_report_panic(struct pt_regs *regs, long err, bool in_die);
>> bool hv_is_hyperv_initialized(void);
On Wed, Feb 26, 2025 at 03:07:55PM -0800, Nuno Das Neves wrote: > Introduce hv_result_to_string() for this purpose. This allows > hypercall failures to be debugged more easily with dmesg. > > Signed-off-by: Nuno Das Neves <nunodasneves@linux.microsoft.com> > --- > drivers/hv/hv_common.c | 65 ++++++++++++++++++++++++++++++++++ > drivers/hv/hv_proc.c | 13 ++++--- > include/asm-generic/mshyperv.h | 1 + > 3 files changed, 74 insertions(+), 5 deletions(-) > Reviewed-by: Stanislav Kinsburskii <skinsburskii@linux.microsoft.com>
© 2016 - 2026 Red Hat, Inc.