From: Michael Kelley <mhklinux@outlook.com>
Update hypercall call sites to use the new hv_setup_*() functions
to set up hypercall arguments. Since these functions zero the
fixed portion of input memory, remove now redundant calls to memset()
and explicit zero'ing of input fields.
Signed-off-by: Michael Kelley <mhklinux@outlook.com>
Reviewed-by: Nuno Das Neves <nunodasneves@linux.microsoft.com>
---
Notes:
Changes in v4:
* Rename hv_hvcall_*() functions to hv_setup_*() [Easwar Hariharan]
* Rename hv_hvcall_in_batch_size() to hv_get_input_batch_size()
[Easwar Hariharan]
Changes in v2:
* Fixed get_vtl() and hv_vtl_apicid_to_vp_id() to properly treat the input
and output arguments as arrays [Nuno Das Neves]
* Enhanced __send_ipi_mask_ex() and hv_map_interrupt() to check the number
of computed banks in the hv_vpset against the batch_size. Since an
hv_vpset currently represents a maximum of 4096 CPUs, the hv_vpset size
does not exceed 512 bytes and there should always be sufficent space. But
do the check just in case something changes. [Nuno Das Neves]
arch/x86/hyperv/hv_apic.c | 10 ++++------
arch/x86/hyperv/hv_init.c | 6 ++----
arch/x86/hyperv/hv_vtl.c | 3 +--
arch/x86/hyperv/irqdomain.c | 17 ++++++++++-------
4 files changed, 17 insertions(+), 19 deletions(-)
diff --git a/arch/x86/hyperv/hv_apic.c b/arch/x86/hyperv/hv_apic.c
index bfde0a3498b9..bafb5dceb5d6 100644
--- a/arch/x86/hyperv/hv_apic.c
+++ b/arch/x86/hyperv/hv_apic.c
@@ -109,21 +109,19 @@ static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector,
{
struct hv_send_ipi_ex *ipi_arg;
unsigned long flags;
- int nr_bank = 0;
+ int batch_size, nr_bank = 0;
u64 status = HV_STATUS_INVALID_PARAMETER;
if (!(ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED))
return false;
local_irq_save(flags);
- ipi_arg = *this_cpu_ptr(hyperv_pcpu_input_arg);
-
+ batch_size = hv_setup_in_array(&ipi_arg, sizeof(*ipi_arg),
+ sizeof(ipi_arg->vp_set.bank_contents[0]));
if (unlikely(!ipi_arg))
goto ipi_mask_ex_done;
ipi_arg->vector = vector;
- ipi_arg->reserved = 0;
- ipi_arg->vp_set.valid_bank_mask = 0;
/*
* Use HV_GENERIC_SET_ALL and avoid converting cpumask to VP_SET
@@ -140,7 +138,7 @@ static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector,
* represented in VP_SET. Return an error and fall back to
* native (architectural) method of sending IPIs.
*/
- if (nr_bank <= 0)
+ if (nr_bank <= 0 || nr_bank > batch_size)
goto ipi_mask_ex_done;
} else {
ipi_arg->vp_set.format = HV_GENERIC_SET_ALL;
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index afdbda2dd7b7..b7a2877c2a92 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -685,13 +685,11 @@ int hv_apicid_to_vp_index(u32 apic_id)
local_irq_save(irq_flags);
- input = *this_cpu_ptr(hyperv_pcpu_input_arg);
- memset(input, 0, sizeof(*input));
+ hv_setup_inout_array(&input, sizeof(*input), sizeof(input->apic_ids[0]),
+ &output, 0, sizeof(*output));
input->partition_id = HV_PARTITION_ID_SELF;
input->apic_ids[0] = apic_id;
- output = *this_cpu_ptr(hyperv_pcpu_output_arg);
-
control = HV_HYPERCALL_REP_COMP_1 | HVCALL_GET_VP_INDEX_FROM_APIC_ID;
status = hv_do_hypercall(control, input, output);
ret = output[0];
diff --git a/arch/x86/hyperv/hv_vtl.c b/arch/x86/hyperv/hv_vtl.c
index 042e8712d8de..fc523a5096f4 100644
--- a/arch/x86/hyperv/hv_vtl.c
+++ b/arch/x86/hyperv/hv_vtl.c
@@ -131,8 +131,7 @@ static int hv_vtl_bringup_vcpu(u32 target_vp_index, int cpu, u64 eip_ignored)
local_irq_save(irq_flags);
- input = *this_cpu_ptr(hyperv_pcpu_input_arg);
- memset(input, 0, sizeof(*input));
+ hv_setup_in(&input, sizeof(*input));
input->partition_id = HV_PARTITION_ID_SELF;
input->vp_index = target_vp_index;
diff --git a/arch/x86/hyperv/irqdomain.c b/arch/x86/hyperv/irqdomain.c
index 090f5ac9f492..87ebe43f58cf 100644
--- a/arch/x86/hyperv/irqdomain.c
+++ b/arch/x86/hyperv/irqdomain.c
@@ -21,15 +21,15 @@ static int hv_map_interrupt(union hv_device_id device_id, bool level,
struct hv_device_interrupt_descriptor *intr_desc;
unsigned long flags;
u64 status;
- int nr_bank, var_size;
+ int batch_size, nr_bank, var_size;
local_irq_save(flags);
- input = *this_cpu_ptr(hyperv_pcpu_input_arg);
- output = *this_cpu_ptr(hyperv_pcpu_output_arg);
+ batch_size = hv_setup_inout_array(&input, sizeof(*input),
+ sizeof(input->interrupt_descriptor.target.vp_set.bank_contents[0]),
+ &output, sizeof(*output), 0);
intr_desc = &input->interrupt_descriptor;
- memset(input, 0, sizeof(*input));
input->partition_id = hv_current_partition_id;
input->device_id = device_id.as_uint64;
intr_desc->interrupt_type = HV_X64_INTERRUPT_TYPE_FIXED;
@@ -41,7 +41,6 @@ static int hv_map_interrupt(union hv_device_id device_id, bool level,
else
intr_desc->trigger_mode = HV_INTERRUPT_TRIGGER_MODE_EDGE;
- intr_desc->target.vp_set.valid_bank_mask = 0;
intr_desc->target.vp_set.format = HV_GENERIC_SET_SPARSE_4K;
nr_bank = cpumask_to_vpset(&(intr_desc->target.vp_set), cpumask_of(cpu));
if (nr_bank < 0) {
@@ -49,6 +48,11 @@ static int hv_map_interrupt(union hv_device_id device_id, bool level,
pr_err("%s: unable to generate VP set\n", __func__);
return -EINVAL;
}
+ if (nr_bank > batch_size) {
+ local_irq_restore(flags);
+ pr_err("%s: nr_bank too large\n", __func__);
+ return -EINVAL;
+ }
intr_desc->target.flags = HV_DEVICE_INTERRUPT_TARGET_PROCESSOR_SET;
/*
@@ -78,9 +82,8 @@ static int hv_unmap_interrupt(u64 id, struct hv_interrupt_entry *old_entry)
u64 status;
local_irq_save(flags);
- input = *this_cpu_ptr(hyperv_pcpu_input_arg);
- memset(input, 0, sizeof(*input));
+ hv_setup_in(&input, sizeof(*input));
intr_entry = &input->interrupt_entry;
input->partition_id = hv_current_partition_id;
input->device_id = id;
--
2.25.1
On 7/17/2025 11:55 PM, mhkelley58@gmail.com wrote: > From: Michael Kelley <mhklinux@outlook.com> > > Update hypercall call sites to use the new hv_setup_*() functions > to set up hypercall arguments. Since these functions zero the > fixed portion of input memory, remove now redundant calls to memset() > and explicit zero'ing of input fields. > > Signed-off-by: Michael Kelley <mhklinux@outlook.com> > Reviewed-by: Nuno Das Neves <nunodasneves@linux.microsoft.com> > --- > > Notes: > Changes in v4: > * Rename hv_hvcall_*() functions to hv_setup_*() [Easwar Hariharan] > * Rename hv_hvcall_in_batch_size() to hv_get_input_batch_size() > [Easwar Hariharan] > > Changes in v2: > * Fixed get_vtl() and hv_vtl_apicid_to_vp_id() to properly treat the input > and output arguments as arrays [Nuno Das Neves] > * Enhanced __send_ipi_mask_ex() and hv_map_interrupt() to check the number > of computed banks in the hv_vpset against the batch_size. Since an > hv_vpset currently represents a maximum of 4096 CPUs, the hv_vpset size > does not exceed 512 bytes and there should always be sufficent space. But > do the check just in case something changes. [Nuno Das Neves] > <snip> > diff --git a/arch/x86/hyperv/irqdomain.c b/arch/x86/hyperv/irqdomain.c > index 090f5ac9f492..87ebe43f58cf 100644 > --- a/arch/x86/hyperv/irqdomain.c > +++ b/arch/x86/hyperv/irqdomain.c > @@ -21,15 +21,15 @@ static int hv_map_interrupt(union hv_device_id device_id, bool level, > struct hv_device_interrupt_descriptor *intr_desc; > unsigned long flags; > u64 status; > - int nr_bank, var_size; > + int batch_size, nr_bank, var_size; > > local_irq_save(flags); > > - input = *this_cpu_ptr(hyperv_pcpu_input_arg); > - output = *this_cpu_ptr(hyperv_pcpu_output_arg); > + batch_size = hv_setup_inout_array(&input, sizeof(*input), > + sizeof(input->interrupt_descriptor.target.vp_set.bank_contents[0]), > + &output, sizeof(*output), 0); > Hi Michael, I finally managed to test this series on (nested) root partition and encountered an issue when I applied this patch. With the above change, I saw HV_STATUS_INVALID_ALIGNMENT from this hypercall. I printed out the addresses and sizes and everything looked correct. The output seemed to be correctly placed at the end of the percpu page. E.g. if input was allocated at an address ending in 0x3000, output would be at 0x3ff0, because hv_output_map_device_interrupt is 0x10 bytes in size. But it turns out, the definition for hv_output_map_device_interrupt is out of date (or was never correct)! It should be: struct hv_output_map_device_interrupt { struct hv_interrupt_entry interrupt_entry; u64 extended_status_deprecated[5]; } __packed; (The "extended_status_deprecated" field is missing in the current code.) Due to this, when the hypervisor validates the hypercall input/output, it sees that output is going across a page boundary, because it knows sizeof(hv_output_map_device_interrupt) is actually 0x58. I confirmed that adding the "extended_status_deprecated" field fixes the issue. That should be fixed either as part of this patch or an additional one. Nuno PS. I have yet to test the mshv driver changes in patch 6, I'll try to do so this week. > intr_desc = &input->interrupt_descriptor; > - memset(input, 0, sizeof(*input)); > input->partition_id = hv_current_partition_id; > input->device_id = device_id.as_uint64; > intr_desc->interrupt_type = HV_X64_INTERRUPT_TYPE_FIXED; > @@ -41,7 +41,6 @@ static int hv_map_interrupt(union hv_device_id device_id, bool level, > else > intr_desc->trigger_mode = HV_INTERRUPT_TRIGGER_MODE_EDGE; > > - intr_desc->target.vp_set.valid_bank_mask = 0; > intr_desc->target.vp_set.format = HV_GENERIC_SET_SPARSE_4K; > nr_bank = cpumask_to_vpset(&(intr_desc->target.vp_set), cpumask_of(cpu)); > if (nr_bank < 0) { > @@ -49,6 +48,11 @@ static int hv_map_interrupt(union hv_device_id device_id, bool level, > pr_err("%s: unable to generate VP set\n", __func__); > return -EINVAL; > } > + if (nr_bank > batch_size) { > + local_irq_restore(flags); > + pr_err("%s: nr_bank too large\n", __func__); > + return -EINVAL; > + } > intr_desc->target.flags = HV_DEVICE_INTERRUPT_TARGET_PROCESSOR_SET; > > /* > @@ -78,9 +82,8 @@ static int hv_unmap_interrupt(u64 id, struct hv_interrupt_entry *old_entry) > u64 status; > > local_irq_save(flags); > - input = *this_cpu_ptr(hyperv_pcpu_input_arg); > > - memset(input, 0, sizeof(*input)); > + hv_setup_in(&input, sizeof(*input)); > intr_entry = &input->interrupt_entry; > input->partition_id = hv_current_partition_id; > input->device_id = id;
On Tue, Aug 12, 2025 at 05:22:29PM -0700, Nuno Das Neves wrote: > On 7/17/2025 11:55 PM, mhkelley58@gmail.com wrote: > > From: Michael Kelley <mhklinux@outlook.com> > > > > Update hypercall call sites to use the new hv_setup_*() functions > > to set up hypercall arguments. Since these functions zero the > > fixed portion of input memory, remove now redundant calls to memset() > > and explicit zero'ing of input fields. > > > > Signed-off-by: Michael Kelley <mhklinux@outlook.com> > > Reviewed-by: Nuno Das Neves <nunodasneves@linux.microsoft.com> > > --- > > > > Notes: > > Changes in v4: > > * Rename hv_hvcall_*() functions to hv_setup_*() [Easwar Hariharan] > > * Rename hv_hvcall_in_batch_size() to hv_get_input_batch_size() > > [Easwar Hariharan] > > > > Changes in v2: > > * Fixed get_vtl() and hv_vtl_apicid_to_vp_id() to properly treat the input > > and output arguments as arrays [Nuno Das Neves] > > * Enhanced __send_ipi_mask_ex() and hv_map_interrupt() to check the number > > of computed banks in the hv_vpset against the batch_size. Since an > > hv_vpset currently represents a maximum of 4096 CPUs, the hv_vpset size > > does not exceed 512 bytes and there should always be sufficent space. But > > do the check just in case something changes. [Nuno Das Neves] > > > > <snip> > > > diff --git a/arch/x86/hyperv/irqdomain.c b/arch/x86/hyperv/irqdomain.c > > index 090f5ac9f492..87ebe43f58cf 100644 > > --- a/arch/x86/hyperv/irqdomain.c > > +++ b/arch/x86/hyperv/irqdomain.c > > @@ -21,15 +21,15 @@ static int hv_map_interrupt(union hv_device_id device_id, bool level, > > struct hv_device_interrupt_descriptor *intr_desc; > > unsigned long flags; > > u64 status; > > - int nr_bank, var_size; > > + int batch_size, nr_bank, var_size; > > > > local_irq_save(flags); > > > > - input = *this_cpu_ptr(hyperv_pcpu_input_arg); > > - output = *this_cpu_ptr(hyperv_pcpu_output_arg); > > + batch_size = hv_setup_inout_array(&input, sizeof(*input), > > + sizeof(input->interrupt_descriptor.target.vp_set.bank_contents[0]), > > + &output, sizeof(*output), 0); > > > > Hi Michael, I finally managed to test this series on (nested) root > partition and encountered an issue when I applied this patch. > > With the above change, I saw HV_STATUS_INVALID_ALIGNMENT from this > hypercall. I printed out the addresses and sizes and everything looked > correct. The output seemed to be correctly placed at the end of the > percpu page. E.g. if input was allocated at an address ending in 0x3000, > output would be at 0x3ff0, because hv_output_map_device_interrupt is > 0x10 bytes in size. > > But it turns out, the definition for hv_output_map_device_interrupt > is out of date (or was never correct)! It should be: > > struct hv_output_map_device_interrupt { > struct hv_interrupt_entry interrupt_entry; > u64 extended_status_deprecated[5]; > } __packed; > > (The "extended_status_deprecated" field is missing in the current code.) > > Due to this, when the hypervisor validates the hypercall input/output, > it sees that output is going across a page boundary, because it knows > sizeof(hv_output_map_device_interrupt) is actually 0x58. > > I confirmed that adding the "extended_status_deprecated" field fixes the > issue. That should be fixed either as part of this patch or an additional > one. Thanks for testing this, Nuno. In that case, can you please submit a patch for hv_output_map_device_interrupt? That can go in via the fixes tree. Thanks, Wei > > Nuno > > PS. I have yet to test the mshv driver changes in patch 6, I'll try to > do so this week. > > > intr_desc = &input->interrupt_descriptor; > > - memset(input, 0, sizeof(*input)); > > input->partition_id = hv_current_partition_id; > > input->device_id = device_id.as_uint64; > > intr_desc->interrupt_type = HV_X64_INTERRUPT_TYPE_FIXED; > > @@ -41,7 +41,6 @@ static int hv_map_interrupt(union hv_device_id device_id, bool level, > > else > > intr_desc->trigger_mode = HV_INTERRUPT_TRIGGER_MODE_EDGE; > > > > - intr_desc->target.vp_set.valid_bank_mask = 0; > > intr_desc->target.vp_set.format = HV_GENERIC_SET_SPARSE_4K; > > nr_bank = cpumask_to_vpset(&(intr_desc->target.vp_set), cpumask_of(cpu)); > > if (nr_bank < 0) { > > @@ -49,6 +48,11 @@ static int hv_map_interrupt(union hv_device_id device_id, bool level, > > pr_err("%s: unable to generate VP set\n", __func__); > > return -EINVAL; > > } > > + if (nr_bank > batch_size) { > > + local_irq_restore(flags); > > + pr_err("%s: nr_bank too large\n", __func__); > > + return -EINVAL; > > + } > > intr_desc->target.flags = HV_DEVICE_INTERRUPT_TARGET_PROCESSOR_SET; > > > > /* > > @@ -78,9 +82,8 @@ static int hv_unmap_interrupt(u64 id, struct hv_interrupt_entry *old_entry) > > u64 status; > > > > local_irq_save(flags); > > - input = *this_cpu_ptr(hyperv_pcpu_input_arg); > > > > - memset(input, 0, sizeof(*input)); > > + hv_setup_in(&input, sizeof(*input)); > > intr_entry = &input->interrupt_entry; > > input->partition_id = hv_current_partition_id; > > input->device_id = id; > >
On 8/12/2025 7:41 PM, Wei Liu wrote: > On Tue, Aug 12, 2025 at 05:22:29PM -0700, Nuno Das Neves wrote: >> On 7/17/2025 11:55 PM, mhkelley58@gmail.com wrote: >>> From: Michael Kelley <mhklinux@outlook.com> >>> >>> Update hypercall call sites to use the new hv_setup_*() functions >>> to set up hypercall arguments. Since these functions zero the >>> fixed portion of input memory, remove now redundant calls to memset() >>> and explicit zero'ing of input fields. >>> >>> Signed-off-by: Michael Kelley <mhklinux@outlook.com> >>> Reviewed-by: Nuno Das Neves <nunodasneves@linux.microsoft.com> >>> --- >>> >>> Notes: >>> Changes in v4: >>> * Rename hv_hvcall_*() functions to hv_setup_*() [Easwar Hariharan] >>> * Rename hv_hvcall_in_batch_size() to hv_get_input_batch_size() >>> [Easwar Hariharan] >>> >>> Changes in v2: >>> * Fixed get_vtl() and hv_vtl_apicid_to_vp_id() to properly treat the input >>> and output arguments as arrays [Nuno Das Neves] >>> * Enhanced __send_ipi_mask_ex() and hv_map_interrupt() to check the number >>> of computed banks in the hv_vpset against the batch_size. Since an >>> hv_vpset currently represents a maximum of 4096 CPUs, the hv_vpset size >>> does not exceed 512 bytes and there should always be sufficent space. But >>> do the check just in case something changes. [Nuno Das Neves] >>> >> >> <snip> >> >>> diff --git a/arch/x86/hyperv/irqdomain.c b/arch/x86/hyperv/irqdomain.c >>> index 090f5ac9f492..87ebe43f58cf 100644 >>> --- a/arch/x86/hyperv/irqdomain.c >>> +++ b/arch/x86/hyperv/irqdomain.c >>> @@ -21,15 +21,15 @@ static int hv_map_interrupt(union hv_device_id device_id, bool level, >>> struct hv_device_interrupt_descriptor *intr_desc; >>> unsigned long flags; >>> u64 status; >>> - int nr_bank, var_size; >>> + int batch_size, nr_bank, var_size; >>> >>> local_irq_save(flags); >>> >>> - input = *this_cpu_ptr(hyperv_pcpu_input_arg); >>> - output = *this_cpu_ptr(hyperv_pcpu_output_arg); >>> + batch_size = hv_setup_inout_array(&input, sizeof(*input), >>> + sizeof(input->interrupt_descriptor.target.vp_set.bank_contents[0]), >>> + &output, sizeof(*output), 0); >>> >> >> Hi Michael, I finally managed to test this series on (nested) root >> partition and encountered an issue when I applied this patch. >> >> With the above change, I saw HV_STATUS_INVALID_ALIGNMENT from this >> hypercall. I printed out the addresses and sizes and everything looked >> correct. The output seemed to be correctly placed at the end of the >> percpu page. E.g. if input was allocated at an address ending in 0x3000, >> output would be at 0x3ff0, because hv_output_map_device_interrupt is >> 0x10 bytes in size. >> >> But it turns out, the definition for hv_output_map_device_interrupt >> is out of date (or was never correct)! It should be: >> >> struct hv_output_map_device_interrupt { >> struct hv_interrupt_entry interrupt_entry; >> u64 extended_status_deprecated[5]; >> } __packed; >> >> (The "extended_status_deprecated" field is missing in the current code.) >> >> Due to this, when the hypervisor validates the hypercall input/output, >> it sees that output is going across a page boundary, because it knows >> sizeof(hv_output_map_device_interrupt) is actually 0x58. >> >> I confirmed that adding the "extended_status_deprecated" field fixes the >> issue. That should be fixed either as part of this patch or an additional >> one. > > Thanks for testing this, Nuno. In that case, can you please submit a > patch for hv_output_map_device_interrupt? That can go in via the fixes > tree. > > Thanks, > Wei > Sent the fix: https://lore.kernel.org/linux-hyperv/1755109257-6893-1-git-send-email-nunodasneves@linux.microsoft.com/T/#u >> >> Nuno >> >> PS. I have yet to test the mshv driver changes in patch 6, I'll try to >> do so this week. >> >>> intr_desc = &input->interrupt_descriptor; >>> - memset(input, 0, sizeof(*input)); >>> input->partition_id = hv_current_partition_id; >>> input->device_id = device_id.as_uint64; >>> intr_desc->interrupt_type = HV_X64_INTERRUPT_TYPE_FIXED; >>> @@ -41,7 +41,6 @@ static int hv_map_interrupt(union hv_device_id device_id, bool level, >>> else >>> intr_desc->trigger_mode = HV_INTERRUPT_TRIGGER_MODE_EDGE; >>> >>> - intr_desc->target.vp_set.valid_bank_mask = 0; >>> intr_desc->target.vp_set.format = HV_GENERIC_SET_SPARSE_4K; >>> nr_bank = cpumask_to_vpset(&(intr_desc->target.vp_set), cpumask_of(cpu)); >>> if (nr_bank < 0) { >>> @@ -49,6 +48,11 @@ static int hv_map_interrupt(union hv_device_id device_id, bool level, >>> pr_err("%s: unable to generate VP set\n", __func__); >>> return -EINVAL; >>> } >>> + if (nr_bank > batch_size) { >>> + local_irq_restore(flags); >>> + pr_err("%s: nr_bank too large\n", __func__); >>> + return -EINVAL; >>> + } >>> intr_desc->target.flags = HV_DEVICE_INTERRUPT_TARGET_PROCESSOR_SET; >>> >>> /* >>> @@ -78,9 +82,8 @@ static int hv_unmap_interrupt(u64 id, struct hv_interrupt_entry *old_entry) >>> u64 status; >>> >>> local_irq_save(flags); >>> - input = *this_cpu_ptr(hyperv_pcpu_input_arg); >>> >>> - memset(input, 0, sizeof(*input)); >>> + hv_setup_in(&input, sizeof(*input)); >>> intr_entry = &input->interrupt_entry; >>> input->partition_id = hv_current_partition_id; >>> input->device_id = id; >> >>
© 2016 - 2025 Red Hat, Inc.