[PATCH] x86/hvm: Add APIC IDs to the per-vLAPIC save area

Alejandro Vallejo posted 1 patch 8 months, 2 weeks ago
Patches applied successfully (tree, apply log)
git fetch https://gitlab.com/xen-project/patchew/xen tags/patchew/20250218142259.6697-1-alejandro.vallejo@cloud.com
xen/arch/x86/cpuid.c                   | 21 ++++++++++-----------
xen/arch/x86/hvm/vlapic.c              | 16 ++++++++++++++--
xen/arch/x86/include/asm/hvm/vlapic.h  |  1 +
xen/include/public/arch-x86/hvm/save.h |  2 ++
4 files changed, 27 insertions(+), 13 deletions(-)
[PATCH] x86/hvm: Add APIC IDs to the per-vLAPIC save area
Posted by Alejandro Vallejo 8 months, 2 weeks ago
Today, Xen hardcodes apic_id = vcpu_id * 2, but this is unwise and
interferes with providing accurate topology information to the guest.

Introduce a new x2apic_id field into hvm_hw_lapic.  This is immutable
state from the guest's point of view, but it will allow the toolstack to
eventually configure the value, and for the value to move on migrate.

For backwards compatibility, the patch rebuilds the old-style APIC IDs
from migration streams lacking them when they aren't present.

Signed-off-by: Alejandro Vallejo <alejandro.vallejo@cloud.com>
---
I've split this one from the rest of the topology series as it's independent
and entangled with another patch from Andrew.

Changes since v7 of the topology series:
  * Minor changes to commit message and some comments in the code.
    * Notably all references to 4.19 are now 4.20 and "Nov. 2024" is now
      "Feb. 2025".
  * Moved x2APIC ID recovery to the hidden state context handler.
  * s/rsvd_zero/_rsvd0/
---
 xen/arch/x86/cpuid.c                   | 21 ++++++++++-----------
 xen/arch/x86/hvm/vlapic.c              | 16 ++++++++++++++--
 xen/arch/x86/include/asm/hvm/vlapic.h  |  1 +
 xen/include/public/arch-x86/hvm/save.h |  2 ++
 4 files changed, 27 insertions(+), 13 deletions(-)

diff --git a/xen/arch/x86/cpuid.c b/xen/arch/x86/cpuid.c
index 2a777436ee27..f33f7bd2069f 100644
--- a/xen/arch/x86/cpuid.c
+++ b/xen/arch/x86/cpuid.c
@@ -138,10 +138,10 @@ void guest_cpuid(const struct vcpu *v, uint32_t leaf,
         const struct cpu_user_regs *regs;
 
     case 0x1:
-        /* TODO: Rework topology logic. */
         res->b &= 0x00ffffffu;
         if ( is_hvm_domain(d) )
-            res->b |= (v->vcpu_id * 2) << 24;
+            /* Large systems do wrap around 255 in the xAPIC_ID field. */
+            res->b |= vlapic_x2apic_id(vcpu_vlapic(v)) << 24;
 
         /* TODO: Rework vPMU control in terms of toolstack choices. */
         if ( vpmu_available(v) &&
@@ -310,19 +310,18 @@ void guest_cpuid(const struct vcpu *v, uint32_t leaf,
         break;
 
     case 0xb:
-        /*
-         * In principle, this leaf is Intel-only.  In practice, it is tightly
-         * coupled with x2apic, and we offer an x2apic-capable APIC emulation
-         * to guests on AMD hardware as well.
-         *
-         * TODO: Rework topology logic.
-         */
         if ( p->basic.x2apic )
         {
             *(uint8_t *)&res->c = subleaf;
 
-            /* Fix the x2APIC identifier. */
-            res->d = v->vcpu_id * 2;
+            /*
+             * The x2APIC ID is per-vCPU, and fixed irrespective of the
+             * requested subleaf. Xen 4.20 and earlier leaked x2APIC into PV
+             * guests. The value shown is nonsensical but kept as-was during
+             * the Xen 4.21 dev cycle (Feb. 2025) for compatibility.
+             */
+            res->d = is_hvm_domain(d) ? vlapic_x2apic_id(vcpu_vlapic(v))
+                                      : 2 * v->vcpu_id;
         }
         break;
 
diff --git a/xen/arch/x86/hvm/vlapic.c b/xen/arch/x86/hvm/vlapic.c
index 3363926b487b..61c4aadd95e3 100644
--- a/xen/arch/x86/hvm/vlapic.c
+++ b/xen/arch/x86/hvm/vlapic.c
@@ -1090,7 +1090,7 @@ static uint32_t x2apic_ldr_from_id(uint32_t id)
 static void set_x2apic_id(struct vlapic *vlapic)
 {
     const struct vcpu *v = vlapic_vcpu(vlapic);
-    uint32_t apic_id = v->vcpu_id * 2;
+    uint32_t apic_id = vlapic_x2apic_id(vlapic);
     uint32_t apic_ldr = x2apic_ldr_from_id(apic_id);
 
     /*
@@ -1470,7 +1470,7 @@ void vlapic_reset(struct vlapic *vlapic)
     if ( v->vcpu_id == 0 )
         vlapic->hw.apic_base_msr |= APIC_BASE_BSP;
 
-    vlapic_set_reg(vlapic, APIC_ID, (v->vcpu_id * 2) << 24);
+    vlapic_set_reg(vlapic, APIC_ID, SET_xAPIC_ID(vlapic_x2apic_id(vlapic)));
     vlapic_do_init(vlapic);
 }
 
@@ -1606,6 +1606,9 @@ static int cf_check lapic_check_hidden(const struct domain *d,
          APIC_BASE_EXTD )
         return -EINVAL;
 
+    if ( s._rsvd0 )
+        return -EINVAL;
+
     return 0;
 }
 
@@ -1621,6 +1624,14 @@ static int cf_check lapic_load_hidden(struct domain *d, hvm_domain_context_t *h)
         return -EINVAL;
     }
 
+    /*
+     * Xen 4.20 and earlier had no x2APIC ID in the migration stream and
+     * hard-coded "vcpu_id * 2". Default back to this if we have a
+     * zero-extended record.
+     */
+    if ( h->size <= offsetof(struct hvm_hw_lapic, x2apic_id) )
+        s->hw.x2apic_id = v->vcpu_id * 2;
+
     s->loaded.hw = 1;
     if ( s->loaded.regs )
         lapic_load_fixup(s);
@@ -1687,6 +1698,7 @@ int vlapic_init(struct vcpu *v)
     }
 
     vlapic->pt.source = PTSRC_lapic;
+    vlapic->hw.x2apic_id = 2 * v->vcpu_id;
 
     vlapic->regs_page = alloc_domheap_page(v->domain, MEMF_no_owner);
     if ( !vlapic->regs_page )
diff --git a/xen/arch/x86/include/asm/hvm/vlapic.h b/xen/arch/x86/include/asm/hvm/vlapic.h
index 2c4ff94ae7a8..85c4a236b9f6 100644
--- a/xen/arch/x86/include/asm/hvm/vlapic.h
+++ b/xen/arch/x86/include/asm/hvm/vlapic.h
@@ -44,6 +44,7 @@
 #define vlapic_xapic_mode(vlapic)                               \
     (!vlapic_hw_disabled(vlapic) && \
      !((vlapic)->hw.apic_base_msr & APIC_BASE_EXTD))
+#define vlapic_x2apic_id(vlapic) ((vlapic)->hw.x2apic_id)
 
 /*
  * Generic APIC bitmap vector update & search routines.
diff --git a/xen/include/public/arch-x86/hvm/save.h b/xen/include/public/arch-x86/hvm/save.h
index 7ecacadde165..a70d46811ab5 100644
--- a/xen/include/public/arch-x86/hvm/save.h
+++ b/xen/include/public/arch-x86/hvm/save.h
@@ -394,6 +394,8 @@ struct hvm_hw_lapic {
     uint32_t             disabled; /* VLAPIC_xx_DISABLED */
     uint32_t             timer_divisor;
     uint64_t             tdt_msr;
+    uint32_t             x2apic_id;
+    uint32_t             _rsvd0;
 };
 
 DECLARE_HVM_SAVE_TYPE(LAPIC, 5, struct hvm_hw_lapic);
-- 
2.48.1


Re: [PATCH] x86/hvm: Add APIC IDs to the per-vLAPIC save area
Posted by Jan Beulich 8 months, 1 week ago
On 18.02.2025 15:22, Alejandro Vallejo wrote:
> Today, Xen hardcodes apic_id = vcpu_id * 2, but this is unwise and
> interferes with providing accurate topology information to the guest.
> 
> Introduce a new x2apic_id field into hvm_hw_lapic.  This is immutable
> state from the guest's point of view, but it will allow the toolstack to
> eventually configure the value, and for the value to move on migrate.
> 
> For backwards compatibility, the patch rebuilds the old-style APIC IDs
> from migration streams lacking them when they aren't present.

Nit: "when they aren't present" looks to duplicate "lacking them"?

> Signed-off-by: Alejandro Vallejo <alejandro.vallejo@cloud.com>
> ---
> I've split this one from the rest of the topology series as it's independent
> and entangled with another patch from Andrew.

Albeit I think meanwhile we've settled that the entangling isn't quite as
problematic.

> @@ -1621,6 +1624,14 @@ static int cf_check lapic_load_hidden(struct domain *d, hvm_domain_context_t *h)
>          return -EINVAL;
>      }
>  
> +    /*
> +     * Xen 4.20 and earlier had no x2APIC ID in the migration stream and
> +     * hard-coded "vcpu_id * 2". Default back to this if we have a
> +     * zero-extended record.
> +     */
> +    if ( h->size <= offsetof(struct hvm_hw_lapic, x2apic_id) )
> +        s->hw.x2apic_id = v->vcpu_id * 2;

While we better wouldn't get to see such input, it is in principle possible
to have an input stream with, say, half the field. Imo the condition ought
to be such that we'd make the adjustment when less than the full field is
available.

Jan

Re: [PATCH] x86/hvm: Add APIC IDs to the per-vLAPIC save area
Posted by Alejandro Vallejo 8 months ago
On Wed Feb 26, 2025 at 1:11 PM GMT, Jan Beulich wrote:
> On 18.02.2025 15:22, Alejandro Vallejo wrote:
> > Today, Xen hardcodes apic_id = vcpu_id * 2, but this is unwise and
> > interferes with providing accurate topology information to the guest.
> > 
> > Introduce a new x2apic_id field into hvm_hw_lapic.  This is immutable
> > state from the guest's point of view, but it will allow the toolstack to
> > eventually configure the value, and for the value to move on migrate.
> > 
> > For backwards compatibility, the patch rebuilds the old-style APIC IDs
> > from migration streams lacking them when they aren't present.
>
> Nit: "when they aren't present" looks to duplicate "lacking them"?

Indeed, I'll get rid of the former.

>
> > Signed-off-by: Alejandro Vallejo <alejandro.vallejo@cloud.com>
> > ---
> > I've split this one from the rest of the topology series as it's independent
> > and entangled with another patch from Andrew.
>
> Albeit I think meanwhile we've settled that the entangling isn't quite as
> problematic.
>
> > @@ -1621,6 +1624,14 @@ static int cf_check lapic_load_hidden(struct domain *d, hvm_domain_context_t *h)
> >          return -EINVAL;
> >      }
> >  
> > +    /*
> > +     * Xen 4.20 and earlier had no x2APIC ID in the migration stream and
> > +     * hard-coded "vcpu_id * 2". Default back to this if we have a
> > +     * zero-extended record.
> > +     */
> > +    if ( h->size <= offsetof(struct hvm_hw_lapic, x2apic_id) )
> > +        s->hw.x2apic_id = v->vcpu_id * 2;
>
> While we better wouldn't get to see such input, it is in principle possible
> to have an input stream with, say, half the field. Imo the condition ought
> to be such that we'd make the adjustment when less than the full field is
> available.
>
> Jan

I think this is addressed by Roger's proposal later on, so I'll leave it at
that. Thanks.

Cheers,
Alejandro
Re: [PATCH] x86/hvm: Add APIC IDs to the per-vLAPIC save area
Posted by Roger Pau Monné 8 months, 1 week ago
On Wed, Feb 26, 2025 at 02:11:23PM +0100, Jan Beulich wrote:
> On 18.02.2025 15:22, Alejandro Vallejo wrote:
> > Today, Xen hardcodes apic_id = vcpu_id * 2, but this is unwise and
> > interferes with providing accurate topology information to the guest.
> > 
> > Introduce a new x2apic_id field into hvm_hw_lapic.  This is immutable
> > state from the guest's point of view, but it will allow the toolstack to
> > eventually configure the value, and for the value to move on migrate.
> > 
> > For backwards compatibility, the patch rebuilds the old-style APIC IDs
> > from migration streams lacking them when they aren't present.
> 
> Nit: "when they aren't present" looks to duplicate "lacking them"?
> 
> > Signed-off-by: Alejandro Vallejo <alejandro.vallejo@cloud.com>
> > ---
> > I've split this one from the rest of the topology series as it's independent
> > and entangled with another patch from Andrew.
> 
> Albeit I think meanwhile we've settled that the entangling isn't quite as
> problematic.
> 
> > @@ -1621,6 +1624,14 @@ static int cf_check lapic_load_hidden(struct domain *d, hvm_domain_context_t *h)
> >          return -EINVAL;
> >      }
> >  
> > +    /*
> > +     * Xen 4.20 and earlier had no x2APIC ID in the migration stream and
> > +     * hard-coded "vcpu_id * 2". Default back to this if we have a
> > +     * zero-extended record.
> > +     */
> > +    if ( h->size <= offsetof(struct hvm_hw_lapic, x2apic_id) )
> > +        s->hw.x2apic_id = v->vcpu_id * 2;
> 
> While we better wouldn't get to see such input, it is in principle possible
> to have an input stream with, say, half the field. Imo the condition ought
> to be such that we'd make the adjustment when less than the full field is
> available.

I would add an additional check to ensure _rsvd0 remains 0, to avoid
further additions from attempting to reuse that padding space.

if ( s->hw._rsvd0 )
    return -EINVAL;

In fact I would be tempted to overwrite the ID if the stream size
doesn't match the expected one, ie:

if ( h->size < (offsetof(struct hvm_hw_lapic, _rsvd0) +
                sizeof(s->hw._rsvd0)) )
    s->hw.x2apic_id = v->vcpu_id * 2;

Regards, Roger.

Re: [PATCH] x86/hvm: Add APIC IDs to the per-vLAPIC save area
Posted by Alejandro Vallejo 8 months ago
Hi,

On Wed Feb 26, 2025 at 5:33 PM GMT, Roger Pau Monné wrote:
> On Wed, Feb 26, 2025 at 02:11:23PM +0100, Jan Beulich wrote:
> > On 18.02.2025 15:22, Alejandro Vallejo wrote:
> > > Today, Xen hardcodes apic_id = vcpu_id * 2, but this is unwise and
> > > interferes with providing accurate topology information to the guest.
> > > 
> > > Introduce a new x2apic_id field into hvm_hw_lapic.  This is immutable
> > > state from the guest's point of view, but it will allow the toolstack to
> > > eventually configure the value, and for the value to move on migrate.
> > > 
> > > For backwards compatibility, the patch rebuilds the old-style APIC IDs
> > > from migration streams lacking them when they aren't present.
> > 
> > Nit: "when they aren't present" looks to duplicate "lacking them"?
> > 
> > > Signed-off-by: Alejandro Vallejo <alejandro.vallejo@cloud.com>
> > > ---
> > > I've split this one from the rest of the topology series as it's independent
> > > and entangled with another patch from Andrew.
> > 
> > Albeit I think meanwhile we've settled that the entangling isn't quite as
> > problematic.
> > 
> > > @@ -1621,6 +1624,14 @@ static int cf_check lapic_load_hidden(struct domain *d, hvm_domain_context_t *h)
> > >          return -EINVAL;
> > >      }
> > >  
> > > +    /*
> > > +     * Xen 4.20 and earlier had no x2APIC ID in the migration stream and
> > > +     * hard-coded "vcpu_id * 2". Default back to this if we have a
> > > +     * zero-extended record.
> > > +     */
> > > +    if ( h->size <= offsetof(struct hvm_hw_lapic, x2apic_id) )
> > > +        s->hw.x2apic_id = v->vcpu_id * 2;
> > 
> > While we better wouldn't get to see such input, it is in principle possible
> > to have an input stream with, say, half the field. Imo the condition ought
> > to be such that we'd make the adjustment when less than the full field is
> > available.
>
> I would add an additional check to ensure _rsvd0 remains 0, to avoid
> further additions from attempting to reuse that padding space.
>
> if ( s->hw._rsvd0 )
>     return -EINVAL;

That's already on lapic_check_hidden(), so it's guaranteed to be zero. Unless
you mean something else?

>
> if ( s->hw._rsvd0 )
>     return -EINVAL;
>
> In fact I would be tempted to overwrite the ID if the stream size
> doesn't match the expected one, ie:
>
> if ( h->size < (offsetof(struct hvm_hw_lapic, _rsvd0) +
>                 sizeof(s->hw._rsvd0)) )
>     s->hw.x2apic_id = v->vcpu_id * 2;

That looks better. I'll do that instead.

>
> Regards, Roger.

Cheers,
Alejandro
Re: [PATCH] x86/hvm: Add APIC IDs to the per-vLAPIC save area
Posted by Roger Pau Monné 8 months ago
On Thu, Feb 27, 2025 at 02:12:58PM +0000, Alejandro Vallejo wrote:
> Hi,
> 
> On Wed Feb 26, 2025 at 5:33 PM GMT, Roger Pau Monné wrote:
> > On Wed, Feb 26, 2025 at 02:11:23PM +0100, Jan Beulich wrote:
> > > On 18.02.2025 15:22, Alejandro Vallejo wrote:
> > > > @@ -1621,6 +1624,14 @@ static int cf_check lapic_load_hidden(struct domain *d, hvm_domain_context_t *h)
> > > >          return -EINVAL;
> > > >      }
> > > >  
> > > > +    /*
> > > > +     * Xen 4.20 and earlier had no x2APIC ID in the migration stream and
> > > > +     * hard-coded "vcpu_id * 2". Default back to this if we have a
> > > > +     * zero-extended record.
> > > > +     */
> > > > +    if ( h->size <= offsetof(struct hvm_hw_lapic, x2apic_id) )
> > > > +        s->hw.x2apic_id = v->vcpu_id * 2;
> > > 
> > > While we better wouldn't get to see such input, it is in principle possible
> > > to have an input stream with, say, half the field. Imo the condition ought
> > > to be such that we'd make the adjustment when less than the full field is
> > > available.
> >
> > I would add an additional check to ensure _rsvd0 remains 0, to avoid
> > further additions from attempting to reuse that padding space.
> >
> > if ( s->hw._rsvd0 )
> >     return -EINVAL;
> 
> That's already on lapic_check_hidden(), so it's guaranteed to be zero. Unless
> you mean something else?

Oh, I've missed that - it's indeed fine.  I was missing the previous
chunk when replying here and forgot about it.

Thanks, Roger.

Re: [PATCH] x86/hvm: Add APIC IDs to the per-vLAPIC save area
Posted by Jan Beulich 8 months, 1 week ago
On 26.02.2025 18:33, Roger Pau Monné wrote:
> On Wed, Feb 26, 2025 at 02:11:23PM +0100, Jan Beulich wrote:
>> On 18.02.2025 15:22, Alejandro Vallejo wrote:
>>> Today, Xen hardcodes apic_id = vcpu_id * 2, but this is unwise and
>>> interferes with providing accurate topology information to the guest.
>>>
>>> Introduce a new x2apic_id field into hvm_hw_lapic.  This is immutable
>>> state from the guest's point of view, but it will allow the toolstack to
>>> eventually configure the value, and for the value to move on migrate.
>>>
>>> For backwards compatibility, the patch rebuilds the old-style APIC IDs
>>> from migration streams lacking them when they aren't present.
>>
>> Nit: "when they aren't present" looks to duplicate "lacking them"?
>>
>>> Signed-off-by: Alejandro Vallejo <alejandro.vallejo@cloud.com>
>>> ---
>>> I've split this one from the rest of the topology series as it's independent
>>> and entangled with another patch from Andrew.
>>
>> Albeit I think meanwhile we've settled that the entangling isn't quite as
>> problematic.
>>
>>> @@ -1621,6 +1624,14 @@ static int cf_check lapic_load_hidden(struct domain *d, hvm_domain_context_t *h)
>>>          return -EINVAL;
>>>      }
>>>  
>>> +    /*
>>> +     * Xen 4.20 and earlier had no x2APIC ID in the migration stream and
>>> +     * hard-coded "vcpu_id * 2". Default back to this if we have a
>>> +     * zero-extended record.
>>> +     */
>>> +    if ( h->size <= offsetof(struct hvm_hw_lapic, x2apic_id) )
>>> +        s->hw.x2apic_id = v->vcpu_id * 2;
>>
>> While we better wouldn't get to see such input, it is in principle possible
>> to have an input stream with, say, half the field. Imo the condition ought
>> to be such that we'd make the adjustment when less than the full field is
>> available.
> 
> I would add an additional check to ensure _rsvd0 remains 0, to avoid
> further additions from attempting to reuse that padding space.
> 
> if ( s->hw._rsvd0 )
>     return -EINVAL;

I agree we want such a check; I actually should have pointed that out, too.
I don't, however, see why the field couldn't be re-used going forward (under
the right conditions, of course).

> In fact I would be tempted to overwrite the ID if the stream size
> doesn't match the expected one, ie:
> 
> if ( h->size < (offsetof(struct hvm_hw_lapic, _rsvd0) +
>                 sizeof(s->hw._rsvd0)) )
>     s->hw.x2apic_id = v->vcpu_id * 2;

Hmm, yes, perhaps better to be yet more safe here.

Jan

Re: [PATCH] x86/hvm: Add APIC IDs to the per-vLAPIC save area
Posted by Alejandro Vallejo 8 months ago
On Thu Feb 27, 2025 at 7:29 AM GMT, Jan Beulich wrote:
> On 26.02.2025 18:33, Roger Pau Monné wrote:
> > On Wed, Feb 26, 2025 at 02:11:23PM +0100, Jan Beulich wrote:
> >> On 18.02.2025 15:22, Alejandro Vallejo wrote:
> >>> Today, Xen hardcodes apic_id = vcpu_id * 2, but this is unwise and
> >>> interferes with providing accurate topology information to the guest.
> >>>
> >>> Introduce a new x2apic_id field into hvm_hw_lapic.  This is immutable
> >>> state from the guest's point of view, but it will allow the toolstack to
> >>> eventually configure the value, and for the value to move on migrate.
> >>>
> >>> For backwards compatibility, the patch rebuilds the old-style APIC IDs
> >>> from migration streams lacking them when they aren't present.
> >>
> >> Nit: "when they aren't present" looks to duplicate "lacking them"?
> >>
> >>> Signed-off-by: Alejandro Vallejo <alejandro.vallejo@cloud.com>
> >>> ---
> >>> I've split this one from the rest of the topology series as it's independent
> >>> and entangled with another patch from Andrew.
> >>
> >> Albeit I think meanwhile we've settled that the entangling isn't quite as
> >> problematic.
> >>
> >>> @@ -1621,6 +1624,14 @@ static int cf_check lapic_load_hidden(struct domain *d, hvm_domain_context_t *h)
> >>>          return -EINVAL;
> >>>      }
> >>>  
> >>> +    /*
> >>> +     * Xen 4.20 and earlier had no x2APIC ID in the migration stream and
> >>> +     * hard-coded "vcpu_id * 2". Default back to this if we have a
> >>> +     * zero-extended record.
> >>> +     */
> >>> +    if ( h->size <= offsetof(struct hvm_hw_lapic, x2apic_id) )
> >>> +        s->hw.x2apic_id = v->vcpu_id * 2;
> >>
> >> While we better wouldn't get to see such input, it is in principle possible
> >> to have an input stream with, say, half the field. Imo the condition ought
> >> to be such that we'd make the adjustment when less than the full field is
> >> available.
> > 
> > I would add an additional check to ensure _rsvd0 remains 0, to avoid
> > further additions from attempting to reuse that padding space.
> > 
> > if ( s->hw._rsvd0 )
> >     return -EINVAL;
>
> I agree we want such a check; I actually should have pointed that out, too.
> I don't, however, see why the field couldn't be re-used going forward (under
> the right conditions, of course).

It could be reused indeed, but at the point of making use of it we'd remove the
check.

Cheers,
Alejandro