Make it so the APs expose their own APIC IDs in a LUT. We can use that
LUT to populate the MADT, decoupling the algorithm that relates CPU IDs
and APIC IDs from hvmloader.
Moved smp_initialise() ahead of apic_setup() in order to initialise
cpu_to_x2apicid ASAP and avoid using it uninitialised. Note that
bringing up the APs doesn't need the APIC in hvmloader becasue it always
runs virtualized and uses the PV interface.
While at this, exploit the assumption that CPU0 always has APICID0 to
remove ap_callin, as writing the APIC ID may serve the same purpose.
Signed-off-by: Alejandro Vallejo <alejandro.vallejo@cloud.com>
---
v7:
* CPU_TO_X2APICID to lowercase
* Spell out the CPU0<-->APICID0 relationship in the commit message as
the rationale to remove ap_callin.
* Explain the motion of smp_initialise() ahead of apic_setup() in the
commit message.
---
tools/firmware/hvmloader/config.h | 5 ++-
tools/firmware/hvmloader/hvmloader.c | 6 +--
tools/firmware/hvmloader/mp_tables.c | 4 +-
tools/firmware/hvmloader/smp.c | 57 ++++++++++++++++++++-----
tools/firmware/hvmloader/util.c | 2 +-
tools/include/xen-tools/common-macros.h | 5 +++
6 files changed, 63 insertions(+), 16 deletions(-)
diff --git a/tools/firmware/hvmloader/config.h b/tools/firmware/hvmloader/config.h
index cd716bf39245..04cab1e59f08 100644
--- a/tools/firmware/hvmloader/config.h
+++ b/tools/firmware/hvmloader/config.h
@@ -4,6 +4,8 @@
#include <stdint.h>
#include <stdbool.h>
+#include <xen/hvm/hvm_info_table.h>
+
enum virtual_vga { VGA_none, VGA_std, VGA_cirrus, VGA_pt };
extern enum virtual_vga virtual_vga;
@@ -48,8 +50,9 @@ extern uint8_t ioapic_version;
#define IOAPIC_ID 0x01
+extern uint32_t cpu_to_x2apicid[HVM_MAX_VCPUS];
+
#define LAPIC_BASE_ADDRESS 0xfee00000
-#define LAPIC_ID(vcpu_id) ((vcpu_id) * 2)
#define PCI_ISA_DEVFN 0x08 /* dev 1, fn 0 */
#define PCI_ISA_IRQ_MASK 0x0c20U /* ISA IRQs 5,10,11 are PCI connected */
diff --git a/tools/firmware/hvmloader/hvmloader.c b/tools/firmware/hvmloader/hvmloader.c
index f8af88fabf24..bebdfa923880 100644
--- a/tools/firmware/hvmloader/hvmloader.c
+++ b/tools/firmware/hvmloader/hvmloader.c
@@ -224,7 +224,7 @@ static void apic_setup(void)
/* 8259A ExtInts are delivered through IOAPIC pin 0 (Virtual Wire Mode). */
ioapic_write(0x10, APIC_DM_EXTINT);
- ioapic_write(0x11, SET_APIC_ID(LAPIC_ID(0)));
+ ioapic_write(0x11, SET_APIC_ID(cpu_to_x2apicid[0]));
}
struct bios_info {
@@ -341,11 +341,11 @@ int main(void)
printf("CPU speed is %u MHz\n", get_cpu_mhz());
+ smp_initialise();
+
apic_setup();
pci_setup();
- smp_initialise();
-
perform_tests();
if ( bios->bios_info_setup )
diff --git a/tools/firmware/hvmloader/mp_tables.c b/tools/firmware/hvmloader/mp_tables.c
index 77d3010406d0..539260365e1e 100644
--- a/tools/firmware/hvmloader/mp_tables.c
+++ b/tools/firmware/hvmloader/mp_tables.c
@@ -198,8 +198,10 @@ static void fill_mp_config_table(struct mp_config_table *mpct, int length)
/* fills in an MP processor entry for VCPU 'vcpu_id' */
static void fill_mp_proc_entry(struct mp_proc_entry *mppe, int vcpu_id)
{
+ ASSERT(cpu_to_x2apicid[vcpu_id] < 0xFF );
+
mppe->type = ENTRY_TYPE_PROCESSOR;
- mppe->lapic_id = LAPIC_ID(vcpu_id);
+ mppe->lapic_id = cpu_to_x2apicid[vcpu_id];
mppe->lapic_version = 0x11;
mppe->cpu_flags = CPU_FLAG_ENABLED;
if ( vcpu_id == 0 )
diff --git a/tools/firmware/hvmloader/smp.c b/tools/firmware/hvmloader/smp.c
index 1b940cefd071..d63536f14f00 100644
--- a/tools/firmware/hvmloader/smp.c
+++ b/tools/firmware/hvmloader/smp.c
@@ -29,7 +29,37 @@
#include <xen/vcpu.h>
-static int ap_callin;
+/**
+ * Lookup table of (x2)APIC IDs.
+ *
+ * Each entry is populated its respective CPU as they come online. This is required
+ * for generating the MADT with minimal assumptions about ID relationships.
+ *
+ * While the name makes "x2" explicit, these may actually be xAPIC IDs if no
+ * x2APIC is present. "x2" merely highlights that each entry is 32 bits wide.
+ */
+uint32_t cpu_to_x2apicid[HVM_MAX_VCPUS];
+
+/** Tristate about x2apic being supported. -1=unknown */
+static int has_x2apic = -1;
+
+static uint32_t read_apic_id(void)
+{
+ uint32_t apic_id;
+
+ if ( has_x2apic )
+ cpuid(0xb, NULL, NULL, NULL, &apic_id);
+ else
+ {
+ cpuid(1, NULL, &apic_id, NULL, NULL);
+ apic_id >>= 24;
+ }
+
+ /* Never called by cpu0, so should never return 0 */
+ ASSERT(apic_id);
+
+ return apic_id;
+}
static void cpu_setup(unsigned int cpu)
{
@@ -37,13 +67,17 @@ static void cpu_setup(unsigned int cpu)
cacheattr_init();
printf("done.\n");
- if ( !cpu ) /* Used on the BSP too */
+ /* The BSP exits early because its APIC ID is known to be zero */
+ if ( !cpu )
return;
wmb();
- ap_callin = 1;
+ ACCESS_ONCE(cpu_to_x2apicid[cpu]) = read_apic_id();
- /* After this point, the BSP will shut us down. */
+ /*
+ * After this point the BSP will shut us down. A write to
+ * cpu_to_x2apicid[cpu] signals the BSP to bring down `cpu`.
+ */
for ( ;; )
asm volatile ( "hlt" );
@@ -54,10 +88,6 @@ static void boot_cpu(unsigned int cpu)
static uint8_t ap_stack[PAGE_SIZE] __attribute__ ((aligned (16)));
static struct vcpu_hvm_context ap;
- /* Initialise shared variables. */
- ap_callin = 0;
- wmb();
-
/* Wake up the secondary processor */
ap = (struct vcpu_hvm_context) {
.mode = VCPU_HVM_MODE_32B,
@@ -90,10 +120,11 @@ static void boot_cpu(unsigned int cpu)
BUG();
/*
- * Wait for the secondary processor to complete initialisation.
+ * Wait for the secondary processor to complete initialisation,
+ * which is signaled by its x2APIC ID being written to the LUT.
* Do not touch shared resources meanwhile.
*/
- while ( !ap_callin )
+ while ( !ACCESS_ONCE(cpu_to_x2apicid[cpu]) )
cpu_relax();
/* Take the secondary processor offline. */
@@ -104,6 +135,12 @@ static void boot_cpu(unsigned int cpu)
void smp_initialise(void)
{
unsigned int i, nr_cpus = hvm_info->nr_vcpus;
+ uint32_t ecx;
+
+ cpuid(1, NULL, NULL, &ecx, NULL);
+ has_x2apic = (ecx >> 21) & 1;
+ if ( has_x2apic )
+ printf("x2APIC supported\n");
printf("Multiprocessor initialisation:\n");
cpu_setup(0);
diff --git a/tools/firmware/hvmloader/util.c b/tools/firmware/hvmloader/util.c
index d3b3f9038e64..821b3086a87d 100644
--- a/tools/firmware/hvmloader/util.c
+++ b/tools/firmware/hvmloader/util.c
@@ -827,7 +827,7 @@ static void acpi_mem_free(struct acpi_ctxt *ctxt,
static uint32_t acpi_lapic_id(unsigned cpu)
{
- return LAPIC_ID(cpu);
+ return cpu_to_x2apic_id[cpu];
}
void hvmloader_acpi_build_tables(struct acpi_config *config,
diff --git a/tools/include/xen-tools/common-macros.h b/tools/include/xen-tools/common-macros.h
index 60912225cb7a..336c6309d96e 100644
--- a/tools/include/xen-tools/common-macros.h
+++ b/tools/include/xen-tools/common-macros.h
@@ -108,4 +108,9 @@
#define get_unaligned(ptr) get_unaligned_t(typeof(*(ptr)), ptr)
#define put_unaligned(val, ptr) put_unaligned_t(typeof(*(ptr)), val, ptr)
+#define __ACCESS_ONCE(x) ({ \
+ (void)(typeof(x))0; /* Scalar typecheck. */ \
+ (volatile typeof(x) *)&(x); })
+#define ACCESS_ONCE(x) (*__ACCESS_ONCE(x))
+
#endif /* __XEN_TOOLS_COMMON_MACROS__ */
--
2.47.0
On 21/10/2024 4:45 pm, Alejandro Vallejo wrote: > diff --git a/tools/firmware/hvmloader/config.h b/tools/firmware/hvmloader/config.h > index cd716bf39245..04cab1e59f08 100644 > --- a/tools/firmware/hvmloader/config.h > +++ b/tools/firmware/hvmloader/config.h > @@ -4,6 +4,8 @@ > #include <stdint.h> > #include <stdbool.h> > > +#include <xen/hvm/hvm_info_table.h> > + > enum virtual_vga { VGA_none, VGA_std, VGA_cirrus, VGA_pt }; > extern enum virtual_vga virtual_vga; > > @@ -48,8 +50,9 @@ extern uint8_t ioapic_version; > > #define IOAPIC_ID 0x01 > > +extern uint32_t cpu_to_x2apicid[HVM_MAX_VCPUS]; Just cpu_to_apic_id[] please. The distinction between x or x2 isn't interesting here. HVM_MAX_VCPUS is a constant that should never have existed in the first place, *and* its the limit we're looking to finally break when this series is accepted. This array needs to be hvm_info->nr_vcpus entries long, and will want to be more than 128 entries very soon. Just scratch_alloc() the array. Then you can avoid the include. > diff --git a/tools/firmware/hvmloader/mp_tables.c b/tools/firmware/hvmloader/mp_tables.c > index 77d3010406d0..539260365e1e 100644 > --- a/tools/firmware/hvmloader/mp_tables.c > +++ b/tools/firmware/hvmloader/mp_tables.c > @@ -198,8 +198,10 @@ static void fill_mp_config_table(struct mp_config_table *mpct, int length) > /* fills in an MP processor entry for VCPU 'vcpu_id' */ > static void fill_mp_proc_entry(struct mp_proc_entry *mppe, int vcpu_id) > { > + ASSERT(cpu_to_x2apicid[vcpu_id] < 0xFF ); This is just going to break when we hit 256 vCPUs in a VM. What do real systems do? They'll either wrap around 255 like the CPUID xAPIC_ID does, or they'll not write out MP tables at all. > diff --git a/tools/firmware/hvmloader/smp.c b/tools/firmware/hvmloader/smp.c > index 1b940cefd071..d63536f14f00 100644 > --- a/tools/firmware/hvmloader/smp.c > +++ b/tools/firmware/hvmloader/smp.c > @@ -90,10 +120,11 @@ static void boot_cpu(unsigned int cpu) > BUG(); > > /* > - * Wait for the secondary processor to complete initialisation. > + * Wait for the secondary processor to complete initialisation, > + * which is signaled by its x2APIC ID being written to the LUT. Technically all arrays are a lookup table, but I'm not sure LUT is a common enough term to be used unqualified like this. Just say "... signalled by writing its APIC_ID out." The where is very apparent by the code. > @@ -104,6 +135,12 @@ static void boot_cpu(unsigned int cpu) > void smp_initialise(void) > { > unsigned int i, nr_cpus = hvm_info->nr_vcpus; > + uint32_t ecx; > + > + cpuid(1, NULL, NULL, &ecx, NULL); > + has_x2apic = (ecx >> 21) & 1; > + if ( has_x2apic ) > + printf("x2APIC supported\n"); You need to check max_leaf >= 0xb too. Remember Xen might not give you leave 0xb yet, and then you'll hit the assert for finding 0. And has_x2apic wants to be a simple boolean. Nothing good can come from confusing -1 with "x2apic available". I recommend splitting this patch into three. Several aspects are quite subtle. 1) Collect the APIC_IDs on APs 2) Change how callin is signalled. 3) Replace LAPIC_ID() with the collected apic_id. but AFAICT, it can be done as a standalone series, independently of the other Xen/toolstack work. ~Andrew
On Wed Oct 30, 2024 at 11:31 AM GMT, Andrew Cooper wrote: > On 21/10/2024 4:45 pm, Alejandro Vallejo wrote: > > diff --git a/tools/firmware/hvmloader/config.h b/tools/firmware/hvmloader/config.h > > index cd716bf39245..04cab1e59f08 100644 > > --- a/tools/firmware/hvmloader/config.h > > +++ b/tools/firmware/hvmloader/config.h > > @@ -4,6 +4,8 @@ > > #include <stdint.h> > > #include <stdbool.h> > > > > +#include <xen/hvm/hvm_info_table.h> > > + > > enum virtual_vga { VGA_none, VGA_std, VGA_cirrus, VGA_pt }; > > extern enum virtual_vga virtual_vga; > > > > @@ -48,8 +50,9 @@ extern uint8_t ioapic_version; > > > > #define IOAPIC_ID 0x01 > > > > +extern uint32_t cpu_to_x2apicid[HVM_MAX_VCPUS]; > > Just cpu_to_apic_id[] please. The distinction between x or x2 isn't > interesting here. I disagree. While "x" says nothing of interest "x2" does state the width. cpu_to_apic_id is ambiguous and I've seen no shortage of code in which it's impossible to assess its correctness without going to check what the original author meant; and guesswork is bad for robustness. cpu_to_x2apicid has an unambiguous width at the meager cost of 2 chars. If you have very strong feelings about it I can change it, but my preference is to keep it as-is. > > HVM_MAX_VCPUS is a constant that should never have existed in the first > place, *and* its the limit we're looking to finally break when this > series is accepted. > > This array needs to be hvm_info->nr_vcpus entries long, and will want to > be more than 128 entries very soon. Just scratch_alloc() the array. > Then you can avoid the include. That's a major PITA in the libxl side. I'll have a go to see how long it takes me before I weep :_) > > > diff --git a/tools/firmware/hvmloader/mp_tables.c b/tools/firmware/hvmloader/mp_tables.c > > index 77d3010406d0..539260365e1e 100644 > > --- a/tools/firmware/hvmloader/mp_tables.c > > +++ b/tools/firmware/hvmloader/mp_tables.c > > @@ -198,8 +198,10 @@ static void fill_mp_config_table(struct mp_config_table *mpct, int length) > > /* fills in an MP processor entry for VCPU 'vcpu_id' */ > > static void fill_mp_proc_entry(struct mp_proc_entry *mppe, int vcpu_id) > > { > > + ASSERT(cpu_to_x2apicid[vcpu_id] < 0xFF ); > > This is just going to break when we hit 256 vCPUs in a VM. > > What do real systems do? > > They'll either wrap around 255 like the CPUID xAPIC_ID does, or they'll > not write out MP tables at all. Definitely not wrapping around, that makes no sense. It could also show the first 255 APs only. The reality is that if we're exposing 1000 vCPUs is because we expect the guest to use them. While it's likely we want to avoid writing the MP tables, that's not a puddle I want to play with ATM. Note that this is not a new breakage. It was already broken if we were to hit such an APIC ID (which we can't because HVM_MAX_VCPUS is lower). I just made sure we never write out corrupted tables. > > > diff --git a/tools/firmware/hvmloader/smp.c b/tools/firmware/hvmloader/smp.c > > index 1b940cefd071..d63536f14f00 100644 > > --- a/tools/firmware/hvmloader/smp.c > > +++ b/tools/firmware/hvmloader/smp.c > > @@ -90,10 +120,11 @@ static void boot_cpu(unsigned int cpu) > > BUG(); > > > > /* > > - * Wait for the secondary processor to complete initialisation. > > + * Wait for the secondary processor to complete initialisation, > > + * which is signaled by its x2APIC ID being written to the LUT. > > Technically all arrays are a lookup table, but I'm not sure LUT is a No. A look-up table is a very specific implementation of a relation (in the mathematical sense) between an unsigned integer and some other type, implemented by means of an array indexed by said integer. > common enough term to be used unqualified like this. Happy to change the name if it's uncommon enough in this codebase, but it is fairly common outside of it, and it's common enough to have its own wikipedia page with that very acronym. https://en.wikipedia.org/wiki/Lookup_table > > Just say "... signalled by writing its APIC_ID out." The where is very > apparent by the code. > > > @@ -104,6 +135,12 @@ static void boot_cpu(unsigned int cpu) > > void smp_initialise(void) > > { > > unsigned int i, nr_cpus = hvm_info->nr_vcpus; > > + uint32_t ecx; > > + > > + cpuid(1, NULL, NULL, &ecx, NULL); > > + has_x2apic = (ecx >> 21) & 1; > > + if ( has_x2apic ) > > + printf("x2APIC supported\n"); > > You need to check max_leaf >= 0xb too. Remember Xen might not give you > leave 0xb yet, and then you'll hit the assert for finding 0. True. > > And has_x2apic wants to be a simple boolean. Nothing good can come from > confusing -1 with "x2apic available". Sure > > > I recommend splitting this patch into three. Several aspects are quite > subtle. > > 1) Collect the APIC_IDs on APs > 2) Change how callin is signalled. > 3) Replace LAPIC_ID() with the collected apic_id. > > but AFAICT, it can be done as a standalone series, independently of the > other Xen/toolstack work. Ack > > ~Andrew Cheers, Alejandro
On 11.11.2024 12:20, Alejandro Vallejo wrote: > On Wed Oct 30, 2024 at 11:31 AM GMT, Andrew Cooper wrote: >> On 21/10/2024 4:45 pm, Alejandro Vallejo wrote: >>> diff --git a/tools/firmware/hvmloader/config.h b/tools/firmware/hvmloader/config.h >>> index cd716bf39245..04cab1e59f08 100644 >>> --- a/tools/firmware/hvmloader/config.h >>> +++ b/tools/firmware/hvmloader/config.h >>> @@ -4,6 +4,8 @@ >>> #include <stdint.h> >>> #include <stdbool.h> >>> >>> +#include <xen/hvm/hvm_info_table.h> >>> + >>> enum virtual_vga { VGA_none, VGA_std, VGA_cirrus, VGA_pt }; >>> extern enum virtual_vga virtual_vga; >>> >>> @@ -48,8 +50,9 @@ extern uint8_t ioapic_version; >>> >>> #define IOAPIC_ID 0x01 >>> >>> +extern uint32_t cpu_to_x2apicid[HVM_MAX_VCPUS]; >> >> Just cpu_to_apic_id[] please. The distinction between x or x2 isn't >> interesting here. > > I disagree. > > While "x" says nothing of interest "x2" does state the width. cpu_to_apic_id is > ambiguous and I've seen no shortage of code in which it's impossible to assess > its correctness without going to check what the original author meant; and > guesswork is bad for robustness. cpu_to_x2apicid has an unambiguous width at > the meager cost of 2 chars. If you have very strong feelings about it I can > change it, but my preference is to keep it as-is. Just to mention it: I'm with Andrew here, and iirc I even had commented to this effect on an earlier version as well. Jan
On 30.10.2024 12:31, Andrew Cooper wrote: > On 21/10/2024 4:45 pm, Alejandro Vallejo wrote: >> --- a/tools/firmware/hvmloader/mp_tables.c >> +++ b/tools/firmware/hvmloader/mp_tables.c >> @@ -198,8 +198,10 @@ static void fill_mp_config_table(struct mp_config_table *mpct, int length) >> /* fills in an MP processor entry for VCPU 'vcpu_id' */ >> static void fill_mp_proc_entry(struct mp_proc_entry *mppe, int vcpu_id) >> { >> + ASSERT(cpu_to_x2apicid[vcpu_id] < 0xFF ); > > This is just going to break when we hit 256 vCPUs in a VM. > > What do real systems do? > > They'll either wrap around 255 like the CPUID xAPIC_ID does, or they'll > not write out MP tables at all. "at all" may be going a little far. They may simply not advertise CPUs with too wide APIC IDs there, while still allowing others to be discovered this legacy way. Jan
© 2016 - 2024 Red Hat, Inc.