Extend CPU hotplug interface to return architecture specific
identifier for current CPU (in case of x86, it's APIC ID).
Signed-off-by: Igor Mammedov <imammedo@redhat.com>
---
TODO:
* cripple it to behave old way on old machine types so that
new firmware started on new QEMU won't see a difference
when migrated to an old QEMU (i.e. QEMU that doesn't support
this command)
---
docs/specs/acpi_cpu_hotplug.txt | 10 +++++++++-
hw/acpi/cpu.c | 15 +++++++++++++++
hw/acpi/trace-events | 1 +
3 files changed, 25 insertions(+), 1 deletion(-)
diff --git a/docs/specs/acpi_cpu_hotplug.txt b/docs/specs/acpi_cpu_hotplug.txt
index 43c5a193f0..0438678249 100644
--- a/docs/specs/acpi_cpu_hotplug.txt
+++ b/docs/specs/acpi_cpu_hotplug.txt
@@ -32,7 +32,9 @@ Register block size:
read access:
offset:
- [0x0-0x3] reserved
+ [0x0-0x3] Command data 2: (DWORD access)
+ upper 32 bit of 'Command data' if returned data value is 64 bit.
+ in case of error or unsupported command reads is 0x0
[0x4] CPU device status fields: (1 byte access)
bits:
0: Device is enabled and may be used by guest
@@ -87,6 +89,8 @@ write access:
2: stores value into OST status register, triggers
ACPI_DEVICE_OST QMP event from QEMU to external applications
with current values of OST event and status registers.
+ 3: OSPM reads architecture specific value identifying CPU
+ (x86: APIC ID)
other values: reserved
Selecting CPU device beyond possible range has no effect on platform:
@@ -115,3 +119,7 @@ Typical usecases:
5.2 if 'Command data' register has not changed, there is not CPU
corresponding to iterator value and the last valid iterator value
equals to 'max_cpus' + 1
+ - Get architecture specific id for a CPU
+ 1. pick a CPU to read from using 'CPU selector' register
+ 2. write 0x3 int0 'Command field' register
+ 3. read architecture specific id from 'Command data' register
diff --git a/hw/acpi/cpu.c b/hw/acpi/cpu.c
index 87f30a31d7..701542d860 100644
--- a/hw/acpi/cpu.c
+++ b/hw/acpi/cpu.c
@@ -12,11 +12,13 @@
#define ACPI_CPU_FLAGS_OFFSET_RW 4
#define ACPI_CPU_CMD_OFFSET_WR 5
#define ACPI_CPU_CMD_DATA_OFFSET_RW 8
+#define ACPI_CPU_CMD_DATA2_OFFSET_RW 0
enum {
CPHP_GET_NEXT_CPU_WITH_EVENT_CMD = 0,
CPHP_OST_EVENT_CMD = 1,
CPHP_OST_STATUS_CMD = 2,
+ CPHP_GET_CPU_ID_CMD = 3,
CPHP_CMD_MAX
};
@@ -74,11 +76,24 @@ static uint64_t cpu_hotplug_rd(void *opaque, hwaddr addr, unsigned size)
case CPHP_GET_NEXT_CPU_WITH_EVENT_CMD:
val = cpu_st->selector;
break;
+ case CPHP_GET_CPU_ID_CMD:
+ val = cpu_to_le64(cdev->arch_id) & 0xFFFFFFFF;
+ break;
default:
break;
}
trace_cpuhp_acpi_read_cmd_data(cpu_st->selector, val);
break;
+ case ACPI_CPU_CMD_DATA2_OFFSET_RW:
+ switch (cpu_st->command) {
+ case CPHP_GET_CPU_ID_CMD:
+ val = cpu_to_le64(cdev->arch_id) >> 32;
+ break;
+ default:
+ break;
+ }
+ trace_cpuhp_acpi_read_cmd_data2(cpu_st->selector, val);
+ break;
default:
break;
}
diff --git a/hw/acpi/trace-events b/hw/acpi/trace-events
index 96b8273297..afbc77de1c 100644
--- a/hw/acpi/trace-events
+++ b/hw/acpi/trace-events
@@ -23,6 +23,7 @@ cpuhp_acpi_read_flags(uint32_t idx, uint8_t flags) "idx[0x%"PRIx32"] flags: 0x%"
cpuhp_acpi_write_idx(uint32_t idx) "set active cpu idx: 0x%"PRIx32
cpuhp_acpi_write_cmd(uint32_t idx, uint8_t cmd) "idx[0x%"PRIx32"] cmd: 0x%"PRIx8
cpuhp_acpi_read_cmd_data(uint32_t idx, uint32_t data) "idx[0x%"PRIx32"] data: 0x%"PRIx32
+cpuhp_acpi_read_cmd_data2(uint32_t idx, uint32_t data) "idx[0x%"PRIx32"] data: 0x%"PRIx32
cpuhp_acpi_cpu_has_events(uint32_t idx, bool ins, bool rm) "idx[0x%"PRIx32"] inserting: %d, removing: %d"
cpuhp_acpi_clear_inserting_evt(uint32_t idx) "idx[0x%"PRIx32"]"
cpuhp_acpi_clear_remove_evt(uint32_t idx) "idx[0x%"PRIx32"]"
--
2.18.1
On 10/09/19 15:22, Igor Mammedov wrote:
> Extend CPU hotplug interface to return architecture specific
> identifier for current CPU (in case of x86, it's APIC ID).
>
> Signed-off-by: Igor Mammedov <imammedo@redhat.com>
> ---
> TODO:
> * cripple it to behave old way on old machine types so that
> new firmware started on new QEMU won't see a difference
> when migrated to an old QEMU (i.e. QEMU that doesn't support
> this command)
> ---
> docs/specs/acpi_cpu_hotplug.txt | 10 +++++++++-
> hw/acpi/cpu.c | 15 +++++++++++++++
> hw/acpi/trace-events | 1 +
> 3 files changed, 25 insertions(+), 1 deletion(-)
>
> diff --git a/docs/specs/acpi_cpu_hotplug.txt b/docs/specs/acpi_cpu_hotplug.txt
> index 43c5a193f0..0438678249 100644
> --- a/docs/specs/acpi_cpu_hotplug.txt
> +++ b/docs/specs/acpi_cpu_hotplug.txt
> @@ -32,7 +32,9 @@ Register block size:
>
> read access:
> offset:
> - [0x0-0x3] reserved
> + [0x0-0x3] Command data 2: (DWORD access)
> + upper 32 bit of 'Command data' if returned data value is 64 bit.
> + in case of error or unsupported command reads is 0x0
How about
[0x0] Command data 2: (DWORD access, little endian)
If the "CPU selector" value last stored by the guest refers to
an impossible CPU, then 0.
Otherwise, if the "Command field" value last stored by the
guest differs from 3, then 0.
Otherwise, the most significant 32 bits of the selected CPU's
architecture specific ID.
[0x8] Command data: (DWORD access, little endian)
If the "CPU selector" value last stored by the guest refers to
an impossible CPU, then 0.
Otherwise,
- if the "Command field" value last stored by the guest is 0,
then the selector of the currently selected CPU;
- if the "Command field" value last stored by the guest is 3,
then the least significant 32 bits of the selected CPU's
architecture specific ID;
- otherwise, 0.
> [0x4] CPU device status fields: (1 byte access)
> bits:
> 0: Device is enabled and may be used by guest
> @@ -87,6 +89,8 @@ write access:
> 2: stores value into OST status register, triggers
> ACPI_DEVICE_OST QMP event from QEMU to external applications
> with current values of OST event and status registers.
> + 3: OSPM reads architecture specific value identifying CPU
> + (x86: APIC ID)
> other values: reserved
>
Seems OK.
> Selecting CPU device beyond possible range has no effect on platform:
> @@ -115,3 +119,7 @@ Typical usecases:
> 5.2 if 'Command data' register has not changed, there is not CPU
> corresponding to iterator value and the last valid iterator value
> equals to 'max_cpus' + 1
> + - Get architecture specific id for a CPU
> + 1. pick a CPU to read from using 'CPU selector' register
> + 2. write 0x3 int0 'Command field' register
> + 3. read architecture specific id from 'Command data' register
Looks good, except for:
- typo: "int0"
- in step 3, we should reference 'Command data 2' as well.
In fact, in
<http://mid.mail-archive.com/2b10ca48-c734-4f41-9521-136c44060812@redhat.com>,
I wrote, for the "Get a cpu with pending event" use case:
> 1. Store 0x0 to the 'CPU selector' register.
> 2. Store 0x0 to the 'Command field' register.
> 3. Read the 'CPU device status fields' register.
> 4. If both bit#1 and bit#2 are clear in the value read, there is no
> CPU with a pending event.
> 5. Otherwise, read the 'Command data' register. The value read is the
> selector of the CPU with the pending event (which is already
> selected).
and your steps #2 and #3, for getting the arch specific ID, can be
directly appended as steps 6. and 7.!
> diff --git a/hw/acpi/cpu.c b/hw/acpi/cpu.c
> index 87f30a31d7..701542d860 100644
> --- a/hw/acpi/cpu.c
> +++ b/hw/acpi/cpu.c
> @@ -12,11 +12,13 @@
> #define ACPI_CPU_FLAGS_OFFSET_RW 4
> #define ACPI_CPU_CMD_OFFSET_WR 5
> #define ACPI_CPU_CMD_DATA_OFFSET_RW 8
> +#define ACPI_CPU_CMD_DATA2_OFFSET_RW 0
>
> enum {
> CPHP_GET_NEXT_CPU_WITH_EVENT_CMD = 0,
> CPHP_OST_EVENT_CMD = 1,
> CPHP_OST_STATUS_CMD = 2,
> + CPHP_GET_CPU_ID_CMD = 3,
> CPHP_CMD_MAX
> };
>
> @@ -74,11 +76,24 @@ static uint64_t cpu_hotplug_rd(void *opaque, hwaddr addr, unsigned size)
> case CPHP_GET_NEXT_CPU_WITH_EVENT_CMD:
> val = cpu_st->selector;
> break;
> + case CPHP_GET_CPU_ID_CMD:
> + val = cpu_to_le64(cdev->arch_id) & 0xFFFFFFFF;
> + break;
> default:
> break;
> }
> trace_cpuhp_acpi_read_cmd_data(cpu_st->selector, val);
> break;
> + case ACPI_CPU_CMD_DATA2_OFFSET_RW:
> + switch (cpu_st->command) {
> + case CPHP_GET_CPU_ID_CMD:
> + val = cpu_to_le64(cdev->arch_id) >> 32;
> + break;
> + default:
> + break;
> + }
> + trace_cpuhp_acpi_read_cmd_data2(cpu_st->selector, val);
> + break;
> default:
> break;
> }
> diff --git a/hw/acpi/trace-events b/hw/acpi/trace-events
> index 96b8273297..afbc77de1c 100644
> --- a/hw/acpi/trace-events
> +++ b/hw/acpi/trace-events
> @@ -23,6 +23,7 @@ cpuhp_acpi_read_flags(uint32_t idx, uint8_t flags) "idx[0x%"PRIx32"] flags: 0x%"
> cpuhp_acpi_write_idx(uint32_t idx) "set active cpu idx: 0x%"PRIx32
> cpuhp_acpi_write_cmd(uint32_t idx, uint8_t cmd) "idx[0x%"PRIx32"] cmd: 0x%"PRIx8
> cpuhp_acpi_read_cmd_data(uint32_t idx, uint32_t data) "idx[0x%"PRIx32"] data: 0x%"PRIx32
> +cpuhp_acpi_read_cmd_data2(uint32_t idx, uint32_t data) "idx[0x%"PRIx32"] data: 0x%"PRIx32
> cpuhp_acpi_cpu_has_events(uint32_t idx, bool ins, bool rm) "idx[0x%"PRIx32"] inserting: %d, removing: %d"
> cpuhp_acpi_clear_inserting_evt(uint32_t idx) "idx[0x%"PRIx32"]"
> cpuhp_acpi_clear_remove_evt(uint32_t idx) "idx[0x%"PRIx32"]"
>
Looks plausible to me, thanks (discounting the TODO item).
Right now, I can't offer testing for patch#3 (I'm quite far from the
point where I'll be actually looking for a hotplugged CPU :) ), but
based on the docs patches #1 and #2, and my proposed updates, I can
rework my "possible CPU count detection" in OVMF.
Do I need to check in OVMF specifically whether the "modern" CPU hotplug
register block is available? Can you tell me what the oldest machine
types are that support the modern interface?
Hmm... Commit abd49bc2ed2f ("docs: update ACPI CPU hotplug spec with new
protocol", 2016-06-24) seems relevant. First released in v2.7.0. I think
I should detect whether this interface is available.
Can I use the following sequence to detect whether the interface is
available?
1. Store 0x0 to command register.
2. Store 0x0 to selector register.
3. Read 'command data' register.
4. If value read is 0, the interface is available.
(Because I assume that unmapped IO ports read as all-bits-one. Is that
right?)
BTW, can I dynamically detect support for the GET_CPU_ID command too?
I'm thinking, when I enumerate / count all possible CPUs, I can at once
fetch the arch IDs for all of them. If I only get zeros from the command
data registers, across all CPUs, in response to GET_CPU_ID, then the
command is not available.
Thanks
Laszlo
On Thu, Oct 10, 2019 at 04:56:18PM +0200, Laszlo Ersek wrote:
> On 10/09/19 15:22, Igor Mammedov wrote:
> > Extend CPU hotplug interface to return architecture specific
> > identifier for current CPU (in case of x86, it's APIC ID).
> >
> > Signed-off-by: Igor Mammedov <imammedo@redhat.com>
> > ---
> > TODO:
> > * cripple it to behave old way on old machine types so that
> > new firmware started on new QEMU won't see a difference
> > when migrated to an old QEMU (i.e. QEMU that doesn't support
> > this command)
> > ---
> > docs/specs/acpi_cpu_hotplug.txt | 10 +++++++++-
> > hw/acpi/cpu.c | 15 +++++++++++++++
> > hw/acpi/trace-events | 1 +
> > 3 files changed, 25 insertions(+), 1 deletion(-)
> >
> > diff --git a/docs/specs/acpi_cpu_hotplug.txt b/docs/specs/acpi_cpu_hotplug.txt
> > index 43c5a193f0..0438678249 100644
> > --- a/docs/specs/acpi_cpu_hotplug.txt
> > +++ b/docs/specs/acpi_cpu_hotplug.txt
> > @@ -32,7 +32,9 @@ Register block size:
> >
> > read access:
> > offset:
> > - [0x0-0x3] reserved
> > + [0x0-0x3] Command data 2: (DWORD access)
> > + upper 32 bit of 'Command data' if returned data value is 64 bit.
> > + in case of error or unsupported command reads is 0x0
>
> How about
>
> [0x0] Command data 2: (DWORD access, little endian)
> If the "CPU selector" value last stored by the guest refers to
> an impossible CPU, then 0.
> Otherwise, if the "Command field" value last stored by the
> guest differs from 3, then 0.
> Otherwise, the most significant 32 bits of the selected CPU's
> architecture specific ID.
>
> [0x8] Command data: (DWORD access, little endian)
> If the "CPU selector" value last stored by the guest refers to
> an impossible CPU, then 0.
> Otherwise,
> - if the "Command field" value last stored by the guest is 0,
> then the selector of the currently selected CPU;
> - if the "Command field" value last stored by the guest is 3,
> then the least significant 32 bits of the selected CPU's
> architecture specific ID;
> - otherwise, 0.
>
> > [0x4] CPU device status fields: (1 byte access)
> > bits:
> > 0: Device is enabled and may be used by guest
> > @@ -87,6 +89,8 @@ write access:
> > 2: stores value into OST status register, triggers
> > ACPI_DEVICE_OST QMP event from QEMU to external applications
> > with current values of OST event and status registers.
> > + 3: OSPM reads architecture specific value identifying CPU
> > + (x86: APIC ID)
> > other values: reserved
> >
>
> Seems OK.
>
> > Selecting CPU device beyond possible range has no effect on platform:
> > @@ -115,3 +119,7 @@ Typical usecases:
> > 5.2 if 'Command data' register has not changed, there is not CPU
> > corresponding to iterator value and the last valid iterator value
> > equals to 'max_cpus' + 1
> > + - Get architecture specific id for a CPU
> > + 1. pick a CPU to read from using 'CPU selector' register
> > + 2. write 0x3 int0 'Command field' register
> > + 3. read architecture specific id from 'Command data' register
>
> Looks good, except for:
>
> - typo: "int0"
>
> - in step 3, we should reference 'Command data 2' as well.
>
>
> In fact, in
> <http://mid.mail-archive.com/2b10ca48-c734-4f41-9521-136c44060812@redhat.com>,
> I wrote, for the "Get a cpu with pending event" use case:
>
> > 1. Store 0x0 to the 'CPU selector' register.
> > 2. Store 0x0 to the 'Command field' register.
> > 3. Read the 'CPU device status fields' register.
> > 4. If both bit#1 and bit#2 are clear in the value read, there is no
> > CPU with a pending event.
> > 5. Otherwise, read the 'Command data' register. The value read is the
> > selector of the CPU with the pending event (which is already
> > selected).
>
> and your steps #2 and #3, for getting the arch specific ID, can be
> directly appended as steps 6. and 7.!
>
>
> > diff --git a/hw/acpi/cpu.c b/hw/acpi/cpu.c
> > index 87f30a31d7..701542d860 100644
> > --- a/hw/acpi/cpu.c
> > +++ b/hw/acpi/cpu.c
> > @@ -12,11 +12,13 @@
> > #define ACPI_CPU_FLAGS_OFFSET_RW 4
> > #define ACPI_CPU_CMD_OFFSET_WR 5
> > #define ACPI_CPU_CMD_DATA_OFFSET_RW 8
> > +#define ACPI_CPU_CMD_DATA2_OFFSET_RW 0
> >
> > enum {
> > CPHP_GET_NEXT_CPU_WITH_EVENT_CMD = 0,
> > CPHP_OST_EVENT_CMD = 1,
> > CPHP_OST_STATUS_CMD = 2,
> > + CPHP_GET_CPU_ID_CMD = 3,
> > CPHP_CMD_MAX
> > };
> >
> > @@ -74,11 +76,24 @@ static uint64_t cpu_hotplug_rd(void *opaque, hwaddr addr, unsigned size)
> > case CPHP_GET_NEXT_CPU_WITH_EVENT_CMD:
> > val = cpu_st->selector;
> > break;
> > + case CPHP_GET_CPU_ID_CMD:
> > + val = cpu_to_le64(cdev->arch_id) & 0xFFFFFFFF;
> > + break;
> > default:
> > break;
> > }
> > trace_cpuhp_acpi_read_cmd_data(cpu_st->selector, val);
> > break;
> > + case ACPI_CPU_CMD_DATA2_OFFSET_RW:
> > + switch (cpu_st->command) {
> > + case CPHP_GET_CPU_ID_CMD:
> > + val = cpu_to_le64(cdev->arch_id) >> 32;
> > + break;
> > + default:
> > + break;
> > + }
> > + trace_cpuhp_acpi_read_cmd_data2(cpu_st->selector, val);
> > + break;
> > default:
> > break;
> > }
> > diff --git a/hw/acpi/trace-events b/hw/acpi/trace-events
> > index 96b8273297..afbc77de1c 100644
> > --- a/hw/acpi/trace-events
> > +++ b/hw/acpi/trace-events
> > @@ -23,6 +23,7 @@ cpuhp_acpi_read_flags(uint32_t idx, uint8_t flags) "idx[0x%"PRIx32"] flags: 0x%"
> > cpuhp_acpi_write_idx(uint32_t idx) "set active cpu idx: 0x%"PRIx32
> > cpuhp_acpi_write_cmd(uint32_t idx, uint8_t cmd) "idx[0x%"PRIx32"] cmd: 0x%"PRIx8
> > cpuhp_acpi_read_cmd_data(uint32_t idx, uint32_t data) "idx[0x%"PRIx32"] data: 0x%"PRIx32
> > +cpuhp_acpi_read_cmd_data2(uint32_t idx, uint32_t data) "idx[0x%"PRIx32"] data: 0x%"PRIx32
> > cpuhp_acpi_cpu_has_events(uint32_t idx, bool ins, bool rm) "idx[0x%"PRIx32"] inserting: %d, removing: %d"
> > cpuhp_acpi_clear_inserting_evt(uint32_t idx) "idx[0x%"PRIx32"]"
> > cpuhp_acpi_clear_remove_evt(uint32_t idx) "idx[0x%"PRIx32"]"
> >
>
> Looks plausible to me, thanks (discounting the TODO item).
>
> Right now, I can't offer testing for patch#3 (I'm quite far from the
> point where I'll be actually looking for a hotplugged CPU :) ), but
> based on the docs patches #1 and #2, and my proposed updates, I can
> rework my "possible CPU count detection" in OVMF.
>
> Do I need to check in OVMF specifically whether the "modern" CPU hotplug
> register block is available? Can you tell me what the oldest machine
> types are that support the modern interface?
>
> Hmm... Commit abd49bc2ed2f ("docs: update ACPI CPU hotplug spec with new
> protocol", 2016-06-24) seems relevant. First released in v2.7.0. I think
> I should detect whether this interface is available.
>
> Can I use the following sequence to detect whether the interface is
> available?
>
> 1. Store 0x0 to command register.
> 2. Store 0x0 to selector register.
> 3. Read 'command data' register.
> 4. If value read is 0, the interface is available.
>
> (Because I assume that unmapped IO ports read as all-bits-one. Is that
> right?)
>
> BTW, can I dynamically detect support for the GET_CPU_ID command too?
> I'm thinking, when I enumerate / count all possible CPUs, I can at once
> fetch the arch IDs for all of them. If I only get zeros from the command
> data registers, across all CPUs, in response to GET_CPU_ID, then the
> command is not available.
>
> Thanks
> Laszlo
Laszlo, won't we need to add topology info anyway?
if yes then this patch is just a stopgap, so let's do
fw cfg and be done with it?
--
MST
On Thu, Oct 10, 2019 at 11:06:29AM -0400, Michael S. Tsirkin wrote:
> On Thu, Oct 10, 2019 at 04:56:18PM +0200, Laszlo Ersek wrote:
> > On 10/09/19 15:22, Igor Mammedov wrote:
> > > Extend CPU hotplug interface to return architecture specific
> > > identifier for current CPU (in case of x86, it's APIC ID).
> > >
> > > Signed-off-by: Igor Mammedov <imammedo@redhat.com>
> > > ---
> > > TODO:
> > > * cripple it to behave old way on old machine types so that
> > > new firmware started on new QEMU won't see a difference
> > > when migrated to an old QEMU (i.e. QEMU that doesn't support
> > > this command)
> > > ---
> > > docs/specs/acpi_cpu_hotplug.txt | 10 +++++++++-
> > > hw/acpi/cpu.c | 15 +++++++++++++++
> > > hw/acpi/trace-events | 1 +
> > > 3 files changed, 25 insertions(+), 1 deletion(-)
> > >
> > > diff --git a/docs/specs/acpi_cpu_hotplug.txt b/docs/specs/acpi_cpu_hotplug.txt
> > > index 43c5a193f0..0438678249 100644
> > > --- a/docs/specs/acpi_cpu_hotplug.txt
> > > +++ b/docs/specs/acpi_cpu_hotplug.txt
> > > @@ -32,7 +32,9 @@ Register block size:
> > >
> > > read access:
> > > offset:
> > > - [0x0-0x3] reserved
> > > + [0x0-0x3] Command data 2: (DWORD access)
> > > + upper 32 bit of 'Command data' if returned data value is 64 bit.
> > > + in case of error or unsupported command reads is 0x0
> >
> > How about
> >
> > [0x0] Command data 2: (DWORD access, little endian)
> > If the "CPU selector" value last stored by the guest refers to
> > an impossible CPU, then 0.
> > Otherwise, if the "Command field" value last stored by the
> > guest differs from 3, then 0.
> > Otherwise, the most significant 32 bits of the selected CPU's
> > architecture specific ID.
> >
> > [0x8] Command data: (DWORD access, little endian)
> > If the "CPU selector" value last stored by the guest refers to
> > an impossible CPU, then 0.
> > Otherwise,
> > - if the "Command field" value last stored by the guest is 0,
> > then the selector of the currently selected CPU;
> > - if the "Command field" value last stored by the guest is 3,
> > then the least significant 32 bits of the selected CPU's
> > architecture specific ID;
> > - otherwise, 0.
> >
> > > [0x4] CPU device status fields: (1 byte access)
> > > bits:
> > > 0: Device is enabled and may be used by guest
> > > @@ -87,6 +89,8 @@ write access:
> > > 2: stores value into OST status register, triggers
> > > ACPI_DEVICE_OST QMP event from QEMU to external applications
> > > with current values of OST event and status registers.
> > > + 3: OSPM reads architecture specific value identifying CPU
> > > + (x86: APIC ID)
> > > other values: reserved
> > >
> >
> > Seems OK.
> >
> > > Selecting CPU device beyond possible range has no effect on platform:
> > > @@ -115,3 +119,7 @@ Typical usecases:
> > > 5.2 if 'Command data' register has not changed, there is not CPU
> > > corresponding to iterator value and the last valid iterator value
> > > equals to 'max_cpus' + 1
> > > + - Get architecture specific id for a CPU
> > > + 1. pick a CPU to read from using 'CPU selector' register
> > > + 2. write 0x3 int0 'Command field' register
> > > + 3. read architecture specific id from 'Command data' register
> >
> > Looks good, except for:
> >
> > - typo: "int0"
> >
> > - in step 3, we should reference 'Command data 2' as well.
> >
> >
> > In fact, in
> > <http://mid.mail-archive.com/2b10ca48-c734-4f41-9521-136c44060812@redhat.com>,
> > I wrote, for the "Get a cpu with pending event" use case:
> >
> > > 1. Store 0x0 to the 'CPU selector' register.
> > > 2. Store 0x0 to the 'Command field' register.
> > > 3. Read the 'CPU device status fields' register.
> > > 4. If both bit#1 and bit#2 are clear in the value read, there is no
> > > CPU with a pending event.
> > > 5. Otherwise, read the 'Command data' register. The value read is the
> > > selector of the CPU with the pending event (which is already
> > > selected).
> >
> > and your steps #2 and #3, for getting the arch specific ID, can be
> > directly appended as steps 6. and 7.!
> >
> >
> > > diff --git a/hw/acpi/cpu.c b/hw/acpi/cpu.c
> > > index 87f30a31d7..701542d860 100644
> > > --- a/hw/acpi/cpu.c
> > > +++ b/hw/acpi/cpu.c
> > > @@ -12,11 +12,13 @@
> > > #define ACPI_CPU_FLAGS_OFFSET_RW 4
> > > #define ACPI_CPU_CMD_OFFSET_WR 5
> > > #define ACPI_CPU_CMD_DATA_OFFSET_RW 8
> > > +#define ACPI_CPU_CMD_DATA2_OFFSET_RW 0
> > >
> > > enum {
> > > CPHP_GET_NEXT_CPU_WITH_EVENT_CMD = 0,
> > > CPHP_OST_EVENT_CMD = 1,
> > > CPHP_OST_STATUS_CMD = 2,
> > > + CPHP_GET_CPU_ID_CMD = 3,
> > > CPHP_CMD_MAX
> > > };
> > >
> > > @@ -74,11 +76,24 @@ static uint64_t cpu_hotplug_rd(void *opaque, hwaddr addr, unsigned size)
> > > case CPHP_GET_NEXT_CPU_WITH_EVENT_CMD:
> > > val = cpu_st->selector;
> > > break;
> > > + case CPHP_GET_CPU_ID_CMD:
> > > + val = cpu_to_le64(cdev->arch_id) & 0xFFFFFFFF;
> > > + break;
> > > default:
> > > break;
> > > }
> > > trace_cpuhp_acpi_read_cmd_data(cpu_st->selector, val);
> > > break;
> > > + case ACPI_CPU_CMD_DATA2_OFFSET_RW:
> > > + switch (cpu_st->command) {
> > > + case CPHP_GET_CPU_ID_CMD:
> > > + val = cpu_to_le64(cdev->arch_id) >> 32;
> > > + break;
> > > + default:
> > > + break;
> > > + }
> > > + trace_cpuhp_acpi_read_cmd_data2(cpu_st->selector, val);
> > > + break;
> > > default:
> > > break;
> > > }
> > > diff --git a/hw/acpi/trace-events b/hw/acpi/trace-events
> > > index 96b8273297..afbc77de1c 100644
> > > --- a/hw/acpi/trace-events
> > > +++ b/hw/acpi/trace-events
> > > @@ -23,6 +23,7 @@ cpuhp_acpi_read_flags(uint32_t idx, uint8_t flags) "idx[0x%"PRIx32"] flags: 0x%"
> > > cpuhp_acpi_write_idx(uint32_t idx) "set active cpu idx: 0x%"PRIx32
> > > cpuhp_acpi_write_cmd(uint32_t idx, uint8_t cmd) "idx[0x%"PRIx32"] cmd: 0x%"PRIx8
> > > cpuhp_acpi_read_cmd_data(uint32_t idx, uint32_t data) "idx[0x%"PRIx32"] data: 0x%"PRIx32
> > > +cpuhp_acpi_read_cmd_data2(uint32_t idx, uint32_t data) "idx[0x%"PRIx32"] data: 0x%"PRIx32
> > > cpuhp_acpi_cpu_has_events(uint32_t idx, bool ins, bool rm) "idx[0x%"PRIx32"] inserting: %d, removing: %d"
> > > cpuhp_acpi_clear_inserting_evt(uint32_t idx) "idx[0x%"PRIx32"]"
> > > cpuhp_acpi_clear_remove_evt(uint32_t idx) "idx[0x%"PRIx32"]"
> > >
> >
> > Looks plausible to me, thanks (discounting the TODO item).
> >
> > Right now, I can't offer testing for patch#3 (I'm quite far from the
> > point where I'll be actually looking for a hotplugged CPU :) ), but
> > based on the docs patches #1 and #2, and my proposed updates, I can
> > rework my "possible CPU count detection" in OVMF.
> >
> > Do I need to check in OVMF specifically whether the "modern" CPU hotplug
> > register block is available? Can you tell me what the oldest machine
> > types are that support the modern interface?
> >
> > Hmm... Commit abd49bc2ed2f ("docs: update ACPI CPU hotplug spec with new
> > protocol", 2016-06-24) seems relevant. First released in v2.7.0. I think
> > I should detect whether this interface is available.
> >
> > Can I use the following sequence to detect whether the interface is
> > available?
> >
> > 1. Store 0x0 to command register.
> > 2. Store 0x0 to selector register.
> > 3. Read 'command data' register.
> > 4. If value read is 0, the interface is available.
> >
> > (Because I assume that unmapped IO ports read as all-bits-one. Is that
> > right?)
> >
> > BTW, can I dynamically detect support for the GET_CPU_ID command too?
> > I'm thinking, when I enumerate / count all possible CPUs, I can at once
> > fetch the arch IDs for all of them. If I only get zeros from the command
> > data registers, across all CPUs, in response to GET_CPU_ID, then the
> > command is not available.
> >
> > Thanks
> > Laszlo
>
> Laszlo, won't we need to add topology info anyway?
> if yes then this patch is just a stopgap, so let's do
> fw cfg and be done with it?
Topology info is already available on CPUID.
--
Eduardo
On 10/10/19 21:26, Eduardo Habkost wrote: > Topology info is already available on CPUID. Independently of everything else, thanks for pointing this out. The edk2 library called "LocalApicLib" has two relevant functions: > /** > Get Package ID/Core ID/Thread ID of a processor. > > The algorithm assumes the target system has symmetry across physical > package boundaries with respect to the number of logical processors > per package, number of cores per package. > > @param[in] InitialApicId Initial APIC ID of the target logical processor. > @param[out] Package Returns the processor package ID. > @param[out] Core Returns the processor core ID. > @param[out] Thread Returns the processor thread ID. > **/ > VOID > EFIAPI > GetProcessorLocationByApicId ( > IN UINT32 InitialApicId, > OUT UINT32 *Package OPTIONAL, > OUT UINT32 *Core OPTIONAL, > OUT UINT32 *Thread OPTIONAL > ); > > /** > Get Package ID/Module ID/Tile ID/Die ID/Core ID/Thread ID of a processor. > > The algorithm assumes the target system has symmetry across physical > package boundaries with respect to the number of threads per core, number of > cores per module, number of modules per tile, number of tiles per die, number > of dies per package. > > @param[in] InitialApicId Initial APIC ID of the target logical processor. > @param[out] Package Returns the processor package ID. > @param[out] Die Returns the processor die ID. > @param[out] Tile Returns the processor tile ID. > @param[out] Module Returns the processor module ID. > @param[out] Core Returns the processor core ID. > @param[out] Thread Returns the processor thread ID. > **/ > VOID > EFIAPI > GetProcessorLocation2ByApicId ( > IN UINT32 InitialApicId, > OUT UINT32 *Package OPTIONAL, > OUT UINT32 *Die OPTIONAL, > OUT UINT32 *Tile OPTIONAL, > OUT UINT32 *Module OPTIONAL, > OUT UINT32 *Core OPTIONAL, > OUT UINT32 *Thread OPTIONAL > ); They are implemented with heavy CPUID usage. So... just give me the APIC-ID. That's the primary key in edk2 for identifying x86 processors. Thanks Laszlo
On 10/10/19 17:06, Michael S. Tsirkin wrote:
> On Thu, Oct 10, 2019 at 04:56:18PM +0200, Laszlo Ersek wrote:
>> On 10/09/19 15:22, Igor Mammedov wrote:
>>> Extend CPU hotplug interface to return architecture specific
>>> identifier for current CPU (in case of x86, it's APIC ID).
>>>
>>> Signed-off-by: Igor Mammedov <imammedo@redhat.com>
>>> ---
>>> TODO:
>>> * cripple it to behave old way on old machine types so that
>>> new firmware started on new QEMU won't see a difference
>>> when migrated to an old QEMU (i.e. QEMU that doesn't support
>>> this command)
>>> ---
>>> docs/specs/acpi_cpu_hotplug.txt | 10 +++++++++-
>>> hw/acpi/cpu.c | 15 +++++++++++++++
>>> hw/acpi/trace-events | 1 +
>>> 3 files changed, 25 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/docs/specs/acpi_cpu_hotplug.txt b/docs/specs/acpi_cpu_hotplug.txt
>>> index 43c5a193f0..0438678249 100644
>>> --- a/docs/specs/acpi_cpu_hotplug.txt
>>> +++ b/docs/specs/acpi_cpu_hotplug.txt
>>> @@ -32,7 +32,9 @@ Register block size:
>>>
>>> read access:
>>> offset:
>>> - [0x0-0x3] reserved
>>> + [0x0-0x3] Command data 2: (DWORD access)
>>> + upper 32 bit of 'Command data' if returned data value is 64 bit.
>>> + in case of error or unsupported command reads is 0x0
>>
>> How about
>>
>> [0x0] Command data 2: (DWORD access, little endian)
>> If the "CPU selector" value last stored by the guest refers to
>> an impossible CPU, then 0.
>> Otherwise, if the "Command field" value last stored by the
>> guest differs from 3, then 0.
>> Otherwise, the most significant 32 bits of the selected CPU's
>> architecture specific ID.
>>
>> [0x8] Command data: (DWORD access, little endian)
>> If the "CPU selector" value last stored by the guest refers to
>> an impossible CPU, then 0.
>> Otherwise,
>> - if the "Command field" value last stored by the guest is 0,
>> then the selector of the currently selected CPU;
>> - if the "Command field" value last stored by the guest is 3,
>> then the least significant 32 bits of the selected CPU's
>> architecture specific ID;
>> - otherwise, 0.
>>
>>> [0x4] CPU device status fields: (1 byte access)
>>> bits:
>>> 0: Device is enabled and may be used by guest
>>> @@ -87,6 +89,8 @@ write access:
>>> 2: stores value into OST status register, triggers
>>> ACPI_DEVICE_OST QMP event from QEMU to external applications
>>> with current values of OST event and status registers.
>>> + 3: OSPM reads architecture specific value identifying CPU
>>> + (x86: APIC ID)
>>> other values: reserved
>>>
>>
>> Seems OK.
>>
>>> Selecting CPU device beyond possible range has no effect on platform:
>>> @@ -115,3 +119,7 @@ Typical usecases:
>>> 5.2 if 'Command data' register has not changed, there is not CPU
>>> corresponding to iterator value and the last valid iterator value
>>> equals to 'max_cpus' + 1
>>> + - Get architecture specific id for a CPU
>>> + 1. pick a CPU to read from using 'CPU selector' register
>>> + 2. write 0x3 int0 'Command field' register
>>> + 3. read architecture specific id from 'Command data' register
>>
>> Looks good, except for:
>>
>> - typo: "int0"
>>
>> - in step 3, we should reference 'Command data 2' as well.
>>
>>
>> In fact, in
>> <http://mid.mail-archive.com/2b10ca48-c734-4f41-9521-136c44060812@redhat.com>,
>> I wrote, for the "Get a cpu with pending event" use case:
>>
>>> 1. Store 0x0 to the 'CPU selector' register.
>>> 2. Store 0x0 to the 'Command field' register.
>>> 3. Read the 'CPU device status fields' register.
>>> 4. If both bit#1 and bit#2 are clear in the value read, there is no
>>> CPU with a pending event.
>>> 5. Otherwise, read the 'Command data' register. The value read is the
>>> selector of the CPU with the pending event (which is already
>>> selected).
>>
>> and your steps #2 and #3, for getting the arch specific ID, can be
>> directly appended as steps 6. and 7.!
>>
>>
>>> diff --git a/hw/acpi/cpu.c b/hw/acpi/cpu.c
>>> index 87f30a31d7..701542d860 100644
>>> --- a/hw/acpi/cpu.c
>>> +++ b/hw/acpi/cpu.c
>>> @@ -12,11 +12,13 @@
>>> #define ACPI_CPU_FLAGS_OFFSET_RW 4
>>> #define ACPI_CPU_CMD_OFFSET_WR 5
>>> #define ACPI_CPU_CMD_DATA_OFFSET_RW 8
>>> +#define ACPI_CPU_CMD_DATA2_OFFSET_RW 0
>>>
>>> enum {
>>> CPHP_GET_NEXT_CPU_WITH_EVENT_CMD = 0,
>>> CPHP_OST_EVENT_CMD = 1,
>>> CPHP_OST_STATUS_CMD = 2,
>>> + CPHP_GET_CPU_ID_CMD = 3,
>>> CPHP_CMD_MAX
>>> };
>>>
>>> @@ -74,11 +76,24 @@ static uint64_t cpu_hotplug_rd(void *opaque, hwaddr addr, unsigned size)
>>> case CPHP_GET_NEXT_CPU_WITH_EVENT_CMD:
>>> val = cpu_st->selector;
>>> break;
>>> + case CPHP_GET_CPU_ID_CMD:
>>> + val = cpu_to_le64(cdev->arch_id) & 0xFFFFFFFF;
>>> + break;
>>> default:
>>> break;
>>> }
>>> trace_cpuhp_acpi_read_cmd_data(cpu_st->selector, val);
>>> break;
>>> + case ACPI_CPU_CMD_DATA2_OFFSET_RW:
>>> + switch (cpu_st->command) {
>>> + case CPHP_GET_CPU_ID_CMD:
>>> + val = cpu_to_le64(cdev->arch_id) >> 32;
>>> + break;
>>> + default:
>>> + break;
>>> + }
>>> + trace_cpuhp_acpi_read_cmd_data2(cpu_st->selector, val);
>>> + break;
>>> default:
>>> break;
>>> }
>>> diff --git a/hw/acpi/trace-events b/hw/acpi/trace-events
>>> index 96b8273297..afbc77de1c 100644
>>> --- a/hw/acpi/trace-events
>>> +++ b/hw/acpi/trace-events
>>> @@ -23,6 +23,7 @@ cpuhp_acpi_read_flags(uint32_t idx, uint8_t flags) "idx[0x%"PRIx32"] flags: 0x%"
>>> cpuhp_acpi_write_idx(uint32_t idx) "set active cpu idx: 0x%"PRIx32
>>> cpuhp_acpi_write_cmd(uint32_t idx, uint8_t cmd) "idx[0x%"PRIx32"] cmd: 0x%"PRIx8
>>> cpuhp_acpi_read_cmd_data(uint32_t idx, uint32_t data) "idx[0x%"PRIx32"] data: 0x%"PRIx32
>>> +cpuhp_acpi_read_cmd_data2(uint32_t idx, uint32_t data) "idx[0x%"PRIx32"] data: 0x%"PRIx32
>>> cpuhp_acpi_cpu_has_events(uint32_t idx, bool ins, bool rm) "idx[0x%"PRIx32"] inserting: %d, removing: %d"
>>> cpuhp_acpi_clear_inserting_evt(uint32_t idx) "idx[0x%"PRIx32"]"
>>> cpuhp_acpi_clear_remove_evt(uint32_t idx) "idx[0x%"PRIx32"]"
>>>
>>
>> Looks plausible to me, thanks (discounting the TODO item).
>>
>> Right now, I can't offer testing for patch#3 (I'm quite far from the
>> point where I'll be actually looking for a hotplugged CPU :) ), but
>> based on the docs patches #1 and #2, and my proposed updates, I can
>> rework my "possible CPU count detection" in OVMF.
>>
>> Do I need to check in OVMF specifically whether the "modern" CPU hotplug
>> register block is available? Can you tell me what the oldest machine
>> types are that support the modern interface?
>>
>> Hmm... Commit abd49bc2ed2f ("docs: update ACPI CPU hotplug spec with new
>> protocol", 2016-06-24) seems relevant. First released in v2.7.0. I think
>> I should detect whether this interface is available.
>>
>> Can I use the following sequence to detect whether the interface is
>> available?
>>
>> 1. Store 0x0 to command register.
>> 2. Store 0x0 to selector register.
>> 3. Read 'command data' register.
>> 4. If value read is 0, the interface is available.
>>
>> (Because I assume that unmapped IO ports read as all-bits-one. Is that
>> right?)
>>
>> BTW, can I dynamically detect support for the GET_CPU_ID command too?
>> I'm thinking, when I enumerate / count all possible CPUs, I can at once
>> fetch the arch IDs for all of them. If I only get zeros from the command
>> data registers, across all CPUs, in response to GET_CPU_ID, then the
>> command is not available.
>>
>> Thanks
>> Laszlo
>
> Laszlo, won't we need to add topology info anyway?
> if yes then this patch is just a stopgap, so let's do
> fw cfg and be done with it?
No, CPU topology is not relevant *per se* for this OVMF feature (CPU
hotplug with SMM).
CPU topology is only interesting as a building block.
What OVMF really needs, for the hotplug SMI handler, is the ability to:
- learn a QEMU-specific unique identifier for the CPU that has just been
hot-plugged,
- translate said QEMU-specific unique CPU identifier to the new CPU's
APIC-ID.
So how do we get there?
- If the hotplug register block (the hotplug hardware interface) lets me
fetch the APIC-ID for the new CPU immediately, in the hotplug SMI
handler, that's best (most convenient).
- If the QEMU-specific unique identifier fetched from the register block
is a sequential CPU index (or "selector"), then I *could* use the CPU
topology as a building block, for constructing the APIC-ID myself. I
would decompose the CPU index into thread / core / die / socket offsets,
based on the topology, and re-compose those offsets into an APIC-ID.
- If the QEMU-specific unique identifier fetched from the register block
is a sequential CPU index (or "selector"), then I could *also* use a
complete mapping table (index to APIC-ID), fetched earlier from fw_cfg.
I would use the CPU index as a subscript into that table.
What really matters for OVMF is the APIC-ID of the just-plugged CPU.
I definitely have to access the CPU hotplug register block in the SMI
handler, because that's where I can get *any kind* of identifier for the
new CPU. (Unless we want to implement a dedicated CPU hotplug controller
for OVMF, that is.)
Thanks
Laszlo
On Thu, 10 Oct 2019 11:06:29 -0400
"Michael S. Tsirkin" <mst@redhat.com> wrote:
> On Thu, Oct 10, 2019 at 04:56:18PM +0200, Laszlo Ersek wrote:
> > On 10/09/19 15:22, Igor Mammedov wrote:
> > > Extend CPU hotplug interface to return architecture specific
> > > identifier for current CPU (in case of x86, it's APIC ID).
> > >
> > > Signed-off-by: Igor Mammedov <imammedo@redhat.com>
> > > ---
> > > TODO:
> > > * cripple it to behave old way on old machine types so that
> > > new firmware started on new QEMU won't see a difference
> > > when migrated to an old QEMU (i.e. QEMU that doesn't support
> > > this command)
> > > ---
> > > docs/specs/acpi_cpu_hotplug.txt | 10 +++++++++-
> > > hw/acpi/cpu.c | 15 +++++++++++++++
> > > hw/acpi/trace-events | 1 +
> > > 3 files changed, 25 insertions(+), 1 deletion(-)
> > >
> > > diff --git a/docs/specs/acpi_cpu_hotplug.txt b/docs/specs/acpi_cpu_hotplug.txt
> > > index 43c5a193f0..0438678249 100644
> > > --- a/docs/specs/acpi_cpu_hotplug.txt
> > > +++ b/docs/specs/acpi_cpu_hotplug.txt
> > > @@ -32,7 +32,9 @@ Register block size:
> > >
> > > read access:
> > > offset:
> > > - [0x0-0x3] reserved
> > > + [0x0-0x3] Command data 2: (DWORD access)
> > > + upper 32 bit of 'Command data' if returned data value is 64 bit.
> > > + in case of error or unsupported command reads is 0x0
> >
> > How about
> >
> > [0x0] Command data 2: (DWORD access, little endian)
> > If the "CPU selector" value last stored by the guest refers to
> > an impossible CPU, then 0.
> > Otherwise, if the "Command field" value last stored by the
> > guest differs from 3, then 0.
> > Otherwise, the most significant 32 bits of the selected CPU's
> > architecture specific ID.
> >
> > [0x8] Command data: (DWORD access, little endian)
> > If the "CPU selector" value last stored by the guest refers to
> > an impossible CPU, then 0.
> > Otherwise,
> > - if the "Command field" value last stored by the guest is 0,
> > then the selector of the currently selected CPU;
> > - if the "Command field" value last stored by the guest is 3,
> > then the least significant 32 bits of the selected CPU's
> > architecture specific ID;
> > - otherwise, 0.
> >
> > > [0x4] CPU device status fields: (1 byte access)
> > > bits:
> > > 0: Device is enabled and may be used by guest
> > > @@ -87,6 +89,8 @@ write access:
> > > 2: stores value into OST status register, triggers
> > > ACPI_DEVICE_OST QMP event from QEMU to external applications
> > > with current values of OST event and status registers.
> > > + 3: OSPM reads architecture specific value identifying CPU
> > > + (x86: APIC ID)
> > > other values: reserved
> > >
> >
> > Seems OK.
> >
> > > Selecting CPU device beyond possible range has no effect on platform:
> > > @@ -115,3 +119,7 @@ Typical usecases:
> > > 5.2 if 'Command data' register has not changed, there is not CPU
> > > corresponding to iterator value and the last valid iterator value
> > > equals to 'max_cpus' + 1
> > > + - Get architecture specific id for a CPU
> > > + 1. pick a CPU to read from using 'CPU selector' register
> > > + 2. write 0x3 int0 'Command field' register
> > > + 3. read architecture specific id from 'Command data' register
> >
> > Looks good, except for:
> >
> > - typo: "int0"
> >
> > - in step 3, we should reference 'Command data 2' as well.
> >
> >
> > In fact, in
> > <http://mid.mail-archive.com/2b10ca48-c734-4f41-9521-136c44060812@redhat.com>,
> > I wrote, for the "Get a cpu with pending event" use case:
> >
> > > 1. Store 0x0 to the 'CPU selector' register.
> > > 2. Store 0x0 to the 'Command field' register.
> > > 3. Read the 'CPU device status fields' register.
> > > 4. If both bit#1 and bit#2 are clear in the value read, there is no
> > > CPU with a pending event.
> > > 5. Otherwise, read the 'Command data' register. The value read is the
> > > selector of the CPU with the pending event (which is already
> > > selected).
> >
> > and your steps #2 and #3, for getting the arch specific ID, can be
> > directly appended as steps 6. and 7.!
> >
> >
> > > diff --git a/hw/acpi/cpu.c b/hw/acpi/cpu.c
> > > index 87f30a31d7..701542d860 100644
> > > --- a/hw/acpi/cpu.c
> > > +++ b/hw/acpi/cpu.c
> > > @@ -12,11 +12,13 @@
> > > #define ACPI_CPU_FLAGS_OFFSET_RW 4
> > > #define ACPI_CPU_CMD_OFFSET_WR 5
> > > #define ACPI_CPU_CMD_DATA_OFFSET_RW 8
> > > +#define ACPI_CPU_CMD_DATA2_OFFSET_RW 0
> > >
> > > enum {
> > > CPHP_GET_NEXT_CPU_WITH_EVENT_CMD = 0,
> > > CPHP_OST_EVENT_CMD = 1,
> > > CPHP_OST_STATUS_CMD = 2,
> > > + CPHP_GET_CPU_ID_CMD = 3,
> > > CPHP_CMD_MAX
> > > };
> > >
> > > @@ -74,11 +76,24 @@ static uint64_t cpu_hotplug_rd(void *opaque, hwaddr addr, unsigned size)
> > > case CPHP_GET_NEXT_CPU_WITH_EVENT_CMD:
> > > val = cpu_st->selector;
> > > break;
> > > + case CPHP_GET_CPU_ID_CMD:
> > > + val = cpu_to_le64(cdev->arch_id) & 0xFFFFFFFF;
> > > + break;
> > > default:
> > > break;
> > > }
> > > trace_cpuhp_acpi_read_cmd_data(cpu_st->selector, val);
> > > break;
> > > + case ACPI_CPU_CMD_DATA2_OFFSET_RW:
> > > + switch (cpu_st->command) {
> > > + case CPHP_GET_CPU_ID_CMD:
> > > + val = cpu_to_le64(cdev->arch_id) >> 32;
> > > + break;
> > > + default:
> > > + break;
> > > + }
> > > + trace_cpuhp_acpi_read_cmd_data2(cpu_st->selector, val);
> > > + break;
> > > default:
> > > break;
> > > }
> > > diff --git a/hw/acpi/trace-events b/hw/acpi/trace-events
> > > index 96b8273297..afbc77de1c 100644
> > > --- a/hw/acpi/trace-events
> > > +++ b/hw/acpi/trace-events
> > > @@ -23,6 +23,7 @@ cpuhp_acpi_read_flags(uint32_t idx, uint8_t flags) "idx[0x%"PRIx32"] flags: 0x%"
> > > cpuhp_acpi_write_idx(uint32_t idx) "set active cpu idx: 0x%"PRIx32
> > > cpuhp_acpi_write_cmd(uint32_t idx, uint8_t cmd) "idx[0x%"PRIx32"] cmd: 0x%"PRIx8
> > > cpuhp_acpi_read_cmd_data(uint32_t idx, uint32_t data) "idx[0x%"PRIx32"] data: 0x%"PRIx32
> > > +cpuhp_acpi_read_cmd_data2(uint32_t idx, uint32_t data) "idx[0x%"PRIx32"] data: 0x%"PRIx32
> > > cpuhp_acpi_cpu_has_events(uint32_t idx, bool ins, bool rm) "idx[0x%"PRIx32"] inserting: %d, removing: %d"
> > > cpuhp_acpi_clear_inserting_evt(uint32_t idx) "idx[0x%"PRIx32"]"
> > > cpuhp_acpi_clear_remove_evt(uint32_t idx) "idx[0x%"PRIx32"]"
> > >
> >
> > Looks plausible to me, thanks (discounting the TODO item).
> >
> > Right now, I can't offer testing for patch#3 (I'm quite far from the
> > point where I'll be actually looking for a hotplugged CPU :) ), but
> > based on the docs patches #1 and #2, and my proposed updates, I can
> > rework my "possible CPU count detection" in OVMF.
> >
> > Do I need to check in OVMF specifically whether the "modern" CPU hotplug
> > register block is available? Can you tell me what the oldest machine
> > types are that support the modern interface?
> >
> > Hmm... Commit abd49bc2ed2f ("docs: update ACPI CPU hotplug spec with new
> > protocol", 2016-06-24) seems relevant. First released in v2.7.0. I think
> > I should detect whether this interface is available.
> >
> > Can I use the following sequence to detect whether the interface is
> > available?
> >
> > 1. Store 0x0 to command register.
> > 2. Store 0x0 to selector register.
> > 3. Read 'command data' register.
> > 4. If value read is 0, the interface is available.
> >
> > (Because I assume that unmapped IO ports read as all-bits-one. Is that
> > right?)
> >
> > BTW, can I dynamically detect support for the GET_CPU_ID command too?
> > I'm thinking, when I enumerate / count all possible CPUs, I can at once
> > fetch the arch IDs for all of them. If I only get zeros from the command
> > data registers, across all CPUs, in response to GET_CPU_ID, then the
> > command is not available.
> >
> > Thanks
> > Laszlo
>
> Laszlo, won't we need to add topology info anyway?
> if yes then this patch is just a stopgap, so let's do
> fw cfg and be done with it?
answer to your question and suggestion is in following thread
"Re: [RFC 3/3] acpi: cpuhp: add CPHP_GET_CPU_ID_CMD command"
"
In fact, it seems like OVMF will have to synthesize the new
(hot-plugged) VCPU's *APIC-ID* from the following information sources:
- the topology information described above (die / core / thread counts), and
- the "modern" CPU hotplug interface (docs/specs/acpi_cpu_hotplug.txt).
"
On Thu, 10 Oct 2019 16:56:18 +0200
Laszlo Ersek <lersek@redhat.com> wrote:
> On 10/09/19 15:22, Igor Mammedov wrote:
> > Extend CPU hotplug interface to return architecture specific
> > identifier for current CPU (in case of x86, it's APIC ID).
> >
> > Signed-off-by: Igor Mammedov <imammedo@redhat.com>
> > ---
> > TODO:
> > * cripple it to behave old way on old machine types so that
> > new firmware started on new QEMU won't see a difference
> > when migrated to an old QEMU (i.e. QEMU that doesn't support
> > this command)
> > ---
> > docs/specs/acpi_cpu_hotplug.txt | 10 +++++++++-
> > hw/acpi/cpu.c | 15 +++++++++++++++
> > hw/acpi/trace-events | 1 +
> > 3 files changed, 25 insertions(+), 1 deletion(-)
> >
> > diff --git a/docs/specs/acpi_cpu_hotplug.txt b/docs/specs/acpi_cpu_hotplug.txt
> > index 43c5a193f0..0438678249 100644
> > --- a/docs/specs/acpi_cpu_hotplug.txt
> > +++ b/docs/specs/acpi_cpu_hotplug.txt
> > @@ -32,7 +32,9 @@ Register block size:
> >
> > read access:
> > offset:
> > - [0x0-0x3] reserved
> > + [0x0-0x3] Command data 2: (DWORD access)
> > + upper 32 bit of 'Command data' if returned data value is 64 bit.
> > + in case of error or unsupported command reads is 0x0
>
> How about
>
> [0x0] Command data 2: (DWORD access, little endian)
> If the "CPU selector" value last stored by the guest refers to
> an impossible CPU, then 0.
> Otherwise, if the "Command field" value last stored by the
> guest differs from 3, then 0.
> Otherwise, the most significant 32 bits of the selected CPU's
> architecture specific ID.
>
> [0x8] Command data: (DWORD access, little endian)
> If the "CPU selector" value last stored by the guest refers to
> an impossible CPU, then 0.
> Otherwise,
> - if the "Command field" value last stored by the guest is 0,
> then the selector of the currently selected CPU;
> - if the "Command field" value last stored by the guest is 3,
> then the least significant 32 bits of the selected CPU's
> architecture specific ID;
> - otherwise, 0.
this format is easier to read comparing to suggestion on [1/3]
https://lists.gnu.org/archive/html/qemu-devel/2019-10/msg02256.html
So I'll use it in [1/3] and then just extend it here
>
> > [0x4] CPU device status fields: (1 byte access)
> > bits:
> > 0: Device is enabled and may be used by guest
> > @@ -87,6 +89,8 @@ write access:
> > 2: stores value into OST status register, triggers
> > ACPI_DEVICE_OST QMP event from QEMU to external applications
> > with current values of OST event and status registers.
> > + 3: OSPM reads architecture specific value identifying CPU
> > + (x86: APIC ID)
> > other values: reserved
> >
>
> Seems OK.
>
> > Selecting CPU device beyond possible range has no effect on platform:
> > @@ -115,3 +119,7 @@ Typical usecases:
> > 5.2 if 'Command data' register has not changed, there is not CPU
> > corresponding to iterator value and the last valid iterator value
> > equals to 'max_cpus' + 1
> > + - Get architecture specific id for a CPU
> > + 1. pick a CPU to read from using 'CPU selector' register
> > + 2. write 0x3 int0 'Command field' register
> > + 3. read architecture specific id from 'Command data' register
>
> Looks good, except for:
>
> - typo: "int0"
>
> - in step 3, we should reference 'Command data 2' as well.
>
>
> In fact, in
> <http://mid.mail-archive.com/2b10ca48-c734-4f41-9521-136c44060812@redhat.com>,
> I wrote, for the "Get a cpu with pending event" use case:
>
> > 1. Store 0x0 to the 'CPU selector' register.
> > 2. Store 0x0 to the 'Command field' register.
> > 3. Read the 'CPU device status fields' register.
> > 4. If both bit#1 and bit#2 are clear in the value read, there is no
> > CPU with a pending event.
> > 5. Otherwise, read the 'Command data' register. The value read is the
> > selector of the CPU with the pending event (which is already
> > selected).
>
> and your steps #2 and #3, for getting the arch specific ID, can be
> directly appended as steps 6. and 7.!
>
>
> > diff --git a/hw/acpi/cpu.c b/hw/acpi/cpu.c
> > index 87f30a31d7..701542d860 100644
> > --- a/hw/acpi/cpu.c
> > +++ b/hw/acpi/cpu.c
> > @@ -12,11 +12,13 @@
> > #define ACPI_CPU_FLAGS_OFFSET_RW 4
> > #define ACPI_CPU_CMD_OFFSET_WR 5
> > #define ACPI_CPU_CMD_DATA_OFFSET_RW 8
> > +#define ACPI_CPU_CMD_DATA2_OFFSET_RW 0
> >
> > enum {
> > CPHP_GET_NEXT_CPU_WITH_EVENT_CMD = 0,
> > CPHP_OST_EVENT_CMD = 1,
> > CPHP_OST_STATUS_CMD = 2,
> > + CPHP_GET_CPU_ID_CMD = 3,
> > CPHP_CMD_MAX
> > };
> >
> > @@ -74,11 +76,24 @@ static uint64_t cpu_hotplug_rd(void *opaque, hwaddr addr, unsigned size)
> > case CPHP_GET_NEXT_CPU_WITH_EVENT_CMD:
> > val = cpu_st->selector;
> > break;
> > + case CPHP_GET_CPU_ID_CMD:
> > + val = cpu_to_le64(cdev->arch_id) & 0xFFFFFFFF;
> > + break;
> > default:
> > break;
> > }
> > trace_cpuhp_acpi_read_cmd_data(cpu_st->selector, val);
> > break;
> > + case ACPI_CPU_CMD_DATA2_OFFSET_RW:
> > + switch (cpu_st->command) {
> > + case CPHP_GET_CPU_ID_CMD:
> > + val = cpu_to_le64(cdev->arch_id) >> 32;
> > + break;
> > + default:
> > + break;
> > + }
> > + trace_cpuhp_acpi_read_cmd_data2(cpu_st->selector, val);
> > + break;
> > default:
> > break;
> > }
> > diff --git a/hw/acpi/trace-events b/hw/acpi/trace-events
> > index 96b8273297..afbc77de1c 100644
> > --- a/hw/acpi/trace-events
> > +++ b/hw/acpi/trace-events
> > @@ -23,6 +23,7 @@ cpuhp_acpi_read_flags(uint32_t idx, uint8_t flags) "idx[0x%"PRIx32"] flags: 0x%"
> > cpuhp_acpi_write_idx(uint32_t idx) "set active cpu idx: 0x%"PRIx32
> > cpuhp_acpi_write_cmd(uint32_t idx, uint8_t cmd) "idx[0x%"PRIx32"] cmd: 0x%"PRIx8
> > cpuhp_acpi_read_cmd_data(uint32_t idx, uint32_t data) "idx[0x%"PRIx32"] data: 0x%"PRIx32
> > +cpuhp_acpi_read_cmd_data2(uint32_t idx, uint32_t data) "idx[0x%"PRIx32"] data: 0x%"PRIx32
> > cpuhp_acpi_cpu_has_events(uint32_t idx, bool ins, bool rm) "idx[0x%"PRIx32"] inserting: %d, removing: %d"
> > cpuhp_acpi_clear_inserting_evt(uint32_t idx) "idx[0x%"PRIx32"]"
> > cpuhp_acpi_clear_remove_evt(uint32_t idx) "idx[0x%"PRIx32"]"
> >
>
> Looks plausible to me, thanks (discounting the TODO item).
>
> Right now, I can't offer testing for patch#3 (I'm quite far from the
> point where I'll be actually looking for a hotplugged CPU :) ), but
> based on the docs patches #1 and #2, and my proposed updates, I can
> rework my "possible CPU count detection" in OVMF.
>
> Do I need to check in OVMF specifically whether the "modern" CPU hotplug
> register block is available? Can you tell me what the oldest machine
> types are that support the modern interface?
See 679dd1a95 (pc: use new CPU hotplug interface since 2.7 machine type)
> Hmm... Commit abd49bc2ed2f ("docs: update ACPI CPU hotplug spec with new
> protocol", 2016-06-24) seems relevant. First released in v2.7.0. I think
> I should detect whether this interface is available.
Can you make detection based on QEMU version (is dynamic detection really necessary)?
> Can I use the following sequence to detect whether the interface is
> available?
>
> 1. Store 0x0 to command register.
> 2. Store 0x0 to selector register.
> 3. Read 'command data' register.
> 4. If value read is 0, the interface is available.
By default legacy register block layout is in place
(i.e. present cpus bitmap) where 1st byte is guarantied to be ">0" as it has
at least the boot CPU bit set and writes to legacy bitmap are ignored.
Currently AML code code does switching to modern interface, see
docs/specs/acpi_cpu_hotplug.txt:
"
The first DWORD in bitmap is used in write mode to switch from legacy
to new CPU hotplug interface, write 0 into it to do switch.
"
related code "if (opts.has_legacy_cphp) {" and cpu_status_write()
Considering firmware runs the first, it should enable modern interface
on its own
1. Store 0x0 to selector register (actually it's store into bitmap to attempt switch).
and to check if interface is present
2. Store 0x0 to selector register (to ensure valid selector value (otherwise command is ignored))
3. Store 0x0 to command register (to be able to read back selector from command data)
4. Store 0x0 to selector register (because #3 can select the a cpu with events if any)
be aware libvirt may start QEMU in paused mode (hotplug context) and hotplugs extra CPUs
with device_add and then let guest run. So firmware may see present CPUs with events
at boot time.
5. Read 'command data' register.
6. If value read is 0, the interface is available.
> (Because I assume that unmapped IO ports read as all-bits-one. Is that
> right?)
that's right but ports are mapped to legacy CPU bitmap, you can't count on all-bits-one case here.
> BTW, can I dynamically detect support for the GET_CPU_ID command too?
> I'm thinking, when I enumerate / count all possible CPUs, I can at once
> fetch the arch IDs for all of them. If I only get zeros from the command
> data registers, across all CPUs, in response to GET_CPU_ID, then the
> command is not available.
APICID == 0 is valid value, so one would be need to account for ' -smp 1 '
case where the only valid selector is 0 that leads to APIC ID = 0
If counted maxcpus > 1, then what you suggest will work
if you pick the last CPU (apic id != 0). (at least for x86 guest,
I don't know if it's fine wrt ARM guest)
May be dynamic detection is just over-engineering.
> Thanks
> Laszlo
>
Hello Igor,
On 10/18/19 18:18, Igor Mammedov wrote:
> On Thu, 10 Oct 2019 16:56:18 +0200
> Laszlo Ersek <lersek@redhat.com> wrote:
[...]
>> Right now, I can't offer testing for patch#3 (I'm quite far from the
>> point where I'll be actually looking for a hotplugged CPU :) ), but
>> based on the docs patches #1 and #2, and my proposed updates, I can
>> rework my "possible CPU count detection" in OVMF.
>>
>> Do I need to check in OVMF specifically whether the "modern" CPU hotplug
>> register block is available? Can you tell me what the oldest machine
>> types are that support the modern interface?
> See 679dd1a95 (pc: use new CPU hotplug interface since 2.7 machine type)
>
>
>> Hmm... Commit abd49bc2ed2f ("docs: update ACPI CPU hotplug spec with new
>> protocol", 2016-06-24) seems relevant. First released in v2.7.0. I think
>> I should detect whether this interface is available.
> Can you make detection based on QEMU version (is dynamic detection really necessary)?
Thanks for following up on this; indeed it has been my remaining open
question in this thread.
I attempted to investigate a bit myself a few days ago, and indeed I can
now find the commit hash "679dd1a95" in my bash history.
So... If I can somehow query the QEMU machine type version -- or
emulator version? -- inside the guest, I'm perfectly happy using that. I
don't need dynamic detection specifically for this feature, I just need
something to deduce the feature from.
The problem is that the "possible CPU count" determination would run in
OVMF in every case, regardless of whether OVMF was built with -D
SMM_REQUIRE, and regardless of board type (q35 vs. i440fx). Requiring
QEMU v2.7.0 or later just for booting OVMF would be a bit steep.
So the idea is:
- check (somehow) if QEMU has the modern hotplug register block
- available: great, now set the boot CPU count and the possible CPU
count independently of each other
- otherwise: set both values to the boot CPU count (which we can fetch
from fw_cfg, like we've always done)
>> Can I use the following sequence to detect whether the interface is
>> available?
>>
>> 1. Store 0x0 to command register.
>> 2. Store 0x0 to selector register.
>> 3. Read 'command data' register.
>> 4. If value read is 0, the interface is available.
>
> By default legacy register block layout is in place
> (i.e. present cpus bitmap) where 1st byte is guarantied to be ">0" as it has
> at least the boot CPU bit set and writes to legacy bitmap are ignored.
>
> Currently AML code code does switching to modern interface, see
> docs/specs/acpi_cpu_hotplug.txt:
> "
> The first DWORD in bitmap is used in write mode to switch from legacy
> to new CPU hotplug interface, write 0 into it to do switch.
> "
> related code "if (opts.has_legacy_cphp) {" and cpu_status_write()
>
> Considering firmware runs the first, it should enable modern interface
> on its own
> 1. Store 0x0 to selector register (actually it's store into bitmap to attempt switch).
> and to check if interface is present
> 2. Store 0x0 to selector register (to ensure valid selector value (otherwise command is ignored))
> 3. Store 0x0 to command register (to be able to read back selector from command data)
> 4. Store 0x0 to selector register (because #3 can select the a cpu with events if any)
> be aware libvirt may start QEMU in paused mode (hotplug context) and hotplugs extra CPUs
> with device_add and then let guest run. So firmware may see present CPUs with events
> at boot time.
> 5. Read 'command data' register.
> 6. If value read is 0, the interface is available.
Perfect!
Basically this is prepending two "write 0 to selector register" steps to
what I suspected. I certainly couldn't figure out the "switch to modern"
step, and whether initializing the selector to something valid was
needed at boot. Now I know. :) Thanks!
>
>> (Because I assume that unmapped IO ports read as all-bits-one. Is that
>> right?)
> that's right but ports are mapped to legacy CPU bitmap, you can't count on all-bits-one case here.
>
>> BTW, can I dynamically detect support for the GET_CPU_ID command too?
>> I'm thinking, when I enumerate / count all possible CPUs, I can at once
>> fetch the arch IDs for all of them. If I only get zeros from the command
>> data registers, across all CPUs, in response to GET_CPU_ID, then the
>> command is not available.
>
> APICID == 0 is valid value, so one would be need to account for ' -smp 1 '
> case where the only valid selector is 0 that leads to APIC ID = 0
>
> If counted maxcpus > 1, then what you suggest will work
> if you pick the last CPU (apic id != 0). (at least for x86 guest,
> I don't know if it's fine wrt ARM guest)
>
> May be dynamic detection is just over-engineering.
Dynamically detecting the presence of the GET_CPU_ID command is not
important. That's because GET_CPU_ID is for a new use case (CPU hotplug
with SMM), which has never worked before. So we can't regress it.
We'll just modify the OvmfPkg/README file to spell out the minimum
machine type requirement:
don't try to hotplug a CPU when running OVMF built with SMM_REQUIRE,
unless your machine type is pc-q35-4.2+
which is exactly what we did for base SMM:
* The minimum required QEMU machine type is "pc-q35-2.5".
The "possible CPU count" determination is more risky because that will
modify a code path that's active on every boot, in every possible
environment.
I think I can rework my OVMF series for the "possible CPU count"
fetching now.
Thank you!
Laszlo
On 10/21/19 15:06, Laszlo Ersek wrote:
> On 10/18/19 18:18, Igor Mammedov wrote:
>> On Thu, 10 Oct 2019 16:56:18 +0200
>> Laszlo Ersek <lersek@redhat.com> wrote:
[...]
>>> Can I use the following sequence to detect whether the interface is
>>> available?
>>>
>>> 1. Store 0x0 to command register.
>>> 2. Store 0x0 to selector register.
>>> 3. Read 'command data' register.
>>> 4. If value read is 0, the interface is available.
>>
>> By default legacy register block layout is in place
>> (i.e. present cpus bitmap) where 1st byte is guarantied to be ">0" as it has
>> at least the boot CPU bit set and writes to legacy bitmap are ignored.
>>
>> Currently AML code code does switching to modern interface, see
>> docs/specs/acpi_cpu_hotplug.txt:
>> "
>> The first DWORD in bitmap is used in write mode to switch from legacy
>> to new CPU hotplug interface, write 0 into it to do switch.
>> "
>> related code "if (opts.has_legacy_cphp) {" and cpu_status_write()
>>
>> Considering firmware runs the first, it should enable modern interface
>> on its own
>> 1. Store 0x0 to selector register (actually it's store into bitmap to attempt switch).
>> and to check if interface is present
>> 2. Store 0x0 to selector register (to ensure valid selector value (otherwise command is ignored))
>> 3. Store 0x0 to command register (to be able to read back selector from command data)
>> 4. Store 0x0 to selector register (because #3 can select the a cpu with events if any)
>> be aware libvirt may start QEMU in paused mode (hotplug context) and hotplugs extra CPUs
>> with device_add and then let guest run. So firmware may see present CPUs with events
>> at boot time.
>> 5. Read 'command data' register.
>> 6. If value read is 0, the interface is available.
>
> Perfect!
>
> Basically this is prepending two "write 0 to selector register" steps to
> what I suspected. I certainly couldn't figure out the "switch to modern"
> step, and whether initializing the selector to something valid was
> needed at boot. Now I know. :) Thanks!
>
>>
>>> (Because I assume that unmapped IO ports read as all-bits-one. Is that
>>> right?)
>> that's right but ports are mapped to legacy CPU bitmap, you can't count on all-bits-one case here.
It seems I rejoiced too soon.
When we read the command data register in the last step, that is at
offset 0x8 in the register block. Considering the legacy "CPU present
bitmap", if no CPU is present in that range, then the firmware could
read a zero value. I got confused because I thought we were reading at
offset 0, which would always have bit0 set (for CPU#0).
Can we detect the modern interface like this:
1. store 0x0 to selector register (attempt to switch)
2. read one byte at offset 0 in the register block
3. if bit#0 is set, the modern interface is unavailable;
otherwise (= bit#0 clear), the modern interface is available
Here's why:
- if even the legacy interface is missing, then step 2 is an unassigned
read, hence the value read is all-bits-one; bit#0 is set
- if only the legacy interface is available, then bit#0 stands for
CPU#0, it will be set
- if the switch-over in step 1 is successful, then offset 0 is reserved,
hence it returns all-bits-zero.
With this, if we ever assigned offset 0 for reading, then we'd have to
define it with bit#0 constantly clear.
Thanks,
Laszlo
On Tue, 22 Oct 2019 14:39:24 +0200
Laszlo Ersek <lersek@redhat.com> wrote:
> On 10/21/19 15:06, Laszlo Ersek wrote:
> > On 10/18/19 18:18, Igor Mammedov wrote:
> >> On Thu, 10 Oct 2019 16:56:18 +0200
> >> Laszlo Ersek <lersek@redhat.com> wrote:
>
> [...]
>
> >>> Can I use the following sequence to detect whether the interface is
> >>> available?
> >>>
> >>> 1. Store 0x0 to command register.
> >>> 2. Store 0x0 to selector register.
> >>> 3. Read 'command data' register.
> >>> 4. If value read is 0, the interface is available.
> >>
> >> By default legacy register block layout is in place
> >> (i.e. present cpus bitmap) where 1st byte is guarantied to be ">0" as it has
> >> at least the boot CPU bit set and writes to legacy bitmap are ignored.
> >>
> >> Currently AML code code does switching to modern interface, see
> >> docs/specs/acpi_cpu_hotplug.txt:
> >> "
> >> The first DWORD in bitmap is used in write mode to switch from legacy
> >> to new CPU hotplug interface, write 0 into it to do switch.
> >> "
> >> related code "if (opts.has_legacy_cphp) {" and cpu_status_write()
> >>
> >> Considering firmware runs the first, it should enable modern interface
> >> on its own
> >> 1. Store 0x0 to selector register (actually it's store into bitmap to attempt switch).
> >> and to check if interface is present
> >> 2. Store 0x0 to selector register (to ensure valid selector value (otherwise command is ignored))
> >> 3. Store 0x0 to command register (to be able to read back selector from command data)
> >> 4. Store 0x0 to selector register (because #3 can select the a cpu with events if any)
> >> be aware libvirt may start QEMU in paused mode (hotplug context) and hotplugs extra CPUs
> >> with device_add and then let guest run. So firmware may see present CPUs with events
> >> at boot time.
> >> 5. Read 'command data' register.
> >> 6. If value read is 0, the interface is available.
> >
> > Perfect!
> >
> > Basically this is prepending two "write 0 to selector register" steps to
> > what I suspected. I certainly couldn't figure out the "switch to modern"
> > step, and whether initializing the selector to something valid was
> > needed at boot. Now I know. :) Thanks!
> >
> >>
> >>> (Because I assume that unmapped IO ports read as all-bits-one. Is that
> >>> right?)
> >> that's right but ports are mapped to legacy CPU bitmap, you can't count on all-bits-one case here.
>
> It seems I rejoiced too soon.
>
> When we read the command data register in the last step, that is at
> offset 0x8 in the register block. Considering the legacy "CPU present
> bitmap", if no CPU is present in that range, then the firmware could
> read a zero value. I got confused because I thought we were reading at
> offset 0, which would always have bit0 set (for CPU#0).
>
> Can we detect the modern interface like this:
>
> 1. store 0x0 to selector register (attempt to switch)
> 2. read one byte at offset 0 in the register block
> 3. if bit#0 is set, the modern interface is unavailable;
> otherwise (= bit#0 clear), the modern interface is available
>
> Here's why:
>
> - if even the legacy interface is missing, then step 2 is an unassigned
> read, hence the value read is all-bits-one; bit#0 is set
>
> - if only the legacy interface is available, then bit#0 stands for
> CPU#0, it will be set
>
> - if the switch-over in step 1 is successful, then offset 0 is reserved,
> hence it returns all-bits-zero.
>
> With this, if we ever assigned offset 0 for reading, then we'd have to
> define it with bit#0 constantly clear.
There is no need to reserve bit#0 if in step #5 we use s/'command data'/'Command data 2'/
Alternatively we can reserve bit#0 and sequentially read upper half from 'Command data'
(one a new flag to show that there is more data to read).
(Upper half currently is not necessary, it's there for future ARM's MPIDR).
One more thing, this behavior is based on artifacts of x86 machine and AllOnes fallback.
Obviously it won't work with arm/virt, do we care about AVMF at this point?
> Thanks,
> Laszlo
On 10/22/19 16:42, Igor Mammedov wrote:
> On Tue, 22 Oct 2019 14:39:24 +0200
> Laszlo Ersek <lersek@redhat.com> wrote:
>
>> On 10/21/19 15:06, Laszlo Ersek wrote:
>>> On 10/18/19 18:18, Igor Mammedov wrote:
>>>> Considering firmware runs the first, it should enable modern interface
>>>> on its own
>>>> 1. Store 0x0 to selector register (actually it's store into bitmap to attempt switch).
>>>> and to check if interface is present
>>>> 2. Store 0x0 to selector register (to ensure valid selector value (otherwise command is ignored))
>>>> 3. Store 0x0 to command register (to be able to read back selector from command data)
>>>> 4. Store 0x0 to selector register (because #3 can select the a cpu with events if any)
>>>> be aware libvirt may start QEMU in paused mode (hotplug context) and hotplugs extra CPUs
>>>> with device_add and then let guest run. So firmware may see present CPUs with events
>>>> at boot time.
>>>> 5. Read 'command data' register.
>>>> 6. If value read is 0, the interface is available.
>> When we read the command data register in the last step, that is at
>> offset 0x8 in the register block. Considering the legacy "CPU present
>> bitmap", if no CPU is present in that range, then the firmware could
>> read a zero value. I got confused because I thought we were reading at
>> offset 0, which would always have bit0 set (for CPU#0).
>>
>> Can we detect the modern interface like this:
>>
>> 1. store 0x0 to selector register (attempt to switch)
>> 2. read one byte at offset 0 in the register block
>> 3. if bit#0 is set, the modern interface is unavailable;
>> otherwise (= bit#0 clear), the modern interface is available
>>
>> Here's why:
>>
>> - if even the legacy interface is missing, then step 2 is an unassigned
>> read, hence the value read is all-bits-one; bit#0 is set
>>
>> - if only the legacy interface is available, then bit#0 stands for
>> CPU#0, it will be set
>>
>> - if the switch-over in step 1 is successful, then offset 0 is reserved,
>> hence it returns all-bits-zero.
>>
>> With this, if we ever assigned offset 0 for reading, then we'd have to
>> define it with bit#0 constantly clear.
>
> There is no need to reserve bit#0 if in step #5 we use s/'command data'/'Command data 2'/
Good idea. We can drop step 4 too:
[0x0] Command data 2: (DWORD access, little endian)
If the "CPU selector" value last stored by the guest refers to
an impossible CPU, then 0.
This is skipped by step 2.
Otherwise, if the "Command field" value last stored by the
guest differs from 3, then 0.
This is triggered by step 3.
So step 4 does not look necessary. (As long as the guest is OK with the
selector ending up with a changed value.)
Otherwise, the most significant 32 bits of the selected CPU's
architecture specific ID.
Not relevant for this use case.
> Alternatively we can reserve bit#0 and sequentially read upper half from 'Command data'
> (one a new flag to show that there is more data to read).
I like the "Command data 2" register more. The "temporal domain" is
always a complication in register definitions.
> (Upper half currently is not necessary, it's there for future ARM's MPIDR).
>
> One more thing, this behavior is based on artifacts of x86 machine and AllOnes fallback.
> Obviously it won't work with arm/virt, do we care about AVMF at this point?
No, in the firmware, all this is strictly x86 code. The ArmVirtQemu
guest firmware has no support for multiprocessing at this time, to my
understanding.
(Nonetheless, if the register block got placed at an MMIO base address
on arm/virt, I think "unassigned_mem_ops" would apply there just the same.)
Thanks!
Laszlo
On Tue, 22 Oct 2019 17:49:06 +0200 Laszlo Ersek <lersek@redhat.com> wrote: > On 10/22/19 16:42, Igor Mammedov wrote: > > On Tue, 22 Oct 2019 14:39:24 +0200 > > Laszlo Ersek <lersek@redhat.com> wrote: > > > >> On 10/21/19 15:06, Laszlo Ersek wrote: > >>> On 10/18/19 18:18, Igor Mammedov wrote: > > >>>> Considering firmware runs the first, it should enable modern interface > >>>> on its own > >>>> 1. Store 0x0 to selector register (actually it's store into bitmap to attempt switch). > >>>> and to check if interface is present > >>>> 2. Store 0x0 to selector register (to ensure valid selector value (otherwise command is ignored)) > >>>> 3. Store 0x0 to command register (to be able to read back selector from command data) > >>>> 4. Store 0x0 to selector register (because #3 can select the a cpu with events if any) > >>>> be aware libvirt may start QEMU in paused mode (hotplug context) and hotplugs extra CPUs > >>>> with device_add and then let guest run. So firmware may see present CPUs with events > >>>> at boot time. > >>>> 5. Read 'command data' register. > >>>> 6. If value read is 0, the interface is available. > > >> When we read the command data register in the last step, that is at > >> offset 0x8 in the register block. Considering the legacy "CPU present > >> bitmap", if no CPU is present in that range, then the firmware could > >> read a zero value. I got confused because I thought we were reading at > >> offset 0, which would always have bit0 set (for CPU#0). > >> > >> Can we detect the modern interface like this: > >> > >> 1. store 0x0 to selector register (attempt to switch) > >> 2. read one byte at offset 0 in the register block > >> 3. if bit#0 is set, the modern interface is unavailable; > >> otherwise (= bit#0 clear), the modern interface is available > >> > >> Here's why: > >> > >> - if even the legacy interface is missing, then step 2 is an unassigned > >> read, hence the value read is all-bits-one; bit#0 is set > >> > >> - if only the legacy interface is available, then bit#0 stands for > >> CPU#0, it will be set > >> > >> - if the switch-over in step 1 is successful, then offset 0 is reserved, > >> hence it returns all-bits-zero. > >> > >> With this, if we ever assigned offset 0 for reading, then we'd have to > >> define it with bit#0 constantly clear. > > > > There is no need to reserve bit#0 if in step #5 we use s/'command data'/'Command data 2'/ > > Good idea. We can drop step 4 too: > > [0x0] Command data 2: (DWORD access, little endian) > If the "CPU selector" value last stored by the guest refers to > an impossible CPU, then 0. > > This is skipped by step 2. > > Otherwise, if the "Command field" value last stored by the > guest differs from 3, then 0. > > This is triggered by step 3. > > So step 4 does not look necessary. (As long as the guest is OK with the > selector ending up with a changed value.) sounds good, I'll respin patches taking this into account. > Otherwise, the most significant 32 bits of the selected CPU's > architecture specific ID. > > Not relevant for this use case. > > > Alternatively we can reserve bit#0 and sequentially read upper half from 'Command data' > > (one a new flag to show that there is more data to read). > > I like the "Command data 2" register more. The "temporal domain" is > always a complication in register definitions. > > > (Upper half currently is not necessary, it's there for future ARM's MPIDR). > > > > One more thing, this behavior is based on artifacts of x86 machine and AllOnes fallback. > > Obviously it won't work with arm/virt, do we care about AVMF at this point? > > No, in the firmware, all this is strictly x86 code. The ArmVirtQemu > guest firmware has no support for multiprocessing at this time, to my > understanding. > > (Nonetheless, if the register block got placed at an MMIO base address > on arm/virt, I think "unassigned_mem_ops" would apply there just the same.) > > Thanks! > Laszlo > >
Hi Igor,
On 10/9/19 3:22 PM, Igor Mammedov wrote:
> Extend CPU hotplug interface to return architecture specific
> identifier for current CPU (in case of x86, it's APIC ID).
>
> Signed-off-by: Igor Mammedov <imammedo@redhat.com>
> ---
> TODO:
> * cripple it to behave old way on old machine types so that
> new firmware started on new QEMU won't see a difference
> when migrated to an old QEMU (i.e. QEMU that doesn't support
> this command)
> ---
> docs/specs/acpi_cpu_hotplug.txt | 10 +++++++++-
> hw/acpi/cpu.c | 15 +++++++++++++++
> hw/acpi/trace-events | 1 +
> 3 files changed, 25 insertions(+), 1 deletion(-)
>
> diff --git a/docs/specs/acpi_cpu_hotplug.txt b/docs/specs/acpi_cpu_hotplug.txt
> index 43c5a193f0..0438678249 100644
> --- a/docs/specs/acpi_cpu_hotplug.txt
> +++ b/docs/specs/acpi_cpu_hotplug.txt
> @@ -32,7 +32,9 @@ Register block size:
>
> read access:
> offset:
> - [0x0-0x3] reserved
> + [0x0-0x3] Command data 2: (DWORD access)
> + upper 32 bit of 'Command data' if returned data value is 64 bit.
> + in case of error or unsupported command reads is 0x0
> [0x4] CPU device status fields: (1 byte access)
> bits:
> 0: Device is enabled and may be used by guest
> @@ -87,6 +89,8 @@ write access:
> 2: stores value into OST status register, triggers
> ACPI_DEVICE_OST QMP event from QEMU to external applications
> with current values of OST event and status registers.
> + 3: OSPM reads architecture specific value identifying CPU
> + (x86: APIC ID)
> other values: reserved
>
> Selecting CPU device beyond possible range has no effect on platform:
> @@ -115,3 +119,7 @@ Typical usecases:
> 5.2 if 'Command data' register has not changed, there is not CPU
> corresponding to iterator value and the last valid iterator value
> equals to 'max_cpus' + 1
> + - Get architecture specific id for a CPU
> + 1. pick a CPU to read from using 'CPU selector' register
> + 2. write 0x3 int0 'Command field' register
> + 3. read architecture specific id from 'Command data' register
> diff --git a/hw/acpi/cpu.c b/hw/acpi/cpu.c
> index 87f30a31d7..701542d860 100644
> --- a/hw/acpi/cpu.c
> +++ b/hw/acpi/cpu.c
> @@ -12,11 +12,13 @@
> #define ACPI_CPU_FLAGS_OFFSET_RW 4
> #define ACPI_CPU_CMD_OFFSET_WR 5
> #define ACPI_CPU_CMD_DATA_OFFSET_RW 8
> +#define ACPI_CPU_CMD_DATA2_OFFSET_RW 0
This part confuses me:
#define ACPI_CPU_SELECTOR_OFFSET_WR 0
#define ACPI_CPU_CMD_DATA2_OFFSET_RW 0
What about:
#define ACPI_CPU_SELECTOR_OFFSET_W 0
#define ACPI_CPU_CMD_DATA2_OFFSET_R 0
>
> enum {
> CPHP_GET_NEXT_CPU_WITH_EVENT_CMD = 0,
> CPHP_OST_EVENT_CMD = 1,
> CPHP_OST_STATUS_CMD = 2,
> + CPHP_GET_CPU_ID_CMD = 3,
> CPHP_CMD_MAX
> };
>
> @@ -74,11 +76,24 @@ static uint64_t cpu_hotplug_rd(void *opaque, hwaddr addr, unsigned size)
> case CPHP_GET_NEXT_CPU_WITH_EVENT_CMD:
> val = cpu_st->selector;
> break;
> + case CPHP_GET_CPU_ID_CMD:
> + val = cpu_to_le64(cdev->arch_id) & 0xFFFFFFFF;
> + break;
> default:
> break;
> }
> trace_cpuhp_acpi_read_cmd_data(cpu_st->selector, val);
> break;
> + case ACPI_CPU_CMD_DATA2_OFFSET_RW:
> + switch (cpu_st->command) {
> + case CPHP_GET_CPU_ID_CMD:
> + val = cpu_to_le64(cdev->arch_id) >> 32;
> + break;
> + default:
> + break;
> + }
> + trace_cpuhp_acpi_read_cmd_data2(cpu_st->selector, val);
> + break;
> default:
> break;
> }
> diff --git a/hw/acpi/trace-events b/hw/acpi/trace-events
> index 96b8273297..afbc77de1c 100644
> --- a/hw/acpi/trace-events
> +++ b/hw/acpi/trace-events
> @@ -23,6 +23,7 @@ cpuhp_acpi_read_flags(uint32_t idx, uint8_t flags) "idx[0x%"PRIx32"] flags: 0x%"
> cpuhp_acpi_write_idx(uint32_t idx) "set active cpu idx: 0x%"PRIx32
> cpuhp_acpi_write_cmd(uint32_t idx, uint8_t cmd) "idx[0x%"PRIx32"] cmd: 0x%"PRIx8
> cpuhp_acpi_read_cmd_data(uint32_t idx, uint32_t data) "idx[0x%"PRIx32"] data: 0x%"PRIx32
> +cpuhp_acpi_read_cmd_data2(uint32_t idx, uint32_t data) "idx[0x%"PRIx32"] data: 0x%"PRIx32
> cpuhp_acpi_cpu_has_events(uint32_t idx, bool ins, bool rm) "idx[0x%"PRIx32"] inserting: %d, removing: %d"
> cpuhp_acpi_clear_inserting_evt(uint32_t idx) "idx[0x%"PRIx32"]"
> cpuhp_acpi_clear_remove_evt(uint32_t idx) "idx[0x%"PRIx32"]"
>
© 2016 - 2026 Red Hat, Inc.