[PATCH v5 07/10] hw/i386: declare ACPI mother board resource for MMCONFIG region

isaku.yamahata@gmail.com posted 10 patches 4 years, 11 months ago
Maintainers: "Philippe Mathieu-Daudé" <f4bug@amsat.org>, Jiaxun Yang <jiaxun.yang@flygoat.com>, Paolo Bonzini <pbonzini@redhat.com>, Igor Mammedov <imammedo@redhat.com>, "Michael S. Tsirkin" <mst@redhat.com>, Marcel Apfelbaum <marcel.apfelbaum@gmail.com>, Richard Henderson <richard.henderson@linaro.org>, Aurelien Jarno <aurelien@aurel32.net>, Huacai Chen <chenhuacai@kernel.org>, Eduardo Habkost <ehabkost@redhat.com>
[PATCH v5 07/10] hw/i386: declare ACPI mother board resource for MMCONFIG region
Posted by isaku.yamahata@gmail.com 4 years, 11 months ago
From: Isaku Yamahata <isaku.yamahata@intel.com>

Declare PNP0C01 device to reserve MMCONFIG region to conform to the
spec better and play nice with guest BIOSes/OSes.

According to PCI Firmware Specification[0], MMCONFIG region must be
reserved by declaring a motherboard resource. It's optional to reserve
the region in memory map by Int 15 E820h or EFIGetMemoryMap.
Guest Linux checks if the MMCFG region is reserved by bios memory map
or ACPI resource. If it's not reserved, Linux falls back to legacy PCI
configuration access.

TDVF [1] [2] doesn't reserve MMCONFIG the region in memory map.
On the other hand OVMF reserves it in memory map without declaring a
motherboard resource. With memory map reservation, linux guest uses
MMCONFIG region. However it doesn't comply to PCI Firmware
specification.

[0] PCI Firmware specification Revision 3.2
  4.1.2 MCFG Table Description table 4-2 NOTE 2
  If the operating system does not natively comprehend reserving the
  MMCFG region, The MMCFG region must e reserved by firmware. ...
  For most systems, the mortheroard resource would appear at the root
  of the ACPI namespace (under \_SB)...
  The resource can optionally be returned in Int15 E820h or
  EFIGetMemoryMap as reserved memory but must always be reported
  through ACPI as a motherboard resource

[1] TDX: Intel Trust Domain Extension
    https://software.intel.com/content/www/us/en/develop/articles/intel-trust-domain-extensions.html
[2] TDX Virtual Firmware
    https://github.com/tianocore/edk2-staging/tree/TDVF

The change to DSDT is as follows.
@@ -68,32 +68,47 @@

                     If ((CDW3 != Local0))
                     {
                         CDW1 |= 0x10
                     }

                     CDW3 = Local0
                 }
                 Else
                 {
                     CDW1 |= 0x04
                 }

                 Return (Arg3)
             }
         }
+
+        Device (DRAC)
+        {
+            Name (_HID, "PNP0C01" /* System Board */)  // _HID: Hardware ID
+            Name (_CRS, ResourceTemplate ()  // _CRS: Current Resource Settings
+            {
+                DWordMemory (ResourceProducer, PosDecode, MinFixed, MaxFixed, NonCacheable, ReadWrite,
+                    0x00000000,         // Granularity
+                    0xB0000000,         // Range Minimum
+                    0xBFFFFFFF,         // Range Maximum
+                    0x00000000,         // Translation Offset
+                    0x10000000,         // Length
+                    ,, , AddressRangeMemory, TypeStatic)
+            })
+        }
     }

     Scope (_SB)
     {
         Device (HPET)
         {
             Name (_HID, EisaId ("PNP0103") /* HPET System Timer */)  // _HID: Hardware ID
             Name (_UID, Zero)  // _UID: Unique ID
             OperationRegion (HPTM, SystemMemory, 0xFED00000, 0x0400)
             Field (HPTM, DWordAcc, Lock, Preserve)
             {
                 VEND,   32,
                 PRD,    32
             }

             Method (_STA, 0, NotSerialized)  // _STA: Status

Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
---
 hw/i386/acpi-build.c | 46 +++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 45 insertions(+), 1 deletion(-)

diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index 49aef4ebd1..96497475d1 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -1072,6 +1072,46 @@ static void build_q35_pci0_int(Aml *table)
     aml_append(table, sb_scope);
 }
 
+static Aml *build_q35_dram_controller(const AcpiMcfgInfo *mcfg)
+{
+    Aml *dev;
+    Aml *resource_template;
+
+    /* DRAM controller */
+    dev = aml_device("DRAC");
+    aml_append(dev, aml_name_decl("_HID", aml_string("PNP0C01")));
+
+    resource_template = aml_resource_template();
+    if (mcfg->base + mcfg->size - 1 >= (1ULL << 32)) {
+        aml_append(resource_template,
+                   aml_qword_memory(AML_POS_DECODE,
+                                    AML_MIN_FIXED,
+                                    AML_MAX_FIXED,
+                                    AML_NON_CACHEABLE,
+                                    AML_READ_WRITE,
+                                    0x0000000000000000,
+                                    mcfg->base,
+                                    mcfg->base + mcfg->size - 1,
+                                    0x0000000000000000,
+                                    mcfg->size));
+    } else {
+        aml_append(resource_template,
+                   aml_dword_memory(AML_POS_DECODE,
+                                    AML_MIN_FIXED,
+                                    AML_MAX_FIXED,
+                                    AML_NON_CACHEABLE,
+                                    AML_READ_WRITE,
+                                    0x0000000000000000,
+                                    mcfg->base,
+                                    mcfg->base + mcfg->size - 1,
+                                    0x0000000000000000,
+                                    mcfg->size));
+    }
+    aml_append(dev, aml_name_decl("_CRS", resource_template));
+
+    return dev;
+}
+
 static void build_q35_isa_bridge(Aml *table)
 {
     Aml *dev;
@@ -1218,6 +1258,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
     PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(machine);
     X86MachineState *x86ms = X86_MACHINE(machine);
     AcpiMcfgInfo mcfg;
+    bool mcfg_valid = !!acpi_get_mcfg(&mcfg);
     uint32_t nr_mem = machine->ram_slots;
     int root_bus_limit = 0xFF;
     PCIBus *bus = NULL;
@@ -1256,6 +1297,9 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
         aml_append(dev, aml_name_decl("_UID", aml_int(0)));
         aml_append(dev, build_q35_osc_method());
         aml_append(sb_scope, dev);
+        if (mcfg_valid) {
+            aml_append(sb_scope, build_q35_dram_controller(&mcfg));
+        }
 
         if (pm->smi_on_cpuhp) {
             /* reserve SMI block resources, IO ports 0xB2, 0xB3 */
@@ -1386,7 +1430,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
      * the PCI0._CRS.  Add mmconfig to the set so it will be excluded
      * too.
      */
-    if (acpi_get_mcfg(&mcfg)) {
+    if (mcfg_valid) {
         crs_range_insert(crs_range_set.mem_ranges,
                          mcfg.base, mcfg.base + mcfg.size - 1);
     }
-- 
2.17.1


Re: [PATCH v5 07/10] hw/i386: declare ACPI mother board resource for MMCONFIG region
Posted by Igor Mammedov 4 years, 11 months ago
On Wed, 17 Feb 2021 21:51:15 -0800
isaku.yamahata@gmail.com wrote:

> From: Isaku Yamahata <isaku.yamahata@intel.com>
> 
> Declare PNP0C01 device to reserve MMCONFIG region to conform to the
> spec better and play nice with guest BIOSes/OSes.
> 
> According to PCI Firmware Specification[0], MMCONFIG region must be
> reserved by declaring a motherboard resource. It's optional to reserve
> the region in memory map by Int 15 E820h or EFIGetMemoryMap.
> Guest Linux checks if the MMCFG region is reserved by bios memory map
> or ACPI resource. If it's not reserved, Linux falls back to legacy PCI
> configuration access.
> 
> TDVF [1] [2] doesn't reserve MMCONFIG the region in memory map.
> On the other hand OVMF reserves it in memory map without declaring a
> motherboard resource. With memory map reservation, linux guest uses
> MMCONFIG region. However it doesn't comply to PCI Firmware
> specification.
> 
> [0] PCI Firmware specification Revision 3.2
>   4.1.2 MCFG Table Description table 4-2 NOTE 2
>   If the operating system does not natively comprehend reserving the
>   MMCFG region, The MMCFG region must e reserved by firmware. ...
>   For most systems, the mortheroard resource would appear at the root
>   of the ACPI namespace (under \_SB)...
>   The resource can optionally be returned in Int15 E820h or
>   EFIGetMemoryMap as reserved memory but must always be reported
>   through ACPI as a motherboard resource
> 
> [1] TDX: Intel Trust Domain Extension
>     https://software.intel.com/content/www/us/en/develop/articles/intel-trust-domain-extensions.html
> [2] TDX Virtual Firmware
>     https://github.com/tianocore/edk2-staging/tree/TDVF
> 
> The change to DSDT is as follows.
> @@ -68,32 +68,47 @@
> 
>                      If ((CDW3 != Local0))
>                      {
>                          CDW1 |= 0x10
>                      }
> 
>                      CDW3 = Local0
>                  }
>                  Else
>                  {
>                      CDW1 |= 0x04
>                  }
> 
>                  Return (Arg3)
>              }
>          }
> +
> +        Device (DRAC)
> +        {
> +            Name (_HID, "PNP0C01" /* System Board */)  // _HID: Hardware ID
> +            Name (_CRS, ResourceTemplate ()  // _CRS: Current Resource Settings
> +            {
> +                DWordMemory (ResourceProducer, PosDecode, MinFixed, MaxFixed, NonCacheable, ReadWrite,
> +                    0x00000000,         // Granularity
> +                    0xB0000000,         // Range Minimum
> +                    0xBFFFFFFF,         // Range Maximum
> +                    0x00000000,         // Translation Offset
> +                    0x10000000,         // Length
> +                    ,, , AddressRangeMemory, TypeStatic)
> +            })
> +        }
>      }
> 
>      Scope (_SB)
>      {
>          Device (HPET)
>          {
>              Name (_HID, EisaId ("PNP0103") /* HPET System Timer */)  // _HID: Hardware ID
>              Name (_UID, Zero)  // _UID: Unique ID
>              OperationRegion (HPTM, SystemMemory, 0xFED00000, 0x0400)
>              Field (HPTM, DWordAcc, Lock, Preserve)
>              {
>                  VEND,   32,
>                  PRD,    32
>              }
> 
>              Method (_STA, 0, NotSerialized)  // _STA: Status
> 
> Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>

Reviewed-by: Igor Mammedov <imammedo@redhat.com>

> ---
>  hw/i386/acpi-build.c | 46 +++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 45 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
> index 49aef4ebd1..96497475d1 100644
> --- a/hw/i386/acpi-build.c
> +++ b/hw/i386/acpi-build.c
> @@ -1072,6 +1072,46 @@ static void build_q35_pci0_int(Aml *table)
>      aml_append(table, sb_scope);
>  }
>  
> +static Aml *build_q35_dram_controller(const AcpiMcfgInfo *mcfg)
> +{
> +    Aml *dev;
> +    Aml *resource_template;
> +
> +    /* DRAM controller */
> +    dev = aml_device("DRAC");
> +    aml_append(dev, aml_name_decl("_HID", aml_string("PNP0C01")));
> +
> +    resource_template = aml_resource_template();
> +    if (mcfg->base + mcfg->size - 1 >= (1ULL << 32)) {
> +        aml_append(resource_template,
> +                   aml_qword_memory(AML_POS_DECODE,
> +                                    AML_MIN_FIXED,
> +                                    AML_MAX_FIXED,
> +                                    AML_NON_CACHEABLE,
> +                                    AML_READ_WRITE,
> +                                    0x0000000000000000,
> +                                    mcfg->base,
> +                                    mcfg->base + mcfg->size - 1,
> +                                    0x0000000000000000,
> +                                    mcfg->size));
> +    } else {
> +        aml_append(resource_template,
> +                   aml_dword_memory(AML_POS_DECODE,
> +                                    AML_MIN_FIXED,
> +                                    AML_MAX_FIXED,
> +                                    AML_NON_CACHEABLE,
> +                                    AML_READ_WRITE,
> +                                    0x0000000000000000,
> +                                    mcfg->base,
> +                                    mcfg->base + mcfg->size - 1,
> +                                    0x0000000000000000,
> +                                    mcfg->size));
> +    }
> +    aml_append(dev, aml_name_decl("_CRS", resource_template));
> +
> +    return dev;
> +}
> +
>  static void build_q35_isa_bridge(Aml *table)
>  {
>      Aml *dev;
> @@ -1218,6 +1258,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
>      PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(machine);
>      X86MachineState *x86ms = X86_MACHINE(machine);
>      AcpiMcfgInfo mcfg;
> +    bool mcfg_valid = !!acpi_get_mcfg(&mcfg);
>      uint32_t nr_mem = machine->ram_slots;
>      int root_bus_limit = 0xFF;
>      PCIBus *bus = NULL;
> @@ -1256,6 +1297,9 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
>          aml_append(dev, aml_name_decl("_UID", aml_int(0)));
>          aml_append(dev, build_q35_osc_method());
>          aml_append(sb_scope, dev);
> +        if (mcfg_valid) {
> +            aml_append(sb_scope, build_q35_dram_controller(&mcfg));
> +        }
>  
>          if (pm->smi_on_cpuhp) {
>              /* reserve SMI block resources, IO ports 0xB2, 0xB3 */
> @@ -1386,7 +1430,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
>       * the PCI0._CRS.  Add mmconfig to the set so it will be excluded
>       * too.
>       */
> -    if (acpi_get_mcfg(&mcfg)) {
> +    if (mcfg_valid) {
>          crs_range_insert(crs_range_set.mem_ranges,
>                           mcfg.base, mcfg.base + mcfg.size - 1);
>      }