The "root" device search was introduced to support SMN access for Zen
systems. This device represents a PCIe root complex. It is not the
same as the "CPU/node" devices found at slots 0x18-0x1F.
There may be multiple PCIe root complexes within an AMD node. Such is
the case with server or high-end desktop (HEDT) systems, etc. Therefore it is not enough to
assume "root <-> AMD node" is a 1-to-1 association.
Currently, this is handled by skipping "extra" root complexes during the
search. However, the hardware provides the PCI bus number of an AMD
node's root device.
Use the hardware info to get the root device's bus and drop the extra
search code and PCI IDs.
Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
---
arch/x86/include/asm/amd_node.h | 1 +
arch/x86/kernel/amd_nb.c | 80 ++-------------------------------
arch/x86/kernel/amd_node.c | 57 +++++++++++++++++++++++
3 files changed, 62 insertions(+), 76 deletions(-)
diff --git a/arch/x86/include/asm/amd_node.h b/arch/x86/include/asm/amd_node.h
index 622bd3038eeb..3f097dd479f8 100644
--- a/arch/x86/include/asm/amd_node.h
+++ b/arch/x86/include/asm/amd_node.h
@@ -23,5 +23,6 @@
#define AMD_NODE0_PCI_SLOT 0x18
struct pci_dev *amd_node_get_func(u16 node, u8 func);
+struct pci_dev *amd_node_get_root(u16 node);
#endif /*_ASM_X86_AMD_NODE_H_*/
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index 34c06b25782d..135ecc0a0166 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -15,44 +15,11 @@
#include <linux/pci_ids.h>
#include <asm/amd_nb.h>
-#define PCI_DEVICE_ID_AMD_17H_ROOT 0x1450
-#define PCI_DEVICE_ID_AMD_17H_M10H_ROOT 0x15d0
-#define PCI_DEVICE_ID_AMD_17H_M30H_ROOT 0x1480
-#define PCI_DEVICE_ID_AMD_17H_M60H_ROOT 0x1630
-#define PCI_DEVICE_ID_AMD_17H_MA0H_ROOT 0x14b5
-#define PCI_DEVICE_ID_AMD_19H_M10H_ROOT 0x14a4
-#define PCI_DEVICE_ID_AMD_19H_M40H_ROOT 0x14b5
-#define PCI_DEVICE_ID_AMD_19H_M60H_ROOT 0x14d8
-#define PCI_DEVICE_ID_AMD_19H_M70H_ROOT 0x14e8
-#define PCI_DEVICE_ID_AMD_1AH_M00H_ROOT 0x153a
-#define PCI_DEVICE_ID_AMD_1AH_M20H_ROOT 0x1507
-#define PCI_DEVICE_ID_AMD_1AH_M60H_ROOT 0x1122
-#define PCI_DEVICE_ID_AMD_MI200_ROOT 0x14bb
-#define PCI_DEVICE_ID_AMD_MI300_ROOT 0x14f8
-
/* Protect the PCI config register pairs used for SMN. */
static DEFINE_MUTEX(smn_mutex);
static u32 *flush_words;
-static const struct pci_device_id amd_root_ids[] = {
- { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_ROOT) },
- { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M10H_ROOT) },
- { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M30H_ROOT) },
- { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M60H_ROOT) },
- { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_MA0H_ROOT) },
- { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M10H_ROOT) },
- { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M40H_ROOT) },
- { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M60H_ROOT) },
- { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M70H_ROOT) },
- { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_1AH_M00H_ROOT) },
- { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_1AH_M20H_ROOT) },
- { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_1AH_M60H_ROOT) },
- { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_MI200_ROOT) },
- { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_MI300_ROOT) },
- {}
-};
-
static const struct pci_device_id amd_nb_misc_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) },
@@ -85,11 +52,6 @@ static const struct pci_device_id amd_nb_misc_ids[] = {
{}
};
-static const struct pci_device_id hygon_root_ids[] = {
- { PCI_DEVICE(PCI_VENDOR_ID_HYGON, PCI_DEVICE_ID_AMD_17H_ROOT) },
- {}
-};
-
static const struct pci_device_id hygon_nb_misc_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_HYGON, PCI_DEVICE_ID_AMD_17H_DF_F3) },
{}
@@ -222,19 +184,15 @@ EXPORT_SYMBOL_GPL(amd_smn_write);
static int amd_cache_northbridges(void)
{
const struct pci_device_id *misc_ids = amd_nb_misc_ids;
- const struct pci_device_id *root_ids = amd_root_ids;
- struct pci_dev *root, *misc;
+ struct pci_dev *misc;
struct amd_northbridge *nb;
- u16 roots_per_misc = 0;
u16 misc_count = 0;
- u16 root_count = 0;
- u16 i, j;
+ u16 i;
if (amd_northbridges.num)
return 0;
if (boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) {
- root_ids = hygon_root_ids;
misc_ids = hygon_nb_misc_ids;
}
@@ -245,23 +203,6 @@ static int amd_cache_northbridges(void)
if (!misc_count)
return -ENODEV;
- root = NULL;
- while ((root = next_northbridge(root, root_ids)))
- root_count++;
-
- if (root_count) {
- roots_per_misc = root_count / misc_count;
-
- /*
- * There should be _exactly_ N roots for each DF/SMN
- * interface.
- */
- if (!roots_per_misc || (root_count % roots_per_misc)) {
- pr_info("Unsupported AMD DF/PCI configuration found\n");
- return -ENODEV;
- }
- }
-
nb = kcalloc(misc_count, sizeof(struct amd_northbridge), GFP_KERNEL);
if (!nb)
return -ENOMEM;
@@ -269,25 +210,12 @@ static int amd_cache_northbridges(void)
amd_northbridges.nb = nb;
amd_northbridges.num = misc_count;
- misc = root = NULL;
+ misc = NULL;
for (i = 0; i < amd_northbridges.num; i++) {
- node_to_amd_nb(i)->root = root =
- next_northbridge(root, root_ids);
+ node_to_amd_nb(i)->root = amd_node_get_root(i);
node_to_amd_nb(i)->misc = misc =
next_northbridge(misc, misc_ids);
node_to_amd_nb(i)->link = amd_node_get_func(i, 4);
-
- /*
- * If there are more PCI root devices than data fabric/
- * system management network interfaces, then the (N)
- * PCI roots per DF/SMN interface are functionally the
- * same (for DF/SMN access) and N-1 are redundant. N-1
- * PCI roots should be skipped per DF/SMN interface so
- * the following DF/SMN interfaces get mapped to
- * correct PCI roots.
- */
- for (j = 1; j < roots_per_misc; j++)
- root = next_northbridge(root, root_ids);
}
if (amd_gart_present())
diff --git a/arch/x86/kernel/amd_node.c b/arch/x86/kernel/amd_node.c
index e825cd4426b9..3aaf7c81f0fa 100644
--- a/arch/x86/kernel/amd_node.c
+++ b/arch/x86/kernel/amd_node.c
@@ -32,3 +32,60 @@ struct pci_dev *amd_node_get_func(u16 node, u8 func)
return pci_get_domain_bus_and_slot(0, 0, PCI_DEVFN(AMD_NODE0_PCI_SLOT + node, func));
}
+
+#define DF_BLK_INST_CNT 0x040
+#define DF_CFG_ADDR_CNTL_LEGACY 0x084
+#define DF_CFG_ADDR_CNTL_DF4 0xC04
+
+#define DF_MAJOR_REVISION GENMASK(27, 24)
+
+static u16 get_cfg_addr_cntl_offset(struct pci_dev *df_f0)
+{
+ u32 reg;
+
+ /*
+ * Revision fields added for DF4 and later.
+ *
+ * Major revision of '0' is found pre-DF4. Field is Read-as-Zero.
+ */
+ if (pci_read_config_dword(df_f0, DF_BLK_INST_CNT, &reg))
+ return 0;
+
+ if (reg & DF_MAJOR_REVISION)
+ return DF_CFG_ADDR_CNTL_DF4;
+
+ return DF_CFG_ADDR_CNTL_LEGACY;
+}
+
+struct pci_dev *amd_node_get_root(u16 node)
+{
+ struct pci_dev *df_f0 __free(pci_dev_put) = NULL;
+ struct pci_dev *root;
+ u16 cntl_off;
+ u8 bus;
+
+ if (!boot_cpu_has(X86_FEATURE_ZEN))
+ return NULL;
+
+ /*
+ * D18F0xXXX [Config Address Control] (DF::CfgAddressCntl)
+ * Bits [7:0] (SecBusNum) holds the bus number of the root device for
+ * this Data Fabric instance. The segment, device, and function will be 0.
+ */
+ df_f0 = amd_node_get_func(node, 0);
+ if (!df_f0)
+ return NULL;
+
+ cntl_off = get_cfg_addr_cntl_offset(df_f0);
+ if (!cntl_off)
+ return NULL;
+
+ if (pci_read_config_byte(df_f0, cntl_off, &bus))
+ return NULL;
+
+ /* Grab the pointer for the actual root device instance. */
+ root = pci_get_domain_bus_and_slot(0, bus, 0);
+
+ pci_dbg(root, "is root for AMD node %u\n", node);
+ return root;
+}
--
2.43.0
On Wed, Oct 23, 2024 at 05:21:40PM +0000, Yazen Ghannam wrote: > The "root" device search was introduced to support SMN access for Zen > systems. This device represents a PCIe root complex. It is not the > same as the "CPU/node" devices found at slots 0x18-0x1F. > > There may be multiple PCIe root complexes within an AMD node. Such is > the case with server or HEDT systems, etc. Therefore it is not enough to HEDT? ... > +struct pci_dev *amd_node_get_root(u16 node) > +{ > + struct pci_dev *df_f0 __free(pci_dev_put) = NULL; > + struct pci_dev *root; > + u16 cntl_off; > + u8 bus; > + > + if (!boot_cpu_has(X86_FEATURE_ZEN)) check_for_deprecated_apis: WARNING: arch/x86/kernel/amd_node.c:67: Do not use boot_cpu_has() - use cpu_feature_enabled() instead > + return NULL; -- Regards/Gruss, Boris. https://people.kernel.org/tglx/notes-about-netiquette
On Thu, Oct 31, 2024 at 12:20:27PM +0100, Borislav Petkov wrote: > On Wed, Oct 23, 2024 at 05:21:40PM +0000, Yazen Ghannam wrote: > > The "root" device search was introduced to support SMN access for Zen > > systems. This device represents a PCIe root complex. It is not the > > same as the "CPU/node" devices found at slots 0x18-0x1F. > > > > There may be multiple PCIe root complexes within an AMD node. Such is > > the case with server or HEDT systems, etc. Therefore it is not enough to > > HEDT? > Sorry, forgot to spell it: High-end Desktop. > ... > > > +struct pci_dev *amd_node_get_root(u16 node) > > +{ > > + struct pci_dev *df_f0 __free(pci_dev_put) = NULL; > > + struct pci_dev *root; > > + u16 cntl_off; > > + u8 bus; > > + > > + if (!boot_cpu_has(X86_FEATURE_ZEN)) > > check_for_deprecated_apis: WARNING: arch/x86/kernel/amd_node.c:67: Do not use boot_cpu_has() - use cpu_feature_enabled() instead > Ack. Thanks, Yazen
> From: Yazen Ghannam <yazen.ghannam@amd.com> > [...] > +struct pci_dev *amd_node_get_root(u16 node) { > + struct pci_dev *df_f0 __free(pci_dev_put) = NULL; NULL pointer initialization is not necessary. > + struct pci_dev *root; > + u16 cntl_off; > + u8 bus; > + > + if (!boot_cpu_has(X86_FEATURE_ZEN)) > + return NULL; > + > + /* > + * D18F0xXXX [Config Address Control] (DF::CfgAddressCntl) > + * Bits [7:0] (SecBusNum) holds the bus number of the root device for > + * this Data Fabric instance. The segment, device, and function will be > 0. > + */ > + df_f0 = amd_node_get_func(node, 0); > + if (!df_f0) > + return NULL; > [...]
On Thu, 31 Oct 2024, Zhuo, Qiuxu wrote: > > From: Yazen Ghannam <yazen.ghannam@amd.com> > > [...] > > +struct pci_dev *amd_node_get_root(u16 node) { > > + struct pci_dev *df_f0 __free(pci_dev_put) = NULL; > > NULL pointer initialization is not necessary. It is, because __free() is used... > > + struct pci_dev *root; > > + u16 cntl_off; > > + u8 bus; > > + > > + if (!boot_cpu_has(X86_FEATURE_ZEN)) > > + return NULL; ...This would try to free() whatever garbage df_f0 holds... > > + /* > > + * D18F0xXXX [Config Address Control] (DF::CfgAddressCntl) > > + * Bits [7:0] (SecBusNum) holds the bus number of the root device for > > + * this Data Fabric instance. The segment, device, and function will be > > 0. > > + */ > > + df_f0 = amd_node_get_func(node, 0); ...However, the recommended practice when using __free() is this (as documented in include/linux/cleanup.h): * Given that the "__free(...) = NULL" pattern for variables defined at * the top of the function poses this potential interdependency problem * the recommendation is to always define and assign variables in one * statement and not group variable definitions at the top of the * function when __free() is used. I know the outcome will look undesirable to some, me included, but there's little that can be done about that because there's no other way for the compiler to infer the order. That being said, strictly speaking it isn't causing an issue in this function as is but it's still a bad pattern to initialize to = NULL because in other instances it will cause problems. So better to steer away from the pattern entirely rather than depend on reviewers noticing that a cleanup ordering problem gets introduced by some later change to the function. > > + if (!df_f0) > > + return NULL; -- i.
> From: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com> > [...] > > > + if (!boot_cpu_has(X86_FEATURE_ZEN)) > > > + return NULL; > > ...This would try to free() whatever garbage df_f0 holds... Oops ... I missed this point. Thanks for correcting me. -Qiuxu
On Thu, Oct 31, 2024 at 12:08:20PM +0200, Ilpo Järvinen wrote: > On Thu, 31 Oct 2024, Zhuo, Qiuxu wrote: > > > > From: Yazen Ghannam <yazen.ghannam@amd.com> > > > [...] > > > +struct pci_dev *amd_node_get_root(u16 node) { > > > + struct pci_dev *df_f0 __free(pci_dev_put) = NULL; > > > > NULL pointer initialization is not necessary. > > It is, because __free() is used... > > > > + struct pci_dev *root; > > > + u16 cntl_off; > > > + u8 bus; > > > + > > > + if (!boot_cpu_has(X86_FEATURE_ZEN)) > > > + return NULL; > > ...This would try to free() whatever garbage df_f0 holds... > > > > + /* > > > + * D18F0xXXX [Config Address Control] (DF::CfgAddressCntl) > > > + * Bits [7:0] (SecBusNum) holds the bus number of the root device for > > > + * this Data Fabric instance. The segment, device, and function will be > > > 0. > > > + */ > > > + df_f0 = amd_node_get_func(node, 0); > > ...However, the recommended practice when using __free() is this (as > documented in include/linux/cleanup.h): > > * Given that the "__free(...) = NULL" pattern for variables defined at > * the top of the function poses this potential interdependency problem > * the recommendation is to always define and assign variables in one > * statement and not group variable definitions at the top of the > * function when __free() is used. > > I know the outcome will look undesirable to some, me included, but > there's little that can be done to that because there's no other way for > the compiler to infer the order. > > That being said, strictly speaking it isn't causing issue in this function > as is but it's still a bad pattern to initialize to = NULL because in > other instances it will cause problems. So better to steer away from the > pattern entirely rather than depend on reviewers noticing the a cleaup > ordering problem gets introduced by some later change to the function. > I originally read that in the context of using a guard(). But really we should do like this in any case, correct? 
struct pci_dev *df_f0 __free(pci_dev_put) = amd_node_get_func(node, 0); Thanks, Yazen
On Thu, 31 Oct 2024, Yazen Ghannam wrote: > On Thu, Oct 31, 2024 at 12:08:20PM +0200, Ilpo Järvinen wrote: > > On Thu, 31 Oct 2024, Zhuo, Qiuxu wrote: > > > > > > From: Yazen Ghannam <yazen.ghannam@amd.com> > > > > [...] > > > > +struct pci_dev *amd_node_get_root(u16 node) { > > > > + struct pci_dev *df_f0 __free(pci_dev_put) = NULL; > > > > > > NULL pointer initialization is not necessary. > > > > It is, because __free() is used... > > > > > > + struct pci_dev *root; > > > > + u16 cntl_off; > > > > + u8 bus; > > > > + > > > > + if (!boot_cpu_has(X86_FEATURE_ZEN)) > > > > + return NULL; > > > > ...This would try to free() whatever garbage df_f0 holds... > > > > > > + /* > > > > + * D18F0xXXX [Config Address Control] (DF::CfgAddressCntl) > > > > + * Bits [7:0] (SecBusNum) holds the bus number of the root device for > > > > + * this Data Fabric instance. The segment, device, and function will be > > > > 0. > > > > + */ > > > > + df_f0 = amd_node_get_func(node, 0); > > > > ...However, the recommended practice when using __free() is this (as > > documented in include/linux/cleanup.h): > > > > * Given that the "__free(...) = NULL" pattern for variables defined at > > * the top of the function poses this potential interdependency problem > > * the recommendation is to always define and assign variables in one > > * statement and not group variable definitions at the top of the > > * function when __free() is used. > > > > I know the outcome will look undesirable to some, me included, but > > there's little that can be done to that because there's no other way for > > the compiler to infer the order. > > > > That being said, strictly speaking it isn't causing issue in this function > > as is but it's still a bad pattern to initialize to = NULL because in > > other instances it will cause problems. 
So better to steer away from the > > pattern entirely rather than depend on reviewers noticing the a cleaup > > ordering problem gets introduced by some later change to the function. > > > > I originally read that in the context of using a guard(). But really we > should do like this in any case, correct? > > struct pci_dev *df_f0 __free(pci_dev_put) = amd_node_get_func(node, 0); Yes, that is the recommendation. It says "always" so not only the cases where guard() or other __free()s are used. Of course this only applies to use of __free(), other variables should still be declared in the usual place and not spread around. -- i.
On Thu, Oct 31, 2024 at 05:42:34PM +0200, Ilpo Järvinen wrote: > On Thu, 31 Oct 2024, Yazen Ghannam wrote: > > > On Thu, Oct 31, 2024 at 12:08:20PM +0200, Ilpo Järvinen wrote: > > > On Thu, 31 Oct 2024, Zhuo, Qiuxu wrote: > > > > > > > > From: Yazen Ghannam <yazen.ghannam@amd.com> > > > > > [...] > > > > > +struct pci_dev *amd_node_get_root(u16 node) { > > > > > + struct pci_dev *df_f0 __free(pci_dev_put) = NULL; > > > > > > > > NULL pointer initialization is not necessary. > > > > > > It is, because __free() is used... > > > > > > > > + struct pci_dev *root; > > > > > + u16 cntl_off; > > > > > + u8 bus; > > > > > + > > > > > + if (!boot_cpu_has(X86_FEATURE_ZEN)) > > > > > + return NULL; > > > > > > ...This would try to free() whatever garbage df_f0 holds... > > > > > > > > + /* > > > > > + * D18F0xXXX [Config Address Control] (DF::CfgAddressCntl) > > > > > + * Bits [7:0] (SecBusNum) holds the bus number of the root device for > > > > > + * this Data Fabric instance. The segment, device, and function will be > > > > > 0. > > > > > + */ > > > > > + df_f0 = amd_node_get_func(node, 0); > > > > > > ...However, the recommended practice when using __free() is this (as > > > documented in include/linux/cleanup.h): > > > > > > * Given that the "__free(...) = NULL" pattern for variables defined at > > > * the top of the function poses this potential interdependency problem > > > * the recommendation is to always define and assign variables in one > > > * statement and not group variable definitions at the top of the > > > * function when __free() is used. > > > > > > I know the outcome will look undesirable to some, me included, but > > > there's little that can be done to that because there's no other way for > > > the compiler to infer the order. > > > > > > That being said, strictly speaking it isn't causing issue in this function > > > as is but it's still a bad pattern to initialize to = NULL because in > > > other instances it will cause problems. 
So better to steer away from the > > > pattern entirely rather than depend on reviewers noticing the a cleaup > > > ordering problem gets introduced by some later change to the function. > > > > > > > I originally read that in the context of using a guard(). But really we > > should do like this in any case, correct? > > > > struct pci_dev *df_f0 __free(pci_dev_put) = amd_node_get_func(node, 0); > > Yes, that is the recommendation. It says "always" so not only the cases > where guard() or other __free()s are used. > > Of course this only applies to use of __free(), other variables should > still be declared in the usual place and not spread around. > Ah right. Will make the change. Thanks, Yazen
© 2016 - 2024 Red Hat, Inc.