[PATCH 6/6] PCI: of: Create device-tree root bus node

Herve Codina posted 6 patches 2 weeks, 5 days ago
[PATCH 6/6] PCI: of: Create device-tree root bus node
Posted by Herve Codina 2 weeks, 5 days ago
PCI devices device-tree nodes can be already created. This was
introduced by commit 407d1a51921e ("PCI: Create device tree node for
bridge").

In order to have device-tree nodes related to PCI devices attached on
their PCI root bus, a root bus device-tree node is needed. This root bus
node will be used as the parent node of the first level devices scanned
on the bus.

On non device-tree based system (such as ACPI), a device-tree node for
the PCI root bus does not exist. Indeed, this component is not described
in a device-tree used at boot.

The device-tree PCI root bus node creation needs to be done at runtime.
This is done in the same way as for the creation of the PCI device
nodes. I.e. node and properties are created based on computed
information done by the PCI core.

Signed-off-by: Herve Codina <herve.codina@bootlin.com>
---
 drivers/pci/of.c          |  85 +++++++++++++++++++++++++++++++-
 drivers/pci/of_property.c | 101 ++++++++++++++++++++++++++++++++++++++
 drivers/pci/pci.h         |   6 +++
 drivers/pci/probe.c       |   2 +
 drivers/pci/remove.c      |   2 +
 5 files changed, 195 insertions(+), 1 deletion(-)

diff --git a/drivers/pci/of.c b/drivers/pci/of.c
index 141ffbb1b3e6..46733a293c3f 100644
--- a/drivers/pci/of.c
+++ b/drivers/pci/of.c
@@ -726,7 +726,90 @@ void of_pci_make_dev_node(struct pci_dev *pdev)
 out_free_name:
 	kfree(name);
 }
-#endif
+
+void of_pci_remove_root_bus_node(struct pci_bus *bus)
+{
+	struct device_node *np;
+
+	np = pci_bus_to_OF_node(bus);
+	if (!np || !of_node_check_flag(np, OF_DYNAMIC))
+		return;
+
+	device_remove_of_node(&bus->dev);
+	of_changeset_revert(np->data);
+	of_changeset_destroy(np->data);
+	of_node_put(np);
+}
+
+void of_pci_make_root_bus_node(struct pci_bus *bus)
+{
+	struct device_node *np = NULL;
+	struct of_changeset *cset;
+	const char *name;
+	int ret;
+
+	/*
+	 * If there is already a device tree node linked to this device,
+	 * return immediately.
+	 */
+	if (pci_bus_to_OF_node(bus))
+		return;
+
+	/* Check if there is a DT root node to attach this created node */
+	if (!of_root) {
+		pr_err("of_root node is NULL, cannot create PCI root bus node");
+		return;
+	}
+
+	name = kasprintf(GFP_KERNEL, "pci-root@%x,%x", pci_domain_nr(bus),
+			 bus->number);
+	if (!name)
+		return;
+
+	cset = kmalloc(sizeof(*cset), GFP_KERNEL);
+	if (!cset)
+		goto out_free_name;
+	of_changeset_init(cset);
+
+	np = of_changeset_create_node(cset, of_root, name);
+	if (!np)
+		goto out_destroy_cset;
+
+	ret = of_pci_add_root_bus_properties(bus, cset, np);
+	if (ret)
+		goto out_free_node;
+
+	/*
+	 * This of_node will be added to an existing device. The of_node parent
+	 * is the root OF node and so this node will be handled by the platform
+	 * bus. Avoid any new device creation.
+	 */
+	of_node_set_flag(np, OF_POPULATED);
+	np->fwnode.dev = &bus->dev;
+	fwnode_dev_initialized(&np->fwnode, true);
+
+	ret = of_changeset_apply(cset);
+	if (ret)
+		goto out_free_node;
+
+	np->data = cset;
+
+	/* Add the of_node to the existing device */
+	device_add_of_node(&bus->dev, np);
+	kfree(name);
+
+	return;
+
+out_free_node:
+	of_node_put(np);
+out_destroy_cset:
+	of_changeset_destroy(cset);
+	kfree(cset);
+out_free_name:
+	kfree(name);
+}
+
+#endif /* CONFIG_PCI_DYNAMIC_OF_NODES */
 
 #endif /* CONFIG_PCI */
 
diff --git a/drivers/pci/of_property.c b/drivers/pci/of_property.c
index e56159cc48e8..527fc51565f3 100644
--- a/drivers/pci/of_property.c
+++ b/drivers/pci/of_property.c
@@ -394,3 +394,104 @@ int of_pci_add_properties(struct pci_dev *pdev, struct of_changeset *ocs,
 
 	return 0;
 }
+
+static bool of_pci_is_range_resource(const struct resource *res, u32 *flags)
+{
+	if (!(resource_type(res) & IORESOURCE_MEM) &&
+	    !(resource_type(res) & IORESOURCE_MEM_64))
+		return false;
+
+	if (of_pci_get_addr_flags(res, flags))
+		return false;
+
+	return true;
+}
+
+static int of_pci_root_bus_prop_ranges(struct pci_bus *bus,
+				       struct of_changeset *ocs,
+				       struct device_node *np)
+{
+	struct pci_host_bridge *bridge = to_pci_host_bridge(bus->bridge);
+	struct resource_entry *window;
+	unsigned int ranges_sz = 0;
+	unsigned int n_range = 0;
+	struct resource *res;
+	int n_addr_cells;
+	u32 *ranges;
+	u64 val64;
+	u32 flags;
+	int ret;
+
+	n_addr_cells = of_n_addr_cells(np);
+	if (n_addr_cells <= 0 || n_addr_cells > 2)
+		return -EINVAL;
+
+	resource_list_for_each_entry(window, &bridge->windows) {
+		res = window->res;
+		if (!of_pci_is_range_resource(res, &flags))
+			continue;
+		n_range++;
+	}
+
+	if (!n_range)
+		return 0;
+
+	ranges = kcalloc(n_range,
+			 (OF_PCI_ADDRESS_CELLS + OF_PCI_SIZE_CELLS +
+			  n_addr_cells) * sizeof(*ranges),
+			 GFP_KERNEL);
+	if (!ranges)
+		return -ENOMEM;
+
+	resource_list_for_each_entry(window, &bridge->windows) {
+		res = window->res;
+		if (!of_pci_is_range_resource(res, &flags))
+			continue;
+
+		/* PCI bus address */
+		val64 = res->start;
+		of_pci_set_address(NULL, &ranges[ranges_sz], val64, 0, flags, false);
+		ranges_sz += OF_PCI_ADDRESS_CELLS;
+
+		/* Host bus address */
+		if (n_addr_cells == 2)
+			ranges[ranges_sz++] = upper_32_bits(val64);
+		ranges[ranges_sz++] = lower_32_bits(val64);
+
+		/* Size */
+		val64 = resource_size(res);
+		ranges[ranges_sz] = upper_32_bits(val64);
+		ranges[ranges_sz + 1] = lower_32_bits(val64);
+		ranges_sz += OF_PCI_SIZE_CELLS;
+	}
+
+	ret = of_changeset_add_prop_u32_array(ocs, np, "ranges", ranges, ranges_sz);
+	kfree(ranges);
+	return ret;
+}
+
+int of_pci_add_root_bus_properties(struct pci_bus *bus, struct of_changeset *ocs,
+				   struct device_node *np)
+{
+	int ret;
+
+	ret = of_changeset_add_prop_string(ocs, np, "device_type", "pci");
+	if (ret)
+		return ret;
+
+	ret = of_changeset_add_prop_u32(ocs, np, "#address-cells",
+					OF_PCI_ADDRESS_CELLS);
+	if (ret)
+		return ret;
+
+	ret = of_changeset_add_prop_u32(ocs, np, "#size-cells",
+					OF_PCI_SIZE_CELLS);
+	if (ret)
+		return ret;
+
+	ret = of_pci_root_bus_prop_ranges(bus, ocs, np);
+	if (ret)
+		return ret;
+
+	return 0;
+}
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 14d00ce45bfa..56e450807d5d 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -802,9 +802,15 @@ void of_pci_make_dev_node(struct pci_dev *pdev);
 void of_pci_remove_node(struct pci_dev *pdev);
 int of_pci_add_properties(struct pci_dev *pdev, struct of_changeset *ocs,
 			  struct device_node *np);
+void of_pci_make_root_bus_node(struct pci_bus *bus);
+void of_pci_remove_root_bus_node(struct pci_bus *bus);
+int of_pci_add_root_bus_properties(struct pci_bus *bus, struct of_changeset *ocs,
+				   struct device_node *np);
 #else
 static inline void of_pci_make_dev_node(struct pci_dev *pdev) { }
 static inline void of_pci_remove_node(struct pci_dev *pdev) { }
+static inline void of_pci_make_root_bus_node(struct pci_bus *bus) { }
+static inline void of_pci_remove_root_bus_node(struct pci_bus *bus) { }
 #endif
 
 #ifdef CONFIG_PCIEAER
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 4f68414c3086..063780bec45e 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -1049,6 +1049,8 @@ static int pci_register_host_bridge(struct pci_host_bridge *bridge)
 		dev_info(&bus->dev, "root bus resource %pR%s\n", res, addr);
 	}
 
+	of_pci_make_root_bus_node(bus);
+
 	down_write(&pci_bus_sem);
 	list_add_tail(&bus->node, &pci_root_buses);
 	up_write(&pci_bus_sem);
diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c
index e4ce1145aa3e..80cbe02f66b2 100644
--- a/drivers/pci/remove.c
+++ b/drivers/pci/remove.c
@@ -160,6 +160,8 @@ void pci_stop_root_bus(struct pci_bus *bus)
 					 &bus->devices, bus_list)
 		pci_stop_bus_device(child);
 
+	of_pci_remove_root_bus_node(bus);
+
 	/* stop the host bridge */
 	device_release_driver(&host_bridge->dev);
 }
-- 
2.46.2
Re: [PATCH 6/6] PCI: of: Create device-tree root bus node
Posted by Bjorn Helgaas 2 weeks, 4 days ago
On Mon, Nov 04, 2024 at 06:20:00PM +0100, Herve Codina wrote:
> PCI devices device-tree nodes can be already created. This was
> introduced by commit 407d1a51921e ("PCI: Create device tree node for
> bridge").

I guess 407d1a51921e creates device tree nodes for bridges, including
Root Ports, which are enumerated as PCI-to-PCI bridges, right?

> In order to have device-tree nodes related to PCI devices attached on
> their PCI root bus, a root bus device-tree node is needed. This root bus
> node will be used as the parent node of the first level devices scanned
> on the bus.
>
> On non device-tree based system (such as ACPI), a device-tree node for
> the PCI root bus does not exist.  ...

I'm wondering if "root bus" is the right description for this patch
and whether "PCI host bridge" might be more accurate.  The bus itself
doesn't really have a physical counterpart other than being the
secondary side of a PCI host bridge where the primary side is some
kind of CPU bus.

An ACPI namespace doesn't include a "root bus" object, but it *does*
include a PCI host bridge (PNP0A03) object, which is where any address
translation between the CPU bus and the PCI hierarchy is described.

I suspect this patch is adding a DT node that corresponds to the
PNP0A03 host bridge object, and the "ranges" property of the new node
will describe the mapping from the CPU address space to the PCI
address space.

> Indeed, this component is not described
> in a device-tree used at boot.

But maybe I'm on the wrong track, because obviously PCI host
controllers *are* described in DTs used at boot.

> The device-tree PCI root bus node creation needs to be done at runtime.
> This is done in the same way as for the creation of the PCI device
> nodes. I.e. node and properties are created based on computed
> information done by the PCI core.

See address translation question below.

> +void of_pci_make_root_bus_node(struct pci_bus *bus)
> +{
> +	struct device_node *np = NULL;
> +	struct of_changeset *cset;
> +	const char *name;
> +	int ret;
> +
> +	/*
> +	 * If there is already a device tree node linked to this device,
> +	 * return immediately.
> +	 */
> +	if (pci_bus_to_OF_node(bus))
> +		return;
> +
> +	/* Check if there is a DT root node to attach this created node */
> +	if (!of_root) {
> +		pr_err("of_root node is NULL, cannot create PCI root bus node");
> +		return;
> +	}
> +
> +	name = kasprintf(GFP_KERNEL, "pci-root@%x,%x", pci_domain_nr(bus),
> +			 bus->number);

Should this be "pci%d@%x,%x" to match the typical descriptions of PCI
host bridges in DT?

> +static int of_pci_root_bus_prop_ranges(struct pci_bus *bus,
> +				       struct of_changeset *ocs,
> +				       struct device_node *np)
> +{
> +	struct pci_host_bridge *bridge = to_pci_host_bridge(bus->bridge);
> +	struct resource_entry *window;
> +	unsigned int ranges_sz = 0;
> +	unsigned int n_range = 0;
> +	struct resource *res;
> +	int n_addr_cells;
> +	u32 *ranges;
> +	u64 val64;
> +	u32 flags;
> +	int ret;
> +
> +	n_addr_cells = of_n_addr_cells(np);
> +	if (n_addr_cells <= 0 || n_addr_cells > 2)
> +		return -EINVAL;
> +
> +	resource_list_for_each_entry(window, &bridge->windows) {
> +		res = window->res;
> +		if (!of_pci_is_range_resource(res, &flags))
> +			continue;
> +		n_range++;
> +	}
> +
> +	if (!n_range)
> +		return 0;
> +
> +	ranges = kcalloc(n_range,
> +			 (OF_PCI_ADDRESS_CELLS + OF_PCI_SIZE_CELLS +
> +			  n_addr_cells) * sizeof(*ranges),
> +			 GFP_KERNEL);
> +	if (!ranges)
> +		return -ENOMEM;
> +
> +	resource_list_for_each_entry(window, &bridge->windows) {
> +		res = window->res;
> +		if (!of_pci_is_range_resource(res, &flags))
> +			continue;
> +
> +		/* PCI bus address */
> +		val64 = res->start;
> +		of_pci_set_address(NULL, &ranges[ranges_sz], val64, 0, flags, false);
> +		ranges_sz += OF_PCI_ADDRESS_CELLS;
> +
> +		/* Host bus address */
> +		if (n_addr_cells == 2)
> +			ranges[ranges_sz++] = upper_32_bits(val64);
> +		ranges[ranges_sz++] = lower_32_bits(val64);

IIUC this sets both the parent address (the host bus (CPU) physical
address) and the child address (PCI bus address) to the same value.

I think that's wrong because these addresses need not be identical.

I think the parent address should be the res->start value, and the
child address should be "res->start - window->offset", similar to
what's done by pcibios_resource_to_bus().

> +		/* Size */
> +		val64 = resource_size(res);
> +		ranges[ranges_sz] = upper_32_bits(val64);
> +		ranges[ranges_sz + 1] = lower_32_bits(val64);
> +		ranges_sz += OF_PCI_SIZE_CELLS;
> +	}
> +
> +	ret = of_changeset_add_prop_u32_array(ocs, np, "ranges", ranges, ranges_sz);
> +	kfree(ranges);
> +	return ret;
> +}
Re: [PATCH 6/6] PCI: of: Create device-tree root bus node
Posted by Herve Codina 2 weeks, 4 days ago
On Tue, 5 Nov 2024 12:59:01 -0600
Bjorn Helgaas <helgaas@kernel.org> wrote:

> On Mon, Nov 04, 2024 at 06:20:00PM +0100, Herve Codina wrote:
> > PCI devices device-tree nodes can be already created. This was
> > introduced by commit 407d1a51921e ("PCI: Create device tree node for
> > bridge").  
> 
> I guess 407d1a51921e creates device tree nodes for bridges, including
> Root Ports, which are enumerated as PCI-to-PCI bridges, right?

It creates DT nodes for bridges and devices.

> 
> > In order to have device-tree nodes related to PCI devices attached on
> > their PCI root bus, a root bus device-tree node is needed. This root bus
> > node will be used as the parent node of the first level devices scanned
> > on the bus.
> >
> > On non device-tree based system (such as ACPI), a device-tree node for
> > the PCI root bus does not exist.  ...  
> 
> I'm wondering if "root bus" is the right description for this patch
> and whether "PCI host bridge" might be more accurate.  The bus itself
> doesn't really have a physical counterpart other than being the
> secondary side of a PCI host bridge where the primary side is some
> kind of CPU bus.

Indeed, you're right. I will rename "root bus" to "host bridge" everywhere
in the patch (description but also the code itself).

> 
> An ACPI namespace doesn't include a "root bus" object, but it *does*
> include a PCI host bridge (PNP0A03) object, which is where any address
> translation between the CPU bus and the PCI hierarchy is described.
> 
> I suspect this patch is adding a DT node that corresponds to the
> PNP0A03 host bridge object, and the "ranges" property of the new node
> will describe the mapping from the CPU address space to the PCI
> address space.

Exactly.

> 
> > Indeed, this component is not described
> > in a device-tree used at boot.  
> 
> But maybe I'm on the wrong track, because obviously PCI host
> controllers *are* described in DTs used at boot.

They are described in a device-tree used at boot on device-tree based
systems.
On x86, we are on ACPI based system -> No DT used at boot -> PCI host
controller not described in DT.

I could replace with "Indeed, this component is not described in a
device-tree used at boot because, in that case, device-tree is not
used to describe the hardware"

> 
> > The device-tree PCI root bus node creation needs to be done at runtime.
> > This is done in the same way as for the creation of the PCI device
> > nodes. I.e. node and properties are created based on computed
> > information done by the PCI core.  
> 
> See address translation question below.
> 
> > +void of_pci_make_root_bus_node(struct pci_bus *bus)
> > +{
> > +	struct device_node *np = NULL;
> > +	struct of_changeset *cset;
> > +	const char *name;
> > +	int ret;
> > +
> > +	/*
> > +	 * If there is already a device tree node linked to this device,
> > +	 * return immediately.
> > +	 */
> > +	if (pci_bus_to_OF_node(bus))
> > +		return;
> > +
> > +	/* Check if there is a DT root node to attach this created node */
> > +	if (!of_root) {
> > +		pr_err("of_root node is NULL, cannot create PCI root bus node");
> > +		return;
> > +	}
> > +
> > +	name = kasprintf(GFP_KERNEL, "pci-root@%x,%x", pci_domain_nr(bus),
> > +			 bus->number);  
> 
> Should this be "pci%d@%x,%x" to match the typical descriptions of PCI
> host bridges in DT?

What do you think I should use for the %d you proposed.
Also I supposed your "@%x,%x" is still pci_domain_nr(bus), bus->number.

> 
> > +static int of_pci_root_bus_prop_ranges(struct pci_bus *bus,
> > +				       struct of_changeset *ocs,
> > +				       struct device_node *np)
> > +{
> > +	struct pci_host_bridge *bridge = to_pci_host_bridge(bus->bridge);
> > +	struct resource_entry *window;
> > +	unsigned int ranges_sz = 0;
> > +	unsigned int n_range = 0;
> > +	struct resource *res;
> > +	int n_addr_cells;
> > +	u32 *ranges;
> > +	u64 val64;
> > +	u32 flags;
> > +	int ret;
> > +
> > +	n_addr_cells = of_n_addr_cells(np);
> > +	if (n_addr_cells <= 0 || n_addr_cells > 2)
> > +		return -EINVAL;
> > +
> > +	resource_list_for_each_entry(window, &bridge->windows) {
> > +		res = window->res;
> > +		if (!of_pci_is_range_resource(res, &flags))
> > +			continue;
> > +		n_range++;
> > +	}
> > +
> > +	if (!n_range)
> > +		return 0;
> > +
> > +	ranges = kcalloc(n_range,
> > +			 (OF_PCI_ADDRESS_CELLS + OF_PCI_SIZE_CELLS +
> > +			  n_addr_cells) * sizeof(*ranges),
> > +			 GFP_KERNEL);
> > +	if (!ranges)
> > +		return -ENOMEM;
> > +
> > +	resource_list_for_each_entry(window, &bridge->windows) {
> > +		res = window->res;
> > +		if (!of_pci_is_range_resource(res, &flags))
> > +			continue;
> > +
> > +		/* PCI bus address */
> > +		val64 = res->start;
> > +		of_pci_set_address(NULL, &ranges[ranges_sz], val64, 0, flags, false);
> > +		ranges_sz += OF_PCI_ADDRESS_CELLS;
> > +
> > +		/* Host bus address */
> > +		if (n_addr_cells == 2)
> > +			ranges[ranges_sz++] = upper_32_bits(val64);
> > +		ranges[ranges_sz++] = lower_32_bits(val64);  
> 
> IIUC this sets both the parent address (the host bus (CPU) physical
> address) and the child address (PCI bus address) to the same value.
> 
> I think that's wrong because these addresses need not be identical.
> 
> I think the parent address should be the res->start value, and the
> child address should be "res->start - window->offset", similar to
> what's done by pcibios_resource_to_bus().

I see.
I will update in the next iteration.

Thanks for your feedback.
Best regards,
Hervé
Re: [PATCH 6/6] PCI: of: Create device-tree root bus node
Posted by Bjorn Helgaas 2 weeks, 3 days ago
On Wed, Nov 06, 2024 at 03:53:53PM +0100, Herve Codina wrote:
> On Tue, 5 Nov 2024 12:59:01 -0600
> Bjorn Helgaas <helgaas@kernel.org> wrote:
> > On Mon, Nov 04, 2024 at 06:20:00PM +0100, Herve Codina wrote:
> > > PCI devices device-tree nodes can be already created. This was
> > > introduced by commit 407d1a51921e ("PCI: Create device tree node for
> > > bridge").  
> ...

> > > Indeed, this component is not described
> > > in a device-tree used at boot.  
> > 
> > But maybe I'm on the wrong track, because obviously PCI host
> > controllers *are* described in DTs used at boot.
> 
> They are described in a device-tree used at boot on device-tree based
> systems.
> On x86, we are on ACPI based system -> No DT used at boot -> PCI host
> controller not described in DT.

Right, I was thinking of the devicetree-based systems, where the host
controller must be described in DT.

> > > +	name = kasprintf(GFP_KERNEL, "pci-root@%x,%x", pci_domain_nr(bus),
> > > +			 bus->number);  
> > 
> > Should this be "pci%d@%x,%x" to match the typical descriptions of PCI
> > host bridges in DT?
> 
> What do you think I should use for the %d you proposed.

Based on the .dts files, I think the %d is just an index to
distinguish multiple PCI host bridges.  Maybe that's not relevant
here, I dunno.

> Also I supposed your "@%x,%x" is still pci_domain_nr(bus), bus->number.

Yes.  I think we're basically constructing a DT node to correspond to
an ACPI PNP0A03 device.  ACPI does support updating the root bus
number via _CRS/_PRS/_SRS, but Linux doesn't have support for that, so
the root bus number is basically constant.  The pci_domain_nr(bus)
should be coming from _SEG, and that's definitely constant.

Bjorn