[PATCH net-next v22 2/2] mctp pcc: Implement MCTP over PCC Transport
Posted by admiyo@os.amperecomputing.com 2 months, 4 weeks ago
From: Adam Young <admiyo@os.amperecomputing.com>

Implementation of a network driver for the
Management Component Transport Protocol (MCTP)
over the Platform Communication Channel (PCC).

DMTF DSP0292
https://www.dmtf.org/sites/default/files/standards/documents/\
DSP0292_1.0.0WIP50.pdf

MCTP devices are specified via ACPI by entries
in the DSDT/SSDT and reference channels specified
in the PCCT.  Messages are sent on a type 3
channel and received on a type 4 channel.
Communication with other devices uses the
PCC-based doorbell mechanism: a shared memory
segment with a corresponding interrupt, and a
memory register used to trigger remote
interrupts.

This driver takes advantage of PCC mailbox buffer
management. The data section of the struct sk_buff
that contains the outgoing packet is sent to the
mailbox, already properly formatted as a PCC
message.  The driver is also responsible for
allocating a struct sk_buff that is then passed to
the mailbox and used to record the data in the
shared buffer. It maintains a list of both
outgoing and incoming sk_buffs to match the data
buffers with the original sk_buffs.

When the type 3 channel outbox receives a txdone
response interrupt, it consumes the outgoing
sk_buff, allowing it to be freed.

Signed-off-by: Adam Young <admiyo@os.amperecomputing.com>
---
 MAINTAINERS                 |   5 +
 drivers/net/mctp/Kconfig    |  13 ++
 drivers/net/mctp/Makefile   |   1 +
 drivers/net/mctp/mctp-pcc.c | 346 ++++++++++++++++++++++++++++++++++++
 4 files changed, 365 insertions(+)
 create mode 100644 drivers/net/mctp/mctp-pcc.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 881a1f08e665..455618ee93b6 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -14467,6 +14467,11 @@ F:	include/net/mctpdevice.h
 F:	include/net/netns/mctp.h
 F:	net/mctp/
 
+MANAGEMENT COMPONENT TRANSPORT PROTOCOL (MCTP) over PCC (MCTP-PCC) Driver
+M:	Adam Young <admiyo@os.amperecomputing.com>
+S:	Maintained
+F:	drivers/net/mctp/mctp-pcc.c
+
 MAPLE TREE
 M:	Liam R. Howlett <Liam.Howlett@oracle.com>
 L:	maple-tree@lists.infradead.org
diff --git a/drivers/net/mctp/Kconfig b/drivers/net/mctp/Kconfig
index cf325ab0b1ef..f69d0237f058 100644
--- a/drivers/net/mctp/Kconfig
+++ b/drivers/net/mctp/Kconfig
@@ -57,6 +57,19 @@ config MCTP_TRANSPORT_USB
 	  MCTP-over-USB interfaces are peer-to-peer, so each interface
 	  represents a physical connection to one remote MCTP endpoint.
 
+config MCTP_TRANSPORT_PCC
+	tristate "MCTP PCC transport"
+	depends on ACPI
+	help
+	  Provides a driver to access MCTP devices over the PCC transport.
+	  An MCTP network device is created via ACPI for each
+	  entry in the DSDT/SSDT that matches the identifier. The platform
+	  communication channels are selected from the corresponding
+	  entries in the PCCT.
+
+	  Say y here if you need to connect to MCTP endpoints over PCC. To
+	  compile as a module, use m; the module will be called mctp-pcc.
+
 endmenu
 
 endif
diff --git a/drivers/net/mctp/Makefile b/drivers/net/mctp/Makefile
index c36006849a1e..2276f148df7c 100644
--- a/drivers/net/mctp/Makefile
+++ b/drivers/net/mctp/Makefile
@@ -1,3 +1,4 @@
+obj-$(CONFIG_MCTP_TRANSPORT_PCC) += mctp-pcc.o
 obj-$(CONFIG_MCTP_SERIAL) += mctp-serial.o
 obj-$(CONFIG_MCTP_TRANSPORT_I2C) += mctp-i2c.o
 obj-$(CONFIG_MCTP_TRANSPORT_I3C) += mctp-i3c.o
diff --git a/drivers/net/mctp/mctp-pcc.c b/drivers/net/mctp/mctp-pcc.c
new file mode 100644
index 000000000000..15ed449071d5
--- /dev/null
+++ b/drivers/net/mctp/mctp-pcc.c
@@ -0,0 +1,346 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * mctp-pcc.c - Driver for MCTP over PCC.
+ * Copyright (c) 2024, Ampere Computing LLC
+ */
+
+/* Implementation of MCTP over PCC DMTF Specification DSP0256
+ * https://www.dmtf.org/sites/default/files/standards/documents/DSP0256_2.0.0WIP50.pdf
+ */
+
+#include <linux/acpi.h>
+#include <linux/if_arp.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/platform_device.h>
+#include <linux/string.h>
+#include <linux/skbuff.h>
+#include <linux/hrtimer.h>
+
+#include <acpi/acpi_bus.h>
+#include <acpi/acpi_drivers.h>
+#include <acpi/acrestyp.h>
+#include <acpi/actbl.h>
+#include <net/mctp.h>
+#include <net/mctpdevice.h>
+#include <acpi/pcc.h>
+
+#include "../../mailbox/mailbox.h"
+
+#define MCTP_PAYLOAD_LENGTH     256
+#define MCTP_CMD_LENGTH         4
+#define MCTP_PCC_VERSION        0x1 /* DSP0292 a single version: 1 */
+#define MCTP_SIGNATURE          "MCTP"
+#define MCTP_SIGNATURE_LENGTH   (sizeof(MCTP_SIGNATURE) - 1)
+#define MCTP_MIN_MTU            68
+#define PCC_DWORD_TYPE          0x0c
+
+struct mctp_pcc_mailbox {
+	u32 index;
+	struct pcc_mbox_chan *chan;
+	struct mbox_client client;
+	struct sk_buff_head packets;
+};
+
+/* The netdev structure. One of these per PCC adapter. */
+struct mctp_pcc_ndev {
+	/* spinlock to serialize access to PCC outbox buffer and registers
+	 * Note that what PCC calls registers are memory locations, not CPU
+	 * Registers.  They include the fields used to synchronize access
+	 * between the OS and remote endpoints.
+	 *
+	 * Only the Outbox needs a spinlock, to prevent multiple
+	 * sent packets triggering multiple attempts to over write
+	 * the outbox.  The Inbox buffer is controlled by the remote
+	 * service and a spinlock would have no effect.
+	 */
+	spinlock_t lock;
+	struct mctp_dev mdev;
+	struct acpi_device *acpi_device;
+	struct mctp_pcc_mailbox inbox;
+	struct mctp_pcc_mailbox outbox;
+};
+
+static void *mctp_pcc_rx_alloc(struct mbox_client *c, int size)
+{
+	struct mctp_pcc_mailbox *box;
+	struct mctp_pcc_ndev *mctp_pcc_ndev;
+	struct sk_buff *skb;
+	void *skb_buf;
+
+	box = container_of(c, struct mctp_pcc_mailbox, client);
+	mctp_pcc_ndev = container_of(c, struct mctp_pcc_ndev, inbox.client);
+	if (size > mctp_pcc_ndev->mdev.dev->mtu)
+		return NULL;
+	mctp_pcc_ndev = container_of(c, struct mctp_pcc_ndev, inbox.client);
+	skb = netdev_alloc_skb(mctp_pcc_ndev->mdev.dev, size);
+	if (!skb)
+		return NULL;
+	skb_buf = skb_put(skb, size);
+	skb->protocol = htons(ETH_P_MCTP);
+
+	skb_queue_head(&box->packets, skb);
+
+	return skb->data;
+}
+
+static void mctp_pcc_client_rx_callback(struct mbox_client *c, void *buffer)
+{
+	struct mctp_pcc_ndev *mctp_pcc_ndev;
+	struct pcc_header pcc_header;
+	struct mctp_skb_cb *cb;
+	struct sk_buff *skb;
+
+	mctp_pcc_ndev = container_of(c, struct mctp_pcc_ndev, inbox.client);
+	if (!buffer) {
+		dev_dstats_rx_dropped(mctp_pcc_ndev->mdev.dev);
+		return;
+	}
+
+	skb_queue_walk(&mctp_pcc_ndev->inbox.packets, skb) {
+		if (skb->data == buffer) {
+			skb_unlink(skb, &mctp_pcc_ndev->inbox.packets);
+			dev_dstats_rx_add(mctp_pcc_ndev->mdev.dev, skb->len);
+			skb_reset_mac_header(skb);
+			skb_pull(skb, sizeof(pcc_header));
+			skb_reset_network_header(skb);
+			cb = __mctp_cb(skb);
+			cb->halen = 0;
+			netif_rx(skb);
+			return;
+		}
+	}
+	pr_warn("Unmatched packet in mctp-pcc inbox packet list");
+}
+
+static void mctp_pcc_tx_done(struct mbox_client *c, void *mssg, int r)
+{
+	struct mctp_pcc_mailbox *box;
+	struct sk_buff *skb;
+
+	box = container_of(c, struct mctp_pcc_mailbox, client);
+	skb_queue_walk(&box->packets, skb) {
+		if (skb->data == mssg) {
+			skb_unlink(skb, &box->packets);
+			dev_consume_skb_any(skb);
+			break;
+		}
+	}
+}
+
+static netdev_tx_t mctp_pcc_tx(struct sk_buff *skb, struct net_device *ndev)
+{
+	struct mctp_pcc_ndev *mpnd = netdev_priv(ndev);
+	struct pcc_header *pcc_header;
+	int len = skb->len;
+	int rc;
+
+	rc = skb_cow_head(skb, sizeof(*pcc_header));
+	if (rc)
+		goto err_drop;
+
+	pcc_header = skb_push(skb, sizeof(*pcc_header));
+	pcc_header->signature = cpu_to_le32(PCC_SIGNATURE | mpnd->outbox.index);
+	pcc_header->flags = cpu_to_le32(PCC_CMD_COMPLETION_NOTIFY);
+	memcpy(&pcc_header->command, MCTP_SIGNATURE, MCTP_SIGNATURE_LENGTH);
+	pcc_header->length = cpu_to_le32(len + MCTP_SIGNATURE_LENGTH);
+
+	skb_queue_head(&mpnd->outbox.packets, skb);
+
+	rc = mbox_send_message(mpnd->outbox.chan->mchan, skb->data);
+
+	if (rc < 0) {
+		pr_info("%s fail, rc = %d", __func__, rc);
+		return NETDEV_TX_BUSY;
+	}
+	dev_dstats_tx_add(ndev, len);
+	return NETDEV_TX_OK;
+err_drop:
+	dev_dstats_tx_dropped(ndev);
+	kfree_skb(skb);
+	return NETDEV_TX_OK;
+}
+
+static const struct net_device_ops mctp_pcc_netdev_ops = {
+	.ndo_start_xmit = mctp_pcc_tx,
+};
+
+static const struct mctp_netdev_ops mctp_netdev_ops = {
+	NULL
+};
+
+static void mctp_pcc_setup(struct net_device *ndev)
+{
+	ndev->type = ARPHRD_MCTP;
+	ndev->hard_header_len = 0;
+	ndev->tx_queue_len = 0;
+	ndev->flags = IFF_NOARP;
+	ndev->netdev_ops = &mctp_pcc_netdev_ops;
+	ndev->needs_free_netdev = true;
+	ndev->pcpu_stat_type = NETDEV_PCPU_STAT_DSTATS;
+}
+
+struct mctp_pcc_lookup_context {
+	int index;
+	u32 inbox_index;
+	u32 outbox_index;
+};
+
+static acpi_status lookup_pcct_indices(struct acpi_resource *ares,
+				       void *context)
+{
+	struct mctp_pcc_lookup_context *luc = context;
+	struct acpi_resource_address32 *addr;
+
+	if (ares->type != PCC_DWORD_TYPE)
+		return AE_OK;
+
+	addr = ACPI_CAST_PTR(struct acpi_resource_address32, &ares->data);
+	switch (luc->index) {
+	case 0:
+		luc->outbox_index = addr[0].address.minimum;
+		break;
+	case 1:
+		luc->inbox_index = addr[0].address.minimum;
+		break;
+	}
+	luc->index++;
+	return AE_OK;
+}
+
+static void drain_packets(struct sk_buff_head *list)
+{
+	struct sk_buff *skb;
+
+	while (!skb_queue_empty(list)) {
+		skb = skb_dequeue(list);
+		dev_consume_skb_any(skb);
+	}
+}
+
+static void mctp_cleanup_netdev(void *data)
+{
+	struct mctp_pcc_ndev *mctp_pcc_ndev;
+	struct net_device *ndev = data;
+
+	mctp_pcc_ndev = netdev_priv(ndev);
+	drain_packets(&mctp_pcc_ndev->outbox.packets);
+	drain_packets(&mctp_pcc_ndev->inbox.packets);
+
+	mctp_unregister_netdev(ndev);
+}
+
+static void mctp_cleanup_channel(void *data)
+{
+	struct pcc_mbox_chan *chan = data;
+
+	pcc_mbox_free_channel(chan);
+}
+
+static int mctp_pcc_initialize_mailbox(struct device *dev,
+				       struct mctp_pcc_mailbox *box, u32 index)
+{
+	box->index = index;
+	skb_queue_head_init(&box->packets);
+	box->chan = pcc_mbox_request_channel(&box->client, index);
+	box->chan->rx_alloc = mctp_pcc_rx_alloc;
+
+	box->client.dev = dev;
+	if (IS_ERR(box->chan))
+		return PTR_ERR(box->chan);
+	return devm_add_action_or_reset(dev, mctp_cleanup_channel, box->chan);
+}
+
+static int mctp_pcc_driver_add(struct acpi_device *acpi_dev)
+{
+	struct mctp_pcc_lookup_context context = {0};
+	struct mctp_pcc_ndev *mctp_pcc_ndev;
+	struct device *dev = &acpi_dev->dev;
+	struct net_device *ndev;
+	acpi_handle dev_handle;
+	acpi_status status;
+	int mctp_pcc_mtu;
+	char name[32];
+	int rc;
+
+	dev_dbg(dev, "Adding mctp_pcc device for HID %s\n",
+		acpi_device_hid(acpi_dev));
+	dev_handle = acpi_device_handle(acpi_dev);
+	status = acpi_walk_resources(dev_handle, "_CRS", lookup_pcct_indices,
+				     &context);
+	if (!ACPI_SUCCESS(status)) {
+		dev_err(dev, "FAILURE to lookup PCC indexes from CRS\n");
+		return -EINVAL;
+	}
+
+	/* inbox initialization */
+	snprintf(name, sizeof(name), "mctpipcc%d", context.inbox_index);
+	ndev = alloc_netdev(sizeof(*mctp_pcc_ndev), name, NET_NAME_PREDICTABLE,
+			    mctp_pcc_setup);
+	if (!ndev)
+		return -ENOMEM;
+
+	mctp_pcc_ndev = netdev_priv(ndev);
+	spin_lock_init(&mctp_pcc_ndev->lock);
+	rc = mctp_pcc_initialize_mailbox(dev, &mctp_pcc_ndev->inbox,
+					 context.inbox_index);
+	if (rc)
+		goto free_netdev;
+	mctp_pcc_ndev->inbox.client.rx_callback = mctp_pcc_client_rx_callback;
+
+	/* outbox initialization */
+	rc = mctp_pcc_initialize_mailbox(dev, &mctp_pcc_ndev->outbox,
+					 context.outbox_index);
+	if (rc)
+		goto free_netdev;
+
+	mctp_pcc_ndev->outbox.client.tx_done = mctp_pcc_tx_done;
+	mctp_pcc_ndev->acpi_device = acpi_dev;
+	mctp_pcc_ndev->mdev.dev = ndev;
+	acpi_dev->driver_data = mctp_pcc_ndev;
+
+	/* There is no clean way to pass the MTU to the callback function
+	 * used for registration, so set the values ahead of time.
+	 */
+	mctp_pcc_mtu = mctp_pcc_ndev->outbox.chan->shmem_size -
+		sizeof(struct pcc_header);
+	ndev->mtu = MCTP_MIN_MTU;
+	ndev->max_mtu = mctp_pcc_mtu;
+	ndev->min_mtu = MCTP_MIN_MTU;
+
+	/* ndev needs to be freed before the iomemory (mapped above) gets
+	 * unmapped,  devm resources get freed in reverse to the order they
+	 * are added.
+	 */
+	rc = mctp_register_netdev(ndev, &mctp_netdev_ops, MCTP_PHYS_BINDING_PCC);
+	if (rc)
+		goto free_netdev;
+	return devm_add_action_or_reset(dev, mctp_cleanup_netdev, ndev);
+free_netdev:
+	free_netdev(ndev);
+	return rc;
+}
+
+static const struct acpi_device_id mctp_pcc_device_ids[] = {
+	{ "DMT0001" },
+	{}
+};
+
+static struct acpi_driver mctp_pcc_driver = {
+	.name = "mctp_pcc",
+	.class = "Unknown",
+	.ids = mctp_pcc_device_ids,
+	.ops = {
+		.add = mctp_pcc_driver_add,
+	},
+};
+
+module_acpi_driver(mctp_pcc_driver);
+
+MODULE_DEVICE_TABLE(acpi, mctp_pcc_device_ids);
+
+MODULE_DESCRIPTION("MCTP PCC ACPI device");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Adam Young <admiyo@os.amperecomputing.com>");
-- 
2.43.0
Re: [PATCH net-next v22 2/2] mctp pcc: Implement MCTP over PCC Transport
Posted by Jeremy Kerr 2 months, 3 weeks ago
Hi Adam,

A few comments inline:

> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * mctp-pcc.c - Driver for MCTP over PCC.
> + * Copyright (c) 2024, Ampere Computing LLC

It's 2025 now, I'd suggest a range:

    Copyright (c) 2024-2025, Ampere Computing LLC

> + */
> +
> +/* Implementation of MCTP over PCC DMTF Specification DSP0256
> + * https://www.dmtf.org/sites/default/files/standards/documents/DSP0256_2.0.0WIP50.pdf

DSP0256 has been released now, if that's a more appropriate reference.
But it looks like DSP0292 is more specific to the PCC parts?

> + */
> +
> +#include <linux/acpi.h>
> +#include <linux/if_arp.h>
> +#include <linux/init.h>
> +#include <linux/kernel.h>
> +#include <linux/module.h>
> +#include <linux/netdevice.h>
> +#include <linux/platform_device.h>
> +#include <linux/string.h>
> +#include <linux/skbuff.h>
> +#include <linux/hrtimer.h>
> +
> +#include <acpi/acpi_bus.h>
> +#include <acpi/acpi_drivers.h>
> +#include <acpi/acrestyp.h>
> +#include <acpi/actbl.h>
> +#include <net/mctp.h>
> +#include <net/mctpdevice.h>
> +#include <acpi/pcc.h>
> +
> +#include "../../mailbox/mailbox.h"
> +
> +#define MCTP_PAYLOAD_LENGTH     256
> +#define MCTP_CMD_LENGTH         4
> +#define MCTP_PCC_VERSION        0x1 /* DSP0292 a single version: 1 */
> +#define MCTP_SIGNATURE          "MCTP"
> +#define MCTP_SIGNATURE_LENGTH   (sizeof(MCTP_SIGNATURE) - 1)
> +#define MCTP_MIN_MTU            68
> +#define PCC_DWORD_TYPE          0x0c
> +
> +struct mctp_pcc_mailbox {
> +       u32 index;
> +       struct pcc_mbox_chan *chan;
> +       struct mbox_client client;
> +       struct sk_buff_head packets;
> +};
> +
> +/* The netdev structure. One of these per PCC adapter. */
> +struct mctp_pcc_ndev {
> +       /* spinlock to serialize access to PCC outbox buffer and registers
> +        * Note that what PCC calls registers are memory locations, not CPU
> +        * Registers.  They include the fields used to synchronize access
> +        * between the OS and remote endpoints.
> +        *
> +        * Only the Outbox needs a spinlock, to prevent multiple
> +        * sent packets triggering multiple attempts to over write
> +        * the outbox.  The Inbox buffer is controlled by the remote
> +        * service and a spinlock would have no effect.
> +        */
> +       spinlock_t lock;
> +       struct mctp_dev mdev;

You are only ever using the mdev->dev pointer; just use a struct
net_device * here. The MCTP layer handles the creation of the MCTP
parts.
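
A minimal sketch of that shape (not from the patch, just illustrating the
suggestion; the rest of the struct stays as-is):

    struct mctp_pcc_ndev {
            spinlock_t lock;
            struct net_device *ndev;   /* instead of struct mctp_dev mdev */
            struct acpi_device *acpi_device;
            struct mctp_pcc_mailbox inbox;
            struct mctp_pcc_mailbox outbox;
    };

with the mdev.dev accesses becoming mctp_pcc_ndev->ndev.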

> +       struct acpi_device *acpi_device;
> +       struct mctp_pcc_mailbox inbox;
> +       struct mctp_pcc_mailbox outbox;
> +};
> +
> +static void *mctp_pcc_rx_alloc(struct mbox_client *c, int size)
> +{
> +       struct mctp_pcc_mailbox *box;
> +       struct mctp_pcc_ndev *mctp_pcc_ndev;
> +       struct sk_buff *skb;
> +       void *skb_buf;
> +
> +       box = container_of(c, struct mctp_pcc_mailbox, client);
> +       mctp_pcc_ndev = container_of(c, struct mctp_pcc_ndev, inbox.client);
> +       if (size > mctp_pcc_ndev->mdev.dev->mtu)
> +               return NULL;
> +       mctp_pcc_ndev = container_of(c, struct mctp_pcc_ndev, inbox.client);

You have already set mctp_pcc_ndev a few lines above. It's also a bit
unusual doing the two container_of() operations, when you can find the
ndev and reference the mailbox from there.

The common pattern for this is to set up your context from the input
pointers early too, so something like:

    static void *mctp_pcc_rx_alloc(struct mbox_client *c, int size)
    {
            struct mctp_pcc_ndev *mctp_pcc_ndev =
                    container_of(c, struct mctp_pcc_ndev, inbox.client);
            struct mctp_pcc_mailbox *box = &mctp_pcc_ndev->inbox;
            struct sk_buff *skb;

            /* ... */

(but you may not need a var for 'box' at all)

> +       skb = netdev_alloc_skb(mctp_pcc_ndev->mdev.dev, size);
> +       if (!skb)
> +               return NULL;
> +       skb_buf = skb_put(skb, size);

you don't use skb_buf anywhere?

(building with W=1 should catch this)
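
(A trivial way out, if the return value really isn't needed, is to call it
for its side effect only:

        skb_put(skb, size);

and drop the local variable.)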

> +       skb->protocol = htons(ETH_P_MCTP);
> +
> +       skb_queue_head(&box->packets, skb);
> +
> +       return skb->data;
> +}
> +
> +static void mctp_pcc_client_rx_callback(struct mbox_client *c, void *buffer)
> +{
> +       struct mctp_pcc_ndev *mctp_pcc_ndev;
> +       struct pcc_header pcc_header;
> +       struct mctp_skb_cb *cb;
> +       struct sk_buff *skb;
> +
> +       mctp_pcc_ndev = container_of(c, struct mctp_pcc_ndev, inbox.client);
> +       if (!buffer) {
> +               dev_dstats_rx_dropped(mctp_pcc_ndev->mdev.dev);
> +               return;
> +       }

Mainly out of curiosity: how does this happen? How do we get a
completion where there is no original buffer?

> +
> +       skb_queue_walk(&mctp_pcc_ndev->inbox.packets, skb) {
> +               if (skb->data == buffer) {
> +                       skb_unlink(skb, &mctp_pcc_ndev->inbox.packets);
> +                       dev_dstats_rx_add(mctp_pcc_ndev->mdev.dev, skb->len);
> +                       skb_reset_mac_header(skb);
> +                       skb_pull(skb, sizeof(pcc_header));
> +                       skb_reset_network_header(skb);
> +                       cb = __mctp_cb(skb);
> +                       cb->halen = 0;
> +                       netif_rx(skb);
> +                       return;
> +               }

You can save a bit of indent by flipping the logic here:

        skb_queue_walk(&mctp_pcc_ndev->inbox.packets, skb) {
                if (skb->data != buffer)
                        continue;

                skb_unlink(skb, &mctp_pcc_ndev->inbox.packets);
                dev_dstats_rx_add(mctp_pcc_ndev->mdev.dev, skb->len);
                skb_reset_mac_header(skb);
                skb_pull(skb, sizeof(pcc_header));
                skb_reset_network_header(skb);
                cb = __mctp_cb(skb);
                cb->halen = 0;
                netif_rx(skb);
                return;
        }

I figure we're restricted to what the mailbox API provides, but is there
any way we can access the skb through a pointer, rather than having to
dig through these lists?

I think the issue is that the mbox API is using the void * buffer as
both the data to transfer, and the callback context, so we can't stash
useful context across the completion?


> +       }
> +       pr_warn("Unmatched packet in mctp-pcc inbox packet list");
> +}
> +
> +static void mctp_pcc_tx_done(struct mbox_client *c, void *mssg, int r)
> +{
> +       struct mctp_pcc_mailbox *box;
> +       struct sk_buff *skb;
> +
> +       box = container_of(c, struct mctp_pcc_mailbox, client);
> +       skb_queue_walk(&box->packets, skb) {
> +               if (skb->data == mssg) {
> +                       skb_unlink(skb, &box->packets);
> +                       dev_consume_skb_any(skb);
> +                       break;
> +               }
> +       }
> +}
> +
> +static netdev_tx_t mctp_pcc_tx(struct sk_buff *skb, struct net_device *ndev)
> +{
> +       struct mctp_pcc_ndev *mpnd = netdev_priv(ndev);
> +       struct pcc_header *pcc_header;
> +       int len = skb->len;
> +       int rc;
> +
> +       rc = skb_cow_head(skb, sizeof(*pcc_header));
> +       if (rc)
> +               goto err_drop;
> +
> +       pcc_header = skb_push(skb, sizeof(*pcc_header));
> +       pcc_header->signature = cpu_to_le32(PCC_SIGNATURE | mpnd->outbox.index);
> +       pcc_header->flags = cpu_to_le32(PCC_CMD_COMPLETION_NOTIFY);
> +       memcpy(&pcc_header->command, MCTP_SIGNATURE, MCTP_SIGNATURE_LENGTH);
> +       pcc_header->length = cpu_to_le32(len + MCTP_SIGNATURE_LENGTH);
> +
> +       skb_queue_head(&mpnd->outbox.packets, skb);
> +
> +       rc = mbox_send_message(mpnd->outbox.chan->mchan, skb->data);
> +
> +       if (rc < 0) {
> +               pr_info("%s fail, rc = %d", __func__, rc);
> +               return NETDEV_TX_BUSY;
> +       }

What happens on mbox_send_message failure? The skb will still be present
in the outbox.packets queue - I assume we don't see a completion
callback in that case, and so the skb will be in the outbox.packets
queue forever?

Are you sure you want to return NETDEV_TX_BUSY here?

Is there any situation where the mbox_send_message will continually
fail? Should we ratelimit the pr_info() message there (and regardless,
better to use one of netdev_info / netdev_warn / etc functions, since we
are dealing with netdevs here).
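
For example (just a sketch of the ratelimited netdev variant, adjust the
message to taste):

        if (net_ratelimit())
                netdev_warn(ndev, "mbox_send_message failed, rc = %d\n", rc);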

> +       dev_dstats_tx_add(ndev, len);
> +       return NETDEV_TX_OK;
> +err_drop:
> +       dev_dstats_tx_dropped(ndev);
> +       kfree_skb(skb);
> +       return NETDEV_TX_OK;
> +}
> +
> +static const struct net_device_ops mctp_pcc_netdev_ops = {
> +       .ndo_start_xmit = mctp_pcc_tx,
> +};
> +
> +static const struct mctp_netdev_ops mctp_netdev_ops = {
> +       NULL
> +};
> +
> +static void mctp_pcc_setup(struct net_device *ndev)
> +{
> +       ndev->type = ARPHRD_MCTP;
> +       ndev->hard_header_len = 0;
> +       ndev->tx_queue_len = 0;
> +       ndev->flags = IFF_NOARP;
> +       ndev->netdev_ops = &mctp_pcc_netdev_ops;
> +       ndev->needs_free_netdev = true;
> +       ndev->pcpu_stat_type = NETDEV_PCPU_STAT_DSTATS;
> +}
> +
> +struct mctp_pcc_lookup_context {
> +       int index;
> +       u32 inbox_index;
> +       u32 outbox_index;
> +};
> +
> +static acpi_status lookup_pcct_indices(struct acpi_resource *ares,
> +                                      void *context)
> +{
> +       struct mctp_pcc_lookup_context *luc = context;
> +       struct acpi_resource_address32 *addr;
> +
> +       if (ares->type != PCC_DWORD_TYPE)
> +               return AE_OK;
> +
> +       addr = ACPI_CAST_PTR(struct acpi_resource_address32, &ares->data);
> +       switch (luc->index) {
> +       case 0:
> +               luc->outbox_index = addr[0].address.minimum;
> +               break;
> +       case 1:
> +               luc->inbox_index = addr[0].address.minimum;
> +               break;
> +       }
> +       luc->index++;
> +       return AE_OK;
> +}
> +
> +static void drain_packets(struct sk_buff_head *list)
> +{
> +       struct sk_buff *skb;
> +
> +       while (!skb_queue_empty(list)) {
> +               skb = skb_dequeue(list);
> +               dev_consume_skb_any(skb);
> +       }
> +}
> +
> +static void mctp_cleanup_netdev(void *data)
> +{
> +       struct mctp_pcc_ndev *mctp_pcc_ndev;
> +       struct net_device *ndev = data;
> +
> +       mctp_pcc_ndev = netdev_priv(ndev);
> +       drain_packets(&mctp_pcc_ndev->outbox.packets);
> +       drain_packets(&mctp_pcc_ndev->inbox.packets);
> +
> +       mctp_unregister_netdev(ndev);
> +}
> +
> +static void mctp_cleanup_channel(void *data)
> +{
> +       struct pcc_mbox_chan *chan = data;
> +
> +       pcc_mbox_free_channel(chan);
> +}
> +
> +static int mctp_pcc_initialize_mailbox(struct device *dev,
> +                                      struct mctp_pcc_mailbox *box, u32 index)
> +{
> +       box->index = index;
> +       skb_queue_head_init(&box->packets);
> +       box->chan = pcc_mbox_request_channel(&box->client, index);
> +       box->chan->rx_alloc = mctp_pcc_rx_alloc;
> +
> +       box->client.dev = dev;
> +       if (IS_ERR(box->chan))
> +               return PTR_ERR(box->chan);
> +       return devm_add_action_or_reset(dev, mctp_cleanup_channel, box->chan);
> +}
> +
> +static int mctp_pcc_driver_add(struct acpi_device *acpi_dev)
> +{
> +       struct mctp_pcc_lookup_context context = {0};
> +       struct mctp_pcc_ndev *mctp_pcc_ndev;
> +       struct device *dev = &acpi_dev->dev;
> +       struct net_device *ndev;
> +       acpi_handle dev_handle;
> +       acpi_status status;
> +       int mctp_pcc_mtu;
> +       char name[32];
> +       int rc;
> +
> +       dev_dbg(dev, "Adding mctp_pcc device for HID %s\n",
> +               acpi_device_hid(acpi_dev));
> +       dev_handle = acpi_device_handle(acpi_dev);
> +       status = acpi_walk_resources(dev_handle, "_CRS", lookup_pcct_indices,
> +                                    &context);
> +       if (!ACPI_SUCCESS(status)) {
> +               dev_err(dev, "FAILURE to lookup PCC indexes from CRS\n");
> +               return -EINVAL;
> +       }
> +
> +       /* inbox initialization */

the inbox initialization seems to be a bit further down.

> +       snprintf(name, sizeof(name), "mctpipcc%d", context.inbox_index);
> +       ndev = alloc_netdev(sizeof(*mctp_pcc_ndev), name, NET_NAME_PREDICTABLE,
> +                           mctp_pcc_setup);
> +       if (!ndev)
> +               return -ENOMEM;
> +
> +       mctp_pcc_ndev = netdev_priv(ndev);
> +       spin_lock_init(&mctp_pcc_ndev->lock);
> +       rc = mctp_pcc_initialize_mailbox(dev, &mctp_pcc_ndev->inbox,
> +                                        context.inbox_index);
> +       if (rc)
> +               goto free_netdev;
> +       mctp_pcc_ndev->inbox.client.rx_callback = mctp_pcc_client_rx_callback;
> +
> +       /* outbox initialization */
> +       rc = mctp_pcc_initialize_mailbox(dev, &mctp_pcc_ndev->outbox,
> +                                        context.outbox_index);
> +       if (rc)
> +               goto free_netdev;
> +
> +       mctp_pcc_ndev->outbox.client.tx_done = mctp_pcc_tx_done;
> +       mctp_pcc_ndev->acpi_device = acpi_dev;
> +       mctp_pcc_ndev->mdev.dev = ndev;
> +       acpi_dev->driver_data = mctp_pcc_ndev;
> +
> +       /* There is no clean way to pass the MTU to the callback function
> +        * used for registration, so set the values ahead of time.
> +        */
> +       mctp_pcc_mtu = mctp_pcc_ndev->outbox.chan->shmem_size -
> +               sizeof(struct pcc_header);
> +       ndev->mtu = MCTP_MIN_MTU;
> +       ndev->max_mtu = mctp_pcc_mtu;
> +       ndev->min_mtu = MCTP_MIN_MTU;
> +
> +       /* ndev needs to be freed before the iomemory (mapped above) gets
> +        * unmapped,  devm resources get freed in reverse to the order they
> +        * are added.
> +        */
> +       rc = mctp_register_netdev(ndev, &mctp_netdev_ops, MCTP_PHYS_BINDING_PCC);
> +       if (rc)
> +               goto free_netdev;
> +       return devm_add_action_or_reset(dev, mctp_cleanup_netdev, ndev);
> +free_netdev:
> +       free_netdev(ndev);
> +       return rc;
> +}

Just a couple of nitpicky style things here (and above): try to keep the
return-value checks immediately after the appropriate call:

            rc = mbox_send_message(mpnd->outbox.chan->mchan, skb->data);
            if (rc < 0) {
                    pr_info("%s fail, rc = %d", __func__, rc);
                    return NETDEV_TX_BUSY;
            }

but give yourself some space after those checks, and around the
returns/goto labels:

            rc = mctp_register_netdev(ndev, &mctp_netdev_ops, MCTP_PHYS_BINDING_PCC);
            if (rc)
                    goto free_netdev;

            return devm_add_action_or_reset(dev, mctp_cleanup_netdev, ndev);

    free_netdev:
            free_netdev(ndev);
            return rc;
    }

Cheers,


Jeremy
Re: [PATCH net-next v22 2/2] mctp pcc: Implement MCTP over PCC Transport
Posted by Adam Young 2 months, 3 weeks ago
All changes are accepted.  I have attempted to answer your questions here.


>> +static void mctp_pcc_client_rx_callback(struct mbox_client *c, void *buffer)
>> +{
>> +       struct mctp_pcc_ndev *mctp_pcc_ndev;
>> +       struct pcc_header pcc_header;
>> +       struct mctp_skb_cb *cb;
>> +       struct sk_buff *skb;
>> +
>> +       mctp_pcc_ndev = container_of(c, struct mctp_pcc_ndev, inbox.client);
>> +       if (!buffer) {
>> +               dev_dstats_rx_dropped(mctp_pcc_ndev->mdev.dev);
>> +               return;
>> +       }
> Mainly out of curiosity: how does this happen? How do we get a
> completion where there is no original buffer?

If the sk_buff allocation fails, the logic falls back to the old code, 
which passes on a null buffer. There is logic there for notifying the
sender that I don't want to skip or modify.

See the other patch, in the change to the irq handler.

> I figure we're restricted to what the mailbox API provides, but is there
> any way we can access the skb through a pointer, rather than having to
> dig through these lists?
>
> I think the issue is that the mbox API is using the void * buffer as
> both the data to transfer, and the callback context, so we can't stash
> useful context across the completion?

Correct, the sk_buff is a structure that points to a buffer, and what
gets to the send_data function is the buffer itself. That buffer has no 
pointer back to the sk_buff.


>> +
>> +       rc = mbox_send_message(mpnd->outbox.chan->mchan, skb->data);
>> +
>> +       if (rc < 0) {
>> +               pr_info("%s fail, rc = %d", __func__, rc);
>> +               return NETDEV_TX_BUSY;
>> +       }
> What happens on mbox_send_message failure? The skb will still be present
> in the outbox.packets queue - I assume we don't see a completion
> callback in that case, and so the skb will be in the outbox.packets
> queue forever?

>
> Are you sure you want to return NETDEV_TX_BUSY here?
>
> Is there any situation where the mbox_send_message will continually
> fail? Should we ratelimit the pr_info() message there (and regardless,
> better to use one of netdev_info / netdev_warn / etc functions, since we
> are dealing with netdevs here).

The failure happens if the ring buffer is full, so, yes, it makes sense not
to queue the packet. I can move the queueing to after the mbox_send_message.

The NETDEV_TX_BUSY is correct, as it means the packet should be resent, and
we don't hold a reference to it.

The only other failure path in the mbox_send_message code is due to
timeouts, which we do not use in this driver. That is a recent change,
and that was the case I was handling before.

Re: [PATCH net-next v22 2/2] mctp pcc: Implement MCTP over PCC Transport
Posted by Jeremy Kerr 2 months, 3 weeks ago
Hi Adam,
> If the sk_buff allocation fails, the logic falls back to the old code, 
> which passes on a null buffer. There is logic there with notifying the 
> sender that I don't want to skip or modify.

OK, so this will happen if we didn't allocate a buffer in the first
place - we'd still get a completion occurring. Let me know if my
understanding is incorrect.
> 

> > I think the issue is that the mbox API is using the void * buffer
> > as both the data to transfer, and the callback context, so we can't
> > stash useful context across the completion?
> 
> Correct, the SK_buff is a structure  that points to a buffer, and
> what gets to the send_data function is the buffer itself. That buffer
> has no pointer back to the sk_buff.

OK, that's a bit unfortunate. Might be good to see if you can add a
context pointer to the mailbox request, in which you could stuff the
skb pointer. USB does this for the transfer completions: there are
members on the transfer (the struct urb) for both a context and buffer
pointer.
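
A purely hypothetical sketch of what that would buy (this assumes the
tx_done callback grew a caller-supplied context argument, which it does
not have today):

    static void mctp_pcc_tx_done(struct mbox_client *c, void *mssg,
                                 void *ctx, int r)
    {
            struct mctp_pcc_ndev *mpnd =
                    container_of(c, struct mctp_pcc_ndev, outbox.client);
            struct sk_buff *skb = ctx;      /* skb stashed at send time */

            skb_unlink(skb, &mpnd->outbox.packets);
            dev_consume_skb_any(skb);
    }

i.e. no list walk at all.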

Of course, that is more a mailbox API change, so you may want to
consider that as a separate thing later.

> The NETDEV_TX_BUSY is correct, as it means resend the packet, and we 
> don't have any reference to it.

OK, you may want to have the tx queue stopped at that point then, if
you have some facility to re-start it when the mailbox ring buffer has
space.
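
Rough shape of that (a sketch only, assuming the tx_done completion is a
reasonable place to learn that ring space is available again):

        rc = mbox_send_message(mpnd->outbox.chan->mchan, skb->data);
        if (rc < 0) {
                netif_stop_queue(ndev);
                return NETDEV_TX_BUSY;
        }

with a matching netif_wake_queue() on the net_device from
mctp_pcc_tx_done() once a completion arrives.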

Cheers,


Jeremy
Re: [PATCH net-next v22 2/2] mctp pcc: Implement MCTP over PCC Transport
Posted by kernel test robot 2 months, 3 weeks ago
Hi,

kernel test robot noticed the following build warnings:

[auto build test WARNING on net-next/main]

url:    https://github.com/intel-lab-lkp/linux/commits/admiyo-os-amperecomputing-com/mailbox-pcc-support-mailbox-management-of-the-shared-buffer/20250711-031525
base:   net-next/main
patch link:    https://lore.kernel.org/r/20250710191209.737167-3-admiyo%40os.amperecomputing.com
patch subject: [PATCH net-next v22 2/2] mctp pcc: Implement MCTP over PCC Transport
config: x86_64-allyesconfig (https://download.01.org/0day-ci/archive/20250711/202507111843.OV6uA3Jr-lkp@intel.com/config)
compiler: clang version 20.1.8 (https://github.com/llvm/llvm-project 87f0227cb60147a26a1eeb4fb06e3b505e9c7261)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20250711/202507111843.OV6uA3Jr-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202507111843.OV6uA3Jr-lkp@intel.com/

All warnings (new ones prefixed by >>):

>> drivers/net/mctp/mctp-pcc.c:71:8: warning: variable 'skb_buf' set but not used [-Wunused-but-set-variable]
      71 |         void *skb_buf;
         |               ^
   1 warning generated.


vim +/skb_buf +71 drivers/net/mctp/mctp-pcc.c

    65	
    66	static void *mctp_pcc_rx_alloc(struct mbox_client *c, int size)
    67	{
    68		struct mctp_pcc_mailbox *box;
    69		struct mctp_pcc_ndev *mctp_pcc_ndev;
    70		struct sk_buff *skb;
  > 71		void *skb_buf;
    72	
    73		box = container_of(c, struct mctp_pcc_mailbox, client);
    74		mctp_pcc_ndev = container_of(c, struct mctp_pcc_ndev, inbox.client);
    75		if (size > mctp_pcc_ndev->mdev.dev->mtu)
    76			return NULL;
    77		mctp_pcc_ndev = container_of(c, struct mctp_pcc_ndev, inbox.client);
    78		skb = netdev_alloc_skb(mctp_pcc_ndev->mdev.dev, size);
    79		if (!skb)
    80			return NULL;
    81		skb_buf = skb_put(skb, size);
    82		skb->protocol = htons(ETH_P_MCTP);
    83	
    84		skb_queue_head(&box->packets, skb);
    85	
    86		return skb->data;
    87	}
    88	

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki