From: Adam Young <admiyo@os.amperecomputing.com>
Implementation of network driver for
Management Component Transport Protocol (MCTP)
over Platform Communication Channel (PCC)
DMTF DSP:0292
https://www.dmtf.org/sites/default/files/standards/documents/\
DSP0292_1.0.0WIP50.pdf
MCTP devices are specified via ACPI by entries
in DSDT/SSDT and reference channels specified
in the PCCT. Messages are sent on a type 3 and
received on a type 4 channel. Communication with
other devices use the PCC based doorbell mechanism;
a shared memory segment with a corresponding
interrupt and a memory register used to trigger
remote interrupts.
This driver takes advantage of PCC mailbox buffer
management. The data section of the struct sk_buff
that contains the outgoing packet is sent to the mailbox,
already properly formatted as a PCC message. The driver
is also responsible for allocating a struct sk_buff that
is then passed to the mailbox and used to record the
data in the shared buffer. It maintains a list of both
outgoing and incoming sk_buffs to match the data buffers
with the original sk_buffs.
When the Type 3 channel outbox receives a txdone response
interrupt, it consumes the outgoing sk_buff, allowing
it to be freed.
Signed-off-by: Adam Young <admiyo@os.amperecomputing.com>
---
MAINTAINERS | 5 +
drivers/net/mctp/Kconfig | 13 ++
drivers/net/mctp/Makefile | 1 +
drivers/net/mctp/mctp-pcc.c | 346 ++++++++++++++++++++++++++++++++++++
4 files changed, 365 insertions(+)
create mode 100644 drivers/net/mctp/mctp-pcc.c
diff --git a/MAINTAINERS b/MAINTAINERS
index 881a1f08e665..455618ee93b6 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -14467,6 +14467,11 @@ F: include/net/mctpdevice.h
F: include/net/netns/mctp.h
F: net/mctp/
+MANAGEMENT COMPONENT TRANSPORT PROTOCOL (MCTP) over PCC (MCTP-PCC) Driver
+M: Adam Young <admiyo@os.amperecomputing.com>
+S: Maintained
+F: drivers/net/mctp/mctp-pcc.c
+
MAPLE TREE
M: Liam R. Howlett <Liam.Howlett@oracle.com>
L: maple-tree@lists.infradead.org
diff --git a/drivers/net/mctp/Kconfig b/drivers/net/mctp/Kconfig
index cf325ab0b1ef..f69d0237f058 100644
--- a/drivers/net/mctp/Kconfig
+++ b/drivers/net/mctp/Kconfig
@@ -57,6 +57,19 @@ config MCTP_TRANSPORT_USB
MCTP-over-USB interfaces are peer-to-peer, so each interface
represents a physical connection to one remote MCTP endpoint.
+config MCTP_TRANSPORT_PCC
+ tristate "MCTP PCC transport"
+ depends on ACPI
+ help
+ Provides a driver to access MCTP devices over PCC transport,
+ An MCTP protocol network device is created via ACPI for each
+ entry in the DSDT/SSDT that matches the identifier. The Platform
+ communication channels are selected from the corresponding
+ entries in the PCCT.
+
+ Say y here if you need to connect to MCTP endpoints over PCC. To
+ compile as a module, use m; the module will be called mctp-pcc.
+
endmenu
endif
diff --git a/drivers/net/mctp/Makefile b/drivers/net/mctp/Makefile
index c36006849a1e..2276f148df7c 100644
--- a/drivers/net/mctp/Makefile
+++ b/drivers/net/mctp/Makefile
@@ -1,3 +1,4 @@
+obj-$(CONFIG_MCTP_TRANSPORT_PCC) += mctp-pcc.o
obj-$(CONFIG_MCTP_SERIAL) += mctp-serial.o
obj-$(CONFIG_MCTP_TRANSPORT_I2C) += mctp-i2c.o
obj-$(CONFIG_MCTP_TRANSPORT_I3C) += mctp-i3c.o
diff --git a/drivers/net/mctp/mctp-pcc.c b/drivers/net/mctp/mctp-pcc.c
new file mode 100644
index 000000000000..15ed449071d5
--- /dev/null
+++ b/drivers/net/mctp/mctp-pcc.c
@@ -0,0 +1,346 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * mctp-pcc.c - Driver for MCTP over PCC.
+ * Copyright (c) 2024, Ampere Computing LLC
+ */
+
+/* Implementation of MCTP over PCC DMTF Specification DSP0256
+ * https://www.dmtf.org/sites/default/files/standards/documents/DSP0256_2.0.0WIP50.pdf
+ */
+
+#include <linux/acpi.h>
+#include <linux/if_arp.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/platform_device.h>
+#include <linux/string.h>
+#include <linux/skbuff.h>
+#include <linux/hrtimer.h>
+
+#include <acpi/acpi_bus.h>
+#include <acpi/acpi_drivers.h>
+#include <acpi/acrestyp.h>
+#include <acpi/actbl.h>
+#include <net/mctp.h>
+#include <net/mctpdevice.h>
+#include <acpi/pcc.h>
+
+#include "../../mailbox/mailbox.h"
+
+/* NOTE(review): MCTP_PAYLOAD_LENGTH, MCTP_CMD_LENGTH and MCTP_PCC_VERSION
+ * are not referenced anywhere in this file — confirm they are needed or
+ * drop them.
+ */
+#define MCTP_PAYLOAD_LENGTH 256
+#define MCTP_CMD_LENGTH 4
+#define MCTP_PCC_VERSION 0x1 /* DSP0292 a single version: 1 */
+#define MCTP_SIGNATURE "MCTP"
+#define MCTP_SIGNATURE_LENGTH (sizeof(MCTP_SIGNATURE) - 1)
+#define MCTP_MIN_MTU 68
+/* ACPI resource type matched in lookup_pcct_indices(); the descriptor data
+ * is cast to struct acpi_resource_address32 there, so 0x0c is the DWORD
+ * address-space descriptor type.
+ */
+#define PCC_DWORD_TYPE 0x0c
+
+/* One PCC channel (either direction) and the driver state tied to it. */
+struct mctp_pcc_mailbox {
+ u32 index; /* PCCT subspace index of this channel */
+ struct pcc_mbox_chan *chan; /* channel handle from pcc_mbox_request_channel() */
+ struct mbox_client client; /* mailbox client; callbacks recover this box via container_of */
+ /* skbs whose ->data backs a buffer handed to the mailbox layer;
+  * entries are matched by data pointer and unlinked in the rx/tx-done
+  * callbacks.
+  */
+ struct sk_buff_head packets;
+};
+
+/* The netdev structure. One of these per PCC adapter. */
+struct mctp_pcc_ndev {
+ /* spinlock to serialize access to PCC outbox buffer and registers
+ * Note that what PCC calls registers are memory locations, not CPU
+ * Registers. They include the fields used to synchronize access
+ * between the OS and remote endpoints.
+ *
+ * Only the Outbox needs a spinlock, to prevent multiple
+ * sent packets triggering multiple attempts to over write
+ * the outbox. The Inbox buffer is controlled by the remote
+ * service and a spinlock would have no effect.
+ */
+ spinlock_t lock;
+ /* NOTE(review): only mdev.dev is ever used in this file; a plain
+  * struct net_device * would do (per list feedback).
+  */
+ struct mctp_dev mdev;
+ struct acpi_device *acpi_device;
+ struct mctp_pcc_mailbox inbox; /* type 4 channel: remote -> OS */
+ struct mctp_pcc_mailbox outbox; /* type 3 channel: OS -> remote */
+};
+
+/*
+ * rx_alloc hook for the inbox channel: allocate an skb whose data area
+ * backs the mailbox receive buffer. Returns the buffer pointer, or NULL
+ * if the request exceeds the MTU or allocation fails (the mailbox layer
+ * then falls back to delivering a NULL buffer to the rx callback).
+ *
+ * Fixes: 'skb_buf' was set but never used (W=1 warning reported by the
+ * kernel test robot) and container_of() was computed twice.
+ */
+static void *mctp_pcc_rx_alloc(struct mbox_client *c, int size)
+{
+ struct mctp_pcc_ndev *mctp_pcc_ndev;
+ struct sk_buff *skb;
+
+ mctp_pcc_ndev = container_of(c, struct mctp_pcc_ndev, inbox.client);
+ if (size > mctp_pcc_ndev->mdev.dev->mtu)
+ return NULL;
+
+ skb = netdev_alloc_skb(mctp_pcc_ndev->mdev.dev, size);
+ if (!skb)
+ return NULL;
+
+ skb_put(skb, size);
+ skb->protocol = htons(ETH_P_MCTP);
+
+ /* Track the skb so the rx callback can match it by its data pointer. */
+ skb_queue_head(&mctp_pcc_ndev->inbox.packets, skb);
+
+ return skb->data;
+}
+
+/*
+ * Inbox rx completion: find the skb whose data area is 'buffer', strip
+ * the PCC header and hand the MCTP packet to the network stack.
+ * A NULL buffer means rx_alloc failed earlier, so the packet is counted
+ * as dropped.
+ *
+ * Fixes: drop the on-stack struct pcc_header used only for sizeof, log
+ * via netdev_warn with a trailing newline instead of a bare pr_warn.
+ */
+static void mctp_pcc_client_rx_callback(struct mbox_client *c, void *buffer)
+{
+ struct mctp_pcc_ndev *mctp_pcc_ndev;
+ struct mctp_skb_cb *cb;
+ struct sk_buff *skb;
+
+ mctp_pcc_ndev = container_of(c, struct mctp_pcc_ndev, inbox.client);
+ if (!buffer) {
+ dev_dstats_rx_dropped(mctp_pcc_ndev->mdev.dev);
+ return;
+ }
+
+ skb_queue_walk(&mctp_pcc_ndev->inbox.packets, skb) {
+ if (skb->data != buffer)
+ continue;
+
+ skb_unlink(skb, &mctp_pcc_ndev->inbox.packets);
+ dev_dstats_rx_add(mctp_pcc_ndev->mdev.dev, skb->len);
+ skb_reset_mac_header(skb);
+ skb_pull(skb, sizeof(struct pcc_header));
+ skb_reset_network_header(skb);
+ cb = __mctp_cb(skb);
+ cb->halen = 0;
+ netif_rx(skb);
+ return;
+ }
+
+ netdev_warn(mctp_pcc_ndev->mdev.dev,
+ "Unmatched packet in mctp-pcc inbox packet list\n");
+}
+
+/*
+ * Outbox tx-done completion: the mailbox layer is finished with the
+ * buffer 'mssg', so locate the matching queued skb by its data pointer,
+ * remove it from the outbox list and release it.
+ */
+static void mctp_pcc_tx_done(struct mbox_client *c, void *mssg, int r)
+{
+ struct mctp_pcc_mailbox *box;
+ struct sk_buff *skb;
+
+ box = container_of(c, struct mctp_pcc_mailbox, client);
+ skb_queue_walk(&box->packets, skb) {
+ if (skb->data != mssg)
+ continue;
+
+ skb_unlink(skb, &box->packets);
+ dev_consume_skb_any(skb);
+ return;
+ }
+}
+
+/*
+ * ndo_start_xmit: prepend the PCC header to the outgoing MCTP packet,
+ * queue the skb so tx_done can find and free it, and hand the buffer to
+ * the mailbox layer. Returns NETDEV_TX_BUSY (stack will retry) when the
+ * mailbox ring is full.
+ *
+ * Fixes (from list review): on mbox_send_message() failure the skb was
+ * left on outbox.packets forever, and would have a second PCC header
+ * pushed when the stack retried it; the failure log is now a ratelimited
+ * netdev message instead of pr_info.
+ */
+static netdev_tx_t mctp_pcc_tx(struct sk_buff *skb, struct net_device *ndev)
+{
+ struct mctp_pcc_ndev *mpnd = netdev_priv(ndev);
+ struct pcc_header *pcc_header;
+ int len = skb->len;
+ int rc;
+
+ rc = skb_cow_head(skb, sizeof(*pcc_header));
+ if (rc)
+ goto err_drop;
+
+ pcc_header = skb_push(skb, sizeof(*pcc_header));
+ pcc_header->signature = cpu_to_le32(PCC_SIGNATURE | mpnd->outbox.index);
+ pcc_header->flags = cpu_to_le32(PCC_CMD_COMPLETION_NOTIFY);
+ memcpy(&pcc_header->command, MCTP_SIGNATURE, MCTP_SIGNATURE_LENGTH);
+ pcc_header->length = cpu_to_le32(len + MCTP_SIGNATURE_LENGTH);
+
+ skb_queue_head(&mpnd->outbox.packets, skb);
+
+ rc = mbox_send_message(mpnd->outbox.chan->mchan, skb->data);
+ if (rc < 0) {
+ if (net_ratelimit())
+ netdev_warn(ndev, "%s fail, rc = %d\n", __func__, rc);
+ /* Undo the queueing and header push so the retried skb is
+  * in its original state.
+  */
+ skb_unlink(skb, &mpnd->outbox.packets);
+ skb_pull(skb, sizeof(*pcc_header));
+ return NETDEV_TX_BUSY;
+ }
+
+ dev_dstats_tx_add(ndev, len);
+ return NETDEV_TX_OK;
+
+err_drop:
+ dev_dstats_tx_dropped(ndev);
+ kfree_skb(skb);
+ return NETDEV_TX_OK;
+}
+
+static const struct net_device_ops mctp_pcc_netdev_ops = {
+ .ndo_start_xmit = mctp_pcc_tx,
+};
+
+/* No driver-specific MCTP callbacks; mctp_register_netdev() is
+ * presumably fine with an empty ops table — confirm against the MCTP
+ * core.
+ */
+static const struct mctp_netdev_ops mctp_netdev_ops = {
+ NULL
+};
+
+/* alloc_netdev() setup callback: basic MCTP link properties. */
+static void mctp_pcc_setup(struct net_device *ndev)
+{
+ ndev->type = ARPHRD_MCTP;
+ ndev->hard_header_len = 0;
+ ndev->tx_queue_len = 0;
+ ndev->flags = IFF_NOARP;
+ ndev->netdev_ops = &mctp_pcc_netdev_ops;
+ ndev->needs_free_netdev = true;
+ ndev->pcpu_stat_type = NETDEV_PCPU_STAT_DSTATS;
+}
+
+struct mctp_pcc_lookup_context {
+ int index;
+ u32 inbox_index;
+ u32 outbox_index;
+};
+
+/*
+ * acpi_walk_resources() callback: capture the PCC subspace indices from
+ * the DWORD address descriptors in the device's _CRS. The first such
+ * descriptor carries the outbox index, the second the inbox index; any
+ * further descriptors are ignored.
+ */
+static acpi_status lookup_pcct_indices(struct acpi_resource *ares,
+ void *context)
+{
+ struct mctp_pcc_lookup_context *luc = context;
+ struct acpi_resource_address32 *addr;
+
+ if (ares->type != PCC_DWORD_TYPE)
+ return AE_OK;
+
+ addr = ACPI_CAST_PTR(struct acpi_resource_address32, &ares->data);
+ if (luc->index == 0)
+ luc->outbox_index = addr->address.minimum;
+ else if (luc->index == 1)
+ luc->inbox_index = addr->address.minimum;
+
+ luc->index++;
+ return AE_OK;
+}
+
+/*
+ * Release every skb still queued on 'list'. skb_dequeue() returns NULL
+ * once the list is empty, so the separate skb_queue_empty() check was
+ * redundant (and racy if a callback could run concurrently).
+ */
+static void drain_packets(struct sk_buff_head *list)
+{
+ struct sk_buff *skb;
+
+ while ((skb = skb_dequeue(list)) != NULL)
+ dev_consume_skb_any(skb);
+}
+
+/*
+ * devm action: free any skbs still held for the mailbox buffers, then
+ * unregister the MCTP netdev.
+ *
+ * NOTE(review): the packet lists are drained while the netdev is still
+ * registered — confirm no rx_alloc/rx/tx_done callback can run
+ * concurrently here, otherwise draining should happen after
+ * mctp_unregister_netdev().
+ */
+static void mctp_cleanup_netdev(void *data)
+{
+ struct mctp_pcc_ndev *mctp_pcc_ndev;
+ struct net_device *ndev = data;
+
+ mctp_pcc_ndev = netdev_priv(ndev);
+ drain_packets(&mctp_pcc_ndev->outbox.packets);
+ drain_packets(&mctp_pcc_ndev->inbox.packets);
+
+ mctp_unregister_netdev(ndev);
+}
+
+/* devm action: return the PCC mailbox channel acquired at probe time. */
+static void mctp_cleanup_channel(void *data)
+{
+ pcc_mbox_free_channel(data);
+}
+
+/*
+ * Request the PCC channel for one mailbox and arrange for it to be
+ * released automatically on driver detach.
+ *
+ * Fixes: box->chan->rx_alloc was assigned BEFORE the IS_ERR() check,
+ * dereferencing an error pointer when the channel request failed; the
+ * client's dev field is now also populated before the channel is
+ * requested rather than after.
+ */
+static int mctp_pcc_initialize_mailbox(struct device *dev,
+ struct mctp_pcc_mailbox *box, u32 index)
+{
+ box->index = index;
+ box->client.dev = dev;
+ skb_queue_head_init(&box->packets);
+
+ box->chan = pcc_mbox_request_channel(&box->client, index);
+ if (IS_ERR(box->chan))
+ return PTR_ERR(box->chan);
+
+ box->chan->rx_alloc = mctp_pcc_rx_alloc;
+
+ return devm_add_action_or_reset(dev, mctp_cleanup_channel, box->chan);
+}
+
+/*
+ * Probe one MCTP-over-PCC ACPI device: read the inbox/outbox PCC channel
+ * indices from the device's _CRS, allocate a netdev named after the
+ * inbox index, request both PCC channels, and register the netdev with
+ * the MCTP core. Cleanup of channels and netdev is handled via devm
+ * actions (freed in reverse order of registration).
+ */
+static int mctp_pcc_driver_add(struct acpi_device *acpi_dev)
+{
+ struct mctp_pcc_lookup_context context = {0};
+ struct mctp_pcc_ndev *mctp_pcc_ndev;
+ struct device *dev = &acpi_dev->dev;
+ struct net_device *ndev;
+ acpi_handle dev_handle;
+ acpi_status status;
+ int mctp_pcc_mtu;
+ char name[32];
+ int rc;
+
+ dev_dbg(dev, "Adding mctp_pcc device for HID %s\n",
+ acpi_device_hid(acpi_dev));
+ dev_handle = acpi_device_handle(acpi_dev);
+ status = acpi_walk_resources(dev_handle, "_CRS", lookup_pcct_indices,
+ &context);
+ if (!ACPI_SUCCESS(status)) {
+ dev_err(dev, "FAILURE to lookup PCC indexes from CRS\n");
+ return -EINVAL;
+ }
+
+ /* The netdev is named after the inbox PCC channel index. */
+ snprintf(name, sizeof(name), "mctpipcc%d", context.inbox_index);
+ ndev = alloc_netdev(sizeof(*mctp_pcc_ndev), name, NET_NAME_PREDICTABLE,
+ mctp_pcc_setup);
+ if (!ndev)
+ return -ENOMEM;
+
+ mctp_pcc_ndev = netdev_priv(ndev);
+ spin_lock_init(&mctp_pcc_ndev->lock);
+ /* inbox initialization: the channel packets are received on */
+ rc = mctp_pcc_initialize_mailbox(dev, &mctp_pcc_ndev->inbox,
+ context.inbox_index);
+ if (rc)
+ goto free_netdev;
+ mctp_pcc_ndev->inbox.client.rx_callback = mctp_pcc_client_rx_callback;
+
+ /* outbox initialization: the channel packets are sent on */
+ rc = mctp_pcc_initialize_mailbox(dev, &mctp_pcc_ndev->outbox,
+ context.outbox_index);
+ if (rc)
+ goto free_netdev;
+
+ mctp_pcc_ndev->outbox.client.tx_done = mctp_pcc_tx_done;
+ mctp_pcc_ndev->acpi_device = acpi_dev;
+ mctp_pcc_ndev->mdev.dev = ndev;
+ acpi_dev->driver_data = mctp_pcc_ndev;
+
+ /* There is no clean way to pass the MTU to the callback function
+ * used for registration, so set the values ahead of time.
+ * The maximum MTU is bounded by the outbox shared-memory size less
+ * the PCC header that mctp_pcc_tx() prepends.
+ */
+ mctp_pcc_mtu = mctp_pcc_ndev->outbox.chan->shmem_size -
+ sizeof(struct pcc_header);
+ ndev->mtu = MCTP_MIN_MTU;
+ ndev->max_mtu = mctp_pcc_mtu;
+ ndev->min_mtu = MCTP_MIN_MTU;
+
+ /* ndev needs to be freed before the iomemory (mapped above) gets
+ * unmapped, devm resources get freed in reverse to the order they
+ * are added.
+ */
+ rc = mctp_register_netdev(ndev, &mctp_netdev_ops, MCTP_PHYS_BINDING_PCC);
+ if (rc)
+ goto free_netdev;
+ return devm_add_action_or_reset(dev, mctp_cleanup_netdev, ndev);
+free_netdev:
+ free_netdev(ndev);
+ return rc;
+}
+
+/* ACPI _HID match table; DMT0001 identifies an MCTP-over-PCC device. */
+static const struct acpi_device_id mctp_pcc_device_ids[] = {
+ { "DMT0001" },
+ {}
+};
+
+static struct acpi_driver mctp_pcc_driver = {
+ .name = "mctp_pcc",
+ .class = "Unknown",
+ .ids = mctp_pcc_device_ids,
+ .ops = {
+ .add = mctp_pcc_driver_add,
+ /* no .remove: teardown is entirely devm-managed */
+ },
+};
+
+module_acpi_driver(mctp_pcc_driver);
+
+MODULE_DEVICE_TABLE(acpi, mctp_pcc_device_ids);
+
+MODULE_DESCRIPTION("MCTP PCC ACPI device");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Adam Young <admiyo@os.amperecomputing.com>");
--
2.43.0
Hi Adam, A few comments inline: > +// SPDX-License-Identifier: GPL-2.0 > +/* > + * mctp-pcc.c - Driver for MCTP over PCC. > + * Copyright (c) 2024, Ampere Computing LLC It's 2025 now, I'd suggest a range: Copyright (c) 2024-2025, Ampere Computing LLC > + */ > + > +/* Implementation of MCTP over PCC DMTF Specification DSP0256 > + * https://www.dmtf.org/sites/default/files/standards/documents/DSP0256_2.0.0WIP50.pdf DSP0256 has been released now, if that's a more appropriate reference. But it looks like DSP0292 is more specific to the PCC parts? > + */ > + > +#include <linux/acpi.h> > +#include <linux/if_arp.h> > +#include <linux/init.h> > +#include <linux/kernel.h> > +#include <linux/module.h> > +#include <linux/netdevice.h> > +#include <linux/platform_device.h> > +#include <linux/string.h> > +#include <linux/skbuff.h> > +#include <linux/hrtimer.h> > + > +#include <acpi/acpi_bus.h> > +#include <acpi/acpi_drivers.h> > +#include <acpi/acrestyp.h> > +#include <acpi/actbl.h> > +#include <net/mctp.h> > +#include <net/mctpdevice.h> > +#include <acpi/pcc.h> > + > +#include "../../mailbox/mailbox.h" > + > +#define MCTP_PAYLOAD_LENGTH 256 > +#define MCTP_CMD_LENGTH 4 > +#define MCTP_PCC_VERSION 0x1 /* DSP0292 a single version: 1 */ > +#define MCTP_SIGNATURE "MCTP" > +#define MCTP_SIGNATURE_LENGTH (sizeof(MCTP_SIGNATURE) - 1) > +#define MCTP_MIN_MTU 68 > +#define PCC_DWORD_TYPE 0x0c > + > +struct mctp_pcc_mailbox { > + u32 index; > + struct pcc_mbox_chan *chan; > + struct mbox_client client; > + struct sk_buff_head packets; > +}; > + > +/* The netdev structure. One of these per PCC adapter. */ > +struct mctp_pcc_ndev { > + /* spinlock to serialize access to PCC outbox buffer and registers > + * Note that what PCC calls registers are memory locations, not CPU > + * Registers. They include the fields used to synchronize access > + * between the OS and remote endpoints. 
> + * > + * Only the Outbox needs a spinlock, to prevent multiple > + * sent packets triggering multiple attempts to over write > + * the outbox. The Inbox buffer is controlled by the remote > + * service and a spinlock would have no effect. > + */ > + spinlock_t lock; > + struct mctp_dev mdev; You are only ever using the mdev->dev pointer; just use a struct net_device * here. The MCTP layer handles the creation of the MCTP parts. > + struct acpi_device *acpi_device; > + struct mctp_pcc_mailbox inbox; > + struct mctp_pcc_mailbox outbox; > +}; > + > +static void *mctp_pcc_rx_alloc(struct mbox_client *c, int size) > +{ > + struct mctp_pcc_mailbox *box; > + struct mctp_pcc_ndev *mctp_pcc_ndev; > + struct sk_buff *skb; > + void *skb_buf; > + > + box = container_of(c, struct mctp_pcc_mailbox, client); > + mctp_pcc_ndev = container_of(c, struct mctp_pcc_ndev, inbox.client); > + if (size > mctp_pcc_ndev->mdev.dev->mtu) > + return NULL; > + mctp_pcc_ndev = container_of(c, struct mctp_pcc_ndev, inbox.client); You already have set mctp_ncc_ndev a few lines above. It's also a bit unusual doing the two container_of() operations, when you can find the ndev and reference the mailbox from there. The common pattern for this is to set up your context from the input pointers early too, so something like: static void *mctp_pcc_rx_alloc(struct mbox_client *c, int size) { struct mctp_pcc_ndev *mctp_pcc_ndev = container_of(c, struct mctp_pcc_ndev, inbox.client); struct mctp_pcc_mailbox *box = &mctp_pcc_ndev->inbox; struct sk_buff *skb; /* ... */ (but you may not need a var for 'box' at all) > + skb = netdev_alloc_skb(mctp_pcc_ndev->mdev.dev, size); > + if (!skb) > + return NULL; > + skb_buf = skb_put(skb, size); you don't use skb_buf anywhere? 
(building with W=1 should catch this) > + skb->protocol = htons(ETH_P_MCTP); > + > + skb_queue_head(&box->packets, skb); > + > + return skb->data; > +} > + > +static void mctp_pcc_client_rx_callback(struct mbox_client *c, void *buffer) > +{ > + struct mctp_pcc_ndev *mctp_pcc_ndev; > + struct pcc_header pcc_header; > + struct mctp_skb_cb *cb; > + struct sk_buff *skb; > + > + mctp_pcc_ndev = container_of(c, struct mctp_pcc_ndev, inbox.client); > + if (!buffer) { > + dev_dstats_rx_dropped(mctp_pcc_ndev->mdev.dev); > + return; > + } Mainly out of curiosity: how does this happen? How do we get a completion where there is no original buffer? > + > + skb_queue_walk(&mctp_pcc_ndev->inbox.packets, skb) { > + if (skb->data == buffer) { > + skb_unlink(skb, &mctp_pcc_ndev->inbox.packets); > + dev_dstats_rx_add(mctp_pcc_ndev->mdev.dev, skb->len); > + skb_reset_mac_header(skb); > + skb_pull(skb, sizeof(pcc_header)); > + skb_reset_network_header(skb); > + cb = __mctp_cb(skb); > + cb->halen = 0; > + netif_rx(skb); > + return; > + } You can save a bit of indent by flipping the logic here: skb_queue_walk(&mctp_pcc_ndev->inbox.packets, skb) { if (skb->data != buffer) continue; skb_unlink(skb, &mctp_pcc_ndev->inbox.packets); dev_dstats_rx_add(mctp_pcc_ndev->mdev.dev, skb->len); skb_reset_mac_header(skb); skb_pull(skb, sizeof(pcc_header)); skb_reset_network_header(skb); cb = __mctp_cb(skb); cb->halen = 0; netif_rx(skb); return; } I figure we're restricted to what the mailbox API provides, but is there any way we can access the skb through a pointer, rather than having to dig through these lists? I think the issue is that the mbox API is using the void * buffer as both the data to transfer, and the callback context, so we can't stash useful context across the completion? 
> + } > + pr_warn("Unmatched packet in mctp-pcc inbox packet list"); > +} > + > +static void mctp_pcc_tx_done(struct mbox_client *c, void *mssg, int r) > +{ > + struct mctp_pcc_mailbox *box; > + struct sk_buff *skb; > + > + box = container_of(c, struct mctp_pcc_mailbox, client); > + skb_queue_walk(&box->packets, skb) { > + if (skb->data == mssg) { > + skb_unlink(skb, &box->packets); > + dev_consume_skb_any(skb); > + break; > + } > + } > +} > + > +static netdev_tx_t mctp_pcc_tx(struct sk_buff *skb, struct net_device *ndev) > +{ > + struct mctp_pcc_ndev *mpnd = netdev_priv(ndev); > + struct pcc_header *pcc_header; > + int len = skb->len; > + int rc; > + > + rc = skb_cow_head(skb, sizeof(*pcc_header)); > + if (rc) > + goto err_drop; > + > + pcc_header = skb_push(skb, sizeof(*pcc_header)); > + pcc_header->signature = cpu_to_le32(PCC_SIGNATURE | mpnd->outbox.index); > + pcc_header->flags = cpu_to_le32(PCC_CMD_COMPLETION_NOTIFY); > + memcpy(&pcc_header->command, MCTP_SIGNATURE, MCTP_SIGNATURE_LENGTH); > + pcc_header->length = cpu_to_le32(len + MCTP_SIGNATURE_LENGTH); > + > + skb_queue_head(&mpnd->outbox.packets, skb); > + > + rc = mbox_send_message(mpnd->outbox.chan->mchan, skb->data); > + > + if (rc < 0) { > + pr_info("%s fail, rc = %d", __func__, rc); > + return NETDEV_TX_BUSY; > + } What happens on mbox_send_message failure? The skb will still be present in the outbox.packets queue - I assume we don't see a completion callback in that case, and so the skb will be in the outbox.packets queue forever? Are you sure you want to return NETDEV_TX_BUSY here? Is there any situation where the mbox_send_message will continually fail? Should we ratelimit the pr_info() message there (and regardless, better to use one of netdev_info / netdev_warn / etc functions, since we are dealing with netdevs here). 
> + dev_dstats_tx_add(ndev, len); > + return NETDEV_TX_OK; > +err_drop: > + dev_dstats_tx_dropped(ndev); > + kfree_skb(skb); > + return NETDEV_TX_OK; > +} > + > +static const struct net_device_ops mctp_pcc_netdev_ops = { > + .ndo_start_xmit = mctp_pcc_tx, > +}; > + > +static const struct mctp_netdev_ops mctp_netdev_ops = { > + NULL > +}; > + > +static void mctp_pcc_setup(struct net_device *ndev) > +{ > + ndev->type = ARPHRD_MCTP; > + ndev->hard_header_len = 0; > + ndev->tx_queue_len = 0; > + ndev->flags = IFF_NOARP; > + ndev->netdev_ops = &mctp_pcc_netdev_ops; > + ndev->needs_free_netdev = true; > + ndev->pcpu_stat_type = NETDEV_PCPU_STAT_DSTATS; > +} > + > +struct mctp_pcc_lookup_context { > + int index; > + u32 inbox_index; > + u32 outbox_index; > +}; > + > +static acpi_status lookup_pcct_indices(struct acpi_resource *ares, > + void *context) > +{ > + struct mctp_pcc_lookup_context *luc = context; > + struct acpi_resource_address32 *addr; > + > + if (ares->type != PCC_DWORD_TYPE) > + return AE_OK; > + > + addr = ACPI_CAST_PTR(struct acpi_resource_address32, &ares->data); > + switch (luc->index) { > + case 0: > + luc->outbox_index = addr[0].address.minimum; > + break; > + case 1: > + luc->inbox_index = addr[0].address.minimum; > + break; > + } > + luc->index++; > + return AE_OK; > +} > + > +static void drain_packets(struct sk_buff_head *list) > +{ > + struct sk_buff *skb; > + > + while (!skb_queue_empty(list)) { > + skb = skb_dequeue(list); > + dev_consume_skb_any(skb); > + } > +} > + > +static void mctp_cleanup_netdev(void *data) > +{ > + struct mctp_pcc_ndev *mctp_pcc_ndev; > + struct net_device *ndev = data; > + > + mctp_pcc_ndev = netdev_priv(ndev); > + drain_packets(&mctp_pcc_ndev->outbox.packets); > + drain_packets(&mctp_pcc_ndev->inbox.packets); > + > + mctp_unregister_netdev(ndev); > +} > + > +static void mctp_cleanup_channel(void *data) > +{ > + struct pcc_mbox_chan *chan = data; > + > + pcc_mbox_free_channel(chan); > +} > + > +static int 
mctp_pcc_initialize_mailbox(struct device *dev, > + struct mctp_pcc_mailbox *box, u32 index) > +{ > + box->index = index; > + skb_queue_head_init(&box->packets); > + box->chan = pcc_mbox_request_channel(&box->client, index); > + box->chan->rx_alloc = mctp_pcc_rx_alloc; > + > + box->client.dev = dev; > + if (IS_ERR(box->chan)) > + return PTR_ERR(box->chan); > + return devm_add_action_or_reset(dev, mctp_cleanup_channel, box->chan); > +} > + > +static int mctp_pcc_driver_add(struct acpi_device *acpi_dev) > +{ > + struct mctp_pcc_lookup_context context = {0}; > + struct mctp_pcc_ndev *mctp_pcc_ndev; > + struct device *dev = &acpi_dev->dev; > + struct net_device *ndev; > + acpi_handle dev_handle; > + acpi_status status; > + int mctp_pcc_mtu; > + char name[32]; > + int rc; > + > + dev_dbg(dev, "Adding mctp_pcc device for HID %s\n", > + acpi_device_hid(acpi_dev)); > + dev_handle = acpi_device_handle(acpi_dev); > + status = acpi_walk_resources(dev_handle, "_CRS", lookup_pcct_indices, > + &context); > + if (!ACPI_SUCCESS(status)) { > + dev_err(dev, "FAILURE to lookup PCC indexes from CRS\n"); > + return -EINVAL; > + } > + > + /* inbox initialization */ the inbox initialization seems to be a bit further down. 
> + snprintf(name, sizeof(name), "mctpipcc%d", context.inbox_index); > + ndev = alloc_netdev(sizeof(*mctp_pcc_ndev), name, NET_NAME_PREDICTABLE, > + mctp_pcc_setup); > + if (!ndev) > + return -ENOMEM; > + > + mctp_pcc_ndev = netdev_priv(ndev); > + spin_lock_init(&mctp_pcc_ndev->lock); > + rc = mctp_pcc_initialize_mailbox(dev, &mctp_pcc_ndev->inbox, > + context.inbox_index); > + if (rc) > + goto free_netdev; > + mctp_pcc_ndev->inbox.client.rx_callback = mctp_pcc_client_rx_callback; > + > + /* outbox initialization */ > + rc = mctp_pcc_initialize_mailbox(dev, &mctp_pcc_ndev->outbox, > + context.outbox_index); > + if (rc) > + goto free_netdev; > + > + mctp_pcc_ndev->outbox.client.tx_done = mctp_pcc_tx_done; > + mctp_pcc_ndev->acpi_device = acpi_dev; > + mctp_pcc_ndev->mdev.dev = ndev; > + acpi_dev->driver_data = mctp_pcc_ndev; > + > + /* There is no clean way to pass the MTU to the callback function > + * used for registration, so set the values ahead of time. > + */ > + mctp_pcc_mtu = mctp_pcc_ndev->outbox.chan->shmem_size - > + sizeof(struct pcc_header); > + ndev->mtu = MCTP_MIN_MTU; > + ndev->max_mtu = mctp_pcc_mtu; > + ndev->min_mtu = MCTP_MIN_MTU; > + > + /* ndev needs to be freed before the iomemory (mapped above) gets > + * unmapped, devm resources get freed in reverse to the order they > + * are added. 
> + */ > + rc = mctp_register_netdev(ndev, &mctp_netdev_ops, MCTP_PHYS_BINDING_PCC); > + if (rc) > + goto free_netdev; > + return devm_add_action_or_reset(dev, mctp_cleanup_netdev, ndev); > +free_netdev: > + free_netdev(ndev); > + return rc; > +} Just a couple of nitpicky style things here (and above): try to keep the return-value checks immediately after the appropriate call: rc = mbox_send_message(mpnd->outbox.chan->mchan, skb->data); if (rc < 0) { pr_info("%s fail, rc = %d", __func__, rc); return NETDEV_TX_BUSY; } but give yourself some space after those checks, and around the returns/goto labels: rc = mctp_register_netdev(ndev, &mctp_netdev_ops, MCTP_PHYS_BINDING_PCC); if (rc) goto free_netdev; return devm_add_action_or_reset(dev, mctp_cleanup_netdev, ndev); free_netdev: free_netdev(ndev); return rc; } Cheers, Jeremy
All Changes are accepted. I have attempted to answer your questions here. >> +static void mctp_pcc_client_rx_callback(struct mbox_client *c, void *buffer) >> +{ >> + struct mctp_pcc_ndev *mctp_pcc_ndev; >> + struct pcc_header pcc_header; >> + struct mctp_skb_cb *cb; >> + struct sk_buff *skb; >> + >> + mctp_pcc_ndev = container_of(c, struct mctp_pcc_ndev, inbox.client); >> + if (!buffer) { >> + dev_dstats_rx_dropped(mctp_pcc_ndev->mdev.dev); >> + return; >> + } > Mainly out of curiosity: how does this happen? How do we get a > completion where there is no original buffer? If the sk_buff allocation fails, the logic falls back to the old code, which passes on a null buffer. There is logic there with notifying the sender that I don't want to skip or modify. See the other patch, in the change to the irq handler. > I figure we're restricted to what the mailbox API provides, but is there > any way we can access the skb through a pointer, rather than having to > dig through these lists? > > I think the issue is that the mbox API is using the void * buffer as > both the data to transfer, and the callback context, so we can't stash > useful context across the completion? Correct, the SK_buff is a structure that points to a buffer, and what gets to the send_data function is the buffer itself. That buffer has no pointer back to the sk_buff. >> + >> + rc = mbox_send_message(mpnd->outbox.chan->mchan, skb->data); >> + >> + if (rc < 0) { >> + pr_info("%s fail, rc = %d", __func__, rc); >> + return NETDEV_TX_BUSY; >> + } > What happens on mbox_send_message failure? The skb will still be present > in the outbox.packets queue - I assume we don't see a completion > callback in that case, and so the skb will be in the outbox.packets > queue forever? > > Are you sure you want to return NETDEV_TX_BUSY here? > > Is there any situation where the mbox_send_message will continually > fail? 
Should we ratelimit the pr_info() message there (and regardless, > better to use one of netdev_info / netdev_warn / etc functions, since we > are dealing with netdevs here). The fail will happen if the ring buffer is full, so, yes, it makes sense not to queue the packet. I can move that to after the mbox_send_message. The NETDEV_TX_BUSY is correct, as it means resend the packet, and we don't have any reference to it. The only other failure path on the mbox_send_message code is due to timeouts, which we do not use from this driver. That is a recent change, and that was the case I was handling before.
Hi Adam, > If the sk_buff allocation fails, the logic falls back to the old code, > which passes on a null buffer. There is logic there with notifying the > sender that I don't want to skip or modify. OK, so this will happen if we didn't allocate a buffer in the first place - we'd still get a completion occurring. Let me know if my understanding is incorrect. > > > I think the issue is that the mbox API is using the void * buffer > > as both the data to transfer, and the callback context, so we can't > > stash useful context across the completion? > > Correct, the SK_buff is a structure that points to a buffer, and > what gets to the send_data function is the buffer itself. That buffer > has no pointer back to the sk_buff. OK, that's a bit unfortunate. Might be good to see if you can add a context pointer to the mailbox request, in which you could stuff the skb pointer. USB does this for the transfer completions: there are members on the transfer (the struct urb) for both a context and buffer pointer. Of course, that is more a mailbox API change, so you may want to consider that as a separate thing later. > The NETDEV_TX_BUSY is correct, as it means resend the packet, and we > don't have any reference to it. OK, you may want to have the tx queue stopped at that point then, if you have some facility to re-start it when the mailbox ring buffer has space. Cheers, Jeremy
Hi, kernel test robot noticed the following build warnings: [auto build test WARNING on net-next/main] url: https://github.com/intel-lab-lkp/linux/commits/admiyo-os-amperecomputing-com/mailbox-pcc-support-mailbox-management-of-the-shared-buffer/20250711-031525 base: net-next/main patch link: https://lore.kernel.org/r/20250710191209.737167-3-admiyo%40os.amperecomputing.com patch subject: [PATCH net-next v22 2/2] mctp pcc: Implement MCTP over PCC Transport config: x86_64-allyesconfig (https://download.01.org/0day-ci/archive/20250711/202507111843.OV6uA3Jr-lkp@intel.com/config) compiler: clang version 20.1.8 (https://github.com/llvm/llvm-project 87f0227cb60147a26a1eeb4fb06e3b505e9c7261) reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20250711/202507111843.OV6uA3Jr-lkp@intel.com/reproduce) If you fix the issue in a separate patch/commit (i.e. not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot <lkp@intel.com> | Closes: https://lore.kernel.org/oe-kbuild-all/202507111843.OV6uA3Jr-lkp@intel.com/ All warnings (new ones prefixed by >>): >> drivers/net/mctp/mctp-pcc.c:71:8: warning: variable 'skb_buf' set but not used [-Wunused-but-set-variable] 71 | void *skb_buf; | ^ 1 warning generated. 
vim +/skb_buf +71 drivers/net/mctp/mctp-pcc.c 65 66 static void *mctp_pcc_rx_alloc(struct mbox_client *c, int size) 67 { 68 struct mctp_pcc_mailbox *box; 69 struct mctp_pcc_ndev *mctp_pcc_ndev; 70 struct sk_buff *skb; > 71 void *skb_buf; 72 73 box = container_of(c, struct mctp_pcc_mailbox, client); 74 mctp_pcc_ndev = container_of(c, struct mctp_pcc_ndev, inbox.client); 75 if (size > mctp_pcc_ndev->mdev.dev->mtu) 76 return NULL; 77 mctp_pcc_ndev = container_of(c, struct mctp_pcc_ndev, inbox.client); 78 skb = netdev_alloc_skb(mctp_pcc_ndev->mdev.dev, size); 79 if (!skb) 80 return NULL; 81 skb_buf = skb_put(skb, size); 82 skb->protocol = htons(ETH_P_MCTP); 83 84 skb_queue_head(&box->packets, skb); 85 86 return skb->data; 87 } 88 -- 0-DAY CI Kernel Test Service https://github.com/intel/lkp-tests/wiki
© 2016 - 2025 Red Hat, Inc.