Implementation of network driver for
Management Component Transport Protocol(MCTP)
over Platform Communication Channel(PCC)
DMTF DSP:0292
https://www.dmtf.org/sites/default/files/standards/documents/\
DSP0292_1.0.0WIP50.pdf
MCTP devices are specified via ACPI by entries
in DSDT/SSDT and reference channels specified
in the PCCT. Messages are sent on a type 3 and
received on a type 4 channel. Communication with
other devices use the PCC based doorbell mechanism;
a shared memory segment with a corresponding
interrupt and a memory register used to trigger
remote interrupts.
This driver takes advantage of PCC mailbox shared functions
management. Unlike the existing Type 2 drivers, the mssg
parameter is actively used. The data section of the struct sk_buff
that contains the outgoing packet is sent to the mailbox,
already properly formatted as a PCC exctended message.
The driver makes use of the pcc mailbox buffer management helpers.
These allow the network driver to use common code for the reading and
wrting from the shared memory buffer to the mailbox driver,
attempting to get a single implementation of the PCC protocol for
Type3/4.
If the mailbox ring buffer is full, the driver stops the
incoming packet queues until a message has been sent,
freeing space in the ring buffer.
When the Type 3 channel outbox receives a txdone response
interrupt, it consumes the outgoing sk_buff, allowing
it to be freed.
Bringing up an interface creates the channel between the
network driver and the mailbox driver. This enables
communication with the remote endpoint, to include the
receipt of new messages. Bringing down an interface
removes the channel, and no new messages can be delivered.
Stopping the interface also frees any packets that are
cached in the mailbox ringbuffer.
Signed-off-by: Adam Young <admiyo@os.amperecomputing.com>
---
MAINTAINERS | 5 +
drivers/net/mctp/Kconfig | 13 ++
drivers/net/mctp/Makefile | 1 +
drivers/net/mctp/mctp-pcc.c | 319 ++++++++++++++++++++++++++++++++++++
4 files changed, 338 insertions(+)
create mode 100644 drivers/net/mctp/mctp-pcc.c
diff --git a/MAINTAINERS b/MAINTAINERS
index 46126ce2f968..e1497608a05d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -14915,6 +14915,11 @@ F: include/net/mctpdevice.h
F: include/net/netns/mctp.h
F: net/mctp/
+MANAGEMENT COMPONENT TRANSPORT PROTOCOL (MCTP) over PCC (MCTP-PCC) Driver
+M: Adam Young <admiyo@os.amperecomputing.com>
+S: Maintained
+F: drivers/net/mctp/mctp-pcc.c
+
MAPLE TREE
M: Liam R. Howlett <Liam.Howlett@oracle.com>
R: Alice Ryhl <aliceryhl@google.com>
diff --git a/drivers/net/mctp/Kconfig b/drivers/net/mctp/Kconfig
index cf325ab0b1ef..77cd4091050c 100644
--- a/drivers/net/mctp/Kconfig
+++ b/drivers/net/mctp/Kconfig
@@ -47,6 +47,19 @@ config MCTP_TRANSPORT_I3C
A MCTP protocol network device is created for each I3C bus
having a "mctp-controller" devicetree property.
+config MCTP_TRANSPORT_PCC
+ tristate "MCTP PCC transport"
+ depends on ACPI
+ help
+ Provides a driver to access MCTP devices over PCC transport,
+ A MCTP protocol network device is created via ACPI for each
+ entry in the DSDT/SSDT that matches the identifier. The Platform
+ communication channels are selected from the corresponding
+ entries in the PCCT.
+
+ Say y here if you need to connect to MCTP endpoints over PCC. To
+ compile as a module, use m; the module will be called mctp-pcc.
+
config MCTP_TRANSPORT_USB
tristate "MCTP USB transport"
depends on USB
diff --git a/drivers/net/mctp/Makefile b/drivers/net/mctp/Makefile
index c36006849a1e..0a591299ffa9 100644
--- a/drivers/net/mctp/Makefile
+++ b/drivers/net/mctp/Makefile
@@ -1,4 +1,5 @@
obj-$(CONFIG_MCTP_SERIAL) += mctp-serial.o
obj-$(CONFIG_MCTP_TRANSPORT_I2C) += mctp-i2c.o
obj-$(CONFIG_MCTP_TRANSPORT_I3C) += mctp-i3c.o
+obj-$(CONFIG_MCTP_TRANSPORT_PCC) += mctp-pcc.o
obj-$(CONFIG_MCTP_TRANSPORT_USB) += mctp-usb.o
diff --git a/drivers/net/mctp/mctp-pcc.c b/drivers/net/mctp/mctp-pcc.c
new file mode 100644
index 000000000000..927a525c1121
--- /dev/null
+++ b/drivers/net/mctp/mctp-pcc.c
@@ -0,0 +1,319 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * mctp-pcc.c - Driver for MCTP over PCC.
+ * Copyright (c) 2024-2025, Ampere Computing LLC
+ *
+ */
+
+/* Implementation of MCTP over PCC DMTF Specification DSP0256
+ * https://www.dmtf.org/sites/default/files/standards/documents/DSP0292_1.0.0WIP50.pdf
+ */
+
+#include <linux/acpi.h>
+#include <linux/hrtimer.h>
+#include <linux/if_arp.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/mailbox_client.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/platform_device.h>
+#include <linux/skbuff.h>
+#include <linux/string.h>
+
+#include <acpi/acpi_bus.h>
+#include <acpi/acpi_drivers.h>
+#include <acpi/acrestyp.h>
+#include <acpi/actbl.h>
+#include <acpi/pcc.h>
+#include <net/mctp.h>
+#include <net/mctpdevice.h>
+
+#define MCTP_SIGNATURE "MCTP"
+#define MCTP_SIGNATURE_LENGTH (sizeof(MCTP_SIGNATURE) - 1)
+#define MCTP_MIN_MTU 68
+#define PCC_DWORD_TYPE 0x0c
+
+struct mctp_pcc_mailbox {
+ u32 index;
+ struct pcc_mbox_chan *chan;
+ struct mbox_client client;
+};
+
+/* The netdev structure. One of these per PCC adapter. */
+struct mctp_pcc_ndev {
+ struct net_device *ndev;
+ struct acpi_device *acpi_device;
+ struct mctp_pcc_mailbox inbox;
+ struct mctp_pcc_mailbox outbox;
+};
+
+static void mctp_pcc_client_rx_callback(struct mbox_client *cl, void *mssg)
+{
+ struct pcc_extended_header pcc_header;
+ struct mctp_pcc_ndev *mctp_pcc_ndev;
+ struct mctp_pcc_mailbox *inbox;
+ struct mctp_skb_cb *cb;
+ struct sk_buff *skb;
+ int size;
+
+ mctp_pcc_ndev = container_of(cl, struct mctp_pcc_ndev, inbox.client);
+ inbox = &mctp_pcc_ndev->inbox;
+ size = pcc_mbox_query_bytes_available(inbox->chan);
+ if (size == 0)
+ return;
+ skb = netdev_alloc_skb(mctp_pcc_ndev->ndev, size);
+ if (!skb) {
+ dev_dstats_rx_dropped(mctp_pcc_ndev->ndev);
+ return;
+ }
+ skb_put(skb, size);
+ skb->protocol = htons(ETH_P_MCTP);
+ pcc_mbox_read_from_buffer(inbox->chan, size, skb->data);
+ dev_dstats_rx_add(mctp_pcc_ndev->ndev, skb->len);
+ skb_reset_mac_header(skb);
+ skb_pull(skb, sizeof(pcc_header));
+ skb_reset_network_header(skb);
+ cb = __mctp_cb(skb);
+ cb->halen = 0;
+ netif_rx(skb);
+}
+
+static netdev_tx_t mctp_pcc_tx(struct sk_buff *skb, struct net_device *ndev)
+{
+ struct mctp_pcc_ndev *mpnd = netdev_priv(ndev);
+ struct pcc_extended_header *pcc_header;
+ int len = skb->len;
+ int rc;
+
+ rc = skb_cow_head(skb, sizeof(*pcc_header));
+ if (rc) {
+ dev_dstats_tx_dropped(ndev);
+ kfree_skb(skb);
+ return NETDEV_TX_OK;
+ }
+
+ pcc_header = skb_push(skb, sizeof(*pcc_header));
+ pcc_header->signature = PCC_SIGNATURE | mpnd->outbox.index;
+ pcc_header->flags = PCC_CMD_COMPLETION_NOTIFY;
+ memcpy(&pcc_header->command, MCTP_SIGNATURE, MCTP_SIGNATURE_LENGTH);
+ pcc_header->length = len + MCTP_SIGNATURE_LENGTH;
+ rc = mbox_send_message(mpnd->outbox.chan->mchan, skb);
+
+ if (rc < 0) {
+ netif_stop_queue(ndev);
+ return NETDEV_TX_BUSY;
+ }
+
+ dev_dstats_tx_add(ndev, len);
+ return NETDEV_TX_OK;
+}
+
+static void mctp_pcc_tx_prepare(struct mbox_client *cl, void *mssg)
+{
+ struct mctp_pcc_ndev *mctp_pcc_ndev;
+ struct mctp_pcc_mailbox *outbox;
+ struct sk_buff *skb = mssg;
+ int len_sent;
+
+ mctp_pcc_ndev = container_of(cl, struct mctp_pcc_ndev, outbox.client);
+ outbox = &mctp_pcc_ndev->outbox;
+
+ if (!skb)
+ return;
+
+ len_sent = pcc_mbox_write_to_buffer(outbox->chan, skb->len, skb->data);
+ if (len_sent == 0)
+ pr_info("packet dropped");
+}
+
+static void mctp_pcc_tx_done(struct mbox_client *c, void *mssg, int r)
+{
+ struct mctp_pcc_ndev *mctp_pcc_ndev;
+ struct mctp_pcc_mailbox *outbox;
+ struct sk_buff *skb = mssg;
+
+ mctp_pcc_ndev = container_of(c, struct mctp_pcc_ndev, outbox.client);
+ outbox = container_of(c, struct mctp_pcc_mailbox, client);
+ if (skb)
+ dev_consume_skb_any(skb);
+ netif_wake_queue(mctp_pcc_ndev->ndev);
+}
+
+static int mctp_pcc_ndo_open(struct net_device *ndev)
+{
+ struct mctp_pcc_ndev *mctp_pcc_ndev = netdev_priv(ndev);
+ struct mctp_pcc_mailbox *outbox, *inbox;
+
+ outbox = &mctp_pcc_ndev->outbox;
+ inbox = &mctp_pcc_ndev->inbox;
+
+ outbox->chan = pcc_mbox_request_channel(&outbox->client, outbox->index);
+ if (IS_ERR(outbox->chan))
+ return PTR_ERR(outbox->chan);
+
+ inbox->client.rx_callback = mctp_pcc_client_rx_callback;
+ inbox->chan = pcc_mbox_request_channel(&inbox->client, inbox->index);
+ if (IS_ERR(inbox->chan)) {
+ pcc_mbox_free_channel(outbox->chan);
+ return PTR_ERR(inbox->chan);
+ }
+ return 0;
+}
+
+static int mctp_pcc_ndo_stop(struct net_device *ndev)
+{
+ struct mctp_pcc_ndev *mctp_pcc_ndev = netdev_priv(ndev);
+
+ pcc_mbox_free_channel(mctp_pcc_ndev->outbox.chan);
+ pcc_mbox_free_channel(mctp_pcc_ndev->inbox.chan);
+ return 0;
+}
+
+static const struct net_device_ops mctp_pcc_netdev_ops = {
+ .ndo_open = mctp_pcc_ndo_open,
+ .ndo_stop = mctp_pcc_ndo_stop,
+ .ndo_start_xmit = mctp_pcc_tx,
+};
+
+static void mctp_pcc_setup(struct net_device *ndev)
+{
+ ndev->type = ARPHRD_MCTP;
+ ndev->hard_header_len = 0;
+ ndev->tx_queue_len = 0;
+ ndev->flags = IFF_NOARP;
+ ndev->netdev_ops = &mctp_pcc_netdev_ops;
+ ndev->needs_free_netdev = true;
+ ndev->pcpu_stat_type = NETDEV_PCPU_STAT_DSTATS;
+}
+
+struct mctp_pcc_lookup_context {
+ int index;
+ u32 inbox_index;
+ u32 outbox_index;
+};
+
+static acpi_status lookup_pcct_indices(struct acpi_resource *ares,
+ void *context)
+{
+ struct mctp_pcc_lookup_context *luc = context;
+ struct acpi_resource_address32 *addr;
+
+ if (ares->type != PCC_DWORD_TYPE)
+ return AE_OK;
+
+ addr = ACPI_CAST_PTR(struct acpi_resource_address32, &ares->data);
+ switch (luc->index) {
+ case 0:
+ luc->outbox_index = addr[0].address.minimum;
+ break;
+ case 1:
+ luc->inbox_index = addr[0].address.minimum;
+ break;
+ }
+ luc->index++;
+ return AE_OK;
+}
+
+static void mctp_cleanup_netdev(void *data)
+{
+ struct net_device *ndev = data;
+
+ mctp_unregister_netdev(ndev);
+}
+
+static int initialize_MTU(struct net_device *ndev)
+{
+ struct mctp_pcc_ndev *mctp_pcc_ndev = netdev_priv(ndev);
+ struct mctp_pcc_mailbox *outbox;
+ int mctp_pcc_mtu;
+
+ outbox = &mctp_pcc_ndev->outbox;
+ mctp_pcc_mtu = pcc_mbox_buffer_size(outbox->index);
+ if (mctp_pcc_mtu == -1)
+ return -1;
+
+ mctp_pcc_mtu = mctp_pcc_mtu - sizeof(struct pcc_extended_header);
+ mctp_pcc_ndev = netdev_priv(ndev);
+ ndev->mtu = MCTP_MIN_MTU;
+ ndev->max_mtu = mctp_pcc_mtu;
+ ndev->min_mtu = MCTP_MIN_MTU;
+
+ return 0;
+}
+
+static int mctp_pcc_driver_add(struct acpi_device *acpi_dev)
+{
+ struct mctp_pcc_lookup_context context = {0};
+ struct mctp_pcc_ndev *mctp_pcc_ndev;
+ struct device *dev = &acpi_dev->dev;
+ struct net_device *ndev;
+ acpi_handle dev_handle;
+ acpi_status status;
+ char name[32];
+ int rc;
+
+ dev_dbg(dev, "Adding mctp_pcc device for HID %s\n",
+ acpi_device_hid(acpi_dev));
+ dev_handle = acpi_device_handle(acpi_dev);
+ status = acpi_walk_resources(dev_handle, "_CRS", lookup_pcct_indices,
+ &context);
+ if (!ACPI_SUCCESS(status)) {
+ dev_err(dev, "FAILED to lookup PCC indexes from CRS\n");
+ return -EINVAL;
+ }
+
+ snprintf(name, sizeof(name), "mctppcc%d", context.inbox_index);
+ ndev = alloc_netdev(sizeof(*mctp_pcc_ndev), name, NET_NAME_PREDICTABLE,
+ mctp_pcc_setup);
+ if (!ndev)
+ return -ENOMEM;
+
+ mctp_pcc_ndev = netdev_priv(ndev);
+
+ mctp_pcc_ndev->inbox.index = context.inbox_index;
+ mctp_pcc_ndev->inbox.client.dev = dev;
+ mctp_pcc_ndev->outbox.index = context.outbox_index;
+ mctp_pcc_ndev->outbox.client.dev = dev;
+
+ mctp_pcc_ndev->outbox.client.tx_prepare = mctp_pcc_tx_prepare;
+ mctp_pcc_ndev->outbox.client.tx_done = mctp_pcc_tx_done;
+ mctp_pcc_ndev->acpi_device = acpi_dev;
+ mctp_pcc_ndev->ndev = ndev;
+ acpi_dev->driver_data = mctp_pcc_ndev;
+
+ rc = initialize_MTU(ndev);
+ if (rc)
+ goto free_netdev;
+
+ rc = mctp_register_netdev(ndev, NULL, MCTP_PHYS_BINDING_PCC);
+ if (rc)
+ goto free_netdev;
+
+ return devm_add_action_or_reset(dev, mctp_cleanup_netdev, ndev);
+free_netdev:
+ free_netdev(ndev);
+ return rc;
+}
+
+static const struct acpi_device_id mctp_pcc_device_ids[] = {
+ { "DMT0001" },
+ {}
+};
+
+static struct acpi_driver mctp_pcc_driver = {
+ .name = "mctp_pcc",
+ .class = "Unknown",
+ .ids = mctp_pcc_device_ids,
+ .ops = {
+ .add = mctp_pcc_driver_add,
+ },
+};
+
+module_acpi_driver(mctp_pcc_driver);
+
+MODULE_DEVICE_TABLE(acpi, mctp_pcc_device_ids);
+
+MODULE_DESCRIPTION("MCTP PCC ACPI device");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Adam Young <admiyo@os.amperecomputing.com>");
--
2.43.0
Hi Adam,
Looking pretty good, a few things inline:
> diff --git a/drivers/net/mctp/mctp-pcc.c b/drivers/net/mctp/mctp-pcc.c
> new file mode 100644
> index 000000000000..927a525c1121
> --- /dev/null
> +++ b/drivers/net/mctp/mctp-pcc.c
> @@ -0,0 +1,319 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * mctp-pcc.c - Driver for MCTP over PCC.
> + * Copyright (c) 2024-2025, Ampere Computing LLC
> + *
> + */
> +
> +/* Implementation of MCTP over PCC DMTF Specification DSP0256
> + * https://www.dmtf.org/sites/default/files/standards/documents/DSP0292_1.0.0WIP50.pdf
> + */
> +
> +#include <linux/acpi.h>
> +#include <linux/hrtimer.h>
> +#include <linux/if_arp.h>
> +#include <linux/init.h>
> +#include <linux/kernel.h>
> +#include <linux/mailbox_client.h>
> +#include <linux/module.h>
> +#include <linux/netdevice.h>
> +#include <linux/platform_device.h>
> +#include <linux/skbuff.h>
> +#include <linux/string.h>
> +
> +#include <acpi/acpi_bus.h>
> +#include <acpi/acpi_drivers.h>
> +#include <acpi/acrestyp.h>
> +#include <acpi/actbl.h>
> +#include <acpi/pcc.h>
> +#include <net/mctp.h>
> +#include <net/mctpdevice.h>
> +
> +#define MCTP_SIGNATURE "MCTP"
> +#define MCTP_SIGNATURE_LENGTH (sizeof(MCTP_SIGNATURE) - 1)
> +#define MCTP_MIN_MTU 68
> +#define PCC_DWORD_TYPE 0x0c
> +
> +struct mctp_pcc_mailbox {
> + u32 index;
> + struct pcc_mbox_chan *chan;
> + struct mbox_client client;
> +};
> +
> +/* The netdev structure. One of these per PCC adapter. */
> +struct mctp_pcc_ndev {
> + struct net_device *ndev;
> + struct acpi_device *acpi_device;
> + struct mctp_pcc_mailbox inbox;
> + struct mctp_pcc_mailbox outbox;
> +};
> +
> +static void mctp_pcc_client_rx_callback(struct mbox_client *cl, void *mssg)
> +{
> + struct pcc_extended_header pcc_header;
> + struct mctp_pcc_ndev *mctp_pcc_ndev;
> + struct mctp_pcc_mailbox *inbox;
> + struct mctp_skb_cb *cb;
> + struct sk_buff *skb;
> + int size;
> +
> + mctp_pcc_ndev = container_of(cl, struct mctp_pcc_ndev, inbox.client);
> + inbox = &mctp_pcc_ndev->inbox;
> + size = pcc_mbox_query_bytes_available(inbox->chan);
> + if (size == 0)
> + return;
> + skb = netdev_alloc_skb(mctp_pcc_ndev->ndev, size);
> + if (!skb) {
> + dev_dstats_rx_dropped(mctp_pcc_ndev->ndev);
> + return;
> + }
> + skb_put(skb, size);
> + skb->protocol = htons(ETH_P_MCTP);
> + pcc_mbox_read_from_buffer(inbox->chan, size, skb->data);
> + dev_dstats_rx_add(mctp_pcc_ndev->ndev, skb->len);
> + skb_reset_mac_header(skb);
> + skb_pull(skb, sizeof(pcc_header));
> + skb_reset_network_header(skb);
> + cb = __mctp_cb(skb);
> + cb->halen = 0;
> + netif_rx(skb);
> +}
> +
> +static netdev_tx_t mctp_pcc_tx(struct sk_buff *skb, struct net_device *ndev)
> +{
> + struct mctp_pcc_ndev *mpnd = netdev_priv(ndev);
> + struct pcc_extended_header *pcc_header;
> + int len = skb->len;
> + int rc;
> +
> + rc = skb_cow_head(skb, sizeof(*pcc_header));
> + if (rc) {
> + dev_dstats_tx_dropped(ndev);
> + kfree_skb(skb);
> + return NETDEV_TX_OK;
> + }
> +
> + pcc_header = skb_push(skb, sizeof(*pcc_header));
> + pcc_header->signature = PCC_SIGNATURE | mpnd->outbox.index;
> + pcc_header->flags = PCC_CMD_COMPLETION_NOTIFY;
> + memcpy(&pcc_header->command, MCTP_SIGNATURE, MCTP_SIGNATURE_LENGTH);
> + pcc_header->length = len + MCTP_SIGNATURE_LENGTH;
> + rc = mbox_send_message(mpnd->outbox.chan->mchan, skb);
> +
> + if (rc < 0) {
> + netif_stop_queue(ndev);
> + return NETDEV_TX_BUSY;
> + }
super minor: the grouping here (and a couple of other places) is a bit
strange; try and keep the rc handler together with where rc is set, and
the setup and send distinct. I'd typically do this as:
pcc_header = skb_push(skb, sizeof(*pcc_header));
pcc_header->signature = PCC_SIGNATURE | mpnd->outbox.index;
pcc_header->flags = PCC_CMD_COMPLETION_NOTIFY;
memcpy(&pcc_header->command, MCTP_SIGNATURE, MCTP_SIGNATURE_LENGTH);
pcc_header->length = len + MCTP_SIGNATURE_LENGTH;
rc = mbox_send_message(mpnd->outbox.chan->mchan, skb);
if (rc < 0) {
netif_stop_queue(ndev);
return NETDEV_TX_BUSY;
}
> +
> + dev_dstats_tx_add(ndev, len);
> + return NETDEV_TX_OK;
> +}
> +
> +static void mctp_pcc_tx_prepare(struct mbox_client *cl, void *mssg)
> +{
> + struct mctp_pcc_ndev *mctp_pcc_ndev;
> + struct mctp_pcc_mailbox *outbox;
> + struct sk_buff *skb = mssg;
> + int len_sent;
> +
> + mctp_pcc_ndev = container_of(cl, struct mctp_pcc_ndev, outbox.client);
> + outbox = &mctp_pcc_ndev->outbox;
> +
> + if (!skb)
> + return;
> +
> + len_sent = pcc_mbox_write_to_buffer(outbox->chan, skb->len, skb->data);
> + if (len_sent == 0)
> + pr_info("packet dropped");
You probably don't want a bare pr_info() in the failure path here;
either something debug level, or ratelimited.
and probably use a netdev-specific log function (netdev_dbg() perhaps),
so you get the device information too.
I'm not super clear on this failure mode, do you need to update the
tx statistics on the drop here? In which case, you may not need the
_dbg/_info message at all.
> +}
> +
> +static void mctp_pcc_tx_done(struct mbox_client *c, void *mssg, int r)
> +{
> + struct mctp_pcc_ndev *mctp_pcc_ndev;
> + struct mctp_pcc_mailbox *outbox;
> + struct sk_buff *skb = mssg;
Nice, no more list lookups.
> +
> + mctp_pcc_ndev = container_of(c, struct mctp_pcc_ndev, outbox.client);
> + outbox = container_of(c, struct mctp_pcc_mailbox, client);
> + if (skb)
> + dev_consume_skb_any(skb);
minor: dev_consume_skb_any() will tolerate a null argument, you can
leave out the conditional here.
> + netif_wake_queue(mctp_pcc_ndev->ndev);
> +}
> +
> +static int mctp_pcc_ndo_open(struct net_device *ndev)
> +{
> + struct mctp_pcc_ndev *mctp_pcc_ndev = netdev_priv(ndev);
> + struct mctp_pcc_mailbox *outbox, *inbox;
> +
> + outbox = &mctp_pcc_ndev->outbox;
> + inbox = &mctp_pcc_ndev->inbox;
> +
> + outbox->chan = pcc_mbox_request_channel(&outbox->client, outbox->index);
> + if (IS_ERR(outbox->chan))
> + return PTR_ERR(outbox->chan);
> +
> + inbox->client.rx_callback = mctp_pcc_client_rx_callback;
> + inbox->chan = pcc_mbox_request_channel(&inbox->client, inbox->index);
> + if (IS_ERR(inbox->chan)) {
> + pcc_mbox_free_channel(outbox->chan);
> + return PTR_ERR(inbox->chan);
> + }
> + return 0;
> +}
Having the channels fully set-up before calling request_channel looks
much safer now :)
> +
> +static int mctp_pcc_ndo_stop(struct net_device *ndev)
> +{
> + struct mctp_pcc_ndev *mctp_pcc_ndev = netdev_priv(ndev);
> +
> + pcc_mbox_free_channel(mctp_pcc_ndev->outbox.chan);
> + pcc_mbox_free_channel(mctp_pcc_ndev->inbox.chan);
> + return 0;
> +}
> +
> +static const struct net_device_ops mctp_pcc_netdev_ops = {
> + .ndo_open = mctp_pcc_ndo_open,
> + .ndo_stop = mctp_pcc_ndo_stop,
> + .ndo_start_xmit = mctp_pcc_tx,
> +};
> +
> +static void mctp_pcc_setup(struct net_device *ndev)
> +{
> + ndev->type = ARPHRD_MCTP;
> + ndev->hard_header_len = 0;
Isn't this sizeof(struct pcc_extended_header) ?
> + ndev->tx_queue_len = 0;
> + ndev->flags = IFF_NOARP;
> + ndev->netdev_ops = &mctp_pcc_netdev_ops;
> + ndev->needs_free_netdev = true;
> + ndev->pcpu_stat_type = NETDEV_PCPU_STAT_DSTATS;
> +}
> +
> +struct mctp_pcc_lookup_context {
> + int index;
> + u32 inbox_index;
> + u32 outbox_index;
> +};
> +
> +static acpi_status lookup_pcct_indices(struct acpi_resource *ares,
> + void *context)
> +{
> + struct mctp_pcc_lookup_context *luc = context;
> + struct acpi_resource_address32 *addr;
> +
> + if (ares->type != PCC_DWORD_TYPE)
> + return AE_OK;
> +
> + addr = ACPI_CAST_PTR(struct acpi_resource_address32, &ares->data);
> + switch (luc->index) {
> + case 0:
> + luc->outbox_index = addr[0].address.minimum;
> + break;
> + case 1:
> + luc->inbox_index = addr[0].address.minimum;
> + break;
> + }
> + luc->index++;
> + return AE_OK;
> +}
> +
> +static void mctp_cleanup_netdev(void *data)
> +{
> + struct net_device *ndev = data;
> +
> + mctp_unregister_netdev(ndev);
> +}
> +
> +static int initialize_MTU(struct net_device *ndev)
> +{
> + struct mctp_pcc_ndev *mctp_pcc_ndev = netdev_priv(ndev);
> + struct mctp_pcc_mailbox *outbox;
> + int mctp_pcc_mtu;
> +
> + outbox = &mctp_pcc_ndev->outbox;
> + mctp_pcc_mtu = pcc_mbox_buffer_size(outbox->index);
> + if (mctp_pcc_mtu == -1)
> + return -1;
You may want to check that this is at least
sizeof(struct pcc_extended_header), to avoid an underflow below, and
possibly at least MCTP_MIN_MTU after subtracting the header size (as
this would be less than the MCTP BTU)
> +
> + mctp_pcc_mtu = mctp_pcc_mtu - sizeof(struct pcc_extended_header);
> + mctp_pcc_ndev = netdev_priv(ndev);
unneeded?
> + ndev->mtu = MCTP_MIN_MTU;
> + ndev->max_mtu = mctp_pcc_mtu;
> + ndev->min_mtu = MCTP_MIN_MTU;
> +
> + return 0;
> +}
Now that the initialize_MTU implementation is simpler, you may want to
reinstate it inline in driver_add(), but also fine as-is.
> +
> +static int mctp_pcc_driver_add(struct acpi_device *acpi_dev)
> +{
> + struct mctp_pcc_lookup_context context = {0};
> + struct mctp_pcc_ndev *mctp_pcc_ndev;
> + struct device *dev = &acpi_dev->dev;
> + struct net_device *ndev;
> + acpi_handle dev_handle;
> + acpi_status status;
> + char name[32];
> + int rc;
> +
> + dev_dbg(dev, "Adding mctp_pcc device for HID %s\n",
> + acpi_device_hid(acpi_dev));
> + dev_handle = acpi_device_handle(acpi_dev);
> + status = acpi_walk_resources(dev_handle, "_CRS", lookup_pcct_indices,
> + &context);
> + if (!ACPI_SUCCESS(status)) {
> + dev_err(dev, "FAILED to lookup PCC indexes from CRS\n");
> + return -EINVAL;
> + }
> +
> + snprintf(name, sizeof(name), "mctppcc%d", context.inbox_index);
> + ndev = alloc_netdev(sizeof(*mctp_pcc_ndev), name, NET_NAME_PREDICTABLE,
> + mctp_pcc_setup);
> + if (!ndev)
> + return -ENOMEM;
> +
> + mctp_pcc_ndev = netdev_priv(ndev);
> +
> + mctp_pcc_ndev->inbox.index = context.inbox_index;
> + mctp_pcc_ndev->inbox.client.dev = dev;
> + mctp_pcc_ndev->outbox.index = context.outbox_index;
> + mctp_pcc_ndev->outbox.client.dev = dev;
> +
> + mctp_pcc_ndev->outbox.client.tx_prepare = mctp_pcc_tx_prepare;
> + mctp_pcc_ndev->outbox.client.tx_done = mctp_pcc_tx_done;
> + mctp_pcc_ndev->acpi_device = acpi_dev;
> + mctp_pcc_ndev->ndev = ndev;
> + acpi_dev->driver_data = mctp_pcc_ndev;
> +
> + rc = initialize_MTU(ndev);
> + if (rc)
> + goto free_netdev;
> +
> + rc = mctp_register_netdev(ndev, NULL, MCTP_PHYS_BINDING_PCC);
> + if (rc)
> + goto free_netdev;
> +
> + return devm_add_action_or_reset(dev, mctp_cleanup_netdev, ndev);
> +free_netdev:
> + free_netdev(ndev);
> + return rc;
> +}
> +
> +static const struct acpi_device_id mctp_pcc_device_ids[] = {
> + { "DMT0001" },
> + {}
> +};
> +
> +static struct acpi_driver mctp_pcc_driver = {
> + .name = "mctp_pcc",
> + .class = "Unknown",
> + .ids = mctp_pcc_device_ids,
> + .ops = {
> + .add = mctp_pcc_driver_add,
> + },
> +};
> +
> +module_acpi_driver(mctp_pcc_driver);
> +
> +MODULE_DEVICE_TABLE(acpi, mctp_pcc_device_ids);
> +
> +MODULE_DESCRIPTION("MCTP PCC ACPI device");
> +MODULE_LICENSE("GPL");
> +MODULE_AUTHOR("Adam Young <admiyo@os.amperecomputing.com>");
Cheers,
Jeremy
On 10/16/25 23:01, Jeremy Kerr wrote:
> Hi Adam,
>
> Looking pretty good, a few things inline:
>
>> diff --git a/drivers/net/mctp/mctp-pcc.c b/drivers/net/mctp/mctp-pcc.c
>> new file mode 100644
>> index 000000000000..927a525c1121
>> --- /dev/null
>> +++ b/drivers/net/mctp/mctp-pcc.c
>> @@ -0,0 +1,319 @@
>> +// SPDX-License-Identifier: GPL-2.0
>> +/*
>> + * mctp-pcc.c - Driver for MCTP over PCC.
>> + * Copyright (c) 2024-2025, Ampere Computing LLC
>> + *
>> + */
>> +
>> +/* Implementation of MCTP over PCC DMTF Specification DSP0256
>> + * https://www.dmtf.org/sites/default/files/standards/documents/DSP0292_1.0.0WIP50.pdf
>> + */
>> +
>> +#include <linux/acpi.h>
>> +#include <linux/hrtimer.h>
>> +#include <linux/if_arp.h>
>> +#include <linux/init.h>
>> +#include <linux/kernel.h>
>> +#include <linux/mailbox_client.h>
>> +#include <linux/module.h>
>> +#include <linux/netdevice.h>
>> +#include <linux/platform_device.h>
>> +#include <linux/skbuff.h>
>> +#include <linux/string.h>
>> +
>> +#include <acpi/acpi_bus.h>
>> +#include <acpi/acpi_drivers.h>
>> +#include <acpi/acrestyp.h>
>> +#include <acpi/actbl.h>
>> +#include <acpi/pcc.h>
>> +#include <net/mctp.h>
>> +#include <net/mctpdevice.h>
>> +
>> +#define MCTP_SIGNATURE "MCTP"
>> +#define MCTP_SIGNATURE_LENGTH (sizeof(MCTP_SIGNATURE) - 1)
>> +#define MCTP_MIN_MTU 68
>> +#define PCC_DWORD_TYPE 0x0c
>> +
>> +struct mctp_pcc_mailbox {
>> + u32 index;
>> + struct pcc_mbox_chan *chan;
>> + struct mbox_client client;
>> +};
>> +
>> +/* The netdev structure. One of these per PCC adapter. */
>> +struct mctp_pcc_ndev {
>> + struct net_device *ndev;
>> + struct acpi_device *acpi_device;
>> + struct mctp_pcc_mailbox inbox;
>> + struct mctp_pcc_mailbox outbox;
>> +};
>> +
>> +static void mctp_pcc_client_rx_callback(struct mbox_client *cl, void *mssg)
>> +{
>> + struct pcc_extended_header pcc_header;
>> + struct mctp_pcc_ndev *mctp_pcc_ndev;
>> + struct mctp_pcc_mailbox *inbox;
>> + struct mctp_skb_cb *cb;
>> + struct sk_buff *skb;
>> + int size;
>> +
>> + mctp_pcc_ndev = container_of(cl, struct mctp_pcc_ndev, inbox.client);
>> + inbox = &mctp_pcc_ndev->inbox;
>> + size = pcc_mbox_query_bytes_available(inbox->chan);
>> + if (size == 0)
>> + return;
>> + skb = netdev_alloc_skb(mctp_pcc_ndev->ndev, size);
>> + if (!skb) {
>> + dev_dstats_rx_dropped(mctp_pcc_ndev->ndev);
>> + return;
>> + }
>> + skb_put(skb, size);
>> + skb->protocol = htons(ETH_P_MCTP);
>> + pcc_mbox_read_from_buffer(inbox->chan, size, skb->data);
>> + dev_dstats_rx_add(mctp_pcc_ndev->ndev, skb->len);
>> + skb_reset_mac_header(skb);
>> + skb_pull(skb, sizeof(pcc_header));
>> + skb_reset_network_header(skb);
>> + cb = __mctp_cb(skb);
>> + cb->halen = 0;
>> + netif_rx(skb);
>> +}
>> +
>> +static netdev_tx_t mctp_pcc_tx(struct sk_buff *skb, struct net_device *ndev)
>> +{
>> + struct mctp_pcc_ndev *mpnd = netdev_priv(ndev);
>> + struct pcc_extended_header *pcc_header;
>> + int len = skb->len;
>> + int rc;
>> +
>> + rc = skb_cow_head(skb, sizeof(*pcc_header));
>> + if (rc) {
>> + dev_dstats_tx_dropped(ndev);
>> + kfree_skb(skb);
>> + return NETDEV_TX_OK;
>> + }
>> +
>> + pcc_header = skb_push(skb, sizeof(*pcc_header));
>> + pcc_header->signature = PCC_SIGNATURE | mpnd->outbox.index;
>> + pcc_header->flags = PCC_CMD_COMPLETION_NOTIFY;
>> + memcpy(&pcc_header->command, MCTP_SIGNATURE, MCTP_SIGNATURE_LENGTH);
>> + pcc_header->length = len + MCTP_SIGNATURE_LENGTH;
>> + rc = mbox_send_message(mpnd->outbox.chan->mchan, skb);
>> +
>> + if (rc < 0) {
>> + netif_stop_queue(ndev);
>> + return NETDEV_TX_BUSY;
>> + }
> super minor: the grouping here (and a couple of other places) is a bit
> strange; try and keep the rc handler together with where rc is set, and
> the setup and send distinct. I'd typically do this as:
>
> pcc_header = skb_push(skb, sizeof(*pcc_header));
> pcc_header->signature = PCC_SIGNATURE | mpnd->outbox.index;
> pcc_header->flags = PCC_CMD_COMPLETION_NOTIFY;
> memcpy(&pcc_header->command, MCTP_SIGNATURE, MCTP_SIGNATURE_LENGTH);
> pcc_header->length = len + MCTP_SIGNATURE_LENGTH;
>
> rc = mbox_send_message(mpnd->outbox.chan->mchan, skb);
> if (rc < 0) {
> netif_stop_queue(ndev);
> return NETDEV_TX_BUSY;
> }
OK I can see the rationale. I do find that easier to read.
>
>> +
>> + dev_dstats_tx_add(ndev, len);
>> + return NETDEV_TX_OK;
>> +}
>> +
>> +static void mctp_pcc_tx_prepare(struct mbox_client *cl, void *mssg)
>> +{
>> + struct mctp_pcc_ndev *mctp_pcc_ndev;
>> + struct mctp_pcc_mailbox *outbox;
>> + struct sk_buff *skb = mssg;
>> + int len_sent;
>> +
>> + mctp_pcc_ndev = container_of(cl, struct mctp_pcc_ndev, outbox.client);
>> + outbox = &mctp_pcc_ndev->outbox;
>> +
>> + if (!skb)
>> + return;
>> +
>> + len_sent = pcc_mbox_write_to_buffer(outbox->chan, skb->len, skb->data);
>> + if (len_sent == 0)
>> + pr_info("packet dropped");
> You probably don't want a bare pr_info() in the failure path here;
> either something debug level, or ratelimited.
>
> and probably use a netdev-specific log function (netdev_dbg() perhaps),
> so you get the device information too.
>
> I'm not super clear on this failure mode, do you need to update the
> tx statistics on the drop here? In which case, you may not need the
> _dbg/_info message at all.
Honestly, this is a pretty bad case, unlike, say, a UDP packet drop,
this implies that something in the system is very much not aligned with
the users expectations. I can go with a netdev specific, but I think
this should be info level at least.
>
>> +}
>> +
>> +static void mctp_pcc_tx_done(struct mbox_client *c, void *mssg, int r)
>> +{
>> + struct mctp_pcc_ndev *mctp_pcc_ndev;
>> + struct mctp_pcc_mailbox *outbox;
>> + struct sk_buff *skb = mssg;
> Nice, no more list lookups.
Yep.
>
>> +
>> + mctp_pcc_ndev = container_of(c, struct mctp_pcc_ndev, outbox.client);
>> + outbox = container_of(c, struct mctp_pcc_mailbox, client);
>> + if (skb)
>> + dev_consume_skb_any(skb);
> minor: dev_consume_skb_any() will tolerate a null argument, you can
> leave out the conditional here.
OK
>
>> + netif_wake_queue(mctp_pcc_ndev->ndev);
>> +}
>> +
>> +static int mctp_pcc_ndo_open(struct net_device *ndev)
>> +{
>> + struct mctp_pcc_ndev *mctp_pcc_ndev = netdev_priv(ndev);
>> + struct mctp_pcc_mailbox *outbox, *inbox;
>> +
>> + outbox = &mctp_pcc_ndev->outbox;
>> + inbox = &mctp_pcc_ndev->inbox;
>> +
>> + outbox->chan = pcc_mbox_request_channel(&outbox->client, outbox->index);
>> + if (IS_ERR(outbox->chan))
>> + return PTR_ERR(outbox->chan);
>> +
>> + inbox->client.rx_callback = mctp_pcc_client_rx_callback;
>> + inbox->chan = pcc_mbox_request_channel(&inbox->client, inbox->index);
>> + if (IS_ERR(inbox->chan)) {
>> + pcc_mbox_free_channel(outbox->chan);
>> + return PTR_ERR(inbox->chan);
>> + }
>> + return 0;
>> +}
> Having the channels fully set-up before calling request_channel looks
> much safer now :)
This actually showed that the same problem would have happened in
setting MTU. It is why we need and accessor to the shared buffer size.
>
>> +
>> +static int mctp_pcc_ndo_stop(struct net_device *ndev)
>> +{
>> + struct mctp_pcc_ndev *mctp_pcc_ndev = netdev_priv(ndev);
>> +
>> + pcc_mbox_free_channel(mctp_pcc_ndev->outbox.chan);
>> + pcc_mbox_free_channel(mctp_pcc_ndev->inbox.chan);
>> + return 0;
>> +}
>> +
>> +static const struct net_device_ops mctp_pcc_netdev_ops = {
>> + .ndo_open = mctp_pcc_ndo_open,
>> + .ndo_stop = mctp_pcc_ndo_stop,
>> + .ndo_start_xmit = mctp_pcc_tx,
>> +};
>> +
>> +static void mctp_pcc_setup(struct net_device *ndev)
>> +{
>> + ndev->type = ARPHRD_MCTP;
>> + ndev->hard_header_len = 0;
> Isn't this sizeof(struct pcc_extended_header) ?
Yes it is.
>
>> + ndev->tx_queue_len = 0;
>> + ndev->flags = IFF_NOARP;
>> + ndev->netdev_ops = &mctp_pcc_netdev_ops;
>> + ndev->needs_free_netdev = true;
>> + ndev->pcpu_stat_type = NETDEV_PCPU_STAT_DSTATS;
>> +}
>> +
>> +struct mctp_pcc_lookup_context {
>> + int index;
>> + u32 inbox_index;
>> + u32 outbox_index;
>> +};
>> +
>> +static acpi_status lookup_pcct_indices(struct acpi_resource *ares,
>> + void *context)
>> +{
>> + struct mctp_pcc_lookup_context *luc = context;
>> + struct acpi_resource_address32 *addr;
>> +
>> + if (ares->type != PCC_DWORD_TYPE)
>> + return AE_OK;
>> +
>> + addr = ACPI_CAST_PTR(struct acpi_resource_address32, &ares->data);
>> + switch (luc->index) {
>> + case 0:
>> + luc->outbox_index = addr[0].address.minimum;
>> + break;
>> + case 1:
>> + luc->inbox_index = addr[0].address.minimum;
>> + break;
>> + }
>> + luc->index++;
>> + return AE_OK;
>> +}
>> +
>> +static void mctp_cleanup_netdev(void *data)
>> +{
>> + struct net_device *ndev = data;
>> +
>> + mctp_unregister_netdev(ndev);
>> +}
>> +
>> +static int initialize_MTU(struct net_device *ndev)
>> +{
>> + struct mctp_pcc_ndev *mctp_pcc_ndev = netdev_priv(ndev);
>> + struct mctp_pcc_mailbox *outbox;
>> + int mctp_pcc_mtu;
>> +
>> + outbox = &mctp_pcc_ndev->outbox;
>> + mctp_pcc_mtu = pcc_mbox_buffer_size(outbox->index);
>> + if (mctp_pcc_mtu == -1)
>> + return -1;
> You may want to check that this is at least
> sizeof(struct pcc_extended_header), to avoid an underflow below, and
> possibly at least MCTP_MIN_MTU after subtracting the header size (as
> this would be less than the MCTP BTU)
And if it is? I think that is a sign of a hardware problem, beyond the
scope of what we can fix here. No matter what, the interface will be
unusable.
>
>> +
>> + mctp_pcc_mtu = mctp_pcc_mtu - sizeof(struct pcc_extended_header);
>> + mctp_pcc_ndev = netdev_priv(ndev);
> unneeded?
Yep, and gone....
>
>> + ndev->mtu = MCTP_MIN_MTU;
>> + ndev->max_mtu = mctp_pcc_mtu;
>> + ndev->min_mtu = MCTP_MIN_MTU;
>> +
>> + return 0;
>> +}
> Now that the initialize_MTU implementation is simpler, you may want to
> reinstate it inline in driver_add(), but also fine as-is.
>
>> +
>> +static int mctp_pcc_driver_add(struct acpi_device *acpi_dev)
>> +{
>> + struct mctp_pcc_lookup_context context = {0};
>> + struct mctp_pcc_ndev *mctp_pcc_ndev;
>> + struct device *dev = &acpi_dev->dev;
>> + struct net_device *ndev;
>> + acpi_handle dev_handle;
>> + acpi_status status;
>> + char name[32];
>> + int rc;
>> +
>> + dev_dbg(dev, "Adding mctp_pcc device for HID %s\n",
>> + acpi_device_hid(acpi_dev));
>> + dev_handle = acpi_device_handle(acpi_dev);
>> + status = acpi_walk_resources(dev_handle, "_CRS", lookup_pcct_indices,
>> + &context);
>> + if (!ACPI_SUCCESS(status)) {
>> + dev_err(dev, "FAILED to lookup PCC indexes from CRS\n");
>> + return -EINVAL;
>> + }
>> +
>> + snprintf(name, sizeof(name), "mctppcc%d", context.inbox_index);
>> + ndev = alloc_netdev(sizeof(*mctp_pcc_ndev), name, NET_NAME_PREDICTABLE,
>> + mctp_pcc_setup);
>> + if (!ndev)
>> + return -ENOMEM;
>> +
>> + mctp_pcc_ndev = netdev_priv(ndev);
>> +
>> + mctp_pcc_ndev->inbox.index = context.inbox_index;
>> + mctp_pcc_ndev->inbox.client.dev = dev;
>> + mctp_pcc_ndev->outbox.index = context.outbox_index;
>> + mctp_pcc_ndev->outbox.client.dev = dev;
>> +
>> + mctp_pcc_ndev->outbox.client.tx_prepare = mctp_pcc_tx_prepare;
>> + mctp_pcc_ndev->outbox.client.tx_done = mctp_pcc_tx_done;
>> + mctp_pcc_ndev->acpi_device = acpi_dev;
>> + mctp_pcc_ndev->ndev = ndev;
>> + acpi_dev->driver_data = mctp_pcc_ndev;
>> +
>> + rc = initialize_MTU(ndev);
>> + if (rc)
>> + goto free_netdev;
>> +
>> + rc = mctp_register_netdev(ndev, NULL, MCTP_PHYS_BINDING_PCC);
>> + if (rc)
>> + goto free_netdev;
>> +
>> + return devm_add_action_or_reset(dev, mctp_cleanup_netdev, ndev);
>> +free_netdev:
>> + free_netdev(ndev);
>> + return rc;
>> +}
>> +
>> +static const struct acpi_device_id mctp_pcc_device_ids[] = {
>> + { "DMT0001" },
>> + {}
>> +};
>> +
>> +static struct acpi_driver mctp_pcc_driver = {
>> + .name = "mctp_pcc",
>> + .class = "Unknown",
>> + .ids = mctp_pcc_device_ids,
>> + .ops = {
>> + .add = mctp_pcc_driver_add,
>> + },
>> +};
>> +
>> +module_acpi_driver(mctp_pcc_driver);
>> +
>> +MODULE_DEVICE_TABLE(acpi, mctp_pcc_device_ids);
>> +
>> +MODULE_DESCRIPTION("MCTP PCC ACPI device");
>> +MODULE_LICENSE("GPL");
>> +MODULE_AUTHOR("Adam Young <admiyo@os.amperecomputing.com>");
> Cheers,
>
>
> Jeremy
© 2016 - 2025 Red Hat, Inc.