[PATCH net-next v3 3/3] net: ti: icssg-prueth: Add XDP support

Meghana Malladi posted 3 patches 11 months, 2 weeks ago
There is a newer version of this series
[PATCH net-next v3 3/3] net: ti: icssg-prueth: Add XDP support
Posted by Meghana Malladi 11 months, 2 weeks ago
From: Roger Quadros <rogerq@kernel.org>

Add native XDP support. We do not support zero copy yet.

Signed-off-by: Roger Quadros <rogerq@kernel.org>
Signed-off-by: MD Danish Anwar <danishanwar@ti.com>
Signed-off-by: Meghana Malladi <m-malladi@ti.com>
---
Changes since v2 (v3-v2):
- Use page_pool contained in the page instead of using passing page_pool
(rx_chn) as part of swdata
- dev_sw_netstats_tx_add() instead of incrementing the stats directly
- Add missing ndev->stats.tx_dropped++ wherever applicable
- Move k3_cppi_desc_pool_alloc() before the DMA mapping for easier cleanup
on failure
- Replace rxp->napi_id with emac->napi_rx.napi_id in prueth_create_xdp_rxqs()

All the above changes have been suggested by Roger Quadros <rogerq@kernel.org>

 drivers/net/ethernet/ti/icssg/icssg_common.c | 219 +++++++++++++++++--
 drivers/net/ethernet/ti/icssg/icssg_prueth.c | 125 ++++++++++-
 drivers/net/ethernet/ti/icssg/icssg_prueth.h |  17 ++
 3 files changed, 346 insertions(+), 15 deletions(-)

diff --git a/drivers/net/ethernet/ti/icssg/icssg_common.c b/drivers/net/ethernet/ti/icssg/icssg_common.c
index 01eeabe83eff..4716e24ea05d 100644
--- a/drivers/net/ethernet/ti/icssg/icssg_common.c
+++ b/drivers/net/ethernet/ti/icssg/icssg_common.c
@@ -98,11 +98,19 @@ void prueth_xmit_free(struct prueth_tx_chn *tx_chn,
 {
 	struct cppi5_host_desc_t *first_desc, *next_desc;
 	dma_addr_t buf_dma, next_desc_dma;
+	struct prueth_swdata *swdata;
 	u32 buf_dma_len;
 
 	first_desc = desc;
 	next_desc = first_desc;
 
+	swdata = cppi5_hdesc_get_swdata(desc);
+	if (swdata->type == PRUETH_SWDATA_PAGE) {
+		page_pool_recycle_direct(swdata->data.page->pp,
+					 swdata->data.page);
+		goto free_desc;
+	}
+
 	cppi5_hdesc_get_obuf(first_desc, &buf_dma, &buf_dma_len);
 	k3_udma_glue_tx_cppi5_to_dma_addr(tx_chn->tx_chn, &buf_dma);
 
@@ -126,6 +134,7 @@ void prueth_xmit_free(struct prueth_tx_chn *tx_chn,
 		k3_cppi_desc_pool_free(tx_chn->desc_pool, next_desc);
 	}
 
+free_desc:
 	k3_cppi_desc_pool_free(tx_chn->desc_pool, first_desc);
 }
 EXPORT_SYMBOL_GPL(prueth_xmit_free);
@@ -139,6 +148,7 @@ int emac_tx_complete_packets(struct prueth_emac *emac, int chn,
 	struct prueth_swdata *swdata;
 	struct prueth_tx_chn *tx_chn;
 	unsigned int total_bytes = 0;
+	struct xdp_frame *xdpf;
 	struct sk_buff *skb;
 	dma_addr_t desc_dma;
 	int res, num_tx = 0;
@@ -168,21 +178,28 @@ int emac_tx_complete_packets(struct prueth_emac *emac, int chn,
 			continue;
 		}
 
-		if (swdata->type != PRUETH_SWDATA_SKB) {
+		switch (swdata->type) {
+		case PRUETH_SWDATA_SKB:
+			skb = swdata->data.skb;
+			dev_sw_netstats_tx_add(skb->dev, 1, skb->len);
+			total_bytes += skb->len;
+			napi_consume_skb(skb, budget);
+			break;
+		case PRUETH_SWDATA_XDPF:
+			xdpf = swdata->data.xdpf;
+			dev_sw_netstats_tx_add(ndev, 1, xdpf->len);
+			total_bytes += xdpf->len;
+			xdp_return_frame(xdpf);
+			break;
+		default:
 			netdev_err(ndev, "tx_complete: invalid swdata type %d\n", swdata->type);
 			prueth_xmit_free(tx_chn, desc_tx);
+			ndev->stats.tx_dropped++;
 			budget++;
 			continue;
 		}
 
-		skb = swdata->data.skb;
 		prueth_xmit_free(tx_chn, desc_tx);
-
-		ndev = skb->dev;
-		ndev->stats.tx_packets++;
-		ndev->stats.tx_bytes += skb->len;
-		total_bytes += skb->len;
-		napi_consume_skb(skb, budget);
 		num_tx++;
 	}
 
@@ -541,7 +558,153 @@ void emac_rx_timestamp(struct prueth_emac *emac,
 	ssh->hwtstamp = ns_to_ktime(ns);
 }
 
-static int emac_rx_packet(struct prueth_emac *emac, u32 flow_id)
+/**
+ * emac_xmit_xdp_frame - transmits an XDP frame
+ * @emac: emac device
+ * @xdpf: data to transmit
+ * @page: page from page pool if already DMA mapped
+ * @q_idx: queue id
+ *
+ * Return: XDP state
+ */
+int emac_xmit_xdp_frame(struct prueth_emac *emac,
+			struct xdp_frame *xdpf,
+			struct page *page,
+			unsigned int q_idx)
+{
+	struct cppi5_host_desc_t *first_desc;
+	struct net_device *ndev = emac->ndev;
+	struct prueth_tx_chn *tx_chn;
+	dma_addr_t desc_dma, buf_dma;
+	struct prueth_swdata *swdata;
+	u32 *epib;
+	int ret;
+
+	void *data = xdpf->data;
+	u32 pkt_len = xdpf->len;
+
+	if (q_idx >= PRUETH_MAX_TX_QUEUES) {
+		netdev_err(ndev, "xdp tx: invalid q_id %d\n", q_idx);
+		return ICSSG_XDP_CONSUMED;	/* drop */
+	}
+
+	tx_chn = &emac->tx_chns[q_idx];
+
+	first_desc = k3_cppi_desc_pool_alloc(tx_chn->desc_pool);
+	if (!first_desc) {
+		netdev_dbg(ndev, "xdp tx: failed to allocate descriptor\n");
+		goto drop_free_descs;	/* drop */
+	}
+
+	if (page) { /* already DMA mapped by page_pool */
+		buf_dma = page_pool_get_dma_addr(page);
+		buf_dma += xdpf->headroom + sizeof(struct xdp_frame);
+	} else { /* Map the linear buffer */
+		buf_dma = dma_map_single(tx_chn->dma_dev, data, pkt_len, DMA_TO_DEVICE);
+		if (dma_mapping_error(tx_chn->dma_dev, buf_dma)) {
+			netdev_err(ndev, "xdp tx: failed to map data buffer\n");
+			goto drop_free_descs;	/* drop */
+		}
+	}
+
+	cppi5_hdesc_init(first_desc, CPPI5_INFO0_HDESC_EPIB_PRESENT,
+			 PRUETH_NAV_PS_DATA_SIZE);
+	cppi5_hdesc_set_pkttype(first_desc, 0);
+	epib = first_desc->epib;
+	epib[0] = 0;
+	epib[1] = 0;
+
+	/* set dst tag to indicate internal qid at the firmware which is at
+	 * bit8..bit15. bit0..bit7 indicates port num for directed
+	 * packets in case of switch mode operation
+	 */
+	cppi5_desc_set_tags_ids(&first_desc->hdr, 0, (emac->port_id | (q_idx << 8)));
+	k3_udma_glue_tx_dma_to_cppi5_addr(tx_chn->tx_chn, &buf_dma);
+	cppi5_hdesc_attach_buf(first_desc, buf_dma, pkt_len, buf_dma, pkt_len);
+	swdata = cppi5_hdesc_get_swdata(first_desc);
+	if (page) {
+		swdata->type = PRUETH_SWDATA_PAGE;
+		swdata->data.page = page;
+	} else {
+		swdata->type = PRUETH_SWDATA_XDPF;
+		swdata->data.xdpf = xdpf;
+	}
+
+	cppi5_hdesc_set_pktlen(first_desc, pkt_len);
+	desc_dma = k3_cppi_desc_pool_virt2dma(tx_chn->desc_pool, first_desc);
+
+	ret = k3_udma_glue_push_tx_chn(tx_chn->tx_chn, first_desc, desc_dma);
+	if (ret) {
+		netdev_err(ndev, "xdp tx: push failed: %d\n", ret);
+		goto drop_free_descs;
+	}
+
+	return ICSSG_XDP_TX;
+
+drop_free_descs:
+	prueth_xmit_free(tx_chn, first_desc);
+	return ICSSG_XDP_CONSUMED;
+}
+EXPORT_SYMBOL_GPL(emac_xmit_xdp_frame);
+
+/**
+ * emac_run_xdp - run an XDP program
+ * @emac: emac device
+ * @xdp: XDP buffer containing the frame
+ * @page: page with RX data if already DMA mapped
+ *
+ * Return: XDP state
+ */
+static int emac_run_xdp(struct prueth_emac *emac, struct xdp_buff *xdp,
+			struct page *page)
+{
+	struct net_device *ndev = emac->ndev;
+	int err, result = ICSSG_XDP_PASS;
+	struct bpf_prog *xdp_prog;
+	struct xdp_frame *xdpf;
+	int q_idx;
+	u32 act;
+
+	xdp_prog = READ_ONCE(emac->xdp_prog);
+	act = bpf_prog_run_xdp(xdp_prog, xdp);
+	switch (act) {
+	case XDP_PASS:
+		break;
+	case XDP_TX:
+		/* Send packet to TX ring for immediate transmission */
+		xdpf = xdp_convert_buff_to_frame(xdp);
+		if (unlikely(!xdpf))
+			goto drop;
+
+		q_idx = smp_processor_id() % emac->tx_ch_num;
+		result = emac_xmit_xdp_frame(emac, xdpf, page, q_idx);
+		if (result == ICSSG_XDP_CONSUMED)
+			goto drop;
+		break;
+	case XDP_REDIRECT:
+		err = xdp_do_redirect(emac->ndev, xdp, xdp_prog);
+		if (err)
+			goto drop;
+		result = ICSSG_XDP_REDIR;
+		break;
+	default:
+		bpf_warn_invalid_xdp_action(emac->ndev, xdp_prog, act);
+		fallthrough;
+	case XDP_ABORTED:
+drop:
+		trace_xdp_exception(emac->ndev, xdp_prog, act);
+		fallthrough; /* handle aborts by dropping packet */
+	case XDP_DROP:
+		ndev->stats.tx_dropped++;
+		result = ICSSG_XDP_CONSUMED;
+		page_pool_recycle_direct(emac->rx_chns.pg_pool, page);
+		break;
+	}
+
+	return result;
+}
+
+static int emac_rx_packet(struct prueth_emac *emac, u32 flow_id, int *xdp_state)
 {
 	struct prueth_rx_chn *rx_chn = &emac->rx_chns;
 	u32 buf_dma_len, pkt_len, port_id = 0;
@@ -552,10 +715,12 @@ static int emac_rx_packet(struct prueth_emac *emac, u32 flow_id)
 	struct page *page, *new_page;
 	struct page_pool *pool;
 	struct sk_buff *skb;
+	struct xdp_buff xdp;
 	u32 *psdata;
 	void *pa;
 	int ret;
 
+	*xdp_state = 0;
 	pool = rx_chn->pg_pool;
 	ret = k3_udma_glue_pop_rx_chn(rx_chn->rx_chn, flow_id, &desc_dma);
 	if (ret) {
@@ -596,9 +761,21 @@ static int emac_rx_packet(struct prueth_emac *emac, u32 flow_id)
 		goto requeue;
 	}
 
-	/* prepare skb and send to n/w stack */
 	pa = page_address(page);
-	skb = napi_build_skb(pa, PAGE_SIZE);
+	if (emac->xdp_prog) {
+		xdp_init_buff(&xdp, PAGE_SIZE, &rx_chn->xdp_rxq);
+		xdp_prepare_buff(&xdp, pa, PRUETH_HEADROOM, pkt_len, false);
+
+		*xdp_state = emac_run_xdp(emac, &xdp, page);
+		if (*xdp_state == ICSSG_XDP_PASS)
+			skb = xdp_build_skb_from_buff(&xdp);
+		else
+			goto requeue;
+	} else {
+		/* prepare skb and send to n/w stack */
+		skb = napi_build_skb(pa, PAGE_SIZE);
+	}
+
 	if (!skb) {
 		ndev->stats.rx_dropped++;
 		page_pool_recycle_direct(pool, page);
@@ -861,13 +1038,23 @@ static void prueth_tx_cleanup(void *data, dma_addr_t desc_dma)
 	struct prueth_tx_chn *tx_chn = data;
 	struct cppi5_host_desc_t *desc_tx;
 	struct prueth_swdata *swdata;
+	struct xdp_frame *xdpf;
 	struct sk_buff *skb;
 
 	desc_tx = k3_cppi_desc_pool_dma2virt(tx_chn->desc_pool, desc_dma);
 	swdata = cppi5_hdesc_get_swdata(desc_tx);
-	if (swdata->type == PRUETH_SWDATA_SKB) {
+
+	switch (swdata->type) {
+	case PRUETH_SWDATA_SKB:
 		skb = swdata->data.skb;
 		dev_kfree_skb_any(skb);
+		break;
+	case PRUETH_SWDATA_XDPF:
+		xdpf = swdata->data.xdpf;
+		xdp_return_frame(xdpf);
+		break;
+	default:
+		break;
 	}
 
 	prueth_xmit_free(tx_chn, desc_tx);
@@ -904,15 +1091,18 @@ int icssg_napi_rx_poll(struct napi_struct *napi_rx, int budget)
 		PRUETH_RX_FLOW_DATA_SR1 : PRUETH_RX_FLOW_DATA;
 	int flow = emac->is_sr1 ?
 		PRUETH_MAX_RX_FLOWS_SR1 : PRUETH_MAX_RX_FLOWS;
+	int xdp_state_or = 0;
 	int num_rx = 0;
 	int cur_budget;
+	int xdp_state;
 	int ret;
 
 	while (flow--) {
 		cur_budget = budget - num_rx;
 
 		while (cur_budget--) {
-			ret = emac_rx_packet(emac, flow);
+			ret = emac_rx_packet(emac, flow, &xdp_state);
+			xdp_state_or |= xdp_state;
 			if (ret)
 				break;
 			num_rx++;
@@ -922,6 +1112,9 @@ int icssg_napi_rx_poll(struct napi_struct *napi_rx, int budget)
 			break;
 	}
 
+	if (xdp_state_or & ICSSG_XDP_REDIR)
+		xdp_do_flush();
+
 	if (num_rx < budget && napi_complete_done(napi_rx, num_rx)) {
 		if (unlikely(emac->rx_pace_timeout_ns)) {
 			hrtimer_start(&emac->rx_hrtimer,
diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.c b/drivers/net/ethernet/ti/icssg/icssg_prueth.c
index 3ff8c322f9d9..1acbf9e1bade 100644
--- a/drivers/net/ethernet/ti/icssg/icssg_prueth.c
+++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.c
@@ -559,6 +559,33 @@ const struct icss_iep_clockops prueth_iep_clockops = {
 	.perout_enable = prueth_perout_enable,
 };
 
+static int prueth_create_xdp_rxqs(struct prueth_emac *emac)
+{
+	struct xdp_rxq_info *rxq = &emac->rx_chns.xdp_rxq;
+	struct page_pool *pool = emac->rx_chns.pg_pool;
+	int ret;
+
+	ret = xdp_rxq_info_reg(rxq, emac->ndev, 0, emac->napi_rx.napi_id);
+	if (ret)
+		return ret;
+
+	ret = xdp_rxq_info_reg_mem_model(rxq, MEM_TYPE_PAGE_POOL, pool);
+	if (ret)
+		xdp_rxq_info_unreg(rxq);
+
+	return ret;
+}
+
+static void prueth_destroy_xdp_rxqs(struct prueth_emac *emac)
+{
+	struct xdp_rxq_info *rxq = &emac->rx_chns.xdp_rxq;
+
+	if (!xdp_rxq_info_is_reg(rxq))
+		return;
+
+	xdp_rxq_info_unreg(rxq);
+}
+
 static int icssg_prueth_add_mcast(struct net_device *ndev, const u8 *addr)
 {
 	struct net_device *real_dev;
@@ -780,10 +807,14 @@ static int emac_ndo_open(struct net_device *ndev)
 	if (ret)
 		goto free_tx_ts_irq;
 
-	ret = k3_udma_glue_enable_rx_chn(emac->rx_chns.rx_chn);
+	ret = prueth_create_xdp_rxqs(emac);
 	if (ret)
 		goto reset_rx_chn;
 
+	ret = k3_udma_glue_enable_rx_chn(emac->rx_chns.rx_chn);
+	if (ret)
+		goto destroy_xdp_rxqs;
+
 	for (i = 0; i < emac->tx_ch_num; i++) {
 		ret = k3_udma_glue_enable_tx_chn(emac->tx_chns[i].tx_chn);
 		if (ret)
@@ -809,6 +840,8 @@ static int emac_ndo_open(struct net_device *ndev)
 	 * any SKB for completion. So set false to free_skb
 	 */
 	prueth_reset_tx_chan(emac, i, false);
+destroy_xdp_rxqs:
+	prueth_destroy_xdp_rxqs(emac);
 reset_rx_chn:
 	prueth_reset_rx_chan(&emac->rx_chns, max_rx_flows, false);
 free_tx_ts_irq:
@@ -879,7 +912,7 @@ static int emac_ndo_stop(struct net_device *ndev)
 	k3_udma_glue_tdown_rx_chn(emac->rx_chns.rx_chn, true);
 
 	prueth_reset_rx_chan(&emac->rx_chns, max_rx_flows, true);
-
+	prueth_destroy_xdp_rxqs(emac);
 	napi_disable(&emac->napi_rx);
 	hrtimer_cancel(&emac->rx_hrtimer);
 
@@ -1024,6 +1057,90 @@ static int emac_ndo_vlan_rx_del_vid(struct net_device *ndev,
 	return 0;
 }
 
+/**
+ * emac_xdp_xmit - Implements ndo_xdp_xmit
+ * @dev: netdev
+ * @n: number of frames
+ * @frames: array of XDP buffer pointers
+ * @flags: XDP extra info
+ *
+ * Return: number of frames successfully sent. Failed frames
+ * will be free'ed by XDP core.
+ *
+ * For error cases, a negative errno code is returned and no-frames
+ * are transmitted (caller must handle freeing frames).
+ **/
+static int emac_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
+			 u32 flags)
+{
+	struct prueth_emac *emac = netdev_priv(dev);
+	unsigned int q_idx;
+	int nxmit = 0;
+	int i;
+
+	q_idx = smp_processor_id() % emac->tx_ch_num;
+
+	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
+		return -EINVAL;
+
+	for (i = 0; i < n; i++) {
+		struct xdp_frame *xdpf = frames[i];
+		int err;
+
+		err = emac_xmit_xdp_frame(emac, xdpf, NULL, q_idx);
+		if (err != ICSSG_XDP_TX)
+			break;
+		nxmit++;
+	}
+
+	return nxmit;
+}
+
+/**
+ * emac_xdp_setup - add/remove an XDP program
+ * @emac: emac device
+ * @bpf: XDP program
+ *
+ * Return: Always 0 (Success)
+ **/
+static int emac_xdp_setup(struct prueth_emac *emac, struct netdev_bpf *bpf)
+{
+	struct bpf_prog *prog = bpf->prog;
+	xdp_features_t val;
+
+	val = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
+	      NETDEV_XDP_ACT_NDO_XMIT;
+	xdp_set_features_flag(emac->ndev, val);
+
+	if (!emac->xdpi.prog && !prog)
+		return 0;
+
+	WRITE_ONCE(emac->xdp_prog, prog);
+
+	xdp_attachment_setup(&emac->xdpi, bpf);
+
+	return 0;
+}
+
+/**
+ * emac_ndo_bpf - implements ndo_bpf for icssg_prueth
+ * @ndev: network adapter device
+ * @bpf: XDP program
+ *
+ * Return: 0 on success, error code on failure.
+ **/
+static int emac_ndo_bpf(struct net_device *ndev, struct netdev_bpf *bpf)
+{
+	struct prueth_emac *emac = netdev_priv(ndev);
+
+	switch (bpf->command) {
+	case XDP_SETUP_PROG:
+		return emac_xdp_setup(emac, bpf);
+	default:
+		return -EINVAL;
+	}
+}
+
 static const struct net_device_ops emac_netdev_ops = {
 	.ndo_open = emac_ndo_open,
 	.ndo_stop = emac_ndo_stop,
@@ -1038,6 +1155,8 @@ static const struct net_device_ops emac_netdev_ops = {
 	.ndo_fix_features = emac_ndo_fix_features,
 	.ndo_vlan_rx_add_vid = emac_ndo_vlan_rx_add_vid,
 	.ndo_vlan_rx_kill_vid = emac_ndo_vlan_rx_del_vid,
+	.ndo_bpf = emac_ndo_bpf,
+	.ndo_xdp_xmit = emac_xdp_xmit,
 };
 
 static int prueth_netdev_init(struct prueth *prueth,
@@ -1066,6 +1185,8 @@ static int prueth_netdev_init(struct prueth *prueth,
 	emac->prueth = prueth;
 	emac->ndev = ndev;
 	emac->port_id = port;
+	emac->xdp_prog = NULL;
+	emac->ndev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS;
 	emac->cmd_wq = create_singlethread_workqueue("icssg_cmd_wq");
 	if (!emac->cmd_wq) {
 		ret = -ENOMEM;
diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.h b/drivers/net/ethernet/ti/icssg/icssg_prueth.h
index 3bbabd007129..0675919beb94 100644
--- a/drivers/net/ethernet/ti/icssg/icssg_prueth.h
+++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.h
@@ -8,6 +8,8 @@
 #ifndef __NET_TI_ICSSG_PRUETH_H
 #define __NET_TI_ICSSG_PRUETH_H
 
+#include <linux/bpf.h>
+#include <linux/bpf_trace.h>
 #include <linux/etherdevice.h>
 #include <linux/genalloc.h>
 #include <linux/if_vlan.h>
@@ -134,6 +136,7 @@ struct prueth_rx_chn {
 	unsigned int irq[ICSSG_MAX_RFLOWS];	/* separate irq per flow */
 	char name[32];
 	struct page_pool *pg_pool;
+	struct xdp_rxq_info xdp_rxq;
 };
 
 enum prueth_swdata_type {
@@ -141,6 +144,7 @@ enum prueth_swdata_type {
 	PRUETH_SWDATA_SKB,
 	PRUETH_SWDATA_PAGE,
 	PRUETH_SWDATA_CMD,
+	PRUETH_SWDATA_XDPF,
 };
 
 struct prueth_swdata {
@@ -149,6 +153,7 @@ struct prueth_swdata {
 		struct sk_buff *skb;
 		struct page *page;
 		u32 cmd;
+		struct xdp_frame *xdpf;
 	} data;
 };
 
@@ -159,6 +164,12 @@ struct prueth_swdata {
 
 #define PRUETH_MAX_TX_TS_REQUESTS	50 /* Max simultaneous TX_TS requests */
 
+/* XDP BPF state */
+#define ICSSG_XDP_PASS           0
+#define ICSSG_XDP_CONSUMED       BIT(0)
+#define ICSSG_XDP_TX             BIT(1)
+#define ICSSG_XDP_REDIR          BIT(2)
+
 /* Minimum coalesce time in usecs for both Tx and Rx */
 #define ICSSG_MIN_COALESCE_USECS 20
 
@@ -227,6 +238,8 @@ struct prueth_emac {
 	unsigned long rx_pace_timeout_ns;
 
 	struct netdev_hw_addr_list vlan_mcast_list[MAX_VLAN_ID];
+	struct bpf_prog *xdp_prog;
+	struct xdp_attachment_info xdpi;
 };
 
 /* The buf includes headroom compatible with both skb and xdpf */
@@ -465,5 +478,9 @@ void prueth_put_cores(struct prueth *prueth, int slice);
 
 /* Revision specific helper */
 u64 icssg_ts_to_ns(u32 hi_sw, u32 hi, u32 lo, u32 cycle_time_ns);
+int emac_xmit_xdp_frame(struct prueth_emac *emac,
+			struct xdp_frame *xdpf,
+			struct page *page,
+			unsigned int q_idx);
 
 #endif /* __NET_TI_ICSSG_PRUETH_H */
-- 
2.43.0
Re: [PATCH net-next v3 3/3] net: ti: icssg-prueth: Add XDP support
Posted by Roger Quadros 11 months, 2 weeks ago

On 24/02/2025 13:01, Meghana Malladi wrote:
> From: Roger Quadros <rogerq@kernel.org>
> 
> Add native XDP support. We do not support zero copy yet.
> 
> Signed-off-by: Roger Quadros <rogerq@kernel.org>
> Signed-off-by: MD Danish Anwar <danishanwar@ti.com>
> Signed-off-by: Meghana Malladi <m-malladi@ti.com>
> ---
> Changes since v2 (v3-v2):
> - Use page_pool contained in the page instead of using passing page_pool
> (rx_chn) as part of swdata
> - dev_sw_netstats_tx_add() instead of incrementing the stats directly
> - Add missing ndev->stats.tx_dropped++ wherever applicable
> - Move k3_cppi_desc_pool_alloc() before the DMA mapping for easier cleanup
> on failure
> - Replace rxp->napi_id with emac->napi_rx.napi_id in prueth_create_xdp_rxqs()
> 
> All the above changes have been suggested by Roger Quadros <rogerq@kernel.org>
> 
>  drivers/net/ethernet/ti/icssg/icssg_common.c | 219 +++++++++++++++++--
>  drivers/net/ethernet/ti/icssg/icssg_prueth.c | 125 ++++++++++-
>  drivers/net/ethernet/ti/icssg/icssg_prueth.h |  17 ++
>  3 files changed, 346 insertions(+), 15 deletions(-)
> 
> diff --git a/drivers/net/ethernet/ti/icssg/icssg_common.c b/drivers/net/ethernet/ti/icssg/icssg_common.c
> index 01eeabe83eff..4716e24ea05d 100644
> --- a/drivers/net/ethernet/ti/icssg/icssg_common.c
> +++ b/drivers/net/ethernet/ti/icssg/icssg_common.c
> @@ -98,11 +98,19 @@ void prueth_xmit_free(struct prueth_tx_chn *tx_chn,
>  {
>  	struct cppi5_host_desc_t *first_desc, *next_desc;
>  	dma_addr_t buf_dma, next_desc_dma;
> +	struct prueth_swdata *swdata;
>  	u32 buf_dma_len;
>  
>  	first_desc = desc;
>  	next_desc = first_desc;
>  
> +	swdata = cppi5_hdesc_get_swdata(desc);
> +	if (swdata->type == PRUETH_SWDATA_PAGE) {
> +		page_pool_recycle_direct(swdata->data.page->pp,
> +					 swdata->data.page);
> +		goto free_desc;
> +	}
> +
>  	cppi5_hdesc_get_obuf(first_desc, &buf_dma, &buf_dma_len);
>  	k3_udma_glue_tx_cppi5_to_dma_addr(tx_chn->tx_chn, &buf_dma);
>  
> @@ -126,6 +134,7 @@ void prueth_xmit_free(struct prueth_tx_chn *tx_chn,
>  		k3_cppi_desc_pool_free(tx_chn->desc_pool, next_desc);
>  	}
>  
> +free_desc:
>  	k3_cppi_desc_pool_free(tx_chn->desc_pool, first_desc);
>  }
>  EXPORT_SYMBOL_GPL(prueth_xmit_free);
> @@ -139,6 +148,7 @@ int emac_tx_complete_packets(struct prueth_emac *emac, int chn,
>  	struct prueth_swdata *swdata;
>  	struct prueth_tx_chn *tx_chn;
>  	unsigned int total_bytes = 0;
> +	struct xdp_frame *xdpf;
>  	struct sk_buff *skb;
>  	dma_addr_t desc_dma;
>  	int res, num_tx = 0;
> @@ -168,21 +178,28 @@ int emac_tx_complete_packets(struct prueth_emac *emac, int chn,
>  			continue;
>  		}
>  
> -		if (swdata->type != PRUETH_SWDATA_SKB) {
> +		switch (swdata->type) {
> +		case PRUETH_SWDATA_SKB:
> +			skb = swdata->data.skb;
> +			dev_sw_netstats_tx_add(skb->dev, 1, skb->len);
> +			total_bytes += skb->len;
> +			napi_consume_skb(skb, budget);
> +			break;
> +		case PRUETH_SWDATA_XDPF:
> +			xdpf = swdata->data.xdpf;
> +			dev_sw_netstats_tx_add(ndev, 1, xdpf->len);
> +			total_bytes += xdpf->len;
> +			xdp_return_frame(xdpf);
> +			break;
> +		default:
>  			netdev_err(ndev, "tx_complete: invalid swdata type %d\n", swdata->type);
>  			prueth_xmit_free(tx_chn, desc_tx);
> +			ndev->stats.tx_dropped++;
>  			budget++;
>  			continue;
>  		}
>  
> -		skb = swdata->data.skb;
>  		prueth_xmit_free(tx_chn, desc_tx);
> -
> -		ndev = skb->dev;
> -		ndev->stats.tx_packets++;
> -		ndev->stats.tx_bytes += skb->len;
> -		total_bytes += skb->len;
> -		napi_consume_skb(skb, budget);
>  		num_tx++;
>  	}
>  
> @@ -541,7 +558,153 @@ void emac_rx_timestamp(struct prueth_emac *emac,
>  	ssh->hwtstamp = ns_to_ktime(ns);
>  }
>  
> -static int emac_rx_packet(struct prueth_emac *emac, u32 flow_id)
> +/**
> + * emac_xmit_xdp_frame - transmits an XDP frame
> + * @emac: emac device
> + * @xdpf: data to transmit
> + * @page: page from page pool if already DMA mapped
> + * @q_idx: queue id
> + *
> + * Return: XDP state
> + */
> +int emac_xmit_xdp_frame(struct prueth_emac *emac,
> +			struct xdp_frame *xdpf,
> +			struct page *page,
> +			unsigned int q_idx)
> +{
> +	struct cppi5_host_desc_t *first_desc;
> +	struct net_device *ndev = emac->ndev;
> +	struct prueth_tx_chn *tx_chn;
> +	dma_addr_t desc_dma, buf_dma;
> +	struct prueth_swdata *swdata;
> +	u32 *epib;
> +	int ret;
> +
drop new line and arrange below declarations in reverse xmas tree order.

> +	void *data = xdpf->data;
> +	u32 pkt_len = xdpf->len;
> +
> +	if (q_idx >= PRUETH_MAX_TX_QUEUES) {
> +		netdev_err(ndev, "xdp tx: invalid q_id %d\n", q_idx);
> +		return ICSSG_XDP_CONSUMED;	/* drop */
> +	}
> +
> +	tx_chn = &emac->tx_chns[q_idx];
> +
> +	first_desc = k3_cppi_desc_pool_alloc(tx_chn->desc_pool);
> +	if (!first_desc) {
> +		netdev_dbg(ndev, "xdp tx: failed to allocate descriptor\n");
> +		goto drop_free_descs;	/* drop */
> +	}
> +
> +	if (page) { /* already DMA mapped by page_pool */
> +		buf_dma = page_pool_get_dma_addr(page);
> +		buf_dma += xdpf->headroom + sizeof(struct xdp_frame);
> +	} else { /* Map the linear buffer */
> +		buf_dma = dma_map_single(tx_chn->dma_dev, data, pkt_len, DMA_TO_DEVICE);
> +		if (dma_mapping_error(tx_chn->dma_dev, buf_dma)) {
> +			netdev_err(ndev, "xdp tx: failed to map data buffer\n");
> +			goto drop_free_descs;	/* drop */
> +		}
> +	}
> +
> +	cppi5_hdesc_init(first_desc, CPPI5_INFO0_HDESC_EPIB_PRESENT,
> +			 PRUETH_NAV_PS_DATA_SIZE);
> +	cppi5_hdesc_set_pkttype(first_desc, 0);
> +	epib = first_desc->epib;
> +	epib[0] = 0;
> +	epib[1] = 0;
> +
> +	/* set dst tag to indicate internal qid at the firmware which is at
> +	 * bit8..bit15. bit0..bit7 indicates port num for directed
> +	 * packets in case of switch mode operation
> +	 */
> +	cppi5_desc_set_tags_ids(&first_desc->hdr, 0, (emac->port_id | (q_idx << 8)));
> +	k3_udma_glue_tx_dma_to_cppi5_addr(tx_chn->tx_chn, &buf_dma);
> +	cppi5_hdesc_attach_buf(first_desc, buf_dma, pkt_len, buf_dma, pkt_len);
> +	swdata = cppi5_hdesc_get_swdata(first_desc);
> +	if (page) {
> +		swdata->type = PRUETH_SWDATA_PAGE;
> +		swdata->data.page = page;
> +	} else {
> +		swdata->type = PRUETH_SWDATA_XDPF;
> +		swdata->data.xdpf = xdpf;
> +	}
> +
> +	cppi5_hdesc_set_pktlen(first_desc, pkt_len);
> +	desc_dma = k3_cppi_desc_pool_virt2dma(tx_chn->desc_pool, first_desc);
> +
> +	ret = k3_udma_glue_push_tx_chn(tx_chn->tx_chn, first_desc, desc_dma);
> +	if (ret) {
> +		netdev_err(ndev, "xdp tx: push failed: %d\n", ret);
> +		goto drop_free_descs;
> +	}
> +
> +	return ICSSG_XDP_TX;
> +
> +drop_free_descs:
> +	prueth_xmit_free(tx_chn, first_desc);
> +	return ICSSG_XDP_CONSUMED;
> +}
> +EXPORT_SYMBOL_GPL(emac_xmit_xdp_frame);
> +
> +/**
> + * emac_run_xdp - run an XDP program
> + * @emac: emac device
> + * @xdp: XDP buffer containing the frame
> + * @page: page with RX data if already DMA mapped
> + *
> + * Return: XDP state
> + */
> +static int emac_run_xdp(struct prueth_emac *emac, struct xdp_buff *xdp,
> +			struct page *page)
> +{
> +	struct net_device *ndev = emac->ndev;
> +	int err, result = ICSSG_XDP_PASS;

you could avoid initialization of result. see below.

> +	struct bpf_prog *xdp_prog;
> +	struct xdp_frame *xdpf;
> +	int q_idx;
> +	u32 act;
> +
> +	xdp_prog = READ_ONCE(emac->xdp_prog);
> +	act = bpf_prog_run_xdp(xdp_prog, xdp);
> +	switch (act) {
> +	case XDP_PASS:
> +		break;
instead of break how about?
		return ICSSG_XDP_PASS;

> +	case XDP_TX:
> +		/* Send packet to TX ring for immediate transmission */
> +		xdpf = xdp_convert_buff_to_frame(xdp);
> +		if (unlikely(!xdpf))

TX is dropped so here you need to
			ndev->stats.tx_dropped++;
> +			goto drop;
> +
> +		q_idx = smp_processor_id() % emac->tx_ch_num;
> +		result = emac_xmit_xdp_frame(emac, xdpf, page, q_idx);
> +		if (result == ICSSG_XDP_CONSUMED)
> +			goto drop;

need to increment rx stats as we received the packet successfully?

> +		break;
instead,
		return ICSSG_XDP_TX;

> +	case XDP_REDIRECT:
> +		err = xdp_do_redirect(emac->ndev, xdp, xdp_prog);
> +		if (err)
> +			goto drop;
> +		result = ICSSG_XDP_REDIR;
> +		break;

replace above 2 by
		return ICSSG_XDP_REDIR;
> +	default:
> +		bpf_warn_invalid_xdp_action(emac->ndev, xdp_prog, act);
> +		fallthrough;
> +	case XDP_ABORTED:
> +drop:
> +		trace_xdp_exception(emac->ndev, xdp_prog, act);
> +		fallthrough; /* handle aborts by dropping packet */
> +	case XDP_DROP:
> +		ndev->stats.tx_dropped++;

shouldn't this be
		ndev->stats.rx_dropped++;

> +		result = ICSSG_XDP_CONSUMED;

not required if we directly return this value below.

> +		page_pool_recycle_direct(emac->rx_chns.pg_pool, page);
> +		break;
		return ICSSG_XDP_CONSUMED;
> +	}
> +
> +	return result;

drop this

> +}
> +
> +static int emac_rx_packet(struct prueth_emac *emac, u32 flow_id, int *xdp_state)
>  {
>  	struct prueth_rx_chn *rx_chn = &emac->rx_chns;
>  	u32 buf_dma_len, pkt_len, port_id = 0;
> @@ -552,10 +715,12 @@ static int emac_rx_packet(struct prueth_emac *emac, u32 flow_id)
>  	struct page *page, *new_page;
>  	struct page_pool *pool;
>  	struct sk_buff *skb;
> +	struct xdp_buff xdp;
>  	u32 *psdata;
>  	void *pa;
>  	int ret;
>  
> +	*xdp_state = 0;
>  	pool = rx_chn->pg_pool;
>  	ret = k3_udma_glue_pop_rx_chn(rx_chn->rx_chn, flow_id, &desc_dma);
>  	if (ret) {
> @@ -596,9 +761,21 @@ static int emac_rx_packet(struct prueth_emac *emac, u32 flow_id)
>  		goto requeue;
>  	}
>  
> -	/* prepare skb and send to n/w stack */
>  	pa = page_address(page);
> -	skb = napi_build_skb(pa, PAGE_SIZE);

We are running the xdp program after allocating the new page.
How about running the xdp program first? if the packet has to be dropped
then it is pointless to allocate a new page. We could just reuse the old page
and save CPU cycles.

> +	if (emac->xdp_prog) {
> +		xdp_init_buff(&xdp, PAGE_SIZE, &rx_chn->xdp_rxq);
> +		xdp_prepare_buff(&xdp, pa, PRUETH_HEADROOM, pkt_len, false);
> +
> +		*xdp_state = emac_run_xdp(emac, &xdp, page);
> +		if (*xdp_state == ICSSG_XDP_PASS)
> +			skb = xdp_build_skb_from_buff(&xdp);
> +		else
> +			goto requeue;
> +	} else {
> +		/* prepare skb and send to n/w stack */
> +		skb = napi_build_skb(pa, PAGE_SIZE);
> +	}
> +
>  	if (!skb) {
>  		ndev->stats.rx_dropped++;
>  		page_pool_recycle_direct(pool, page);

instead of recycling the old page just reuse it
		new_page = page;

>		goto requeue;
> 	}

here you can allocate the new page cause now we're sure old page
has to be sent to user space.

> @@ -861,13 +1038,23 @@ static void prueth_tx_cleanup(void *data, dma_addr_t desc_dma)
>  	struct prueth_tx_chn *tx_chn = data;
>  	struct cppi5_host_desc_t *desc_tx;
>  	struct prueth_swdata *swdata;
> +	struct xdp_frame *xdpf;
>  	struct sk_buff *skb;
>  
>  	desc_tx = k3_cppi_desc_pool_dma2virt(tx_chn->desc_pool, desc_dma);
>  	swdata = cppi5_hdesc_get_swdata(desc_tx);
> -	if (swdata->type == PRUETH_SWDATA_SKB) {
> +
> +	switch (swdata->type) {
> +	case PRUETH_SWDATA_SKB:
>  		skb = swdata->data.skb;
>  		dev_kfree_skb_any(skb);
> +		break;
> +	case PRUETH_SWDATA_XDPF:
> +		xdpf = swdata->data.xdpf;
> +		xdp_return_frame(xdpf);
> +		break;
> +	default:
> +		break;
>  	}
>  
>  	prueth_xmit_free(tx_chn, desc_tx);
> @@ -904,15 +1091,18 @@ int icssg_napi_rx_poll(struct napi_struct *napi_rx, int budget)
>  		PRUETH_RX_FLOW_DATA_SR1 : PRUETH_RX_FLOW_DATA;
>  	int flow = emac->is_sr1 ?
>  		PRUETH_MAX_RX_FLOWS_SR1 : PRUETH_MAX_RX_FLOWS;
> +	int xdp_state_or = 0;
>  	int num_rx = 0;
>  	int cur_budget;
> +	int xdp_state;
>  	int ret;
>  
>  	while (flow--) {
>  		cur_budget = budget - num_rx;
>  
>  		while (cur_budget--) {
> -			ret = emac_rx_packet(emac, flow);
> +			ret = emac_rx_packet(emac, flow, &xdp_state);
> +			xdp_state_or |= xdp_state;
>  			if (ret)
>  				break;
>  			num_rx++;
> @@ -922,6 +1112,9 @@ int icssg_napi_rx_poll(struct napi_struct *napi_rx, int budget)
>  			break;
>  	}
>  
> +	if (xdp_state_or & ICSSG_XDP_REDIR)
> +		xdp_do_flush();
> +
>  	if (num_rx < budget && napi_complete_done(napi_rx, num_rx)) {
>  		if (unlikely(emac->rx_pace_timeout_ns)) {
>  			hrtimer_start(&emac->rx_hrtimer,
> diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.c b/drivers/net/ethernet/ti/icssg/icssg_prueth.c
> index 3ff8c322f9d9..1acbf9e1bade 100644
> --- a/drivers/net/ethernet/ti/icssg/icssg_prueth.c
> +++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.c
> @@ -559,6 +559,33 @@ const struct icss_iep_clockops prueth_iep_clockops = {
>  	.perout_enable = prueth_perout_enable,
>  };
>  
> +static int prueth_create_xdp_rxqs(struct prueth_emac *emac)
> +{
> +	struct xdp_rxq_info *rxq = &emac->rx_chns.xdp_rxq;
> +	struct page_pool *pool = emac->rx_chns.pg_pool;
> +	int ret;
> +
> +	ret = xdp_rxq_info_reg(rxq, emac->ndev, 0, emac->napi_rx.napi_id);
> +	if (ret)
> +		return ret;
> +
> +	ret = xdp_rxq_info_reg_mem_model(rxq, MEM_TYPE_PAGE_POOL, pool);
> +	if (ret)
> +		xdp_rxq_info_unreg(rxq);
> +
> +	return ret;
> +}
> +
> +static void prueth_destroy_xdp_rxqs(struct prueth_emac *emac)
> +{
> +	struct xdp_rxq_info *rxq = &emac->rx_chns.xdp_rxq;
> +
> +	if (!xdp_rxq_info_is_reg(rxq))
> +		return;
> +
> +	xdp_rxq_info_unreg(rxq);
> +}
> +
>  static int icssg_prueth_add_mcast(struct net_device *ndev, const u8 *addr)
>  {
>  	struct net_device *real_dev;
> @@ -780,10 +807,14 @@ static int emac_ndo_open(struct net_device *ndev)
>  	if (ret)
>  		goto free_tx_ts_irq;
>  
> -	ret = k3_udma_glue_enable_rx_chn(emac->rx_chns.rx_chn);
> +	ret = prueth_create_xdp_rxqs(emac);
>  	if (ret)
>  		goto reset_rx_chn;
>  
> +	ret = k3_udma_glue_enable_rx_chn(emac->rx_chns.rx_chn);
> +	if (ret)
> +		goto destroy_xdp_rxqs;
> +
>  	for (i = 0; i < emac->tx_ch_num; i++) {
>  		ret = k3_udma_glue_enable_tx_chn(emac->tx_chns[i].tx_chn);
>  		if (ret)
> @@ -809,6 +840,8 @@ static int emac_ndo_open(struct net_device *ndev)
>  	 * any SKB for completion. So set false to free_skb
>  	 */
>  	prueth_reset_tx_chan(emac, i, false);
> +destroy_xdp_rxqs:
> +	prueth_destroy_xdp_rxqs(emac);
>  reset_rx_chn:
>  	prueth_reset_rx_chan(&emac->rx_chns, max_rx_flows, false);
>  free_tx_ts_irq:
> @@ -879,7 +912,7 @@ static int emac_ndo_stop(struct net_device *ndev)
>  	k3_udma_glue_tdown_rx_chn(emac->rx_chns.rx_chn, true);
>  
>  	prueth_reset_rx_chan(&emac->rx_chns, max_rx_flows, true);
> -
> +	prueth_destroy_xdp_rxqs(emac);
>  	napi_disable(&emac->napi_rx);
>  	hrtimer_cancel(&emac->rx_hrtimer);
>  
> @@ -1024,6 +1057,90 @@ static int emac_ndo_vlan_rx_del_vid(struct net_device *ndev,
>  	return 0;
>  }
>  
> +/**
> + * emac_xdp_xmit - Implements ndo_xdp_xmit
> + * @dev: netdev
> + * @n: number of frames
> + * @frames: array of XDP buffer pointers
> + * @flags: XDP extra info
> + *
> + * Return: number of frames successfully sent. Failed frames
> + * will be free'ed by XDP core.
> + *
> + * For error cases, a negative errno code is returned and no-frames
> + * are transmitted (caller must handle freeing frames).
> + **/
> +static int emac_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
> +			 u32 flags)
> +{
> +	struct prueth_emac *emac = netdev_priv(dev);
> +	unsigned int q_idx;
> +	int nxmit = 0;
> +	int i;
> +
> +	q_idx = smp_processor_id() % emac->tx_ch_num;
> +
> +	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
> +		return -EINVAL;
> +
> +	for (i = 0; i < n; i++) {
> +		struct xdp_frame *xdpf = frames[i];
> +		int err;
> +
> +		err = emac_xmit_xdp_frame(emac, xdpf, NULL, q_idx);
> +		if (err != ICSSG_XDP_TX)
> +			break;
> +		nxmit++;
> +	}
> +
> +	return nxmit;
> +}
> +
> +/**
> + * emac_xdp_setup - add/remove an XDP program
> + * @emac: emac device
> + * @bpf: XDP program
> + *
> + * Return: Always 0 (Success)
> + **/
> +static int emac_xdp_setup(struct prueth_emac *emac, struct netdev_bpf *bpf)
> +{
> +	struct bpf_prog *prog = bpf->prog;
> +	xdp_features_t val;
> +
> +	val = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
> +	      NETDEV_XDP_ACT_NDO_XMIT;
> +	xdp_set_features_flag(emac->ndev, val);
> +
> +	if (!emac->xdpi.prog && !prog)
> +		return 0;
> +
> +	WRITE_ONCE(emac->xdp_prog, prog);
> +
> +	xdp_attachment_setup(&emac->xdpi, bpf);
> +
> +	return 0;
> +}
> +
> +/**
> + * emac_ndo_bpf - implements ndo_bpf for icssg_prueth
> + * @ndev: network adapter device
> + * @bpf: XDP program
> + *
> + * Return: 0 on success, error code on failure.
> + **/
> +static int emac_ndo_bpf(struct net_device *ndev, struct netdev_bpf *bpf)
> +{
> +	struct prueth_emac *emac = netdev_priv(ndev);
> +
> +	switch (bpf->command) {
> +	case XDP_SETUP_PROG:
> +		return emac_xdp_setup(emac, bpf);
> +	default:
> +		return -EINVAL;
> +	}
> +}
> +
>  static const struct net_device_ops emac_netdev_ops = {
>  	.ndo_open = emac_ndo_open,
>  	.ndo_stop = emac_ndo_stop,
> @@ -1038,6 +1155,8 @@ static const struct net_device_ops emac_netdev_ops = {
>  	.ndo_fix_features = emac_ndo_fix_features,
>  	.ndo_vlan_rx_add_vid = emac_ndo_vlan_rx_add_vid,
>  	.ndo_vlan_rx_kill_vid = emac_ndo_vlan_rx_del_vid,
> +	.ndo_bpf = emac_ndo_bpf,
> +	.ndo_xdp_xmit = emac_xdp_xmit,
>  };
>  
>  static int prueth_netdev_init(struct prueth *prueth,
> @@ -1066,6 +1185,8 @@ static int prueth_netdev_init(struct prueth *prueth,
>  	emac->prueth = prueth;
>  	emac->ndev = ndev;
>  	emac->port_id = port;
> +	emac->xdp_prog = NULL;
> +	emac->ndev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS;
>  	emac->cmd_wq = create_singlethread_workqueue("icssg_cmd_wq");
>  	if (!emac->cmd_wq) {
>  		ret = -ENOMEM;
> diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.h b/drivers/net/ethernet/ti/icssg/icssg_prueth.h
> index 3bbabd007129..0675919beb94 100644
> --- a/drivers/net/ethernet/ti/icssg/icssg_prueth.h
> +++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.h
> @@ -8,6 +8,8 @@
>  #ifndef __NET_TI_ICSSG_PRUETH_H
>  #define __NET_TI_ICSSG_PRUETH_H
>  
> +#include <linux/bpf.h>
> +#include <linux/bpf_trace.h>
>  #include <linux/etherdevice.h>
>  #include <linux/genalloc.h>
>  #include <linux/if_vlan.h>
> @@ -134,6 +136,7 @@ struct prueth_rx_chn {
>  	unsigned int irq[ICSSG_MAX_RFLOWS];	/* separate irq per flow */
>  	char name[32];
>  	struct page_pool *pg_pool;
> +	struct xdp_rxq_info xdp_rxq;
>  };
>  
>  enum prueth_swdata_type {
> @@ -141,6 +144,7 @@ enum prueth_swdata_type {
>  	PRUETH_SWDATA_SKB,
>  	PRUETH_SWDATA_PAGE,
>  	PRUETH_SWDATA_CMD,
> +	PRUETH_SWDATA_XDPF,
>  };
>  
>  struct prueth_swdata {
> @@ -149,6 +153,7 @@ struct prueth_swdata {
>  		struct sk_buff *skb;
>  		struct page *page;
>  		u32 cmd;
> +		struct xdp_frame *xdpf;
>  	} data;
>  };
>  
> @@ -159,6 +164,12 @@ struct prueth_swdata {
>  
>  #define PRUETH_MAX_TX_TS_REQUESTS	50 /* Max simultaneous TX_TS requests */
>  
> +/* XDP BPF state */
> +#define ICSSG_XDP_PASS           0
> +#define ICSSG_XDP_CONSUMED       BIT(0)
> +#define ICSSG_XDP_TX             BIT(1)
> +#define ICSSG_XDP_REDIR          BIT(2)
> +
>  /* Minimum coalesce time in usecs for both Tx and Rx */
>  #define ICSSG_MIN_COALESCE_USECS 20
>  
> @@ -227,6 +238,8 @@ struct prueth_emac {
>  	unsigned long rx_pace_timeout_ns;
>  
>  	struct netdev_hw_addr_list vlan_mcast_list[MAX_VLAN_ID];
> +	struct bpf_prog *xdp_prog;
> +	struct xdp_attachment_info xdpi;
>  };
>  
>  /* The buf includes headroom compatible with both skb and xdpf */
> @@ -465,5 +478,9 @@ void prueth_put_cores(struct prueth *prueth, int slice);
>  
>  /* Revision specific helper */
>  u64 icssg_ts_to_ns(u32 hi_sw, u32 hi, u32 lo, u32 cycle_time_ns);
> +int emac_xmit_xdp_frame(struct prueth_emac *emac,
> +			struct xdp_frame *xdpf,
> +			struct page *page,
> +			unsigned int q_idx);
>  
>  #endif /* __NET_TI_ICSSG_PRUETH_H */

-- 
cheers,
-roger
Re: [PATCH net-next v3 3/3] net: ti: icssg-prueth: Add XDP support
Posted by Malladi, Meghana 11 months, 1 week ago

On 2/27/2025 9:07 PM, Roger Quadros wrote:
> 
> 
> On 24/02/2025 13:01, Meghana Malladi wrote:
>> From: Roger Quadros <rogerq@kernel.org>
>>
>> Add native XDP support. We do not support zero copy yet.
>>
>> Signed-off-by: Roger Quadros <rogerq@kernel.org>
>> Signed-off-by: MD Danish Anwar <danishanwar@ti.com>
>> Signed-off-by: Meghana Malladi <m-malladi@ti.com>
>> ---
>> Changes since v2 (v3-v2):
>> - Use page_pool contained in the page instead of using passing page_pool
>> (rx_chn) as part of swdata
>> - dev_sw_netstats_tx_add() instead of incrementing the stats directly
>> - Add missing ndev->stats.tx_dropped++ wherever applicable
>> - Move k3_cppi_desc_pool_alloc() before the DMA mapping for easier cleanup
>> on failure
>> - Replace rxp->napi_id with emac->napi_rx.napi_id in prueth_create_xdp_rxqs()
>>
>> All the above changes have been suggested by Roger Quadros <rogerq@kernel.org>
>>
>>   drivers/net/ethernet/ti/icssg/icssg_common.c | 219 +++++++++++++++++--
>>   drivers/net/ethernet/ti/icssg/icssg_prueth.c | 125 ++++++++++-
>>   drivers/net/ethernet/ti/icssg/icssg_prueth.h |  17 ++
>>   3 files changed, 346 insertions(+), 15 deletions(-)
>>
>> diff --git a/drivers/net/ethernet/ti/icssg/icssg_common.c b/drivers/net/ethernet/ti/icssg/icssg_common.c
>> index 01eeabe83eff..4716e24ea05d 100644
>> --- a/drivers/net/ethernet/ti/icssg/icssg_common.c
>> +++ b/drivers/net/ethernet/ti/icssg/icssg_common.c
>> @@ -98,11 +98,19 @@ void prueth_xmit_free(struct prueth_tx_chn *tx_chn,
>>   {
>>   	struct cppi5_host_desc_t *first_desc, *next_desc;
>>   	dma_addr_t buf_dma, next_desc_dma;
>> +	struct prueth_swdata *swdata;
>>   	u32 buf_dma_len;
>>   
>>   	first_desc = desc;
>>   	next_desc = first_desc;
>>   
>> +	swdata = cppi5_hdesc_get_swdata(desc);
>> +	if (swdata->type == PRUETH_SWDATA_PAGE) {
>> +		page_pool_recycle_direct(swdata->data.page->pp,
>> +					 swdata->data.page);
>> +		goto free_desc;
>> +	}
>> +
>>   	cppi5_hdesc_get_obuf(first_desc, &buf_dma, &buf_dma_len);
>>   	k3_udma_glue_tx_cppi5_to_dma_addr(tx_chn->tx_chn, &buf_dma);
>>   
>> @@ -126,6 +134,7 @@ void prueth_xmit_free(struct prueth_tx_chn *tx_chn,
>>   		k3_cppi_desc_pool_free(tx_chn->desc_pool, next_desc);
>>   	}
>>   
>> +free_desc:
>>   	k3_cppi_desc_pool_free(tx_chn->desc_pool, first_desc);
>>   }
>>   EXPORT_SYMBOL_GPL(prueth_xmit_free);
>> @@ -139,6 +148,7 @@ int emac_tx_complete_packets(struct prueth_emac *emac, int chn,
>>   	struct prueth_swdata *swdata;
>>   	struct prueth_tx_chn *tx_chn;
>>   	unsigned int total_bytes = 0;
>> +	struct xdp_frame *xdpf;
>>   	struct sk_buff *skb;
>>   	dma_addr_t desc_dma;
>>   	int res, num_tx = 0;
>> @@ -168,21 +178,28 @@ int emac_tx_complete_packets(struct prueth_emac *emac, int chn,
>>   			continue;
>>   		}
>>   
>> -		if (swdata->type != PRUETH_SWDATA_SKB) {
>> +		switch (swdata->type) {
>> +		case PRUETH_SWDATA_SKB:
>> +			skb = swdata->data.skb;
>> +			dev_sw_netstats_tx_add(skb->dev, 1, skb->len);
>> +			total_bytes += skb->len;
>> +			napi_consume_skb(skb, budget);
>> +			break;
>> +		case PRUETH_SWDATA_XDPF:
>> +			xdpf = swdata->data.xdpf;
>> +			dev_sw_netstats_tx_add(ndev, 1, xdpf->len);
>> +			total_bytes += xdpf->len;
>> +			xdp_return_frame(xdpf);
>> +			break;
>> +		default:
>>   			netdev_err(ndev, "tx_complete: invalid swdata type %d\n", swdata->type);
>>   			prueth_xmit_free(tx_chn, desc_tx);
>> +			ndev->stats.tx_dropped++;
>>   			budget++;
>>   			continue;
>>   		}
>>   
>> -		skb = swdata->data.skb;
>>   		prueth_xmit_free(tx_chn, desc_tx);
>> -
>> -		ndev = skb->dev;
>> -		ndev->stats.tx_packets++;
>> -		ndev->stats.tx_bytes += skb->len;
>> -		total_bytes += skb->len;
>> -		napi_consume_skb(skb, budget);
>>   		num_tx++;
>>   	}
>>   
>> @@ -541,7 +558,153 @@ void emac_rx_timestamp(struct prueth_emac *emac,
>>   	ssh->hwtstamp = ns_to_ktime(ns);
>>   }
>>   
>> -static int emac_rx_packet(struct prueth_emac *emac, u32 flow_id)
>> +/**
>> + * emac_xmit_xdp_frame - transmits an XDP frame
>> + * @emac: emac device
>> + * @xdpf: data to transmit
>> + * @page: page from page pool if already DMA mapped
>> + * @q_idx: queue id
>> + *
>> + * Return: XDP state
>> + */
>> +int emac_xmit_xdp_frame(struct prueth_emac *emac,
>> +			struct xdp_frame *xdpf,
>> +			struct page *page,
>> +			unsigned int q_idx)
>> +{
>> +	struct cppi5_host_desc_t *first_desc;
>> +	struct net_device *ndev = emac->ndev;
>> +	struct prueth_tx_chn *tx_chn;
>> +	dma_addr_t desc_dma, buf_dma;
>> +	struct prueth_swdata *swdata;
>> +	u32 *epib;
>> +	int ret;
>> +
> drop new line and arrange below declarations in reverse xmas tree order.
> 

As suggested by Dan, will drop these variabled and directly use them.

>> +	void *data = xdpf->data;
>> +	u32 pkt_len = xdpf->len;
>> +
>> +	if (q_idx >= PRUETH_MAX_TX_QUEUES) {
>> +		netdev_err(ndev, "xdp tx: invalid q_id %d\n", q_idx);
>> +		return ICSSG_XDP_CONSUMED;	/* drop */
>> +	}
>> +
>> +	tx_chn = &emac->tx_chns[q_idx];
>> +
>> +	first_desc = k3_cppi_desc_pool_alloc(tx_chn->desc_pool);
>> +	if (!first_desc) {
>> +		netdev_dbg(ndev, "xdp tx: failed to allocate descriptor\n");
>> +		goto drop_free_descs;	/* drop */
>> +	}
>> +
>> +	if (page) { /* already DMA mapped by page_pool */
>> +		buf_dma = page_pool_get_dma_addr(page);
>> +		buf_dma += xdpf->headroom + sizeof(struct xdp_frame);
>> +	} else { /* Map the linear buffer */
>> +		buf_dma = dma_map_single(tx_chn->dma_dev, data, pkt_len, DMA_TO_DEVICE);
>> +		if (dma_mapping_error(tx_chn->dma_dev, buf_dma)) {
>> +			netdev_err(ndev, "xdp tx: failed to map data buffer\n");
>> +			goto drop_free_descs;	/* drop */
>> +		}
>> +	}
>> +
>> +	cppi5_hdesc_init(first_desc, CPPI5_INFO0_HDESC_EPIB_PRESENT,
>> +			 PRUETH_NAV_PS_DATA_SIZE);
>> +	cppi5_hdesc_set_pkttype(first_desc, 0);
>> +	epib = first_desc->epib;
>> +	epib[0] = 0;
>> +	epib[1] = 0;
>> +
>> +	/* set dst tag to indicate internal qid at the firmware which is at
>> +	 * bit8..bit15. bit0..bit7 indicates port num for directed
>> +	 * packets in case of switch mode operation
>> +	 */
>> +	cppi5_desc_set_tags_ids(&first_desc->hdr, 0, (emac->port_id | (q_idx << 8)));
>> +	k3_udma_glue_tx_dma_to_cppi5_addr(tx_chn->tx_chn, &buf_dma);
>> +	cppi5_hdesc_attach_buf(first_desc, buf_dma, pkt_len, buf_dma, pkt_len);
>> +	swdata = cppi5_hdesc_get_swdata(first_desc);
>> +	if (page) {
>> +		swdata->type = PRUETH_SWDATA_PAGE;
>> +		swdata->data.page = page;
>> +	} else {
>> +		swdata->type = PRUETH_SWDATA_XDPF;
>> +		swdata->data.xdpf = xdpf;
>> +	}
>> +
>> +	cppi5_hdesc_set_pktlen(first_desc, pkt_len);
>> +	desc_dma = k3_cppi_desc_pool_virt2dma(tx_chn->desc_pool, first_desc);
>> +
>> +	ret = k3_udma_glue_push_tx_chn(tx_chn->tx_chn, first_desc, desc_dma);
>> +	if (ret) {
>> +		netdev_err(ndev, "xdp tx: push failed: %d\n", ret);
>> +		goto drop_free_descs;
>> +	}
>> +
>> +	return ICSSG_XDP_TX;
>> +
>> +drop_free_descs:
>> +	prueth_xmit_free(tx_chn, first_desc);
>> +	return ICSSG_XDP_CONSUMED;
>> +}
>> +EXPORT_SYMBOL_GPL(emac_xmit_xdp_frame);
>> +
>> +/**
>> + * emac_run_xdp - run an XDP program
>> + * @emac: emac device
>> + * @xdp: XDP buffer containing the frame
>> + * @page: page with RX data if already DMA mapped
>> + *
>> + * Return: XDP state
>> + */
>> +static int emac_run_xdp(struct prueth_emac *emac, struct xdp_buff *xdp,
>> +			struct page *page)
>> +{
>> +	struct net_device *ndev = emac->ndev;
>> +	int err, result = ICSSG_XDP_PASS;
> 
> you could avoid initialization of result. see below.
> 
>> +	struct bpf_prog *xdp_prog;
>> +	struct xdp_frame *xdpf;
>> +	int q_idx;
>> +	u32 act;
>> +
>> +	xdp_prog = READ_ONCE(emac->xdp_prog);
>> +	act = bpf_prog_run_xdp(xdp_prog, xdp);
>> +	switch (act) {
>> +	case XDP_PASS:
>> +		break;
> instead of break how about?
> 		return ICSSG_XDP_PASS;
> 

I looked into CPSW XDP implementation and its same as what you suggested 
here. Will update this functon with your suggestions.

>> +	case XDP_TX:
>> +		/* Send packet to TX ring for immediate transmission */
>> +		xdpf = xdp_convert_buff_to_frame(xdp);
>> +		if (unlikely(!xdpf))
> 
> TX is dropped so here you need to
> 			ndev->stats.tx_dropped++;

Yes added it.

>> +			goto drop;
>> +
>> +		q_idx = smp_processor_id() % emac->tx_ch_num;
>> +		result = emac_xmit_xdp_frame(emac, xdpf, page, q_idx);
>> +		if (result == ICSSG_XDP_CONSUMED)
>> +			goto drop;
> 
> need to increment rx stats as we received the packet successfully?
> 

Yes I will do that.

>> +		break;
> instead,
> 		return ICSSG_XDP_TX;
> 

returning result here as it depends on what emac_xmit_xdp_frame() returns

>> +	case XDP_REDIRECT:
>> +		err = xdp_do_redirect(emac->ndev, xdp, xdp_prog);
>> +		if (err)
>> +			goto drop;
>> +		result = ICSSG_XDP_REDIR;
>> +		break;
> 
> replace above 2 by
> 		return ICSSG_XDP_REDIR;

I suppose you meant "replace above to by?" I have updated it.

>> +	default:
>> +		bpf_warn_invalid_xdp_action(emac->ndev, xdp_prog, act);
>> +		fallthrough;
>> +	case XDP_ABORTED:
>> +drop:
>> +		trace_xdp_exception(emac->ndev, xdp_prog, act);
>> +		fallthrough; /* handle aborts by dropping packet */
>> +	case XDP_DROP:
>> +		ndev->stats.tx_dropped++;
> 
> shouldn't this be
> 		ndev->stats.rx_dropped++;
> 

Yes correct. Thanks for finding this. Fixed it.

>> +		result = ICSSG_XDP_CONSUMED;
> 
> not required if we directly return this value below.
> 
>> +		page_pool_recycle_direct(emac->rx_chns.pg_pool, page);
>> +		break;
> 		return ICSSG_XDP_CONSUMED;
>> +	}
>> +
>> +	return result;
> 
> drop this

Done.
> 
>> +}
>> +
>> +static int emac_rx_packet(struct prueth_emac *emac, u32 flow_id, int *xdp_state)
>>   {
>>   	struct prueth_rx_chn *rx_chn = &emac->rx_chns;
>>   	u32 buf_dma_len, pkt_len, port_id = 0;
>> @@ -552,10 +715,12 @@ static int emac_rx_packet(struct prueth_emac *emac, u32 flow_id)
>>   	struct page *page, *new_page;
>>   	struct page_pool *pool;
>>   	struct sk_buff *skb;
>> +	struct xdp_buff xdp;
>>   	u32 *psdata;
>>   	void *pa;
>>   	int ret;
>>   
>> +	*xdp_state = 0;
>>   	pool = rx_chn->pg_pool;
>>   	ret = k3_udma_glue_pop_rx_chn(rx_chn->rx_chn, flow_id, &desc_dma);
>>   	if (ret) {
>> @@ -596,9 +761,21 @@ static int emac_rx_packet(struct prueth_emac *emac, u32 flow_id)
>>   		goto requeue;
>>   	}
>>   
>> -	/* prepare skb and send to n/w stack */
>>   	pa = page_address(page);
>> -	skb = napi_build_skb(pa, PAGE_SIZE);
> 
> We are running the xdp program after allocating the new page.
> How about running the xdp program first? if the packet has to be dropped
> then it is pointless to allocate a new page. We could just reuse the old page
> and save CPU cycles.
> 

What if the packet need not be dropped (it has been processed by XDP) 
Then we go to requeue, where new page is needed to map it to the next 
upcoming packet descriptor. Hence running XDP needs new_page.

>> +	if (emac->xdp_prog) {
>> +		xdp_init_buff(&xdp, PAGE_SIZE, &rx_chn->xdp_rxq);
>> +		xdp_prepare_buff(&xdp, pa, PRUETH_HEADROOM, pkt_len, false);
>> +
>> +		*xdp_state = emac_run_xdp(emac, &xdp, page);
>> +		if (*xdp_state == ICSSG_XDP_PASS)
>> +			skb = xdp_build_skb_from_buff(&xdp);
>> +		else
>> +			goto requeue;
>> +	} else {
>> +		/* prepare skb and send to n/w stack */
>> +		skb = napi_build_skb(pa, PAGE_SIZE);
>> +	}
>> +
>>   	if (!skb) {
>>   		ndev->stats.rx_dropped++;
>>   		page_pool_recycle_direct(pool, page);
> 
> instead of recycling the old page just reuse it
> 		new_page = page;
> 

With the above explanation I think allocating new_page is inevitable.

>> 		goto requeue;
>> 	}
> 
> here you can allocate the new page cause now we're sure old page
> has to be sent to user space.
> 

likewise to the above comment.

>> @@ -861,13 +1038,23 @@ static void prueth_tx_cleanup(void *data, dma_addr_t desc_dma)
>>   	struct prueth_tx_chn *tx_chn = data;
>>   	struct cppi5_host_desc_t *desc_tx;
>>   	struct prueth_swdata *swdata;
>> +	struct xdp_frame *xdpf;
>>   	struct sk_buff *skb;
>>   
>>   	desc_tx = k3_cppi_desc_pool_dma2virt(tx_chn->desc_pool, desc_dma);
>>   	swdata = cppi5_hdesc_get_swdata(desc_tx);
>> -	if (swdata->type == PRUETH_SWDATA_SKB) {
>> +
>> +	switch (swdata->type) {
>> +	case PRUETH_SWDATA_SKB:
>>   		skb = swdata->data.skb;
>>   		dev_kfree_skb_any(skb);
>> +		break;
>> +	case PRUETH_SWDATA_XDPF:
>> +		xdpf = swdata->data.xdpf;
>> +		xdp_return_frame(xdpf);
>> +		break;
>> +	default:
>> +		break;
>>   	}
>>   
>>   	prueth_xmit_free(tx_chn, desc_tx);
>> @@ -904,15 +1091,18 @@ int icssg_napi_rx_poll(struct napi_struct *napi_rx, int budget)
>>   		PRUETH_RX_FLOW_DATA_SR1 : PRUETH_RX_FLOW_DATA;
>>   	int flow = emac->is_sr1 ?
>>   		PRUETH_MAX_RX_FLOWS_SR1 : PRUETH_MAX_RX_FLOWS;
>> +	int xdp_state_or = 0;
>>   	int num_rx = 0;
>>   	int cur_budget;
>> +	int xdp_state;
>>   	int ret;
>>   
>>   	while (flow--) {
>>   		cur_budget = budget - num_rx;
>>   
>>   		while (cur_budget--) {
>> -			ret = emac_rx_packet(emac, flow);
>> +			ret = emac_rx_packet(emac, flow, &xdp_state);
>> +			xdp_state_or |= xdp_state;
>>   			if (ret)
>>   				break;
>>   			num_rx++;
>> @@ -922,6 +1112,9 @@ int icssg_napi_rx_poll(struct napi_struct *napi_rx, int budget)
>>   			break;
>>   	}
>>   
>> +	if (xdp_state_or & ICSSG_XDP_REDIR)
>> +		xdp_do_flush();
>> +
>>   	if (num_rx < budget && napi_complete_done(napi_rx, num_rx)) {
>>   		if (unlikely(emac->rx_pace_timeout_ns)) {
>>   			hrtimer_start(&emac->rx_hrtimer,
>> diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.c b/drivers/net/ethernet/ti/icssg/icssg_prueth.c
>> index 3ff8c322f9d9..1acbf9e1bade 100644
>> --- a/drivers/net/ethernet/ti/icssg/icssg_prueth.c
>> +++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.c
>> @@ -559,6 +559,33 @@ const struct icss_iep_clockops prueth_iep_clockops = {
>>   	.perout_enable = prueth_perout_enable,
>>   };
>>   
>> +static int prueth_create_xdp_rxqs(struct prueth_emac *emac)
>> +{
>> +	struct xdp_rxq_info *rxq = &emac->rx_chns.xdp_rxq;
>> +	struct page_pool *pool = emac->rx_chns.pg_pool;
>> +	int ret;
>> +
>> +	ret = xdp_rxq_info_reg(rxq, emac->ndev, 0, emac->napi_rx.napi_id);
>> +	if (ret)
>> +		return ret;
>> +
>> +	ret = xdp_rxq_info_reg_mem_model(rxq, MEM_TYPE_PAGE_POOL, pool);
>> +	if (ret)
>> +		xdp_rxq_info_unreg(rxq);
>> +
>> +	return ret;
>> +}
>> +
>> +static void prueth_destroy_xdp_rxqs(struct prueth_emac *emac)
>> +{
>> +	struct xdp_rxq_info *rxq = &emac->rx_chns.xdp_rxq;
>> +
>> +	if (!xdp_rxq_info_is_reg(rxq))
>> +		return;
>> +
>> +	xdp_rxq_info_unreg(rxq);
>> +}
>> +
>>   static int icssg_prueth_add_mcast(struct net_device *ndev, const u8 *addr)
>>   {
>>   	struct net_device *real_dev;
>> @@ -780,10 +807,14 @@ static int emac_ndo_open(struct net_device *ndev)
>>   	if (ret)
>>   		goto free_tx_ts_irq;
>>   
>> -	ret = k3_udma_glue_enable_rx_chn(emac->rx_chns.rx_chn);
>> +	ret = prueth_create_xdp_rxqs(emac);
>>   	if (ret)
>>   		goto reset_rx_chn;
>>   
>> +	ret = k3_udma_glue_enable_rx_chn(emac->rx_chns.rx_chn);
>> +	if (ret)
>> +		goto destroy_xdp_rxqs;
>> +
>>   	for (i = 0; i < emac->tx_ch_num; i++) {
>>   		ret = k3_udma_glue_enable_tx_chn(emac->tx_chns[i].tx_chn);
>>   		if (ret)
>> @@ -809,6 +840,8 @@ static int emac_ndo_open(struct net_device *ndev)
>>   	 * any SKB for completion. So set false to free_skb
>>   	 */
>>   	prueth_reset_tx_chan(emac, i, false);
>> +destroy_xdp_rxqs:
>> +	prueth_destroy_xdp_rxqs(emac);
>>   reset_rx_chn:
>>   	prueth_reset_rx_chan(&emac->rx_chns, max_rx_flows, false);
>>   free_tx_ts_irq:
>> @@ -879,7 +912,7 @@ static int emac_ndo_stop(struct net_device *ndev)
>>   	k3_udma_glue_tdown_rx_chn(emac->rx_chns.rx_chn, true);
>>   
>>   	prueth_reset_rx_chan(&emac->rx_chns, max_rx_flows, true);
>> -
>> +	prueth_destroy_xdp_rxqs(emac);
>>   	napi_disable(&emac->napi_rx);
>>   	hrtimer_cancel(&emac->rx_hrtimer);
>>   
>> @@ -1024,6 +1057,90 @@ static int emac_ndo_vlan_rx_del_vid(struct net_device *ndev,
>>   	return 0;
>>   }
>>   
>> +/**
>> + * emac_xdp_xmit - Implements ndo_xdp_xmit
>> + * @dev: netdev
>> + * @n: number of frames
>> + * @frames: array of XDP buffer pointers
>> + * @flags: XDP extra info
>> + *
>> + * Return: number of frames successfully sent. Failed frames
>> + * will be free'ed by XDP core.
>> + *
>> + * For error cases, a negative errno code is returned and no-frames
>> + * are transmitted (caller must handle freeing frames).
>> + **/
>> +static int emac_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
>> +			 u32 flags)
>> +{
>> +	struct prueth_emac *emac = netdev_priv(dev);
>> +	unsigned int q_idx;
>> +	int nxmit = 0;
>> +	int i;
>> +
>> +	q_idx = smp_processor_id() % emac->tx_ch_num;
>> +
>> +	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
>> +		return -EINVAL;
>> +
>> +	for (i = 0; i < n; i++) {
>> +		struct xdp_frame *xdpf = frames[i];
>> +		int err;
>> +
>> +		err = emac_xmit_xdp_frame(emac, xdpf, NULL, q_idx);
>> +		if (err != ICSSG_XDP_TX)
>> +			break;
>> +		nxmit++;
>> +	}
>> +
>> +	return nxmit;
>> +}
>> +
>> +/**
>> + * emac_xdp_setup - add/remove an XDP program
>> + * @emac: emac device
>> + * @bpf: XDP program
>> + *
>> + * Return: Always 0 (Success)
>> + **/
>> +static int emac_xdp_setup(struct prueth_emac *emac, struct netdev_bpf *bpf)
>> +{
>> +	struct bpf_prog *prog = bpf->prog;
>> +	xdp_features_t val;
>> +
>> +	val = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
>> +	      NETDEV_XDP_ACT_NDO_XMIT;
>> +	xdp_set_features_flag(emac->ndev, val);
>> +
>> +	if (!emac->xdpi.prog && !prog)
>> +		return 0;
>> +
>> +	WRITE_ONCE(emac->xdp_prog, prog);
>> +
>> +	xdp_attachment_setup(&emac->xdpi, bpf);
>> +
>> +	return 0;
>> +}
>> +
>> +/**
>> + * emac_ndo_bpf - implements ndo_bpf for icssg_prueth
>> + * @ndev: network adapter device
>> + * @bpf: XDP program
>> + *
>> + * Return: 0 on success, error code on failure.
>> + **/
>> +static int emac_ndo_bpf(struct net_device *ndev, struct netdev_bpf *bpf)
>> +{
>> +	struct prueth_emac *emac = netdev_priv(ndev);
>> +
>> +	switch (bpf->command) {
>> +	case XDP_SETUP_PROG:
>> +		return emac_xdp_setup(emac, bpf);
>> +	default:
>> +		return -EINVAL;
>> +	}
>> +}
>> +
>>   static const struct net_device_ops emac_netdev_ops = {
>>   	.ndo_open = emac_ndo_open,
>>   	.ndo_stop = emac_ndo_stop,
>> @@ -1038,6 +1155,8 @@ static const struct net_device_ops emac_netdev_ops = {
>>   	.ndo_fix_features = emac_ndo_fix_features,
>>   	.ndo_vlan_rx_add_vid = emac_ndo_vlan_rx_add_vid,
>>   	.ndo_vlan_rx_kill_vid = emac_ndo_vlan_rx_del_vid,
>> +	.ndo_bpf = emac_ndo_bpf,
>> +	.ndo_xdp_xmit = emac_xdp_xmit,
>>   };
>>   
>>   static int prueth_netdev_init(struct prueth *prueth,
>> @@ -1066,6 +1185,8 @@ static int prueth_netdev_init(struct prueth *prueth,
>>   	emac->prueth = prueth;
>>   	emac->ndev = ndev;
>>   	emac->port_id = port;
>> +	emac->xdp_prog = NULL;
>> +	emac->ndev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS;
>>   	emac->cmd_wq = create_singlethread_workqueue("icssg_cmd_wq");
>>   	if (!emac->cmd_wq) {
>>   		ret = -ENOMEM;
>> diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.h b/drivers/net/ethernet/ti/icssg/icssg_prueth.h
>> index 3bbabd007129..0675919beb94 100644
>> --- a/drivers/net/ethernet/ti/icssg/icssg_prueth.h
>> +++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.h
>> @@ -8,6 +8,8 @@
>>   #ifndef __NET_TI_ICSSG_PRUETH_H
>>   #define __NET_TI_ICSSG_PRUETH_H
>>   
>> +#include <linux/bpf.h>
>> +#include <linux/bpf_trace.h>
>>   #include <linux/etherdevice.h>
>>   #include <linux/genalloc.h>
>>   #include <linux/if_vlan.h>
>> @@ -134,6 +136,7 @@ struct prueth_rx_chn {
>>   	unsigned int irq[ICSSG_MAX_RFLOWS];	/* separate irq per flow */
>>   	char name[32];
>>   	struct page_pool *pg_pool;
>> +	struct xdp_rxq_info xdp_rxq;
>>   };
>>   
>>   enum prueth_swdata_type {
>> @@ -141,6 +144,7 @@ enum prueth_swdata_type {
>>   	PRUETH_SWDATA_SKB,
>>   	PRUETH_SWDATA_PAGE,
>>   	PRUETH_SWDATA_CMD,
>> +	PRUETH_SWDATA_XDPF,
>>   };
>>   
>>   struct prueth_swdata {
>> @@ -149,6 +153,7 @@ struct prueth_swdata {
>>   		struct sk_buff *skb;
>>   		struct page *page;
>>   		u32 cmd;
>> +		struct xdp_frame *xdpf;
>>   	} data;
>>   };
>>   
>> @@ -159,6 +164,12 @@ struct prueth_swdata {
>>   
>>   #define PRUETH_MAX_TX_TS_REQUESTS	50 /* Max simultaneous TX_TS requests */
>>   
>> +/* XDP BPF state */
>> +#define ICSSG_XDP_PASS           0
>> +#define ICSSG_XDP_CONSUMED       BIT(0)
>> +#define ICSSG_XDP_TX             BIT(1)
>> +#define ICSSG_XDP_REDIR          BIT(2)
>> +
>>   /* Minimum coalesce time in usecs for both Tx and Rx */
>>   #define ICSSG_MIN_COALESCE_USECS 20
>>   
>> @@ -227,6 +238,8 @@ struct prueth_emac {
>>   	unsigned long rx_pace_timeout_ns;
>>   
>>   	struct netdev_hw_addr_list vlan_mcast_list[MAX_VLAN_ID];
>> +	struct bpf_prog *xdp_prog;
>> +	struct xdp_attachment_info xdpi;
>>   };
>>   
>>   /* The buf includes headroom compatible with both skb and xdpf */
>> @@ -465,5 +478,9 @@ void prueth_put_cores(struct prueth *prueth, int slice);
>>   
>>   /* Revision specific helper */
>>   u64 icssg_ts_to_ns(u32 hi_sw, u32 hi, u32 lo, u32 cycle_time_ns);
>> +int emac_xmit_xdp_frame(struct prueth_emac *emac,
>> +			struct xdp_frame *xdpf,
>> +			struct page *page,
>> +			unsigned int q_idx);
>>   
>>   #endif /* __NET_TI_ICSSG_PRUETH_H */
>
Re: [PATCH net-next v3 3/3] net: ti: icssg-prueth: Add XDP support
Posted by Dan Carpenter 11 months, 2 weeks ago
On Mon, Feb 24, 2025 at 04:31:02PM +0530, Meghana Malladi wrote:
> @@ -541,7 +558,153 @@ void emac_rx_timestamp(struct prueth_emac *emac,
>  	ssh->hwtstamp = ns_to_ktime(ns);
>  }
>  
> -static int emac_rx_packet(struct prueth_emac *emac, u32 flow_id)
> +/**
> + * emac_xmit_xdp_frame - transmits an XDP frame
> + * @emac: emac device
> + * @xdpf: data to transmit
> + * @page: page from page pool if already DMA mapped
> + * @q_idx: queue id
> + *
> + * Return: XDP state
> + */
> +int emac_xmit_xdp_frame(struct prueth_emac *emac,
> +			struct xdp_frame *xdpf,
> +			struct page *page,
> +			unsigned int q_idx)
> +{
> +	struct cppi5_host_desc_t *first_desc;
> +	struct net_device *ndev = emac->ndev;
> +	struct prueth_tx_chn *tx_chn;
> +	dma_addr_t desc_dma, buf_dma;
> +	struct prueth_swdata *swdata;
> +	u32 *epib;
> +	int ret;
> +
> +	void *data = xdpf->data;
> +	u32 pkt_len = xdpf->len;

Get rid of these variables?

> +
> +	if (q_idx >= PRUETH_MAX_TX_QUEUES) {
> +		netdev_err(ndev, "xdp tx: invalid q_id %d\n", q_idx);
> +		return ICSSG_XDP_CONSUMED;	/* drop */
> +	}
> +
> +	tx_chn = &emac->tx_chns[q_idx];
> +
> +	first_desc = k3_cppi_desc_pool_alloc(tx_chn->desc_pool);
> +	if (!first_desc) {
> +		netdev_dbg(ndev, "xdp tx: failed to allocate descriptor\n");
> +		goto drop_free_descs;	/* drop */
> +	}
> +
> +	if (page) { /* already DMA mapped by page_pool */
> +		buf_dma = page_pool_get_dma_addr(page);
> +		buf_dma += xdpf->headroom + sizeof(struct xdp_frame);
> +	} else { /* Map the linear buffer */
> +		buf_dma = dma_map_single(tx_chn->dma_dev, data, pkt_len, DMA_TO_DEVICE);
> +		if (dma_mapping_error(tx_chn->dma_dev, buf_dma)) {
> +			netdev_err(ndev, "xdp tx: failed to map data buffer\n");
> +			goto drop_free_descs;	/* drop */
> +		}
> +	}
> +
> +	cppi5_hdesc_init(first_desc, CPPI5_INFO0_HDESC_EPIB_PRESENT,
> +			 PRUETH_NAV_PS_DATA_SIZE);
> +	cppi5_hdesc_set_pkttype(first_desc, 0);
> +	epib = first_desc->epib;
> +	epib[0] = 0;
> +	epib[1] = 0;
> +
> +	/* set dst tag to indicate internal qid at the firmware which is at
> +	 * bit8..bit15. bit0..bit7 indicates port num for directed
> +	 * packets in case of switch mode operation
> +	 */
> +	cppi5_desc_set_tags_ids(&first_desc->hdr, 0, (emac->port_id | (q_idx << 8)));
> +	k3_udma_glue_tx_dma_to_cppi5_addr(tx_chn->tx_chn, &buf_dma);
> +	cppi5_hdesc_attach_buf(first_desc, buf_dma, pkt_len, buf_dma, pkt_len);
> +	swdata = cppi5_hdesc_get_swdata(first_desc);
> +	if (page) {
> +		swdata->type = PRUETH_SWDATA_PAGE;
> +		swdata->data.page = page;
> +	} else {
> +		swdata->type = PRUETH_SWDATA_XDPF;
> +		swdata->data.xdpf = xdpf;
> +	}
> +
> +	cppi5_hdesc_set_pktlen(first_desc, pkt_len);
> +	desc_dma = k3_cppi_desc_pool_virt2dma(tx_chn->desc_pool, first_desc);
> +
> +	ret = k3_udma_glue_push_tx_chn(tx_chn->tx_chn, first_desc, desc_dma);
> +	if (ret) {
> +		netdev_err(ndev, "xdp tx: push failed: %d\n", ret);
> +		goto drop_free_descs;
> +	}
> +
> +	return ICSSG_XDP_TX;
> +
> +drop_free_descs:
> +	prueth_xmit_free(tx_chn, first_desc);
> +	return ICSSG_XDP_CONSUMED;
> +}
> +EXPORT_SYMBOL_GPL(emac_xmit_xdp_frame);
> +
> +/**
> + * emac_run_xdp - run an XDP program
> + * @emac: emac device
> + * @xdp: XDP buffer containing the frame
> + * @page: page with RX data if already DMA mapped
> + *
> + * Return: XDP state
> + */
> +static int emac_run_xdp(struct prueth_emac *emac, struct xdp_buff *xdp,
> +			struct page *page)
> +{
> +	struct net_device *ndev = emac->ndev;
> +	int err, result = ICSSG_XDP_PASS;
> +	struct bpf_prog *xdp_prog;
> +	struct xdp_frame *xdpf;
> +	int q_idx;
> +	u32 act;
> +
> +	xdp_prog = READ_ONCE(emac->xdp_prog);
> +	act = bpf_prog_run_xdp(xdp_prog, xdp);
> +	switch (act) {
> +	case XDP_PASS:
> +		break;
> +	case XDP_TX:
> +		/* Send packet to TX ring for immediate transmission */
> +		xdpf = xdp_convert_buff_to_frame(xdp);
> +		if (unlikely(!xdpf))

This is the second unlikely() macro which is added in this patchset.
The rule with likely/unlikely() is that it should only be added if it
likely makes a difference in benchmarking.  Quite often the compiler
is able to predict that valid pointers are more likely than NULL
pointers so often these types of annotations don't make any difference
at all to the compiled code.  But it depends on the compiler and the -O2
options.

> +			goto drop;
> +
> +		q_idx = smp_processor_id() % emac->tx_ch_num;
> +		result = emac_xmit_xdp_frame(emac, xdpf, page, q_idx);
> +		if (result == ICSSG_XDP_CONSUMED)
> +			goto drop;
> +		break;
> +	case XDP_REDIRECT:
> +		err = xdp_do_redirect(emac->ndev, xdp, xdp_prog);
> +		if (err)
> +			goto drop;
> +		result = ICSSG_XDP_REDIR;
> +		break;
> +	default:
> +		bpf_warn_invalid_xdp_action(emac->ndev, xdp_prog, act);
> +		fallthrough;
> +	case XDP_ABORTED:
> +drop:
> +		trace_xdp_exception(emac->ndev, xdp_prog, act);
> +		fallthrough; /* handle aborts by dropping packet */
> +	case XDP_DROP:
> +		ndev->stats.tx_dropped++;
> +		result = ICSSG_XDP_CONSUMED;
> +		page_pool_recycle_direct(emac->rx_chns.pg_pool, page);
> +		break;
> +	}
> +
> +	return result;
> +}
> +
> +static int emac_rx_packet(struct prueth_emac *emac, u32 flow_id, int *xdp_state)


xdp_state should be a u32 because it's a bitfield.  Bitfields are never
signed.

>  {
>  	struct prueth_rx_chn *rx_chn = &emac->rx_chns;
>  	u32 buf_dma_len, pkt_len, port_id = 0;
> @@ -552,10 +715,12 @@ static int emac_rx_packet(struct prueth_emac *emac, u32 flow_id)
>  	struct page *page, *new_page;
>  	struct page_pool *pool;
>  	struct sk_buff *skb;
> +	struct xdp_buff xdp;
>  	u32 *psdata;
>  	void *pa;
>  	int ret;
>  
> +	*xdp_state = 0;
>  	pool = rx_chn->pg_pool;
>  	ret = k3_udma_glue_pop_rx_chn(rx_chn->rx_chn, flow_id, &desc_dma);
>  	if (ret) {
> @@ -596,9 +761,21 @@ static int emac_rx_packet(struct prueth_emac *emac, u32 flow_id)
>  		goto requeue;
>  	}
>  
> -	/* prepare skb and send to n/w stack */
>  	pa = page_address(page);
> -	skb = napi_build_skb(pa, PAGE_SIZE);
> +	if (emac->xdp_prog) {
> +		xdp_init_buff(&xdp, PAGE_SIZE, &rx_chn->xdp_rxq);
> +		xdp_prepare_buff(&xdp, pa, PRUETH_HEADROOM, pkt_len, false);
> +
> +		*xdp_state = emac_run_xdp(emac, &xdp, page);
> +		if (*xdp_state == ICSSG_XDP_PASS)
> +			skb = xdp_build_skb_from_buff(&xdp);
> +		else
> +			goto requeue;
> +	} else {
> +		/* prepare skb and send to n/w stack */
> +		skb = napi_build_skb(pa, PAGE_SIZE);
> +	}
> +
>  	if (!skb) {
>  		ndev->stats.rx_dropped++;
>  		page_pool_recycle_direct(pool, page);
> @@ -861,13 +1038,23 @@ static void prueth_tx_cleanup(void *data, dma_addr_t desc_dma)
>  	struct prueth_tx_chn *tx_chn = data;
>  	struct cppi5_host_desc_t *desc_tx;
>  	struct prueth_swdata *swdata;
> +	struct xdp_frame *xdpf;
>  	struct sk_buff *skb;
>  
>  	desc_tx = k3_cppi_desc_pool_dma2virt(tx_chn->desc_pool, desc_dma);
>  	swdata = cppi5_hdesc_get_swdata(desc_tx);
> -	if (swdata->type == PRUETH_SWDATA_SKB) {
> +
> +	switch (swdata->type) {
> +	case PRUETH_SWDATA_SKB:
>  		skb = swdata->data.skb;
>  		dev_kfree_skb_any(skb);
> +		break;
> +	case PRUETH_SWDATA_XDPF:
> +		xdpf = swdata->data.xdpf;
> +		xdp_return_frame(xdpf);
> +		break;
> +	default:
> +		break;
>  	}
>  
>  	prueth_xmit_free(tx_chn, desc_tx);
> @@ -904,15 +1091,18 @@ int icssg_napi_rx_poll(struct napi_struct *napi_rx, int budget)
>  		PRUETH_RX_FLOW_DATA_SR1 : PRUETH_RX_FLOW_DATA;
>  	int flow = emac->is_sr1 ?
>  		PRUETH_MAX_RX_FLOWS_SR1 : PRUETH_MAX_RX_FLOWS;
> +	int xdp_state_or = 0;
>  	int num_rx = 0;
>  	int cur_budget;
> +	int xdp_state;

Both xdp_state_or and xdp_state should be u32 because they are bitfields.

regards,
dan carpenter

>  	int ret;
>  
>  	while (flow--) {
>  		cur_budget = budget - num_rx;
>  
>  		while (cur_budget--) {
> -			ret = emac_rx_packet(emac, flow);
> +			ret = emac_rx_packet(emac, flow, &xdp_state);
> +			xdp_state_or |= xdp_state;
>  			if (ret)
>  				break;
>  			num_rx++;
Re: [EXTERNAL] Re: [PATCH net-next v3 3/3] net: ti: icssg-prueth: Add XDP support
Posted by Malladi, Meghana 11 months, 1 week ago

On 2/26/2025 4:19 PM, Dan Carpenter wrote:
> On Mon, Feb 24, 2025 at 04: 31: 02PM +0530, Meghana Malladi wrote: > @@ 
> -541,7 +558,153 @@ void emac_rx_timestamp(struct prueth_emac *emac, > 
> ssh->hwtstamp = ns_to_ktime(ns); > } > > -static int 
> emac_rx_packet(struct prueth_emac
> ZjQcmQRYFpfptBannerStart
> This message was sent from outside of Texas Instruments.
> Do not click links or open attachments unless you recognize the source 
> of this email and know the content is safe.
> Report Suspicious
> <https://us-phishalarm-ewt.proofpoint.com/EWT/v1/G3vK! 
> uldgnXdPPE27auXFPdnpH8jx2nnu2fsVXLMVOyEVUH9IX54g6v7RJRENIKzAm7XCuLfioMeFBSH4bAdUdQTaEArV63odoRERqTN_5Pk$>
> ZjQcmQRYFpfptBannerEnd
> 
> On Mon, Feb 24, 2025 at 04:31:02PM +0530, Meghana Malladi wrote:
>> @@ -541,7 +558,153 @@ void emac_rx_timestamp(struct prueth_emac *emac,
>>  	ssh->hwtstamp = ns_to_ktime(ns);
>>  }
>>  
>> -static int emac_rx_packet(struct prueth_emac *emac, u32 flow_id)
>> +/**
>> + * emac_xmit_xdp_frame - transmits an XDP frame
>> + * @emac: emac device
>> + * @xdpf: data to transmit
>> + * @page: page from page pool if already DMA mapped
>> + * @q_idx: queue id
>> + *
>> + * Return: XDP state
>> + */
>> +int emac_xmit_xdp_frame(struct prueth_emac *emac,
>> +			struct xdp_frame *xdpf,
>> +			struct page *page,
>> +			unsigned int q_idx)
>> +{
>> +	struct cppi5_host_desc_t *first_desc;
>> +	struct net_device *ndev = emac->ndev;
>> +	struct prueth_tx_chn *tx_chn;
>> +	dma_addr_t desc_dma, buf_dma;
>> +	struct prueth_swdata *swdata;
>> +	u32 *epib;
>> +	int ret;
>> +
>> +	void *data = xdpf->data;
>> +	u32 pkt_len = xdpf->len;
> 
> Get rid of these variables?
> 

Yeah ok

>> +
>> +	if (q_idx >= PRUETH_MAX_TX_QUEUES) {
>> +		netdev_err(ndev, "xdp tx: invalid q_id %d\n", q_idx);
>> +		return ICSSG_XDP_CONSUMED;	/* drop */
>> +	}
>> +
>> +	tx_chn = &emac->tx_chns[q_idx];
>> +
>> +	first_desc = k3_cppi_desc_pool_alloc(tx_chn->desc_pool);
>> +	if (!first_desc) {
>> +		netdev_dbg(ndev, "xdp tx: failed to allocate descriptor\n");
>> +		goto drop_free_descs;	/* drop */
>> +	}
>> +
>> +	if (page) { /* already DMA mapped by page_pool */
>> +		buf_dma = page_pool_get_dma_addr(page);
>> +		buf_dma += xdpf->headroom + sizeof(struct xdp_frame);
>> +	} else { /* Map the linear buffer */
>> +		buf_dma = dma_map_single(tx_chn->dma_dev, data, pkt_len, DMA_TO_DEVICE);
>> +		if (dma_mapping_error(tx_chn->dma_dev, buf_dma)) {
>> +			netdev_err(ndev, "xdp tx: failed to map data buffer\n");
>> +			goto drop_free_descs;	/* drop */
>> +		}
>> +	}
>> +
>> +	cppi5_hdesc_init(first_desc, CPPI5_INFO0_HDESC_EPIB_PRESENT,
>> +			 PRUETH_NAV_PS_DATA_SIZE);
>> +	cppi5_hdesc_set_pkttype(first_desc, 0);
>> +	epib = first_desc->epib;
>> +	epib[0] = 0;
>> +	epib[1] = 0;
>> +
>> +	/* set dst tag to indicate internal qid at the firmware which is at
>> +	 * bit8..bit15. bit0..bit7 indicates port num for directed
>> +	 * packets in case of switch mode operation
>> +	 */
>> +	cppi5_desc_set_tags_ids(&first_desc->hdr, 0, (emac->port_id | (q_idx << 8)));
>> +	k3_udma_glue_tx_dma_to_cppi5_addr(tx_chn->tx_chn, &buf_dma);
>> +	cppi5_hdesc_attach_buf(first_desc, buf_dma, pkt_len, buf_dma, pkt_len);
>> +	swdata = cppi5_hdesc_get_swdata(first_desc);
>> +	if (page) {
>> +		swdata->type = PRUETH_SWDATA_PAGE;
>> +		swdata->data.page = page;
>> +	} else {
>> +		swdata->type = PRUETH_SWDATA_XDPF;
>> +		swdata->data.xdpf = xdpf;
>> +	}
>> +
>> +	cppi5_hdesc_set_pktlen(first_desc, pkt_len);
>> +	desc_dma = k3_cppi_desc_pool_virt2dma(tx_chn->desc_pool, first_desc);
>> +
>> +	ret = k3_udma_glue_push_tx_chn(tx_chn->tx_chn, first_desc, desc_dma);
>> +	if (ret) {
>> +		netdev_err(ndev, "xdp tx: push failed: %d\n", ret);
>> +		goto drop_free_descs;
>> +	}
>> +
>> +	return ICSSG_XDP_TX;
>> +
>> +drop_free_descs:
>> +	prueth_xmit_free(tx_chn, first_desc);
>> +	return ICSSG_XDP_CONSUMED;
>> +}
>> +EXPORT_SYMBOL_GPL(emac_xmit_xdp_frame);
>> +
>> +/**
>> + * emac_run_xdp - run an XDP program
>> + * @emac: emac device
>> + * @xdp: XDP buffer containing the frame
>> + * @page: page with RX data if already DMA mapped
>> + *
>> + * Return: XDP state
>> + */
>> +static int emac_run_xdp(struct prueth_emac *emac, struct xdp_buff *xdp,
>> +			struct page *page)
>> +{
>> +	struct net_device *ndev = emac->ndev;
>> +	int err, result = ICSSG_XDP_PASS;
>> +	struct bpf_prog *xdp_prog;
>> +	struct xdp_frame *xdpf;
>> +	int q_idx;
>> +	u32 act;
>> +
>> +	xdp_prog = READ_ONCE(emac->xdp_prog);
>> +	act = bpf_prog_run_xdp(xdp_prog, xdp);
>> +	switch (act) {
>> +	case XDP_PASS:
>> +		break;
>> +	case XDP_TX:
>> +		/* Send packet to TX ring for immediate transmission */
>> +		xdpf = xdp_convert_buff_to_frame(xdp);
>> +		if (unlikely(!xdpf))
> 
> This is the second unlikely() macro which is added in this patchset.
> The rule with likely/unlikely() is that it should only be added if it
> likely makes a difference in benchmarking.  Quite often the compiler
> is able to predict that valid pointers are more likely than NULL
> pointers so often these types of annotations don't make any difference
> at all to the compiled code.  But it depends on the compiler and the -O2
> options.
> 

Do correct me if I am wrong, but from my understanding, XDP feature 
depends alot of performance and benchmarking and having unlikely does 
make a difference. Atleast in all the other drivers I see this being 
used for XDP.

>> +			goto drop;
>> +
>> +		q_idx = smp_processor_id() % emac->tx_ch_num;
>> +		result = emac_xmit_xdp_frame(emac, xdpf, page, q_idx);
>> +		if (result == ICSSG_XDP_CONSUMED)
>> +			goto drop;
>> +		break;
>> +	case XDP_REDIRECT:
>> +		err = xdp_do_redirect(emac->ndev, xdp, xdp_prog);
>> +		if (err)
>> +			goto drop;
>> +		result = ICSSG_XDP_REDIR;
>> +		break;
>> +	default:
>> +		bpf_warn_invalid_xdp_action(emac->ndev, xdp_prog, act);
>> +		fallthrough;
>> +	case XDP_ABORTED:
>> +drop:
>> +		trace_xdp_exception(emac->ndev, xdp_prog, act);
>> +		fallthrough; /* handle aborts by dropping packet */
>> +	case XDP_DROP:
>> +		ndev->stats.tx_dropped++;
>> +		result = ICSSG_XDP_CONSUMED;
>> +		page_pool_recycle_direct(emac->rx_chns.pg_pool, page);
>> +		break;
>> +	}
>> +
>> +	return result;
>> +}
>> +
>> +static int emac_rx_packet(struct prueth_emac *emac, u32 flow_id, int *xdp_state)
> 
> 
> xdp_state should be a u32 because it's a bitfield.  Bitfields are never
> signed.

Ok I will update it.

> 
>>  {
>>  	struct prueth_rx_chn *rx_chn = &emac->rx_chns;
>>  	u32 buf_dma_len, pkt_len, port_id = 0;
>> @@ -552,10 +715,12 @@ static int emac_rx_packet(struct prueth_emac *emac, u32 flow_id)
>>  	struct page *page, *new_page;
>>  	struct page_pool *pool;
>>  	struct sk_buff *skb;
>> +	struct xdp_buff xdp;
>>  	u32 *psdata;
>>  	void *pa;
>>  	int ret;
>>  
>> +	*xdp_state = 0;
>>  	pool = rx_chn->pg_pool;
>>  	ret = k3_udma_glue_pop_rx_chn(rx_chn->rx_chn, flow_id, &desc_dma);
>>  	if (ret) {
>> @@ -596,9 +761,21 @@ static int emac_rx_packet(struct prueth_emac *emac, u32 flow_id)
>>  		goto requeue;
>>  	}
>>  
>> -	/* prepare skb and send to n/w stack */
>>  	pa = page_address(page);
>> -	skb = napi_build_skb(pa, PAGE_SIZE);
>> +	if (emac->xdp_prog) {
>> +		xdp_init_buff(&xdp, PAGE_SIZE, &rx_chn->xdp_rxq);
>> +		xdp_prepare_buff(&xdp, pa, PRUETH_HEADROOM, pkt_len, false);
>> +
>> +		*xdp_state = emac_run_xdp(emac, &xdp, page);
>> +		if (*xdp_state == ICSSG_XDP_PASS)
>> +			skb = xdp_build_skb_from_buff(&xdp);
>> +		else
>> +			goto requeue;
>> +	} else {
>> +		/* prepare skb and send to n/w stack */
>> +		skb = napi_build_skb(pa, PAGE_SIZE);
>> +	}
>> +
>>  	if (!skb) {
>>  		ndev->stats.rx_dropped++;
>>  		page_pool_recycle_direct(pool, page);
>> @@ -861,13 +1038,23 @@ static void prueth_tx_cleanup(void *data, dma_addr_t desc_dma)
>>  	struct prueth_tx_chn *tx_chn = data;
>>  	struct cppi5_host_desc_t *desc_tx;
>>  	struct prueth_swdata *swdata;
>> +	struct xdp_frame *xdpf;
>>  	struct sk_buff *skb;
>>  
>>  	desc_tx = k3_cppi_desc_pool_dma2virt(tx_chn->desc_pool, desc_dma);
>>  	swdata = cppi5_hdesc_get_swdata(desc_tx);
>> -	if (swdata->type == PRUETH_SWDATA_SKB) {
>> +
>> +	switch (swdata->type) {
>> +	case PRUETH_SWDATA_SKB:
>>  		skb = swdata->data.skb;
>>  		dev_kfree_skb_any(skb);
>> +		break;
>> +	case PRUETH_SWDATA_XDPF:
>> +		xdpf = swdata->data.xdpf;
>> +		xdp_return_frame(xdpf);
>> +		break;
>> +	default:
>> +		break;
>>  	}
>>  
>>  	prueth_xmit_free(tx_chn, desc_tx);
>> @@ -904,15 +1091,18 @@ int icssg_napi_rx_poll(struct napi_struct *napi_rx, int budget)
>>  		PRUETH_RX_FLOW_DATA_SR1 : PRUETH_RX_FLOW_DATA;
>>  	int flow = emac->is_sr1 ?
>>  		PRUETH_MAX_RX_FLOWS_SR1 : PRUETH_MAX_RX_FLOWS;
>> +	int xdp_state_or = 0;
>>  	int num_rx = 0;
>>  	int cur_budget;
>> +	int xdp_state;
> 
> Both xdp_state_or and xdp_state should be u32 because they are bitfields.
> 

Ok I will update it.

> regards,
> dan carpenter
> 
>>  	int ret;
>>  
>>  	while (flow--) {
>>  		cur_budget = budget - num_rx;
>>  
>>  		while (cur_budget--) {
>> -			ret = emac_rx_packet(emac, flow);
>> +			ret = emac_rx_packet(emac, flow, &xdp_state);
>> +			xdp_state_or |= xdp_state;
>>  			if (ret)
>>  				break;
>>  			num_rx++;
> 

Re: [EXTERNAL] Re: [PATCH net-next v3 3/3] net: ti: icssg-prueth: Add XDP support
Posted by Dan Carpenter 11 months, 1 week ago
On Mon, Mar 03, 2025 at 05:36:41PM +0530, Malladi, Meghana wrote:
> > > +static int emac_run_xdp(struct prueth_emac *emac, struct xdp_buff *xdp,
> > > +			struct page *page)
> > > +{
> > > +	struct net_device *ndev = emac->ndev;
> > > +	int err, result = ICSSG_XDP_PASS;
> > > +	struct bpf_prog *xdp_prog;
> > > +	struct xdp_frame *xdpf;
> > > +	int q_idx;
> > > +	u32 act;
> > > +
> > > +	xdp_prog = READ_ONCE(emac->xdp_prog);
> > > +	act = bpf_prog_run_xdp(xdp_prog, xdp);
> > > +	switch (act) {
> > > +	case XDP_PASS:
> > > +		break;
> > > +	case XDP_TX:
> > > +		/* Send packet to TX ring for immediate transmission */
> > > +		xdpf = xdp_convert_buff_to_frame(xdp);
> > > +		if (unlikely(!xdpf))
> > 
> > This is the second unlikely() macro which is added in this patchset.
> > The rule with likely/unlikely() is that it should only be added if it
> > likely makes a difference in benchmarking.  Quite often the compiler
> > is able to predict that valid pointers are more likely than NULL
> > pointers so often these types of annotations don't make any difference
> > at all to the compiled code.  But it depends on the compiler and the -O2
> > options.
> > 
> 
> Do correct me if I am wrong, but from my understanding, XDP feature depends
> alot of performance and benchmarking and having unlikely does make a
> difference. Atleast in all the other drivers I see this being used for XDP.
> 

Which compiler are you on when you say that "having unlikely does make a
difference"?

I'm on gcc version 14.2.0 (Debian 14.2.0-16) and it doesn't make a
difference to the compiled code.  This matches what one would expect from
a compiler.  Valid pointers are fast path and NULL pointers are slow path.

Adding an unlikely() is a micro optimization.  There are so many other
things you can do to speed up the code.  I wouldn't start with that.

regards,
dan
Re: [EXTERNAL] Re: [PATCH net-next v3 3/3] net: ti: icssg-prueth: Add XDP support
Posted by Malladi, Meghana 11 months, 1 week ago

On 3/3/2025 6:01 PM, Dan Carpenter wrote:
> On Mon, Mar 03, 2025 at 05: 36: 41PM +0530, Malladi, Meghana wrote: > > 
>  > +static int emac_run_xdp(struct prueth_emac *emac, struct xdp_buff 
> *xdp, > > > + struct page *page) > > > +{ > > > + struct net_device
> ZjQcmQRYFpfptBannerStart
> This message was sent from outside of Texas Instruments.
> Do not click links or open attachments unless you recognize the source 
> of this email and know the content is safe.
> Report Suspicious
> <https://us-phishalarm-ewt.proofpoint.com/EWT/v1/G3vK! 
> uldqV3eFFkc7oMXFHHkDX4AFLVsE3ldskf6bPMMFmxDOsNtMfZjUscGelUkBFpAeybNre38L_c2LiiUb7AZxLvAeqSk9ifgbPE1AYFU$>
> ZjQcmQRYFpfptBannerEnd
> 
> On Mon, Mar 03, 2025 at 05:36:41PM +0530, Malladi, Meghana wrote:
>> > > +static int emac_run_xdp(struct prueth_emac *emac, struct xdp_buff *xdp,
>> > > +			struct page *page)
>> > > +{
>> > > +	struct net_device *ndev = emac->ndev;
>> > > +	int err, result = ICSSG_XDP_PASS;
>> > > +	struct bpf_prog *xdp_prog;
>> > > +	struct xdp_frame *xdpf;
>> > > +	int q_idx;
>> > > +	u32 act;
>> > > +
>> > > +	xdp_prog = READ_ONCE(emac->xdp_prog);
>> > > +	act = bpf_prog_run_xdp(xdp_prog, xdp);
>> > > +	switch (act) {
>> > > +	case XDP_PASS:
>> > > +		break;
>> > > +	case XDP_TX:
>> > > +		/* Send packet to TX ring for immediate transmission */
>> > > +		xdpf = xdp_convert_buff_to_frame(xdp);
>> > > +		if (unlikely(!xdpf))
>> > 
>> > This is the second unlikely() macro which is added in this patchset.
>> > The rule with likely/unlikely() is that it should only be added if it
>> > likely makes a difference in benchmarking.  Quite often the compiler
>> > is able to predict that valid pointers are more likely than NULL
>> > pointers so often these types of annotations don't make any difference
>> > at all to the compiled code.  But it depends on the compiler and the -O2
>> > options.
>> > 
>> 
>> Do correct me if I am wrong, but from my understanding, XDP feature depends
>> alot of performance and benchmarking and having unlikely does make a
>> difference. Atleast in all the other drivers I see this being used for XDP.
>> 
> 
> Which compiler are you on when you say that "having unlikely does make a
> difference"?

I'm on gcc version 10.3.1.

> 
> I'm on gcc version 14.2.0 (Debian 14.2.0-16) and it doesn't make a
> difference to the compiled code.  This matches what one would expect from
> a compiler.  Valid pointers are fast path and NULL pointers are slow path.
> 

Can you tell me how did you verify this? I actually don't know what 
level of optimization to expect from a compiler. I said so, because I 
have checked with other drivers which implemented XDP and everywhere 
unlikely is used. But now I understand its not the driver but the 
compiler that plays the major role in defining the optimization.

> Adding an unlikely() is a micro optimization.  There are so many other
> things you can do to speed up the code.  I wouldn't start with that.
>

Ok, if you believe that unlikely is doing more harm than good, I am ok 
with dropping them off.

> regards,
> dan
> 

Re: [EXTERNAL] Re: [PATCH net-next v3 3/3] net: ti: icssg-prueth: Add XDP support
Posted by Dan Carpenter 11 months, 1 week ago
What I mean is just compile the .o file with and without the unlikely().

$ md5sum drivers/net/ethernet/ti/icssg/icssg_common.o*
2de875935222b9ecd8483e61848c4fc9  drivers/net/ethernet/ti/icssg/icssg_common.o.annotation
2de875935222b9ecd8483e61848c4fc9  drivers/net/ethernet/ti/icssg/icssg_common.o.no_anotation

Generally the rule is that you should leave likely/unlikely() annotations
out unless it's going to make a difference on a benchmark.  I'm not going
to jump down people's throat about this, and if you want to leave it,
it's fine.  But it just struct me as weird so that's why I commented on
it.

regards,
dan carpenter

diff --git a/drivers/net/ethernet/ti/icssg/icssg_common.c b/drivers/net/ethernet/ti/icssg/icssg_common.c
index 34d16e00c2ec..3db5bae44e61 100644
--- a/drivers/net/ethernet/ti/icssg/icssg_common.c
+++ b/drivers/net/ethernet/ti/icssg/icssg_common.c
@@ -672,7 +672,7 @@ static int emac_run_xdp(struct prueth_emac *emac, struct xdp_buff *xdp,
 	case XDP_TX:
 		/* Send packet to TX ring for immediate transmission */
 		xdpf = xdp_convert_buff_to_frame(xdp);
-		if (unlikely(!xdpf))
+		if (!xdpf)
 			goto drop;
 
 		q_idx = smp_processor_id() % emac->tx_ch_num;
Re: [EXTERNAL] Re: [PATCH net-next v3 3/3] net: ti: icssg-prueth: Add XDP support
Posted by Malladi, Meghana 11 months, 1 week ago
Hi Dan,

On 3/3/2025 7:38 PM, Dan Carpenter wrote:
> What I mean is just compile the .o file with and without the unlikely(). 
> $ md5sum drivers/net/ethernet/ti/icssg/icssg_common. o* 
> 2de875935222b9ecd8483e61848c4fc9 drivers/net/ethernet/ti/icssg/ 
> icssg_common. o. annotation 2de875935222b9ecd8483e61848c4fc9
> ZjQcmQRYFpfptBannerStart
> This message was sent from outside of Texas Instruments.
> Do not click links or open attachments unless you recognize the source 
> of this email and know the content is safe.
> Report Suspicious
> <https://us-phishalarm-ewt.proofpoint.com/EWT/v1/G3vK! 
> uldq3TevVoc7KuXEXHnDf- 
> TXtuZ0bON9iO0jTE7PyIS1jjfs_CzpvIiMi93PVt0MVDzjHGQSK__vY_-6rO7q86rFmBMGW4SSqK5pvNE$>
> ZjQcmQRYFpfptBannerEnd
> 
> What I mean is just compile the .o file with and without the unlikely().
> 
> $ md5sum drivers/net/ethernet/ti/icssg/icssg_common.o*
> 2de875935222b9ecd8483e61848c4fc9  drivers/net/ethernet/ti/icssg/icssg_common.o.annotation
> 2de875935222b9ecd8483e61848c4fc9  drivers/net/ethernet/ti/icssg/icssg_common.o.no_anotation
> 
> Generally the rule is that you should leave likely/unlikely() annotations
> out unless it's going to make a difference on a benchmark.  I'm not going
> to jump down people's throat about this, and if you want to leave it,
> it's fine.  But it just struct me as weird so that's why I commented on
> it.
> 

I have done some performance tests to see if unlikely() is gonna make 
any impact and I see around ~9000 pps and 6Mbps drop without unlikely() 
for small packet sizes (60 Bytes)

You can see summary of the tests here:

packet size   with unlikely(pps)  without unlikely(pps)   regression

       60        462377                453251                 9126

       80        403020                399372                 3648

       96        402059                396881                 5178

      120        392725                391312                 4413

      140        327706                327099                 607

packet size  with unlikely(Mbps)  without unlikely(Mbps)  regression

      60         311                   305                    6

      80         335                   332                    3

      96         386                   381                    5

      120        456                   451                    5

      140        430                   429                    1

For more details on the logs, please 
refer:https://gist.github.com/MeghanaMalladiTI/cc6cc7709791376cb486eb1222de67be

Regards,
Meghana Malladi

> regards,
> dan carpenter
> 
> diff --git a/drivers/net/ethernet/ti/icssg/icssg_common.c b/drivers/net/ethernet/ti/icssg/icssg_common.c
> index 34d16e00c2ec..3db5bae44e61 100644
> --- a/drivers/net/ethernet/ti/icssg/icssg_common.c
> +++ b/drivers/net/ethernet/ti/icssg/icssg_common.c
> @@ -672,7 +672,7 @@ static int emac_run_xdp(struct prueth_emac *emac, struct xdp_buff *xdp,
>   	case XDP_TX:
>   		/* Send packet to TX ring for immediate transmission */
>   		xdpf = xdp_convert_buff_to_frame(xdp);
> -		if (unlikely(!xdpf))
> +		if (!xdpf)
>   			goto drop;
>   
>   		q_idx = smp_processor_id() % emac->tx_ch_num;
> 

Re: [EXTERNAL] Re: [PATCH net-next v3 3/3] net: ti: icssg-prueth: Add XDP support
Posted by Dan Carpenter 11 months, 1 week ago
On Wed, Mar 05, 2025 at 02:53:07PM +0530, Malladi, Meghana wrote:
> Hi Dan,
> 
> On 3/3/2025 7:38 PM, Dan Carpenter wrote:
> > What I mean is just compile the .o file with and without the unlikely().
> > $ md5sum drivers/net/ethernet/ti/icssg/icssg_common. o*
> > 2de875935222b9ecd8483e61848c4fc9 drivers/net/ethernet/ti/icssg/
> > icssg_common. o. annotation 2de875935222b9ecd8483e61848c4fc9
> > ZjQcmQRYFpfptBannerStart
> > This message was sent from outside of Texas Instruments.
> > Do not click links or open attachments unless you recognize the source
> > of this email and know the content is safe.
> > Report Suspicious
> > <https://us-phishalarm-ewt.proofpoint.com/EWT/v1/G3vK!
> > uldq3TevVoc7KuXEXHnDf- TXtuZ0bON9iO0jTE7PyIS1jjfs_CzpvIiMi93PVt0MVDzjHGQSK__vY_-6rO7q86rFmBMGW4SSqK5pvNE$>
> > ZjQcmQRYFpfptBannerEnd
> > 
> > What I mean is just compile the .o file with and without the unlikely().
> > 
> > $ md5sum drivers/net/ethernet/ti/icssg/icssg_common.o*
> > 2de875935222b9ecd8483e61848c4fc9  drivers/net/ethernet/ti/icssg/icssg_common.o.annotation
> > 2de875935222b9ecd8483e61848c4fc9  drivers/net/ethernet/ti/icssg/icssg_common.o.no_anotation
> > 
> > Generally the rule is that you should leave likely/unlikely() annotations
> > out unless it's going to make a difference on a benchmark.  I'm not going
> > to jump down people's throat about this, and if you want to leave it,
> > it's fine.  But it just struct me as weird so that's why I commented on
> > it.
> > 
> 
> I have done some performance tests to see if unlikely() is gonna make any
> impact and I see around ~9000 pps and 6Mbps drop without unlikely() for
> small packet sizes (60 Bytes)
> 
> You can see summary of the tests here:
> 
> packet size   with unlikely(pps)  without unlikely(pps)   regression
> 
>       60        462377                453251                 9126
> 
>       80        403020                399372                 3648
> 
>       96        402059                396881                 5178
> 
>      120        392725                391312                 4413
> 
>      140        327706                327099                 607
> 
> packet size  with unlikely(Mbps)  without unlikely(Mbps)  regression
> 
>      60         311                   305                    6
> 
>      80         335                   332                    3
> 
>      96         386                   381                    5
> 
>      120        456                   451                    5
> 
>      140        430                   429                    1
> 
> For more details on the logs, please refer:https://gist.github.com/MeghanaMalladiTI/cc6cc7709791376cb486eb1222de67be
> 

Huh.  That's very interesting.  Fine, then.

regards,
dan carpenter