From: Roger Quadros <rogerq@kernel.org>
Add native XDP support. We do not support zero copy yet.
Signed-off-by: Roger Quadros <rogerq@kernel.org>
Signed-off-by: MD Danish Anwar <danishanwar@ti.com>
Signed-off-by: Meghana Malladi <m-malladi@ti.com>
---
Changes since v2 (v3-v2):
- Use the page_pool contained in the page instead of passing the page_pool
(rx_chn) as part of swdata
- dev_sw_netstats_tx_add() instead of incrementing the stats directly
- Add missing ndev->stats.tx_dropped++ wherever applicable
- Move k3_cppi_desc_pool_alloc() before the DMA mapping for easier cleanup
on failure
- Replace rxp->napi_id with emac->napi_rx.napi_id in prueth_create_xdp_rxqs()
All the above changes have been suggested by Roger Quadros <rogerq@kernel.org>
drivers/net/ethernet/ti/icssg/icssg_common.c | 219 +++++++++++++++++--
drivers/net/ethernet/ti/icssg/icssg_prueth.c | 125 ++++++++++-
drivers/net/ethernet/ti/icssg/icssg_prueth.h | 17 ++
3 files changed, 346 insertions(+), 15 deletions(-)
diff --git a/drivers/net/ethernet/ti/icssg/icssg_common.c b/drivers/net/ethernet/ti/icssg/icssg_common.c
index 01eeabe83eff..4716e24ea05d 100644
--- a/drivers/net/ethernet/ti/icssg/icssg_common.c
+++ b/drivers/net/ethernet/ti/icssg/icssg_common.c
@@ -98,11 +98,19 @@ void prueth_xmit_free(struct prueth_tx_chn *tx_chn,
{
struct cppi5_host_desc_t *first_desc, *next_desc;
dma_addr_t buf_dma, next_desc_dma;
+ struct prueth_swdata *swdata;
u32 buf_dma_len;
first_desc = desc;
next_desc = first_desc;
+ swdata = cppi5_hdesc_get_swdata(desc);
+ if (swdata->type == PRUETH_SWDATA_PAGE) {
+ page_pool_recycle_direct(swdata->data.page->pp,
+ swdata->data.page);
+ goto free_desc;
+ }
+
cppi5_hdesc_get_obuf(first_desc, &buf_dma, &buf_dma_len);
k3_udma_glue_tx_cppi5_to_dma_addr(tx_chn->tx_chn, &buf_dma);
@@ -126,6 +134,7 @@ void prueth_xmit_free(struct prueth_tx_chn *tx_chn,
k3_cppi_desc_pool_free(tx_chn->desc_pool, next_desc);
}
+free_desc:
k3_cppi_desc_pool_free(tx_chn->desc_pool, first_desc);
}
EXPORT_SYMBOL_GPL(prueth_xmit_free);
@@ -139,6 +148,7 @@ int emac_tx_complete_packets(struct prueth_emac *emac, int chn,
struct prueth_swdata *swdata;
struct prueth_tx_chn *tx_chn;
unsigned int total_bytes = 0;
+ struct xdp_frame *xdpf;
struct sk_buff *skb;
dma_addr_t desc_dma;
int res, num_tx = 0;
@@ -168,21 +178,28 @@ int emac_tx_complete_packets(struct prueth_emac *emac, int chn,
continue;
}
- if (swdata->type != PRUETH_SWDATA_SKB) {
+ switch (swdata->type) {
+ case PRUETH_SWDATA_SKB:
+ skb = swdata->data.skb;
+ dev_sw_netstats_tx_add(skb->dev, 1, skb->len);
+ total_bytes += skb->len;
+ napi_consume_skb(skb, budget);
+ break;
+ case PRUETH_SWDATA_XDPF:
+ xdpf = swdata->data.xdpf;
+ dev_sw_netstats_tx_add(ndev, 1, xdpf->len);
+ total_bytes += xdpf->len;
+ xdp_return_frame(xdpf);
+ break;
+ default:
netdev_err(ndev, "tx_complete: invalid swdata type %d\n", swdata->type);
prueth_xmit_free(tx_chn, desc_tx);
+ ndev->stats.tx_dropped++;
budget++;
continue;
}
- skb = swdata->data.skb;
prueth_xmit_free(tx_chn, desc_tx);
-
- ndev = skb->dev;
- ndev->stats.tx_packets++;
- ndev->stats.tx_bytes += skb->len;
- total_bytes += skb->len;
- napi_consume_skb(skb, budget);
num_tx++;
}
@@ -541,7 +558,153 @@ void emac_rx_timestamp(struct prueth_emac *emac,
ssh->hwtstamp = ns_to_ktime(ns);
}
-static int emac_rx_packet(struct prueth_emac *emac, u32 flow_id)
+/**
+ * emac_xmit_xdp_frame - transmits an XDP frame
+ * @emac: emac device
+ * @xdpf: data to transmit
+ * @page: page from page pool if already DMA mapped
+ * @q_idx: queue id
+ *
+ * Return: XDP state
+ */
+int emac_xmit_xdp_frame(struct prueth_emac *emac,
+ struct xdp_frame *xdpf,
+ struct page *page,
+ unsigned int q_idx)
+{
+ struct cppi5_host_desc_t *first_desc;
+ struct net_device *ndev = emac->ndev;
+ struct prueth_tx_chn *tx_chn;
+ dma_addr_t desc_dma, buf_dma;
+ struct prueth_swdata *swdata;
+ u32 *epib;
+ int ret;
+
+ void *data = xdpf->data;
+ u32 pkt_len = xdpf->len;
+
+ if (q_idx >= PRUETH_MAX_TX_QUEUES) {
+ netdev_err(ndev, "xdp tx: invalid q_id %d\n", q_idx);
+ return ICSSG_XDP_CONSUMED; /* drop */
+ }
+
+ tx_chn = &emac->tx_chns[q_idx];
+
+ first_desc = k3_cppi_desc_pool_alloc(tx_chn->desc_pool);
+ if (!first_desc) {
+ netdev_dbg(ndev, "xdp tx: failed to allocate descriptor\n");
+ goto drop_free_descs; /* drop */
+ }
+
+ if (page) { /* already DMA mapped by page_pool */
+ buf_dma = page_pool_get_dma_addr(page);
+ buf_dma += xdpf->headroom + sizeof(struct xdp_frame);
+ } else { /* Map the linear buffer */
+ buf_dma = dma_map_single(tx_chn->dma_dev, data, pkt_len, DMA_TO_DEVICE);
+ if (dma_mapping_error(tx_chn->dma_dev, buf_dma)) {
+ netdev_err(ndev, "xdp tx: failed to map data buffer\n");
+ goto drop_free_descs; /* drop */
+ }
+ }
+
+ cppi5_hdesc_init(first_desc, CPPI5_INFO0_HDESC_EPIB_PRESENT,
+ PRUETH_NAV_PS_DATA_SIZE);
+ cppi5_hdesc_set_pkttype(first_desc, 0);
+ epib = first_desc->epib;
+ epib[0] = 0;
+ epib[1] = 0;
+
+ /* set dst tag to indicate internal qid at the firmware which is at
+ * bit8..bit15. bit0..bit7 indicates port num for directed
+ * packets in case of switch mode operation
+ */
+ cppi5_desc_set_tags_ids(&first_desc->hdr, 0, (emac->port_id | (q_idx << 8)));
+ k3_udma_glue_tx_dma_to_cppi5_addr(tx_chn->tx_chn, &buf_dma);
+ cppi5_hdesc_attach_buf(first_desc, buf_dma, pkt_len, buf_dma, pkt_len);
+ swdata = cppi5_hdesc_get_swdata(first_desc);
+ if (page) {
+ swdata->type = PRUETH_SWDATA_PAGE;
+ swdata->data.page = page;
+ } else {
+ swdata->type = PRUETH_SWDATA_XDPF;
+ swdata->data.xdpf = xdpf;
+ }
+
+ cppi5_hdesc_set_pktlen(first_desc, pkt_len);
+ desc_dma = k3_cppi_desc_pool_virt2dma(tx_chn->desc_pool, first_desc);
+
+ ret = k3_udma_glue_push_tx_chn(tx_chn->tx_chn, first_desc, desc_dma);
+ if (ret) {
+ netdev_err(ndev, "xdp tx: push failed: %d\n", ret);
+ goto drop_free_descs;
+ }
+
+ return ICSSG_XDP_TX;
+
+drop_free_descs:
+ prueth_xmit_free(tx_chn, first_desc);
+ return ICSSG_XDP_CONSUMED;
+}
+EXPORT_SYMBOL_GPL(emac_xmit_xdp_frame);
+
+/**
+ * emac_run_xdp - run an XDP program
+ * @emac: emac device
+ * @xdp: XDP buffer containing the frame
+ * @page: page with RX data if already DMA mapped
+ *
+ * Return: XDP state
+ */
+static int emac_run_xdp(struct prueth_emac *emac, struct xdp_buff *xdp,
+ struct page *page)
+{
+ struct net_device *ndev = emac->ndev;
+ int err, result = ICSSG_XDP_PASS;
+ struct bpf_prog *xdp_prog;
+ struct xdp_frame *xdpf;
+ int q_idx;
+ u32 act;
+
+ xdp_prog = READ_ONCE(emac->xdp_prog);
+ act = bpf_prog_run_xdp(xdp_prog, xdp);
+ switch (act) {
+ case XDP_PASS:
+ break;
+ case XDP_TX:
+ /* Send packet to TX ring for immediate transmission */
+ xdpf = xdp_convert_buff_to_frame(xdp);
+ if (unlikely(!xdpf))
+ goto drop;
+
+ q_idx = smp_processor_id() % emac->tx_ch_num;
+ result = emac_xmit_xdp_frame(emac, xdpf, page, q_idx);
+ if (result == ICSSG_XDP_CONSUMED)
+ goto drop;
+ break;
+ case XDP_REDIRECT:
+ err = xdp_do_redirect(emac->ndev, xdp, xdp_prog);
+ if (err)
+ goto drop;
+ result = ICSSG_XDP_REDIR;
+ break;
+ default:
+ bpf_warn_invalid_xdp_action(emac->ndev, xdp_prog, act);
+ fallthrough;
+ case XDP_ABORTED:
+drop:
+ trace_xdp_exception(emac->ndev, xdp_prog, act);
+ fallthrough; /* handle aborts by dropping packet */
+ case XDP_DROP:
+ ndev->stats.tx_dropped++;
+ result = ICSSG_XDP_CONSUMED;
+ page_pool_recycle_direct(emac->rx_chns.pg_pool, page);
+ break;
+ }
+
+ return result;
+}
+
+static int emac_rx_packet(struct prueth_emac *emac, u32 flow_id, int *xdp_state)
{
struct prueth_rx_chn *rx_chn = &emac->rx_chns;
u32 buf_dma_len, pkt_len, port_id = 0;
@@ -552,10 +715,12 @@ static int emac_rx_packet(struct prueth_emac *emac, u32 flow_id)
struct page *page, *new_page;
struct page_pool *pool;
struct sk_buff *skb;
+ struct xdp_buff xdp;
u32 *psdata;
void *pa;
int ret;
+ *xdp_state = 0;
pool = rx_chn->pg_pool;
ret = k3_udma_glue_pop_rx_chn(rx_chn->rx_chn, flow_id, &desc_dma);
if (ret) {
@@ -596,9 +761,21 @@ static int emac_rx_packet(struct prueth_emac *emac, u32 flow_id)
goto requeue;
}
- /* prepare skb and send to n/w stack */
pa = page_address(page);
- skb = napi_build_skb(pa, PAGE_SIZE);
+ if (emac->xdp_prog) {
+ xdp_init_buff(&xdp, PAGE_SIZE, &rx_chn->xdp_rxq);
+ xdp_prepare_buff(&xdp, pa, PRUETH_HEADROOM, pkt_len, false);
+
+ *xdp_state = emac_run_xdp(emac, &xdp, page);
+ if (*xdp_state == ICSSG_XDP_PASS)
+ skb = xdp_build_skb_from_buff(&xdp);
+ else
+ goto requeue;
+ } else {
+ /* prepare skb and send to n/w stack */
+ skb = napi_build_skb(pa, PAGE_SIZE);
+ }
+
if (!skb) {
ndev->stats.rx_dropped++;
page_pool_recycle_direct(pool, page);
@@ -861,13 +1038,23 @@ static void prueth_tx_cleanup(void *data, dma_addr_t desc_dma)
struct prueth_tx_chn *tx_chn = data;
struct cppi5_host_desc_t *desc_tx;
struct prueth_swdata *swdata;
+ struct xdp_frame *xdpf;
struct sk_buff *skb;
desc_tx = k3_cppi_desc_pool_dma2virt(tx_chn->desc_pool, desc_dma);
swdata = cppi5_hdesc_get_swdata(desc_tx);
- if (swdata->type == PRUETH_SWDATA_SKB) {
+
+ switch (swdata->type) {
+ case PRUETH_SWDATA_SKB:
skb = swdata->data.skb;
dev_kfree_skb_any(skb);
+ break;
+ case PRUETH_SWDATA_XDPF:
+ xdpf = swdata->data.xdpf;
+ xdp_return_frame(xdpf);
+ break;
+ default:
+ break;
}
prueth_xmit_free(tx_chn, desc_tx);
@@ -904,15 +1091,18 @@ int icssg_napi_rx_poll(struct napi_struct *napi_rx, int budget)
PRUETH_RX_FLOW_DATA_SR1 : PRUETH_RX_FLOW_DATA;
int flow = emac->is_sr1 ?
PRUETH_MAX_RX_FLOWS_SR1 : PRUETH_MAX_RX_FLOWS;
+ int xdp_state_or = 0;
int num_rx = 0;
int cur_budget;
+ int xdp_state;
int ret;
while (flow--) {
cur_budget = budget - num_rx;
while (cur_budget--) {
- ret = emac_rx_packet(emac, flow);
+ ret = emac_rx_packet(emac, flow, &xdp_state);
+ xdp_state_or |= xdp_state;
if (ret)
break;
num_rx++;
@@ -922,6 +1112,9 @@ int icssg_napi_rx_poll(struct napi_struct *napi_rx, int budget)
break;
}
+ if (xdp_state_or & ICSSG_XDP_REDIR)
+ xdp_do_flush();
+
if (num_rx < budget && napi_complete_done(napi_rx, num_rx)) {
if (unlikely(emac->rx_pace_timeout_ns)) {
hrtimer_start(&emac->rx_hrtimer,
diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.c b/drivers/net/ethernet/ti/icssg/icssg_prueth.c
index 3ff8c322f9d9..1acbf9e1bade 100644
--- a/drivers/net/ethernet/ti/icssg/icssg_prueth.c
+++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.c
@@ -559,6 +559,33 @@ const struct icss_iep_clockops prueth_iep_clockops = {
.perout_enable = prueth_perout_enable,
};
+static int prueth_create_xdp_rxqs(struct prueth_emac *emac)
+{
+ struct xdp_rxq_info *rxq = &emac->rx_chns.xdp_rxq;
+ struct page_pool *pool = emac->rx_chns.pg_pool;
+ int ret;
+
+ ret = xdp_rxq_info_reg(rxq, emac->ndev, 0, emac->napi_rx.napi_id);
+ if (ret)
+ return ret;
+
+ ret = xdp_rxq_info_reg_mem_model(rxq, MEM_TYPE_PAGE_POOL, pool);
+ if (ret)
+ xdp_rxq_info_unreg(rxq);
+
+ return ret;
+}
+
+static void prueth_destroy_xdp_rxqs(struct prueth_emac *emac)
+{
+ struct xdp_rxq_info *rxq = &emac->rx_chns.xdp_rxq;
+
+ if (!xdp_rxq_info_is_reg(rxq))
+ return;
+
+ xdp_rxq_info_unreg(rxq);
+}
+
static int icssg_prueth_add_mcast(struct net_device *ndev, const u8 *addr)
{
struct net_device *real_dev;
@@ -780,10 +807,14 @@ static int emac_ndo_open(struct net_device *ndev)
if (ret)
goto free_tx_ts_irq;
- ret = k3_udma_glue_enable_rx_chn(emac->rx_chns.rx_chn);
+ ret = prueth_create_xdp_rxqs(emac);
if (ret)
goto reset_rx_chn;
+ ret = k3_udma_glue_enable_rx_chn(emac->rx_chns.rx_chn);
+ if (ret)
+ goto destroy_xdp_rxqs;
+
for (i = 0; i < emac->tx_ch_num; i++) {
ret = k3_udma_glue_enable_tx_chn(emac->tx_chns[i].tx_chn);
if (ret)
@@ -809,6 +840,8 @@ static int emac_ndo_open(struct net_device *ndev)
* any SKB for completion. So set false to free_skb
*/
prueth_reset_tx_chan(emac, i, false);
+destroy_xdp_rxqs:
+ prueth_destroy_xdp_rxqs(emac);
reset_rx_chn:
prueth_reset_rx_chan(&emac->rx_chns, max_rx_flows, false);
free_tx_ts_irq:
@@ -879,7 +912,7 @@ static int emac_ndo_stop(struct net_device *ndev)
k3_udma_glue_tdown_rx_chn(emac->rx_chns.rx_chn, true);
prueth_reset_rx_chan(&emac->rx_chns, max_rx_flows, true);
-
+ prueth_destroy_xdp_rxqs(emac);
napi_disable(&emac->napi_rx);
hrtimer_cancel(&emac->rx_hrtimer);
@@ -1024,6 +1057,90 @@ static int emac_ndo_vlan_rx_del_vid(struct net_device *ndev,
return 0;
}
+/**
+ * emac_xdp_xmit - Implements ndo_xdp_xmit
+ * @dev: netdev
+ * @n: number of frames
+ * @frames: array of XDP buffer pointers
+ * @flags: XDP extra info
+ *
+ * Return: number of frames successfully sent. Failed frames
+ * will be free'ed by XDP core.
+ *
+ * For error cases, a negative errno code is returned and no-frames
+ * are transmitted (caller must handle freeing frames).
+ **/
+static int emac_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
+ u32 flags)
+{
+ struct prueth_emac *emac = netdev_priv(dev);
+ unsigned int q_idx;
+ int nxmit = 0;
+ int i;
+
+ q_idx = smp_processor_id() % emac->tx_ch_num;
+
+ if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
+ return -EINVAL;
+
+ for (i = 0; i < n; i++) {
+ struct xdp_frame *xdpf = frames[i];
+ int err;
+
+ err = emac_xmit_xdp_frame(emac, xdpf, NULL, q_idx);
+ if (err != ICSSG_XDP_TX)
+ break;
+ nxmit++;
+ }
+
+ return nxmit;
+}
+
+/**
+ * emac_xdp_setup - add/remove an XDP program
+ * @emac: emac device
+ * @bpf: XDP program
+ *
+ * Return: Always 0 (Success)
+ **/
+static int emac_xdp_setup(struct prueth_emac *emac, struct netdev_bpf *bpf)
+{
+ struct bpf_prog *prog = bpf->prog;
+ xdp_features_t val;
+
+ val = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
+ NETDEV_XDP_ACT_NDO_XMIT;
+ xdp_set_features_flag(emac->ndev, val);
+
+ if (!emac->xdpi.prog && !prog)
+ return 0;
+
+ WRITE_ONCE(emac->xdp_prog, prog);
+
+ xdp_attachment_setup(&emac->xdpi, bpf);
+
+ return 0;
+}
+
+/**
+ * emac_ndo_bpf - implements ndo_bpf for icssg_prueth
+ * @ndev: network adapter device
+ * @bpf: XDP program
+ *
+ * Return: 0 on success, error code on failure.
+ **/
+static int emac_ndo_bpf(struct net_device *ndev, struct netdev_bpf *bpf)
+{
+ struct prueth_emac *emac = netdev_priv(ndev);
+
+ switch (bpf->command) {
+ case XDP_SETUP_PROG:
+ return emac_xdp_setup(emac, bpf);
+ default:
+ return -EINVAL;
+ }
+}
+
static const struct net_device_ops emac_netdev_ops = {
.ndo_open = emac_ndo_open,
.ndo_stop = emac_ndo_stop,
@@ -1038,6 +1155,8 @@ static const struct net_device_ops emac_netdev_ops = {
.ndo_fix_features = emac_ndo_fix_features,
.ndo_vlan_rx_add_vid = emac_ndo_vlan_rx_add_vid,
.ndo_vlan_rx_kill_vid = emac_ndo_vlan_rx_del_vid,
+ .ndo_bpf = emac_ndo_bpf,
+ .ndo_xdp_xmit = emac_xdp_xmit,
};
static int prueth_netdev_init(struct prueth *prueth,
@@ -1066,6 +1185,8 @@ static int prueth_netdev_init(struct prueth *prueth,
emac->prueth = prueth;
emac->ndev = ndev;
emac->port_id = port;
+ emac->xdp_prog = NULL;
+ emac->ndev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS;
emac->cmd_wq = create_singlethread_workqueue("icssg_cmd_wq");
if (!emac->cmd_wq) {
ret = -ENOMEM;
diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.h b/drivers/net/ethernet/ti/icssg/icssg_prueth.h
index 3bbabd007129..0675919beb94 100644
--- a/drivers/net/ethernet/ti/icssg/icssg_prueth.h
+++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.h
@@ -8,6 +8,8 @@
#ifndef __NET_TI_ICSSG_PRUETH_H
#define __NET_TI_ICSSG_PRUETH_H
+#include <linux/bpf.h>
+#include <linux/bpf_trace.h>
#include <linux/etherdevice.h>
#include <linux/genalloc.h>
#include <linux/if_vlan.h>
@@ -134,6 +136,7 @@ struct prueth_rx_chn {
unsigned int irq[ICSSG_MAX_RFLOWS]; /* separate irq per flow */
char name[32];
struct page_pool *pg_pool;
+ struct xdp_rxq_info xdp_rxq;
};
enum prueth_swdata_type {
@@ -141,6 +144,7 @@ enum prueth_swdata_type {
PRUETH_SWDATA_SKB,
PRUETH_SWDATA_PAGE,
PRUETH_SWDATA_CMD,
+ PRUETH_SWDATA_XDPF,
};
struct prueth_swdata {
@@ -149,6 +153,7 @@ struct prueth_swdata {
struct sk_buff *skb;
struct page *page;
u32 cmd;
+ struct xdp_frame *xdpf;
} data;
};
@@ -159,6 +164,12 @@ struct prueth_swdata {
#define PRUETH_MAX_TX_TS_REQUESTS 50 /* Max simultaneous TX_TS requests */
+/* XDP BPF state */
+#define ICSSG_XDP_PASS 0
+#define ICSSG_XDP_CONSUMED BIT(0)
+#define ICSSG_XDP_TX BIT(1)
+#define ICSSG_XDP_REDIR BIT(2)
+
/* Minimum coalesce time in usecs for both Tx and Rx */
#define ICSSG_MIN_COALESCE_USECS 20
@@ -227,6 +238,8 @@ struct prueth_emac {
unsigned long rx_pace_timeout_ns;
struct netdev_hw_addr_list vlan_mcast_list[MAX_VLAN_ID];
+ struct bpf_prog *xdp_prog;
+ struct xdp_attachment_info xdpi;
};
/* The buf includes headroom compatible with both skb and xdpf */
@@ -465,5 +478,9 @@ void prueth_put_cores(struct prueth *prueth, int slice);
/* Revision specific helper */
u64 icssg_ts_to_ns(u32 hi_sw, u32 hi, u32 lo, u32 cycle_time_ns);
+int emac_xmit_xdp_frame(struct prueth_emac *emac,
+ struct xdp_frame *xdpf,
+ struct page *page,
+ unsigned int q_idx);
#endif /* __NET_TI_ICSSG_PRUETH_H */
--
2.43.0
On 24/02/2025 13:01, Meghana Malladi wrote:
> From: Roger Quadros <rogerq@kernel.org>
>
> Add native XDP support. We do not support zero copy yet.
>
> Signed-off-by: Roger Quadros <rogerq@kernel.org>
> Signed-off-by: MD Danish Anwar <danishanwar@ti.com>
> Signed-off-by: Meghana Malladi <m-malladi@ti.com>
> ---
> Changes since v2 (v3-v2):
> - Use the page_pool contained in the page instead of passing the page_pool
> (rx_chn) as part of swdata
> - dev_sw_netstats_tx_add() instead of incrementing the stats directly
> - Add missing ndev->stats.tx_dropped++ wherever applicable
> - Move k3_cppi_desc_pool_alloc() before the DMA mapping for easier cleanup
> on failure
> - Replace rxp->napi_id with emac->napi_rx.napi_id in prueth_create_xdp_rxqs()
>
> All the above changes have been suggested by Roger Quadros <rogerq@kernel.org>
>
> drivers/net/ethernet/ti/icssg/icssg_common.c | 219 +++++++++++++++++--
> drivers/net/ethernet/ti/icssg/icssg_prueth.c | 125 ++++++++++-
> drivers/net/ethernet/ti/icssg/icssg_prueth.h | 17 ++
> 3 files changed, 346 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/net/ethernet/ti/icssg/icssg_common.c b/drivers/net/ethernet/ti/icssg/icssg_common.c
> index 01eeabe83eff..4716e24ea05d 100644
> --- a/drivers/net/ethernet/ti/icssg/icssg_common.c
> +++ b/drivers/net/ethernet/ti/icssg/icssg_common.c
> @@ -98,11 +98,19 @@ void prueth_xmit_free(struct prueth_tx_chn *tx_chn,
> {
> struct cppi5_host_desc_t *first_desc, *next_desc;
> dma_addr_t buf_dma, next_desc_dma;
> + struct prueth_swdata *swdata;
> u32 buf_dma_len;
>
> first_desc = desc;
> next_desc = first_desc;
>
> + swdata = cppi5_hdesc_get_swdata(desc);
> + if (swdata->type == PRUETH_SWDATA_PAGE) {
> + page_pool_recycle_direct(swdata->data.page->pp,
> + swdata->data.page);
> + goto free_desc;
> + }
> +
> cppi5_hdesc_get_obuf(first_desc, &buf_dma, &buf_dma_len);
> k3_udma_glue_tx_cppi5_to_dma_addr(tx_chn->tx_chn, &buf_dma);
>
> @@ -126,6 +134,7 @@ void prueth_xmit_free(struct prueth_tx_chn *tx_chn,
> k3_cppi_desc_pool_free(tx_chn->desc_pool, next_desc);
> }
>
> +free_desc:
> k3_cppi_desc_pool_free(tx_chn->desc_pool, first_desc);
> }
> EXPORT_SYMBOL_GPL(prueth_xmit_free);
> @@ -139,6 +148,7 @@ int emac_tx_complete_packets(struct prueth_emac *emac, int chn,
> struct prueth_swdata *swdata;
> struct prueth_tx_chn *tx_chn;
> unsigned int total_bytes = 0;
> + struct xdp_frame *xdpf;
> struct sk_buff *skb;
> dma_addr_t desc_dma;
> int res, num_tx = 0;
> @@ -168,21 +178,28 @@ int emac_tx_complete_packets(struct prueth_emac *emac, int chn,
> continue;
> }
>
> - if (swdata->type != PRUETH_SWDATA_SKB) {
> + switch (swdata->type) {
> + case PRUETH_SWDATA_SKB:
> + skb = swdata->data.skb;
> + dev_sw_netstats_tx_add(skb->dev, 1, skb->len);
> + total_bytes += skb->len;
> + napi_consume_skb(skb, budget);
> + break;
> + case PRUETH_SWDATA_XDPF:
> + xdpf = swdata->data.xdpf;
> + dev_sw_netstats_tx_add(ndev, 1, xdpf->len);
> + total_bytes += xdpf->len;
> + xdp_return_frame(xdpf);
> + break;
> + default:
> netdev_err(ndev, "tx_complete: invalid swdata type %d\n", swdata->type);
> prueth_xmit_free(tx_chn, desc_tx);
> + ndev->stats.tx_dropped++;
> budget++;
> continue;
> }
>
> - skb = swdata->data.skb;
> prueth_xmit_free(tx_chn, desc_tx);
> -
> - ndev = skb->dev;
> - ndev->stats.tx_packets++;
> - ndev->stats.tx_bytes += skb->len;
> - total_bytes += skb->len;
> - napi_consume_skb(skb, budget);
> num_tx++;
> }
>
> @@ -541,7 +558,153 @@ void emac_rx_timestamp(struct prueth_emac *emac,
> ssh->hwtstamp = ns_to_ktime(ns);
> }
>
> -static int emac_rx_packet(struct prueth_emac *emac, u32 flow_id)
> +/**
> + * emac_xmit_xdp_frame - transmits an XDP frame
> + * @emac: emac device
> + * @xdpf: data to transmit
> + * @page: page from page pool if already DMA mapped
> + * @q_idx: queue id
> + *
> + * Return: XDP state
> + */
> +int emac_xmit_xdp_frame(struct prueth_emac *emac,
> + struct xdp_frame *xdpf,
> + struct page *page,
> + unsigned int q_idx)
> +{
> + struct cppi5_host_desc_t *first_desc;
> + struct net_device *ndev = emac->ndev;
> + struct prueth_tx_chn *tx_chn;
> + dma_addr_t desc_dma, buf_dma;
> + struct prueth_swdata *swdata;
> + u32 *epib;
> + int ret;
> +
Drop the blank line and arrange the declarations below in reverse xmas tree order.
> + void *data = xdpf->data;
> + u32 pkt_len = xdpf->len;
> +
> + if (q_idx >= PRUETH_MAX_TX_QUEUES) {
> + netdev_err(ndev, "xdp tx: invalid q_id %d\n", q_idx);
> + return ICSSG_XDP_CONSUMED; /* drop */
> + }
> +
> + tx_chn = &emac->tx_chns[q_idx];
> +
> + first_desc = k3_cppi_desc_pool_alloc(tx_chn->desc_pool);
> + if (!first_desc) {
> + netdev_dbg(ndev, "xdp tx: failed to allocate descriptor\n");
> + goto drop_free_descs; /* drop */
> + }
> +
> + if (page) { /* already DMA mapped by page_pool */
> + buf_dma = page_pool_get_dma_addr(page);
> + buf_dma += xdpf->headroom + sizeof(struct xdp_frame);
> + } else { /* Map the linear buffer */
> + buf_dma = dma_map_single(tx_chn->dma_dev, data, pkt_len, DMA_TO_DEVICE);
> + if (dma_mapping_error(tx_chn->dma_dev, buf_dma)) {
> + netdev_err(ndev, "xdp tx: failed to map data buffer\n");
> + goto drop_free_descs; /* drop */
> + }
> + }
> +
> + cppi5_hdesc_init(first_desc, CPPI5_INFO0_HDESC_EPIB_PRESENT,
> + PRUETH_NAV_PS_DATA_SIZE);
> + cppi5_hdesc_set_pkttype(first_desc, 0);
> + epib = first_desc->epib;
> + epib[0] = 0;
> + epib[1] = 0;
> +
> + /* set dst tag to indicate internal qid at the firmware which is at
> + * bit8..bit15. bit0..bit7 indicates port num for directed
> + * packets in case of switch mode operation
> + */
> + cppi5_desc_set_tags_ids(&first_desc->hdr, 0, (emac->port_id | (q_idx << 8)));
> + k3_udma_glue_tx_dma_to_cppi5_addr(tx_chn->tx_chn, &buf_dma);
> + cppi5_hdesc_attach_buf(first_desc, buf_dma, pkt_len, buf_dma, pkt_len);
> + swdata = cppi5_hdesc_get_swdata(first_desc);
> + if (page) {
> + swdata->type = PRUETH_SWDATA_PAGE;
> + swdata->data.page = page;
> + } else {
> + swdata->type = PRUETH_SWDATA_XDPF;
> + swdata->data.xdpf = xdpf;
> + }
> +
> + cppi5_hdesc_set_pktlen(first_desc, pkt_len);
> + desc_dma = k3_cppi_desc_pool_virt2dma(tx_chn->desc_pool, first_desc);
> +
> + ret = k3_udma_glue_push_tx_chn(tx_chn->tx_chn, first_desc, desc_dma);
> + if (ret) {
> + netdev_err(ndev, "xdp tx: push failed: %d\n", ret);
> + goto drop_free_descs;
> + }
> +
> + return ICSSG_XDP_TX;
> +
> +drop_free_descs:
> + prueth_xmit_free(tx_chn, first_desc);
> + return ICSSG_XDP_CONSUMED;
> +}
> +EXPORT_SYMBOL_GPL(emac_xmit_xdp_frame);
> +
> +/**
> + * emac_run_xdp - run an XDP program
> + * @emac: emac device
> + * @xdp: XDP buffer containing the frame
> + * @page: page with RX data if already DMA mapped
> + *
> + * Return: XDP state
> + */
> +static int emac_run_xdp(struct prueth_emac *emac, struct xdp_buff *xdp,
> + struct page *page)
> +{
> + struct net_device *ndev = emac->ndev;
> + int err, result = ICSSG_XDP_PASS;
you could avoid initialization of result. see below.
> + struct bpf_prog *xdp_prog;
> + struct xdp_frame *xdpf;
> + int q_idx;
> + u32 act;
> +
> + xdp_prog = READ_ONCE(emac->xdp_prog);
> + act = bpf_prog_run_xdp(xdp_prog, xdp);
> + switch (act) {
> + case XDP_PASS:
> + break;
instead of break how about?
return ICSSG_XDP_PASS;
> + case XDP_TX:
> + /* Send packet to TX ring for immediate transmission */
> + xdpf = xdp_convert_buff_to_frame(xdp);
> + if (unlikely(!xdpf))
TX is dropped so here you need to
ndev->stats.tx_dropped++;
> + goto drop;
> +
> + q_idx = smp_processor_id() % emac->tx_ch_num;
> + result = emac_xmit_xdp_frame(emac, xdpf, page, q_idx);
> + if (result == ICSSG_XDP_CONSUMED)
> + goto drop;
need to increment rx stats as we received the packet successfully?
> + break;
instead,
return ICSSG_XDP_TX;
> + case XDP_REDIRECT:
> + err = xdp_do_redirect(emac->ndev, xdp, xdp_prog);
> + if (err)
> + goto drop;
> + result = ICSSG_XDP_REDIR;
> + break;
replace above 2 by
return ICSSG_XDP_REDIR;
> + default:
> + bpf_warn_invalid_xdp_action(emac->ndev, xdp_prog, act);
> + fallthrough;
> + case XDP_ABORTED:
> +drop:
> + trace_xdp_exception(emac->ndev, xdp_prog, act);
> + fallthrough; /* handle aborts by dropping packet */
> + case XDP_DROP:
> + ndev->stats.tx_dropped++;
shouldn't this be
ndev->stats.rx_dropped++;
> + result = ICSSG_XDP_CONSUMED;
not required if we directly return this value below.
> + page_pool_recycle_direct(emac->rx_chns.pg_pool, page);
> + break;
return ICSSG_XDP_CONSUMED;
> + }
> +
> + return result;
drop this
> +}
> +
> +static int emac_rx_packet(struct prueth_emac *emac, u32 flow_id, int *xdp_state)
> {
> struct prueth_rx_chn *rx_chn = &emac->rx_chns;
> u32 buf_dma_len, pkt_len, port_id = 0;
> @@ -552,10 +715,12 @@ static int emac_rx_packet(struct prueth_emac *emac, u32 flow_id)
> struct page *page, *new_page;
> struct page_pool *pool;
> struct sk_buff *skb;
> + struct xdp_buff xdp;
> u32 *psdata;
> void *pa;
> int ret;
>
> + *xdp_state = 0;
> pool = rx_chn->pg_pool;
> ret = k3_udma_glue_pop_rx_chn(rx_chn->rx_chn, flow_id, &desc_dma);
> if (ret) {
> @@ -596,9 +761,21 @@ static int emac_rx_packet(struct prueth_emac *emac, u32 flow_id)
> goto requeue;
> }
>
> - /* prepare skb and send to n/w stack */
> pa = page_address(page);
> - skb = napi_build_skb(pa, PAGE_SIZE);
We are running the xdp program after allocating the new page.
How about running the xdp program first? if the packet has to be dropped
then it is pointless to allocate a new page. We could just reuse the old page
and save CPU cycles.
> + if (emac->xdp_prog) {
> + xdp_init_buff(&xdp, PAGE_SIZE, &rx_chn->xdp_rxq);
> + xdp_prepare_buff(&xdp, pa, PRUETH_HEADROOM, pkt_len, false);
> +
> + *xdp_state = emac_run_xdp(emac, &xdp, page);
> + if (*xdp_state == ICSSG_XDP_PASS)
> + skb = xdp_build_skb_from_buff(&xdp);
> + else
> + goto requeue;
> + } else {
> + /* prepare skb and send to n/w stack */
> + skb = napi_build_skb(pa, PAGE_SIZE);
> + }
> +
> if (!skb) {
> ndev->stats.rx_dropped++;
> page_pool_recycle_direct(pool, page);
instead of recycling the old page just reuse it
new_page = page;
> goto requeue;
> }
Here you can allocate the new page, because at this point we are sure the old
page has to be sent to user space.
> @@ -861,13 +1038,23 @@ static void prueth_tx_cleanup(void *data, dma_addr_t desc_dma)
> struct prueth_tx_chn *tx_chn = data;
> struct cppi5_host_desc_t *desc_tx;
> struct prueth_swdata *swdata;
> + struct xdp_frame *xdpf;
> struct sk_buff *skb;
>
> desc_tx = k3_cppi_desc_pool_dma2virt(tx_chn->desc_pool, desc_dma);
> swdata = cppi5_hdesc_get_swdata(desc_tx);
> - if (swdata->type == PRUETH_SWDATA_SKB) {
> +
> + switch (swdata->type) {
> + case PRUETH_SWDATA_SKB:
> skb = swdata->data.skb;
> dev_kfree_skb_any(skb);
> + break;
> + case PRUETH_SWDATA_XDPF:
> + xdpf = swdata->data.xdpf;
> + xdp_return_frame(xdpf);
> + break;
> + default:
> + break;
> }
>
> prueth_xmit_free(tx_chn, desc_tx);
> @@ -904,15 +1091,18 @@ int icssg_napi_rx_poll(struct napi_struct *napi_rx, int budget)
> PRUETH_RX_FLOW_DATA_SR1 : PRUETH_RX_FLOW_DATA;
> int flow = emac->is_sr1 ?
> PRUETH_MAX_RX_FLOWS_SR1 : PRUETH_MAX_RX_FLOWS;
> + int xdp_state_or = 0;
> int num_rx = 0;
> int cur_budget;
> + int xdp_state;
> int ret;
>
> while (flow--) {
> cur_budget = budget - num_rx;
>
> while (cur_budget--) {
> - ret = emac_rx_packet(emac, flow);
> + ret = emac_rx_packet(emac, flow, &xdp_state);
> + xdp_state_or |= xdp_state;
> if (ret)
> break;
> num_rx++;
> @@ -922,6 +1112,9 @@ int icssg_napi_rx_poll(struct napi_struct *napi_rx, int budget)
> break;
> }
>
> + if (xdp_state_or & ICSSG_XDP_REDIR)
> + xdp_do_flush();
> +
> if (num_rx < budget && napi_complete_done(napi_rx, num_rx)) {
> if (unlikely(emac->rx_pace_timeout_ns)) {
> hrtimer_start(&emac->rx_hrtimer,
> diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.c b/drivers/net/ethernet/ti/icssg/icssg_prueth.c
> index 3ff8c322f9d9..1acbf9e1bade 100644
> --- a/drivers/net/ethernet/ti/icssg/icssg_prueth.c
> +++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.c
> @@ -559,6 +559,33 @@ const struct icss_iep_clockops prueth_iep_clockops = {
> .perout_enable = prueth_perout_enable,
> };
>
> +static int prueth_create_xdp_rxqs(struct prueth_emac *emac)
> +{
> + struct xdp_rxq_info *rxq = &emac->rx_chns.xdp_rxq;
> + struct page_pool *pool = emac->rx_chns.pg_pool;
> + int ret;
> +
> + ret = xdp_rxq_info_reg(rxq, emac->ndev, 0, emac->napi_rx.napi_id);
> + if (ret)
> + return ret;
> +
> + ret = xdp_rxq_info_reg_mem_model(rxq, MEM_TYPE_PAGE_POOL, pool);
> + if (ret)
> + xdp_rxq_info_unreg(rxq);
> +
> + return ret;
> +}
> +
> +static void prueth_destroy_xdp_rxqs(struct prueth_emac *emac)
> +{
> + struct xdp_rxq_info *rxq = &emac->rx_chns.xdp_rxq;
> +
> + if (!xdp_rxq_info_is_reg(rxq))
> + return;
> +
> + xdp_rxq_info_unreg(rxq);
> +}
> +
> static int icssg_prueth_add_mcast(struct net_device *ndev, const u8 *addr)
> {
> struct net_device *real_dev;
> @@ -780,10 +807,14 @@ static int emac_ndo_open(struct net_device *ndev)
> if (ret)
> goto free_tx_ts_irq;
>
> - ret = k3_udma_glue_enable_rx_chn(emac->rx_chns.rx_chn);
> + ret = prueth_create_xdp_rxqs(emac);
> if (ret)
> goto reset_rx_chn;
>
> + ret = k3_udma_glue_enable_rx_chn(emac->rx_chns.rx_chn);
> + if (ret)
> + goto destroy_xdp_rxqs;
> +
> for (i = 0; i < emac->tx_ch_num; i++) {
> ret = k3_udma_glue_enable_tx_chn(emac->tx_chns[i].tx_chn);
> if (ret)
> @@ -809,6 +840,8 @@ static int emac_ndo_open(struct net_device *ndev)
> * any SKB for completion. So set false to free_skb
> */
> prueth_reset_tx_chan(emac, i, false);
> +destroy_xdp_rxqs:
> + prueth_destroy_xdp_rxqs(emac);
> reset_rx_chn:
> prueth_reset_rx_chan(&emac->rx_chns, max_rx_flows, false);
> free_tx_ts_irq:
> @@ -879,7 +912,7 @@ static int emac_ndo_stop(struct net_device *ndev)
> k3_udma_glue_tdown_rx_chn(emac->rx_chns.rx_chn, true);
>
> prueth_reset_rx_chan(&emac->rx_chns, max_rx_flows, true);
> -
> + prueth_destroy_xdp_rxqs(emac);
> napi_disable(&emac->napi_rx);
> hrtimer_cancel(&emac->rx_hrtimer);
>
> @@ -1024,6 +1057,90 @@ static int emac_ndo_vlan_rx_del_vid(struct net_device *ndev,
> return 0;
> }
>
> +/**
> + * emac_xdp_xmit - Implements ndo_xdp_xmit
> + * @dev: netdev
> + * @n: number of frames
> + * @frames: array of XDP buffer pointers
> + * @flags: XDP extra info
> + *
> + * Return: number of frames successfully sent. Failed frames
> + * will be free'ed by XDP core.
> + *
> + * For error cases, a negative errno code is returned and no-frames
> + * are transmitted (caller must handle freeing frames).
> + **/
> +static int emac_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
> + u32 flags)
> +{
> + struct prueth_emac *emac = netdev_priv(dev);
> + unsigned int q_idx;
> + int nxmit = 0;
> + int i;
> +
> + q_idx = smp_processor_id() % emac->tx_ch_num;
> +
> + if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
> + return -EINVAL;
> +
> + for (i = 0; i < n; i++) {
> + struct xdp_frame *xdpf = frames[i];
> + int err;
> +
> + err = emac_xmit_xdp_frame(emac, xdpf, NULL, q_idx);
> + if (err != ICSSG_XDP_TX)
> + break;
> + nxmit++;
> + }
> +
> + return nxmit;
> +}
> +
> +/**
> + * emac_xdp_setup - add/remove an XDP program
> + * @emac: emac device
> + * @bpf: XDP program
> + *
> + * Return: Always 0 (Success)
> + **/
> +static int emac_xdp_setup(struct prueth_emac *emac, struct netdev_bpf *bpf)
> +{
> + struct bpf_prog *prog = bpf->prog;
> + xdp_features_t val;
> +
> + val = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
> + NETDEV_XDP_ACT_NDO_XMIT;
> + xdp_set_features_flag(emac->ndev, val);
> +
> + if (!emac->xdpi.prog && !prog)
> + return 0;
> +
> + WRITE_ONCE(emac->xdp_prog, prog);
> +
> + xdp_attachment_setup(&emac->xdpi, bpf);
> +
> + return 0;
> +}
> +
> +/**
> + * emac_ndo_bpf - implements ndo_bpf for icssg_prueth
> + * @ndev: network adapter device
> + * @bpf: XDP program
> + *
> + * Return: 0 on success, error code on failure.
> + **/
> +static int emac_ndo_bpf(struct net_device *ndev, struct netdev_bpf *bpf)
> +{
> + struct prueth_emac *emac = netdev_priv(ndev);
> +
> + switch (bpf->command) {
> + case XDP_SETUP_PROG:
> + return emac_xdp_setup(emac, bpf);
> + default:
> + return -EINVAL;
> + }
> +}
> +
> static const struct net_device_ops emac_netdev_ops = {
> .ndo_open = emac_ndo_open,
> .ndo_stop = emac_ndo_stop,
> @@ -1038,6 +1155,8 @@ static const struct net_device_ops emac_netdev_ops = {
> .ndo_fix_features = emac_ndo_fix_features,
> .ndo_vlan_rx_add_vid = emac_ndo_vlan_rx_add_vid,
> .ndo_vlan_rx_kill_vid = emac_ndo_vlan_rx_del_vid,
> + .ndo_bpf = emac_ndo_bpf,
> + .ndo_xdp_xmit = emac_xdp_xmit,
> };
>
> static int prueth_netdev_init(struct prueth *prueth,
> @@ -1066,6 +1185,8 @@ static int prueth_netdev_init(struct prueth *prueth,
> emac->prueth = prueth;
> emac->ndev = ndev;
> emac->port_id = port;
> + emac->xdp_prog = NULL;
> + emac->ndev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS;
> emac->cmd_wq = create_singlethread_workqueue("icssg_cmd_wq");
> if (!emac->cmd_wq) {
> ret = -ENOMEM;
> diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.h b/drivers/net/ethernet/ti/icssg/icssg_prueth.h
> index 3bbabd007129..0675919beb94 100644
> --- a/drivers/net/ethernet/ti/icssg/icssg_prueth.h
> +++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.h
> @@ -8,6 +8,8 @@
> #ifndef __NET_TI_ICSSG_PRUETH_H
> #define __NET_TI_ICSSG_PRUETH_H
>
> +#include <linux/bpf.h>
> +#include <linux/bpf_trace.h>
> #include <linux/etherdevice.h>
> #include <linux/genalloc.h>
> #include <linux/if_vlan.h>
> @@ -134,6 +136,7 @@ struct prueth_rx_chn {
> unsigned int irq[ICSSG_MAX_RFLOWS]; /* separate irq per flow */
> char name[32];
> struct page_pool *pg_pool;
> + struct xdp_rxq_info xdp_rxq;
> };
>
> enum prueth_swdata_type {
> @@ -141,6 +144,7 @@ enum prueth_swdata_type {
> PRUETH_SWDATA_SKB,
> PRUETH_SWDATA_PAGE,
> PRUETH_SWDATA_CMD,
> + PRUETH_SWDATA_XDPF,
> };
>
> struct prueth_swdata {
> @@ -149,6 +153,7 @@ struct prueth_swdata {
> struct sk_buff *skb;
> struct page *page;
> u32 cmd;
> + struct xdp_frame *xdpf;
> } data;
> };
>
> @@ -159,6 +164,12 @@ struct prueth_swdata {
>
> #define PRUETH_MAX_TX_TS_REQUESTS 50 /* Max simultaneous TX_TS requests */
>
> +/* XDP BPF state */
> +#define ICSSG_XDP_PASS 0
> +#define ICSSG_XDP_CONSUMED BIT(0)
> +#define ICSSG_XDP_TX BIT(1)
> +#define ICSSG_XDP_REDIR BIT(2)
> +
> /* Minimum coalesce time in usecs for both Tx and Rx */
> #define ICSSG_MIN_COALESCE_USECS 20
>
> @@ -227,6 +238,8 @@ struct prueth_emac {
> unsigned long rx_pace_timeout_ns;
>
> struct netdev_hw_addr_list vlan_mcast_list[MAX_VLAN_ID];
> + struct bpf_prog *xdp_prog;
> + struct xdp_attachment_info xdpi;
> };
>
> /* The buf includes headroom compatible with both skb and xdpf */
> @@ -465,5 +478,9 @@ void prueth_put_cores(struct prueth *prueth, int slice);
>
> /* Revision specific helper */
> u64 icssg_ts_to_ns(u32 hi_sw, u32 hi, u32 lo, u32 cycle_time_ns);
> +int emac_xmit_xdp_frame(struct prueth_emac *emac,
> + struct xdp_frame *xdpf,
> + struct page *page,
> + unsigned int q_idx);
>
> #endif /* __NET_TI_ICSSG_PRUETH_H */
--
cheers,
-roger
On 2/27/2025 9:07 PM, Roger Quadros wrote:
>
>
> On 24/02/2025 13:01, Meghana Malladi wrote:
>> From: Roger Quadros <rogerq@kernel.org>
>>
>> Add native XDP support. We do not support zero copy yet.
>>
>> Signed-off-by: Roger Quadros <rogerq@kernel.org>
>> Signed-off-by: MD Danish Anwar <danishanwar@ti.com>
>> Signed-off-by: Meghana Malladi <m-malladi@ti.com>
>> ---
>> Changes since v2 (v3-v2):
>> - Use page_pool contained in the page instead of using passing page_pool
>> (rx_chn) as part of swdata
>> - dev_sw_netstats_tx_add() instead of incrementing the stats directly
>> - Add missing ndev->stats.tx_dropped++ wherever applicable
>> - Move k3_cppi_desc_pool_alloc() before the DMA mapping for easier cleanup
>> on failure
>> - Replace rxp->napi_id with emac->napi_rx.napi_id in prueth_create_xdp_rxqs()
>>
>> All the above changes have been suggested by Roger Quadros <rogerq@kernel.org>
>>
>> drivers/net/ethernet/ti/icssg/icssg_common.c | 219 +++++++++++++++++--
>> drivers/net/ethernet/ti/icssg/icssg_prueth.c | 125 ++++++++++-
>> drivers/net/ethernet/ti/icssg/icssg_prueth.h | 17 ++
>> 3 files changed, 346 insertions(+), 15 deletions(-)
>>
>> diff --git a/drivers/net/ethernet/ti/icssg/icssg_common.c b/drivers/net/ethernet/ti/icssg/icssg_common.c
>> index 01eeabe83eff..4716e24ea05d 100644
>> --- a/drivers/net/ethernet/ti/icssg/icssg_common.c
>> +++ b/drivers/net/ethernet/ti/icssg/icssg_common.c
>> @@ -98,11 +98,19 @@ void prueth_xmit_free(struct prueth_tx_chn *tx_chn,
>> {
>> struct cppi5_host_desc_t *first_desc, *next_desc;
>> dma_addr_t buf_dma, next_desc_dma;
>> + struct prueth_swdata *swdata;
>> u32 buf_dma_len;
>>
>> first_desc = desc;
>> next_desc = first_desc;
>>
>> + swdata = cppi5_hdesc_get_swdata(desc);
>> + if (swdata->type == PRUETH_SWDATA_PAGE) {
>> + page_pool_recycle_direct(swdata->data.page->pp,
>> + swdata->data.page);
>> + goto free_desc;
>> + }
>> +
>> cppi5_hdesc_get_obuf(first_desc, &buf_dma, &buf_dma_len);
>> k3_udma_glue_tx_cppi5_to_dma_addr(tx_chn->tx_chn, &buf_dma);
>>
>> @@ -126,6 +134,7 @@ void prueth_xmit_free(struct prueth_tx_chn *tx_chn,
>> k3_cppi_desc_pool_free(tx_chn->desc_pool, next_desc);
>> }
>>
>> +free_desc:
>> k3_cppi_desc_pool_free(tx_chn->desc_pool, first_desc);
>> }
>> EXPORT_SYMBOL_GPL(prueth_xmit_free);
>> @@ -139,6 +148,7 @@ int emac_tx_complete_packets(struct prueth_emac *emac, int chn,
>> struct prueth_swdata *swdata;
>> struct prueth_tx_chn *tx_chn;
>> unsigned int total_bytes = 0;
>> + struct xdp_frame *xdpf;
>> struct sk_buff *skb;
>> dma_addr_t desc_dma;
>> int res, num_tx = 0;
>> @@ -168,21 +178,28 @@ int emac_tx_complete_packets(struct prueth_emac *emac, int chn,
>> continue;
>> }
>>
>> - if (swdata->type != PRUETH_SWDATA_SKB) {
>> + switch (swdata->type) {
>> + case PRUETH_SWDATA_SKB:
>> + skb = swdata->data.skb;
>> + dev_sw_netstats_tx_add(skb->dev, 1, skb->len);
>> + total_bytes += skb->len;
>> + napi_consume_skb(skb, budget);
>> + break;
>> + case PRUETH_SWDATA_XDPF:
>> + xdpf = swdata->data.xdpf;
>> + dev_sw_netstats_tx_add(ndev, 1, xdpf->len);
>> + total_bytes += xdpf->len;
>> + xdp_return_frame(xdpf);
>> + break;
>> + default:
>> netdev_err(ndev, "tx_complete: invalid swdata type %d\n", swdata->type);
>> prueth_xmit_free(tx_chn, desc_tx);
>> + ndev->stats.tx_dropped++;
>> budget++;
>> continue;
>> }
>>
>> - skb = swdata->data.skb;
>> prueth_xmit_free(tx_chn, desc_tx);
>> -
>> - ndev = skb->dev;
>> - ndev->stats.tx_packets++;
>> - ndev->stats.tx_bytes += skb->len;
>> - total_bytes += skb->len;
>> - napi_consume_skb(skb, budget);
>> num_tx++;
>> }
>>
>> @@ -541,7 +558,153 @@ void emac_rx_timestamp(struct prueth_emac *emac,
>> ssh->hwtstamp = ns_to_ktime(ns);
>> }
>>
>> -static int emac_rx_packet(struct prueth_emac *emac, u32 flow_id)
>> +/**
>> + * emac_xmit_xdp_frame - transmits an XDP frame
>> + * @emac: emac device
>> + * @xdpf: data to transmit
>> + * @page: page from page pool if already DMA mapped
>> + * @q_idx: queue id
>> + *
>> + * Return: XDP state
>> + */
>> +int emac_xmit_xdp_frame(struct prueth_emac *emac,
>> + struct xdp_frame *xdpf,
>> + struct page *page,
>> + unsigned int q_idx)
>> +{
>> + struct cppi5_host_desc_t *first_desc;
>> + struct net_device *ndev = emac->ndev;
>> + struct prueth_tx_chn *tx_chn;
>> + dma_addr_t desc_dma, buf_dma;
>> + struct prueth_swdata *swdata;
>> + u32 *epib;
>> + int ret;
>> +
> drop new line and arrange below declarations in reverse xmas tree order.
>
As suggested by Dan, I will drop these variables and use the values directly.
>> + void *data = xdpf->data;
>> + u32 pkt_len = xdpf->len;
>> +
>> + if (q_idx >= PRUETH_MAX_TX_QUEUES) {
>> + netdev_err(ndev, "xdp tx: invalid q_id %d\n", q_idx);
>> + return ICSSG_XDP_CONSUMED; /* drop */
>> + }
>> +
>> + tx_chn = &emac->tx_chns[q_idx];
>> +
>> + first_desc = k3_cppi_desc_pool_alloc(tx_chn->desc_pool);
>> + if (!first_desc) {
>> + netdev_dbg(ndev, "xdp tx: failed to allocate descriptor\n");
>> + goto drop_free_descs; /* drop */
>> + }
>> +
>> + if (page) { /* already DMA mapped by page_pool */
>> + buf_dma = page_pool_get_dma_addr(page);
>> + buf_dma += xdpf->headroom + sizeof(struct xdp_frame);
>> + } else { /* Map the linear buffer */
>> + buf_dma = dma_map_single(tx_chn->dma_dev, data, pkt_len, DMA_TO_DEVICE);
>> + if (dma_mapping_error(tx_chn->dma_dev, buf_dma)) {
>> + netdev_err(ndev, "xdp tx: failed to map data buffer\n");
>> + goto drop_free_descs; /* drop */
>> + }
>> + }
>> +
>> + cppi5_hdesc_init(first_desc, CPPI5_INFO0_HDESC_EPIB_PRESENT,
>> + PRUETH_NAV_PS_DATA_SIZE);
>> + cppi5_hdesc_set_pkttype(first_desc, 0);
>> + epib = first_desc->epib;
>> + epib[0] = 0;
>> + epib[1] = 0;
>> +
>> + /* set dst tag to indicate internal qid at the firmware which is at
>> + * bit8..bit15. bit0..bit7 indicates port num for directed
>> + * packets in case of switch mode operation
>> + */
>> + cppi5_desc_set_tags_ids(&first_desc->hdr, 0, (emac->port_id | (q_idx << 8)));
>> + k3_udma_glue_tx_dma_to_cppi5_addr(tx_chn->tx_chn, &buf_dma);
>> + cppi5_hdesc_attach_buf(first_desc, buf_dma, pkt_len, buf_dma, pkt_len);
>> + swdata = cppi5_hdesc_get_swdata(first_desc);
>> + if (page) {
>> + swdata->type = PRUETH_SWDATA_PAGE;
>> + swdata->data.page = page;
>> + } else {
>> + swdata->type = PRUETH_SWDATA_XDPF;
>> + swdata->data.xdpf = xdpf;
>> + }
>> +
>> + cppi5_hdesc_set_pktlen(first_desc, pkt_len);
>> + desc_dma = k3_cppi_desc_pool_virt2dma(tx_chn->desc_pool, first_desc);
>> +
>> + ret = k3_udma_glue_push_tx_chn(tx_chn->tx_chn, first_desc, desc_dma);
>> + if (ret) {
>> + netdev_err(ndev, "xdp tx: push failed: %d\n", ret);
>> + goto drop_free_descs;
>> + }
>> +
>> + return ICSSG_XDP_TX;
>> +
>> +drop_free_descs:
>> + prueth_xmit_free(tx_chn, first_desc);
>> + return ICSSG_XDP_CONSUMED;
>> +}
>> +EXPORT_SYMBOL_GPL(emac_xmit_xdp_frame);
>> +
>> +/**
>> + * emac_run_xdp - run an XDP program
>> + * @emac: emac device
>> + * @xdp: XDP buffer containing the frame
>> + * @page: page with RX data if already DMA mapped
>> + *
>> + * Return: XDP state
>> + */
>> +static int emac_run_xdp(struct prueth_emac *emac, struct xdp_buff *xdp,
>> + struct page *page)
>> +{
>> + struct net_device *ndev = emac->ndev;
>> + int err, result = ICSSG_XDP_PASS;
>
> you could avoid initialization of result. see below.
>
>> + struct bpf_prog *xdp_prog;
>> + struct xdp_frame *xdpf;
>> + int q_idx;
>> + u32 act;
>> +
>> + xdp_prog = READ_ONCE(emac->xdp_prog);
>> + act = bpf_prog_run_xdp(xdp_prog, xdp);
>> + switch (act) {
>> + case XDP_PASS:
>> + break;
> instead of break how about?
> return ICSSG_XDP_PASS;
>
I looked into the CPSW XDP implementation and it's the same as what you suggested
here. I will update this function with your suggestions.
>> + case XDP_TX:
>> + /* Send packet to TX ring for immediate transmission */
>> + xdpf = xdp_convert_buff_to_frame(xdp);
>> + if (unlikely(!xdpf))
>
> TX is dropped so here you need to
> ndev->stats.tx_dropped++;
Yes added it.
>> + goto drop;
>> +
>> + q_idx = smp_processor_id() % emac->tx_ch_num;
>> + result = emac_xmit_xdp_frame(emac, xdpf, page, q_idx);
>> + if (result == ICSSG_XDP_CONSUMED)
>> + goto drop;
>
> need to increment rx stats as we received the packet successfully?
>
Yes I will do that.
>> + break;
> instead,
> return ICSSG_XDP_TX;
>
returning result here as it depends on what emac_xmit_xdp_frame() returns
>> + case XDP_REDIRECT:
>> + err = xdp_do_redirect(emac->ndev, xdp, xdp_prog);
>> + if (err)
>> + goto drop;
>> + result = ICSSG_XDP_REDIR;
>> + break;
>
> replace above 2 by
> return ICSSG_XDP_REDIR;
I suppose you meant "replace above 2 by"? I have updated it.
>> + default:
>> + bpf_warn_invalid_xdp_action(emac->ndev, xdp_prog, act);
>> + fallthrough;
>> + case XDP_ABORTED:
>> +drop:
>> + trace_xdp_exception(emac->ndev, xdp_prog, act);
>> + fallthrough; /* handle aborts by dropping packet */
>> + case XDP_DROP:
>> + ndev->stats.tx_dropped++;
>
> shouldn't this be
> ndev->stats.rx_dropped++;
>
Yes correct. Thanks for finding this. Fixed it.
>> + result = ICSSG_XDP_CONSUMED;
>
> not required if we directly return this value below.
>
>> + page_pool_recycle_direct(emac->rx_chns.pg_pool, page);
>> + break;
> return ICSSG_XDP_CONSUMED;
>> + }
>> +
>> + return result;
>
> drop this
Done.
>
>> +}
>> +
>> +static int emac_rx_packet(struct prueth_emac *emac, u32 flow_id, int *xdp_state)
>> {
>> struct prueth_rx_chn *rx_chn = &emac->rx_chns;
>> u32 buf_dma_len, pkt_len, port_id = 0;
>> @@ -552,10 +715,12 @@ static int emac_rx_packet(struct prueth_emac *emac, u32 flow_id)
>> struct page *page, *new_page;
>> struct page_pool *pool;
>> struct sk_buff *skb;
>> + struct xdp_buff xdp;
>> u32 *psdata;
>> void *pa;
>> int ret;
>>
>> + *xdp_state = 0;
>> pool = rx_chn->pg_pool;
>> ret = k3_udma_glue_pop_rx_chn(rx_chn->rx_chn, flow_id, &desc_dma);
>> if (ret) {
>> @@ -596,9 +761,21 @@ static int emac_rx_packet(struct prueth_emac *emac, u32 flow_id)
>> goto requeue;
>> }
>>
>> - /* prepare skb and send to n/w stack */
>> pa = page_address(page);
>> - skb = napi_build_skb(pa, PAGE_SIZE);
>
> We are running the xdp program after allocating the new page.
> How about running the xdp program first? if the packet has to be dropped
> then it is pointless to allocate a new page. We could just reuse the old page
> and save CPU cycles.
>
What if the packet does not need to be dropped (i.e. it has been processed by
XDP)? Then we go to requeue, where a new page is needed to map to the next
upcoming packet descriptor. Hence running XDP still needs new_page.
>> + if (emac->xdp_prog) {
>> + xdp_init_buff(&xdp, PAGE_SIZE, &rx_chn->xdp_rxq);
>> + xdp_prepare_buff(&xdp, pa, PRUETH_HEADROOM, pkt_len, false);
>> +
>> + *xdp_state = emac_run_xdp(emac, &xdp, page);
>> + if (*xdp_state == ICSSG_XDP_PASS)
>> + skb = xdp_build_skb_from_buff(&xdp);
>> + else
>> + goto requeue;
>> + } else {
>> + /* prepare skb and send to n/w stack */
>> + skb = napi_build_skb(pa, PAGE_SIZE);
>> + }
>> +
>> if (!skb) {
>> ndev->stats.rx_dropped++;
>> page_pool_recycle_direct(pool, page);
>
> instead of recycling the old page just reuse it
> new_page = page;
>
With the above explanation I think allocating new_page is inevitable.
>> goto requeue;
>> }
>
> here you can allocate the new page cause now we're sure old page
> has to be sent to user space.
>
Same reasoning as in the comment above.
>> @@ -861,13 +1038,23 @@ static void prueth_tx_cleanup(void *data, dma_addr_t desc_dma)
>> struct prueth_tx_chn *tx_chn = data;
>> struct cppi5_host_desc_t *desc_tx;
>> struct prueth_swdata *swdata;
>> + struct xdp_frame *xdpf;
>> struct sk_buff *skb;
>>
>> desc_tx = k3_cppi_desc_pool_dma2virt(tx_chn->desc_pool, desc_dma);
>> swdata = cppi5_hdesc_get_swdata(desc_tx);
>> - if (swdata->type == PRUETH_SWDATA_SKB) {
>> +
>> + switch (swdata->type) {
>> + case PRUETH_SWDATA_SKB:
>> skb = swdata->data.skb;
>> dev_kfree_skb_any(skb);
>> + break;
>> + case PRUETH_SWDATA_XDPF:
>> + xdpf = swdata->data.xdpf;
>> + xdp_return_frame(xdpf);
>> + break;
>> + default:
>> + break;
>> }
>>
>> prueth_xmit_free(tx_chn, desc_tx);
>> @@ -904,15 +1091,18 @@ int icssg_napi_rx_poll(struct napi_struct *napi_rx, int budget)
>> PRUETH_RX_FLOW_DATA_SR1 : PRUETH_RX_FLOW_DATA;
>> int flow = emac->is_sr1 ?
>> PRUETH_MAX_RX_FLOWS_SR1 : PRUETH_MAX_RX_FLOWS;
>> + int xdp_state_or = 0;
>> int num_rx = 0;
>> int cur_budget;
>> + int xdp_state;
>> int ret;
>>
>> while (flow--) {
>> cur_budget = budget - num_rx;
>>
>> while (cur_budget--) {
>> - ret = emac_rx_packet(emac, flow);
>> + ret = emac_rx_packet(emac, flow, &xdp_state);
>> + xdp_state_or |= xdp_state;
>> if (ret)
>> break;
>> num_rx++;
>> @@ -922,6 +1112,9 @@ int icssg_napi_rx_poll(struct napi_struct *napi_rx, int budget)
>> break;
>> }
>>
>> + if (xdp_state_or & ICSSG_XDP_REDIR)
>> + xdp_do_flush();
>> +
>> if (num_rx < budget && napi_complete_done(napi_rx, num_rx)) {
>> if (unlikely(emac->rx_pace_timeout_ns)) {
>> hrtimer_start(&emac->rx_hrtimer,
>> diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.c b/drivers/net/ethernet/ti/icssg/icssg_prueth.c
>> index 3ff8c322f9d9..1acbf9e1bade 100644
>> --- a/drivers/net/ethernet/ti/icssg/icssg_prueth.c
>> +++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.c
>> @@ -559,6 +559,33 @@ const struct icss_iep_clockops prueth_iep_clockops = {
>> .perout_enable = prueth_perout_enable,
>> };
>>
>> +static int prueth_create_xdp_rxqs(struct prueth_emac *emac)
>> +{
>> + struct xdp_rxq_info *rxq = &emac->rx_chns.xdp_rxq;
>> + struct page_pool *pool = emac->rx_chns.pg_pool;
>> + int ret;
>> +
>> + ret = xdp_rxq_info_reg(rxq, emac->ndev, 0, emac->napi_rx.napi_id);
>> + if (ret)
>> + return ret;
>> +
>> + ret = xdp_rxq_info_reg_mem_model(rxq, MEM_TYPE_PAGE_POOL, pool);
>> + if (ret)
>> + xdp_rxq_info_unreg(rxq);
>> +
>> + return ret;
>> +}
>> +
>> +static void prueth_destroy_xdp_rxqs(struct prueth_emac *emac)
>> +{
>> + struct xdp_rxq_info *rxq = &emac->rx_chns.xdp_rxq;
>> +
>> + if (!xdp_rxq_info_is_reg(rxq))
>> + return;
>> +
>> + xdp_rxq_info_unreg(rxq);
>> +}
>> +
>> static int icssg_prueth_add_mcast(struct net_device *ndev, const u8 *addr)
>> {
>> struct net_device *real_dev;
>> @@ -780,10 +807,14 @@ static int emac_ndo_open(struct net_device *ndev)
>> if (ret)
>> goto free_tx_ts_irq;
>>
>> - ret = k3_udma_glue_enable_rx_chn(emac->rx_chns.rx_chn);
>> + ret = prueth_create_xdp_rxqs(emac);
>> if (ret)
>> goto reset_rx_chn;
>>
>> + ret = k3_udma_glue_enable_rx_chn(emac->rx_chns.rx_chn);
>> + if (ret)
>> + goto destroy_xdp_rxqs;
>> +
>> for (i = 0; i < emac->tx_ch_num; i++) {
>> ret = k3_udma_glue_enable_tx_chn(emac->tx_chns[i].tx_chn);
>> if (ret)
>> @@ -809,6 +840,8 @@ static int emac_ndo_open(struct net_device *ndev)
>> * any SKB for completion. So set false to free_skb
>> */
>> prueth_reset_tx_chan(emac, i, false);
>> +destroy_xdp_rxqs:
>> + prueth_destroy_xdp_rxqs(emac);
>> reset_rx_chn:
>> prueth_reset_rx_chan(&emac->rx_chns, max_rx_flows, false);
>> free_tx_ts_irq:
>> @@ -879,7 +912,7 @@ static int emac_ndo_stop(struct net_device *ndev)
>> k3_udma_glue_tdown_rx_chn(emac->rx_chns.rx_chn, true);
>>
>> prueth_reset_rx_chan(&emac->rx_chns, max_rx_flows, true);
>> -
>> + prueth_destroy_xdp_rxqs(emac);
>> napi_disable(&emac->napi_rx);
>> hrtimer_cancel(&emac->rx_hrtimer);
>>
>> @@ -1024,6 +1057,90 @@ static int emac_ndo_vlan_rx_del_vid(struct net_device *ndev,
>> return 0;
>> }
>>
>> +/**
>> + * emac_xdp_xmit - Implements ndo_xdp_xmit
>> + * @dev: netdev
>> + * @n: number of frames
>> + * @frames: array of XDP buffer pointers
>> + * @flags: XDP extra info
>> + *
>> + * Return: number of frames successfully sent. Failed frames
>> + * will be free'ed by XDP core.
>> + *
>> + * For error cases, a negative errno code is returned and no-frames
>> + * are transmitted (caller must handle freeing frames).
>> + **/
>> +static int emac_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
>> + u32 flags)
>> +{
>> + struct prueth_emac *emac = netdev_priv(dev);
>> + unsigned int q_idx;
>> + int nxmit = 0;
>> + int i;
>> +
>> + q_idx = smp_processor_id() % emac->tx_ch_num;
>> +
>> + if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
>> + return -EINVAL;
>> +
>> + for (i = 0; i < n; i++) {
>> + struct xdp_frame *xdpf = frames[i];
>> + int err;
>> +
>> + err = emac_xmit_xdp_frame(emac, xdpf, NULL, q_idx);
>> + if (err != ICSSG_XDP_TX)
>> + break;
>> + nxmit++;
>> + }
>> +
>> + return nxmit;
>> +}
>> +
>> +/**
>> + * emac_xdp_setup - add/remove an XDP program
>> + * @emac: emac device
>> + * @bpf: XDP program
>> + *
>> + * Return: Always 0 (Success)
>> + **/
>> +static int emac_xdp_setup(struct prueth_emac *emac, struct netdev_bpf *bpf)
>> +{
>> + struct bpf_prog *prog = bpf->prog;
>> + xdp_features_t val;
>> +
>> + val = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
>> + NETDEV_XDP_ACT_NDO_XMIT;
>> + xdp_set_features_flag(emac->ndev, val);
>> +
>> + if (!emac->xdpi.prog && !prog)
>> + return 0;
>> +
>> + WRITE_ONCE(emac->xdp_prog, prog);
>> +
>> + xdp_attachment_setup(&emac->xdpi, bpf);
>> +
>> + return 0;
>> +}
>> +
>> +/**
>> + * emac_ndo_bpf - implements ndo_bpf for icssg_prueth
>> + * @ndev: network adapter device
>> + * @bpf: XDP program
>> + *
>> + * Return: 0 on success, error code on failure.
>> + **/
>> +static int emac_ndo_bpf(struct net_device *ndev, struct netdev_bpf *bpf)
>> +{
>> + struct prueth_emac *emac = netdev_priv(ndev);
>> +
>> + switch (bpf->command) {
>> + case XDP_SETUP_PROG:
>> + return emac_xdp_setup(emac, bpf);
>> + default:
>> + return -EINVAL;
>> + }
>> +}
>> +
>> static const struct net_device_ops emac_netdev_ops = {
>> .ndo_open = emac_ndo_open,
>> .ndo_stop = emac_ndo_stop,
>> @@ -1038,6 +1155,8 @@ static const struct net_device_ops emac_netdev_ops = {
>> .ndo_fix_features = emac_ndo_fix_features,
>> .ndo_vlan_rx_add_vid = emac_ndo_vlan_rx_add_vid,
>> .ndo_vlan_rx_kill_vid = emac_ndo_vlan_rx_del_vid,
>> + .ndo_bpf = emac_ndo_bpf,
>> + .ndo_xdp_xmit = emac_xdp_xmit,
>> };
>>
>> static int prueth_netdev_init(struct prueth *prueth,
>> @@ -1066,6 +1185,8 @@ static int prueth_netdev_init(struct prueth *prueth,
>> emac->prueth = prueth;
>> emac->ndev = ndev;
>> emac->port_id = port;
>> + emac->xdp_prog = NULL;
>> + emac->ndev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS;
>> emac->cmd_wq = create_singlethread_workqueue("icssg_cmd_wq");
>> if (!emac->cmd_wq) {
>> ret = -ENOMEM;
>> diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.h b/drivers/net/ethernet/ti/icssg/icssg_prueth.h
>> index 3bbabd007129..0675919beb94 100644
>> --- a/drivers/net/ethernet/ti/icssg/icssg_prueth.h
>> +++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.h
>> @@ -8,6 +8,8 @@
>> #ifndef __NET_TI_ICSSG_PRUETH_H
>> #define __NET_TI_ICSSG_PRUETH_H
>>
>> +#include <linux/bpf.h>
>> +#include <linux/bpf_trace.h>
>> #include <linux/etherdevice.h>
>> #include <linux/genalloc.h>
>> #include <linux/if_vlan.h>
>> @@ -134,6 +136,7 @@ struct prueth_rx_chn {
>> unsigned int irq[ICSSG_MAX_RFLOWS]; /* separate irq per flow */
>> char name[32];
>> struct page_pool *pg_pool;
>> + struct xdp_rxq_info xdp_rxq;
>> };
>>
>> enum prueth_swdata_type {
>> @@ -141,6 +144,7 @@ enum prueth_swdata_type {
>> PRUETH_SWDATA_SKB,
>> PRUETH_SWDATA_PAGE,
>> PRUETH_SWDATA_CMD,
>> + PRUETH_SWDATA_XDPF,
>> };
>>
>> struct prueth_swdata {
>> @@ -149,6 +153,7 @@ struct prueth_swdata {
>> struct sk_buff *skb;
>> struct page *page;
>> u32 cmd;
>> + struct xdp_frame *xdpf;
>> } data;
>> };
>>
>> @@ -159,6 +164,12 @@ struct prueth_swdata {
>>
>> #define PRUETH_MAX_TX_TS_REQUESTS 50 /* Max simultaneous TX_TS requests */
>>
>> +/* XDP BPF state */
>> +#define ICSSG_XDP_PASS 0
>> +#define ICSSG_XDP_CONSUMED BIT(0)
>> +#define ICSSG_XDP_TX BIT(1)
>> +#define ICSSG_XDP_REDIR BIT(2)
>> +
>> /* Minimum coalesce time in usecs for both Tx and Rx */
>> #define ICSSG_MIN_COALESCE_USECS 20
>>
>> @@ -227,6 +238,8 @@ struct prueth_emac {
>> unsigned long rx_pace_timeout_ns;
>>
>> struct netdev_hw_addr_list vlan_mcast_list[MAX_VLAN_ID];
>> + struct bpf_prog *xdp_prog;
>> + struct xdp_attachment_info xdpi;
>> };
>>
>> /* The buf includes headroom compatible with both skb and xdpf */
>> @@ -465,5 +478,9 @@ void prueth_put_cores(struct prueth *prueth, int slice);
>>
>> /* Revision specific helper */
>> u64 icssg_ts_to_ns(u32 hi_sw, u32 hi, u32 lo, u32 cycle_time_ns);
>> +int emac_xmit_xdp_frame(struct prueth_emac *emac,
>> + struct xdp_frame *xdpf,
>> + struct page *page,
>> + unsigned int q_idx);
>>
>> #endif /* __NET_TI_ICSSG_PRUETH_H */
>
On Mon, Feb 24, 2025 at 04:31:02PM +0530, Meghana Malladi wrote:
> @@ -541,7 +558,153 @@ void emac_rx_timestamp(struct prueth_emac *emac,
> ssh->hwtstamp = ns_to_ktime(ns);
> }
>
> -static int emac_rx_packet(struct prueth_emac *emac, u32 flow_id)
> +/**
> + * emac_xmit_xdp_frame - transmits an XDP frame
> + * @emac: emac device
> + * @xdpf: data to transmit
> + * @page: page from page pool if already DMA mapped
> + * @q_idx: queue id
> + *
> + * Return: XDP state
> + */
> +int emac_xmit_xdp_frame(struct prueth_emac *emac,
> + struct xdp_frame *xdpf,
> + struct page *page,
> + unsigned int q_idx)
> +{
> + struct cppi5_host_desc_t *first_desc;
> + struct net_device *ndev = emac->ndev;
> + struct prueth_tx_chn *tx_chn;
> + dma_addr_t desc_dma, buf_dma;
> + struct prueth_swdata *swdata;
> + u32 *epib;
> + int ret;
> +
> + void *data = xdpf->data;
> + u32 pkt_len = xdpf->len;
Get rid of these variables?
> +
> + if (q_idx >= PRUETH_MAX_TX_QUEUES) {
> + netdev_err(ndev, "xdp tx: invalid q_id %d\n", q_idx);
> + return ICSSG_XDP_CONSUMED; /* drop */
> + }
> +
> + tx_chn = &emac->tx_chns[q_idx];
> +
> + first_desc = k3_cppi_desc_pool_alloc(tx_chn->desc_pool);
> + if (!first_desc) {
> + netdev_dbg(ndev, "xdp tx: failed to allocate descriptor\n");
> + goto drop_free_descs; /* drop */
> + }
> +
> + if (page) { /* already DMA mapped by page_pool */
> + buf_dma = page_pool_get_dma_addr(page);
> + buf_dma += xdpf->headroom + sizeof(struct xdp_frame);
> + } else { /* Map the linear buffer */
> + buf_dma = dma_map_single(tx_chn->dma_dev, data, pkt_len, DMA_TO_DEVICE);
> + if (dma_mapping_error(tx_chn->dma_dev, buf_dma)) {
> + netdev_err(ndev, "xdp tx: failed to map data buffer\n");
> + goto drop_free_descs; /* drop */
> + }
> + }
> +
> + cppi5_hdesc_init(first_desc, CPPI5_INFO0_HDESC_EPIB_PRESENT,
> + PRUETH_NAV_PS_DATA_SIZE);
> + cppi5_hdesc_set_pkttype(first_desc, 0);
> + epib = first_desc->epib;
> + epib[0] = 0;
> + epib[1] = 0;
> +
> + /* set dst tag to indicate internal qid at the firmware which is at
> + * bit8..bit15. bit0..bit7 indicates port num for directed
> + * packets in case of switch mode operation
> + */
> + cppi5_desc_set_tags_ids(&first_desc->hdr, 0, (emac->port_id | (q_idx << 8)));
> + k3_udma_glue_tx_dma_to_cppi5_addr(tx_chn->tx_chn, &buf_dma);
> + cppi5_hdesc_attach_buf(first_desc, buf_dma, pkt_len, buf_dma, pkt_len);
> + swdata = cppi5_hdesc_get_swdata(first_desc);
> + if (page) {
> + swdata->type = PRUETH_SWDATA_PAGE;
> + swdata->data.page = page;
> + } else {
> + swdata->type = PRUETH_SWDATA_XDPF;
> + swdata->data.xdpf = xdpf;
> + }
> +
> + cppi5_hdesc_set_pktlen(first_desc, pkt_len);
> + desc_dma = k3_cppi_desc_pool_virt2dma(tx_chn->desc_pool, first_desc);
> +
> + ret = k3_udma_glue_push_tx_chn(tx_chn->tx_chn, first_desc, desc_dma);
> + if (ret) {
> + netdev_err(ndev, "xdp tx: push failed: %d\n", ret);
> + goto drop_free_descs;
> + }
> +
> + return ICSSG_XDP_TX;
> +
> +drop_free_descs:
> + prueth_xmit_free(tx_chn, first_desc);
> + return ICSSG_XDP_CONSUMED;
> +}
> +EXPORT_SYMBOL_GPL(emac_xmit_xdp_frame);
> +
> +/**
> + * emac_run_xdp - run an XDP program
> + * @emac: emac device
> + * @xdp: XDP buffer containing the frame
> + * @page: page with RX data if already DMA mapped
> + *
> + * Return: XDP state
> + */
> +static int emac_run_xdp(struct prueth_emac *emac, struct xdp_buff *xdp,
> + struct page *page)
> +{
> + struct net_device *ndev = emac->ndev;
> + int err, result = ICSSG_XDP_PASS;
> + struct bpf_prog *xdp_prog;
> + struct xdp_frame *xdpf;
> + int q_idx;
> + u32 act;
> +
> + xdp_prog = READ_ONCE(emac->xdp_prog);
> + act = bpf_prog_run_xdp(xdp_prog, xdp);
> + switch (act) {
> + case XDP_PASS:
> + break;
> + case XDP_TX:
> + /* Send packet to TX ring for immediate transmission */
> + xdpf = xdp_convert_buff_to_frame(xdp);
> + if (unlikely(!xdpf))
This is the second unlikely() macro which is added in this patchset.
The rule with likely/unlikely() is that it should only be added if it
likely makes a difference in benchmarking. Quite often the compiler
is able to predict that valid pointers are more likely than NULL
pointers so often these types of annotations don't make any difference
at all to the compiled code. But it depends on the compiler and the -O2
options.
> + goto drop;
> +
> + q_idx = smp_processor_id() % emac->tx_ch_num;
> + result = emac_xmit_xdp_frame(emac, xdpf, page, q_idx);
> + if (result == ICSSG_XDP_CONSUMED)
> + goto drop;
> + break;
> + case XDP_REDIRECT:
> + err = xdp_do_redirect(emac->ndev, xdp, xdp_prog);
> + if (err)
> + goto drop;
> + result = ICSSG_XDP_REDIR;
> + break;
> + default:
> + bpf_warn_invalid_xdp_action(emac->ndev, xdp_prog, act);
> + fallthrough;
> + case XDP_ABORTED:
> +drop:
> + trace_xdp_exception(emac->ndev, xdp_prog, act);
> + fallthrough; /* handle aborts by dropping packet */
> + case XDP_DROP:
> + ndev->stats.tx_dropped++;
> + result = ICSSG_XDP_CONSUMED;
> + page_pool_recycle_direct(emac->rx_chns.pg_pool, page);
> + break;
> + }
> +
> + return result;
> +}
> +
> +static int emac_rx_packet(struct prueth_emac *emac, u32 flow_id, int *xdp_state)
xdp_state should be a u32 because it's a bitfield. Bitfields are never
signed.
> {
> struct prueth_rx_chn *rx_chn = &emac->rx_chns;
> u32 buf_dma_len, pkt_len, port_id = 0;
> @@ -552,10 +715,12 @@ static int emac_rx_packet(struct prueth_emac *emac, u32 flow_id)
> struct page *page, *new_page;
> struct page_pool *pool;
> struct sk_buff *skb;
> + struct xdp_buff xdp;
> u32 *psdata;
> void *pa;
> int ret;
>
> + *xdp_state = 0;
> pool = rx_chn->pg_pool;
> ret = k3_udma_glue_pop_rx_chn(rx_chn->rx_chn, flow_id, &desc_dma);
> if (ret) {
> @@ -596,9 +761,21 @@ static int emac_rx_packet(struct prueth_emac *emac, u32 flow_id)
> goto requeue;
> }
>
> - /* prepare skb and send to n/w stack */
> pa = page_address(page);
> - skb = napi_build_skb(pa, PAGE_SIZE);
> + if (emac->xdp_prog) {
> + xdp_init_buff(&xdp, PAGE_SIZE, &rx_chn->xdp_rxq);
> + xdp_prepare_buff(&xdp, pa, PRUETH_HEADROOM, pkt_len, false);
> +
> + *xdp_state = emac_run_xdp(emac, &xdp, page);
> + if (*xdp_state == ICSSG_XDP_PASS)
> + skb = xdp_build_skb_from_buff(&xdp);
> + else
> + goto requeue;
> + } else {
> + /* prepare skb and send to n/w stack */
> + skb = napi_build_skb(pa, PAGE_SIZE);
> + }
> +
> if (!skb) {
> ndev->stats.rx_dropped++;
> page_pool_recycle_direct(pool, page);
> @@ -861,13 +1038,23 @@ static void prueth_tx_cleanup(void *data, dma_addr_t desc_dma)
> struct prueth_tx_chn *tx_chn = data;
> struct cppi5_host_desc_t *desc_tx;
> struct prueth_swdata *swdata;
> + struct xdp_frame *xdpf;
> struct sk_buff *skb;
>
> desc_tx = k3_cppi_desc_pool_dma2virt(tx_chn->desc_pool, desc_dma);
> swdata = cppi5_hdesc_get_swdata(desc_tx);
> - if (swdata->type == PRUETH_SWDATA_SKB) {
> +
> + switch (swdata->type) {
> + case PRUETH_SWDATA_SKB:
> skb = swdata->data.skb;
> dev_kfree_skb_any(skb);
> + break;
> + case PRUETH_SWDATA_XDPF:
> + xdpf = swdata->data.xdpf;
> + xdp_return_frame(xdpf);
> + break;
> + default:
> + break;
> }
>
> prueth_xmit_free(tx_chn, desc_tx);
> @@ -904,15 +1091,18 @@ int icssg_napi_rx_poll(struct napi_struct *napi_rx, int budget)
> PRUETH_RX_FLOW_DATA_SR1 : PRUETH_RX_FLOW_DATA;
> int flow = emac->is_sr1 ?
> PRUETH_MAX_RX_FLOWS_SR1 : PRUETH_MAX_RX_FLOWS;
> + int xdp_state_or = 0;
> int num_rx = 0;
> int cur_budget;
> + int xdp_state;
Both xdp_state_or and xdp_state should be u32 because they are bitfields.
regards,
dan carpenter
> int ret;
>
> while (flow--) {
> cur_budget = budget - num_rx;
>
> while (cur_budget--) {
> - ret = emac_rx_packet(emac, flow);
> + ret = emac_rx_packet(emac, flow, &xdp_state);
> + xdp_state_or |= xdp_state;
> if (ret)
> break;
> num_rx++;
On 2/26/2025 4:19 PM, Dan Carpenter wrote:
> On Mon, Feb 24, 2025 at 04: 31: 02PM +0530, Meghana Malladi wrote: > @@
> -541,7 +558,153 @@ void emac_rx_timestamp(struct prueth_emac *emac, >
> ssh->hwtstamp = ns_to_ktime(ns); > } > > -static int
> emac_rx_packet(struct prueth_emac
> ZjQcmQRYFpfptBannerStart
> This message was sent from outside of Texas Instruments.
> Do not click links or open attachments unless you recognize the source
> of this email and know the content is safe.
> Report Suspicious
> <https://us-phishalarm-ewt.proofpoint.com/EWT/v1/G3vK!
> uldgnXdPPE27auXFPdnpH8jx2nnu2fsVXLMVOyEVUH9IX54g6v7RJRENIKzAm7XCuLfioMeFBSH4bAdUdQTaEArV63odoRERqTN_5Pk$>
> ZjQcmQRYFpfptBannerEnd
>
> On Mon, Feb 24, 2025 at 04:31:02PM +0530, Meghana Malladi wrote:
>> @@ -541,7 +558,153 @@ void emac_rx_timestamp(struct prueth_emac *emac,
>> ssh->hwtstamp = ns_to_ktime(ns);
>> }
>>
>> -static int emac_rx_packet(struct prueth_emac *emac, u32 flow_id)
>> +/**
>> + * emac_xmit_xdp_frame - transmits an XDP frame
>> + * @emac: emac device
>> + * @xdpf: data to transmit
>> + * @page: page from page pool if already DMA mapped
>> + * @q_idx: queue id
>> + *
>> + * Return: XDP state
>> + */
>> +int emac_xmit_xdp_frame(struct prueth_emac *emac,
>> + struct xdp_frame *xdpf,
>> + struct page *page,
>> + unsigned int q_idx)
>> +{
>> + struct cppi5_host_desc_t *first_desc;
>> + struct net_device *ndev = emac->ndev;
>> + struct prueth_tx_chn *tx_chn;
>> + dma_addr_t desc_dma, buf_dma;
>> + struct prueth_swdata *swdata;
>> + u32 *epib;
>> + int ret;
>> +
>> + void *data = xdpf->data;
>> + u32 pkt_len = xdpf->len;
>
> Get rid of these variables?
>
Yeah ok
>> +
>> + if (q_idx >= PRUETH_MAX_TX_QUEUES) {
>> + netdev_err(ndev, "xdp tx: invalid q_id %d\n", q_idx);
>> + return ICSSG_XDP_CONSUMED; /* drop */
>> + }
>> +
>> + tx_chn = &emac->tx_chns[q_idx];
>> +
>> + first_desc = k3_cppi_desc_pool_alloc(tx_chn->desc_pool);
>> + if (!first_desc) {
>> + netdev_dbg(ndev, "xdp tx: failed to allocate descriptor\n");
>> + goto drop_free_descs; /* drop */
>> + }
>> +
>> + if (page) { /* already DMA mapped by page_pool */
>> + buf_dma = page_pool_get_dma_addr(page);
>> + buf_dma += xdpf->headroom + sizeof(struct xdp_frame);
>> + } else { /* Map the linear buffer */
>> + buf_dma = dma_map_single(tx_chn->dma_dev, data, pkt_len, DMA_TO_DEVICE);
>> + if (dma_mapping_error(tx_chn->dma_dev, buf_dma)) {
>> + netdev_err(ndev, "xdp tx: failed to map data buffer\n");
>> + goto drop_free_descs; /* drop */
>> + }
>> + }
>> +
>> + cppi5_hdesc_init(first_desc, CPPI5_INFO0_HDESC_EPIB_PRESENT,
>> + PRUETH_NAV_PS_DATA_SIZE);
>> + cppi5_hdesc_set_pkttype(first_desc, 0);
>> + epib = first_desc->epib;
>> + epib[0] = 0;
>> + epib[1] = 0;
>> +
>> + /* set dst tag to indicate internal qid at the firmware which is at
>> + * bit8..bit15. bit0..bit7 indicates port num for directed
>> + * packets in case of switch mode operation
>> + */
>> + cppi5_desc_set_tags_ids(&first_desc->hdr, 0, (emac->port_id | (q_idx << 8)));
>> + k3_udma_glue_tx_dma_to_cppi5_addr(tx_chn->tx_chn, &buf_dma);
>> + cppi5_hdesc_attach_buf(first_desc, buf_dma, pkt_len, buf_dma, pkt_len);
>> + swdata = cppi5_hdesc_get_swdata(first_desc);
>> + if (page) {
>> + swdata->type = PRUETH_SWDATA_PAGE;
>> + swdata->data.page = page;
>> + } else {
>> + swdata->type = PRUETH_SWDATA_XDPF;
>> + swdata->data.xdpf = xdpf;
>> + }
>> +
>> + cppi5_hdesc_set_pktlen(first_desc, pkt_len);
>> + desc_dma = k3_cppi_desc_pool_virt2dma(tx_chn->desc_pool, first_desc);
>> +
>> + ret = k3_udma_glue_push_tx_chn(tx_chn->tx_chn, first_desc, desc_dma);
>> + if (ret) {
>> + netdev_err(ndev, "xdp tx: push failed: %d\n", ret);
>> + goto drop_free_descs;
>> + }
>> +
>> + return ICSSG_XDP_TX;
>> +
>> +drop_free_descs:
>> + prueth_xmit_free(tx_chn, first_desc);
>> + return ICSSG_XDP_CONSUMED;
>> +}
>> +EXPORT_SYMBOL_GPL(emac_xmit_xdp_frame);
>> +
>> +/**
>> + * emac_run_xdp - run an XDP program
>> + * @emac: emac device
>> + * @xdp: XDP buffer containing the frame
>> + * @page: page with RX data if already DMA mapped
>> + *
>> + * Return: XDP state
>> + */
>> +static int emac_run_xdp(struct prueth_emac *emac, struct xdp_buff *xdp,
>> + struct page *page)
>> +{
>> + struct net_device *ndev = emac->ndev;
>> + int err, result = ICSSG_XDP_PASS;
>> + struct bpf_prog *xdp_prog;
>> + struct xdp_frame *xdpf;
>> + int q_idx;
>> + u32 act;
>> +
>> + xdp_prog = READ_ONCE(emac->xdp_prog);
>> + act = bpf_prog_run_xdp(xdp_prog, xdp);
>> + switch (act) {
>> + case XDP_PASS:
>> + break;
>> + case XDP_TX:
>> + /* Send packet to TX ring for immediate transmission */
>> + xdpf = xdp_convert_buff_to_frame(xdp);
>> + if (unlikely(!xdpf))
>
> This is the second unlikely() macro which is added in this patchset.
> The rule with likely/unlikely() is that it should only be added if it
> likely makes a difference in benchmarking. Quite often the compiler
> is able to predict that valid pointers are more likely than NULL
> pointers so often these types of annotations don't make any difference
> at all to the compiled code. But it depends on the compiler and the -O2
> options.
>
Do correct me if I am wrong, but from my understanding, the XDP feature
depends a lot on performance and benchmarking, and having unlikely() does
make a difference. At least in all the other drivers I see this being
used for XDP.
>> + goto drop;
>> +
>> + q_idx = smp_processor_id() % emac->tx_ch_num;
>> + result = emac_xmit_xdp_frame(emac, xdpf, page, q_idx);
>> + if (result == ICSSG_XDP_CONSUMED)
>> + goto drop;
>> + break;
>> + case XDP_REDIRECT:
>> + err = xdp_do_redirect(emac->ndev, xdp, xdp_prog);
>> + if (err)
>> + goto drop;
>> + result = ICSSG_XDP_REDIR;
>> + break;
>> + default:
>> + bpf_warn_invalid_xdp_action(emac->ndev, xdp_prog, act);
>> + fallthrough;
>> + case XDP_ABORTED:
>> +drop:
>> + trace_xdp_exception(emac->ndev, xdp_prog, act);
>> + fallthrough; /* handle aborts by dropping packet */
>> + case XDP_DROP:
>> + ndev->stats.tx_dropped++;
>> + result = ICSSG_XDP_CONSUMED;
>> + page_pool_recycle_direct(emac->rx_chns.pg_pool, page);
>> + break;
>> + }
>> +
>> + return result;
>> +}
>> +
>> +static int emac_rx_packet(struct prueth_emac *emac, u32 flow_id, int *xdp_state)
>
>
> xdp_state should be a u32 because it's a bitfield. Bitfields are never
> signed.
Ok I will update it.
>
>> {
>> struct prueth_rx_chn *rx_chn = &emac->rx_chns;
>> u32 buf_dma_len, pkt_len, port_id = 0;
>> @@ -552,10 +715,12 @@ static int emac_rx_packet(struct prueth_emac *emac, u32 flow_id)
>> struct page *page, *new_page;
>> struct page_pool *pool;
>> struct sk_buff *skb;
>> + struct xdp_buff xdp;
>> u32 *psdata;
>> void *pa;
>> int ret;
>>
>> + *xdp_state = 0;
>> pool = rx_chn->pg_pool;
>> ret = k3_udma_glue_pop_rx_chn(rx_chn->rx_chn, flow_id, &desc_dma);
>> if (ret) {
>> @@ -596,9 +761,21 @@ static int emac_rx_packet(struct prueth_emac *emac, u32 flow_id)
>> goto requeue;
>> }
>>
>> - /* prepare skb and send to n/w stack */
>> pa = page_address(page);
>> - skb = napi_build_skb(pa, PAGE_SIZE);
>> + if (emac->xdp_prog) {
>> + xdp_init_buff(&xdp, PAGE_SIZE, &rx_chn->xdp_rxq);
>> + xdp_prepare_buff(&xdp, pa, PRUETH_HEADROOM, pkt_len, false);
>> +
>> + *xdp_state = emac_run_xdp(emac, &xdp, page);
>> + if (*xdp_state == ICSSG_XDP_PASS)
>> + skb = xdp_build_skb_from_buff(&xdp);
>> + else
>> + goto requeue;
>> + } else {
>> + /* prepare skb and send to n/w stack */
>> + skb = napi_build_skb(pa, PAGE_SIZE);
>> + }
>> +
>> if (!skb) {
>> ndev->stats.rx_dropped++;
>> page_pool_recycle_direct(pool, page);
>> @@ -861,13 +1038,23 @@ static void prueth_tx_cleanup(void *data, dma_addr_t desc_dma)
>> struct prueth_tx_chn *tx_chn = data;
>> struct cppi5_host_desc_t *desc_tx;
>> struct prueth_swdata *swdata;
>> + struct xdp_frame *xdpf;
>> struct sk_buff *skb;
>>
>> desc_tx = k3_cppi_desc_pool_dma2virt(tx_chn->desc_pool, desc_dma);
>> swdata = cppi5_hdesc_get_swdata(desc_tx);
>> - if (swdata->type == PRUETH_SWDATA_SKB) {
>> +
>> + switch (swdata->type) {
>> + case PRUETH_SWDATA_SKB:
>> skb = swdata->data.skb;
>> dev_kfree_skb_any(skb);
>> + break;
>> + case PRUETH_SWDATA_XDPF:
>> + xdpf = swdata->data.xdpf;
>> + xdp_return_frame(xdpf);
>> + break;
>> + default:
>> + break;
>> }
>>
>> prueth_xmit_free(tx_chn, desc_tx);
>> @@ -904,15 +1091,18 @@ int icssg_napi_rx_poll(struct napi_struct *napi_rx, int budget)
>> PRUETH_RX_FLOW_DATA_SR1 : PRUETH_RX_FLOW_DATA;
>> int flow = emac->is_sr1 ?
>> PRUETH_MAX_RX_FLOWS_SR1 : PRUETH_MAX_RX_FLOWS;
>> + int xdp_state_or = 0;
>> int num_rx = 0;
>> int cur_budget;
>> + int xdp_state;
>
> Both xdp_state_or and xdp_state should be u32 because they are bitfields.
>
Ok I will update it.
> regards,
> dan carpenter
>
>> int ret;
>>
>> while (flow--) {
>> cur_budget = budget - num_rx;
>>
>> while (cur_budget--) {
>> - ret = emac_rx_packet(emac, flow);
>> + ret = emac_rx_packet(emac, flow, &xdp_state);
>> + xdp_state_or |= xdp_state;
>> if (ret)
>> break;
>> num_rx++;
>
On Mon, Mar 03, 2025 at 05:36:41PM +0530, Malladi, Meghana wrote:
> > > +static int emac_run_xdp(struct prueth_emac *emac, struct xdp_buff *xdp,
> > > + struct page *page)
> > > +{
> > > + struct net_device *ndev = emac->ndev;
> > > + int err, result = ICSSG_XDP_PASS;
> > > + struct bpf_prog *xdp_prog;
> > > + struct xdp_frame *xdpf;
> > > + int q_idx;
> > > + u32 act;
> > > +
> > > + xdp_prog = READ_ONCE(emac->xdp_prog);
> > > + act = bpf_prog_run_xdp(xdp_prog, xdp);
> > > + switch (act) {
> > > + case XDP_PASS:
> > > + break;
> > > + case XDP_TX:
> > > + /* Send packet to TX ring for immediate transmission */
> > > + xdpf = xdp_convert_buff_to_frame(xdp);
> > > + if (unlikely(!xdpf))
> >
> > This is the second unlikely() macro which is added in this patchset.
> > The rule with likely/unlikely() is that it should only be added if it
> > likely makes a difference in benchmarking. Quite often the compiler
> > is able to predict that valid pointers are more likely than NULL
> > pointers so often these types of annotations don't make any difference
> > at all to the compiled code. But it depends on the compiler and the -O2
> > options.
> >
>
> Do correct me if I am wrong, but from my understanding, XDP feature depends
> alot of performance and benchmarking and having unlikely does make a
> difference. Atleast in all the other drivers I see this being used for XDP.
>
Which compiler are you on when you say that "having unlikely does make a
difference"?
I'm on gcc version 14.2.0 (Debian 14.2.0-16) and it doesn't make a
difference to the compiled code. This matches what one would expect from
a compiler. Valid pointers are fast path and NULL pointers are slow path.
Adding an unlikely() is a micro optimization. There are so many other
things you can do to speed up the code. I wouldn't start with that.
regards,
dan
On 3/3/2025 6:01 PM, Dan Carpenter wrote:
> On Mon, Mar 03, 2025 at 05: 36: 41PM +0530, Malladi, Meghana wrote: > >
> > +static int emac_run_xdp(struct prueth_emac *emac, struct xdp_buff
> *xdp, > > > + struct page *page) > > > +{ > > > + struct net_device
> ZjQcmQRYFpfptBannerStart
> This message was sent from outside of Texas Instruments.
> Do not click links or open attachments unless you recognize the source
> of this email and know the content is safe.
> Report Suspicious
> <https://us-phishalarm-ewt.proofpoint.com/EWT/v1/G3vK!
> uldqV3eFFkc7oMXFHHkDX4AFLVsE3ldskf6bPMMFmxDOsNtMfZjUscGelUkBFpAeybNre38L_c2LiiUb7AZxLvAeqSk9ifgbPE1AYFU$>
> ZjQcmQRYFpfptBannerEnd
>
> On Mon, Mar 03, 2025 at 05:36:41PM +0530, Malladi, Meghana wrote:
>> > > +static int emac_run_xdp(struct prueth_emac *emac, struct xdp_buff *xdp,
>> > > + struct page *page)
>> > > +{
>> > > + struct net_device *ndev = emac->ndev;
>> > > + int err, result = ICSSG_XDP_PASS;
>> > > + struct bpf_prog *xdp_prog;
>> > > + struct xdp_frame *xdpf;
>> > > + int q_idx;
>> > > + u32 act;
>> > > +
>> > > + xdp_prog = READ_ONCE(emac->xdp_prog);
>> > > + act = bpf_prog_run_xdp(xdp_prog, xdp);
>> > > + switch (act) {
>> > > + case XDP_PASS:
>> > > + break;
>> > > + case XDP_TX:
>> > > + /* Send packet to TX ring for immediate transmission */
>> > > + xdpf = xdp_convert_buff_to_frame(xdp);
>> > > + if (unlikely(!xdpf))
>> >
>> > This is the second unlikely() macro which is added in this patchset.
>> > The rule with likely/unlikely() is that it should only be added if it
>> > likely makes a difference in benchmarking. Quite often the compiler
>> > is able to predict that valid pointers are more likely than NULL
>> > pointers so often these types of annotations don't make any difference
>> > at all to the compiled code. But it depends on the compiler and the -O2
>> > options.
>> >
>>
>> Do correct me if I am wrong, but from my understanding, XDP feature depends
>> alot of performance and benchmarking and having unlikely does make a
>> difference. Atleast in all the other drivers I see this being used for XDP.
>>
>
> Which compiler are you on when you say that "having unlikely does make a
> difference"?
I'm on gcc version 10.3.1.
>
> I'm on gcc version 14.2.0 (Debian 14.2.0-16) and it doesn't make a
> difference to the compiled code. This matches what one would expect from
> a compiler. Valid pointers are fast path and NULL pointers are slow path.
>
Can you tell me how you verified this? I actually don't know what
level of optimization to expect from a compiler. I said so because I
have checked other drivers which implemented XDP and unlikely() is used
everywhere. But now I understand it's not the driver but the
compiler that plays the major role in defining the optimization.
> Adding an unlikely() is a micro optimization. There are so many other
> things you can do to speed up the code. I wouldn't start with that.
>
Ok, if you believe that unlikely() is doing more harm than good, I am ok
with dropping them.
> regards,
> dan
>
What I mean is just compile the .o file with and without the unlikely(). $ md5sum drivers/net/ethernet/ti/icssg/icssg_common.o* 2de875935222b9ecd8483e61848c4fc9 drivers/net/ethernet/ti/icssg/icssg_common.o.annotation 2de875935222b9ecd8483e61848c4fc9 drivers/net/ethernet/ti/icssg/icssg_common.o.no_anotation Generally the rule is that you should leave likely/unlikely() annotations out unless it's going to make a difference on a benchmark. I'm not going to jump down people's throat about this, and if you want to leave it, it's fine. But it just struck me as weird so that's why I commented on it. regards, dan carpenter diff --git a/drivers/net/ethernet/ti/icssg/icssg_common.c b/drivers/net/ethernet/ti/icssg/icssg_common.c index 34d16e00c2ec..3db5bae44e61 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_common.c +++ b/drivers/net/ethernet/ti/icssg/icssg_common.c @@ -672,7 +672,7 @@ static int emac_run_xdp(struct prueth_emac *emac, struct xdp_buff *xdp, case XDP_TX: /* Send packet to TX ring for immediate transmission */ xdpf = xdp_convert_buff_to_frame(xdp); - if (unlikely(!xdpf)) + if (!xdpf) goto drop; q_idx = smp_processor_id() % emac->tx_ch_num;
Hi Dan,
On 3/3/2025 7:38 PM, Dan Carpenter wrote:
> What I mean is just compile the .o file with and without the unlikely().
> $ md5sum drivers/net/ethernet/ti/icssg/icssg_common. o*
> 2de875935222b9ecd8483e61848c4fc9 drivers/net/ethernet/ti/icssg/
> icssg_common. o. annotation 2de875935222b9ecd8483e61848c4fc9
> ZjQcmQRYFpfptBannerStart
> This message was sent from outside of Texas Instruments.
> Do not click links or open attachments unless you recognize the source
> of this email and know the content is safe.
> Report Suspicious
> <https://us-phishalarm-ewt.proofpoint.com/EWT/v1/G3vK!
> uldq3TevVoc7KuXEXHnDf-
> TXtuZ0bON9iO0jTE7PyIS1jjfs_CzpvIiMi93PVt0MVDzjHGQSK__vY_-6rO7q86rFmBMGW4SSqK5pvNE$>
> ZjQcmQRYFpfptBannerEnd
>
> What I mean is just compile the .o file with and without the unlikely().
>
> $ md5sum drivers/net/ethernet/ti/icssg/icssg_common.o*
> 2de875935222b9ecd8483e61848c4fc9 drivers/net/ethernet/ti/icssg/icssg_common.o.annotation
> 2de875935222b9ecd8483e61848c4fc9 drivers/net/ethernet/ti/icssg/icssg_common.o.no_anotation
>
> Generally the rule is that you should leave likely/unlikely() annotations
> out unless it's going to make a difference on a benchmark. I'm not going
> to jump down people's throat about this, and if you want to leave it,
> it's fine. But it just struct me as weird so that's why I commented on
> it.
>
I have done some performance tests to see if unlikely() is gonna make
any impact and I see around ~9000 pps and 6Mbps drop without unlikely()
for small packet sizes (60 Bytes)
You can see summary of the tests here:
packet size with unlikely(pps) without unlikely(pps) regression
60 462377 453251 9126
80 403020 399372 3648
96 402059 396881 5178
120 392725 391312 4413
140 327706 327099 607
packet size with unlikely(Mbps) without unlikely(Mbps) regression
60 311 305 6
80 335 332 3
96 386 381 5
120 456 451 5
140 430 429 1
For more details on the logs, please refer:
https://gist.github.com/MeghanaMalladiTI/cc6cc7709791376cb486eb1222de67be
Regards,
Meghana Malladi
> regards,
> dan carpenter
>
> diff --git a/drivers/net/ethernet/ti/icssg/icssg_common.c b/drivers/net/ethernet/ti/icssg/icssg_common.c
> index 34d16e00c2ec..3db5bae44e61 100644
> --- a/drivers/net/ethernet/ti/icssg/icssg_common.c
> +++ b/drivers/net/ethernet/ti/icssg/icssg_common.c
> @@ -672,7 +672,7 @@ static int emac_run_xdp(struct prueth_emac *emac, struct xdp_buff *xdp,
> case XDP_TX:
> /* Send packet to TX ring for immediate transmission */
> xdpf = xdp_convert_buff_to_frame(xdp);
> - if (unlikely(!xdpf))
> + if (!xdpf)
> goto drop;
>
> q_idx = smp_processor_id() % emac->tx_ch_num;
>
On Wed, Mar 05, 2025 at 02:53:07PM +0530, Malladi, Meghana wrote: > Hi Dan, > > On 3/3/2025 7:38 PM, Dan Carpenter wrote: > > What I mean is just compile the .o file with and without the unlikely(). > > $ md5sum drivers/net/ethernet/ti/icssg/icssg_common. o* > > 2de875935222b9ecd8483e61848c4fc9 drivers/net/ethernet/ti/icssg/ > > icssg_common. o. annotation 2de875935222b9ecd8483e61848c4fc9 > > ZjQcmQRYFpfptBannerStart > > This message was sent from outside of Texas Instruments. > > Do not click links or open attachments unless you recognize the source > > of this email and know the content is safe. > > Report Suspicious > > <https://us-phishalarm-ewt.proofpoint.com/EWT/v1/G3vK! > > uldq3TevVoc7KuXEXHnDf- TXtuZ0bON9iO0jTE7PyIS1jjfs_CzpvIiMi93PVt0MVDzjHGQSK__vY_-6rO7q86rFmBMGW4SSqK5pvNE$> > > ZjQcmQRYFpfptBannerEnd > > > > What I mean is just compile the .o file with and without the unlikely(). > > > > $ md5sum drivers/net/ethernet/ti/icssg/icssg_common.o* > > 2de875935222b9ecd8483e61848c4fc9 drivers/net/ethernet/ti/icssg/icssg_common.o.annotation > > 2de875935222b9ecd8483e61848c4fc9 drivers/net/ethernet/ti/icssg/icssg_common.o.no_anotation > > > > Generally the rule is that you should leave likely/unlikely() annotations > > out unless it's going to make a difference on a benchmark. I'm not going > > to jump down people's throat about this, and if you want to leave it, > > it's fine. But it just struct me as weird so that's why I commented on > > it. 
> > > > I have done some performance tests to see if unlikely() is gonna make any > impact and I see around ~9000 pps and 6Mbps drop without unlikely() for > small packet sizes (60 Bytes) > > You can see summary of the tests here: > > packet size with unlikely(pps) without unlikely(pps) regression > > 60 462377 453251 9126 > > 80 403020 399372 3648 > > 96 402059 396881 5178 > > 120 392725 391312 4413 > > 140 327706 327099 607 > > packet size with unlikely(Mbps) without unlikely(Mbps) regression > > 60 311 305 6 > > 80 335 332 3 > > 96 386 381 5 > > 120 456 451 5 > > 140 430 429 1 > > For more details on the logs, please refer:https://gist.github.com/MeghanaMalladiTI/cc6cc7709791376cb486eb1222de67be > Huh. That's very interesting. Fine, then. regards, dan carpenter
© 2016 - 2026 Red Hat, Inc.