drivers/net/ethernet/xilinx/xilinx_axienet.h | 6 +++ .../net/ethernet/xilinx/xilinx_axienet_main.c | 53 +++++++++++++++++++ 2 files changed, 59 insertions(+)
Add support to configure / report interrupt coalesce count and delay via
ethtool in DMAEngine flow.
Netperf numbers are poor with the non-dmaengine default values, so the
coalesce count and delay were tuned and separate default values were
defined for the dmaengine flow.
Netperf numbers and CPU utilisation change in DMAengine flow after
introducing coalescing with default parameters:
coalesce parameters:
Transfer type              Before (w/o coalescing)   After (with coalescing)
TCP Tx, CPU utilisation%   925, 27                   941, 22
TCP Rx, CPU utilisation%   607, 32                   741, 36
UDP Tx, CPU utilisation%   857, 31                   960, 28
UDP Rx, CPU utilisation%   762, 26                   783, 18
Above numbers are observed with 4x Cortex-a53.
Signed-off-by: Suraj Gupta <suraj.gupta2@amd.com>
---
This patch depends on the following AXI DMA dmaengine driver changes sent to
the dmaengine mailing list as a prerequisite series:
https://lore.kernel.org/all/20250525101617.1168991-1-suraj.gupta2@amd.com/
---
drivers/net/ethernet/xilinx/xilinx_axienet.h | 6 +++
.../net/ethernet/xilinx/xilinx_axienet_main.c | 53 +++++++++++++++++++
2 files changed, 59 insertions(+)
diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet.h b/drivers/net/ethernet/xilinx/xilinx_axienet.h
index 5ff742103beb..cdf6cbb6f2fd 100644
--- a/drivers/net/ethernet/xilinx/xilinx_axienet.h
+++ b/drivers/net/ethernet/xilinx/xilinx_axienet.h
@@ -126,6 +126,12 @@
#define XAXIDMA_DFT_TX_USEC 50
#define XAXIDMA_DFT_RX_USEC 16
+/* Default TX/RX Threshold and delay timer values for SGDMA mode with DMAEngine */
+#define XAXIDMAENGINE_DFT_TX_THRESHOLD 16
+#define XAXIDMAENGINE_DFT_TX_USEC 5
+#define XAXIDMAENGINE_DFT_RX_THRESHOLD 24
+#define XAXIDMAENGINE_DFT_RX_USEC 16
+
#define XAXIDMA_BD_CTRL_TXSOF_MASK 0x08000000 /* First tx packet */
#define XAXIDMA_BD_CTRL_TXEOF_MASK 0x04000000 /* Last tx packet */
#define XAXIDMA_BD_CTRL_ALL_MASK 0x0C000000 /* All control bits */
diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
index 1b7a653c1f4e..f9c7d90d4ecb 100644
--- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
+++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
@@ -1505,6 +1505,7 @@ static int axienet_init_dmaengine(struct net_device *ndev)
{
struct axienet_local *lp = netdev_priv(ndev);
struct skbuf_dma_descriptor *skbuf_dma;
+ struct dma_slave_config tx_config, rx_config;
int i, ret;
lp->tx_chan = dma_request_chan(lp->dev, "tx_chan0");
@@ -1520,6 +1521,22 @@ static int axienet_init_dmaengine(struct net_device *ndev)
goto err_dma_release_tx;
}
+ tx_config.coalesce_cnt = XAXIDMAENGINE_DFT_TX_THRESHOLD;
+ tx_config.coalesce_usecs = XAXIDMAENGINE_DFT_TX_USEC;
+ rx_config.coalesce_cnt = XAXIDMAENGINE_DFT_RX_THRESHOLD;
+ rx_config.coalesce_usecs = XAXIDMAENGINE_DFT_RX_USEC;
+
+ ret = dmaengine_slave_config(lp->tx_chan, &tx_config);
+ if (ret) {
+ dev_err(lp->dev, "Failed to configure Tx coalesce parameters\n");
+ goto err_dma_release_tx;
+ }
+ ret = dmaengine_slave_config(lp->rx_chan, &rx_config);
+ if (ret) {
+ dev_err(lp->dev, "Failed to configure Rx coalesce parameters\n");
+ goto err_dma_release_tx;
+ }
+
lp->tx_ring_tail = 0;
lp->tx_ring_head = 0;
lp->rx_ring_tail = 0;
@@ -2170,6 +2187,19 @@ axienet_ethtools_get_coalesce(struct net_device *ndev,
struct axienet_local *lp = netdev_priv(ndev);
u32 cr;
+ if (lp->use_dmaengine) {
+ struct dma_slave_caps tx_caps, rx_caps;
+
+ dma_get_slave_caps(lp->tx_chan, &tx_caps);
+ dma_get_slave_caps(lp->rx_chan, &rx_caps);
+
+ ecoalesce->tx_max_coalesced_frames = tx_caps.coalesce_cnt;
+ ecoalesce->tx_coalesce_usecs = tx_caps.coalesce_usecs;
+ ecoalesce->rx_max_coalesced_frames = rx_caps.coalesce_cnt;
+ ecoalesce->rx_coalesce_usecs = rx_caps.coalesce_usecs;
+ return 0;
+ }
+
ecoalesce->use_adaptive_rx_coalesce = lp->rx_dim_enabled;
spin_lock_irq(&lp->rx_cr_lock);
@@ -2233,6 +2263,29 @@ axienet_ethtools_set_coalesce(struct net_device *ndev,
return -EINVAL;
}
+ if (lp->use_dmaengine) {
+ struct dma_slave_config tx_cfg, rx_cfg;
+ int ret;
+
+ tx_cfg.coalesce_cnt = ecoalesce->tx_max_coalesced_frames;
+ tx_cfg.coalesce_usecs = ecoalesce->tx_coalesce_usecs;
+ rx_cfg.coalesce_cnt = ecoalesce->rx_max_coalesced_frames;
+ rx_cfg.coalesce_usecs = ecoalesce->rx_coalesce_usecs;
+
+ ret = dmaengine_slave_config(lp->tx_chan, &tx_cfg);
+ if (ret) {
+ NL_SET_ERR_MSG(extack, "failed to set tx coalesce parameters");
+ return ret;
+ }
+
+ ret = dmaengine_slave_config(lp->rx_chan, &rx_cfg);
+ if (ret) {
+ NL_SET_ERR_MSG(extack, "failed to set rx coalesce parameters");
+ return ret;
+ }
+ return 0;
+ }
+
if (new_dim && !old_dim) {
cr = axienet_calc_cr(lp, axienet_dim_coalesce_count_rx(lp),
ecoalesce->rx_coalesce_usecs);
--
2.25.1
On 5/25/25 06:22, Suraj Gupta wrote:
> Add support to configure / report interrupt coalesce count and delay via
> ethtool in DMAEngine flow.
> Netperf numbers are not good when using non-dmaengine default values,
> so tuned coalesce count and delay and defined separate default
> values in dmaengine flow.
>
> Netperf numbers and CPU utilisation change in DMAengine flow after
> introducing coalescing with default parameters:
> coalesce parameters:
> Transfer type Before(w/o coalescing) After(with coalescing)
> TCP Tx, CPU utilisation% 925, 27 941, 22
> TCP Rx, CPU utilisation% 607, 32 741, 36
> UDP Tx, CPU utilisation% 857, 31 960, 28
> UDP Rx, CPU utilisation% 762, 26 783, 18
>
> Above numbers are observed with 4x Cortex-a53.
How does this affect latency? I would expect these RX settings to
increase latency around 5-10x. I only use these settings with DIM since
it will disable coalescing during periods of light load for better
latency.
(of course the way to fix this in general is RSS or some other method
involving multiple queues).
> Signed-off-by: Suraj Gupta <suraj.gupta2@amd.com>
> ---
> This patch depend on following AXI DMA dmengine driver changes sent to
> dmaengine mailing list as pre-requisit series:
> https://lore.kernel.org/all/20250525101617.1168991-1-suraj.gupta2@amd.com/
> ---
> drivers/net/ethernet/xilinx/xilinx_axienet.h | 6 +++
> .../net/ethernet/xilinx/xilinx_axienet_main.c | 53 +++++++++++++++++++
> 2 files changed, 59 insertions(+)
>
> diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet.h b/drivers/net/ethernet/xilinx/xilinx_axienet.h
> index 5ff742103beb..cdf6cbb6f2fd 100644
> --- a/drivers/net/ethernet/xilinx/xilinx_axienet.h
> +++ b/drivers/net/ethernet/xilinx/xilinx_axienet.h
> @@ -126,6 +126,12 @@
> #define XAXIDMA_DFT_TX_USEC 50
> #define XAXIDMA_DFT_RX_USEC 16
>
> +/* Default TX/RX Threshold and delay timer values for SGDMA mode with DMAEngine */
> +#define XAXIDMAENGINE_DFT_TX_THRESHOLD 16
> +#define XAXIDMAENGINE_DFT_TX_USEC 5
> +#define XAXIDMAENGINE_DFT_RX_THRESHOLD 24
> +#define XAXIDMAENGINE_DFT_RX_USEC 16
> +
> #define XAXIDMA_BD_CTRL_TXSOF_MASK 0x08000000 /* First tx packet */
> #define XAXIDMA_BD_CTRL_TXEOF_MASK 0x04000000 /* Last tx packet */
> #define XAXIDMA_BD_CTRL_ALL_MASK 0x0C000000 /* All control bits */
> diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
> index 1b7a653c1f4e..f9c7d90d4ecb 100644
> --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
> +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
> @@ -1505,6 +1505,7 @@ static int axienet_init_dmaengine(struct net_device *ndev)
> {
> struct axienet_local *lp = netdev_priv(ndev);
> struct skbuf_dma_descriptor *skbuf_dma;
> + struct dma_slave_config tx_config, rx_config;
> int i, ret;
>
> lp->tx_chan = dma_request_chan(lp->dev, "tx_chan0");
> @@ -1520,6 +1521,22 @@ static int axienet_init_dmaengine(struct net_device *ndev)
> goto err_dma_release_tx;
> }
>
> + tx_config.coalesce_cnt = XAXIDMAENGINE_DFT_TX_THRESHOLD;
> + tx_config.coalesce_usecs = XAXIDMAENGINE_DFT_TX_USEC;
> + rx_config.coalesce_cnt = XAXIDMAENGINE_DFT_RX_THRESHOLD;
> + rx_config.coalesce_usecs = XAXIDMAENGINE_DFT_RX_USEC;
I think it would be clearer to just do something like
struct dma_slave_config tx_config = {
.coalesce_cnt = 16,
.coalesce_usecs = 5,
};
since these are only used once. And this ensures that you initialize the
whole struct.
But what tree are you using? I don't see these members on net-next or
dmaengine.
> + ret = dmaengine_slave_config(lp->tx_chan, &tx_config);
> + if (ret) {
> + dev_err(lp->dev, "Failed to configure Tx coalesce parameters\n");
> + goto err_dma_release_tx;
> + }
> + ret = dmaengine_slave_config(lp->rx_chan, &rx_config);
> + if (ret) {
> + dev_err(lp->dev, "Failed to configure Rx coalesce parameters\n");
> + goto err_dma_release_tx;
> + }
> +
> lp->tx_ring_tail = 0;
> lp->tx_ring_head = 0;
> lp->rx_ring_tail = 0;
> @@ -2170,6 +2187,19 @@ axienet_ethtools_get_coalesce(struct net_device *ndev,
> struct axienet_local *lp = netdev_priv(ndev);
> u32 cr;
>
> + if (lp->use_dmaengine) {
> + struct dma_slave_caps tx_caps, rx_caps;
> +
> + dma_get_slave_caps(lp->tx_chan, &tx_caps);
> + dma_get_slave_caps(lp->rx_chan, &rx_caps);
> +
> + ecoalesce->tx_max_coalesced_frames = tx_caps.coalesce_cnt;
> + ecoalesce->tx_coalesce_usecs = tx_caps.coalesce_usecs;
> + ecoalesce->rx_max_coalesced_frames = rx_caps.coalesce_cnt;
> + ecoalesce->rx_coalesce_usecs = rx_caps.coalesce_usecs;
> + return 0;
> + }
> +
> ecoalesce->use_adaptive_rx_coalesce = lp->rx_dim_enabled;
>
> spin_lock_irq(&lp->rx_cr_lock);
> @@ -2233,6 +2263,29 @@ axienet_ethtools_set_coalesce(struct net_device *ndev,
> return -EINVAL;
> }
>
> + if (lp->use_dmaengine) {
> + struct dma_slave_config tx_cfg, rx_cfg;
> + int ret;
> +
> + tx_cfg.coalesce_cnt = ecoalesce->tx_max_coalesced_frames;
> + tx_cfg.coalesce_usecs = ecoalesce->tx_coalesce_usecs;
> + rx_cfg.coalesce_cnt = ecoalesce->rx_max_coalesced_frames;
> + rx_cfg.coalesce_usecs = ecoalesce->rx_coalesce_usecs;
> +
> + ret = dmaengine_slave_config(lp->tx_chan, &tx_cfg);
> + if (ret) {
> + NL_SET_ERR_MSG(extack, "failed to set tx coalesce parameters");
> + return ret;
> + }
> +
> + ret = dmaengine_slave_config(lp->rx_chan, &rx_cfg);
> + if (ret) {
> + NL_SET_ERR_MSG(extack, "failed to set rx coalesce parameters");
> + return ret;
> + }
> + return 0;
> + }
> +
> if (new_dim && !old_dim) {
> cr = axienet_calc_cr(lp, axienet_dim_coalesce_count_rx(lp),
> ecoalesce->rx_coalesce_usecs);
Hi Suraj,
kernel test robot noticed the following build errors:
[auto build test ERROR on net-next/main]
url: https://github.com/intel-lab-lkp/linux/commits/Suraj-Gupta/net-xilinx-axienet-Configure-and-report-coalesce-parameters-in-DMAengine-flow/20250525-182400
base: net-next/main
patch link: https://lore.kernel.org/r/20250525102217.1181104-1-suraj.gupta2%40amd.com
patch subject: [PATCH net-next] net: xilinx: axienet: Configure and report coalesce parameters in DMAengine flow
config: alpha-allyesconfig (https://download.01.org/0day-ci/archive/20250526/202505260804.Mhztve8t-lkp@intel.com/config)
compiler: alpha-linux-gcc (GCC) 14.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20250526/202505260804.Mhztve8t-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202505260804.Mhztve8t-lkp@intel.com/
All errors (new ones prefixed by >>):
drivers/net/ethernet/xilinx/xilinx_axienet_main.c: In function 'axienet_init_dmaengine':
>> drivers/net/ethernet/xilinx/xilinx_axienet_main.c:1524:18: error: 'struct dma_slave_config' has no member named 'coalesce_cnt'
1524 | tx_config.coalesce_cnt = XAXIDMAENGINE_DFT_TX_THRESHOLD;
| ^
>> drivers/net/ethernet/xilinx/xilinx_axienet_main.c:1525:18: error: 'struct dma_slave_config' has no member named 'coalesce_usecs'
1525 | tx_config.coalesce_usecs = XAXIDMAENGINE_DFT_TX_USEC;
| ^
drivers/net/ethernet/xilinx/xilinx_axienet_main.c:1526:18: error: 'struct dma_slave_config' has no member named 'coalesce_cnt'
1526 | rx_config.coalesce_cnt = XAXIDMAENGINE_DFT_RX_THRESHOLD;
| ^
drivers/net/ethernet/xilinx/xilinx_axienet_main.c:1527:18: error: 'struct dma_slave_config' has no member named 'coalesce_usecs'
1527 | rx_config.coalesce_usecs = XAXIDMAENGINE_DFT_RX_USEC;
| ^
drivers/net/ethernet/xilinx/xilinx_axienet_main.c: In function 'axienet_ethtools_get_coalesce':
>> drivers/net/ethernet/xilinx/xilinx_axienet_main.c:2196:61: error: 'struct dma_slave_caps' has no member named 'coalesce_cnt'
2196 | ecoalesce->tx_max_coalesced_frames = tx_caps.coalesce_cnt;
| ^
>> drivers/net/ethernet/xilinx/xilinx_axienet_main.c:2197:55: error: 'struct dma_slave_caps' has no member named 'coalesce_usecs'
2197 | ecoalesce->tx_coalesce_usecs = tx_caps.coalesce_usecs;
| ^
drivers/net/ethernet/xilinx/xilinx_axienet_main.c:2198:61: error: 'struct dma_slave_caps' has no member named 'coalesce_cnt'
2198 | ecoalesce->rx_max_coalesced_frames = rx_caps.coalesce_cnt;
| ^
drivers/net/ethernet/xilinx/xilinx_axienet_main.c:2199:55: error: 'struct dma_slave_caps' has no member named 'coalesce_usecs'
2199 | ecoalesce->rx_coalesce_usecs = rx_caps.coalesce_usecs;
| ^
drivers/net/ethernet/xilinx/xilinx_axienet_main.c: In function 'axienet_ethtools_set_coalesce':
drivers/net/ethernet/xilinx/xilinx_axienet_main.c:2270:23: error: 'struct dma_slave_config' has no member named 'coalesce_cnt'
2270 | tx_cfg.coalesce_cnt = ecoalesce->tx_max_coalesced_frames;
| ^
drivers/net/ethernet/xilinx/xilinx_axienet_main.c:2271:23: error: 'struct dma_slave_config' has no member named 'coalesce_usecs'
2271 | tx_cfg.coalesce_usecs = ecoalesce->tx_coalesce_usecs;
| ^
drivers/net/ethernet/xilinx/xilinx_axienet_main.c:2272:23: error: 'struct dma_slave_config' has no member named 'coalesce_cnt'
2272 | rx_cfg.coalesce_cnt = ecoalesce->rx_max_coalesced_frames;
| ^
drivers/net/ethernet/xilinx/xilinx_axienet_main.c:2273:23: error: 'struct dma_slave_config' has no member named 'coalesce_usecs'
2273 | rx_cfg.coalesce_usecs = ecoalesce->rx_coalesce_usecs;
| ^
vim +1524 drivers/net/ethernet/xilinx/xilinx_axienet_main.c
1494
1495 /**
1496 * axienet_init_dmaengine - init the dmaengine code.
1497 * @ndev: Pointer to net_device structure
1498 *
1499 * Return: 0, on success.
1500 * non-zero error value on failure
1501 *
1502 * This is the dmaengine initialization code.
1503 */
1504 static int axienet_init_dmaengine(struct net_device *ndev)
1505 {
1506 struct axienet_local *lp = netdev_priv(ndev);
1507 struct skbuf_dma_descriptor *skbuf_dma;
1508 struct dma_slave_config tx_config, rx_config;
1509 int i, ret;
1510
1511 lp->tx_chan = dma_request_chan(lp->dev, "tx_chan0");
1512 if (IS_ERR(lp->tx_chan)) {
1513 dev_err(lp->dev, "No Ethernet DMA (TX) channel found\n");
1514 return PTR_ERR(lp->tx_chan);
1515 }
1516
1517 lp->rx_chan = dma_request_chan(lp->dev, "rx_chan0");
1518 if (IS_ERR(lp->rx_chan)) {
1519 ret = PTR_ERR(lp->rx_chan);
1520 dev_err(lp->dev, "No Ethernet DMA (RX) channel found\n");
1521 goto err_dma_release_tx;
1522 }
1523
> 1524 tx_config.coalesce_cnt = XAXIDMAENGINE_DFT_TX_THRESHOLD;
> 1525 tx_config.coalesce_usecs = XAXIDMAENGINE_DFT_TX_USEC;
> 1526 rx_config.coalesce_cnt = XAXIDMAENGINE_DFT_RX_THRESHOLD;
> 1527 rx_config.coalesce_usecs = XAXIDMAENGINE_DFT_RX_USEC;
1528
1529 ret = dmaengine_slave_config(lp->tx_chan, &tx_config);
1530 if (ret) {
1531 dev_err(lp->dev, "Failed to configure Tx coalesce parameters\n");
1532 goto err_dma_release_tx;
1533 }
1534 ret = dmaengine_slave_config(lp->rx_chan, &rx_config);
1535 if (ret) {
1536 dev_err(lp->dev, "Failed to configure Rx coalesce parameters\n");
1537 goto err_dma_release_tx;
1538 }
1539
1540 lp->tx_ring_tail = 0;
1541 lp->tx_ring_head = 0;
1542 lp->rx_ring_tail = 0;
1543 lp->rx_ring_head = 0;
1544 lp->tx_skb_ring = kcalloc(TX_BD_NUM_MAX, sizeof(*lp->tx_skb_ring),
1545 GFP_KERNEL);
1546 if (!lp->tx_skb_ring) {
1547 ret = -ENOMEM;
1548 goto err_dma_release_rx;
1549 }
1550 for (i = 0; i < TX_BD_NUM_MAX; i++) {
1551 skbuf_dma = kzalloc(sizeof(*skbuf_dma), GFP_KERNEL);
1552 if (!skbuf_dma) {
1553 ret = -ENOMEM;
1554 goto err_free_tx_skb_ring;
1555 }
1556 lp->tx_skb_ring[i] = skbuf_dma;
1557 }
1558
1559 lp->rx_skb_ring = kcalloc(RX_BUF_NUM_DEFAULT, sizeof(*lp->rx_skb_ring),
1560 GFP_KERNEL);
1561 if (!lp->rx_skb_ring) {
1562 ret = -ENOMEM;
1563 goto err_free_tx_skb_ring;
1564 }
1565 for (i = 0; i < RX_BUF_NUM_DEFAULT; i++) {
1566 skbuf_dma = kzalloc(sizeof(*skbuf_dma), GFP_KERNEL);
1567 if (!skbuf_dma) {
1568 ret = -ENOMEM;
1569 goto err_free_rx_skb_ring;
1570 }
1571 lp->rx_skb_ring[i] = skbuf_dma;
1572 }
1573 /* TODO: Instead of BD_NUM_DEFAULT use runtime support */
1574 for (i = 0; i < RX_BUF_NUM_DEFAULT; i++)
1575 axienet_rx_submit_desc(ndev);
1576 dma_async_issue_pending(lp->rx_chan);
1577
1578 return 0;
1579
1580 err_free_rx_skb_ring:
1581 for (i = 0; i < RX_BUF_NUM_DEFAULT; i++)
1582 kfree(lp->rx_skb_ring[i]);
1583 kfree(lp->rx_skb_ring);
1584 err_free_tx_skb_ring:
1585 for (i = 0; i < TX_BD_NUM_MAX; i++)
1586 kfree(lp->tx_skb_ring[i]);
1587 kfree(lp->tx_skb_ring);
1588 err_dma_release_rx:
1589 dma_release_channel(lp->rx_chan);
1590 err_dma_release_tx:
1591 dma_release_channel(lp->tx_chan);
1592 return ret;
1593 }
1594
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
© 2016 - 2025 Red Hat, Inc.