[PATCH v1 net-next] net: lan743x: configure interrupt moderation timers based on speed

Thangaraj Samynathan posted 1 patch 9 months, 1 week ago
drivers/net/ethernet/microchip/lan743x_main.c | 61 +++++++++++--------
drivers/net/ethernet/microchip/lan743x_main.h |  5 +-
2 files changed, 41 insertions(+), 25 deletions(-)
[PATCH v1 net-next] net: lan743x: configure interrupt moderation timers based on speed
Posted by Thangaraj Samynathan 9 months, 1 week ago
Configures the interrupt moderation timer value to 64us for 2.5G,
150us for 1G, 330us for 10/100M. Earlier this was 400us for all
speeds. This improvess UDP TX and Bidirectional performance to
2.3Gbps from 1.4Gbps in 2.5G. These values are derived after
experimenting with different values.

Signed-off-by: Thangaraj Samynathan <thangaraj.s@microchip.com>
---
 drivers/net/ethernet/microchip/lan743x_main.c | 61 +++++++++++--------
 drivers/net/ethernet/microchip/lan743x_main.h |  5 +-
 2 files changed, 41 insertions(+), 25 deletions(-)

diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c
index e2d6bfb5d693..6d570132f409 100644
--- a/drivers/net/ethernet/microchip/lan743x_main.c
+++ b/drivers/net/ethernet/microchip/lan743x_main.c
@@ -620,27 +620,6 @@ static int lan743x_intr_open(struct lan743x_adapter *adapter)
 		lan743x_csr_write(adapter, INT_VEC_EN_SET,
 				  INT_VEC_EN_(0));
 
-	if (!(adapter->csr.flags & LAN743X_CSR_FLAG_IS_A0)) {
-		lan743x_csr_write(adapter, INT_MOD_CFG0, LAN743X_INT_MOD);
-		lan743x_csr_write(adapter, INT_MOD_CFG1, LAN743X_INT_MOD);
-		lan743x_csr_write(adapter, INT_MOD_CFG2, LAN743X_INT_MOD);
-		lan743x_csr_write(adapter, INT_MOD_CFG3, LAN743X_INT_MOD);
-		lan743x_csr_write(adapter, INT_MOD_CFG4, LAN743X_INT_MOD);
-		lan743x_csr_write(adapter, INT_MOD_CFG5, LAN743X_INT_MOD);
-		lan743x_csr_write(adapter, INT_MOD_CFG6, LAN743X_INT_MOD);
-		lan743x_csr_write(adapter, INT_MOD_CFG7, LAN743X_INT_MOD);
-		if (adapter->is_pci11x1x) {
-			lan743x_csr_write(adapter, INT_MOD_CFG8, LAN743X_INT_MOD);
-			lan743x_csr_write(adapter, INT_MOD_CFG9, LAN743X_INT_MOD);
-			lan743x_csr_write(adapter, INT_MOD_MAP0, 0x00007654);
-			lan743x_csr_write(adapter, INT_MOD_MAP1, 0x00003210);
-		} else {
-			lan743x_csr_write(adapter, INT_MOD_MAP0, 0x00005432);
-			lan743x_csr_write(adapter, INT_MOD_MAP1, 0x00000001);
-		}
-		lan743x_csr_write(adapter, INT_MOD_MAP2, 0x00FFFFFF);
-	}
-
 	/* enable interrupts */
 	lan743x_csr_write(adapter, INT_EN_SET, INT_BIT_MAS_);
 	ret = lan743x_intr_test_isr(adapter);
@@ -3035,6 +3014,31 @@ static void lan743x_phylink_mac_link_down(struct phylink_config *config,
 	netif_tx_stop_all_queues(netdev);
 }
 
+static void lan743x_config_int_mod(struct lan743x_adapter *adapter, u32 int_mod)
+{
+	if (!(adapter->csr.flags & LAN743X_CSR_FLAG_IS_A0)) {
+		lan743x_csr_write(adapter, INT_MOD_CFG0, int_mod);
+		lan743x_csr_write(adapter, INT_MOD_CFG1, int_mod);
+		lan743x_csr_write(adapter, INT_MOD_CFG2, int_mod);
+		lan743x_csr_write(adapter, INT_MOD_CFG3, int_mod);
+		lan743x_csr_write(adapter, INT_MOD_CFG4, int_mod);
+		lan743x_csr_write(adapter, INT_MOD_CFG5, int_mod);
+		lan743x_csr_write(adapter, INT_MOD_CFG6, int_mod);
+		lan743x_csr_write(adapter, INT_MOD_CFG7, int_mod);
+		if (adapter->is_pci11x1x) {
+			lan743x_csr_write(adapter, INT_MOD_CFG8, int_mod);
+			lan743x_csr_write(adapter, INT_MOD_CFG9, int_mod);
+
+			lan743x_csr_write(adapter, INT_MOD_MAP0, 0x00007654);
+			lan743x_csr_write(adapter, INT_MOD_MAP1, 0x00003210);
+		} else {
+			lan743x_csr_write(adapter, INT_MOD_MAP0, 0x00005432);
+			lan743x_csr_write(adapter, INT_MOD_MAP1, 0x00000001);
+		}
+		lan743x_csr_write(adapter, INT_MOD_MAP2, 0x00FFFFFF);
+	}
+}
+
 static void lan743x_phylink_mac_link_up(struct phylink_config *config,
 					struct phy_device *phydev,
 					unsigned int link_an_mode,
@@ -3044,6 +3048,7 @@ static void lan743x_phylink_mac_link_up(struct phylink_config *config,
 {
 	struct net_device *netdev = to_net_dev(config->dev);
 	struct lan743x_adapter *adapter = netdev_priv(netdev);
+	u32 int_mod;
 	int mac_cr;
 	u8 cap;
 
@@ -3052,15 +3057,23 @@ static void lan743x_phylink_mac_link_up(struct phylink_config *config,
 	 * Resulting value corresponds to SPEED_10
 	 */
 	mac_cr &= ~(MAC_CR_CFG_H_ | MAC_CR_CFG_L_);
-	if (speed == SPEED_2500)
+	if (speed == SPEED_2500) {
 		mac_cr |= MAC_CR_CFG_H_ | MAC_CR_CFG_L_;
-	else if (speed == SPEED_1000)
+		int_mod = LAN743X_INT_MOD_2_5G;
+	} else if (speed == SPEED_1000) {
 		mac_cr |= MAC_CR_CFG_H_;
-	else if (speed == SPEED_100)
+		int_mod = LAN743X_INT_MOD_1G;
+	} else if (speed == SPEED_100) {
 		mac_cr |= MAC_CR_CFG_L_;
+		int_mod = LAN743X_INT_MOD_100M;
+	} else {
+		int_mod = LAN743X_INT_MOD_10M;
+	}
 
 	lan743x_csr_write(adapter, MAC_CR, mac_cr);
 
+	lan743x_config_int_mod(adapter, int_mod);
+
 	lan743x_ptp_update_latency(adapter, speed);
 
 	/* Flow Control operation */
diff --git a/drivers/net/ethernet/microchip/lan743x_main.h b/drivers/net/ethernet/microchip/lan743x_main.h
index db5fc73e41cc..189d979356a0 100644
--- a/drivers/net/ethernet/microchip/lan743x_main.h
+++ b/drivers/net/ethernet/microchip/lan743x_main.h
@@ -860,7 +860,10 @@ struct lan743x_adapter;
 #define LAN743X_USED_RX_CHANNELS	(4)
 #define LAN743X_USED_TX_CHANNELS	(1)
 #define PCI11X1X_USED_TX_CHANNELS	(4)
-#define LAN743X_INT_MOD	(400)
+#define LAN743X_INT_MOD_2_5G		(64)
+#define LAN743X_INT_MOD_1G		(150)
+#define LAN743X_INT_MOD_100M		(330)
+#define LAN743X_INT_MOD_10M		(330)
 
 #if (LAN743X_USED_RX_CHANNELS > LAN743X_MAX_RX_CHANNELS)
 #error Invalid LAN743X_USED_RX_CHANNELS
-- 
2.25.1
Re: [PATCH v1 net-next] net: lan743x: configure interrupt moderation timers based on speed
Posted by Andrew Lunn 9 months, 1 week ago
On Mon, May 05, 2025 at 12:59:43PM +0530, Thangaraj Samynathan wrote:
> Configures the interrupt moderation timer value to 64us for 2.5G,
> 150us for 1G, 330us for 10/100M. Earlier this was 400us for all
> speeds. This improvess UDP TX and Bidirectional performance to
> 2.3Gbps from 1.4Gbps in 2.5G. These values are derived after
> experimenting with different values.

It would be good to also implement:

       ethtool -c|--show-coalesce devname

       ethtool -C|--coalesce devname [adaptive-rx on|off] [adaptive-tx on|off]
              [rx-usecs N] [rx-frames N] [rx-usecs-irq N] [rx-frames-irq N]
              [tx-usecs N] [tx-frames N] [tx-usecs-irq N] [tx-frames-irq N]
              [stats-block-usecs N] [pkt-rate-low N] [rx-usecs-low N]
              [rx-frames-low N] [tx-usecs-low N] [tx-frames-low N]
              [pkt-rate-high N] [rx-usecs-high N] [rx-frames-high N]
              [tx-usecs-high N] [tx-frames-high N] [sample-interval N]
              [cqe-mode-rx on|off] [cqe-mode-tx on|off] [tx-aggr-max-bytes N]
              [tx-aggr-max-frames N] [tx-aggr-time-usecs N]

so the user can configure it. Sometimes lower power is more important
than high speed.

	Andrew
Re: [PATCH v1 net-next] net: lan743x: configure interrupt moderation timers based on speed
Posted by Thangaraj.S@microchip.com 9 months, 1 week ago
Hi Andrew,
Thanks for reviewing the patch,

On Mon, 2025-05-05 at 14:15 +0200, Andrew Lunn wrote:
> EXTERNAL EMAIL: Do not click links or open attachments unless you
> know the content is safe
> 
> On Mon, May 05, 2025 at 12:59:43PM +0530, Thangaraj Samynathan wrote:
> > Configures the interrupt moderation timer value to 64us for 2.5G,
> > 150us for 1G, 330us for 10/100M. Earlier this was 400us for all
> > speeds. This improvess UDP TX and Bidirectional performance to
> > 2.3Gbps from 1.4Gbps in 2.5G. These values are derived after
> > experimenting with different values.
> 
> It would be good to also implement:
> 
>        ethtool -c|--show-coalesce devname
> 
>        ethtool -C|--coalesce devname [adaptive-rx on|off] [adaptive-
> tx on|off]
>               [rx-usecs N] [rx-frames N] [rx-usecs-irq N] [rx-frames-
> irq N]
>               [tx-usecs N] [tx-frames N] [tx-usecs-irq N] [tx-frames-
> irq N]
>               [stats-block-usecs N] [pkt-rate-low N] [rx-usecs-low N]
>               [rx-frames-low N] [tx-usecs-low N] [tx-frames-low N]
>               [pkt-rate-high N] [rx-usecs-high N] [rx-frames-high N]
>               [tx-usecs-high N] [tx-frames-high N] [sample-interval
> N]
>               [cqe-mode-rx on|off] [cqe-mode-tx on|off] [tx-aggr-max-
> bytes N]
>               [tx-aggr-max-frames N] [tx-aggr-time-usecs N]
> 
> so the user can configure it. Sometimes lower power is more important
> than high speed.
> 
>         Andrew

We've tuned the interrupt moderation values based on testing to improve
performance. For now, we’ll keep these fixed values optimized for
performance across all speeds. That said, we agree that adding ethtool
-c/-C support would provide valuable flexibility for users to balance
power and performance, and we’ll consider implementing that in a future
update.
Re: [PATCH v1 net-next] net: lan743x: configure interrupt moderation timers based on speed
Posted by Andrew Lunn 9 months, 1 week ago
On Tue, May 06, 2025 at 04:02:30AM +0000, Thangaraj.S@microchip.com wrote:
> Hi Andrew,
> Thanks for reviewing the patch,
> 
> On Mon, 2025-05-05 at 14:15 +0200, Andrew Lunn wrote:
> > EXTERNAL EMAIL: Do not click links or open attachments unless you
> > know the content is safe
> > 
> > On Mon, May 05, 2025 at 12:59:43PM +0530, Thangaraj Samynathan wrote:
> > > Configures the interrupt moderation timer value to 64us for 2.5G,
> > > 150us for 1G, 330us for 10/100M. Earlier this was 400us for all
> > > speeds. This improvess UDP TX and Bidirectional performance to
> > > 2.3Gbps from 1.4Gbps in 2.5G. These values are derived after
> > > experimenting with different values.
> > 
> > It would be good to also implement:
> > 
> >        ethtool -c|--show-coalesce devname
> > 
> >        ethtool -C|--coalesce devname [adaptive-rx on|off] [adaptive-
> > tx on|off]
> >               [rx-usecs N] [rx-frames N] [rx-usecs-irq N] [rx-frames-
> > irq N]
> >               [tx-usecs N] [tx-frames N] [tx-usecs-irq N] [tx-frames-
> > irq N]
> >               [stats-block-usecs N] [pkt-rate-low N] [rx-usecs-low N]
> >               [rx-frames-low N] [tx-usecs-low N] [tx-frames-low N]
> >               [pkt-rate-high N] [rx-usecs-high N] [rx-frames-high N]
> >               [tx-usecs-high N] [tx-frames-high N] [sample-interval
> > N]
> >               [cqe-mode-rx on|off] [cqe-mode-tx on|off] [tx-aggr-max-
> > bytes N]
> >               [tx-aggr-max-frames N] [tx-aggr-time-usecs N]
> > 
> > so the user can configure it. Sometimes lower power is more important
> > than high speed.
> > 
> >         Andrew
> 
> We've tuned the interrupt moderation values based on testing to improve
> performance. For now, we’ll keep these fixed values optimized for
> performance across all speeds. That said, we agree that adding ethtool
> -c/-C support would provide valuable flexibility for users to balance
> power and performance, and we’ll consider implementing that in a future
> update.

As you said, you have optimised for performance. That might cause
regressions for some users. We try to avoid regressions, and if
somebody does report a regression, we will have to revert this change.
If you were to implement this ethtool option, we are a lot less likely
to make a revert, we can instruct the user how to set the coalesce for
there use case.

	Andrew
Re: [PATCH v1 net-next] net: lan743x: configure interrupt moderation timers based on speed
Posted by Jakub Kicinski 9 months ago
On Tue, 6 May 2025 14:10:09 +0200 Andrew Lunn wrote:
> > We've tuned the interrupt moderation values based on testing to improve
> > performance. For now, we’ll keep these fixed values optimized for
> > performance across all speeds. That said, we agree that adding ethtool
> > -c/-C support would provide valuable flexibility for users to balance
> > power and performance, and we’ll consider implementing that in a future
> > update.  
> 
> As you said, you have optimised for performance. That might cause
> regressions for some users. We try to avoid regressions, and if
> somebody does report a regression, we will have to revert this change.
> If you were to implement this ethtool option, we are a lot less likely
> to make a revert, we can instruct the user how to set the coalesce for
> there use case.

I completely agree. Please let the users decide how they want to balance
throughput vs latency.
-- 
pw-bot: cr
Re: [PATCH v1 net-next] net: lan743x: configure interrupt moderation timers based on speed
Posted by Thangaraj.S@microchip.com 9 months ago
Hi Andrew & Jakub,
Thanks for reviewing the patch.

I agree with your comments and will implement the ethtool option to
provide flexibility, while keeping the default behavior as defined in
this patch based on speed.

Thanks,
Thangaraj Samynathan
On Tue, 2025-05-06 at 17:54 -0700, Jakub Kicinski wrote:
> EXTERNAL EMAIL: Do not click links or open attachments unless you
> know the content is safe
> 
> On Tue, 6 May 2025 14:10:09 +0200 Andrew Lunn wrote:
> > > We've tuned the interrupt moderation values based on testing to
> > > improve
> > > performance. For now, we’ll keep these fixed values optimized for
> > > performance across all speeds. That said, we agree that adding
> > > ethtool
> > > -c/-C support would provide valuable flexibility for users to
> > > balance
> > > power and performance, and we’ll consider implementing that in a
> > > future
> > > update.
> > 
> > As you said, you have optimised for performance. That might cause
> > regressions for some users. We try to avoid regressions, and if
> > somebody does report a regression, we will have to revert this
> > change.
> > If you were to implement this ethtool option, we are a lot less
> > likely
> > to make a revert, we can instruct the user how to set the coalesce
> > for
> > there use case.
> 
> I completely agree. Please let the users decide how they want to
> balance
> throughput vs latency.
> --
> pw-bot: cr
Re: [PATCH v1 net-next] net: lan743x: configure interrupt moderation timers based on speed
Posted by Jakub Kicinski 9 months ago
On Thu, 8 May 2025 03:36:17 +0000 Thangaraj.S@microchip.com wrote:
> I agree with your comments and will implement the ethtool option to
> provide flexibility, while keeping the default behavior as defined in
> this patch based on speed.

Why the speed based defaults? Do other vendors do that?
330usec is a very high latency, and if the link is running 
at 10M we probably don't need IRQ moderation at all?

For Tx deferring the completion based on link speed makes sense.
We want an IRQ for a fixed amount of data, doesn't matter how 
fast its going out. But for Rx the more aggressive the moderation 
the higher the latency. In my experience the Rx moderation should
not depend on link speed.

reminder: please avoid top posting