Retrieve Tx timestamp from system BH instead of regular system workqueue.
The current implementation uses schedule_work() which is executed by the
system work queue and kworkers to retrieve Tx timestamps. This increases
latency and can lead to timeouts in case of heavy system load. i210 is
often used in industrial systems, where timestamp timeouts can be fatal.
Therefore, switch to the system BH workqueues which are executed directly
at the end of the IRQ handler.
Tested on Intel i210 and i350 with ptp4l.
Signed-off-by: Kurt Kanzenbach <kurt@linutronix.de>
---
Changes in v4:
- Use BH workqueue (tasklet) instead of doing timestamping in IRQ path (Jakub Kicinski)
- Link to v3: https://patch.msgid.link/20260205-igb_irq_ts-v3-1-2efc7bc4b885@linutronix.de
Changes in v3:
- Switch back to IRQ, but for i210 only
- Keep kworker for all other NICs like i350 (Miroslav)
- Link to v2: https://lore.kernel.org/r/20250822-igb_irq_ts-v2-1-1ac37078a7a4@linutronix.de
Changes in v2:
- Switch from IRQ to PTP aux worker due to NTP performance regression (Miroslav)
- Link to v1: https://lore.kernel.org/r/20250815-igb_irq_ts-v1-1-8c6fc0353422@linutronix.de
---
drivers/net/ethernet/intel/igb/igb_main.c | 4 ++--
drivers/net/ethernet/intel/igb/igb_ptp.c | 2 +-
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index ee99fd8fd513..9fd29fedb9f5 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -6572,7 +6572,7 @@ netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb,
adapter->ptp_tx_skb = skb_get(skb);
adapter->ptp_tx_start = jiffies;
if (adapter->hw.mac.type == e1000_82576)
- schedule_work(&adapter->ptp_tx_work);
+ queue_work(system_bh_wq, &adapter->ptp_tx_work);
} else {
adapter->tx_hwtstamp_skipped++;
}
@@ -7076,7 +7076,7 @@ static void igb_tsync_interrupt(struct igb_adapter *adapter)
if (tsicr & E1000_TSICR_TXTS) {
/* retrieve hardware timestamp */
- schedule_work(&adapter->ptp_tx_work);
+ queue_work(system_bh_wq, &adapter->ptp_tx_work);
}
if (tsicr & TSINTR_TT0)
diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c
index bd85d02ecadd..7b44f9090631 100644
--- a/drivers/net/ethernet/intel/igb/igb_ptp.c
+++ b/drivers/net/ethernet/intel/igb/igb_ptp.c
@@ -832,7 +832,7 @@ static void igb_ptp_tx_work(struct work_struct *work)
igb_ptp_tx_hwtstamp(adapter);
else
/* reschedule to check later */
- schedule_work(&adapter->ptp_tx_work);
+ queue_work(system_bh_wq, &adapter->ptp_tx_work);
}
static void igb_ptp_overflow_check(struct work_struct *work)
---
base-commit: a0e8c9a5060fbdb72fca767164467a3cf2b8fc30
change-id: 20250813-igb_irq_ts-1aa77cc7b4cb
Best regards,
--
Kurt Kanzenbach <kurt@linutronix.de>
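
For readers following the mechanics: schedule_work() queues onto the
regular system workqueue and runs in a kworker thread, while
queue_work(system_bh_wq, ...) runs the same callback in softirq (BH)
context shortly after the hard IRQ handler returns. A minimal sketch of
the pattern, with simplified names that are not taken from the driver
(note that work queued on system_bh_wq must not sleep):

#include <linux/interrupt.h>
#include <linux/workqueue.h>

static struct work_struct ts_work;

static void ts_work_fn(struct work_struct *work)
{
	/* Runs in softirq context when queued on system_bh_wq:
	 * fetch the Tx timestamp here, but never sleep. */
}

static irqreturn_t example_irq(int irq, void *data)
{
	/* Before: schedule_work(&ts_work) deferred to a kworker,
	 * which can be delayed by other work under heavy load. */

	/* After: runs in BH context right after this handler. */
	queue_work(system_bh_wq, &ts_work);

	return IRQ_HANDLED;
}

static void example_setup(void)
{
	INIT_WORK(&ts_work, ts_work_fn);
}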
Dear Kurt,
Thank you for the patch.
On 03.03.26 at 12:48, Kurt Kanzenbach wrote:
> Retrieve Tx timestamp from system BH instead of regular system workqueue.
>
> The current implementation uses schedule_work() which is executed by the
> system work queue and kworkers to retrieve Tx timestamps. This increases
> latency and can lead to timeouts in case of heavy system load. i210 is
> often used in industrial systems, where timestamp timeouts can be fatal.
>
> Therefore, switch to the system BH workqueues which are executed directly
> at the end of the IRQ handler.
Thank you for implementing this.
> Tested on Intel i210 and i350 with ptp4l.
It would be great if you shared the numbers. Did Miroslav already test
this?
Also, if you could add a comment/summary on why this is not done
directly in the IRQ path, together with your theory on why, that'd be
great.
> Signed-off-by: Kurt Kanzenbach <kurt@linutronix.de>
> ---
> Changes in v4:
> - Use BH workqueue (tasklet) instead of doing timestamping in IRQ path (Jakub Kicinski)
> - Link to v3: https://patch.msgid.link/20260205-igb_irq_ts-v3-1-2efc7bc4b885@linutronix.de
>
> Changes in v3:
> - Switch back to IRQ, but for i210 only
> - Keep kworker for all other NICs like i350 (Miroslav)
> - Link to v2: https://lore.kernel.org/r/20250822-igb_irq_ts-v2-1-1ac37078a7a4@linutronix.de
>
> Changes in v2:
> - Switch from IRQ to PTP aux worker due to NTP performance regression (Miroslav)
> - Link to v1: https://lore.kernel.org/r/20250815-igb_irq_ts-v1-1-8c6fc0353422@linutronix.de
> ---
> drivers/net/ethernet/intel/igb/igb_main.c | 4 ++--
> drivers/net/ethernet/intel/igb/igb_ptp.c | 2 +-
> 2 files changed, 3 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
> index ee99fd8fd513..9fd29fedb9f5 100644
> --- a/drivers/net/ethernet/intel/igb/igb_main.c
> +++ b/drivers/net/ethernet/intel/igb/igb_main.c
> @@ -6572,7 +6572,7 @@ netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb,
> adapter->ptp_tx_skb = skb_get(skb);
> adapter->ptp_tx_start = jiffies;
> if (adapter->hw.mac.type == e1000_82576)
> - schedule_work(&adapter->ptp_tx_work);
> + queue_work(system_bh_wq, &adapter->ptp_tx_work);
> } else {
> adapter->tx_hwtstamp_skipped++;
> }
> @@ -7076,7 +7076,7 @@ static void igb_tsync_interrupt(struct igb_adapter *adapter)
>
> if (tsicr & E1000_TSICR_TXTS) {
> /* retrieve hardware timestamp */
> - schedule_work(&adapter->ptp_tx_work);
> + queue_work(system_bh_wq, &adapter->ptp_tx_work);
> }
>
> if (tsicr & TSINTR_TT0)
> diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c
> index bd85d02ecadd..7b44f9090631 100644
> --- a/drivers/net/ethernet/intel/igb/igb_ptp.c
> +++ b/drivers/net/ethernet/intel/igb/igb_ptp.c
> @@ -832,7 +832,7 @@ static void igb_ptp_tx_work(struct work_struct *work)
> igb_ptp_tx_hwtstamp(adapter);
> else
> /* reschedule to check later */
> - schedule_work(&adapter->ptp_tx_work);
> + queue_work(system_bh_wq, &adapter->ptp_tx_work);
> }
>
> static void igb_ptp_overflow_check(struct work_struct *work)
The diff looks fine.
Reviewed-by: Paul Menzel <pmenzel@molgen.mpg.de>
Kind regards,
Paul
On Tue Mar 03 2026, Paul Menzel wrote:
> It would be great if you shared the numbers. Did Miroslav already test
> this?

Great question. I did test with ptp4l and synchronization looks fine,
below 10ns back to back as expected. I did not test with ntpperf,
because I was never able to reproduce the NTP regression to the same
extent as Miroslav reported. Therefore, Miroslav is on Cc in case he
wants to test it. Let's see.

Thanks,
Kurt
On Tue, Mar 03, 2026 at 02:38:11PM +0100, Kurt Kanzenbach wrote:
> > It would be great if you shared the numbers. Did Miroslav already test
> > this?
>
> Great question. I did test with ptp4l and synchronization looks fine,
> below 10ns back to back as expected. I did not test with ntpperf,
> because I was never able to reproduce the NTP regression to the same
> extent as Miroslav reported. Therefore, Miroslav is on Cc in case he
> wants to test it. Let's see.
I ran the same test with I350 as before and there still seems to be a
regression, but interestingly it's quite different to the previous versions of
the patch. It's like there is a load-sensitive on/off switch.
Without the patch:
| responses | response time (ns)
rate clients | lost invalid basic xleave | min mean max stddev
150000 15000 0.00% 0.00% 0.00% 100.00% +4188 +36475 +193328 16179
157500 15750 0.02% 0.00% 0.02% 99.96% +6373 +42969 +683894 22682
165375 16384 0.03% 0.00% 0.00% 99.97% +7911 +43960 +692471 24454
173643 16384 0.06% 0.00% 0.00% 99.94% +8323 +45627 +707240 28452
182325 16384 0.06% 0.00% 0.00% 99.94% +8404 +47292 +722524 26936
191441 16384 0.00% 0.00% 0.00% 100.00% +8930 +51738 +223727 14272
201013 16384 0.05% 0.00% 0.00% 99.95% +9634 +53696 +776445 23783
211063 16384 0.00% 0.00% 0.00% 100.00% +14393 +54558 +329546 20473
221616 16384 2.59% 0.00% 0.05% 97.36% +23924 +321205 +518192 21838
232696 16384 7.00% 0.00% 0.10% 92.90% +33396 +337709 +575661 21017
244330 16384 10.82% 0.00% 0.15% 89.03% +34188 +340248 +556237 20880
With the patch:
| responses | response time (ns)
rate clients | lost invalid basic xleave | min mean max stddev
150000 15000 5.11% 0.00% 0.00% 94.88% +4426 +460642 +640884 83746
157500 15750 11.54% 0.00% 0.26% 88.20% +14434 +543656 +738355 30349
165375 16384 15.61% 0.00% 0.31% 84.08% +35822 +515304 +833859 25596
173643 16384 19.58% 0.00% 0.37% 80.05% +20762 +568962 +900100 28118
182325 16384 23.46% 0.00% 0.42% 76.13% +41829 +547974 +804170 27890
191441 16384 27.23% 0.00% 0.46% 72.31% +15182 +557920 +798212 28868
201013 16384 30.51% 0.00% 0.49% 69.00% +15980 +560764 +805576 29979
211063 16384 0.06% 0.00% 0.00% 99.94% +12668 +80487 +410555 62182
221616 16384 2.94% 0.00% 0.05% 97.00% +21587 +342769 +517566 23359
232696 16384 6.94% 0.00% 0.10% 92.96% +16581 +336068 +484574 18453
244330 16384 11.45% 0.00% 0.14% 88.41% +23608 +345023 +564130 19177
At 211063 requests per second and higher the performance looks the
same. But at the lower rates there is a clear drop. The higher
mean response time (difference between server TX and RX timestamps)
indicates more of the provided TX timestamps are hardware timestamps
and the chrony server timestamp statistics confirm that.
So, my interpretation is that, like with the earlier version of the
patch, it's trading performance for timestamp quality at lower rates,
but unlike the earlier version it's not losing performance at the
higher rates. That seems acceptable to me. Admins of busy servers
might need to decide if they should keep HW timestamping enabled. In
theory, chrony could have an option to do that automatically.
Thanks,
--
Miroslav Lichvar
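
A note on what the response time measures: it is the gap between the
server's RX and TX timestamps, and the TX side depends on how quickly
the kernel hands the hardware timestamp back to user space. A rough
userspace sketch of that retrieval path as used by ptp4l/chrony-style
consumers (simplified, error handling trimmed, not taken from ntpperf):

#include <linux/errqueue.h>
#include <linux/net_tstamp.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <time.h>

static int read_tx_tstamp(int fd, struct timespec *ts)
{
	char ctrl[256], data[256];
	struct iovec iov = { .iov_base = data, .iov_len = sizeof(data) };
	struct msghdr msg = {
		.msg_iov = &iov, .msg_iovlen = 1,
		.msg_control = ctrl, .msg_controllen = sizeof(ctrl),
	};
	struct cmsghdr *cm;

	/* The completion arrives only once the driver has fetched the
	 * timestamp from the NIC -- the latency this patch reduces. */
	if (recvmsg(fd, &msg, MSG_ERRQUEUE) < 0)
		return -1;

	for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm)) {
		if (cm->cmsg_level == SOL_SOCKET &&
		    cm->cmsg_type == SCM_TIMESTAMPING) {
			struct scm_timestamping tss;

			memcpy(&tss, CMSG_DATA(cm), sizeof(tss));
			*ts = tss.ts[2]; /* index 2 = raw HW timestamp */
			return 0;
		}
	}
	return -1;
}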
On 3/4/2026 1:35 AM, Miroslav Lichvar wrote:
> I ran the same test with I350 as before and there still seems to be a
> regression, but interestingly it's quite different to the previous
> versions of the patch. It's like there is a load-sensitive on/off
> switch.

Could you help me understand this data? I think I've been confused every
time I've looked at this. You mention a load-sensitive on/off switch...
which I guess kind of makes sense with the numbers here:

> Without the patch:
> [table snipped]

This is without the patch, and the "lost" is 0% for low rates, and we
have a lower response time mean, max, and standard deviation... But
xleave is 100%.

> With the patch:
> [table snipped]

With the fix, we have a higher lost percentage, which sounds bad to
me..? And we have a higher response time (which also sounds bad??) and
we have a much worse standard deviation across all the values from low
to high rate.

I guess I just don't understand what these numbers mean and why it's
"better" with the patch. Perhaps it's the naming? Or perhaps "xleave" is
bad, and this is showing that with the patch we get less of that? But
that looks like it gets consistently lower as the rate and number of
clients goes up.

> At 211063 requests per second and higher the performance looks the
> same. But at the lower rates there is a clear drop. The higher
> mean response time (difference between server TX and RX timestamps)
> indicates more of the provided TX timestamps are hardware timestamps
> and the chrony server timestamp statistics confirm that.

So you're saying a higher mean response time is.. better? What is it
really measuring then? Oh. I see. It has a higher response time because
it takes longer to get a Tx timestamp, but the provided timestamp is
higher quality. While previously it was using software timestamps, so it
could reply faster (since it takes less time to get the software
timestamp back out), but the quality is lower?

Ok. That makes a bit more sense...
On Mon, Mar 09, 2026 at 01:37:54AM -0700, Jacob Keller wrote:
> I guess I just don't understand what these numbers mean and why it's
> "better" with the patch. Perhaps it's the naming? Or perhaps "xleave"
> is bad, and this is showing that with the patch we get less of that?

xleave is 100% when the server responds to all requests and all
responses are in the expected (interleaved) mode. That is the ideal
result.

The patch doesn't seem to change the absolute maximum rate of responses,
unlike with the previous versions, but at some lower rates up to 30% of
responses are missing, apparently due to the kernel being able to fetch
HW timestamps from the NIC at a higher rate, i.e. the driver is trading
network traffic for HW timestamps.

> So you're saying a higher mean response time is.. better? What is it
> really measuring then?

A transmit HW timestamp is taken later than a SW timestamp, so there is
a larger difference between the TX and RX timestamps. ntpperf can also
measure the accuracy of the TX timestamp directly, but it's a more
difficult test to set up as the HW clocks need to be synchronized to
each other.

--
Miroslav Lichvar
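
To make the "fetching HW timestamps at a higher rate" point concrete:
the work function touched by the igb_ptp.c hunk polls the NIC and
re-queues itself until the timestamp is valid. A simplified
reconstruction based on the quoted hunk (the real igb_ptp_tx_work also
handles a timeout and drops the skb, which is omitted here):

static void example_ptp_tx_work(struct work_struct *work)
{
	struct igb_adapter *adapter = container_of(work, struct igb_adapter,
						   ptp_tx_work);
	struct e1000_hw *hw = &adapter->hw;

	if (rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID)
		/* The NIC latched a timestamp: deliver it to the socket. */
		igb_ptp_tx_hwtstamp(adapter);
	else
		/* Not ready yet: re-queue. On system_bh_wq this retry
		 * runs from softirq context instead of a kworker, so it
		 * can poll the NIC more aggressively under load. */
		queue_work(system_bh_wq, &adapter->ptp_tx_work);
}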
On Wed Mar 04 2026, Miroslav Lichvar wrote:
> So, my interpretation is that, like with the earlier version of the
> patch, it's trading performance for timestamp quality at lower rates,
> but unlike the earlier version it's not losing performance at the
> higher rates. That seems acceptable to me. Admins of busy servers
> might need to decide if they should keep HW timestamping enabled. In
> theory, chrony could have an option to do that automatically.

Great! Thanks a lot for testing and sharing your numbers. I'll send v5
then with updated changelog as suggested by Aleksandr and Paul.

Thanks,
Kurt
> -----Original Message-----
> From: Intel-wired-lan <intel-wired-lan-bounces@osuosl.org> On Behalf
> Of Kurt Kanzenbach
> Sent: Tuesday, March 3, 2026 12:49 PM
> To: Nguyen, Anthony L <anthony.l.nguyen@intel.com>; Kitszel,
> Przemyslaw <przemyslaw.kitszel@intel.com>
> Cc: Paul Menzel <pmenzel@molgen.mpg.de>; Vadim Fedorenko
> <vadim.fedorenko@linux.dev>; Gomes, Vinicius
> <vinicius.gomes@intel.com>; netdev@vger.kernel.org; Richard Cochran
> <richardcochran@gmail.com>; Kurt Kanzenbach <kurt@linutronix.de>;
> linux-kernel@vger.kernel.org; Andrew Lunn <andrew+netdev@lunn.ch>;
> Eric Dumazet <edumazet@google.com>; intel-wired-lan@lists.osuosl.org;
> Keller, Jacob E <jacob.e.keller@intel.com>; Jakub Kicinski
> <kuba@kernel.org>; Paolo Abeni <pabeni@redhat.com>; David S. Miller
> <davem@davemloft.net>; Sebastian Andrzej Siewior
> <bigeasy@linutronix.de>
> Subject: [Intel-wired-lan] [PATCH iwl-next v4] igb: Retrieve Tx
> timestamp from BH workqueue
>
> Retrieve Tx timestamp from system BH instead of regular system
> workqueue.
>
> The current implementation uses schedule_work() which is executed by
> the system work queue and kworkers to retrieve Tx timestamps. This
> increases latency and can lead to timeouts in case of heavy system
> load. i210 is often used in industrial systems, where timestamp
> timeouts can be fatal.
>
> Therefore, switch to the system BH workqueues which are executed
> directly at the end of the IRQ handler.
>
I'd recommend "executed directly at the end of the IRQ handler" -> "executed in softirq context shortly after the IRQ handler returns"
But anyway.
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
> Tested on Intel i210 and i350 with ptp4l.
>
> Signed-off-by: Kurt Kanzenbach <kurt@linutronix.de>
> ---
> Changes in v4:
> - Use BH workqueue (tasklet) instead of doing timestamping in IRQ path
> (Jakub Kicinski)
> - Link to v3: https://patch.msgid.link/20260205-igb_irq_ts-v3-1-
> 2efc7bc4b885@linutronix.de
>
> Changes in v3:
> - Switch back to IRQ, but for i210 only
> - Keep kworker for all other NICs like i350 (Miroslav)
> - Link to v2: https://lore.kernel.org/r/20250822-igb_irq_ts-v2-1-
> 1ac37078a7a4@linutronix.de
>
> Changes in v2:
> - Switch from IRQ to PTP aux worker due to NTP performance regression
> (Miroslav)
> - Link to v1: https://lore.kernel.org/r/20250815-igb_irq_ts-v1-1-
> 8c6fc0353422@linutronix.de
> ---
> drivers/net/ethernet/intel/igb/igb_main.c | 4 ++--
> drivers/net/ethernet/intel/igb/igb_ptp.c | 2 +-
> 2 files changed, 3 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/net/ethernet/intel/igb/igb_main.c
> b/drivers/net/ethernet/intel/igb/igb_main.c
> index ee99fd8fd513..9fd29fedb9f5 100644
> --- a/drivers/net/ethernet/intel/igb/igb_main.c
> +++ b/drivers/net/ethernet/intel/igb/igb_main.c
> @@ -6572,7 +6572,7 @@ netdev_tx_t igb_xmit_frame_ring(struct sk_buff
> *skb,
> adapter->ptp_tx_skb = skb_get(skb);
> adapter->ptp_tx_start = jiffies;
> if (adapter->hw.mac.type == e1000_82576)
> - schedule_work(&adapter->ptp_tx_work);
> + queue_work(system_bh_wq, &adapter-
> >ptp_tx_work);
> } else {
> adapter->tx_hwtstamp_skipped++;
> }
> @@ -7076,7 +7076,7 @@ static void igb_tsync_interrupt(struct
> igb_adapter *adapter)
>
> if (tsicr & E1000_TSICR_TXTS) {
> /* retrieve hardware timestamp */
> - schedule_work(&adapter->ptp_tx_work);
> + queue_work(system_bh_wq, &adapter->ptp_tx_work);
> }
>
> if (tsicr & TSINTR_TT0)
> diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c
> b/drivers/net/ethernet/intel/igb/igb_ptp.c
> index bd85d02ecadd..7b44f9090631 100644
> --- a/drivers/net/ethernet/intel/igb/igb_ptp.c
> +++ b/drivers/net/ethernet/intel/igb/igb_ptp.c
> @@ -832,7 +832,7 @@ static void igb_ptp_tx_work(struct work_struct
> *work)
> igb_ptp_tx_hwtstamp(adapter);
> else
> /* reschedule to check later */
> - schedule_work(&adapter->ptp_tx_work);
> + queue_work(system_bh_wq, &adapter->ptp_tx_work);
> }
>
> static void igb_ptp_overflow_check(struct work_struct *work)
>
> ---
> base-commit: a0e8c9a5060fbdb72fca767164467a3cf2b8fc30
> change-id: 20250813-igb_irq_ts-1aa77cc7b4cb
>
> Best regards,
> --
> Kurt Kanzenbach <kurt@linutronix.de>