Allow per-NAPI defer_hard_irqs setting.
The existing sysfs parameter is respected; writes to sysfs will write to
all NAPI structs for the device and the net_device defer_hard_irq field.
Reads from sysfs will read from the net_device field.
sysfs code was updated to guard against what appears to be a potential
overflow as the field is an int, but the value passed in is an unsigned
long.
The ability to set defer_hard_irqs on specific NAPI instances will be
added in a later commit, via netdev-genl.
Signed-off-by: Joe Damato <jdamato@fastly.com>
Reviewed-by: Martin Karsten <mkarsten@uwaterloo.ca>
Tested-by: Martin Karsten <mkarsten@uwaterloo.ca>
---
include/linux/netdevice.h | 23 +++++++++++++++++++++++
net/core/dev.c | 29 ++++++++++++++++++++++++++---
net/core/net-sysfs.c | 5 ++++-
3 files changed, 53 insertions(+), 4 deletions(-)
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index fce70990b209..7d53380da4c0 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -371,6 +371,7 @@ struct napi_struct {
struct list_head rx_list; /* Pending GRO_NORMAL skbs */
int rx_count; /* length of rx_list */
unsigned int napi_id;
+ int defer_hard_irqs;
struct hrtimer timer;
struct task_struct *thread;
/* control-path-only fields follow */
@@ -534,6 +535,28 @@ static inline void napi_schedule_irqoff(struct napi_struct *n)
__napi_schedule_irqoff(n);
}
+/**
+ * napi_get_defer_hard_irqs - get the NAPI's defer_hard_irqs
+ * @n: napi struct to get the defer_hard_irqs field from
+ *
+ * Returns the per-NAPI value of the defar_hard_irqs field.
+ */
+int napi_get_defer_hard_irqs(const struct napi_struct *n);
+
+/**
+ * napi_set_defer_hard_irqs - set the defer_hard_irqs for a napi
+ * @n: napi_struct to set the defer_hard_irqs field
+ * @defer: the value the field should be set to
+ */
+void napi_set_defer_hard_irqs(struct napi_struct *n, int defer);
+
+/**
+ * netdev_set_defer_hard_irqs - set defer_hard_irqs for all NAPIs of a netdev
+ * @netdev: the net_device for which all NAPIs will have their defer_hard_irqs set
+ * @defer: the defer_hard_irqs value to set
+ */
+void netdev_set_defer_hard_irqs(struct net_device *netdev, int defer);
+
/**
* napi_complete_done - NAPI processing complete
* @n: NAPI context
diff --git a/net/core/dev.c b/net/core/dev.c
index 63987b8b7c85..f7baff0da057 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6212,6 +6212,28 @@ void __napi_schedule_irqoff(struct napi_struct *n)
}
EXPORT_SYMBOL(__napi_schedule_irqoff);
+int napi_get_defer_hard_irqs(const struct napi_struct *n)
+{
+ return READ_ONCE(n->defer_hard_irqs);
+}
+EXPORT_SYMBOL_GPL(napi_get_defer_hard_irqs);
+
+void napi_set_defer_hard_irqs(struct napi_struct *n, int defer)
+{
+ WRITE_ONCE(n->defer_hard_irqs, defer);
+}
+EXPORT_SYMBOL_GPL(napi_set_defer_hard_irqs);
+
+void netdev_set_defer_hard_irqs(struct net_device *netdev, int defer)
+{
+ struct napi_struct *napi;
+
+ WRITE_ONCE(netdev->napi_defer_hard_irqs, defer);
+ list_for_each_entry(napi, &netdev->napi_list, dev_list)
+ napi_set_defer_hard_irqs(napi, defer);
+}
+EXPORT_SYMBOL_GPL(netdev_set_defer_hard_irqs);
+
bool napi_complete_done(struct napi_struct *n, int work_done)
{
unsigned long flags, val, new, timeout = 0;
@@ -6230,7 +6252,7 @@ bool napi_complete_done(struct napi_struct *n, int work_done)
if (work_done) {
if (n->gro_bitmask)
timeout = READ_ONCE(n->dev->gro_flush_timeout);
- n->defer_hard_irqs_count = READ_ONCE(n->dev->napi_defer_hard_irqs);
+ n->defer_hard_irqs_count = napi_get_defer_hard_irqs(n);
}
if (n->defer_hard_irqs_count > 0) {
n->defer_hard_irqs_count--;
@@ -6368,7 +6390,7 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock,
bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx);
if (flags & NAPI_F_PREFER_BUSY_POLL) {
- napi->defer_hard_irqs_count = READ_ONCE(napi->dev->napi_defer_hard_irqs);
+ napi->defer_hard_irqs_count = napi_get_defer_hard_irqs(napi);
timeout = READ_ONCE(napi->dev->gro_flush_timeout);
if (napi->defer_hard_irqs_count && timeout) {
hrtimer_start(&napi->timer, ns_to_ktime(timeout), HRTIMER_MODE_REL_PINNED);
@@ -6650,6 +6672,7 @@ void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi,
INIT_HLIST_NODE(&napi->napi_hash_node);
hrtimer_init(&napi->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED);
napi->timer.function = napi_watchdog;
+ napi_set_defer_hard_irqs(napi, READ_ONCE(dev->napi_defer_hard_irqs));
init_gro_hash(napi);
napi->skb = NULL;
INIT_LIST_HEAD(&napi->rx_list);
@@ -11032,7 +11055,7 @@ void netdev_sw_irq_coalesce_default_on(struct net_device *dev)
if (!IS_ENABLED(CONFIG_PREEMPT_RT)) {
dev->gro_flush_timeout = 20000;
- dev->napi_defer_hard_irqs = 1;
+ netdev_set_defer_hard_irqs(dev, 1);
}
}
EXPORT_SYMBOL_GPL(netdev_sw_irq_coalesce_default_on);
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 0e2084ce7b75..8272f0144d81 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -425,7 +425,10 @@ NETDEVICE_SHOW_RW(gro_flush_timeout, fmt_ulong);
static int change_napi_defer_hard_irqs(struct net_device *dev, unsigned long val)
{
- WRITE_ONCE(dev->napi_defer_hard_irqs, val);
+ if (val > S32_MAX)
+ return -EINVAL;
+
+ netdev_set_defer_hard_irqs(dev, (int)val);
return 0;
}
--
2.25.1
Hi Joe,
kernel test robot noticed the following build errors:
[auto build test ERROR on net-next/main]
url: https://github.com/intel-lab-lkp/linux/commits/Joe-Damato/net-napi-Make-napi_defer_hard_irqs-per-NAPI/20240829-211617
base: net-next/main
patch link: https://lore.kernel.org/r/20240829131214.169977-2-jdamato%40fastly.com
patch subject: [PATCH net-next 1/5] net: napi: Make napi_defer_hard_irqs per-NAPI
config: x86_64-allyesconfig (https://download.01.org/0day-ci/archive/20240831/202408310038.VztWz8YR-lkp@intel.com/config)
compiler: clang version 18.1.5 (https://github.com/llvm/llvm-project 617a15a9eac96088ae5e9134248d8236e34b91b1)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240831/202408310038.VztWz8YR-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202408310038.VztWz8YR-lkp@intel.com/
All errors (new ones prefixed by >>):
In file included from drivers/net/ethernet/intel/idpf/idpf_dev.c:4:
In file included from drivers/net/ethernet/intel/idpf/idpf.h:22:
>> drivers/net/ethernet/intel/idpf/idpf_txrx.h:475:1: error: static assertion failed due to requirement '__builtin_offsetof(struct idpf_q_vector, __cacheline_group_end__read_write) - (__builtin_offsetof(struct idpf_q_vector, __cacheline_group_begin__read_write) + sizeof ((((struct idpf_q_vector *)0)->__cacheline_group_begin__read_write))) == (424 + 2 * sizeof(struct dim))': offsetof(struct idpf_q_vector, __cacheline_group_end__read_write) - offsetofend(struct idpf_q_vector, __cacheline_group_begin__read_write) == (424 + 2 * sizeof(struct dim))
475 | libeth_cacheline_set_assert(struct idpf_q_vector, 104,
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
476 | 424 + 2 * sizeof(struct dim),
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
477 | 8 + sizeof(cpumask_var_t));
| ~~~~~~~~~~~~~~~~~~~~~~~~~~
include/net/libeth/cache.h:62:2: note: expanded from macro 'libeth_cacheline_set_assert'
62 | libeth_cacheline_group_assert(type, read_write, rw); \
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
include/net/libeth/cache.h:17:16: note: expanded from macro 'libeth_cacheline_group_assert'
17 | static_assert(offsetof(type, __cacheline_group_end__##grp) - \
| ~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
18 | offsetofend(type, __cacheline_group_begin__##grp) == \
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
19 | (sz))
| ~~~~~
include/linux/stddef.h:16:32: note: expanded from macro 'offsetof'
16 | #define offsetof(TYPE, MEMBER) __builtin_offsetof(TYPE, MEMBER)
| ^
include/linux/build_bug.h:77:50: note: expanded from macro 'static_assert'
77 | #define static_assert(expr, ...) __static_assert(expr, ##__VA_ARGS__, #expr)
| ~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~
include/linux/build_bug.h:78:56: note: expanded from macro '__static_assert'
78 | #define __static_assert(expr, msg, ...) _Static_assert(expr, msg)
| ^~~~
drivers/net/ethernet/intel/idpf/idpf_txrx.h:475:1: note: expression evaluates to '768 == 760'
475 | libeth_cacheline_set_assert(struct idpf_q_vector, 104,
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
476 | 424 + 2 * sizeof(struct dim),
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
477 | 8 + sizeof(cpumask_var_t));
| ~~~~~~~~~~~~~~~~~~~~~~~~~~
include/net/libeth/cache.h:62:2: note: expanded from macro 'libeth_cacheline_set_assert'
62 | libeth_cacheline_group_assert(type, read_write, rw); \
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
include/net/libeth/cache.h:18:59: note: expanded from macro 'libeth_cacheline_group_assert'
17 | static_assert(offsetof(type, __cacheline_group_end__##grp) - \
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
18 | offsetofend(type, __cacheline_group_begin__##grp) == \
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~~
19 | (sz))
| ~~~~~
include/linux/build_bug.h:77:50: note: expanded from macro 'static_assert'
77 | #define static_assert(expr, ...) __static_assert(expr, ##__VA_ARGS__, #expr)
| ~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~
include/linux/build_bug.h:78:56: note: expanded from macro '__static_assert'
78 | #define __static_assert(expr, msg, ...) _Static_assert(expr, msg)
| ^~~~
1 error generated.
--
In file included from drivers/net/ethernet/intel/idpf/idpf_main.c:4:
In file included from drivers/net/ethernet/intel/idpf/idpf.h:22:
>> drivers/net/ethernet/intel/idpf/idpf_txrx.h:475:1: error: static assertion failed due to requirement '__builtin_offsetof(struct idpf_q_vector, __cacheline_group_end__read_write) - (__builtin_offsetof(struct idpf_q_vector, __cacheline_group_begin__read_write) + sizeof ((((struct idpf_q_vector *)0)->__cacheline_group_begin__read_write))) == (424 + 2 * sizeof(struct dim))': offsetof(struct idpf_q_vector, __cacheline_group_end__read_write) - offsetofend(struct idpf_q_vector, __cacheline_group_begin__read_write) == (424 + 2 * sizeof(struct dim))
475 | libeth_cacheline_set_assert(struct idpf_q_vector, 104,
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
476 | 424 + 2 * sizeof(struct dim),
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
477 | 8 + sizeof(cpumask_var_t));
| ~~~~~~~~~~~~~~~~~~~~~~~~~~
include/net/libeth/cache.h:62:2: note: expanded from macro 'libeth_cacheline_set_assert'
62 | libeth_cacheline_group_assert(type, read_write, rw); \
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
include/net/libeth/cache.h:17:16: note: expanded from macro 'libeth_cacheline_group_assert'
17 | static_assert(offsetof(type, __cacheline_group_end__##grp) - \
| ~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
18 | offsetofend(type, __cacheline_group_begin__##grp) == \
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
19 | (sz))
| ~~~~~
include/linux/stddef.h:16:32: note: expanded from macro 'offsetof'
16 | #define offsetof(TYPE, MEMBER) __builtin_offsetof(TYPE, MEMBER)
| ^
include/linux/build_bug.h:77:50: note: expanded from macro 'static_assert'
77 | #define static_assert(expr, ...) __static_assert(expr, ##__VA_ARGS__, #expr)
| ~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~
include/linux/build_bug.h:78:56: note: expanded from macro '__static_assert'
78 | #define __static_assert(expr, msg, ...) _Static_assert(expr, msg)
| ^~~~
drivers/net/ethernet/intel/idpf/idpf_txrx.h:475:1: note: expression evaluates to '768 == 760'
475 | libeth_cacheline_set_assert(struct idpf_q_vector, 104,
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
476 | 424 + 2 * sizeof(struct dim),
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
477 | 8 + sizeof(cpumask_var_t));
| ~~~~~~~~~~~~~~~~~~~~~~~~~~
include/net/libeth/cache.h:62:2: note: expanded from macro 'libeth_cacheline_set_assert'
62 | libeth_cacheline_group_assert(type, read_write, rw); \
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
include/net/libeth/cache.h:18:59: note: expanded from macro 'libeth_cacheline_group_assert'
17 | static_assert(offsetof(type, __cacheline_group_end__##grp) - \
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
18 | offsetofend(type, __cacheline_group_begin__##grp) == \
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~~
19 | (sz))
| ~~~~~
include/linux/build_bug.h:77:50: note: expanded from macro 'static_assert'
77 | #define static_assert(expr, ...) __static_assert(expr, ##__VA_ARGS__, #expr)
| ~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~
include/linux/build_bug.h:78:56: note: expanded from macro '__static_assert'
78 | #define __static_assert(expr, msg, ...) _Static_assert(expr, msg)
| ^~~~
drivers/net/ethernet/intel/idpf/idpf_main.c:167:39: warning: shift count >= width of type [-Wshift-count-overflow]
167 | err = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64));
| ^~~~~~~~~~~~~~~~
include/linux/dma-mapping.h:77:54: note: expanded from macro 'DMA_BIT_MASK'
77 | #define DMA_BIT_MASK(n) (((n) == 64) ? ~0ULL : ((1ULL<<(n))-1))
| ^ ~~~
1 warning and 1 error generated.
vim +475 drivers/net/ethernet/intel/idpf/idpf_txrx.h
4930fbf419a72d Pavan Kumar Linga 2023-08-07 412
4930fbf419a72d Pavan Kumar Linga 2023-08-07 413 /**
4930fbf419a72d Pavan Kumar Linga 2023-08-07 414 * struct idpf_q_vector
1c325aac10a82f Alan Brady 2023-08-07 415 * @vport: Vport back pointer
5a816aae2d463d Alexander Lobakin 2024-06-20 416 * @num_rxq: Number of RX queues
d4d5587182664b Pavan Kumar Linga 2023-08-07 417 * @num_txq: Number of TX queues
5a816aae2d463d Alexander Lobakin 2024-06-20 418 * @num_bufq: Number of buffer queues
e4891e4687c8dd Alexander Lobakin 2024-06-20 419 * @num_complq: number of completion queues
5a816aae2d463d Alexander Lobakin 2024-06-20 420 * @rx: Array of RX queues to service
1c325aac10a82f Alan Brady 2023-08-07 421 * @tx: Array of TX queues to service
5a816aae2d463d Alexander Lobakin 2024-06-20 422 * @bufq: Array of buffer queues to service
e4891e4687c8dd Alexander Lobakin 2024-06-20 423 * @complq: array of completion queues
5a816aae2d463d Alexander Lobakin 2024-06-20 424 * @intr_reg: See struct idpf_intr_reg
5a816aae2d463d Alexander Lobakin 2024-06-20 425 * @napi: napi handler
5a816aae2d463d Alexander Lobakin 2024-06-20 426 * @total_events: Number of interrupts processed
c2d548cad1508d Joshua Hay 2023-08-07 427 * @tx_dim: Data for TX net_dim algorithm
1c325aac10a82f Alan Brady 2023-08-07 428 * @tx_itr_value: TX interrupt throttling rate
1c325aac10a82f Alan Brady 2023-08-07 429 * @tx_intr_mode: Dynamic ITR or not
1c325aac10a82f Alan Brady 2023-08-07 430 * @tx_itr_idx: TX ITR index
3a8845af66edb3 Alan Brady 2023-08-07 431 * @rx_dim: Data for RX net_dim algorithm
95af467d9a4e3b Alan Brady 2023-08-07 432 * @rx_itr_value: RX interrupt throttling rate
95af467d9a4e3b Alan Brady 2023-08-07 433 * @rx_intr_mode: Dynamic ITR or not
95af467d9a4e3b Alan Brady 2023-08-07 434 * @rx_itr_idx: RX ITR index
5a816aae2d463d Alexander Lobakin 2024-06-20 435 * @v_idx: Vector index
bf9bf7042a38eb Alexander Lobakin 2024-06-20 436 * @affinity_mask: CPU affinity mask
4930fbf419a72d Pavan Kumar Linga 2023-08-07 437 */
4930fbf419a72d Pavan Kumar Linga 2023-08-07 438 struct idpf_q_vector {
5a816aae2d463d Alexander Lobakin 2024-06-20 439 __cacheline_group_begin_aligned(read_mostly);
1c325aac10a82f Alan Brady 2023-08-07 440 struct idpf_vport *vport;
1c325aac10a82f Alan Brady 2023-08-07 441
5a816aae2d463d Alexander Lobakin 2024-06-20 442 u16 num_rxq;
d4d5587182664b Pavan Kumar Linga 2023-08-07 443 u16 num_txq;
5a816aae2d463d Alexander Lobakin 2024-06-20 444 u16 num_bufq;
e4891e4687c8dd Alexander Lobakin 2024-06-20 445 u16 num_complq;
5a816aae2d463d Alexander Lobakin 2024-06-20 446 struct idpf_rx_queue **rx;
e4891e4687c8dd Alexander Lobakin 2024-06-20 447 struct idpf_tx_queue **tx;
5a816aae2d463d Alexander Lobakin 2024-06-20 448 struct idpf_buf_queue **bufq;
e4891e4687c8dd Alexander Lobakin 2024-06-20 449 struct idpf_compl_queue **complq;
e4891e4687c8dd Alexander Lobakin 2024-06-20 450
5a816aae2d463d Alexander Lobakin 2024-06-20 451 struct idpf_intr_reg intr_reg;
5a816aae2d463d Alexander Lobakin 2024-06-20 452 __cacheline_group_end_aligned(read_mostly);
5a816aae2d463d Alexander Lobakin 2024-06-20 453
5a816aae2d463d Alexander Lobakin 2024-06-20 454 __cacheline_group_begin_aligned(read_write);
5a816aae2d463d Alexander Lobakin 2024-06-20 455 struct napi_struct napi;
5a816aae2d463d Alexander Lobakin 2024-06-20 456 u16 total_events;
5a816aae2d463d Alexander Lobakin 2024-06-20 457
c2d548cad1508d Joshua Hay 2023-08-07 458 struct dim tx_dim;
1c325aac10a82f Alan Brady 2023-08-07 459 u16 tx_itr_value;
1c325aac10a82f Alan Brady 2023-08-07 460 bool tx_intr_mode;
1c325aac10a82f Alan Brady 2023-08-07 461 u32 tx_itr_idx;
1c325aac10a82f Alan Brady 2023-08-07 462
3a8845af66edb3 Alan Brady 2023-08-07 463 struct dim rx_dim;
95af467d9a4e3b Alan Brady 2023-08-07 464 u16 rx_itr_value;
95af467d9a4e3b Alan Brady 2023-08-07 465 bool rx_intr_mode;
95af467d9a4e3b Alan Brady 2023-08-07 466 u32 rx_itr_idx;
5a816aae2d463d Alexander Lobakin 2024-06-20 467 __cacheline_group_end_aligned(read_write);
95af467d9a4e3b Alan Brady 2023-08-07 468
5a816aae2d463d Alexander Lobakin 2024-06-20 469 __cacheline_group_begin_aligned(cold);
5a816aae2d463d Alexander Lobakin 2024-06-20 470 u16 v_idx;
bf9bf7042a38eb Alexander Lobakin 2024-06-20 471
bf9bf7042a38eb Alexander Lobakin 2024-06-20 472 cpumask_var_t affinity_mask;
5a816aae2d463d Alexander Lobakin 2024-06-20 473 __cacheline_group_end_aligned(cold);
4930fbf419a72d Pavan Kumar Linga 2023-08-07 474 };
5a816aae2d463d Alexander Lobakin 2024-06-20 @475 libeth_cacheline_set_assert(struct idpf_q_vector, 104,
5a816aae2d463d Alexander Lobakin 2024-06-20 476 424 + 2 * sizeof(struct dim),
5a816aae2d463d Alexander Lobakin 2024-06-20 477 8 + sizeof(cpumask_var_t));
0fe45467a1041e Pavan Kumar Linga 2023-08-07 478
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
On Thu, Aug 29, 2024 at 01:11:57PM +0000, Joe Damato wrote: > Allow per-NAPI defer_hard_irqs setting. > > The existing sysfs parameter is respected; writes to sysfs will write to > all NAPI structs for the device and the net_device defer_hard_irq field. > Reads from sysfs will read from the net_device field. > > sysfs code was updated to guard against what appears to be a potential > overflow as the field is an int, but the value passed in is an unsigned > long. > > The ability to set defer_hard_irqs on specific NAPI instances will be > added in a later commit, via netdev-genl. > > Signed-off-by: Joe Damato <jdamato@fastly.com> > Reviewed-by: Martin Karsten <mkarsten@uwaterloo.ca> > Tested-by: Martin Karsten <mkarsten@uwaterloo.ca> > --- > include/linux/netdevice.h | 23 +++++++++++++++++++++++ > net/core/dev.c | 29 ++++++++++++++++++++++++++--- > net/core/net-sysfs.c | 5 ++++- > 3 files changed, 53 insertions(+), 4 deletions(-) ... > @@ -534,6 +535,28 @@ static inline void napi_schedule_irqoff(struct napi_struct *n) > __napi_schedule_irqoff(n); > } > > +/** > + * napi_get_defer_hard_irqs - get the NAPI's defer_hard_irqs > + * @n: napi struct to get the defer_hard_irqs field from > + * > + * Returns the per-NAPI value of the defar_hard_irqs field. > + */ > +int napi_get_defer_hard_irqs(const struct napi_struct *n); Hi Joe, As it looks like there will be a v2 anyway, a minor nit from my side. Thanks for documenting the return value, but I believe that ./scripts/kernel-doc -none -Wall expects "Return: " or "Returns: " Likewise in patch 3/5. ...
On Fri, Aug 30, 2024 at 09:36:41AM +0100, Simon Horman wrote: > On Thu, Aug 29, 2024 at 01:11:57PM +0000, Joe Damato wrote: > > Allow per-NAPI defer_hard_irqs setting. > > > > The existing sysfs parameter is respected; writes to sysfs will write to > > all NAPI structs for the device and the net_device defer_hard_irq field. > > Reads from sysfs will read from the net_device field. > > > > sysfs code was updated to guard against what appears to be a potential > > overflow as the field is an int, but the value passed in is an unsigned > > long. > > > > The ability to set defer_hard_irqs on specific NAPI instances will be > > added in a later commit, via netdev-genl. > > > > Signed-off-by: Joe Damato <jdamato@fastly.com> > > Reviewed-by: Martin Karsten <mkarsten@uwaterloo.ca> > > Tested-by: Martin Karsten <mkarsten@uwaterloo.ca> > > --- > > include/linux/netdevice.h | 23 +++++++++++++++++++++++ > > net/core/dev.c | 29 ++++++++++++++++++++++++++--- > > net/core/net-sysfs.c | 5 ++++- > > 3 files changed, 53 insertions(+), 4 deletions(-) > > ... > > > @@ -534,6 +535,28 @@ static inline void napi_schedule_irqoff(struct napi_struct *n) > > __napi_schedule_irqoff(n); > > } > > > > +/** > > + * napi_get_defer_hard_irqs - get the NAPI's defer_hard_irqs > > + * @n: napi struct to get the defer_hard_irqs field from > > + * > > + * Returns the per-NAPI value of the defar_hard_irqs field. > > + */ > > +int napi_get_defer_hard_irqs(const struct napi_struct *n); > > Hi Joe, > > As it looks like there will be a v2 anyway, a minor nit from my side. > > Thanks for documenting the return value, but I believe that > ./scripts/kernel-doc -none -Wall expects "Return: " or "Returns: " > > Likewise in patch 3/5. Thanks Simon, will make sure to take care of this in the v2.
On Thu, 29 Aug 2024 13:11:57 +0000 Joe Damato wrote: > +/** > + * napi_get_defer_hard_irqs - get the NAPI's defer_hard_irqs > + * @n: napi struct to get the defer_hard_irqs field from > + * > + * Returns the per-NAPI value of the defar_hard_irqs field. > + */ > +int napi_get_defer_hard_irqs(const struct napi_struct *n); > + > +/** > + * napi_set_defer_hard_irqs - set the defer_hard_irqs for a napi > + * @n: napi_struct to set the defer_hard_irqs field > + * @defer: the value the field should be set to > + */ > +void napi_set_defer_hard_irqs(struct napi_struct *n, int defer); > + > +/** > + * netdev_set_defer_hard_irqs - set defer_hard_irqs for all NAPIs of a netdev > + * @netdev: the net_device for which all NAPIs will have their defer_hard_irqs set > + * @defer: the defer_hard_irqs value to set > + */ > +void netdev_set_defer_hard_irqs(struct net_device *netdev, int defer); Do you expect drivers or modules to call these? I'm not sure we need the wrappers just to cover up the READ/WRITE_ONCE() but if you do want to keep them they can be static inlines in net/core/dev.h nit: IIUC the kdoc should go on the definition, not the declaration.
On Thu, Aug 29, 2024 at 03:05:02PM -0700, Jakub Kicinski wrote: > On Thu, 29 Aug 2024 13:11:57 +0000 Joe Damato wrote: > > +/** > > + * napi_get_defer_hard_irqs - get the NAPI's defer_hard_irqs > > + * @n: napi struct to get the defer_hard_irqs field from > > + * > > + * Returns the per-NAPI value of the defar_hard_irqs field. > > + */ > > +int napi_get_defer_hard_irqs(const struct napi_struct *n); > > + > > +/** > > + * napi_set_defer_hard_irqs - set the defer_hard_irqs for a napi > > + * @n: napi_struct to set the defer_hard_irqs field > > + * @defer: the value the field should be set to > > + */ > > +void napi_set_defer_hard_irqs(struct napi_struct *n, int defer); > > + > > +/** > > + * netdev_set_defer_hard_irqs - set defer_hard_irqs for all NAPIs of a netdev > > + * @netdev: the net_device for which all NAPIs will have their defer_hard_irqs set > > + * @defer: the defer_hard_irqs value to set > > + */ > > +void netdev_set_defer_hard_irqs(struct net_device *netdev, int defer); > > Do you expect drivers or modules to call these? > I'm not sure we need the wrappers just to cover up the READ/WRITE_ONCE() > but if you do want to keep them they can be static inlines in > net/core/dev.h It looked like there were a few call sites for these in net/core/dev.c, the sysfs code, and the netlink code. I figured having it all wrapped up somewhere might be better than repeating the READ/WRITE_ONCE() stuff. I have no preference on whether there are wrappers or not, though. If you'd like me to drop the wrappers for the v2, let me know. Otherwise: I'll make them static inlines as you suggested. Let me know if you have a preference here because I am neutral. > nit: IIUC the kdoc should go on the definition, not the declaration. My mistake; thanks. I suppose if I move them as static inlines, I'll just move the kdoc as well and the problem solves itself :)
On Fri, 30 Aug 2024 10:14:41 +0100 Joe Damato wrote: > Otherwise: I'll make them static inlines as you suggested. > > Let me know if you have a preference here because I am neutral. No strong preference either, static inlines seem like a good middle ground :)
On Fri, Aug 30, 2024 at 01:21:12PM -0700, Jakub Kicinski wrote: > On Fri, 30 Aug 2024 10:14:41 +0100 Joe Damato wrote: > > Otherwise: I'll make them static inlines as you suggested. > > > > Let me know if you have a preference here because I am neutral. > > No strong preference either, static inlines seem like a good > middle ground :) Ack. Already queued up in my v2 branch.
On Thu, Aug 29, 2024 at 3:13 PM Joe Damato <jdamato@fastly.com> wrote:
>
> Allow per-NAPI defer_hard_irqs setting.
>
> The existing sysfs parameter is respected; writes to sysfs will write to
> all NAPI structs for the device and the net_device defer_hard_irq field.
> Reads from sysfs will read from the net_device field.
>
> sysfs code was updated to guard against what appears to be a potential
> overflow as the field is an int, but the value passed in is an unsigned
> long.
>
> The ability to set defer_hard_irqs on specific NAPI instances will be
> added in a later commit, via netdev-genl.
>
> Signed-off-by: Joe Damato <jdamato@fastly.com>
> Reviewed-by: Martin Karsten <mkarsten@uwaterloo.ca>
> Tested-by: Martin Karsten <mkarsten@uwaterloo.ca>
> ---
> include/linux/netdevice.h | 23 +++++++++++++++++++++++
> net/core/dev.c | 29 ++++++++++++++++++++++++++---
> net/core/net-sysfs.c | 5 ++++-
> 3 files changed, 53 insertions(+), 4 deletions(-)
>
> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> index fce70990b209..7d53380da4c0 100644
> --- a/include/linux/netdevice.h
> +++ b/include/linux/netdevice.h
> @@ -371,6 +371,7 @@ struct napi_struct {
> struct list_head rx_list; /* Pending GRO_NORMAL skbs */
> int rx_count; /* length of rx_list */
> unsigned int napi_id;
> + int defer_hard_irqs;
> struct hrtimer timer;
> struct task_struct *thread;
> /* control-path-only fields follow */
> @@ -534,6 +535,28 @@ static inline void napi_schedule_irqoff(struct napi_struct *n)
> __napi_schedule_irqoff(n);
> }
>
Since dev->napi_defer_hard_irqs is no longer used in fast path,
please move it outside of the net_device_read_rx group.
Also update Documentation/networking/net_cachelines/net_device.rst
> +/**
> + * napi_get_defer_hard_irqs - get the NAPI's defer_hard_irqs
> + * @n: napi struct to get the defer_hard_irqs field from
> + *
> + * Returns the per-NAPI value of the defar_hard_irqs field.
> + */
> +int napi_get_defer_hard_irqs(const struct napi_struct *n);
> +
> +/**
> + * napi_set_defer_hard_irqs - set the defer_hard_irqs for a napi
> + * @n: napi_struct to set the defer_hard_irqs field
> + * @defer: the value the field should be set to
> + */
> +void napi_set_defer_hard_irqs(struct napi_struct *n, int defer);
> +
> +/**
> + * netdev_set_defer_hard_irqs - set defer_hard_irqs for all NAPIs of a netdev
> + * @netdev: the net_device for which all NAPIs will have their defer_hard_irqs set
> + * @defer: the defer_hard_irqs value to set
> + */
> +void netdev_set_defer_hard_irqs(struct net_device *netdev, int defer);
> +
> /**
> * napi_complete_done - NAPI processing complete
> * @n: NAPI context
> diff --git a/net/core/dev.c b/net/core/dev.c
> index 63987b8b7c85..f7baff0da057 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -6212,6 +6212,28 @@ void __napi_schedule_irqoff(struct napi_struct *n)
> }
> EXPORT_SYMBOL(__napi_schedule_irqoff);
>
> +int napi_get_defer_hard_irqs(const struct napi_struct *n)
> +{
> + return READ_ONCE(n->defer_hard_irqs);
> +}
> +EXPORT_SYMBOL_GPL(napi_get_defer_hard_irqs);
> +
> +void napi_set_defer_hard_irqs(struct napi_struct *n, int defer)
> +{
> + WRITE_ONCE(n->defer_hard_irqs, defer);
> +}
> +EXPORT_SYMBOL_GPL(napi_set_defer_hard_irqs);
> +
> +void netdev_set_defer_hard_irqs(struct net_device *netdev, int defer)
> +{
> + struct napi_struct *napi;
> +
> + WRITE_ONCE(netdev->napi_defer_hard_irqs, defer);
> + list_for_each_entry(napi, &netdev->napi_list, dev_list)
> + napi_set_defer_hard_irqs(napi, defer);
> +}
> +EXPORT_SYMBOL_GPL(netdev_set_defer_hard_irqs);
> +
> bool napi_complete_done(struct napi_struct *n, int work_done)
> {
> unsigned long flags, val, new, timeout = 0;
> @@ -6230,7 +6252,7 @@ bool napi_complete_done(struct napi_struct *n, int work_done)
> if (work_done) {
> if (n->gro_bitmask)
> timeout = READ_ONCE(n->dev->gro_flush_timeout);
> - n->defer_hard_irqs_count = READ_ONCE(n->dev->napi_defer_hard_irqs);
> + n->defer_hard_irqs_count = napi_get_defer_hard_irqs(n);
> }
> if (n->defer_hard_irqs_count > 0) {
> n->defer_hard_irqs_count--;
> @@ -6368,7 +6390,7 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock,
> bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx);
>
> if (flags & NAPI_F_PREFER_BUSY_POLL) {
> - napi->defer_hard_irqs_count = READ_ONCE(napi->dev->napi_defer_hard_irqs);
> + napi->defer_hard_irqs_count = napi_get_defer_hard_irqs(napi);
> timeout = READ_ONCE(napi->dev->gro_flush_timeout);
> if (napi->defer_hard_irqs_count && timeout) {
> hrtimer_start(&napi->timer, ns_to_ktime(timeout), HRTIMER_MODE_REL_PINNED);
> @@ -6650,6 +6672,7 @@ void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi,
> INIT_HLIST_NODE(&napi->napi_hash_node);
> hrtimer_init(&napi->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED);
> napi->timer.function = napi_watchdog;
> + napi_set_defer_hard_irqs(napi, READ_ONCE(dev->napi_defer_hard_irqs));
> init_gro_hash(napi);
> napi->skb = NULL;
> INIT_LIST_HEAD(&napi->rx_list);
> @@ -11032,7 +11055,7 @@ void netdev_sw_irq_coalesce_default_on(struct net_device *dev)
>
> if (!IS_ENABLED(CONFIG_PREEMPT_RT)) {
> dev->gro_flush_timeout = 20000;
> - dev->napi_defer_hard_irqs = 1;
> + netdev_set_defer_hard_irqs(dev, 1);
> }
> }
> EXPORT_SYMBOL_GPL(netdev_sw_irq_coalesce_default_on);
> diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
> index 0e2084ce7b75..8272f0144d81 100644
> --- a/net/core/net-sysfs.c
> +++ b/net/core/net-sysfs.c
> @@ -425,7 +425,10 @@ NETDEVICE_SHOW_RW(gro_flush_timeout, fmt_ulong);
>
> static int change_napi_defer_hard_irqs(struct net_device *dev, unsigned long val)
> {
> - WRITE_ONCE(dev->napi_defer_hard_irqs, val);
> + if (val > S32_MAX)
> + return -EINVAL;
> +
> + netdev_set_defer_hard_irqs(dev, (int)val);
> return 0;
> }
>
> --
> 2.25.1
>
© 2016 - 2026 Red Hat, Inc.