net: sched: act_gate: fix update races and infoleak

[PATCH 1/2] net/sched: act_gate: fix schedule updates with RCU swap

Posted by Paul Moses 2 weeks, 5 days ago

Switch act_gate parameters to an RCU-protected pointer and update schedule
changes using a prepare-then-swap pattern. This avoids races between the
timer/data paths and configuration updates, and cancels the hrtimer
before swapping schedules.

A gate action replace could free and swap schedules while the hrtimer
callback or data path still dereferences the old entries, leaving a
use-after-free window during updates. The deferred swap and RCU free
close that window. A reproducer is available on request.

Also clear params on early error for newly created actions to avoid
leaving a dangling reference.

Fixes: a51c328df310 ("net: qos: introduce a gate control flow action")
Cc: stable@vger.kernel.org
Signed-off-by: Paul Moses <p@1g4.org>
---
 include/net/tc_act/tc_gate.h |  49 +++++-
 net/sched/act_gate.c         | 298 +++++++++++++++++++++++++++--------
 2 files changed, 270 insertions(+), 77 deletions(-)

diff --git a/include/net/tc_act/tc_gate.h b/include/net/tc_act/tc_gate.h
index c1a67149c6b62..a2a24a62dff85 100644
--- a/include/net/tc_act/tc_gate.h
+++ b/include/net/tc_act/tc_gate.h
@@ -32,6 +32,7 @@ struct tcf_gate_params {
 	s32			tcfg_clockid;
 	size_t			num_entries;
 	struct list_head	entries;
+	struct rcu_head		rcu;
 };
 
 #define GATE_ACT_GATE_OPEN	BIT(0)
@@ -39,7 +40,7 @@ struct tcf_gate_params {
 
 struct tcf_gate {
 	struct tc_action	common;
-	struct tcf_gate_params	param;
+	struct tcf_gate_params __rcu *param;
 	u8			current_gate_status;
 	ktime_t			current_close_time;
 	u32			current_entry_octets;
@@ -53,45 +54,75 @@ struct tcf_gate {
 
 static inline s32 tcf_gate_prio(const struct tc_action *a)
 {
+	struct tcf_gate *gact = to_gate(a);
+	struct tcf_gate_params *p;
 	s32 tcfg_prio;
 
-	tcfg_prio = to_gate(a)->param.tcfg_priority;
+	p = rcu_dereference_protected(gact->param,
+				      lockdep_is_held(&a->tcfa_lock) ||
+				      lockdep_is_held(&gact->tcf_lock) ||
+				      lockdep_rtnl_is_held());
+	tcfg_prio = p->tcfg_priority;
 
 	return tcfg_prio;
 }
 
 static inline u64 tcf_gate_basetime(const struct tc_action *a)
 {
+	struct tcf_gate *gact = to_gate(a);
+	struct tcf_gate_params *p;
 	u64 tcfg_basetime;
 
-	tcfg_basetime = to_gate(a)->param.tcfg_basetime;
+	p = rcu_dereference_protected(gact->param,
+				      lockdep_is_held(&a->tcfa_lock) ||
+				      lockdep_is_held(&gact->tcf_lock) ||
+				      lockdep_rtnl_is_held());
+	tcfg_basetime = p->tcfg_basetime;
 
 	return tcfg_basetime;
 }
 
 static inline u64 tcf_gate_cycletime(const struct tc_action *a)
 {
+	struct tcf_gate *gact = to_gate(a);
+	struct tcf_gate_params *p;
 	u64 tcfg_cycletime;
 
-	tcfg_cycletime = to_gate(a)->param.tcfg_cycletime;
+	p = rcu_dereference_protected(gact->param,
+				      lockdep_is_held(&a->tcfa_lock) ||
+				      lockdep_is_held(&gact->tcf_lock) ||
+				      lockdep_rtnl_is_held());
+	tcfg_cycletime = p->tcfg_cycletime;
 
 	return tcfg_cycletime;
 }
 
 static inline u64 tcf_gate_cycletimeext(const struct tc_action *a)
 {
+	struct tcf_gate *gact = to_gate(a);
+	struct tcf_gate_params *p;
 	u64 tcfg_cycletimeext;
 
-	tcfg_cycletimeext = to_gate(a)->param.tcfg_cycletime_ext;
+	p = rcu_dereference_protected(gact->param,
+				      lockdep_is_held(&a->tcfa_lock) ||
+				      lockdep_is_held(&gact->tcf_lock) ||
+				      lockdep_rtnl_is_held());
+	tcfg_cycletimeext = p->tcfg_cycletime_ext;
 
 	return tcfg_cycletimeext;
 }
 
 static inline u32 tcf_gate_num_entries(const struct tc_action *a)
 {
+	struct tcf_gate *gact = to_gate(a);
+	struct tcf_gate_params *p;
 	u32 num_entries;
 
-	num_entries = to_gate(a)->param.num_entries;
+	p = rcu_dereference_protected(gact->param,
+				      lockdep_is_held(&a->tcfa_lock) ||
+				      lockdep_is_held(&gact->tcf_lock) ||
+				      lockdep_rtnl_is_held());
+	num_entries = p->num_entries;
 
 	return num_entries;
 }
@@ -100,12 +131,16 @@ static inline struct action_gate_entry
 			*tcf_gate_get_list(const struct tc_action *a)
 {
 	struct action_gate_entry *oe;
+	struct tcf_gate *gact = to_gate(a);
 	struct tcf_gate_params *p;
 	struct tcfg_gate_entry *entry;
 	u32 num_entries;
 	int i = 0;
 
-	p = &to_gate(a)->param;
+	p = rcu_dereference_protected(gact->param,
+				      lockdep_is_held(&a->tcfa_lock) ||
+				      lockdep_is_held(&gact->tcf_lock) ||
+				      lockdep_rtnl_is_held());
 	num_entries = p->num_entries;
 
 	list_for_each_entry(entry, &p->entries, list)
diff --git a/net/sched/act_gate.c b/net/sched/act_gate.c
index c1f75f2727576..3ee07c3deaf97 100644
--- a/net/sched/act_gate.c
+++ b/net/sched/act_gate.c
@@ -6,6 +6,7 @@
 #include <linux/kernel.h>
 #include <linux/string.h>
 #include <linux/errno.h>
+#include <linux/limits.h>
 #include <linux/skbuff.h>
 #include <linux/rtnetlink.h>
 #include <linux/init.h>
@@ -32,9 +33,10 @@ static ktime_t gate_get_time(struct tcf_gate *gact)
 	return KTIME_MAX;
 }
 
-static void gate_get_start_time(struct tcf_gate *gact, ktime_t *start)
+static void gate_get_start_time(struct tcf_gate *gact,
+				struct tcf_gate_params *param,
+				ktime_t *start)
 {
-	struct tcf_gate_params *param = &gact->param;
 	ktime_t now, base, cycle;
 	u64 n;
 
@@ -69,12 +71,14 @@ static enum hrtimer_restart gate_timer_func(struct hrtimer *timer)
 {
 	struct tcf_gate *gact = container_of(timer, struct tcf_gate,
 					     hitimer);
-	struct tcf_gate_params *p = &gact->param;
+	struct tcf_gate_params *p;
 	struct tcfg_gate_entry *next;
 	ktime_t close_time, now;
 
 	spin_lock(&gact->tcf_lock);
 
+	p = rcu_dereference_protected(gact->param,
+				      lockdep_is_held(&gact->tcf_lock));
 	next = gact->next_entry;
 
 	/* cycle start, clear pending bit, clear total octets */
@@ -225,6 +229,14 @@ static void release_entry_list(struct list_head *entries)
 	}
 }
 
+static void tcf_gate_params_release(struct rcu_head *rcu)
+{
+	struct tcf_gate_params *p = container_of(rcu, struct tcf_gate_params, rcu);
+
+	release_entry_list(&p->entries);
+	kfree(p);
+}
+
 static int parse_gate_list(struct nlattr *list_attr,
 			   struct tcf_gate_params *sched,
 			   struct netlink_ext_ack *extack)
@@ -270,24 +282,12 @@ static int parse_gate_list(struct nlattr *list_attr,
 	return err;
 }
 
-static void gate_setup_timer(struct tcf_gate *gact, u64 basetime,
-			     enum tk_offsets tko, s32 clockid,
-			     bool do_init)
+static void gate_setup_timer(struct tcf_gate *gact,
+			     enum tk_offsets tko, s32 clockid)
 {
-	if (!do_init) {
-		if (basetime == gact->param.tcfg_basetime &&
-		    tko == gact->tk_offset &&
-		    clockid == gact->param.tcfg_clockid)
-			return;
-
-		spin_unlock_bh(&gact->tcf_lock);
-		hrtimer_cancel(&gact->hitimer);
-		spin_lock_bh(&gact->tcf_lock);
-	}
-	gact->param.tcfg_basetime = basetime;
-	gact->param.tcfg_clockid = clockid;
 	gact->tk_offset = tko;
-	hrtimer_setup(&gact->hitimer, gate_timer_func, clockid, HRTIMER_MODE_ABS_SOFT);
+	hrtimer_setup(&gact->hitimer, gate_timer_func, clockid,
+		      HRTIMER_MODE_ABS_SOFT);
 }
 
 static int tcf_gate_init(struct net *net, struct nlattr *nla,
@@ -296,20 +296,26 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla,
 			 struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, act_gate_ops.net_id);
-	enum tk_offsets tk_offset = TK_OFFS_TAI;
-	bool bind = flags & TCA_ACT_FLAGS_BIND;
 	struct nlattr *tb[TCA_GATE_MAX + 1];
 	struct tcf_chain *goto_ch = NULL;
-	u64 cycletime = 0, basetime = 0;
-	struct tcf_gate_params *p;
-	s32 clockid = CLOCK_TAI;
+	struct tcf_gate_params *p, *oldp;
 	struct tcf_gate *gact;
 	struct tc_gate *parm;
-	int ret = 0, err;
-	u32 gflags = 0;
-	s32 prio = -1;
+	struct tcf_gate_params newp = { };
 	ktime_t start;
+	u64 cycletime = 0, basetime = 0, cycletime_ext = 0;
+	enum tk_offsets tk_offset = TK_OFFS_TAI;
+	s32 clockid = CLOCK_TAI;
+	u32 gflags = 0;
 	u32 index;
+	s32 prio = -1;
+	bool bind = flags & TCA_ACT_FLAGS_BIND;
+	bool clockid_set = false;
+	bool setup_timer = false;
+	bool update_timer = false;
+	int ret = 0, err;
+
+	INIT_LIST_HEAD(&newp.entries);
 
 	if (!nla)
 		return -EINVAL;
@@ -323,6 +329,7 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla,
 
 	if (tb[TCA_GATE_CLOCKID]) {
 		clockid = nla_get_s32(tb[TCA_GATE_CLOCKID]);
+		clockid_set = true;
 		switch (clockid) {
 		case CLOCK_REALTIME:
 			tk_offset = TK_OFFS_REAL;
@@ -349,9 +356,6 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla,
 	if (err < 0)
 		return err;
 
-	if (err && bind)
-		return ACT_P_BOUND;
-
 	if (!err) {
 		ret = tcf_idr_create_from_flags(tn, index, est, a,
 						&act_gate_ops, bind, flags);
@@ -361,94 +365,245 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla,
 		}
 
 		ret = ACT_P_CREATED;
-	} else if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
-		tcf_idr_release(*a, bind);
-		return -EEXIST;
+		gact = to_gate(*a);
+	} else {
+		if (bind)
+			return ACT_P_BOUND;
+
+		if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
+			tcf_idr_release(*a, bind);
+			return -EEXIST;
+		}
+		gact = to_gate(*a);
 	}
 
+	if (ret != ACT_P_CREATED)
+		oldp = rcu_dereference_protected(gact->param,
+						 lockdep_is_held(&gact->common.tcfa_lock) ||
+						 lockdep_is_held(&gact->tcf_lock) ||
+						 lockdep_rtnl_is_held());
+	else
+		oldp = NULL;
+
 	if (tb[TCA_GATE_PRIORITY])
 		prio = nla_get_s32(tb[TCA_GATE_PRIORITY]);
+	else if (ret != ACT_P_CREATED)
+		prio = oldp->tcfg_priority;
 
-	if (tb[TCA_GATE_BASE_TIME])
+	if (tb[TCA_GATE_BASE_TIME]) {
 		basetime = nla_get_u64(tb[TCA_GATE_BASE_TIME]);
+		if (basetime > (u64)S64_MAX) {
+			NL_SET_ERR_MSG(extack, "Base time out of range");
+			err = -EINVAL;
+			goto release_idr;
+		}
+	} else if (ret != ACT_P_CREATED) {
+		basetime = oldp->tcfg_basetime;
+	}
 
 	if (tb[TCA_GATE_FLAGS])
 		gflags = nla_get_u32(tb[TCA_GATE_FLAGS]);
-
-	gact = to_gate(*a);
-	if (ret == ACT_P_CREATED)
-		INIT_LIST_HEAD(&gact->param.entries);
+	else if (ret != ACT_P_CREATED)
+		gflags = oldp->tcfg_flags;
 
 	err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
 	if (err < 0)
 		goto release_idr;
 
-	spin_lock_bh(&gact->tcf_lock);
-	p = &gact->param;
+	if (!clockid_set) {
+		if (ret != ACT_P_CREATED)
+			clockid = oldp->tcfg_clockid;
+		else
+			clockid = CLOCK_TAI;
+		switch (clockid) {
+		case CLOCK_REALTIME:
+			tk_offset = TK_OFFS_REAL;
+			break;
+		case CLOCK_MONOTONIC:
+			tk_offset = TK_OFFS_MAX;
+			break;
+		case CLOCK_BOOTTIME:
+			tk_offset = TK_OFFS_BOOT;
+			break;
+		case CLOCK_TAI:
+			tk_offset = TK_OFFS_TAI;
+			break;
+		default:
+			NL_SET_ERR_MSG(extack, "Invalid 'clockid'");
+			err = -EINVAL;
+			goto put_chain;
+		}
+	}
 
-	if (tb[TCA_GATE_CYCLE_TIME])
+	if (ret == ACT_P_CREATED)
+		update_timer = true;
+	else if (basetime != oldp->tcfg_basetime ||
+		 tk_offset != gact->tk_offset ||
+		 clockid != oldp->tcfg_clockid)
+		update_timer = true;
+
+	if (ret == ACT_P_CREATED)
+		setup_timer = true;
+	else if (clockid != oldp->tcfg_clockid)
+		setup_timer = true;
+
+	if (tb[TCA_GATE_CYCLE_TIME]) {
 		cycletime = nla_get_u64(tb[TCA_GATE_CYCLE_TIME]);
+		if (cycletime > (u64)S64_MAX) {
+			NL_SET_ERR_MSG(extack, "Cycle time out of range");
+			err = -EINVAL;
+			goto put_chain;
+		}
+	}
 
 	if (tb[TCA_GATE_ENTRY_LIST]) {
-		err = parse_gate_list(tb[TCA_GATE_ENTRY_LIST], p, extack);
-		if (err < 0)
-			goto chain_put;
+		err = parse_gate_list(tb[TCA_GATE_ENTRY_LIST], &newp, extack);
+		if (err <= 0) {
+			if (!err)
+				NL_SET_ERR_MSG(extack,
+					       "Missing gate schedule (entry list)");
+			err = -EINVAL;
+			goto put_chain;
+		}
+		newp.num_entries = err;
+	} else if (ret == ACT_P_CREATED) {
+		NL_SET_ERR_MSG(extack, "Missing schedule entry list");
+		err = -EINVAL;
+		goto put_chain;
 	}
 
+	if (tb[TCA_GATE_CYCLE_TIME_EXT])
+		cycletime_ext = nla_get_u64(tb[TCA_GATE_CYCLE_TIME_EXT]);
+	else if (ret != ACT_P_CREATED)
+		cycletime_ext = oldp->tcfg_cycletime_ext;
+
 	if (!cycletime) {
 		struct tcfg_gate_entry *entry;
-		ktime_t cycle = 0;
+		struct list_head *entries;
+		u64 cycle = 0;
+
+		if (!list_empty(&newp.entries))
+			entries = &newp.entries;
+		else if (ret != ACT_P_CREATED)
+			entries = &oldp->entries;
+		else
+			entries = NULL;
+
+		if (!entries) {
+			NL_SET_ERR_MSG(extack, "Invalid cycle time");
+			err = -EINVAL;
+			goto release_new_entries;
+		}
+
+		list_for_each_entry(entry, entries, list) {
+			if (entry->interval > (u64)S64_MAX) {
+				NL_SET_ERR_MSG(extack,
+					       "Cycle time out of range");
+				err = -EINVAL;
+				goto release_new_entries;
+			}
+			if (cycle > (u64)S64_MAX - entry->interval) {
+				NL_SET_ERR_MSG(extack,
+					       "Cycle time out of range");
+				err = -EINVAL;
+				goto release_new_entries;
+			}
+			cycle += entry->interval;
+		}
 
-		list_for_each_entry(entry, &p->entries, list)
-			cycle = ktime_add_ns(cycle, entry->interval);
 		cycletime = cycle;
 		if (!cycletime) {
+			NL_SET_ERR_MSG(extack, "Invalid cycle time");
 			err = -EINVAL;
-			goto chain_put;
+			goto release_new_entries;
 		}
 	}
-	p->tcfg_cycletime = cycletime;
 
-	if (tb[TCA_GATE_CYCLE_TIME_EXT])
-		p->tcfg_cycletime_ext =
-			nla_get_u64(tb[TCA_GATE_CYCLE_TIME_EXT]);
+	if (ret != ACT_P_CREATED &&
+	    (tb[TCA_GATE_ENTRY_LIST] || tb[TCA_GATE_CYCLE_TIME] ||
+	     cycletime != oldp->tcfg_cycletime))
+		update_timer = true;
 
-	gate_setup_timer(gact, basetime, tk_offset, clockid,
-			 ret == ACT_P_CREATED);
+	p = kzalloc(sizeof(*p), GFP_KERNEL);
+	if (!p) {
+		err = -ENOMEM;
+		goto release_new_entries;
+	}
+
+	INIT_LIST_HEAD(&p->entries);
 	p->tcfg_priority = prio;
+	p->tcfg_basetime = basetime;
+	p->tcfg_cycletime = cycletime;
+	p->tcfg_cycletime_ext = cycletime_ext;
 	p->tcfg_flags = gflags;
-	gate_get_start_time(gact, &start);
+	p->tcfg_clockid = clockid;
+
+	if (!list_empty(&newp.entries)) {
+		list_splice_init(&newp.entries, &p->entries);
+		p->num_entries = newp.num_entries;
+	} else if (ret != ACT_P_CREATED) {
+		struct tcfg_gate_entry *entry, *ne;
+
+		list_for_each_entry(entry, &oldp->entries, list) {
+			ne = kmemdup(entry, sizeof(*ne), GFP_KERNEL);
+			if (!ne) {
+				err = -ENOMEM;
+				goto free_p;
+			}
+			INIT_LIST_HEAD(&ne->list);
+			list_add_tail(&ne->list, &p->entries);
+		}
+		p->num_entries = oldp->num_entries;
+	}
 
-	gact->current_close_time = start;
-	gact->current_gate_status = GATE_ACT_GATE_OPEN | GATE_ACT_PENDING;
+	if (update_timer && ret != ACT_P_CREATED)
+		hrtimer_cancel(&gact->hitimer);
+
+	spin_lock_bh(&gact->tcf_lock);
+	if (setup_timer)
+		gate_setup_timer(gact, tk_offset, clockid);
 
+	gate_get_start_time(gact, p, &start);
+	gact->current_close_time = start;
 	gact->next_entry = list_first_entry(&p->entries,
 					    struct tcfg_gate_entry, list);
+	gact->current_entry_octets = 0;
+	gact->current_gate_status = GATE_ACT_GATE_OPEN | GATE_ACT_PENDING;
 
 	goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
 
 	gate_start_timer(gact, start);
 
+	oldp = rcu_replace_pointer(gact->param, p,
+				   lockdep_is_held(&gact->tcf_lock));
+
 	spin_unlock_bh(&gact->tcf_lock);
 
+	if (oldp)
+		call_rcu(&oldp->rcu, tcf_gate_params_release);
+
 	if (goto_ch)
 		tcf_chain_put_by_act(goto_ch);
 
 	return ret;
 
-chain_put:
-	spin_unlock_bh(&gact->tcf_lock);
-
+free_p:
+	release_entry_list(&p->entries);
+	kfree(p);
+release_new_entries:
+	release_entry_list(&newp.entries);
+put_chain:
 	if (goto_ch)
 		tcf_chain_put_by_act(goto_ch);
 release_idr:
-	/* action is not inserted in any list: it's safe to init hitimer
-	 * without taking tcf_lock.
-	 */
-	if (ret == ACT_P_CREATED)
-		gate_setup_timer(gact, gact->param.tcfg_basetime,
-				 gact->tk_offset, gact->param.tcfg_clockid,
-				 true);
+	if (ret == ACT_P_CREATED) {
+		p = rcu_dereference_protected(gact->param, 1);
+		if (p) {
+			release_entry_list(&p->entries);
+			kfree(p);
+			rcu_assign_pointer(gact->param, NULL);
+		}
+	}
 	tcf_idr_release(*a, bind);
 	return err;
 }
@@ -458,9 +613,11 @@ static void tcf_gate_cleanup(struct tc_action *a)
 	struct tcf_gate *gact = to_gate(a);
 	struct tcf_gate_params *p;
 
-	p = &gact->param;
 	hrtimer_cancel(&gact->hitimer);
-	release_entry_list(&p->entries);
+
+	p = rcu_dereference_protected(gact->param, 1);
+	if (p)
+		call_rcu(&p->rcu, tcf_gate_params_release);
 }
 
 static int dumping_entry(struct sk_buff *skb,
@@ -512,7 +669,8 @@ static int tcf_gate_dump(struct sk_buff *skb, struct tc_action *a,
 	spin_lock_bh(&gact->tcf_lock);
 	opt.action = gact->tcf_action;
 
-	p = &gact->param;
+	p = rcu_dereference_protected(gact->param,
+				      lockdep_is_held(&gact->tcf_lock));
 
 	if (nla_put(skb, TCA_GATE_PARMS, sizeof(opt), &opt))
 		goto nla_put_failure;
-- 
2.52.GIT

Re: [PATCH 1/2] net/sched: act_gate: fix schedule updates with RCU swap

Posted by Victor Nogueira 2 weeks, 5 days ago

On 19/01/2026 21:48, Paul Moses wrote:
> Switch act_gate parameters to an RCU-protected pointer and update schedule
> changes using a prepare-then-swap pattern. This avoids races between the
> timer/data paths and configuration updates, and cancels the hrtimer
> before swapping schedules.
> 
> A gate action replace could free and swap schedules while the hrtimer
> callback or data path still dereferences the old entries, leaving a
> use-after-free window during updates. The deferred swap and RCU free
> close that window. A reproducer is available on request.
> 
> Also clear params on early error for newly created actions to avoid
> leaving a dangling reference.
> [...]
> diff --git a/net/sched/act_gate.c b/net/sched/act_gate.c
> index c1f75f2727576..3ee07c3deaf97 100644
> --- a/net/sched/act_gate.c
> +++ b/net/sched/act_gate.c
> @@ -6,6 +6,7 @@
>   #include <linux/kernel.h>
>   #include <linux/string.h>
>   #include <linux/errno.h>
> +#include <linux/limits.h>

Do you really need to include this?

> [...]
> @@ -69,12 +71,14 @@ static enum hrtimer_restart gate_timer_func(struct hrtimer *timer)
>   {
>   	struct tcf_gate *gact = container_of(timer, struct tcf_gate,
>   					     hitimer);
> -	struct tcf_gate_params *p = &gact->param;
> +	struct tcf_gate_params *p;

When adding/editing local variables, you should adhere to the
reverse xmas tree style [1].

>   	spin_lock(&gact->tcf_lock);

Shouldn't you call rcu_read_lock before this line now?

> +	p = rcu_dereference_protected(gact->param,
> +				      lockdep_is_held(&gact->tcf_lock));
> [...]
>   static int tcf_gate_init(struct net *net, struct nlattr *nla,
> @@ -296,20 +296,26 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla,
>   			 struct netlink_ext_ack *extack)
>   {
>   	struct tc_action_net *tn = net_generic(net, act_gate_ops.net_id);
> -	enum tk_offsets tk_offset = TK_OFFS_TAI;
> -	bool bind = flags & TCA_ACT_FLAGS_BIND;
>   	struct nlattr *tb[TCA_GATE_MAX + 1];
>   	struct tcf_chain *goto_ch = NULL;
> -	u64 cycletime = 0, basetime = 0;
> -	struct tcf_gate_params *p;
> -	s32 clockid = CLOCK_TAI;
> +	struct tcf_gate_params *p, *oldp;
>   	struct tcf_gate *gact;
>   	struct tc_gate *parm;
> -	int ret = 0, err;
> -	u32 gflags = 0;
> -	s32 prio = -1;
> +	struct tcf_gate_params newp = { };

Abide by reverse xmas tree when adding local variables.

> [...]
> +	bool clockid_set = false;

I could be missing something, but I don't believe you need this
boolean.

> [...]
> @@ -323,6 +329,7 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla,
>   
>   	if (tb[TCA_GATE_CLOCKID]) {
>   		clockid = nla_get_s32(tb[TCA_GATE_CLOCKID]);
> +		clockid_set = true;
>   		switch (clockid) {

Instead of using clockid_set and repeating the switch statament.
You could put this if-statement after you already have oldp and do the
following:

          if (tb[TCA_GATE_CLOCKID]) {
                  clockid = nla_get_s32(tb[TCA_GATE_CLOCKID]);
                  switch (clockid) {
                  case CLOCK_REALTIME:
                          tk_offset = TK_OFFS_REAL;
                          break;
                  case CLOCK_MONOTONIC:
                          tk_offset = TK_OFFS_MAX;
                          break;
                  case CLOCK_BOOTTIME:
                          tk_offset = TK_OFFS_BOOT;
                          break;
                  case CLOCK_TAI:
                          tk_offset = TK_OFFS_TAI;
                          break;
                  default:
                          NL_SET_ERR_MSG(extack, "Invalid 'clockid'");
                          return -EINVAL;
                  }
          } else if (ret != ACT_P_CREATED) {
                  clockid = oldp->tcfg_clockid;
                  tk_offset = gact->tk_offset;
          }

> [...]
> -	if (tb[TCA_GATE_CYCLE_TIME])
> +	if (ret == ACT_P_CREATED)
> +		update_timer = true;
> [...]

Here you are assigning update_timer to true when the op is a create...

> [...]
> +	if (update_timer && ret != ACT_P_CREATED)
> +		hrtimer_cancel(&gact->hitimer);

.. however in the if-statement where it is used you are only allowing
updates. This looks weird.

> [...]
> +free_p:
> +	release_entry_list(&p->entries);
> +	kfree(p);

The 2 lines of code above are being repeated below and in
tcf_gate_params_release. You should put them in a common function.

> +release_new_entries:
> +	release_entry_list(&newp.entries);
> +put_chain:
>   	if (goto_ch)
>   		tcf_chain_put_by_act(goto_ch);
>   release_idr:
> -	/* action is not inserted in any list: it's safe to init hitimer
> -	 * without taking tcf_lock.
> -	 */
> -	if (ret == ACT_P_CREATED)
> -		gate_setup_timer(gact, gact->param.tcfg_basetime,
> -				 gact->tk_offset, gact->param.tcfg_clockid,
> -				 true);
> +	if (ret == ACT_P_CREATED) {
> +		p = rcu_dereference_protected(gact->param, 1);
> +		if (p) {
> +			release_entry_list(&p->entries);
> +			kfree(p);
> +			rcu_assign_pointer(gact->param, NULL);
> +		}
> +	}
>   	tcf_idr_release(*a, bind);

Also, the AI review [2] pointed out a real issue.
It's easy to reproduce by running something like:

tc action add action gate base-time 200000000000ns \
      sched-entry close 0ns index 10

I think overall you have the right idea - RCU seems like a good fit here.
The issue is that this patch is confusing because it seems like you are
trying to fix the bug and perform cleanups at the same time.
If that is the case, can you try breaking this patch into two? Do one to
fix the bug (introducing RCU and etc) and another for the cleanups.

[1] 
https://www.kernel.org/doc/html/v6.3/process/maintainer-netdev.html#local-variable-ordering-reverse-xmas-tree-rcs
[2] 
https://netdev-ai.bots.linux.dev/ai-review.html?id=cdc17d0d-fd59-41a8-9c8d-1a42699167fd#patch-0

cheers,
Victor

Re: [PATCH 1/2] net/sched: act_gate: fix schedule updates with RCU swap

Posted by Paul Moses 2 weeks, 4 days ago

> Also, the AI review [2] pointed out a real issue.
> It's easy to reproduce by running something like:
>
> tc action add action gate base-time 200000000000ns \
>   sched-entry close 0ns index 10

This was never allowed. A zero interval has always been invalid for a gate schedule entry. Clang pointed out a no-op branch I added by mistake and the AI review picked it up, but the intent was simply to mirror the existing base-time / cycle-time range checks we already have. Functionally it’s redundant because we were already rejecting this case via the existing validation e.g.:

    if (cycle > (u64)S64_MAX - entry->interval) { ... }

    if (interval == 0) {
        NL_SET_ERR_MSG(extack, "Invalid interval for schedule entry");
        return -EINVAL;
    }

I will prepare and test v3 with your first 8 suggestions and await further input on best practices for avoiding a monolithic patch and on appropriate levels of validation in this specific case.

Thanks
Paul


On Tuesday, January 20th, 2026 at 3:04 PM, Victor Nogueira <victor@mojatatu.com> wrote:

> 
> 
> On 19/01/2026 21:48, Paul Moses wrote:
> 
> > Switch act_gate parameters to an RCU-protected pointer and update schedule
> > changes using a prepare-then-swap pattern. This avoids races between the
> > timer/data paths and configuration updates, and cancels the hrtimer
> > before swapping schedules.
> > 
> > A gate action replace could free and swap schedules while the hrtimer
> > callback or data path still dereferences the old entries, leaving a
> > use-after-free window during updates. The deferred swap and RCU free
> > close that window. A reproducer is available on request.
> > 
> > Also clear params on early error for newly created actions to avoid
> > leaving a dangling reference.
> > [...]
> > diff --git a/net/sched/act_gate.c b/net/sched/act_gate.c
> > index c1f75f2727576..3ee07c3deaf97 100644
> > --- a/net/sched/act_gate.c
> > +++ b/net/sched/act_gate.c
> > @@ -6,6 +6,7 @@
> > #include <linux/kernel.h>
> > #include <linux/string.h>
> > #include <linux/errno.h>
> > +#include <linux/limits.h>
> 
> 
> Do you really need to include this?
> 
> > [...]
> > @@ -69,12 +71,14 @@ static enum hrtimer_restart gate_timer_func(struct hrtimer *timer)
> > {
> > struct tcf_gate *gact = container_of(timer, struct tcf_gate,
> > hitimer);
> > - struct tcf_gate_params *p = &gact->param;
> > + struct tcf_gate_params *p;
> 
> 
> When adding/editing local variables, you should adhere to the
> reverse xmas tree style [1].
> 
> > spin_lock(&gact->tcf_lock);
> 
> 
> Shouldn't you call rcu_read_lock before this line now?
> 
> > + p = rcu_dereference_protected(gact->param,
> > + lockdep_is_held(&gact->tcf_lock));
> > [...]
> > static int tcf_gate_init(struct net *net, struct nlattr *nla,
> > @@ -296,20 +296,26 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla,
> > struct netlink_ext_ack *extack)
> > {
> > struct tc_action_net *tn = net_generic(net, act_gate_ops.net_id);
> > - enum tk_offsets tk_offset = TK_OFFS_TAI;
> > - bool bind = flags & TCA_ACT_FLAGS_BIND;
> > struct nlattr *tb[TCA_GATE_MAX + 1];
> > struct tcf_chain *goto_ch = NULL;
> > - u64 cycletime = 0, basetime = 0;
> > - struct tcf_gate_params *p;
> > - s32 clockid = CLOCK_TAI;
> > + struct tcf_gate_params *p, *oldp;
> > struct tcf_gate *gact;
> > struct tc_gate *parm;
> > - int ret = 0, err;
> > - u32 gflags = 0;
> > - s32 prio = -1;
> > + struct tcf_gate_params newp = { };
> 
> 
> Abide by reverse xmas tree when adding local variables.
> 
> > [...]
> > + bool clockid_set = false;
> 
> 
> I could be missing something, but I don't believe you need this
> boolean.
> 
> > [...]
> > @@ -323,6 +329,7 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla,
> > 
> > if (tb[TCA_GATE_CLOCKID]) {
> > clockid = nla_get_s32(tb[TCA_GATE_CLOCKID]);
> > + clockid_set = true;
> > switch (clockid) {
> 
> 
> Instead of using clockid_set and repeating the switch statament.
> You could put this if-statement after you already have oldp and do the
> following:
> 
> if (tb[TCA_GATE_CLOCKID]) {
> clockid = nla_get_s32(tb[TCA_GATE_CLOCKID]);
> switch (clockid) {
> case CLOCK_REALTIME:
> tk_offset = TK_OFFS_REAL;
> break;
> case CLOCK_MONOTONIC:
> tk_offset = TK_OFFS_MAX;
> break;
> case CLOCK_BOOTTIME:
> tk_offset = TK_OFFS_BOOT;
> break;
> case CLOCK_TAI:
> tk_offset = TK_OFFS_TAI;
> break;
> default:
> NL_SET_ERR_MSG(extack, "Invalid 'clockid'");
> return -EINVAL;
> }
> } else if (ret != ACT_P_CREATED) {
> clockid = oldp->tcfg_clockid;
> 
> tk_offset = gact->tk_offset;
> 
> }
> 
> > [...]
> > - if (tb[TCA_GATE_CYCLE_TIME])
> > + if (ret == ACT_P_CREATED)
> > + update_timer = true;
> > [...]
> 
> 
> Here you are assigning update_timer to true when the op is a create...
> 
> > [...]
> > + if (update_timer && ret != ACT_P_CREATED)
> > + hrtimer_cancel(&gact->hitimer);
> 
> 
> .. however in the if-statement where it is used you are only allowing
> updates. This looks weird.
> 
> > [...]
> > +free_p:
> > + release_entry_list(&p->entries);
> > + kfree(p);
> 
> 
> The 2 lines of code above are being repeated below and in
> tcf_gate_params_release. You should put them in a common function.
> 
> > +release_new_entries:
> > + release_entry_list(&newp.entries);
> > +put_chain:
> > if (goto_ch)
> > tcf_chain_put_by_act(goto_ch);
> > release_idr:
> > - /* action is not inserted in any list: it's safe to init hitimer
> > - * without taking tcf_lock.
> > - */
> > - if (ret == ACT_P_CREATED)
> > - gate_setup_timer(gact, gact->param.tcfg_basetime,
> > - gact->tk_offset, gact->param.tcfg_clockid,
> > - true);
> > + if (ret == ACT_P_CREATED) {
> > + p = rcu_dereference_protected(gact->param, 1);
> > + if (p) {
> > + release_entry_list(&p->entries);
> > + kfree(p);
> > + rcu_assign_pointer(gact->param, NULL);
> > + }
> > + }
> > tcf_idr_release(*a, bind);
> 
> 
> Also, the AI review [2] pointed out a real issue.
> It's easy to reproduce by running something like:
> 
> tc action add action gate base-time 200000000000ns \
> sched-entry close 0ns index 10
> 
> I think overall you have the right idea - RCU seems like a good fit here.
> The issue is that this patch is confusing because it seems like you are
> trying to fix the bug and perform cleanups at the same time.
> If that is the case, can you try breaking this patch into two? Do one to
> fix the bug (introducing RCU and etc) and another for the cleanups.
> 
> [1]
> https://www.kernel.org/doc/html/v6.3/process/maintainer-netdev.html#local-variable-ordering-reverse-xmas-tree-rcs
> [2]
> https://netdev-ai.bots.linux.dev/ai-review.html?id=cdc17d0d-fd59-41a8-9c8d-1a42699167fd#patch-0
> 
> cheers,
> Victor

Re: [PATCH 1/2] net/sched: act_gate: fix schedule updates with RCU swap

Posted by Victor Nogueira 2 weeks, 4 days ago

On Tue, Jan 20, 2026 at 10:00 PM Paul Moses <p@1g4.org> wrote:
>
> > Also, the AI review [2] pointed out a real issue.
> > It's easy to reproduce by running something like:
> >
> > tc action add action gate base-time 200000000000ns \
> >   sched-entry close 0ns index 10
>
> This was never allowed. A zero interval has always been invalid for a gate schedule entry

The issue is not whether this is valid or not (as you said, it isn't).
The issue is that, with your patch, this will result in a null-ptr deref.
You can avoid this by initialising the timer before calling tcf_idr_release.

> [...]
> I will prepare and test v3 with your first 8 suggestions and await further input on best practices for avoiding a monolithic patch and on appropriate levels of validation in this specific case.

Ok, the main suggestion is to logically break down (as much as possible),
the introduction of RCU and the other additional cleanups/checks.

cheers,
Victor

Re: [PATCH 1/2] net/sched: act_gate: fix schedule updates with RCU swap

Posted by Paul Moses 2 weeks, 5 days ago

act_gate is not just a list and a timer. It is a small state machine that keeps runtime gating insulated from control plane updates using tcf_lock, the hrtimer, and the PENDING bit. The live decision variables are protected, but the schedule backing store (the entries list) was still being mutated or freed on the control path, so updates were not transactional and could expose partially constructed schedules or freed nodes to concurrent readers. The fix is to treat the schedule as an immutable published object: validate and build off to the side, publish atomically, then retire after readers drain (RCU). That preserves the original goal of not interfering with the gate.

I’m trying to strike the right balance of input validation for stable. I kept this as one patch because the validate and build before publish path is part of making the RCU update model correct. I did try implementing the additional validation independently first, but it did not meaningfully reduce the size or complexity because the RCU swap still requires a fully formed schedule object before publish. Without preliminary validation, it is easy to corrupt the timer state or end up with undefined schedule behavior. At minimum, I think we need:
  - interval > 0: prevents zero length entries and immediate refire loops
  - at least one entry: avoids empty schedule deref and undefined state
  - cycle time > 0: required for division and close time computations
  - overflow checks: prevent wrap or underflow in close time arithmetic

I can do the conversion without these checks, but the resulting behavior would be fragile. If there is guidance on what validation level you would prefer here (minimal representable range checks vs stricter sane inputs), I am happy to align with it.

thanks
Paul


On Tuesday, January 20th, 2026 at 3:04 PM, Victor Nogueira <victor@mojatatu.com> wrote:

> 
> 
> On 19/01/2026 21:48, Paul Moses wrote:
> 
> > Switch act_gate parameters to an RCU-protected pointer and update schedule
> > changes using a prepare-then-swap pattern. This avoids races between the
> > timer/data paths and configuration updates, and cancels the hrtimer
> > before swapping schedules.
> > 
> > A gate action replace could free and swap schedules while the hrtimer
> > callback or data path still dereferences the old entries, leaving a
> > use-after-free window during updates. The deferred swap and RCU free
> > close that window. A reproducer is available on request.
> > 
> > Also clear params on early error for newly created actions to avoid
> > leaving a dangling reference.
> > [...]
> > diff --git a/net/sched/act_gate.c b/net/sched/act_gate.c
> > index c1f75f2727576..3ee07c3deaf97 100644
> > --- a/net/sched/act_gate.c
> > +++ b/net/sched/act_gate.c
> > @@ -6,6 +6,7 @@
> > #include <linux/kernel.h>
> > #include <linux/string.h>
> > #include <linux/errno.h>
> > +#include <linux/limits.h>
> 
> 
> Do you really need to include this?
> 
> > [...]
> > @@ -69,12 +71,14 @@ static enum hrtimer_restart gate_timer_func(struct hrtimer *timer)
> > {
> > struct tcf_gate *gact = container_of(timer, struct tcf_gate,
> > hitimer);
> > - struct tcf_gate_params *p = &gact->param;
> > + struct tcf_gate_params *p;
> 
> 
> When adding/editing local variables, you should adhere to the
> reverse xmas tree style [1].
> 
> > spin_lock(&gact->tcf_lock);
> 
> 
> Shouldn't you call rcu_read_lock before this line now?
> 
> > + p = rcu_dereference_protected(gact->param,
> > + lockdep_is_held(&gact->tcf_lock));
> > [...]
> > static int tcf_gate_init(struct net *net, struct nlattr *nla,
> > @@ -296,20 +296,26 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla,
> > struct netlink_ext_ack *extack)
> > {
> > struct tc_action_net *tn = net_generic(net, act_gate_ops.net_id);
> > - enum tk_offsets tk_offset = TK_OFFS_TAI;
> > - bool bind = flags & TCA_ACT_FLAGS_BIND;
> > struct nlattr *tb[TCA_GATE_MAX + 1];
> > struct tcf_chain *goto_ch = NULL;
> > - u64 cycletime = 0, basetime = 0;
> > - struct tcf_gate_params *p;
> > - s32 clockid = CLOCK_TAI;
> > + struct tcf_gate_params *p, *oldp;
> > struct tcf_gate *gact;
> > struct tc_gate *parm;
> > - int ret = 0, err;
> > - u32 gflags = 0;
> > - s32 prio = -1;
> > + struct tcf_gate_params newp = { };
> 
> 
> Abide by reverse xmas tree when adding local variables.
> 
> > [...]
> > + bool clockid_set = false;
> 
> 
> I could be missing something, but I don't believe you need this
> boolean.
> 
> > [...]
> > @@ -323,6 +329,7 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla,
> > 
> > if (tb[TCA_GATE_CLOCKID]) {
> > clockid = nla_get_s32(tb[TCA_GATE_CLOCKID]);
> > + clockid_set = true;
> > switch (clockid) {
> 
> 
> Instead of using clockid_set and repeating the switch statament.
> You could put this if-statement after you already have oldp and do the
> following:
> 
> if (tb[TCA_GATE_CLOCKID]) {
> clockid = nla_get_s32(tb[TCA_GATE_CLOCKID]);
> switch (clockid) {
> case CLOCK_REALTIME:
> tk_offset = TK_OFFS_REAL;
> break;
> case CLOCK_MONOTONIC:
> tk_offset = TK_OFFS_MAX;
> break;
> case CLOCK_BOOTTIME:
> tk_offset = TK_OFFS_BOOT;
> break;
> case CLOCK_TAI:
> tk_offset = TK_OFFS_TAI;
> break;
> default:
> NL_SET_ERR_MSG(extack, "Invalid 'clockid'");
> return -EINVAL;
> }
> } else if (ret != ACT_P_CREATED) {
> clockid = oldp->tcfg_clockid;
> 
> tk_offset = gact->tk_offset;
> 
> }
> 
> > [...]
> > - if (tb[TCA_GATE_CYCLE_TIME])
> > + if (ret == ACT_P_CREATED)
> > + update_timer = true;
> > [...]
> 
> 
> Here you are assigning update_timer to true when the op is a create...
> 
> > [...]
> > + if (update_timer && ret != ACT_P_CREATED)
> > + hrtimer_cancel(&gact->hitimer);
> 
> 
> .. however in the if-statement where it is used you are only allowing
> updates. This looks weird.
> 
> > [...]
> > +free_p:
> > + release_entry_list(&p->entries);
> > + kfree(p);
> 
> 
> The 2 lines of code above are being repeated below and in
> tcf_gate_params_release. You should put them in a common function.
> 
> > +release_new_entries:
> > + release_entry_list(&newp.entries);
> > +put_chain:
> > if (goto_ch)
> > tcf_chain_put_by_act(goto_ch);
> > release_idr:
> > - /* action is not inserted in any list: it's safe to init hitimer
> > - * without taking tcf_lock.
> > - */
> > - if (ret == ACT_P_CREATED)
> > - gate_setup_timer(gact, gact->param.tcfg_basetime,
> > - gact->tk_offset, gact->param.tcfg_clockid,
> > - true);
> > + if (ret == ACT_P_CREATED) {
> > + p = rcu_dereference_protected(gact->param, 1);
> > + if (p) {
> > + release_entry_list(&p->entries);
> > + kfree(p);
> > + rcu_assign_pointer(gact->param, NULL);
> > + }
> > + }
> > tcf_idr_release(*a, bind);
> 
> 
> Also, the AI review [2] pointed out a real issue.
> It's easy to reproduce by running something like:
> 
> tc action add action gate base-time 200000000000ns \
> sched-entry close 0ns index 10
> 
> I think overall you have the right idea - RCU seems like a good fit here.
> The issue is that this patch is confusing because it seems like you are
> trying to fix the bug and perform cleanups at the same time.
> If that is the case, can you try breaking this patch into two? Do one to
> fix the bug (introducing RCU and etc) and another for the cleanups.
> 
> [1]
> https://www.kernel.org/doc/html/v6.3/process/maintainer-netdev.html#local-variable-ordering-reverse-xmas-tree-rcs
> [2]
> https://netdev-ai.bots.linux.dev/ai-review.html?id=cdc17d0d-fd59-41a8-9c8d-1a42699167fd#patch-0
> 
> cheers,
> Victor

Re: [PATCH 1/2] net/sched: act_gate: fix schedule updates with RCU swap

Posted by Victor Nogueira 2 weeks, 4 days ago

On Tue, Jan 20, 2026 at 7:47 PM Paul Moses <p@1g4.org> wrote:
> [...]
> I’m trying to strike the right balance of input validation for stable. I kept this as one patch because the validate and build before publish path is part of making the RCU update model correct. I did try implementing the additional validation independently first, but it did not meaningfully reduce the size or complexity because the RCU swap still requires a fully formed schedule object before publish. Without preliminary validation, it is easy to corrupt the timer state or end up with undefined schedule behavior. At minimum, I think we need:
>   - interval > 0: prevents zero length entries and immediate refire loops
>   - at least one entry: avoids empty schedule deref and undefined state
>   - cycle time > 0: required for division and close time computations
>   - overflow checks: prevent wrap or underflow in close time arithmetic
>
> I can do the conversion without these checks, but the resulting behavior would be fragile. If there is guidance on what validation level you would prefer here (minimal representable range checks vs stricter sane inputs), I am happy to align with it.

I'd say, for starters, try to keep as much of the original code as possible
whilst introducting RCU. If you think it's still not "ready", you can post
as RFC. Then we can add more checks (if necessary) as we go.
I believe this approach will result in cleaner, more maintainable code.

cheers,
Victor

Re: [PATCH 1/2] net/sched: act_gate: fix schedule updates with RCU swap

Posted by kernel test robot 2 weeks, 5 days ago

Hi Paul,

kernel test robot noticed the following build warnings:

[auto build test WARNING on net/main]

url:    https://github.com/intel-lab-lkp/linux/commits/Paul-Moses/net-sched-act_gate-fix-schedule-updates-with-RCU-swap/20260120-085120
base:   net/main
patch link:    https://lore.kernel.org/r/20260120004720.1886632-2-p%401g4.org
patch subject: [PATCH 1/2] net/sched: act_gate: fix schedule updates with RCU swap
config: powerpc-randconfig-001-20260120 (https://download.01.org/0day-ci/archive/20260120/202601201522.IMzUdE7V-lkp@intel.com/config)
compiler: clang version 19.1.7 (https://github.com/llvm/llvm-project cd708029e0b2869e80abe31ddb175f7c35361f90)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20260120/202601201522.IMzUdE7V-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202601201522.IMzUdE7V-lkp@intel.com/

All warnings (new ones prefixed by >>):

>> net/sched/act_gate.c:499:24: warning: result of comparison of constant 9223372036854775807 with expression of type 'u32' (aka 'unsigned int') is always false [-Wtautological-constant-out-of-range-compare]
     499 |                         if (entry->interval > (u64)S64_MAX) {
         |                             ~~~~~~~~~~~~~~~ ^ ~~~~~~~~~~~~
   1 warning generated.


vim +499 net/sched/act_gate.c

   292	
   293	static int tcf_gate_init(struct net *net, struct nlattr *nla,
   294				 struct nlattr *est, struct tc_action **a,
   295				 struct tcf_proto *tp, u32 flags,
   296				 struct netlink_ext_ack *extack)
   297	{
   298		struct tc_action_net *tn = net_generic(net, act_gate_ops.net_id);
   299		struct nlattr *tb[TCA_GATE_MAX + 1];
   300		struct tcf_chain *goto_ch = NULL;
   301		struct tcf_gate_params *p, *oldp;
   302		struct tcf_gate *gact;
   303		struct tc_gate *parm;
   304		struct tcf_gate_params newp = { };
   305		ktime_t start;
   306		u64 cycletime = 0, basetime = 0, cycletime_ext = 0;
   307		enum tk_offsets tk_offset = TK_OFFS_TAI;
   308		s32 clockid = CLOCK_TAI;
   309		u32 gflags = 0;
   310		u32 index;
   311		s32 prio = -1;
   312		bool bind = flags & TCA_ACT_FLAGS_BIND;
   313		bool clockid_set = false;
   314		bool setup_timer = false;
   315		bool update_timer = false;
   316		int ret = 0, err;
   317	
   318		INIT_LIST_HEAD(&newp.entries);
   319	
   320		if (!nla)
   321			return -EINVAL;
   322	
   323		err = nla_parse_nested(tb, TCA_GATE_MAX, nla, gate_policy, extack);
   324		if (err < 0)
   325			return err;
   326	
   327		if (!tb[TCA_GATE_PARMS])
   328			return -EINVAL;
   329	
   330		if (tb[TCA_GATE_CLOCKID]) {
   331			clockid = nla_get_s32(tb[TCA_GATE_CLOCKID]);
   332			clockid_set = true;
   333			switch (clockid) {
   334			case CLOCK_REALTIME:
   335				tk_offset = TK_OFFS_REAL;
   336				break;
   337			case CLOCK_MONOTONIC:
   338				tk_offset = TK_OFFS_MAX;
   339				break;
   340			case CLOCK_BOOTTIME:
   341				tk_offset = TK_OFFS_BOOT;
   342				break;
   343			case CLOCK_TAI:
   344				tk_offset = TK_OFFS_TAI;
   345				break;
   346			default:
   347				NL_SET_ERR_MSG(extack, "Invalid 'clockid'");
   348				return -EINVAL;
   349			}
   350		}
   351	
   352		parm = nla_data(tb[TCA_GATE_PARMS]);
   353		index = parm->index;
   354	
   355		err = tcf_idr_check_alloc(tn, &index, a, bind);
   356		if (err < 0)
   357			return err;
   358	
   359		if (!err) {
   360			ret = tcf_idr_create_from_flags(tn, index, est, a,
   361							&act_gate_ops, bind, flags);
   362			if (ret) {
   363				tcf_idr_cleanup(tn, index);
   364				return ret;
   365			}
   366	
   367			ret = ACT_P_CREATED;
   368			gact = to_gate(*a);
   369		} else {
   370			if (bind)
   371				return ACT_P_BOUND;
   372	
   373			if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
   374				tcf_idr_release(*a, bind);
   375				return -EEXIST;
   376			}
   377			gact = to_gate(*a);
   378		}
   379	
   380		if (ret != ACT_P_CREATED)
   381			oldp = rcu_dereference_protected(gact->param,
   382							 lockdep_is_held(&gact->common.tcfa_lock) ||
   383							 lockdep_is_held(&gact->tcf_lock) ||
   384							 lockdep_rtnl_is_held());
   385		else
   386			oldp = NULL;
   387	
   388		if (tb[TCA_GATE_PRIORITY])
   389			prio = nla_get_s32(tb[TCA_GATE_PRIORITY]);
   390		else if (ret != ACT_P_CREATED)
   391			prio = oldp->tcfg_priority;
   392	
   393		if (tb[TCA_GATE_BASE_TIME]) {
   394			basetime = nla_get_u64(tb[TCA_GATE_BASE_TIME]);
   395			if (basetime > (u64)S64_MAX) {
   396				NL_SET_ERR_MSG(extack, "Base time out of range");
   397				err = -EINVAL;
   398				goto release_idr;
   399			}
   400		} else if (ret != ACT_P_CREATED) {
   401			basetime = oldp->tcfg_basetime;
   402		}
   403	
   404		if (tb[TCA_GATE_FLAGS])
   405			gflags = nla_get_u32(tb[TCA_GATE_FLAGS]);
   406		else if (ret != ACT_P_CREATED)
   407			gflags = oldp->tcfg_flags;
   408	
   409		err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
   410		if (err < 0)
   411			goto release_idr;
   412	
   413		if (!clockid_set) {
   414			if (ret != ACT_P_CREATED)
   415				clockid = oldp->tcfg_clockid;
   416			else
   417				clockid = CLOCK_TAI;
   418			switch (clockid) {
   419			case CLOCK_REALTIME:
   420				tk_offset = TK_OFFS_REAL;
   421				break;
   422			case CLOCK_MONOTONIC:
   423				tk_offset = TK_OFFS_MAX;
   424				break;
   425			case CLOCK_BOOTTIME:
   426				tk_offset = TK_OFFS_BOOT;
   427				break;
   428			case CLOCK_TAI:
   429				tk_offset = TK_OFFS_TAI;
   430				break;
   431			default:
   432				NL_SET_ERR_MSG(extack, "Invalid 'clockid'");
   433				err = -EINVAL;
   434				goto put_chain;
   435			}
   436		}
   437	
   438		if (ret == ACT_P_CREATED)
   439			update_timer = true;
   440		else if (basetime != oldp->tcfg_basetime ||
   441			 tk_offset != gact->tk_offset ||
   442			 clockid != oldp->tcfg_clockid)
   443			update_timer = true;
   444	
   445		if (ret == ACT_P_CREATED)
   446			setup_timer = true;
   447		else if (clockid != oldp->tcfg_clockid)
   448			setup_timer = true;
   449	
   450		if (tb[TCA_GATE_CYCLE_TIME]) {
   451			cycletime = nla_get_u64(tb[TCA_GATE_CYCLE_TIME]);
   452			if (cycletime > (u64)S64_MAX) {
   453				NL_SET_ERR_MSG(extack, "Cycle time out of range");
   454				err = -EINVAL;
   455				goto put_chain;
   456			}
   457		}
   458	
   459		if (tb[TCA_GATE_ENTRY_LIST]) {
   460			err = parse_gate_list(tb[TCA_GATE_ENTRY_LIST], &newp, extack);
   461			if (err <= 0) {
   462				if (!err)
   463					NL_SET_ERR_MSG(extack,
   464						       "Missing gate schedule (entry list)");
   465				err = -EINVAL;
   466				goto put_chain;
   467			}
   468			newp.num_entries = err;
   469		} else if (ret == ACT_P_CREATED) {
   470			NL_SET_ERR_MSG(extack, "Missing schedule entry list");
   471			err = -EINVAL;
   472			goto put_chain;
   473		}
   474	
   475		if (tb[TCA_GATE_CYCLE_TIME_EXT])
   476			cycletime_ext = nla_get_u64(tb[TCA_GATE_CYCLE_TIME_EXT]);
   477		else if (ret != ACT_P_CREATED)
   478			cycletime_ext = oldp->tcfg_cycletime_ext;
   479	
   480		if (!cycletime) {
   481			struct tcfg_gate_entry *entry;
   482			struct list_head *entries;
   483			u64 cycle = 0;
   484	
   485			if (!list_empty(&newp.entries))
   486				entries = &newp.entries;
   487			else if (ret != ACT_P_CREATED)
   488				entries = &oldp->entries;
   489			else
   490				entries = NULL;
   491	
   492			if (!entries) {
   493				NL_SET_ERR_MSG(extack, "Invalid cycle time");
   494				err = -EINVAL;
   495				goto release_new_entries;
   496			}
   497	
   498			list_for_each_entry(entry, entries, list) {
 > 499				if (entry->interval > (u64)S64_MAX) {
   500					NL_SET_ERR_MSG(extack,
   501						       "Cycle time out of range");
   502					err = -EINVAL;
   503					goto release_new_entries;
   504				}
   505				if (cycle > (u64)S64_MAX - entry->interval) {
   506					NL_SET_ERR_MSG(extack,
   507						       "Cycle time out of range");
   508					err = -EINVAL;
   509					goto release_new_entries;
   510				}
   511				cycle += entry->interval;
   512			}
   513	
   514			cycletime = cycle;
   515			if (!cycletime) {
   516				NL_SET_ERR_MSG(extack, "Invalid cycle time");
   517				err = -EINVAL;
   518				goto release_new_entries;
   519			}
   520		}
   521	
   522		if (ret != ACT_P_CREATED &&
   523		    (tb[TCA_GATE_ENTRY_LIST] || tb[TCA_GATE_CYCLE_TIME] ||
   524		     cycletime != oldp->tcfg_cycletime))
   525			update_timer = true;
   526	
   527		p = kzalloc(sizeof(*p), GFP_KERNEL);
   528		if (!p) {
   529			err = -ENOMEM;
   530			goto release_new_entries;
   531		}
   532	
   533		INIT_LIST_HEAD(&p->entries);
   534		p->tcfg_priority = prio;
   535		p->tcfg_basetime = basetime;
   536		p->tcfg_cycletime = cycletime;
   537		p->tcfg_cycletime_ext = cycletime_ext;
   538		p->tcfg_flags = gflags;
   539		p->tcfg_clockid = clockid;
   540	
   541		if (!list_empty(&newp.entries)) {
   542			list_splice_init(&newp.entries, &p->entries);
   543			p->num_entries = newp.num_entries;
   544		} else if (ret != ACT_P_CREATED) {
   545			struct tcfg_gate_entry *entry, *ne;
   546	
   547			list_for_each_entry(entry, &oldp->entries, list) {
   548				ne = kmemdup(entry, sizeof(*ne), GFP_KERNEL);
   549				if (!ne) {
   550					err = -ENOMEM;
   551					goto free_p;
   552				}
   553				INIT_LIST_HEAD(&ne->list);
   554				list_add_tail(&ne->list, &p->entries);
   555			}
   556			p->num_entries = oldp->num_entries;
   557		}
   558	
   559		if (update_timer && ret != ACT_P_CREATED)
   560			hrtimer_cancel(&gact->hitimer);
   561	
   562		spin_lock_bh(&gact->tcf_lock);
   563		if (setup_timer)
   564			gate_setup_timer(gact, tk_offset, clockid);
   565	
   566		gate_get_start_time(gact, p, &start);
   567		gact->current_close_time = start;
   568		gact->next_entry = list_first_entry(&p->entries,
   569						    struct tcfg_gate_entry, list);
   570		gact->current_entry_octets = 0;
   571		gact->current_gate_status = GATE_ACT_GATE_OPEN | GATE_ACT_PENDING;
   572	
   573		goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
   574	
   575		gate_start_timer(gact, start);
   576	
   577		oldp = rcu_replace_pointer(gact->param, p,
   578					   lockdep_is_held(&gact->tcf_lock));
   579	
   580		spin_unlock_bh(&gact->tcf_lock);
   581	
   582		if (oldp)
   583			call_rcu(&oldp->rcu, tcf_gate_params_release);
   584	
   585		if (goto_ch)
   586			tcf_chain_put_by_act(goto_ch);
   587	
   588		return ret;
   589	
   590	free_p:
   591		release_entry_list(&p->entries);
   592		kfree(p);
   593	release_new_entries:
   594		release_entry_list(&newp.entries);
   595	put_chain:
   596		if (goto_ch)
   597			tcf_chain_put_by_act(goto_ch);
   598	release_idr:
   599		if (ret == ACT_P_CREATED) {
   600			p = rcu_dereference_protected(gact->param, 1);
   601			if (p) {
   602				release_entry_list(&p->entries);
   603				kfree(p);
   604				rcu_assign_pointer(gact->param, NULL);
   605			}
   606		}
   607		tcf_idr_release(*a, bind);
   608		return err;
   609	}
   610	

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

[PATCH 1/2] net/sched: act_gate: fix schedule updates with RCU swap
[PATCH 2/2] net/sched: act_gate: zero-initialize netlink dump struct