[PATCH mptcp-next v2 8/8] mptcp: reuse sending nlmsg code in dump_addr

Geliang Tang posted 8 patches 1 year, 1 month ago
Only 1 patches received!
There is a newer version of this series
[PATCH mptcp-next v2 8/8] mptcp: reuse sending nlmsg code in dump_addr
Posted by Geliang Tang 1 year, 1 month ago
From: Geliang Tang <tanggeliang@kylinos.cn>

With the help of get_addr(), we can modify dump_addr() interfaces
to reuse send_nlmsg code between the netlink PM and userspace PM.

The current dump_addr() flow looks like this:

	lock();
	for_each_entry(entry)
		send_nlmsg(entry);
	unlock();

After holding the lock, get every entry by walking the address list,
send each one looply, and finally release the lock.

This set changes the process by copying the address list to an id
bitmap while holding the lock, then release the lock immediately.
After that, without locking, walking the copied id bitmap to get
every copy of entry by using get_addr(), and send each one looply:

	lock();
	for_each_entry(entry)
		set_bit(bitmap);
	unlock();

	for_each_bit(bitmap) {
		copy = get_addr();
		send_nlmsg(copy);
	}

With this, we can reuse the send_nlmsg() code in dump_addr() interfaces
between the netlink PM and userspace PM. They only need to implement
their own dump_addr() interfaces to hold the different locks, copy the
different address lists to an id bitmap, then release the locks.

Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
---
 net/mptcp/pm.c           | 43 ++++++++++++++++++++++++++++++++++++----
 net/mptcp/pm_netlink.c   | 35 +++-----------------------------
 net/mptcp/pm_userspace.c | 41 ++++++++++++++------------------------
 net/mptcp/protocol.h     |  6 ++----
 4 files changed, 59 insertions(+), 66 deletions(-)

diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index 0aaf16319c34..22c0ca77ca0d 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -485,20 +485,55 @@ int mptcp_pm_nl_get_addr_doit(struct sk_buff *skb, struct genl_info *info)
 	return ret;
 }
 
-static int mptcp_pm_dump_addr(struct sk_buff *msg, struct netlink_callback *cb,
+static int mptcp_pm_dump_addr(mptcp_pm_addr_id_bitmap_t *bitmap,
 			      const struct genl_info *info)
 {
 	if (info->attrs[MPTCP_PM_ATTR_TOKEN])
-		return mptcp_userspace_pm_dump_addr(msg, cb, info);
-	return mptcp_pm_nl_dump_addr(msg, cb, info);
+		return mptcp_userspace_pm_dump_addr(bitmap, info);
+	return mptcp_pm_nl_dump_addr(bitmap, info);
 }
 
 int mptcp_pm_nl_get_addr_dumpit(struct sk_buff *msg,
 				struct netlink_callback *cb)
 {
 	const struct genl_info *info = genl_info_dump(cb);
+	mptcp_pm_addr_id_bitmap_t *bitmap;
+	struct mptcp_pm_addr_entry entry;
+	int id = cb->args[0];
+	void *hdr;
+	int i;
+
+	bitmap = (mptcp_pm_addr_id_bitmap_t *)cb->ctx;
+
+	mptcp_pm_dump_addr(bitmap, info);
+
+	for (i = id; i < MPTCP_PM_MAX_ADDR_ID + 1; i++) {
+		if (test_bit(i, bitmap->map)) {
+			if (mptcp_pm_get_addr(i, &entry, info))
+				break;
+
+			if (id && entry.addr.id <= id)
+				continue;
+
+			hdr = genlmsg_put(msg, NETLINK_CB(cb->skb).portid,
+					  cb->nlh->nlmsg_seq,
+					  &mptcp_genl_family, NLM_F_MULTI,
+					  MPTCP_PM_CMD_GET_ADDR);
+			if (!hdr)
+				break;
+
+			if (mptcp_nl_fill_addr(msg, &entry) < 0) {
+				genlmsg_cancel(msg, hdr);
+				break;
+			}
+
+			id = entry.addr.id;
+			genlmsg_end(msg, hdr);
+		}
+	}
 
-	return mptcp_pm_dump_addr(msg, cb, info);
+	cb->args[0] = id;
+	return msg->len;
 }
 
 static int mptcp_pm_set_flags(struct sk_buff *skb, struct genl_info *info)
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index 0d826bfc4718..831c440d6cc5 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -1783,48 +1783,19 @@ int mptcp_pm_nl_get_addr(u8 id, struct mptcp_pm_addr_entry *addr,
 	return ret;
 }
 
-int mptcp_pm_nl_dump_addr(struct sk_buff *msg,
-			  struct netlink_callback *cb,
+int mptcp_pm_nl_dump_addr(mptcp_pm_addr_id_bitmap_t *bitmap,
 			  const struct genl_info *info)
 {
 	struct net *net = genl_info_net(info);
-	struct mptcp_pm_addr_entry *entry;
 	struct pm_nl_pernet *pernet;
-	int id = cb->args[0];
-	void *hdr;
-	int i;
 
 	pernet = pm_nl_get_pernet(net);
 
 	rcu_read_lock();
-	for (i = id; i < MPTCP_PM_MAX_ADDR_ID + 1; i++) {
-		if (test_bit(i, pernet->id_bitmap)) {
-			entry = __lookup_addr_by_id(pernet, i);
-			if (!entry)
-				break;
-
-			if (entry->addr.id <= id)
-				continue;
-
-			hdr = genlmsg_put(msg, NETLINK_CB(cb->skb).portid,
-					  cb->nlh->nlmsg_seq, &mptcp_genl_family,
-					  NLM_F_MULTI, MPTCP_PM_CMD_GET_ADDR);
-			if (!hdr)
-				break;
-
-			if (mptcp_nl_fill_addr(msg, entry) < 0) {
-				genlmsg_cancel(msg, hdr);
-				break;
-			}
-
-			id = entry->addr.id;
-			genlmsg_end(msg, hdr);
-		}
-	}
+	bitmap_copy(bitmap->map, pernet->id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1);
 	rcu_read_unlock();
 
-	cb->args[0] = id;
-	return msg->len;
+	return 0;
 }
 
 static int parse_limit(struct genl_info *info, int id, unsigned int *limit)
diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c
index 0d9bea3a04a2..0db477b703a5 100644
--- a/net/mptcp/pm_userspace.c
+++ b/net/mptcp/pm_userspace.c
@@ -614,18 +614,25 @@ int mptcp_userspace_pm_set_flags(struct sk_buff *skb, struct genl_info *info)
 	return ret;
 }
 
-int mptcp_userspace_pm_dump_addr(struct sk_buff *msg,
-				 struct netlink_callback *cb,
-				 const struct genl_info *info)
+static int mptcp_userspace_pm_reset_bitmap(struct mptcp_sock *msk,
+					   mptcp_pm_addr_id_bitmap_t *bitmap)
 {
-	mptcp_pm_addr_id_bitmap_t *bitmap;
 	struct mptcp_pm_addr_entry *entry;
+
+	bitmap_zero(bitmap->map, MPTCP_PM_MAX_ADDR_ID + 1);
+
+	mptcp_for_each_userspace_pm_addr(msk, entry)
+		__set_bit(entry->addr.id, bitmap->map);
+
+	return 0;
+}
+
+int mptcp_userspace_pm_dump_addr(mptcp_pm_addr_id_bitmap_t *bitmap,
+				 const struct genl_info *info)
+{
 	struct mptcp_sock *msk;
 	int ret = -EINVAL;
 	struct sock *sk;
-	void *hdr;
-
-	bitmap = (mptcp_pm_addr_id_bitmap_t *)cb->ctx;
 
 	msk = mptcp_userspace_pm_get_sock(info);
 	if (!msk)
@@ -635,27 +642,9 @@ int mptcp_userspace_pm_dump_addr(struct sk_buff *msg,
 
 	lock_sock(sk);
 	spin_lock_bh(&msk->pm.lock);
-	mptcp_for_each_userspace_pm_addr(msk, entry) {
-		if (test_bit(entry->addr.id, bitmap->map))
-			continue;
-
-		hdr = genlmsg_put(msg, NETLINK_CB(cb->skb).portid,
-				  cb->nlh->nlmsg_seq, &mptcp_genl_family,
-				  NLM_F_MULTI, MPTCP_PM_CMD_GET_ADDR);
-		if (!hdr)
-			break;
-
-		if (mptcp_nl_fill_addr(msg, entry) < 0) {
-			genlmsg_cancel(msg, hdr);
-			break;
-		}
-
-		__set_bit(entry->addr.id, bitmap->map);
-		genlmsg_end(msg, hdr);
-	}
+	ret = mptcp_userspace_pm_reset_bitmap(msk, bitmap);
 	spin_unlock_bh(&msk->pm.lock);
 	release_sock(sk);
-	ret = msg->len;
 
 	sock_put(sk);
 	return ret;
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index da2cf524c5da..ed629320ba56 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -1124,11 +1124,9 @@ int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_in
 bool mptcp_pm_is_backup(struct mptcp_sock *msk, struct sock_common *skc);
 bool mptcp_pm_nl_is_backup(struct mptcp_sock *msk, struct mptcp_addr_info *skc);
 bool mptcp_userspace_pm_is_backup(struct mptcp_sock *msk, struct mptcp_addr_info *skc);
-int mptcp_pm_nl_dump_addr(struct sk_buff *msg,
-			  struct netlink_callback *cb,
+int mptcp_pm_nl_dump_addr(mptcp_pm_addr_id_bitmap_t *bitmap,
 			  const struct genl_info *info);
-int mptcp_userspace_pm_dump_addr(struct sk_buff *msg,
-				 struct netlink_callback *cb,
+int mptcp_userspace_pm_dump_addr(mptcp_pm_addr_id_bitmap_t *bitmap,
 				 const struct genl_info *info);
 int mptcp_pm_nl_get_addr(u8 id, struct mptcp_pm_addr_entry *addr,
 			 const struct genl_info *info);
-- 
2.45.2
Re: [PATCH mptcp-next v2 8/8] mptcp: reuse sending nlmsg code in dump_addr
Posted by Matthieu Baerts 1 year, 1 month ago
Hi Geliang,

It looks like the code below only works if the dump can be done in one call.

I spent quite a bit of time looking at it, and at the end, I would like
to say it doesn't look like this modification is worth it: it increases
the complexity just to save a bit of duplicated code (which is used
differently). WDYT? Or do you really need this for the BPF PM?

Here below, you can find my review, but in short, I don't think we
should try to unify the code here. WDYT?

On 13/12/2024 08:35, Geliang Tang wrote:
> From: Geliang Tang <tanggeliang@kylinos.cn>
> 
> With the help of get_addr(), we can modify dump_addr() interfaces
> to reuse send_nlmsg code between the netlink PM and userspace PM.
> 
> The current dump_addr() flow looks like this:
> 
> 	lock();
> 	for_each_entry(entry)
> 		send_nlmsg(entry);
> 	unlock();
> 
> After holding the lock, get every entry by walking the address list,
> send each one looply, and finally release the lock.
> 
> This set changes the process by copying the address list to an id
> bitmap while holding the lock, then release the lock immediately.
> After that, without locking, walking the copied id bitmap to get
> every copy of entry by using get_addr(), and send each one looply:
> 
> 	lock();
> 	for_each_entry(entry)
> 		set_bit(bitmap);
> 	unlock();
> 
> 	for_each_bit(bitmap) {
> 		copy = get_addr();

What you are hiding here is that there will be a lock again here for
each address: so we will lock and unlock once before, and once per
address... Maybe we don't care about that here, but is it really worth it?

> 		send_nlmsg(copy);
> 	}
> 
> With this, we can reuse the send_nlmsg() code in dump_addr() interfaces
> between the netlink PM and userspace PM. They only need to implement
> their own dump_addr() interfaces to hold the different locks, copy the
> different address lists to an id bitmap, then release the locks.
> 
> Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
> ---
>  net/mptcp/pm.c           | 43 ++++++++++++++++++++++++++++++++++++----
>  net/mptcp/pm_netlink.c   | 35 +++-----------------------------
>  net/mptcp/pm_userspace.c | 41 ++++++++++++++------------------------
>  net/mptcp/protocol.h     |  6 ++----
>  4 files changed, 59 insertions(+), 66 deletions(-)
> 
> diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
> index 0aaf16319c34..22c0ca77ca0d 100644
> --- a/net/mptcp/pm.c
> +++ b/net/mptcp/pm.c
> @@ -485,20 +485,55 @@ int mptcp_pm_nl_get_addr_doit(struct sk_buff *skb, struct genl_info *info)
>  	return ret;
>  }
>  
> -static int mptcp_pm_dump_addr(struct sk_buff *msg, struct netlink_callback *cb,
> +static int mptcp_pm_dump_addr(mptcp_pm_addr_id_bitmap_t *bitmap,
>  			      const struct genl_info *info)
>  {
>  	if (info->attrs[MPTCP_PM_ATTR_TOKEN])
> -		return mptcp_userspace_pm_dump_addr(msg, cb, info);
> -	return mptcp_pm_nl_dump_addr(msg, cb, info);
> +		return mptcp_userspace_pm_dump_addr(bitmap, info);
> +	return mptcp_pm_nl_dump_addr(bitmap, info);
>  }
>  
>  int mptcp_pm_nl_get_addr_dumpit(struct sk_buff *msg,
>  				struct netlink_callback *cb)
>  {
>  	const struct genl_info *info = genl_info_dump(cb);
> +	mptcp_pm_addr_id_bitmap_t *bitmap;
> +	struct mptcp_pm_addr_entry entry;
> +	int id = cb->args[0];
> +	void *hdr;
> +	int i;
> +
> +	bitmap = (mptcp_pm_addr_id_bitmap_t *)cb->ctx;

Mmh, that feels wrong: you are using cb->ctx and cb->args at the same
part, but they are part of an union. This will likely only work the
first time, not if dumpit() is called in multiple chunks, no?

For it to work, I guess you should only copy the bitmap the first time
(not sure what you can look at, reserve some bits in cb->ctx for that?),
then use __test_and_clear_bit() instead of test_bit(). But again, not
sure if it is really worth it, that makes things a bit more complex.


BTW, not related to ↑, but it might be safer to add a build check to
make sure sizeof(cb->ctx) >= sizeof(mptcp_pm_addr_id_bitmap_t), just in
case we change it later.

> +
> +	mptcp_pm_dump_addr(bitmap, info);

Should you not only fill the bitmap the first time? I'm not sure if I
fully understand how it is used, but from what I see, the dump can be
done in multiple calls, hence the use of cb. Here this fill the bitmap
without taking into account what was done before.

> +
> +	for (i = id; i < MPTCP_PM_MAX_ADDR_ID + 1; i++) {
> +		if (test_bit(i, bitmap->map)) {
> +			if (mptcp_pm_get_addr(i, &entry, info))
> +				break;

I don't think you should "break" here, but "continue" instead:
potentially between the two calls, the address at this ID might have
been removed.

> +
> +			if (id && entry.addr.id <= id)

Why is the condition different from before in pm_netlink.c? Why do you
need to explicitly skip id 0?

If I'm not mistaken, with the in-kernel PM, we cannot have ID 0 here,
but we can with the userspace PM, no?

> +				continue;
> +
> +			hdr = genlmsg_put(msg, NETLINK_CB(cb->skb).portid,
> +					  cb->nlh->nlmsg_seq,
> +					  &mptcp_genl_family, NLM_F_MULTI,
> +					  MPTCP_PM_CMD_GET_ADDR);
> +			if (!hdr)
> +				break;
> +
> +			if (mptcp_nl_fill_addr(msg, &entry) < 0) {
> +				genlmsg_cancel(msg, hdr);
> +				break;
> +			}
> +
> +			id = entry.addr.id;
> +			genlmsg_end(msg, hdr);
> +		}
> +	}
>  
> -	return mptcp_pm_dump_addr(msg, cb, info);
> +	cb->args[0] = id;
> +	return msg->len;
>  }
>  
>  static int mptcp_pm_set_flags(struct sk_buff *skb, struct genl_info *info)
> diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
> index 0d826bfc4718..831c440d6cc5 100644
> --- a/net/mptcp/pm_netlink.c
> +++ b/net/mptcp/pm_netlink.c
> @@ -1783,48 +1783,19 @@ int mptcp_pm_nl_get_addr(u8 id, struct mptcp_pm_addr_entry *addr,
>  	return ret;
>  }
>  
> -int mptcp_pm_nl_dump_addr(struct sk_buff *msg,
> -			  struct netlink_callback *cb,
> +int mptcp_pm_nl_dump_addr(mptcp_pm_addr_id_bitmap_t *bitmap,
>  			  const struct genl_info *info)
>  {
>  	struct net *net = genl_info_net(info);
> -	struct mptcp_pm_addr_entry *entry;
>  	struct pm_nl_pernet *pernet;
> -	int id = cb->args[0];
> -	void *hdr;
> -	int i;
>  
>  	pernet = pm_nl_get_pernet(net);
>  
>  	rcu_read_lock();
> -	for (i = id; i < MPTCP_PM_MAX_ADDR_ID + 1; i++) {
> -		if (test_bit(i, pernet->id_bitmap)) {
> -			entry = __lookup_addr_by_id(pernet, i);
> -			if (!entry)
> -				break;
> -
> -			if (entry->addr.id <= id)
> -				continue;
> -
> -			hdr = genlmsg_put(msg, NETLINK_CB(cb->skb).portid,
> -					  cb->nlh->nlmsg_seq, &mptcp_genl_family,
> -					  NLM_F_MULTI, MPTCP_PM_CMD_GET_ADDR);
> -			if (!hdr)
> -				break;
> -
> -			if (mptcp_nl_fill_addr(msg, entry) < 0) {
> -				genlmsg_cancel(msg, hdr);
> -				break;
> -			}
> -
> -			id = entry->addr.id;
> -			genlmsg_end(msg, hdr);
> -		}
> -	}
> +	bitmap_copy(bitmap->map, pernet->id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1);
>  	rcu_read_unlock();

"pernet->id_bitmap" is not protected by RCU, only the endpoints are
(linked to __lookup_addr_by_id()).

In other words, rcu_read_(un)lock() are no longer needed here.

It also means that potentially, we are changing the behaviour of the
dump when endpoints are being modified but:

— MPTCP netlink commands are processed one by one, so this should not
happen if I'm not mistaken.

— Even if the endpoints were being modified during a dump (maybe it can
if the endpoints cannot all fit in the buffer?), that's OK if it reports
the previous version of just updated info.

>  
> -	cb->args[0] = id;
> -	return msg->len;
> +	return 0;
>  }

This helper no longer dump addresses any more, it only copies the
bitmap. Should it be eventually renamed?

>  static int parse_limit(struct genl_info *info, int id, unsigned int *limit)
> diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c
> index 0d9bea3a04a2..0db477b703a5 100644
> --- a/net/mptcp/pm_userspace.c
> +++ b/net/mptcp/pm_userspace.c
> @@ -614,18 +614,25 @@ int mptcp_userspace_pm_set_flags(struct sk_buff *skb, struct genl_info *info)
>  	return ret;
>  }
>  
> -int mptcp_userspace_pm_dump_addr(struct sk_buff *msg,
> -				 struct netlink_callback *cb,
> -				 const struct genl_info *info)
> +static int mptcp_userspace_pm_reset_bitmap(struct mptcp_sock *msk,
> +					   mptcp_pm_addr_id_bitmap_t *bitmap)
>  {
> -	mptcp_pm_addr_id_bitmap_t *bitmap;
>  	struct mptcp_pm_addr_entry *entry;
> +
> +	bitmap_zero(bitmap->map, MPTCP_PM_MAX_ADDR_ID + 1);
> +
> +	mptcp_for_each_userspace_pm_addr(msk, entry)
> +		__set_bit(entry->addr.id, bitmap->map);
> +
> +	return 0;
> +}
> +
> +int mptcp_userspace_pm_dump_addr(mptcp_pm_addr_id_bitmap_t *bitmap,
> +				 const struct genl_info *info)
> +{
>  	struct mptcp_sock *msk;
>  	int ret = -EINVAL;
>  	struct sock *sk;
> -	void *hdr;
> -
> -	bitmap = (mptcp_pm_addr_id_bitmap_t *)cb->ctx;
>  
>  	msk = mptcp_userspace_pm_get_sock(info);
>  	if (!msk)
> @@ -635,27 +642,9 @@ int mptcp_userspace_pm_dump_addr(struct sk_buff *msg,
>  
>  	lock_sock(sk);
>  	spin_lock_bh(&msk->pm.lock);
> -	mptcp_for_each_userspace_pm_addr(msk, entry) {
> -		if (test_bit(entry->addr.id, bitmap->map))
> -			continue;
> -
> -		hdr = genlmsg_put(msg, NETLINK_CB(cb->skb).portid,
> -				  cb->nlh->nlmsg_seq, &mptcp_genl_family,
> -				  NLM_F_MULTI, MPTCP_PM_CMD_GET_ADDR);
> -		if (!hdr)
> -			break;
> -
> -		if (mptcp_nl_fill_addr(msg, entry) < 0) {
> -			genlmsg_cancel(msg, hdr);
> -			break;
> -		}
> -
> -		__set_bit(entry->addr.id, bitmap->map);
> -		genlmsg_end(msg, hdr);
> -	}
> +	ret = mptcp_userspace_pm_reset_bitmap(msk, bitmap);

Mmh, I don't understand this. If I'm not mistaken, before, we were using
a bitmap because in case the dump had to be done in multiple parts, we
required a way to mark which IDs had already been dumped.

But here, with your modification, you fill all the bitmap like we would
be able to dump everything in one go: the bitmap is written with all
entries, and reset each time we need to "dump the rest". So at the end,
we no longer remember what was done in the previous parts, and we would
dump the same thing over and over, no?

>  	spin_unlock_bh(&msk->pm.lock);
>  	release_sock(sk);
> -	ret = msg->len;
>  
>  	sock_put(sk);
>  	return ret;
> diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
> index da2cf524c5da..ed629320ba56 100644
> --- a/net/mptcp/protocol.h
> +++ b/net/mptcp/protocol.h
> @@ -1124,11 +1124,9 @@ int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_in
>  bool mptcp_pm_is_backup(struct mptcp_sock *msk, struct sock_common *skc);
>  bool mptcp_pm_nl_is_backup(struct mptcp_sock *msk, struct mptcp_addr_info *skc);
>  bool mptcp_userspace_pm_is_backup(struct mptcp_sock *msk, struct mptcp_addr_info *skc);
> -int mptcp_pm_nl_dump_addr(struct sk_buff *msg,
> -			  struct netlink_callback *cb,
> +int mptcp_pm_nl_dump_addr(mptcp_pm_addr_id_bitmap_t *bitmap,
>  			  const struct genl_info *info);
> -int mptcp_userspace_pm_dump_addr(struct sk_buff *msg,
> -				 struct netlink_callback *cb,
> +int mptcp_userspace_pm_dump_addr(mptcp_pm_addr_id_bitmap_t *bitmap,
>  				 const struct genl_info *info);
>  int mptcp_pm_nl_get_addr(u8 id, struct mptcp_pm_addr_entry *addr,
>  			 const struct genl_info *info);

Cheers,
Matt
-- 
Sponsored by the NGI0 Core fund.

  • [PATCH mptcp-next v2 8/8] mptcp: reuse sending nlmsg code in dump_addr