[PATCH net-next v2 4/6] af_unix: stash pidfs dentry when needed

Alexander Mikhalitsyn posted 6 patches 3 months, 1 week ago
There is a newer version of this series
[PATCH net-next v2 4/6] af_unix: stash pidfs dentry when needed
Posted by Alexander Mikhalitsyn 3 months, 1 week ago
We need to ensure that pidfs dentry is allocated when we meet any
struct pid for the first time. This will allows us to open pidfd
even after the task it corresponds to is reaped.

Basically, we need to identify all places where we fill skb/scm_cookie
with struct pid reference for the first time and call pidfs_register_pid().

Tricky thing here is that we have a few places where this happends
depending on what userspace is doing:
- [__scm_replace_pid()] explicitly sending an SCM_CREDENTIALS message
                        and specified pid in a numeric format
- [unix_maybe_add_creds()] enabled SO_PASSCRED/SO_PASSPIDFD but
                           didn't send SCM_CREDENTIALS explicitly
- [scm_send()] force_creds is true. Netlink case.

Cc: linux-kernel@vger.kernel.org
Cc: netdev@vger.kernel.org
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Jakub Kicinski <kuba@kernel.org>
Cc: Paolo Abeni <pabeni@redhat.com>
Cc: Simon Horman <horms@kernel.org>
Cc: Leon Romanovsky <leon@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Kuniyuki Iwashima <kuniyu@google.com>
Cc: Lennart Poettering <mzxreary@0pointer.de>
Cc: Luca Boccassi <bluca@debian.org>
Cc: David Rheinsberg <david@readahead.eu>
Signed-off-by: Alexander Mikhalitsyn <aleksandr.mikhalitsyn@canonical.com>
---
v2:
	- renamed __skb_set_pid() -> unix_set_pid_to_skb() [ as Kuniyuki suggested ]
	- get rid of extra helper (__scm_set_cred()) I've introduced before [ as Kuniyuki suggested ]
	- s/__inline__/inline/ for functions I touched [ as Kuniyuki suggested ]
	- get rid of chunk in unix_destruct_scm() with NULLifying UNIXCB(skb).pid [ as Kuniyuki suggested ]
	- added proper error handling in scm_send() for scm_set_cred() return value [ found by me during rework ]
---
 include/net/scm.h  | 32 ++++++++++++++++++++++++--------
 net/core/scm.c     |  6 ++++++
 net/unix/af_unix.c | 33 +++++++++++++++++++++++++++++----
 3 files changed, 59 insertions(+), 12 deletions(-)

diff --git a/include/net/scm.h b/include/net/scm.h
index 84c4707e78a5..597a40779269 100644
--- a/include/net/scm.h
+++ b/include/net/scm.h
@@ -8,6 +8,7 @@
 #include <linux/file.h>
 #include <linux/security.h>
 #include <linux/pid.h>
+#include <linux/pidfs.h>
 #include <linux/nsproxy.h>
 #include <linux/sched/signal.h>
 #include <net/compat.h>
@@ -66,19 +67,28 @@ static __inline__ void unix_get_peersec_dgram(struct socket *sock, struct scm_co
 { }
 #endif /* CONFIG_SECURITY_NETWORK */
 
-static __inline__ void scm_set_cred(struct scm_cookie *scm,
-				    struct pid *pid, kuid_t uid, kgid_t gid)
+static inline int scm_set_cred(struct scm_cookie *scm,
+			       struct pid *pid, bool pidfs_register,
+			       kuid_t uid, kgid_t gid)
 {
-	scm->pid  = get_pid(pid);
+	if (pidfs_register) {
+		int err = pidfs_register_pid(pid);
+		if (err)
+			return err;
+	}
+
+	scm->pid = get_pid(pid);
+
 	scm->creds.pid = pid_vnr(pid);
 	scm->creds.uid = uid;
 	scm->creds.gid = gid;
+	return 0;
 }
 
 static __inline__ void scm_destroy_cred(struct scm_cookie *scm)
 {
 	put_pid(scm->pid);
-	scm->pid  = NULL;
+	scm->pid = NULL;
 }
 
 static __inline__ void scm_destroy(struct scm_cookie *scm)
@@ -88,14 +98,20 @@ static __inline__ void scm_destroy(struct scm_cookie *scm)
 		__scm_destroy(scm);
 }
 
-static __inline__ int scm_send(struct socket *sock, struct msghdr *msg,
-			       struct scm_cookie *scm, bool forcecreds)
+static inline int scm_send(struct socket *sock, struct msghdr *msg,
+			   struct scm_cookie *scm, bool forcecreds)
 {
 	memset(scm, 0, sizeof(*scm));
 	scm->creds.uid = INVALID_UID;
 	scm->creds.gid = INVALID_GID;
-	if (forcecreds)
-		scm_set_cred(scm, task_tgid(current), current_uid(), current_gid());
+
+	if (forcecreds) {
+		int err = scm_set_cred(scm, task_tgid(current), true,
+				       current_uid(), current_gid());
+		if (err)
+			return err;
+	}
+
 	unix_get_peersec_dgram(sock, scm);
 	if (msg->msg_controllen <= 0)
 		return 0;
diff --git a/net/core/scm.c b/net/core/scm.c
index 68441c024dd8..50dfec6f8a2b 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -147,9 +147,15 @@ EXPORT_SYMBOL(__scm_destroy);
 
 static inline int __scm_replace_pid(struct scm_cookie *scm, struct pid *pid)
 {
+	int err;
+
 	/* drop all previous references */
 	scm_destroy_cred(scm);
 
+	err = pidfs_register_pid(pid);
+	if (err)
+		return err;
+
 	scm->pid = pid;
 	scm->creds.pid = pid_vnr(pid);
 	return 0;
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index df2174d9904d..18c677683ddc 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1924,12 +1924,27 @@ static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb)
 	scm->fp = scm_fp_dup(UNIXCB(skb).fp);
 }
 
+static int unix_set_pid_to_skb(struct sk_buff *skb, struct pid *pid, bool pidfs_register)
+{
+	if (pidfs_register) {
+		int err;
+
+		err = pidfs_register_pid(pid);
+		if (err)
+			return err;
+	}
+
+	UNIXCB(skb).pid = get_pid(pid);
+	return 0;
+}
+
 static void unix_destruct_scm(struct sk_buff *skb)
 {
 	struct scm_cookie scm;
 
 	memset(&scm, 0, sizeof(scm));
-	scm.pid  = UNIXCB(skb).pid;
+	scm.pid = UNIXCB(skb).pid;
+
 	if (UNIXCB(skb).fp)
 		unix_detach_fds(&scm, skb);
 
@@ -1943,7 +1958,10 @@ static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool sen
 {
 	int err = 0;
 
-	UNIXCB(skb).pid = get_pid(scm->pid);
+	err = unix_set_pid_to_skb(skb, scm->pid, false);
+	if (unlikely(err))
+		return err;
+
 	UNIXCB(skb).uid = scm->creds.uid;
 	UNIXCB(skb).gid = scm->creds.gid;
 	UNIXCB(skb).fp = NULL;
@@ -1957,7 +1975,8 @@ static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool sen
 
 static void unix_skb_to_scm(struct sk_buff *skb, struct scm_cookie *scm)
 {
-	scm_set_cred(scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
+	/* scm_set_cred() can't fail when pidfs_register == false */
+	scm_set_cred(scm, UNIXCB(skb).pid, false, UNIXCB(skb).uid, UNIXCB(skb).gid);
 	unix_set_secdata(scm, skb);
 }
 
@@ -1971,6 +1990,7 @@ static void unix_skb_to_scm(struct sk_buff *skb, struct scm_cookie *scm)
  * We include credentials if source or destination socket
  * asserted SOCK_PASSCRED.
  *
+ * Context: May sleep.
  * Return: On success zero, on error a negative error code is returned.
  */
 static int unix_maybe_add_creds(struct sk_buff *skb, const struct sock *sk,
@@ -1980,7 +2000,12 @@ static int unix_maybe_add_creds(struct sk_buff *skb, const struct sock *sk,
 		return 0;
 
 	if (unix_may_passcred(sk) || unix_may_passcred(other)) {
-		UNIXCB(skb).pid = get_pid(task_tgid(current));
+		int err;
+
+		err = unix_set_pid_to_skb(skb, task_tgid(current), true);
+		if (unlikely(err))
+			return err;
+
 		current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
 	}
 
-- 
2.43.0
Re: [PATCH net-next v2 4/6] af_unix: stash pidfs dentry when needed
Posted by Kuniyuki Iwashima 3 months, 1 week ago
On Tue, Jul 1, 2025 at 1:41 AM Alexander Mikhalitsyn
<aleksandr.mikhalitsyn@canonical.com> wrote:
>
> We need to ensure that pidfs dentry is allocated when we meet any
> struct pid for the first time. This will allows us to open pidfd
> even after the task it corresponds to is reaped.
>
> Basically, we need to identify all places where we fill skb/scm_cookie
> with struct pid reference for the first time and call pidfs_register_pid().
>
> Tricky thing here is that we have a few places where this happends
> depending on what userspace is doing:
> - [__scm_replace_pid()] explicitly sending an SCM_CREDENTIALS message
>                         and specified pid in a numeric format
> - [unix_maybe_add_creds()] enabled SO_PASSCRED/SO_PASSPIDFD but
>                            didn't send SCM_CREDENTIALS explicitly
> - [scm_send()] force_creds is true. Netlink case.
>
> Cc: linux-kernel@vger.kernel.org
> Cc: netdev@vger.kernel.org
> Cc: "David S. Miller" <davem@davemloft.net>
> Cc: Eric Dumazet <edumazet@google.com>
> Cc: Jakub Kicinski <kuba@kernel.org>
> Cc: Paolo Abeni <pabeni@redhat.com>
> Cc: Simon Horman <horms@kernel.org>
> Cc: Leon Romanovsky <leon@kernel.org>
> Cc: Arnd Bergmann <arnd@arndb.de>
> Cc: Christian Brauner <brauner@kernel.org>
> Cc: Kuniyuki Iwashima <kuniyu@google.com>
> Cc: Lennart Poettering <mzxreary@0pointer.de>
> Cc: Luca Boccassi <bluca@debian.org>
> Cc: David Rheinsberg <david@readahead.eu>
> Signed-off-by: Alexander Mikhalitsyn <aleksandr.mikhalitsyn@canonical.com>
> ---
> v2:
>         - renamed __skb_set_pid() -> unix_set_pid_to_skb() [ as Kuniyuki suggested ]
>         - get rid of extra helper (__scm_set_cred()) I've introduced before [ as Kuniyuki suggested ]
>         - s/__inline__/inline/ for functions I touched [ as Kuniyuki suggested ]
>         - get rid of chunk in unix_destruct_scm() with NULLifying UNIXCB(skb).pid [ as Kuniyuki suggested ]
>         - added proper error handling in scm_send() for scm_set_cred() return value [ found by me during rework ]
> ---
>  include/net/scm.h  | 32 ++++++++++++++++++++++++--------
>  net/core/scm.c     |  6 ++++++
>  net/unix/af_unix.c | 33 +++++++++++++++++++++++++++++----
>  3 files changed, 59 insertions(+), 12 deletions(-)
>
> diff --git a/include/net/scm.h b/include/net/scm.h
> index 84c4707e78a5..597a40779269 100644
> --- a/include/net/scm.h
> +++ b/include/net/scm.h
> @@ -8,6 +8,7 @@
>  #include <linux/file.h>
>  #include <linux/security.h>
>  #include <linux/pid.h>
> +#include <linux/pidfs.h>
>  #include <linux/nsproxy.h>
>  #include <linux/sched/signal.h>
>  #include <net/compat.h>
> @@ -66,19 +67,28 @@ static __inline__ void unix_get_peersec_dgram(struct socket *sock, struct scm_co
>  { }
>  #endif /* CONFIG_SECURITY_NETWORK */
>
> -static __inline__ void scm_set_cred(struct scm_cookie *scm,
> -                                   struct pid *pid, kuid_t uid, kgid_t gid)
> +static inline int scm_set_cred(struct scm_cookie *scm,
> +                              struct pid *pid, bool pidfs_register,
> +                              kuid_t uid, kgid_t gid)
>  {
> -       scm->pid  = get_pid(pid);
> +       if (pidfs_register) {
> +               int err = pidfs_register_pid(pid);
> +               if (err)
> +                       return err;
> +       }
> +
> +       scm->pid = get_pid(pid);
> +
>         scm->creds.pid = pid_vnr(pid);
>         scm->creds.uid = uid;
>         scm->creds.gid = gid;
> +       return 0;
>  }
>
>  static __inline__ void scm_destroy_cred(struct scm_cookie *scm)
>  {
>         put_pid(scm->pid);
> -       scm->pid  = NULL;
> +       scm->pid = NULL;

Could you split these double-space changes to another
patch to make review easier ?


>  }
>
>  static __inline__ void scm_destroy(struct scm_cookie *scm)
> @@ -88,14 +98,20 @@ static __inline__ void scm_destroy(struct scm_cookie *scm)
>                 __scm_destroy(scm);
>  }
>
> -static __inline__ int scm_send(struct socket *sock, struct msghdr *msg,
> -                              struct scm_cookie *scm, bool forcecreds)
> +static inline int scm_send(struct socket *sock, struct msghdr *msg,
> +                          struct scm_cookie *scm, bool forcecreds)
>  {
>         memset(scm, 0, sizeof(*scm));
>         scm->creds.uid = INVALID_UID;
>         scm->creds.gid = INVALID_GID;
> -       if (forcecreds)
> -               scm_set_cred(scm, task_tgid(current), current_uid(), current_gid());
> +
> +       if (forcecreds) {
> +               int err = scm_set_cred(scm, task_tgid(current), true,
> +                                      current_uid(), current_gid());

Do we need to pass true here ?

Given this series affects scm_pidfd_recv(), we don't need to
touch netlink path that is not allowed to call scm_recv_unix() ?

Then, all callers pass false to scm_set_cred() and
pidfs_register_pid() there will be unnecessary.


> +               if (err)
> +                       return err;
> +       }
> +
>         unix_get_peersec_dgram(sock, scm);
>         if (msg->msg_controllen <= 0)
>                 return 0;
> diff --git a/net/core/scm.c b/net/core/scm.c
> index 68441c024dd8..50dfec6f8a2b 100644
> --- a/net/core/scm.c
> +++ b/net/core/scm.c
> @@ -147,9 +147,15 @@ EXPORT_SYMBOL(__scm_destroy);
>
>  static inline int __scm_replace_pid(struct scm_cookie *scm, struct pid *pid)
>  {
> +       int err;
> +
>         /* drop all previous references */
>         scm_destroy_cred(scm);
>
> +       err = pidfs_register_pid(pid);
> +       if (err)
> +               return err;
> +
>         scm->pid = pid;
>         scm->creds.pid = pid_vnr(pid);
>         return 0;
> diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
> index df2174d9904d..18c677683ddc 100644
> --- a/net/unix/af_unix.c
> +++ b/net/unix/af_unix.c
> @@ -1924,12 +1924,27 @@ static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb)
>         scm->fp = scm_fp_dup(UNIXCB(skb).fp);
>  }
>
> +static int unix_set_pid_to_skb(struct sk_buff *skb, struct pid *pid, bool pidfs_register)
> +{
> +       if (pidfs_register) {
> +               int err;
> +
> +               err = pidfs_register_pid(pid);
> +               if (err)
> +                       return err;
> +       }
> +
> +       UNIXCB(skb).pid = get_pid(pid);
> +       return 0;
> +}
> +
>  static void unix_destruct_scm(struct sk_buff *skb)
>  {
>         struct scm_cookie scm;
>
>         memset(&scm, 0, sizeof(scm));
> -       scm.pid  = UNIXCB(skb).pid;
> +       scm.pid = UNIXCB(skb).pid;
> +
>         if (UNIXCB(skb).fp)
>                 unix_detach_fds(&scm, skb);
>
> @@ -1943,7 +1958,10 @@ static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool sen
>  {
>         int err = 0;
>
> -       UNIXCB(skb).pid = get_pid(scm->pid);
> +       err = unix_set_pid_to_skb(skb, scm->pid, false);
> +       if (unlikely(err))

This does not fail too.

Perhaps keep get_pid() here and move pidfs_register_pid()
to unix_maybe_add_creds(), that will look simpler.


> +               return err;
> +
>         UNIXCB(skb).uid = scm->creds.uid;
>         UNIXCB(skb).gid = scm->creds.gid;
>         UNIXCB(skb).fp = NULL;
> @@ -1957,7 +1975,8 @@ static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool sen
>
>  static void unix_skb_to_scm(struct sk_buff *skb, struct scm_cookie *scm)
>  {
> -       scm_set_cred(scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
> +       /* scm_set_cred() can't fail when pidfs_register == false */
> +       scm_set_cred(scm, UNIXCB(skb).pid, false, UNIXCB(skb).uid, UNIXCB(skb).gid);
>         unix_set_secdata(scm, skb);
>  }
>
> @@ -1971,6 +1990,7 @@ static void unix_skb_to_scm(struct sk_buff *skb, struct scm_cookie *scm)
>   * We include credentials if source or destination socket
>   * asserted SOCK_PASSCRED.
>   *
> + * Context: May sleep.
>   * Return: On success zero, on error a negative error code is returned.
>   */
>  static int unix_maybe_add_creds(struct sk_buff *skb, const struct sock *sk,
> @@ -1980,7 +2000,12 @@ static int unix_maybe_add_creds(struct sk_buff *skb, const struct sock *sk,
>                 return 0;
>
>         if (unix_may_passcred(sk) || unix_may_passcred(other)) {

I forgot to mention that this part will conflict with net-next.

I guess Christian will take this series via vfs tree ?


> -               UNIXCB(skb).pid = get_pid(task_tgid(current));
> +               int err;
> +
> +               err = unix_set_pid_to_skb(skb, task_tgid(current), true);
> +               if (unlikely(err))
> +                       return err;
> +
>                 current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
>         }
>
> --
> 2.43.0
>
Re: [PATCH net-next v2 4/6] af_unix: stash pidfs dentry when needed
Posted by Aleksandr Mikhalitsyn 3 months ago
On Thu, Jul 3, 2025 at 3:53 AM Kuniyuki Iwashima <kuniyu@google.com> wrote:
>
> On Tue, Jul 1, 2025 at 1:41 AM Alexander Mikhalitsyn
> <aleksandr.mikhalitsyn@canonical.com> wrote:
> >
> > We need to ensure that pidfs dentry is allocated when we meet any
> > struct pid for the first time. This will allows us to open pidfd
> > even after the task it corresponds to is reaped.
> >
> > Basically, we need to identify all places where we fill skb/scm_cookie
> > with struct pid reference for the first time and call pidfs_register_pid().
> >
> > Tricky thing here is that we have a few places where this happends
> > depending on what userspace is doing:
> > - [__scm_replace_pid()] explicitly sending an SCM_CREDENTIALS message
> >                         and specified pid in a numeric format
> > - [unix_maybe_add_creds()] enabled SO_PASSCRED/SO_PASSPIDFD but
> >                            didn't send SCM_CREDENTIALS explicitly
> > - [scm_send()] force_creds is true. Netlink case.
> >
> > Cc: linux-kernel@vger.kernel.org
> > Cc: netdev@vger.kernel.org
> > Cc: "David S. Miller" <davem@davemloft.net>
> > Cc: Eric Dumazet <edumazet@google.com>
> > Cc: Jakub Kicinski <kuba@kernel.org>
> > Cc: Paolo Abeni <pabeni@redhat.com>
> > Cc: Simon Horman <horms@kernel.org>
> > Cc: Leon Romanovsky <leon@kernel.org>
> > Cc: Arnd Bergmann <arnd@arndb.de>
> > Cc: Christian Brauner <brauner@kernel.org>
> > Cc: Kuniyuki Iwashima <kuniyu@google.com>
> > Cc: Lennart Poettering <mzxreary@0pointer.de>
> > Cc: Luca Boccassi <bluca@debian.org>
> > Cc: David Rheinsberg <david@readahead.eu>
> > Signed-off-by: Alexander Mikhalitsyn <aleksandr.mikhalitsyn@canonical.com>
> > ---
> > v2:
> >         - renamed __skb_set_pid() -> unix_set_pid_to_skb() [ as Kuniyuki suggested ]
> >         - get rid of extra helper (__scm_set_cred()) I've introduced before [ as Kuniyuki suggested ]
> >         - s/__inline__/inline/ for functions I touched [ as Kuniyuki suggested ]
> >         - get rid of chunk in unix_destruct_scm() with NULLifying UNIXCB(skb).pid [ as Kuniyuki suggested ]
> >         - added proper error handling in scm_send() for scm_set_cred() return value [ found by me during rework ]
> > ---
> >  include/net/scm.h  | 32 ++++++++++++++++++++++++--------
> >  net/core/scm.c     |  6 ++++++
> >  net/unix/af_unix.c | 33 +++++++++++++++++++++++++++++----
> >  3 files changed, 59 insertions(+), 12 deletions(-)
> >
> > diff --git a/include/net/scm.h b/include/net/scm.h
> > index 84c4707e78a5..597a40779269 100644
> > --- a/include/net/scm.h
> > +++ b/include/net/scm.h
> > @@ -8,6 +8,7 @@
> >  #include <linux/file.h>
> >  #include <linux/security.h>
> >  #include <linux/pid.h>
> > +#include <linux/pidfs.h>
> >  #include <linux/nsproxy.h>
> >  #include <linux/sched/signal.h>
> >  #include <net/compat.h>
> > @@ -66,19 +67,28 @@ static __inline__ void unix_get_peersec_dgram(struct socket *sock, struct scm_co
> >  { }
> >  #endif /* CONFIG_SECURITY_NETWORK */
> >
> > -static __inline__ void scm_set_cred(struct scm_cookie *scm,
> > -                                   struct pid *pid, kuid_t uid, kgid_t gid)
> > +static inline int scm_set_cred(struct scm_cookie *scm,
> > +                              struct pid *pid, bool pidfs_register,
> > +                              kuid_t uid, kgid_t gid)
> >  {
> > -       scm->pid  = get_pid(pid);
> > +       if (pidfs_register) {
> > +               int err = pidfs_register_pid(pid);
> > +               if (err)
> > +                       return err;
> > +       }
> > +
> > +       scm->pid = get_pid(pid);
> > +
> >         scm->creds.pid = pid_vnr(pid);
> >         scm->creds.uid = uid;
> >         scm->creds.gid = gid;
> > +       return 0;
> >  }
> >
> >  static __inline__ void scm_destroy_cred(struct scm_cookie *scm)
> >  {
> >         put_pid(scm->pid);
> > -       scm->pid  = NULL;
> > +       scm->pid = NULL;
>
> Could you split these double-space changes to another
> patch to make review easier ?

Hi Kuniyuki,

Sure, will do!

>
>
> >  }
> >
> >  static __inline__ void scm_destroy(struct scm_cookie *scm)
> > @@ -88,14 +98,20 @@ static __inline__ void scm_destroy(struct scm_cookie *scm)
> >                 __scm_destroy(scm);
> >  }
> >
> > -static __inline__ int scm_send(struct socket *sock, struct msghdr *msg,
> > -                              struct scm_cookie *scm, bool forcecreds)
> > +static inline int scm_send(struct socket *sock, struct msghdr *msg,
> > +                          struct scm_cookie *scm, bool forcecreds)
> >  {
> >         memset(scm, 0, sizeof(*scm));
> >         scm->creds.uid = INVALID_UID;
> >         scm->creds.gid = INVALID_GID;
> > -       if (forcecreds)
> > -               scm_set_cred(scm, task_tgid(current), current_uid(), current_gid());
> > +
> > +       if (forcecreds) {
> > +               int err = scm_set_cred(scm, task_tgid(current), true,
> > +                                      current_uid(), current_gid());
>
> Do we need to pass true here ?
>
> Given this series affects scm_pidfd_recv(), we don't need to
> touch netlink path that is not allowed to call scm_recv_unix() ?
>
> Then, all callers pass false to scm_set_cred() and
> pidfs_register_pid() there will be unnecessary.

I agree. While it is safe to call pidfd_register_pid() for the netlink
case too (and get pidfs dentry allocated),
it is not really useful. Thanks for noticing this!

>
>
> > +               if (err)
> > +                       return err;
> > +       }
> > +
> >         unix_get_peersec_dgram(sock, scm);
> >         if (msg->msg_controllen <= 0)
> >                 return 0;
> > diff --git a/net/core/scm.c b/net/core/scm.c
> > index 68441c024dd8..50dfec6f8a2b 100644
> > --- a/net/core/scm.c
> > +++ b/net/core/scm.c
> > @@ -147,9 +147,15 @@ EXPORT_SYMBOL(__scm_destroy);
> >
> >  static inline int __scm_replace_pid(struct scm_cookie *scm, struct pid *pid)
> >  {
> > +       int err;
> > +
> >         /* drop all previous references */
> >         scm_destroy_cred(scm);
> >
> > +       err = pidfs_register_pid(pid);
> > +       if (err)
> > +               return err;
> > +
> >         scm->pid = pid;
> >         scm->creds.pid = pid_vnr(pid);
> >         return 0;
> > diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
> > index df2174d9904d..18c677683ddc 100644
> > --- a/net/unix/af_unix.c
> > +++ b/net/unix/af_unix.c
> > @@ -1924,12 +1924,27 @@ static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb)
> >         scm->fp = scm_fp_dup(UNIXCB(skb).fp);
> >  }
> >
> > +static int unix_set_pid_to_skb(struct sk_buff *skb, struct pid *pid, bool pidfs_register)
> > +{
> > +       if (pidfs_register) {
> > +               int err;
> > +
> > +               err = pidfs_register_pid(pid);
> > +               if (err)
> > +                       return err;
> > +       }
> > +
> > +       UNIXCB(skb).pid = get_pid(pid);
> > +       return 0;
> > +}
> > +
> >  static void unix_destruct_scm(struct sk_buff *skb)
> >  {
> >         struct scm_cookie scm;
> >
> >         memset(&scm, 0, sizeof(scm));
> > -       scm.pid  = UNIXCB(skb).pid;
> > +       scm.pid = UNIXCB(skb).pid;
> > +
> >         if (UNIXCB(skb).fp)
> >                 unix_detach_fds(&scm, skb);
> >
> > @@ -1943,7 +1958,10 @@ static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool sen
> >  {
> >         int err = 0;
> >
> > -       UNIXCB(skb).pid = get_pid(scm->pid);
> > +       err = unix_set_pid_to_skb(skb, scm->pid, false);
> > +       if (unlikely(err))
>
> This does not fail too.
>
> Perhaps keep get_pid() here and move pidfs_register_pid()
> to unix_maybe_add_creds(), that will look simpler.

You are absolutely right. Thanks for pointing this out!
Actually, this was really useful when pidfs_get_pid()/pidfs_put_pid()
API was a thing [1],
because in unix_set_pid_to_skb() we would call pidfs_get_pid() *or*
pidfs_register_pid().

But now, when lifetime rules for pidfs dentries are changed we don't
have pidfs_get_pid()/pidfs_put_pid() API
and we don't need this unix_set_pid_to_skb() helper anymore. So,
basically it's post-vfs-rebase leftovers.

[1] https://github.com/mihalicyn/linux/commit/6a80e241feeea40e9068922eac0452566deccc61#diff-0553d076c243e06ae312480cb8cb52f1cebe1d80fc099d3842593e12c9e0d4f3R1920-R1930

Kind regards,
Alex

>
>
> > +               return err;
> > +
> >         UNIXCB(skb).uid = scm->creds.uid;
> >         UNIXCB(skb).gid = scm->creds.gid;
> >         UNIXCB(skb).fp = NULL;
> > @@ -1957,7 +1975,8 @@ static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool sen
> >
> >  static void unix_skb_to_scm(struct sk_buff *skb, struct scm_cookie *scm)
> >  {
> > -       scm_set_cred(scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
> > +       /* scm_set_cred() can't fail when pidfs_register == false */
> > +       scm_set_cred(scm, UNIXCB(skb).pid, false, UNIXCB(skb).uid, UNIXCB(skb).gid);
> >         unix_set_secdata(scm, skb);
> >  }
> >
> > @@ -1971,6 +1990,7 @@ static void unix_skb_to_scm(struct sk_buff *skb, struct scm_cookie *scm)
> >   * We include credentials if source or destination socket
> >   * asserted SOCK_PASSCRED.
> >   *
> > + * Context: May sleep.
> >   * Return: On success zero, on error a negative error code is returned.
> >   */
> >  static int unix_maybe_add_creds(struct sk_buff *skb, const struct sock *sk,
> > @@ -1980,7 +2000,12 @@ static int unix_maybe_add_creds(struct sk_buff *skb, const struct sock *sk,
> >                 return 0;
> >
> >         if (unix_may_passcred(sk) || unix_may_passcred(other)) {
>
> I forgot to mention that this part will conflict with net-next.
>
> I guess Christian will take this series via vfs tree ?
>
>
> > -               UNIXCB(skb).pid = get_pid(task_tgid(current));
> > +               int err;
> > +
> > +               err = unix_set_pid_to_skb(skb, task_tgid(current), true);
> > +               if (unlikely(err))
> > +                       return err;
> > +
> >                 current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
> >         }
> >
> > --
> > 2.43.0
> >
Re: [PATCH net-next v2 4/6] af_unix: stash pidfs dentry when needed
Posted by Christian Brauner 3 months, 1 week ago
On Wed, Jul 02, 2025 at 06:53:21PM -0700, Kuniyuki Iwashima wrote:
> On Tue, Jul 1, 2025 at 1:41 AM Alexander Mikhalitsyn
> <aleksandr.mikhalitsyn@canonical.com> wrote:
> >
> > We need to ensure that pidfs dentry is allocated when we meet any
> > struct pid for the first time. This will allows us to open pidfd
> > even after the task it corresponds to is reaped.
> >
> > Basically, we need to identify all places where we fill skb/scm_cookie
> > with struct pid reference for the first time and call pidfs_register_pid().
> >
> > Tricky thing here is that we have a few places where this happends
> > depending on what userspace is doing:
> > - [__scm_replace_pid()] explicitly sending an SCM_CREDENTIALS message
> >                         and specified pid in a numeric format
> > - [unix_maybe_add_creds()] enabled SO_PASSCRED/SO_PASSPIDFD but
> >                            didn't send SCM_CREDENTIALS explicitly
> > - [scm_send()] force_creds is true. Netlink case.
> >
> > Cc: linux-kernel@vger.kernel.org
> > Cc: netdev@vger.kernel.org
> > Cc: "David S. Miller" <davem@davemloft.net>
> > Cc: Eric Dumazet <edumazet@google.com>
> > Cc: Jakub Kicinski <kuba@kernel.org>
> > Cc: Paolo Abeni <pabeni@redhat.com>
> > Cc: Simon Horman <horms@kernel.org>
> > Cc: Leon Romanovsky <leon@kernel.org>
> > Cc: Arnd Bergmann <arnd@arndb.de>
> > Cc: Christian Brauner <brauner@kernel.org>
> > Cc: Kuniyuki Iwashima <kuniyu@google.com>
> > Cc: Lennart Poettering <mzxreary@0pointer.de>
> > Cc: Luca Boccassi <bluca@debian.org>
> > Cc: David Rheinsberg <david@readahead.eu>
> > Signed-off-by: Alexander Mikhalitsyn <aleksandr.mikhalitsyn@canonical.com>
> > ---
> > v2:
> >         - renamed __skb_set_pid() -> unix_set_pid_to_skb() [ as Kuniyuki suggested ]
> >         - get rid of extra helper (__scm_set_cred()) I've introduced before [ as Kuniyuki suggested ]
> >         - s/__inline__/inline/ for functions I touched [ as Kuniyuki suggested ]
> >         - get rid of chunk in unix_destruct_scm() with NULLifying UNIXCB(skb).pid [ as Kuniyuki suggested ]
> >         - added proper error handling in scm_send() for scm_set_cred() return value [ found by me during rework ]
> > ---
> >  include/net/scm.h  | 32 ++++++++++++++++++++++++--------
> >  net/core/scm.c     |  6 ++++++
> >  net/unix/af_unix.c | 33 +++++++++++++++++++++++++++++----
> >  3 files changed, 59 insertions(+), 12 deletions(-)
> >
> > diff --git a/include/net/scm.h b/include/net/scm.h
> > index 84c4707e78a5..597a40779269 100644
> > --- a/include/net/scm.h
> > +++ b/include/net/scm.h
> > @@ -8,6 +8,7 @@
> >  #include <linux/file.h>
> >  #include <linux/security.h>
> >  #include <linux/pid.h>
> > +#include <linux/pidfs.h>
> >  #include <linux/nsproxy.h>
> >  #include <linux/sched/signal.h>
> >  #include <net/compat.h>
> > @@ -66,19 +67,28 @@ static __inline__ void unix_get_peersec_dgram(struct socket *sock, struct scm_co
> >  { }
> >  #endif /* CONFIG_SECURITY_NETWORK */
> >
> > -static __inline__ void scm_set_cred(struct scm_cookie *scm,
> > -                                   struct pid *pid, kuid_t uid, kgid_t gid)
> > +static inline int scm_set_cred(struct scm_cookie *scm,
> > +                              struct pid *pid, bool pidfs_register,
> > +                              kuid_t uid, kgid_t gid)
> >  {
> > -       scm->pid  = get_pid(pid);
> > +       if (pidfs_register) {
> > +               int err = pidfs_register_pid(pid);
> > +               if (err)
> > +                       return err;
> > +       }
> > +
> > +       scm->pid = get_pid(pid);
> > +
> >         scm->creds.pid = pid_vnr(pid);
> >         scm->creds.uid = uid;
> >         scm->creds.gid = gid;
> > +       return 0;
> >  }
> >
> >  static __inline__ void scm_destroy_cred(struct scm_cookie *scm)
> >  {
> >         put_pid(scm->pid);
> > -       scm->pid  = NULL;
> > +       scm->pid = NULL;
> 
> Could you split these double-space changes to another
> patch to make review easier ?
> 
> 
> >  }
> >
> >  static __inline__ void scm_destroy(struct scm_cookie *scm)
> > @@ -88,14 +98,20 @@ static __inline__ void scm_destroy(struct scm_cookie *scm)
> >                 __scm_destroy(scm);
> >  }
> >
> > -static __inline__ int scm_send(struct socket *sock, struct msghdr *msg,
> > -                              struct scm_cookie *scm, bool forcecreds)
> > +static inline int scm_send(struct socket *sock, struct msghdr *msg,
> > +                          struct scm_cookie *scm, bool forcecreds)
> >  {
> >         memset(scm, 0, sizeof(*scm));
> >         scm->creds.uid = INVALID_UID;
> >         scm->creds.gid = INVALID_GID;
> > -       if (forcecreds)
> > -               scm_set_cred(scm, task_tgid(current), current_uid(), current_gid());
> > +
> > +       if (forcecreds) {
> > +               int err = scm_set_cred(scm, task_tgid(current), true,
> > +                                      current_uid(), current_gid());
> 
> Do we need to pass true here ?
> 
> Given this series affects scm_pidfd_recv(), we don't need to
> touch netlink path that is not allowed to call scm_recv_unix() ?
> 
> Then, all callers pass false to scm_set_cred() and
> pidfs_register_pid() there will be unnecessary.
> 
> 
> > +               if (err)
> > +                       return err;
> > +       }
> > +
> >         unix_get_peersec_dgram(sock, scm);
> >         if (msg->msg_controllen <= 0)
> >                 return 0;
> > diff --git a/net/core/scm.c b/net/core/scm.c
> > index 68441c024dd8..50dfec6f8a2b 100644
> > --- a/net/core/scm.c
> > +++ b/net/core/scm.c
> > @@ -147,9 +147,15 @@ EXPORT_SYMBOL(__scm_destroy);
> >
> >  static inline int __scm_replace_pid(struct scm_cookie *scm, struct pid *pid)
> >  {
> > +       int err;
> > +
> >         /* drop all previous references */
> >         scm_destroy_cred(scm);
> >
> > +       err = pidfs_register_pid(pid);
> > +       if (err)
> > +               return err;
> > +
> >         scm->pid = pid;
> >         scm->creds.pid = pid_vnr(pid);
> >         return 0;
> > diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
> > index df2174d9904d..18c677683ddc 100644
> > --- a/net/unix/af_unix.c
> > +++ b/net/unix/af_unix.c
> > @@ -1924,12 +1924,27 @@ static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb)
> >         scm->fp = scm_fp_dup(UNIXCB(skb).fp);
> >  }
> >
> > +static int unix_set_pid_to_skb(struct sk_buff *skb, struct pid *pid, bool pidfs_register)
> > +{
> > +       if (pidfs_register) {
> > +               int err;
> > +
> > +               err = pidfs_register_pid(pid);
> > +               if (err)
> > +                       return err;
> > +       }
> > +
> > +       UNIXCB(skb).pid = get_pid(pid);
> > +       return 0;
> > +}
> > +
> >  static void unix_destruct_scm(struct sk_buff *skb)
> >  {
> >         struct scm_cookie scm;
> >
> >         memset(&scm, 0, sizeof(scm));
> > -       scm.pid  = UNIXCB(skb).pid;
> > +       scm.pid = UNIXCB(skb).pid;
> > +
> >         if (UNIXCB(skb).fp)
> >                 unix_detach_fds(&scm, skb);
> >
> > @@ -1943,7 +1958,10 @@ static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool sen
> >  {
> >         int err = 0;
> >
> > -       UNIXCB(skb).pid = get_pid(scm->pid);
> > +       err = unix_set_pid_to_skb(skb, scm->pid, false);
> > +       if (unlikely(err))
> 
> This does not fail too.
> 
> Perhaps keep get_pid() here and move pidfs_register_pid()
> to unix_maybe_add_creds(), that will look simpler.
> 
> 
> > +               return err;
> > +
> >         UNIXCB(skb).uid = scm->creds.uid;
> >         UNIXCB(skb).gid = scm->creds.gid;
> >         UNIXCB(skb).fp = NULL;
> > @@ -1957,7 +1975,8 @@ static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool sen
> >
> >  static void unix_skb_to_scm(struct sk_buff *skb, struct scm_cookie *scm)
> >  {
> > -       scm_set_cred(scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
> > +       /* scm_set_cred() can't fail when pidfs_register == false */
> > +       scm_set_cred(scm, UNIXCB(skb).pid, false, UNIXCB(skb).uid, UNIXCB(skb).gid);
> >         unix_set_secdata(scm, skb);
> >  }
> >
> > @@ -1971,6 +1990,7 @@ static void unix_skb_to_scm(struct sk_buff *skb, struct scm_cookie *scm)
> >   * We include credentials if source or destination socket
> >   * asserted SOCK_PASSCRED.
> >   *
> > + * Context: May sleep.
> >   * Return: On success zero, on error a negative error code is returned.
> >   */
> >  static int unix_maybe_add_creds(struct sk_buff *skb, const struct sock *sk,
> > @@ -1980,7 +2000,12 @@ static int unix_maybe_add_creds(struct sk_buff *skb, const struct sock *sk,
> >                 return 0;
> >
> >         if (unix_may_passcred(sk) || unix_may_passcred(other)) {
> 
> I forgot to mention that this part will conflict with net-next.
> 
> I guess Christian will take this series via vfs tree ?

I'll just grab it and take care of the merge conflict.
Thanks for all the reviews. This will be a really helpful extension.
I really really like that we're pushing the envelope on secure client
authentication quite a bit with the recent work in this area!