Update do_sock_getsockopt() to use the new getsockopt_iter callback
when available. Add do_sock_getsockopt_iter() helper that:
1. Reads optlen from user/kernel space
2. Initializes a sockopt_t with the appropriate iov_iter (kvec for
kernel, ubuf for user buffers) and sets opt.optlen
3. Calls the protocol's getsockopt_iter callback
4. Writes opt.optlen back to user/kernel space
The optlen is always written back, even on failure. Some protocols
(e.g. CAN raw) return -ERANGE and set optlen to the required buffer
size so userspace knows how much to allocate.
The callback is responsible for setting opt.optlen to indicate the
returned data size.
Signed-off-by: Breno Leitao <leitao@debian.org>
---
net/socket.c | 48 +++++++++++++++++++++++++++++++++++++++++++++---
1 file changed, 45 insertions(+), 3 deletions(-)
diff --git a/net/socket.c b/net/socket.c
index ade2ff5845a0..4a74a4aa1bb4 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -77,6 +77,7 @@
#include <linux/mount.h>
#include <linux/pseudo_fs.h>
#include <linux/security.h>
+#include <linux/uio.h>
#include <linux/syscalls.h>
#include <linux/compat.h>
#include <linux/kmod.h>
@@ -2349,6 +2350,44 @@ SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
INDIRECT_CALLABLE_DECLARE(bool tcp_bpf_bypass_getsockopt(int level,
int optname));
+static int do_sock_getsockopt_iter(struct socket *sock,
+ const struct proto_ops *ops, int level,
+ int optname, sockptr_t optval,
+ sockptr_t optlen)
+{
+ struct kvec kvec;
+ sockopt_t opt;
+ int koptlen;
+ int err;
+
+ if (copy_from_sockptr(&koptlen, optlen, sizeof(int)))
+ return -EFAULT;
+
+ if (optval.is_kernel) {
+ kvec.iov_base = optval.kernel;
+ kvec.iov_len = koptlen;
+ iov_iter_kvec(&opt.iter, ITER_DEST, &kvec, 1, koptlen);
+ } else {
+ iov_iter_ubuf(&opt.iter, ITER_DEST, optval.user, koptlen);
+ }
+ opt.optlen = koptlen;
+
+ /* iter is initialized as ITER_DEST. Callbacks that need to read
+ * from optval (e.g. PACKET_HDRLEN) must flip data_source to
+ * ITER_SOURCE, then restore ITER_DEST before writing back.
+ */
+ err = ops->getsockopt_iter(sock, level, optname, &opt);
+
+ /* Always write back optlen, even on failure. Some protocols
+ * (e.g. CAN raw) return -ERANGE and set optlen to the required
+ * buffer size so userspace knows how much to allocate.
+ */
+ if (copy_to_sockptr(optlen, &opt.optlen, sizeof(int)))
+ return -EFAULT;
+
+ return err;
+}
+
int do_sock_getsockopt(struct socket *sock, bool compat, int level,
int optname, sockptr_t optval, sockptr_t optlen)
{
@@ -2366,15 +2405,18 @@ int do_sock_getsockopt(struct socket *sock, bool compat, int level,
ops = READ_ONCE(sock->ops);
if (level == SOL_SOCKET) {
err = sk_getsockopt(sock->sk, level, optname, optval, optlen);
- } else if (unlikely(!ops->getsockopt)) {
- err = -EOPNOTSUPP;
- } else {
+ } else if (ops->getsockopt_iter) {
+ err = do_sock_getsockopt_iter(sock, ops, level, optname,
+ optval, optlen);
+ } else if (ops->getsockopt) {
if (WARN_ONCE(optval.is_kernel || optlen.is_kernel,
"Invalid argument type"))
return -EOPNOTSUPP;
err = ops->getsockopt(sock, level, optname, optval.user,
optlen.user);
+ } else {
+ err = -EOPNOTSUPP;
}
if (!compat)
--
2.52.0
On 04/01, Breno Leitao wrote:
> Update do_sock_getsockopt() to use the new getsockopt_iter callback
> when available. Add do_sock_getsockopt_iter() helper that:
>
> 1. Reads optlen from user/kernel space
> 2. Initializes a sockopt_t with the appropriate iov_iter (kvec for
> kernel, ubuf for user buffers) and sets opt.optlen
> 3. Calls the protocol's getsockopt_iter callback
> 4. Writes opt.optlen back to user/kernel space
>
> The optlen is always written back, even on failure. Some protocols
> (e.g. CAN raw) return -ERANGE and set optlen to the required buffer
> size so userspace knows how much to allocate.
>
> The callback is responsible for setting opt.optlen to indicate the
> returned data size.
>
> Signed-off-by: Breno Leitao <leitao@debian.org>
> ---
> net/socket.c | 48 +++++++++++++++++++++++++++++++++++++++++++++---
> 1 file changed, 45 insertions(+), 3 deletions(-)
>
> diff --git a/net/socket.c b/net/socket.c
> index ade2ff5845a0..4a74a4aa1bb4 100644
> --- a/net/socket.c
> +++ b/net/socket.c
> @@ -77,6 +77,7 @@
> #include <linux/mount.h>
> #include <linux/pseudo_fs.h>
> #include <linux/security.h>
> +#include <linux/uio.h>
> #include <linux/syscalls.h>
> #include <linux/compat.h>
> #include <linux/kmod.h>
> @@ -2349,6 +2350,44 @@ SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
> INDIRECT_CALLABLE_DECLARE(bool tcp_bpf_bypass_getsockopt(int level,
> int optname));
>
> +static int do_sock_getsockopt_iter(struct socket *sock,
> + const struct proto_ops *ops, int level,
> + int optname, sockptr_t optval,
> + sockptr_t optlen)
If we want to eventually remove sockptr_t, why not make this new handler
work with iov_iters from the beginning? The callers can have some new temporary
sockptr_to_iter() or something?
> +{
> + struct kvec kvec;
> + sockopt_t opt;
> + int koptlen;
> + int err;
> +
> + if (copy_from_sockptr(&koptlen, optlen, sizeof(int)))
> + return -EFAULT;
> +
> + if (optval.is_kernel) {
> + kvec.iov_base = optval.kernel;
> + kvec.iov_len = koptlen;
> + iov_iter_kvec(&opt.iter, ITER_DEST, &kvec, 1, koptlen);
> + } else {
> + iov_iter_ubuf(&opt.iter, ITER_DEST, optval.user, koptlen);
> + }
> + opt.optlen = koptlen;
> +
> + /* iter is initialized as ITER_DEST. Callbacks that need to read
> + * from optval (e.g. PACKET_HDRLEN) must flip data_source to
> + * ITER_SOURCE, then restore ITER_DEST before writing back.
> + */
Have you considered creating two iters? opt.iter_in and opt.iter_out.
That way you don't have to flip the source back and forth in the
handlers.
On Wed, Apr 01, 2026 at 09:34:04AM -0700, Stanislav Fomichev wrote:
> > +static int do_sock_getsockopt_iter(struct socket *sock,
> > + const struct proto_ops *ops, int level,
> > + int optname, sockptr_t optval,
> > + sockptr_t optlen)
>
> If we want to eventually remove sockptr_t, why not make this new handler
> work with iov_iters from the beginning? The callers can have some new temporary
> sockptr_to_iter() or something?
The goal is to eliminate __user memory from the callbacks entirely, which
would make sockptr_t unnecessary. This series removes the callbacks that
originally necessitated sockptr_t's existence.
Therefore, working from the callbacks back to userspace seems to be a more
logical approach than replacing the middle layers of the implementation,
and then touching the callbacks.
So, yes, the sockptr_t() is used here as temporary glue to be able to
get rid of the elephant in the room.
> > + /* iter is initialized as ITER_DEST. Callbacks that need to read
> > + * from optval (e.g. PACKET_HDRLEN) must flip data_source to
> > + * ITER_SOURCE, then restore ITER_DEST before writing back.
> > + */
>
> Have you considered creating two iters? opt.iter_in and opt.iter_out.
> That way you don't have to flip the source back and forth in the
> handlers.
That's a good suggestion I hadn't considered. My initial thought was to
create a helper like sockopt_read_val() to handle the flip-read-flip
dance.
Would opt.iter_in and opt.iter_out be clearer than the helper approach?
Thanks for the review,
--breno
On 04/01, Breno Leitao wrote:
> On Wed, Apr 01, 2026 at 09:34:04AM -0700, Stanislav Fomichev wrote:
> > > +static int do_sock_getsockopt_iter(struct socket *sock,
> > > + const struct proto_ops *ops, int level,
> > > + int optname, sockptr_t optval,
> > > + sockptr_t optlen)
> >
> > If we want to eventually remove sockptr_t, why not make this new handler
> > work with iov_iters from the beginning? The callers can have some new temporary
> > sockptr_to_iter() or something?
>
> The goal is to eliminate __user memory from the callbacks entirely, which
> would make sockptr_t unnecessary. This series removes the callbacks that
> originally necessitated sockptr_t's existence.
>
> Therefore, working from the callbacks back to userspace seems to be a more
> logical approach than replacing the middle layers of the implementation,
> and then touching the callbacks.
>
> So, yes, the sockptr_t() is used here as temporary glue to be able to
> get rid of the elephant in the room.
So maybe something like this is better to communicate your long-term intent?
} else if (ops->getsockopt_iter) {
optval = sockptr_to_iter(optval)
optlen = sockptr_to_iter(optlen)
do_sock_getsockopt_iter(...) /* does not know what sockptr_t is */
}
?
Then your new do_sock_getsockopt_iter is sockptr-free from the beginning
and at some point we'll just drop/move those sockptr_to_iter calls?
> > > + /* iter is initialized as ITER_DEST. Callbacks that need to read
> > > + * from optval (e.g. PACKET_HDRLEN) must flip data_source to
> > > + * ITER_SOURCE, then restore ITER_DEST before writing back.
> > > + */
> >
> > Have you considered creating two iters? opt.iter_in and opt.iter_out.
> > That way you don't have to flip the source back and forth in the
> > handlers.
>
> That's a good suggestion I hadn't considered. My initial thought was to
> create a helper like sockopt_read_val() to handle the flip-read-flip
> dance.
>
> Would opt.iter_in and opt.iter_out be clearer than the helper approach?
>
> Thanks for the review,
> --breno
I hope this way it will be easier to review protocol handler changes.
For example, looking at your AF_PACKET patch, you won't have to care
about flipping the source and doing the revert. Most/all of the changes will
be simple:
- s/get_user(len, optlen)/len = opt->optlen/
- s/put_user(len, optlen)/opt->optlen = len/
- s/copy_from_user(xxx, optval, len)/copy_from_iter(xxx, len, &opt->iter_in)/
- s/copy_to_user(optval, xxx, len)/copy_to_iter(xxx, len, &opt->iter_out)/
Might be even possible to express these with coccinelle?
© 2016 - 2026 Red Hat, Inc.